Beispiel #1
0
//Loads the data (train, validation, test sets) into memory following the specifications 
//in the attribute file.
//
//in:
//	trainFName, validFName, testFName - names of the data files; an empty string (or a null 
//		pointer) means that the corresponding data set is not provided
//	attrFName - name of the attribute file; one attribute per line in the form "name: type.", 
//		the response is marked with "(class)", an optional weight column with "(weight)",
//		an optional "contexts" section lists attributes to ignore ("name never")
//	doOut - if true, a progress message is printed to cout after every 100000 lines read
//
//Throws (directly): OPEN_ATTR_ERR, MULT_CLASS_ERR, NO_CLASS_ERR, ATTR_NAME_DEF_ERR, 
//	ATTR_TYPE_ERR, NOM_ACTIVE_ERR, OPEN_TRAIN_ERR, TRAIN_EMPTY_ERR, ATTR_NOT_BOOL_ERR,
//	OPEN_VALID_ERR, VALID_EMPTY_ERR, OPEN_TEST_ERR; errors raised by readData are re-thrown
//	after the offending line number is written to cerr
INDdata::INDdata(const char* trainFName, const char* validFName, const char* testFName,
	const char* attrFName, bool doOut)
{
	LogStream clog;

	//a missing attribute file name is reported the same way as a file that cannot be opened
	if(!attrFName)
		throw OPEN_ATTR_ERR;

	//read attr file, collect info about boolean attributes and attrN
	clog << "Reading the attribute file: \"" << attrFName << "\"\n";
	fstream fattr;
	fattr.open(attrFName, ios_base::in);
	if(!fattr) 
		throw OPEN_ATTR_ERR;

	char buf[LINE_LEN];	//buffer for reading from input files
	getLineExt(fattr, buf);

	//read list of attributes, collect information about them
	//attrId counts only real attributes, colNo counts every column (incl. response and weight)
	int attrId, colNo; // counters
	string tarName; //name of the response attribute
	bool foundClass = false;	//response found flag
	weightColNo = -1;	//-1 means "no weight column"
	for(attrId = 0, colNo = 0; fattr.gcount(); attrId++, colNo++)
	{
		string attrStr(buf);	//a line of an attr file (corresponds to 1 attribute)
		
		//check for response attribute
		if(attrStr.find("(class)") != string::npos)	
		{
			if(foundClass)
				throw MULT_CLASS_ERR;

			tarColNo = colNo;
			attrId--;	//the response is a column, but not an attribute
			foundClass = true;

			string::size_type nameLen = attrStr.find(":");
			tarName = attrStr.substr(0, nameLen);

			getLineExt(fattr, buf);
			continue;
		}
		//check for weight column
		if(attrStr.find("(weight)") != string::npos)	
		{
			weightColNo = colNo;
			attrId--;	//the weight is a column, but not an attribute

			getLineExt(fattr, buf);
			continue;
		}

		//parse attr name
		string::size_type nameLen = attrStr.find(":");
		if((attrStr.find("contexts") != string::npos) || (nameLen == string::npos)) 
			break; //end of listed attributes
		string attrName = attrStr.substr(0, nameLen);
		if(attrName.find_first_of("\\/*?\"<>|:") != string::npos)
			throw ATTR_NAME_DEF_ERR;
		attrNames.push_back(trimSpace(attrName));

		//parse attr type: the text between ':' and the first '.'
		string::size_type endType = attrStr.find(".");
		string typeStr = attrStr.substr(nameLen + 1, endType - nameLen - 1);
		typeStr = trimSpace(typeStr);
		if(typeStr.compare("0,1") == 0)
			boolAttrs.insert(attrId);
		else if(typeStr.compare("nom") == 0)
			nomAttrs.insert(attrId);
		//look for "cont" in the type only: searching the whole line would accept any type 
		//for an attribute whose name happens to contain "cont"
		else if(typeStr.find("cont") == string::npos) 
			throw ATTR_TYPE_ERR;

		getLineExt(fattr, buf);
	}
	attrN = attrId;
	colN = colNo;
	if(!foundClass)
		throw NO_CLASS_ERR;
	
	//read contexts part (if any), add unused attributes into ignoreattrs
	while(fattr.gcount())
	{
		string attrStr(buf);
		if(attrStr.find(" never") != string::npos)
		{//extract name of the attribute, find its number, insert it into ignoreattrs
			string::size_type nameLen = attrStr.find(" ");
			string attrName = attrStr.substr(0, nameLen);
			attrName = trimSpace(attrName);
			int neverAttrId = getAttrId(attrName);
			if (neverAttrId == -1)
				clog << "\nWARNING: trying to exclude \"" << attrName << "\" - this is not a valid feature\n\n";
			else
				ignoreAttrs.insert(neverAttrId);
		}
		getLineExt(fattr, buf);
	}
	fattr.close();
	
	int activeAttrN = attrN - (int)ignoreAttrs.size();
	clog << attrN << " attributes\n" << activeAttrN << " active attributes\n\n";
	//every nominal attribute must be listed as ignored
	if(!isSubset(nomAttrs, ignoreAttrs))
		throw NOM_ACTIVE_ERR;

	//Read data
	//give hasMV a defined value even when no train set is provided
	hasMV = false;
	if(trainFName && *trainFName)
	{//Read train set
		clog << "Reading the train set: \"" << trainFName << "\"\n";
		fstream fin;
		fin.open(trainFName, ios_base::in);
		if(fin.fail()) 
			throw OPEN_TRAIN_ERR;
		 
		getLineExt(fin, buf);
		int caseNo;
		for(caseNo = 0; fin.gcount(); caseNo++)
		{//read one line of data file, save class value in targets, attribute values in data
			if(doOut && ((caseNo + 1)% 100000 == 0))
				cout << "\tRead " << caseNo + 1 << " lines..." << endl;
			
			floatv item;	//single data point
			try {
				readData(buf, fin.gcount(), item, colN);
			} catch (TE_ERROR) {
				//report the line number, then pass the original error on
				cerr << "\nLine " << caseNo + 1 << "\n";
				throw;
			}
			
			trainTar.push_back(item[tarColNo]);
			if(weightColNo != -1)
				trainW.push_back(item[weightColNo]);
			//remove response and weight columns; the larger index goes first, so that 
			//the smaller one stays valid
			item.erase(item.begin() + max(tarColNo, weightColNo));
			if(weightColNo != -1)
				item.erase(item.begin() + min(tarColNo, weightColNo));

			//boolean attributes may only hold 0, 1 or NaN
			for(intset::iterator boolIt = boolAttrs.begin(); boolIt != boolAttrs.end(); ++boolIt)
				if((item[*boolIt] != 0) && (item[*boolIt] != 1) && !wxisNaN(item[*boolIt]))
					throw ATTR_NOT_BOOL_ERR;
			train.push_back(item);
			getLineExt(fin, buf);
		}
		trainN = caseNo;
		trainV = trainN;
		if(trainN == 0)
			throw TRAIN_EMPTY_ERR;
		if(weightColNo != -1)
		{//rescale weights so that they sum up to trainN, fill trainR with their running sums
			//NOTE(review): weights summing up to zero are not detected here and would 
			//lead to a division by zero
			double trainSum = 0;
			trainR.resize(trainN);
			for(int itemNo = 0; itemNo < trainN; itemNo++)
				trainSum += trainW[itemNo];
			double trCoef = trainN / trainSum;
			for(int itemNo = 0; itemNo < trainN; itemNo++)
			{
				trainW[itemNo] *= trCoef;
				trainR[itemNo] = (itemNo == 0) ? trainW[itemNo] : trainW[itemNo] + trainR[itemNo - 1];
			}

		}
		double trainStD = getTarStD(TRAIN);
		clog << trainN << " points in the train set, std. dev. of " << tarName << " values = " << trainStD 
			<< "\n\n"; 
		fin.close();

		//initialize bootstrap (bag of data)
		bootstrap.resize(trainN); 
		newBag();	
	}
	else 
	{//no train set
		trainN = 0;
		trainV = 0;	//kept equal to trainN, as in the branch above
	}

	if(validFName && *validFName)
	{//Read validation set
		clog << "Reading the validation set: \"" << validFName << "\"\n";
		fstream fvalid;
		fvalid.open(validFName, ios_base::in); 
		if(fvalid.fail())
			throw OPEN_VALID_ERR;

		getLineExt(fvalid, buf);
		int caseNo;
		for(caseNo=0; fvalid.gcount(); caseNo++)
		{//read one line of data file, save response value in validtar, attributes values in valid
			if (doOut && ((caseNo + 1) % 100000 == 0))
				cout << "\tRead " << caseNo + 1 << " lines..." << endl;
			
			floatv item;	//single data point
			try {
				readData(buf, fvalid.gcount(), item, colN);
			} catch (TE_ERROR) {
				//report the line number, then pass the original error on
				cerr << "\nLine " << caseNo + 1 << "\n";
				throw;
			}

			validTar.push_back(item[tarColNo]);
			if(weightColNo != -1)
				validW.push_back(item[weightColNo]);
			//remove response and weight columns, larger index first
			item.erase(item.begin() + max(tarColNo, weightColNo));
			if(weightColNo != -1)
				item.erase(item.begin() + min(tarColNo, weightColNo));

			valid.push_back(item);
			getLineExt(fvalid, buf);
		}
		validN = caseNo;
		if(validN == 0)
			throw VALID_EMPTY_ERR;
		double validStD = getTarStD(VALID);
		clog << validN << " points in the validation set, std. dev. of " << tarName << " values = " 
			<< validStD << "\n\n"; 
		fvalid.close();
	}
	else	//no validation set
		validN = 0;

	if(testFName && *testFName)
	{//Read test set
		clog << "Reading the test set: \"" << testFName << "\"\n";
		fstream ftest;
		ftest.open(testFName, ios_base::in); 
		if(ftest.fail()) 
			throw OPEN_TEST_ERR;

		getLineExt(ftest, buf);
		int caseNo;
		for(caseNo=0; ftest.gcount(); caseNo++)
		{//read one line of data file, save response value in testtar, attributes in test
			if (doOut && ((caseNo + 1) % 100000 == 0))
				cout << "\tRead " << caseNo + 1 << " lines...\n";

			floatv item;	//single data point
			try {
				readData(buf, ftest.gcount(), item, colN);
			} catch (TE_ERROR) {
				//report the line number, then pass the original error on
				cerr << "\nLine " << caseNo + 1 << "\n";
				throw;
			}

			testTar.push_back(item[tarColNo]);
			if(weightColNo != -1)
				testW.push_back(item[weightColNo]);
			//remove response and weight columns, larger index first
			item.erase(item.begin() + max(tarColNo, weightColNo));
			if(weightColNo != -1)
				item.erase(item.begin() + min(tarColNo, weightColNo));

			test.push_back(item);
			getLineExt(ftest, buf);
		}
		//unlike train and validation sets, an empty test set is not treated as an error
		testN = caseNo;
		double testStD = getTarStD(TEST);
		clog << testN << " points in the test set, std. dev. of " << tarName << " values = " << testStD 
			<< "\n\n";
		ftest.close();
	}
	else	//no test set
		testN = 0;
}
Beispiel #2
0
// Mutation-observer callback: keeps the tree view's rows in sync when an
// attribute of aElement is modified, added or removed.
//
//   aDocument    - not used by this method
//   aElement     - the element whose attribute changed
//   aNameSpaceID - namespace id of the attribute; 0 selects the lookup by
//                  plain name, anything else the namespace-aware lookup
//   aAttribute   - atom holding the attribute name
//   aModType     - one of nsIDOMMutationEvent::MODIFICATION / ADDITION /
//                  REMOVAL; any other value is ignored
//
// Does nothing when there is no tree or when attributes are not shown
// (SHOW_ATTRIBUTE is not set in mWhatToShow).
void
inDOMView::AttributeChanged(nsIDocument* aDocument, dom::Element* aElement,
                            PRInt32 aNameSpaceID, nsIAtom* aAttribute,
                            PRInt32 aModType)
{
  if (!mTree) {
    return;
  }

  if (!(mWhatToShow & nsIDOMNodeFilter::SHOW_ATTRIBUTE)) {
    return;
  }

  // hold a reference to ourselves while this method runs
  nsCOMPtr<nsIMutationObserver> kungFuDeathGrip(this);
  
  // get the dom attribute node, if there is any
  nsCOMPtr<nsIDOMNode> content(do_QueryInterface(aElement));
  nsCOMPtr<nsIDOMElement> el(do_QueryInterface(aElement));
  if (!content || !el) {
    // both interfaces are dereferenced below; without them nothing can be done
    return;
  }
  nsCOMPtr<nsIDOMAttr> domAttr;
  nsDependentAtomString attrStr(aAttribute);
  if (aNameSpaceID) {
    nsCOMPtr<nsINameSpaceManager> nsm =
      do_GetService(NS_NAMESPACEMANAGER_CONTRACTID);
    if (!nsm) {
      // we can't find out which attribute we want :(
      return;
    }
    nsString attrNS;
    nsresult rv = nsm->GetNameSpaceURI(aNameSpaceID, attrNS);
    if (NS_FAILED(rv)) {
      return;
    }
    (void)el->GetAttributeNodeNS(attrNS, attrStr, getter_AddRefs(domAttr));
  } else {
    (void)el->GetAttributeNode(attrStr, getter_AddRefs(domAttr));
  }

  if (aModType == nsIDOMMutationEvent::MODIFICATION) {
    // No fancy stuff here, just invalidate the changed row
    if (!domAttr) {
      return;
    }
    PRInt32 row = 0;
    if (NS_FAILED(NodeToRow(domAttr, &row))) {
      // the attribute has no row in the view, so there is nothing to repaint
      // (previously row 0 was invalidated in this case)
      return;
    }
    mTree->InvalidateRange(row, row);
  } else if (aModType == nsIDOMMutationEvent::ADDITION) {
    if (!domAttr) {
      return;
    }
    // get the number of attributes on this content node
    nsCOMPtr<nsIDOMNamedNodeMap> attrs;
    content->GetAttributes(getter_AddRefs(attrs));
    if (!attrs) {
      return;
    }
    PRUint32 attrCount = 0;
    attrs->GetLength(&attrCount);

    inDOMViewNode* contentNode = nsnull;
    PRInt32 contentRow;
    PRInt32 attrRow;
    if (mRootNode == content &&
        !(mWhatToShow & nsIDOMNodeFilter::SHOW_ELEMENT)) {
      // if this view has a root node but is not displaying it,
      // it is ok to act as if the changed attribute is on the root.
      attrRow = attrCount - 1;
    } else {
      if (NS_FAILED(NodeToRow(content, &contentRow))) {
        return;
      }
      RowToNode(contentRow, &contentNode);
      // a closed (or missing) parent row shows no attribute rows to update
      if (!contentNode || !contentNode->isOpen) {
        return;
      }
      // the new attribute is placed after the rows of the existing ones
      attrRow = contentRow + attrCount;
    }

    inDOMViewNode* newNode = CreateNode(domAttr, contentNode);
    inDOMViewNode* insertNode = nsnull;
    RowToNode(attrRow, &insertNode);
    if (insertNode) {
      if (contentNode &&
          insertNode->level <= contentNode->level) {
        // the node currently at attrRow is not below contentNode, so link
        // the new node after the node in the preceding row instead
        RowToNode(attrRow-1, &insertNode);
        InsertLinkAfter(newNode, insertNode);
      } else
        InsertLinkBefore(newNode, insertNode);
    }
    InsertNode(newNode, attrRow);
    mTree->RowCountChanged(attrRow, 1);
  } else if (aModType == nsIDOMMutationEvent::REMOVAL) {
    // At this point, the attribute is already gone from the DOM, but is still represented
    // in our mRows array.  Search through the content node's children for the corresponding
    // node and remove it.

    // get the row of the content node
    inDOMViewNode* contentNode = nsnull;
    PRInt32 contentRow;
    PRInt32 baseLevel;
    if (NS_SUCCEEDED(NodeToRow(content, &contentRow))) {
      RowToNode(contentRow, &contentNode);
      if (!contentNode) {
        return;
      }
      baseLevel = contentNode->level;
    } else {
      if (mRootNode == content) {
        // the root has no row of its own; start the search at row 0 and
        // look for rows at level 0
        contentRow = -1;
        baseLevel = -1;
      } else
        return;
    }

    // search for the attribute node that was removed
    inDOMViewNode* checkNode = nsnull;
    PRInt32 row = 0;
    for (row = contentRow+1; row < GetRowCount(); ++row) {
      checkNode = GetNodeAt(row);
      if (checkNode->level == baseLevel+1) {
        domAttr = do_QueryInterface(checkNode->node);
        if (domAttr) {
          nsAutoString attrName;
          domAttr->GetNodeName(attrName);
          if (attrName.Equals(attrStr)) {
            // we have found the row for the attribute that was removed
            RemoveLink(checkNode);
            RemoveNode(row);
            mTree->RowCountChanged(row, -1);
            break;
          }
        }
      }
      // a row at or above the content node's level ends its subtree
      if (checkNode->level <= baseLevel)
        break;
    }

  }
}