//Downloads the data (train, validation, test sets) into memory following specifications //in the attr file. //filenames may be empty strings, if correspondent data is not provided INDdata::INDdata(const char* trainFName, const char* validFName, const char* testFName, const char* attrFName, bool doOut) { LogStream clog; //read attr file, collect info about boolean attributes and attrN clog << "Reading the attribute file: \"" << attrFName << "\"\n"; fstream fattr; fattr.open(attrFName, ios_base::in); if(!fattr) throw OPEN_ATTR_ERR; char buf[LINE_LEN]; //buffer for reading from input files getLineExt(fattr, buf); //read list of attributes, collect information about them int attrId, colNo; // counters string tarName; //name of the response attribute bool foundClass = false; //response found flag weightColNo = -1; for(attrId = 0, colNo = 0; fattr.gcount(); attrId++, colNo++) { string attrStr(buf); //a line of an attr file (corresponds to 1 attribute) //check for response attribute if(attrStr.find("(class)") != string::npos) { if(foundClass) throw MULT_CLASS_ERR; tarColNo = colNo; attrId--; foundClass = true; string::size_type nameLen = attrStr.find(":"); tarName = attrStr.substr(0, nameLen); getLineExt(fattr, buf); continue; } if(attrStr.find("(weight)") != string::npos) { weightColNo = colNo; attrId--; getLineExt(fattr, buf); continue; } //parse attr name string::size_type nameLen = attrStr.find(":"); if((attrStr.find("contexts") != -1) || (nameLen == -1)) break; //end of listed attributes string attrName = attrStr.substr(0, nameLen); if(attrName.find_first_of("\\/*?\"<>|:") != string::npos) throw ATTR_NAME_DEF_ERR; attrNames.push_back(trimSpace(attrName)); //parse attr type string::size_type endType = attrStr.find("."); string typeStr = attrStr.substr(nameLen + 1, endType - nameLen - 1); typeStr = trimSpace(typeStr); if(typeStr.compare("0,1") == 0) boolAttrs.insert(attrId); else if(typeStr.compare("nom") == 0) nomAttrs.insert(attrId); else if(attrStr.find("cont") == string::npos) throw ATTR_TYPE_ERR; getLineExt(fattr, buf); } attrN = attrId; colN = colNo; if(!foundClass) throw NO_CLASS_ERR; //read contexts part (if any), add unused attributes into ignoreattrs while(fattr.gcount()) { string attrStr(buf); if(attrStr.find(" never") != string::npos) {//extract name of the attribute, find its number, insert it into ignoreattrs int nameLen = (int)attrStr.find(" "); string attrName = attrStr.substr(0, nameLen); attrName = trimSpace(attrName); int neverAttrId = getAttrId(attrName); if (neverAttrId == -1) clog << "\nWARNING: trying to exclude \"" << attrName << "\" - this is not a valid feature\n\n"; else ignoreAttrs.insert(neverAttrId); } getLineExt(fattr, buf); } fattr.close(); int activeAttrN = attrN - (int)ignoreAttrs.size(); clog << attrN << " attributes\n" << activeAttrN << " active attributes\n\n"; if(!isSubset(nomAttrs, ignoreAttrs)) throw NOM_ACTIVE_ERR; //Read data if(string(trainFName).compare("") != 0) {//Read train set clog << "Reading the train set: \"" << trainFName << "\"\n"; fstream fin; fin.open(trainFName, ios_base::in); if(fin.fail()) throw OPEN_TRAIN_ERR; hasMV = false; getLineExt(fin, buf); int caseNo; for(caseNo = 0; fin.gcount(); caseNo++) {//read one line of data file, save class value in targets, attribute values in data if(doOut && ((caseNo + 1)% 100000 == 0)) cout << "\tRead " << caseNo + 1 << " lines..." << endl; floatv item; //single data point try { readData(buf, fin.gcount(), item, colN); } catch (TE_ERROR err) { cerr << "\nLine " << caseNo + 1 << "\n"; throw err; } trainTar.push_back(item[tarColNo]); if(weightColNo != -1) trainW.push_back(item[weightColNo]); item.erase(item.begin() + max(tarColNo, weightColNo)); if(weightColNo != -1) item.erase(item.begin() + min(tarColNo, weightColNo)); for(intset::iterator boolIt = boolAttrs.begin(); boolIt != boolAttrs.end(); boolIt++) if((item[*boolIt] != 0) && (item[*boolIt] != 1) && !wxisNaN(item[*boolIt])) throw ATTR_NOT_BOOL_ERR; train.push_back(item); getLineExt(fin, buf); } trainN = caseNo; trainV = trainN; if(trainN == 0) throw TRAIN_EMPTY_ERR; if(weightColNo != -1) { double trainSum = 0; trainR.resize(trainN); for(int itemNo = 0; itemNo < trainN; itemNo++) trainSum += trainW[itemNo]; double trCoef = trainN / trainSum; for(int itemNo = 0; itemNo < trainN; itemNo++) { trainW[itemNo] *= trCoef; trainR[itemNo] = (itemNo == 0) ? trainW[itemNo] : trainW[itemNo] + trainR[itemNo - 1]; } } double trainStD = getTarStD(TRAIN); clog << trainN << " points in the train set, std. dev. of " << tarName << " values = " << trainStD << "\n\n"; fin.close(); //initialize bootstrap (bag of data) bootstrap.resize(trainN); newBag(); } else //no train set trainN = 0; if(string(validFName).compare("") != 0) {//Read validation set clog << "Reading the validation set: \"" << validFName << "\"\n"; fstream fvalid; fvalid.open(validFName, ios_base::in); if(fvalid.fail()) throw OPEN_VALID_ERR; getLineExt(fvalid, buf); int caseNo; for(caseNo=0; fvalid.gcount(); caseNo++) {//read one line of data file, save response value in validtar, attributes values in valid if (doOut && ((caseNo + 1) % 100000 == 0)) cout << "\tRead " << caseNo + 1 << " lines..." << endl; floatv item; //single data point try { readData(buf, fvalid.gcount(), item, colN); } catch (TE_ERROR err) { cerr << "\nLine " << caseNo + 1 << "\n"; throw err; } validTar.push_back(item[tarColNo]); if(weightColNo != -1) validW.push_back(item[weightColNo]); item.erase(item.begin() + max(tarColNo, weightColNo)); if(weightColNo != -1) item.erase(item.begin() + min(tarColNo, weightColNo)); valid.push_back(item); getLineExt(fvalid, buf); } validN = caseNo; if(validN == 0) throw VALID_EMPTY_ERR; double validStD = getTarStD(VALID); clog << validN << " points in the validation set, std. dev. of " << tarName << " values = " << validStD << "\n\n"; fvalid.close(); } else //no validation set validN = 0; if(string(testFName).compare("") != 0) {//Read test set clog << "Reading the test set: \"" << testFName << "\"\n"; fstream ftest; ftest.open(testFName, ios_base::in); if(ftest.fail()) throw OPEN_TEST_ERR; getLineExt(ftest, buf); int caseNo; for(caseNo=0; ftest.gcount(); caseNo++) {//read one line of data file, save response value in testtar, attributes in test if (doOut && ((caseNo + 1) % 100000 == 0)) cout << "\tRead " << caseNo + 1 << " lines...\n"; floatv item; //single data point try { readData(buf, ftest.gcount(), item, colN); } catch (TE_ERROR err) { cerr << "\nLine " << caseNo + 1 << "\n"; throw err; } testTar.push_back(item[tarColNo]); if(weightColNo != -1) testW.push_back(item[weightColNo]); item.erase(item.begin() + max(tarColNo, weightColNo)); if(weightColNo != -1) item.erase(item.begin() + min(tarColNo, weightColNo)); test.push_back(item); getLineExt(ftest, buf); } testN = caseNo; double testStD = getTarStD(TEST); clog << testN << " points in the test set, std. dev. of " << tarName << " values = " << testStD << "\n\n"; ftest.close(); } else //no test set testN = 0; }
void inDOMView::AttributeChanged(nsIDocument* aDocument, dom::Element* aElement, PRInt32 aNameSpaceID, nsIAtom* aAttribute, PRInt32 aModType) { if (!mTree) { return; } if (!(mWhatToShow & nsIDOMNodeFilter::SHOW_ATTRIBUTE)) { return; } nsCOMPtr<nsIMutationObserver> kungFuDeathGrip(this); // get the dom attribute node, if there is any nsCOMPtr<nsIDOMNode> content(do_QueryInterface(aElement)); nsCOMPtr<nsIDOMElement> el(do_QueryInterface(aElement)); nsCOMPtr<nsIDOMAttr> domAttr; nsDependentAtomString attrStr(aAttribute); if (aNameSpaceID) { nsCOMPtr<nsINameSpaceManager> nsm = do_GetService(NS_NAMESPACEMANAGER_CONTRACTID); if (!nsm) { // we can't find out which attribute we want :( return; } nsString attrNS; nsresult rv = nsm->GetNameSpaceURI(aNameSpaceID, attrNS); if (NS_FAILED(rv)) { return; } (void)el->GetAttributeNodeNS(attrNS, attrStr, getter_AddRefs(domAttr)); } else { (void)el->GetAttributeNode(attrStr, getter_AddRefs(domAttr)); } if (aModType == nsIDOMMutationEvent::MODIFICATION) { // No fancy stuff here, just invalidate the changed row if (!domAttr) { return; } PRInt32 row = 0; NodeToRow(domAttr, &row); mTree->InvalidateRange(row, row); } else if (aModType == nsIDOMMutationEvent::ADDITION) { if (!domAttr) { return; } // get the number of attributes on this content node nsCOMPtr<nsIDOMNamedNodeMap> attrs; content->GetAttributes(getter_AddRefs(attrs)); PRUint32 attrCount; attrs->GetLength(&attrCount); inDOMViewNode* contentNode = nsnull; PRInt32 contentRow; PRInt32 attrRow; if (mRootNode == content && !(mWhatToShow & nsIDOMNodeFilter::SHOW_ELEMENT)) { // if this view has a root node but is not displaying it, // it is ok to act as if the changed attribute is on the root. attrRow = attrCount - 1; } else { if (NS_FAILED(NodeToRow(content, &contentRow))) { return; } RowToNode(contentRow, &contentNode); if (!contentNode->isOpen) { return; } attrRow = contentRow + attrCount; } inDOMViewNode* newNode = CreateNode(domAttr, contentNode); inDOMViewNode* insertNode = nsnull; RowToNode(attrRow, &insertNode); if (insertNode) { if (contentNode && insertNode->level <= contentNode->level) { RowToNode(attrRow-1, &insertNode); InsertLinkAfter(newNode, insertNode); } else InsertLinkBefore(newNode, insertNode); } InsertNode(newNode, attrRow); mTree->RowCountChanged(attrRow, 1); } else if (aModType == nsIDOMMutationEvent::REMOVAL) { // At this point, the attribute is already gone from the DOM, but is still represented // in our mRows array. Search through the content node's children for the corresponding // node and remove it. // get the row of the content node inDOMViewNode* contentNode = nsnull; PRInt32 contentRow; PRInt32 baseLevel; if (NS_SUCCEEDED(NodeToRow(content, &contentRow))) { RowToNode(contentRow, &contentNode); baseLevel = contentNode->level; } else { if (mRootNode == content) { contentRow = -1; baseLevel = -1; } else return; } // search for the attribute node that was removed inDOMViewNode* checkNode = nsnull; PRInt32 row = 0; for (row = contentRow+1; row < GetRowCount(); ++row) { checkNode = GetNodeAt(row); if (checkNode->level == baseLevel+1) { domAttr = do_QueryInterface(checkNode->node); if (domAttr) { nsAutoString attrName; domAttr->GetNodeName(attrName); if (attrName.Equals(attrStr)) { // we have found the row for the attribute that was removed RemoveLink(checkNode); RemoveNode(row); mTree->RowCountChanged(row, -1); break; } } } if (checkNode->level <= baseLevel) break; } } }