/**
 * Finds the direct child of `node` whose term equals `candidateTerm`.
 *
 * The children of a node are stored as a chain of LinkedList entries that
 * is reached through `node->childListOffset`; each entry refers to a child
 * Node through `nodeOffset` and to the next entry through `nextOffset`.
 * An offset of 0 is treated as "none" for both fields.
 *
 * @param node           node whose children are searched (dereferenced
 *                       unconditionally, so it must not be NULL)
 * @param candidateTerm  term to look for
 * @return the matching child node, or NULL when the node has no child list
 *         or no child carries that term
 */
tiberius::mmap::Node * tiberius::mmap::SuffixTree::getChildNode(tiberius::mmap::Node *node, string &candidateTerm) {
    // No child list at all: nothing to search.
    if (!(node->childListOffset > 0)) {
        return NULL;
    }

    tiberius::mmap::LinkedList *entry = getLinkedList(node->childListOffset);
    for (;;) {
        // An entry without a node terminates the search.
        if (entry->nodeOffset == 0) {
            return NULL;
        }

        tiberius::mmap::Node *candidate = getNode(entry->nodeOffset);
        const string storedTerm(candidate->term);
        if (storedTerm == candidateTerm) {
            return candidate;
        }

        // End of the chain reached without a match.
        if (entry->nextOffset == 0) {
            return NULL;
        }
        entry = getLinkedList(entry->nextOffset);
    }
}
/**
 * Writes one level of the in-memory tree into the memory-mapped file and
 * then recurses into every child.
 *
 * When `ptr` is NULL the walk starts at `this->tree` and the children are
 * attached to the node returned by getRoot(); otherwise the children of
 * `ptr` are attached to the node found at `ptr->nodeOffset`.
 *
 * The work is done in two passes over `children`:
 *   1. For every child a LinkedList entry followed by a Node is placed at
 *      the current write offset, the entry is appended to the parent's
 *      child chain, and the node's offset is recorded in the child's
 *      WordAttributes (`nodeOffset`). After the pass the advanced offset
 *      is stored back into `globalVars->offset`.
 *   2. persist() is called for each child, which picks up the stored
 *      offset and writes that child's own children.
 *
 * `globalVars->docCount` is refreshed from `this->docCount` at the end of
 * every call, including the recursive ones.
 *
 * NOTE(review): the casts below bind tighter than `+`, so the offset is
 * added to an already-typed pointer (scaled by the element size) rather
 * than to `memoryFile` as a byte offset. This precedence is kept exactly
 * as it was; confirm it agrees with how getNode()/getLinkedList() resolve
 * offsets.
 * NOTE(review): the strcpy calls do not bound the copy by the size of
 * `term` / `pos`, and nothing here checks the offset against the size of
 * the mapping -- verify the callers guarantee both.
 *
 * @param ptr  in-memory subtree whose children are written, or NULL to
 *             start from the root of the tree
 */
void tiberius::mmap::SuffixTree::persist(tiberius::mmap::WordAttributes *ptr) {
    typedef map<string, tiberius::mmap::WordAttributes *>::iterator ChildIterator;

    long cursor = this->globalVars->offset;

    // Pick the in-memory subtree and the already-written node it maps to.
    tiberius::mmap::WordAttributes *subtree = NULL;
    tiberius::mmap::Node *parentNode = NULL;
    if (ptr) {
        subtree = ptr;
        parentNode = this->getNode(ptr->nodeOffset);
    } else {
        subtree = this->tree;
        parentNode = this->getRoot();
    }

    // Pass 1: lay out a list entry plus a node for every direct child.
    for (ChildIterator entry = subtree->children.begin(); entry != subtree->children.end(); ++entry) {
        tiberius::mmap::WordAttributes *attrs = entry->second;

        // Look up the current tail (if any) before touching the new entry.
        const bool isFirstChild = !parentNode->childListOffset;
        tiberius::mmap::LinkedList *previousLink = NULL;
        if (!isFirstChild) {
            previousLink = getLinkedList(parentNode->lastChildOffset);
        }

        tiberius::mmap::LinkedList *link = ((tiberius::mmap::LinkedList *) this->memoryFile) + cursor;
        link->nextOffset = 0;
        if (isFirstChild) {
            parentNode->childListOffset = cursor;
        } else {
            previousLink->nextOffset = cursor;
        }
        parentNode->lastChildOffset = cursor;
        cursor += sizeof(tiberius::mmap::LinkedList);

        // The node is placed right after its list entry.
        tiberius::mmap::Node *childNode = ((tiberius::mmap::Node *) this->memoryFile) + cursor;
        link->nodeOffset = cursor;
        childNode->childListOffset = 0;
        childNode->lastChildOffset = 0;
        attrs->nodeOffset = cursor;
        cursor += sizeof(tiberius::mmap::Node);

        strcpy(childNode->term, entry->first.c_str());
        strcpy(childNode->pos, attrs->pos.c_str());
        childNode->level = attrs->level;
        childNode->frequencyCount = attrs->frequencyCount;
        childNode->docCount = attrs->docCount;
    }

    // Publish the new write position before descending; the recursive
    // calls read it from globalVars.
    this->globalVars->offset = cursor;

    // Pass 2: write the grandchildren, one child subtree at a time.
    for (ChildIterator entry = subtree->children.begin(); entry != subtree->children.end(); ++entry) {
        this->persist(entry->second);
    }

    // Record the total number of documents processed.
    this->globalVars->docCount = this->docCount;
}
/*
 * DLR - rewrites the global rule list (gRules) to remove left recursion.
 *
 * The symbol list is walked from the node that is in ll[0] on entry. For
 * each symbol i two steps are performed:
 *
 *   1. Indirect left recursion: for every symbol j that precedes i in the
 *      list, each rule whose head serial is i and whose first body serial
 *      is j is expanded once per rule of j. Every expansion is prepended
 *      to gRules, and the rule that was expanded is then unlinked and
 *      freed.
 *   2. Direct left recursion: for rules whose head and first body symbol
 *      are both i, a primed symbol (i followed by an apostrophe) is
 *      created, new rules built from text are prepended to gRules, and the
 *      old rules of i are unlinked and freed.
 *
 * Rules are rebuilt by rendering them with Rule2String(), editing the text
 * and parsing it back with getLinkedList(..., newRuleNode); a space (0x20)
 * is used as the separator when the text is edited here.
 *
 * Reads and modifies the globals ll, gRules and gNonTerSerial. Takes no
 * parameters and returns nothing.
 *
 * NOTE(review): the text buffers are BUFSIZ bytes and are filled with
 * strcpy/memcpy without length checks; `addr->next` (and in one place
 * `addr->next->next`) is dereferenced without a NULL check -- presumably
 * every rule has at least one body symbol; confirm.
 */
void DLR() {
    pSymbolNode symbol_start = ll[0];
    pRule rule_start = gRules;
    // Keep the entry-time head of the symbol list in ll[2]; ll[0] may be
    // replaced below when primed symbols are added.
    ll[2] = ll[0];
    for (pSymbolNode i = symbol_start; i; i = i->next) {
        // Remove the indirect left recursion
        for (pSymbolNode j = symbol_start; j != i; j = j->next) {
            for (pRule ri = rule_start; ri; ri = ri->next) {
                // Rules of the form i ::= j...
                if (ri->addr->serial == i->serial) {
                    if (ri->addr->next->serial == j->serial) {
                        int changed = 0;
                        // j is going to be substituted
                        for (pRule rj = rule_start; rj; rj = rj->next) {
                            // Rules of the form j ::= ...
                            if (rj->addr->serial == j->serial) {
                                char* buf;
                                // Remember the j node so the body of ri can be restored afterwards.
                                pRuleNode origanal = ri->addr->next;
                                changed = 1;
                                // Temporarily splice the body of rj in place of j ...
                                buf = Rule2String(rj->addr->next);
                                ri->addr->next = (pRuleNode)getLinkedList(buf, origanal->next, newRuleNode);
                                free(buf);
                                // ... and add the expanded rule as a new rule at the head of gRules.
                                buf = Rule2String(ri->addr);
                                gRules = newRule((pRuleNode)getLinkedList(buf, 0, newRuleNode), gRules);
                                free(buf);
                                buf = NULL;
                                // Release the temporary nodes up to the shared tail. Only the
                                // nodes are freed here, not their `symbol` members.
                                for (pRuleNode prn = ri->addr->next; prn != origanal->next;) {
                                    pRuleNode temp = prn;
                                    prn = prn->next;
                                    free(temp);
                                }
                                // Put the original body back for the next rj.
                                ri->addr->next = origanal;
                            }
                        }
                        // Delete the rule ri, which has been replaced by its expansions
                        if (changed) {
                            // Locate the predecessor of ri, scanning from the current head.
                            pRule preRi = gRules;
                            while (preRi->next != ri && preRi != ri) preRi = preRi->next;
                            for (pRuleNode pr = ri->addr; pr;) {
                                pRuleNode prn = pr;
                                pr = pr->next;
                                free(prn->symbol);
                                free(prn);
                            }
                            preRi->next = ri->next;
                            free(ri);
                            // Continue from the predecessor so the loop increment lands on
                            // the rule that followed ri.
                            ri = preRi;
                        }
                    }
                }
            }
        }
        // Pick up the rules that were prepended above.
        rule_start = gRules;
        // Remove the direct left recursion
        for (pRule r = rule_start; r; r = r->next) {
            // Rules of the form i ::= i...
            if (r->addr->serial == i->serial && r->addr->serial == r->addr->next->serial) {
                char newSymbol[BUFSIZ], *buf;
                size_t sLen, rLen;
                pRule preRi;
                // Set once the non-recursive rules of i have been rewritten, so
                // that work is done only for the first recursive rule found.
                int deleted = 0;
                // Find all rules that are direct left-recursive rules of i
                for (pRule ri = r; ri; ri = ri->next) {
                    if (ri->addr->serial == i->serial && ri->addr->serial == ri->addr->next->serial) {
                        // Build the name of symbol i' (the name of i plus an apostrophe)
                        strcpy(newSymbol, i->symbol);
                        sLen = strlen(i->symbol);
                        newSymbol[sLen++] = '\'';
                        newSymbol[sLen] = 0;
                        // Add i' to the front of the symbol list unless the current
                        // head already carries that name.
                        if (strcmp(ll[0]->symbol, newSymbol)) {
                            ll[0] = newSymbolNode(gNonTerSerial, newSymbol, 0, 0, ll[0]);
                            gNonTerSerial += 2;
                        }
                        // Add the rule built from the text "i' 0", once only
                        // (presumably i' ::= 0 with 0 standing for the empty body -- confirm)
                        if (!deleted) {
                            newSymbol[sLen] = 0x20;
                            newSymbol[sLen + 1] = '0';
                            newSymbol[sLen + 2] = 0;
                            gRules = newRule((pRuleNode)getLinkedList(newSymbol, 0, newRuleNode), gRules);
                        }
                        // Add rule i' ::= ...i'
                        // The text is assembled in place as: i' <rest of ri after the leading i> i'
                        buf = Rule2String(ri->addr->next->next);
                        newSymbol[sLen++] = 0x20;
                        memcpy(newSymbol + sLen, buf, strlen(buf));
                        sLen += strlen(buf);
                        free(buf);
                        newSymbol[sLen++] = 0x20;
                        // rLen marks where the trailing i' goes; sLen is reset to
                        // the length of the leading i' (up to the first space).
                        rLen = sLen;
                        sLen = 0;
                        while (*(newSymbol + sLen) != 0x20) sLen++;
                        // Terminate after the leading i' so it can be copied to the end,
                        // then restore the separator and terminate the whole text.
                        newSymbol[sLen] = 0;
                        strcpy(newSymbol + rLen, newSymbol);
                        rLen += sLen;
                        newSymbol[sLen] = 0x20;
                        newSymbol[rLen] = 0;
                        gRules = newRule((pRuleNode)getLinkedList(newSymbol, 0, newRuleNode), gRules);
                        // Cut the buffer back to just i' for the use below.
                        newSymbol[sLen] = 0;
                        // Add rules i ::= a i' for all a in rules i ::= a that do not start with i
                        for (pRule rr = rule_start; rr; rr = rr->next) {
                            // Skip if the rules i ::= a have already been rewritten and deleted
                            if (deleted) break;
                            // Rule of the form i ::= a, not starting with i
                            if (rr->addr->serial == i->serial && rr->addr->serial != rr->addr->next->serial) {
                                char newR[BUFSIZ];
                                // New rule text: the text of rr, a space, then i'
                                buf = Rule2String(rr->addr);
                                rLen = strlen(buf);
                                strcpy(newR, buf);
                                free(buf);
                                newR[rLen++] = 0x20;
                                strcpy(newR + rLen, newSymbol);
                                gRules = newRule((pRuleNode)getLinkedList(newR, 0, newRuleNode), gRules);
                                // Delete the rule rr
                                preRi = gRules;
                                while (preRi->next != rr) preRi = preRi->next;
                                for (pRuleNode pr = rr->addr; pr;) {
                                    pRuleNode prn = pr;
                                    pr = pr->next;
                                    free(prn->symbol);
                                    free(prn);
                                }
                                preRi->next = rr->next;
                                // Keep the outer cursor valid if it pointed at the rule being freed.
                                if (rr == ri) ri = preRi;
                                free(rr);
                                rr = preRi;
                            }
                        }
                        deleted = 1;
                    }
                }
                // Delete every rule of i from r onwards. The rules prepended above sit
                // before r in the list and are therefore not visited by this loop.
                for (pRule ri = r; ri; ri = ri->next) {
                    if (ri->addr->serial == i->serial) {
                        preRi = gRules;
                        while (preRi->next != ri) preRi = preRi->next;
                        for (pRuleNode pr = ri->addr; pr;) {
                            pRuleNode prn = pr;
                            pr = pr->next;
                            free(prn->symbol);
                            free(prn);
                        }
                        preRi->next = ri->next;
                        // Keep the enclosing loop cursor valid when r itself is removed.
                        if (ri == r) r = preRi;
                        free(ri);
                        ri = preRi;
                    }
                }
                rule_start = gRules;
            }
        }
    }
}