예제 #1
0
// Looks up the direct child of `node` whose stored term equals
// `candidateTerm`.
//
// The children of a node are kept as a singly linked chain of LinkedList
// records addressed by offset; an offset of 0 means "no record". The chain is
// walked from node->childListOffset until a match is found, an entry without
// a node is reached, or the chain ends.
//
// Returns the matching child, or NULL when `node` has no child with that term.
tiberius::mmap::Node * tiberius::mmap::SuffixTree::getChildNode(tiberius::mmap::Node *node, string &candidateTerm) {
  // No child list at all: nothing to search.
  if (!(node->childListOffset > 0)) {
    return NULL;
  }

  tiberius::mmap::LinkedList *entry = getLinkedList(node->childListOffset);
  for (;;) {
    // An entry that does not reference a node terminates the search.
    if (entry->nodeOffset == 0) {
      return NULL;
    }

    tiberius::mmap::Node *candidate = getNode(entry->nodeOffset);
    if (candidateTerm == candidate->term) {
      return candidate;
    }

    // Last entry of the chain and still no match.
    if (entry->nextOffset == 0) {
      return NULL;
    }
    entry = getLinkedList(entry->nextOffset);
  }
}
예제 #2
0
// Writes the in-memory word tree into the memory file, one level at a time.
//
// ptr: subtree to persist. When NULL, the children of this->tree are written
//      and attached to getRoot(); otherwise the children of `ptr` are written
//      and attached to the node found at ptr->nodeOffset (which a previous
//      call of this function stored there).
//
// For every entry of `children` a LinkedList record immediately followed by a
// Node record is appended at globalVars->offset. The LinkedList record is
// hooked onto the end of the parent's child chain (childListOffset /
// lastChildOffset / nextOffset), and the offset of the new Node is remembered
// in the WordAttributes so that the recursive call can find it as a parent.
// After the whole level is written, globalVars->offset is advanced and each
// child subtree is persisted recursively. Finally the processed-document
// count is copied into globalVars.
//
// Terms and POS tags longer than the fixed-size fields of Node are truncated
// instead of overrunning the record.
//
// NOTE(review): nothing here checks that the memory file is large enough for
// the appended records - confirm the caller sizes it up front.
void tiberius::mmap::SuffixTree::persist(tiberius::mmap::WordAttributes *ptr) {
  typedef map<string, tiberius::mmap::WordAttributes *>::iterator ChildIterator;

  long offset = this->globalVars->offset;

  tiberius::mmap::WordAttributes *treePtr = NULL;
  tiberius::mmap::Node *parent = NULL;
  if (!ptr) {
    treePtr = this->tree;
    parent = this->getRoot();
  } else {
    treePtr = ptr;
    parent = this->getNode(ptr->nodeOffset);
  }

  // Pass 1: append one (LinkedList, Node) pair per child of this level.
  for (ChildIterator it = treePtr->children.begin(); it != treePtr->children.end(); ++it) {
    const string &term = it->first;
    tiberius::mmap::WordAttributes *wa = it->second;

    // NOTE(review): the cast binds tighter than '+', so `offset` is scaled by
    // sizeof(LinkedList) here (and by sizeof(Node) below) rather than being
    // applied as a byte offset. Kept exactly as written because it has to
    // agree with getLinkedList()/getNode(), which are not visible here -
    // confirm they compute addresses the same way.
    tiberius::mmap::LinkedList *newChild = (tiberius::mmap::LinkedList *) this->memoryFile+offset;
    newChild->nextOffset = 0;

    if (!parent->childListOffset) {
      // First child: the new entry becomes the head of the chain.
      parent->childListOffset = offset;
    } else {
      // Otherwise link it behind the current tail.
      getLinkedList(parent->lastChildOffset)->nextOffset = offset;
    }
    parent->lastChildOffset = offset;

    offset = offset + sizeof(tiberius::mmap::LinkedList);

    tiberius::mmap::Node *child = (tiberius::mmap::Node *) this->memoryFile+offset;
    newChild->nodeOffset = offset;
    child->childListOffset = 0;
    child->lastChildOffset = 0;
    // Remember where the node lives so the recursive call can use it as parent.
    wa->nodeOffset = offset;
    offset = offset + sizeof(tiberius::mmap::Node);

    // Bounded copies: never write past the fixed-size fields of the record.
    // Assumes Node::term and Node::pos are inline char arrays, which sizeof
    // requires here.
    strncpy(child->term, term.c_str(), sizeof(child->term) - 1);
    child->term[sizeof(child->term) - 1] = '\0';
    strncpy(child->pos, wa->pos.c_str(), sizeof(child->pos) - 1);
    child->pos[sizeof(child->pos) - 1] = '\0';

    child->level = wa->level;
    child->frequencyCount = wa->frequencyCount;
    child->docCount = wa->docCount;
  }

  // Publish the new end-of-data offset before recursing: each recursive call
  // starts appending from globalVars->offset.
  this->globalVars->offset = offset;

  // Pass 2: persist the subtree below every child just written.
  for (ChildIterator it = treePtr->children.begin(); it != treePtr->children.end(); ++it) {
    this->persist(it->second);
  }

  // write the total number of documents processed.
  this->globalVars->docCount = this->docCount;
}
예제 #3
0
파일: PreLL1.c 프로젝트: HsuJv/Note
// DLR - rewrites the global rule list gRules in place to remove left
// recursion, processing the non-terminal symbols of ll[0] in list order.
//
// For every symbol i:
//   1. Indirect step: each rule "i ::= j ..." where j precedes i in the symbol
//      list is replaced by one new rule per rule "j ::= ...", with j
//      substituted by that rule's right-hand side.
//   2. Direct step: if rules of the form "i ::= i ..." exist, a primed symbol
//      (i's name followed by ') is introduced and the rules of i are rebuilt
//      around it; the old rules of i are freed.
//
// New rules are always prepended to gRules. The function takes no arguments
// and returns nothing; it works entirely on the globals ll, gRules and
// gNonTerSerial.
//
// Layout relied on throughout: rule->addr is the node of the left-hand side
// and rule->addr->next is the first right-hand-side node.
//
// NOTE(review): Rule2String() presumably renders a node list as a
// space-separated string that the caller must free, and
// getLinkedList(str, tail, newRuleNode) presumably builds a node list from
// such a string and ends it with `tail` - confirm against their definitions.
// NOTE(review): rule->addr->next is dereferenced without a NULL check, so
// every rule is assumed to have at least one right-hand-side node.
void DLR() {
	pSymbolNode symbol_start = ll[0];
	pRule rule_start = gRules;
	// Keep the head of the symbol list as it was on entry in ll[2];
	// ll[0] may get new primed symbols pushed in front of it below.
	ll[2] = ll[0];

	for (pSymbolNode i = symbol_start; i; i = i->next) {
		// Delete the indirect left recursion
		// (j ranges over the symbols that come before i in the list)
		for (pSymbolNode j = symbol_start; j != i; j = j->next) {
			for (pRule ri = rule_start; ri; ri = ri->next) {
				// Rules that in the form of i ::= j...
				if (ri->addr->serial == i->serial) {
					if (ri->addr->next->serial == j->serial) {
						// Set once at least one replacement rule has been generated for ri
						int changed = 0;

						// j is going to be substituted
						for (pRule rj = rule_start; rj; rj = rj->next) {
							// Rules that in the form of j ::= ...
							if (rj->addr->serial == j->serial) {
								char* buf;
								// The leading j node of ri; restored after the new rule is built
								pRuleNode origanal = ri->addr->next;

								changed = 1;

								// Temporarily splice the right-hand side of rj into ri in
								// place of the leading j ...
								buf = Rule2String(rj->addr->next);
								ri->addr->next = (pRuleNode)getLinkedList(buf, origanal->next, newRuleNode);
								free(buf);
								// ... render the patched rule and prepend it to gRules as a
								// brand-new rule
								buf = Rule2String(ri->addr);
								gRules = newRule((pRuleNode)getLinkedList(buf, 0, newRuleNode), gRules);
								free(buf);
								buf = NULL;

								// Free the temporary spliced-in nodes, stopping at the original tail.
								// NOTE(review): only the node is freed here, not temp->symbol,
								// unlike the other deletion loops - confirm whether
								// newRuleNode allocates the symbol (possible leak).
								for (pRuleNode prn = ri->addr->next; prn != origanal->next;) {
									pRuleNode temp = prn;
									prn = prn->next;
									free(temp);
								}

								// Put ri back into its original shape for the next rj
								ri->addr->next = origanal;
							}
						}

						// Delete the original rule ri (i ::= j ...), which the new rules replace
						if (changed) {
							pRule preRi = gRules;

							// Find the predecessor of ri in gRules
							while (preRi->next != ri && preRi != ri) preRi = preRi->next;
							for (pRuleNode pr = ri->addr; pr;) {
								pRuleNode prn = pr;
								pr = pr->next;
								free(prn->symbol);
								free(prn);
							}
							preRi->next = ri->next;
							free(ri);
							// Step back so the loop's ri = ri->next continues after the removed rule
							ri = preRi;
						}
					}
				}
			}
		}

		// Rules were prepended above; restart the scans from the current head
		rule_start = gRules;

		// Delete the direct left recursion
		for (pRule r = rule_start; r; r = r->next) {
			// Rules that in the form of i ::= i...
			if (r->addr->serial == i->serial && r->addr->serial == r->addr->next->serial) {
				// newSymbol is a scratch buffer: it holds the primed symbol name and
				// is extended in place to build whole rule strings.
				// NOTE(review): nothing checks that the strings fit into BUFSIZ.
				char newSymbol[BUFSIZ], *buf;
				size_t sLen, rLen;
				pRule preRi;
				// Set after the first left-recursive rule has been handled, so the
				// one-time work below is not repeated
				int deleted = 0;

				// Find all rules that indicate a direct left recursion rule of i
				for (pRule ri = r; ri; ri = ri->next) {
					if (ri->addr->serial == i->serial && ri->addr->serial == ri->addr->next->serial) {
						// Add symbol i'
						// (pushed onto ll[0] only if the head of ll[0] is not already i')
						strcpy(newSymbol, i->symbol);
						sLen = strlen(i->symbol);
						newSymbol[sLen++] = '\'';
						newSymbol[sLen] = 0;
						if (strcmp(ll[0]->symbol, newSymbol)) {
							ll[0] = newSymbolNode(gNonTerSerial, newSymbol, 0, 0, ll[0]);
							gNonTerSerial += 2;
						}					

						// Add the rule built from the string "i' 0" (once per symbol i).
						// NOTE(review): "0" presumably denotes the empty production - confirm.
						if (!deleted) {
							newSymbol[sLen] = 0x20;
							newSymbol[sLen + 1] = '0';
							newSymbol[sLen + 2] = 0;
							gRules = newRule((pRuleNode)getLinkedList(newSymbol, 0, newRuleNode), gRules);
						}

						// Add rule i' ::= ...i'
						// The string "i' <rest of ri after the leading i> i'" is assembled
						// in newSymbol: append a space and the rest of the rule ...
						buf = Rule2String(ri->addr->next->next);
						newSymbol[sLen++] = 0x20;
						memcpy(newSymbol + sLen, buf, strlen(buf));
						sLen += strlen(buf);
						free(buf);
						newSymbol[sLen++] = 0x20;
						rLen = sLen;
						// ... then locate the end of the leading "i'" token, terminate the
						// buffer there for a moment and copy that token to the end
						sLen = 0;
						while (*(newSymbol + sLen) != 0x20) sLen++;
						newSymbol[sLen] = 0;
						strcpy(newSymbol + rLen, newSymbol);
						rLen += sLen;
						// Restore the separator and terminate the complete rule string
						newSymbol[sLen] = 0x20;
						newSymbol[rLen] = 0;
						gRules = newRule((pRuleNode)getLinkedList(newSymbol, 0, newRuleNode), gRules);

						// Cut the buffer back so that newSymbol is just "i'" again
						newSymbol[sLen] = 0;

						// Add rules i ::= a i' for all a in rules i ::= a starting without i
						for (pRule rr = rule_start; rr; rr = rr->next) {
							// If rules i ::= a i' for all a in rules i ::= a starting without i has been deleted
							if (deleted) break;

							// Rule in the form of i ::= a starting without i
							if (rr->addr->serial == i->serial && rr->addr->serial != rr->addr->next->serial) {
								char newR[BUFSIZ];

								// newR = "<rule rr as text> i'"
								buf = Rule2String(rr->addr);
								rLen = strlen(buf);
								strcpy(newR, buf);
								free(buf);
								newR[rLen++] = 0x20;
								strcpy(newR + rLen, newSymbol);
								gRules = newRule((pRuleNode)getLinkedList(newR, 0, newRuleNode), gRules);

								// Delete the rule rr
								// (a rule was just prepended, so rr is never the head and the
								// predecessor search terminates)
								preRi = gRules;
								while (preRi->next != rr) preRi = preRi->next;
								for (pRuleNode pr = rr->addr; pr;) {
									pRuleNode prn = pr;
									pr = pr->next;
									free(prn->symbol);
									free(prn);
								}
								preRi->next = rr->next;
								// Keep the enclosing cursor valid if it pointed at the removed rule
								if (rr == ri) ri = preRi;
								free(rr);
								rr = preRi;
							}
						}

						deleted = 1;
					}
				}

				// Delete all rules that indicates a direct left recursion of i
				// (the test below matches every rule from r onwards whose left-hand
				// side is i)
				for (pRule ri = r; ri; ri = ri->next) {
					if (ri->addr->serial == i->serial) {
						preRi = gRules;
						while (preRi->next != ri) preRi = preRi->next;
						for (pRuleNode pr = ri->addr; pr;) {
							pRuleNode prn = pr;
							pr = pr->next;
							free(prn->symbol);
							free(prn);
						}
						preRi->next = ri->next;
						// Keep the outer cursor r valid when the rule it points at is removed
						if (ri == r) r = preRi;
						free(ri);
						ri = preRi;
					}
				}

				// The list head changed; later scans start from the new head
				rule_start = gRules;
			}
		}
	}
}