void IndexContentTestCase::testIndexContent_DL()
{
    Index* pIndex;
    IndexReaderPtr pReader;
    const Term* pTerm;
    TermIteratorPtr pTermIter;
    int docCount = 0;
    int termCount = 0;
    uint32_t i;
    uint32_t indexTermId;
    string fileName;

    // Check posting list
    Path indexPath = TestHelper::getTestDataPath();
    indexPath.makeDirectory();
    indexPath.pushDirectory(_T("test_dlindex"));
    pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL);
    auto_ptr<Index> indexPtr(pIndex);
    pReader = pIndex->acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();
    pTermIter = pTermReader->termIterator("BODY");
    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();

    // Iterate over all terms
    while(pTermIter->next())
    {
        pTerm = pTermIter->term();
        CPPUNIT_ASSERT(pTermReader->seek(pTerm));
        indexTermId = (pTerm->cast<int32_t>())->getValue();

        docCount = 0;
        TermPostingIteratorPtr pTermDocFreqs = pTermReader->termPostings();
        while(pTermDocFreqs->nextDoc())
        {
            DocumentPtr pDoc = pDocReader->document(pTermDocFreqs->doc());
            docCount++;

            // Get the file path of this document
            fileName.assign(pDoc->getField("PATH")->getValue().c_str());
            TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName);
            CPPUNIT_ASSERT(pTermIdList != NULL);

            // Count this term's occurrences in the scanned file and compare
            // with the in-document frequency reported by the index
            for(i = 0, termCount = 0; i < pTermIdList->getSize(); i++)
            {
                if(indexTermId == pTermIdList->getValue(i))
                {
                    termCount++;
                }
            }
            CPPUNIT_ASSERT_EQUAL((tf_t)termCount, pTermDocFreqs->freq());
        } // end while nextDoc()
        CPPUNIT_ASSERT_EQUAL((df_t)docCount, pTermDocFreqs->getDocFreq());
    }
    CPPUNIT_ASSERT(m_pDocScanner->getTotalTermCount() == pReader->getNumTerms());
}
const indri::index::TermList* indri::index::MemoryIndex::termList( lemur::api::DOCID_T documentID ) {
  int documentIndex = documentID - documentBase();
  if( documentIndex < 0 || documentIndex >= (int)_documentData.size() )
    return 0;

  const DocumentData& data = _documentData[documentIndex];
  UINT64 documentOffset = data.offset;
  indri::utility::Buffer* documentBuffer = 0;
  std::list<indri::utility::Buffer*>::const_iterator iter;

  // The term lists are spread across a chain of buffers; walk the chain,
  // subtracting each buffer's fill size until the offset falls inside one.
  for( iter = _termLists.begin(); iter != _termLists.end(); ++iter ) {
    if( documentOffset < (*iter)->position() ) {
      documentBuffer = (*iter);
      break;
    }
    documentOffset -= (*iter)->position();
  }

  assert( documentBuffer );
  TermList* list = new TermList();
  list->read( documentBuffer->front() + documentOffset, data.byteLength );
  return list;
}
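The buffer-chain walk above is worth seeing in isolation. A minimal sketch of the same technique, with hypothetical names (Chunk, locate) and std::vector standing in for indri::utility::Buffer:

#include <cassert>
#include <cstddef>
#include <list>
#include <vector>

typedef std::vector<char> Chunk;

// Resolve a global offset into (chunk, local offset) by subtracting each
// chunk's size in order, as termList() does with Buffer::position().
const char* locate( const std::list<Chunk>& chunks, size_t offset ) {
  std::list<Chunk>::const_iterator it;
  for( it = chunks.begin(); it != chunks.end(); ++it ) {
    if( offset < it->size() )
      return &(*it)[0] + offset;  // the offset falls inside this chunk
    offset -= it->size();         // otherwise skip past it
  }
  assert( false && "offset beyond the end of the chain" );
  return 0;
}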
void Term::inputsToList(TermList& out)
{
    out.resize(numInputs());
    for (int i=0; i < numInputs(); i++)
        out.setAt(i, input(i));
}
int StemAnalyzer::analyze_index( const TermList & input, TermList & output, unsigned char retFlag )
{
    string inputstr, stem;
    TermList::const_iterator it;
    Term newTerm;
    TermList::iterator term_it;

    for( it = input.begin(); it != input.end(); it++ )
    {
        // NOTE: the stemming logic below is currently disabled.
//        if( retFlag_idx_ & ANALYZE_PRIME_ )
//        {
//            term_it = output.insert( output.end(), *it );
//        }
//        if( (retFlag_idx_ & ANALYZE_SECOND_) == 0 )
//            continue;
//
//        it->text_.convertString( inputstr, UString::CP949 );
//        stemmer_.stem( inputstr, stem );
//
//        if( !(retFlag_idx_ & ANALYZE_PRIME_) || inputstr != stem )
//        {
//            term_it = output.insert( output.end(), *it );
//            term_it->text_.assign( stem, UString::CP949 );
//        }
    }
    return 0;
}
void if_block_create_input_placeholders_for_outer_pointers(Term* ifCall)
{
    Branch* contents = nested_contents(ifCall);

    TermList outerTerms;

    // Find outer pointers across each case
    for (CaseIterator it(contents); it.unfinished(); it.advance()) {
        list_outer_pointers(nested_contents(it.current()), &outerTerms);
    }

    ca_assert(ifCall->numInputs() == 0);

    // Create input placeholders and add inputs for all outer pointers
    for (int i=0; i < outerTerms.length(); i++) {
        Term* outer = outerTerms[i];

        set_input(ifCall, i, outer);
        Term* placeholder = append_input_placeholder(nested_contents(ifCall));
        rename(placeholder, outer->name);

        // Go through each case and repoint to this new placeholder
        for (CaseIterator it(contents); it.unfinished(); it.advance()) {
            remap_pointers_quick(nested_contents(it.current()), outer, placeholder);
        }
    }
}
void formulaTest()
{
    TermList *list = new TermList();
    list->addTerm(new Term(Term::VAR, "x"));
    list->addTerm(new Term(Term::VAR, NULL));
    list->addTerm(new Term(Term::CONS, "A"));
    Formula *f = new Formula(new Predicate("P", list));
    Formula *g = new Formula(f);
    Formula *q = new Formula(f, g, '&');
    Formula *r = new Formula(new Term(Term::VAR, NULL), q, Formula::UNIV);
    r->print();

    Term *x = new Term(Term::VAR, "xy");
    Term *y = new Term(Term::VAR, "x");
    TermList *list2 = new TermList();
    list2->addTerm(x);
    list2->addTerm(y);
    Term *f2 = new Term("f10", list2);
    Term *g2 = new Term("f10", list2);
    f2->print();
    puts("");
    g2->print();
    puts("");

    // double negation: only operator!= appears to be defined on Term
    if (!(*f2 != *g2)) {
        printf("yes! equal!\n");
    }
}
/* ===================================================
   End of Prover
   =================================================== */

void testTerms()
{
    int m;
    int type;
    char name[10];
    for (int i = 0; i < 5; i++) {
        printf("type and Name: ");
        scanf("%d %9s", &type, name);   // %9s bounds the read to fit name[10]
        if (type == Term::FUNC) {
            TermList *list = new TermList();
            Term term(name, list);
            printf("place: ");
            scanf("%d", &m);
            for (int j = 0; j < m; ++j) {
                printf("%dth term: ", j);
                scanf("%9s", name);
                // renamed from 'term' to avoid shadowing the outer Term
                Term *arg = new Term(Term::VAR, name);
                list->addTerm(arg);
            }
            term.print();
        } else {
            Term term(type, name);
            term.print();
        }
    }
}
int StemAnalyzer::analyze_search( const TermList & input, TermList & output, unsigned char retFlag )
{
    string inputstr, stem;
    TermList::const_iterator it;
    //unsigned char level = 0;
    Term newTerm;
    TermList::iterator term_it;

    for( it = input.begin(); it != input.end(); it++ )
    {
        // NOTE: the stemming logic below is currently disabled.
//        if( retFlag_sch_ & ANALYZE_PRIME_ )
//        {
//            term_it = output.insert( output.end(), *it );
//            term_it->stats_ = makeStatBit( Term::OR_BIT, level++ );
//        }
//        if( (retFlag_sch_ & ANALYZE_SECOND_) == 0 )
//            continue;
//
//        it->text_.convertString( inputstr, UString::CP949 );
//        stemmer_.stem( inputstr, stem );
//
//        if( !(retFlag_sch_ & ANALYZE_PRIME_) || inputstr != stem )
//        {
//            term_it = output.insert( output.end(), newTerm );
////            term_it->text_.assign( stem, UString::CP949 );
//            term_it->stats_ = makeStatBit( Term::AND_BIT, level );
//        }
    }
    return 0;
}
bool TermList::operator==(TermList &list)
{
    if (this->list->size() != list.getList()->size())
        return false;

    // element-wise comparison of the pointed-to Terms
    for (size_t i = 0; i < this->list->size(); i++) {
        if (*(this->list->at(i)) != *(list.getList()->at(i)))
            return false;
    }
    return true;
}
TermList::TermList(TermList &tList)
{
    // deep copy: clone each Term rather than sharing pointers
    this->list = new vector<Term*>();
    for (size_t i = 0; i < tList.getList()->size(); i++) {
        Term *t = new Term(*(tList.getList()->at(i)));
        this->list->push_back(t);
    }
}
void CommonLanguageAnalyzer::analyzeSynonym(TermList& outList, size_t n)
{
    static UString SPACE(" ", izenelib::util::UString::UTF_8);
    TermList syOutList;
    size_t wordCount = outList.size();

    for (size_t i = 0; i < wordCount; i++)
    {
//        cout << "[off]" << outList[i].wordOffset_ << " [level]" << outList[i].getLevel() << " [andor]" << (unsigned int)(outList[i].getAndOrBit())
//             << " " << outList[i].textString() << endl;

        // find synonym for word(s)
        for (size_t len = 1; (len <= n) && (i+len <= wordCount); len++)
        {
            bool ret = false;
            unsigned int subLevel = 0;
            UString combine;

            // with space
            if (len > 1)
            {
                for (size_t j = 0; j < len-1; j++)
                {
                    combine.append(outList[i+j].text_);
                    combine.append(SPACE);
                }
                combine.append(outList[i+len-1].text_);
                ret = getSynonym(combine, outList[i].wordOffset_, Term::OR,
                                 outList[i].getLevel(), syOutList, subLevel);
            }

            // without space
            if (!ret)
            {
                combine.clear();
                for (size_t j = 0; j < len; j++)
                    combine.append(outList[i+j].text_);
                ret = getSynonym(combine, outList[i].wordOffset_, Term::OR,
                                 outList[i].getLevel(), syOutList, subLevel);
            }

            // adjust
            if (ret)
            {
                outList[i].setStats(outList[i].getAndOrBit(), outList[i].getLevel()+subLevel);
                for (size_t j = 1; j < len; j++)
                {
                    outList[i+j].wordOffset_ = outList[i].wordOffset_;
                    outList[i+j].setStats(outList[i+j].getAndOrBit(), outList[i].getLevel());
                }
                break;
            }
        }
        syOutList.push_back(outList[i]);
    }
    outList.swap(syOutList);
}
Term* find_accessor_head_term(Term* accessor)
{
    TermList chain;
    trace_accessor_chain(accessor, &chain);

    if (chain.length() == 0)
        return NULL;

    return chain[0];
}
Term* block_add_pack_state(Block* block)
{
    TermList inputs;
    list_inputs_to_pack_state(block, block->length(), &inputs);

    // Don't create anything if there are no state outputs
    if (inputs.length() == 0)
        return NULL;

    return apply(block, FUNCS.pack_state, inputs);
}
Term* branch_add_pack_state(Branch* branch)
{
    TermList inputs;
    get_list_of_state_outputs(branch, branch->length(), &inputs);

    // Don't create anything if there are no state outputs
    if (inputs.length() == 0)
        return NULL;

    return apply(branch, FUNCS.pack_state, inputs);
}
// 28/08/2002 Torrevieja
void Atom::rectify (Substitution& subst, Var& last, VarList& freeVars)
{
  TRACER ("Atom::rectify");

  TermList ts (args());
  ts.rectify (subst, last, freeVars);
  if (ts == args()) { // space-economic version
    return;
  }

  Atom a (functor(), ts);
  *this = a;
} // Atom::rectify
// 28/08/2002 Torrevieja
void Atom::apply ( const Substitution& subst )
{
  TRACER ("Atom::apply");

  TermList ts (args());
  ts.apply (subst);
  if (ts == args()) { // space-economic version
    return;
  }

  Atom a (functor(), ts);
  *this = a;
} // Atom::apply
void block_update_pack_state_calls(Block* block)
{
    if (block->stateType == NULL) {
        // No state type, make sure there's no pack_state call.
        // TODO: Handle this case properly (should search and destroy an existing
        // pack_state call)
        return;
    }

    int stateOutputIndex = block->length() - 1 - find_state_output(block)->index;

    for (int i=0; i < block->length(); i++) {
        Term* term = block->get(i);
        if (term == NULL)
            continue;

        if (term->function == FUNCS.pack_state) {
            // Update the inputs for this pack_state call
            TermList inputs;
            list_inputs_to_pack_state(block, i, &inputs);
            set_inputs(term, inputs);
        }

        else if (should_have_preceeding_pack_state(term)) {
            // Check if we need to insert a pack_state call
            Term* existing = term->input(stateOutputIndex);

            if (existing == NULL || existing->function != FUNCS.pack_state) {
                TermList inputs;
                list_inputs_to_pack_state(block, i, &inputs);

                if (inputs.length() != 0) {
                    Term* pack_state = apply(block, FUNCS.pack_state, inputs);
                    move_before(pack_state, term);

                    // Only set as an input for a non-minor block.
                    if (term->nestedContents == NULL || !is_minor_block(term->nestedContents)) {
                        set_input(term, stateOutputIndex, pack_state);
                        set_input_hidden(term, stateOutputIndex, true);
                        set_input_implicit(term, stateOutputIndex, true);
                    }

                    // Advance i to compensate for the term just added
                    i++;
                }
            }
        }
    }
}
// normalize the atom
// 29/08/2002 Torrevieja, changed
void Atom::normalize ()
{
  if ( ! isEquality() ) {
    return;
  }

  // equality: order the two arguments so that the larger one comes first
  TermList as (args());
  Term l (as.head());
  Term r (as.second());

  if (l.compare(r) == LESS) {
    TermList newAs (r, TermList (l));
    Atom newAtom (functor(), newAs);
    *this = newAtom;
  }
} // Atom::normalize
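The normalization above orders the two sides of an equality so that atoms equal up to symmetry become syntactically identical. An illustrative stand-alone version, with std::string in place of the prover's Term and assuming only that compare() is a total order:

#include <string>
#include <utility>

// Put the larger side first, mirroring 'if (l.compare(r) == LESS) swap'.
std::pair<std::string, std::string> normalizeEquality( std::string l, std::string r ) {
  if( l.compare(r) < 0 )
    std::swap( l, r );
  return std::make_pair( l, r );
}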
// TODO: there's a serious need for refactoring here!
Solution LpsolveAdaptator::getAdmissibleSolution(LinearProblem * lp) {
  lprec * model;   // renamed from 'lprec', which shadowed the type name
  int nbCol = lp->getVariables().size();
  model = make_lp(0, nbCol);
  if (model == NULL) {
    // TODO raise an exception
  }

  /* set variables name to ease debugging */
  for (int i = 0; i < (int)lp->getVariables().size(); ++i) {
    Variable * var = (lp->getVariables())[i];
    set_col_name(model, i + 1, var->getNameToChar());
    if (var->isBinary()) {
      set_binary(model, i + 1, TRUE);
    }
  }

  /* to build the model faster when adding constraints one at a time */
  set_add_rowmode(model, TRUE);
  for (int i = 0; i < (int)(lp->getConstraints().size()); ++i) {
    // FIXME there's a bug here but I can't find it
    Constraint c = (Constraint)(lp->getConstraints()[i]);
    TermList terms = c.getTerms();
    // NOTE: variable-length arrays are a compiler extension in C++
    int col[terms.size()];
    REAL row[terms.size()];
    int j = 0;
    for (TermList::const_iterator it = terms.begin(); it != terms.end(); ++it, ++j) {
      // TODO check if this is fixed
      col[j] = ((Term)*it).getVariable().getPosition();
      row[j] = ((Term)*it).getCoeff();
    }
    // WARNING: the Constraint uses the same operator values as lp_lib.h
    if (!add_constraintex(model, j, row, col, c.getOperator(), c.getBound())) {
      // TODO raise an exception
    }
  }
  /* the objective function requires rowmode to be off */
  set_add_rowmode(model, FALSE);

  return getSolution(model);
}
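For reference, a self-contained lp_solve sketch following the same call sequence (make_lp, rowmode on, add_constraintex, rowmode off); the tiny model here, maximize x + 2y subject to x + y <= 4, is made up for illustration:

#include "lp_lib.h"

int lpsolveDemo() {
  lprec* m = make_lp(0, 2);                      // 0 rows, 2 columns
  if (m == NULL) return -1;

  int  cols[] = { 1, 2 };                        // lp_solve columns are 1-based
  REAL row[]  = { 1.0, 1.0 };
  set_add_rowmode(m, TRUE);                      // faster constraint building
  add_constraintex(m, 2, row, cols, LE, 4.0);    // x + y <= 4
  set_add_rowmode(m, FALSE);                     // must be off before the objective

  REAL obj[] = { 1.0, 2.0 };
  set_obj_fnex(m, 2, obj, cols);                 // maximize x + 2y
  set_maxim(m);

  int ret = solve(m);                            // OPTIMAL (0) on success
  delete_lp(m);
  return ret;
}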
TermList* Parser::parseTermList()
{
    TermList *list = NULL;
    while (isspace(*p))
        p++;

    if (*p == '(') {
        list = new TermList();
        while (*p && *p != ')') {
            if (isalpha(*p)) {
                Term *t = this->parseTerm();
                list->addTerm(t);
                p--;   // step back to cancel the p++ below
            }
            p++;
        }
        if (*p == ')')
            p++;
    }
    return list;
}
void branch_update_existing_pack_state_calls(Branch* branch)
{
    if (branch->stateType == NULL) {
        // No state type, make sure there's no pack_state call.
        // TODO: Handle this case properly (should search and destroy an existing
        // pack_state call)
        return;
    }

    int stateOutputIndex = branch->length() - 1 - find_state_output(branch)->index;

    for (int i=0; i < branch->length(); i++) {
        Term* term = branch->get(i);
        if (term == NULL)
            continue;

        if (term->function == FUNCS.pack_state) {
            // Update the inputs for this pack_state call
            TermList inputs;
            get_list_of_state_outputs(branch, i, &inputs);
            set_inputs(term, inputs);
        }

        if (term->function == FUNCS.exit_point) {
            // Check if we need to insert a pack_state call
            Term* existing = term->input(stateOutputIndex);

            if (existing == NULL || existing->function != FUNCS.pack_state) {
                TermList inputs;
                get_list_of_state_outputs(branch, i, &inputs);

                if (inputs.length() != 0) {
                    Term* pack_state = apply(branch, FUNCS.pack_state, inputs);
                    move_before(pack_state, term);
                    set_input(term, stateOutputIndex + 1, pack_state);

                    // Advance i to compensate for the term just added
                    i++;
                }
            }
        }
    }
}
void POSTaggerEnglish::tag(const TermList & input, TermList & output)
{
    std::vector<Token> vt;
    TermList::const_iterator it = input.begin();
    for (; it != input.end(); it++)
        vt.push_back(Token(it->textString(), "?"));

    const multimap<std::string, std::string> dummy;
    bidir_decode_beam(vt, dummy, vme_);

    // copy the input and attach the predicted POS tag to each term
    output = input;
    TermList::iterator it2 = output.begin();
    for (size_t i = 0; i < vt.size(); i++, it2++) {
        it2->pos_ = vt[i].prd;
    }
}
void test_equals_function(TermList const& a, TermList const& b,
                          const char* aText, const char* bText,
                          int line, const char* file)
{
    if (a.length() != b.length()) {
        std::cout << "List equality fail in " << file << ", line " << line << std::endl;
        std::cout << "  " << aText << " has " << a.length() << " items, ";
        std::cout << bText << " has " << b.length() << " items." << std::endl;
        declare_current_test_failed();
        return;
    }

    for (int i=0; i < a.length(); i++) {
        if (a[i] != b[i]) {
            std::cout << "List equality fail in " << file << ", line " << line << std::endl;
            std::cout << "  " << aText << " != " << bText
                      << " (index " << i << " differs)" << std::endl;
            declare_current_test_failed();
            return;
        }
    }
}
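The aText/bText/line/file parameters indicate this function is meant to be reached through a stringizing macro; a plausible definition (hypothetical, the framework's actual macro name may differ):

// Hypothetical wrapper macro; captures both expressions and the call site.
#define test_equals(a, b) \
    test_equals_function((a), (b), #a, #b, __LINE__, __FILE__)

// usage: test_equals(actualTerms, expectedTerms);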
Term* write_selector_for_accessor_chain(Branch* branch, TermList* chain)
{
    TermList selectorInputs;

    // Skip index 0 - this is the head term.
    for (int i=1; i < chain->length(); i++) {
        Term* term = chain->get(i);

        if (term->function == FUNCS.get_index || term->function == FUNCS.get_field) {
            selectorInputs.append(term->input(1));
        }

        else if (is_accessor_function(term)) {
            Term* element = create_string(branch, term->stringProp("syntax:functionName", ""));
            selectorInputs.append(element);
        }
    }

    return apply(branch, FUNCS.selector, selectorInputs);
}
TermList* Parser::ParseTermList(int for_struct)
{
    TermList* tlist = new(__FILE__, __LINE__) TermList;

    Term* term = ParseTerm();
    while (term) {
        if (for_struct && !term->isDef()) {
            return (TermList*) error("(Parse) non-definition term in struct");
        }
        else if (!for_struct && term->isDef()) {
            return (TermList*) error("(Parse) illegal definition in array");
        }

        tlist->append(term);

        Token t = lexer->Get();

        /*** OLD WAY: COMMA SEPARATORS REQUIRED ***
        if (t.type() != Token::Comma) {
            lexer->PutBack();
            term = 0;
        }
        else
            term = ParseTerm();
        *******************************************/

        // NEW WAY: COMMA SEPARATORS OPTIONAL:
        if (t.type() != Token::Comma) {
            lexer->PutBack();
        }
        term = ParseTerm();
    }

    return tlist;
}
void IndexWriter::_writeDirectLists( WriterIndexContext* context,
                                     indri::file::SequentialWriteBuffer* directOutput,
                                     indri::file::SequentialWriteBuffer* lengthsOutput,
                                     indri::file::SequentialWriteBuffer* dataOutput )
{
  VocabularyIterator* vocabulary = context->index->frequentVocabularyIterator();
  indri::index::Index* index = context->index;

  // record the old frequent vocabulary
  vocabulary->startIteration();
  while( !vocabulary->finished() ) {
    indri::index::DiskTermData* diskTermData = vocabulary->currentEntry();
    context->oldFrequent->add( diskTermData->termID, diskTermData->termData->term );
    vocabulary->nextEntry();
  }
  delete vocabulary;
  vocabulary = 0;

  TermListFileIterator* iterator = index->termListFileIterator();
  TermTranslator* translator = _buildTermTranslator( _infrequentTermsReader,
                                                     _frequentTermsReader,
                                                     *context->oldFrequent,
                                                     context->oldInfrequent,
                                                     *context->newlyFrequent,
                                                     index,
                                                     context->bitmap );
  iterator->startIteration();

  TermList writeList;
  indri::utility::Buffer outputBuffer( 256*1024 );

  indri::index::DocumentDataIterator* dataIterator = context->index->documentDataIterator();
  dataIterator->startIteration();

  while( !iterator->finished() ) {
    writeList.clear();
    TermList* list = iterator->currentEntry();
    assert( list );

    int currentTerm;
    int translated;

    // copy and translate terms
    for( size_t i=0; i<list->terms().size(); i++ ) {
      currentTerm = list->terms()[i];
      assert( currentTerm >= 0 );
      assert( currentTerm <= index->uniqueTermCount() );
      translated = (*translator)( currentTerm );
      assert( translated > 0 || (translated == 0 && currentTerm == 0) );
      writeList.addTerm( translated );
    }

    // copy field data
    int fieldCount = list->fields().size();
    const indri::utility::greedy_vector<indri::index::FieldExtent>& fields = list->fields();
    for( int i=0; i<fieldCount; i++ ) {
      writeList.addField( fields[i] );
    }

    // record the start position
    size_t writeStart = outputBuffer.position();
    UINT32 length = 0;

    // write the list, leaving room for a length count
    outputBuffer.write( sizeof(UINT32) );
    writeList.write( outputBuffer );

    // record the end position, compute length
    size_t writeEnd = outputBuffer.position();
    length = writeEnd - (writeStart + sizeof(UINT32));

    // store length
    assert( outputBuffer.position() >= (sizeof(UINT32) + length + writeStart) );
    memcpy( outputBuffer.front() + writeStart, &length, sizeof(UINT32) );

    // get a copy of the document data
    assert( dataIterator );
    assert( !dataIterator->finished() );
    indri::index::DocumentData documentData = *dataIterator->currentEntry();

    // store offset information
    documentData.byteLength = length;
    documentData.offset = directOutput->tell() + writeStart + sizeof(UINT32);

    // tell has to happen before a write or the offset will be wrong.
    if( outputBuffer.position() > 128*1024 ) {
      directOutput->write( outputBuffer.front(), outputBuffer.position() );
      outputBuffer.clear();
    }

    dataOutput->write( &documentData, sizeof(DocumentData) );

    int termLength = documentData.totalLength;
    assert( termLength >= 0 );
    lengthsOutput->write( &termLength, sizeof(UINT32) );

    iterator->nextEntry();
    dataIterator->nextEntry();
  }

  delete iterator;
  delete dataIterator;
  delete translator;

  directOutput->write( outputBuffer.front(), outputBuffer.position() );
  directOutput->flush();
  lengthsOutput->flush();
  outputBuffer.clear();
}
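The reserve-then-patch pattern used above (write a placeholder length word, serialize the record, then memcpy the real length back into the slot) stands on its own. A minimal sketch with the standard library; names are illustrative:

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Append a length-prefixed record: reserve 4 bytes, append the payload,
// then patch the actual payload length into the reserved slot.
void appendRecord( std::vector<char>& out, const std::string& payload ) {
  size_t start = out.size();
  out.resize( start + sizeof(uint32_t) );           // placeholder length word
  out.insert( out.end(), payload.begin(), payload.end() );
  uint32_t length = (uint32_t)( out.size() - start - sizeof(uint32_t) );
  memcpy( &out[start], &length, sizeof(uint32_t) ); // patch it in place
}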
bool CommonLanguageAnalyzer::getSynonym(
    const UString& combine,
    int offset,
    const unsigned char andOrBit,
    const unsigned int level,
    TermList& syOutList,
    unsigned int& subLevel)
{
    bool ret = false;
//    cout << "combined: "; combine.displayStringValue(izenelib::util::UString::UTF_8); cout << endl;

    char* combineStr = lowercase_string_buffer_;
    UString::convertString(UString::UTF_8, combine.c_str(), combine.length(),
                           lowercase_string_buffer_, term_string_buffer_limit_);
//    cout << "combined string: " << string(combineStr) << endl;

    UString::CharT* synonymResultUstr = NULL;
    size_t synonymResultUstrLen = 0;

    pSynonymContainer_ = uscSPtr_->getSynonymContainer();
    pSynonymContainer_->searchNgetSynonym(combineStr, pSynonymResult_);

    for (int i = 0; i < pSynonymResult_->getSynonymCount(0); i++)
    {
        char* synonymResult = pSynonymResult_->getWord(0, i);
        if (synonymResult)
        {
            // skip the word itself
            if (strcmp(combineStr, synonymResult) == 0)
            {
                //cout << "synonym self: " << string(synonymResult) << endl;
                continue;
            }
            cout << "synonym : " << string(synonymResult) << endl;
            ret = true;

            size_t synonymResultLen = strlen(synonymResult);
            if (synonymResultLen <= term_ustring_buffer_limit_)
            {
                synonymResultUstr = synonym_ustring_buffer_;
                synonymResultUstrLen = UString::toUcs2(synonymEncode_, synonymResult, synonymResultLen,
                                                       synonym_ustring_buffer_, term_ustring_buffer_limit_);
            }

            // word segmentation
            UString term(synonymResultUstr, synonymResultUstrLen);
            TermList termList;

            if (innerAnalyzer_.get())
            {
                innerAnalyzer_->analyze(term, termList);
                if (termList.size() <= 1)
                {
                    syOutList.add(synonymResultUstr, synonymResultUstrLen, offset, NULL, andOrBit, level+subLevel);
                    subLevel++;
                }
                else
                {
                    for (TermList::iterator iter = termList.begin(); iter != termList.end(); ++iter)
                    {
                        syOutList.add(iter->text_.c_str(), iter->text_.length(), offset, NULL, Term::AND, level+subLevel);
                    }
                    subLevel++;
                }
            }
            else
            {
                syOutList.add(synonymResultUstr, synonymResultUstrLen, offset, NULL, andOrBit, level+subLevel);
                subLevel++;
            }
        }
    }

    return ret;
}
void IndexContentTestCase::testIndexContent_WL()
{
    Index* pIndex;
    IndexReaderPtr pReader;
    const Term* pTerm;
    TermIteratorPtr pTermIter;
    int docCount = 0;
    int termCount = 0;
    int pos = -1;
    uint32_t indexTermId;
    string fileName;

    // Check posting list
    Path indexPath = TestHelper::getTestDataPath();
    indexPath.makeDirectory();
    indexPath.pushDirectory(_T("test_wlindex"));
    pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL);
    auto_ptr<Index> indexPtr(pIndex);
    CPPUNIT_ASSERT(pIndex != NULL);

    pReader = pIndex->acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();
    pTermIter = pTermReader->termIterator("BODY");
    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();

    // Iterate over all terms
    while(pTermIter->next())
    {
        pTerm = pTermIter->term();
        CPPUNIT_ASSERT(pTermReader->seek(pTerm));
        indexTermId = (pTerm->cast<int32_t>())->getValue();

        TermPositionIteratorPtr pPositions = pTermReader->termPositions();
        docCount = 0;
        while(pPositions->nextDoc())
        {
            DocumentPtr pDoc = pDocReader->document(pPositions->doc());
            docCount++;

            fileName.assign(pDoc->getField("PATH")->getValue().c_str());
            TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName);
            CPPUNIT_ASSERT(pTermIdList != NULL);

            // Verify that every recorded position in this document holds this term
            pos = pPositions->nextPosition();
            termCount = 0;
            while(pos != -1)
            {
                termCount++;
                CPPUNIT_ASSERT(indexTermId == pTermIdList->getValue(pos));
                pos = pPositions->nextPosition();
            }
            CPPUNIT_ASSERT(termCount == pPositions->freq());
        } // end while nextDoc()
        CPPUNIT_ASSERT(docCount == pPositions->getDocFreq());
    }
    CPPUNIT_ASSERT_EQUAL((int64_t)m_pDocScanner->getTotalTermCount(),
                         (int64_t)pReader->getNumTerms());
}