void IndexContentTestCase::testIndexContent_DL()
{
    Index* pIndex;
    IndexReaderPtr pReader;
    const Term* pTerm;
    TermIteratorPtr pTermIter;
    int docCount = 0;
    int termCount = 0;
    uint32_t i;
    uint32_t indexTermId;
    string fileName;

    // Check posting list
    Path indexPath = TestHelper::getTestDataPath();
    indexPath.makeDirectory();
    indexPath.pushDirectory(_T("test_dlindex"));
    pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL);
    auto_ptr<Index> indexPtr(pIndex);
    pReader = pIndex->acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();
    pTermIter = pTermReader->termIterator("BODY");
    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();

    // Iterate over all terms
    while(pTermIter->next())
    {
        pTerm = pTermIter->term();
        CPPUNIT_ASSERT(pTermReader->seek(pTerm));
        indexTermId = (pTerm->cast<int32_t>())->getValue();

        docCount = 0;
        TermPostingIteratorPtr pTermDocFreqs = pTermReader->termPostings();
        while(pTermDocFreqs->nextDoc())
        {
            DocumentPtr pDoc = pDocReader->document(pTermDocFreqs->doc());
            docCount++;

            // Get the file path of this document
            fileName.assign(pDoc->getField("PATH")->getValue().c_str());
            TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName);
            CPPUNIT_ASSERT(pTermIdList != NULL);

            // Count this term's occurrences in the scanned file and compare
            // with the in-document frequency reported by the index
            for(i = 0, termCount = 0; i < pTermIdList->getSize(); i++)
            {
                if(indexTermId == pTermIdList->getValue(i))
                {
                    termCount++;
                }
            }
            CPPUNIT_ASSERT_EQUAL((tf_t)termCount, pTermDocFreqs->freq());
        } // end while nextDoc()
        CPPUNIT_ASSERT_EQUAL((df_t)docCount, pTermDocFreqs->getDocFreq());
    }
    CPPUNIT_ASSERT(m_pDocScanner->getTotalTermCount() == pReader->getNumTerms());
}
const indri::index::TermList* indri::index::MemoryIndex::termList( lemur::api::DOCID_T documentID ) {
  int documentIndex = documentID - documentBase();
  if( documentIndex < 0 || documentIndex >= (int)_documentData.size() )
    return 0;

  const DocumentData& data = _documentData[documentIndex];
  UINT64 documentOffset = data.offset;
  indri::utility::Buffer* documentBuffer = 0;
  std::list<indri::utility::Buffer*>::const_iterator iter;

  // The term lists are spread across a chain of buffers; walk the chain,
  // subtracting each buffer's fill size until the offset falls inside one.
  for( iter = _termLists.begin(); iter != _termLists.end(); ++iter ) {
    if( documentOffset < (*iter)->position() ) {
      documentBuffer = (*iter);
      break;
    }
    documentOffset -= (*iter)->position();
  }

  assert( documentBuffer );
  TermList* list = new TermList();
  list->read( documentBuffer->front() + documentOffset, data.byteLength );
  return list;
}
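The buffer-chain walk above is worth seeing in isolation. A minimal sketch of the same technique, with hypothetical names (Chunk, locate) and std::vector standing in for indri::utility::Buffer:

#include <cassert>
#include <cstddef>
#include <list>
#include <vector>

typedef std::vector<char> Chunk;

// Resolve a global offset into (chunk, local offset) by subtracting each
// chunk's size in order, as termList() does with Buffer::position().
const char* locate( const std::list<Chunk>& chunks, size_t offset ) {
  std::list<Chunk>::const_iterator it;
  for( it = chunks.begin(); it != chunks.end(); ++it ) {
    if( offset < it->size() )
      return &(*it)[0] + offset;  // the offset falls inside this chunk
    offset -= it->size();         // otherwise skip past it
  }
  assert( false && "offset beyond the end of the chain" );
  return 0;
}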
void Term::inputsToList(TermList& out)
{
    out.resize(numInputs());
    for (int i=0; i < numInputs(); i++)
        out.setAt(i, input(i));
}
int StemAnalyzer::analyze_index( const TermList & input, TermList & output, unsigned char retFlag )
{
    string inputstr, stem;
    TermList::const_iterator it;
    Term newTerm;
    TermList::iterator term_it;

    for( it = input.begin(); it != input.end(); it++ )
    {
        // NOTE: the stemming logic below is currently disabled.
//        if( retFlag_idx_ & ANALYZE_PRIME_ )
//        {
//            term_it = output.insert( output.end(), *it );
//        }
//        if( (retFlag_idx_ & ANALYZE_SECOND_) == 0 )
//            continue;
//
//        it->text_.convertString( inputstr, UString::CP949 );
//        stemmer_.stem( inputstr, stem );
//
//        if( !(retFlag_idx_ & ANALYZE_PRIME_) || inputstr != stem )
//        {
//            term_it = output.insert( output.end(), *it );
//            term_it->text_.assign( stem, UString::CP949 );
//        }
    }
    return 0;
}
void if_block_create_input_placeholders_for_outer_pointers(Term* ifCall)
{
    Branch* contents = nested_contents(ifCall);

    TermList outerTerms;

    // Find outer pointers across each case
    for (CaseIterator it(contents); it.unfinished(); it.advance()) {
        list_outer_pointers(nested_contents(it.current()), &outerTerms);
    }

    ca_assert(ifCall->numInputs() == 0);

    // Create input placeholders and add inputs for all outer pointers
    for (int i=0; i < outerTerms.length(); i++) {
        Term* outer = outerTerms[i];

        set_input(ifCall, i, outer);
        Term* placeholder = append_input_placeholder(nested_contents(ifCall));
        rename(placeholder, outer->name);

        // Go through each case and repoint to this new placeholder
        for (CaseIterator it(contents); it.unfinished(); it.advance()) {
            remap_pointers_quick(nested_contents(it.current()), outer, placeholder);
        }
    }
}
void formulaTest()
{
    TermList *list = new TermList();
    list->addTerm(new Term(Term::VAR, "x"));
    list->addTerm(new Term(Term::VAR, NULL));
    list->addTerm(new Term(Term::CONS, "A"));
    Formula *f = new Formula(new Predicate("P", list));
    Formula *g = new Formula(f);
    Formula *q = new Formula(f, g, '&');
    Formula *r = new Formula(new Term(Term::VAR, NULL), q, Formula::UNIV);
    r->print();

    Term *x = new Term(Term::VAR, "xy");
    Term *y = new Term(Term::VAR, "x");
    TermList *list2 = new TermList();
    list2->addTerm(x);
    list2->addTerm(y);
    Term *f2 = new Term("f10", list2);
    Term *g2 = new Term("f10", list2);
    f2->print();
    puts("");
    g2->print();
    puts("");

    // double negation: only operator!= appears to be defined on Term
    if (!(*f2 != *g2)) {
        printf("yes! equal!\n");
    }
}
/* ===================================================
   End of Prover
   =================================================== */

void testTerms()
{
    int m;
    int type;
    char name[10];
    for (int i = 0; i < 5; i++) {
        printf("type and Name: ");
        scanf("%d %9s", &type, name);   // %9s bounds the read to fit name[10]
        if (type == Term::FUNC) {
            TermList *list = new TermList();
            Term term(name, list);
            printf("place: ");
            scanf("%d", &m);
            for (int j = 0; j < m; ++j) {
                printf("%dth term: ", j);
                scanf("%9s", name);
                // renamed from 'term' to avoid shadowing the outer Term
                Term *arg = new Term(Term::VAR, name);
                list->addTerm(arg);
            }
            term.print();
        } else {
            Term term(type, name);
            term.print();
        }
    }
}
int StemAnalyzer::analyze_search( const TermList & input, TermList & output, unsigned char retFlag )
{
    string inputstr, stem;
    TermList::const_iterator it;
    //unsigned char level = 0;
    Term newTerm;
    TermList::iterator term_it;

    for( it = input.begin(); it != input.end(); it++ )
    {
        // NOTE: the stemming logic below is currently disabled.
//        if( retFlag_sch_ & ANALYZE_PRIME_ )
//        {
//            term_it = output.insert( output.end(), *it );
//            term_it->stats_ = makeStatBit( Term::OR_BIT, level++ );
//        }
//        if( (retFlag_sch_ & ANALYZE_SECOND_) == 0 )
//            continue;
//
//        it->text_.convertString( inputstr, UString::CP949 );
//        stemmer_.stem( inputstr, stem );
//
//        if( !(retFlag_sch_ & ANALYZE_PRIME_) || inputstr != stem )
//        {
//            term_it = output.insert( output.end(), newTerm );
////            term_it->text_.assign( stem, UString::CP949 );
//            term_it->stats_ = makeStatBit( Term::AND_BIT, level );
//        }
    }
    return 0;
}
bool TermList::operator==(TermList &list)
{
    if (this->list->size() != list.getList()->size())
        return false;

    // element-wise comparison of the pointed-to Terms
    for (size_t i = 0; i < this->list->size(); i++) {
        if (*(this->list->at(i)) != *(list.getList()->at(i)))
            return false;
    }
    return true;
}
TermList::TermList(TermList &tList)
{
    // deep copy: clone each Term rather than sharing pointers
    this->list = new vector<Term*>();
    for (size_t i = 0; i < tList.getList()->size(); i++) {
        Term *t = new Term(*(tList.getList()->at(i)));
        this->list->push_back(t);
    }
}
void CommonLanguageAnalyzer::analyzeSynonym(TermList& outList, size_t n)
{
    static UString SPACE(" ", izenelib::util::UString::UTF_8);
    TermList syOutList;
    size_t wordCount = outList.size();

    for (size_t i = 0; i < wordCount; i++)
    {
//        cout << "[off]" << outList[i].wordOffset_ << " [level]" << outList[i].getLevel() << " [andor]" << (unsigned int)(outList[i].getAndOrBit())
//             << " " << outList[i].textString() << endl;

        // find synonym for word(s)
        for (size_t len = 1; (len <= n) && (i+len <= wordCount); len++)
        {
            bool ret = false;
            unsigned int subLevel = 0;
            UString combine;

            // with space
            if (len > 1)
            {
                for (size_t j = 0; j < len-1; j++)
                {
                    combine.append(outList[i+j].text_);
                    combine.append(SPACE);
                }
                combine.append(outList[i+len-1].text_);
                ret = getSynonym(combine, outList[i].wordOffset_, Term::OR,
                                 outList[i].getLevel(), syOutList, subLevel);
            }

            // without space
            if (!ret)
            {
                combine.clear();
                for (size_t j = 0; j < len; j++)
                    combine.append(outList[i+j].text_);
                ret = getSynonym(combine, outList[i].wordOffset_, Term::OR,
                                 outList[i].getLevel(), syOutList, subLevel);
            }

            // adjust
            if (ret)
            {
                outList[i].setStats(outList[i].getAndOrBit(), outList[i].getLevel()+subLevel);
                for (size_t j = 1; j < len; j++)
                {
                    outList[i+j].wordOffset_ = outList[i].wordOffset_;
                    outList[i+j].setStats(outList[i+j].getAndOrBit(), outList[i].getLevel());
                }
                break;
            }
        }
        syOutList.push_back(outList[i]);
    }
    outList.swap(syOutList);
}
Term* find_accessor_head_term(Term* accessor)
{
    TermList chain;
    trace_accessor_chain(accessor, &chain);

    if (chain.length() == 0)
        return NULL;

    return chain[0];
}
Term* block_add_pack_state(Block* block)
{
    TermList inputs;
    list_inputs_to_pack_state(block, block->length(), &inputs);

    // Don't create anything if there are no state outputs
    if (inputs.length() == 0)
        return NULL;

    return apply(block, FUNCS.pack_state, inputs);
}
Term* branch_add_pack_state(Branch* branch)
{
    TermList inputs;
    get_list_of_state_outputs(branch, branch->length(), &inputs);

    // Don't create anything if there are no state outputs
    if (inputs.length() == 0)
        return NULL;

    return apply(branch, FUNCS.pack_state, inputs);
}
// 28/08/2002 Torrevieja
void Atom::rectify (Substitution& subst, Var& last, VarList& freeVars)
{
  TRACER ("Atom::rectify");

  TermList ts (args());
  ts.rectify (subst, last, freeVars);
  if (ts == args()) { // space-economic version
    return;
  }

  Atom a (functor(), ts);
  *this = a;
} // Atom::rectify
// 28/08/2002 Torrevieja
void Atom::apply ( const Substitution& subst )
{
  TRACER ("Atom::apply");

  TermList ts (args());
  ts.apply (subst);
  if (ts == args()) { // space-economic version
    return;
  }

  Atom a (functor(), ts);
  *this = a;
} // Atom::apply
void block_update_pack_state_calls(Block* block)
{
    if (block->stateType == NULL) {
        // No state type, make sure there's no pack_state call.
        // TODO: Handle this case properly (should search and destroy an existing
        // pack_state call)
        return;
    }

    int stateOutputIndex = block->length() - 1 - find_state_output(block)->index;

    for (int i=0; i < block->length(); i++) {
        Term* term = block->get(i);
        if (term == NULL)
            continue;

        if (term->function == FUNCS.pack_state) {
            // Update the inputs for this pack_state call
            TermList inputs;
            list_inputs_to_pack_state(block, i, &inputs);
            set_inputs(term, inputs);
        }

        else if (should_have_preceeding_pack_state(term)) {
            // Check if we need to insert a pack_state call
            Term* existing = term->input(stateOutputIndex);

            if (existing == NULL || existing->function != FUNCS.pack_state) {
                TermList inputs;
                list_inputs_to_pack_state(block, i, &inputs);

                if (inputs.length() != 0) {
                    Term* pack_state = apply(block, FUNCS.pack_state, inputs);
                    move_before(pack_state, term);

                    // Only set as an input for a non-minor block.
                    if (term->nestedContents == NULL || !is_minor_block(term->nestedContents)) {
                        set_input(term, stateOutputIndex, pack_state);
                        set_input_hidden(term, stateOutputIndex, true);
                        set_input_implicit(term, stateOutputIndex, true);
                    }

                    // Advance i to compensate for the term just added
                    i++;
                }
            }
        }
    }
}
// normalize the atom
// 29/08/2002 Torrevieja, changed
void Atom::normalize ()
{
  if ( ! isEquality() ) {
    return;
  }

  // equality: order the two arguments so that the larger one comes first
  TermList as (args());
  Term l (as.head());
  Term r (as.second());

  if (l.compare(r) == LESS) {
    TermList newAs (r, TermList (l));
    Atom newAtom (functor(), newAs);
    *this = newAtom;
  }
} // Atom::normalize
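The normalization above orders the two sides of an equality so that atoms equal up to symmetry become syntactically identical. An illustrative stand-alone version, with std::string in place of the prover's Term and assuming only that compare() is a total order:

#include <string>
#include <utility>

// Put the larger side first, mirroring 'if (l.compare(r) == LESS) swap'.
std::pair<std::string, std::string> normalizeEquality( std::string l, std::string r ) {
  if( l.compare(r) < 0 )
    std::swap( l, r );
  return std::make_pair( l, r );
}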
// TODO: there's a serious need for refactoring here!
Solution LpsolveAdaptator::getAdmissibleSolution(LinearProblem * lp) {
  lprec * model;   // renamed from 'lprec', which shadowed the type name
  int nbCol = lp->getVariables().size();
  model = make_lp(0, nbCol);
  if (model == NULL) {
    // TODO raise an exception
  }

  /* set variables name to ease debugging */
  for (int i = 0; i < (int)lp->getVariables().size(); ++i) {
    Variable * var = (lp->getVariables())[i];
    set_col_name(model, i + 1, var->getNameToChar());
    if (var->isBinary()) {
      set_binary(model, i + 1, TRUE);
    }
  }

  /* to build the model faster when adding constraints one at a time */
  set_add_rowmode(model, TRUE);
  for (int i = 0; i < (int)(lp->getConstraints().size()); ++i) {
    // FIXME there's a bug here but I can't find it
    Constraint c = (Constraint)(lp->getConstraints()[i]);
    TermList terms = c.getTerms();
    // NOTE: variable-length arrays are a compiler extension in C++
    int col[terms.size()];
    REAL row[terms.size()];
    int j = 0;
    for (TermList::const_iterator it = terms.begin(); it != terms.end(); ++it, ++j) {
      // TODO check if this is fixed
      col[j] = ((Term)*it).getVariable().getPosition();
      row[j] = ((Term)*it).getCoeff();
    }
    // WARNING: the Constraint uses the same operator values as lp_lib.h
    if (!add_constraintex(model, j, row, col, c.getOperator(), c.getBound())) {
      // TODO raise an exception
    }
  }
  /* the objective function requires rowmode to be off */
  set_add_rowmode(model, FALSE);

  return getSolution(model);
}
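For reference, a self-contained lp_solve sketch following the same call sequence (make_lp, rowmode on, add_constraintex, rowmode off); the tiny model here, maximize x + 2y subject to x + y <= 4, is made up for illustration:

#include "lp_lib.h"

int lpsolveDemo() {
  lprec* m = make_lp(0, 2);                      // 0 rows, 2 columns
  if (m == NULL) return -1;

  int  cols[] = { 1, 2 };                        // lp_solve columns are 1-based
  REAL row[]  = { 1.0, 1.0 };
  set_add_rowmode(m, TRUE);                      // faster constraint building
  add_constraintex(m, 2, row, cols, LE, 4.0);    // x + y <= 4
  set_add_rowmode(m, FALSE);                     // must be off before the objective

  REAL obj[] = { 1.0, 2.0 };
  set_obj_fnex(m, 2, obj, cols);                 // maximize x + 2y
  set_maxim(m);

  int ret = solve(m);                            // OPTIMAL (0) on success
  delete_lp(m);
  return ret;
}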
TermList* Parser::parseTermList()
{
    TermList *list = NULL;
    while (isspace(*p))
        p++;

    if (*p == '(') {
        list = new TermList();
        while (*p && *p != ')') {
            if (isalpha(*p)) {
                Term *t = this->parseTerm();
                list->addTerm(t);
                p--;   // step back to cancel the p++ below
            }
            p++;
        }
        if (*p == ')')
            p++;
    }
    return list;
}
void branch_update_existing_pack_state_calls(Branch* branch)
{
    if (branch->stateType == NULL) {
        // No state type, make sure there's no pack_state call.
        // TODO: Handle this case properly (should search and destroy an existing
        // pack_state call)
        return;
    }

    int stateOutputIndex = branch->length() - 1 - find_state_output(branch)->index;

    for (int i=0; i < branch->length(); i++) {
        Term* term = branch->get(i);
        if (term == NULL)
            continue;

        if (term->function == FUNCS.pack_state) {
            // Update the inputs for this pack_state call
            TermList inputs;
            get_list_of_state_outputs(branch, i, &inputs);
            set_inputs(term, inputs);
        }

        if (term->function == FUNCS.exit_point) {
            // Check if we need to insert a pack_state call
            Term* existing = term->input(stateOutputIndex);

            if (existing == NULL || existing->function != FUNCS.pack_state) {
                TermList inputs;
                get_list_of_state_outputs(branch, i, &inputs);

                if (inputs.length() != 0) {
                    Term* pack_state = apply(branch, FUNCS.pack_state, inputs);
                    move_before(pack_state, term);
                    set_input(term, stateOutputIndex + 1, pack_state);

                    // Advance i to compensate for the term just added
                    i++;
                }
            }
        }
    }
}
void POSTaggerEnglish::tag(const TermList & input, TermList & output)
{
    std::vector<Token> vt;
    TermList::const_iterator it = input.begin();
    for (; it != input.end(); it++)
        vt.push_back(Token(it->textString(), "?"));

    const multimap<std::string, std::string> dummy;
    bidir_decode_beam(vt, dummy, vme_);

    // copy the input and attach the predicted POS tag to each term
    output = input;
    TermList::iterator it2 = output.begin();
    for (size_t i = 0; i < vt.size(); i++, it2++) {
        it2->pos_ = vt[i].prd;
    }
}
void test_equals_function(TermList const& a, TermList const& b,
                          const char* aText, const char* bText,
                          int line, const char* file)
{
    if (a.length() != b.length()) {
        std::cout << "List equality fail in " << file << ", line " << line << std::endl;
        std::cout << "  " << aText << " has " << a.length() << " items, ";
        std::cout << bText << " has " << b.length() << " items." << std::endl;
        declare_current_test_failed();
        return;
    }

    for (int i=0; i < a.length(); i++) {
        if (a[i] != b[i]) {
            std::cout << "List equality fail in " << file << ", line " << line << std::endl;
            std::cout << "  " << aText << " != " << bText
                      << " (index " << i << " differs)" << std::endl;
            declare_current_test_failed();
            return;
        }
    }
}
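The aText/bText/line/file parameters indicate this function is meant to be reached through a stringizing macro; a plausible definition (hypothetical, the framework's actual macro name may differ):

// Hypothetical wrapper macro; captures both expressions and the call site.
#define test_equals(a, b) \
    test_equals_function((a), (b), #a, #b, __LINE__, __FILE__)

// usage: test_equals(actualTerms, expectedTerms);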
Term* write_selector_for_accessor_chain(Branch* branch, TermList* chain)
{
    TermList selectorInputs;

    // Skip index 0 - this is the head term.
    for (int i=1; i < chain->length(); i++) {
        Term* term = chain->get(i);

        if (term->function == FUNCS.get_index || term->function == FUNCS.get_field) {
            selectorInputs.append(term->input(1));
        }

        else if (is_accessor_function(term)) {
            Term* element = create_string(branch, term->stringProp("syntax:functionName", ""));
            selectorInputs.append(element);
        }
    }

    return apply(branch, FUNCS.selector, selectorInputs);
}
TermList* Parser::ParseTermList(int for_struct)
{
    TermList* tlist = new(__FILE__, __LINE__) TermList;

    Term* term = ParseTerm();
    while (term) {
        if (for_struct && !term->isDef()) {
            return (TermList*) error("(Parse) non-definition term in struct");
        }
        else if (!for_struct && term->isDef()) {
            return (TermList*) error("(Parse) illegal definition in array");
        }

        tlist->append(term);

        Token t = lexer->Get();

        /*** OLD WAY: COMMA SEPARATORS REQUIRED ***
        if (t.type() != Token::Comma) {
            lexer->PutBack();
            term = 0;
        }
        else
            term = ParseTerm();
        *******************************************/

        // NEW WAY: COMMA SEPARATORS OPTIONAL:
        if (t.type() != Token::Comma) {
            lexer->PutBack();
        }
        term = ParseTerm();
    }

    return tlist;
}
void IndexWriter::_writeDirectLists( WriterIndexContext* context,
                                     indri::file::SequentialWriteBuffer* directOutput,
                                     indri::file::SequentialWriteBuffer* lengthsOutput,
                                     indri::file::SequentialWriteBuffer* dataOutput )
{
  VocabularyIterator* vocabulary = context->index->frequentVocabularyIterator();
  indri::index::Index* index = context->index;

  // record the old frequent vocabulary
  vocabulary->startIteration();
  while( !vocabulary->finished() ) {
    indri::index::DiskTermData* diskTermData = vocabulary->currentEntry();
    context->oldFrequent->add( diskTermData->termID, diskTermData->termData->term );
    vocabulary->nextEntry();
  }
  delete vocabulary;
  vocabulary = 0;

  TermListFileIterator* iterator = index->termListFileIterator();
  TermTranslator* translator = _buildTermTranslator( _infrequentTermsReader,
                                                     _frequentTermsReader,
                                                     *context->oldFrequent,
                                                     context->oldInfrequent,
                                                     *context->newlyFrequent,
                                                     index,
                                                     context->bitmap );
  iterator->startIteration();

  TermList writeList;
  indri::utility::Buffer outputBuffer( 256*1024 );

  indri::index::DocumentDataIterator* dataIterator = context->index->documentDataIterator();
  dataIterator->startIteration();

  while( !iterator->finished() ) {
    writeList.clear();
    TermList* list = iterator->currentEntry();
    assert( list );

    int currentTerm;
    int translated;

    // copy and translate terms
    for( size_t i=0; i<list->terms().size(); i++ ) {
      currentTerm = list->terms()[i];
      assert( currentTerm >= 0 );
      assert( currentTerm <= index->uniqueTermCount() );
      translated = (*translator)( currentTerm );
      assert( translated > 0 || (translated == 0 && currentTerm == 0) );
      writeList.addTerm( translated );
    }

    // copy field data
    int fieldCount = list->fields().size();
    const indri::utility::greedy_vector<indri::index::FieldExtent>& fields = list->fields();
    for( int i=0; i<fieldCount; i++ ) {
      writeList.addField( fields[i] );
    }

    // record the start position
    size_t writeStart = outputBuffer.position();
    UINT32 length = 0;

    // write the list, leaving room for a length count
    outputBuffer.write( sizeof(UINT32) );
    writeList.write( outputBuffer );

    // record the end position, compute length
    size_t writeEnd = outputBuffer.position();
    length = writeEnd - (writeStart + sizeof(UINT32));

    // store length
    assert( outputBuffer.position() >= (sizeof(UINT32) + length + writeStart) );
    memcpy( outputBuffer.front() + writeStart, &length, sizeof(UINT32) );

    // get a copy of the document data
    assert( dataIterator );
    assert( !dataIterator->finished() );
    indri::index::DocumentData documentData = *dataIterator->currentEntry();

    // store offset information
    documentData.byteLength = length;
    documentData.offset = directOutput->tell() + writeStart + sizeof(UINT32);

    // tell has to happen before a write or the offset will be wrong.
    if( outputBuffer.position() > 128*1024 ) {
      directOutput->write( outputBuffer.front(), outputBuffer.position() );
      outputBuffer.clear();
    }

    dataOutput->write( &documentData, sizeof(DocumentData) );

    int termLength = documentData.totalLength;
    assert( termLength >= 0 );
    lengthsOutput->write( &termLength, sizeof(UINT32) );

    iterator->nextEntry();
    dataIterator->nextEntry();
  }

  delete iterator;
  delete dataIterator;
  delete translator;

  directOutput->write( outputBuffer.front(), outputBuffer.position() );
  directOutput->flush();
  lengthsOutput->flush();
  outputBuffer.clear();
}
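The reserve-then-patch pattern used above (write a placeholder length word, serialize the record, then memcpy the real length back into the slot) stands on its own. A minimal sketch with the standard library; names are illustrative:

#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Append a length-prefixed record: reserve 4 bytes, append the payload,
// then patch the actual payload length into the reserved slot.
void appendRecord( std::vector<char>& out, const std::string& payload ) {
  size_t start = out.size();
  out.resize( start + sizeof(uint32_t) );           // placeholder length word
  out.insert( out.end(), payload.begin(), payload.end() );
  uint32_t length = (uint32_t)( out.size() - start - sizeof(uint32_t) );
  memcpy( &out[start], &length, sizeof(uint32_t) ); // patch it in place
}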
bool CommonLanguageAnalyzer::getSynonym(
    const UString& combine,
    int offset,
    const unsigned char andOrBit,
    const unsigned int level,
    TermList& syOutList,
    unsigned int& subLevel)
{
    bool ret = false;
//    cout << "combined: "; combine.displayStringValue(izenelib::util::UString::UTF_8); cout << endl;

    char* combineStr = lowercase_string_buffer_;
    UString::convertString(UString::UTF_8, combine.c_str(), combine.length(),
                           lowercase_string_buffer_, term_string_buffer_limit_);
//    cout << "combined string: " << string(combineStr) << endl;

    UString::CharT* synonymResultUstr = NULL;
    size_t synonymResultUstrLen = 0;

    pSynonymContainer_ = uscSPtr_->getSynonymContainer();
    pSynonymContainer_->searchNgetSynonym(combineStr, pSynonymResult_);

    for (int i = 0; i < pSynonymResult_->getSynonymCount(0); i++)
    {
        char* synonymResult = pSynonymResult_->getWord(0, i);
        if (synonymResult)
        {
            // skip the word itself
            if (strcmp(combineStr, synonymResult) == 0)
            {
                //cout << "synonym self: " << string(synonymResult) << endl;
                continue;
            }
            cout << "synonym : " << string(synonymResult) << endl;
            ret = true;

            size_t synonymResultLen = strlen(synonymResult);
            if (synonymResultLen <= term_ustring_buffer_limit_)
            {
                synonymResultUstr = synonym_ustring_buffer_;
                synonymResultUstrLen = UString::toUcs2(synonymEncode_, synonymResult, synonymResultLen,
                                                       synonym_ustring_buffer_, term_ustring_buffer_limit_);
            }

            // word segmentation
            UString term(synonymResultUstr, synonymResultUstrLen);
            TermList termList;

            if (innerAnalyzer_.get())
            {
                innerAnalyzer_->analyze(term, termList);
                if (termList.size() <= 1)
                {
                    syOutList.add(synonymResultUstr, synonymResultUstrLen, offset, NULL, andOrBit, level+subLevel);
                    subLevel++;
                }
                else
                {
                    for (TermList::iterator iter = termList.begin(); iter != termList.end(); ++iter)
                    {
                        syOutList.add(iter->text_.c_str(), iter->text_.length(), offset, NULL, Term::AND, level+subLevel);
                    }
                    subLevel++;
                }
            }
            else
            {
                syOutList.add(synonymResultUstr, synonymResultUstrLen, offset, NULL, andOrBit, level+subLevel);
                subLevel++;
            }
        }
    }

    return ret;
}
void IndexContentTestCase::testIndexContent_WL()
{
    Index* pIndex;
    IndexReaderPtr pReader;
    const Term* pTerm;
    TermIteratorPtr pTermIter;
    int docCount = 0;
    int termCount = 0;
    int pos = -1;
    uint32_t indexTermId;
    string fileName;

    // Check posting list
    Path indexPath = TestHelper::getTestDataPath();
    indexPath.makeDirectory();
    indexPath.pushDirectory(_T("test_wlindex"));
    pIndex = new Index(indexPath.toString().c_str(), Index::READ, NULL);
    auto_ptr<Index> indexPtr(pIndex);
    CPPUNIT_ASSERT(pIndex != NULL);

    pReader = pIndex->acquireReader();
    TermReaderPtr pTermReader = pReader->termReader();
    pTermIter = pTermReader->termIterator("BODY");
    StoredFieldsReaderPtr pDocReader = pReader->createStoredFieldsReader();

    // Iterate over all terms
    while(pTermIter->next())
    {
        pTerm = pTermIter->term();
        CPPUNIT_ASSERT(pTermReader->seek(pTerm));
        indexTermId = (pTerm->cast<int32_t>())->getValue();

        TermPositionIteratorPtr pPositions = pTermReader->termPositions();
        docCount = 0;
        while(pPositions->nextDoc())
        {
            DocumentPtr pDoc = pDocReader->document(pPositions->doc());
            docCount++;

            fileName.assign(pDoc->getField("PATH")->getValue().c_str());
            TermList* pTermIdList = m_pDocScanner->getTermListOfFile(fileName);
            CPPUNIT_ASSERT(pTermIdList != NULL);

            // Verify that every recorded position in this document holds this term
            pos = pPositions->nextPosition();
            termCount = 0;
            while(pos != -1)
            {
                termCount++;
                CPPUNIT_ASSERT(indexTermId == pTermIdList->getValue(pos));
                pos = pPositions->nextPosition();
            }
            CPPUNIT_ASSERT(termCount == pPositions->freq());
        } // end while nextDoc()
        CPPUNIT_ASSERT(docCount == pPositions->getDocFreq());
    }
    CPPUNIT_ASSERT_EQUAL((int64_t)m_pDocScanner->getTotalTermCount(),
                         (int64_t)pReader->getNumTerms());
}