Beispiel #1
0
void LTRTextBuilder::updateLTR_TokenFromVertex(
    const LinguisticGraphVertex& vertex,
    const LinguisticGraph& graph,
    LTR_Token* tokenRep,
    uint64_t offset) const {

    // get data from the result of the linguistic analysis
    Token* fullToken = get(vertex_token, graph, vertex);
    MorphoSyntacticData* data = get(vertex_data, graph, vertex);
    const FsaStringsPool& sp = (Common::MediaticData::MediaticData::single().stringsPool(m_language));

    sort(data->begin(),data->end(),ltNormProperty(m_macroAccessor));

    StringsPoolIndex norm(0),lastNorm(0);
    LinguisticCode macro(0),lastMacro(0);
    for (MorphoSyntacticData::const_iterator elemItr=data->begin();
         elemItr!=data->end(); elemItr++) {
        norm = elemItr->normalizedForm;
        macro = m_macroAccessor->readValue(elemItr->properties);
        if (norm == lastNorm && macro == lastMacro) {
            continue;
        }
        else {
            lastNorm=norm;
            lastMacro=macro;
            LimaString normStr= sp[norm];
            // test if the same word was not already met at this position
            bool selectionFlag = true;
            LTR_Token::const_iterator itTok = tokenRep->begin();
            while (selectionFlag && (itTok != tokenRep->end())) {
                selectionFlag = (itTok->first->getLemma() != normStr) ||
                                (itTok->first->getCategory() != macro);
                itTok ++;
            }
            if (selectionFlag) {
                // test if the current token is a plain word
                bool plainWordFlag =
                    this->isWordToSelect(normStr, macro,m_microAccessor->readValue(elemItr->properties));
                BoWToken* bowToken = new BoWToken(normStr,macro,
                                                  fullToken->position() + offset,
                                                  fullToken->length());
                bowToken->setInflectedForm(fullToken->stringForm());
                tokenRep->push_back(make_pair(bowToken, plainWordFlag));
            }
        }
    }
}