void FrTextSpans::parseSpans(const FrList *spans, size_t numstrings) { for (const FrList *sp = spans ; sp ; sp = sp->rest()) { const FrObject *item = sp->first() ; if (item && (item->stringp() || item->symbolp())) { m_text = FrDupString(FrPrintableName(item)) ; if (m_text && setPositionMap()) break ; return ; } } m_spancount = spans->simplelistlength() - numstrings ; m_spans = FrNewN(FrTextSpan,m_spancount) ; if (!m_spans) { m_spancount = 0 ; return ; } size_t count = 0 ; for ( ; spans ; spans = spans->rest()) { FrList *span = (FrList*)spans->first() ; if (!span) continue ; if (span->consp() && m_spans[count].parse(span,this)) { count++ ; m_sorted = false ; } else if (span->structp()) { FrStruct *meta = (FrStruct*)span ; if (meta->typeName() && Fr_stricmp(FrPrintableName(meta->typeName()),"META") == 0) addMetaData(meta) ; } } m_spancount = count ; return ; }
bool FrTextSpan::parse(const FrList *span, FrTextSpans *contain) { if (span && span->consp() && span->simplelistlength() >= 2 && span->first() && span->second() && span->first()->numberp() && span->second()->numberp()) { size_t sp_start = span->first()->intValue() ; size_t sp_end = span->second()->intValue() ; span = span->rest()->rest() ; // we'll allow rather free-form input from the rest of the span's // description: the first two numbers are the score and weight, // respectively, the first string is the span's text, the second // string (if present) becomes the INIT_TEXT metadata. Then, // the first structure (if present) is the metadata, and any lists // starting with a symbol become additional metadata fields double sp_score = DEFAULT_SCORE ; double sp_weight = DEFAULT_WEIGHT ; // scan for the first two numbers for (const FrList *sp = span ; sp ; sp = sp->rest()) { if (sp->first() && sp->first()->numberp()) { sp_score = sp->first()->floatValue() ; for (sp = sp->rest() ; sp ; sp = sp->rest()) { if (sp->first() && sp->first()->numberp()) { sp_weight = sp->first()->floatValue() ; break ; } } break ; } } const char *curr_text = 0 ; const char *orig_text = 0 ; // scan for the first two strings or symbols for (const FrList *sp = span ; sp ; sp = sp->rest()) { FrObject *item = sp->first() ; if (item && (item->stringp() || item->symbolp())) { curr_text = item->printableName() ; for (sp = sp->rest() ; sp ; sp = sp->rest()) { item = sp->first() ; if (item && (item->stringp() || item->symbolp())) { orig_text = item->printableName() ; break ; } } break ; } } if (curr_text) (void)FrSkipWhitespace(curr_text) ; if (orig_text) (void)FrSkipWhitespace(orig_text) ; // scan for the first structure const FrStruct *meta = 0 ; for (const FrList *sp = span ; sp ; sp = sp->rest()) { if (sp->first() && sp->first()->structp()) { meta = (FrStruct*)sp->first() ; break ; } } init(sp_start,sp_end,sp_score,sp_weight,curr_text,contain) ; free_object(m_metadata) ; if (meta) { FrSymbol *symMETATYPE = FrSymbolTable::add(METADATA_TYPENAME) ; if (meta->typeName() == symMETATYPE) m_metadata = (FrStruct*)meta->deepcopy() ; else { // copy the keywords one by one FrList *keys = meta->fieldNames() ; while (keys) { FrSymbol *key = (FrSymbol*)poplist(keys) ; setMetaData(key,meta->get(key)) ; } } } if (orig_text) setMetaData(FrSymbolTable::add(init_text_tag), new FrString(orig_text),false) ; // finally, scan for any embedded lists and add them as metadata fields for (const FrList *sp = span ; sp ; sp = sp->rest()) { FrList *item = (FrList*)sp->first() ; if (item && item->consp() && item->first() && item->first()->symbolp()) { FrSymbol *key = (FrSymbol*)item->first() ; setMetaData(key,item->rest()) ; } } return true ; } return false ; }