// Build a table that assigns a running word number to every character
// offset 0..textLength() of this object's text.
//
//   skip_white  true:  a new word starts wherever two consecutive entries
//                      of m_positions are not adjacent (presumably because
//                      whitespace was skipped between them -- confirm
//                      against setPositionMap()).
//               false: a new word starts wherever m_text goes from a
//                      whitespace character to a non-whitespace one.
//
// Returns an array of textLength()+1 entries obtained from FrNewN, or 0
// if there is no original string, the text is empty, or the allocation
// fails.  (Presumably the caller is responsible for releasing the array.)
size_t *FrTextSpans::wordMapping(bool skip_white) const
{
   if (!originalString() || textLength() == 0)
      return 0;

   size_t *mapping = FrNewN(size_t, textLength() + 1);
   if (!mapping)
      return 0;

   size_t word = 0;
   mapping[0] = word;
   for (size_t pos = 1; pos <= textLength(); ++pos)
      {
      bool starts_word;
      if (skip_white)
         starts_word = (m_positions[pos] != m_positions[pos - 1] + 1);
      else
         starts_word = (Fr_isspace(m_text[pos - 1]) && !Fr_isspace(m_text[pos]));
      if (starts_word)
         word++;
      mapping[pos] = word;
      }
   return mapping;
}
void FrTextSpans::makeWordSpans(const char *text, FrCharEncoding enc, const char *word_delim) { m_text = FrDupString(text) ; if (m_text) { if (!setPositionMap()) { m_textlength = 0 ; return ; } char *canon = FrCanonicalizeSentence(m_text,enc,false,word_delim) ; if (canon && *canon) { m_spancount = 1 ; for (char *cptr = canon ; *cptr ; cptr++) { if (' ' == *cptr) m_spancount++ ; } m_spans = FrNewN(FrTextSpan,m_spancount) ; if (m_spans) { size_t tpos = 0 ; size_t cpos = 0 ; size_t start = 0 ; size_t end = 0 ; for (size_t i = 0 ; i < m_spancount ; i++) { // scan over the nonwhitespace chars at the current location for ( ; canon[cpos] && canon[cpos] != ' ' ; cpos++) { tpos++ ; end++ ; // counts toward m_positions index } // skip over any trailing whitespace while (m_text[tpos] && Fr_isspace(m_text[tpos])) tpos++ ; if (canon[cpos] == ' ') cpos++ ; m_spans[i].init(start,end-1,DEFAULT_SCORE,DEFAULT_WEIGHT, 0,this) ; m_sorted = false ; start = end ; } } } FrFree(canon) ; } return ; }
void FrTextSpans::makeWordSpans(const FrList *defn) { FrString *concat = new FrString(defn) ; m_text = FrDupString(concat->stringValue()) ; free_object(concat) ; if (setPositionMap()) { m_spancount = defn->simplelistlength() ; m_spans = FrNewN(FrTextSpan,m_spancount) ; if (!m_spans) { m_spancount = 0 ; return ; } size_t spannum = 0 ; size_t start = 0 ; size_t end = 0 ; for ( ; defn ; defn = defn->rest()) { FrObject *def = defn->first() ; if (def && def->structp()) { addMetaData((FrStruct*)def) ; continue ; } const char *item = FrPrintableName(def) ; if (!item) item = "" ; for (const char *i = item ; *i ; i++) { if (!Fr_isspace(*i)) end++ ; } if (end > 0) m_spans[spannum++].init(start,end-1,DEFAULT_SCORE,DEFAULT_WEIGHT, item,this) ; m_sorted = false ; start = end ; } m_spancount = spannum ; } return ; }
void FrTextSpans::parseSpans(const FrList *spans, size_t numstrings) { for (const FrList *sp = spans ; sp ; sp = sp->rest()) { const FrObject *item = sp->first() ; if (item && (item->stringp() || item->symbolp())) { m_text = FrDupString(FrPrintableName(item)) ; if (m_text && setPositionMap()) break ; return ; } } m_spancount = spans->simplelistlength() - numstrings ; m_spans = FrNewN(FrTextSpan,m_spancount) ; if (!m_spans) { m_spancount = 0 ; return ; } size_t count = 0 ; for ( ; spans ; spans = spans->rest()) { FrList *span = (FrList*)spans->first() ; if (!span) continue ; if (span->consp() && m_spans[count].parse(span,this)) { count++ ; m_sorted = false ; } else if (span->structp()) { FrStruct *meta = (FrStruct*)span ; if (meta->typeName() && Fr_stricmp(FrPrintableName(meta->typeName()),"META") == 0) addMetaData(meta) ; } } m_spancount = count ; return ; }
// Construct a collection with as many wildcard sets as 'orig' and fill it
// via copy().
//
//   orig                collection to duplicate; may be null
//   allow_all_if_empty  passed through unchanged to copy()
//
// If 'orig' is null or the set array cannot be allocated, the new
// collection is empty: m_numsets is zero and m_wildcards is null.
WildcardCollection::WildcardCollection(const WildcardCollection *orig,
                                       bool allow_all_if_empty)
{
   if (!orig)
      {
      // give the pointer a defined value so it is never left indeterminate
      m_wildcards = 0;
      m_numsets = 0;
      return;
      }
   m_wildcards = FrNewN(WildcardSet, orig->numSets());
   if (!m_wildcards)
      {
      m_numsets = 0;
      return;
      }
   m_numsets = orig->numSets();
   copy(orig, allow_all_if_empty);
   return;
}
// Construct a collection of 'max_ref' wildcard sets.
//
//   max_ref    number of sets to allocate
//   allow_all  true:  addAll() is called on every set
//              false: removeAll() is called on every set
//
// If the set array cannot be allocated, the collection is left with
// m_numsets equal to zero.
WildcardCollection::WildcardCollection(unsigned max_ref, bool allow_all)
{
   m_wildcards = FrNewN(WildcardSet, max_ref);
   if (!m_wildcards)
      {
      m_numsets = 0;
      return;
      }
   m_numsets = max_ref;
   for (unsigned idx = 0; idx < numSets(); ++idx)
      {
      if (allow_all)
         m_wildcards[idx].addAll();
      else
         m_wildcards[idx].removeAll();
      }
   return;
}