//----------------------------------------------------------------------
// Build a new span covering two adjacent spans.
//   Both spans must be non-null and span2 must start exactly at
//   span1->pastEnd(); otherwise 0 is returned.  The new text is span1's
//   text, one blank, and span2's text.  Score and weight are produced by
//   combine_values() using score_op and weight_op; a weight_op of
//   FrTSOp_Default means "use score_op".
//   Returns 0 if either the text buffer or the span cannot be allocated.

FrTextSpan *FrTextSpan::merge(const FrTextSpan *span1, const FrTextSpan *span2,
                              FrTextSpan_Operation score_op,
                              FrTextSpan_Operation weight_op)
{
   // only directly adjacent spans can be merged
   if (!span1 || !span2 || span1->pastEnd() != span2->start())
      return 0 ;
   if (weight_op == FrTSOp_Default)
      weight_op = score_op ;
   size_t left_len = strlen(span1->text()) ;
   size_t right_len = strlen(span2->text()) ;
   // room for both texts, the separating blank, and the terminator
   char *joined = FrNewN(char,left_len+right_len+2) ;
   if (!joined)
      return 0 ;
   FrTextSpan *merged = new FrTextSpan ;
   if (!merged)
      {
      FrFree(joined) ;
      return 0 ;
      }
   memcpy(joined,span1->text(),left_len) ;
   joined[left_len] = ' ' ;
   // copying right_len+1 bytes brings span2's terminating NUL along
   memcpy(joined+left_len+1,span2->text(),right_len+1) ;
   merged->m_text = joined ;
   merged->m_start = span1->start() ;
   merged->m_end = span2->end() ;
   merged->m_score = combine_values(span1->score(),span2->score(),score_op) ;
   merged->m_weight = combine_values(span1->weight(),span2->weight(),
                                     weight_op) ;
   return merged ;
}
size_t FrTextSpan::wordCountOriginal() const { char *words = originalText() ; size_t count = word_count(words) ; FrFree(words) ; return count ; }
//----------------------------------------------------------------------
// Per-entry callback that writes one term's statistics as a text line.
//   Variable arguments, consumed in this order via the FrVarArg macros:
//      FILE *fp           output stream
//      bool verbosely     (passed as int) also print the TF*IDF value
//      size_t total_docs  only read when 'verbosely' is set
//   Entries with no name, no record, or a zero term/document frequency
//   are skipped.  Always returns true so that iteration continues.

static bool write_tfidf(FrSymHashEntry *entry, va_list args)
{
   if (entry)
      {
      FrVarArg(FILE*,fp) ;
      const FrSymbol *term = entry->getName() ;
      FrTFIDFrecord *rec = (FrTFIDFrecord*)entry->getUserData() ;
      if (term && rec && rec->termFrequency() > 0 && rec->docFrequency() > 0)
         {
         FrVarArg2(bool,int,verbosely) ;
         char *termname = term->print() ;
         // the frequencies are cast to unsigned long, so the matching
         //   conversion is %lu (the former %ld mismatched the argument type)
         if (verbosely)
            {
            FrVarArg(size_t,total_docs) ;
            fprintf(fp,"%s %lu %lu %g\n",termname,
                    (unsigned long)rec->termFrequency(),
                    (unsigned long)rec->docFrequency(),
                    rec->TF_IDF(total_docs)) ;
            }
         else
            fprintf(fp,"%s %lu %lu\n",termname,
                    (unsigned long)rec->termFrequency(),
                    (unsigned long)rec->docFrequency()) ;
         FrFree(termname) ;
         }
      }
   return true ;                        // continue iterating
}
//----------------------------------------------------------------------
// Replace the stored database directory with a copy of 'dir'.
//   A null 'dir' selects ".".  The previous value of db_directory is
//   released with FrFree().

void FramepaC_set_db_dir(const char *dir)
{
   if (!dir)
      dir = "." ;
   // duplicate BEFORE releasing the old value: if the caller hands us the
   //   current db_directory pointer (or a pointer into it), freeing first
   //   would make FrDupString read freed memory
   char *new_dir = FrDupString(dir) ;
   FrFree(db_directory) ;
   db_directory = new_dir ;
   return ;
}
//----------------------------------------------------------------------
// Release the priority and entry arrays and reset the queue's fields.
//   Each array goes back to its allocator when that allocator is set and
//   q_size equals alloc_size; otherwise it is released with FrFree().

FrBoundedPriQueue::~FrBoundedPriQueue()
{
   const bool size_matches = (q_size == alloc_size) ;

   if (p_allocator && size_matches)
      p_allocator->release(priorities) ;
   else
      FrFree(priorities) ;

   if (e_allocator && size_matches)
      e_allocator->release(entries) ;
   else
      FrFree(entries) ;

   // leave the object in a cleared state
   q_tail = 0 ;
   q_head = 0 ;
   q_size = 0 ;
   priorities = 0 ;
   entries = nullptr ;
   sort_descending = true ;
   copy_objects = false ;
   return ;
}
//----------------------------------------------------------------------
// Match 'word' against the compiled expression and build the replacement.
//   Returns 0 when there is no compiled expression, 'word' is null or
//   empty, or the match does not consume the entire word.  Otherwise the
//   replacement template is expanded into a new FrString:
//      FrRE_QUOTE + digit  inserts the text captured for that group
//      FrRE_QUOTE + other  inserts that character literally
//      anything else       is copied unchanged
//   The expansion is truncated to FrMAX_SYMBOLNAME_LEN characters.

FrObject *FrRegExp::match(const char *word) const
{
   if (!regex || !word || !*word)
      return 0 ;
   char *groups[10] ;
   for (size_t i = 0 ; i < lengthof(groups) ; i++)
      groups[i] = 0 ;
   FrObject *result = 0 ;
   char *matchbuf = 0 ;
   const char *end = re_match(regex,word,matchbuf,0,groups,lengthof(groups)) ;
   // only accept a match which reaches the end of the word
   if (end != 0 && !*end)
      {
      char translation[FrMAX_SYMBOLNAME_LEN+1] ;
      char *trans_end = &translation[FrMAX_SYMBOLNAME_LEN] ;
      char *xlat = translation ;
      for (const char *repl = replacement ;
           *repl && xlat < trans_end ;
           repl++)
         {
         char c = *repl ;
         if (c != FrRE_QUOTE)
            {
            *xlat++ = c ;
            continue ;
            }
         // escape-char plus digit specifies a replacement taken from the
         //   source match
         c = *++repl ;
         if (!c)
            break ;                     // dangling escape at end of template
         if (Fr_isdigit(c))
            {
            const char *targ = groups[c-'0'] ;
            if (targ)
               {
               // never copy more than still fits; an unchecked copy of a
               //   long group would overrun the fixed-size buffer
               size_t len = strlen(targ) ;
               size_t room = (size_t)(trans_end - xlat) ;
               if (len > room)
                  len = room ;
               memcpy(xlat,targ,len) ;
               xlat += len ;
               }
            else
               FrWarningVA("mismatch in r.e. replacement: %%%c",c) ;
            }
         else
            {
            // escaped non-digit: emit the escaped character itself (the
            //   previous code emitted the character AFTER it, dropping the
            //   quoted one and possibly stepping past the terminator)
            *xlat++ = c ;
            }
         }
      *xlat = '\0' ;
      result = new FrString(translation) ;
      }
   // the group strings are released here whether or not the match succeeded
   for (size_t j = 0 ; j < lengthof(groups) ; j++)
      {
      if (groups[j])
         FrFree(groups[j]) ;
      }
   return result ;
}
//----------------------------------------------------------------------
// Look for the base name of 'filename' inside fallback_dir.
//   Returns the path built by FrAddDefaultPath() if FrFileExists() accepts
//   it (the caller then holds that string), else 0.  Nothing is attempted
//   when fallback_dir is unset or empty.

static char *locate_in_fallback_directory(const char *filename)
{
   if (!fallback_dir || !*fallback_dir)
      return 0 ;
   char *candidate = FrAddDefaultPath(FrFileBasename(filename),fallback_dir) ;
   if (candidate && FrFileExists(candidate))
      return candidate ;
   FrFree(candidate) ;
   return 0 ;
}
void FrWordIDList::freeIDList() { FrWordIDList *list = this ; while (list) { FrWordIDList *tmp = list ; list = list->next() ; FrFree(tmp) ; } return ; }
//----------------------------------------------------------------------
// Convert the number to a symbol named by its printed form.
//   Returns 0 if print() yields no string.  The encoding argument is
//   unused.

FrSymbol *FrNumber::coerce2symbol(FrCharEncoding) const
{
   char *printed = print() ;
   if (!printed)
      return 0 ;
   FrSymbol *symbol = FrSymbolTable::add(printed) ;
   FrFree(printed) ;
   return symbol ;
}
//----------------------------------------------------------------------
// Replace the span's text with 'new_text'.
//   copy_text == true   store a duplicate of new_text
//   copy_text == false  store the new_text pointer itself
//   The first time the text is updated (no metadata stored under
//   init_text_tag yet), the current getText() result is recorded as
//   metadata under that tag.

void FrTextSpan::updateText(char *new_text, bool copy_text)
{
   FrSymbol *symINIT = FrSymbolTable::add(init_text_tag) ;
   if (!getMetaData(symINIT))
      {
      char *txt = getText() ;
      setMetaData(symINIT,new FrString(txt,strlen(txt),1,false),false) ;
      }
   // obtain the replacement BEFORE releasing the old text: new_text may be
   //   m_text itself, in which case freeing first would either duplicate
   //   freed memory (copy_text) or leave m_text dangling (!copy_text)
   char *replacement = copy_text ? FrDupString(new_text) : new_text ;
   if (replacement != m_text)
      FrFree(m_text) ;
   m_text = replacement ;
   return ;
}
void FrTextSpans::makeWordSpans(const char *text, FrCharEncoding enc, const char *word_delim) { m_text = FrDupString(text) ; if (m_text) { if (!setPositionMap()) { m_textlength = 0 ; return ; } char *canon = FrCanonicalizeSentence(m_text,enc,false,word_delim) ; if (canon && *canon) { m_spancount = 1 ; for (char *cptr = canon ; *cptr ; cptr++) { if (' ' == *cptr) m_spancount++ ; } m_spans = FrNewN(FrTextSpan,m_spancount) ; if (m_spans) { size_t tpos = 0 ; size_t cpos = 0 ; size_t start = 0 ; size_t end = 0 ; for (size_t i = 0 ; i < m_spancount ; i++) { // scan over the nonwhitespace chars at the current location for ( ; canon[cpos] && canon[cpos] != ' ' ; cpos++) { tpos++ ; end++ ; // counts toward m_positions index } // skip over any trailing whitespace while (m_text[tpos] && Fr_isspace(m_text[tpos])) tpos++ ; if (canon[cpos] == ' ') cpos++ ; m_spans[i].init(start,end-1,DEFAULT_SCORE,DEFAULT_WEIGHT, 0,this) ; m_sorted = false ; start = end ; } } } FrFree(canon) ; } return ; }
//----------------------------------------------------------------------
// Replace the span's text with the result of applying 'fn' to it.
//   Does nothing when 'fn' is null.  The first time the text is updated
//   (no metadata stored under init_text_tag yet), the current getText()
//   result is recorded as metadata under that tag.
//   'fn' may return its argument (text kept in place), a different string
//   (the span takes it over and frees the old one), or 0 (text unchanged).

void FrTextSpan::updateText(FrTextSpanUpdateFn *fn)
{
   if (!fn)
      return ;
   FrSymbol *symINIT = FrSymbolTable::add(init_text_tag) ;
   if (!getMetaData(symINIT))
      {
      char *txt = getText() ;
      setMetaData(symINIT,new FrString(txt,strlen(txt),1,false),false) ;
      }
   if (m_text)
      {
      // the span already has its own text: update it directly
      char *updated = fn(m_text) ;
      if (updated && updated != m_text)
         {
         FrFree(m_text) ;
         m_text = updated ;
         }
      return ;
      }
   // no private text yet: run the function on the string from getText()
   char *current = getText() ;
   char *updated = fn(current) ;
   // that string is released unless the function handed it straight back
   if (!updated || updated != current)
      FrFree(current) ;
   if (updated)
      m_text = updated ;
   return ;
}
//----------------------------------------------------------------------
// Destroy the compiled expression, the replacement string, and every
//   entry on the _classes list together with the FrRegExClass stored in
//   each entry's cdr.

FrRegExp::~FrRegExp()
{
   delete regex ;
   regex = 0 ;
   FrFree(replacement) ;
   replacement = 0 ;
   while (_classes)
      {
      FrCons *entry = (FrCons*)poplist(_classes) ;
      if (!entry)
         continue ;
      // grab the class pointer before the cons cell holding it is freed
      FrRegExClass *char_class = (FrRegExClass*)entry->consCdr() ;
      entry->freeObject() ;
      delete char_class ;
      }
   _token = 0 ;
   return ;
}
//----------------------------------------------------------------------
// Worker: remove this job's slice of symbols from the hash table.
//   The slice is [slice_start, slice_start+slice_size) of order->syms.
//   When order->strict is set, each symbol that remove() reports as
//   absent produces a diagnostic on cerr.

static void hash_remove(HashRequestOrder *order)
{
   my_job_id = order->id ;
   size_t slice_end = order->slice_start + order->slice_size ;
   HashT *ht = (HashT*)order->ht ;
   KeyT *syms = (KeyT*)order->syms ;
   for (size_t i = order->slice_start ; i < slice_end ; ++i)
      {
      if (!ht->remove(syms[i]) && order->strict)
         {
         // %ld requires a long argument; 'i' is a size_t, so convert
         //   both values explicitly
         char *msg = Fr_aprintf("; Job %ld encountered missing symbol @ %ld.\n",
                                (long)order->id,(long)i) ;
         if (msg)                       // never stream a null char*
            cerr << msg << flush ;
         FrFree(msg) ;
         }
      }
   //   ht->reclaimDeletions() ;
   return ;
}
//----------------------------------------------------------------------
// Warn about a list which was not closed by a right parenthesis.
//   The warning shows the printed form of list->subseq(0,5).  When the
//   list has more than six elements, the last character of that printed
//   form is dropped and " ..." is appended to show that more was read.

static void expected_right_paren(const FrList *list)
{
   size_t len = list->listlength() ;
   FrObject *listhead = list->subseq(0,5) ;
   char *printed = 0 ;
   if (listhead)
      {
      printed = listhead->print() ;
      free_object(listhead) ;
      }
   if (!printed)
      printed = FrDupString("") ;
   const char *cont = "" ;
   if (len > 6)
      {
      cont = " ..." ;
      // strip the final character, but only if there is one: indexing
      //   [-1] from the terminator of an empty string writes before the
      //   start of the buffer
      size_t printed_len = printed ? strlen(printed) : 0 ;
      if (printed_len > 0)
         printed[printed_len-1] = '\0' ;
      }
   FrWarningVA("malformed list (expected right parenthesis),\n\tread %s%s",
               printed ? printed : "",cont) ;
   FrFree(printed) ;
   return ;
}
//----------------------------------------------------------------------
// Worker: add symbols chosen through the job's slice of random numbers.
//   For each of the slice_size values starting at
//   randnums[slice_start], the symbol at that index of order->syms is
//   added to the table.  With m_verbose set, a completion line is
//   written to cout.

static void hash_random_add(HashRequestOrder *order)
{
   my_job_id = order->id ;
   HashT *table = (HashT*)order->ht ;
   KeyT *keys = (KeyT*)order->syms ;
   uint32_t *picks = order->randnums + order->slice_start ;
   for (size_t n = 0 ; n < order->slice_size ; ++n)
      {
      size_t chosen = picks[n] ;
      (void)table->add(keys[chosen]) ;
      }
   if (!order->m_verbose)
      return ;
   char *msg = Fr_aprintf("; Job %ld cycle %ld complete.\n",order->id,order->current_cycle) ;
   cout << msg << flush ;
   FrFree(msg) ;
   return ;
}
char *FrLocateFile(const char *filename, va_list args) { char *file = locate_in_default_directories(filename) ; if (file) return file ; FrVarArg(const char *,dir) ; while (dir) { if (dir) { char *path = FrAddDefaultPath(filename,dir) ; if (path && FrFileExists(path)) return path ; FrFree(path) ; } dir = va_arg(args,const char *) ; } // if we get here, the file was not found in any of the directories, so // make one last attempt at the fallback location return locate_in_fallback_directory(filename) ; }
char *FrLocateFile(const char *filename, const FrList *directories) { char *file = locate_in_default_directories(filename) ; if (file) return file ; // finally, check the given fallback directories in turn for ( ; directories ; directories = directories->rest()) { const char *directory = FrPrintableName(directories->first()) ; if (directory) { char *path = FrAddDefaultPath(filename,directory) ; if (path && FrFileExists(path)) return path ; FrFree(path) ; } } // if we get here, the file was not found in any of the directories, so // make one last attempt at the fallback location return locate_in_fallback_directory(filename) ; }
//----------------------------------------------------------------------
// Worker: check that each symbol in this job's slice is found by
//   find_Symbol().
//   With order->strict set, each symbol not found is reported on cerr.
//   With order->m_verbose set, a completion line is written to cout.

static void hash_checksyms(HashRequestOrder *order)
{
   my_job_id = order->id ;
   KeyT *keys = (KeyT*)order->syms ;
   const size_t first = order->slice_start ;
   const size_t past_last = first + order->slice_size ;
   for (size_t n = first ; n < past_last ; ++n)
      {
      if (find_Symbol(keys[n]) || !order->strict)
         continue ;
      char report[300] ;
      Fr_sprintf(report,sizeof(report),"; Job %d - missing symbol %s\n",
                 (int)order->id,sym_name(keys[n])) ;
      cerr << report << flush ;
      }
   if (!order->m_verbose)
      return ;
   char *msg = Fr_aprintf("; Job %ld cycle %ld complete.\n",order->id,order->current_cycle) ;
   cout << msg << flush ;
   FrFree(msg) ;
   return ;
}
static const char *re_match(const FrRegExElt *re, const char *candidate, size_t min_reps, size_t &max_reps, char *&match, const char *matchbuf_end, char **groups, size_t num_groups) { assertq(re != 0 && match != 0 && matchbuf_end != 0) ; switch (re->reType()) { case FrRegExElt::End: // match end of word if (*candidate) { max_reps = 0 ; // uh oh, not at end of word.... return 0 ; } else { max_reps = 1 ; return candidate ; } case FrRegExElt::Char: { char c = Fr_toupper(re->getChar()) ; size_t i ; for (i = 0 ; i < max_reps ; i++) { if (Fr_toupper(*candidate) != c) break ; if (match < matchbuf_end) *match++ = *candidate ; candidate++ ; } max_reps = i ; if (i >= min_reps ) return candidate ; else return 0 ; } case FrRegExElt::CharSet: { const char *set = re->getCharSet() ; if (!set) { max_reps = 0 ; return 0 ; } size_t i ; for (i = 0 ; i < max_reps && *candidate ; i++) { if (!set[*(unsigned char*)candidate]) break ; if (match < matchbuf_end) *match++ = *candidate ; candidate++ ; } max_reps = i ; if (i >= min_reps) return candidate ; else return 0 ; } case FrRegExElt::String: { const char *string = re->getString() ; size_t len = re->stringLength() ; if (!string) { max_reps = 0 ; return 0 ; } size_t i ; for (i = 0 ; i < max_reps ; i++) { if (Fr_memicmp(candidate,string,len) != 0) break ; const char *tstr = string+len+1 ; size_t tlen = strlen(tstr) ; if (match+tlen < matchbuf_end) { memcpy(match,tstr,tlen) ; match += tlen ; } candidate += len ; } max_reps = i ; if (i >= min_reps) return candidate ; else return 0 ; } case FrRegExElt::Alt: { FrRegExElt **alts = re->getAlternatives() ; char *matchbuf = match ; const char *end = re_match_alt(alts,candidate,min_reps,max_reps, match,matchbuf_end,groups,num_groups) ; if (max_reps >= min_reps) { int group_num = re->groupNumber() ; if (end && group_num >= 0 && group_num < (int)num_groups) { // since the r.e. 
compiler ensures that only alternations can // be used for grouping, we can get away with only recording // groupings right here *match = '\0' ; FrFree(groups[group_num]) ; groups[group_num] = FrDupString(matchbuf) ; } return end ; } else return 0 ; } case FrRegExElt::Class: { const char *end = re_match_class(re,candidate,min_reps,max_reps, match,matchbuf_end,groups, num_groups) ; return (max_reps >= min_reps) ? end : 0 ; } case FrRegExElt::Accept: return strchr(candidate,'\0') ; default: max_reps = 0 ; FrMissedCase("re_match") ; return 0 ; } }
static FrRegExElt *compile_simple(const char *&re, bool in_alt) { assertq(*re != '\0' && *re != '.') ; const char *orig_re = re ; const char *regex = re ; size_t stringlen = 0 ; const char *xlat = 0 ; size_t translen = 0 ; while (*regex) { char c = *regex ; // check whether we're escaping a special character if (c == FrRE_QUOTE) { if (regex[1]) { regex++ ; stringlen++ ; } } // the wildcard chacter ends the simple RE else if (c == '.') break ; // as do repetition specifiers else if (c == FrRE_MULTIPLE || c == FrRE_KLEENE || c == FrRE_OPTIONAL || c == FrRE_COUNT_BEG) { if (stringlen > 1) { stringlen-- ; regex-- ; // check for an escaped special character if (regex > re && regex[-1] == FrRE_QUOTE) regex-- ; break ; } else if (stringlen > 0) break ; stringlen++ ; } else if (c == FrRE_CHARSET_BEG || c == FrRE_ALT_BEG) break ; else if (in_alt) { if (c == '|') { // we have a replacement string for the current alternative const char *tmp = regex+1 ; translen = 0 ; while (*tmp) { // check for an escaped special character if (*tmp == FrRE_QUOTE) { if (tmp[1]) { tmp++ ; translen++ ; } } else if (*tmp == FrRE_ALT_SEP || *tmp == FrRE_ALT_END) break ; else translen++ ; tmp++ ; } xlat = regex+1 ; regex = tmp ; break ; } // the simple RE ends with the end of the current alternative else if (c == FrRE_ALT_SEP || c == FrRE_ALT_END) break ; else stringlen++ ; } else stringlen++ ; if (*regex) regex++ ; } assertq(stringlen > 0) ; if (!xlat) { xlat = orig_re ; translen = stringlen ; } char *string = FrNewN(char,stringlen+1) ; char *trans = FrNewN(char,translen+1) ; if (string) { const char *s = re ; for (size_t i = 0 ; i < stringlen ; i++) { // check for an escaped special character if (*s == FrRE_QUOTE) s++ ; string[i] = *s++ ; } } else string = (char*)re ; if (trans) { const char *t = xlat ; for (size_t i = 0 ; i < translen ; i++) { // check for an escaped special character if (*t == FrRE_QUOTE) t++ ; trans[i] = *t++ ; } } else trans = (char*)xlat ; FrRegExElt *elt ; if (stringlen == 
1 && translen == 1 && *string == *trans) elt = new FrRegExElt(*string) ; else elt = new FrRegExElt(string,stringlen,trans,translen) ; if (string != re) FrFree(string) ; if (trans != xlat) FrFree(trans) ; re = regex ; return elt ; }
char *FrTextSpans::getText(size_t start, size_t end) { if (end < start) end = start ; if (m_text) { if (m_positions && end < m_textlength) { start = m_positions[start] ; end = m_positions[end+1] ; } // strip trailing whitespace left by the m_positions mapping while (end > start && Fr_isspace(m_text[end-1])) end-- ; char *buf = FrNewN(char,end-start+1) ; if (buf) { strncpy(buf,m_text+start,end-start) ; buf[end-start] = '\0' ; } return buf ; } // no original text stored, so try to assemble a string from the spans, // if all spans are unambiguous sort() ; // ensure that spans are sorted by posn bool unambig = true ; for (size_t i = start ; i <= end ; i++) { if (m_positions[i+1] > m_positions[i] + 1) { unambig = false ; break ; } } if (unambig) { size_t i ; size_t len = 0 ; for (i = start ; i <= end ; i++) { char *text = m_spans[i].originalText() ; if (text) { len += strlen(text) + 1 ; FrFree(text) ; } } char *buf = FrNewN(char,len+1) ; if (buf) { *buf = '\0' ; char *bufptr = buf ; for (i = start ; i <= end ; i++) { char *text = m_spans[i].originalText() ; if (text) { len = strlen(text) ; memcpy(bufptr,text,len) ; bufptr[len] = ' ' ; bufptr += len + 1 ; } } if (bufptr > buf) // change trailing blank into string bufptr[-1] = '\0' ; // terminator } return buf ; } // if we get here, we were unable to satisfy the request return 0 ; }
static void hash_test(FrThreadPool *user_pool, ostream &out, size_t threads, size_t cycles, HashT *ht, size_t maxsize, KeyT *syms, enum Operation op, bool terse, bool strict = true, uint32_t *randnums = 0) { FrThreadPool *tpool = user_pool ? user_pool : new FrThreadPool(threads) ; bool must_wait = (threads != 0) ; if (threads == 0) threads = 1 ; HashRequestOrder *hashorders = FrNewC(HashRequestOrder,threads) ; //out << " Dispatching threads" << endl ; size_t slice_size = (maxsize + threads/2) / threads ; if (ht) { ht->clearGlobalStats() ; ht->clearPerThreadStats() ; } FrElapsedTimer etimer ; FrTimer timer ; for (size_t i = 0 ; i < threads ; ++i) { hashorders[i].op = op ; hashorders[i].size = maxsize ; hashorders[i].ht = (void*)ht ; hashorders[i].syms = (FrSymbol**)syms ; hashorders[i].randnums = randnums ; hashorders[i].strict = strict ; hashorders[i].m_verbose = false ; hashorders[i].m_terse = terse ; hashorders[i].cycles = cycles ; hashorders[i].id = i+1 ; hashorders[i].threads = threads ; hashorders[i].pool = tpool ; hashorders[i].slice_start = i * slice_size ; hashorders[i].slice_size = (i+1 < threads) ? 
slice_size : (maxsize - hashorders[i].slice_start) ; hashorders[i].extra_arg = 0 ; hashorders[i].total_ops = 0 ; switch (op) { case Op_GENSYM: hashorders[i].func = hash_gensym ; break ; case Op_ADD: hashorders[i].func = hash_add<HashT,KeyT> ; break ; case Op_CHECK: hashorders[i].func = hash_check<HashT,KeyT> ; break ; case Op_CHECKMISS: hashorders[i].func = hash_check<HashT,KeyT> ; hashorders[i].extra_arg = 1 ; break ; case Op_CHECKSYMS: hashorders[i].func = hash_checksyms<HashT,KeyT> ; break ; case Op_REMOVE: hashorders[i].func = hash_remove<HashT,KeyT> ; break ; case Op_RANDOM: hashorders[i].func = hash_random<HashT,KeyT> ; hashorders[i].extra_arg = 3 ; break ; case Op_RANDOM_LOWREMOVE: hashorders[i].func = hash_random<HashT,KeyT> ; hashorders[i].extra_arg = 1 ; break ; case Op_RANDOM_HIGHREMOVE: hashorders[i].func = hash_random<HashT,KeyT> ; hashorders[i].extra_arg = 7 ; break ; case Op_RANDOM_NOREMOVE: hashorders[i].func = hash_random<HashT,KeyT> ; break ; case Op_RANDOM_ADDONLY: hashorders[i].func = hash_random_add<HashT,KeyT> ; break ; default: FrMissedCase("hash_test") ; } tpool->dispatch(&hash_dispatch<HashT>,&hashorders[i],0) ; } if (must_wait) { if (!terse) out << " Waiting for thread completion" << endl ; tpool->waitUntilIdle() ; } double walltime_noreclaim = etimer.read() ; if (ht && op == Op_REMOVE) { ht->reclaimDeletions() ; ht->updateGlobalStats() ; } double time = timer.readsec() ; double walltime = etimer.stop() ; if (!user_pool) delete tpool ; size_t ops = cycles * maxsize ; if (op == Op_RANDOM || op == Op_RANDOM_LOWREMOVE || op == Op_RANDOM_HIGHREMOVE || op == Op_RANDOM_NOREMOVE) { // sum up the per-thread counts of operations performed ops = 0 ; for (size_t i = 0 ; i < threads ; ++i) { FrCriticalSection::increment(ops,hashorders[i].total_ops) ; } } FrFree(hashorders) ; walltime = (round(10000*walltime)/10000) ; if (time <= 0.0) time = 0.00001 ; if (walltime <= 0.0) walltime = 0.00001 ; out << " Time: " << walltime << "s, " << time << "s CPU (" 
<< 100.0*(time/walltime) << "%), " ; pretty_print((size_t)(ops / walltime),out) ; out << " ops/sec" << endl ; if (op == Op_REMOVE) { out << " RwTm: " << walltime_noreclaim << "s without reclamation (" ; pretty_print((size_t)(ops / walltime_noreclaim),out) ; out << " ops/sec)" << endl ; } // verify success size_t size = ht ? ht->currentSize() : 0 ; size_t count = ht ? ht->countItems() : 0 ; size_t deleted = ht ? ht->countDeletedItems() : 0 ; if (size != count) { out << "'size' and 'count' disagree! " << size << " vs " << count << endl ; } if (op == Op_ADD) { if (size > maxsize) out << " " << (size-maxsize) << "spurious additions to hash table!" << endl ; else if (size< maxsize) out << " Failed to add " << (maxsize-size) << " items to hash table!" << endl ; } if (op == Op_REMOVE || op == Op_RANDOM) { if (deleted > 0) { if (ht) ht->reclaimDeletions() ; out << " Pending deletions: " << deleted << " marked for deletion, " << (ht ? ht->countDeletedItems() : 0) << " after reclamation" << endl ; } } if (op == Op_REMOVE) { if (size != 0) out << " Hash table was not emptied! " << size << " items remain (activeitems=" << count << ")." 
<< endl ; } if (!ht) return ; #ifdef FrHASHTABLE_STATS size_t stat_ins = ht->numberOfInsertions() ; size_t stat_ins_dup = ht->numberOfDupInsertions() ; size_t stat_ins_att = ht->numberOfInsertionAttempts() ; size_t stat_ins_forw = ht->numberOfForwardedInsertions() ; size_t stat_ins_resize = ht->numberOfResizeInsertions() ; size_t stat_cont = ht->numberOfContainsCalls() ; size_t stat_cont_succ = ht->numberOfSuccessfulContains() ; size_t stat_cont_forw = ht->numberOfForwardedContains() ; size_t stat_lookup = ht->numberOfLookups() ; size_t stat_lookup_succ = ht->numberOfSuccessfulLookups() ; size_t stat_lookup_forw = ht->numberOfForwardedLookups() ; size_t stat_rem = ht->numberOfRemovals() ; size_t stat_rem_count = ht->numberOfItemsRemoved() ; size_t stat_rem_forw = ht->numberOfForwardedRemovals() ; size_t stat_resize = ht->numberOfResizes() ; size_t stat_resize_assist = ht->numberOfResizeAssists() ; size_t stat_reclam = ht->numberOfReclamations() ; size_t stat_moves = ht->numberOfEntriesMoved() ; size_t stat_full = ht->numberOfFullNeighborhoods() ; size_t stat_chain = ht->numberOfChainLocks() ; size_t stat_chain_coll = ht->numberOfChainLockCollisions() ; size_t retries = (stat_ins_att >= stat_ins - stat_ins_dup) ? 
stat_ins_att - (stat_ins - stat_ins_dup) : 0 ; out << " Stat: " << (stat_ins-stat_ins_forw) << "+" << stat_ins_forw << " ins (" << stat_ins_dup << " dup, " << retries << " retry, " << stat_ins_resize << " resz), " << stat_cont_succ << '/' << stat_cont << '+' << stat_cont_forw << " cont, " << stat_lookup_succ << '/' << stat_lookup << '+' << stat_lookup_forw << " look, " << stat_rem_count << '/' << stat_rem << '+' << stat_rem_forw << " rem" << endl ; out << " Admn: " << stat_resize << " resizes (" << stat_resize_assist << " assists), " << stat_full << " congest, " << stat_reclam << " reclam, " << stat_moves << " moves, " << stat_chain_coll << '/' << stat_chain << " chainlock" << endl ; #ifdef FrMULTITHREAD size_t stat_spin = ht->numberOfSpins() ; size_t stat_yield = ht->numberOfYields() ; size_t stat_sleep = ht->numberOfSleeps() ; size_t stat_CAS = ht->numberOfCASCollisions() ; size_t stat_resize_cleanup = ht->numberOfResizeCleanups() ; out << " Thrd: " << stat_spin << " spins, " << stat_yield << " yields, " << stat_sleep << " sleeps, " << stat_CAS << " CAS, " << stat_resize_cleanup << " resize cleanups" << endl ; #endif /* FrMULTITHREAD */ #endif /* FrHASHTABLE_STATS */ return ; }
bool FrBWTIndex::compress() { if (m_compressed) return true ; m_bucketsize = DEFAULT_BUCKET_SIZE ; m_maxdelta = 255 - m_bucketsize ; m_numbuckets = (numItems() + bucketsize() - 1) / bucketsize() ; // figure out how big the pool of absolute pointers will be uint32_t prev_succ = ~0 ; size_t abs_pointers = 0 ; size_t comp_EORs = 0 ; m_poolsize = 0 ; FrAdviseMemoryUse(m_items,bytesPerPointer()*numItems(),FrMADV_SEQUENTIAL) ; for (size_t i = 0 ; i < numItems() ; i++) { if ((i % m_bucketsize) == 0) { abs_pointers = 0 ; comp_EORs = 0 ; } uint32_t succ = getUncompSuccessor(i) ; if (succ == m_EOR || (succ > m_EOR && m_eor_state == FrBWT_MergeEOR)) comp_EORs++ ; // will be stored without using an absolute pointer else if (succ <= prev_succ || succ - prev_succ > m_maxdelta || ((i+1)%m_bucketsize == 0 && (abs_pointers + comp_EORs == 0))) { // above enforces at least one absolute pointer per bucket abs_pointers++ ; m_poolsize++ ; } prev_succ = succ ; } size_t bpp = bytesPerPointer() ; // now that we know how big the pool is, check whether we will actually // save any space by compressing if ((m_poolsize + m_numbuckets) * bpp + numItems() >= numItems() * bpp) return false ; // can't (usefully) compress // allocate the various buffers for the compressed data m_buckets = FrNewN(char,bpp * m_numbuckets) ; unsigned char *comp_items = FrNewN(unsigned char,numItems()) ; m_bucket_pool = FrNewN(char,bpp * m_poolsize) ; if (comp_items && m_buckets && m_bucket_pool) { size_t bucket = 0 ; size_t ptr_count = 0 ; size_t ptr_index = 0 ; prev_succ = ~0 ; for (size_t i = 0 ; i < numItems() ; i++) { if ((i % m_bucketsize) == 0) { FrStoreLong(ptr_count,m_buckets + bpp * bucket++) ; ptr_index = 0 ; comp_EORs = 0 ; } if ((i % CHUNK_SIZE) == 0 && i > 0) { // let OS know we're done with another chunk of m_items FrDontNeedMemory(m_items + bpp*(i-CHUNK_SIZE), bpp*CHUNK_SIZE, (i > CHUNK_SIZE)) ; // and tell it to prefetch the next chunk FrWillNeedMemory(m_items + bpp*i, bpp*CHUNK_SIZE) ; } uint32_t succ 
= getUncompSuccessor(i) ; if (succ == m_EOR || (succ > m_EOR && m_eor_state == FrBWT_MergeEOR)) { comp_items[i] = COMPRESSED_EOR ; comp_EORs++ ; } else if (succ <= prev_succ || succ - prev_succ > m_maxdelta || ((i+1)%m_bucketsize == 0 && (ptr_index + comp_EORs == 0))) // (above ensures at least one abs.ptr per bucket) { FrStoreLong(succ,m_bucket_pool + bpp * ptr_count++); comp_items[i] = (unsigned char)(m_maxdelta + (++ptr_index)) ; } else comp_items[i] = (unsigned char)(succ - prev_succ) ; prev_succ = succ ; } assertq(ptr_count == m_poolsize) ; if (!m_fmap) FrFree(m_items) ; m_items = comp_items ; m_compressed = true ; return true ; } else // memory alloc failed { FrWarning("out of memory while compressing index, " "will remain uncompressed") ; FrFree(comp_items) ; FrFree(m_buckets) ; m_buckets = 0 ; FrFree(m_bucket_pool) ; m_bucket_pool = 0 ; m_numbuckets = 0 ; m_poolsize = 0 ; return false ; } }