Exemple #1
0
// Build a new span covering two adjacent spans.
//
// A merge takes place only when both spans are non-null and span2 begins
// exactly where span1 stops (span1->pastEnd() == span2->start()).  The new
// span's text is span1's text, a single blank, and span2's text; it runs
// from span1's start to span2's end.  Score and weight are produced by
// combine_values() using score_op and weight_op respectively; passing
// FrTSOp_Default for weight_op means "use score_op for the weight as well".
//
// Returns 0 when the spans are missing or not adjacent, or when an
// allocation fails.
FrTextSpan *FrTextSpan::merge(const FrTextSpan *span1, const FrTextSpan *span2,
                              FrTextSpan_Operation score_op,
                              FrTextSpan_Operation weight_op)
{
   if (!span1 || !span2 || span1->pastEnd() != span2->start())
      return 0 ;
   if (FrTSOp_Default == weight_op)
      weight_op = score_op ;
   const size_t left_len = strlen(span1->text()) ;
   const size_t right_len = strlen(span2->text()) ;
   // room for both strings, the separating blank, and the terminator
   char *joined = FrNewN(char,left_len+right_len+2) ;
   if (!joined)
      return 0 ;
   FrTextSpan *merged = new FrTextSpan ;
   if (!merged)
      {
      FrFree(joined) ;
      return 0 ;
      }
   char *dest = joined ;
   memcpy(dest,span1->text(),left_len) ;
   dest += left_len ;
   *dest++ = ' ' ;
   memcpy(dest,span2->text(),right_len+1) ;  // +1 copies the terminator too
   merged->m_text = joined ;
   merged->m_start = span1->start() ;
   merged->m_end = span2->end() ;
   merged->m_score = combine_values(span1->score(),span2->score(),score_op) ;
   merged->m_weight = combine_values(span1->weight(),span2->weight(),
                                     weight_op) ;
   return merged ;
}
Exemple #2
0
size_t FrTextSpan::wordCountOriginal() const
{
   char *words = originalText() ;
   size_t count = word_count(words) ;
   FrFree(words) ;
   return count ;
}
Exemple #3
0
// Hash-table iteration callback: write one term's statistics to a file.
//
// The variable arguments are consumed in this order:
//    FILE *fp           - output stream
//    bool  verbosely    - (passed as int) also print the TF*IDF value
//    size_t total_docs  - only read when 'verbosely' is true
// Entries without a name, without a record, or with a zero term or document
// frequency are skipped.  Always returns true so that iteration continues.
static bool write_tfidf(FrSymHashEntry *entry, va_list args)
{
   if (entry)
      {
      FrVarArg(FILE*,fp) ;
      const FrSymbol *term = entry->getName() ;
      FrTFIDFrecord *rec = (FrTFIDFrecord*)entry->getUserData() ;
      if (term && rec && rec->termFrequency() > 0 && rec->docFrequency() > 0)
         {
         FrVarArg2(bool,int,verbosely) ;
         char *termname = term->print() ;
         // passing a null pointer for %s is undefined, so skip the entry if
         //   the name could not be printed
         if (!termname)
            return true ;
         const unsigned long tf = (unsigned long)rec->termFrequency() ;
         const unsigned long df = (unsigned long)rec->docFrequency() ;
         // the counts are passed as unsigned long, so they need %lu
         if (verbosely)
            {
            FrVarArg(size_t,total_docs) ;
            fprintf(fp,"%s %lu %lu %g\n",termname,tf,df,
                    rec->TF_IDF(total_docs)) ;
            }
         else
            fprintf(fp,"%s %lu %lu\n",termname,tf,df) ;
         FrFree(termname) ;
         }
      }
   return true ;                        // continue iterating
}
Exemple #4
0
// Set the database directory to a private copy of 'dir'; a null 'dir'
// selects the current directory (".").  The previously stored directory
// string is released.
void FramepaC_set_db_dir(const char *dir)
{
   if (!dir)
      dir = "." ;
   // duplicate before releasing the old value, so that a caller passing the
   //   current db_directory (or a pointer into it) does not make us read
   //   freed memory
   char *new_dir = FrDupString(dir) ;
   FrFree(db_directory) ;
   db_directory = new_dir ;
   return ;
}
Exemple #5
0
// Release the priority and entry arrays and reset the queue's bookkeeping.
// An array is handed back to its allocator only when that allocator exists
// and q_size equals alloc_size; in every other case it is released with
// FrFree().
FrBoundedPriQueue::~FrBoundedPriQueue()
{
   const bool standard_size = (q_size == alloc_size) ;
   if (standard_size && p_allocator)
      p_allocator->release(priorities) ;
   else
      FrFree(priorities) ;
   if (standard_size && e_allocator)
      e_allocator->release(entries) ;
   else
      FrFree(entries) ;
   priorities = 0 ;
   entries = nullptr ;
   q_tail = 0 ;
   q_head = 0 ;
   q_size = 0 ;
   sort_descending = true ;
   copy_objects = false ;
   return ;
}
Exemple #6
0
// Match 'word' against the compiled expression and, when the whole word is
// matched, build the translation described by 'replacement'.
//
// In the replacement, FrRE_QUOTE followed by a digit inserts the text that
// was recorded for that group during the match; FrRE_QUOTE followed by any
// other character inserts that character literally.  The translation is
// limited to FrMAX_SYMBOLNAME_LEN characters; anything beyond that is
// dropped.
//
// Returns a new FrString holding the translation, or 0 if there is no
// compiled expression, 'word' is null or empty, or the match does not
// consume the entire word.
FrObject *FrRegExp::match(const char *word) const
{
   if (!regex || !word || !*word)
      return 0 ;
   char *groups[10] ;
   for (size_t i = 0 ; i < lengthof(groups) ; i++)
      groups[i] = 0 ;
   FrObject *result = 0 ;
   char *matchbuf = 0 ;
   const char *end = re_match(regex,word,matchbuf,0,groups,lengthof(groups)) ;
   if (end != 0 && !*end)
      {
      char translation[FrMAX_SYMBOLNAME_LEN+1] ;
      // last usable slot; reserved for the terminator
      char *trans_end = &translation[FrMAX_SYMBOLNAME_LEN] ;
      char *xlat = translation ;
      for (const char *repl = replacement ; *repl && xlat < trans_end ; repl++)
         {
         char c = *repl ;
         if (c != FrRE_QUOTE)
            {
            *xlat++ = c ;
            continue ;
            }
         // escape-char plus digit specifies a replacement taken from the
         //   source match
         c = *++repl ;
         if (!c)
            break ;                     // dangling escape at end of string
         if (Fr_isdigit(c))
            {
            const char *targ = groups[c-'0'] ;
            if (targ)
               {
               // never copy more than what still fits in the buffer
               size_t len = strlen(targ) ;
               const size_t room = (size_t)(trans_end - xlat) ;
               if (len > room)
                  len = room ;
               memcpy(xlat,targ,len) ;
               xlat += len ;
               }
            else
               FrWarningVA("mismatch in r.e. replacement: %%%c",c) ;
            }
         else
            *xlat++ = c ;               // the escaped character itself
         }
      *xlat = '\0' ;
      result = new FrString(translation) ;
      }
   for (size_t j = 0 ; j < lengthof(groups) ; j++)
      if (groups[j]) FrFree(groups[j]) ;
   return result ;
}
Exemple #7
0
// Look for the base name of 'filename' inside fallback_dir.  Returns the
// path built by FrAddDefaultPath() when that file exists (the caller then
// owns the string); returns 0 when no fallback directory is set or the file
// is not there.
static char *locate_in_fallback_directory(const char *filename)
{
   if (!fallback_dir || !*fallback_dir)
      return 0 ;
   char *candidate = FrAddDefaultPath(FrFileBasename(filename),fallback_dir) ;
   if (!candidate || !FrFileExists(candidate))
      {
      FrFree(candidate) ;
      return 0 ;
      }
   return candidate ;
}
Exemple #8
0
void FrWordIDList::freeIDList()
{
   FrWordIDList *list = this ;
   while (list)
      {
      FrWordIDList *tmp = list ;
      list = list->next() ;
      FrFree(tmp) ;
      }
   return ;
}
Exemple #9
0
// Convert the number to a symbol whose name is the number's printed form.
// The encoding argument is not used.  Returns 0 if print() yields no string.
FrSymbol *FrNumber::coerce2symbol(FrCharEncoding) const
{
   char *printed = print() ;
   if (!printed)
      return 0 ;
   FrSymbol *symbol = FrSymbolTable::add(printed) ;
   FrFree(printed) ;
   return symbol ;
}
Exemple #10
0
// Replace the span's text with 'new_text'.
//
// Before the first update, the span's current text (from getText()) is
// stored as metadata under the init_text_tag symbol; later updates leave
// that metadata alone.  When 'copy_text' is true the span stores its own
// duplicate of 'new_text'; otherwise it stores the pointer itself and will
// release it with FrFree() on a later update.
void FrTextSpan::updateText(char *new_text, bool copy_text)
{
   FrSymbol *symINIT = FrSymbolTable::add(init_text_tag) ;
   if (!getMetaData(symINIT))
      {
      char *txt = getText() ;
      // guard against getText() failing to produce a string, which would
      //   otherwise crash inside strlen()
      if (txt)
         setMetaData(symINIT,new FrString(txt,strlen(txt),1,false),false) ;
      }
   // build the replacement before releasing the old text, so that a caller
   //   passing the span's own text does not cause a use-after-free
   char *replacement = copy_text ? FrDupString(new_text) : new_text ;
   if (replacement != m_text)
      FrFree(m_text) ;
   m_text = replacement ;
   return ;
}
Exemple #11
0
void FrTextSpans::makeWordSpans(const char *text, FrCharEncoding enc,
				const char *word_delim)
{
   m_text = FrDupString(text) ;
   if (m_text)
      {
      if (!setPositionMap())
	 {
	 m_textlength = 0 ;
	 return ;
	 }
      char *canon = FrCanonicalizeSentence(m_text,enc,false,word_delim) ;
      if (canon && *canon)
	 {
	 m_spancount = 1 ;
	 for (char *cptr = canon ; *cptr ; cptr++)
	    {
	    if (' ' == *cptr)
	       m_spancount++ ;
	    }
	 m_spans = FrNewN(FrTextSpan,m_spancount) ;
	 if (m_spans)
	    {
	    size_t tpos = 0 ;
	    size_t cpos = 0 ;
	    size_t start = 0 ;
	    size_t end = 0 ;
	    for (size_t i = 0 ; i < m_spancount ; i++)
	       {
	       // scan over the nonwhitespace chars at the current location
	       for ( ; canon[cpos] && canon[cpos] != ' ' ; cpos++)
		  {
		  tpos++ ;
		  end++ ;		// counts toward m_positions index
		  }
	       // skip over any trailing whitespace
	       while (m_text[tpos] && Fr_isspace(m_text[tpos]))
		  tpos++ ;
	       if (canon[cpos] == ' ')
		  cpos++ ;
	       m_spans[i].init(start,end-1,DEFAULT_SCORE,DEFAULT_WEIGHT,
			       0,this) ;
	       m_sorted = false ;
	       start = end ;
	       }
	    }
	 }
      FrFree(canon) ;
      }
   return ;
}
Exemple #12
0
// Replace the span's text with the result of applying 'fn' to it.
//
// Before the first update, the current text (from getText()) is stored as
// metadata under the init_text_tag symbol.  'fn' is given the span's own
// text if it has one, otherwise a temporary string obtained from getText().
// If 'fn' returns null the span is left unchanged; if it returns a
// different pointer than it was given, the input string is released and the
// result becomes the span's text.  A null 'fn' does nothing.
void FrTextSpan::updateText(FrTextSpanUpdateFn *fn)
{
   if (!fn)
      return ;
   FrSymbol *symINIT = FrSymbolTable::add(init_text_tag) ;
   if (!getMetaData(symINIT))
      {
      char *initial = getText() ;
      FrString *saved = new FrString(initial,strlen(initial),1,false) ;
      setMetaData(symINIT,saved,false) ;
      }
   const bool had_text = (m_text != 0) ;
   char *source = had_text ? m_text : getText() ;
   char *updated = fn(source) ;
   if (!updated)
      {
      // no result: only the temporary string needs to be cleaned up
      if (!had_text)
         FrFree(source) ;
      return ;
      }
   if (updated != source)
      FrFree(source) ;
   m_text = updated ;
   return ;
}
Exemple #13
0
// Destroy the compiled expression, the replacement string, and every
// character class still recorded on the _classes list.
FrRegExp::~FrRegExp()
{
   delete regex ;
   regex = 0 ;
   FrFree(replacement) ;
   replacement = 0 ;
   while (_classes)
      {
      FrCons *cell = (FrCons*)poplist(_classes) ;
      if (!cell)
         continue ;
      // grab the class stored in the cell's cdr before the cell is freed
      FrRegExClass *char_class = (FrRegExClass*)cell->consCdr() ;
      cell->freeObject() ;
      delete char_class ;
      }
   _token = 0 ;
   return ;
}
Exemple #14
0
// Worker: remove this job's slice of keys from the hash table.
//
// The slice is the half-open index range
// [slice_start, slice_start+slice_size) of order->syms.  When order->strict
// is set, each key that could not be removed is reported on cerr.
static void hash_remove(HashRequestOrder *order)
{
   my_job_id = order->id ;
   size_t slice_end = order->slice_start + order->slice_size ;
   HashT *ht = (HashT*)order->ht ;
   KeyT *syms  = (KeyT*)order->syms ;
   for (size_t i = order->slice_start ; i < slice_end ; ++i)
      {
      if (ht->remove(syms[i]) || !order->strict)
         continue ;
      // %ld expects a long, so convert explicitly rather than relying on
      //   size_t having the same representation
      char *msg = Fr_aprintf(";  Job %ld encountered missing symbol @ %ld.\n",
                             (long)order->id,(long)i) ;
      // streaming a null char* is undefined, so skip the report if the
      //   message could not be built
      if (msg)
         cerr << msg << flush ;
      FrFree(msg) ;
      }
//   ht->reclaimDeletions() ;
   return ;
}
Exemple #15
0
// Warn about a list that was read without its closing parenthesis.
//
// The warning shows the printed form of the list's leading elements (the
// sub-sequence 0..5).  For lists longer than six elements the last
// character of that printed form is dropped and " ..." is appended to show
// that more elements follow.
static void expected_right_paren(const FrList *list)
{
   size_t len = list->listlength() ;
   FrObject *listhead = list->subseq(0,5) ;
   char *printed = 0 ;
   if (listhead)
      {
      printed = listhead->print() ;
      free_object(listhead) ;
      }
   const char *cont = "" ;
   if (len > 6)
      {
      cont = " ..." ;
      // drop the final character of the printed prefix, but only if there
      //   is one -- otherwise we would write in front of the buffer
      if (printed && *printed)
         strchr(printed,'\0')[-1] = '\0' ;
      }
   FrWarningVA("malformed list (expected right parenthesis),\n\tread %s%s",
               printed ? printed : "",cont) ;
   FrFree(printed) ;
   return ;
}
Exemple #16
0
// Worker: add keys to the hash table in the order dictated by this job's
// slice of order->randnums.  Each number in the slice is used as an index
// into order->syms.  A completion line is written to cout when
// order->m_verbose is set.
static void hash_random_add(HashRequestOrder *order)
{
   my_job_id = order->id ;
   HashT *table = (HashT*)order->ht ;
   KeyT *keys = (KeyT*)order->syms ;
   uint32_t *picks = order->randnums + order->slice_start ;
   size_t idx = 0 ;
   while (idx < order->slice_size)
      {
      const size_t chosen = picks[idx++] ;
      (void)table->add(keys[chosen]) ;
      }
   if (!order->m_verbose)
      return ;
   char *msg = Fr_aprintf(";  Job %ld cycle %ld complete.\n",order->id,order->current_cycle) ;
   cout << msg << flush ;
   FrFree(msg) ;
   return ;
}
Exemple #17
0
// Find 'filename' by searching, in order: the default directories, each
// directory named in the null-terminated argument list 'args', and finally
// the fallback directory.  Returns the path of the first existing file
// found (owned by the caller), or whatever the fallback lookup returns.
char *FrLocateFile(const char *filename, va_list args)
{
   char *found = locate_in_default_directories(filename) ;
   if (found)
      return found ;
   FrVarArg(const char *,dir) ;
   for ( ; dir ; dir = va_arg(args,const char *))
      {
      char *candidate = FrAddDefaultPath(filename,dir) ;
      if (candidate && FrFileExists(candidate))
         return candidate ;
      FrFree(candidate) ;
      }
   // none of the listed directories contains the file, so the fallback
   //   location is the last resort
   return locate_in_fallback_directory(filename) ;
}
Exemple #18
0
// Find 'filename' by searching, in order: the default directories, each
// directory on the list 'directories' (list items without a printable name
// are skipped), and finally the fallback directory.  Returns the path of
// the first existing file found (owned by the caller), or whatever the
// fallback lookup returns.
char *FrLocateFile(const char *filename, const FrList *directories)
{
   char *found = locate_in_default_directories(filename) ;
   if (found)
      return found ;
   // walk the caller-supplied directories in list order
   while (directories)
      {
      const char *dirname = FrPrintableName(directories->first()) ;
      if (dirname)
         {
         char *candidate = FrAddDefaultPath(filename,dirname) ;
         if (candidate && FrFileExists(candidate))
            return candidate ;
         FrFree(candidate) ;
         }
      directories = directories->rest() ;
      }
   // none of the listed directories contains the file, so the fallback
   //   location is the last resort
   return locate_in_fallback_directory(filename) ;
}
Exemple #19
0
// Worker: check that every key in this job's slice of order->syms can be
// found with find_Symbol().  Misses are reported on cerr when order->strict
// is set, and a completion line is written to cout when order->m_verbose is
// set.
static void hash_checksyms(HashRequestOrder *order)
{
   my_job_id = order->id ;
   KeyT *keys = (KeyT*)order->syms ;
   const size_t first = order->slice_start ;
   const size_t limit = first + order->slice_size ;
   for (size_t idx = first ; idx < limit ; ++idx)
      {
      if (find_Symbol(keys[idx]) || !order->strict)
         continue ;
      char line[300] ;
      Fr_sprintf(line,sizeof(line),";  Job %d - missing symbol %s\n",
                 (int)order->id,sym_name(keys[idx])) ;
      cerr << line << flush ;
      }
   if (!order->m_verbose)
      return ;
   char *msg = Fr_aprintf(";  Job %ld cycle %ld complete.\n",order->id,order->current_cycle) ;
   cout << msg << flush ;
   FrFree(msg) ;
   return ;
}
Exemple #20
0
// Match a single regular-expression element against the start of
// 'candidate'.
//
//   re            element to match; its reType() selects the case below
//   candidate     text to match against
//   min_reps      least number of repetitions that counts as a match
//   max_reps      in: most repetitions to attempt; out: number of
//                 repetitions actually matched (or 0/1 for the End case)
//   match         cursor into the output buffer; advanced past everything
//                 that is copied into the buffer
//   matchbuf_end  end of the output buffer; copying is skipped once the
//                 cursor reaches it
//   groups        array of num_groups strings recording group contents
//
// Returns a pointer just past the matched part of 'candidate', or 0 if
// fewer than min_reps repetitions matched.
static const char *re_match(const FrRegExElt *re, const char *candidate,
			    size_t min_reps, size_t &max_reps,
			    char *&match, const char *matchbuf_end,
			    char **groups, size_t num_groups)
{
   assertq(re != 0 && match != 0 && matchbuf_end != 0) ;
   switch (re->reType())
      {
      case FrRegExElt::End:		// match end of word
	 if (*candidate)
	    {
	    max_reps = 0 ;		// uh oh, not at end of word....
	    return 0 ;
	    }
	 else
	    {
	    max_reps = 1 ;
	    return candidate ;
	    }
      case FrRegExElt::Char:
	 {
	 // single character, compared after upper-casing both sides
	 char c = Fr_toupper(re->getChar()) ;
	 size_t i ;
	 for (i = 0 ; i < max_reps ; i++)
	    {
	    if (Fr_toupper(*candidate) != c)
	       break ;
	    if (match < matchbuf_end)
	       *match++ = *candidate ;
	    candidate++ ;
	    }
	 max_reps = i ;
	 if (i >= min_reps )
	    return candidate ;
	 else
	    return 0 ;
	 }
      case FrRegExElt::CharSet:
	 {
	 // the set is a table indexed by the (unsigned) character value; a
	 //   nonzero entry means the character belongs to the set
	 const char *set = re->getCharSet() ;
	 if (!set)
	    {
	    max_reps = 0 ;
	    return 0 ;
	    }
	 size_t i ;
	 for (i = 0 ; i < max_reps && *candidate ; i++)
	    {
	    if (!set[*(unsigned char*)candidate])
	       break ;
	    if (match < matchbuf_end)
	       *match++ = *candidate ;
	    candidate++ ;
	    }
	 max_reps = i ;
	 if (i >= min_reps)
	    return candidate ;
	 else
	    return 0 ;
	 }
      case FrRegExElt::String:
	 {
	 const char *string = re->getString() ;
	 size_t len = re->stringLength() ;
	 if (!string)
	    {
	    max_reps = 0 ;
	    return 0 ;
	    }
	 size_t i ;
	 for (i = 0 ; i < max_reps ; i++)
	    {
	    if (Fr_memicmp(candidate,string,len) != 0)
	       break ;
	    // what gets copied to the output is not the matched text but the
	    //   string stored one byte past the end of the match string --
	    //   presumably its replacement text; confirm against FrRegExElt
	    const char *tstr = string+len+1 ;
	    size_t tlen = strlen(tstr) ;
	    if (match+tlen < matchbuf_end)
	       {
	       memcpy(match,tstr,tlen) ;
	       match += tlen ;
	       }
	    candidate += len ;
	    }
	 max_reps = i ;
	 if (i >= min_reps)
	    return candidate ;
	 else
	    return 0 ;
	 }
      case FrRegExElt::Alt:
	 {
	 FrRegExElt **alts = re->getAlternatives() ;
	 // remember where this alternation's output starts, so that the
	 //   group text can be extracted after the match
	 char *matchbuf = match ;
	 const char *end = re_match_alt(alts,candidate,min_reps,max_reps,
					match,matchbuf_end,groups,num_groups) ;
	 if (max_reps >= min_reps)
	    {
	    int group_num = re->groupNumber() ;
	    if (end && group_num >= 0 && group_num < (int)num_groups)
	       {
	       // since the r.e. compiler ensures that only alternations can
	       // be used for grouping, we can get away with only recording
	       // groupings right here
	       *match = '\0' ;
	       // replace any text previously recorded for this group
	       FrFree(groups[group_num]) ;
	       groups[group_num] = FrDupString(matchbuf) ;
	       }
	    return end ;
	    }
	 else
	    return 0 ;
	 }
      case FrRegExElt::Class:
         {
	 const char *end = re_match_class(re,candidate,min_reps,max_reps,
					  match,matchbuf_end,groups,
					  num_groups) ;
	 return (max_reps >= min_reps) ? end : 0 ;
	 }
      case FrRegExElt::Accept:
	 // accept everything: the match extends to the candidate's terminator
	 return strchr(candidate,'\0') ;
      default:
	 max_reps = 0 ;
	 FrMissedCase("re_match") ;
	 return 0 ;
      }
}
Exemple #21
0
// Compile a "simple" regular expression -- a run of literal characters --
// starting at 're' into a single FrRegExElt.
//
//   re      in: start of the text to compile (must not be empty and must
//           not start with '.'); out: advanced to where scanning stopped
//   in_alt  true when compiling inside an alternation, where '|' introduces
//           a replacement string and FrRE_ALT_SEP / FrRE_ALT_END end the
//           element
//
// The first pass only measures: 'stringlen' counts the literal characters
// to match and 'translen' the characters of the replacement.  The second
// part copies both strings (dropping the FrRE_QUOTE escape characters) and
// builds the element: a single-character element when match and
// replacement are the same single character, a string element otherwise.
static FrRegExElt *compile_simple(const char *&re, bool in_alt)
{
   assertq(*re != '\0' && *re != '.') ;
   const char *orig_re = re ;
   const char *regex = re ;
   size_t stringlen = 0 ;
   const char *xlat = 0 ;
   size_t translen = 0 ;
   while (*regex)
      {
      char c = *regex ;
      // check whether we're escaping a special character
      if (c == FrRE_QUOTE)
	 {
	 if (regex[1])
	    {
	    regex++ ;
	    stringlen++ ;
	    }
	 }
      // the wildcard character ends the simple RE
      else if (c == '.')
	 break ;
      // as do repetition specifiers
      else if (c == FrRE_MULTIPLE || c == FrRE_KLEENE || c == FrRE_OPTIONAL ||
	       c == FrRE_COUNT_BEG)
	 {
	 if (stringlen > 1)
	    {
	    // give back the last character scanned, so that the string ends
	    //   one character earlier and that character is left unconsumed
	    stringlen-- ;
	    regex-- ;
	    // check for an escaped special character
	    if (regex > re && regex[-1] == FrRE_QUOTE)
	       regex-- ;
	    break ;
	    }
	 else if (stringlen > 0)
	    break ;
	 // nothing scanned yet: the specifier itself is counted as a literal
	 stringlen++ ;
	 }
      else if (c == FrRE_CHARSET_BEG || c == FrRE_ALT_BEG)
	 break ;
      else if (in_alt)
	 {
	 if (c == '|')
	    {
	    // we have a replacement string for the current alternative
	    const char *tmp = regex+1 ;
	    translen = 0 ;
	    while (*tmp)
	       {
	       // check for an escaped special character
	       if (*tmp == FrRE_QUOTE)
		  {
		  if (tmp[1])
		     {
		     tmp++ ;
		     translen++ ;
		     }
		  }
	       else if (*tmp == FrRE_ALT_SEP || *tmp == FrRE_ALT_END)
		  break ;
	       else
		  translen++ ;
	       tmp++ ;
	       }
	    xlat = regex+1 ;
	    regex = tmp ;
	    break ;
	    }
	 // the simple RE ends with the end of the current alternative
	 else if (c == FrRE_ALT_SEP || c == FrRE_ALT_END)
	    break ;
	 else
	    stringlen++ ;
	 }
      else
	 stringlen++ ;
      if (*regex)
	 regex++ ;
      }
   assertq(stringlen > 0) ;
   // without an explicit replacement, the matched text is its own
   //   replacement
   if (!xlat)
      {
      xlat = orig_re ;
      translen = stringlen ;
      }
   char *string = FrNewN(char,stringlen+1) ;
   char *trans = FrNewN(char,translen+1) ;
   if (string)
      {
      const char *s = re ;
      for (size_t i = 0 ; i < stringlen ; i++)
	 {
	 // check for an escaped special character
	 if (*s == FrRE_QUOTE)
	    s++ ;
	 string[i] = *s++ ;
	 }
      }
   else
      // allocation failed: fall back on the unprocessed source text
      string = (char*)re ;
   if (trans)
      {
      const char *t = xlat ;
      for (size_t i = 0 ; i < translen ; i++)
	 {
	 // check for an escaped special character
	 if (*t == FrRE_QUOTE)
	    t++ ;
	 trans[i] = *t++ ;
	 }
      }
   else
      // allocation failed: fall back on the unprocessed source text
      trans = (char*)xlat ;
   FrRegExElt *elt ;
   if (stringlen == 1 && translen == 1 && *string == *trans)
      elt = new FrRegExElt(*string) ;
   else
      elt = new FrRegExElt(string,stringlen,trans,translen) ;
   // release the temporary copies, but never the caller's source text that
   //   was substituted after a failed allocation
   if (string != re)
      FrFree(string) ;
   if (trans != xlat)
      FrFree(trans) ;
   re = regex ;
   return elt ;
}
Exemple #22
0
// Return a newly allocated string holding the text for positions 'start'
// through 'end' (inclusive); an 'end' below 'start' is treated as 'start'.
//
// When the original text is stored, the requested range is cut out of it:
// 'start' and 'end' are first translated through m_positions if that map
// exists and 'end' lies below m_textlength, and trailing whitespace is
// stripped.  Without stored text, the string is assembled from the original
// text of spans start..end, separated by single blanks, provided that the
// m_positions entries for that range never jump by more than one.
//
// Returns 0 if the request cannot be satisfied or memory runs out; the
// caller releases the result with FrFree().
char *FrTextSpans::getText(size_t start, size_t end)
{
   if (end < start)
      end = start ;
   if (m_text)
      {
      if (m_positions && end < m_textlength)
         {
         start = m_positions[start] ;
         end = m_positions[end+1] ;
         }
      // strip trailing whitespace left by the m_positions mapping
      while (end > start && Fr_isspace(m_text[end-1]))
         end-- ;
      char *buf = FrNewN(char,end-start+1) ;
      if (buf)
         {
         strncpy(buf,m_text+start,end-start) ;
         buf[end-start] = '\0' ;
         }
      return buf ;
      }
   // no original text stored, so try to assemble a string from the spans,
   //   if all spans are unambiguous
   sort() ;                             // ensure that spans are sorted by posn
   // the fallback reads both arrays, so it cannot proceed without them
   if (!m_positions || !m_spans)
      return 0 ;
   bool unambig = true ;
   for (size_t i = start ; i <= end ; i++)
      {
      if (m_positions[i+1] > m_positions[i] + 1)
         {
         unambig = false ;
         break ;
         }
      }
   if (unambig)
      {
      size_t i ;
      // first pass: total length, with one separator per span
      size_t len = 0 ;
      for (i = start ; i <= end ; i++)
         {
         char *text = m_spans[i].originalText() ;
         if (text)
            {
            len += strlen(text) + 1 ;
            FrFree(text) ;
            }
         }
      char *buf = FrNewN(char,len+1) ;
      if (buf)
         {
         *buf = '\0' ;
         char *bufptr = buf ;
         // second pass: copy each span's text followed by a blank
         for (i = start ; i <= end ; i++)
            {
            char *text = m_spans[i].originalText() ;
            if (text)
               {
               len = strlen(text) ;
               memcpy(bufptr,text,len) ;
               bufptr[len] = ' ' ;
               bufptr += len + 1 ;
               // originalText() returns a fresh buffer on every call, so it
               //   must be released here just as in the first pass
               FrFree(text) ;
               }
            }
         if (bufptr > buf)              // change trailing blank into string
            bufptr[-1] = '\0' ;         //   terminator
         }
      return buf ;
      }
   // if we get here, we were unable to satisfy the request
   return 0 ;
}
Exemple #23
0
// Run one timed hash-table test and report the results on 'out'.
//
//   user_pool  thread pool to use; when null, a pool with 'threads' workers
//              is created for this call and destroyed afterwards
//   out        stream receiving the timing and verification report
//   threads    number of jobs to split the work into; 0 means a single job
//              whose completion is not waited for
//   cycles     stored in each job's order and used to estimate the
//              operation count
//   ht         hash table under test (may be null)
//   maxsize    number of keys; divided into one slice per job
//   syms       array of keys
//   op         which operation the jobs perform
//   terse      suppresses the "waiting" progress line
//   strict     passed to the jobs (controls their reporting of misses)
//   randnums   random indices handed to the jobs
//
// After the jobs have run, the function prints wall-clock and CPU time and
// the operation rate, then checks the table's size against its item count
// and against what 'op' should have produced.  When FrHASHTABLE_STATS is
// defined, the table's operation statistics are printed as well.
static void hash_test(FrThreadPool *user_pool, ostream &out, size_t threads, size_t cycles, HashT *ht,
                      size_t maxsize, KeyT *syms, enum Operation op, bool terse, bool strict = true,
                      uint32_t *randnums = 0)
{
   FrThreadPool *tpool = user_pool ? user_pool : new FrThreadPool(threads) ;
   bool must_wait = (threads != 0) ;
   if (threads == 0) threads = 1 ;
   HashRequestOrder *hashorders = FrNewC(HashRequestOrder,threads) ;
   if (!hashorders)
      {
      // without the order array no job can be described, so give up cleanly
      out << "  Out of memory, unable to run test" << endl ;
      if (!user_pool)
         delete tpool ;
      return ;
      }
   //out << "  Dispatching threads" << endl ;
   // slice size rounded to nearest; the last job takes whatever remains
   size_t slice_size = (maxsize + threads/2) / threads ;
   if (ht)
      {
      ht->clearGlobalStats() ;
      ht->clearPerThreadStats() ;
      }
   FrElapsedTimer etimer ;
   FrTimer timer ;
   for (size_t i = 0 ; i < threads ; ++i)
      {
      hashorders[i].op = op ;
      hashorders[i].size = maxsize ;
      hashorders[i].ht = (void*)ht ;
      hashorders[i].syms = (FrSymbol**)syms ;
      hashorders[i].randnums = randnums ;
      hashorders[i].strict = strict ;
      hashorders[i].m_verbose = false ;
      hashorders[i].m_terse = terse ;
      hashorders[i].cycles = cycles ;
      hashorders[i].id = i+1 ;
      hashorders[i].threads = threads ;
      hashorders[i].pool = tpool ;
      hashorders[i].slice_start = i * slice_size ;
      hashorders[i].slice_size = (i+1 < threads) ? slice_size : (maxsize - hashorders[i].slice_start) ;
      hashorders[i].extra_arg = 0 ;
      hashorders[i].total_ops = 0 ;
      // select the worker function (and its extra argument) for this op
      switch (op)
         {
         case Op_GENSYM:
            hashorders[i].func = hash_gensym ;
            break ;
         case Op_ADD:
            hashorders[i].func = hash_add<HashT,KeyT> ;
            break ;
         case Op_CHECK:
            hashorders[i].func = hash_check<HashT,KeyT> ;
            break ;
         case Op_CHECKMISS:
            hashorders[i].func = hash_check<HashT,KeyT> ;
            hashorders[i].extra_arg = 1 ;
            break ;
         case Op_CHECKSYMS:
            hashorders[i].func = hash_checksyms<HashT,KeyT> ;
            break ;
         case Op_REMOVE:
            hashorders[i].func = hash_remove<HashT,KeyT> ;
            break ;
         case Op_RANDOM:
            hashorders[i].func = hash_random<HashT,KeyT> ;
            hashorders[i].extra_arg = 3 ;
            break ;
         case Op_RANDOM_LOWREMOVE:
            hashorders[i].func = hash_random<HashT,KeyT> ;
            hashorders[i].extra_arg = 1 ;
            break ;
         case Op_RANDOM_HIGHREMOVE:
            hashorders[i].func = hash_random<HashT,KeyT> ;
            hashorders[i].extra_arg = 7 ;
            break ;
         case Op_RANDOM_NOREMOVE:
            hashorders[i].func = hash_random<HashT,KeyT> ;
            break ;
         case Op_RANDOM_ADDONLY:
            hashorders[i].func = hash_random_add<HashT,KeyT> ;
            break ;
         default:
            FrMissedCase("hash_test") ;
         }
      tpool->dispatch(&hash_dispatch<HashT>,&hashorders[i],0) ;
      }
   if (must_wait)
      {
      if (!terse)
         out << "  Waiting for thread completion" << endl ;
      tpool->waitUntilIdle() ;
      }
   // elapsed time before any reclamation of deleted entries
   double walltime_noreclaim = etimer.read() ;
   if (ht && op == Op_REMOVE)
      {
      ht->reclaimDeletions() ;
      ht->updateGlobalStats() ;
      }
   double time = timer.readsec() ;
   double walltime = etimer.stop() ;
   if (!user_pool)
      delete tpool ;
   size_t ops = cycles * maxsize ;
   if (op == Op_RANDOM || op == Op_RANDOM_LOWREMOVE || op == Op_RANDOM_HIGHREMOVE ||
       op == Op_RANDOM_NOREMOVE)
      {
      // sum up the per-thread counts of operations performed
      ops = 0 ;
      for (size_t i = 0 ; i < threads ; ++i)
         {
         FrCriticalSection::increment(ops,hashorders[i].total_ops) ;
         }
      }
   FrFree(hashorders) ;
   walltime = (round(10000*walltime)/10000) ;
   // keep every divisor below strictly positive
   if (time <= 0.0) time = 0.00001 ;
   if (walltime <= 0.0) walltime = 0.00001 ;
   if (walltime_noreclaim <= 0.0) walltime_noreclaim = 0.00001 ;
   out << "  Time: " << walltime << "s, " << time << "s CPU (" << 100.0*(time/walltime) << "%), " ;
   pretty_print((size_t)(ops / walltime),out) ;
   out << " ops/sec" << endl ;
   if (op == Op_REMOVE)
      {
      out << "  RwTm: " << walltime_noreclaim << "s without reclamation (" ;
      pretty_print((size_t)(ops / walltime_noreclaim),out) ;
      out << " ops/sec)" << endl ;
      }
   // verify success
   size_t size = ht ? ht->currentSize() : 0 ;
   size_t count = ht ? ht->countItems() : 0 ;
   size_t deleted = ht ? ht->countDeletedItems() : 0 ;
   if (size != count)
      {
      out << "'size' and 'count' disagree!  " << size << " vs " << count << endl ;
      }
   if (op == Op_ADD)
      {
      if (size > maxsize)
         out << "   " << (size-maxsize) <<  " spurious additions to hash table!" << endl ;
      else if (size< maxsize)
         out << "   Failed to add " << (maxsize-size) << " items to hash table!" << endl ;
      }
   if (op == Op_REMOVE || op == Op_RANDOM)
      {
      if (deleted > 0)
         {
         if (ht) ht->reclaimDeletions() ;
         out << "   Pending deletions: " << deleted << " marked for deletion, "
             << (ht ? ht->countDeletedItems() : 0) << " after reclamation"
             << endl ;
         }
      }
   if (op == Op_REMOVE)
      {
      if (size != 0)
         out << "   Hash table was not emptied!  " << size << " items remain (activeitems="
             << count << ")." << endl ;
      }
   if (!ht)
      return ;
#ifdef FrHASHTABLE_STATS
   size_t stat_ins = ht->numberOfInsertions() ;
   size_t stat_ins_dup = ht->numberOfDupInsertions() ;
   size_t stat_ins_att = ht->numberOfInsertionAttempts() ;
   size_t stat_ins_forw = ht->numberOfForwardedInsertions() ;
   size_t stat_ins_resize = ht->numberOfResizeInsertions() ;
   size_t stat_cont = ht->numberOfContainsCalls() ;
   size_t stat_cont_succ = ht->numberOfSuccessfulContains() ;
   size_t stat_cont_forw = ht->numberOfForwardedContains() ;
   size_t stat_lookup = ht->numberOfLookups() ;
   size_t stat_lookup_succ = ht->numberOfSuccessfulLookups() ;
   size_t stat_lookup_forw = ht->numberOfForwardedLookups() ;
   size_t stat_rem = ht->numberOfRemovals() ;
   size_t stat_rem_count = ht->numberOfItemsRemoved() ;
   size_t stat_rem_forw = ht->numberOfForwardedRemovals() ;
   size_t stat_resize = ht->numberOfResizes() ;
   size_t stat_resize_assist = ht->numberOfResizeAssists() ;
   size_t stat_reclam = ht->numberOfReclamations() ;
   size_t stat_moves = ht->numberOfEntriesMoved() ;
   size_t stat_full = ht->numberOfFullNeighborhoods() ;
   size_t stat_chain = ht->numberOfChainLocks() ;
   size_t stat_chain_coll = ht->numberOfChainLockCollisions() ;
   // attempts beyond the non-duplicate insertions are reported as retries
   size_t retries = (stat_ins_att >= stat_ins - stat_ins_dup) ? stat_ins_att - (stat_ins - stat_ins_dup) : 0 ;
   out << "  Stat: " << (stat_ins-stat_ins_forw) << "+" << stat_ins_forw << " ins ("
       << stat_ins_dup << " dup, " << retries << " retry, " << stat_ins_resize << " resz), "
       << stat_cont_succ << '/' << stat_cont << '+' << stat_cont_forw << " cont, "
       << stat_lookup_succ << '/' << stat_lookup << '+' << stat_lookup_forw << " look, "
       << stat_rem_count << '/' << stat_rem << '+' << stat_rem_forw << " rem"
       << endl ;
   out << "  Admn: " << stat_resize << " resizes (" << stat_resize_assist << " assists), " << stat_full << " congest, "
       << stat_reclam << " reclam, " << stat_moves << " moves, "
       << stat_chain_coll << '/' << stat_chain << " chainlock" << endl ;
#ifdef FrMULTITHREAD
   size_t stat_spin = ht->numberOfSpins() ;
   size_t stat_yield = ht->numberOfYields() ;
   size_t stat_sleep = ht->numberOfSleeps() ;
   size_t stat_CAS = ht->numberOfCASCollisions() ;
   size_t stat_resize_cleanup = ht->numberOfResizeCleanups() ;
   out << "  Thrd: " << stat_spin << " spins, " << stat_yield << " yields, " << stat_sleep << " sleeps, "
       << stat_CAS << " CAS, " << stat_resize_cleanup << " resize cleanups" << endl ;
#endif /* FrMULTITHREAD */
#endif /* FrHASHTABLE_STATS */
   return  ;
}
Exemple #24
0
// Convert the index's successor array into its compressed form: one byte
// per item plus a pool of full-size ("absolute") pointers.
//
// Each item becomes one of
//   - COMPRESSED_EOR, when its successor is m_EOR (or lies above m_EOR
//     while m_eor_state is FrBWT_MergeEOR);
//   - the difference to the previous item's successor, when that difference
//     is positive and at most m_maxdelta;
//   - m_maxdelta + k, meaning "the k-th absolute pointer of this bucket";
//     the pointer itself is stored in m_bucket_pool.
// m_buckets records, for every bucket of m_bucketsize items, how many pool
// entries precede the bucket.
//
// Returns true if the index is (or already was) compressed, false if
// compression would not save space or memory could not be allocated.
// NOTE(review): the "would not save space" exit leaves m_bucketsize,
// m_maxdelta, m_numbuckets and m_poolsize at the values computed here,
// whereas the out-of-memory exit resets the latter two -- confirm that
// callers do not depend on either.
bool FrBWTIndex::compress()
{
   if (m_compressed)
      return true ;
   m_bucketsize = DEFAULT_BUCKET_SIZE ;
   m_maxdelta = 255 - m_bucketsize ;
   m_numbuckets = (numItems() + bucketsize() - 1) / bucketsize() ;

   // figure out how big the pool of absolute pointers will be
   uint32_t prev_succ = ~0 ;
   size_t abs_pointers = 0 ;
   size_t comp_EORs = 0 ;
   m_poolsize = 0 ;
   FrAdviseMemoryUse(m_items,bytesPerPointer()*numItems(),FrMADV_SEQUENTIAL) ;
   for (size_t i = 0 ; i < numItems() ; i++)
      {
      // the per-bucket counters start over at each bucket boundary
      if ((i % m_bucketsize) == 0)
	 {
	 abs_pointers = 0 ;
	 comp_EORs = 0 ;
	 }
      uint32_t succ = getUncompSuccessor(i) ;
      if (succ == m_EOR || (succ > m_EOR && m_eor_state == FrBWT_MergeEOR))
	 comp_EORs++ ; // will be stored without using an absolute pointer
      else if (succ <= prev_succ ||
	       succ - prev_succ > m_maxdelta ||
	       ((i+1)%m_bucketsize == 0 && (abs_pointers + comp_EORs == 0)))
	 {      // above enforces at least one absolute pointer per bucket
	 abs_pointers++ ;
	 m_poolsize++ ;
	 }
      prev_succ = succ ;
      }

   size_t bpp = bytesPerPointer() ;
   // now that we know how big the pool is, check whether we will actually
   //   save any space by compressing
   if ((m_poolsize + m_numbuckets) * bpp + numItems() >= numItems() * bpp)
      return false ;			// can't (usefully) compress

   // allocate the various buffers for the compressed data
   m_buckets = FrNewN(char,bpp * m_numbuckets) ;
   unsigned char *comp_items = FrNewN(unsigned char,numItems()) ;
   m_bucket_pool = FrNewN(char,bpp * m_poolsize) ;
   if (comp_items && m_buckets && m_bucket_pool)
      {
      size_t bucket = 0 ;
      size_t ptr_count = 0 ;		// pool entries written so far
      size_t ptr_index = 0 ;		// pool entries used by current bucket
      prev_succ = ~0 ;
      // second pass: same classification as above, now emitting the data
      for (size_t i = 0 ; i < numItems() ; i++)
	 {
	 if ((i % m_bucketsize) == 0)
	    {
	    // record where this bucket's absolute pointers start in the pool
	    FrStoreLong(ptr_count,m_buckets + bpp * bucket++) ;
	    ptr_index = 0 ;
	    comp_EORs = 0 ;
	    }
	 if ((i % CHUNK_SIZE) == 0 && i > 0)
	    {
	    // let OS know we're done with another chunk of m_items
	    FrDontNeedMemory(m_items + bpp*(i-CHUNK_SIZE), bpp*CHUNK_SIZE,
			     (i > CHUNK_SIZE)) ;
	    // and tell it to prefetch the next chunk
	    FrWillNeedMemory(m_items + bpp*i, bpp*CHUNK_SIZE) ;
	    }
	 uint32_t succ = getUncompSuccessor(i) ;
	 if (succ == m_EOR ||
	     (succ > m_EOR && m_eor_state == FrBWT_MergeEOR))
	    {
	    comp_items[i] = COMPRESSED_EOR ;
	    comp_EORs++ ;
	    }
	 else if (succ <= prev_succ ||
		  succ - prev_succ > m_maxdelta ||
		  ((i+1)%m_bucketsize == 0 && (ptr_index + comp_EORs == 0)))
	    // (above ensures at least one abs.ptr per bucket)
	    {
	    FrStoreLong(succ,m_bucket_pool + bpp * ptr_count++);
	    // byte values above m_maxdelta select the bucket's k-th pointer
	    comp_items[i] = (unsigned char)(m_maxdelta + (++ptr_index)) ;
	    }
	 else
	    comp_items[i] = (unsigned char)(succ - prev_succ) ;
	 prev_succ = succ ;
	 }
      assertq(ptr_count == m_poolsize) ;
      // the uncompressed array is only released when m_fmap is not set --
      //   presumably because it then belongs to a mapped file; confirm
      if (!m_fmap)
	 FrFree(m_items) ;
      m_items = comp_items ;
      m_compressed = true ;
      return true ;
      }
   else	// memory alloc failed
      {
      FrWarning("out of memory while compressing index, "
		"will remain uncompressed") ;
      FrFree(comp_items)  ;
      FrFree(m_buckets) ;	m_buckets = 0 ;
      FrFree(m_bucket_pool) ;	m_bucket_pool = 0 ;
      m_numbuckets = 0 ;
      m_poolsize = 0 ;
      return false ;
      }
}