예제 #1
0
// Parse a parenthesized list from the string at 'input' and return it as a
// chain of newly allocated FrList nodes (0 for an empty list).
//
// On entry 'input' points at the opening '('.  It is advanced past everything
// consumed, including the closing ')' when one is found.  If the string ends
// before a ')' is seen, expected_right_paren() is called with whatever was
// built so far.  The second (unnamed) parameter is not used.
//
// A period that is followed by exactly one more object before the ')' is
// treated as dotted-pair notation: that object is attached with replacd() to
// the last node instead of being wrapped in a node of its own.  A period in
// any other position is kept as an ordinary list element.
//
// FramepaC_read_nesting_level is incremented on entry and decremented on
// exit; when it drops to zero or below, FramepaC_read_associations (if set)
// is freed and cleared.
static FrList *string_to_List(const char *&input, const char *)
{
   FrList *list, *prev ;	       // head of the result, and its last node
   FrObject *curr ;

   list = prev = 0 ;
   input++ ;			       // consume initial left parenthesis
   FramepaC_read_nesting_level++ ;
   char c ;
   // 'c' always holds the most recently peeked non-whitespace character; it
   //   is examined again after the loop to tell ')' from end of string
   while ((c = FrSkipWhitespace(input)) != ')' && c != '\0')
      {
      FrObject *obj = string_to_FrObject(input) ;
      if (obj == symbolPERIOD && FrSkipWhitespace(input) != ')')
	 {
	 // period is not last item, so check if it's a dotted pair
	 curr = string_to_FrObject(input) ;
	 // was period second-to-last in list?
	 if ((c = FrSkipWhitespace(input)) == ')')
	    {
	    // dotted pair: attach the final object directly to the last node,
	    //   creating a node with a null head first if nothing was read yet
	    if (!list)
	       prev = list = new FrList(0) ;
	    prev->replacd(curr) ;
	    break ;
	    }
	 else
	    {
	    // more items follow, so the period is just a regular element:
	    //   append it now, then let the common code below append the
	    //   object that was read after it
            if (!list)
	       prev = list = new FrList(obj) ;
	    else
	       {
	       obj = new FrList(obj) ;
	       prev->replacd(obj) ;
	       prev = (FrList*)obj ;
	       }
	    obj = curr ;
            }
	 }
      // append 'obj' in a new node at the end of the list
      curr = new FrList(obj) ;
      if (!list)
	 list = (FrList*)curr ;
      else
	 prev->replacd(curr) ;
      prev = (FrList*)curr ;
      }
   if (c != ')')
      expected_right_paren(list) ;     // ran off the end of the string
   else
      input++ ;			       // consume the closing parenthesis
   // once the outermost read finishes, discard the read associations
   if (--FramepaC_read_nesting_level <= 0 && FramepaC_read_associations)
      {
      FramepaC_read_associations->freeObject() ;
      FramepaC_read_associations = 0 ;
      }
   return list ;
}
예제 #2
0
// Read a parenthesized list from the stream 'input' and return it as a chain
// of newly allocated FrList nodes (0 for an empty list).
//
// The first character of the stream (the opening '(') is discarded.  Objects
// are read until a ')' is the next non-whitespace character or the stream
// reports eof/fail.  The character following the list contents is then
// consumed, and expected_right_paren() is called with the partial list if it
// is not ')'.  The second (unnamed) parameter is not used.
//
// A period that is followed by exactly one more object before the ')' is
// treated as dotted-pair notation: that object is attached with replacd() to
// the last node instead of being wrapped in a node of its own.  A period in
// any other position is kept as an ordinary list element.
//
// FramepaC_read_nesting_level is incremented on entry and decremented on
// exit; when it drops to zero or below, FramepaC_read_associations (if set)
// is freed and cleared.
static FrList *read_List(istream &input, const char *)
{
   FrList *list, *prev ;	       // head of the result, and its last node
   FrObject *curr ;

   list = prev = 0 ;
   input.get() ;		       // discard the initial left parenthesis
   FramepaC_read_nesting_level++ ;
   while (FrSkipWhitespace(input) != ')' && !input.eof() && !input.fail())
      {
      FrObject *obj = read_FrObject(input) ;
      if (obj == symbolPERIOD && FrSkipWhitespace(input) != ')')
	 {
	 // period is not last item, so check if it's a dotted pair
	 curr = read_FrObject(input) ;
	 // was period second-to-last in list?
	 if (FrSkipWhitespace(input) == ')')
	    {
	    // dotted pair: attach the final object directly to the last node,
	    //   creating a node with a null head first if nothing was read yet
	    if (!list)
	       prev = list = new FrList(0) ;
	    prev->replacd(curr) ;
	    break ;
	    }
	 else
	    {
	    // more items follow, so the period is just a regular element:
	    //   append it now, then let the common code below append the
	    //   object that was read after it
            if (!list)
	       prev = list = new FrList(obj) ;
	    else
	       {
	       obj = new FrList(obj) ;
	       prev->replacd(obj) ;
	       prev = (FrList*)obj ;
	       }
	    obj = curr ;
            }
	 }
      // append 'obj' in a new node at the end of the list
      curr = new FrList(obj) ;
      if (!list)
	 list = (FrList*)curr ;
      else
	 prev->replacd(curr) ;
      prev = (FrList*)curr ;
      }
   // consume the terminator; anything other than ')' is reported
   if (input.get() != ')')
      expected_right_paren(list) ;
   // once the outermost read finishes, discard the read associations
   if (--FramepaC_read_nesting_level <= 0 && FramepaC_read_associations)
      {
      FramepaC_read_associations->freeObject() ;
      FramepaC_read_associations = 0 ;
      }
   return list ;
}
예제 #3
0
// Parse a frame of the form "[name [slot ...] ...]" from the string at
// 'input' and return it.
//
// On entry 'input' points at the opening '['; it is advanced past everything
// consumed, including the closing ']' when present.  If a frame called 'name'
// is already known to find_vframe_inline() it is reused; otherwise a new
// frame is created, through FramepaC_new_VFrame when read_as_VFrame is set
// and that hook is installed, else as a plain FrFrame.  Slots are added to
// the frame by string_to_Slot().
//
// Returns 0 (after a warning) if the frame name is missing or not a symbol.
// A missing ']' only produces a warning; the frame read so far is still
// returned.  The second (unnamed) parameter is not used.
FrObject *string_to_Frame(const char *&input, const char *)
{
   input++ ;                            // consume the initial left bracket
   FrSymbol *name = string_to_Symbol(input) ;   // read frame name
   if (!name || !name->symbolp())       // the name must be a symbol
      {
      FrWarning(errmsg_frame_name) ;
      // release whatever was read in place of a symbol, exactly as the
      //   stream-based read_Frame() does, so that it is not leaked
      free_object(name) ;
      return 0 ;
      }
   FrFrame *frame = find_vframe_inline(name) ;
   if (!frame)
      frame = (read_as_VFrame && FramepaC_new_VFrame)
                ? FramepaC_new_VFrame(name) : new FrFrame(name) ;
   while (FrSkipWhitespace(input) == '[')
      string_to_Slot(input,frame) ;
   if (*input == ']')
      input++ ;                         // consume the closing bracket
   else
      FrWarning(errmsg_frame_malformed) ;
   return frame ;
}
예제 #4
0
// Check that the string at 'input' holds a well-formed parenthesized list.
//
// 'input' points at the opening '(' on entry and is advanced as elements are
// checked.  Returns true, with 'input' just past the closing ')', when every
// element passes valid_FrObject_string(); returns false as soon as an element
// is rejected or the end of the string is reached without a ')'.  The two
// unnamed parameters are not used.
static bool verify_List(const char *&input, const char *, bool)
{
   input++ ;                            // step over the opening parenthesis
   for (;;)
      {
      char next = FrSkipWhitespace(input) ;
      if (next == ')')
         {
         input++ ;                      // step over the closing parenthesis
         return true ;
         }
      // end of string before ')' or a malformed element: not a valid list
      if (next == '\0' || !valid_FrObject_string(input,true))
         return false ;
      }
}
예제 #5
0
// Check that the string at 'input' holds a well-formed frame: a '[' followed
// by a valid frame name, any number of bracketed slots, and a closing ']'.
//
// 'input' is advanced as the text is checked.  Returns true with 'input' just
// past the closing ']' on success, false at the first problem found.  The
// two unnamed parameters are not used.
static bool verify_Frame(const char *&input, const char *, bool)
{
   input++ ;                            // step over the opening bracket
   bool well_formed = verify_Symbol(input,true) ;       // frame name
   // keep checking slots for as long as another one starts and none failed
   while (well_formed && FrSkipWhitespace(input) == '[')
      well_formed = verify_Slot(input) ;
   if (!well_formed || *input != ']')
      return false ;
   input++ ;                            // step over the terminator
   return true ;
}
예제 #6
0
// Read one slot of the form "[name [facet ...] ...]" from 'input' and add it
// to 'frame'.
//
// The opening bracket is discarded, the slot name is read, and each following
// bracketed group is handed to read_Facet().  If the name is missing or not
// a symbol, a warning is issued, the object that was read is released and
// nothing is added.  A missing closing ']' only produces a warning.
static void read_Slot(istream &input,FrFrame *frame)
{
   input.get() ;                        // discard the left bracket
   FrSymbol *slot = read_Symbol(input) ;
   if (slot && slot->symbolp())
      {
      frame->createSlot(slot) ;
      while (FrSkipWhitespace(input) == '[')
         read_Facet(input,frame,slot) ;
      // the next character should close the slot (it may instead be EOF)
      if (input.get() != ']')
         FrWarning(errmsg_slot_malformed) ;
      return ;
      }
   // the slot name was not a symbol
   FrWarning(errmsg_slot_symbol) ;
   free_object(slot) ;
}
예제 #7
0
// Check that the string at 'input' holds a well-formed facet: a '[' followed
// by a valid facet name, any number of valid filler objects, and a ']'.
//
// 'input' is advanced as the text is checked.  Returns true with 'input' just
// past the closing ']' on success; returns false if the name or a filler is
// rejected, or if the string ends before the ']'.
static bool verify_Facet(const char *&input)
{
   input++ ;                            // step over the left bracket
   if (!verify_Symbol(input,true))      // facet name
      return false ;
   for (;;)
      {
      char next = FrSkipWhitespace(input) ;
      if (next == ']')
         {
         input++ ;                      // step over the terminator
         return true ;
         }
      // end of string before ']' or a malformed filler: not a valid facet
      if (next == '\0' || !valid_FrObject_string(input,true))
         return false ;
      }
}
예제 #8
0
static void string_to_Slot(const char *&input,FrFrame *frame)
{
   FrSymbol *slot ;

   input++ ;				  // consume the left bracket
   slot = string_to_Symbol(input) ;	  // get slot name
   if (!slot || !slot->symbolp())	  // the name must be a symbol
      {
      FrWarning(errmsg_slot_symbol) ;
      return ;
      }
   frame->createSlot(slot) ;
   while (FrSkipWhitespace(input) == '[')
      string_to_Facet(input,frame,slot) ;
   if (*input != ']')
      FrWarning(errmsg_slot_malformed) ;
   else
      input++ ;
}
예제 #9
0
// Read one facet of the form "[name filler ...]" from 'input' and add it,
// with its fillers, to 'slot' of 'frame'.
//
// The opening bracket is discarded and the facet name is read.  If the name
// is missing or not a symbol, a warning is issued, the object that was read
// is released and nothing is added.  Otherwise every object up to the closing
// ']' is read with read_FrObject() and passed to addFillerNoCopy().  A
// missing closing ']' only produces a warning.
static void read_Facet(istream &input,FrFrame *frame,FrSymbol *slot)
{
   input.get() ;                    // consume the left bracket
   FrSymbol *facet = read_Symbol(input) ;       // get facet name
   if (!facet || !facet->symbolp()) // the name must be a symbol
      {
      FrWarning(errmsg_facet_symbol) ;
      free_object(facet) ;
      return ;
      }
   frame->createFacet(slot,facet) ;
   char ch ;
   // stop at the closing bracket, and also as soon as the stream is exhausted
   //   or has failed (the same guard read_List() uses), so that truncated
   //   input cannot keep this loop adding fillers
   while ((ch = FrSkipWhitespace(input)) != 0 && ch != ']' &&
          !input.eof() && !input.fail())
      {
      frame->addFillerNoCopy(slot,facet,read_FrObject(input)) ;
      }
   if (input.get() != ']')
      FrWarning(errmsg_facet_malformed) ;
}
예제 #10
0
// Read a frame of the form "[name [slot ...] ...]" from 'input' and return
// it.
//
// The opening bracket is discarded and the frame name is read.  If a frame
// with that name is already known to find_vframe_inline() it is reused;
// otherwise a new one is created, through FramepaC_new_VFrame when
// read_as_VFrame is set and that hook is installed, else as a plain FrFrame.
// Each following bracketed group is handed to read_Slot().
//
// Returns 0 (after a warning, and after releasing the object that was read)
// if the name is missing or not a symbol.  A missing closing ']' only
// produces a warning.  The second (unnamed) parameter is not used.
static FrObject *read_Frame(istream &input, const char *)
{
   input.get() ;                        // discard the initial left bracket
   FrSymbol *name = read_Symbol(input) ;
   if (name && name->symbolp())
      {
      FrFrame *frame = find_vframe_inline(name) ;
      if (!frame)
         {
         if (read_as_VFrame && FramepaC_new_VFrame)
            frame = FramepaC_new_VFrame(name) ;
         else
            frame = new FrFrame(name) ;
         }
      while (FrSkipWhitespace(input) == '[')
         read_Slot(input,frame) ;
      // the next character should close the frame (it may instead be EOF)
      if (input.get() != ']')
         FrWarning(errmsg_frame_malformed) ;
      return frame ;
      }
   // the frame name was not a symbol
   FrWarning(errmsg_frame_name) ;
   free_object(name) ;
   return 0 ;
}
예제 #11
0
static void string_to_Facet(const char *&input,FrFrame *frame,FrSymbol *slot)
{
   FrSymbol *facet ;

   input++ ;			       // consume the left bracket
   facet = string_to_Symbol(input) ;   // get facet name
   if (!facet || !facet->symbolp())    // the name must be a symbol
      {
      FrWarning(errmsg_facet_symbol) ;
      return ;
      }
   frame->createFacet(slot,facet) ;
   char c ;
   while ((c = FrSkipWhitespace(input)) != ']' && c != '\0')
      {
      frame->addFillerNoCopy(slot,facet,string_to_FrObject(input)) ;
      }
   if (c != ']')
      FrWarning(errmsg_facet_malformed) ;
   else
      input++ ;
}
예제 #12
0
// Read the next line from 'fp' and return its first whitespace-delimited
// token, with leading whitespace skipped and anything from the first ';'
// onward removed as a comment.
//
// Returns 0 if 'fp' is null, at end of file, or on a read error.  Otherwise
// returns a pointer into a static buffer, which is overwritten by the next
// call; the returned string is empty for a blank or comment-only line.
// Lines longer than the buffer are truncated: the unread remainder of the
// line is discarded rather than being returned by the following call.
static char *next_abbreviation(FILE *fp)
{
   static char line[FrMAX_LINE] ;
   if (!fp || feof(fp) || !fgets(line,sizeof(line),fp))
      return 0 ;
   // fgets() stops once the buffer is full; if no newline was stored, the
   //   rest of this line is still unread, so throw it away (this is a no-op
   //   for a final line that simply lacks a trailing newline)
   if (!strchr(line,'\n'))
      {
      int ch ;
      while ((ch = fgetc(fp)) != EOF && ch != '\n')
         ;
      }
   char *lineptr = line ;
   FrSkipWhitespace(lineptr) ;
   // strip off comments
   char *cmt = strchr(lineptr,';') ;
   if (cmt)
      *cmt = '\0' ;
   // use only the first token on the line
   for (char *lp = lineptr ; *lp ; lp++)
      {
      if (Fr_isspace(*lp))
         {
         *lp = '\0' ;
         break ;
         }
      }
   return lineptr ;
}
예제 #13
0
// Initialize this span from its list representation 'span', registering it
// with the container 'contain' through init().
//
// 'span' must be a list of at least two elements whose first two items are
// numbers (the span's start and end); otherwise nothing is changed and false
// is returned.  The remaining items are scanned, independently of their
// order relative to each other, for
//   - the first two numbers: score and weight (DEFAULT_SCORE/DEFAULT_WEIGHT
//     are used for any that are absent),
//   - the first two strings or symbols: the span's text and its original
//     text (the latter is stored as metadata under init_text_tag),
//   - the first structure: the span's metadata,
//   - every list whose first element is a symbol: an additional metadata
//     field keyed by that symbol.
// Returns true when the span was accepted.
bool FrTextSpan::parse(const FrList *span, FrTextSpans *contain)
{
   if (span && span->consp() && span->simplelistlength() >= 2 &&
       span->first() && span->second() &&
       span->first()->numberp() && span->second()->numberp())
      {
      size_t sp_start = span->first()->intValue() ;
      size_t sp_end = span->second()->intValue() ;
      // skip over the start and end positions
      span = span->rest()->rest() ;
      // we'll allow rather free-form input from the rest of the span's
      //   description: the first two numbers are the score and weight,
      //   respectively, the first string is the span's text, the second
      //   string (if present) becomes the INIT_TEXT metadata.  Then,
      //   the first structure (if present) is the metadata, and any lists
      //   starting with a symbol become additional metadata fields
      double sp_score = DEFAULT_SCORE ;
      double sp_weight = DEFAULT_WEIGHT ;
      // scan for the first two numbers
      for (const FrList *sp = span ; sp ; sp = sp->rest())
	 {
	 if (sp->first() && sp->first()->numberp())
	    {
	    sp_score = sp->first()->floatValue() ;
	    // the inner loop continues with the same 'sp', so the weight is
	    //   the next number *after* the score; the break below leaves
	    //   the outer loop before 'sp' (possibly null now) is used again
	    for (sp = sp->rest() ; sp ; sp = sp->rest())
	       {
	       if (sp->first() && sp->first()->numberp())
		  {
		  sp_weight = sp->first()->floatValue() ;
		  break ;
		  }
	       }
	    break ;
	    }
	 }
      const char *curr_text = 0 ;
      const char *orig_text = 0 ;
      // scan for the first two strings or symbols
      for (const FrList *sp = span ; sp ; sp = sp->rest())
	 {
	 FrObject *item = sp->first() ;
	 if (item && (item->stringp() || item->symbolp()))
	    {
	    curr_text = item->printableName() ;
	    // same shared-'sp' pattern as above: look for the second text
	    //   item among the elements following the first one
	    for (sp = sp->rest() ; sp ; sp = sp->rest())
	       {
	       item = sp->first() ;
	       if (item && (item->stringp() || item->symbolp()))
		  {
		  orig_text = item->printableName() ;
		  break ;
		  }
	       }
	    break ;
	    }
	 }
      // advance both text pointers past any leading whitespace
      if (curr_text)
	 (void)FrSkipWhitespace(curr_text) ;
      if (orig_text)
	 (void)FrSkipWhitespace(orig_text) ;
      // scan for the first structure
      const FrStruct *meta = 0 ;
      for (const FrList *sp = span ; sp ; sp = sp->rest())
	 {
	 if (sp->first() && sp->first()->structp())
	    {
	    meta = (FrStruct*)sp->first() ;
	    break ;
	    }
	 }
      init(sp_start,sp_end,sp_score,sp_weight,curr_text,contain) ;
      // NOTE(review): m_metadata is released here but only reassigned in the
      //   deepcopy branch below.  Unless init() already left it null, or
      //   setMetaData() replaces it without reading it, the other paths
      //   would operate on a freed pointer -- confirm against init() and
      //   setMetaData().
      free_object(m_metadata) ;
      if (meta)
	 {
	 FrSymbol *symMETATYPE = FrSymbolTable::add(METADATA_TYPENAME) ;
	 // a structure of the metadata type is copied wholesale; any other
	 //   structure has its fields transferred individually
	 if (meta->typeName() == symMETATYPE)
	    m_metadata = (FrStruct*)meta->deepcopy() ;
	 else
	    {
	    // copy the keywords one by one
	    FrList *keys = meta->fieldNames() ;
	    // presumably poplist() removes and returns the head of 'keys',
	    //   so the loop ends when the list is used up -- TODO confirm
	    while (keys)
	       {
	       FrSymbol *key = (FrSymbol*)poplist(keys) ;
	       setMetaData(key,meta->get(key)) ;
	       }
	    }
	 }
      if (orig_text)
	 setMetaData(FrSymbolTable::add(init_text_tag),
		     new FrString(orig_text),false) ;
      // finally, scan for any embedded lists and add them as metadata fields
      for (const FrList *sp = span ; sp ; sp = sp->rest())
	 {
	 // the cast is only relied upon after consp() has confirmed that the
	 //   element really is a list
	 FrList *item = (FrList*)sp->first() ;
	 if (item && item->consp() && item->first() &&
	     item->first()->symbolp())
	    {
	    // the leading symbol is the key, the rest of the list the value
	    FrSymbol *key = (FrSymbol*)item->first() ;
	    setMetaData(key,item->rest()) ;
	    }
	 }
      return true ;
      }
   return false ;
}
예제 #14
0
// Load term and document frequencies from the text file 'filename' into a
// freshly allocated hash table, replacing the table previously held in 'ht'.
//
// The first line of the file is always consumed.  If it begins with "!!! "
// it is treated as a header "!!! <total_docs> <vocab_size>": the first
// number is stored in total_docs and the second, when positive, pre-sizes
// the table.  Every following line that is neither empty nor starts with ';'
// must hold a symbol followed by two positive integers (term frequency, then
// document frequency).  A term that already has a record gets its record
// replaced.  Malformed lines produce a warning and are skipped.
//
// All counts are parsed as signed values and checked before being converted
// to size_t, so a negative number in the file is rejected rather than
// wrapping around to a huge unsigned value.
//
// Returns false if 'filename' is null or empty or the file cannot be opened,
// and true otherwise (even if individual lines were rejected).
bool FrTFIDF::load(const char *filename)
{
   if (!filename || !*filename)
      return false ;
   FrITextFile wt(filename) ;
   if (!wt.good())
      {
      FrWarningVA("unable to open term weights file '%s'",filename) ;
      return false ;
      }
   delete ht ;
   ht = new FrSymHashTable ;
   FrSymbol *symEOF = FrSymbolTable::add("*EOF*") ;
   char *line = wt.getline() ;
   bool expanded = false ;
   if (line && strncmp(line,"!!! ",4) == 0)
      {
      char *end = 0 ;
      long docs = strtol(line+4,&end,10) ;
      // a negative document count is meaningless; treat it as zero
      total_docs = (docs > 0) ? (size_t)docs : 0 ;
      if (end && end != line+4)
         {
         char *tmp = end ;
         long vocab_size = strtol(tmp,&end,10) ;
         // only a positive size is used, so a negative value can no longer
         //   turn into an enormous expansion request
         if (vocab_size > 0 && end && end != tmp)
            {
            ht->expand((size_t)vocab_size+100) ;
            expanded = true ;
            }
         }
      }
   if (!expanded)                       // ensure some reasonable starting size
      ht->expand(5000) ;
   while ((line = wt.getline()) != 0)
      {
      // skip comment lines and blank lines
      if (FrSkipWhitespace(line) == ';' || *line == '\0')
         continue ;
      const char *origline = line ;
      FrSymbol *term = (FrSymbol*)string_to_FrObject(line) ;
      if (term == symEOF || !term || !term->symbolp())
         {
         FrWarning("invalid line in term-weights file") ;
         free_object(term) ;
         continue ;
         }
      char *end = 0 ;
      long term_freq = strtol(line,&end,10) ;
      if (end && end != line)
         {
         line = end ;
         long doc_freq = strtol(line,&end,10) ;
         if (end != line)
            {
            // both counts were present; accept them only if positive
            if (doc_freq > 0 && term_freq > 0)
               {
               FrSymHashEntry *entry = tfidfRecord(term) ;
               FrTFIDFrecord *rec = new FrTFIDFrecord((size_t)term_freq,
                                                      (size_t)doc_freq) ;
               if (entry)
                  {
                  // the term was seen before: replace its old record
                  delete (FrTFIDFrecord*)entry->getUserData() ;
                  entry->setUserData(rec) ;
                  }
               else
                  ht->add(term,(void*)rec) ;
               continue ;
               }
            FrWarning("invalid data in term-weights file -- both term\n"
                      "\tand document frequencies must be nonzero") ;
            free_object(term) ;
            continue ;
            }
         }
      // fewer than two integers followed the term
      FrWarningVA("expected two integers following the term '%s'; line was\n"
                  "\t%s", term->symbolName(), origline) ;
      free_object(term) ;
      }
   return true ;
}