// Parse the textual form of a frame, "[name [slot ...] ... ]", from the
// string at 'input'.  'input' is passed by reference and is advanced past
// whatever text is consumed.  The second (unnamed) parameter is ignored.
//
// Returns the frame that was found or created for the name, or 0 (after a
// warning) if the text following the opening bracket is not a symbol.
// A missing closing bracket only produces a warning; the frame is still
// returned.
FrObject *string_to_Frame(const char *&input, const char *)
{
   input++ ;				// consume the initial left bracket
   FrSymbol *name = string_to_Symbol(input) ;	// read frame name
   if (!name || !name->symbolp())	// the name must be a symbol
   {
      FrWarning(errmsg_frame_name) ;
      // release whatever non-symbol object was read instead of a name,
      // exactly as the stream-based reader read_Frame() does
      free_object(name) ;
      return 0 ;
   }

   // reuse an existing frame of that name if there is one, else make one
   FrFrame *frame = find_vframe_inline(name) ;
   if (!frame)
      frame = (read_as_VFrame && FramepaC_new_VFrame)
	       ? FramepaC_new_VFrame(name)
	       : new FrFrame(name) ;

   // every following '[' introduces one slot
   while (FrSkipWhitespace(input) == '[')
      string_to_Slot(input,frame) ;

   if (*input == ']')
      input++ ;				// consume the closing bracket
   else
      FrWarning(errmsg_frame_malformed) ;
   return frame ;
}
// Parse a repetition-count specifier of a regular expression.
//
// On entry 're' must point at FrRE_COUNT_BEG (asserted).  Accepted forms,
// writing the delimiters as {}:
//    {N}    low = high = N
//    {N,M}  low = N, high = max(N,M)
// 're' is advanced past every character consumed, including the closing
// FrRE_COUNT_END when present.  If the character after the first number is
// neither FrRE_COUNT_END nor ',', a warning is issued and 'high' keeps its
// initial value of UINT_MAX.  A missing FrRE_COUNT_END produces a warning.
//
// NOTE(review): with no digits after the comma ("{N,}") 'high' ends up
//   equal to 'low', not unbounded -- this keeps the logic as written;
//   confirm whether an open-ended upper bound was intended.
static void compile_count(const char *&re, size_t &low, size_t &high)
{
   assertq(*re == FrRE_COUNT_BEG) ;
   low = 0 ;
   high = UINT_MAX ;
   re++ ;				// consume the opening delimiter
   while (Fr_isdigit(*re))
   {
      low = 10*low + (*re - '0') ;
      re++ ;				// advance, else this loop never ends
   }
   if (*re == FrRE_COUNT_END)
      high = low ;
   else if (*re == ',')
   {
      re++ ;				// consume the comma before the bound
      high = 0 ;
      while (Fr_isdigit(*re))
      {
	 high = 10*high + (*re - '0') ;
	 re++ ;
      }
      if (high < low)
	 high = low ;
   }
   else
      FrWarning("invalid {} specifier in regular expression") ;

   if (*re == FrRE_COUNT_END)
      re++ ;				// consume the closing delimiter
   else
      FrWarning("unterminated {} specifier in regular expression") ;
}
// Issue a warning of the form "bad format--<message>!".
// The text is built with Fr_aprintf and released with FrFree once the
// warning has been issued.
static void bad_format(const char *message)
{
   char *warning_text = Fr_aprintf("bad format--%s!", message) ;
   FrWarning(warning_text) ;
   FrFree(warning_text) ;
}
// Read one slot, "[slotname [facet ...] ... ]", from the stream 'input'
// and add it to 'frame'.  The stream must be positioned on the slot's
// opening bracket.  If the slot name is not a symbol, a warning is issued,
// the object that was read is released, and nothing is added.
static void read_Slot(istream &input,FrFrame *frame)
{
   input.get() ;			// discard the opening '['

   FrSymbol *slot_name = read_Symbol(input) ;
   const bool name_ok = slot_name && slot_name->symbolp() ;
   if (!name_ok)			// only a symbol can name a slot
   {
      FrWarning(errmsg_slot_symbol) ;
      free_object(slot_name) ;
      return ;
   }

   frame->createSlot(slot_name) ;

   // each further '[' starts a facet of this slot
   while (FrSkipWhitespace(input) == '[')
      read_Facet(input,frame,slot_name) ;

   // the slot must be closed by ']'; get() may also yield EOF here
   const int closing = input.get() ;
   if (closing != ']')
      FrWarning(errmsg_slot_malformed) ;
}
static void string_to_Slot(const char *&input,FrFrame *frame) { FrSymbol *slot ; input++ ; // consume the left bracket slot = string_to_Symbol(input) ; // get slot name if (!slot || !slot->symbolp()) // the name must be a symbol { FrWarning(errmsg_slot_symbol) ; return ; } frame->createSlot(slot) ; while (FrSkipWhitespace(input) == '[') string_to_Facet(input,frame,slot) ; if (*input != ']') FrWarning(errmsg_slot_malformed) ; else input++ ; }
// Read one facet, "[facetname filler ... ]", from the stream 'input' and
// add it to slot 'slot' of 'frame'.  The stream must be positioned on the
// facet's opening bracket.  Each object read up to the closing ']' is added
// as a filler without copying.  If the facet name is not a symbol, a
// warning is issued, the object that was read is released, and nothing is
// added.
static void read_Facet(istream &input,FrFrame *frame,FrSymbol *slot)
{
   input.get() ;			// discard the opening '['

   FrSymbol *facet_name = read_Symbol(input) ;
   const bool name_ok = facet_name && facet_name->symbolp() ;
   if (!name_ok)			// only a symbol can name a facet
   {
      FrWarning(errmsg_facet_symbol) ;
      free_object(facet_name) ;
      return ;
   }

   frame->createFacet(slot,facet_name) ;

   // keep reading fillers until FrSkipWhitespace reports ']' or 0
   for (;;)
   {
      char upcoming = FrSkipWhitespace(input) ;
      if (upcoming == 0 || upcoming == ']')
	 break ;
      frame->addFillerNoCopy(slot,facet_name,read_FrObject(input)) ;
   }

   // the facet must be closed by ']'
   if (input.get() != ']')
      FrWarning(errmsg_facet_malformed) ;
}
// Read a frame, "[name [slot ...] ... ]", from the stream 'input'.  The
// stream must be positioned on the opening bracket; the second (unnamed)
// parameter is ignored.
//
// Returns the frame found or created for the name, or 0 (after a warning
// and after releasing the object that was read) if the name is not a
// symbol.  A missing closing bracket only produces a warning.
static FrObject *read_Frame(istream &input, const char *)
{
   input.get() ;			// discard the opening '['

   FrSymbol *frame_name = read_Symbol(input) ;
   const bool name_ok = frame_name && frame_name->symbolp() ;
   if (!name_ok)			// only a symbol can name a frame
   {
      FrWarning(errmsg_frame_name) ;
      free_object(frame_name) ;
      return 0 ;
   }

   // look for an existing frame first; otherwise create the requested kind
   FrFrame *result = find_vframe_inline(frame_name) ;
   if (!result)
   {
      if (read_as_VFrame && FramepaC_new_VFrame)
	 result = FramepaC_new_VFrame(frame_name) ;
      else
	 result = new FrFrame(frame_name) ;
   }

   // each '[' starts one slot of the frame
   while (FrSkipWhitespace(input) == '[')
      read_Slot(input,result) ;

   // expect the closing ']'; get() may also yield EOF here
   const int closing = input.get() ;
   if (closing != ']')
      FrWarning(errmsg_frame_malformed) ;
   return result ;
}
static void string_to_Facet(const char *&input,FrFrame *frame,FrSymbol *slot) { FrSymbol *facet ; input++ ; // consume the left bracket facet = string_to_Symbol(input) ; // get facet name if (!facet || !facet->symbolp()) // the name must be a symbol { FrWarning(errmsg_facet_symbol) ; return ; } frame->createFacet(slot,facet) ; char c ; while ((c = FrSkipWhitespace(input)) != ']' && c != '\0') { frame->addFillerNoCopy(slot,facet,string_to_FrObject(input)) ; } if (c != ']') FrWarning(errmsg_facet_malformed) ; else input++ ; }
// Construct a set of text spans from 'span_defn', which may be
//   - a string or a symbol: its text is split into word spans using 'enc'
//     and 'word_delim';
//   - a list made up only of strings/symbols: handed to makeWordSpans();
//   - a list containing exactly one string/symbol: handed to parseSpans().
// A null definition leaves the object in its cleared state; any other kind
// of definition produces a warning and likewise leaves it cleared.
FrTextSpans::FrTextSpans(const FrObject *span_defn, FrCharEncoding enc,
			 const char *word_delim)
{
   clear() ;
   if (!span_defn)
      return ;

   if (span_defn->stringp())
   {
      makeWordSpans(((FrString*)span_defn)->stringValue(),enc,word_delim) ;
      return ;
   }
   if (span_defn->symbolp())
   {
      makeWordSpans(((FrSymbol*)span_defn)->symbolName(),enc,word_delim) ;
      return ;
   }
   if (!span_defn->consp())
   {
      FrWarning("invalid span definition given to FrTextSpans ctor") ;
      return ;
   }

   // a list: compare the number of strings/symbols against its length to
   // decide which of the two list forms was given
   const FrList *defn = (FrList*)span_defn ;
   const size_t string_count = count_strings(defn) ;
   const size_t total_count = defn->simplelistlength() ;
   if (string_count == total_count)
   {
      // nothing but strings/symbols
      makeWordSpans(defn) ;
   }
   else if (string_count == 1)
   {
      // exactly one string/symbol; the rest of the list is passed along
      // with it to parseSpans()
      parseSpans(defn) ;
   }
   else
   {
      FrWarning("span definition for FrTextSpans ctor must contain\n"
		"\teither exactly one string or only strings") ;
   }
}
// Open a socket connection to 'portnum' on 'hostname'.
//
// On success returns true, stores the socket in both 'pipe_in' and
// 'pipe_out', and stores newly allocated socket streams in 'stream_in' and
// 'stream_out'.  On failure returns false and leaves the outputs
// untouched; the warning is suppressed when 'silent' is true.
// When built without FrUSING_SOCKETS the function always warns and
// returns false.
static bool connect_to_port(const char *hostname, int portnum,
			    int &pipe_in, int &pipe_out,
			    istream *&stream_in, ostream *&stream_out,
			    bool silent = false)
{
#ifdef FrUSING_SOCKETS
   FrSocket sock = FrConnectToPort(hostname,portnum) ;
   if (sock < 0)
   {
      if (!silent)
	 FrWarning("Unable to connect to remote process!") ;
      return false ;
   }
   // one socket serves both directions
   pipe_out = sock ;
   pipe_in = pipe_out ;
   stream_in = new FrISockStream(sock) ;
   stream_out = new FrOSockStream(sock) ;
   return true ;
#else /* FrUSING_SOCKETS */
   // mark every parameter as used so the compiler does not complain
   (void)hostname ;
   (void)portnum ;
   (void)pipe_in ;
   (void)pipe_out ;
   (void)silent ;
   (void)stream_in ;
   (void)stream_out ;
   FrWarning("Sockets are not supported in this implementation") ;
   return false ;
#endif /* FrUSING_SOCKETS */
}
// Convert 'frame' into a list in FrameKit format: a header consisting of
// the stringMAKEFRAME symbol and the frame's name, followed by whatever
// FramepaC_to_FrameKit_slot accumulates while do_slots() walks the frame.
//
// Returns the new list, or 0 if 'frame' is null or if do_slots() reports
// failure (in which case a warning is issued).
FrList *FramepaC_to_FrameKit(const FrFrame *frame)
{
   if (!frame)
      return 0 ;

   FrList *temp_FrameKit_slots = nullptr ;
   FrList *list = new FrList(FrSymbolTable::add(stringMAKEFRAME),
			     FrSymbolTable::add(frame->frameName()->symbolName()));
   if (do_slots(frame,FramepaC_to_FrameKit_slot,&temp_FrameKit_slots))
      return list->nconc(temp_FrameKit_slots) ;

   FrWarning("error encountered while converting frame to FrameKit format.") ;
   // the header list was allocated above and is not returned on this path,
   // so release it rather than leaking it
   free_object(list) ;
   // NOTE(review): temp_FrameKit_slots may hold a partially built list at
   //   this point; it is left alone because the ownership of what the slot
   //   callback stores there is not visible here -- confirm and release it
   //   if appropriate.
   return 0 ;
}
static int server_request(FrConnection *conn, int reqcode, int datasize, const char *data, FrAsyncCallback *callback, void *client_data, FrPacket **replypacket = 0) { current_connection = conn ; char *packetdata ; if (datasize) { packetdata = FrNewN(char,datasize) ; if (!packetdata) { FrWarning("out of memory while sending a packet") ; return ENOMEM ; } memcpy(packetdata,data,datasize) ; } else
bool FrBWTIndex::compress() { if (m_compressed) return true ; m_bucketsize = DEFAULT_BUCKET_SIZE ; m_maxdelta = 255 - m_bucketsize ; m_numbuckets = (numItems() + bucketsize() - 1) / bucketsize() ; // figure out how big the pool of absolute pointers will be uint32_t prev_succ = ~0 ; size_t abs_pointers = 0 ; size_t comp_EORs = 0 ; m_poolsize = 0 ; FrAdviseMemoryUse(m_items,bytesPerPointer()*numItems(),FrMADV_SEQUENTIAL) ; for (size_t i = 0 ; i < numItems() ; i++) { if ((i % m_bucketsize) == 0) { abs_pointers = 0 ; comp_EORs = 0 ; } uint32_t succ = getUncompSuccessor(i) ; if (succ == m_EOR || (succ > m_EOR && m_eor_state == FrBWT_MergeEOR)) comp_EORs++ ; // will be stored without using an absolute pointer else if (succ <= prev_succ || succ - prev_succ > m_maxdelta || ((i+1)%m_bucketsize == 0 && (abs_pointers + comp_EORs == 0))) { // above enforces at least one absolute pointer per bucket abs_pointers++ ; m_poolsize++ ; } prev_succ = succ ; } size_t bpp = bytesPerPointer() ; // now that we know how big the pool is, check whether we will actually // save any space by compressing if ((m_poolsize + m_numbuckets) * bpp + numItems() >= numItems() * bpp) return false ; // can't (usefully) compress // allocate the various buffers for the compressed data m_buckets = FrNewN(char,bpp * m_numbuckets) ; unsigned char *comp_items = FrNewN(unsigned char,numItems()) ; m_bucket_pool = FrNewN(char,bpp * m_poolsize) ; if (comp_items && m_buckets && m_bucket_pool) { size_t bucket = 0 ; size_t ptr_count = 0 ; size_t ptr_index = 0 ; prev_succ = ~0 ; for (size_t i = 0 ; i < numItems() ; i++) { if ((i % m_bucketsize) == 0) { FrStoreLong(ptr_count,m_buckets + bpp * bucket++) ; ptr_index = 0 ; comp_EORs = 0 ; } if ((i % CHUNK_SIZE) == 0 && i > 0) { // let OS know we're done with another chunk of m_items FrDontNeedMemory(m_items + bpp*(i-CHUNK_SIZE), bpp*CHUNK_SIZE, (i > CHUNK_SIZE)) ; // and tell it to prefetch the next chunk FrWillNeedMemory(m_items + bpp*i, bpp*CHUNK_SIZE) ; } uint32_t succ 
= getUncompSuccessor(i) ; if (succ == m_EOR || (succ > m_EOR && m_eor_state == FrBWT_MergeEOR)) { comp_items[i] = COMPRESSED_EOR ; comp_EORs++ ; } else if (succ <= prev_succ || succ - prev_succ > m_maxdelta || ((i+1)%m_bucketsize == 0 && (ptr_index + comp_EORs == 0))) // (above ensures at least one abs.ptr per bucket) { FrStoreLong(succ,m_bucket_pool + bpp * ptr_count++); comp_items[i] = (unsigned char)(m_maxdelta + (++ptr_index)) ; } else comp_items[i] = (unsigned char)(succ - prev_succ) ; prev_succ = succ ; } assertq(ptr_count == m_poolsize) ; if (!m_fmap) FrFree(m_items) ; m_items = comp_items ; m_compressed = true ; return true ; } else // memory alloc failed { FrWarning("out of memory while compressing index, " "will remain uncompressed") ; FrFree(comp_items) ; FrFree(m_buckets) ; m_buckets = 0 ; FrFree(m_bucket_pool) ; m_bucket_pool = 0 ; m_numbuckets = 0 ; m_poolsize = 0 ; return false ; } }
bool FrTFIDF::load(const char *filename) { if (filename && *filename) { FrITextFile wt(filename) ; if (!wt.good()) { FrWarningVA("unable to open term weights file '%s'",filename) ; return false ; } delete ht ; ht = new FrSymHashTable ; FrSymbol *symEOF = FrSymbolTable::add("*EOF*") ; char *line = wt.getline() ; bool expanded = false ; if (line && strncmp(line,"!!! ",4) == 0) { char *end = 0 ; total_docs = (size_t)strtol(line+4,&end,10) ; if (end && end != line+4) { char *tmp = end ; size_t vocab_size = (size_t)strtol(tmp,&end,10) ; if (vocab_size > 0 && end && end != tmp) { ht->expand(vocab_size+100) ; expanded = true ; } } } if (!expanded) // ensure some reasonable starting size ht->expand(5000) ; while ((line = wt.getline()) != 0) { if (FrSkipWhitespace(line) == ';' || *line == '\0') continue ; const char *origline = line ; FrSymbol *term = (FrSymbol*)string_to_FrObject(line) ; if (term == symEOF || !term || !term->symbolp()) { FrWarning("invalid line in term-weights file") ; free_object(term) ; continue ; } char *end = 0 ; size_t term_freq = strtol(line,&end,10) ; if (end && end != line) { line = end ; size_t doc_freq = strtol(line,&end,10) ; if (end != line) { if (doc_freq > 0 && term_freq > 0) { FrSymHashEntry *entry = tfidfRecord(term) ; FrTFIDFrecord *rec = new FrTFIDFrecord(term_freq,doc_freq) ; if (entry) { delete (FrTFIDFrecord*)entry->getUserData() ; entry->setUserData(rec) ; } else ht->add(term,(void*)rec) ; continue ; } FrWarning("invalid data in term-weights file -- both term\n" "\tand document frequencies must be nonzero") ; free_object(term) ; continue ; } } FrWarningVA("expected two integers following the term '%s'; line was\n" "\t%s", term->symbolName(), origline) ; free_object(term) ; } return true ; } return false ; }
static bool fork_program(char **arglist, int &pipe_in, int &pipe_out) { #if defined(MSDOS) || defined(__MSDOS__) || defined(__WINDOWS__) || defined(__NT__) FrWarning("Sorry, fork() is not available under MS-DOS or Windoze.\n" "Change the program's configuration to indicate a socket number\n" "other than -1 to permit the use of " #ifdef FrUSING_POPEN "popen" #else "spawn" #endif /* FrUSING_POPEN */ "(), which IS supported.") ; (void)arglist ; (void)pipe_in ; (void)pipe_out ; return false ; #else int pipe_desc[2] ; pipe_desc[0] = EOF ; pipe_desc[1] = EOF ; errno = 0 ; int pipe_stat = pipe( pipe_desc ) ; if (pipe_stat) { if (pipe_desc[0] != EOF) close(pipe_desc[0]) ; if (pipe_desc[1] != EOF) close(pipe_desc[1]) ; FrErrorVA("bad write pipe (errno=%d: %s)",errno,strerror(errno)) ; return false ; } int pipe_in_child = pipe_desc[0] ; int pipe_out_parent = pipe_desc[1] ; errno = 0 ; pipe_stat = pipe( pipe_desc ) ; if (pipe_stat) { close(pipe_in_child) ; close(pipe_out_parent) ; if (pipe_desc[0] != EOF && pipe_desc[0] != pipe_in_child) (void)close(pipe_desc[0]) ; if (pipe_desc[1] != EOF && pipe_desc[1] != pipe_out_parent) (void)close(pipe_desc[1]) ; FrErrorVA("bad read pipe (errno=%d: %s)",errno,strerror(errno)) ; return false ; } int pipe_in_parent = pipe_desc[0] ; int pipe_out_child = pipe_desc[1] ; errno = 0 ; int pid = fork() ; if (pid == -1) { close(pipe_in_parent) ; close(pipe_in_child) ; close(pipe_out_parent) ; close(pipe_out_child) ; FrErrorVA("unable to fork %s (errno=%d)",arglist[0],errno) ; return false ; } else if (pid == 0) { dup2( pipe_in_child, 0 ) ; // put the read end of the pipe on stdin dup2( pipe_out_child, 1 ) ; // put the write end of the pipe on stdout // dup2( 1, 2 ) ; // also put stderr thru the pipe if (!FramepaC_verbose) { close(2) ; open(FrNULL_DEVICE,O_WRONLY) ; } close(pipe_in_child) ; close(pipe_out_child) ; close( pipe_in_parent ) ; // close the unused ends of the pipes close( pipe_out_parent ) ; errno = 0 ; execvp( arglist[0], arglist) ; // 
not reached except when error FrErrorVA("couldn't exec program %s (errno=%d) -- check configuration file", arglist[0],errno) ; return false ; } else { close(pipe_in_child) ; // close the unused ends of the pipes close(pipe_out_child) ; pipe_in = pipe_in_parent ; pipe_out = pipe_out_parent ; set_child_pid(pid) ; } return true ; #endif /* __WINDOWS__ || __NT__ */ }