/*
 * Read standard input one line at a time, pass each line to
 * parse_sentence() and print the string it returns on its own line.
 * The returned string is released with free() after printing.
 *
 * Returns 0 once input is exhausted or a read error occurs.
 */
int main() {
    char buffer[2048];

    /*
     * Loop on the fgets() result instead of feof(): after a read error
     * fgets() keeps returning NULL while feof() stays false, which made
     * the previous do/while loop spin forever.
     */
    while (fgets(buffer, LENGTH(buffer), stdin) != NULL) {
        size_t len = strlen(buffer);
        char *r;

        if (len == 0) {
            /* Only reachable when the line starts with a NUL byte;
               len - 1 would wrap around to SIZE_MAX. */
            continue;
        }

        /* len - 1 leaves out the last character read, which is normally
           the '\n' that fgets() keeps. */
        r = parse_sentence(buffer, len - 1);
        printf("%s\n", r);
        free(r);
    }

    return 0;
}
int main(void) { char ret = SUCCESS; int bytes_received = 0; struct sentence_struct ss; ss.ais_msg = malloc(MAX_SENTENCE_LEN*sizeof(char)); reset_sentence_struct(&ss); while (1) { char *buf = calloc(MAX_SENTENCE_LEN*sizeof(char)); char *english = calloc(MAX_ENGLISH_LEN*sizeof(char)); char *outmsg = NULL; char reset = TRUE; bytes_received = recv_until_delim(STDIN, buf, MAX_SENTENCE_LEN, '\x07'); if ((0 >= bytes_received) || ('\x07' != buf[bytes_received - 1])) { ret = -9; break; } else { buf[bytes_received - 1] = '\0'; } if (SUCCESS == parse_sentence(buf, &ss)) { if (DONE == ss.msg_status) { if (SUCCESS == to_english(english, &ss)) { outmsg = english; } else { outmsg = INVALID_MSG; } } else { // PARTIAL == ss->msg_status outmsg = PARTIAL_MSG; reset = FALSE; } } else { outmsg = INVALID_SENTENCE; } send(outmsg, cgc_strlen(outmsg)); if (TRUE == reset) { reset_sentence_struct(&ss); } free(english); free(buf); } return ret; }
/*
 * Read the text file `fname`, lower-case it character by character and
 * split it into sentences at '.', '!' and '?'. Each completed sentence is
 * handed to parse_sentence() together with its length + 1 and the offset
 * recorded for its start.
 *
 * tab, size_of_tab and alphabetical_word_list are forwarded unchanged to
 * parse_sentence().
 *
 * Text after the last sentence terminator is not passed on. Reading also
 * stops at the first NUL byte in the file. Sentences longer than
 * SENTENCE_BUFFER_LEN - 1 characters are truncated to fit the buffer.
 *
 * Returns 1 when the file was processed, 0 when it could not be opened.
 */
int parse_file(Cellule* tab, long size_of_tab, char* fname, Liste* alphabetical_word_list)
{
    FILE* f = fopen(fname, "r");
    unsigned long global_index = 0, sentence_offset = 0;
    unsigned char line[SENTENCE_BUFFER_LEN];
    unsigned short line_index = 0;
    size_t i;
    int ch;

    if (f == NULL) {
        /* Previously a missing file led to feof(NULL). */
        return 0;
    }

    /* Start from an all-zero buffer: line_index + 1 bytes are handed to
       parse_sentence(), so the byte after the text must be defined. */
    for (i = 0; i < SENTENCE_BUFFER_LEN; ++i)
        line[i] = 0;

    sentence_offset = global_index;

    /*
     * Test the fgetc() result itself. The old feof()-guarded loop treated
     * the EOF return value as a 0xFF character and never terminated on a
     * read error.
     */
    while ((ch = fgetc(f)) != EOF) {
        unsigned char c = (unsigned char)tolower(ch);

        if (c == 0) {
            /* A NUL byte ended the original loop as well. */
            break;
        }

        global_index++;

        if (c == '.' || c == '!' || c == '?') {
            parse_sentence(tab, size_of_tab, line, line_index + 1,
                           sentence_offset, alphabetical_word_list);

            /* reset current line */
            for (i = 0; i < SENTENCE_BUFFER_LEN; ++i)
                line[i] = 0;
            line_index = 0;
            sentence_offset = global_index + 1;
        } else if (line_index + 1 < (SENTENCE_BUFFER_LEN)) {
            /* Keep the last slot as a zero terminator and drop anything
               that does not fit instead of writing past the buffer. */
            line[line_index++] = c;
        }
    }

    fclose(f);
    return 1;
}
void* train_thread(void* para) { ThreadData *p = static_cast<ThreadData*>(para); const TrainPara& train_para(p->train_para); const Vocabulary& vocab(*(p->vocab)); Net& net(*(p->net)); int iter_cnt = train_para.iter_cnt; ifstream infile(p->file.c_str()); if (!infile) { cerr << "can't open: " << p->file << endl; pthread_exit((void*) 1); } Model *model; if (train_para.type == CBOW && train_para.algo == NEG_SAMPLING) { model = new CBOW_NS(vocab, train_para.neg_sample_cnt, net); } else if (train_para.type == SKIP_GRAM && train_para.algo == NEG_SAMPLING) { model = new SkipGram_NS(vocab, train_para.neg_sample_cnt, net); } else if (train_para.type == CBOW && train_para.algo == HIER_SOFTMAX) { const HuffmanTree& huffman_tree(*(p->huffman_tree)); model = new CBOW_HS(huffman_tree, net); } else if (train_para.type == SKIP_GRAM && train_para.algo == HIER_SOFTMAX) { const HuffmanTree& huffman_tree(*(p->huffman_tree)); model = new SkipGram_HS(huffman_tree, net); } else { cerr << "unimplemented model" << endl; pthread_exit((void*) 1); } string line; uint64_t word_cnt = 0; uint64_t max_word_cnt = vocab.total_cnt() * iter_cnt / train_para.thread_cnt; uint64_t update_word_cnt = 0; clock_t start_time = clock(); vector<uint64_t> sentence; vector<uint64_t> context; unsigned int seed = 37; real alpha0 = train_para.alpha; real alpha = alpha0; while (iter_cnt-- > 0) { infile.seekg(p->begin_pos); while (getline(infile, line)) { sentence.clear(); update_word_cnt += parse_sentence(line, vocab, train_para.subsample_thres, &seed, &sentence); if (update_word_cnt > 10000) { word_cnt += 10000; update_word_cnt -= 10000; alpha = alpha0 * (1 - static_cast<double>(word_cnt) / (max_word_cnt+1)); if (alpha < alpha0 * 0.0001) { alpha = alpha0 * 0.0001; } clock_t now_time = clock(); cerr << "alpha = " << alpha << ", progress = " << static_cast<double>(word_cnt) / max_word_cnt * 100 << " %, words/thread/sec = " << word_cnt/ ((double)(now_time - start_time + 1)/(double)CLOCKS_PER_SEC * 1000) * 
train_para.thread_cnt << " k" << endl; } for (size_t i = 0; i != sentence.size(); ++i) { context.clear(); extract_context(sentence, i, train_para.window_size, &seed, &context); if (context.empty()) { continue; } model->update(sentence[i], context, alpha); } if (infile.tellg() >= p->end_pos) { break; } } } infile.close(); delete model; return NULL; }
void parse_story () /* read a sequence of sentence case-role representations into a slot-filler representation of the entire story. Calls parse_sentence as a subroutine to get each case-role rep. */ { int i, j, k, senti, /* sentence number in the input file */ step = 0, /* actual sentence number in the sequence */ modi = STORYPARSMOD; /* module number */ printcomment ("\n", "parsing input story:", "\n"); /* first clean up the network display */ if (displaying) { proc_clear_network (modi); display_current_proc_net (modi); } /* get the target slot-filler indices */ for (i = 0; i < noutputs[modi]; i++) targets[modi][i] = story.slots[i]; /* form the target activation vector */ for (i = 0; i < noutputs[modi]; i++) for (j = 0; j < nsrep; j++) tgtrep[modi][i * nsrep + j] = swords[targets[modi][i]].rep[j]; /* display the target activation */ if (displaying) { display_labeled_layer (modi, noutputs[modi], tgtrep[modi], targets[modi], net[modi].tgtx, net[modi].tgty, BELOW); wait_and_handle_events (); /* stop if stepping, check for events */ } /* previous hidden layer is blank in the beginning of the sequence */ for (i = 0; i < nhidrep[modi]; i++) prevhidrep[modi][i] = 0.0; /* process each sentence included in the input story */ for (senti = 0; senti < story.nsent; senti++) if (story.sents[senti].included) { /* first form the case-role representation for the sentence */ parse_sentence (story.sents[senti], PARATASK); /* get the indices of the correct case-role representation */ for (i = 0; i < ninputs[modi]; i++) inputs[modi][i] = story.sents[senti].caseroles[i]; /* if the modules are in a chain, use sentence parser output as input */ if (chain) for (i = 0; i < ncaserep; i++) inprep[modi][i] = caserep[i]; else /* use the correct case-role rep as input */ for (i = 0; i < ninputs[modi]; i++) for (j = 0; j < nsrep; j++) inprep[modi][i * nsrep + j] = swords[inputs[modi][i]].rep[j]; /* display the input representation and the previous hidden layer */ if (displaying) { 
display_labeled_layer (modi, ninputs[modi], inprep[modi], inputs[modi], net[modi].inpx, net[modi].inpy, ABOVE); display_assembly (modi, net[modi].prevx, net[modi].prevy, prevhidrep[modi], nhidrep[modi]); wait_and_handle_events (); /* stop if stepping, check events */ } /* propagate from input and prevhid layer to the output */ forward_propagate (modi); /* display hidden layer, output, and the log line */ if (displaying) { display_assembly (modi, net[modi].hidx, net[modi].hidy, hidrep[modi], nhidrep[modi]); display_labeled_layer (modi, noutputs[modi], outrep[modi], targets[modi], net[modi].outx, net[modi].outy, BELOW2); display_error (modi, outrep[modi], noutputs[modi], targets[modi], swords, nsrep, ++step); wait_and_handle_events (); /* stop if stepping, check events */ } /* update the previous hidden layer */ for (k = 0; k < nhidrep[modi]; k++) prevhidrep[modi][k] = hidrep[modi][k]; } printcomment ("\n", "into internal rep:", "\n"); /* collect statistics about the output accuracy of this module */ collect_stats (PARATASK, modi); /* if the modules are in a chain, establish the result to be used for the episodic memory or storygen */ if (chain) for (i = 0; i < nslotrep; i++) slotrep[i] = outrep[modi][i]; }
int main(int argc, char *argv[]) { int s; int numsentences; FILE *words; char grammar[1000]; char buffer[1000]; float temp; int npflag; time_t g_time; time_t s_time; if(argc!=8) { fprintf(stderr,"ERROR in command line, usage:\n cat countsfile | parser.out sentences-file grammarfile beamsize punctuation-flag distaflag distvflag npflag\n"); return 0; } sscanf(argv[1],"%s",buffer); words=fopen(buffer,"r"); assert(words!=NULL); sscanf(argv[2],"%s",grammar); sscanf(argv[3],"%f",&temp); BEAMPROB = log(temp); sscanf(argv[4],"%d",&PUNC_FLAG); sscanf(argv[5],"%d",&DISTAFLAG); sscanf(argv[6],"%d",&DISTVFLAG); sscanf(argv[7],"%d",&npflag); assert(npflag==0 || npflag==1); set_treebankoutputflag(npflag); mymalloc_init(); mymalloc_char_init(); hash_make_table(8000007,&new_hash); effhash_make_table(1000003,&eff_hash); read_grammar(grammar); numsentences=read_sentences(words,sentences,2500); fprintf(stderr,"NUMSENTENCES %d\n",numsentences); read_events(stdin,&new_hash,-1); for(s=0;s<numsentences;s++) { time(&g_time); pthresh = -5000000; parse_sentence(&sentences[s]); /* print_chart();*/ time(&s_time); printf("TIME %d\n",(int) (s_time-g_time)); } return 1; }