Esempio n. 1
0
/*
 * Read stdin one line at a time, pass each line (without its trailing
 * newline) to parse_sentence() and print the string it returns.
 *
 * The string returned by parse_sentence() is released with free() here.
 * Returns 0 when input is exhausted or a read error occurs.
 */
int
main() {
	char buffer[2048], *r;

	/*
	 * fgets() returns NULL on end of file AND on read error, so testing
	 * its result terminates the loop in both cases (polling feof() alone
	 * would spin forever on a read error).
	 */
	while (fgets(buffer, LENGTH(buffer), stdin) != NULL) {
		/*
		 * Number of characters before the newline, if there is one.
		 * Unlike strlen(buffer) - 1 this does not drop a real
		 * character when the final line has no newline, and cannot
		 * underflow on an empty string.
		 */
		size_t len = strcspn(buffer, "\n");

		r = parse_sentence(buffer, len);
		if (r != NULL) {
			printf("%s\n", r);
			free(r);
		}
	}
	return 0;
}
Esempio n. 2
0
/*
 * Service loop: repeatedly receive one '\x07'-terminated sentence from
 * STDIN, feed it to parse_sentence() and send back a status or translated
 * message.
 *
 * The sentence state in `ss` is carried across iterations while a message is
 * only partially received and is reset after every complete or invalid one.
 * The loop ends (with ret = -9) when nothing is received or the received
 * data is not terminated by the delimiter.
 *
 * NOTE(review): calloc() and send() are called with fewer arguments than the
 * standard library versions take, so they are presumably project-provided
 * wrappers - confirm. Allocation results are not checked, as in the rest of
 * this function; ss.ais_msg is never freed before returning.
 */
int main(void) {
    char ret = SUCCESS;
    int bytes_received = 0;

    struct sentence_struct ss;
    ss.ais_msg = malloc(MAX_SENTENCE_LEN*sizeof(char));
    reset_sentence_struct(&ss);

    while (1) {
        /* fresh buffers for every sentence */
        char *buf = calloc(MAX_SENTENCE_LEN*sizeof(char));
        char *english = calloc(MAX_ENGLISH_LEN*sizeof(char));

        char *outmsg = NULL;
        char reset = TRUE;

        bytes_received = recv_until_delim(STDIN, buf, MAX_SENTENCE_LEN, '\x07');
        if ((0 >= bytes_received) || ('\x07' != buf[bytes_received - 1])) {
            ret = -9;
            /* release this iteration's buffers before leaving the loop;
               previously they were leaked on this path */
            free(english);
            free(buf);
            break;
        }

        /* replace the delimiter with a string terminator */
        buf[bytes_received - 1] = '\0';

        if (SUCCESS == parse_sentence(buf, &ss)) {
            if (DONE == ss.msg_status) {
                if (SUCCESS == to_english(english, &ss)) {
                    outmsg = english;
                } else {
                    outmsg = INVALID_MSG;
                }
            } else { // PARTIAL == ss->msg_status
                outmsg = PARTIAL_MSG;
                /* keep the accumulated state for the next fragment */
                reset = FALSE;
            }
        } else {
            outmsg = INVALID_SENTENCE;
        }

        send(outmsg, cgc_strlen(outmsg));

        if (TRUE == reset) {
            reset_sentence_struct(&ss);
        }

        free(english);
        free(buf);
    }

    return ret;
}
Esempio n. 3
0
/*
 * Read the file `fname` character by character, lower-casing each one, and
 * call parse_sentence() every time a sentence terminator ('.', '!' or '?')
 * is met.
 *
 * tab, size_of_tab, alphabetical_word_list: forwarded unchanged to
 *     parse_sentence().
 * fname: path of the text file to read.
 *
 * parse_sentence() receives the accumulated sentence (terminator excluded),
 * a length of line_index + 1, and the offset recorded for the sentence.
 * Reading stops at end of file or at the first NUL byte. Text after the last
 * terminator is not passed to parse_sentence().
 *
 * Characters that would not fit in the SENTENCE_BUFFER_LEN buffer (keeping
 * one trailing zero byte) are dropped instead of being written past its end.
 *
 * Returns 1 on success, 0 if the file could not be opened.
 */
int parse_file(Cellule* tab, long size_of_tab, char* fname, Liste* alphabetical_word_list) {
    FILE* f = fopen(fname, "r");
    unsigned long global_index = 0, sentence_offset = 0;
    /* zero-filled so the byte after the last stored character is always 0,
       including for the very first sentence */
    unsigned char line[SENTENCE_BUFFER_LEN] = {0};
    unsigned short line_index = 0;
    size_t i;
    int ch;

    if (f == NULL)
        return 0;

    /* keep fgetc()'s result in an int so EOF is detected before the value
       is narrowed to a character */
    while ((ch = fgetc(f)) != EOF) {
        unsigned char c = (unsigned char) tolower(ch);

        if (c == 0)
            break;

        global_index++;
        if (c == '.' || c == '!' || c == '?') {
            parse_sentence(tab, size_of_tab, line, line_index + 1, sentence_offset, alphabetical_word_list);
            /* reset current line */
            for (i = 0; i < SENTENCE_BUFFER_LEN; ++i)
                line[i] = 0;
            line_index = 0;
            sentence_offset = global_index + 1;
        }
        else if (line_index + 1 < SENTENCE_BUFFER_LEN) {
            line[line_index++] = c;
        }
        /* else: sentence longer than the buffer, extra character dropped */
    }
    fclose(f);
    return 1;
}
Esempio n. 4
0
/*
 * Thread entry point: trains the shared network on this thread's slice of
 * the training file.
 *
 * para: pointer to a ThreadData describing the file, the byte range
 *       [begin_pos, end_pos) to read, the training parameters, the
 *       vocabulary, the network and (for hierarchical softmax) the Huffman
 *       tree.
 *
 * The slice is read train_para.iter_cnt times. Each line is converted to a
 * sentence by parse_sentence(), and for every word position a context is
 * extracted and passed to the model's update(). The learning rate decays
 * linearly with the number of processed words and is floored at 0.0001 of
 * its initial value.
 *
 * Returns NULL on normal completion; exits the thread with status 1 if the
 * file cannot be opened or the type/algorithm combination is not handled.
 */
void* train_thread(void* para) {
    ThreadData *p = static_cast<ThreadData*>(para);
    const TrainPara& train_para(p->train_para);
    const Vocabulary& vocab(*(p->vocab));
    Net& net(*(p->net));
    int iter_cnt = train_para.iter_cnt;

    ifstream infile(p->file.c_str());
    if (!infile) {
        cerr << "can't open: " << p->file << endl;
        pthread_exit((void*) 1);
    }

    /* pick the model implementation for the requested type/algorithm pair */
    Model *model;
    if (train_para.type == CBOW && train_para.algo == NEG_SAMPLING) {
        model = new CBOW_NS(vocab, train_para.neg_sample_cnt, net);
    }
    else if (train_para.type == SKIP_GRAM && train_para.algo == NEG_SAMPLING) {
        model = new SkipGram_NS(vocab, train_para.neg_sample_cnt, net);
    }
    else if (train_para.type == CBOW && train_para.algo == HIER_SOFTMAX) {
        const HuffmanTree& huffman_tree(*(p->huffman_tree));
        model = new CBOW_HS(huffman_tree, net);
    }
    else if (train_para.type == SKIP_GRAM && train_para.algo == HIER_SOFTMAX) {
        const HuffmanTree& huffman_tree(*(p->huffman_tree));
        model = new SkipGram_HS(huffman_tree, net);
    }
    else {
        cerr << "unimplemented model" << endl;
        pthread_exit((void*) 1);
    }


    string line;
    uint64_t word_cnt = 0;
    /* number of words this thread is expected to process over all passes */
    uint64_t max_word_cnt = vocab.total_cnt() * iter_cnt / train_para.thread_cnt;
    uint64_t update_word_cnt = 0;
    clock_t start_time = clock();
    vector<uint64_t> sentence;
    vector<uint64_t> context;
    unsigned int seed = 37;
    real alpha0 = train_para.alpha;
    real alpha = alpha0;
    while (iter_cnt-- > 0) {
        /* A previous pass may have ended with getline() failing at end of
           file, which leaves failbit set; seekg() is a no-op on a failed
           stream, so reset the state before rewinding to the slice start. */
        infile.clear();
        infile.seekg(p->begin_pos);
        while (getline(infile, line)) {
            sentence.clear();
            update_word_cnt += parse_sentence(line, vocab, train_para.subsample_thres, &seed, &sentence);
            /* refresh the learning rate and report progress every 10000 words */
            if (update_word_cnt > 10000) {
                word_cnt += 10000;
                update_word_cnt -= 10000;
                alpha = alpha0 * (1 - static_cast<double>(word_cnt) / (max_word_cnt+1));
                if (alpha < alpha0 * 0.0001) {
                    alpha = alpha0 * 0.0001;
                }

                clock_t now_time = clock();
                cerr << "alpha = " << alpha
                     << ", progress = " << static_cast<double>(word_cnt) / max_word_cnt * 100
                     << " %, words/thread/sec = " << word_cnt/ ((double)(now_time - start_time + 1)/(double)CLOCKS_PER_SEC * 1000) * train_para.thread_cnt
                     << " k" << endl;
            }
            for (size_t i = 0; i != sentence.size(); ++i) {
                context.clear();
                extract_context(sentence, i, train_para.window_size, &seed, &context);
                if (context.empty()) {
                    continue;
                }
                model->update(sentence[i], context, alpha);
            }

            /* stop this pass once the end of the thread's slice is reached */
            if (infile.tellg() >= p->end_pos) {
                break;
            }
        }
    }
    infile.close();
    delete model;
    return NULL;
}
Esempio n. 5
0
void
parse_story ()
/* read a sequence of sentence case-role representations into a 
   slot-filler representation of the entire story. 
   Calls parse_sentence as a subroutine to get each case-role rep. */
{
  int i, j, k,
  senti,			/* sentence number in the input file */
  step = 0,			/* actual sentence number in the sequence */
  modi = STORYPARSMOD;		/* module number */

  printcomment ("\n", "parsing input story:", "\n");

  /* first clean up the network display */
  if (displaying)
    {
      proc_clear_network (modi);
      display_current_proc_net (modi);
    }
  
  /* get the target slot-filler indices */
  for (i = 0; i < noutputs[modi]; i++)
    targets[modi][i] = story.slots[i];
  /* form the target activation vector */
  for (i = 0; i < noutputs[modi]; i++)
    for (j = 0; j < nsrep; j++)
      tgtrep[modi][i * nsrep + j] = swords[targets[modi][i]].rep[j];
  /* display the target activation */
  if (displaying)
    {
      display_labeled_layer (modi, noutputs[modi], tgtrep[modi], targets[modi],
			     net[modi].tgtx, net[modi].tgty, BELOW);
      wait_and_handle_events ();  /* stop if stepping, check for events */
    }

  /* previous hidden layer is blank in the beginning of the sequence */
  for (i = 0; i < nhidrep[modi]; i++)
    prevhidrep[modi][i] = 0.0;
  /* process each sentence included in the input story */
  for (senti = 0; senti < story.nsent; senti++)
    if (story.sents[senti].included)
      {
	/* first form the case-role representation for the sentence */
	parse_sentence (story.sents[senti], PARATASK);

	/* get the indices of the correct case-role representation */
	for (i = 0; i < ninputs[modi]; i++)
	  inputs[modi][i] = story.sents[senti].caseroles[i];
	/* if the modules are in a chain,
	   use sentence parser output as input */
	if (chain)
	  for (i = 0; i < ncaserep; i++)
	    inprep[modi][i] = caserep[i];
	else
	  /* use the correct case-role rep as input */
	  for (i = 0; i < ninputs[modi]; i++)
	    for (j = 0; j < nsrep; j++)
	      inprep[modi][i * nsrep + j] = swords[inputs[modi][i]].rep[j];

	/* display the input representation and the previous hidden layer */
	if (displaying)
	  {
	    display_labeled_layer (modi, ninputs[modi], inprep[modi],
				   inputs[modi],
				   net[modi].inpx, net[modi].inpy, ABOVE);
	    display_assembly (modi, net[modi].prevx, net[modi].prevy,
			      prevhidrep[modi], nhidrep[modi]);
	    wait_and_handle_events ();  /* stop if stepping, check events */
	  }

	/* propagate from input and prevhid layer to the output */
	forward_propagate (modi);
	/* display hidden layer, output, and the log line */
	if (displaying)
	  {
	    display_assembly (modi, net[modi].hidx, net[modi].hidy,
			      hidrep[modi], nhidrep[modi]);
	    display_labeled_layer (modi, noutputs[modi], outrep[modi],
				   targets[modi],
				   net[modi].outx, net[modi].outy, BELOW2);
	    display_error (modi, outrep[modi], noutputs[modi],
			   targets[modi], swords, nsrep, ++step);
	    wait_and_handle_events ();  /* stop if stepping, check events */
	  }
	/* update the previous hidden layer */
	for (k = 0; k < nhidrep[modi]; k++)
	  prevhidrep[modi][k] = hidrep[modi][k];
      }

  printcomment ("\n", "into internal rep:", "\n");
  /* collect statistics about the output accuracy of this module */
  collect_stats (PARATASK, modi);

  /* if the modules are in a chain, establish the result to be used
     for the episodic memory or storygen */
  if (chain)
    for (i = 0; i < nslotrep; i++)
      slotrep[i] = outrep[modi][i];
}
Esempio n. 6
0
/*
 * Parser driver.
 *
 * Usage: cat countsfile | parser.out sentences-file grammarfile beamsize
 *        punctuation-flag distaflag distvflag npflag
 *
 * Reads the grammar, the sentences file (at most 2500 sentences are
 * requested from read_sentences) and the events from stdin, then calls
 * parse_sentence() on every sentence and prints the elapsed wall-clock
 * seconds for each.
 *
 * Returns 1 after all sentences have been processed and 0 when the command
 * line is invalid or the sentences file cannot be opened (note that this is
 * the opposite of the usual exit-status convention; kept as is for existing
 * callers).
 *
 * NOTE(review): `words` is never closed; whether read_sentences() keeps
 * using the stream afterwards cannot be seen from here.
 */
int main(int argc, char *argv[])
{
  int s;
  int numsentences;
  FILE *words;
  char grammar[1000];
  char buffer[1000];
  float temp;
  int npflag;

  time_t g_time;
  time_t s_time;

  if(argc!=8) 
    {
      fprintf(stderr,"ERROR in command line, usage:\n cat countsfile | parser.out sentences-file grammarfile beamsize punctuation-flag distaflag distvflag npflag\n");
      return 0;
    }

  /* field widths keep the copies inside the 1000-byte buffers */
  if(sscanf(argv[1],"%999s",buffer)!=1)
    {
      fprintf(stderr,"ERROR: missing sentences-file name\n");
      return 0;
    }
  words=fopen(buffer,"r");
  if(words==NULL)
    {
      fprintf(stderr,"ERROR: cannot open sentences file %s\n",buffer);
      return 0;
    }

  if(sscanf(argv[2],"%999s",grammar)!=1)
    {
      fprintf(stderr,"ERROR: missing grammarfile name\n");
      return 0;
    }

  /* the beam is given as a probability and stored as its logarithm */
  if(sscanf(argv[3],"%f",&temp)!=1)
    {
      fprintf(stderr,"ERROR: beamsize must be a number\n");
      return 0;
    }
  BEAMPROB = log(temp);

  if(sscanf(argv[4],"%d",&PUNC_FLAG)!=1 ||
     sscanf(argv[5],"%d",&DISTAFLAG)!=1 ||
     sscanf(argv[6],"%d",&DISTVFLAG)!=1 ||
     sscanf(argv[7],"%d",&npflag)!=1)
    {
      fprintf(stderr,"ERROR: punctuation-flag, distaflag, distvflag and npflag must be integers\n");
      return 0;
    }
  if(npflag!=0 && npflag!=1)
    {
      fprintf(stderr,"ERROR: npflag must be 0 or 1\n");
      return 0;
    }
  set_treebankoutputflag(npflag);

  mymalloc_init();
  mymalloc_char_init();

  hash_make_table(8000007,&new_hash);
  effhash_make_table(1000003,&eff_hash);

  read_grammar(grammar);

  numsentences=read_sentences(words,sentences,2500);

  fprintf(stderr,"NUMSENTENCES %d\n",numsentences);

  read_events(stdin,&new_hash,-1);

  for(s=0;s<numsentences;s++)
   {
     time(&g_time);

     /* threshold is reset before every sentence */
     pthresh = -5000000;

     parse_sentence(&sentences[s]);
     
     time(&s_time);
     printf("TIME %d\n",(int) (s_time-g_time));
   }
  return 1;
}