예제 #1
0
파일: io.c 프로젝트: eldr4d/CFG-learner
int      
main(int argc, char **argv)
{
  si_t  si = make_si(1024);
  FILE  *grammarfp = stdin;
  FILE  *yieldfp;
  FILE	*tracefp = stderr;  	/* set this to NULL to stop trace output */
  FILE	*summaryfp = stderr;	/* set this to NULL to stop parse stats */
  grammar g0, g = NULL;
  int	maxlen = 0, minits = 0, maxits = 0;
  FLOAT stoptol = 1e-7;
  int nanneal = 0, randseed = 97;
  FLOAT rule_bias_default = 0;
  FLOAT annealstart = 1, annealstop = 1;
  FLOAT jitter0 = 0, jitter = 0;
  int debuglevel = 0, nruns = 1, irun = 0;
  FLOAT wordscale=1;
  int VariationalBayes=0;
  int tmp;
  char filename[100];

  {
    int chr;
    
    while ((chr = getopt(argc, argv, "a:g:s:p:l:m:n:d:t:b:B:N:j:J:S:d:R:T:VW:")) != -1) 
      switch (chr) {
      case 'a':
	rule_bias_default = atof(optarg);
	break;
      case 'g': 
  memcpy (filename, optarg, strlen (optarg));
  tmp = strlen (optarg);
  filename[tmp] = 'N';
  tmp++;
  filename[tmp] = 'e';
  tmp++;
  filename[tmp] = 'w';
  tmp++;
  filename[tmp] = '\0';
  tmp++;
	grammarfp = fopen(optarg, "r");
	if (grammarfp == NULL) {
	  fprintf(stderr, "Error: couldn't open grammarfile %s\n%s",
		  optarg, usage);
	  exit(EXIT_FAILURE);
	}
	break;
      case 's':
	stoptol = atof(optarg);
	break;
      case 'p':
	minruleprob = atof(optarg);
	break;
      case 'l':
	maxlen = atoi(optarg);
	break;
      case 'm':
	minits = atoi(optarg);
	break;
      case 'n':
	maxits = atoi(optarg);
	break;
      case 'b':
	annealstart = atof(optarg);
	break;
      case 'B':
	annealstop = atof(optarg);
	break;
      case 'N':
	nanneal = atoi(optarg);
	break;
      case 'j':
	jitter0 = atof(optarg);
	break;
      case 'J':
	jitter = atof(optarg);
	break;
      case 'S': 
	randseed = atoi(optarg);
	break;
      case 'd':
	debuglevel = atoi(optarg);
	break;
      case 'R':
	nruns = atoi(optarg);
	break;
      case 'T':
	summaryfp = fopen(optarg, "w");
	break;
      case 'V':
	VariationalBayes = 1;
	break;
      case 'W':
	wordscale = atof(optarg);
	break;
      case '?':
      default:
	fprintf(stderr, "Error: unknown command line flag %c\n\n%s\n",
		chr, usage);
	exit(EXIT_FAILURE);
	break;
      }
  }
    
  if (optind + 1 != argc) {
    fprintf(stderr, "Error: expect a yieldfile\n\n%s\n", usage);
    exit(EXIT_FAILURE);
  }

  if ((yieldfp = fopen(argv[optind], "r")) == NULL) {
    fprintf(stderr, "Error: Couldn't open yieldfile %s\n%s", argv[optind], usage);
    exit(EXIT_FAILURE);
  }
   
  srand(randseed);
  if (summaryfp && debuglevel >= 100)
    fprintf(summaryfp, "# rule_bias_default (-a) = %g, stoptol (-s) = %g, minruleprob (-p) = %g, "
	    "maxlen (-l) = %d, minits (-m) = %d, maxits = (-n) = %d, annealstart (-b) = %g, "
	    "annealstop (-B) = %g, nanneal (-N) = %d, jitter0 (-j) = %g, jitter (-J) = %g, "
	    "VariationalBayes (-V) = %d, wordscale (-W) = %g, randseed (-S) = %d, "
	    "debuglevel (-d) = %d, nruns (-R) = %d\n",
	    rule_bias_default, stoptol, minruleprob, maxlen, minits, maxits, annealstart,
	    annealstop, nanneal, jitter0, jitter, VariationalBayes, wordscale, randseed, debuglevel, nruns);

  g0 = read_grammar(grammarfp, si, rule_bias_default);
  write_grammar(stdout, g0, si, minruleprob);

  set_rule_weights(g0, g0->weights, VariationalBayes);      /* normalize rule counts */

  signal(SIGINT, write_grammar_);

  for (irun = 0; irun < nruns; ++irun) {
    FLOAT entropy;

    g = copy_grammar(g0, si);

    if (summaryfp && debuglevel >= 100)
      fprintf(summaryfp, "# Run %d\n", irun);
    
    g_global = g;
    si_global = si;
    
    if (jitter0 > 0)
      jitter_weights(g, jitter0);
    
    entropy = inside_outside(g, si, yieldfp, tracefp, summaryfp, debuglevel, maxlen,
			     minits, maxits, stoptol, minruleprob, jitter, VariationalBayes, wordscale,
			     annealstart, annealstop, nanneal); 

    if (summaryfp && debuglevel >= 0)
      fprintf(summaryfp, "# run %d, entropy %g, %ld rules\n", irun, entropy, (long) g->nrules);

    if (debuglevel >= 1) {
      write_grammar(stdout, g, si, minruleprob);

      FILE* grammarfp = fopen(filename, "w");
      //write_grammar(stdout, g, si, minruleprob);
      write_grammar(grammarfp, g, si, minruleprob);
      fprintf(stdout, "\n");
      fflush(stdout);
    }

    free_grammar(g);
  }
  
  free_grammar(g0);
  si_free(si);
    
  if (mmm_blocks_allocated) 
    fprintf(stderr, "Error in mrf(): %ld memory block(s) not deallocated\n", 
	    mmm_blocks_allocated);
  
  /* check that everything has been deallocated */
  assert(mmm_blocks_allocated == 0);		
  exit(EXIT_SUCCESS);
}
예제 #2
0
int run_parser( TTreeView *TreeView1 , char *file_name )
{
    Config_set *config_ptr;
    GRAMMAR *G_ptr; 
    STATE *state_ptr;
    TOKEN *T_ptr , *T_tmp;
    FILE *input ,*out;
    char grammar_rule[1024];
    char *ptr , *ptr_string;
    char *ptr_lhs , *ptr_rhs;
    SET *S_ptr;
    int i , j;
    int rule_num; 

    strncpy( lambda , "£f" , strlen("£f")+1 );
	G_start = NULL;
    G_end = NULL;
	terminal_start = NULL;
    terminal_end = NULL;
    nonterminal_start = NULL;
    nonterminal_end = NULL;
    first_set_start = NULL;
    first_set_end = NULL;
    follow_set_start = NULL;
    follow_set_end = NULL;
    predict_set_start = NULL;
    predict_set_end = NULL;
    config_start = NULL;
    config_end = NULL;
    state_start = NULL;
    state_end = NULL;
    state_num = 0;
    
    
    //scanner();
	input = fopen( file_name ,"r");
	scan = fopen("out.txt","r");

    
    if ( input == NULL )
	{
    	Application->MessageBoxA("grammar file error","error",0);
        return 0;     
    }
     
//    out = fopen("table.csv","w");
     
    for ( i = 0 ; fgets( grammar_rule , 1024 , input ) ;  )
    {
        ptr = grammar_rule;
        // remove \n
        ptr = strstr(grammar_rule , "\n");
        if(ptr != NULL)
            *ptr = '\0';
            
        // remove rule num
        ptr = strstr( grammar_rule , "." );
        // get left handside
        ptr_lhs = ptr + 2;
        ptr = strstr( grammar_rule , " " );
        *ptr = '\0';
        insert_token( &nonterminal_start , &nonterminal_end , ptr_lhs );    // make nonterminal table
        insert_token( &first_set_start , &first_set_end , ptr_lhs );        // make first set table with nonterminal 
        insert_token( &follow_set_start , &follow_set_end , ptr_lhs );      // make follow set table
        insert_token( &predict_set_start , &predict_set_end , ptr_lhs );    // make predict set table
		// get right handside
		ptr = strstr( ++ptr , "¡÷" );
        do
        {
            ptr = strstr( ++ptr , " " );
            while(isspace(*ptr))
                ptr++;
            ptr_rhs = ptr;
            ptr = strstr( ptr_rhs , "|" );
            if ( ptr != NULL &&  *(ptr+1) != '|' )
            {
                for ( j = 1 ; isspace(*(ptr-j)) ; j++ )
                *(ptr-j) = '\0';
            }
            
            insert_grammar( ++i , ptr_lhs , ptr_rhs );
            
        } while ( ptr != NULL &&  *(ptr+1) != '|' ); 
    }
    
    
    // remove nonterminal in termainal table
    T_ptr = nonterminal_start;
    i = 0;
    while ( T_ptr != NULL )
    {
        T_tmp = search_token( terminal_start , T_ptr->string );
        delete_token( &terminal_start , T_tmp );
        T_ptr = T_ptr->next;
        i++;
    }
    num_of_nonterm = i;

    for ( T_ptr = terminal_start , i =0 ; T_ptr != NULL ; T_ptr = T_ptr->next , i++ )
    {
        insert_token( &first_set_start , &first_set_end , T_ptr->string );        // make first set table
    }
    num_of_term = ++i;
    
    fill_frist_set();
    fill_follow_set();
    i = 0;
    view = fopen("state.txt","w");
    //view = stdout;
    out = fopen("go_to_table.csv","w");
    
    build_CFSM();
    

    
    // build goto table
    parser_table = (PARSER**) malloc( sizeof(PARSER) * state_num );
    for( i = 0 ; i < state_num ; i++ )
    {
        parser_table[i] = (PARSER*) malloc( sizeof(PARSER) * ( num_of_term  + num_of_nonterm ));
        for( j = 0 ; j < num_of_term  + num_of_nonterm ; j++ )
        {
            parser_table[i][j].go_to = go_to_state(i,j+1);
            if( j+1 != make_id("$") )
                parser_table[i][j].action = SHIFT;
            else
                parser_table[i][j].action = ACCEPT;
                
            if( parser_table[i][j].go_to == 0 )
                parser_table[i][j].action = ERROR;
        }
    }
    
    // comput lalr lookahead
    build_LALR_lookahead();

    fprintf(out,"\t,");
    for( j = 0 ; j < num_of_term  + num_of_nonterm ; j++ )
    {
        if( j+1 != make_id(",") )
            fprintf(out," %s,",idtostr(j+1));
        else
           fprintf(out," ' ,");
    }
    fprintf( out ,"\n");
    
    // build action table
    for( i = 0 , state_ptr = state_start ; i < state_num ; i++ , state_ptr = state_ptr->next )
    {
        fprintf(out,"state%d,",i); 

        for( j = 0 ; j < num_of_term  + num_of_nonterm ; j++ )
        { 
            for( config_ptr = state_ptr->config_set ; config_ptr != NULL ; config_ptr = config_ptr->set_next )
            {
                if( config_ptr->dot == NULL && search_set( config_ptr->lookahead , idtostr(j+1) ) == TRUE )
                {
                    if(parser_table[i][j].go_to == 0)
                    {
                        //printf("hit\n");
                        parser_table[i][j].go_to = config_ptr->rule->rule;
                        parser_table[i][j].action = REDUCE;
                    }
                    //fprintf(out,"R%02d,",parser_table[i][j].go_to);
                }
            }
            if(parser_table[i][j].go_to > 0)
            {
                if( parser_table[i][j].action == SHIFT )                        
                    fprintf( out , "S%02d,",parser_table[i][j].go_to);
                else
                    fprintf( out , "R%02d,",parser_table[i][j].go_to);
            }
            else
                fprintf( out , ",");                
        }
        fprintf( out ,"\n"); 
    }
    fclose(out);
    
    
    //printf( "%d\n", go_to_state(4,5) );
    for ( i = 0 ; i < state_num ; i++ )
    {
        view_state(i);
        fprintf(view,"\n");
        //getch();
        //system("cls");
	}
    shift_reduce_driver( TreeView1 );

    free_token(&terminal_start);
    free_token(&nonterminal_start);
    free_grammar();
    free_state(&state_start);
    //free_config(&config_start);
    
	//system("pause");
	return 0;
}
예제 #3
0
파일: lncky.c 프로젝트: mjpost/cky
int      
main(int argc, char **argv)
{
  si_t          si = make_si(1024);
  FILE          *grammarfp = stdin, *yieldfp;
  FILE		*tracefp = NULL;  	/* trace output */
  FILE		*summaryfp = stderr;	/* end of parse stats output */
  FILE		*parsefp = stdout;      /* parse trees */
  FILE		*probfp = NULL;         /* max_neglog_prob */

  chart_cell	root_cell;
  grammar	g;
  chart		c;
  vindex 	terms;
  int		maxsentlen = 0;
  int           sentenceno = 0, parsed_sentences = 0, failed_sentences = 0;
  double	sum_neglog_prob = 0;
  int           sentfrom = 0;
  int           sentto = 0;

  srand(RAND_SEED);	/* seed random number generator */

  if (argc<2 || argc>6) {
    fprintf(stderr, "%s yieldfile [maxsentlen [grammarfile [sentfrom sentto]]]\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  if ((yieldfp = fopen(argv[1], "r")) == NULL) {
    fprintf(stderr, "%s: Couldn't open yieldfile %s\n", argv[0], argv[1]);
    exit(EXIT_FAILURE);
  }

  if (argc >= 3)
    if (!sscanf(argv[2], "%d", &maxsentlen)) {
      fprintf(stderr, "%s: Couldn't parse maxsentlen %s\n", argv[0], argv[2]);
      exit(EXIT_FAILURE);
    }

  if (argc >= 4)
    if ((grammarfp = fopen(argv[3], "r")) == NULL) {
      fprintf(stderr, "%s: Couldn't open grammarfile %s\n", argv[0], argv[3]);
      exit(EXIT_FAILURE);
    }

  if (argc >= 6) {
    if (!sscanf(argv[4], "%d", &sentfrom)) {
      fprintf(stderr, "%s: Couldn't parse sentfrom %s\n", argv[0], argv[4]);
      exit(EXIT_FAILURE);
    }
    if (!sscanf(argv[5], "%d", &sentto)) {
      fprintf(stderr, "%s: Couldn't parse sentto %s\n", argv[0], argv[5]);
      exit(EXIT_FAILURE);
    }
  }

  g = read_grammar(grammarfp, si);
  /* write_grammar(tracefp, g, si); */

  while ((terms = read_terms(yieldfp, si))) {
    sentenceno++;

    if (sentfrom && sentenceno < sentfrom) {
      vindex_free(terms);
      continue;
    }
    if (sentto && sentenceno > sentto) {
      vindex_free(terms);
      break;
    }

    /* skip if sentence is too long */
    if (!maxsentlen || (int) terms->n <= maxsentlen) { 
      size_t	i;

      if (tracefp) {
	fprintf(tracefp, "\nSentence %d:\n", sentenceno);
	for (i=0; i<terms->n; i++)
	  fprintf(tracefp, " %s", si_index_string(si, terms->e[i]));
	fprintf(tracefp, "\n");
      }
     
      c = cky(*terms, g, si);

      /* fetch best root node */

      root_cell = sihashcc_ref(CHART_ENTRY(c, 0, terms->n), g.root_label);

      if (root_cell) {
	tree parse_tree = bintree_tree(&root_cell->tree, si);
	double prob = (double) root_cell->prob;

	parsed_sentences++;
	assert(prob > 0.0);
	sum_neglog_prob -= log(prob);

	if (probfp)
	  fprintf(probfp, "max_neglog_prob(%d, %g).\n", 
		  sentenceno, -log(prob)); 

	if (tracefp) 
	  fprintf(tracefp, " Prob = %g\n", prob);

	if (parsefp) {
	  write_tree(parsefp, parse_tree, si);
	  fprintf(parsefp, "\n");
	  /* write_prolog_tree(parsefp, parse_tree, si); */
	}

	free_tree(parse_tree);
      }

      else {
	failed_sentences++;
	if (tracefp)
	  fprintf(tracefp, "Failed to parse\n");
	if (parsefp)
	  fprintf(parsefp, "parse_failure.\n");
      }

      chart_free(c, terms->n);			/* free the chart */
    }
    else { 					/* sentence too long */
      if (parsefp)
	fprintf(parsefp, "too_long.\n");
    }

    vindex_free(terms);				/*  free the terms */
    assert(trees_allocated == 0);
    assert(bintrees_allocated == 0);
  }
  free_grammar(g);
  si_free(si);

  if (summaryfp) {
    fprintf(summaryfp, "\n%d/%d = %g%% test sentences met the length criteron,"
	    " of which %d/%d = %g%% were parsed\n", 
	    parsed_sentences+failed_sentences, sentenceno,
	    (double) (100.0 * (parsed_sentences+failed_sentences)) / 
	                       sentenceno,
	    parsed_sentences, parsed_sentences+failed_sentences, 
	    (double) (100.0 * parsed_sentences) / 
                              (parsed_sentences + failed_sentences));
    fprintf(summaryfp, "Sum(-log prob) = %g\n", sum_neglog_prob);
  }

  /* check that everything has been deallocated */
  /* printf("mmm_blocks_allocated = %ld\n", (long) mmm_blocks_allocated); */
  assert(mmm_blocks_allocated == 0);		
  exit(EXIT_SUCCESS);
}