int main(int argc, char **argv) { si_t si = make_si(1024); FILE *grammarfp = stdin; FILE *yieldfp; FILE *tracefp = stderr; /* set this to NULL to stop trace output */ FILE *summaryfp = stderr; /* set this to NULL to stop parse stats */ grammar g0, g = NULL; int maxlen = 0, minits = 0, maxits = 0; FLOAT stoptol = 1e-7; int nanneal = 0, randseed = 97; FLOAT rule_bias_default = 0; FLOAT annealstart = 1, annealstop = 1; FLOAT jitter0 = 0, jitter = 0; int debuglevel = 0, nruns = 1, irun = 0; FLOAT wordscale=1; int VariationalBayes=0; int tmp; char filename[100]; { int chr; while ((chr = getopt(argc, argv, "a:g:s:p:l:m:n:d:t:b:B:N:j:J:S:d:R:T:VW:")) != -1) switch (chr) { case 'a': rule_bias_default = atof(optarg); break; case 'g': memcpy (filename, optarg, strlen (optarg)); tmp = strlen (optarg); filename[tmp] = 'N'; tmp++; filename[tmp] = 'e'; tmp++; filename[tmp] = 'w'; tmp++; filename[tmp] = '\0'; tmp++; grammarfp = fopen(optarg, "r"); if (grammarfp == NULL) { fprintf(stderr, "Error: couldn't open grammarfile %s\n%s", optarg, usage); exit(EXIT_FAILURE); } break; case 's': stoptol = atof(optarg); break; case 'p': minruleprob = atof(optarg); break; case 'l': maxlen = atoi(optarg); break; case 'm': minits = atoi(optarg); break; case 'n': maxits = atoi(optarg); break; case 'b': annealstart = atof(optarg); break; case 'B': annealstop = atof(optarg); break; case 'N': nanneal = atoi(optarg); break; case 'j': jitter0 = atof(optarg); break; case 'J': jitter = atof(optarg); break; case 'S': randseed = atoi(optarg); break; case 'd': debuglevel = atoi(optarg); break; case 'R': nruns = atoi(optarg); break; case 'T': summaryfp = fopen(optarg, "w"); break; case 'V': VariationalBayes = 1; break; case 'W': wordscale = atof(optarg); break; case '?': default: fprintf(stderr, "Error: unknown command line flag %c\n\n%s\n", chr, usage); exit(EXIT_FAILURE); break; } } if (optind + 1 != argc) { fprintf(stderr, "Error: expect a yieldfile\n\n%s\n", usage); exit(EXIT_FAILURE); } if ((yieldfp = 
fopen(argv[optind], "r")) == NULL) { fprintf(stderr, "Error: Couldn't open yieldfile %s\n%s", argv[optind], usage); exit(EXIT_FAILURE); } srand(randseed); if (summaryfp && debuglevel >= 100) fprintf(summaryfp, "# rule_bias_default (-a) = %g, stoptol (-s) = %g, minruleprob (-p) = %g, " "maxlen (-l) = %d, minits (-m) = %d, maxits = (-n) = %d, annealstart (-b) = %g, " "annealstop (-B) = %g, nanneal (-N) = %d, jitter0 (-j) = %g, jitter (-J) = %g, " "VariationalBayes (-V) = %d, wordscale (-W) = %g, randseed (-S) = %d, " "debuglevel (-d) = %d, nruns (-R) = %d\n", rule_bias_default, stoptol, minruleprob, maxlen, minits, maxits, annealstart, annealstop, nanneal, jitter0, jitter, VariationalBayes, wordscale, randseed, debuglevel, nruns); g0 = read_grammar(grammarfp, si, rule_bias_default); write_grammar(stdout, g0, si, minruleprob); set_rule_weights(g0, g0->weights, VariationalBayes); /* normalize rule counts */ signal(SIGINT, write_grammar_); for (irun = 0; irun < nruns; ++irun) { FLOAT entropy; g = copy_grammar(g0, si); if (summaryfp && debuglevel >= 100) fprintf(summaryfp, "# Run %d\n", irun); g_global = g; si_global = si; if (jitter0 > 0) jitter_weights(g, jitter0); entropy = inside_outside(g, si, yieldfp, tracefp, summaryfp, debuglevel, maxlen, minits, maxits, stoptol, minruleprob, jitter, VariationalBayes, wordscale, annealstart, annealstop, nanneal); if (summaryfp && debuglevel >= 0) fprintf(summaryfp, "# run %d, entropy %g, %ld rules\n", irun, entropy, (long) g->nrules); if (debuglevel >= 1) { write_grammar(stdout, g, si, minruleprob); FILE* grammarfp = fopen(filename, "w"); //write_grammar(stdout, g, si, minruleprob); write_grammar(grammarfp, g, si, minruleprob); fprintf(stdout, "\n"); fflush(stdout); } free_grammar(g); } free_grammar(g0); si_free(si); if (mmm_blocks_allocated) fprintf(stderr, "Error in mrf(): %ld memory block(s) not deallocated\n", mmm_blocks_allocated); /* check that everything has been deallocated */ assert(mmm_blocks_allocated == 0); 
exit(EXIT_SUCCESS); }
/*
 * Reads the grammar in file_name, builds the parser tables from it, dumps
 * them to "state.txt" and "go_to_table.csv", and then runs
 * shift_reduce_driver() on TreeView1.
 *
 * Grammar line shape, as implied by the parsing below:
 *     <rule number>.<one character><LHS> ¡÷ <alt> | <alt> ...
 * The left-hand side starts two characters after the first '.', and ends at
 * the first space of the line; alternatives are separated by a single '|'
 * (a "||" is not treated as a separator and ends the scan of the line).
 * Lines that do not have this shape are skipped.
 *
 * NOTE(review): the literals "£f" and "¡÷" look like multi-byte characters
 * that were decoded with the wrong code page. They are kept byte-for-byte
 * because they have to match the encoding of the grammar file -- confirm.
 *
 * NOTE(review): the streams `scan` and `view` and the table `parser_table`
 * are not declared here and are left open / allocated on return, since code
 * outside this function may still use them -- confirm ownership.
 *
 * Returns 0 in every case; failures are reported through a message box.
 */
int run_parser( TTreeView *TreeView1 , char *file_name )
{
    Config_set *config_ptr;
    STATE *state_ptr;
    TOKEN *T_ptr , *T_tmp;
    FILE *input , *out;
    char grammar_rule[1024];
    char *ptr;
    char *ptr_lhs , *ptr_rhs;
    int i , j;

    strncpy( lambda , "£f" , strlen("£f")+1 );

    // reset every table before building them again
    G_start = NULL;
    G_end = NULL;
    terminal_start = NULL;
    terminal_end = NULL;
    nonterminal_start = NULL;
    nonterminal_end = NULL;
    first_set_start = NULL;
    first_set_end = NULL;
    follow_set_start = NULL;
    follow_set_end = NULL;
    predict_set_start = NULL;
    predict_set_end = NULL;
    config_start = NULL;
    config_end = NULL;
    state_start = NULL;
    state_end = NULL;
    state_num = 0;

    // validate the grammar file first so that nothing else is left open on failure
    input = fopen( file_name ,"r");
    if ( input == NULL )
    {
        Application->MessageBoxA("grammar file error","error",0);
        return 0;
    }
    scan = fopen("out.txt","r");

    // i counts the rules inserted so far; each alternative is its own rule
    for ( i = 0 ; fgets( grammar_rule , 1024 , input ) ; )
    {
        // remove \n
        ptr = strstr( grammar_rule , "\n" );
        if ( ptr != NULL )
            *ptr = '\0';

        // remove rule num: the left-hand side starts two characters after the '.'
        ptr = strstr( grammar_rule , "." );
        if ( ptr == NULL || *(ptr+1) == '\0' )
            continue;
        ptr_lhs = ptr + 2;

        // the first space of the line terminates the left-hand side
        ptr = strstr( grammar_rule , " " );
        if ( ptr == NULL )
            continue;
        *ptr = '\0';

        // locate the arrow before registering anything for this line
        ptr = strstr( ++ptr , "¡÷" );
        if ( ptr == NULL )
            continue;

        insert_token( &nonterminal_start , &nonterminal_end , ptr_lhs );   // make nonterminal table
        insert_token( &first_set_start , &first_set_end , ptr_lhs );       // make first set table with nonterminal
        insert_token( &follow_set_start , &follow_set_end , ptr_lhs );     // make follow set table
        insert_token( &predict_set_start , &predict_set_end , ptr_lhs );   // make predict set table

        // get right handside, one alternative per iteration
        do
        {
            ptr = strstr( ++ptr , " " );
            if ( ptr == NULL )
                break;
            // the text holds high-bit bytes, so convert before calling isspace
            while ( isspace( (unsigned char) *ptr ) )
                ptr++;
            ptr_rhs = ptr;

            ptr = strstr( ptr_rhs , "|" );
            if ( ptr != NULL && *(ptr+1) != '|' )
            {
                // cut the alternative off at the white space in front of the '|'
                for ( j = 1 ; isspace( (unsigned char) *(ptr-j) ) ; j++ )
                    *(ptr-j) = '\0';
            }
            insert_grammar( ++i , ptr_lhs , ptr_rhs );
        } while ( ptr != NULL && *(ptr+1) != '|' );
    }
    fclose(input);

    // remove nonterminal in terminal table
    T_ptr = nonterminal_start;
    i = 0;
    while ( T_ptr != NULL )
    {
        T_tmp = search_token( terminal_start , T_ptr->string );
        delete_token( &terminal_start , T_tmp );
        T_ptr = T_ptr->next;
        i++;
    }
    num_of_nonterm = i;

    for ( T_ptr = terminal_start , i = 0 ; T_ptr != NULL ; T_ptr = T_ptr->next , i++ )
    {
        insert_token( &first_set_start , &first_set_end , T_ptr->string );   // make first set table
    }
    // NOTE(review): one more than the terminals counted above -- presumably
    // room for an extra symbol; confirm.
    num_of_term = ++i;

    fill_frist_set();
    fill_follow_set();

    view = fopen("state.txt","w");
    out = fopen("go_to_table.csv","w");
    if ( view == NULL || out == NULL )
    {
        // both dumps are written unconditionally below, so stop here
        Application->MessageBoxA("output file error","error",0);
        if ( out != NULL )
            fclose(out);
        if ( view != NULL )
        {
            fclose(view);
            view = NULL;
        }
        free_token(&terminal_start);
        free_token(&nonterminal_start);
        free_grammar();
        return 0;
    }

    build_CFSM();

    // build goto table: one row per state, one column per grammar symbol
    parser_table = (PARSER**) malloc( sizeof(PARSER*) * state_num );
    for ( i = 0 ; i < state_num ; i++ )
    {
        parser_table[i] = (PARSER*) malloc( sizeof(PARSER) * ( num_of_term + num_of_nonterm ) );
        for ( j = 0 ; j < num_of_term + num_of_nonterm ; j++ )
        {
            // symbol ids are 1-based, table columns are 0-based
            parser_table[i][j].go_to = go_to_state(i,j+1);

            if ( j+1 != make_id("$") )
                parser_table[i][j].action = SHIFT;
            else
                parser_table[i][j].action = ACCEPT;

            // no successor state overrides the action chosen above
            if ( parser_table[i][j].go_to == 0 )
                parser_table[i][j].action = ERROR;
        }
    }

    // compute lalr lookahead
    build_LALR_lookahead();

    // header row of the csv; the "," symbol is written as ' so it does not split the column
    fprintf(out,"\t,");
    for ( j = 0 ; j < num_of_term + num_of_nonterm ; j++ )
    {
        if ( j+1 != make_id(",") )
            fprintf(out," %s,",idtostr(j+1));
        else
            fprintf(out," ' ,");
    }
    fprintf( out ,"\n");

    // build action table
    for ( i = 0 , state_ptr = state_start ; i < state_num ; i++ , state_ptr = state_ptr->next )
    {
        fprintf(out,"state%d,",i);
        for ( j = 0 ; j < num_of_term + num_of_nonterm ; j++ )
        {
            for ( config_ptr = state_ptr->config_set ; config_ptr != NULL ; config_ptr = config_ptr->set_next )
            {
                // a configuration with no dot left and this symbol in its lookahead reduces
                if ( config_ptr->dot == NULL && search_set( config_ptr->lookahead , idtostr(j+1) ) == TRUE )
                {
                    // an entry that already has a target is kept as it is
                    if ( parser_table[i][j].go_to == 0 )
                    {
                        parser_table[i][j].go_to = config_ptr->rule->rule;
                        parser_table[i][j].action = REDUCE;
                    }
                }
            }

            if ( parser_table[i][j].go_to > 0 )
            {
                if ( parser_table[i][j].action == SHIFT )
                    fprintf( out , "S%02d,",parser_table[i][j].go_to);
                else
                    fprintf( out , "R%02d,",parser_table[i][j].go_to);
            }
            else
                fprintf( out , ",");
        }
        fprintf( out ,"\n");
    }
    fclose(out);

    // dump every state to the view stream
    for ( i = 0 ; i < state_num ; i++ )
    {
        view_state(i);
        fprintf(view,"\n");
    }

    shift_reduce_driver( TreeView1 );

    free_token(&terminal_start);
    free_token(&nonterminal_start);
    free_grammar();
    free_state(&state_start);
    return 0;
}
int main(int argc, char **argv) { si_t si = make_si(1024); FILE *grammarfp = stdin, *yieldfp; FILE *tracefp = NULL; /* trace output */ FILE *summaryfp = stderr; /* end of parse stats output */ FILE *parsefp = stdout; /* parse trees */ FILE *probfp = NULL; /* max_neglog_prob */ chart_cell root_cell; grammar g; chart c; vindex terms; int maxsentlen = 0; int sentenceno = 0, parsed_sentences = 0, failed_sentences = 0; double sum_neglog_prob = 0; int sentfrom = 0; int sentto = 0; srand(RAND_SEED); /* seed random number generator */ if (argc<2 || argc>6) { fprintf(stderr, "%s yieldfile [maxsentlen [grammarfile [sentfrom sentto]]]\n", argv[0]); exit(EXIT_FAILURE); } if ((yieldfp = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "%s: Couldn't open yieldfile %s\n", argv[0], argv[1]); exit(EXIT_FAILURE); } if (argc >= 3) if (!sscanf(argv[2], "%d", &maxsentlen)) { fprintf(stderr, "%s: Couldn't parse maxsentlen %s\n", argv[0], argv[2]); exit(EXIT_FAILURE); } if (argc >= 4) if ((grammarfp = fopen(argv[3], "r")) == NULL) { fprintf(stderr, "%s: Couldn't open grammarfile %s\n", argv[0], argv[3]); exit(EXIT_FAILURE); } if (argc >= 6) { if (!sscanf(argv[4], "%d", &sentfrom)) { fprintf(stderr, "%s: Couldn't parse sentfrom %s\n", argv[0], argv[4]); exit(EXIT_FAILURE); } if (!sscanf(argv[5], "%d", &sentto)) { fprintf(stderr, "%s: Couldn't parse sentto %s\n", argv[0], argv[5]); exit(EXIT_FAILURE); } } g = read_grammar(grammarfp, si); /* write_grammar(tracefp, g, si); */ while ((terms = read_terms(yieldfp, si))) { sentenceno++; if (sentfrom && sentenceno < sentfrom) { vindex_free(terms); continue; } if (sentto && sentenceno > sentto) { vindex_free(terms); break; } /* skip if sentence is too long */ if (!maxsentlen || (int) terms->n <= maxsentlen) { size_t i; if (tracefp) { fprintf(tracefp, "\nSentence %d:\n", sentenceno); for (i=0; i<terms->n; i++) fprintf(tracefp, " %s", si_index_string(si, terms->e[i])); fprintf(tracefp, "\n"); } c = cky(*terms, g, si); /* fetch best root node */ 
root_cell = sihashcc_ref(CHART_ENTRY(c, 0, terms->n), g.root_label); if (root_cell) { tree parse_tree = bintree_tree(&root_cell->tree, si); double prob = (double) root_cell->prob; parsed_sentences++; assert(prob > 0.0); sum_neglog_prob -= log(prob); if (probfp) fprintf(probfp, "max_neglog_prob(%d, %g).\n", sentenceno, -log(prob)); if (tracefp) fprintf(tracefp, " Prob = %g\n", prob); if (parsefp) { write_tree(parsefp, parse_tree, si); fprintf(parsefp, "\n"); /* write_prolog_tree(parsefp, parse_tree, si); */ } free_tree(parse_tree); } else { failed_sentences++; if (tracefp) fprintf(tracefp, "Failed to parse\n"); if (parsefp) fprintf(parsefp, "parse_failure.\n"); } chart_free(c, terms->n); /* free the chart */ } else { /* sentence too long */ if (parsefp) fprintf(parsefp, "too_long.\n"); } vindex_free(terms); /* free the terms */ assert(trees_allocated == 0); assert(bintrees_allocated == 0); } free_grammar(g); si_free(si); if (summaryfp) { fprintf(summaryfp, "\n%d/%d = %g%% test sentences met the length criteron," " of which %d/%d = %g%% were parsed\n", parsed_sentences+failed_sentences, sentenceno, (double) (100.0 * (parsed_sentences+failed_sentences)) / sentenceno, parsed_sentences, parsed_sentences+failed_sentences, (double) (100.0 * parsed_sentences) / (parsed_sentences + failed_sentences)); fprintf(summaryfp, "Sum(-log prob) = %g\n", sum_neglog_prob); } /* check that everything has been deallocated */ /* printf("mmm_blocks_allocated = %ld\n", (long) mmm_blocks_allocated); */ assert(mmm_blocks_allocated == 0); exit(EXIT_SUCCESS); }