/*
 * Parse every candidate decomposition of a word and collect the stems found.
 *
 * ms              - morphology context; its dict and opts members are used
 *                   for sentence creation and parsing.
 * decomposed_word - head of a singly linked list (via ->next) of candidate
 *                   decompositions; each node's ->word is parsed on its own.
 *
 * For every linkage of every candidate, find_stem() is asked for a stem
 * position; when it returns a non-negative index, that word together with
 * linkage->feature_array[0] is appended through morpheme_list_add().
 *
 * Returns the accumulated morpheme list, or NULL when nothing was added.
 * Candidates for which sentence_create() fails are skipped.
 */
Morpheme * analyse_decomposed_word(Morpho_structures ms, Decomposed_word *decomposed_word){
	Sentence sent;
	Linkage linkage;
	Decomposed_word *node;
	Morpheme *morpheme_list=NULL;
	int num_linkages, i;
	int stem_pos;
	char * diagram;

	// ms->opts->unify_features = FALSE;
	for(node=decomposed_word; node!=NULL; node=node->next){
		sent=sentence_create(node->word, ms->dict);
		if(!sent){
			continue;
		}
		num_linkages = sentence_parse(sent, ms->opts);
		for (i=0; i<num_linkages; i++){
			linkage = linkage_create(i, sent, ms->opts);
			if (PRINT_DIAGRAM){
				/* Reuse the linkage created above. Creating it a second
				 * time here overwrote the first handle, which was then
				 * never passed to linkage_delete(). */
				diagram = linkage_print_diagram(linkage);
				printf("%s\n", diagram);
				string_delete(diagram);
			}
			if ((stem_pos = find_stem(linkage))>=0){
				morpheme_list_add(ms, &morpheme_list, linkage->word[stem_pos], linkage->feature_array[0]);
			}
			linkage_delete(linkage);
		}
		sentence_delete(sent);
	}
	return morpheme_list;
}
/*
 * Create a link-grammar Sentence from a Ruby string and wrap it in a Ruby
 * data object.
 *
 * self - passed as the class argument of Data_Wrap_Struct.
 * str  - Ruby string holding the sentence text.
 * dict - Ruby object from which retrieve_dictionary() obtains the dictionary.
 *
 * Returns the wrapped SentencePtr. The result of sentence_create() is stored
 * without being checked, so the wrapper may hold a NULL sentence.
 *
 * NOTE(review): the wrapper's free function is plain free(), which releases
 * only the SentencePtr holder; presumably the Sentence itself is deleted
 * elsewhere -- confirm.
 */
VALUE create_sentence(const VALUE self, volatile VALUE str, VALUE dict) {
  /* StringValueCStr (unlike StringValuePtr) guarantees a NUL-terminated
   * buffer and raises on embedded NUL bytes, which is what a C API taking
   * a plain char* needs. */
  char *text = StringValueCStr(str);
  DictionaryPtr *dict_ptr = retrieve_dictionary(dict);
  Sentence sentence = sentence_create(text, dict_ptr->dict);
  SentencePtr *sent_ptr = ALLOC(SentencePtr);

  sent_ptr->sentence = sentence;
  return Data_Wrap_Struct(self, 0, free, sent_ptr);
}
std::map<int, EdgeDescription> LinkParserAdapterImpl::parseSentence(std::string sentenceStr) { Parse_Options parseOptions = parse_options_create(); int verbosity = psi_logger.getLoggingPriority() / 100 - 4; parse_options_set_verbosity(parseOptions, verbosity); starts_.clear(); ends_.clear(); edgeDescriptions_.clear(); freeSentence(); sentence_ = sentence_create(sentenceStr.c_str(), dictionary_); if (!sentence_) { std::stringstream errorSs; errorSs << "Link-parser failed to tokenize the input text."; throw ParserException(errorSs.str()); } boost::algorithm::to_lower(sentenceStr); if (sentence_parse(sentence_, parseOptions)) { size_t currentPos = 0; size_t foundPos = 0; int wordNo = 0; while (wordNo < sentence_length(sentence_)) { std::string word(sentence_get_word(sentence_, wordNo)); boost::algorithm::to_lower(word); foundPos = sentenceStr.find(word, currentPos); if (foundPos != std::string::npos) { starts_[wordNo] = foundPos; ends_[wordNo] = currentPos = foundPos + word.length(); } ++wordNo; } Linkage linkage = linkage_create(0, sentence_, parseOptions); CNode * ctree = linkage_constituent_tree(linkage); extractEdgeDescriptions(ctree, linkage); linkage_free_constituent_tree(ctree); linkage_delete(linkage); } else { std::stringstream errorSs; errorSs << "Link-parser failed to parse the input text.\n" << "Your input text is probably not a correct sentence."; WARN(errorSs.str()); } return edgeDescriptions_; }
/*
 *  call-seq:
 *     LinkParser::Sentence.new( str, dict )   -> sentence
 *
 *  Build a LinkParser::Sentence from the input string +str+, tokenized with
 *  the given LinkParser::Dictionary.
 *
 *     dict = LinkParser::Dictionary.new
 *     LinkParser::Sentence.new( "The boy runs", dict )  #=> #<LinkParser::Sentence:0x5481ac>
 *
 *  Raises a RuntimeError if the object has already been initialized.
 */
static VALUE
rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
	struct rlink_dictionary *dict_holder;
	struct rlink_sentence *wrapper;
	Sentence parsed;

	/* An object that already carries a sentence must not be set up twice. */
	if ( check_sentence(self) ) {
		rb_raise( rb_eRuntimeError,
		          "Cannot re-initialize a sentence once it's been created." );
	}

	dict_holder = rlink_get_dict( dictionary );
	parsed = sentence_create( StringValueCStr(input_string), dict_holder->dict );
	if ( !parsed ) {
		rlink_raise_lp_error();
	}

	/* Attach a fresh holder to the Ruby object, then populate it. */
	wrapper = rlink_sentence_alloc();
	DATA_PTR( self ) = wrapper;

	wrapper->sentence   = parsed;
	wrapper->dictionary = dictionary;
	wrapper->options    = Qnil;

	return self;
}
int main(int argc, char * argv[]) { FILE *input_fh = stdin; Dictionary dict; const char *language="en"; /* default to english, and not locale */ int num_linkages, i; Label label = NO_LABEL; const char *codeset; const char *locale = NULL; Command_Options *copts; Parse_Options opts; bool batch_in_progress = false; #if LATER /* Try to catch the SIGWINCH ... except this is not working. */ struct sigaction winch_act; winch_act.sa_handler = winch_handler; winch_act.sa_sigaction = NULL; sigemptyset (&winch_act.sa_mask); winch_act.sa_flags = 0; sigaction (SIGWINCH, &winch_act, NULL); #endif i = 1; if ((argc > 1) && (argv[1][0] != '-')) { /* the dictionary is the first argument if it doesn't begin with "-" */ language = argv[1]; i++; } #if !defined(_MSC_VER) && !defined(__MINGW32__) /* Get the locale from the environment... * Perhaps we should someday get it from the dictionary ?? */ locale = setlocale(LC_ALL, ""); /* Check to make sure the current locale is UTF8; if its not, * then force-set this to the english utf8 locale */ codeset = nl_langinfo(CODESET); if (!strstr(codeset, "UTF") && !strstr(codeset, "utf")) { fprintf(stderr, "%s: Warning: locale %s was not UTF-8; force-setting to en_US.UTF-8\n", argv[0], codeset); locale = setlocale(LC_CTYPE, "en_US.UTF-8"); } #else #pragma message("WARNING: Windows console (cmd.exe) does not support unicode input!\nWill attempt to convert from the native encoding!"); fprintf(stderr, "%s: Warning: Windows console (cmd.exe) does not support unicode\n" "input! 
Will attempt to convert from the native encoding!", argv[0]); #endif for (; i<argc; i++) { if (argv[i][0] == '-' && strcmp("--version", argv[i]) == 0) { printf("Version: %s\n", linkgrammar_get_version()); exit(0); } } copts = command_options_create(); opts = copts->popts; if (copts == NULL || opts == NULL || copts->panic_opts == NULL) { fprintf(stderr, "%s: Fatal error: unable to create parse options\n", argv[0]); exit(-1); } if (language && *language) dict = dictionary_create_lang(language); else dict = dictionary_create_default_lang(); if (dict == NULL) { fprintf(stderr, "%s: Fatal error: Unable to open dictionary.\n", argv[0]); exit(-1); } setup_panic_parse_options(copts->panic_opts); copts->panic_mode = true; parse_options_set_max_parse_time(opts, 30); parse_options_set_linkage_limit(opts, 1000); parse_options_set_min_null_count(opts, 0); parse_options_set_max_null_count(opts, 0); parse_options_set_short_length(opts, 16); /* The English and Russian dicts use a cost of 2.7, which allows * regexes with a fractional cost of less than 1 to be used with * rules that have a cost of 2.0. */ parse_options_set_disjunct_cost(opts, 2.7); /* Process the command line commands */ for (i = 1; i<argc; i++) { if (argv[i][0] == '-') { int rc; if (argv[i][1] == '!' || argv[i][1] == '-') rc = issue_special_command(argv[i]+2, copts, dict); else rc = issue_special_command(argv[i]+1, copts, dict); if (rc) print_usage(argv[0]); } } check_winsize(copts); #if !defined(_MSC_VER) && !defined(__MINGW32__) prt_error("Info: Using locale %s.", locale); #endif prt_error("Info: Dictionary version %s.", linkgrammar_get_dict_version(dict)); prt_error("Info: Library version %s. 
Enter \"!help\" for help.", linkgrammar_get_version()); /* Main input loop */ while (1) { char *input_string; Sentence sent = NULL; verbosity = parse_options_get_verbosity(opts); debug = parse_options_get_debug(opts); test = parse_options_get_test(opts); input_string = fget_input_string(input_fh, stdout, copts); check_winsize(copts); if (NULL == input_string) { if (input_fh == stdin) break; fclose (input_fh); input_fh = stdin; continue; } if ((strcmp(input_string, "!quit") == 0) || (strcmp(input_string, "!exit") == 0)) break; /* We have to handle the !file command inline; its too hairy * otherwise ... */ if (strncmp(input_string, "!file", 5) == 0) { char * filename = &input_string[6]; input_fh = fopen(filename, "r"); if (NULL == input_fh) { int perr = errno; fprintf(stderr, "Error: %s (%d) %s\n", filename, perr, strerror(perr)); input_fh = stdin; continue; } continue; } /* If the input string is just whitespace, then ignore it. */ if (strspn(input_string, " \t\v") == strlen(input_string)) continue; if (special_command(input_string, copts, dict)) continue; if (!copts->batch_mode) batch_in_progress = false; if ('\0' != test[0]) { /* In batch mode warn only once. * In auto-next-linkage mode don't warn at all. */ if (!batch_in_progress && (NULL == strstr(test, ",auto-next-linkage,"))) { fflush(stdout); /* Remind the developer this is a test mode. */ fprintf(stderr, "Warning: Tests enabled: %s\n", test); if (copts->batch_mode) batch_in_progress = true; } } if (copts->echo_on) { printf("%s\n", input_string); } if (copts->batch_mode) { label = strip_off_label(input_string); } #ifdef USE_VITERBI /* Compile-time optional, for now, since it don't work yet. 
*/ if (parse_options_get_use_viterbi(opts)) { viterbi_parse(input_string, dict); } else #endif { sent = sentence_create(input_string, dict); /* First parse with cost 0 or 1 and no null links */ // parse_options_set_disjunct_cost(opts, 2.7); parse_options_set_min_null_count(opts, 0); parse_options_set_max_null_count(opts, 0); parse_options_reset_resources(opts); num_linkages = sentence_parse(sent, opts); /* num_linkages is negative only on a hard-error; * typically, due to a zero-length sentence. */ if (num_linkages < 0) { sentence_delete(sent); sent = NULL; continue; } #if 0 /* Try again, this time omitting the requirement for * definite articles, etc. This should allow for the parsing * of newspaper headlines and other clipped speech. * * XXX Unfortunately, this also allows for the parsing of * all sorts of ungrammatical sentences which should not * parse, and leads to bad parses of many other unparsable * but otherwise grammatical sentences. Thus, this trick * pretty much fails; we leave it here to document the * experiment. */ if (num_linkages == 0) { parse_options_set_disjunct_cost(opts, 4.5); num_linkages = sentence_parse(sent, opts); if (num_linkages < 0) continue; } #endif /* Try using a larger list of disjuncts */ /* XXX fixme: the lg_expand_disjunct_list() routine is not * currently a part of the public API; it should be made so, * or this expansion idea should be abandoned... not sure which. */ if ((num_linkages == 0) && parse_options_get_use_cluster_disjuncts(opts)) { int expanded; if (verbosity > 0) fprintf(stdout, "No standard linkages, expanding disjunct set.\n"); parse_options_set_disjunct_cost(opts, 3.9); expanded = lg_expand_disjunct_list(sent); if (expanded) { num_linkages = sentence_parse(sent, opts); } if (0 < num_linkages) printf("Got One !!!!!!!!!!!!!!!!!\n"); } /* If asked to show bad linkages, then show them. 
*/ if ((num_linkages == 0) && (!copts->batch_mode)) { if (copts->display_bad) { num_linkages = sentence_num_linkages_found(sent); } } /* Now parse with null links */ if (num_linkages == 0 && !copts->batch_mode) { if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n"); if (copts->allow_null) { /* XXX should use expanded disjunct list here too */ parse_options_set_min_null_count(opts, 1); parse_options_set_max_null_count(opts, sentence_length(sent)); num_linkages = sentence_parse(sent, opts); } } if (verbosity > 0) { if (parse_options_timer_expired(opts)) fprintf(stdout, "Timer is expired!\n"); if (parse_options_memory_exhausted(opts)) fprintf(stdout, "Memory is exhausted!\n"); } if ((num_linkages == 0) && copts->panic_mode && parse_options_resources_exhausted(opts)) { /* print_total_time(opts); */ batch_errors++; if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n"); parse_options_reset_resources(copts->panic_opts); parse_options_set_verbosity(copts->panic_opts, verbosity); num_linkages = sentence_parse(sent, copts->panic_opts); if (verbosity > 0) { if (parse_options_timer_expired(copts->panic_opts)) fprintf(stdout, "Panic timer is expired!\n"); } } /* print_total_time(opts); */ if (copts->batch_mode) { batch_process_some_linkages(label, sent, copts); } else { int c = process_some_linkages(sent, copts); if (c == EOF) { sentence_delete(sent); sent = NULL; break; } } fflush(stdout); sentence_delete(sent); sent = NULL; } } if (copts->batch_mode) { /* print_time(opts, "Total"); */ fprintf(stderr, "%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s"); } /* Free stuff, so that mem-leak detectors don't commplain. */ command_options_delete(copts); dictionary_delete(dict); fget_input_string(NULL, NULL, NULL); printf ("Bye.\n"); return 0; }
int main(int argc, char * argv[]) { FILE *input_fh = stdin; Dictionary dict; Sentence sent; const char *language="en"; /* default to english, and not locale */ int pp_on=TRUE; int af_on=TRUE; int cons_on=TRUE; int num_linkages, i; char *input_string; Label label = NO_LABEL; const char *codeset; /*############################################################################################*/ int num=0; FILE *fp2,*fp_word_info,*fp_num,*fp_lname,*fp_rel,*fp_lcount,*fp_word_cat,*fp_word; /*##############################################################################################*/ i = 1; /*############################################################################################*/ if (argc > 4){ num =atoi(argv[4]); num=num-1; /// language = argv[1]; /*####################################################################################*/ // // if ((argc > 1) && (argv[1][0] != '-')) { /* the dictionary is the first argument if it doesn't begin with "-" */ // language = argv[1]; i++; } /*################################*/ language = argv[1]; /*################################*/ /* Get the locale from the environment... * perhaps we should someday get it from the dictionary ?? 
*/ setlocale(LC_ALL, ""); /* Check to make sure the current locale is UTF8; if its not, * then force-set this to the english utf8 locale */ codeset = nl_langinfo(CODESET); if (!strstr(codeset, "UTF") && !strstr(codeset, "utf")) { setlocale(LC_CTYPE, "en_US.UTF-8"); } /*########################################*/ for (; i<argc-3; i++) { /*########################################*/ if (argv[i][0] == '-') { if (strcmp("--version", argv[i])==0) { printf("Version: %s\n", linkgrammar_get_version()); exit(0); } else if (strcmp("-ppoff", argv[i])==0) { pp_on = FALSE; } else if (strcmp("-coff", argv[i])==0) { cons_on = FALSE; } else if (strcmp("-aoff", argv[i])==0) { af_on = FALSE; } else if (strcmp("-batch", argv[i])==0) { } else if (strncmp("-!", argv[i],2)==0) { } else { print_usage(argv[0]); } } else { print_usage(argv[0]); } } opts = parse_options_create(); if (opts == NULL) { fprintf(stderr, "%s: Fatal error: unable to create parse options\n", argv[0]); exit(-1); } panic_parse_opts = parse_options_create(); if (panic_parse_opts == NULL) { fprintf(stderr, "%s: Fatal error: unable to create panic parse options\n", argv[0]); exit(-1); } setup_panic_parse_options(panic_parse_opts); parse_options_set_max_sentence_length(opts, 170); parse_options_set_panic_mode(opts, TRUE); parse_options_set_max_parse_time(opts, 30); parse_options_set_linkage_limit(opts, 1000); parse_options_set_short_length(opts, 10); /*##########################################*/ // parse_options_set_display_on(opts, TRUE); /*##########################################*/ if(language && *language) dict = dictionary_create_lang(language); else dict = dictionary_create_default_lang(); if (dict == NULL) { /*###########################################################################*/ fprintf(stderr, "%s: Fatal error: Unable to open dictionary.\n", argv[1]); /*##########################################################################*/ exit(-1); } /* process the command line like commands */ for (i=1; i<argc; 
i++) { if ((strcmp("-pp", argv[i])==0) || (strcmp("-c", argv[i])==0) || (strcmp("-a", argv[i])==0)) { i++; } else if ((argv[i][0] == '-') && (strcmp("-ppoff", argv[i])!=0) && (argv[i][0] == '-') && (strcmp("-coff", argv[i])!=0) && (argv[i][0] == '-') && (strcmp("-aoff", argv[i])!=0)) { if (argv[i][1] == '!') issue_special_command(argv[i]+2, opts, dict); else issue_special_command(argv[i]+1, opts, dict); } } verbosity = parse_options_get_verbosity(opts); /* Main input loop */ while (1) { input_string = fget_input_string(input_fh, stdout, opts); if (NULL == input_string) { if (input_fh == stdin) break; fclose (input_fh); input_fh = stdin; continue; } if ((strcmp(input_string, "quit\n")==0) || (strcmp(input_string, "exit\n")==0)) break; /* We have to handle the !file command inline; its too hairy * otherwise ... */ if (strncmp(input_string, "!file", 5) == 0) { char * filename = &input_string[6]; input_fh = fopen(filename, "r"); if (NULL == input_fh) { int perr = errno; fprintf(stderr, "Error: %s (%d) %s\n", filename, perr, strerror(perr)); input_fh = stdin; continue; } continue; } if (special_command(input_string, dict)) continue; if (parse_options_get_echo_on(opts)) { printf("%s", input_string); } if (parse_options_get_batch_mode(opts)) { label = strip_off_label(input_string); } sent = sentence_create(input_string, dict); if (sent == NULL) continue; if (sentence_length(sent) > parse_options_get_max_sentence_length(opts)) { if (verbosity > 0) { fprintf(stdout, "Sentence length (%d words) exceeds maximum allowable (%d words)\n", sentence_length(sent), parse_options_get_max_sentence_length(opts)); } sentence_delete(sent); continue; } /* First parse with cost 0 or 1 and no null links */ parse_options_set_disjunct_cost(opts, 2); parse_options_set_min_null_count(opts, 0); parse_options_set_max_null_count(opts, 0); parse_options_reset_resources(opts); num_linkages = sentence_parse(sent, opts); 
//############################################################################################ if(num+1>num_linkages && num_linkages != 0) { sprintf(link_info_filename,"%s/%s_tmp/linkid_cat.txt",argv[2],argv[3]); fp2 = fopen(link_info_filename,"a"); if(fp2==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} fprintf(fp2,";~~~~~~~~~~\n"); fclose(fp2); sprintf(link_info_filename,"%s/%s_tmp/linkid_word.txt",argv[2],argv[3]); fp_word= fopen(link_info_filename, "a"); if(fp_word==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} fprintf(fp_word,";~~~~~~~~~~\n"); fclose(fp_word); sprintf(link_info_filename,"%s/%s_tmp/link_numeric_word.txt",argv[2],argv[3]); fp_word_info= fopen(link_info_filename, "a"); if(fp_word_info==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} fprintf(fp_word_info,";~~~~~~~~~~\n"); fclose(fp_word_info); sprintf(link_info_filename,"%s/%s_tmp/link_name_expand.txt",argv[2],argv[3]); fp_lname =fopen(link_info_filename,"a"); if(fp_lname==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} fprintf(fp_lname,";~~~~~~~~~~\n"); fclose(fp_lname); sprintf(link_info_filename,"%s/%s_tmp/link_relation_info.txt",argv[2],argv[3]); fp_rel = fopen(link_info_filename,"a"); if(fp_rel==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} fprintf(fp_rel,";~~~~~~~~~~\n"); fclose(fp_rel); /* sprintf(link_info_filename,"%s/%s_tmp/constituents.txt",argv[2],argv[3]); fp = fopen(link_info_filename,"a"); if(fp==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} fprintf(fp,"\n"); fprintf(fp,";~~~~~~~~~~\n"); fclose(fp);*/ } sprintf(link_info_filename,"%s/%s_tmp/linkage_count.txt",argv[2],argv[3]); fp_lcount = fopen(link_info_filename,"a"); if(fp_lcount==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} fprintf(fp_lcount, "(Found maximum of %d linkages )\n",num_linkages); 
fprintf(fp_lcount,";~~~~~~~~~~\n"); fclose(fp_lcount); //############################################################################################ if (num_linkages < 0) continue; /* Now parse with null links */ if ((num_linkages == 0) && (!parse_options_get_batch_mode(opts))) { if (verbosity > 0) //fprintf(stdout, "No complete linkages found.\n"); //############################################################################################ /* This part of the code written by Maha Laxmi and Shirisha Manju * if no complete linkage is found then redirect the output to standard output in clips format*/ sprintf(link_info_filename,"%s/%s_tmp/number.txt",argv[2],argv[3]); fp_num =fopen(link_info_filename,"a"); if(fp_num==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} /* sprintf(link_info_filename,"%s/%s_tmp/constituents.txt",argv[2],argv[3]); fp_cons= fopen(link_info_filename,"a"); if(fp_cons==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}*/ sprintf(link_info_filename,"%s/%s_tmp/linkid_word.txt",argv[2],argv[3]); fp_word= fopen(link_info_filename, "a"); if(fp_word==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} sprintf(link_info_filename,"%s/%s_tmp/link_numeric_word.txt",argv[2],argv[3]); fp_word_info= fopen(link_info_filename, "a"); if(fp_word_info==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} sprintf(link_info_filename,"%s/%s_tmp/linkid_cat.txt",argv[2],argv[3]); fp_word_cat= fopen(link_info_filename, "a"); if(fp_word_cat==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} sprintf(link_info_filename,"%s/%s_tmp/link_name_expand.txt",argv[2],argv[3]); fp_lname =fopen(link_info_filename,"a"); if(fp_lname==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);} sprintf(link_info_filename,"%s/%s_tmp/link_relation_info.txt",argv[2],argv[3]); fp_rel = fopen(link_info_filename,"a"); if(fp_rel==NULL) 
{printf("Could not open %s for writing\n",link_info_filename);exit(1);} fprintf(fp_word, "\n(No complete linkages found)\n"); fprintf(fp_word_cat, "\n(No complete linkages found)\n"); fprintf(fp_word_info, "\n(No complete linkages found)\n"); fprintf(fp_lname, "\n(No complete linkages found)\n"); fprintf(fp_rel,"\n(No complete linkages found)\n"); fprintf(fp_word_cat,";~~~~~~~~~~\n"); // fprintf(fp_cons,";~~~~~~~~~~\n"); fprintf(fp_lname,";~~~~~~~~~~\n"); fprintf(fp_rel,";~~~~~~~~~~\n"); fprintf(fp_word_info,";~~~~~~~~~~\n"); fprintf(fp_word,";~~~~~~~~~~\n"); /* fclose(fp_cons); */ fclose(fp_word_cat); fclose(fp_word_info); fclose(fp_lname); fclose(fp_rel); fclose(fp_word); continue; /*###############################################################################################################*/ if (parse_options_get_allow_null(opts)) { parse_options_set_min_null_count(opts, 1); parse_options_set_max_null_count(opts, sentence_length(sent)); num_linkages = sentence_parse(sent, opts); } } if (parse_options_timer_expired(opts)) { if (verbosity > 0) fprintf(stdout, "Timer is expired!\n"); } if (parse_options_memory_exhausted(opts)) { if (verbosity > 0) fprintf(stdout, "Memory is exhausted!\n"); } if ((num_linkages == 0) && parse_options_resources_exhausted(opts) && parse_options_get_panic_mode(opts)) { /* print_total_time(opts); */ if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n"); parse_options_reset_resources(panic_parse_opts); parse_options_set_verbosity(panic_parse_opts, verbosity); num_linkages = sentence_parse(sent, panic_parse_opts); if (parse_options_timer_expired(panic_parse_opts)) { if (verbosity > 0) fprintf(stdout, "Timer is expired!\n"); } } /* print_total_time(opts); */ if (parse_options_get_batch_mode(opts)) { //####################################################################### batch_process_some_linkages(label, sent, opts,argv[2],argv[3]); //######################################################################### // 
batch_process_some_linkages(label, sent, opts); } /*###############################################################################*/ // send num as an argument to process_some_linkages else { int c = process_some_linkages(sent, opts,num,argv[2],argv[3]); if (c == EOF) break; } /*################################################################################*/ /* else { int c = process_some_linkages(sent, opts); if (c == EOF) break; }*/ sentence_delete(sent); } if (parse_options_get_batch_mode(opts)) { /* print_time(opts, "Total"); */ /* fprintf(stderr, "%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s");*/ } parse_options_delete(panic_parse_opts); parse_options_delete(opts); dictionary_delete(dict); // printf ("Bye.\n"); return 0; }
/******************************************************************************************* * This functions translats a list of sentences in the input_string array, * terminates when it finds "end" in the array * Inputs: input_string: Array of sentences * all_linkages: if true, translate all possible linkages of the sentence * out_to_file : if true, outputs the results to seperate files in the out directory, * the out files names would be something like : src-x-y: Contains a linkage for the source sentence, x is the index of sentence in the array and y is the index of linkage trg-x-y-z: Contains a linklage for the target sentence, x is the index of sentence in the array, y is the index of linkage and z is the zth translation of that linkage x-y(a linkage in the source language may have more than one correspondent target * linkage) *******************************************************************************************/ void translate(char input_string[][200], int all_linkages, int out_to_file){ Dictionary dict; Parse_Options opts; Sentence sent; Linkage src_linkage, trg_linkage; Transfer trans; char * diagram; FILE * fp; int i, j, num_src_linkages, num_trg_linkages; //char filename[30], txfilename[30], filenum[4]; char txfilename[30]; char output_string[200]; int n; opts = parse_options_create(); parse_options_set_verbosity (opts, FALSE); parse_options_set_display_walls(opts, TRUE); parse_options_set_display_postscript(opts, TRUE); dict = dictionary_create("4.0.dict", "4.0.knowledge", NULL, NULL, "morphology/morphemes.dict"); if (!dict){ fprintf(stderr, "%s\n", lperrmsg); parse_options_delete(opts); printf("size : %lld", space_in_use); exit(0); } trans=transfer_create("translation/mapfile.txt", "translation/links_list.txt","translation/lexicon.txt", "target/trg.dict" ); if (!trans){ fprintf(stderr, "%s\n", maperrmsg); fprintf(stderr, "%s\n", lperrmsg); printf("size : %lld", space_in_use); exit(0); } for (n=0; strcmp (input_string[n],"end")!=0; n++ ){ sent = 
sentence_create(input_string[n], dict); if (!sent){ fprintf(stderr, "%s\n", lperrmsg); parse_options_delete(opts); printf("size : %lld", space_in_use); exit(0); } printf ("\n************************************\n%d-%s\n************************************\n", n+1, input_string[n]); num_src_linkages = sentence_parse(sent, opts); num_src_linkages = (all_linkages) ? num_src_linkages: (num_src_linkages!=0)*1; for (i=0; i<num_src_linkages;i++) { src_linkage = linkage_create(i, sent, opts); diagram = linkage_print_diagram(src_linkage); printf ("\nLinkage No. %d-%d\n\n", n+1, i+1); printf("%s\n", diagram); if (out_to_file){ sprintf(txfilename,"out/src-%d-%d.txt", n+1, i+1); fp=fopen(txfilename,"w+"); if (fp==NULL){ fprintf(stderr, "%s%s\n", "Unable to open ", txfilename); printf("size : %lld", space_in_use); exit(0); } fprintf(fp,"%s",diagram); fclose(fp); } string_delete(diagram); num_trg_linkages = transfer_linkage_driver(trans, src_linkage); if(num_trg_linkages==0){ fprintf(stderr, "%s\n", maperrmsg); } else{ parse_options_set_display_walls (trans->opts, TRUE); for (j=0; j<num_trg_linkages; j++){ //second parameter should always be 0 !? trg_linkage=trans_linkage_create(trans, 0, trans->sent[j], trans->opts); diagram = linkage_print_diagram(trg_linkage); printf ("\nTranslation No. %d_%d_%d\n\n", n+1,i+1,j+1); printf("Translation: %s\n", output_string); printf("%s\n", diagram); if (out_to_file){ sprintf(txfilename,"out/trg-%d-%d-%d.txt", n+1, i+1, j+1); fp=fopen(txfilename,"w+"); if (fp==NULL){ fprintf(stderr, "%s%s\n", "Unable to open ", txfilename); printf("size : %lld", space_in_use); exit(0); } extract_sent (trg_linkage, output_string); fprintf(fp, "Translation:%s\n", output_string); fprintf(fp,"%s",diagram); fclose(fp); } string_delete(diagram); linkage_delete(trg_linkage); } } linkage_delete(src_linkage); } fprintf(stderr, "%s\n", lperrmsg); sentence_delete(sent); } transfer_delete(trans); dictionary_delete(dict); parse_options_delete(opts); }
/******************************************************************************************* * This functions parses a list of sentences and outputs the results to seperate files in * the out directory, * The files are indexed as out_x_y, where x is the index of the sentence in the array and y * is the index of the linkage *******************************************************************************************/ void normal_parse(char input_string[][200], int unify_features, int all_linkages, int out_to_file){ Dictionary dict; Parse_Options opts; int n; Sentence sent; Linkage linkage; char * diagram; int i, num_linkages; char txfilename[30]; FILE * fp; opts = parse_options_create(); parse_options_set_display_walls(opts, TRUE); parse_options_set_display_postscript(opts, TRUE); parse_options_set_unify_features(opts, unify_features); dict = dictionary_create("4.0.dict", "4.0.knowledge", NULL, NULL, "morphology/morphemes.dict"); if (!dict){ fprintf(stderr, "%s\n", lperrmsg); parse_options_delete(opts); printf("size : %lld", space_in_use); exit(0); } //setting opts->unify_features to TRUE enables unification for (n=0; strcmp (input_string[n],"end")!=0; n++ ){ sent = sentence_create(input_string[n], dict); if (!sent){ fprintf(stderr, "%s\n", lperrmsg); dictionary_delete(dict); parse_options_delete(opts); exit(0); } printf ("\n************************************\n%d-%s\n************************************\n", n+1, input_string[n]); num_linkages = sentence_parse(sent, opts); num_linkages = (all_linkages) ? num_linkages : (num_linkages!=0)*1 ; //Echos all linkages to screen, ps file and text file in ./out directory for (i=0; i<num_linkages; ++i) { linkage = linkage_create(i , sent, opts); diagram = linkage_print_diagram(linkage); printf ("\n Linkage No. 
%d-%d\n\n", n+1, i+1); printf("%s\n", diagram); if (out_to_file){ sprintf(txfilename,"out/src-%d-%d.txt", n+1, i+1); fp=fopen(txfilename,"w+"); if (fp==NULL){ fprintf(stderr, "%s%s\n", "Unable to open ", txfilename); printf("size : %lld", space_in_use); exit(0); } fprintf(fp,"%s",diagram); fclose(fp); } string_delete(diagram); linkage_delete(linkage); } sentence_delete(sent); } dictionary_delete(dict); parse_options_delete(opts); }
int main(int argc, char * argv[]) { Dictionary dict; Sentence sent; char *dictionary_file=NULL; char *post_process_knowledge_file=NULL; char *constituent_knowledge_file=NULL; char *affix_file=NULL; int pp_on=TRUE; int af_on=TRUE; int cons_on=TRUE; int num_linkages, i; char input_string[MAXINPUT]; Label label = NO_LABEL; int parsing_space_leaked, reported_leak, dictionary_and_option_space; i = 1; if ((argc > 1) && (argv[1][0] != '-')) { /* the dictionary is the first argument if it doesn't begin with "-" */ dictionary_file = argv[1]; i++; } for (; i<argc; i++) { if (argv[i][0] == '-') { if (strcmp("-pp", argv[i])==0) { if ((post_process_knowledge_file != NULL) || (i+1 == argc)) print_usage(argv[0]); post_process_knowledge_file = argv[i+1]; i++; } else if (strcmp("-c", argv[i])==0) { if ((constituent_knowledge_file != NULL) || (i+1 == argc)) print_usage(argv[0]); constituent_knowledge_file = argv[i+1]; i++; } else if (strcmp("-a", argv[i])==0) { if ((affix_file != NULL) || (i+1 == argc)) print_usage(argv[0]); affix_file = argv[i+1]; i++; } else if (strcmp("-ppoff", argv[i])==0) { pp_on = FALSE; } else if (strcmp("-coff", argv[i])==0) { cons_on = FALSE; } else if (strcmp("-aoff", argv[i])==0) { af_on = FALSE; } else if (strcmp("-batch", argv[i])==0) { } else if (strncmp("-!", argv[i],2)==0) { } else { print_usage(argv[0]); } } else { print_usage(argv[0]); } } if (!pp_on && post_process_knowledge_file != NULL) print_usage(argv[0]); if (dictionary_file == NULL) { dictionary_file = "4.0.dict"; fprintf(stderr, "No dictionary file specified. Using %s.\n", dictionary_file); } if (af_on && affix_file == NULL) { affix_file = "4.0.affix"; fprintf(stderr, "No affix file specified. Using %s.\n", affix_file); } if (pp_on && post_process_knowledge_file == NULL) { post_process_knowledge_file = "4.0.knowledge"; fprintf(stderr, "No post process knowledge file specified. 
Using %s.\n", post_process_knowledge_file); } if (cons_on && constituent_knowledge_file == NULL) { constituent_knowledge_file = "4.0.constituent-knowledge"; fprintf(stderr, "No constituent knowledge file specified. Using %s.\n", constituent_knowledge_file); } opts = parse_options_create(); if (opts == NULL) { fprintf(stderr, "%s\n", lperrmsg); exit(-1); } panic_parse_opts = parse_options_create(); if (panic_parse_opts == NULL) { fprintf(stderr, "%s\n", lperrmsg); exit(-1); } setup_panic_parse_options(panic_parse_opts); parse_options_set_max_sentence_length(opts, 70); parse_options_set_panic_mode(opts, TRUE); parse_options_set_max_parse_time(opts, 30); parse_options_set_linkage_limit(opts, 1000); parse_options_set_short_length(opts, 10); dict = dictionary_create(dictionary_file, post_process_knowledge_file, constituent_knowledge_file, affix_file); if (dict == NULL) { fprintf(stderr, "%s\n", lperrmsg); exit(-1); } /* process the command line like commands */ for (i=1; i<argc; i++) { if ((strcmp("-pp", argv[i])==0) || (strcmp("-c", argv[i])==0) || (strcmp("-a", argv[i])==0)) { i++; } else if ((argv[i][0] == '-') && (strcmp("-ppoff", argv[i])!=0) && (argv[i][0] == '-') && (strcmp("-coff", argv[i])!=0) && (argv[i][0] == '-') && (strcmp("-aoff", argv[i])!=0)) { issue_special_command(argv[i]+1, opts, dict); } } dictionary_and_option_space = space_in_use; reported_leak = external_space_in_use = 0; verbosity = parse_options_get_verbosity(opts); while (fget_input_string(input_string, stdin, stdout, opts)) { if (space_in_use != dictionary_and_option_space + reported_leak) { fprintf(stderr, "Warning: %d bytes of space leaked.\n", space_in_use-dictionary_and_option_space-reported_leak); reported_leak = space_in_use - dictionary_and_option_space; } if ((strcmp(input_string, "quit\n")==0) || (strcmp(input_string, "exit\n")==0)) break; if (special_command(input_string, dict)) continue; if (parse_options_get_echo_on(opts)) { printf("%s", input_string); } if 
(parse_options_get_batch_mode(opts)) { label = strip_off_label(input_string); } sent = sentence_create(input_string, dict); if (sent == NULL) { if (verbosity > 0) fprintf(stderr, "%s\n", lperrmsg); if (lperrno != NOTINDICT) exit(-1); else continue; } if (sentence_length(sent) > parse_options_get_max_sentence_length(opts)) { sentence_delete(sent); if (verbosity > 0) { fprintf(stdout, "Sentence length (%d words) exceeds maximum allowable (%d words)\n", sentence_length(sent), parse_options_get_max_sentence_length(opts)); } continue; } /* First parse with cost 0 or 1 and no null links */ parse_options_set_disjunct_cost(opts, 2); parse_options_set_min_null_count(opts, 0); parse_options_set_max_null_count(opts, 0); parse_options_reset_resources(opts); num_linkages = sentence_parse(sent, opts); /* Now parse with null links */ if ((num_linkages == 0) && (!parse_options_get_batch_mode(opts))) { if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n"); if (parse_options_get_allow_null(opts)) { parse_options_set_min_null_count(opts, 1); parse_options_set_max_null_count(opts, sentence_length(sent)); num_linkages = sentence_parse(sent, opts); } } if (parse_options_timer_expired(opts)) { if (verbosity > 0) fprintf(stdout, "Timer is expired!\n"); } if (parse_options_memory_exhausted(opts)) { if (verbosity > 0) fprintf(stdout, "Memory is exhausted!\n"); } if ((num_linkages == 0) && parse_options_resources_exhausted(opts) && parse_options_get_panic_mode(opts)) { print_total_time(opts); if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n"); parse_options_reset_resources(panic_parse_opts); parse_options_set_verbosity(panic_parse_opts, verbosity); num_linkages = sentence_parse(sent, panic_parse_opts); if (parse_options_timer_expired(panic_parse_opts)) { if (verbosity > 0) fprintf(stdout, "Timer is expired!\n"); } } print_total_time(opts); if (parse_options_get_batch_mode(opts)) { batch_process_some_linkages(label, sent, opts); } else { 
process_some_linkages(sent, opts); } sentence_delete(sent); if (external_space_in_use != 0) { fprintf(stderr, "Warning: %d bytes of external space leaked.\n", external_space_in_use); } } if (parse_options_get_batch_mode(opts)) { print_time(opts, "Total"); fprintf(stderr, "%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s"); } parsing_space_leaked = space_in_use - dictionary_and_option_space; if (parsing_space_leaked != 0) { fprintf(stderr, "Warning: %d bytes of space leaked during parsing.\n", parsing_space_leaked); } parse_options_delete(panic_parse_opts); parse_options_delete(opts); dictionary_delete(dict); if (space_in_use != parsing_space_leaked) { fprintf(stderr, "Warning: %d bytes of dictionary and option space leaked.\n", space_in_use - parsing_space_leaked); } else if (parsing_space_leaked == 0) { fprintf(stderr, "Good news: no space leaked.\n"); } if (external_space_in_use != 0) { fprintf(stderr, "Warning: %d bytes of external space leaked.\n", external_space_in_use); } return 0; }
/// Python entry point: domains(text) -> (list_of_domain_names, num_domains)
///
/// Parses `text` with the default-language dictionary and collects the
/// domain names of every link of the first linkage into a list.  The second
/// tuple element is the domain count reported for the last link visited
/// (0 when the linkage has no links).
///
/// Returns None when the sentence yields no linkage, and raises RuntimeError
/// when the dictionary cannot be opened.
static PyObject *domains(PyObject *self, PyObject *args)
{
    Dictionary dict = NULL;
    Parse_Options opts = NULL;
    Sentence sent = NULL;
    Linkage linkage = NULL;
    int num_linkages = 0;
    int links;
    int i;
    int j;
    int num_domains = 0;
    const char *text;
    const char **names;
    PyObject *output_list = NULL;
    PyObject *temp;
    PyObject *result = NULL;

    if (!PyArg_ParseTuple(args, "s", &text))
        return NULL;

    opts = parse_options_create();
    parse_options_set_verbosity(opts, -1);
    setlocale(LC_ALL, "");
    dict = dictionary_create_default_lang();
    if (!dict) {
        /* An exception is pending, so the result must be NULL. */
        PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary");
        goto cleanup;
    }

    sent = sentence_create(text, dict);
    if (sent != NULL) {
        sentence_split(sent, opts);
        num_linkages = sentence_parse(sent, opts);
    }
    if (num_linkages > 0)
        linkage = linkage_create(0, sent, opts);
    if (linkage == NULL) {
        /* Nothing parsed: report None, as before. */
        Py_INCREF(Py_None);
        result = Py_None;
        goto cleanup;
    }

    /* Created only once it is certain to be needed, so that the early exits
     * above have nothing to release. */
    output_list = PyList_New(0);
    if (output_list == NULL)
        goto cleanup;

    /* The domain accessors are indexed by link, so iterate over the links
     * of the linkage (exclusive upper bound). */
    links = linkage_get_num_links(linkage);
    for (i = 0; i < links; i++) {
        num_domains = linkage_get_link_num_domains(linkage, i);
        names = linkage_get_link_domain_names(linkage, i);
        if (names == NULL)
            continue;
        for (j = 0; j < num_domains; j++) {
            temp = PyString_FromString(names[j]);
            if (temp == NULL)
                goto cleanup;
            /* PyList_Append takes its own reference; drop ours. */
            if (PyList_Append(output_list, temp) < 0) {
                Py_DECREF(temp);
                goto cleanup;
            }
            Py_DECREF(temp);
        }
    }

    /* "N" hands our reference to the tuple instead of adding another one. */
    result = Py_BuildValue("Ni", output_list, num_domains);
    output_list = NULL;

cleanup:
    Py_XDECREF(output_list);
    if (linkage != NULL) linkage_delete(linkage);
    if (sent != NULL) sentence_delete(sent);
    if (dict != NULL) dictionary_delete(dict);
    if (opts != NULL) parse_options_delete(opts);
    return result;
}
/// This is the basic sentence dissection static PyObject *sentence(PyObject *self, PyObject *args) { Dictionary dict; Parse_Options opts; Sentence sent; Linkage linkage; Linkage sub_linkage; char * diagram; /// Link counts int num_linkages; int links; /// Index's for the iterators int link_idx; int word_idx; int num_words; long span; long sub_linkages; const char *text; const char *d_output; PyObject *output_list; PyObject *word_list; PyObject *word2_list; PyObject *span_list; PyObject *temp; PyObject *sublinkage_list; PyObject *_diagram; output_list = PyList_New(0); word_list = PyList_New(0); word2_list = PyList_New(0); sublinkage_list = PyList_New(0); span_list = PyList_New(0); if (!PyArg_ParseTuple(args, "s", &text)) return NULL; opts = parse_options_create(); parse_options_set_verbosity(opts, -1); parse_options_set_screen_width(opts, 50); setlocale(LC_ALL, ""); dict = dictionary_create_default_lang(); if (!dict) { PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary"); Py_INCREF(Py_None); return Py_None; } sent = sentence_create(text, dict); sentence_split(sent, opts); num_linkages = sentence_parse(sent, opts); if (num_linkages > 0) { linkage = linkage_create(0, sent, opts); /// Get the lengths of everything num_words = linkage_get_num_words(linkage); links = linkage_get_num_links(linkage); for(link_idx=0; link_idx < links; link_idx++) { PyObject *temp_subLen; diagram = linkage_print_diagram(linkage); _diagram = PyString_FromString(diagram); sub_linkage = linkage_create(link_idx, sent, opts); sub_linkages = linkage_get_num_sublinkages(linkage); temp_subLen = PyLong_FromLong(sub_linkages); linkage_delete(sub_linkage); PyList_Append(sublinkage_list, temp_subLen); span = linkage_get_link_length(linkage, link_idx); PyList_Append(span_list, PyInt_FromLong(span)); PyObject *temp_list; temp_list = PyList_New(0); /// Sub Group these (left and right labels) const char *t1 = linkage_get_link_llabel(linkage, link_idx); temp = 
PyString_FromString(t1); PyList_Append(temp_list, temp); const char *t2 = linkage_get_link_rlabel(linkage, link_idx); temp = PyString_FromString(t2); PyList_Append(temp_list, temp); /// Then add to the main list PyList_Append(output_list, temp_list); /// Just the label const char *t3 = linkage_get_link_label(linkage, link_idx); temp = PyString_FromString(t3); PyList_Append(word2_list, temp); } for(word_idx=0; word_idx < num_words; word_idx++) { d_output = linkage_get_word(linkage, word_idx); PyObject *word; word = PyString_FromString(d_output); PyList_Append(word_list, word); } linkage_free_diagram(diagram); linkage_delete(linkage); } else { sentence_delete(sent); dictionary_delete(dict); parse_options_delete(opts); Py_INCREF(Py_None); return Py_None; } sentence_delete(sent); dictionary_delete(dict); parse_options_delete(opts); return Py_BuildValue("SSSSSS", word_list, span_list, output_list, word2_list, sublinkage_list, _diagram); }
int main(int argc, char * argv[]) { FILE *input_fh = stdin; Dictionary dict; const char *language = NULL; int num_linkages; Label label = NO_LABEL; Command_Options *copts; Parse_Options opts; bool batch_in_progress = false; isatty_stdin = isatty(fileno(stdin)); isatty_stdout = isatty(fileno(stdout)); #ifdef _WIN32 /* If compiled with MSVC/MinGW, we still support running under Cygwin. * This is done by checking running_under_cygwin to resolve * incompatibilities. */ const char *ostype = getenv("OSTYPE"); if ((NULL != ostype) && (0 == strcmp(ostype, "cygwin"))) running_under_cygwin = true; /* argv encoding is in the current locale. */ argv = argv2utf8(argc); if (NULL == argv) { prt_error("Fatal error: Unable to parse command line\n"); exit(-1); } #ifdef _MSC_VER _set_printf_count_output(1); /* enable %n support for display_1line_help()*/ #endif /* _MSC_VER */ win32_set_utf8_output(); #endif /* _WIN32 */ #if LATER /* Try to catch the SIGWINCH ... except this is not working. */ struct sigaction winch_act; winch_act.sa_handler = winch_handler; winch_act.sa_sigaction = NULL; sigemptyset (&winch_act.sa_mask); winch_act.sa_flags = 0; sigaction (SIGWINCH, &winch_act, NULL); #endif copts = command_options_create(); if (copts == NULL || copts->panic_opts == NULL) { prt_error("Fatal error: unable to create parse options\n"); exit(-1); } opts = copts->popts; setup_panic_parse_options(copts->panic_opts); copts->panic_mode = true; parse_options_set_max_parse_time(opts, 30); parse_options_set_linkage_limit(opts, 1000); parse_options_set_min_null_count(opts, 0); parse_options_set_max_null_count(opts, 0); parse_options_set_short_length(opts, 16); parse_options_set_islands_ok(opts, false); parse_options_set_display_morphology(opts, false); save_default_opts(copts); /* Options so far are the defaults */ if ((argc > 1) && (argv[1][0] != '-')) { /* The dictionary is the first argument if it doesn't begin with "-" */ language = argv[1]; } for (int i = 1; i < argc; i++) { if 
(strcmp("--help", argv[i]) == 0) { print_usage(stdout, argv[0], copts, 0); } if (strcmp("--version", argv[i]) == 0) { printf("Version: %s\n", linkgrammar_get_version()); printf("%s\n", linkgrammar_get_configuration()); exit(0); } } /* Process command line variable-setting commands (only). */ for (int i = 1; i < argc; i++) { if (argv[i][0] == '-') { const char *var = argv[i] + ((argv[i][1] != '-') ? 1 : 2); if ((var[0] != '!') && (0 > issue_special_command(var, copts, NULL))) print_usage(stderr, argv[0], copts, -1); } else if (i != 1) { prt_error("Fatal error: Unknown argument '%s'.\n", argv[i]); print_usage(stderr, argv[0], copts, -1); } } if (language && *language) { dict = dictionary_create_lang(language); if (dict == NULL) { prt_error("Fatal error: Unable to open dictionary.\n"); exit(-1); } } else { dict = dictionary_create_default_lang(); if (dict == NULL) { prt_error("Fatal error: Unable to open default dictionary.\n"); exit(-1); } } /* Process the command line '!' commands */ for (int i = 1; i < argc; i++) { if ((argv[i][0] == '-') && (argv[i][1] == '!')) { if (0 > issue_special_command(argv[i]+1, copts, dict)) print_usage(stderr, argv[0], copts, -1); } } check_winsize(copts); prt_error("Info: Dictionary version %s, locale %s\n", linkgrammar_get_dict_version(dict), linkgrammar_get_dict_locale(dict)); prt_error("Info: Library version %s. Enter \"!help\" for help.\n", linkgrammar_get_version()); /* Main input loop */ while (true) { char *input_string; Sentence sent = NULL; /* Make sure stderr is shown even when MSVC binary runs under * Cygwin/MSYS pty (in that case it is fully buffered(!)). 
*/ fflush(stderr); verbosity = parse_options_get_verbosity(opts); debug = parse_options_get_debug(opts); test = parse_options_get_test(opts); input_string = fget_input_string(input_fh, stdout, /*check_return*/false); check_winsize(copts); if (NULL == input_string) { if (ferror(input_fh)) prt_error("Error: Read: %s\n", strerror(errno)); if (input_fh == stdin) break; fclose (input_fh); input_fh = stdin; continue; } /* Discard whitespace characters from end of string. */ for (char *p = &input_string[strlen(input_string)-1]; (p > input_string) && strchr(WHITESPACE, *p) ; p--) { *p = '\0'; } /* If the input string is just whitespace, then ignore it. */ if (strspn(input_string, WHITESPACE) == strlen(input_string)) continue; char command = special_command(input_string, copts, dict); if ('e' == command) break; /* It was an exit command */ if ('c' == command) continue; /* It was another command */ if (-1 == command) continue; /* It was a bad command */ /* We have to handle the !file command inline; it's too hairy * otherwise ... */ if ('f' == command) { char * filename = &input_string[strcspn(input_string, WHITESPACE)] + 1; int fnlen = strlen(filename); if (0 == fnlen) { prt_error("Error: Missing file name argument\n"); continue; } if ('\n' == filename[fnlen-1]) filename[fnlen-1] = '\0'; struct stat statbuf; if ((0 == stat(filename, &statbuf)) && statbuf.st_mode & S_IFDIR) { prt_error("Error: Cannot open %s: %s\n", filename, strerror(EISDIR)); continue; } input_fh = fopen(filename, "r"); if (NULL == input_fh) { prt_error("Error: Cannot open %s: %s\n", filename, strerror(errno)); input_fh = stdin; continue; } continue; } if (!copts->batch_mode) batch_in_progress = false; if ('\0' != test[0]) { /* In batch mode warn only once. * In auto-next-linkage mode don't warn at all. */ if (!batch_in_progress && !auto_next_linkage_test(test)) { fflush(stdout); /* Remind the developer this is a test mode. 
*/ prt_error("Warning: Tests enabled: %s\n", test); if (copts->batch_mode) batch_in_progress = true; } } if (copts->echo_on) { printf("%s\n", input_string); } if (copts->batch_mode || auto_next_linkage_test(test)) { label = strip_off_label(input_string); } // Post-processing-based pruning will clip away connectors // that we might otherwise want to examine. So disable PP // pruning in this situation. if (copts->display_bad) parse_options_set_perform_pp_prune(opts, false); else parse_options_set_perform_pp_prune(opts, true); sent = sentence_create(input_string, dict); if (sentence_split(sent, opts) < 0) { sentence_delete(sent); sent = NULL; continue; } if (0 != copts->display_wordgraph) { const char *wg_display_flags = ""; /* default flags */ switch (copts->display_wordgraph) { case 1: /* default flags */ break; case 2: /* subgraphs with a legend */ wg_display_flags = "sl"; break; case 3: { /* Use esoteric flags from the test user variable. */ const char *s = test_enabled(test, "wg"); if ((NULL != s) && (':' == s[0])) wg_display_flags = s; } break; default: prt_error("Warning: wordgraph=%d: Unknown value, using 1\n", copts->display_wordgraph); copts->display_wordgraph = 1; } sentence_display_wordgraph(sent, wg_display_flags); } /* First parse with the default disjunct_cost as set by the library * (currently 2.7). Usually parse here with no null links. * However, if "-test=one-step-parse" is used and we are said to * parse with null links, allow parsing here with null links too. */ bool one_step_parse = !copts->batch_mode && copts->allow_null && test_enabled(test, "one-step-parse"); int max_null_count = one_step_parse ? sentence_length(sent) : 0; parse_options_set_min_null_count(opts, 0); parse_options_set_max_null_count(opts, max_null_count); parse_options_reset_resources(opts); num_linkages = sentence_parse(sent, opts); /* num_linkages is negative only on a hard-error; * typically, due to a zero-length sentence. 
*/ if (num_linkages < 0) { sentence_delete(sent); sent = NULL; continue; } #if 0 /* Try again, this time omitting the requirement for * definite articles, etc. This should allow for the parsing * of newspaper headlines and other clipped speech. * * XXX Unfortunately, this also allows for the parsing of * all sorts of ungrammatical sentences which should not * parse, and leads to bad parses of many other unparsable * but otherwise grammatical sentences. Thus, this trick * pretty much fails; we leave it here to document the * experiment. */ if (num_linkages == 0) { parse_options_set_disjunct_cost(opts, 4.5); num_linkages = sentence_parse(sent, opts); if (num_linkages < 0) continue; } #endif /* 0 */ /* Try using a larger list of disjuncts */ /* XXX FIXME: the lg_expand_disjunct_list() routine is not * currently a part of the public API; it should be made so, * or this expansion idea should be abandoned... not sure which. */ if ((num_linkages == 0) && parse_options_get_use_cluster_disjuncts(opts)) { int expanded; if (verbosity > 0) fprintf(stdout, "No standard linkages, expanding disjunct set.\n"); parse_options_set_disjunct_cost(opts, 3.9); expanded = lg_expand_disjunct_list(sent); if (expanded) { num_linkages = sentence_parse(sent, opts); } if (0 < num_linkages) printf("Got One !!!!!!!!!!!!!!!!!\n"); } /* If asked to show bad linkages, then show them. 
*/ if ((num_linkages == 0) && (!copts->batch_mode)) { if (copts->display_bad) { num_linkages = sentence_num_linkages_found(sent); } } /* Now parse with null links */ if (!one_step_parse && num_linkages == 0 && !copts->batch_mode) { if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n"); if (copts->allow_null) { /* XXX should use expanded disjunct list here too */ parse_options_set_min_null_count(opts, 1); parse_options_set_max_null_count(opts, sentence_length(sent)); num_linkages = sentence_parse(sent, opts); } } if (verbosity > 0) { if (parse_options_timer_expired(opts)) fprintf(stdout, "Timer is expired!\n"); if (parse_options_memory_exhausted(opts)) fprintf(stdout, "Memory is exhausted!\n"); } if ((num_linkages == 0) && copts->panic_mode && parse_options_resources_exhausted(opts)) { /* print_total_time(opts); */ batch_errors++; if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n"); /* If the parser used was the SAT solver, set the panic parser to * it too. * FIXME? Currently, the SAT solver code is not too useful in * panic mode since it doesn't handle parsing with null words, so * using the regular parser in that case could be beneficial. * However, this currently causes a crash due to a memory * management mess. 
*/ parse_options_set_use_sat_parser(copts->panic_opts, parse_options_get_use_sat_parser(opts)); parse_options_reset_resources(copts->panic_opts); parse_options_set_verbosity(copts->panic_opts, verbosity); (void)sentence_parse(sent, copts->panic_opts); if (verbosity > 0) { if (parse_options_timer_expired(copts->panic_opts)) fprintf(stdout, "Panic timer is expired!\n"); } } if (verbosity > 1) parse_options_print_total_time(opts); const char *rc = ""; if (copts->batch_mode) { batch_process_some_linkages(label, sent, copts); } else { rc = process_some_linkages(input_fh, sent, copts); } fflush(stdout); sentence_delete(sent); sent = NULL; if ((NULL == rc) && (input_fh == stdin)) break; } if (copts->batch_mode) { /* print_time(opts, "Total"); */ fprintf(stderr, "%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s"); } /* Free stuff, so that mem-leak detectors don't complain. */ command_options_delete(copts); dictionary_delete(dict); printf ("Bye.\n"); return 0; }
/*
 * Parses inputString with the dictionary and options stored in ptd.
 *
 * On return ptd->sent holds the new sentence (NULL when none could be
 * created or when it exceeds the maximum sentence length) and
 * ptd->num_linkages holds the result of the last sentence_parse() call
 * (0 when nothing was parsed).
 *
 * Parsing is attempted in up to three stages: without null links, with null
 * links (if allowed by the options), and finally with the panic options
 * when resources were exhausted.
 */
static void jParse(JNIEnv *env, per_thread_data *ptd, char* inputString)
{
    int maxlen;
    Parse_Options opts = ptd->opts;
    int jverbosity = parse_options_get_verbosity(opts);

    /* Release the previous sentence and forget it, so that no stale pointer
     * survives any of the early returns below. */
    if (ptd->sent)
    {
        sentence_delete(ptd->sent);
        ptd->sent = NULL;
    }
    ptd->num_linkages = 0;

    /* NOTE(review): throwException() is assumed to only raise a pending Java
     * exception and then return to the caller -- confirm.  Either way,
     * parsing must not go on with a missing dictionary or input. */
    if (ptd->dict == NULL)
    {
        throwException(env, "jParse: dictionary not open\n");
        return;
    }
    if (inputString == NULL)
    {
        throwException(env, "jParse: no input sentence!\n");
        return;
    }

    ptd->sent = sentence_create(inputString, ptd->dict);
    if (ptd->sent == NULL)
        return;

    maxlen = parse_options_get_max_sentence_length(opts);
    if (maxlen < sentence_length(ptd->sent))
    {
        if (jverbosity > 0)
        {
            prt_error("Error: JNI: Sentence length (%d words) exceeds maximum allowable (%d words)\n",
                sentence_length(ptd->sent), maxlen);
        }
        sentence_delete(ptd->sent);
        ptd->sent = NULL;
        return;
    }

    /* First parse with cost 0 or 1 and no null links or fat links */
    parse_options_set_disjunct_costf(opts, 2.0f);
    parse_options_set_min_null_count(opts, 0);
    parse_options_set_max_null_count(opts, 0);
    parse_options_set_use_fat_links(opts, FALSE);
    parse_options_reset_resources(opts);

    ptd->num_linkages = sentence_parse(ptd->sent, opts);

    /* If failed, try again with null links */
    if (0 == ptd->num_linkages)
    {
        if (jverbosity > 0) prt_error("Warning: JNI: No complete linkages found.\n");
        if (parse_options_get_allow_null(opts))
        {
            parse_options_set_min_null_count(opts, 1);
            parse_options_set_max_null_count(opts, sentence_length(ptd->sent));
            ptd->num_linkages = sentence_parse(ptd->sent, opts);
        }
    }

    if (parse_options_timer_expired(opts))
    {
        if (jverbosity > 0) prt_error("Warning: JNI: Timer is expired!\n");
    }
    if (parse_options_memory_exhausted(opts))
    {
        if (jverbosity > 0) prt_error("Warning: JNI: Memory is exhausted!\n");
    }

    /* Last resort: retry with the panic options when resources ran out. */
    if ((ptd->num_linkages == 0) &&
        parse_options_resources_exhausted(opts))
    {
        parse_options_print_total_time(opts);
        if (jverbosity > 0) prt_error("Warning: JNI: Entering \"panic\" mode...\n");
        parse_options_reset_resources(ptd->panic_parse_opts);
        parse_options_set_verbosity(ptd->panic_parse_opts, jverbosity);
        ptd->num_linkages = sentence_parse(ptd->sent, ptd->panic_parse_opts);
        if (parse_options_timer_expired(ptd->panic_parse_opts))
        {
            if (jverbosity > 0) prt_error("Error: JNI: Timer is expired!\n");
        }
    }
}
SPOTriplets NLP::sentence2triplets ( const char* sentence ) { // vector of triplets SPOTriplets triplets; #ifdef DEBUG std::cout << "The sentence: " << sentence << std::endl; #endif // creates a Sentence from the input char* Sentence sent = sentence_create ( sentence, dict_ ); #ifdef DEBUG std::cout << "Sentence created" << std::endl; #endif // tokenizes the sentence sentence_split ( sent, parse_opts_ ); #ifdef DEBUG std::cout << "Sentence splitted" << std::endl; #endif // searches for all possible linkages int num_linkages = sentence_parse ( sent, parse_opts_ ); #ifdef DEBUG std::cout << "Sentence parsed" << std::endl; std::cout << "Number of linkages: " << num_linkages << std::endl; #endif // just one triplet SPOTriplet triplet; // if there is any linkage in the sentence if( num_linkages > 0 ) { // create the linkage Linkage linkage = linkage_create ( 0, sent, parse_opts_ ); #ifdef DEBUG // prints the sentence's diagram std::cout << "The diagram: " << std::endl; char *diagram = linkage_print_diagram(linkage, true, 800); std::cout << diagram << std::endl; linkage_free_diagram( diagram ); // end print diagram #endif std::vector<std::string> labels; // 1. find the S_link // S* except there is an SJ* because then S* except Spx // two cases: there is SJ* and there is not SJ* // TODO: VJlp VJrp same as SJ but to predications // TODO: SFut SFst what the f**k? 
###FIXED### // TODO: His form was shining like the light not working ###FIXED### // TODO: Car is mine not working ###FIXED### // TODO: The little brown bear has eaten all of the honey not working ###FIXED### // REGEXES std::regex SJ_( "SJ.*" ); std::regex VJ_( "VJ.*"); std::regex subject( "(Ss.*)|(SFut)|(Sp\*.*)" ); std::regex Spx( "Spx.*" ); // TODO:fix theese initializer list not allowed ###FIXED### std::regex predicate( "(Pv.*)|(Pg.*)|(PP.*)|(I.*)|(TO)|(MVi.*)" ); // TODO: make one from theese // (Sp.*)|(Ss.*) ###FIXED### std::regex noun_adject_object ( "(O.*)|(Os.*)|(Op.*)|(MVpn.*)|(Pa.*)|(MVa.*)" ); std::regex preposition ( "(MVp.*)|(Pp.*)|(OF)|(TO)" ); std::regex prep_object ( "(J.*)|(TI)|(I.*)|(ON)" ); // TODO: problems with matching!! Pg*!! ###FIXED### // TODO: problems with matching!! Mvp.*!! ###FIXED### bool s_found = false; bool p_found = false; bool o_found = false; bool SJ = false; // search for SJ.s labels for( auto label: labels ) { if( std::regex_match( label, SJ_ ) ) { SJ = true; break; } } // multiple subject in the sentence if( SJ ) { // SPls left -> first subject // SPrs right -> second subject // Spx right -> predicate // SJ-s are multiple subjects std::string temp; // go through every linkage for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get their label std::string l = linkage_get_link_label( linkage, i ); // if there is an SJl* label if( std::regex_match( l, std::regex( "SJl.*" ) ) ) { // SJls left side triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); triplet.cut( triplet.s ); temp = triplet.s + " "; // and word triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); triplet.cut( triplet.s ); temp += triplet.s + " "; // find SJr* for( int j = 0; j < linkage_get_num_links( linkage ); ++j ) { std::string m = linkage_get_link_label( linkage, j ); if( std::regex_match( m, std::regex( "SJr.*" ) ) ) { triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) 
); triplet.cut(); temp += triplet.s; triplet.s = temp; s_found = true; #ifdef DEBUG std::cout << "Subject found: " << triplet.s << std::endl; #endif break; } // if } // for break; } // if } // for // now we have the subject // find Spx and its right side will be the starter predicate std::string current_word; for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, std::regex( "Spx.*" ) ) ) { triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); } } // from now all the same as on the else branch !!!! bool predicate_match = false; // search for the linkage that has triplet.s as left! do { predicate_match = false; for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // every linkage's left word std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // every linkage's label std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, predicate ) && word_i == current_word ) { // found predicate triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); current_word = triplet.p; predicate_match = true; break; } } } while( predicate_match ); // we now have the predicate too // TODO: multiple predicates! 
p_found = true; #ifdef DEBUG std::cout << "Predicate found: " << triplet.p << std::endl; #endif // ###COPY BEGIN### // search for noun object or adjective object for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get every linkage label std::string l = linkage_get_link_label( linkage, i ); // get the left word of every linkage std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if thete is a label that match AND its left word is the predicate if( std::regex_match( l, noun_adject_object ) && triplet.p == l_word ) { // then the object is that linkage's right word triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); triplet.cut( triplet.o ); o_found = true; #ifdef DEBUG std::cout << "Adjective or noun object found: " << triplet.o << std::endl; #endif } // if } // for // still not found object, then search for preposition if( !o_found ) { // go through every linkage for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get the linkage's label std::string l = linkage_get_link_label( linkage, i ); // and left word std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if there is a linkage which is a preposition and its left word is the predicate if( std::regex_match( l, preposition ) && triplet.p == word_i ) { // found preposition // search for prep_object // then the temp will contain the preposition label's right word std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); #ifdef DEBUG std::cout << "Preposition found! 
and its rigth word is: " << temp << std::endl; #endif for( int j = 0; j < linkage_get_num_links( linkage ); ++j ) { // every linkages std::string m = linkage_get_link_label( linkage, j ); // every left word std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); // if there is a label with match and its left is exactly the preposition's right if( std::regex_match( m, prep_object ) && temp == word_j ) { triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); triplet.cut(triplet.o); triplet.o += " "; // save o std::string temp = triplet.o; triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) ); triplet.cut(triplet.o); temp += triplet.o; triplet.o = temp; o_found = true; #ifdef DEBUG std::cout << "Object found: " << triplet.o << std::endl; #endif } // if( std::regex_match( m, prep_object ) && temp == word_j ) END } // for J END } // if( std::regex_match( l, preposition ) && triplet.p == word_i ) END } // for I END } // if( !o_found ) END if( s_found && p_found && o_found ) { // TODO: cut the words itself not the whole triplet // have to cut every word itself // triplet.cut(); triplet.cut(triplet.s); triplet.cut(triplet.p); triplets.push_back( triplet ); s_found = false; p_found = false; o_found = false; } // ###COPY END### } else // only one subject { // except Spx!!! 
// S left -> subject // S right -> predicate at first // if the word next to S right, is an element of Pv*, Pg* PP*, I*, TO, MVi* // then the new predicate will be that word std::string current_word; // search for subject (S_link) for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get the linkage's label std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, subject ) ) { // subject found triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); s_found = true; current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); triplet.p = current_word; #ifdef DEBUG std::cout << "Subject found: " << triplet.s << std::endl; #endif break; } } if( s_found ) { bool predicate_match = false; // search for the linkage that has triplet.s as left! do { predicate_match = false; for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // every linkage's left word std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // every linkage's label std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, predicate ) && l_word == current_word ) { // found predicate triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); current_word = triplet.p; predicate_match = true; break; } } // for END } while( predicate_match ); p_found = true; #ifdef DEBUG std::cout << "Predicate found: " << triplet.p << std::endl; #endif } // if( s_found ) END // subject and predicate found // search for object // from k to linkage_get_num_links( linkage ) // if there is any of the noun, adjective od preposition object then that // label's right will give the object. // !!! search only between labels that has triplet.p as left word !!!!! 
// search for noun object or adjective objects // go through all links for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get every linkage label std::string l = linkage_get_link_label( linkage, i ); // get the left word of every linkage std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if thete is a label that match AND its left word is the predicate if( std::regex_match( l, noun_adject_object ) && triplet.p == word_i ) { // then the object is that linkage's right word triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); o_found = true; triplet.cut(triplet.o); #ifdef DEBUG std::cout << "Adjective or noun object found: " << triplet.o << std::endl; #endif } // if END } // for END // still not found object, then search for preposition if( !o_found ) { // go through every linkage for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get the linkage's label std::string l = linkage_get_link_label( linkage, i ); // and left word std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if there is a linkage which is a preposition and its left word is the predicate if( std::regex_match( l, preposition ) && triplet.p == word_i ) { // found preposition // search for prep_object // then the temp will contain the preposition label's right word std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); #ifdef DEBUG std::cout << "Preposition found! 
and its rigth word is: " << temp << std::endl; #endif // start search from there for( int j = 0; j < linkage_get_num_links( linkage ); ++j ) { // every linkages std::string m = linkage_get_link_label( linkage, j ); // every left word std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); #ifdef DEBUG if( std::regex_match( m, prep_object ) ) std::cout << m << " DOES match to (J.*)|(TI)|(I.*)|(ON)" << std::endl; #endif // if there is a label with match and its left is exactly the preposition's right if( std::regex_match( m, prep_object ) && temp == word_j ) { triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); triplet.cut(triplet.o); triplet.o += " "; // save o std::string temp = triplet.o; triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) ); triplet.cut(triplet.o); temp += triplet.o; triplet.o = temp; #ifdef DEBUG std::cout << "Object found: " << triplet.o << std::endl; #endif o_found = true; } } // for } // if } // for } // if( o_found ) END if( s_found && p_found && o_found ) { // TODO: cut the words itself not the whole triplet ###FIXED### // have to cut every word itself // triplet.cut(); triplet.cut(triplet.s); triplet.cut(triplet.p); triplets.push_back( triplet ); s_found = false; p_found = false; o_found = false; } } // end else linkage_delete ( linkage ); } // if( num_linkages > 0 ) END