/// Python entry point: parse the string argument with the default-language
/// dictionary and return the constituent tree of the first linkage, as built
/// by build_tree().
///
/// Returns:
///   - NULL with a Python exception set when the arguments cannot be parsed
///     or the dictionary cannot be opened;
///   - None when the sentence has no linkages or build_tree() yields None;
///   - otherwise the object produced by build_tree().
///
/// NOTE(review): the reference ownership of build_tree()'s result is not
/// visible here; the final Py_BuildValue("S", ...) is kept exactly as before
/// so that the reference-count behaviour on the success path is unchanged.
static PyObject *constituents(PyObject *self, PyObject *args)
{
    Dictionary    dict;
    Parse_Options opts;
    Sentence      sent;
    Linkage       linkage;
    CNode        *cn;
    int           num_linkages;
    const char   *text;
    PyObject     *output_list = NULL;

    if (!PyArg_ParseTuple(args, "s", &text))
        return NULL;

    opts = parse_options_create();
    parse_options_set_verbosity(opts, -1);

    setlocale(LC_ALL, "");
    dict = dictionary_create_default_lang();
    if (!dict) {
        /* An exception is being raised, so the function must return NULL
         * (returning None with an error set is a protocol violation), and
         * the options object must not be leaked. */
        parse_options_delete(opts);
        PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary");
        return NULL;
    }

    sent = sentence_create(text, dict);
    sentence_split(sent, opts);
    num_linkages = sentence_parse(sent, opts);

    if (num_linkages > 0) {
        linkage = linkage_create(0, sent, opts);
        cn = linkage_constituent_tree(linkage);
        output_list = build_tree(cn, linkage);
        /* Always release the tree and the linkage, including when
         * build_tree() returned None (previously leaked on that path). */
        linkage_free_constituent_tree(cn);
        linkage_delete(linkage);
    }

    /* Single cleanup point shared by every outcome after the parse. */
    sentence_delete(sent);
    dictionary_delete(dict);
    parse_options_delete(opts);

    if (num_linkages <= 0 || output_list == Py_None) {
        Py_INCREF(Py_None);
        return Py_None;
    }

    return Py_BuildValue("S", output_list);
}
/*
 * Parses every candidate in the `decomposed_word` list with the dictionary
 * and options held by `ms`, and collects a morpheme for each linkage in
 * which find_stem() locates a stem (non-negative position).
 *
 * Candidates for which sentence_create() fails are skipped.
 * When PRINT_DIAGRAM is non-zero each linkage diagram is printed to stdout.
 *
 * Returns the head of the morpheme list built through morpheme_list_add(),
 * or NULL if nothing was added.
 */
Morpheme * analyse_decomposed_word(Morpho_structures ms, Decomposed_word *decomposed_word)
{
	Sentence sent;
	Linkage linkage;
	Decomposed_word *node;
	Morpheme *morpheme_list = NULL;
	int num_linkages, i;
	int stem_pos;
	char *diagram;

	// ms->opts->unify_features = FALSE;
	for (node = decomposed_word; node != NULL; node = node->next) {
		sent = sentence_create(node->word, ms->dict);
		if (!sent) {
			continue;
		}

		num_linkages = sentence_parse(sent, ms->opts);
		for (i = 0; i < num_linkages; i++) {
			linkage = linkage_create(i, sent, ms->opts);

			if (PRINT_DIAGRAM) {
				/* Reuse the linkage created above. The previous code
				 * created it a second time here, so the first instance
				 * was never deleted. */
				diagram = linkage_print_diagram(linkage);
				printf("%s\n", diagram);
				string_delete(diagram);
			}

			if ((stem_pos = find_stem(linkage)) >= 0) {
				morpheme_list_add(ms, &morpheme_list,
				                  linkage->word[stem_pos],
				                  linkage->feature_array[0]);
			}
			linkage_delete(linkage);
		}
		sentence_delete(sent);
	}
	return morpheme_list;
}
/*
 * Tears down one per-thread parser context: deletes the sentence, the
 * (optional) constituent tree, the linkage, the dictionary and both option
 * sets, clears the thread's context slot, then frees `ptd` itself.
 * Each handle field is reset to NULL right after it is released.
 */
static void finish(per_thread_data *ptd)
{
	if (ptd->sent != NULL)
	{
		sentence_delete(ptd->sent);
	}
	ptd->sent = NULL;

#if DO_PHRASE_TREE
	if (tree != NULL)
	{
		linkage_free_constituent_tree(tree);
	}
	tree = NULL;
#endif

	if (ptd->linkage != NULL)
	{
		linkage_delete(ptd->linkage);
	}
	ptd->linkage = NULL;

	dictionary_delete(ptd->dict);
	ptd->dict = NULL;

	parse_options_delete(ptd->opts);
	ptd->opts = NULL;

	parse_options_delete(ptd->panic_parse_opts);
	ptd->panic_parse_opts = NULL;

	/* Detach the context from wherever the current thread looks it up. */
#ifdef USE_PTHREADS
	pthread_setspecific(java_key, NULL);
#else
	global_ptd = NULL;
#endif

	free(ptd);
}
/*
 * Minimal demo: parses a fixed set of sentences with the 4.0 dictionary
 * files and prints the diagram of the first linkage of each one.
 *
 * Returns 0 on success, 1 if the dictionary cannot be opened.
 */
int main()
{
    Dictionary    dict;
    Parse_Options opts;
    Sentence      sent;
    Linkage       linkage;
    char         *diagram;
    int           i, num_linkages;
    char * input_string[] = {
        "Grammar is useless because there is nothing to say -- Gertrude Stein.",
        "Computers are useless; they can only give you answers -- Pablo Picasso.",
    };
    /* Derive the loop bound from the array instead of hard-coding it. */
    const int num_inputs = (int)(sizeof(input_string) / sizeof(input_string[0]));

    opts = parse_options_create();
    dict = dictionary_create("4.0.dict", "4.0.knowledge", "4.0.constituent-knowledge", "4.0.affix");
    if (!dict) {
        /* Without this check a missing dictionary file would be passed on
         * to sentence_create() as a NULL dictionary. */
        fprintf(stderr, "Unable to open the dictionary\n");
        parse_options_delete(opts);
        return 1;
    }

    for (i = 0; i < num_inputs; ++i) {
        sent = sentence_create(input_string[i], dict);
        num_linkages = sentence_parse(sent, opts);
        if (num_linkages > 0) {
            linkage = linkage_create(0, sent, opts);
            diagram = linkage_print_diagram(linkage);
            printf("%s\n", diagram);
            string_delete(diagram);
            linkage_delete(linkage);
        }
        sentence_delete(sent);
    }

    dictionary_delete(dict);
    parse_options_delete(opts);
    return 0;
}
/*
 * Batch-mode post-processing of one sentence.
 *
 * When there_was_an_error() reports a mismatch for `label`, the first
 * linkage without violations (if any) is processed and the batch error
 * line is printed. Otherwise parse statistics are printed when the
 * "batch-print-parse-statistics" test flag is enabled.
 */
static void batch_process_some_linkages(Label label, Sentence sent, Command_Options* copts)
{
	Parse_Options opts = copts->popts;

	if (there_was_an_error(label, sent, opts)) {
		/* If we found at least one good linkage, print it. */
		if (sentence_num_valid_linkages(sent) > 0) {
			Linkage linkage = NULL;
			int i;

			for (i = 0; i < sentence_num_linkages_post_processed(sent); i++) {
				if (0 == sentence_num_violations(sent, i)) {
					linkage = linkage_create(i, sent, opts);
					break;
				}
			}

			/* The search above may find nothing (or linkage_create may
			 * return NULL); never hand a NULL linkage to the consumers. */
			if (linkage != NULL) {
				process_linkage(linkage, copts);
				linkage_delete(linkage);
			}
		}
		fprintf(stdout, "+++++ error %d\n", batch_errors);
	} else {
		if (test_enabled(test, "batch-print-parse-statistics")) {
			print_parse_statistics(sent, opts, copts);
		}
	}
}
/*
 * Batch-mode handler: if there_was_an_error() flags this sentence, process
 * its first linkage (when any linkage was found) and print the batch error
 * line. Does nothing when no error is reported.
 */
void batch_process_some_linkages(Label label, Sentence sent, Parse_Options opts)
{
	if (!there_was_an_error(label, sent, opts))
		return;

	if (sentence_num_linkages_found(sent) > 0)
	{
		Linkage first = linkage_create(0, sent, opts);
		process_linkage(first, opts);
		linkage_delete(first);
	}

	fprintf(stdout, "+++++ error %d\n", batch_errors);
}
/*
 * GC free function.
 *
 * Deletes the wrapped linkage, clears the struct's `linkage` and `sentence`
 * fields, and releases the struct with xfree(). A NULL `ptr` is ignored.
 */
static void
rlink_linkage_gc_free( struct rlink_linkage *ptr )
{
	if ( !ptr )
		return;

	linkage_delete( (Linkage)ptr->linkage );
	ptr->linkage  = NULL;
	ptr->sentence = Qnil;

	xfree( ptr );
}
std::map<int, EdgeDescription> LinkParserAdapterImpl::parseSentence(std::string sentenceStr) { Parse_Options parseOptions = parse_options_create(); int verbosity = psi_logger.getLoggingPriority() / 100 - 4; parse_options_set_verbosity(parseOptions, verbosity); starts_.clear(); ends_.clear(); edgeDescriptions_.clear(); freeSentence(); sentence_ = sentence_create(sentenceStr.c_str(), dictionary_); if (!sentence_) { std::stringstream errorSs; errorSs << "Link-parser failed to tokenize the input text."; throw ParserException(errorSs.str()); } boost::algorithm::to_lower(sentenceStr); if (sentence_parse(sentence_, parseOptions)) { size_t currentPos = 0; size_t foundPos = 0; int wordNo = 0; while (wordNo < sentence_length(sentence_)) { std::string word(sentence_get_word(sentence_, wordNo)); boost::algorithm::to_lower(word); foundPos = sentenceStr.find(word, currentPos); if (foundPos != std::string::npos) { starts_[wordNo] = foundPos; ends_[wordNo] = currentPos = foundPos + word.length(); } ++wordNo; } Linkage linkage = linkage_create(0, sentence_, parseOptions); CNode * ctree = linkage_constituent_tree(linkage); extractEdgeDescriptions(ctree, linkage); linkage_free_constituent_tree(ctree); linkage_delete(linkage); } else { std::stringstream errorSs; errorSs << "Link-parser failed to parse the input text.\n" << "Your input text is probably not a correct sentence."; WARN(errorSs.str()); } return edgeDescriptions_; }
//######################################################################################## static void batch_process_some_linkages(Label label, Sentence sent, Parse_Options opts,char * tmp_path,char * file_name) //######################################################################################### { Linkage linkage; if (there_was_an_error(label, sent, opts)) { if (sentence_num_linkages_found(sent) > 0) { linkage = linkage_create(0, sent, opts); //################################################ process_linkage(linkage, opts,tmp_path,file_name); //################################################ linkage_delete(linkage); } fprintf(stdout, "+++++ error %d\n", batch_errors); } }
/*
 * (Re)creates ptd->linkage for the linkage index ptd->cur_linkage.
 *
 * Does nothing when cur_linkage is not below num_linkages. Any previously
 * held linkage is deleted first. After creation the union is computed and
 * the last sublinkage is selected as current. With DO_PHRASE_TREE enabled
 * the constituent tree is rebuilt and printed as well.
 */
static void makeLinkage(per_thread_data *ptd)
{
	if (ptd->cur_linkage < ptd->num_linkages)
	{
		if (ptd->linkage)
			linkage_delete(ptd->linkage);

		ptd->linkage = linkage_create(ptd->cur_linkage, ptd->sent, ptd->opts);
		linkage_compute_union(ptd->linkage);
		linkage_set_current_sublinkage(ptd->linkage,
		                               linkage_get_num_sublinkages(ptd->linkage)-1);

#if DO_PHRASE_TREE
		if (tree)
			linkage_free_constituent_tree(tree);
		/* Was `linkage`, which is not declared in this function; the
		 * linkage lives in the per-thread data. */
		tree = linkage_constituent_tree(ptd->linkage);
		printTree(tree);
#endif
	}
}
/*
 * Batch-mode post-processing of one sentence.
 *
 * When there_was_an_error() reports a mismatch: if any linkage was found,
 * process the first violation-free one, or linkage 0 when no valid linkage
 * exists; then print the batch error line. When no error is reported,
 * parse statistics are printed if `test` contains
 * ",batch_print_parse_statistics,".
 */
static void batch_process_some_linkages(Label label, Sentence sent, Command_Options* copts)
{
	Parse_Options opts = copts->popts;

	if (there_was_an_error(label, sent, opts)) {
		/* If linkages were found, print them */
		if (sentence_num_linkages_found(sent) > 0) {
			Linkage linkage = NULL;

			/* If we found at least one good linkage, print it. */
			if (sentence_num_valid_linkages(sent) > 0) {
				int i;
				for (i = 0; i < sentence_num_linkages_post_processed(sent); i++) {
					if (0 == sentence_num_violations(sent, i)) {
						linkage = linkage_create(i, sent, opts);
						break;
					}
				}
			} else {
				/* This linkage will be bad; no good ones were found. */
				linkage = linkage_create(0, sent, opts);
			}

			/* The search may come up empty (or linkage_create may return
			 * NULL); never hand a NULL linkage to the consumers. */
			if (linkage != NULL) {
				process_linkage(linkage, copts);
				linkage_delete(linkage);
			}
		}
		fprintf(stdout, "+++++ error %d\n", batch_errors);
	} else {
		if (strstr(test, ",batch_print_parse_statistics,")) {
			print_parse_statistics(sent, opts);
		}
	}
}
/*
 * Processes a single linkage of `sent`: the one whose index equals `num`.
 *
 * The loop walks linkage indices 0 .. num_to_query-1, where num_to_query is
 * the number of valid linkages (or of all post-processed linkages when
 * "display bad" is enabled), capped at DISPLAY_MAX. Every index other than
 * `num` is ignored. The selected linkage is skipped if it has violations
 * and "display bad" is off; otherwise it is handed to process_linkage()
 * together with `tmp_path` and `file_name`.
 *
 * All console output of the original interactive version is commented out;
 * the surrounding conditionals were kept, so the linkage query functions
 * are still called when verbosity > 0 even though their results are unused.
 *
 * Returns the character read by fget_input_char() if it is not '\n'
 * (read only when more linkages remain to be queried), otherwise 'x'.
 */
static int process_some_linkages(Sentence sent, Parse_Options opts , int num ,char * tmp_path,char * file_name)
{
	int c;
	int i, num_displayed, num_to_query;
	Linkage linkage;
	double corpus_cost;

	if (verbosity > 0) print_parse_statistics(sent, opts);
	if (!parse_options_get_display_bad(opts)) {
		num_to_query = MIN(sentence_num_valid_linkages(sent), DISPLAY_MAX);
	} else {
		num_to_query = MIN(sentence_num_linkages_post_processed(sent), DISPLAY_MAX);
	}

	for (i=0, num_displayed=0; i<num_to_query; i++) {
		/* Local modification: only the requested linkage index is handled. */
		if (i==num) {
			if ((sentence_num_violations(sent, i) > 0) && (!parse_options_get_display_bad(opts))) {
				continue;
			}

			linkage = linkage_create(i, sent, opts);

			if (verbosity > 0) {
				if ((sentence_num_valid_linkages(sent) == 1) && (!parse_options_get_display_bad(opts))) {
					// fprintf(stdout, " Unique linkage, ");
				} else if ((parse_options_get_display_bad(opts)) && (sentence_num_violations(sent, i) > 0)) {
					// fprintf(stdout, " Linkage %d (bad), ", i+1);
				} else {
					// fprintf(stdout, " Linkage %d, ", i+1);
				}

				if (!linkage_is_canonical(linkage)) {
					// fprintf(stdout, "non-canonical, ");
				}
				if (linkage_is_improper(linkage)) {
					// fprintf(stdout, "improper fat linkage, ");
				}
				if (linkage_has_inconsistent_domains(linkage)) {
					// fprintf(stdout, "inconsistent domains, ");
				}

				corpus_cost = linkage_corpus_cost(linkage);
				if (corpus_cost < 0.0f) {
					/* fprintf(stdout, "cost vector = (UNUSED=%d DIS=%d AND=%d LEN=%d)\n",
					       linkage_unused_word_cost(linkage),
					       linkage_disjunct_cost(linkage),
					       linkage_and_cost(linkage),
					       linkage_link_cost(linkage));*/
				} else {
					/* fprintf(stdout, "cost vector = (CORP=%6.4f UNUSED=%d DIS=%d AND=%d LEN=%d)\n",
					       corpus_cost,
					       linkage_unused_word_cost(linkage),
					       linkage_disjunct_cost(linkage),
					       linkage_and_cost(linkage),
					       linkage_link_cost(linkage)); */
				}
			}

			/* Local modification: forward the output location. */
			process_linkage(linkage, opts,tmp_path,file_name);
			// process_linkage(linkage, opts);
			linkage_delete(linkage);

			if (++num_displayed < num_to_query) {
				if (verbosity > 0) {
					// fprintf(stdout, "Press RETURN for the next linkage.\n");
				}
				c = fget_input_char(stdin, stdout, opts);
				if (c != '\n') return c;
			}
		} // end of: if (i==num)
	}
	return 'x';
}
/******************************************************************************************* * This functions translats a list of sentences in the input_string array, * terminates when it finds "end" in the array * Inputs: input_string: Array of sentences * all_linkages: if true, translate all possible linkages of the sentence * out_to_file : if true, outputs the results to seperate files in the out directory, * the out files names would be something like : src-x-y: Contains a linkage for the source sentence, x is the index of sentence in the array and y is the index of linkage trg-x-y-z: Contains a linklage for the target sentence, x is the index of sentence in the array, y is the index of linkage and z is the zth translation of that linkage x-y(a linkage in the source language may have more than one correspondent target * linkage) *******************************************************************************************/ void translate(char input_string[][200], int all_linkages, int out_to_file){ Dictionary dict; Parse_Options opts; Sentence sent; Linkage src_linkage, trg_linkage; Transfer trans; char * diagram; FILE * fp; int i, j, num_src_linkages, num_trg_linkages; //char filename[30], txfilename[30], filenum[4]; char txfilename[30]; char output_string[200]; int n; opts = parse_options_create(); parse_options_set_verbosity (opts, FALSE); parse_options_set_display_walls(opts, TRUE); parse_options_set_display_postscript(opts, TRUE); dict = dictionary_create("4.0.dict", "4.0.knowledge", NULL, NULL, "morphology/morphemes.dict"); if (!dict){ fprintf(stderr, "%s\n", lperrmsg); parse_options_delete(opts); printf("size : %lld", space_in_use); exit(0); } trans=transfer_create("translation/mapfile.txt", "translation/links_list.txt","translation/lexicon.txt", "target/trg.dict" ); if (!trans){ fprintf(stderr, "%s\n", maperrmsg); fprintf(stderr, "%s\n", lperrmsg); printf("size : %lld", space_in_use); exit(0); } for (n=0; strcmp (input_string[n],"end")!=0; n++ ){ sent = 
sentence_create(input_string[n], dict); if (!sent){ fprintf(stderr, "%s\n", lperrmsg); parse_options_delete(opts); printf("size : %lld", space_in_use); exit(0); } printf ("\n************************************\n%d-%s\n************************************\n", n+1, input_string[n]); num_src_linkages = sentence_parse(sent, opts); num_src_linkages = (all_linkages) ? num_src_linkages: (num_src_linkages!=0)*1; for (i=0; i<num_src_linkages;i++) { src_linkage = linkage_create(i, sent, opts); diagram = linkage_print_diagram(src_linkage); printf ("\nLinkage No. %d-%d\n\n", n+1, i+1); printf("%s\n", diagram); if (out_to_file){ sprintf(txfilename,"out/src-%d-%d.txt", n+1, i+1); fp=fopen(txfilename,"w+"); if (fp==NULL){ fprintf(stderr, "%s%s\n", "Unable to open ", txfilename); printf("size : %lld", space_in_use); exit(0); } fprintf(fp,"%s",diagram); fclose(fp); } string_delete(diagram); num_trg_linkages = transfer_linkage_driver(trans, src_linkage); if(num_trg_linkages==0){ fprintf(stderr, "%s\n", maperrmsg); } else{ parse_options_set_display_walls (trans->opts, TRUE); for (j=0; j<num_trg_linkages; j++){ //second parameter should always be 0 !? trg_linkage=trans_linkage_create(trans, 0, trans->sent[j], trans->opts); diagram = linkage_print_diagram(trg_linkage); printf ("\nTranslation No. %d_%d_%d\n\n", n+1,i+1,j+1); printf("Translation: %s\n", output_string); printf("%s\n", diagram); if (out_to_file){ sprintf(txfilename,"out/trg-%d-%d-%d.txt", n+1, i+1, j+1); fp=fopen(txfilename,"w+"); if (fp==NULL){ fprintf(stderr, "%s%s\n", "Unable to open ", txfilename); printf("size : %lld", space_in_use); exit(0); } extract_sent (trg_linkage, output_string); fprintf(fp, "Translation:%s\n", output_string); fprintf(fp,"%s",diagram); fclose(fp); } string_delete(diagram); linkage_delete(trg_linkage); } } linkage_delete(src_linkage); } fprintf(stderr, "%s\n", lperrmsg); sentence_delete(sent); } transfer_delete(trans); dictionary_delete(dict); parse_options_delete(opts); }
/******************************************************************************************* * This functions parses a list of sentences and outputs the results to seperate files in * the out directory, * The files are indexed as out_x_y, where x is the index of the sentence in the array and y * is the index of the linkage *******************************************************************************************/ void normal_parse(char input_string[][200], int unify_features, int all_linkages, int out_to_file){ Dictionary dict; Parse_Options opts; int n; Sentence sent; Linkage linkage; char * diagram; int i, num_linkages; char txfilename[30]; FILE * fp; opts = parse_options_create(); parse_options_set_display_walls(opts, TRUE); parse_options_set_display_postscript(opts, TRUE); parse_options_set_unify_features(opts, unify_features); dict = dictionary_create("4.0.dict", "4.0.knowledge", NULL, NULL, "morphology/morphemes.dict"); if (!dict){ fprintf(stderr, "%s\n", lperrmsg); parse_options_delete(opts); printf("size : %lld", space_in_use); exit(0); } //setting opts->unify_features to TRUE enables unification for (n=0; strcmp (input_string[n],"end")!=0; n++ ){ sent = sentence_create(input_string[n], dict); if (!sent){ fprintf(stderr, "%s\n", lperrmsg); dictionary_delete(dict); parse_options_delete(opts); exit(0); } printf ("\n************************************\n%d-%s\n************************************\n", n+1, input_string[n]); num_linkages = sentence_parse(sent, opts); num_linkages = (all_linkages) ? num_linkages : (num_linkages!=0)*1 ; //Echos all linkages to screen, ps file and text file in ./out directory for (i=0; i<num_linkages; ++i) { linkage = linkage_create(i , sent, opts); diagram = linkage_print_diagram(linkage); printf ("\n Linkage No. 
%d-%d\n\n", n+1, i+1); printf("%s\n", diagram); if (out_to_file){ sprintf(txfilename,"out/src-%d-%d.txt", n+1, i+1); fp=fopen(txfilename,"w+"); if (fp==NULL){ fprintf(stderr, "%s%s\n", "Unable to open ", txfilename); printf("size : %lld", space_in_use); exit(0); } fprintf(fp,"%s",diagram); fclose(fp); } string_delete(diagram); linkage_delete(linkage); } sentence_delete(sent); } dictionary_delete(dict); parse_options_delete(opts); }
/*
 * Interactive display of the linkages of `sent`.
 *
 * Up to DISPLAY_MAX linkages are shown: the valid ones, or all
 * post-processed ones when "display bad" is enabled. With verbosity > 0 a
 * header line describing each linkage is printed before process_linkage()
 * is called. Between linkages one input character is read; anything other
 * than '\n' is pushed back onto stdin, input_pending is set, and the
 * display stops.
 */
void process_some_linkages(Sentence sent, Parse_Options opts)
{
	int idx, key, shown, limit;
	Linkage lkg;

	if (verbosity > 0)
		print_parse_statistics(sent, opts);

	if (parse_options_get_display_bad(opts))
		limit = MIN(sentence_num_linkages_post_processed(sent), DISPLAY_MAX);
	else
		limit = MIN(sentence_num_valid_linkages(sent), DISPLAY_MAX);

	shown = 0;
	for (idx = 0; idx < limit; ++idx)
	{
		/* Bad linkages are shown only on request. */
		if ((sentence_num_violations(sent, idx) > 0) &&
		    (!parse_options_get_display_bad(opts)))
			continue;

		lkg = linkage_create(idx, sent, opts);

		if (verbosity > 0)
		{
			if ((sentence_num_valid_linkages(sent) == 1) &&
			    (!parse_options_get_display_bad(opts)))
				fprintf(stdout, " Unique linkage, ");
			else if ((parse_options_get_display_bad(opts)) &&
			         (sentence_num_violations(sent, idx) > 0))
				fprintf(stdout, " Linkage %d (bad), ", idx+1);
			else
				fprintf(stdout, " Linkage %d, ", idx+1);

			if (!linkage_is_canonical(lkg))
				fprintf(stdout, "non-canonical, ");
			if (linkage_is_improper(lkg))
				fprintf(stdout, "improper fat linkage, ");
			if (linkage_has_inconsistent_domains(lkg))
				fprintf(stdout, "inconsistent domains, ");

			fprintf(stdout, "cost vector = (UNUSED=%d DIS=%d AND=%d LEN=%d)\n",
			        linkage_unused_word_cost(lkg),
			        linkage_disjunct_cost(lkg),
			        linkage_and_cost(lkg),
			        linkage_link_cost(lkg));
		}

		process_linkage(lkg, opts);
		linkage_delete(lkg);

		++shown;
		if (shown >= limit)
			continue;	/* nothing more to offer: no prompt */

		if (verbosity > 0)
			fprintf(stdout, "Press RETURN for the next linkage.\n");

		key = fget_input_char(stdin, stdout, opts);
		if (key != '\n')
		{
			/* Leave the character for the main input loop. */
			ungetc(key, stdin);
			input_pending = TRUE;
			break;
		}
	}
}
/*
 * Interactive display of the linkages of `sent`.
 *
 * Walks all post-processed linkages, skipping those with violations unless
 * copts->display_bad is set, and passes each displayed linkage to
 * process_linkage(). At most DISPLAY_MAX linkages are displayed, or the
 * value returned by auto_next_linkage_test(test) when that is non-zero (in
 * which case the "next linkage" prompt is skipped as well).
 *
 * Returns "x" when the display finished normally; otherwise the string
 * returned by fget_input_string() (possibly NULL) when the user entered
 * something other than an empty line between linkages.
 *
 * NOTE(review): the `in` parameter is not used; input is read from stdin.
 */
static const char *process_some_linkages(FILE *in, Sentence sent, Command_Options* copts)
{
	int i, num_to_query, num_to_display, num_displayed;
	Linkage linkage;
	double corpus_cost;
	Parse_Options opts = copts->popts;
	int display_max = DISPLAY_MAX;
	bool auto_next_linkage = false;

	i = auto_next_linkage_test(test);
	if (i != 0)
	{
		display_max = i;
		auto_next_linkage = true;
	}

	if (verbosity > 0) print_parse_statistics(sent, opts, copts);
	num_to_query = sentence_num_linkages_post_processed(sent);
	if (!copts->display_bad)
	{
		num_to_display = MIN(sentence_num_valid_linkages(sent), display_max);
	}
	else
	{
		num_to_display = MIN(num_to_query, display_max);
	}

	for (i=0, num_displayed=0; i<num_to_query; i++)
	{
		if ((sentence_num_violations(sent, i) > 0) && !copts->display_bad)
		{
			continue;
		}

		linkage = linkage_create(i, sent, opts);

		/* Currently, sat solver sets the linkage violation indication
		 * only when it creates the linkage as a result of the above call. */
		if ((sentence_num_violations(sent, i) > 0) && !copts->display_bad)
		{
			/* The linkage was already created; release it before
			 * skipping, otherwise it is leaked. */
			if (linkage) linkage_delete(linkage);
			continue;
		}

		/* Currently, sat solver returns NULL when there ain't no more */
		if (!linkage)
		{
			if (verbosity > 0)
			{
				if (0 == i)
					fprintf(stdout, "No linkages found.\n");
				else
					fprintf(stdout, "No more linkages.\n");
			}
			break;
		}

		if (verbosity > 0)
		{
			if ((sentence_num_valid_linkages(sent) == 1) && !copts->display_bad)
			{
				fprintf(stdout, "\tUnique linkage, ");
			}
			else if (copts->display_bad && (sentence_num_violations(sent, i) > 0))
			{
				fprintf(stdout, "\tLinkage %d (bad), ", num_displayed+1);
			}
			else
			{
				fprintf(stdout, "\tLinkage %d, ", num_displayed+1);
			}

			corpus_cost = linkage_corpus_cost(linkage);
			if (corpus_cost < 0.0f)
			{
				fprintf(stdout, "cost vector = (UNUSED=%d DIS=%5.2f LEN=%d)\n",
				        linkage_unused_word_cost(linkage),
				        linkage_disjunct_cost(linkage),
				        linkage_link_cost(linkage));
			}
			else
			{
				fprintf(stdout, "cost vector = (CORP=%6.4f UNUSED=%d DIS=%5.2f LEN=%d)\n",
				        corpus_cost,
				        linkage_unused_word_cost(linkage),
				        linkage_disjunct_cost(linkage),
				        linkage_link_cost(linkage));
			}
		}

		process_linkage(linkage, copts);
		linkage_delete(linkage);

		if (++num_displayed < num_to_display)
		{
			if (!auto_next_linkage)
			{
				if ((verbosity > 0) && (!copts->batch_mode) && isatty_stdin && isatty_stdout)
				{
					fprintf(stdout, "Press RETURN for the next linkage.\n");
				}
				char *rc = fget_input_string(stdin, stdout, /*check_return*/true);
				if ((NULL == rc) || (*rc != '\n')) return rc;
			}
		}
		else
		{
			break;
		}
	}
	return "x";
}
/*
 * Interactive display of the linkages of `sent`.
 *
 * Walks all post-processed linkages, skipping those with violations unless
 * copts->display_bad is set, and passes each one to process_linkage().
 * Stops after DISPLAY_MAX linkages (or after all valid / all queried ones,
 * whichever is smaller), or as soon as linkage_create() returns NULL.
 *
 * Unless `test` contains ",auto-next-linkage,", one character is read
 * between linkages; if it is not '\n' it is returned immediately.
 * Returns 'x' when the display finished normally.
 */
static int process_some_linkages(Sentence sent, Command_Options* copts)
{
	Parse_Options popts = copts->popts;
	Linkage lkg;
	double cost;
	int idx, total, cap, shown, key;

	if (verbosity > 0)
		print_parse_statistics(sent, popts);

	total = sentence_num_linkages_post_processed(sent);
	if (copts->display_bad)
		cap = MIN(total, DISPLAY_MAX);
	else
		cap = MIN(sentence_num_valid_linkages(sent), DISPLAY_MAX);

	shown = 0;
	for (idx = 0; idx < total; idx++)
	{
		if ((sentence_num_violations(sent, idx) > 0) && !copts->display_bad)
			continue;

		lkg = linkage_create(idx, sent, popts);

		/* Currently, sat solver returns NULL when there ain't no more */
		if (!lkg)
			break;

		if (verbosity > 0)
		{
			if ((sentence_num_valid_linkages(sent) == 1) && !copts->display_bad)
				fprintf(stdout, " Unique linkage, ");
			else if (copts->display_bad && (sentence_num_violations(sent, idx) > 0))
				fprintf(stdout, " Linkage %d (bad), ", shown+1);
			else
				fprintf(stdout, " Linkage %d, ", shown+1);

			cost = linkage_corpus_cost(lkg);
			if (cost < 0.0f)
			{
				fprintf(stdout, "cost vector = (UNUSED=%d DIS=%5.2f LEN=%d)\n",
				        linkage_unused_word_cost(lkg),
				        linkage_disjunct_cost(lkg),
				        linkage_link_cost(lkg));
			}
			else
			{
				fprintf(stdout, "cost vector = (CORP=%6.4f UNUSED=%d DIS=%5.2f LEN=%d)\n",
				        cost,
				        linkage_unused_word_cost(lkg),
				        linkage_disjunct_cost(lkg),
				        linkage_link_cost(lkg));
			}
		}

		process_linkage(lkg, copts);
		linkage_delete(lkg);

		++shown;
		if (shown >= cap)
			break;		/* display quota reached */

		if (strstr(test, ",auto-next-linkage,"))
			continue;	/* no prompt in auto-advance mode */

		if (verbosity > 0)
			fprintf(stdout, "Press RETURN for the next linkage.\n");

		key = fget_input_char(stdin, stdout, copts);
		if (key != '\n')
			return key;
	}
	return 'x';
}
/// This is the basic sentence dissection static PyObject *sentence(PyObject *self, PyObject *args) { Dictionary dict; Parse_Options opts; Sentence sent; Linkage linkage; Linkage sub_linkage; char * diagram; /// Link counts int num_linkages; int links; /// Index's for the iterators int link_idx; int word_idx; int num_words; long span; long sub_linkages; const char *text; const char *d_output; PyObject *output_list; PyObject *word_list; PyObject *word2_list; PyObject *span_list; PyObject *temp; PyObject *sublinkage_list; PyObject *_diagram; output_list = PyList_New(0); word_list = PyList_New(0); word2_list = PyList_New(0); sublinkage_list = PyList_New(0); span_list = PyList_New(0); if (!PyArg_ParseTuple(args, "s", &text)) return NULL; opts = parse_options_create(); parse_options_set_verbosity(opts, -1); parse_options_set_screen_width(opts, 50); setlocale(LC_ALL, ""); dict = dictionary_create_default_lang(); if (!dict) { PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary"); Py_INCREF(Py_None); return Py_None; } sent = sentence_create(text, dict); sentence_split(sent, opts); num_linkages = sentence_parse(sent, opts); if (num_linkages > 0) { linkage = linkage_create(0, sent, opts); /// Get the lengths of everything num_words = linkage_get_num_words(linkage); links = linkage_get_num_links(linkage); for(link_idx=0; link_idx < links; link_idx++) { PyObject *temp_subLen; diagram = linkage_print_diagram(linkage); _diagram = PyString_FromString(diagram); sub_linkage = linkage_create(link_idx, sent, opts); sub_linkages = linkage_get_num_sublinkages(linkage); temp_subLen = PyLong_FromLong(sub_linkages); linkage_delete(sub_linkage); PyList_Append(sublinkage_list, temp_subLen); span = linkage_get_link_length(linkage, link_idx); PyList_Append(span_list, PyInt_FromLong(span)); PyObject *temp_list; temp_list = PyList_New(0); /// Sub Group these (left and right labels) const char *t1 = linkage_get_link_llabel(linkage, link_idx); temp = 
PyString_FromString(t1); PyList_Append(temp_list, temp); const char *t2 = linkage_get_link_rlabel(linkage, link_idx); temp = PyString_FromString(t2); PyList_Append(temp_list, temp); /// Then add to the main list PyList_Append(output_list, temp_list); /// Just the label const char *t3 = linkage_get_link_label(linkage, link_idx); temp = PyString_FromString(t3); PyList_Append(word2_list, temp); } for(word_idx=0; word_idx < num_words; word_idx++) { d_output = linkage_get_word(linkage, word_idx); PyObject *word; word = PyString_FromString(d_output); PyList_Append(word_list, word); } linkage_free_diagram(diagram); linkage_delete(linkage); } else { sentence_delete(sent); dictionary_delete(dict); parse_options_delete(opts); Py_INCREF(Py_None); return Py_None; } sentence_delete(sent); dictionary_delete(dict); parse_options_delete(opts); return Py_BuildValue("SSSSSS", word_list, span_list, output_list, word2_list, sublinkage_list, _diagram); }
/*
 * Destroys a LinkagePtr wrapper: deletes the linkage it holds, then frees
 * the wrapper itself. `ptr` is dereferenced unconditionally.
 */
void
free_linkage_ptr(LinkagePtr *ptr)
{
	/* The linkage must go first; it is reached through the wrapper. */
	linkage_delete(ptr->linkage);

	free(ptr);
}
/// Python entry point: parse the string argument with the default-language
/// dictionary and collect the domain names of every link of the first
/// linkage.
///
/// Returns a tuple (list of domain-name strings, domain count of the last
/// link), None when the sentence has no linkages, and NULL with a Python
/// exception set when the arguments are invalid, the dictionary cannot be
/// opened, or building the result fails.
static PyObject *domains(PyObject *self, PyObject *args)
{
    Dictionary    dict;
    Parse_Options opts;
    Sentence      sent;
    Linkage       linkage;

    /// Link counts
    int num_linkages;
    int links;
    int i;
    int j;
    int num_domains = 0;
    int failed = 0;

    const char  *text;
    const char **names;

    PyObject *output_list;
    PyObject *temp;

    /* Validate the arguments before allocating the result list. */
    if (!PyArg_ParseTuple(args, "s", &text))
        return NULL;

    opts = parse_options_create();
    parse_options_set_verbosity(opts, -1);

    setlocale(LC_ALL, "");
    dict = dictionary_create_default_lang();
    if (!dict) {
        /* An exception is being raised: return NULL, not None. */
        parse_options_delete(opts);
        PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary");
        return NULL;
    }

    sent = sentence_create(text, dict);
    sentence_split(sent, opts);
    num_linkages = sentence_parse(sent, opts);

    if (num_linkages <= 0) {
        sentence_delete(sent);
        dictionary_delete(dict);
        parse_options_delete(opts);
        Py_INCREF(Py_None);
        return Py_None;
    }

    linkage = linkage_create(0, sent, opts);
    output_list = PyList_New(0);
    if (output_list == NULL)
        failed = 1;

    /* The per-link domain accessors take a LINK index, so iterate over the
     * links (the old loop ran 0..num_sublinkages inclusive). */
    links = linkage_get_num_links(linkage);
    for (i = 0; !failed && i < links; i++) {
        num_domains = linkage_get_link_num_domains(linkage, i);
        names = linkage_get_link_domain_names(linkage, i);

        /* The old condition was `num_domains < j` with j starting at 0,
         * which never held, so no name was ever collected. */
        for (j = 0; j < num_domains; j++) {
            temp = PyString_FromString(names[j]);
            if (temp == NULL || PyList_Append(output_list, temp) < 0) {
                Py_XDECREF(temp);
                failed = 1;
                break;
            }
            Py_DECREF(temp);   /* the list holds its own reference */
        }
    }

    linkage_delete(linkage);
    sentence_delete(sent);
    dictionary_delete(dict);
    parse_options_delete(opts);

    if (failed) {
        Py_XDECREF(output_list);
        return NULL;
    }

    /* "N" hands our reference to the tuple instead of leaking it. */
    return Py_BuildValue("Ni", output_list, num_domains);
}
SPOTriplets NLP::sentence2triplets ( const char* sentence ) { // vector of triplets SPOTriplets triplets; #ifdef DEBUG std::cout << "The sentence: " << sentence << std::endl; #endif // creates a Sentence from the input char* Sentence sent = sentence_create ( sentence, dict_ ); #ifdef DEBUG std::cout << "Sentence created" << std::endl; #endif // tokenizes the sentence sentence_split ( sent, parse_opts_ ); #ifdef DEBUG std::cout << "Sentence splitted" << std::endl; #endif // searches for all possible linkages int num_linkages = sentence_parse ( sent, parse_opts_ ); #ifdef DEBUG std::cout << "Sentence parsed" << std::endl; std::cout << "Number of linkages: " << num_linkages << std::endl; #endif // just one triplet SPOTriplet triplet; // if there is any linkage in the sentence if( num_linkages > 0 ) { // create the linkage Linkage linkage = linkage_create ( 0, sent, parse_opts_ ); #ifdef DEBUG // prints the sentence's diagram std::cout << "The diagram: " << std::endl; char *diagram = linkage_print_diagram(linkage, true, 800); std::cout << diagram << std::endl; linkage_free_diagram( diagram ); // end print diagram #endif std::vector<std::string> labels; // 1. find the S_link // S* except there is an SJ* because then S* except Spx // two cases: there is SJ* and there is not SJ* // TODO: VJlp VJrp same as SJ but to predications // TODO: SFut SFst what the f**k? 
###FIXED### // TODO: His form was shining like the light not working ###FIXED### // TODO: Car is mine not working ###FIXED### // TODO: The little brown bear has eaten all of the honey not working ###FIXED### // REGEXES std::regex SJ_( "SJ.*" ); std::regex VJ_( "VJ.*"); std::regex subject( "(Ss.*)|(SFut)|(Sp\*.*)" ); std::regex Spx( "Spx.*" ); // TODO:fix theese initializer list not allowed ###FIXED### std::regex predicate( "(Pv.*)|(Pg.*)|(PP.*)|(I.*)|(TO)|(MVi.*)" ); // TODO: make one from theese // (Sp.*)|(Ss.*) ###FIXED### std::regex noun_adject_object ( "(O.*)|(Os.*)|(Op.*)|(MVpn.*)|(Pa.*)|(MVa.*)" ); std::regex preposition ( "(MVp.*)|(Pp.*)|(OF)|(TO)" ); std::regex prep_object ( "(J.*)|(TI)|(I.*)|(ON)" ); // TODO: problems with matching!! Pg*!! ###FIXED### // TODO: problems with matching!! Mvp.*!! ###FIXED### bool s_found = false; bool p_found = false; bool o_found = false; bool SJ = false; // search for SJ.s labels for( auto label: labels ) { if( std::regex_match( label, SJ_ ) ) { SJ = true; break; } } // multiple subject in the sentence if( SJ ) { // SPls left -> first subject // SPrs right -> second subject // Spx right -> predicate // SJ-s are multiple subjects std::string temp; // go through every linkage for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get their label std::string l = linkage_get_link_label( linkage, i ); // if there is an SJl* label if( std::regex_match( l, std::regex( "SJl.*" ) ) ) { // SJls left side triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); triplet.cut( triplet.s ); temp = triplet.s + " "; // and word triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); triplet.cut( triplet.s ); temp += triplet.s + " "; // find SJr* for( int j = 0; j < linkage_get_num_links( linkage ); ++j ) { std::string m = linkage_get_link_label( linkage, j ); if( std::regex_match( m, std::regex( "SJr.*" ) ) ) { triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) 
); triplet.cut(); temp += triplet.s; triplet.s = temp; s_found = true; #ifdef DEBUG std::cout << "Subject found: " << triplet.s << std::endl; #endif break; } // if } // for break; } // if } // for // now we have the subject // find Spx and its right side will be the starter predicate std::string current_word; for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, std::regex( "Spx.*" ) ) ) { triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); } } // from now all the same as on the else branch !!!! bool predicate_match = false; // search for the linkage that has triplet.s as left! do { predicate_match = false; for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // every linkage's left word std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // every linkage's label std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, predicate ) && word_i == current_word ) { // found predicate triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); current_word = triplet.p; predicate_match = true; break; } } } while( predicate_match ); // we now have the predicate too // TODO: multiple predicates! 
p_found = true; #ifdef DEBUG std::cout << "Predicate found: " << triplet.p << std::endl; #endif // ###COPY BEGIN### // search for noun object or adjective object for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get every linkage label std::string l = linkage_get_link_label( linkage, i ); // get the left word of every linkage std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if thete is a label that match AND its left word is the predicate if( std::regex_match( l, noun_adject_object ) && triplet.p == l_word ) { // then the object is that linkage's right word triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); triplet.cut( triplet.o ); o_found = true; #ifdef DEBUG std::cout << "Adjective or noun object found: " << triplet.o << std::endl; #endif } // if } // for // still not found object, then search for preposition if( !o_found ) { // go through every linkage for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get the linkage's label std::string l = linkage_get_link_label( linkage, i ); // and left word std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if there is a linkage which is a preposition and its left word is the predicate if( std::regex_match( l, preposition ) && triplet.p == word_i ) { // found preposition // search for prep_object // then the temp will contain the preposition label's right word std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); #ifdef DEBUG std::cout << "Preposition found! 
and its rigth word is: " << temp << std::endl; #endif for( int j = 0; j < linkage_get_num_links( linkage ); ++j ) { // every linkages std::string m = linkage_get_link_label( linkage, j ); // every left word std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); // if there is a label with match and its left is exactly the preposition's right if( std::regex_match( m, prep_object ) && temp == word_j ) { triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); triplet.cut(triplet.o); triplet.o += " "; // save o std::string temp = triplet.o; triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) ); triplet.cut(triplet.o); temp += triplet.o; triplet.o = temp; o_found = true; #ifdef DEBUG std::cout << "Object found: " << triplet.o << std::endl; #endif } // if( std::regex_match( m, prep_object ) && temp == word_j ) END } // for J END } // if( std::regex_match( l, preposition ) && triplet.p == word_i ) END } // for I END } // if( !o_found ) END if( s_found && p_found && o_found ) { // TODO: cut the words itself not the whole triplet // have to cut every word itself // triplet.cut(); triplet.cut(triplet.s); triplet.cut(triplet.p); triplets.push_back( triplet ); s_found = false; p_found = false; o_found = false; } // ###COPY END### } else // only one subject { // except Spx!!! 
// S left -> subject // S right -> predicate at first // if the word next to S right, is an element of Pv*, Pg* PP*, I*, TO, MVi* // then the new predicate will be that word std::string current_word; // search for subject (S_link) for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get the linkage's label std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, subject ) ) { // subject found triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); s_found = true; current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); triplet.p = current_word; #ifdef DEBUG std::cout << "Subject found: " << triplet.s << std::endl; #endif break; } } if( s_found ) { bool predicate_match = false; // search for the linkage that has triplet.s as left! do { predicate_match = false; for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // every linkage's left word std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // every linkage's label std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, predicate ) && l_word == current_word ) { // found predicate triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); current_word = triplet.p; predicate_match = true; break; } } // for END } while( predicate_match ); p_found = true; #ifdef DEBUG std::cout << "Predicate found: " << triplet.p << std::endl; #endif } // if( s_found ) END // subject and predicate found // search for object // from k to linkage_get_num_links( linkage ) // if there is any of the noun, adjective od preposition object then that // label's right will give the object. // !!! search only between labels that has triplet.p as left word !!!!! 
// search for noun object or adjective objects // go through all links for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get every linkage label std::string l = linkage_get_link_label( linkage, i ); // get the left word of every linkage std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if thete is a label that match AND its left word is the predicate if( std::regex_match( l, noun_adject_object ) && triplet.p == word_i ) { // then the object is that linkage's right word triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); o_found = true; triplet.cut(triplet.o); #ifdef DEBUG std::cout << "Adjective or noun object found: " << triplet.o << std::endl; #endif } // if END } // for END // still not found object, then search for preposition if( !o_found ) { // go through every linkage for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get the linkage's label std::string l = linkage_get_link_label( linkage, i ); // and left word std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if there is a linkage which is a preposition and its left word is the predicate if( std::regex_match( l, preposition ) && triplet.p == word_i ) { // found preposition // search for prep_object // then the temp will contain the preposition label's right word std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); #ifdef DEBUG std::cout << "Preposition found! 
and its rigth word is: " << temp << std::endl; #endif // start search from there for( int j = 0; j < linkage_get_num_links( linkage ); ++j ) { // every linkages std::string m = linkage_get_link_label( linkage, j ); // every left word std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); #ifdef DEBUG if( std::regex_match( m, prep_object ) ) std::cout << m << " DOES match to (J.*)|(TI)|(I.*)|(ON)" << std::endl; #endif // if there is a label with match and its left is exactly the preposition's right if( std::regex_match( m, prep_object ) && temp == word_j ) { triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); triplet.cut(triplet.o); triplet.o += " "; // save o std::string temp = triplet.o; triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) ); triplet.cut(triplet.o); temp += triplet.o; triplet.o = temp; #ifdef DEBUG std::cout << "Object found: " << triplet.o << std::endl; #endif o_found = true; } } // for } // if } // for } // if( o_found ) END if( s_found && p_found && o_found ) { // TODO: cut the words itself not the whole triplet ###FIXED### // have to cut every word itself // triplet.cut(); triplet.cut(triplet.s); triplet.cut(triplet.p); triplets.push_back( triplet ); s_found = false; p_found = false; o_found = false; } } // end else linkage_delete ( linkage ); } // if( num_linkages > 0 ) END