/**
 * Fill in the table ctxt->word_used[i][w].  For each sublinkage i,
 * the cell for word w is set to 1 when w is the left or right end of
 * at least one link of that sublinkage, and to 0 otherwise.
 * When verbosity is 2 or more the table is also printed to stdout.
 */
static void count_words_used(con_context_t *ctxt, Linkage linkage)
{
	int sub, word, lnk;
	int sublinkage_count = linkage->num_sublinkages;

	/* A unionized linkage with several sublinkages: leave out the last one. */
	if ((linkage->unionized == 1) && (sublinkage_count > 1))
		sublinkage_count -= 1;

	if (verbosity >= 2)
		printf("Number of sublinkages = %d\n", sublinkage_count);

	for (sub = 0; sub < sublinkage_count; sub++)
	{
		/* Start with every word marked as unused. */
		for (word = 0; word < linkage->num_words; word++)
			ctxt->word_used[sub][word] = 0;

		/* The link accessors below operate on the current sublinkage. */
		linkage->current = sub;

		lnk = 0;
		while (lnk < linkage_get_num_links(linkage))
		{
			/* Mark both end points of this link as used. */
			ctxt->word_used[sub][linkage_get_link_lword(linkage, lnk)] = 1;
			ctxt->word_used[sub][linkage_get_link_rword(linkage, lnk)] = 1;
			lnk++;
		}

		if (verbosity < 2)
			continue;

		printf("Sublinkage %d: ", sub);
		for (word = 0; word < linkage->num_words; word++)
		{
			switch (ctxt->word_used[sub][word])
			{
				case 0:
					printf("0 ");
					break;
				case 1:
					printf("1 ");
					break;
				default:
					break;
			}
		}
		printf("\n");
	}
}
/**
 * Classify the words of the linkage in ctxt->wordtype[].
 *
 * Every word starts as NONE.  Then each link is examined in order and
 * the word at its right end is (re)classified:
 *   - link label is S, SX or SF (per uppercompare): STYPE; if that
 *     word is also the left end of a link matching "Pg#b", I, PP or
 *     "Pv", it becomes PTYPE instead;
 *   - link label matches "QI#d": QTYPE; if that word is also the left
 *     end of a link matching "D##w", it becomes QDTYPE instead;
 *   - link label matches "Mr" or "MX#d": QDTYPE.
 * Assignments are unconditional, so a later link overwrites whatever
 * an earlier link stored for the same word.
 * (This function is called once for each sublinkage.)
 */
static void generate_misc_word_info(con_context_t * ctxt, Linkage linkage)
{
	int outer, inner, word, other_word;
	const char *outer_label;
	const char *inner_label;

	word = 0;
	while (word < linkage->num_words)
	{
		ctxt->wordtype[word] = NONE;
		word++;
	}

	for (outer = 0; outer < linkage_get_num_links(linkage); outer++)
	{
		word = linkage_get_link_rword(linkage, outer);
		outer_label = linkage_get_link_label(linkage, outer);

		/* Right end of a subject-type link. */
		if ((uppercompare(outer_label, "S") == 0) ||
		    (uppercompare(outer_label, "SX") == 0) ||
		    (uppercompare(outer_label, "SF") == 0))
		{
			ctxt->wordtype[word] = STYPE;
			for (inner = 0; inner < linkage_get_num_links(linkage); inner++)
			{
				other_word = linkage_get_link_lword(linkage, inner);
				inner_label = linkage_get_link_label(linkage, inner);
				if (other_word != word)
					continue;

				/* Pvf, Pgf? */
				if ((post_process_match("Pg#b", inner_label) == 1) ||
				    (uppercompare(inner_label, "I") == 0) ||
				    (uppercompare(inner_label, "PP") == 0) ||
				    (post_process_match("Pv", inner_label) == 1))
				{
					ctxt->wordtype[word] = PTYPE;
				}
			}
		}

		/* Right end of a link matching "QI#d". */
		if (post_process_match("QI#d", outer_label) == 1)
		{
			ctxt->wordtype[word] = QTYPE;
			for (inner = 0; inner < linkage_get_num_links(linkage); inner++)
			{
				other_word = linkage_get_link_lword(linkage, inner);
				inner_label = linkage_get_link_label(linkage, inner);
				if (other_word != word)
					continue;

				if (post_process_match("D##w", inner_label) == 1)
					ctxt->wordtype[word] = QDTYPE;
			}
		}

		if (post_process_match("Mr", outer_label) == 1)
			ctxt->wordtype[word] = QDTYPE;

		if (post_process_match("MX#d", outer_label) == 1)
			ctxt->wordtype[word] = QDTYPE;
	}
}
/// This is the basic sentence dissection static PyObject *sentence(PyObject *self, PyObject *args) { Dictionary dict; Parse_Options opts; Sentence sent; Linkage linkage; Linkage sub_linkage; char * diagram; /// Link counts int num_linkages; int links; /// Index's for the iterators int link_idx; int word_idx; int num_words; long span; long sub_linkages; const char *text; const char *d_output; PyObject *output_list; PyObject *word_list; PyObject *word2_list; PyObject *span_list; PyObject *temp; PyObject *sublinkage_list; PyObject *_diagram; output_list = PyList_New(0); word_list = PyList_New(0); word2_list = PyList_New(0); sublinkage_list = PyList_New(0); span_list = PyList_New(0); if (!PyArg_ParseTuple(args, "s", &text)) return NULL; opts = parse_options_create(); parse_options_set_verbosity(opts, -1); parse_options_set_screen_width(opts, 50); setlocale(LC_ALL, ""); dict = dictionary_create_default_lang(); if (!dict) { PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary"); Py_INCREF(Py_None); return Py_None; } sent = sentence_create(text, dict); sentence_split(sent, opts); num_linkages = sentence_parse(sent, opts); if (num_linkages > 0) { linkage = linkage_create(0, sent, opts); /// Get the lengths of everything num_words = linkage_get_num_words(linkage); links = linkage_get_num_links(linkage); for(link_idx=0; link_idx < links; link_idx++) { PyObject *temp_subLen; diagram = linkage_print_diagram(linkage); _diagram = PyString_FromString(diagram); sub_linkage = linkage_create(link_idx, sent, opts); sub_linkages = linkage_get_num_sublinkages(linkage); temp_subLen = PyLong_FromLong(sub_linkages); linkage_delete(sub_linkage); PyList_Append(sublinkage_list, temp_subLen); span = linkage_get_link_length(linkage, link_idx); PyList_Append(span_list, PyInt_FromLong(span)); PyObject *temp_list; temp_list = PyList_New(0); /// Sub Group these (left and right labels) const char *t1 = linkage_get_link_llabel(linkage, link_idx); temp = 
PyString_FromString(t1); PyList_Append(temp_list, temp); const char *t2 = linkage_get_link_rlabel(linkage, link_idx); temp = PyString_FromString(t2); PyList_Append(temp_list, temp); /// Then add to the main list PyList_Append(output_list, temp_list); /// Just the label const char *t3 = linkage_get_link_label(linkage, link_idx); temp = PyString_FromString(t3); PyList_Append(word2_list, temp); } for(word_idx=0; word_idx < num_words; word_idx++) { d_output = linkage_get_word(linkage, word_idx); PyObject *word; word = PyString_FromString(d_output); PyList_Append(word_list, word); } linkage_free_diagram(diagram); linkage_delete(linkage); } else { sentence_delete(sent); dictionary_delete(dict); parse_options_delete(opts); Py_INCREF(Py_None); return Py_None; } sentence_delete(sent); dictionary_delete(dict); parse_options_delete(opts); return Py_BuildValue("SSSSSS", word_list, span_list, output_list, word2_list, sublinkage_list, _diagram); }
/*
 * Return the number of links of the linkage wrapped by `link`,
 * converted to a Ruby value with INT2FIX.  `self` is not used.
 */
VALUE linkage_count_num_links(const VALUE self, VALUE link)
{
    LinkagePtr *wrapper = retrieve_linkage(link);

    return INT2FIX(linkage_get_num_links(wrapper->linkage));
}
/*
 * Class:     LinkGrammar
 * Method:    getNumLinks
 * Signature: ()I
 *
 * Returns the number of links of the linkage stored in the
 * per_thread_data record that get_ptd() yields for this call.
 */
JNIEXPORT jint JNICALL
Java_org_linkgrammar_LinkGrammar_getNumLinks(JNIEnv *env, jclass cls)
{
	per_thread_data *thread_state = get_ptd(env, cls);
	jint link_count = linkage_get_num_links(thread_state->linkage);

	return link_count;
}
SPOTriplets NLP::sentence2triplets ( const char* sentence ) { // vector of triplets SPOTriplets triplets; #ifdef DEBUG std::cout << "The sentence: " << sentence << std::endl; #endif // creates a Sentence from the input char* Sentence sent = sentence_create ( sentence, dict_ ); #ifdef DEBUG std::cout << "Sentence created" << std::endl; #endif // tokenizes the sentence sentence_split ( sent, parse_opts_ ); #ifdef DEBUG std::cout << "Sentence splitted" << std::endl; #endif // searches for all possible linkages int num_linkages = sentence_parse ( sent, parse_opts_ ); #ifdef DEBUG std::cout << "Sentence parsed" << std::endl; std::cout << "Number of linkages: " << num_linkages << std::endl; #endif // just one triplet SPOTriplet triplet; // if there is any linkage in the sentence if( num_linkages > 0 ) { // create the linkage Linkage linkage = linkage_create ( 0, sent, parse_opts_ ); #ifdef DEBUG // prints the sentence's diagram std::cout << "The diagram: " << std::endl; char *diagram = linkage_print_diagram(linkage, true, 800); std::cout << diagram << std::endl; linkage_free_diagram( diagram ); // end print diagram #endif std::vector<std::string> labels; // 1. find the S_link // S* except there is an SJ* because then S* except Spx // two cases: there is SJ* and there is not SJ* // TODO: VJlp VJrp same as SJ but to predications // TODO: SFut SFst what the f**k? 
###FIXED### // TODO: His form was shining like the light not working ###FIXED### // TODO: Car is mine not working ###FIXED### // TODO: The little brown bear has eaten all of the honey not working ###FIXED### // REGEXES std::regex SJ_( "SJ.*" ); std::regex VJ_( "VJ.*"); std::regex subject( "(Ss.*)|(SFut)|(Sp\*.*)" ); std::regex Spx( "Spx.*" ); // TODO:fix theese initializer list not allowed ###FIXED### std::regex predicate( "(Pv.*)|(Pg.*)|(PP.*)|(I.*)|(TO)|(MVi.*)" ); // TODO: make one from theese // (Sp.*)|(Ss.*) ###FIXED### std::regex noun_adject_object ( "(O.*)|(Os.*)|(Op.*)|(MVpn.*)|(Pa.*)|(MVa.*)" ); std::regex preposition ( "(MVp.*)|(Pp.*)|(OF)|(TO)" ); std::regex prep_object ( "(J.*)|(TI)|(I.*)|(ON)" ); // TODO: problems with matching!! Pg*!! ###FIXED### // TODO: problems with matching!! Mvp.*!! ###FIXED### bool s_found = false; bool p_found = false; bool o_found = false; bool SJ = false; // search for SJ.s labels for( auto label: labels ) { if( std::regex_match( label, SJ_ ) ) { SJ = true; break; } } // multiple subject in the sentence if( SJ ) { // SPls left -> first subject // SPrs right -> second subject // Spx right -> predicate // SJ-s are multiple subjects std::string temp; // go through every linkage for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get their label std::string l = linkage_get_link_label( linkage, i ); // if there is an SJl* label if( std::regex_match( l, std::regex( "SJl.*" ) ) ) { // SJls left side triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); triplet.cut( triplet.s ); temp = triplet.s + " "; // and word triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); triplet.cut( triplet.s ); temp += triplet.s + " "; // find SJr* for( int j = 0; j < linkage_get_num_links( linkage ); ++j ) { std::string m = linkage_get_link_label( linkage, j ); if( std::regex_match( m, std::regex( "SJr.*" ) ) ) { triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) 
); triplet.cut(); temp += triplet.s; triplet.s = temp; s_found = true; #ifdef DEBUG std::cout << "Subject found: " << triplet.s << std::endl; #endif break; } // if } // for break; } // if } // for // now we have the subject // find Spx and its right side will be the starter predicate std::string current_word; for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, std::regex( "Spx.*" ) ) ) { triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); } } // from now all the same as on the else branch !!!! bool predicate_match = false; // search for the linkage that has triplet.s as left! do { predicate_match = false; for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // every linkage's left word std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // every linkage's label std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, predicate ) && word_i == current_word ) { // found predicate triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); current_word = triplet.p; predicate_match = true; break; } } } while( predicate_match ); // we now have the predicate too // TODO: multiple predicates! 
p_found = true; #ifdef DEBUG std::cout << "Predicate found: " << triplet.p << std::endl; #endif // ###COPY BEGIN### // search for noun object or adjective object for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get every linkage label std::string l = linkage_get_link_label( linkage, i ); // get the left word of every linkage std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if thete is a label that match AND its left word is the predicate if( std::regex_match( l, noun_adject_object ) && triplet.p == l_word ) { // then the object is that linkage's right word triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); triplet.cut( triplet.o ); o_found = true; #ifdef DEBUG std::cout << "Adjective or noun object found: " << triplet.o << std::endl; #endif } // if } // for // still not found object, then search for preposition if( !o_found ) { // go through every linkage for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get the linkage's label std::string l = linkage_get_link_label( linkage, i ); // and left word std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if there is a linkage which is a preposition and its left word is the predicate if( std::regex_match( l, preposition ) && triplet.p == word_i ) { // found preposition // search for prep_object // then the temp will contain the preposition label's right word std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); #ifdef DEBUG std::cout << "Preposition found! 
and its rigth word is: " << temp << std::endl; #endif for( int j = 0; j < linkage_get_num_links( linkage ); ++j ) { // every linkages std::string m = linkage_get_link_label( linkage, j ); // every left word std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); // if there is a label with match and its left is exactly the preposition's right if( std::regex_match( m, prep_object ) && temp == word_j ) { triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); triplet.cut(triplet.o); triplet.o += " "; // save o std::string temp = triplet.o; triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) ); triplet.cut(triplet.o); temp += triplet.o; triplet.o = temp; o_found = true; #ifdef DEBUG std::cout << "Object found: " << triplet.o << std::endl; #endif } // if( std::regex_match( m, prep_object ) && temp == word_j ) END } // for J END } // if( std::regex_match( l, preposition ) && triplet.p == word_i ) END } // for I END } // if( !o_found ) END if( s_found && p_found && o_found ) { // TODO: cut the words itself not the whole triplet // have to cut every word itself // triplet.cut(); triplet.cut(triplet.s); triplet.cut(triplet.p); triplets.push_back( triplet ); s_found = false; p_found = false; o_found = false; } // ###COPY END### } else // only one subject { // except Spx!!! 
// S left -> subject // S right -> predicate at first // if the word next to S right, is an element of Pv*, Pg* PP*, I*, TO, MVi* // then the new predicate will be that word std::string current_word; // search for subject (S_link) for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get the linkage's label std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, subject ) ) { // subject found triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); s_found = true; current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); triplet.p = current_word; #ifdef DEBUG std::cout << "Subject found: " << triplet.s << std::endl; #endif break; } } if( s_found ) { bool predicate_match = false; // search for the linkage that has triplet.s as left! do { predicate_match = false; for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // every linkage's left word std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // every linkage's label std::string l = linkage_get_link_label( linkage, i ); if( std::regex_match( l, predicate ) && l_word == current_word ) { // found predicate triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); current_word = triplet.p; predicate_match = true; break; } } // for END } while( predicate_match ); p_found = true; #ifdef DEBUG std::cout << "Predicate found: " << triplet.p << std::endl; #endif } // if( s_found ) END // subject and predicate found // search for object // from k to linkage_get_num_links( linkage ) // if there is any of the noun, adjective od preposition object then that // label's right will give the object. // !!! search only between labels that has triplet.p as left word !!!!! 
// search for noun object or adjective objects // go through all links for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get every linkage label std::string l = linkage_get_link_label( linkage, i ); // get the left word of every linkage std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if thete is a label that match AND its left word is the predicate if( std::regex_match( l, noun_adject_object ) && triplet.p == word_i ) { // then the object is that linkage's right word triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); o_found = true; triplet.cut(triplet.o); #ifdef DEBUG std::cout << "Adjective or noun object found: " << triplet.o << std::endl; #endif } // if END } // for END // still not found object, then search for preposition if( !o_found ) { // go through every linkage for( int i = 0; i < linkage_get_num_links( linkage ); ++i ) { // get the linkage's label std::string l = linkage_get_link_label( linkage, i ); // and left word std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) ); // if there is a linkage which is a preposition and its left word is the predicate if( std::regex_match( l, preposition ) && triplet.p == word_i ) { // found preposition // search for prep_object // then the temp will contain the preposition label's right word std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) ); #ifdef DEBUG std::cout << "Preposition found! 
and its rigth word is: " << temp << std::endl; #endif // start search from there for( int j = 0; j < linkage_get_num_links( linkage ); ++j ) { // every linkages std::string m = linkage_get_link_label( linkage, j ); // every left word std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); #ifdef DEBUG if( std::regex_match( m, prep_object ) ) std::cout << m << " DOES match to (J.*)|(TI)|(I.*)|(ON)" << std::endl; #endif // if there is a label with match and its left is exactly the preposition's right if( std::regex_match( m, prep_object ) && temp == word_j ) { triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) ); triplet.cut(triplet.o); triplet.o += " "; // save o std::string temp = triplet.o; triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) ); triplet.cut(triplet.o); temp += triplet.o; triplet.o = temp; #ifdef DEBUG std::cout << "Object found: " << triplet.o << std::endl; #endif o_found = true; } } // for } // if } // for } // if( o_found ) END if( s_found && p_found && o_found ) { // TODO: cut the words itself not the whole triplet ###FIXED### // have to cut every word itself // triplet.cut(); triplet.cut(triplet.s); triplet.cut(triplet.p); triplets.push_back( triplet ); s_found = false; p_found = false; o_found = false; } } // end else linkage_delete ( linkage ); } // if( num_linkages > 0 ) END