Example #1
0
File: lg_py.c Project: bluemoon/nlp
/*
 * Python entry point: parse the single string argument with the default
 * link-grammar dictionary and return whatever build_tree() produces for the
 * constituent tree of the first linkage.
 *
 * Returns:
 *   - NULL with an exception set when the arguments are wrong, when the
 *     dictionary cannot be opened (RuntimeError), or when build_tree()
 *     returns NULL;
 *   - None when the sentence cannot be created or yields no linkage;
 *   - otherwise the object returned by build_tree(), with one additional
 *     reference taken (the same reference count the previous code produced
 *     through Py_BuildValue("S", ...)).
 *
 * Every link-grammar object created here is released on every exit path.
 */
static PyObject *constituents(PyObject *self, PyObject *args) {
    Dictionary    dict;
    Parse_Options opts;
    Sentence      sent;
    Linkage       linkage;
    CNode *       cn;

    /// Link counts
    int   num_linkages;

    /// Set once build_tree() has actually been called
    int   tree_built = 0;

    const char *text;

    PyObject *output_list = NULL;

    if (!PyArg_ParseTuple(args, "s", &text))
        return NULL;

    opts = parse_options_create();
    parse_options_set_verbosity(opts, -1);

    setlocale(LC_ALL, "");
    dict = dictionary_create_default_lang();

    if (!dict) {
        /* An exception must be reported by returning NULL, not None. */
        parse_options_delete(opts);
        PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary");
        return NULL;
    }

    sent = sentence_create(text, dict);
    if (sent) {
        sentence_split(sent, opts);
        num_linkages = sentence_parse(sent, opts);

        if (num_linkages > 0) {
            linkage = linkage_create(0, sent, opts);
            if (linkage) {
                cn = linkage_constituent_tree(linkage);
                output_list = build_tree(cn, linkage);
                tree_built = 1;

                linkage_free_constituent_tree(cn);
                linkage_delete(linkage);
            }
        }
        sentence_delete(sent);
    }

    dictionary_delete(dict);
    parse_options_delete(opts);

    if (!tree_built) {
        Py_INCREF(Py_None);
        return Py_None;
    }

    /* build_tree() failed; presumably it left an exception set - TODO confirm. */
    if (output_list == NULL)
        return NULL;

    Py_INCREF(output_list);
    return output_list;
}
Example #2
0
/*
 * Parse each word in the decomposed_word list with ms->dict / ms->opts and,
 * for every linkage in which find_stem() reports a non-negative position,
 * add an entry to a morpheme list through morpheme_list_add().
 *
 * Nodes for which sentence_create() fails, and linkages that cannot be
 * created, are skipped. When PRINT_DIAGRAM is set, the diagram of every
 * linkage is printed to stdout.
 *
 * Returns the list head maintained by morpheme_list_add(), which is still
 * NULL if nothing was added.
 */
Morpheme * analyse_decomposed_word(Morpho_structures ms, Decomposed_word *decomposed_word){
	Sentence sent;
	Linkage linkage;
	Decomposed_word *node;
	Morpheme *morpheme_list=NULL;
	int num_linkages , i;
	int stem_pos;
	char * diagram;
//	ms->opts->unify_features = FALSE;
	for(node=decomposed_word; node!=NULL; node=node->next){
		sent=sentence_create(node->word, ms->dict);
		if(!sent){
			continue;
		}
		num_linkages = sentence_parse(sent, ms->opts);
		for (i=0; i<num_linkages; i++){
			linkage = linkage_create(i, sent, ms->opts);
			if (!linkage){
				continue;
			}
			if (PRINT_DIAGRAM){
				/* Reuse the linkage created above; creating a second one
				 * here would leak the first. */
				diagram = linkage_print_diagram(linkage);
				printf("%s\n", diagram);
				string_delete(diagram);
			}
			if ((stem_pos = find_stem(linkage))>=0){
				morpheme_list_add(ms, &morpheme_list,linkage->word[stem_pos], linkage->feature_array[0]);
			}
			linkage_delete(linkage);
		}
		sentence_delete(sent);
	}
	return morpheme_list;
}
Example #3
0
/*
 * Release everything held by the per-thread record and then the record
 * itself: sentence, (optionally) the phrase tree, linkage, dictionary and
 * both sets of parse options. The thread-specific slot (or the global
 * pointer in the non-pthread build) is cleared before ptd is freed.
 */
static void finish(per_thread_data *ptd)
{
	if (ptd->sent != NULL)
	{
		sentence_delete(ptd->sent);
	}
	ptd->sent = NULL;

#if DO_PHRASE_TREE
	if (tree != NULL)
	{
		linkage_free_constituent_tree(tree);
	}
	tree = NULL;
#endif

	if (ptd->linkage != NULL)
	{
		linkage_delete(ptd->linkage);
	}
	ptd->linkage = NULL;

	/* The dictionary and both option sets are deleted unconditionally. */
	dictionary_delete(ptd->dict);
	ptd->dict = NULL;

	parse_options_delete(ptd->opts);
	ptd->opts = NULL;

	parse_options_delete(ptd->panic_parse_opts);
	ptd->panic_parse_opts = NULL;

#ifdef USE_PTHREADS
	pthread_setspecific(java_key, NULL);
#else
	global_ptd = NULL;
#endif

	free(ptd);
}
Example #4
0
      /*
       * Minimal link-grammar demo: parse each sentence in input_string and
       * print the diagram of its first linkage, if it has one.
       *
       * Returns 0 on success and 1 when the dictionary cannot be opened.
       * Sentences that cannot be created are skipped.
       */
      int main() {

          Dictionary    dict;
          Parse_Options opts;
          Sentence      sent;
          Linkage       linkage;
          char *        diagram;
          int           i, num_linkages;
          char *        input_string[] = {
             "Grammar is useless because there is nothing to say -- Gertrude Stein.",
             "Computers are useless; they can only give you answers -- Pablo Picasso.",
          };
          /* Derived from the array so adding a sentence needs no other change. */
          int           num_sentences = (int)(sizeof(input_string) / sizeof(input_string[0]));

          opts  = parse_options_create();
          dict  = dictionary_create("4.0.dict", "4.0.knowledge", 
                   "4.0.constituent-knowledge", "4.0.affix");
          if (!dict) {
              fprintf(stderr, "Unable to open the dictionary\n");
              parse_options_delete(opts);
              return 1;
          }

          for (i=0; i<num_sentences; ++i) {
              sent = sentence_create(input_string[i], dict);
              if (!sent) {
                  continue;
              }
              num_linkages = sentence_parse(sent, opts);
              if (num_linkages > 0) {
                  linkage = linkage_create(0, sent, opts);
                  if (linkage) {
                      diagram = linkage_print_diagram(linkage);
                      printf("%s\n", diagram);
                      string_delete(diagram);
                      linkage_delete(linkage);
                  }
              }
              sentence_delete(sent);
          }

          dictionary_delete(dict);
          parse_options_delete(opts);
          return 0;
      }
Example #5
0
/*
 * Batch-mode handling of one parsed sentence.
 *
 * When there_was_an_error() reports a mismatch with the label, the first
 * linkage with zero violations (if any) is processed and the running
 * batch_errors count is printed. Otherwise parse statistics are printed
 * when the "batch-print-parse-statistics" test flag is enabled.
 */
static void batch_process_some_linkages(Label label,
                                        Sentence sent,
                                        Command_Options* copts)
{
	Parse_Options opts = copts->popts;

	if (there_was_an_error(label, sent, opts))
	{
		/* If we found at least one good linkage, print it. */
		if (sentence_num_valid_linkages(sent) > 0) {
			Linkage linkage = NULL;
			int i;
			for (i=0; i<sentence_num_linkages_post_processed(sent); i++)
			{
				if (0 == sentence_num_violations(sent, i))
				{
					linkage = linkage_create(i, sent, opts);
					break;
				}
			}
			/* The search may find nothing, or linkage_create() may fail;
			 * do not hand a NULL linkage to the callees. */
			if (linkage != NULL)
			{
				process_linkage(linkage, copts);
				linkage_delete(linkage);
			}
		}
		fprintf(stdout, "+++++ error %d\n", batch_errors);
	}
	else
	{
		if (test_enabled(test, "batch-print-parse-statistics"))
		{
			print_parse_statistics(sent, opts, copts);
		}
	}
}
Example #6
0
/*
 * Batch-mode error reporting: when there_was_an_error() flags the sentence,
 * process its first linkage (if any were found) and print the current
 * batch_errors count. Does nothing otherwise.
 */
void batch_process_some_linkages(Label label, Sentence sent, Parse_Options opts) {
    if (!there_was_an_error(label, sent, opts)) {
        return;
    }

    if (sentence_num_linkages_found(sent) > 0) {
        Linkage first = linkage_create(0, sent, opts);
        process_linkage(first, opts);
        linkage_delete(first);
    }

    fprintf(stdout, "+++++ error %d\n", batch_errors);
}
Example #7
0
/*
 * GC free function: deletes the wrapped link-grammar linkage, clears the
 * struct's fields and releases the struct with xfree(). A NULL pointer is
 * ignored.
 */
static void
rlink_linkage_gc_free( struct rlink_linkage *ptr )
{
    if ( !ptr ) return;

    linkage_delete( (Linkage)ptr->linkage );
    ptr->linkage  = NULL;
    ptr->sentence = Qnil;

    xfree( ptr );
}
std::map<int, EdgeDescription> LinkParserAdapterImpl::parseSentence(std::string sentenceStr) {
    Parse_Options parseOptions = parse_options_create();
    int verbosity = psi_logger.getLoggingPriority() / 100 - 4;
    parse_options_set_verbosity(parseOptions, verbosity);
    starts_.clear();
    ends_.clear();
    edgeDescriptions_.clear();
    freeSentence();
    sentence_ = sentence_create(sentenceStr.c_str(), dictionary_);
    if (!sentence_) {
        std::stringstream errorSs;
        errorSs << "Link-parser failed to tokenize the input text.";
        throw ParserException(errorSs.str());
    }
    boost::algorithm::to_lower(sentenceStr);
    if (sentence_parse(sentence_, parseOptions)) {

        size_t currentPos = 0;
        size_t foundPos = 0;
        int wordNo = 0;
        while (wordNo < sentence_length(sentence_)) {
            std::string word(sentence_get_word(sentence_, wordNo));
            boost::algorithm::to_lower(word);
            foundPos = sentenceStr.find(word, currentPos);
            if (foundPos != std::string::npos) {
                starts_[wordNo] = foundPos;
                ends_[wordNo] = currentPos = foundPos + word.length();
            }
            ++wordNo;
        }

        Linkage linkage = linkage_create(0, sentence_, parseOptions);
        CNode * ctree = linkage_constituent_tree(linkage);
        extractEdgeDescriptions(ctree, linkage);
        linkage_free_constituent_tree(ctree);
        linkage_delete(linkage);

    } else {
        std::stringstream errorSs;
        errorSs << "Link-parser failed to parse the input text.\n"
            << "Your input text is probably not a correct sentence.";
        WARN(errorSs.str());
    }
    return edgeDescriptions_;
}
Example #9
0
/*
 * Batch-mode error reporting with output redirection: when
 * there_was_an_error() flags the sentence, process its first linkage (if any
 * were found), forwarding tmp_path and file_name to process_linkage(), and
 * print the current batch_errors count. Does nothing otherwise.
 */
static void batch_process_some_linkages(Label label,
					Sentence sent,
					Parse_Options opts,char * tmp_path,char * file_name)
{
	if (!there_was_an_error(label, sent, opts))
		return;

	if (sentence_num_linkages_found(sent) > 0)
	{
		Linkage first = linkage_create(0, sent, opts);
		process_linkage(first, opts, tmp_path, file_name);
		linkage_delete(first);
	}

	fprintf(stdout, "+++++ error %d\n", batch_errors);
}
Example #10
0
/*
 * Replace ptd->linkage with the linkage at index ptd->cur_linkage, compute
 * its union and select the last sublinkage as current. Nothing happens when
 * cur_linkage is not below num_linkages.
 *
 * If linkage_create() fails, ptd->linkage is left NULL and the function
 * returns without touching the (optional) phrase tree.
 */
static void makeLinkage(per_thread_data *ptd)
{
	if (ptd->cur_linkage < ptd->num_linkages)
	{
		if (ptd->linkage)
			linkage_delete(ptd->linkage);

		ptd->linkage = linkage_create(ptd->cur_linkage,ptd->sent,ptd->opts);
		if (!ptd->linkage)
			return;

		linkage_compute_union(ptd->linkage);
		linkage_set_current_sublinkage(ptd->linkage,
		                        linkage_get_num_sublinkages(ptd->linkage)-1);

#if DO_PHRASE_TREE
		if (tree)
			linkage_free_constituent_tree(tree);
		/* The linkage lives in the per-thread record; there is no local
		 * variable named "linkage" in this function. */
		tree = linkage_constituent_tree(ptd->linkage);
		printTree(tree);
#endif
	}
}
Example #11
0
/*
 * Batch-mode handling of one parsed sentence.
 *
 * When there_was_an_error() reports a mismatch with the label and linkages
 * were found, one linkage is processed: the first with zero violations if
 * any valid linkage exists, otherwise linkage 0 (which will be a bad one).
 * The running batch_errors count is then printed. When there is no error,
 * parse statistics are printed if the test string contains
 * ",batch_print_parse_statistics,".
 */
static void batch_process_some_linkages(Label label,
                                        Sentence sent,
                                        Command_Options* copts)
{
	Parse_Options opts = copts->popts;

	if (there_was_an_error(label, sent, opts))
	{
		/* If linkages were found, print them */
		if (sentence_num_linkages_found(sent) > 0) {
			Linkage linkage = NULL;
			/* If we found at least one good linkage, print it. */
			if (sentence_num_valid_linkages(sent) > 0) {
				int i;
				for (i=0; i<sentence_num_linkages_post_processed(sent); i++)
				{
					if (0 == sentence_num_violations(sent, i))
					{
						linkage = linkage_create(i, sent, opts);
						break;
					}
				}
			}
			else
			{
				/* This linkage will be bad; no good ones were found. */
				linkage = linkage_create(0, sent, opts);
			}
			/* The search may find nothing, or linkage_create() may fail;
			 * do not hand a NULL linkage to the callees. */
			if (linkage != NULL)
			{
				process_linkage(linkage, copts);
				linkage_delete(linkage);
			}
		}
		fprintf(stdout, "+++++ error %d\n", batch_errors);
	}
	else
	{
		if (strstr(test, ",batch_print_parse_statistics,"))
		{
			print_parse_statistics(sent, opts);
		}
	}
}
Example #12
0
  /*
   * Process the single linkage whose index equals `num`, forwarding tmp_path
   * and file_name to process_linkage().
   *
   * The loop still walks indices 0..num_to_query-1, where num_to_query is the
   * number of valid linkages (or of all post-processed linkages when
   * display_bad is set), capped at DISPLAY_MAX; every index other than `num`
   * is skipped. All diagnostic printing has been commented out, but the
   * conditions that guarded it are still evaluated when verbosity > 0.
   *
   * Returns the character read by fget_input_char() when it is not '\n',
   * otherwise 'x' once the loop finishes.
   */
  //############################################################################################################
  static int process_some_linkages(Sentence sent, Parse_Options opts , int num ,char * tmp_path,char * file_name)
  //############################################################################################################
{
	int c;
	int i, num_displayed, num_to_query;
	Linkage linkage;
	double corpus_cost;

	if (verbosity > 0) print_parse_statistics(sent, opts);
	/* Upper bound on the indices examined, depending on display_bad. */
	if (!parse_options_get_display_bad(opts))
	{
		num_to_query = MIN(sentence_num_valid_linkages(sent), DISPLAY_MAX);
	}
	else
	{
		num_to_query = MIN(sentence_num_linkages_post_processed(sent),
		                   DISPLAY_MAX);
	}

	for (i=0, num_displayed=0; i<num_to_query; i++)
	{
       /*###########################*/
           /* Only the requested linkage index is handled. */
           if (i==num)
             {
        /*###########################*/

		/* A linkage with violations is skipped unless display_bad is set. */
		if ((sentence_num_violations(sent, i) > 0) &&
			(!parse_options_get_display_bad(opts)))
		{
			continue;
		}

		linkage = linkage_create(i, sent, opts);

		/* The branches below are intentionally empty: their fprintf calls
		 * are commented out, only the condition checks remain. */
		if (verbosity > 0)
		{
			if ((sentence_num_valid_linkages(sent) == 1) &&
				(!parse_options_get_display_bad(opts)))
			{
			//	fprintf(stdout, "	Unique linkage, ");
			}
			else if ((parse_options_get_display_bad(opts)) &&
			         (sentence_num_violations(sent, i) > 0))
			{
			//	fprintf(stdout, "	Linkage %d (bad), ", i+1);
			}
			else
			{
			//	fprintf(stdout, "	Linkage %d, ", i+1);
			}

			if (!linkage_is_canonical(linkage)) {
			//	fprintf(stdout, "non-canonical, ");
			}
			if (linkage_is_improper(linkage)) {
			//	fprintf(stdout, "improper fat linkage, ");
			}
			if (linkage_has_inconsistent_domains(linkage)) {
			//	fprintf(stdout, "inconsistent domains, ");
			}

			corpus_cost = linkage_corpus_cost(linkage);
			if (corpus_cost < 0.0f)
			{
			/*	fprintf(stdout, "cost vector = (UNUSED=%d DIS=%d AND=%d LEN=%d)\n",
				       linkage_unused_word_cost(linkage),
				       linkage_disjunct_cost(linkage),
				       linkage_and_cost(linkage),
				       linkage_link_cost(linkage));*/
			}
			else
			{
			/*	fprintf(stdout, "cost vector = (CORP=%6.4f UNUSED=%d DIS=%d AND=%d LEN=%d)\n",
				       corpus_cost,
				       linkage_unused_word_cost(linkage),
				       linkage_disjunct_cost(linkage),
				       linkage_and_cost(linkage),
				       linkage_link_cost(linkage)); */
			}
		}

               //##################################################
                process_linkage(linkage, opts,tmp_path,file_name);
               //##################################################
//		process_linkage(linkage, opts);
		linkage_delete(linkage);

		/* Unless this was the last linkage in range, read one character of
		 * input; anything other than a newline ends the function early. */
		if (++num_displayed < num_to_query) {
			if (verbosity > 0) {
//				fprintf(stdout, "Press RETURN for the next linkage.\n");
			}
			c = fget_input_char(stdin, stdout, opts);
			if (c != '\n') return c;
		}
    /*##################*/
        } // if num
    /*#################*/
	}
	return 'x';
}
Example #13
0
/*******************************************************************************************
*   This function translates a list of sentences in the input_string array and
*   terminates when it finds "end" in the array.
*   Inputs: input_string: Array of sentences
*           all_linkages: if true, translate all possible linkages of the sentence;
*                         otherwise only the first one (when there is one)
*           out_to_file : if true, outputs the results to separate files in the out directory;
*                       the output file names look like:
*                       src-x-y: Contains a linkage for the source sentence, x is the index
*                                of the sentence in the array and y is the index of the linkage
*                       trg-x-y-z: Contains a linkage for the target sentence, x is the index
*                                  of the sentence in the array, y is the index of the linkage and z
*                                  is the zth translation of that linkage x-y (a linkage in the
*                                  source language may have more than one corresponding target
*                                  linkage)
*   On a dictionary, transfer, sentence or file-open failure the function
*   prints a message and terminates the process with exit(0).
*******************************************************************************************/
void translate(char  input_string[][200], int all_linkages, int out_to_file){
	Dictionary    dict;
	Parse_Options opts;
	Sentence      sent;
	Linkage       src_linkage, trg_linkage;
	Transfer      trans;
	char *        diagram;
	FILE *        fp;
	int           i,  j, num_src_linkages, num_trg_linkages;
	char          txfilename[30];
	char          output_string[200];
	int           n;

	opts  = parse_options_create();
	parse_options_set_verbosity (opts, FALSE);
	parse_options_set_display_walls(opts, TRUE);
	parse_options_set_display_postscript(opts, TRUE);

	dict  = dictionary_create("4.0.dict", "4.0.knowledge", NULL, NULL, "morphology/morphemes.dict");
	if (!dict){
		fprintf(stderr, "%s\n", lperrmsg);
		parse_options_delete(opts);
		printf("size : %lld", space_in_use);
		exit(0);
	}
	trans=transfer_create("translation/mapfile.txt", "translation/links_list.txt","translation/lexicon.txt", "target/trg.dict" );
	if (!trans){
		fprintf(stderr, "%s\n", maperrmsg);
		fprintf(stderr, "%s\n", lperrmsg);
		printf("size : %lld", space_in_use);
		exit(0);
	}

	for (n=0; strcmp (input_string[n],"end")!=0; n++ ){

		sent = sentence_create(input_string[n], dict);
		if (!sent){
			fprintf(stderr, "%s\n", lperrmsg);
			parse_options_delete(opts);
			printf("size : %lld", space_in_use);
			exit(0);
		}
		printf ("\n************************************\n%d-%s\n************************************\n", n+1, input_string[n]);
		num_src_linkages = sentence_parse(sent, opts);

		/* Without all_linkages, look at the first linkage only (if any). */
		num_src_linkages = (all_linkages) ? num_src_linkages: (num_src_linkages!=0)*1;

		for (i=0; i<num_src_linkages;i++) {
			src_linkage = linkage_create(i, sent, opts);
			diagram = linkage_print_diagram(src_linkage);

			printf ("\nLinkage No. %d-%d\n\n", n+1, i+1);
			printf("%s\n", diagram);
			if (out_to_file){
				/* Bounded write: the name buffer is only 30 bytes. */
				snprintf(txfilename, sizeof(txfilename), "out/src-%d-%d.txt", n+1, i+1);
				fp=fopen(txfilename,"w+");
				if (fp==NULL){
					fprintf(stderr, "%s%s\n", "Unable to open ", txfilename);
					printf("size : %lld", space_in_use);
					exit(0);
				}
				fprintf(fp,"%s",diagram);
				fclose(fp);
			}
			string_delete(diagram);

			num_trg_linkages = transfer_linkage_driver(trans, src_linkage);
			if(num_trg_linkages==0){
				fprintf(stderr, "%s\n", maperrmsg);
			}
			else{
				parse_options_set_display_walls (trans->opts, TRUE);
				for (j=0; j<num_trg_linkages; j++){
					//second parameter should always be 0 !?
					trg_linkage=trans_linkage_create(trans, 0, trans->sent[j], trans->opts);
					diagram = linkage_print_diagram(trg_linkage);

					/* Fill output_string before it is printed; it used to be
					 * read uninitialised here and was only filled when
					 * writing to a file. */
					extract_sent (trg_linkage, output_string);

					printf ("\nTranslation No. %d_%d_%d\n\n", n+1,i+1,j+1);
					printf("Translation: %s\n", output_string);
					printf("%s\n", diagram);
					if (out_to_file){
						snprintf(txfilename, sizeof(txfilename), "out/trg-%d-%d-%d.txt", n+1, i+1, j+1);
						fp=fopen(txfilename,"w+");
						if (fp==NULL){
							fprintf(stderr, "%s%s\n", "Unable to open ", txfilename);
							printf("size : %lld", space_in_use);
							exit(0);
						}
						fprintf(fp, "Translation:%s\n", output_string);
						fprintf(fp,"%s",diagram);
						fclose(fp);
					}
					string_delete(diagram);
					linkage_delete(trg_linkage);
				}
			}
			linkage_delete(src_linkage);
		}
		fprintf(stderr, "%s\n", lperrmsg);
		sentence_delete(sent);

	}


	transfer_delete(trans);
	dictionary_delete(dict);
	parse_options_delete(opts);

}
Example #14
0
/*******************************************************************************************
*   This function parses a list of sentences (terminated by the entry "end"), prints each
*   linkage diagram to stdout and, when out_to_file is true, also writes it to a separate
*   file in the out directory.
*   The files are named out/src-x-y.txt, where x is the index of the sentence in the array
*   and y is the index of the linkage (both starting at 1).
*   Inputs: unify_features: forwarded to parse_options_set_unify_features()
*           all_linkages  : if true, output every linkage; otherwise only the first one
*   On a dictionary, sentence or file-open failure the function prints a message and
*   terminates the process with exit(0).
*******************************************************************************************/
void normal_parse(char input_string[][200], int unify_features, int all_linkages, int out_to_file){

	Dictionary    dict;
	Parse_Options opts;
	int           n;
	Sentence      sent;
	Linkage       linkage;
	char *        diagram;
	int           i, num_linkages;
	char          txfilename[30];
	FILE *        fp;


	opts  = parse_options_create();
	parse_options_set_display_walls(opts, TRUE);
	parse_options_set_display_postscript(opts, TRUE);
	parse_options_set_unify_features(opts, unify_features);

	dict  = dictionary_create("4.0.dict", "4.0.knowledge", NULL, NULL, "morphology/morphemes.dict");
	if (!dict){
		fprintf(stderr, "%s\n", lperrmsg);
		parse_options_delete(opts);
		printf("size : %lld", space_in_use);
		exit(0);
	}

	//setting opts->unify_features to TRUE enables unification
	for (n=0; strcmp (input_string[n],"end")!=0; n++ ){
		sent = sentence_create(input_string[n], dict);
		if (!sent){
			fprintf(stderr, "%s\n", lperrmsg);
			dictionary_delete(dict);
			parse_options_delete(opts);
			exit(0);
		}
		printf ("\n************************************\n%d-%s\n************************************\n", n+1, input_string[n]);
		num_linkages = sentence_parse(sent, opts);
		/* Without all_linkages, look at the first linkage only (if any). */
		num_linkages = (all_linkages) ? num_linkages : (num_linkages!=0)*1 ;
		//Echoes all linkages to screen and, optionally, to a text file in ./out
		for (i=0; i<num_linkages; ++i) {
			linkage = linkage_create(i , sent, opts);
			diagram = linkage_print_diagram(linkage);

			printf ("\n Linkage No. %d-%d\n\n", n+1, i+1);
			printf("%s\n", diagram);
			if (out_to_file){
				/* Bounded write: the name buffer is only 30 bytes. */
				snprintf(txfilename, sizeof(txfilename), "out/src-%d-%d.txt", n+1, i+1);
				fp=fopen(txfilename,"w+");
				if (fp==NULL){
					fprintf(stderr, "%s%s\n", "Unable to open ", txfilename);
					printf("size : %lld", space_in_use);
					exit(0);
				}
				fprintf(fp,"%s",diagram);
				fclose(fp);
			}
			string_delete(diagram);
			linkage_delete(linkage);
		}
		sentence_delete(sent);
	}

	dictionary_delete(dict);
	parse_options_delete(opts);

}
Example #15
0
/*
 * Interactively show the linkages of a parsed sentence.
 *
 * Up to DISPLAY_MAX linkages are considered: the valid ones, or all
 * post-processed ones when display_bad is set. Each shown linkage is
 * preceded by a one-line summary when verbosity > 0 and handed to
 * process_linkage(). Between linkages one character is read; anything other
 * than a newline is pushed back onto stdin, input_pending is set and the
 * loop stops.
 */
void process_some_linkages(Sentence sent, Parse_Options opts) {
    int idx, key, shown, limit;
    Linkage lkg;

    if (verbosity > 0) print_parse_statistics(sent, opts);

    if (parse_options_get_display_bad(opts)) {
        limit = MIN(sentence_num_linkages_post_processed(sent),
                    DISPLAY_MAX);
    }
    else {
        limit = MIN(sentence_num_valid_linkages(sent), DISPLAY_MAX);
    }

    shown = 0;
    for (idx = 0; idx < limit; ++idx) {

        /* Linkages with violations are only shown when display_bad is on. */
        if ((sentence_num_violations(sent, idx) > 0) &&
            (!parse_options_get_display_bad(opts))) {
            continue;
        }

        lkg = linkage_create(idx, sent, opts);

        if (verbosity > 0) {
            /* Summary line: which linkage this is ... */
            if ((sentence_num_valid_linkages(sent) == 1) &&
                (!parse_options_get_display_bad(opts))) {
                fprintf(stdout, "  Unique linkage, ");
            }
            else if ((parse_options_get_display_bad(opts)) &&
                     (sentence_num_violations(sent, idx) > 0)) {
                fprintf(stdout, "  Linkage %d (bad), ", idx + 1);
            }
            else {
                fprintf(stdout, "  Linkage %d, ", idx + 1);
            }

            /* ... any structural warnings ... */
            if (!linkage_is_canonical(lkg)) {
                fprintf(stdout, "non-canonical, ");
            }
            if (linkage_is_improper(lkg)) {
                fprintf(stdout, "improper fat linkage, ");
            }
            if (linkage_has_inconsistent_domains(lkg)) {
                fprintf(stdout, "inconsistent domains, ");
            }

            /* ... and its cost vector. */
            fprintf(stdout, "cost vector = (UNUSED=%d DIS=%d AND=%d LEN=%d)\n",
                    linkage_unused_word_cost(lkg),
                    linkage_disjunct_cost(lkg),
                    linkage_and_cost(lkg),
                    linkage_link_cost(lkg));
        }

        process_linkage(lkg, opts);
        linkage_delete(lkg);

        ++shown;
        if (shown >= limit) {
            continue;
        }

        if (verbosity > 0) {
            fprintf(stdout, "Press RETURN for the next linkage.\n");
        }
        key = fget_input_char(stdin, stdout, opts);
        if (key != '\n') {
            ungetc(key, stdin);
            input_pending = TRUE;
            break;
        }
    }
}
Example #16
0
/*
 * Interactively show the linkages of a parsed sentence.
 *
 * All post-processed linkage indices are queried, but at most
 * num_to_display linkages are shown: the number of valid linkages (or of
 * all queried linkages when copts->display_bad is set), capped at
 * DISPLAY_MAX or at the value returned by auto_next_linkage_test(). When
 * that test value is non-zero, linkages are shown without waiting for
 * input.
 *
 * Returns the string read by fget_input_string() when it is NULL or does
 * not start with a newline, otherwise "x" once the loop ends.
 *
 * NOTE(review): the `in` parameter is not used; input is read from stdin.
 */
static const char *process_some_linkages(FILE *in, Sentence sent,
                                         Command_Options* copts)
{
	int i, num_to_query, num_to_display, num_displayed;
	Linkage linkage;
	double corpus_cost;
	Parse_Options opts = copts->popts;
	int display_max = DISPLAY_MAX;
	bool auto_next_linkage = false;

	i = auto_next_linkage_test(test);
	if (i != 0)
	{
		display_max = i;
		auto_next_linkage = true;
	}

	if (verbosity > 0) print_parse_statistics(sent, opts, copts);
	num_to_query = sentence_num_linkages_post_processed(sent);
	if (!copts->display_bad)
	{
		num_to_display = MIN(sentence_num_valid_linkages(sent),
		                     display_max);
	}
	else
	{
		num_to_display = MIN(num_to_query, display_max);
	}

	for (i=0, num_displayed=0; i<num_to_query; i++)
	{
		if ((sentence_num_violations(sent, i) > 0) &&
			!copts->display_bad)
		{
			continue;
		}

		linkage = linkage_create(i, sent, opts);

		/* Currently, sat solver sets the linkage violation indication
		 * only when it creates the linkage as a result of the above call. */
		if ((sentence_num_violations(sent, i) > 0) &&
			!copts->display_bad)
		{
			/* The linkage has already been created at this point, so it
			 * must be released before it is skipped. */
			if (linkage)
			{
				linkage_delete(linkage);
			}
			continue;
		}

		/* Currently, sat solver returns NULL when there ain't no more */
		if (!linkage)
		{
			if (verbosity > 0)
			{
				if (0 == i)
					fprintf(stdout, "No linkages found.\n");
				else
					fprintf(stdout, "No more linkages.\n");
			}
			break;
		}

		if (verbosity > 0)
		{
			if ((sentence_num_valid_linkages(sent) == 1) &&
				!copts->display_bad)
			{
				fprintf(stdout, "\tUnique linkage, ");
			}
			else if (copts->display_bad &&
			         (sentence_num_violations(sent, i) > 0))
			{
				fprintf(stdout, "\tLinkage %d (bad), ", num_displayed+1);
			}
			else
			{
				fprintf(stdout, "\tLinkage %d, ", num_displayed+1);
			}

			/* A negative corpus cost is treated as "not available" and
			 * left out of the printed cost vector. */
			corpus_cost = linkage_corpus_cost(linkage);
			if (corpus_cost < 0.0f)
			{
				fprintf(stdout, "cost vector = (UNUSED=%d DIS=%5.2f LEN=%d)\n",
				       linkage_unused_word_cost(linkage),
				       linkage_disjunct_cost(linkage),
				       linkage_link_cost(linkage));
			}
			else
			{
				fprintf(stdout, "cost vector = (CORP=%6.4f UNUSED=%d DIS=%5.2f LEN=%d)\n",
				       corpus_cost,
				       linkage_unused_word_cost(linkage),
				       linkage_disjunct_cost(linkage),
				       linkage_link_cost(linkage));
			}
		}

		process_linkage(linkage, copts);
		linkage_delete(linkage);

		if (++num_displayed < num_to_display)
		{
			if (!auto_next_linkage)
			{
				if ((verbosity > 0) && (!copts->batch_mode) && isatty_stdin && isatty_stdout)
				{
					fprintf(stdout, "Press RETURN for the next linkage.\n");
				}
				char *rc = fget_input_string(stdin, stdout, /*check_return*/true);
				if ((NULL == rc) || (*rc != '\n')) return rc;
			}
		}
		else
		{
			break;
		}
	}
	return "x";
}
Example #17
0
/*
 * Interactively show the linkages of a parsed sentence.
 *
 * Every post-processed linkage index is queried, but at most `cap`
 * linkages are shown: the number of valid linkages (or of all queried
 * linkages when copts->display_bad is set), limited to DISPLAY_MAX. Unless
 * the test string contains ",auto-next-linkage,", one character is read
 * between linkages.
 *
 * Returns the character read when it is not '\n', otherwise 'x' once the
 * loop ends.
 */
static int process_some_linkages(Sentence sent, Command_Options* copts)
{
	int key;
	int idx, total, cap, shown;
	Linkage lkg;
	double cost;
	Parse_Options opts = copts->popts;

	if (verbosity > 0) print_parse_statistics(sent, opts);

	total = sentence_num_linkages_post_processed(sent);
	if (copts->display_bad)
	{
		cap = MIN(total, DISPLAY_MAX);
	}
	else
	{
		cap = MIN(sentence_num_valid_linkages(sent),
		          DISPLAY_MAX);
	}

	shown = 0;
	for (idx = 0; idx < total; idx++)
	{
		/* Linkages with violations are only shown when display_bad is on. */
		if ((sentence_num_violations(sent, idx) > 0) &&
			!copts->display_bad)
		{
			continue;
		}

		lkg = linkage_create(idx, sent, opts);

		/* Currently, sat solver returns NULL when there ain't no more */
		if (NULL == lkg) break;

		if (verbosity > 0)
		{
			if ((sentence_num_valid_linkages(sent) == 1) &&
				!copts->display_bad)
			{
				fprintf(stdout, "	Unique linkage, ");
			}
			else if (copts->display_bad &&
			         (sentence_num_violations(sent, idx) > 0))
			{
				fprintf(stdout, "	Linkage %d (bad), ", shown + 1);
			}
			else
			{
				fprintf(stdout, "	Linkage %d, ", shown + 1);
			}

			/* The corpus cost is only printed when it is not negative. */
			cost = linkage_corpus_cost(lkg);
			if (cost < 0.0f)
			{
				fprintf(stdout, "cost vector = (UNUSED=%d DIS=%5.2f LEN=%d)\n",
				       linkage_unused_word_cost(lkg),
				       linkage_disjunct_cost(lkg),
				       linkage_link_cost(lkg));
			}
			else
			{
				fprintf(stdout, "cost vector = (CORP=%6.4f UNUSED=%d DIS=%5.2f LEN=%d)\n",
				       cost,
				       linkage_unused_word_cost(lkg),
				       linkage_disjunct_cost(lkg),
				       linkage_link_cost(lkg));
			}
		}

		process_linkage(lkg, copts);
		linkage_delete(lkg);

		/* Stop once the display quota has been reached. */
		shown++;
		if (shown >= cap) break;

		/* In auto-next mode, move on without prompting. */
		if (strstr(test, ",auto-next-linkage,")) continue;

		if (verbosity > 0)
		{
			fprintf(stdout, "Press RETURN for the next linkage.\n");
		}
		key = fget_input_char(stdin, stdout, copts);
		if (key != '\n') return key;
	}
	return 'x';
}
Example #18
0
File: lg_py.c Project: bluemoon/nlp
/// Append `item` to `list` and drop the caller's reference to `item`.
/// PyList_Append() takes its own reference, so the one returned by the
/// constructor must be released here. Returns 0 on success, -1 when `item`
/// is NULL or the append fails (a Python exception is set in both cases).
static int sentence_append_steal(PyObject *list, PyObject *item) {
    int rc;

    if (item == NULL)
        return -1;
    rc = PyList_Append(list, item);
    Py_DECREF(item);
    return rc;
}

/// This is the basic sentence dissection.
///
/// Parses the single string argument with the default dictionary and, for
/// the first linkage, returns a 6-tuple:
///   (words, link lengths, [left label, right label] pairs, link labels,
///    sublinkage count repeated once per link, diagram string)
///
/// Returns None when the sentence cannot be created or has no linkage, and
/// NULL with an exception set on argument errors, when the dictionary
/// cannot be opened (RuntimeError), or when a Python allocation fails.
static PyObject *sentence(PyObject *self, PyObject *args) {
    Dictionary    dict;
    Parse_Options opts;
    Sentence      sent;
    Linkage       linkage = NULL;
    char *        diagram;

    /// Link counts
    int   num_linkages;
    int   links;

    ///  Indexes for the iterators
    int   link_idx;
    int   word_idx;
    int   num_words;
    long  sub_linkages;

    const char *text;

    PyObject *output_list     = NULL;
    PyObject *word_list       = NULL;
    PyObject *word2_list      = NULL;
    PyObject *span_list       = NULL;
    PyObject *sublinkage_list = NULL;
    PyObject *_diagram        = NULL;
    PyObject *result          = NULL;

    /// Parse the arguments before allocating anything, so that an argument
    /// error cannot leak.
    if (!PyArg_ParseTuple(args, "s", &text))
        return NULL;

    opts = parse_options_create();
    parse_options_set_verbosity(opts, -1);
    parse_options_set_screen_width(opts, 50);

    setlocale(LC_ALL, "");
    dict = dictionary_create_default_lang();

    if (!dict) {
        /// An exception must be reported by returning NULL, not None.
        parse_options_delete(opts);
        PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary");
        return NULL;
    }

    sent = sentence_create(text, dict);
    if (sent) {
        sentence_split(sent, opts);
        num_linkages = sentence_parse(sent, opts);
        if (num_linkages > 0)
            linkage = linkage_create(0, sent, opts);
    }

    if (!linkage) {
        /// Nothing to report: no sentence, or no linkage for it.
        Py_INCREF(Py_None);
        result = Py_None;
        goto done;
    }

    output_list     = PyList_New(0);
    word_list       = PyList_New(0);
    word2_list      = PyList_New(0);
    span_list       = PyList_New(0);
    sublinkage_list = PyList_New(0);
    if (!output_list || !word_list || !word2_list || !span_list || !sublinkage_list)
        goto done;

    /// The diagram does not depend on the link index: build it once and free
    /// the C string as soon as it has been copied.
    diagram = linkage_print_diagram(linkage);
    _diagram = PyString_FromString(diagram ? diagram : "");
    if (diagram)
        linkage_free_diagram(diagram);
    if (!_diagram)
        goto done;

    /// Get the lengths of everything
    num_words    = linkage_get_num_words(linkage);
    links        = linkage_get_num_links(linkage);
    sub_linkages = linkage_get_num_sublinkages(linkage);

    for (link_idx = 0; link_idx < links; link_idx++) {
        PyObject *label_pair;

        if (sentence_append_steal(sublinkage_list, PyLong_FromLong(sub_linkages)) < 0)
            goto done;

        if (sentence_append_steal(span_list,
                PyInt_FromLong(linkage_get_link_length(linkage, link_idx))) < 0)
            goto done;

        /// Sub Group these (left and right labels)
        label_pair = PyList_New(0);
        if (!label_pair)
            goto done;
        if (sentence_append_steal(label_pair,
                PyString_FromString(linkage_get_link_llabel(linkage, link_idx))) < 0 ||
            sentence_append_steal(label_pair,
                PyString_FromString(linkage_get_link_rlabel(linkage, link_idx))) < 0) {
            Py_DECREF(label_pair);
            goto done;
        }
        /// Then add to the main list
        if (sentence_append_steal(output_list, label_pair) < 0)
            goto done;

        /// Just the label
        if (sentence_append_steal(word2_list,
                PyString_FromString(linkage_get_link_label(linkage, link_idx))) < 0)
            goto done;
    }

    for (word_idx = 0; word_idx < num_words; word_idx++) {
        if (sentence_append_steal(word_list,
                PyString_FromString(linkage_get_word(linkage, word_idx))) < 0)
            goto done;
    }

    /// "S" takes its own reference to each object; ours are dropped below.
    result = Py_BuildValue("SSSSSS", word_list, span_list, output_list, word2_list, sublinkage_list, _diagram);

done:
    Py_XDECREF(output_list);
    Py_XDECREF(word_list);
    Py_XDECREF(word2_list);
    Py_XDECREF(span_list);
    Py_XDECREF(sublinkage_list);
    Py_XDECREF(_diagram);

    if (linkage)
        linkage_delete(linkage);
    if (sent)
        sentence_delete(sent);
    dictionary_delete(dict);
    parse_options_delete(opts);

    return result;
}
Example #19
0
/*
 * Release a LinkagePtr wrapper: delete the link-grammar linkage it holds,
 * then free the wrapper itself. ptr is dereferenced without a NULL check,
 * so it must be non-NULL.
 */
void free_linkage_ptr(LinkagePtr *ptr) {
	linkage_delete(ptr->linkage);
	free(ptr);
}
Example #20
0
File: lg_py.c Project: bluemoon/nlp
/**
 * domains(text) -> (names, num_domains) | None
 *
 * Parses `text` with the default-language dictionary and collects the domain
 * names of every link in the first linkage.
 *
 * @param self  Unused module/self object.
 * @param args  Python argument tuple; expects a single string ("s").
 *
 * @return A new reference to a 2-tuple (names, num_domains) where `names` is
 *         a list of domain-name strings gathered over all links and
 *         `num_domains` is the domain count of the last link inspected
 *         (0 when the linkage has no links).
 *         Returns None when the sentence yields no linkage.
 *         Returns NULL with a Python exception set when argument parsing
 *         fails, when the dictionary cannot be opened (RuntimeError), or when
 *         building the result objects fails.
 */
static PyObject *domains(PyObject *self, PyObject *args) {
    Dictionary    dict;
    Parse_Options opts;
    Sentence      sent;
    Linkage       linkage;

    /// Link counts
    int   num_linkages;
    int   num_links;
    int   num_domains = 0;   /* stays 0 if the linkage has no links */
    int   i;
    int   j;
    int   failed = 0;        /* set when a Python C-API call reports an error */
    const char *text;

    PyObject *output_list;
    PyObject *temp;
    PyObject *result = NULL;

    /* Parse the arguments before allocating anything so this early return
       cannot leak. */
    if (!PyArg_ParseTuple(args, "s", &text))
        return NULL;

    opts = parse_options_create();
    parse_options_set_verbosity(opts, -1);

    setlocale(LC_ALL, "");
    dict = dictionary_create_default_lang();

    if (!dict) {
        parse_options_delete(opts);
        PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary");
        /* An exception is pending, so the C-API contract requires NULL here. */
        return NULL;
    }

    sent = sentence_create(text, dict);
    sentence_split(sent, opts);
    num_linkages = sentence_parse(sent, opts);

    if (num_linkages > 0) {
        output_list = PyList_New(0);
        /* If the list could not be created, result stays NULL and the
           exception raised by PyList_New propagates. */
        if (output_list != NULL) {
            linkage = linkage_create(0, sent, opts);

            /* The domain accessors below take a link index, so iterate over
               the links of the linkage with an exclusive upper bound. */
            num_links = linkage_get_num_links(linkage);
            for (i = 0; i < num_links && !failed; i++) {
                const char **names;

                num_domains = linkage_get_link_num_domains(linkage, i);
                names = linkage_get_link_domain_names(linkage, i);

                for (j = 0; j < num_domains && !failed; j++) {
                    temp = PyString_FromString(names[j]);
                    if (temp == NULL) {
                        failed = 1;
                        break;
                    }
                    if (PyList_Append(output_list, temp) < 0)
                        failed = 1;
                    /* PyList_Append takes its own reference; drop ours. */
                    Py_DECREF(temp);
                }
            }
            linkage_delete(linkage);

            if (failed) {
                Py_DECREF(output_list);
            } else {
                /* "N" hands our reference to the tuple instead of adding one,
                   so the list is not leaked. */
                result = Py_BuildValue("Ni", output_list, num_domains);
            }
        }
    } else {
        Py_INCREF(Py_None);
        result = Py_None;
    }

    /* Single cleanup path for the link-grammar handles. */
    sentence_delete(sent);
    dictionary_delete(dict);
    parse_options_delete(opts);

    return result;
}
Example #21
0
 SPOTriplets NLP::sentence2triplets ( const char* sentence )
 {
   // vector of triplets
   SPOTriplets triplets;

   #ifdef DEBUG
     std::cout << "The sentence: " << sentence << std::endl;
   #endif
   // creates a Sentence from the input char*
   Sentence sent = sentence_create ( sentence, dict_ );
   #ifdef DEBUG
     std::cout << "Sentence created" << std::endl;
   #endif
   // tokenizes the sentence
   sentence_split ( sent, parse_opts_ );
   #ifdef DEBUG
     std::cout << "Sentence splitted" << std::endl;
   #endif
   // searches for all possible linkages
   int num_linkages = sentence_parse ( sent, parse_opts_ );
   #ifdef DEBUG
     std::cout << "Sentence parsed" << std::endl;
     std::cout << "Number of linkages: " << num_linkages << std::endl;
   #endif

   // just one triplet
   SPOTriplet triplet;

   // if there is any linkage in the sentence
   if( num_linkages > 0 )
   {
     // create the linkage
     Linkage linkage = linkage_create ( 0, sent, parse_opts_ );

     #ifdef DEBUG
       // prints the sentence's diagram
       std::cout << "The diagram: " << std::endl;
       char *diagram = linkage_print_diagram(linkage, true, 800);
       std::cout << diagram << std::endl;
       linkage_free_diagram( diagram );
       // end print diagram
     #endif

     std::vector<std::string> labels;

     // 1. find the S_link
     // S* except there is an SJ* because then S* except Spx
     // two cases: there is SJ* and there is not SJ*

     // TODO: VJlp VJrp same as SJ but to predications
     // TODO: SFut SFst what the f**k?                                     ###FIXED###
     // TODO: His form was shining like the light not working              ###FIXED###
     // TODO: Car is mine not working                                      ###FIXED###
     // TODO: The little brown bear has eaten all of the honey not working ###FIXED###

     // REGEXES
     std::regex SJ_( "SJ.*" );
     std::regex VJ_( "VJ.*");
     std::regex subject( "(Ss.*)|(SFut)|(Sp\*.*)" );
     std::regex Spx( "Spx.*" );
     // TODO:fix theese initializer list not allowed                       ###FIXED###
     std::regex predicate( "(Pv.*)|(Pg.*)|(PP.*)|(I.*)|(TO)|(MVi.*)" );
     // TODO: make one from theese // (Sp.*)|(Ss.*)                        ###FIXED###
     std::regex noun_adject_object ( "(O.*)|(Os.*)|(Op.*)|(MVpn.*)|(Pa.*)|(MVa.*)" );
     std::regex preposition ( "(MVp.*)|(Pp.*)|(OF)|(TO)" );
     std::regex prep_object ( "(J.*)|(TI)|(I.*)|(ON)" );
     // TODO: problems with matching!! Pg*!!                               ###FIXED###
     // TODO: problems with matching!! Mvp.*!!                             ###FIXED###

     bool s_found = false;
     bool p_found = false;
     bool o_found = false;
     bool SJ = false;

     // search for SJ.s labels
     for( auto label: labels )
     {
       if( std::regex_match( label, SJ_ ) )
       {
         SJ = true;
         break;
       }
     }

     // multiple subject in the sentence
     if( SJ )
     {
       // SPls left -> first subject
       // SPrs right -> second subject
       // Spx right -> predicate
       // SJ-s are multiple subjects
       std::string temp;
       // go through every linkage
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         // get their label
         std::string l = linkage_get_link_label( linkage, i );
         // if there is an SJl* label
         if( std::regex_match( l, std::regex( "SJl.*" ) ) )
         {
           // SJls left side
           triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
           triplet.cut( triplet.s );
           temp = triplet.s + " ";
           // and word
           triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           triplet.cut( triplet.s );
           temp += triplet.s + " ";

           // find SJr*
           for( int j = 0; j < linkage_get_num_links( linkage ); ++j )
           {
             std::string m = linkage_get_link_label( linkage, j );
             if( std::regex_match( m, std::regex( "SJr.*" ) ) )
             {
               triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) );
               triplet.cut();
               temp += triplet.s;
               triplet.s = temp;

               s_found = true;
               #ifdef DEBUG
                 std::cout << "Subject found: " << triplet.s << std::endl;
               #endif
               break;
             } // if
           } // for
           break;
         } // if
       } // for

       // now we have the subject

       // find Spx and its right side will be the starter predicate
       std::string current_word;
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         std::string l = linkage_get_link_label( linkage, i );
         if( std::regex_match( l, std::regex( "Spx.*" ) ) )
         {
           triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
         }
       }
       // from now all the same as on the else branch !!!!

       bool predicate_match = false;

       // search for the linkage that has triplet.s as left!
       do
       {
         predicate_match = false;

         for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
         {
           // every linkage's left word
           std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
           // every linkage's label
           std::string l = linkage_get_link_label( linkage, i );

           if( std::regex_match( l, predicate ) && word_i == current_word )
           {
             // found predicate
             triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
             current_word = triplet.p;
             predicate_match = true;
             break;
           }
         }
       }
       while( predicate_match );

       // we now have the predicate too
       // TODO: multiple predicates!
       p_found = true;
       #ifdef DEBUG
         std::cout << "Predicate found: " << triplet.p << std::endl;
       #endif

       // ###COPY BEGIN###

       // search for noun object or adjective object
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         // get every linkage label
         std::string l = linkage_get_link_label( linkage, i );
         // get the left word of every linkage
         std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
         // if thete is a label that match AND its left word is the predicate
         if( std::regex_match( l, noun_adject_object ) && triplet.p == l_word )
         {
           // then the object is that linkage's right word
           triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           triplet.cut( triplet.o );
           o_found = true;
           #ifdef DEBUG
             std::cout << "Adjective or noun object found: " << triplet.o << std::endl;
           #endif
         } // if
       } // for

       // still not found object, then search for preposition
       if( !o_found )
       {
         // go through every linkage
         for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
         {
           // get the linkage's label
           std::string l = linkage_get_link_label( linkage, i );
           // and left word
           std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
           // if there is a linkage which is a preposition and its left word is the predicate
           if( std::regex_match( l, preposition ) && triplet.p == word_i )
           {
             // found preposition
             // search for prep_object
             // then the temp will contain the preposition label's right word
             std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
             #ifdef DEBUG
               std::cout << "Preposition found! and its rigth word is: " << temp << std::endl;
             #endif

             for( int j = 0; j < linkage_get_num_links( linkage ); ++j )
             {
               // every linkages
               std::string m = linkage_get_link_label( linkage, j );
               // every left word
               std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) );

               // if there is a label with match and its left is exactly the preposition's right
               if( std::regex_match( m, prep_object ) && temp == word_j )
               {
                 triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) );
                 triplet.cut(triplet.o);

                 triplet.o += " ";
                 // save o
                 std::string temp = triplet.o;

                 triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) );
                 triplet.cut(triplet.o);
                 temp += triplet.o;

                 triplet.o = temp;
                 o_found = true;
                 #ifdef DEBUG
                   std::cout << "Object found: " << triplet.o << std::endl;
                 #endif
               } // if( std::regex_match( m, prep_object ) && temp == word_j ) END
             } // for J END
           } // if( std::regex_match( l, preposition ) && triplet.p == word_i ) END
         } // for I END
       } // if( !o_found ) END

       if( s_found && p_found && o_found )
       {
         // TODO: cut the words itself not the whole triplet
         // have to cut every word itself
         // triplet.cut();
         triplet.cut(triplet.s);
         triplet.cut(triplet.p);
         triplets.push_back( triplet );
         s_found = false;
         p_found = false;
         o_found = false;
       }
       // ###COPY END###
     }
     else // only one subject
     {
       // except Spx!!!
       // S left -> subject
       // S right -> predicate at first
       // if the word next to S right, is an element of Pv*, Pg* PP*, I*, TO, MVi*
       // then the new predicate will be that word

       std::string current_word;

       // search for subject (S_link)
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         // get the linkage's label
         std::string l = linkage_get_link_label( linkage, i );

         if( std::regex_match( l, subject ) )
         {
           // subject found
           triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
           s_found = true;
           current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           triplet.p = current_word;
           #ifdef DEBUG
             std::cout << "Subject found: " << triplet.s << std::endl;
           #endif
           break;
         }
       }

       if( s_found )
       {
         bool predicate_match = false;

         // search for the linkage that has triplet.s as left!
         do
         {
           predicate_match = false;

           for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
           {
             // every linkage's left word
             std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
             // every linkage's label
             std::string l = linkage_get_link_label( linkage, i );

             if( std::regex_match( l, predicate ) && l_word == current_word )
             {
               // found predicate
               triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
               current_word = triplet.p;
               predicate_match = true;
               break;
             }
           } // for END
         } while( predicate_match );

         p_found = true;
         #ifdef DEBUG
           std::cout << "Predicate found: " << triplet.p << std::endl;
         #endif
       } // if( s_found ) END

       // subject and predicate found
       // search for object

       // from k to linkage_get_num_links( linkage )
       // if there is any of the noun, adjective od preposition object then that
       // label's right will give the object.

       // !!! search only between labels that has triplet.p as left word !!!!!

       // search for noun object or adjective objects
       // go through all links
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         // get every linkage label
         std::string l = linkage_get_link_label( linkage, i );
         // get the left word of every linkage
         std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
         // if thete is a label that match AND its left word is the predicate
         if( std::regex_match( l, noun_adject_object ) && triplet.p == word_i )
         {
           // then the object is that linkage's right word
           triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           o_found = true;
           triplet.cut(triplet.o);
           #ifdef DEBUG
             std::cout << "Adjective or noun object found: " << triplet.o << std::endl;
           #endif
         } // if END
       } // for END

       // still not found object, then search for preposition
       if( !o_found )
       {
         // go through every linkage
         for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
         {
           // get the linkage's label
           std::string l = linkage_get_link_label( linkage, i );
           // and left word
           std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );

           // if there is a linkage which is a preposition and its left word is the predicate
           if( std::regex_match( l, preposition ) && triplet.p == word_i )
           {
             // found preposition
             // search for prep_object
             // then the temp will contain the preposition label's right word
             std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
             #ifdef DEBUG
               std::cout << "Preposition found! and its rigth word is: " << temp << std::endl;
             #endif

             // start search from there
             for( int j = 0; j < linkage_get_num_links( linkage ); ++j )
             {
               // every linkages
               std::string m = linkage_get_link_label( linkage, j );
               // every left word
               std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) );
               #ifdef DEBUG
                 if( std::regex_match( m, prep_object ) )
                     std::cout << m << " DOES match to (J.*)|(TI)|(I.*)|(ON)" << std::endl;
               #endif

               // if there is a label with match and its left is exactly the preposition's right
               if( std::regex_match( m, prep_object ) && temp == word_j )
               {
                 triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) );
                 triplet.cut(triplet.o);

                 triplet.o += " ";
                 // save o
                 std::string temp = triplet.o;

                 triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) );
                 triplet.cut(triplet.o);
                 temp += triplet.o;

                 triplet.o = temp;
                 #ifdef DEBUG
                   std::cout << "Object found: " << triplet.o << std::endl;
                 #endif
                 o_found = true;
               }
             } // for
           } // if
         } // for
       } // if( o_found ) END

       if( s_found && p_found && o_found )
       {
         // TODO: cut the words itself not the whole triplet ###FIXED###
         // have to cut every word itself
         // triplet.cut();

         triplet.cut(triplet.s);
         triplet.cut(triplet.p);
         triplets.push_back( triplet );
         s_found = false;
         p_found = false;
         o_found = false;
       }

     } // end else

     linkage_delete ( linkage );
   } // if( num_linkages > 0 ) END