Example #1
0
/*
 * Parse every candidate decomposition of a word and collect the stems found.
 *
 * Each node of `decomposed_word` carries one candidate string (node->word).
 * The string is parsed with the dictionary and options held in `ms`; for
 * every linkage in which find_stem() reports a stem position (>= 0), that
 * word and the linkage's first feature entry are appended to the result via
 * morpheme_list_add().
 *
 * Candidates for which sentence_create() fails are skipped.
 *
 * Returns the head of the collected morpheme list, or NULL if nothing was
 * added.
 */
Morpheme * analyse_decomposed_word(Morpho_structures ms, Decomposed_word *decomposed_word){
	Sentence sent;
	Linkage linkage;
	Decomposed_word *node;
	Morpheme *morpheme_list=NULL;
	int num_linkages , i;
	int stem_pos;
	char * diagram;
//	ms->opts->unify_features = FALSE;
	for(node=decomposed_word; node!=NULL; node=node->next){
		sent=sentence_create(node->word, ms->dict);
		if(!sent){
			continue;
		}
		num_linkages = sentence_parse(sent, ms->opts);
		for (i=0; i<num_linkages; i++){
			linkage = linkage_create(i, sent, ms->opts);
			if (!linkage){
				/* Nothing to inspect or release for this index. */
				continue;
			}
			if (PRINT_DIAGRAM){
				/* Reuse the linkage created above: creating a second one
				 * here would overwrite the handle and leak the first. */
				diagram = linkage_print_diagram(linkage);
				printf("%s\n", diagram);
				string_delete(diagram);
			}
			if ((stem_pos = find_stem(linkage))>=0){
				morpheme_list_add(ms, &morpheme_list,linkage->word[stem_pos], linkage->feature_array[0]);
			}
			linkage_delete(linkage);
		}
		sentence_delete(sent);
	}
	return morpheme_list;
}
Example #2
0
/*
 * Build a link-grammar Sentence from the Ruby string `str` using the
 * dictionary wrapped by `dict`, and return it wrapped in a Ruby data object
 * of class `self`.
 *
 * The result of sentence_create() is stored as-is, so a failed creation
 * yields a wrapper whose `sentence` field is NULL.
 *
 * NOTE(review): the wrapper is registered with plain free(), which releases
 * only the SentencePtr holder; the Sentence itself is presumably deleted
 * elsewhere -- confirm.
 */
VALUE create_sentence(const VALUE self, volatile VALUE str, VALUE dict) {
	char *input = StringValuePtr(str);
	DictionaryPtr *wrapped_dict = retrieve_dictionary(dict);
	Sentence parsed = sentence_create(input, wrapped_dict->dict);
	SentencePtr *holder = ALLOC(SentencePtr);

	holder->sentence = parsed;

	return Data_Wrap_Struct(self, 0, free, holder);
}
std::map<int, EdgeDescription> LinkParserAdapterImpl::parseSentence(std::string sentenceStr) {
    Parse_Options parseOptions = parse_options_create();
    int verbosity = psi_logger.getLoggingPriority() / 100 - 4;
    parse_options_set_verbosity(parseOptions, verbosity);
    starts_.clear();
    ends_.clear();
    edgeDescriptions_.clear();
    freeSentence();
    sentence_ = sentence_create(sentenceStr.c_str(), dictionary_);
    if (!sentence_) {
        std::stringstream errorSs;
        errorSs << "Link-parser failed to tokenize the input text.";
        throw ParserException(errorSs.str());
    }
    boost::algorithm::to_lower(sentenceStr);
    if (sentence_parse(sentence_, parseOptions)) {

        size_t currentPos = 0;
        size_t foundPos = 0;
        int wordNo = 0;
        while (wordNo < sentence_length(sentence_)) {
            std::string word(sentence_get_word(sentence_, wordNo));
            boost::algorithm::to_lower(word);
            foundPos = sentenceStr.find(word, currentPos);
            if (foundPos != std::string::npos) {
                starts_[wordNo] = foundPos;
                ends_[wordNo] = currentPos = foundPos + word.length();
            }
            ++wordNo;
        }

        Linkage linkage = linkage_create(0, sentence_, parseOptions);
        CNode * ctree = linkage_constituent_tree(linkage);
        extractEdgeDescriptions(ctree, linkage);
        linkage_free_constituent_tree(ctree);
        linkage_delete(linkage);

    } else {
        std::stringstream errorSs;
        errorSs << "Link-parser failed to parse the input text.\n"
            << "Your input text is probably not a correct sentence.";
        WARN(errorSs.str());
    }
    return edgeDescriptions_;
}
Example #4
0
/*
 *  call-seq:
 *     LinkParser::Sentence.new( str, dict )   -> sentence
 *
 *  Create a new LinkParser::Sentence object from the given input string
 *  using the specified LinkParser::Dictionary.
 *
 *     dict = LinkParser::Dictionary.new
 *     LinkParser::Sentence.new( "The boy runs", dict )  #=> #<LinkParser::Sentence:0x5481ac>
 *
 *  Raises a RuntimeError if the object has already been initialized, and a
 *  link-parser error (via rlink_raise_lp_error) if the sentence cannot be
 *  created.
 */
static VALUE
rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
	struct rlink_dictionary *dict_wrapper;
	struct rlink_sentence *wrapper;
	Sentence parsed;

	/* Refuse to set up an object that check_sentence() already accepts. */
	if ( check_sentence(self) ) {
		rb_raise( rb_eRuntimeError,
				  "Cannot re-initialize a sentence once it's been created." );
	}

	dict_wrapper = rlink_get_dict( dictionary );

	parsed = sentence_create( StringValueCStr(input_string), dict_wrapper->dict );
	if ( !parsed )
		rlink_raise_lp_error();

	wrapper = rlink_sentence_alloc();
	DATA_PTR( self ) = wrapper;

	/* Keep the dictionary VALUE alongside the sentence; no options yet. */
	wrapper->sentence = parsed;
	wrapper->dictionary = dictionary;
	wrapper->options = Qnil;

	return self;
}
Example #5
0
int main(int argc, char * argv[])
{
	FILE            *input_fh = stdin;
	Dictionary      dict;
	const char     *language="en";  /* default to english, and not locale */
	int             num_linkages, i;
	Label           label = NO_LABEL;
	const char      *codeset;
	const char      *locale = NULL;
	Command_Options *copts;
	Parse_Options   opts;
	bool batch_in_progress = false;

#if LATER
	/* Try to catch the SIGWINCH ... except this is not working. */
	struct sigaction winch_act;
	winch_act.sa_handler = winch_handler;
	winch_act.sa_sigaction = NULL;
	sigemptyset (&winch_act.sa_mask);
	winch_act.sa_flags = 0;
	sigaction (SIGWINCH, &winch_act, NULL);
#endif

	i = 1;
	if ((argc > 1) && (argv[1][0] != '-')) {
		/* the dictionary is the first argument if it doesn't begin with "-" */
		language = argv[1];
		i++;
	}

#if !defined(_MSC_VER) && !defined(__MINGW32__)
	/* Get the locale from the environment...
	 * Perhaps we should someday get it from the dictionary ??
	 */
	locale = setlocale(LC_ALL, "");

	/* Check to make sure the current locale is UTF8; if its not,
	 * then force-set this to the english utf8 locale
	 */
	codeset = nl_langinfo(CODESET);
	if (!strstr(codeset, "UTF") && !strstr(codeset, "utf"))
	{
		fprintf(stderr,
		    "%s: Warning: locale %s was not UTF-8; force-setting to en_US.UTF-8\n",
		     argv[0], codeset);
		locale = setlocale(LC_CTYPE, "en_US.UTF-8");
	}
#else
 #pragma message("WARNING: Windows console (cmd.exe) does not support unicode input!\nWill attempt to convert from the native encoding!");
	fprintf(stderr,
	    "%s: Warning: Windows console (cmd.exe) does not support unicode\n"
	    "input!  Will attempt to convert from the native encoding!", argv[0]);
#endif

	for (; i<argc; i++)
	{
		if (argv[i][0] == '-' && strcmp("--version", argv[i]) == 0)
		{
			printf("Version: %s\n", linkgrammar_get_version());
			exit(0);
		}
	}

	copts = command_options_create();
	opts = copts->popts;
	if (copts == NULL || opts == NULL || copts->panic_opts == NULL)
	{
		fprintf(stderr, "%s: Fatal error: unable to create parse options\n", argv[0]);
		exit(-1);
	}

	if (language && *language)
		dict = dictionary_create_lang(language);
	else
		dict = dictionary_create_default_lang();

	if (dict == NULL)
	{
		fprintf(stderr, "%s: Fatal error: Unable to open dictionary.\n", argv[0]);
		exit(-1);
	}

	setup_panic_parse_options(copts->panic_opts);
	copts->panic_mode = true;

	parse_options_set_max_parse_time(opts, 30);
	parse_options_set_linkage_limit(opts, 1000);
	parse_options_set_min_null_count(opts, 0);
	parse_options_set_max_null_count(opts, 0);
	parse_options_set_short_length(opts, 16);

	/* The English and Russian dicts use a cost of 2.7, which allows
	 * regexes with a fractional cost of less than 1 to be used with
	 * rules that have a cost of 2.0.
	 */
	parse_options_set_disjunct_cost(opts, 2.7);

	/* Process the command line commands */
	for (i = 1; i<argc; i++)
	{
		if (argv[i][0] == '-')
		{
			int rc;
			if (argv[i][1] == '!' || argv[i][1] == '-')
				rc = issue_special_command(argv[i]+2, copts, dict);
			else
				rc = issue_special_command(argv[i]+1, copts, dict);

			if (rc)
				print_usage(argv[0]);
		}
	}

	check_winsize(copts);

#if !defined(_MSC_VER) && !defined(__MINGW32__)
	prt_error("Info: Using locale %s.", locale);
#endif
	prt_error("Info: Dictionary version %s.",
		linkgrammar_get_dict_version(dict));
	prt_error("Info: Library version %s. Enter \"!help\" for help.",
		linkgrammar_get_version());

	/* Main input loop */
	while (1)
	{
		char *input_string;
		Sentence sent = NULL;

		verbosity = parse_options_get_verbosity(opts);
		debug = parse_options_get_debug(opts);
		test = parse_options_get_test(opts);

		input_string = fget_input_string(input_fh, stdout, copts);
		check_winsize(copts);

		if (NULL == input_string)
		{
			if (input_fh == stdin) break;
			fclose (input_fh);
			input_fh = stdin;
			continue;
		}

		if ((strcmp(input_string, "!quit") == 0) ||
		    (strcmp(input_string, "!exit") == 0)) break;

		/* We have to handle the !file command inline; its too hairy
		 * otherwise ... */
		if (strncmp(input_string, "!file", 5) == 0)
		{
			char * filename = &input_string[6];
			input_fh = fopen(filename, "r");
			if (NULL == input_fh)
			{
				int perr = errno;
				fprintf(stderr, "Error: %s (%d) %s\n",
				        filename, perr, strerror(perr));
				input_fh = stdin;
				continue;
			}
			continue;
		}

		/* If the input string is just whitespace, then ignore it. */
		if (strspn(input_string, " \t\v") == strlen(input_string)) continue;

		if (special_command(input_string, copts, dict)) continue;

		if (!copts->batch_mode) batch_in_progress = false;
		if ('\0' != test[0])
		{
			/* In batch mode warn only once.
			 * In auto-next-linkage mode don't warn at all. */
			if (!batch_in_progress && (NULL == strstr(test, ",auto-next-linkage,")))
			{
				fflush(stdout);
				/* Remind the developer this is a test mode. */
				fprintf(stderr, "Warning: Tests enabled: %s\n", test);
				if (copts->batch_mode) batch_in_progress = true;
			}
		}
		
		if (copts->echo_on)
		{
			printf("%s\n", input_string);
		}

		if (copts->batch_mode)
		{
			label = strip_off_label(input_string);
		}

#ifdef USE_VITERBI
		/* Compile-time optional, for now, since it don't work yet. */
		if (parse_options_get_use_viterbi(opts))
		{
			viterbi_parse(input_string, dict);
		}
		else
#endif
		{
			sent = sentence_create(input_string, dict);

			/* First parse with cost 0 or 1 and no null links */
			// parse_options_set_disjunct_cost(opts, 2.7);
			parse_options_set_min_null_count(opts, 0);
			parse_options_set_max_null_count(opts, 0);
			parse_options_reset_resources(opts);

			num_linkages = sentence_parse(sent, opts);

			/* num_linkages is negative only on a hard-error;
			 * typically, due to a zero-length sentence.  */
			if (num_linkages < 0)
			{
				sentence_delete(sent);
				sent = NULL;
				continue;
			}
#if 0
			/* Try again, this time omitting the requirement for
			 * definite articles, etc. This should allow for the parsing
			 * of newspaper headlines and other clipped speech.
			 *
			 * XXX Unfortunately, this also allows for the parsing of
			 * all sorts of ungrammatical sentences which should not
			 * parse, and leads to bad parses of many other unparsable
			 * but otherwise grammatical sentences.  Thus, this trick
			 * pretty much fails; we leave it here to document the
			 * experiment.
			 */
			if (num_linkages == 0)
			{
				parse_options_set_disjunct_cost(opts, 4.5);
				num_linkages = sentence_parse(sent, opts);
				if (num_linkages < 0) continue;
			}
#endif

			/* Try using a larger list of disjuncts */
			/* XXX fixme: the lg_expand_disjunct_list() routine is not
			 * currently a part of the public API; it should be made so,
			 * or this expansion idea should be abandoned... not sure which.
			 */
			if ((num_linkages == 0) && parse_options_get_use_cluster_disjuncts(opts))
			{
				int expanded;
				if (verbosity > 0) fprintf(stdout, "No standard linkages, expanding disjunct set.\n");
				parse_options_set_disjunct_cost(opts, 3.9);
				expanded = lg_expand_disjunct_list(sent);
				if (expanded)
				{
					num_linkages = sentence_parse(sent, opts);
				}
				if (0 < num_linkages) printf("Got One !!!!!!!!!!!!!!!!!\n");
			}

			/* If asked to show bad linkages, then show them. */
			if ((num_linkages == 0) && (!copts->batch_mode))
			{
				if (copts->display_bad)
				{
					num_linkages = sentence_num_linkages_found(sent);
				}
			}

			/* Now parse with null links */
			if (num_linkages == 0 && !copts->batch_mode)
			{
				if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n");

				if (copts->allow_null)
				{
					/* XXX should use expanded disjunct list here too */
					parse_options_set_min_null_count(opts, 1);
					parse_options_set_max_null_count(opts, sentence_length(sent));
					num_linkages = sentence_parse(sent, opts);
				}
			}

			if (verbosity > 0)
			{
				if (parse_options_timer_expired(opts))
					fprintf(stdout, "Timer is expired!\n");

				if (parse_options_memory_exhausted(opts))
					fprintf(stdout, "Memory is exhausted!\n");
			}

			if ((num_linkages == 0) &&
				copts->panic_mode &&
				parse_options_resources_exhausted(opts))
			{
				/* print_total_time(opts); */
				batch_errors++;
				if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n");
				parse_options_reset_resources(copts->panic_opts);
				parse_options_set_verbosity(copts->panic_opts, verbosity);
				num_linkages = sentence_parse(sent, copts->panic_opts);
				if (verbosity > 0)
				{
					if (parse_options_timer_expired(copts->panic_opts))
						fprintf(stdout, "Panic timer is expired!\n");
				}
			}

			/* print_total_time(opts); */

			if (copts->batch_mode)
			{
				batch_process_some_linkages(label, sent, copts);
			}
			else
			{
				int c = process_some_linkages(sent, copts);
				if (c == EOF)
				{
					sentence_delete(sent);
					sent = NULL;
					break;
				}
			}
			fflush(stdout);

			sentence_delete(sent);
			sent = NULL;
		}
	}

	if (copts->batch_mode)
	{
		/* print_time(opts, "Total"); */
		fprintf(stderr,
				"%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s");
	}

	/* Free stuff, so that mem-leak detectors don't commplain. */
	command_options_delete(copts);
	dictionary_delete(dict);
	fget_input_string(NULL, NULL, NULL);

	printf ("Bye.\n");
	return 0;
}
Example #6
0
int main(int argc, char * argv[])
{
	FILE            *input_fh = stdin;
	Dictionary      dict;
	Sentence        sent;
	const char     *language="en";  /* default to english, and not locale */
	int             pp_on=TRUE;
	int             af_on=TRUE;
	int             cons_on=TRUE;
	int             num_linkages, i;
	char            *input_string;
	Label           label = NO_LABEL;
	const char      *codeset;
       /*############################################################################################*/
        int             num=0;
        FILE            *fp2,*fp_word_info,*fp_num,*fp_lname,*fp_rel,*fp_lcount,*fp_word_cat,*fp_word;
       /*##############################################################################################*/

	i = 1;
        /*############################################################################################*/
        if (argc > 4){
                 num =atoi(argv[4]);
                 num=num-1;
///             language = argv[1];
       /*####################################################################################*/
//
//	if ((argc > 1) && (argv[1][0] != '-')) {
		/* the dictionary is the first argument if it doesn't begin with "-" */
//		language = argv[1];
		i++;
	}
       /*################################*/
       language = argv[1];
       /*################################*/
 
	/* Get the locale from the environment... 
	 * perhaps we should someday get it from the dictionary ??
	 */
	setlocale(LC_ALL, "");

	/* Check to make sure the current locale is UTF8; if its not,
	 * then force-set this to the english utf8 locale 
	 */
	codeset = nl_langinfo(CODESET);
	if (!strstr(codeset, "UTF") && !strstr(codeset, "utf"))
	{
		setlocale(LC_CTYPE, "en_US.UTF-8");
	}
 /*########################################*/
	for (; i<argc-3; i++) {
 /*########################################*/
		if (argv[i][0] == '-') {
			if (strcmp("--version", argv[i])==0) {
				printf("Version: %s\n", linkgrammar_get_version());
				exit(0);
			} else if (strcmp("-ppoff", argv[i])==0) {
				pp_on = FALSE;
			} else if (strcmp("-coff", argv[i])==0) {
				cons_on = FALSE;
			} else if (strcmp("-aoff", argv[i])==0) {
				af_on = FALSE;
			} else if (strcmp("-batch", argv[i])==0) {
			} else if (strncmp("-!", argv[i],2)==0) {
			} else {
				print_usage(argv[0]);
			}
		} else {
			print_usage(argv[0]);
		}
	}

	opts = parse_options_create();
	if (opts == NULL) {
		fprintf(stderr, "%s: Fatal error: unable to create parse options\n", argv[0]);
		exit(-1);
	}

	panic_parse_opts = parse_options_create();
	if (panic_parse_opts == NULL) {
		fprintf(stderr, "%s: Fatal error: unable to create panic parse options\n", argv[0]);
		exit(-1);
	}
	setup_panic_parse_options(panic_parse_opts);
	parse_options_set_max_sentence_length(opts, 170);
	parse_options_set_panic_mode(opts, TRUE);
	parse_options_set_max_parse_time(opts, 30);
	parse_options_set_linkage_limit(opts, 1000);
	parse_options_set_short_length(opts, 10);
       /*##########################################*/
//        parse_options_set_display_on(opts, TRUE);
       /*##########################################*/

	if(language && *language)
		dict = dictionary_create_lang(language);
	else
		dict = dictionary_create_default_lang();

	if (dict == NULL) {
               /*###########################################################################*/
		fprintf(stderr, "%s: Fatal error: Unable to open  dictionary.\n", argv[1]);
                /*##########################################################################*/
		exit(-1);
	}

	/* process the command line like commands */
	for (i=1; i<argc; i++) {
		if ((strcmp("-pp", argv[i])==0) ||
			(strcmp("-c", argv[i])==0) ||
			(strcmp("-a", argv[i])==0))
		{
			i++;
		}
		else if ((argv[i][0] == '-') && (strcmp("-ppoff", argv[i])!=0) &&
		         (argv[i][0] == '-') && (strcmp("-coff", argv[i])!=0) &&
		         (argv[i][0] == '-') && (strcmp("-aoff", argv[i])!=0))
		{
			if (argv[i][1] == '!')
				issue_special_command(argv[i]+2, opts, dict);
			else
				issue_special_command(argv[i]+1, opts, dict);
		}
	}

	verbosity = parse_options_get_verbosity(opts);

	/* Main input loop */
	while (1)
	{
		input_string = fget_input_string(input_fh, stdout, opts);

		if (NULL == input_string)
		{
			if (input_fh == stdin) break;
			fclose (input_fh);
			input_fh = stdin;
			continue;
		}

		if ((strcmp(input_string, "quit\n")==0) ||
			(strcmp(input_string, "exit\n")==0)) break;

		/* We have to handle the !file command inline; its too hairy
		 * otherwise ... */
		if (strncmp(input_string, "!file", 5) == 0)
		{
			char * filename = &input_string[6];
			input_fh = fopen(filename, "r");
			if (NULL == input_fh)
			{
				int perr = errno;
				fprintf(stderr, "Error: %s (%d) %s\n",
				        filename, perr, strerror(perr));
				input_fh = stdin;
				continue;
			}
			continue;
		}

		if (special_command(input_string, dict)) continue;
		if (parse_options_get_echo_on(opts)) {
			printf("%s", input_string);
		}

		if (parse_options_get_batch_mode(opts)) {
			label = strip_off_label(input_string);
		}

		sent = sentence_create(input_string, dict);

		if (sent == NULL) continue;

		if (sentence_length(sent) > parse_options_get_max_sentence_length(opts)) {
			if (verbosity > 0) {
				fprintf(stdout,
				       "Sentence length (%d words) exceeds maximum allowable (%d words)\n",
					sentence_length(sent), parse_options_get_max_sentence_length(opts));
			}
			sentence_delete(sent);
			continue;
		}

		/* First parse with cost 0 or 1 and no null links */
		parse_options_set_disjunct_cost(opts, 2);
		parse_options_set_min_null_count(opts, 0);
		parse_options_set_max_null_count(opts, 0);
		parse_options_reset_resources(opts);

		num_linkages = sentence_parse(sent, opts);

              //############################################################################################
               if(num+1>num_linkages && num_linkages != 0)
               {
                 sprintf(link_info_filename,"%s/%s_tmp/linkid_cat.txt",argv[2],argv[3]);
                 fp2 = fopen(link_info_filename,"a");
                 if(fp2==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}
                 fprintf(fp2,";~~~~~~~~~~\n");
                 fclose(fp2);
                 sprintf(link_info_filename,"%s/%s_tmp/linkid_word.txt",argv[2],argv[3]);
                 fp_word= fopen(link_info_filename, "a");
        	 if(fp_word==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}
                 fprintf(fp_word,";~~~~~~~~~~\n");
                 fclose(fp_word);                 

	         sprintf(link_info_filename,"%s/%s_tmp/link_numeric_word.txt",argv[2],argv[3]);
        	 fp_word_info= fopen(link_info_filename, "a");
	         if(fp_word_info==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}
                 fprintf(fp_word_info,";~~~~~~~~~~\n");
                 fclose(fp_word_info);

	         sprintf(link_info_filename,"%s/%s_tmp/link_name_expand.txt",argv[2],argv[3]);
        	 fp_lname =fopen(link_info_filename,"a");
	         if(fp_lname==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}
                 fprintf(fp_lname,";~~~~~~~~~~\n");
                 fclose(fp_lname);

        	 sprintf(link_info_filename,"%s/%s_tmp/link_relation_info.txt",argv[2],argv[3]);
	         fp_rel = fopen(link_info_filename,"a");
        	 if(fp_rel==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}
                 fprintf(fp_rel,";~~~~~~~~~~\n");
                 fclose(fp_rel);

                 
                /* sprintf(link_info_filename,"%s/%s_tmp/constituents.txt",argv[2],argv[3]);
                 fp = fopen(link_info_filename,"a");
                 if(fp==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}
                 fprintf(fp,"\n");
                 fprintf(fp,";~~~~~~~~~~\n");
                 fclose(fp);*/
                }
                 sprintf(link_info_filename,"%s/%s_tmp/linkage_count.txt",argv[2],argv[3]);
                 fp_lcount = fopen(link_info_filename,"a");
                 if(fp_lcount==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}
                 fprintf(fp_lcount, "(Found maximum of %d linkages )\n",num_linkages);
                 fprintf(fp_lcount,";~~~~~~~~~~\n");
                 fclose(fp_lcount);
                //############################################################################################
        
		if (num_linkages < 0) continue;
		/* Now parse with null links */
		if ((num_linkages == 0) && (!parse_options_get_batch_mode(opts))) {
			if (verbosity > 0) //fprintf(stdout, "No complete linkages found.\n");
                         
        //############################################################################################
                     /*  This part of the code written by Maha Laxmi and Shirisha Manju
                     *   if no complete linkage is found then redirect the output to standard output in clips format*/
         sprintf(link_info_filename,"%s/%s_tmp/number.txt",argv[2],argv[3]);
         fp_num =fopen(link_info_filename,"a");
         if(fp_num==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}

/*         sprintf(link_info_filename,"%s/%s_tmp/constituents.txt",argv[2],argv[3]);
         fp_cons= fopen(link_info_filename,"a");
         if(fp_cons==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}*/

         sprintf(link_info_filename,"%s/%s_tmp/linkid_word.txt",argv[2],argv[3]);
         fp_word= fopen(link_info_filename, "a");
         if(fp_word==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}

         sprintf(link_info_filename,"%s/%s_tmp/link_numeric_word.txt",argv[2],argv[3]);
         fp_word_info= fopen(link_info_filename, "a");
         if(fp_word_info==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}

         sprintf(link_info_filename,"%s/%s_tmp/linkid_cat.txt",argv[2],argv[3]);
         fp_word_cat= fopen(link_info_filename, "a");
         if(fp_word_cat==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}

         sprintf(link_info_filename,"%s/%s_tmp/link_name_expand.txt",argv[2],argv[3]);
         fp_lname =fopen(link_info_filename,"a");
         if(fp_lname==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}

         sprintf(link_info_filename,"%s/%s_tmp/link_relation_info.txt",argv[2],argv[3]);
         fp_rel = fopen(link_info_filename,"a");
         if(fp_rel==NULL) {printf("Could not open %s for writing\n",link_info_filename);exit(1);}

         fprintf(fp_word, "\n(No complete linkages found)\n");
         fprintf(fp_word_cat, "\n(No complete linkages found)\n");
         fprintf(fp_word_info, "\n(No complete linkages found)\n");
         fprintf(fp_lname, "\n(No complete linkages found)\n");
         fprintf(fp_rel,"\n(No complete linkages found)\n");
        
         fprintf(fp_word_cat,";~~~~~~~~~~\n");  //  fprintf(fp_cons,";~~~~~~~~~~\n");
         fprintf(fp_lname,";~~~~~~~~~~\n");       fprintf(fp_rel,";~~~~~~~~~~\n");
         fprintf(fp_word_info,";~~~~~~~~~~\n");   fprintf(fp_word,";~~~~~~~~~~\n");

        /* fclose(fp_cons);  */      fclose(fp_word_cat);  fclose(fp_word_info);
         fclose(fp_lname);       fclose(fp_rel);       fclose(fp_word);

           continue;
  /*###############################################################################################################*/               

			if (parse_options_get_allow_null(opts)) {
				parse_options_set_min_null_count(opts, 1);
				parse_options_set_max_null_count(opts, sentence_length(sent));
				num_linkages = sentence_parse(sent, opts);
			}
		}

		if (parse_options_timer_expired(opts)) {
			if (verbosity > 0) fprintf(stdout, "Timer is expired!\n");
		}
		if (parse_options_memory_exhausted(opts)) {
			if (verbosity > 0) fprintf(stdout, "Memory is exhausted!\n");
		}

		if ((num_linkages == 0) &&
			parse_options_resources_exhausted(opts) &&
			parse_options_get_panic_mode(opts)) {
			/* print_total_time(opts); */
			if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n");
			parse_options_reset_resources(panic_parse_opts);
			parse_options_set_verbosity(panic_parse_opts, verbosity);
			num_linkages = sentence_parse(sent, panic_parse_opts);
			if (parse_options_timer_expired(panic_parse_opts)) {
				if (verbosity > 0) fprintf(stdout, "Timer is expired!\n");
			}
		}

		/* print_total_time(opts); */

		if (parse_options_get_batch_mode(opts)) {
                        //#######################################################################
                            batch_process_some_linkages(label, sent, opts,argv[2],argv[3]);
                        //#########################################################################
   //			batch_process_some_linkages(label, sent, opts);
		}
               /*###############################################################################*/
               //   send num as an argument to process_some_linkages
                else {
                        int c = process_some_linkages(sent, opts,num,argv[2],argv[3]);
                        if (c == EOF) break;
                      }
              /*################################################################################*/

/*		else {
			int c = process_some_linkages(sent, opts);
			if (c == EOF) break;
		}*/

		sentence_delete(sent);
	}

	if (parse_options_get_batch_mode(opts)) {
		/* print_time(opts, "Total"); */
/*		fprintf(stderr,
				"%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s");*/
	}

	parse_options_delete(panic_parse_opts);
	parse_options_delete(opts);
	dictionary_delete(dict);

//	printf ("Bye.\n");
	return 0;
}
Example #7
0
/*******************************************************************************************
*   Translates the sentences in the input_string array, stopping at the first
*   entry equal to "end".
*
*   Inputs: input_string: array of sentences, terminated by an "end" entry
*           all_linkages: if true, translate every linkage of each sentence;
*                         otherwise only the first one (when any exists)
*           out_to_file : if true, also write the results to separate files
*                         in the out directory:
*                         out/src-x-y.txt  : a linkage of the source sentence; x is the
*                                            1-based index of the sentence in the array
*                                            and y the 1-based index of the linkage
*                         out/trg-x-y-z.txt: a linkage of the target sentence; x and y as
*                                            above, z is the z-th translation of linkage
*                                            x-y (a source linkage may have more than one
*                                            corresponding target linkage)
*
*   Every linkage diagram and translation is also printed to stdout.
*   The program exits (status 0) if the dictionary, the transfer tables, a
*   sentence or an output file cannot be created.
*******************************************************************************************/
void translate(char  input_string[][200], int all_linkages, int out_to_file){
    Dictionary    dict;
    Parse_Options opts;
    Sentence      sent;
    Linkage       src_linkage, trg_linkage;
	Transfer trans;
    char *        diagram;
	FILE *		  fp;
    int           i,  j, num_src_linkages, num_trg_linkages;
    /* Large enough for "out/trg-%d-%d-%d.txt" with any three int values. */
    char          txfilename[64];
    char          output_string[200];
	int n;

	opts  = parse_options_create();
	parse_options_set_verbosity (opts, FALSE);
	parse_options_set_display_walls(opts, TRUE);
	parse_options_set_display_postscript(opts, TRUE);

    dict  = dictionary_create("4.0.dict", "4.0.knowledge", NULL, NULL, "morphology/morphemes.dict");
	if (!dict){
		fprintf(stderr, "%s\n", lperrmsg);
	    parse_options_delete(opts);
		printf("size : %lld", space_in_use);
		exit(0);
	}
	trans=transfer_create("translation/mapfile.txt", "translation/links_list.txt","translation/lexicon.txt", "target/trg.dict" );
	if (!trans){
		fprintf(stderr, "%s\n", maperrmsg);
		fprintf(stderr, "%s\n", lperrmsg);
		printf("size : %lld", space_in_use);
		exit(0);
	}

	for (n=0; strcmp (input_string[n],"end")!=0; n++ ){

		sent = sentence_create(input_string[n], dict);
		if (!sent){
			fprintf(stderr, "%s\n", lperrmsg);
			parse_options_delete(opts);
			printf("size : %lld", space_in_use);
			exit(0);
		}
		printf ("\n************************************\n%d-%s\n************************************\n", n+1, input_string[n]);
		num_src_linkages = sentence_parse(sent, opts);

		/* Without all_linkages only the first linkage is used; a
		 * non-positive parse result means there is none. */
		if (!all_linkages){
			num_src_linkages = (num_src_linkages > 0) ? 1 : 0;
		}

		for (i=0; i<num_src_linkages;i++) {
			src_linkage = linkage_create(i, sent, opts);
			diagram = linkage_print_diagram(src_linkage);

			printf ("\nLinkage No. %d-%d\n\n", n+1, i+1);
			printf("%s\n", diagram);
			if (out_to_file){
				sprintf(txfilename,"out/src-%d-%d.txt", n+1, i+1);
				fp=fopen(txfilename,"w+");
				if (fp==NULL){
                    fprintf(stderr, "%s%s\n", "Unable to open ", txfilename);
                    printf("size : %lld", space_in_use);
                    exit(0);
				}
				fprintf(fp,"%s",diagram);
				fclose(fp);
			}
			string_delete(diagram);

			num_trg_linkages = transfer_linkage_driver(trans, src_linkage);
			if(num_trg_linkages==0){
				fprintf(stderr, "%s\n", maperrmsg);
			}
			else{
				parse_options_set_display_walls (trans->opts, TRUE);
				for (j=0; j<num_trg_linkages; j++){
					//second parameter should always be 0 !?
					trg_linkage=trans_linkage_create(trans, 0, trans->sent[j], trans->opts);
					diagram = linkage_print_diagram(trg_linkage);

					/* Fill output_string before it is printed; it used to be
					 * read uninitialized (or stale) at this point. */
					output_string[0] = '\0';
					extract_sent (trg_linkage, output_string);

					printf ("\nTranslation No. %d_%d_%d\n\n", n+1,i+1,j+1);
                    printf("Translation: %s\n", output_string);
					printf("%s\n", diagram);
					if (out_to_file){
                        sprintf(txfilename,"out/trg-%d-%d-%d.txt", n+1, i+1, j+1);
						fp=fopen(txfilename,"w+");
                        if (fp==NULL){
                            fprintf(stderr, "%s%s\n", "Unable to open ", txfilename);
                            printf("size : %lld", space_in_use);
                            exit(0);
                        }
                        fprintf(fp, "Translation:%s\n", output_string);
						fprintf(fp,"%s",diagram);
						fclose(fp);
					}
					string_delete(diagram);
					linkage_delete(trg_linkage);
				}
			}
			linkage_delete(src_linkage);
		}
		fprintf(stderr, "%s\n", lperrmsg);
		sentence_delete(sent);

	}


	transfer_delete(trans);
    dictionary_delete(dict);
    parse_options_delete(opts);

}
Example #8
0
/*******************************************************************************************
*   This function parses a list of sentences and, when out_to_file is set, writes the
*   results to separate files in the out directory.
*   The files are named out/src-x-y.txt, where x is the index of the sentence in the array
*   and y is the index of the linkage (both starting at 1).
*******************************************************************************************/
void normal_parse(char input_string[][200], int unify_features, int all_linkages, int out_to_file){

    Dictionary    dict;
    Parse_Options opts;
	int n;
	Sentence      sent;
    Linkage       linkage;
    char *        diagram;
    int           i, num_linkages;
    char          txfilename[30];
	FILE *		  fp;


	opts  = parse_options_create();
	parse_options_set_display_walls(opts, TRUE);
	parse_options_set_display_postscript(opts, TRUE);
	parse_options_set_unify_features(opts, unify_features);

    dict  = dictionary_create("4.0.dict", "4.0.knowledge", NULL, NULL, "morphology/morphemes.dict");
	if (!dict){
		fprintf(stderr, "%s\n", lperrmsg);
	    parse_options_delete(opts);
		printf("size : %lld", space_in_use);
		exit(0);
	}

	//setting opts->unify_features to TRUE enables unification
	for (n=0; strcmp (input_string[n],"end")!=0; n++ ){
		sent = sentence_create(input_string[n], dict);
		if (!sent){
			fprintf(stderr, "%s\n", lperrmsg);
			dictionary_delete(dict);
			parse_options_delete(opts);
			exit(0);
		}
		printf ("\n************************************\n%d-%s\n************************************\n", n+1, input_string[n]);
		num_linkages = sentence_parse(sent, opts);
		num_linkages = (all_linkages) ? num_linkages : (num_linkages!=0)*1 ;
		//Echos all linkages to screen, ps file and text file in ./out directory
		for (i=0; i<num_linkages; ++i) {
			linkage = linkage_create(i , sent, opts);
			diagram = linkage_print_diagram(linkage);

			printf ("\n Linkage No. %d-%d\n\n", n+1, i+1);
			printf("%s\n", diagram);
			if (out_to_file){
				sprintf(txfilename,"out/src-%d-%d.txt", n+1, i+1);
				fp=fopen(txfilename,"w+");
                if (fp==NULL){
                    fprintf(stderr, "%s%s\n", "Unable to open ", txfilename);
                    printf("size : %lld", space_in_use);
                    exit(0);
                }
				fprintf(fp,"%s",diagram);
				fclose(fp);
			}
			string_delete(diagram);
			linkage_delete(linkage);
 		}
		sentence_delete(sent);
	}

    dictionary_delete(dict);
    parse_options_delete(opts);

}
Example #9
0
/*
 * Command-line driver: reads sentences from stdin, parses each one against the
 * dictionary and hands the result to the linkage-processing routines.
 *
 * Usage, as handled below:
 *   argv[1]              dictionary file, when it does not start with '-'
 *   -pp FILE             post-process knowledge file
 *   -c FILE              constituent knowledge file
 *   -a FILE              affix file
 *   -ppoff/-coff/-aoff   disable the corresponding file
 *   -batch, -!...        accepted here; later passed to issue_special_command()
 *
 * Uses the file-level opts, panic_parse_opts, verbosity and batch_errors.
 * Returns 0 on normal termination; calls exit(-1) on fatal set-up errors.
 */
int main(int argc, char * argv[]) {

    Dictionary      dict;
    Sentence        sent;
    char            *dictionary_file=NULL;
    char            *post_process_knowledge_file=NULL;
    char            *constituent_knowledge_file=NULL;
    char            *affix_file=NULL;
    int             pp_on=TRUE;
    int             af_on=TRUE;
    int             cons_on=TRUE;
    int             num_linkages, i;
    char            input_string[MAXINPUT];
    Label           label = NO_LABEL;
    int             parsing_space_leaked, reported_leak, dictionary_and_option_space;


    i = 1;
    if ((argc > 1) && (argv[1][0] != '-')) {
        /* the dictionary is the first argument if it doesn't begin with "-" */
        dictionary_file = argv[1];
        i++;
    }

    /* First pass over the arguments: pick up file names and on/off switches.
     * NOTE(review): print_usage() presumably does not return -- confirm. */
    for (; i<argc; i++) {
        if (argv[i][0] == '-') {
            if (strcmp("-pp", argv[i])==0) {
                if ((post_process_knowledge_file != NULL) || (i+1 == argc))
                    print_usage(argv[0]);
                post_process_knowledge_file = argv[i+1];
                i++;
            } else
            if (strcmp("-c", argv[i])==0) {
                if ((constituent_knowledge_file != NULL) || (i+1 == argc))
                    print_usage(argv[0]);
                constituent_knowledge_file = argv[i+1];
                i++;
            } else
            if (strcmp("-a", argv[i])==0) {
                if ((affix_file != NULL) || (i+1 == argc)) print_usage(argv[0]);
                affix_file = argv[i+1];
                i++;
            } else if (strcmp("-ppoff", argv[i])==0) {
                pp_on = FALSE;
            } else if (strcmp("-coff", argv[i])==0) {
                cons_on = FALSE;
            } else if (strcmp("-aoff", argv[i])==0) {
                af_on = FALSE;
            } else if (strcmp("-batch", argv[i])==0) {
                /* handled by the second pass below */
            } else if (strncmp("-!", argv[i],2)==0) {
                /* handled by the second pass below */
            } else {
                print_usage(argv[0]);
            }
        } else {
            print_usage(argv[0]);
        }
    }

    /* Disabling post-processing while also naming its file is contradictory. */
    if (!pp_on && post_process_knowledge_file != NULL) print_usage(argv[0]);

    /* Fall back to the default file names for anything not given. */
    if (dictionary_file == NULL) {
        dictionary_file = "4.0.dict";
        fprintf(stderr, "No dictionary file specified.  Using %s.\n",
                dictionary_file);
    }

    if (af_on && affix_file == NULL) {
        affix_file = "4.0.affix";
        fprintf(stderr, "No affix file specified.  Using %s.\n", affix_file);
    }

    if (pp_on && post_process_knowledge_file == NULL) {
        post_process_knowledge_file = "4.0.knowledge";
        fprintf(stderr, "No post process knowledge file specified.  Using %s.\n",
                post_process_knowledge_file);
    }

    if (cons_on && constituent_knowledge_file == NULL) {
        constituent_knowledge_file = "4.0.constituent-knowledge";
        fprintf(stderr, "No constituent knowledge file specified.  Using %s.\n",
                constituent_knowledge_file);
    }

    opts = parse_options_create();
    if (opts == NULL) {
        fprintf(stderr, "%s\n", lperrmsg);
        exit(-1);
    }

    panic_parse_opts = parse_options_create();
    if (panic_parse_opts == NULL) {
        fprintf(stderr, "%s\n", lperrmsg);
        exit(-1);
    }
    setup_panic_parse_options(panic_parse_opts);
    parse_options_set_max_sentence_length(opts, 70);
    parse_options_set_panic_mode(opts, TRUE);
    parse_options_set_max_parse_time(opts, 30);
    parse_options_set_linkage_limit(opts, 1000);
    parse_options_set_short_length(opts, 10);

    dict = dictionary_create(dictionary_file,
                             post_process_knowledge_file,
                             constituent_knowledge_file,
                             affix_file);
    if (dict == NULL) {
        fprintf(stderr, "%s\n", lperrmsg);
        exit(-1);
    }

    /* process the command line like commands */
    for (i=1; i<argc; i++) {
        if ((strcmp("-pp", argv[i])==0) ||
            (strcmp("-c", argv[i])==0) ||
            (strcmp("-a", argv[i])==0)) {
            /* skip the file-name argument consumed by the first pass */
            i++;
        } else if ((argv[i][0] == '-') &&
                   (strcmp("-ppoff", argv[i])!=0) &&
                   (strcmp("-coff", argv[i])!=0) &&
                   (strcmp("-aoff", argv[i])!=0)) {
            issue_special_command(argv[i]+1, opts, dict);
        }
    }

    /* Baseline for the leak accounting done on every loop iteration. */
    dictionary_and_option_space = space_in_use;
    reported_leak = external_space_in_use = 0;
    verbosity = parse_options_get_verbosity(opts);

    while (fget_input_string(input_string, stdin, stdout, opts)) {

        if (space_in_use != dictionary_and_option_space + reported_leak) {
            fprintf(stderr, "Warning: %d bytes of space leaked.\n",
                    space_in_use-dictionary_and_option_space-reported_leak);
            reported_leak = space_in_use - dictionary_and_option_space;
        }

        if ((strcmp(input_string, "quit\n")==0) ||
            (strcmp(input_string, "exit\n")==0)) break;

        if (special_command(input_string, dict)) continue;
        if (parse_options_get_echo_on(opts)) {
            printf("%s", input_string);
        }

        if (parse_options_get_batch_mode(opts)) {
            label = strip_off_label(input_string);
        }

        sent = sentence_create(input_string, dict);

        if (sent == NULL) {
            if (verbosity > 0) fprintf(stderr, "%s\n", lperrmsg);
            if (lperrno != NOTINDICT) exit(-1);
            else continue;
        }
        if (sentence_length(sent) > parse_options_get_max_sentence_length(opts)) {
            /* Report first, delete afterwards: the message reads the sentence
             * length, so the sentence must still be alive here. */
            if (verbosity > 0) {
                fprintf(stdout,
                        "Sentence length (%d words) exceeds maximum allowable (%d words)\n",
                        sentence_length(sent), parse_options_get_max_sentence_length(opts));
            }
            sentence_delete(sent);
            continue;
        }

        /* First parse with cost 0 or 1 and no null links */
        parse_options_set_disjunct_cost(opts, 2);
        parse_options_set_min_null_count(opts, 0);
        parse_options_set_max_null_count(opts, 0);
        parse_options_reset_resources(opts);

        num_linkages = sentence_parse(sent, opts);

        /* Now parse with null links */
        if ((num_linkages == 0) && (!parse_options_get_batch_mode(opts))) {
            if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n");
            if (parse_options_get_allow_null(opts)) {
                parse_options_set_min_null_count(opts, 1);
                parse_options_set_max_null_count(opts, sentence_length(sent));
                num_linkages = sentence_parse(sent, opts);
            }
        }

        if (parse_options_timer_expired(opts)) {
            if (verbosity > 0) fprintf(stdout, "Timer is expired!\n");
        }
        if (parse_options_memory_exhausted(opts)) {
            if (verbosity > 0) fprintf(stdout, "Memory is exhausted!\n");
        }

        /* Last resort: retry with the panic options when resources ran out. */
        if ((num_linkages == 0) &&
            parse_options_resources_exhausted(opts) &&
            parse_options_get_panic_mode(opts)) {
            print_total_time(opts);
            if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n");
            parse_options_reset_resources(panic_parse_opts);
            parse_options_set_verbosity(panic_parse_opts, verbosity);
            num_linkages = sentence_parse(sent, panic_parse_opts);
            if (parse_options_timer_expired(panic_parse_opts)) {
                if (verbosity > 0) fprintf(stdout, "Timer is expired!\n");
            }
        }

        print_total_time(opts);

        if (parse_options_get_batch_mode(opts)) {
            batch_process_some_linkages(label, sent, opts);
        }
        else {
            process_some_linkages(sent, opts);
        }

        sentence_delete(sent);
        if (external_space_in_use != 0) {
            fprintf(stderr, "Warning: %d bytes of external space leaked.\n",
                    external_space_in_use);
        }
    }

    if (parse_options_get_batch_mode(opts)) {
        print_time(opts, "Total");
        fprintf(stderr,
                "%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s");
    }

    parsing_space_leaked = space_in_use - dictionary_and_option_space;
    if (parsing_space_leaked != 0) {
        fprintf(stderr, "Warning: %d bytes of space leaked during parsing.\n",
                parsing_space_leaked);
    }

    parse_options_delete(panic_parse_opts);
    parse_options_delete(opts);
    dictionary_delete(dict);

    /* After everything is released, only the parsing leak should remain. */
    if (space_in_use != parsing_space_leaked) {
        fprintf(stderr,
                "Warning: %d bytes of dictionary and option space leaked.\n",
                space_in_use - parsing_space_leaked);
    }
    else if (parsing_space_leaked == 0) {
        fprintf(stderr, "Good news: no space leaked.\n");
    }

    if (external_space_in_use != 0) {
        fprintf(stderr, "Warning: %d bytes of external space leaked.\n",
                external_space_in_use);
    }

    return 0;
}
Example #10
0
File: lg_py.c Project: bluemoon/nlp
/// Python-callable: parse the given text and collect the domain names of
/// every link in its first linkage.
///
/// args:    a single string, the sentence to parse.
/// returns: a tuple (list_of_domain_names, num_domains), where num_domains is
///          the domain count of the last link examined (0 when the linkage
///          has no links); None when the sentence yields no linkage;
///          NULL with a Python exception set on error.
static PyObject *domains(PyObject *self, PyObject *args) {
    Dictionary    dict;
    Parse_Options opts;
    Sentence      sent;
    Linkage       linkage = NULL;

    /// Link counts
    int   num_linkages = 0;
    int   links;
    int   i;
    int   j;
    int   num_domains = 0;
    int   failed = 0;
    const char *text;
    const char **names;

    PyObject *output_list;
    PyObject *temp;

    /// Parse the arguments before allocating anything, so that a bad call
    /// cannot leak.
    if (!PyArg_ParseTuple(args, "s", &text))
        return NULL;

    opts = parse_options_create();
    parse_options_set_verbosity(opts, -1);

    setlocale(LC_ALL, "");
    dict = dictionary_create_default_lang();

    if (!dict) {
        parse_options_delete(opts);
        /// An exception must be reported by returning NULL, not None.
        PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary");
        return NULL;
    }

    sent = sentence_create(text, dict);
    if (sent) {
        sentence_split(sent, opts);
        num_linkages = sentence_parse(sent, opts);
    }
    if (num_linkages > 0) {
        linkage = linkage_create(0, sent, opts);
    }

    if (!linkage) {
        /// Nothing to report: release everything and return None.
        if (sent) sentence_delete(sent);
        dictionary_delete(dict);
        parse_options_delete(opts);
        Py_INCREF(Py_None);
        return Py_None;
    }

    output_list = PyList_New(0);
    if (!output_list) failed = 1;

    /// The domain accessors take a link index, so iterate over the links.
    links = linkage_get_num_links(linkage);
    for (i = 0; !failed && i < links; i++) {
        num_domains = linkage_get_link_num_domains(linkage, i);
        names = linkage_get_link_domain_names(linkage, i);
        if (!names) continue;
        for (j = 0; j < num_domains; j++) {
            temp = PyString_FromString(names[j]);
            if (!temp || PyList_Append(output_list, temp) < 0) {
                Py_XDECREF(temp);
                failed = 1;
                break;
            }
            /// PyList_Append took its own reference; drop ours.
            Py_DECREF(temp);
        }
    }

    linkage_delete(linkage);
    sentence_delete(sent);
    dictionary_delete(dict);
    parse_options_delete(opts);

    if (failed) {
        Py_XDECREF(output_list);
        return NULL;
    }

    /// "N" hands our reference to the tuple, so the list is not leaked.
    return Py_BuildValue("Ni", output_list, num_domains);
}
Example #11
0
File: lg_py.c Project: bluemoon/nlp
/// Appends item to list and releases the caller's reference to item.
/// Returns 0 on success, -1 if item is NULL or the append fails.
static int lg_sentence_append_owned(PyObject *list, PyObject *item) {
    int rc;

    if (item == NULL) return -1;
    rc = PyList_Append(list, item);
    Py_DECREF(item);
    return rc;
}

/// This is the basic sentence dissection.
///
/// args:    a single string, the sentence to parse.
/// returns: for the first linkage of the sentence, the tuple
///            (words, link lengths, [left label, right label] pairs,
///             link labels, sublinkage count repeated once per link, diagram);
///          None when the sentence yields no linkage;
///          NULL with a Python exception set on error.
static PyObject *sentence(PyObject *self, PyObject *args) {
    Dictionary    dict;
    Parse_Options opts;
    Sentence      sent;
    Linkage       linkage = NULL;
    char *        diagram;

    /// Link counts
    int   num_linkages = 0;
    int   links;

    ///  Indexes for the iterators
    int   link_idx;
    int   word_idx;
    int   num_words;
    long  sub_linkages;
    int   failed = 0;

    const char *text;

    PyObject *output_list     = NULL;
    PyObject *word_list       = NULL;
    PyObject *word2_list      = NULL;
    PyObject *span_list       = NULL;
    PyObject *sublinkage_list = NULL;
    PyObject *_diagram        = NULL;
    PyObject *pair;

    /// Parse the arguments before allocating anything, so that a bad call
    /// cannot leak.
    if (!PyArg_ParseTuple(args, "s", &text))
        return NULL;

    opts = parse_options_create();
    parse_options_set_verbosity(opts, -1);
    parse_options_set_screen_width(opts, 50);

    setlocale(LC_ALL, "");
    dict = dictionary_create_default_lang();

    if (!dict) {
        parse_options_delete(opts);
        /// An exception must be reported by returning NULL, not None.
        PyErr_SetString(PyExc_RuntimeError, "Fatal error: Unable to open the dictionary");
        return NULL;
    }

    sent = sentence_create(text, dict);
    if (sent) {
        sentence_split(sent, opts);
        num_linkages = sentence_parse(sent, opts);
    }
    if (num_linkages > 0) {
        linkage = linkage_create(0, sent, opts);
    }

    if (!linkage) {
        /// Nothing to report: release everything and return None.
        if (sent) sentence_delete(sent);
        dictionary_delete(dict);
        parse_options_delete(opts);

        Py_INCREF(Py_None);
        return Py_None;
    }

    output_list     = PyList_New(0);
    word_list       = PyList_New(0);
    word2_list      = PyList_New(0);
    sublinkage_list = PyList_New(0);
    span_list       = PyList_New(0);
    if (!output_list || !word_list || !word2_list || !sublinkage_list || !span_list)
        failed = 1;

    /// Render the diagram exactly once; it does not depend on the link index.
    if (!failed) {
        diagram = linkage_print_diagram(linkage);
        _diagram = PyString_FromString(diagram ? diagram : "");
        if (diagram) linkage_free_diagram(diagram);
        if (!_diagram) failed = 1;
    }

    /// Get the lengths of everything
    num_words    = linkage_get_num_words(linkage);
    links        = linkage_get_num_links(linkage);
    sub_linkages = linkage_get_num_sublinkages(linkage);

    for (link_idx = 0; !failed && link_idx < links; link_idx++) {
        /// One sublinkage-count entry and one span entry per link
        if (lg_sentence_append_owned(sublinkage_list, PyLong_FromLong(sub_linkages)) < 0 ||
            lg_sentence_append_owned(span_list,
                PyInt_FromLong(linkage_get_link_length(linkage, link_idx))) < 0) {
            failed = 1;
            break;
        }

        /// Sub Group these (left and right labels)
        pair = PyList_New(0);
        if (!pair ||
            lg_sentence_append_owned(pair,
                PyString_FromString(linkage_get_link_llabel(linkage, link_idx))) < 0 ||
            lg_sentence_append_owned(pair,
                PyString_FromString(linkage_get_link_rlabel(linkage, link_idx))) < 0) {
            Py_XDECREF(pair);
            failed = 1;
            break;
        }
        /// Then add to the main list
        if (lg_sentence_append_owned(output_list, pair) < 0) {
            failed = 1;
            break;
        }

        /// Just the label
        if (lg_sentence_append_owned(word2_list,
                PyString_FromString(linkage_get_link_label(linkage, link_idx))) < 0) {
            failed = 1;
            break;
        }
    }

    for (word_idx = 0; !failed && word_idx < num_words; word_idx++) {
        if (lg_sentence_append_owned(word_list,
                PyString_FromString(linkage_get_word(linkage, word_idx))) < 0) {
            failed = 1;
        }
    }

    linkage_delete(linkage);
    sentence_delete(sent);
    dictionary_delete(dict);
    parse_options_delete(opts);

    if (failed) {
        Py_XDECREF(output_list);
        Py_XDECREF(word_list);
        Py_XDECREF(word2_list);
        Py_XDECREF(sublinkage_list);
        Py_XDECREF(span_list);
        Py_XDECREF(_diagram);
        return NULL;
    }

    /// "N" hands our references to the tuple, so nothing is leaked.
    return Py_BuildValue("NNNNNN", word_list, span_list, output_list, word2_list, sublinkage_list, _diagram);
}
Example #12
0
/**
 * Interactive command-line driver.
 *
 * Reads lines from stdin (or from a file selected with the "!file" command),
 * executes "!" special commands, and parses every other line as a sentence,
 * passing the result to the linkage-processing routines.
 *
 * argv[1], when it does not start with '-', selects the dictionary language;
 * "-name" / "--name" arguments are handed to issue_special_command().
 *
 * Returns 0 on normal termination; calls exit(-1) on fatal set-up errors.
 */
int main(int argc, char * argv[])
{
	FILE            *input_fh = stdin;
	Dictionary      dict;
	const char     *language = NULL;
	int             num_linkages;
	Label           label = NO_LABEL;
	Command_Options *copts;
	Parse_Options   opts;
	bool batch_in_progress = false;

	isatty_stdin = isatty(fileno(stdin));
	isatty_stdout = isatty(fileno(stdout));

#ifdef _WIN32
	/* If compiled with MSVC/MinGW, we still support running under Cygwin.
	 * This is done by checking running_under_cygwin to resolve
	 * incompatibilities. */
	const char *ostype = getenv("OSTYPE");
	if ((NULL != ostype) && (0 == strcmp(ostype, "cygwin")))
		running_under_cygwin = true;

	/* argv encoding is in the current locale. */
	argv = argv2utf8(argc);
	if (NULL == argv)
	{
		prt_error("Fatal error: Unable to parse command line\n");
		exit(-1);
	}

#ifdef _MSC_VER
	_set_printf_count_output(1); /* enable %n support for display_1line_help()*/
#endif /* _MSC_VER */

	win32_set_utf8_output();
#endif /* _WIN32 */

#if LATER
	/* Try to catch the SIGWINCH ... except this is not working. */
	struct sigaction winch_act;
	winch_act.sa_handler = winch_handler;
	winch_act.sa_sigaction = NULL;
	sigemptyset (&winch_act.sa_mask);
	winch_act.sa_flags = 0;
	sigaction (SIGWINCH, &winch_act, NULL);
#endif

	copts = command_options_create();
	if (copts == NULL || copts->panic_opts == NULL)
	{
		prt_error("Fatal error: unable to create parse options\n");
		exit(-1);
	}
	opts = copts->popts;

	setup_panic_parse_options(copts->panic_opts);
	copts->panic_mode = true;

	parse_options_set_max_parse_time(opts, 30);
	parse_options_set_linkage_limit(opts, 1000);
	parse_options_set_min_null_count(opts, 0);
	parse_options_set_max_null_count(opts, 0);
	parse_options_set_short_length(opts, 16);
	parse_options_set_islands_ok(opts, false);
	parse_options_set_display_morphology(opts, false);

	save_default_opts(copts); /* Options so far are the defaults */

	if ((argc > 1) && (argv[1][0] != '-')) {
		/* The dictionary is the first argument if it doesn't begin with "-" */
		language = argv[1];
	}

	/* Handle --help / --version before anything else. */
	for (int i = 1; i < argc; i++)
	{
		if (strcmp("--help", argv[i]) == 0)
		{
			print_usage(stdout, argv[0], copts, 0);
		}

		if (strcmp("--version", argv[i]) == 0)
		{
			printf("Version: %s\n", linkgrammar_get_version());
			printf("%s\n", linkgrammar_get_configuration());
			exit(0);
		}
	}

	/* Process command line variable-setting commands (only). */
	for (int i = 1; i < argc; i++)
	{
		if (argv[i][0] == '-')
		{
			/* Accept both "-var" and "--var". */
			const char *var = argv[i] + ((argv[i][1] != '-') ? 1 : 2);
			if ((var[0] != '!') && (0 > issue_special_command(var, copts, NULL)))
				print_usage(stderr, argv[0], copts, -1);
		}
		else if (i != 1)
		{
			prt_error("Fatal error: Unknown argument '%s'.\n", argv[i]);
			print_usage(stderr, argv[0], copts, -1);
		}
	}

	if (language && *language)
	{
		dict = dictionary_create_lang(language);
		if (dict == NULL)
		{
			prt_error("Fatal error: Unable to open dictionary.\n");
			exit(-1);
		}
	}
	else
	{
		dict = dictionary_create_default_lang();
		if (dict == NULL)
		{
			prt_error("Fatal error: Unable to open default dictionary.\n");
			exit(-1);
		}
	}

	/* Process the command line '!' commands */
	for (int i = 1; i < argc; i++)
	{
		if ((argv[i][0] == '-') && (argv[i][1] == '!'))
		{
			if (0 > issue_special_command(argv[i]+1, copts, dict))
				print_usage(stderr, argv[0], copts, -1);
		}
	}

	check_winsize(copts);

	prt_error("Info: Dictionary version %s, locale %s\n",
		linkgrammar_get_dict_version(dict),
		linkgrammar_get_dict_locale(dict));
	prt_error("Info: Library version %s. Enter \"!help\" for help.\n",
		linkgrammar_get_version());

	/* Main input loop */
	while (true)
	{
		char *input_string;
		Sentence sent = NULL;

		/* Make sure stderr is shown even when MSVC binary runs under
		 * Cygwin/MSYS pty (in that case it is fully buffered(!)). */
		fflush(stderr);

		verbosity = parse_options_get_verbosity(opts);
		debug = parse_options_get_debug(opts);
		test = parse_options_get_test(opts);

		input_string = fget_input_string(input_fh, stdout, /*check_return*/false);
		check_winsize(copts);

		if (NULL == input_string)
		{
			if (ferror(input_fh))
				prt_error("Error: Read: %s\n", strerror(errno));

			/* End of a "!file" input: fall back to stdin. */
			if (input_fh == stdin) break;
			fclose (input_fh);
			input_fh = stdin;
			continue;
		}

		/* Discard whitespace characters from end of string. The first
		 * character is never removed. Working with an index avoids forming
		 * a pointer before the buffer when the string is empty. */
		for (size_t len = strlen(input_string);
		     (len > 1) && strchr(WHITESPACE, input_string[len-1]); len--)
		{
			input_string[len-1] = '\0';
		}

		/* If the input string is just whitespace, then ignore it. */
		if (strspn(input_string, WHITESPACE) == strlen(input_string))
			continue;

		char command = special_command(input_string, copts, dict);
		if ('e' == command) break;    /* It was an exit command */
		if ('c' == command) continue; /* It was another command */
		if (-1 == command) continue;  /* It was a bad command */

		/* We have to handle the !file command inline; it's too hairy
		 * otherwise ... */
		if ('f' == command)
		{
			/* Skip the command word, then the whitespace after it. When
			 * there is no argument this lands on the terminating NUL
			 * instead of stepping past it. */
			char *command_end = &input_string[strcspn(input_string, WHITESPACE)];
			char *filename = &command_end[strspn(command_end, WHITESPACE)];
			size_t fnlen = strlen(filename);

			if (0 == fnlen)
			{
				prt_error("Error: Missing file name argument\n");
				continue;
			}

			if ('\n' == filename[fnlen-1]) filename[fnlen-1] = '\0';

			struct stat statbuf;
			if ((0 == stat(filename, &statbuf)) && statbuf.st_mode & S_IFDIR)
			{
				prt_error("Error: Cannot open %s: %s\n",
				        filename, strerror(EISDIR));
				continue;
			}

			/* The previous input file (if any) is abandoned from here on,
			 * so close it rather than leak the handle. */
			if (input_fh != stdin) fclose(input_fh);

			input_fh = fopen(filename, "r");

			if (NULL == input_fh)
			{
				prt_error("Error: Cannot open %s: %s\n", filename, strerror(errno));
				input_fh = stdin;
				continue;
			}
			continue;
		}


		if (!copts->batch_mode) batch_in_progress = false;
		if ('\0' != test[0])
		{
			/* In batch mode warn only once.
			 * In auto-next-linkage mode don't warn at all. */
			if (!batch_in_progress && !auto_next_linkage_test(test))
			{
				fflush(stdout);
				/* Remind the developer this is a test mode. */
				prt_error("Warning: Tests enabled: %s\n", test);
				if (copts->batch_mode) batch_in_progress = true;
			}
		}

		if (copts->echo_on)
		{
			printf("%s\n", input_string);
		}

		if (copts->batch_mode || auto_next_linkage_test(test))
		{
			label = strip_off_label(input_string);
		}

		// Post-processing-based pruning will clip away connectors
		// that we might otherwise want to examine. So disable PP
		// pruning in this situation.
		if (copts->display_bad)
			parse_options_set_perform_pp_prune(opts, false);
		else
			parse_options_set_perform_pp_prune(opts, true);

		sent = sentence_create(input_string, dict);

		/* Nothing to parse if the sentence could not be created. */
		if (NULL == sent) continue;

		if (sentence_split(sent, opts) < 0)
		{
			sentence_delete(sent);
			sent = NULL;
			continue;
		}

		if (0 != copts->display_wordgraph)
		{
			const char *wg_display_flags = ""; /* default flags */
			switch (copts->display_wordgraph)
			{
				case 1:     /* default flags */
					break;
				case 2:     /* subgraphs with a legend */
					wg_display_flags = "sl";
					break;
				case 3:
					{
						/* Use esoteric flags from the test user variable. */
						const char *s = test_enabled(test, "wg");
						if ((NULL != s) && (':' == s[0])) wg_display_flags = s;
					}
					break;
				default:
					prt_error("Warning: wordgraph=%d: Unknown value, using 1\n",
								 copts->display_wordgraph);
					copts->display_wordgraph = 1;
			}
			sentence_display_wordgraph(sent, wg_display_flags);
		}

		/* First parse with the default disjunct_cost as set by the library
		 * (currently 2.7). Usually parse here with no null links.
		 * However, if "-test=one-step-parse" is used and we are said to
		 * parse with null links, allow parsing here with null links too. */
		bool one_step_parse = !copts->batch_mode && copts->allow_null &&
		                    test_enabled(test, "one-step-parse");
		int max_null_count = one_step_parse ? sentence_length(sent) : 0;

		parse_options_set_min_null_count(opts, 0);
		parse_options_set_max_null_count(opts, max_null_count);
		parse_options_reset_resources(opts);

		num_linkages = sentence_parse(sent, opts);

		/* num_linkages is negative only on a hard-error;
		 * typically, due to a zero-length sentence.  */
		if (num_linkages < 0)
		{
			sentence_delete(sent);
			sent = NULL;
			continue;
		}

#if 0
		/* Try again, this time omitting the requirement for
		 * definite articles, etc. This should allow for the parsing
		 * of newspaper headlines and other clipped speech.
		 *
		 * XXX Unfortunately, this also allows for the parsing of
		 * all sorts of ungrammatical sentences which should not
		 * parse, and leads to bad parses of many other unparsable
		 * but otherwise grammatical sentences.  Thus, this trick
		 * pretty much fails; we leave it here to document the
		 * experiment.
		 */
		if (num_linkages == 0)
		{
			parse_options_set_disjunct_cost(opts, 4.5);
			num_linkages = sentence_parse(sent, opts);
			if (num_linkages < 0) continue;
		}
#endif /* 0 */

		/* Try using a larger list of disjuncts */
		/* XXX FIXME: the lg_expand_disjunct_list() routine is not
		 * currently a part of the public API; it should be made so,
		 * or this expansion idea should be abandoned... not sure which.
		 */
		if ((num_linkages == 0) && parse_options_get_use_cluster_disjuncts(opts))
		{
			int expanded;
			if (verbosity > 0) fprintf(stdout, "No standard linkages, expanding disjunct set.\n");
			parse_options_set_disjunct_cost(opts, 3.9);
			expanded = lg_expand_disjunct_list(sent);
			if (expanded)
			{
				num_linkages = sentence_parse(sent, opts);
			}
			if (0 < num_linkages) printf("Got One !!!!!!!!!!!!!!!!!\n");
		}

		/* If asked to show bad linkages, then show them. */
		if ((num_linkages == 0) && (!copts->batch_mode))
		{
			if (copts->display_bad)
			{
				num_linkages = sentence_num_linkages_found(sent);
			}
		}

		/* Now parse with null links */
		if (!one_step_parse && num_linkages == 0 && !copts->batch_mode)
		{
			if (verbosity > 0) fprintf(stdout, "No complete linkages found.\n");

			if (copts->allow_null)
			{
				/* XXX should use expanded disjunct list here too */
				parse_options_set_min_null_count(opts, 1);
				parse_options_set_max_null_count(opts, sentence_length(sent));
				num_linkages = sentence_parse(sent, opts);
			}
		}

		if (verbosity > 0)
		{
			if (parse_options_timer_expired(opts))
				fprintf(stdout, "Timer is expired!\n");

			if (parse_options_memory_exhausted(opts))
				fprintf(stdout, "Memory is exhausted!\n");
		}

		if ((num_linkages == 0) &&
			copts->panic_mode &&
			parse_options_resources_exhausted(opts))
		{
			/* print_total_time(opts); */
			batch_errors++;
			if (verbosity > 0) fprintf(stdout, "Entering \"panic\" mode...\n");
			/* If the parser used was the SAT solver, set the panic parser to
			 * it too.
			 * FIXME? Currently, the SAT solver code is not too useful in
			 * panic mode since it doesn't handle parsing with null words, so
			 * using the regular parser in that case could be beneficial.
			 * However, this currently causes a crash due to a memory
			 * management mess. */
			parse_options_set_use_sat_parser(copts->panic_opts,
				parse_options_get_use_sat_parser(opts));
			parse_options_reset_resources(copts->panic_opts);
			parse_options_set_verbosity(copts->panic_opts, verbosity);
			(void)sentence_parse(sent, copts->panic_opts);
			if (verbosity > 0)
			{
				if (parse_options_timer_expired(copts->panic_opts))
					fprintf(stdout, "Panic timer is expired!\n");
			}
		}

		if (verbosity > 1) parse_options_print_total_time(opts);

		const char *rc = "";
		if (copts->batch_mode)
		{
			batch_process_some_linkages(label, sent, copts);
		}
		else
		{
			rc = process_some_linkages(input_fh, sent, copts);
		}

		fflush(stdout);
		sentence_delete(sent);
		sent = NULL;

		if ((NULL == rc) && (input_fh == stdin)) break;
	}

	if (copts->batch_mode)
	{
		/* print_time(opts, "Total"); */
		fprintf(stderr,
				"%d error%s.\n", batch_errors, (batch_errors==1) ? "" : "s");
	}

	/* Free stuff, so that mem-leak detectors don't complain. */
	if (input_fh != stdin) fclose(input_fh);
	command_options_delete(copts);
	dictionary_delete(dict);

	printf ("Bye.\n");
	return 0;
}
Example #13
0
/**
 * Parse inputString and store the outcome in the per-thread state.
 *
 * On return, ptd->sent holds the newly created sentence and
 * ptd->num_linkages the number of linkages reported by sentence_parse().
 * On every early exit (missing dictionary, missing input, sentence
 * creation failure, sentence longer than the configured maximum)
 * ptd->sent is NULL and ptd->num_linkages is 0.
 *
 * Parsing is attempted in up to three stages:
 *   1. disjunct cost limit 2.0, no null links, fat links disabled;
 *   2. if nothing was found and null links are allowed, a retry with
 *      between 1 and sentence_length() null links;
 *   3. if still nothing was found and the resources of ptd->opts were
 *      exhausted, a retry using ptd->panic_parse_opts.
 *
 * @param env          JNI environment, used only to raise exceptions.
 * @param ptd          Per-thread data: dictionary, options, current sentence.
 * @param inputString  Text to parse; must not be NULL.
 */
static void jParse(JNIEnv *env, per_thread_data *ptd, char* inputString)
{
	int maxlen;
	Parse_Options opts = ptd->opts;
	int jverbosity = parse_options_get_verbosity(opts);

	/* Release the previous sentence and clear the pointer right away, so
	 * that no early exit below can leave a dangling pointer behind that
	 * the next call would delete a second time. */
	if (ptd->sent)
	{
		sentence_delete(ptd->sent);
		ptd->sent = NULL;
	}
	ptd->num_linkages = 0;

	/* NOTE(review): raising a Java exception through JNI does not unwind
	 * the native stack, so throwException() presumably returns here.
	 * Bail out explicitly instead of handing a NULL dictionary or a NULL
	 * string to sentence_create(). */
	if (ptd->dict == NULL)
	{
		throwException(env, "jParse: dictionary not open\n");
		return;
	}
	if (inputString == NULL)
	{
		throwException(env, "jParse: no input sentence!\n");
		return;
	}

	ptd->sent = sentence_create(inputString, ptd->dict);
	if (ptd->sent == NULL)
		return;

	maxlen = parse_options_get_max_sentence_length(opts);
	if (maxlen < sentence_length(ptd->sent))
	{
		if (jverbosity > 0) {
			prt_error("Error: JNI: Sentence length (%d words) exceeds maximum allowable (%d words)\n",
				sentence_length(ptd->sent), maxlen);
		}
		sentence_delete(ptd->sent);
		ptd->sent = NULL;
		return;
	}

	/* First parse with cost 0 or 1 and no null links or fat links */
	parse_options_set_disjunct_costf(opts, 2.0f);
	parse_options_set_min_null_count(opts, 0);
	parse_options_set_max_null_count(opts, 0);
	parse_options_set_use_fat_links(opts, FALSE);
	parse_options_reset_resources(opts);

	ptd->num_linkages = sentence_parse(ptd->sent, opts);

	/* If failed, try again with null links */
	if (0 == ptd->num_linkages)
	{
		if (jverbosity > 0) prt_error("Warning: JNI: No complete linkages found.\n");
		if (parse_options_get_allow_null(opts))
		{
			parse_options_set_min_null_count(opts, 1);
			parse_options_set_max_null_count(opts, sentence_length(ptd->sent));
			ptd->num_linkages = sentence_parse(ptd->sent, opts);
		}
	}

	/* Report resource exhaustion of the regular options (only when verbose). */
	if (parse_options_timer_expired(opts))
	{
		if (jverbosity > 0) prt_error("Warning: JNI: Timer is expired!\n");
	}
	if (parse_options_memory_exhausted(opts))
	{
		if (jverbosity > 0) prt_error("Warning: JNI: Memory is exhausted!\n");
	}

	/* Last resort: nothing was found and the regular options ran out of
	 * resources, so parse once more with the panic options. */
	if ((ptd->num_linkages == 0) &&
	    parse_options_resources_exhausted(opts))
	{
		parse_options_print_total_time(opts);
		if (jverbosity > 0) prt_error("Warning: JNI: Entering \"panic\" mode...\n");
		parse_options_reset_resources(ptd->panic_parse_opts);
		parse_options_set_verbosity(ptd->panic_parse_opts, jverbosity);
		ptd->num_linkages = sentence_parse(ptd->sent, ptd->panic_parse_opts);
		if (parse_options_timer_expired(ptd->panic_parse_opts)) {
			if (jverbosity > 0) prt_error("Error: JNI: Timer is expired!\n");
		}
	}
}
Example #14
0
 SPOTriplets NLP::sentence2triplets ( const char* sentence )
 {
   // vector of triplets
   SPOTriplets triplets;

   #ifdef DEBUG
     std::cout << "The sentence: " << sentence << std::endl;
   #endif
   // creates a Sentence from the input char*
   Sentence sent = sentence_create ( sentence, dict_ );
   #ifdef DEBUG
     std::cout << "Sentence created" << std::endl;
   #endif
   // tokenizes the sentence
   sentence_split ( sent, parse_opts_ );
   #ifdef DEBUG
     std::cout << "Sentence splitted" << std::endl;
   #endif
   // searches for all possible linkages
   int num_linkages = sentence_parse ( sent, parse_opts_ );
   #ifdef DEBUG
     std::cout << "Sentence parsed" << std::endl;
     std::cout << "Number of linkages: " << num_linkages << std::endl;
   #endif

   // just one triplet
   SPOTriplet triplet;

   // if there is any linkage in the sentence
   if( num_linkages > 0 )
   {
     // create the linkage
     Linkage linkage = linkage_create ( 0, sent, parse_opts_ );

     #ifdef DEBUG
       // prints the sentence's diagram
       std::cout << "The diagram: " << std::endl;
       char *diagram = linkage_print_diagram(linkage, true, 800);
       std::cout << diagram << std::endl;
       linkage_free_diagram( diagram );
       // end print diagram
     #endif

     std::vector<std::string> labels;

     // 1. find the S_link
     // S* except there is an SJ* because then S* except Spx
     // two cases: there is SJ* and there is not SJ*

     // TODO: VJlp VJrp same as SJ but to predications
     // TODO: SFut SFst what the f**k?                                     ###FIXED###
     // TODO: His form was shining like the light not working              ###FIXED###
     // TODO: Car is mine not working                                      ###FIXED###
     // TODO: The little brown bear has eaten all of the honey not working ###FIXED###

     // REGEXES
     std::regex SJ_( "SJ.*" );
     std::regex VJ_( "VJ.*");
     std::regex subject( "(Ss.*)|(SFut)|(Sp\*.*)" );
     std::regex Spx( "Spx.*" );
     // TODO:fix theese initializer list not allowed                       ###FIXED###
     std::regex predicate( "(Pv.*)|(Pg.*)|(PP.*)|(I.*)|(TO)|(MVi.*)" );
     // TODO: make one from theese // (Sp.*)|(Ss.*)                        ###FIXED###
     std::regex noun_adject_object ( "(O.*)|(Os.*)|(Op.*)|(MVpn.*)|(Pa.*)|(MVa.*)" );
     std::regex preposition ( "(MVp.*)|(Pp.*)|(OF)|(TO)" );
     std::regex prep_object ( "(J.*)|(TI)|(I.*)|(ON)" );
     // TODO: problems with matching!! Pg*!!                               ###FIXED###
     // TODO: problems with matching!! Mvp.*!!                             ###FIXED###

     bool s_found = false;
     bool p_found = false;
     bool o_found = false;
     bool SJ = false;

     // search for SJ.s labels
     for( auto label: labels )
     {
       if( std::regex_match( label, SJ_ ) )
       {
         SJ = true;
         break;
       }
     }

     // multiple subject in the sentence
     if( SJ )
     {
       // SPls left -> first subject
       // SPrs right -> second subject
       // Spx right -> predicate
       // SJ-s are multiple subjects
       std::string temp;
       // go through every linkage
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         // get their label
         std::string l = linkage_get_link_label( linkage, i );
         // if there is an SJl* label
         if( std::regex_match( l, std::regex( "SJl.*" ) ) )
         {
           // SJls left side
           triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
           triplet.cut( triplet.s );
           temp = triplet.s + " ";
           // and word
           triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           triplet.cut( triplet.s );
           temp += triplet.s + " ";

           // find SJr*
           for( int j = 0; j < linkage_get_num_links( linkage ); ++j )
           {
             std::string m = linkage_get_link_label( linkage, j );
             if( std::regex_match( m, std::regex( "SJr.*" ) ) )
             {
               triplet.s = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) );
               triplet.cut();
               temp += triplet.s;
               triplet.s = temp;

               s_found = true;
               #ifdef DEBUG
                 std::cout << "Subject found: " << triplet.s << std::endl;
               #endif
               break;
             } // if
           } // for
           break;
         } // if
       } // for

       // now we have the subject

       // find Spx and its right side will be the starter predicate
       std::string current_word;
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         std::string l = linkage_get_link_label( linkage, i );
         if( std::regex_match( l, std::regex( "Spx.*" ) ) )
         {
           triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
         }
       }
       // from now all the same as on the else branch !!!!

       bool predicate_match = false;

       // search for the linkage that has triplet.s as left!
       do
       {
         predicate_match = false;

         for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
         {
           // every linkage's left word
           std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
           // every linkage's label
           std::string l = linkage_get_link_label( linkage, i );

           if( std::regex_match( l, predicate ) && word_i == current_word )
           {
             // found predicate
             triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
             current_word = triplet.p;
             predicate_match = true;
             break;
           }
         }
       }
       while( predicate_match );

       // we now have the predicate too
       // TODO: multiple predicates!
       p_found = true;
       #ifdef DEBUG
         std::cout << "Predicate found: " << triplet.p << std::endl;
       #endif

       // ###COPY BEGIN###

       // search for noun object or adjective object
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         // get every linkage label
         std::string l = linkage_get_link_label( linkage, i );
         // get the left word of every linkage
         std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
         // if thete is a label that match AND its left word is the predicate
         if( std::regex_match( l, noun_adject_object ) && triplet.p == l_word )
         {
           // then the object is that linkage's right word
           triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           triplet.cut( triplet.o );
           o_found = true;
           #ifdef DEBUG
             std::cout << "Adjective or noun object found: " << triplet.o << std::endl;
           #endif
         } // if
       } // for

       // still not found object, then search for preposition
       if( !o_found )
       {
         // go through every linkage
         for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
         {
           // get the linkage's label
           std::string l = linkage_get_link_label( linkage, i );
           // and left word
           std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
           // if there is a linkage which is a preposition and its left word is the predicate
           if( std::regex_match( l, preposition ) && triplet.p == word_i )
           {
             // found preposition
             // search for prep_object
             // then the temp will contain the preposition label's right word
             std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
             #ifdef DEBUG
               std::cout << "Preposition found! and its rigth word is: " << temp << std::endl;
             #endif

             for( int j = 0; j < linkage_get_num_links( linkage ); ++j )
             {
               // every linkages
               std::string m = linkage_get_link_label( linkage, j );
               // every left word
               std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) );

               // if there is a label with match and its left is exactly the preposition's right
               if( std::regex_match( m, prep_object ) && temp == word_j )
               {
                 triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) );
                 triplet.cut(triplet.o);

                 triplet.o += " ";
                 // save o
                 std::string temp = triplet.o;

                 triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) );
                 triplet.cut(triplet.o);
                 temp += triplet.o;

                 triplet.o = temp;
                 o_found = true;
                 #ifdef DEBUG
                   std::cout << "Object found: " << triplet.o << std::endl;
                 #endif
               } // if( std::regex_match( m, prep_object ) && temp == word_j ) END
             } // for J END
           } // if( std::regex_match( l, preposition ) && triplet.p == word_i ) END
         } // for I END
       } // if( !o_found ) END

       if( s_found && p_found && o_found )
       {
         // TODO: cut the words itself not the whole triplet
         // have to cut every word itself
         // triplet.cut();
         triplet.cut(triplet.s);
         triplet.cut(triplet.p);
         triplets.push_back( triplet );
         s_found = false;
         p_found = false;
         o_found = false;
       }
       // ###COPY END###
     }
     else // only one subject
     {
       // except Spx!!!
       // S left -> subject
       // S right -> predicate at first
       // if the word next to S right, is an element of Pv*, Pg* PP*, I*, TO, MVi*
       // then the new predicate will be that word

       std::string current_word;

       // search for subject (S_link)
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         // get the linkage's label
         std::string l = linkage_get_link_label( linkage, i );

         if( std::regex_match( l, subject ) )
         {
           // subject found
           triplet.s = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
           s_found = true;
           current_word = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           triplet.p = current_word;
           #ifdef DEBUG
             std::cout << "Subject found: " << triplet.s << std::endl;
           #endif
           break;
         }
       }

       if( s_found )
       {
         bool predicate_match = false;

         // search for the linkage that has triplet.s as left!
         do
         {
           predicate_match = false;

           for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
           {
             // every linkage's left word
             std::string l_word = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
             // every linkage's label
             std::string l = linkage_get_link_label( linkage, i );

             if( std::regex_match( l, predicate ) && l_word == current_word )
             {
               // found predicate
               triplet.p = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
               current_word = triplet.p;
               predicate_match = true;
               break;
             }
           } // for END
         } while( predicate_match );

         p_found = true;
         #ifdef DEBUG
           std::cout << "Predicate found: " << triplet.p << std::endl;
         #endif
       } // if( s_found ) END

       // subject and predicate found
       // search for object

       // from k to linkage_get_num_links( linkage )
       // if there is any of the noun, adjective od preposition object then that
       // label's right will give the object.

       // !!! search only between labels that has triplet.p as left word !!!!!

       // search for noun object or adjective objects
       // go through all links
       for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
       {
         // get every linkage label
         std::string l = linkage_get_link_label( linkage, i );
         // get the left word of every linkage
         std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );
         // if thete is a label that match AND its left word is the predicate
         if( std::regex_match( l, noun_adject_object ) && triplet.p == word_i )
         {
           // then the object is that linkage's right word
           triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
           o_found = true;
           triplet.cut(triplet.o);
           #ifdef DEBUG
             std::cout << "Adjective or noun object found: " << triplet.o << std::endl;
           #endif
         } // if END
       } // for END

       // still not found object, then search for preposition
       if( !o_found )
       {
         // go through every linkage
         for( int i = 0; i < linkage_get_num_links( linkage ); ++i )
         {
           // get the linkage's label
           std::string l = linkage_get_link_label( linkage, i );
           // and left word
           std::string word_i = linkage_get_word( linkage, linkage_get_link_lword( linkage, i ) );

           // if there is a linkage which is a preposition and its left word is the predicate
           if( std::regex_match( l, preposition ) && triplet.p == word_i )
           {
             // found preposition
             // search for prep_object
             // then the temp will contain the preposition label's right word
             std::string temp = linkage_get_word( linkage, linkage_get_link_rword( linkage, i ) );
             #ifdef DEBUG
               std::cout << "Preposition found! and its rigth word is: " << temp << std::endl;
             #endif

             // start search from there
             for( int j = 0; j < linkage_get_num_links( linkage ); ++j )
             {
               // every linkages
               std::string m = linkage_get_link_label( linkage, j );
               // every left word
               std::string word_j = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) );
               #ifdef DEBUG
                 if( std::regex_match( m, prep_object ) )
                     std::cout << m << " DOES match to (J.*)|(TI)|(I.*)|(ON)" << std::endl;
               #endif

               // if there is a label with match and its left is exactly the preposition's right
               if( std::regex_match( m, prep_object ) && temp == word_j )
               {
                 triplet.o = linkage_get_word( linkage, linkage_get_link_lword( linkage, j ) );
                 triplet.cut(triplet.o);

                 triplet.o += " ";
                 // save o
                 std::string temp = triplet.o;

                 triplet.o = linkage_get_word( linkage, linkage_get_link_rword( linkage, j ) );
                 triplet.cut(triplet.o);
                 temp += triplet.o;

                 triplet.o = temp;
                 #ifdef DEBUG
                   std::cout << "Object found: " << triplet.o << std::endl;
                 #endif
                 o_found = true;
               }
             } // for
           } // if
         } // for
       } // if( o_found ) END

       if( s_found && p_found && o_found )
       {
         // TODO: cut the words itself not the whole triplet ###FIXED###
         // have to cut every word itself
         // triplet.cut();

         triplet.cut(triplet.s);
         triplet.cut(triplet.p);
         triplets.push_back( triplet );
         s_found = false;
         p_found = false;
         o_found = false;
       }

     } // end else

     linkage_delete ( linkage );
   } // if( num_linkages > 0 ) END