Ejemplo n.º 1
0
// Stand-alone tokenizer driver.
//
// Allocates the global MPtok instance, lets process_tokenizer_args()
// consume the command line, then runs the tokenizer with stdout as its
// output stream.
//
// The return type is spelled out explicitly: a definition without one
// relies on "implicit int", which is not valid C++ (nor C99 and later).
//
// Returns 0 unconditionally; the results of process_tokenizer_args() and
// run_tokenizer() are not inspected here.
int main(int argc, char **argv)
{
	mptok = new MPtok;

	process_tokenizer_args(argc, argv);
	run_tokenizer(stdout);

	return 0;
}
Ejemplo n.º 2
0
// Command interpreter for the tagger.
//
// Creates the global MPtag instance, selects the initial input stream, and
// then reads lines with read_line() until it reports no more input.  Each
// line is split by get_command() into a command word and up to two
// arguments, and the command is dispatched through the if/else chain below.
// A command word that matches none of the branches is silently ignored.
//
// The return type is spelled out explicitly: a definition without one
// relies on "implicit int", which is not valid C++ (nor C99 and later).
//
// Returns 0 when the input is exhausted; the "exit" command terminates the
// process directly through exit(0).
int main(int argc, char **argv)
{
	char	line[MAX_LLEN], com[MAX_LLEN], arg1[MAX_LLEN], arg2[MAX_LLEN];

	srandom(time(NULL));

	// Default: commands are read from standard input.

	file_stack[0] = stdin;
	num_files = 1;

	mptag = new MPtag;

// #define INTERNAL_TOKENIZE

	mptag->set_untag("UNTAGGED");

#ifdef INTERNAL_TOKENIZE

	// Disabled unless INTERNAL_TOKENIZE is defined: run the tokenizer
	// in-process and feed its output to the command loop through a pipe.
	//
	// NOTE(review): the result of pipe() is not checked, and
	// run_tokenizer() finishes writing before anything reads from the
	// pipe, so this presumably blocks once the output exceeds the pipe
	// capacity -- confirm before enabling.

	extern int process_tokenizer_args(int argc, char **argv);
	extern int run_tokenizer(FILE *ofp);

	process_tokenizer_args(argc, argv);
	int fd[2];
	pipe(fd);
	FILE *ifp = fdopen(fd[0], "r");
	FILE *ofp = fdopen(fd[1], "w");
	run_tokenizer(ofp);
	fclose(ofp);
	file_stack[0] = ifp;
	num_files = 1;
#else

	// An argument that does not start with '-' is handed to
	// command_input(), but only while num_files is still 1; num_files is
	// reset to 0 just before the call (presumably so that the file takes
	// the place of stdin in file_stack -- confirm in command_input).

	for (int i = 1; i < argc; ++i)
	{
		if (argv[i][0] != '-' && num_files == 1)
		{
			num_files = 0;
			command_input(argv[i]);
		}
	}
#endif

	// Read the input file

	while (read_line(line, 0, 0))
	{
		get_command(line, com, arg1, arg2);
		fflush(stdout);

		// Options affecting the program

		if (strcmp(com, "input") == 0)
		{
			command_input(arg1);
		} else if (strcmp(com, "echo") == 0)
		{
			// The text to echo starts after "echo" and one separator
			// character.  A line too short to contain any text (for
			// example a bare "echo") must not be indexed at offset 5:
			// that is past its terminating NUL and would print stale
			// buffer contents.  Print an empty line instead.
			printf("%s\n", strlen(line) > 5 ? line + 5 : "");
		} else if (strcmp(com, "verbose") == 0)
		{
			option_verbose = atoi(arg1);
			if (option_verbose) printf("verbose %s\n", arg1);
		} else if (strcmp(com, "exit") == 0)
		{
			exit(0);

		// Set options within the tagger

		} else if (strcmp(com, "adhoc") == 0)
		{
			// An argument other than none/medpost/penn changes nothing.
			if (option_verbose) printf("adhoc %s\n", arg1);
			if (strcmp(arg1, "none") == 0) mptag->set_adhoc_none();
			else if (strcmp(arg1, "medpost") == 0) mptag->set_adhoc_medpost();
			else if (strcmp(arg1, "penn") == 0) mptag->set_adhoc_penn();
		} else if (strcmp(com, "untag") == 0)
		{
			mptag->set_untag(arg1);

		// Initialize state transition probabilities (ngrams)

		} else if (strcmp(com, "ngrams") == 0)
		{
			mptag->read_ngrams(arg1);
		} else if (strcmp(com, "init") == 0)
		{
			if (option_verbose) printf("init\n");
			mptag->norm_ngrams();
		} else if (strcmp(com, "smooth") == 0)
		{
			if (option_verbose) printf("smooth\n");
			mptag->smooth_ngrams();

		// Initialize the lexicon

		} else if (strcmp(com, "lex") == 0)
		{
			if (option_verbose) printf("lex %s %s\n", arg1, arg2);
			// Replace any previously loaded lexicon; deleting a null
			// pointer is a no-op, so no guard is needed.
			delete mptag->lex;
			mptag->lex = new MPlex(mptag->num_tags, atoi(arg1), arg2, OPTION_USE_CODES);
			mptag->backoff(NULL);
		} else if (strcmp(com, "addlex") == 0)
		{
			// Has no effect when no lexicon has been loaded yet.
			if (option_verbose) printf("addlex %s\n", arg1);
			if (mptag->lex) mptag->lex->addfile(arg1);
		} else if (strcmp(com, "rmlex") == 0)
		{
			// Has no effect when no lexicon has been loaded yet.
			if (option_verbose) printf("rmlex %s\n", arg1);
			if (mptag->lex) mptag->lex->rmfile(arg1);
		} else if (strcmp(com, "addsmoothing") == 0)
		{
			// NOTE(review): the verbose line prints add_smoothing as it
			// was BEFORE this command updates it -- confirm whether the
			// new value was meant to be shown.
			if (option_verbose) printf("add %g\n", mptag->add_smoothing);
			mptag->add_smoothing = atof(arg1);
		} else if (strcmp(com, "backoff") == 0)
		{
			// Selecting a backoff also resets add_smoothing to zero.
			if (option_verbose) printf("backoff %s\n", arg1);
			mptag->add_smoothing = 0.0;
			mptag->backoff(arg1);

		// Load a sentence

		} else if (strcmp(com, "sentence") == 0)
		{
			command_sentence(arg1);		// this is mptag->load(tokenized-text)

		// Perform tagging

		} else if (strcmp(com, "compute") == 0)
		{
			mptag->compute();
		} else if (strcmp(com, "viterbi") == 0)
		{
			mptag->viterbi();
		} else if (strcmp(com, "baseline") == 0)
		{
			mptag->baseline();

		// Invoke the printing functions of the tagger

		} else if (strcmp(com, "print") == 0)
		{
			mptag->print(0);
		} else if (strcmp(com, "printfull") == 0)
		{
			mptag->print(1);
		} else if (strcmp(com, "printsent") == 0)
		{
			if (option_verbose) printf("printsent\n");
			mptag->print(2);
		}
	}

	return 0;
}
/*
 * Token dump driver.
 *
 * Opens the file named by the first command-line argument, initializes and
 * runs the tokenizer on it, then prints one descriptive line per token
 * returned by get_next_token() until it returns NULL.
 *
 * main is declared to return int, as the C standard requires for a hosted
 * program, so that failures are visible to the caller.
 *
 * Returns 0 after all tokens have been printed, or 1 when the file argument
 * is missing, the file cannot be opened, or init_tokenizer()/run_tokenizer()
 * reports failure.
 *
 * NOTE(review): the stream is not closed here because it is not visible
 * whether the tokenizer keeps or closes it after init_tokenizer(fp); the
 * process ends right after the loop in any case.
 */
int main(int argc, char *argv[])
{
	FILE *fp;
	symbol_table_entry_t	*p_token;
	/* argv[0] may be absent when argc is 0; never hand NULL to %s. */
	const char *prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "tokenizer";

	/* argc < 2 also covers argc == 0, where argv[1] would be out of range. */
	if ( argc < 2 )
	{
		printf("%s [ name of file to tokenize ]\n", prog);
		return 1;
	}

	fp = fopen(argv[1], "r");

	if ( NULL == fp )
	{
		printf("%s. cannot open file[%s] for reading\n",
			prog, argv[1]);
		return 1;
	}

	if ( !init_tokenizer(fp) )
	{
		printf("%s. init_tokenizer() failed on file[%s]\n",
			prog, argv[1]);
		return 1;
	}

	if ( !run_tokenizer() )
	{
		printf("%s. run_tokenizer() failed on file[%s]\n",
			prog, argv[1]);
		return 1;
	}

	/* Drain the token stream; NULL marks its end. */
	while ( (p_token = get_next_token()) != NULL )
	{
		switch(p_token->token_type)
		{
		case TOKEN_INT:
			printf("line[%d] token is an integer value[%d]\n",
				p_token->line_num, p_token->token_val.token_int_val
				);
			break;

		case TOKEN_REAL:
			printf("line[%d] token is a float value[%f]\n",
				p_token->line_num, p_token->token_val.token_float_val
				);
			break;

		case TOKEN_KEYWORD:
			printf("line[%d] token is a keyword[%s]\n",
				p_token->line_num, keyword_as_str[p_token->token_val.token_keyword]
				);
			break;

		case TOKEN_IDENTIFIER:
			printf("line[%d] token is an identifier[%s]\n",
				p_token->line_num, p_token->token_val.token_id
				);
			break;

		case TOKEN_OP:
			/*
			 * NOTE(review): operators are looked up in keyword_as_str,
			 * the same table used for keywords -- confirm that the
			 * table is meant to cover operator values as well.
			 */
			printf("line[%d] token is an operator[%s]\n",
				p_token->line_num, keyword_as_str[p_token->token_val.token_op]
				);
			break;

		case TOKEN_STRING:
			printf("line[%d] token is a string[%s]\n",
				p_token->line_num, p_token->token_val.token_str
				);
			break;

		case TOKEN_ILLEGAL:
			printf("*******line[%d]***** illegal token detected\n",
				p_token->line_num
				);

			/* The offending text is printed only when it is available. */
			if ( p_token->token_val.token_illegal )
			{
				printf("line[%d] illegal token[%s]\n",
					p_token->line_num, p_token->token_val.token_illegal
					);
			}
			break;

		case TOKEN_UNKNOWN:
			printf("*******line[%d]***** unknown token detected\n",
				p_token->line_num
				);

			/* The offending text is printed only when it is available. */
			if ( p_token->token_val.token_unknown )
			{
				printf("line[%d] unknown token[%s]\n",
					p_token->line_num, p_token->token_val.token_unknown
					);
			}
			break;

		default:
			/* Any other token type is skipped without output. */
			break;
		}
	}

	return 0;
}