Esempio n. 1
0
void CgRuleEngine::check(const Paragraph * paragraph) { 
	fprintf(stderr, "CgRuleEngine::check\n");

	for (size_t i = 0; i < paragraph->sentenceCount; i++) {
		cg3_sentence *sentence = 0;
		sentence = cg3_sentence_new(applicator);
		for(size_t j = 0; j < paragraph->sentences[i]->tokenCount; j++) {
			Token t =  paragraph->sentences[i]->tokens[j];
			if(wcscmp(t.str, L" ") == 0) {
				continue;
			}
			cg3_cohort *cohort = 0;
			cohort = cg3_cohort_create(sentence);
			//fprintf(stderr, "CgRuleEngine::check %ls (%d) %ld\n", paragraph->sentences[i]->tokens[j].str, t.isValidWord, t.analyses->size());
			cg3_tag *tag = 0;	
			string wordform = string("\"<") + utils::StringUtils::StringUtils::utf8FromUcs4(paragraph->sentences[i]->tokens[j].str) + string(">\""); 
			//fprintf(stderr, "wordform is: %s\n", wordform.c_str());
			tag = cg3_tag_create_u8(applicator, wordform.c_str());
			cg3_cohort_setwordform(cohort, tag);

			list<morphology::Analysis *>::iterator it = t.analyses->begin();
			unsigned int num_analyses = 0;
			while (it != t.analyses->end()) {
				morphology::Analysis *a = *it;
				cg3_reading *reading = 0;
				reading = cg3_reading_create(cohort);
				string baseform = string("\"") + utils::StringUtils::StringUtils::utf8FromUcs4((a->getValue("lemma"))) + string("\"");
				tag = cg3_tag_create_u8(applicator, baseform.c_str());
				cg3_reading_addtag(reading, tag);
				string taglist = string(utils::StringUtils::StringUtils::utf8FromUcs4((a->getValue("tags"))));
				string buf = "";
				for(string::iterator it2 = taglist.begin(); it2 != taglist.end(); it2++) {
					// This assumes + separated tags.
					if(*it2 == '+' && buf.length() > 0) {
						tag = cg3_tag_create_u8(applicator, buf.c_str());
						cg3_reading_addtag(reading, tag);
						buf = "";
						continue;
					} else if(*it2 == '+' && buf.length() == 0) {
						continue;
					}
					buf = buf + *it2;
				}
				tag = cg3_tag_create_u8(applicator, buf.c_str());
				cg3_reading_addtag(reading, tag);
				buf = "";
				num_analyses++;
				cg3_cohort_addreading(cohort, reading);
				++it;
			}
			cg3_sentence_addcohort(sentence, cohort);
			//fprintf(stderr, "CgRuleEngine::check %d analyses\n", num_analyses);
			
		}
		int num_cohorts = cg3_sentence_numcohorts(sentence);
		fprintf(stderr, "CgRuleEngine::num_cohorts %d \n", num_cohorts);
		cg3_sentence_runrules(applicator, sentence);

		// We've run the grammar on the sentence, now we need to go through and look
		// for error tags which by convention begin with &, e.g.
		//
		//   "<beassát>"
		//	"beassi" G3 N Sg Acc PxSg2 @OBJ> &real-beassat #8->8 ADD:3178:beassat 

		cg3_tag *tag = 0;	
		cg3_cohort *cohort = 0;
		cg3_reading *reading = 0;
		size_t ci = 0, ce = 0, ri = 0, re = 0, ti = 0, te = 0;
		const char *tmp;
		size_t cur_pos = 0;

		for (ci = 0, ce = cg3_sentence_numcohorts(sentence) ; ci != ce ; ++ci) {
			cohort = cg3_sentence_getcohort(sentence, ci);
			cg3_tag *wftag = cg3_cohort_getwordform(cohort);
			const char *wftmp = cg3_tag_gettext_u8(wftag);
			fprintf(stderr, "CG[%ld, %ld]: %s\n", cur_pos, wcslen(cg3_tag_gettext_w(wftag)), wftmp);
	
			for (ri = 0, re = cg3_cohort_numreadings(cohort) ; ri != re ; ++ri) {
				reading = cg3_cohort_getreading(cohort, ri);
				fprintf(stderr, "CG: \t");
				for (ti = 0, te = cg3_reading_numtags(reading) ; ti != te ; ++ti) {
					tag = cg3_reading_gettag(reading, ti);
					tmp = cg3_tag_gettext_u8(tag);
					fprintf(stderr, "%s ", tmp);
					if(tmp[0] == '&') { 
						// We've found an error tag, mark the current cohort
						fprintf(stderr, "\nnew CacheEntry (%ld, %d, %d)\n", ci, GCERR_NEGATIVE_VERB_MISMATCH, 1);
						CacheEntry * e = new CacheEntry(0);
						e->error.legacyError.error_code = -1;
						e->error.error_id = tmp;
						e->error.checker = options->grammarChecker;
						cg3_tag *lwt = cg3_cohort_getwordform(cohort);
//						const char *lwf = cg3_tag_gettext_u8(lwt);
						e->error.startpos = cur_pos;
						e->error.errorlen = wcslen(cg3_tag_gettext_w(lwt)) - 4;
						options->grammarChecker->cache.appendError(e);
						fprintf(stderr, "  pos: %ld; len: %ld\n", cur_pos, wcslen(cg3_tag_gettext_w(lwt)) - 4);
					}
				}
				for (ti = 0, te = cg3_reading_numtraces(reading) ; ti != te ; ++ti) {
					uint32_t rule_line = cg3_reading_gettrace(reading, ti);
					fprintf(stderr, "TRACE:%u ", rule_line);
				}
				fprintf(stderr, "\n");
			}
			if(ci != 0) { // We don't count BOS >>>
				fprintf(stderr, "%ld + %ld = ", cur_pos, (wcslen(cg3_tag_gettext_w(wftag)) - 4));
				cur_pos = cur_pos + (wcslen(cg3_tag_gettext_w(wftag)) - 4) + 1;
				fprintf(stderr, "%ld\n", cur_pos);
			}
		}



	}
	
	return;
}
Esempio n. 2
0
int main(int argc, char *argv[]) {
	cg3_grammar *grammar = 0;
	cg3_applicator *applicator = 0;
	cg3_sentence *sentence = 0;
	cg3_cohort *cohort = 0;
	cg3_reading *reading = 0;
	cg3_tag *tag = 0;
	size_t ci = 0, ce = 0, ri = 0, re = 0, ti = 0, te = 0;
	const char *tmp;

	if (argc < 2) {
		fprintf(stderr, "Error: First argument must be a CG-3 grammar to load!\n");
		return 1;
	}

	if (!cg3_init(stdin, stdout, stderr)) {
		fprintf(stderr, "Error: Failed cg3_init()!\n");
		return 2;
	}

	grammar = cg3_grammar_load(argv[1]);
	if (!grammar) {
		fprintf(stderr, "Error: Failed cg3_grammar_load( %s )!\n", argv[1]);
		return 3;
	}

	applicator = cg3_applicator_create(grammar);
	cg3_applicator_setflags(applicator, CG3F_TRACE);
	sentence = cg3_sentence_new(applicator);

	cohort = cg3_cohort_create(sentence);
	tag = cg3_tag_create_u8(applicator, "\"<wordform>\"");
	cg3_cohort_setwordform(cohort, tag);

	reading = cg3_reading_create(cohort);
	tag = cg3_tag_create_w(applicator, L"\"baseform\"");
	cg3_reading_addtag(reading, tag);
	tag = cg3_tag_create_u8(applicator, "notwanted");
	cg3_reading_addtag(reading, tag);
	tag = cg3_tag_create_w(applicator, L"@mapping");
	cg3_reading_addtag(reading, tag);
	cg3_cohort_addreading(cohort, reading);

	reading = cg3_reading_create(cohort);
	tag = cg3_tag_create_w(applicator, L"\"baseform\"");
	cg3_reading_addtag(reading, tag);
	tag = cg3_tag_create_u8(applicator, "wanted");
	cg3_reading_addtag(reading, tag);
	cg3_cohort_addreading(cohort, reading);

	reading = cg3_reading_create(cohort);
	tag = cg3_tag_create_w(applicator, L"\"baseform\"");
	cg3_reading_addtag(reading, tag);
	tag = cg3_tag_create_u8(applicator, "alsonotwanted");
	cg3_reading_addtag(reading, tag);
	cg3_cohort_addreading(cohort, reading);

	cg3_sentence_addcohort(sentence, cohort);

	cg3_sentence_runrules(applicator, sentence);

	for (ci = 0, ce = cg3_sentence_numcohorts(sentence); ci != ce; ++ci) {
		cohort = cg3_sentence_getcohort(sentence, ci);
		tag = cg3_cohort_getwordform(cohort);
		tmp = cg3_tag_gettext_u8(tag);
		fprintf(stdout, "%s\n", tmp);

		for (ri = 0, re = cg3_cohort_numreadings(cohort); ri != re; ++ri) {
			reading = cg3_cohort_getreading(cohort, ri);
			fprintf(stdout, "\t");
			for (ti = 0, te = cg3_reading_numtags(reading); ti != te; ++ti) {
				tag = cg3_reading_gettag(reading, ti);
				tmp = cg3_tag_gettext_u8(tag);
				fprintf(stdout, "%s ", tmp);
			}
			for (ti = 0, te = cg3_reading_numtraces(reading); ti != te; ++ti) {
				uint32_t rule_line = cg3_reading_gettrace(reading, ti);
				fprintf(stdout, "TRACE:%u ", rule_line);
			}
			fprintf(stdout, "\n");
		}
	}

	cg3_sentence_free(sentence);
	cg3_applicator_free(applicator);
	cg3_grammar_free(grammar);

	cg3_cleanup();
	return 0;
}