Exemple #1
0
/// Load the word vocabulary from \p filename.
///
/// Each record in the file is an unsigned index followed by a word,
/// separated by whitespace. The empty word is seeded at index NO_WORD
/// before the file is read; every record's index is then expected to match
/// the position at which its word lands in word_list (checked by assertion).
///
/// Fills word_list, word_map and _all_words, and reports the number of
/// words read on stderr.
///
/// Must be called at most once: asserts that the word list is not already
/// open and marks it open on completion.
void read_words(const string filename) {
	assert(!open_word_list);

	ifstream fin(filename.c_str());
	assert(fin.is_open());

	_all_words.clear();

	// Slot NO_WORD holds the empty word.
	word_list.clear();
	word_list.push_back("");
	assert(word_list.at(NO_WORD) == "");
	word_map.clear();
	word_map[""] = NO_WORD;		// FIXME: Don't use [] operator

	unsigned i;
	string word;
	// Loop on the extraction itself rather than on eof(): an empty file or
	// a malformed record then ends the loop instead of producing a bogus
	// entry (or never terminating, since a failed stream never reaches
	// eof). operator>> skips leading whitespace, so no explicit ws is
	// needed between or after the fields.
	while (fin >> i >> word) {
		word_list.push_back(word);
		assert(word_list.at(i) == word);
		word_map[word] = i;	// FIXME: Don't use [] operator
		_all_words.push_back(i);

//		if (word == "*content*") _Word_CONTENT = i;
	}
	assert(word_list.size() == word_map.size());

	// The seeded empty word is not counted.
	cerr << "Read " << word_list.size()-1 << " words from '" << filename << "'\n";

	fin.close();
	open_word_list = true;
}
Exemple #2
0
/// Read one line of whitespace-separated integers from stdin into \p v.
///
/// \p v is cleared and then receives one (value, position) pair per integer,
/// where position is the zero-based index of that integer within the line.
/// An empty line yields an empty \p v. If a token that is not an integer is
/// met, the integers read so far are kept and the rest of the line is
/// discarded.
///
/// \return false if stdin is already at end-of-file (\p v is left untouched),
///         true otherwise.
bool read(LIST& v) {
    int ch = getchar();
    if (ch == EOF) {
        return false;
    }
    v.clear();
    for (;;) {
        // Skip blanks ourselves: scanf("%d") would also skip a newline and
        // silently pull the first number of the NEXT line into this one.
        while (ch == ' ' || ch == '\t' || ch == '\r') {
            ch = getchar();
        }
        // Also stop on EOF so a last line without '\n' terminates.
        if (ch == '\n' || ch == EOF) {
            break;
        }
        // The peeked character is the start of the next number; hand it back.
        ungetc(ch, stdin);
        int value;
        if (scanf("%d", &value) != 1) {
            // Not a number: drop the remainder of the line so the next call
            // starts on a fresh line instead of hitting the same character.
            do {
                ch = getchar();
            } while (ch != '\n' && ch != EOF);
            break;
        }
        v.push_back(make_pair(value, static_cast<int>(v.size())));
        ch = getchar();
    }
    return true;
}
Exemple #3
0
/// Build the label -> POS-class map and the list of known POS classes.
///
/// First pass: walks the tag_to_class table, whose rows are
/// (label string, posclass string). Rows whose label string is not a known
/// label are logged and skipped; every other row must name a terminal label
/// (asserted) and is inserted into posclass_map, which is then locked.
///
/// Second pass: records every index of posclass_list holding a non-empty
/// string in _all_posclass, and caches the indices of "::N" and "::NP".
///
/// Must be called at most once (asserted via posclass_is_init).
static void posclass_init() {
	assert(!posclass_is_init);

	posclass_map.clear();
	_all_posclass.clear();

	for (unsigned i = 0; i < tag_to_class_cnt; i++) {
		// Skip labels not in the vocabulary.
		if (!is_label_string(tag_to_class[i][0])) {
			Debug::log(1) << "Skipping unknown label " << tag_to_class[i][0] << " in posclass_init()\n";
			continue;
		}

		Label tag = string_to_label(tag_to_class[i][0]);
		assert(is_terminal_label(tag));

		// The class string must round-trip through posclass_list and must
		// not be the empty placeholder.
		Posclass c = string_to_posclass(tag_to_class[i][1]);
		assert(posclass_list[c] == tag_to_class[i][1]);
		assert(posclass_list[c] != "");

		posclass_map.insert(tag, c);
	}
	posclass_map.lock();

	for (unsigned i = 0; i < posclass_cnt; i++) {
		// Empty entries are not listed as POS classes.
		if (posclass_list[i] != "")
			_all_posclass.push_back(i);
		if (posclass_list[i] == "::N")
			_Posclass_N = i;
		else if (posclass_list[i] == "::NP")
			_Posclass_NP = i;
	}

	posclass_is_init = true;
}
Exemple #4
0
/// \todo Make some assertion about # of constit. labels, and/or that
/// they are the lowest numbered ones?
void read_labels(const string filename) {
	assert(!open_label_list);

	ifstream fin(filename.c_str());
	assert(fin.is_open());

	_all_labels.clear();
	_all_constituent_labels.clear();
	_all_terminal_labels.clear();
	_max_label = 0;

	label_list.clear();
	label_map.clear();
	terminal_set.clear();
	constituent_set.clear();

	unsigned i, is_terminal, cnt;
	string label;
	while(!fin.eof()) {
		fin >> i >> ws >> is_terminal >> ws >> cnt >> label >> ws;

		if (i == NO_LABEL)
			label = "";

		label_list.push_back(label);
		assert(label_list.at(i) == label);
		label_map[label] = i;	// FIXME: Don't use [] operator

		if (i != NO_LABEL) {
			_all_labels.push_back(i);
			if (is_terminal) {
				terminal_set.insert(i, true);
				_all_terminal_labels.push_back(i);
			} else {
				constituent_set.insert(i, true);
				_all_constituent_labels.push_back(i);
			}
			if (i > _max_label) _max_label = i+1;
		}
	}
	assert(label_list.at(NO_LABEL) == "");
	assert(label_map[""] == NO_LABEL);	// FIXME: Don't use [] operator
	assert(label_list.size() == label_map.size());
//	assert(label_map.size() == terminal_set.size() + constituent_set.size() + 1);
	terminal_set.lock();
	constituent_set.lock();

/*
	Debug::log(1) << "Read " << constituent_set.size() << " constituents, " << \
			terminal_set.size() << " terminals from '" << filename << "'\n";
*/

	fin.close();
	open_label_list = true;

	if (is_label_string("ADJP")) _Label_ADJP = string_to_label("ADJP");
	if (is_label_string("ADVP")) _Label_ADVP = string_to_label("ADVP");
	if (is_label_string("AUX")) _Label_AUX = string_to_label("AUX");
	if (is_label_string("AUXG")) _Label_AUXG = string_to_label("AUXG");
	if (is_label_string("CC")) _Label_CC = string_to_label("CC");
	if (is_label_string("CD")) _Label_CD = string_to_label("CD");
	if (is_label_string("COLON")) _Label_COLON = string_to_label(":");
	if (is_label_string("COMMA")) _Label_COMMA = string_to_label(",");
	if (is_label_string("CONJP")) _Label_CONJP = string_to_label("CONJP");
	if (is_label_string("DOLLAR")) _Label_DOLLAR = string_to_label("$");
	if (is_label_string("DT")) _Label_DT = string_to_label("DT");
	if (is_label_string("EX")) _Label_EX = string_to_label("EX");
	if (is_label_string("FRAG")) _Label_FRAG = string_to_label("FRAG");
	if (is_label_string("FW")) _Label_FW = string_to_label("FW");
	if (is_label_string("HASH")) _Label_HASH = string_to_label("#");
	if (is_label_string("IN")) _Label_IN = string_to_label("IN");
	if (is_label_string("INTJ")) _Label_INTJ = string_to_label("INTJ");
	if (is_label_string("JJ")) _Label_JJ = string_to_label("JJ");
	if (is_label_string("JJR")) _Label_JJR = string_to_label("JJR");
	if (is_label_string("JJS")) _Label_JJS = string_to_label("JJS");
	if (is_label_string("LS")) _Label_LS = string_to_label("LS");
	if (is_label_string("LST")) _Label_LST = string_to_label("LST");
	if (is_label_string("MD")) _Label_MD = string_to_label("MD");
	if (is_label_string("NAC")) _Label_NAC = string_to_label("NAC");
	if (is_label_string("NN")) _Label_NN = string_to_label("NN");
	if (is_label_string("NNP")) _Label_NNP = string_to_label("NNP");
	if (is_label_string("NNPS")) _Label_NNPS = string_to_label("NNPS");
	if (is_label_string("NNS")) _Label_NNS = string_to_label("NNS");
	if (is_label_string("NP")) _Label_NP = string_to_label("NP");
	if (is_label_string("NPB")) _Label_NPB = string_to_label("NPB");
	if (is_label_string("NX")) _Label_NX = string_to_label("NX");
	if (is_label_string("POS")) _Label_POS = string_to_label("POS");
	if (is_label_string("PP")) _Label_PP = string_to_label("PP");
	if (is_label_string("PRN")) _Label_PRN = string_to_label("PRN");
	if (is_label_string("PRP")) _Label_PRP = string_to_label("PRP");
	if (is_label_string("PRPP")) _Label_PRPP = string_to_label("PRP$");
	if (is_label_string("PRT")) _Label_PRT = string_to_label("PRT");
	if (is_label_string("QP")) _Label_QP = string_to_label("QP");
	if (is_label_string("RB")) _Label_RB = string_to_label("RB");
	if (is_label_string("RBR")) _Label_RBR = string_to_label("RBR");
	if (is_label_string("RBS")) _Label_RBS = string_to_label("RBS");
	if (is_label_string("RP")) _Label_RP = string_to_label("RP");
	if (is_label_string("RRC")) _Label_RRC = string_to_label("RRC");
	if (is_label_string("S")) _Label_S = string_to_label("S");
	if (is_label_string("SBAR")) _Label_SBAR = string_to_label("SBAR");
	if (is_label_string("SBARQ")) _Label_SBARQ = string_to_label("SBARQ");
	if (is_label_string("SINV")) _Label_SINV = string_to_label("SINV");
	if (is_label_string("SQ")) _Label_SQ = string_to_label("SQ");
	if (is_label_string("SYM")) _Label_SYM = string_to_label("SYM");
	if (is_label_string("TO")) _Label_TO = string_to_label("TO");
	if (is_label_string("TOP")) _Label_TOP = string_to_label("TOP");
	if (is_label_string("UCP")) _Label_UCP = string_to_label("UCP");
	if (is_label_string("UH")) _Label_UH = string_to_label("UH");
	if (is_label_string("VB")) _Label_VB = string_to_label("VB");
	if (is_label_string("VBD")) _Label_VBD = string_to_label("VBD");
	if (is_label_string("VBG")) _Label_VBG = string_to_label("VBG");
	if (is_label_string("VBN")) _Label_VBN = string_to_label("VBN");
	if (is_label_string("VBP")) _Label_VBP = string_to_label("VBP");
	if (is_label_string("VBZ")) _Label_VBZ = string_to_label("VBZ");
	if (is_label_string("VP")) _Label_VP = string_to_label("VP");
	if (is_label_string("WDT")) _Label_WDT = string_to_label("WDT");
	if (is_label_string("WHADJP")) _Label_WHADJP = string_to_label("WHADJP");
	if (is_label_string("WHADVP")) _Label_WHADVP = string_to_label("WHADVP");
	if (is_label_string("WHNP")) _Label_WHNP = string_to_label("WHNP");
	if (is_label_string("WHPP")) _Label_WHPP = string_to_label("WHPP");
	if (is_label_string("WP")) _Label_WP = string_to_label("WP");
	if (is_label_string("X")) _Label_X = string_to_label("X");
}