/// Load the word vocabulary from \p filename.
///
/// The file is read as a sequence of whitespace-separated records, each an
/// unsigned index followed by the word string. Before reading, word_list and
/// word_map are reset and seeded with the empty string at index NO_WORD.
/// Every record read is appended to word_list, entered into word_map, and its
/// index is appended to _all_words.
///
/// Asserts that the word list has not been loaded before, that the file could
/// be opened, that each record's index equals the word's position in
/// word_list, and that the input was consumed up to end of file.
///
/// \param filename Path of the vocabulary file to read.
void read_words(const string filename) {
    assert(!open_word_list);

    ifstream fin(filename.c_str());
    assert(fin.is_open());

    _all_words.clear();

    word_list.clear();
    word_list.push_back("");
    assert(word_list.at(NO_WORD) == "");

    word_map.clear();
    word_map[""] = NO_WORD;  // FIXME: Don't use [] operator

    unsigned i = 0;
    string word;
    // Loop on the extraction result rather than on eof(): an eof() test only
    // turns true after a read has already failed, which would process one
    // bogus record on an empty file and never terminate on a malformed token.
    // Formatted extraction skips leading whitespace itself, so no explicit
    // ws manipulators are needed.
    while (fin >> i >> word) {
        word_list.push_back(word);
        assert(word_list.at(i) == word);
        word_map[word] = i;  // FIXME: Don't use [] operator
        _all_words.push_back(i);
        // if (word == "*content*") _Word_CONTENT = i;
    }
    // Extraction must have stopped because the input ran out, not because a
    // token could not be parsed as an index.
    assert(fin.eof());

    assert(word_list.size() == word_map.size());
    cerr << "Read " << word_list.size()-1 << " words from '" << filename << "'\n";

    fin.close();
    open_word_list = true;
}
/// Read one line of integers from stdin into \p v.
///
/// Each integer parsed is appended as the pair (value, zero-based position of
/// the value within the line). \p v is cleared first, unless stdin is already
/// at end of file, in which case it is left untouched.
///
/// Reading stops at the terminating newline, at end of input, or at the first
/// token that is not an integer; in the last case the remainder of the line
/// is discarded so that the next call starts on a fresh line.
///
/// NOTE(review): scanf("%d") skips leading whitespace including newlines, so
/// blanks between the last number and the newline make the following line's
/// numbers part of this one. Confirm whether the input can contain that.
///
/// \param v Receives the (value, position) pairs of the line.
/// \return false if stdin was at end of file before anything was read,
///         true otherwise.
bool read(LIST& v) {
    int ch = getchar();
    if (ch == EOF) {
        return false;
    }

    v.clear();
    // Also stop on EOF: a last line without a trailing newline would
    // otherwise never satisfy the loop condition.
    while (ch != '\n' && ch != EOF) {
        // The character just read belongs to the next number (or is leading
        // whitespace); hand it back so scanf sees the whole token.
        ungetc(ch, stdin);

        int value = 0;
        if (scanf("%d", &value) != 1) {
            // Not a number (or input exhausted): drop the rest of the line so
            // the offending characters are not re-read on the next call.
            do {
                ch = getchar();
            } while (ch != '\n' && ch != EOF);
            break;
        }

        v.push_back(make_pair(value, static_cast<int>(v.size())));
        ch = getchar();
    }
    return true;
}
static void posclass_init() { assert(!posclass_is_init); posclass_map.clear(); _all_posclass.clear(); unsigned skipcnt = 0; for (unsigned i = 0; i < tag_to_class_cnt; i++) { // Skip labels not in the vocabulary. if (!is_label_string(tag_to_class[i][0])) { Debug::log(1) << "Skipping unknown label " << tag_to_class[i][0] << " in posclass_init()\n"; skipcnt++; continue; } Label tag = string_to_label(tag_to_class[i][0]); assert(is_terminal_label(tag)); Posclass c = string_to_posclass(tag_to_class[i][1]); assert(posclass_list[c] == tag_to_class[i][1]); assert(posclass_list[c] != ""); posclass_map.insert(tag, c); } posclass_map.lock(); for (unsigned i = 0; i < posclass_cnt; i++) { if (posclass_list[i] != "") _all_posclass.push_back(i); if (posclass_list[i] == "::N") _Posclass_N = i; else if (posclass_list[i] == "::NP") _Posclass_NP = i; } posclass_is_init = true; }
/// \todo Make some assertion about # of constit. labels, and/or that /// they are the lowest numbered ones? void read_labels(const string filename) { assert(!open_label_list); ifstream fin(filename.c_str()); assert(fin.is_open()); _all_labels.clear(); _all_constituent_labels.clear(); _all_terminal_labels.clear(); _max_label = 0; label_list.clear(); label_map.clear(); terminal_set.clear(); constituent_set.clear(); unsigned i, is_terminal, cnt; string label; while(!fin.eof()) { fin >> i >> ws >> is_terminal >> ws >> cnt >> label >> ws; if (i == NO_LABEL) label = ""; label_list.push_back(label); assert(label_list.at(i) == label); label_map[label] = i; // FIXME: Don't use [] operator if (i != NO_LABEL) { _all_labels.push_back(i); if (is_terminal) { terminal_set.insert(i, true); _all_terminal_labels.push_back(i); } else { constituent_set.insert(i, true); _all_constituent_labels.push_back(i); } if (i > _max_label) _max_label = i+1; } } assert(label_list.at(NO_LABEL) == ""); assert(label_map[""] == NO_LABEL); // FIXME: Don't use [] operator assert(label_list.size() == label_map.size()); // assert(label_map.size() == terminal_set.size() + constituent_set.size() + 1); terminal_set.lock(); constituent_set.lock(); /* Debug::log(1) << "Read " << constituent_set.size() << " constituents, " << \ terminal_set.size() << " terminals from '" << filename << "'\n"; */ fin.close(); open_label_list = true; if (is_label_string("ADJP")) _Label_ADJP = string_to_label("ADJP"); if (is_label_string("ADVP")) _Label_ADVP = string_to_label("ADVP"); if (is_label_string("AUX")) _Label_AUX = string_to_label("AUX"); if (is_label_string("AUXG")) _Label_AUXG = string_to_label("AUXG"); if (is_label_string("CC")) _Label_CC = string_to_label("CC"); if (is_label_string("CD")) _Label_CD = string_to_label("CD"); if (is_label_string("COLON")) _Label_COLON = string_to_label(":"); if (is_label_string("COMMA")) _Label_COMMA = string_to_label(","); if (is_label_string("CONJP")) _Label_CONJP = 
string_to_label("CONJP"); if (is_label_string("DOLLAR")) _Label_DOLLAR = string_to_label("$"); if (is_label_string("DT")) _Label_DT = string_to_label("DT"); if (is_label_string("EX")) _Label_EX = string_to_label("EX"); if (is_label_string("FRAG")) _Label_FRAG = string_to_label("FRAG"); if (is_label_string("FW")) _Label_FW = string_to_label("FW"); if (is_label_string("HASH")) _Label_HASH = string_to_label("#"); if (is_label_string("IN")) _Label_IN = string_to_label("IN"); if (is_label_string("INTJ")) _Label_INTJ = string_to_label("INTJ"); if (is_label_string("JJ")) _Label_JJ = string_to_label("JJ"); if (is_label_string("JJR")) _Label_JJR = string_to_label("JJR"); if (is_label_string("JJS")) _Label_JJS = string_to_label("JJS"); if (is_label_string("LS")) _Label_LS = string_to_label("LS"); if (is_label_string("LST")) _Label_LST = string_to_label("LST"); if (is_label_string("MD")) _Label_MD = string_to_label("MD"); if (is_label_string("NAC")) _Label_NAC = string_to_label("NAC"); if (is_label_string("NN")) _Label_NN = string_to_label("NN"); if (is_label_string("NNP")) _Label_NNP = string_to_label("NNP"); if (is_label_string("NNPS")) _Label_NNPS = string_to_label("NNPS"); if (is_label_string("NNS")) _Label_NNS = string_to_label("NNS"); if (is_label_string("NP")) _Label_NP = string_to_label("NP"); if (is_label_string("NPB")) _Label_NPB = string_to_label("NPB"); if (is_label_string("NX")) _Label_NX = string_to_label("NX"); if (is_label_string("POS")) _Label_POS = string_to_label("POS"); if (is_label_string("PP")) _Label_PP = string_to_label("PP"); if (is_label_string("PRN")) _Label_PRN = string_to_label("PRN"); if (is_label_string("PRP")) _Label_PRP = string_to_label("PRP"); if (is_label_string("PRPP")) _Label_PRPP = string_to_label("PRP$"); if (is_label_string("PRT")) _Label_PRT = string_to_label("PRT"); if (is_label_string("QP")) _Label_QP = string_to_label("QP"); if (is_label_string("RB")) _Label_RB = string_to_label("RB"); if (is_label_string("RBR")) _Label_RBR = 
string_to_label("RBR"); if (is_label_string("RBS")) _Label_RBS = string_to_label("RBS"); if (is_label_string("RP")) _Label_RP = string_to_label("RP"); if (is_label_string("RRC")) _Label_RRC = string_to_label("RRC"); if (is_label_string("S")) _Label_S = string_to_label("S"); if (is_label_string("SBAR")) _Label_SBAR = string_to_label("SBAR"); if (is_label_string("SBARQ")) _Label_SBARQ = string_to_label("SBARQ"); if (is_label_string("SINV")) _Label_SINV = string_to_label("SINV"); if (is_label_string("SQ")) _Label_SQ = string_to_label("SQ"); if (is_label_string("SYM")) _Label_SYM = string_to_label("SYM"); if (is_label_string("TO")) _Label_TO = string_to_label("TO"); if (is_label_string("TOP")) _Label_TOP = string_to_label("TOP"); if (is_label_string("UCP")) _Label_UCP = string_to_label("UCP"); if (is_label_string("UH")) _Label_UH = string_to_label("UH"); if (is_label_string("VB")) _Label_VB = string_to_label("VB"); if (is_label_string("VBD")) _Label_VBD = string_to_label("VBD"); if (is_label_string("VBG")) _Label_VBG = string_to_label("VBG"); if (is_label_string("VBN")) _Label_VBN = string_to_label("VBN"); if (is_label_string("VBP")) _Label_VBP = string_to_label("VBP"); if (is_label_string("VBZ")) _Label_VBZ = string_to_label("VBZ"); if (is_label_string("VP")) _Label_VP = string_to_label("VP"); if (is_label_string("WDT")) _Label_WDT = string_to_label("WDT"); if (is_label_string("WHADJP")) _Label_WHADJP = string_to_label("WHADJP"); if (is_label_string("WHADVP")) _Label_WHADVP = string_to_label("WHADVP"); if (is_label_string("WHNP")) _Label_WHNP = string_to_label("WHNP"); if (is_label_string("WHPP")) _Label_WHPP = string_to_label("WHPP"); if (is_label_string("WP")) _Label_WP = string_to_label("WP"); if (is_label_string("X")) _Label_X = string_to_label("X"); }