// Wraps pf in a lemma proof step over the not-yet-reduced literals of fact.
// Returns pf unchanged when every literal of fact is already reduced.
proof *mk_lemma_core(proof *pf, expr *fact) {
    ptr_buffer<expr> survivors;

    // Collect the literals of fact that are still unreduced. A non-disjunctive
    // fact is treated as a single literal.
    if (m.is_or(fact)) {
        app *disj = to_app(fact);
        unsigned num = disj->get_num_args();
        for (unsigned idx = 0; idx < num; ++idx) {
            expr *lit = disj->get_arg(idx);
            if (!is_reduced(lit))
                survivors.push_back(lit);
        }
    }
    else if (!is_reduced(fact)) {
        survivors.push_back(fact);
    }

    // Everything already reduced: no lemma step is needed.
    if (survivors.empty())
        return pf;

    // Rebuild the lemma from the surviving literals.
    expr_ref lemma(m);
    if (survivors.size() == 1)
        lemma = survivors.get(0);
    else
        lemma = m.mk_or(survivors.size(), survivors.c_ptr());

    proof *res = m.mk_lemma(pf, lemma);
    m_pinned.push_back(res); // keep the freshly built proof node alive
    if (m_hyps.contains(lemma))
        m_units.insert(lemma, res); // lemma discharges a tracked hypothesis
    return res;
}
/**
 * Parses comma-separated "word,lemma" lines from fileData and feeds every
 * valid pair into m_wordNet. Malformed lines (no delimiter, extra delimiters,
 * empty word or lemma) are reported to stderr and skipped; parsing continues.
 *
 * @param fileData dereferenceable handle over a sequence of wide-string lines
 * @return always true — per-line errors are reported but never fatal
 */
bool TrainDataParser::parse(const Utf8File::Data &fileData) {
    // FIX: use a wide-char literal. The previous u',' is a char16_t literal
    // that was only implicitly converted to wchar_t.
    const wchar_t Delimeter = L',';
    int lineNo = 0;
    // FIX: iterate by const reference instead of copying every line.
    for (const auto &line : *fileData) {
        ++lineNo;
        if (lineNo % 10000 == 0)
            std::cout << "### Line no: " << lineNo << std::endl; // progress marker

        if (line.empty())
            continue;

        auto delimIt = std::find(line.begin(), line.end(), Delimeter);
        if (delimIt == line.end()) {
            std::cerr << "Delimeter not found: line #" << lineNo << std::endl;
            continue;
        }

        std::wstring word(line.begin(), delimIt);
        std::wstring lemma(delimIt + 1, line.end());
        // A second delimiter inside the lemma half means the line had too
        // many fields; empty halves are rejected under the same message.
        bool badLine = std::find(lemma.begin(), lemma.end(), Delimeter) != lemma.end();
        if (word.empty() || lemma.empty() || badLine) {
            std::cerr << "Too many delimeters: line #" << lineNo << std::endl;
            continue;
        }

        m_wordNet->addString(word, lemma);
    }
    return true;
}
/**
 * Transforms a list of texts into vectors saved in output_fpath in SVMLIB
 * format, according to the vocabulary of the current classifier.
 *
 * In training mode (no model loaded) a fresh vocabulary is built from the
 * tokens and saved to _vocabulary_file; texts with fewer than
 * MIN_FEATURES_NUM features are dropped. In classification mode every text
 * is written, and unknown lemmas may be replaced by related terms from the
 * expander.
 *
 * @param text           head of the linked list of texts to vectorize
 * @param output_fpath   destination file ("<class> <id>:<weight> ..." per line)
 * @param exp_params     expansion settings (type, max_expansions, expansion_weight)
 * @param is_unit_length if true, feature vectors are normalized to unit length
 * @return one flag per written text: whether it had >= MIN_FEATURES_NUM features
 */
list<bool> Classifier::text2vector(TextToClassify* text, string output_fpath,
                                   TextExpanderParams* exp_params, bool is_unit_length) {
    list<bool> has_feature_list;

    FILE* output_file = fopen(output_fpath.c_str(), "w");
    if (!output_file) {
        // FIX: fopen was previously unchecked, so a bad path led to
        // fprintf(NULL, ...) and fclose(NULL), which is undefined behavior.
        fprintf(stderr, "Cannot open output file: %s\n", output_fpath.c_str());
        return has_feature_list;
    }

    // Training mode: the vocabulary is built on the fly while vectorizing.
    if (!_model_is_loaded)
        _vocabulary = new FreqVocabulary();

    int count = 0;
    while (text) {
        // Convert the token chain of this text into a bag-of-words vector.
        BowVector* features = new BowVector();
        for (TokenToClassify* token = text->pToken; token; token = token->pNext) {
            if (!token->sLemma || skip_token(token->sLemma))
                continue;
            string lemma(token->sLemma);

            if (!_model_is_loaded) {
                // Training: count the token whether or not it was seen before.
                int token_id = _vocabulary->get_word_id(lemma);
                features->increment_word(token_id);
                _vocabulary->increment_word(lemma);
            }
            else if (_vocabulary->exists(lemma)) {
                // Classification: count the token only if it is in-vocabulary.
                features->increment_word(_vocabulary->get_word_id_ifexists(lemma));
            }
            else if (exp_params->max_expansions > 0) {
                // Unknown word: fall back to at most MAX_PROJ_NUM related terms.
                list<long> projected_terms =
                    _expander->get_projected_terms(lemma, exp_params->max_expansions);
                const int MAX_PROJ_NUM = 3;
                int i = 0;
                for (list<long>::iterator it = projected_terms.begin();
                     it != projected_terms.end() && i < MAX_PROJ_NUM; ++it, ++i) {
                    features->increment_word(*it);
                }
            }
        }

        // Expand the feature vector.
        // NOTE(review): enrich_vector*() may return a different vector; if it
        // does not free its input, the original 'features' leaks here --
        // TODO confirm ownership semantics of the expander.
        switch (exp_params->type) {
        case NO_EXP:
            break;
        case FIRST_ORDER_EXP:
            features = _expander->enrich_vector(features, exp_params->max_expansions,
                                                exp_params->expansion_weight, false);
            break;
        case SECOND_ORDER_EXP:
            features = _expander->enrich_vector(features, exp_params->max_expansions,
                                                exp_params->expansion_weight, true);
            break;
        case RECURSIVE_EXP:
            features = _expander->enrich_vector_recursive(features, exp_params->max_expansions,
                                                          exp_params->expansion_weight);
            break;
        default:
            // Unknown expansion type: treat as no expansion.
            break;
        }

        // Save the vector; in training mode, texts with too few features are
        // skipped (and get no entry in has_feature_list).
        bool has_features = features->size() >= MIN_FEATURES_NUM;
        if (has_features || _model_is_loaded) {
            has_feature_list.push_back(has_features);
            fprintf(output_file, "%d %s\n", text->cClass,
                    features->to_string(is_unit_length).c_str());
            count++;
        }

        delete features;
        text = text->pNext;
    }

    // Persist the freshly built vocabulary in training mode.
    if (!_model_is_loaded) {
        _vocabulary->save(_vocabulary_file);
        delete _vocabulary;
    }
    fclose(output_file);

    printf("\nfeatures were extracted from %d texts.\n", count);
    return has_feature_list;
}