Example 1
    // Build a lemma from the non-reduced literals of `fact` and attach it to
    // the proof `pf`; if every literal is already reduced, return `pf` as is.
    proof *mk_lemma_core(proof *pf, expr *fact)
    {
        ptr_buffer<expr> args;
        expr_ref lemma(m);

        // Collect the disjuncts of `fact` that are not yet reduced;
        // a non-disjunction is treated as a single literal.
        if (m.is_or(fact)) {
            for (unsigned i = 0, sz = to_app(fact)->get_num_args(); i < sz; ++i) {
                expr *a = to_app(fact)->get_arg(i);
                if (!is_reduced(a)) { args.push_back(a); }
            }
        }
        else if (!is_reduced(fact)) { args.push_back(fact); }

        // All literals were reduced: nothing to add to the proof.
        if (args.empty()) { return pf; }

        // Rebuild the lemma as a single literal or a fresh disjunction.
        if (args.size() == 1) { lemma = args.get(0); }
        else { lemma = m.mk_or(args.size(), args.c_ptr()); }

        proof *res = m.mk_lemma(pf, lemma);
        m_pinned.push_back(res);

        // If the lemma matches a tracked hypothesis, cache its proof as a unit.
        if (m_hyps.contains(lemma)) { m_units.insert(lemma, res); }
        return res;
    }
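
The routine above follows a filter-and-rebuild pattern: drop the disjuncts that are already reduced, then recreate the lemma as nothing, a single surviving literal, or a fresh disjunction. The standalone sketch below mirrors that control flow outside of Z3; Lit, is_reduced and make_or are hypothetical stand-ins for Z3's expr*, the reduction predicate, and m.mk_or.

#include <string>
#include <vector>

using Lit = std::string;   // hypothetical stand-in for Z3's expr*

// Hypothetical stand-in for the is_reduced() predicate used above.
static bool is_reduced(const Lit &l) { return !l.empty() && l.front() == '!'; }

// Hypothetical stand-in for m.mk_or(...).
static Lit make_or(const std::vector<Lit> &ls) {
    Lit out = "(or";
    for (const Lit &l : ls) out += " " + l;
    return out + ")";
}

// Filter-and-rebuild: keep the non-reduced disjuncts, then return an empty
// string (keep the old proof), the single surviving literal, or a new OR.
static Lit rebuild_lemma(const std::vector<Lit> &disjuncts) {
    std::vector<Lit> args;
    for (const Lit &d : disjuncts)
        if (!is_reduced(d)) args.push_back(d);

    if (args.empty())     return "";
    if (args.size() == 1) return args[0];
    return make_or(args);
}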
Example 2
bool TrainDataParser::parse(const Utf8File::Data &fileData)
{
	const wchar_t Delimiter = L',';

	int lineNo = 0;
	for (const auto &line : *fileData) {
		++lineNo;
		if (lineNo % 10000 == 0)
			std::cout << "### Line no: " << lineNo << std::endl;

		if (line.empty())
			continue;

		// Split the line into "word,lemma" at the first delimiter.
		auto delimIt = std::find(line.begin(), line.end(), Delimiter);
		if (delimIt == line.end()) {
			std::cerr << "Delimiter not found: line #" << lineNo << std::endl;
			continue;
		}

		std::wstring word(line.begin(), delimIt);
		std::wstring lemma(delimIt + 1, line.end());

		// Reject lines with empty fields or more than one delimiter.
		bool badLine = std::find(lemma.begin(), lemma.end(), Delimiter) != lemma.end();
		if (word.empty() || lemma.empty() || badLine) {
			std::cerr << "Too many delimiters: line #" << lineNo << std::endl;
			continue;
		}

		m_wordNet->addString(word, lemma);
	}

	return true;
}
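
For context, here is a minimal self-contained sketch of the same "word,lemma" line format; parseLine and the in-memory stream are hypothetical and only illustrate the split-and-validate steps that TrainDataParser::parse performs per line.

#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>

// Hypothetical helper mirroring the per-line logic of TrainDataParser::parse.
static bool parseLine(const std::wstring &line, std::wstring &word, std::wstring &lemma)
{
    const wchar_t Delimiter = L',';
    auto delimIt = std::find(line.begin(), line.end(), Delimiter);
    if (delimIt == line.end())
        return false;                       // no delimiter at all

    word.assign(line.begin(), delimIt);
    lemma.assign(delimIt + 1, line.end());

    // Exactly one delimiter and two non-empty fields are required.
    bool extra = std::find(lemma.begin(), lemma.end(), Delimiter) != lemma.end();
    return !word.empty() && !lemma.empty() && !extra;
}

int main()
{
    std::wistringstream input(L"dogs,dog\ncats,cat\nbad,line,here\n");
    std::wstring line, word, lemma;
    while (std::getline(input, line))
        if (parseLine(line, word, lemma))
            std::wcout << word << L" -> " << lemma << L"\n";
}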
Example 3
/**
 * Transforms a linked list of texts into feature vectors saved in output_fpath
 * in SVMLIB format, according to the vocabulary of the current classifier.
 */
list<bool> Classifier::text2vector(TextToClassify* text, string output_fpath,
		TextExpanderParams* exp_params, bool is_unit_length) {
	// Initialization
	list<bool> has_feature_list;
	FILE* output_file = fopen(output_fpath.c_str(), "w");
	if(!output_file) {
		fprintf(stderr, "Cannot open output file: %s\n", output_fpath.c_str());
		return has_feature_list;
	}
	if(!_model_is_loaded) _vocabulary = new FreqVocabulary();
	int count = 0;
	TokenToClassify* token;
	BowVector* features;
	// Read texts
	while(text){
		// Convert tokens to a numerical vector
		token = text->pToken;
		features = new BowVector();
		int token_id;

		while(token){
			if(token->sLemma && !skip_token(token->sLemma)) {
				string lemma(token->sLemma);
				if(!_model_is_loaded){
					// Add the token whether it is in vocabulary or not
					token_id = _vocabulary->get_word_id(lemma);
					features->increment_word(token_id);
					_vocabulary->increment_word(lemma);
				}
				else if(_vocabulary->exists(lemma)){
					// Add token if it is in the vocabulary
					token_id = _vocabulary->get_word_id_ifexists(lemma);
					features->increment_word(token_id);
				}
				else if(exp_params->max_expansions > 0){
					// Try to add related words
					list<long> projected_terms = _expander->get_projected_terms(lemma,  exp_params->max_expansions);
					int i;
					// Hard cap on injected projections, regardless of max_expansions
					const int MAX_PROJ_NUM = 3;
					list<long>::iterator it;
					for(it = projected_terms.begin(), i = 0; it != projected_terms.end() && i < MAX_PROJ_NUM; it++, i++) {
						features->increment_word(*it);
					}
				}
			}
			token = token->pNext;
		}

		// Expand the feature vector
		switch (exp_params->type) {
		case NO_EXP:
			// No expansion
			break;
		case FIRST_ORDER_EXP:
			features = _expander->enrich_vector(features, exp_params->max_expansions, exp_params->expansion_weight, false);
			break;
		case SECOND_ORDER_EXP:
			features = _expander->enrich_vector(features, exp_params->max_expansions, exp_params->expansion_weight, true);
			break;
		case RECURSIVE_EXP:
			features = _expander->enrich_vector_recursive(features, exp_params->max_expansions, exp_params->expansion_weight);
			break;
		default:
			// No expansion
			break;
		}

		// Save the vector to the file (always written when a model is loaded)
		bool has_features = features->size() >= MIN_FEATURES_NUM;
		if(has_features || _model_is_loaded){
			has_feature_list.push_back(has_features);
			fprintf(output_file, "%d %s\n", text->cClass, features->to_string(is_unit_length).c_str());
			count++;
		}

		// Next text
		text = text->pNext;
		delete features;
	}

	// Save the vocabulary built during this pass
	if(!_model_is_loaded) {
		_vocabulary->save(_vocabulary_file);
		delete _vocabulary;
	}
	fclose(output_file);
	printf("\nFeatures were extracted from %d texts.\n", count);
	return has_feature_list;
}
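
The fprintf call above emits one "<class> <feature_id>:<weight> ..." line per text, which is the usual SVMLIB layout. The sketch below shows that layout in isolation; write_vector and the std::map representation are hypothetical, and the optional unit-length normalization mirrors the is_unit_length flag passed to BowVector::to_string.

#include <cmath>
#include <cstdio>
#include <map>

// Hypothetical writer for one "<label> <id>:<weight> ..." line.
static void write_vector(FILE *out, int label,
                         const std::map<int, double> &features,
                         bool unit_length)
{
    // Optionally rescale to unit Euclidean length, mirroring is_unit_length.
    double norm = 0.0;
    for (const auto &f : features) norm += f.second * f.second;
    norm = (unit_length && norm > 0.0) ? std::sqrt(norm) : 1.0;

    fprintf(out, "%d", label);
    for (const auto &f : features)          // std::map keeps feature ids sorted
        fprintf(out, " %d:%g", f.first, f.second / norm);
    fprintf(out, "\n");
}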