/**
 * @brief Checks that an instance is consistent with the attributes
 *        declared in this ArffData.
 *
 * The following conditions are reported through THROW:
 *  - the instance pointer is null;
 *  - the number of values differs from the number of attributes;
 *  - a NUMERIC attribute holds a value that is not INTEGER/FLOAT/NUMERIC;
 *  - a NOMINAL attribute holds a value that is not a STRING, or a string
 *    that is not one of the attribute's declared nominal values;
 *  - for any other attribute type, the value type differs from the
 *    attribute type.
 *
 * @param inst instance to validate (must not be null)
 */
void ArffData::_cross_check_instance(ArffInstance* inst) {
    if(inst == NULL) {
        THROW("ArffData: input instance pointer is null!");
    }
    if(inst->size() != m_num_attrs) {
        THROW("%s: instance size and num-attrs mismatch inst=%d attrs=%d",
              "ArffData", inst->size(), m_num_attrs);
    }

    for(int32 idx = 0; idx < m_num_attrs; ++idx) {
        ArffValue* value = inst->get(idx);
        ArffAttr* attribute = m_attrs[idx];
        const ArffValueEnum valueKind = value->type();
        const ArffValueEnum attrKind = attribute->type();

        const bool valueIsNumber = (valueKind == INTEGER) ||
                                   (valueKind == FLOAT) ||
                                   (valueKind == NUMERIC);
        const bool valueIsString = (valueKind == STRING);

        // numeric attribute: any of the number-like value types is accepted
        if(attrKind == NUMERIC) {
            if(!valueIsNumber) {
                THROW("%s: attr-name=%s attr-type=%s, but inst-type=%s!",
                      "ArffData", attribute->name().c_str(),
                      arff_value2str(attrKind).c_str(),
                      arff_value2str(valueKind).c_str());
            }
            continue;
        }

        // nominal attribute: must be a string that is one of the declared values
        if(attrKind == NOMINAL) {
            if(!valueIsString) {
                THROW("%s: attr-name=%s attr-type=%s, but inst-type=%s!",
                      "ArffData", attribute->name().c_str(),
                      arff_value2str(attrKind).c_str(),
                      arff_value2str(valueKind).c_str());
            }
            const ArffNominal allowed = get_nominal(attribute->name());
            const std::string text = (std::string)*value;
            bool known = false;
            for(ArffNominal::const_iterator it = allowed.begin();
                it != allowed.end(); ++it) {
                if(*it == text) {
                    known = true;
                    break;
                }
            }
            if(!known) {
                THROW("%s: attr:(name=%s type=%s) inst-val=%s not found!",
                      "ArffData", attribute->name().c_str(),
                      arff_value2str(attrKind).c_str(), text.c_str());
            }
            continue;
        }

        // every other attribute type requires an exact type match
        if(attrKind != valueKind) {
            THROW("%s: attr-name=%s attr-type=%s, but inst-type=%s!",
                  "ArffData", attribute->name().c_str(),
                  arff_value2str(attrKind).c_str(),
                  arff_value2str(valueKind).c_str());
        }
        ///@todo: Check for date-format mismatch
    }
}
int arff_main(int argc, char *argv[]) { //ArffParser parser("./data/file_format/case1.arff"); // error occurred because of incorrect file format. //ArffParser parser("./data/file_format/case2.arff"); // error occurred because of incorrect file format. //ArffParser parser("./data/file_format/case3.arff"); ArffParser parser("./data/file_format/case4.arff"); ArffData *data = parser.parse(); // relation => @RELATION. const std::string relationName = data->get_relation_name(); std::cout << "relation name = " << relationName << std::endl; // attributes => @ATTRIBUTE. { const int32 numAttr = data->num_attributes(); std::cout << "the number of attributes = " << numAttr << std::endl; for (int32 i = 0; i < numAttr; ++i) { const ArffAttr *attr = data->get_attr(i); const std::string name = attr->name(); const ArffValueEnum type = attr->type(); std::cout << "\tattribute #" << i << " : name = " << name << ", type = " << arff_value2str(type) << std::endl; if (NOMINAL == type) { const ArffNominal nominal = data->get_nominal(name); std::cout << "\t\t"; for (ArffNominal::const_iterator cit = nominal.begin(); cit != nominal.end(); ++cit) std::cout << *cit << ", "; std::cout << std::endl; } } } // instance => @DATA. { const int32 numInst = data->num_instances(); std::cout << "the number of instances = " << numInst << std::endl; for (int32 i = 0; i < numInst; ++i) { const ArffInstance *inst = data->get_instance(i); std::cout << '\t'; for (int32 k = 0; k < inst->size(); ++k) { const ArffValue *value = inst->get(k); if (value->missing()) std::cout << '?'; else { switch (value->type()) { case INTEGER: std::cout << int32(*value); break; case FLOAT: std::cout << float(*value); break; case DATE: std::cout << std::string(*value); // FIXME [check] >> is it right? break; case STRING: std::cout << std::string(*value); break; case NUMERIC: std::cout << std::string(*value); // FIXME [check] >> is it right? break; case NOMINAL: std::cout << std::string(*value); // FIXME [check] >> is it right? 
break; case UNKNOWN_VAL: default: std::cout << '#'; break; } } std::cout << ','; } std::cout << std::endl; } } //delete data; return 0; }