示例#1
0
/**
 * Validates one instance against the attribute declarations of this data set.
 *
 * Checks are made in this order, and any violation is reported through THROW:
 *  - the instance pointer is non-null;
 *  - the instance holds exactly m_num_attrs values;
 *  - for every attribute, the value type is acceptable for the attribute type:
 *      - a NUMERIC attribute accepts INTEGER, FLOAT or NUMERIC values,
 *      - a NOMINAL attribute accepts only STRING values,
 *      - any other attribute type requires an identical value type;
 *  - for a NOMINAL attribute, the value's string form is one of the entries
 *    returned by get_nominal() for that attribute's name.
 *
 * @param inst instance to validate
 */
void ArffData::_cross_check_instance(ArffInstance* inst) {
    if(inst == NULL) {
        THROW("ArffData: input instance pointer is null!");
    }
    if(inst->size() != m_num_attrs) {
        THROW("%s: instance size and num-attrs mismatch inst=%d attrs=%d",
              "ArffData", inst->size(), m_num_attrs);
    }
    for(int32 idx = 0; idx < m_num_attrs; ++idx) {
        ArffValue* value = inst->get(idx);
        ArffAttr* attribute = m_attrs[idx];
        const ArffValueEnum valueKind = value->type();
        const ArffValueEnum attrKind = attribute->type();
        const bool nominalAttr = (attrKind == NOMINAL);

        // Decide whether the value type is incompatible with the attribute
        // type; the compatibility rule depends on the attribute type.
        bool typeClash;
        if(attrKind == NUMERIC) {
            typeClash = !((valueKind == INTEGER) || (valueKind == FLOAT) ||
                          (valueKind == NUMERIC));
        }
        else if(nominalAttr) {
            typeClash = !(valueKind == STRING);
        }
        else {
            typeClash = !(attrKind == valueKind);
        }
        if(typeClash) {
            THROW("%s: attr-name=%s attr-type=%s, but inst-type=%s!",
                  "ArffData", attribute->name().c_str(),
                  arff_value2str(attrKind).c_str(),
                  arff_value2str(valueKind).c_str());
        }
        if(!nominalAttr) {
            ///@todo: Check for date-format mismatch
            continue;
        }

        // Nominal attribute: the value must be one of the declared entries.
        ArffNominal allowed = get_nominal(attribute->name());
        const std::string text = static_cast<std::string>(*value);
        ArffNominal::iterator pos = allowed.begin();
        while(pos != allowed.end() && !(text == *pos)) {
            ++pos;
        }
        if(pos == allowed.end()) {
            THROW("%s: attr:(name=%s type=%s) inst-val=%s not found!",
                  "ArffData", attribute->name().c_str(),
                  arff_value2str(attrKind).c_str(), text.c_str());
        }
    }
}
int arff_main(int argc, char *argv[])
{
	//ArffParser parser("./data/file_format/case1.arff");  // error occurred because of incorrect file format.
	//ArffParser parser("./data/file_format/case2.arff");  // error occurred because of incorrect file format.
	//ArffParser parser("./data/file_format/case3.arff");
	ArffParser parser("./data/file_format/case4.arff");

	ArffData *data = parser.parse();

    // relation => @RELATION.
	const std::string relationName = data->get_relation_name();
	std::cout << "relation name = " << relationName << std::endl;

    // attributes => @ATTRIBUTE.
    {
        const int32 numAttr = data->num_attributes();
        std::cout << "the number of attributes = " << numAttr << std::endl;

        for (int32 i = 0; i < numAttr; ++i)
        {
            const ArffAttr *attr = data->get_attr(i);
            const std::string name = attr->name();
            const ArffValueEnum type = attr->type();

            std::cout << "\tattribute #" << i << " : name = " << name << ", type = " << arff_value2str(type) << std::endl;

            if (NOMINAL == type)
            {
                const ArffNominal nominal = data->get_nominal(name);
                std::cout << "\t\t";
                for (ArffNominal::const_iterator cit = nominal.begin(); cit != nominal.end(); ++cit)
                    std::cout << *cit << ", ";
                std::cout << std::endl;
            }
        }
    }

    // instance => @DATA.
    {
        const int32 numInst = data->num_instances();
        std::cout << "the number of instances = " << numInst << std::endl;

        for (int32 i = 0; i < numInst; ++i)
        {
            const ArffInstance *inst = data->get_instance(i);

            std::cout << '\t';
            for (int32 k = 0; k < inst->size(); ++k)
            {
                const ArffValue *value = inst->get(k);

                if (value->missing())
                    std::cout << '?';
                else
                {
                    switch (value->type())
                    {
                    case INTEGER:
                        std::cout << int32(*value);
                        break;
                    case FLOAT:
                        std::cout << float(*value);
                        break;
                    case DATE:
                        std::cout << std::string(*value);  // FIXME [check] >> is it right?
                        break;
                    case STRING:
                        std::cout << std::string(*value);
                       break;
                    case NUMERIC:
                        std::cout << std::string(*value);  // FIXME [check] >> is it right?
                        break;
                    case NOMINAL:
                        std::cout << std::string(*value);  // FIXME [check] >> is it right?
                        break;
                    case UNKNOWN_VAL:
                    default:
                        std::cout << '#';
                        break;
                    }
                }
                std::cout << ',';
            }
            std::cout << std::endl;
        }
    }

    //delete data;

	return 0;
}