Beispiel #1
0
static void
parse_line(
    classifier_type& inst,
    const feature_generator& fgen,
    std::string& label,
    bool& truth,
    const option& opt,
    const std::string& line,
    int lines = 0
    )
{
    double value;
    std::string name;

    // Split the line with tab characters.
    tokenizer values(line, opt.token_separator);
    tokenizer::iterator itv = values.begin();
    if (itv == values.end()) {
        throw invalid_data("no field found in the line", line, lines);
    }

    // Make sure that the first token (class) is not empty.
    if (itv->empty()) {
        throw invalid_data("an empty label found", line, lines);
    }

    // Set the truth value for this candidate.
    if (itv->compare(0, 1, "+") == 0) {
        truth = true;
    } else if (itv->compare(0, 1, "-") == 0) {
        truth = false;
    } else {
        throw invalid_data("a class label must begins with '+' or '-'", line, lines);
    }

    label = itv->substr(1);

    // Create a new candidate.
    int i = inst.size();
    inst.resize(i+1);

    // Set featuress for the instance.
    for (++itv;itv != values.end();++itv) {
        if (!itv->empty()) {
            get_name_value(*itv, name, value, opt.value_separator);
            inst.set(i, fgen, name, 0, value);
        }
    }
}
Beispiel #2
0
int candidate_tag(option& opt, std::ifstream& ifs)
{
    int rl = -1;
    int lines = 0;
    feature_generator fgen;
    std::istream& is = opt.is;
    std::ostream& os = opt.os;
    bool inner = false;
    std::string comment_outer, comment_inner;
    comments_type comments;

    // Load a model.
    model_type model;
    read_model(model, ifs, opt);

    // Create an instance of a classifier on the model.
    classifier_type inst(model);
    labels_type labels;

    // Objects for performance evaluation.
    classias::accuracy acc;
    classias::precall pr(labels.size());

    for (;;) {
        // Read a line.
        std::string line;
        std::getline(is, line);
        if (is.eof()) {
            break;
        }
        ++lines;

        // An empty line or comment line.
        if (line.empty() || line.compare(0, 1, "#") == 0) {
            if (opt.output & option::OUTPUT_COMMENT) {
                if (0 < inst.size()) {
                    // Store the comment line to the current instance.
                    comments[inst.size()-1] += line;
                    comments[inst.size()-1] += '\n';
                } else if (inner) {
                    comment_inner += line;
                    comment_inner += '\n';
                } else {
                    comment_outer += line;
                    comment_outer += '\n';
                }
            }
            continue;
        }

        if (line.compare(0, 4, "@boi") == 0) {
            // Begin of an instance.
            rl = -1;
            inst.clear();
            labels.clear();
            comments.clear();
            inner = true;

        } else if (line == "@eoi") {
            inst.finalize();

            // Determine whether we output this instance or not.
            if (opt.condition == option::CONDITION_ALL ||
                (opt.condition == option::CONDITION_FALSE && rl != inst.argmax())) {

                // Output BOI.
                os << comment_outer;
                os << "@boi" << std::endl;
                os << comment_inner;

                if (opt.output & option::OUTPUT_ALL) {
                    for (int i = 0;i < inst.size();++i) {
                        // Output the reference label.
                        if (opt.output & option::OUTPUT_RLABEL) {
                            os << ((i == rl) ? '+' : '-');
                        }
                        // Output the predicted label.
                        os << ((i == inst.argmax()) ? '+' : '-');
                        os << labels[i];

                        // Output the score/probability if necessary.
                        if (opt.output & option::OUTPUT_PROBABILITY) {
                            os << opt.value_separator << inst.prob(i);
                        } else if (opt.output & option::OUTPUT_SCORE) {
                            os << opt.value_separator << inst.score(i);
                        }

                        os << std::endl;
                        os << comments[i];
                    }

                } else {
                    // Output the reference label.
                    if (opt.output & option::OUTPUT_RLABEL) {
                        os << labels[rl] << opt.token_separator;
                    }
                    // Output the predicted label.
                    os << labels[inst.argmax()];

                    // Output the score/probability if necessary.
                    if (opt.output & option::OUTPUT_PROBABILITY) {
                        os << opt.value_separator << inst.prob(inst.argmax());
                    } else if (opt.output & option::OUTPUT_SCORE) {
                        os << opt.value_separator << inst.score(inst.argmax());
                    }

                    os << std::endl;

                }

                // Output EOI.
                os << "@eoi" << std::endl;
            }

            // Accumulate the performance.
            if (opt.test) {
                acc.set(inst.argmax() == rl);
            }

            rl = -1;
            inst.clear();
            labels.clear();
            comments.clear();
            comment_inner.clear();
            comment_outer.clear();
            inner = false;

        } else {
            std::string label;
            bool truth = false;
            parse_line(inst, fgen, label, truth, opt, line, lines);
            if (truth) {
                rl = inst.size() - 1;
            }
            labels.push_back(label);
            if (labels.size() != inst.size()) {
                throw invalid_data("", line, lines);
            }
            if ((int)comments.size() < inst.size()) {
                comments.resize(inst.size());
            }
        }
    }

    // Output the performance if necessary.
    if (opt.test) {
        acc.output(os);
    }

    return 0;
}
bool STParsedJSON::parse (std::string const& json_name,
    Json::Value const& json, SField::ref inName, int depth,
        std::unique_ptr <STObject>& sub_object)
{
    if (! json.isObject ())
    {
        error = not_an_object (json_name);
        return false;
    }

    SField::ptr name (&inName);

    boost::ptr_vector<SerializedType> data;
    Json::Value::Members members (json.getMemberNames ());

    for (Json::Value::Members::iterator it (members.begin ());
        it != members.end (); ++it)
    {
        std::string const& fieldName = *it;
        Json::Value const& value = json [fieldName];

        SField::ref field = SField::getField (fieldName);

        if (field == sfInvalid)
        {
            error = unknown_field (json_name, fieldName);
            return false;
        }

        switch (field.fieldType)
        {
        case STI_UINT8:
            try
            {
                if (value.isString ())
                {
                    // VFALCO TODO wtf?
                }
                else if (value.isInt ())
                {
                    if (value.asInt () < 0 || value.asInt () > 255)
                    {
                        error = out_of_range (json_name, fieldName);
                        return false;
                    }

                    data.push_back (new STUInt8 (field,
                        range_check_cast <unsigned char> (
                            value.asInt (), 0, 255)));
                }
                else if (value.isUInt ())
                {
                    if (value.asUInt () > 255)
                    {
                        error = out_of_range (json_name, fieldName);
                        return false;
                    }

                    data.push_back (new STUInt8 (field,
                        range_check_cast <unsigned char> (
                            value.asUInt (), 0, 255)));
                }
                else
                {
                    error = bad_type (json_name, fieldName);
                    return false;
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_UINT16:
            try
            {
                if (value.isString ())
                {
                    std::string strValue = value.asString ();

                    if (! strValue.empty () &&
                        ((strValue[0] < '0') || (strValue[0] > '9')))
                    {
                        if (field == sfTransactionType)
                        {
                            TxType const txType (TxFormats::getInstance()->
                                findTypeByName (strValue));

                            data.push_back (new STUInt16 (field,
                                static_cast <std::uint16_t> (txType)));

                            if (*name == sfGeneric)
                                name = &sfTransaction;
                        }
                        else if (field == sfLedgerEntryType)
                        {
                            LedgerEntryType const type (LedgerFormats::getInstance()->
                                findTypeByName (strValue));

                            data.push_back (new STUInt16 (field,
                                static_cast <std::uint16_t> (type)));

                            if (*name == sfGeneric)
                                name = &sfLedgerEntry;
                        }
                        else
                        {
                            error = invalid_data (json_name, fieldName);
                            return false;
                        }
                    }
                    else
                    {
                        data.push_back (new STUInt16 (field,
                            beast::lexicalCastThrow <std::uint16_t> (strValue)));
                    }
                }
                else if (value.isInt ())
                {
                    data.push_back (new STUInt16 (field,
                        range_check_cast <std::uint16_t> (
                            value.asInt (), 0, 65535)));
                }
                else if (value.isUInt ())
                {
                    data.push_back (new STUInt16 (field,
                        range_check_cast <std::uint16_t> (
                            value.asUInt (), 0, 65535)));
                }
                else
                {
                    error = bad_type (json_name, fieldName);
                    return false;
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_UINT32:
            try
            {
                if (value.isString ())
                {
                    data.push_back (new STUInt32 (field,
                        beast::lexicalCastThrow <std::uint32_t> (value.asString ())));
                }
                else if (value.isInt ())
                {
                    data.push_back (new STUInt32 (field,
                        range_check_cast <std::uint32_t> (value.asInt (), 0u, 4294967295u)));
                }
                else if (value.isUInt ())
                {
                    data.push_back (new STUInt32 (field,
                        static_cast <std::uint32_t> (value.asUInt ())));
                }
                else
                {
                    error = bad_type (json_name, fieldName);
                    return false;
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_UINT64:
            try
            {
                if (value.isString ())
                {
                    data.push_back (new STUInt64 (field,
                        uintFromHex (value.asString ())));
                }
                else if (value.isInt ())
                {
                    data.push_back (new STUInt64 (field,
                        range_check_cast<std::uint64_t> (
                            value.asInt (), 0, 18446744073709551615ull)));
                }
                else if (value.isUInt ())
                {
                    data.push_back (new STUInt64 (field,
                        static_cast <std::uint64_t> (value.asUInt ())));
                }
                else
                {
                    error = bad_type (json_name, fieldName);
                    return false;
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_HASH128:
            try
            {
                if (value.isString ())
                {
                    data.push_back (new STHash128 (field, value.asString ()));
                }
                else
                {
                    error = bad_type (json_name, fieldName);
                    return false;
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_HASH160:
            try
            {
                if (value.isString ())
                {
                    data.push_back (new STHash160 (field, value.asString ()));
                }
                else
                {
                    error = bad_type (json_name, fieldName);
                    return false;
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_HASH256:
            try
            {
                if (value.isString ())
                {
                    data.push_back (new STHash256 (field, value.asString ()));
                }
                else
                {
                    error = bad_type (json_name, fieldName);
                    return false;
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_VL:
            if (! value.isString ())
            {
                error = bad_type (json_name, fieldName);
                return false;
            }

            try
            {
                std::pair<Blob, bool> ret(strUnHex (value.asString ()));

                if (!ret.second)
                    throw std::invalid_argument ("invalid data");

                data.push_back (new STVariableLength (field, ret.first));
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_AMOUNT:
            try
            {
                data.push_back (new STAmount (field, value));
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_VECTOR256:
            if (! value.isArray ())
            {
                error = array_expected (json_name, fieldName);
                return false;
            }
            
            try
            {
                data.push_back (new STVector256 (field));
                STVector256* tail (dynamic_cast <STVector256*> (&data.back ()));
                assert (tail);

                for (Json::UInt i = 0; !json.isValidIndex (i); ++i)
                {
                    uint256 s;
                    s.SetHex (json[i].asString ());
                    tail->addValue (s);
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_PATHSET:
            if (!value.isArray ())
            {
                error = array_expected (json_name, fieldName);
                return false;
            }

            try
            {
                data.push_back (new STPathSet (field));
                STPathSet* tail = dynamic_cast <STPathSet*> (&data.back ());
                assert (tail);

                for (Json::UInt i = 0; value.isValidIndex (i); ++i)
                {
                    STPath p;

                    if (!value[i].isArray ())
                    {
                        std::stringstream ss;
                        ss << fieldName << "[" << i << "]";
                        error = array_expected (json_name, ss.str ());
                        return false;
                    }

                    for (Json::UInt j = 0; value[i].isValidIndex (j); ++j)
                    {
                        std::stringstream ss;
                        ss << fieldName << "[" << i << "][" << j << "]";
                        std::string const element_name (
                            json_name + "." + ss.str());

                        // each element in this path has some combination of account,
                        // currency, or issuer

                        Json::Value pathEl = value[i][j];

                        if (!pathEl.isObject ())
                        {
                            error = not_an_object (element_name);
                            return false;
                        }

                        const Json::Value& account  = pathEl["account"];
                        const Json::Value& currency = pathEl["currency"];
                        const Json::Value& issuer   = pathEl["issuer"];
                        bool hasCurrency            = false;
                        uint160 uAccount, uCurrency, uIssuer;

                        if (! account.isNull ())
                        {
                            // human account id
                            if (! account.isString ())
                            {
                                error = string_expected (element_name, "account");
                                return false;
                            }

                            std::string const strValue (account.asString ());

                            if (value.size () == 40) // 160-bit hex account value
                                uAccount.SetHex (strValue);

                            {
                                RippleAddress a;

                                if (! a.setAccountID (strValue))
                                {
                                    error = invalid_data (element_name, "account");
                                    return false;
                                }

                                uAccount = a.getAccountID ();
                            }
                        }

                        if (!currency.isNull ())
                        {
                            // human currency
                            if (!currency.isString ())
                            {
                                error = string_expected (element_name, "currency");
                                return false;
                            }

                            hasCurrency = true;

                            if (currency.asString ().size () == 40)
                            {
                                uCurrency.SetHex (currency.asString ());
                            }
                            else if (!STAmount::currencyFromString (
                                uCurrency, currency.asString ()))
                            {
                                error = invalid_data (element_name, "currency");
                                return false;
                            }
                        }

                        if (!issuer.isNull ())
                        {
                            // human account id
                            if (!issuer.isString ())
                            {
                                error = string_expected (element_name, "issuer");
                                return false;
                            }

                            if (issuer.asString ().size () == 40)
                            {
                                uIssuer.SetHex (issuer.asString ());
                            }
                            else
                            {
                                RippleAddress a;

                                if (!a.setAccountID (issuer.asString ()))
                                {
                                    error = invalid_data (element_name, "issuer");
                                    return false;
                                }

                                uIssuer = a.getAccountID ();
                            }
                        }

                        p.addElement (STPathElement (uAccount, uCurrency, uIssuer, hasCurrency));
                    }

                    tail->addPath (p);
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_ACCOUNT:
        {
            if (! value.isString ())
            {
                error = bad_type (json_name, fieldName);
                return false;
            }

            std::string strValue = value.asString ();

            try
            {
                if (value.size () == 40) // 160-bit hex account value
                {
                    uint160 v;
                    v.SetHex (strValue);
                    data.push_back (new STAccount (field, v));
                }
                else
                {
                    // ripple address
                    RippleAddress a;

                    if (!a.setAccountID (strValue))
                    {
                        error = invalid_data (json_name, fieldName);
                        return false;
                    }

                    data.push_back (new STAccount (field, a.getAccountID ()));
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }
        }
        break;

        case STI_OBJECT:
        case STI_TRANSACTION:
        case STI_LEDGERENTRY:
        case STI_VALIDATION:
            if (! value.isObject ())
            {
                error = not_an_object (json_name, fieldName);
                return false;
            }

            if (depth > 64)
            {
                error = too_deep (json_name, fieldName);
                return false;
            }

            try
            {
                std::unique_ptr <STObject> sub_object_;
                bool const success (parse (json_name + "." + fieldName,
                    value, field, depth + 1, sub_object_));
                if (! success)
                    return false;
                data.push_back (sub_object_.release ());
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        case STI_ARRAY:
            if (! value.isArray ())
            {
                error = array_expected (json_name, fieldName);
                return false;
            }

            try
            {
                data.push_back (new STArray (field));
                STArray* tail = dynamic_cast<STArray*> (&data.back ());
                assert (tail);

                for (Json::UInt i = 0; value.isValidIndex (i); ++i)
                {
                    bool const isObject (value[i].isObject());
                    bool const singleKey (isObject
                        ? value [i].size() == 1
                        : true);

                    if (!isObject || !singleKey)
                    {
                        std::stringstream ss;
                        ss << json_name << "." << fieldName << "[" << i << "]";
                        error = singleton_expected (ss.str ());
                        return false;
                    }

                    // TODO: There doesn't seem to be a nice way to get just the
                    // first/only key in an object without copying all keys into
                    // a vector
                    std::string const objectName (value[i].getMemberNames()[0]);;
                    SField::ref       nameField (SField::getField(objectName));

                    if (nameField == sfInvalid)
                    {
                        error = unknown_field (json_name, objectName);
                        return false;
                    }

                    Json::Value const objectFields (value[i][objectName]);

                    std::unique_ptr <STObject> sub_object_;
                    {
                        std::stringstream ss;
                        ss << json_name << "." << fieldName <<
                            "[" << i << "]." << objectName;
                        bool const success (parse (ss.str (), objectFields,
                            nameField, depth + 1, sub_object_));
                        if (! success)
                            return false;
                    }
                    tail->push_back (*sub_object_);
                }
            }
            catch (...)
            {
                error = invalid_data (json_name, fieldName);
                return false;
            }

            break;

        default:
            error = bad_type (json_name, fieldName);
            return false;
        }
    }

    sub_object.reset (new STObject (*name, data));
    return true;
}