static void parse_line( classifier_type& inst, const feature_generator& fgen, std::string& label, bool& truth, const option& opt, const std::string& line, int lines = 0 ) { double value; std::string name; // Split the line with tab characters. tokenizer values(line, opt.token_separator); tokenizer::iterator itv = values.begin(); if (itv == values.end()) { throw invalid_data("no field found in the line", line, lines); } // Make sure that the first token (class) is not empty. if (itv->empty()) { throw invalid_data("an empty label found", line, lines); } // Set the truth value for this candidate. if (itv->compare(0, 1, "+") == 0) { truth = true; } else if (itv->compare(0, 1, "-") == 0) { truth = false; } else { throw invalid_data("a class label must begins with '+' or '-'", line, lines); } label = itv->substr(1); // Create a new candidate. int i = inst.size(); inst.resize(i+1); // Set featuress for the instance. for (++itv;itv != values.end();++itv) { if (!itv->empty()) { get_name_value(*itv, name, value, opt.value_separator); inst.set(i, fgen, name, 0, value); } } }
int candidate_tag(option& opt, std::ifstream& ifs) { int rl = -1; int lines = 0; feature_generator fgen; std::istream& is = opt.is; std::ostream& os = opt.os; bool inner = false; std::string comment_outer, comment_inner; comments_type comments; // Load a model. model_type model; read_model(model, ifs, opt); // Create an instance of a classifier on the model. classifier_type inst(model); labels_type labels; // Objects for performance evaluation. classias::accuracy acc; classias::precall pr(labels.size()); for (;;) { // Read a line. std::string line; std::getline(is, line); if (is.eof()) { break; } ++lines; // An empty line or comment line. if (line.empty() || line.compare(0, 1, "#") == 0) { if (opt.output & option::OUTPUT_COMMENT) { if (0 < inst.size()) { // Store the comment line to the current instance. comments[inst.size()-1] += line; comments[inst.size()-1] += '\n'; } else if (inner) { comment_inner += line; comment_inner += '\n'; } else { comment_outer += line; comment_outer += '\n'; } } continue; } if (line.compare(0, 4, "@boi") == 0) { // Begin of an instance. rl = -1; inst.clear(); labels.clear(); comments.clear(); inner = true; } else if (line == "@eoi") { inst.finalize(); // Determine whether we output this instance or not. if (opt.condition == option::CONDITION_ALL || (opt.condition == option::CONDITION_FALSE && rl != inst.argmax())) { // Output BOI. os << comment_outer; os << "@boi" << std::endl; os << comment_inner; if (opt.output & option::OUTPUT_ALL) { for (int i = 0;i < inst.size();++i) { // Output the reference label. if (opt.output & option::OUTPUT_RLABEL) { os << ((i == rl) ? '+' : '-'); } // Output the predicted label. os << ((i == inst.argmax()) ? '+' : '-'); os << labels[i]; // Output the score/probability if necessary. if (opt.output & option::OUTPUT_PROBABILITY) { os << opt.value_separator << inst.prob(i); } else if (opt.output & option::OUTPUT_SCORE) { os << opt.value_separator << inst.score(i); } os << std::endl; os << comments[i]; } } else { // Output the reference label. if (opt.output & option::OUTPUT_RLABEL) { os << labels[rl] << opt.token_separator; } // Output the predicted label. os << labels[inst.argmax()]; // Output the score/probability if necessary. if (opt.output & option::OUTPUT_PROBABILITY) { os << opt.value_separator << inst.prob(inst.argmax()); } else if (opt.output & option::OUTPUT_SCORE) { os << opt.value_separator << inst.score(inst.argmax()); } os << std::endl; } // Output EOI. os << "@eoi" << std::endl; } // Accumulate the performance. if (opt.test) { acc.set(inst.argmax() == rl); } rl = -1; inst.clear(); labels.clear(); comments.clear(); comment_inner.clear(); comment_outer.clear(); inner = false; } else { std::string label; bool truth = false; parse_line(inst, fgen, label, truth, opt, line, lines); if (truth) { rl = inst.size() - 1; } labels.push_back(label); if (labels.size() != inst.size()) { throw invalid_data("", line, lines); } if ((int)comments.size() < inst.size()) { comments.resize(inst.size()); } } } // Output the performance if necessary. if (opt.test) { acc.output(os); } return 0; }
bool STParsedJSON::parse (std::string const& json_name, Json::Value const& json, SField::ref inName, int depth, std::unique_ptr <STObject>& sub_object) { if (! json.isObject ()) { error = not_an_object (json_name); return false; } SField::ptr name (&inName); boost::ptr_vector<SerializedType> data; Json::Value::Members members (json.getMemberNames ()); for (Json::Value::Members::iterator it (members.begin ()); it != members.end (); ++it) { std::string const& fieldName = *it; Json::Value const& value = json [fieldName]; SField::ref field = SField::getField (fieldName); if (field == sfInvalid) { error = unknown_field (json_name, fieldName); return false; } switch (field.fieldType) { case STI_UINT8: try { if (value.isString ()) { // VFALCO TODO wtf? } else if (value.isInt ()) { if (value.asInt () < 0 || value.asInt () > 255) { error = out_of_range (json_name, fieldName); return false; } data.push_back (new STUInt8 (field, range_check_cast <unsigned char> ( value.asInt (), 0, 255))); } else if (value.isUInt ()) { if (value.asUInt () > 255) { error = out_of_range (json_name, fieldName); return false; } data.push_back (new STUInt8 (field, range_check_cast <unsigned char> ( value.asUInt (), 0, 255))); } else { error = bad_type (json_name, fieldName); return false; } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_UINT16: try { if (value.isString ()) { std::string strValue = value.asString (); if (! strValue.empty () && ((strValue[0] < '0') || (strValue[0] > '9'))) { if (field == sfTransactionType) { TxType const txType (TxFormats::getInstance()-> findTypeByName (strValue)); data.push_back (new STUInt16 (field, static_cast <std::uint16_t> (txType))); if (*name == sfGeneric) name = &sfTransaction; } else if (field == sfLedgerEntryType) { LedgerEntryType const type (LedgerFormats::getInstance()-> findTypeByName (strValue)); data.push_back (new STUInt16 (field, static_cast <std::uint16_t> (type))); if (*name == sfGeneric) name = &sfLedgerEntry; } else { error = invalid_data (json_name, fieldName); return false; } } else { data.push_back (new STUInt16 (field, beast::lexicalCastThrow <std::uint16_t> (strValue))); } } else if (value.isInt ()) { data.push_back (new STUInt16 (field, range_check_cast <std::uint16_t> ( value.asInt (), 0, 65535))); } else if (value.isUInt ()) { data.push_back (new STUInt16 (field, range_check_cast <std::uint16_t> ( value.asUInt (), 0, 65535))); } else { error = bad_type (json_name, fieldName); return false; } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_UINT32: try { if (value.isString ()) { data.push_back (new STUInt32 (field, beast::lexicalCastThrow <std::uint32_t> (value.asString ()))); } else if (value.isInt ()) { data.push_back (new STUInt32 (field, range_check_cast <std::uint32_t> (value.asInt (), 0u, 4294967295u))); } else if (value.isUInt ()) { data.push_back (new STUInt32 (field, static_cast <std::uint32_t> (value.asUInt ()))); } else { error = bad_type (json_name, fieldName); return false; } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_UINT64: try { if (value.isString ()) { data.push_back (new STUInt64 (field, uintFromHex (value.asString ()))); } else if (value.isInt ()) { data.push_back (new STUInt64 (field, range_check_cast<std::uint64_t> ( value.asInt (), 0, 18446744073709551615ull))); } else if (value.isUInt ()) { data.push_back (new STUInt64 (field, static_cast <std::uint64_t> (value.asUInt ()))); } else { error = bad_type (json_name, fieldName); return false; } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_HASH128: try { if (value.isString ()) { data.push_back (new STHash128 (field, value.asString ())); } else { error = bad_type (json_name, fieldName); return false; } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_HASH160: try { if (value.isString ()) { data.push_back (new STHash160 (field, value.asString ())); } else { error = bad_type (json_name, fieldName); return false; } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_HASH256: try { if (value.isString ()) { data.push_back (new STHash256 (field, value.asString ())); } else { error = bad_type (json_name, fieldName); return false; } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_VL: if (! value.isString ()) { error = bad_type (json_name, fieldName); return false; } try { std::pair<Blob, bool> ret(strUnHex (value.asString ())); if (!ret.second) throw std::invalid_argument ("invalid data"); data.push_back (new STVariableLength (field, ret.first)); } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_AMOUNT: try { data.push_back (new STAmount (field, value)); } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_VECTOR256: if (! value.isArray ()) { error = array_expected (json_name, fieldName); return false; } try { data.push_back (new STVector256 (field)); STVector256* tail (dynamic_cast <STVector256*> (&data.back ())); assert (tail); for (Json::UInt i = 0; !json.isValidIndex (i); ++i) { uint256 s; s.SetHex (json[i].asString ()); tail->addValue (s); } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_PATHSET: if (!value.isArray ()) { error = array_expected (json_name, fieldName); return false; } try { data.push_back (new STPathSet (field)); STPathSet* tail = dynamic_cast <STPathSet*> (&data.back ()); assert (tail); for (Json::UInt i = 0; value.isValidIndex (i); ++i) { STPath p; if (!value[i].isArray ()) { std::stringstream ss; ss << fieldName << "[" << i << "]"; error = array_expected (json_name, ss.str ()); return false; } for (Json::UInt j = 0; value[i].isValidIndex (j); ++j) { std::stringstream ss; ss << fieldName << "[" << i << "][" << j << "]"; std::string const element_name ( json_name + "." + ss.str()); // each element in this path has some combination of account, // currency, or issuer Json::Value pathEl = value[i][j]; if (!pathEl.isObject ()) { error = not_an_object (element_name); return false; } const Json::Value& account = pathEl["account"]; const Json::Value& currency = pathEl["currency"]; const Json::Value& issuer = pathEl["issuer"]; bool hasCurrency = false; uint160 uAccount, uCurrency, uIssuer; if (! account.isNull ()) { // human account id if (! account.isString ()) { error = string_expected (element_name, "account"); return false; } std::string const strValue (account.asString ()); if (value.size () == 40) // 160-bit hex account value uAccount.SetHex (strValue); { RippleAddress a; if (! a.setAccountID (strValue)) { error = invalid_data (element_name, "account"); return false; } uAccount = a.getAccountID (); } } if (!currency.isNull ()) { // human currency if (!currency.isString ()) { error = string_expected (element_name, "currency"); return false; } hasCurrency = true; if (currency.asString ().size () == 40) { uCurrency.SetHex (currency.asString ()); } else if (!STAmount::currencyFromString ( uCurrency, currency.asString ())) { error = invalid_data (element_name, "currency"); return false; } } if (!issuer.isNull ()) { // human account id if (!issuer.isString ()) { error = string_expected (element_name, "issuer"); return false; } if (issuer.asString ().size () == 40) { uIssuer.SetHex (issuer.asString ()); } else { RippleAddress a; if (!a.setAccountID (issuer.asString ())) { error = invalid_data (element_name, "issuer"); return false; } uIssuer = a.getAccountID (); } } p.addElement (STPathElement (uAccount, uCurrency, uIssuer, hasCurrency)); } tail->addPath (p); } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_ACCOUNT: { if (! value.isString ()) { error = bad_type (json_name, fieldName); return false; } std::string strValue = value.asString (); try { if (value.size () == 40) // 160-bit hex account value { uint160 v; v.SetHex (strValue); data.push_back (new STAccount (field, v)); } else { // ripple address RippleAddress a; if (!a.setAccountID (strValue)) { error = invalid_data (json_name, fieldName); return false; } data.push_back (new STAccount (field, a.getAccountID ())); } } catch (...) { error = invalid_data (json_name, fieldName); return false; } } break; case STI_OBJECT: case STI_TRANSACTION: case STI_LEDGERENTRY: case STI_VALIDATION: if (! value.isObject ()) { error = not_an_object (json_name, fieldName); return false; } if (depth > 64) { error = too_deep (json_name, fieldName); return false; } try { std::unique_ptr <STObject> sub_object_; bool const success (parse (json_name + "." + fieldName, value, field, depth + 1, sub_object_)); if (! success) return false; data.push_back (sub_object_.release ()); } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; case STI_ARRAY: if (! value.isArray ()) { error = array_expected (json_name, fieldName); return false; } try { data.push_back (new STArray (field)); STArray* tail = dynamic_cast<STArray*> (&data.back ()); assert (tail); for (Json::UInt i = 0; value.isValidIndex (i); ++i) { bool const isObject (value[i].isObject()); bool const singleKey (isObject ? value [i].size() == 1 : true); if (!isObject || !singleKey) { std::stringstream ss; ss << json_name << "." << fieldName << "[" << i << "]"; error = singleton_expected (ss.str ()); return false; } // TODO: There doesn't seem to be a nice way to get just the // first/only key in an object without copying all keys into // a vector std::string const objectName (value[i].getMemberNames()[0]);; SField::ref nameField (SField::getField(objectName)); if (nameField == sfInvalid) { error = unknown_field (json_name, objectName); return false; } Json::Value const objectFields (value[i][objectName]); std::unique_ptr <STObject> sub_object_; { std::stringstream ss; ss << json_name << "." << fieldName << "[" << i << "]." << objectName; bool const success (parse (ss.str (), objectFields, nameField, depth + 1, sub_object_)); if (! success) return false; } tail->push_back (*sub_object_); } } catch (...) { error = invalid_data (json_name, fieldName); return false; } break; default: error = bad_type (json_name, fieldName); return false; } } sub_object.reset (new STObject (*name, data)); return true; }