// Records every field of tuple `t` in ColumnValueMap, keyed by the
// (table, column) pair parsed from the field's name.
//
// Every field name in the tuple's schema must have the form "table.column";
// the name is split at the first '.'.
//
// Throws string("Error in column Mapping") if a field name contains no '.'.
void MapTupleValues(Tuple t)
{
    Schema tupleSchema = t.getSchema();
    vector<string> columnNames = tupleSchema.getFieldNames();

    for (vector<string>::iterator iter = columnNames.begin(); iter != columnNames.end(); ++iter)
    {
        const string& qualifiedName = *iter;

        // Keep the position in the string's own size type. Storing it in an
        // int truncates it, and the comparison against npos then only works
        // through an implementation-defined conversion.
        const string::size_type dotPos = qualifiedName.find_first_of('.');
        if (dotPos == string::npos)
            throw string("Error in column Mapping");

        string tableName = qualifiedName.substr(0, dotPos);
        string colName = qualifiedName.substr(dotPos + 1);  // everything after the dot

        FIELD_TYPE ft = tupleSchema.getFieldType(qualifiedName);
        Field value = t.getField(qualifiedName);

        TableColumnPair tcpair = TableColumnPair(tableName, colName);
        Value val = Value(ft, value);
        ColumnValueMap[tcpair] = val;
    }
}
// "Less than" predicate over two tuples, comparing the columns listed in
// fieldNames in order.
//
// Null handling: a non-null tuple orders before a null one; two null tuples
// are equivalent (returns false).
// INT columns are compared numerically; every other column type is treated as
// a string column and compared with string::compare.
//
// Throws a string if the two tuples do not share the same schema.
bool CompareTuple::operator()(Tuple& first, Tuple& second)
{
    const bool firstIsNull = first.isNull();
    const bool secondIsNull = second.isNull();

    // A null second operand: first is "less" only when it is not null itself.
    if (secondIsNull)
        return !firstIsNull;
    // Second is not null here, so a null first is never "less".
    if (firstIsNull)
        return false;

    Schema schemaOne = first.getSchema();
    Schema schemaTwo = second.getSchema();
    // cout<< "comparing "<<first<<" "<<second<<endl;
    if (schemaOne != schemaTwo)
        throw string("Tuple Schema Comparision Error!!!! - In PUtility::ComapreTuple::Opeartor()");

    for (vector<string>::iterator col = fieldNames.begin(); col != fieldNames.end(); ++col)
    {
        string colName = *col;

        if (schemaOne.getFieldType(colName) == INT)
        {
            int lhs = first.getField(colName).integer;
            int rhs = second.getField(colName).integer;
            if (lhs != rhs)
                return lhs < rhs;
        }
        else // FIELD_TYPE == STR20
        {
            string lhs = *(first.getField(colName).str);
            string rhs = *(second.getField(colName).str);
            const int order = lhs.compare(rhs);
            if (order != 0)
                return order < 0;
        }
        // Equal on this column: fall through to the next one.
    }

    // Equal on every compared column.
    return false;
}
int main(int argc, char** argv){ TBool debug = false; TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags_200000"; if(debug){ TagsFnm = "/lfs/madmax4/0/yonathan/tags_small";} Schema TagS; TagS.Add(TPair<TStr,TAttrType>("UserId", atInt)); TTableContext Context; TTable::SetMP(false); float ft_max; float mu_max; timeval timer0; gettimeofday(&timer0, NULL); double t1 = timer0.tv_sec + (timer0.tv_usec/1000000.0); PTable Tags = TTable::LoadSS(TagS, TagsFnm + ".tsv", Context); gettimeofday(&timer0, NULL); double t2 = timer0.tv_sec + (timer0.tv_usec/1000000.0); printf("Time to load tags table: %f\n", t2 - t1); printf("Table Size:\n"); Tags->PrintSize(); Tags->PrintContextSize(); getmaxcpumem(&ft_max, &mu_max); printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); timeval timer1; gettimeofday(&timer1, NULL); t1 = timer1.tv_sec + (timer1.tv_usec/1000000.0); PTable TagsJoinTag = Tags->SelfJoin("Tag"); gettimeofday(&timer1, NULL); t2 = timer1.tv_sec + (timer1.tv_usec/1000000.0); printf("Time to join on tags column: %f\n", t2 - t1); printf("Table Size:\n"); TagsJoinTag->PrintSize(); if(debug){ TagsJoinTag->SaveSS(TagsFnm + "_join_tag.tsv");} getmaxcpumem(&ft_max, &mu_max); printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max); printf("\n"); return 0; }
// Tests parallel table to graph function. TEST(TTable, ToGraphMP) { TTableContext Context; Schema LJS; LJS.Add(TPair<TStr,TAttrType>("Src", atInt)); LJS.Add(TPair<TStr,TAttrType>("Dst", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); PTable T1 = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols); EXPECT_EQ(499, T1->GetNumRows().Val); EXPECT_EQ(499, T1->GetNumValidRows().Val); TVec<TPair<TStr, TAttrType> > S = T1->GetSchema(); PNGraphMP Graph = TSnap::ToGraphMP<PNGraphMP>(T1, S[0].GetVal1(), S[1].GetVal1()); EXPECT_EQ(689,Graph->GetNodes()); EXPECT_EQ(499,Graph->GetEdges()); EXPECT_EQ(1,Graph->IsOk()); }
bool ShowPostingListSubApp::readDocIdLimit(const Schema &schema) { TuneFileSeqRead tuneFileRead; if (_dm.readDocIdLimit(_indexDir)) return true; uint32_t numIndexFields = schema.getNumIndexFields(); for (uint32_t fieldId = 0; fieldId < numIndexFields; ++fieldId) { const Schema::IndexField &field = schema.getIndexField(fieldId); if (field.getDataType() == DataType::STRING) { FieldReader fr; if (!fr.open(_indexDir + "/" + field.getName() + "/", tuneFileRead)) continue; _dm.setup(fr.getDocIdLimit()); return true; } } return false; }
std::unique_ptr<std::vector<char>> Compression::decompress( const std::vector<char>& data, const Schema& nativeSchema, const Schema* const wantedSchema, const std::size_t numPoints) { if (!wantedSchema || *wantedSchema == nativeSchema) { return decompress(data, nativeSchema, numPoints); } // Get decompressor in the native schema. DecompressionStream decompressionStream(data); pdal::LazPerfDecompressor<DecompressionStream> decompressor( decompressionStream, nativeSchema.pdalLayout().dimTypes()); // Allocate room for a single point in the native schema. std::vector<char> nativePoint(nativeSchema.pointSize()); BinaryPointTable table(nativeSchema, nativePoint.data()); pdal::PointRef pointRef(table, 0); // Get our result space, in the desired schema, ready. std::unique_ptr<std::vector<char>> decompressed( new std::vector<char>(numPoints * wantedSchema->pointSize(), 0)); char* pos(decompressed->data()); const char* end(pos + decompressed->size()); while (pos < end) { decompressor.decompress(nativePoint.data(), nativePoint.size()); for (const auto& d : wantedSchema->dims()) { pointRef.getField(pos, d.id(), d.type()); pos += d.size(); } } return decompressed; }
// Rewrites the X/Y/Z dimensions of `schema` for reprojection and returns the
// schema.
//
// Dimension names come from the "x_dim"/"y_dim"/"z_dim" options (defaults
// "X"/"Y"/"Z"). Offsets and scales come from the "offset_*"/"scale_*" options
// and default to the values of the existing dimensions. The offsets are passed
// through reprojectOffsets() before setDimension() is called for each axis.
Schema InPlaceReprojection::alterSchema(Schema& schema)
{
    const std::string xDimName = getOptions().getValueOrDefault<std::string>("x_dim", "X");
    const std::string yDimName = getOptions().getValueOrDefault<std::string>("y_dim", "Y");
    const std::string zDimName = getOptions().getValueOrDefault<std::string>("z_dim", "Z");

    log()->get(logDEBUG2) << "x_dim '" << xDimName <<"' requested" << std::endl;
    log()->get(logDEBUG2) << "y_dim '" << yDimName <<"' requested" << std::endl;
    log()->get(logDEBUG2) << "z_dim '" << zDimName <<"' requested" << std::endl;

    Dimension const& srcX = schema.getDimension(xDimName);
    Dimension const& srcY = schema.getDimension(yDimName);
    Dimension const& srcZ = schema.getDimension(zDimName);

    log()->get(logDEBUG3) << "Fetched x_name: " << srcX;
    log()->get(logDEBUG3) << "Fetched y_name: " << srcY;
    log()->get(logDEBUG3) << "Fetched z_name: " << srcZ;

    // Offsets: explicit option wins, otherwise keep the dimension's own value.
    double xOffset = getOptions().getValueOrDefault<double>("offset_x", srcX.getNumericOffset());
    double yOffset = getOptions().getValueOrDefault<double>("offset_y", srcY.getNumericOffset());
    double zOffset = getOptions().getValueOrDefault<double>("offset_z", srcZ.getNumericOffset());

    log()->floatPrecision(8);

    log()->get(logDEBUG2) << "original offset x,y: " << xOffset <<"," << yOffset << std::endl;
    reprojectOffsets(xOffset, yOffset, zOffset);
    log()->get(logDEBUG2) << "reprojected offset x,y: " << xOffset <<"," << yOffset << std::endl;

    // Scales follow the same option-or-existing rule.
    double xScale = getOptions().getValueOrDefault<double>("scale_x", srcX.getNumericScale());
    double yScale = getOptions().getValueOrDefault<double>("scale_y", srcY.getNumericScale());
    double zScale = getOptions().getValueOrDefault<double>("scale_z", srcZ.getNumericScale());

    setDimension(xDimName, m_old_x_id, m_new_x_id, schema, xScale, xOffset);
    setDimension(yDimName, m_old_y_id, m_new_y_id, schema, yScale, yOffset);
    setDimension(zDimName, m_old_z_id, m_new_z_id, schema, zScale, zOffset);

    return schema;
}
void MappingXMLParser::parse() { QDomNodeList modules = documentElement().elementsByTagName("module"); for (int mi = 0; mi != modules.size(); mi++) { QDomElement mE = modules.item(mi).toElement(); Q_ASSERT(mE.hasAttribute("name")); QString moduleName = nameAttribute(modules.item(mi)); QDomNodeList classes = mE.elementsByTagName("class"); for (int ci = 0; ci != classes.size(); ci++) { QDomElement cE = classes.item(ci).toElement(); Q_ASSERT(cE.hasAttribute("name")); Q_ASSERT(cE.hasAttribute("table_name")); Q_ASSERT(cE.hasAttribute("manager_class_name")); QString qualifiedTableName = cE.attribute("table_name"); QString schemaName = qualifiedTableName.split(".")[0]; QString tableName = qualifiedTableName.split(".")[1]; QString className = nameAttribute(classes.item(ci)); QString managerClassName = cE.attribute("manager_class_name"); DataManager* manager = createManager(getApp(), moduleName, className, schemaName, tableName, managerClassName); QDomNodeList pl = cE.elementsByTagName("property"); Schema* schema = getApp()->databaseModel()->schema(schemaName); Q_CHECK_PTR(schema); Table* table = schema->table(tableName); Q_CHECK_PTR(table); for (int i = 0; i != pl.size(); i++) { QDomElement pe = pl.item(i).toElement(); Q_ASSERT(pe.hasAttribute("property_name")); Q_ASSERT(pe.hasAttribute("column_name")); TableColumn* col = table->column(pe.attribute("column_name")); Q_CHECK_PTR(col); qDebug() << QString("'%1' --> '%2'").arg(pe.attribute("property_name")).arg(col->pathName()); (void) new Property(manager->mapping(), pe.attribute("property_name"), col); } } } }
//---------------------------------------------------------------------------// bool Schema::compatible(const Schema &s) const { index_t dt_id = m_dtype.id(); index_t s_dt_id = s.dtype().id(); if(dt_id != s_dt_id) return false; bool res = true; if(dt_id == DataType::OBJECT_ID) { // each of s's entries that match paths must have dtypes that match std::map<std::string, index_t>::const_iterator itr; for(itr = s.object_map().begin(); itr != s.object_map().end() && res; itr++) { // make sure we actually have the path if(has_path(itr->first)) { // use index to fetch the child from the other schema const Schema &s_chld = s.child(itr->second); // fetch our child by name const Schema &chld = fetch_child(itr->first); // do compat check res = chld.compatible(s_chld); } } } else if(dt_id == DataType::LIST_ID) { // each of s's entries dtypes must match index_t s_n_chd = s.number_of_children(); // can't be compatible in this case if(number_of_children() < s_n_chd) return false; const std::vector<Schema*> &s_lst = s.children(); const std::vector<Schema*> &lst = children(); for(index_t i = 0; i < s_n_chd && res; i++) { res = lst[i]->compatible(*s_lst[i]); } } else { res = m_dtype.compatible(s.dtype()); } return res; }
// Builds the JSON document describing one new record.
//
// The output has the form
//   {"count":1,"columns":[<field json>,...],"data":[{"amigo_id":"<id>"}],"is_new": true}
// where each column entry is the `json` member of a schema field and <id> is
// obtained from UUIDGenerator::get().
std::string SchemaHelper::getNewRecordJSON(Schema schema)
{
    std::stringstream out;
    out << "{\"count\":1,";

    // Column descriptions, comma separated.
    out << "\"columns\":[";
    bool isFirstColumn = true;
    for (const auto& field : *schema)
    {
        if (!isFirstColumn)
            out << ",";
        isFirstColumn = false;
        out << field->json;
    }
    out << "],";

    // A single data row carrying only the generated id.
    out << "\"data\":[";
    const std::string amigoId = UUIDGenerator::get();
    out << "{\"amigo_id\":\"" << amigoId << "\"}],";

    out << "\"is_new\": true}";
    return out.str();
}
// Parses a JSON document of the form {"schema": [ <field>, ... ]} into a
// Schema, converting each array element with SchemaHelper::parseField.
//
// Returns nullptr when the text is not valid JSON, when the root is not an
// object, when it has no "schema" member, or when that member is not an array.
Schema SchemaHelper::parseSchema(const std::string &json)
{
    Document document;
    if (document.Parse<0>(json.c_str()).HasParseError())
        return nullptr;

    // operator[] asserts on a non-object value or a missing member, so both
    // must be ruled out before the lookup.
    if (!document.IsObject() || !document.HasMember("schema"))
        return nullptr;

    const Value& columns = document["schema"];
    if (!columns.IsArray())
        return nullptr;

    Schema schema = std::make_shared<std::vector<Field> >();
    schema->reserve(columns.Size());
    for (rapidjson::SizeType i = 0; i < columns.Size(); i++)
    {
        const Value& c = columns[i];
        schema->push_back(SchemaHelper::parseField(c, i));
    }
    return schema;
}
void MetaDataConfig::parse(Schema ®) { if (node_->name_.compare(_T("schema"))) throw ParseError(String(_T("Unknown element '")) + node_->name_ + _T("' found during parse of root element, 'schema' expected")); ElementTree::Elements::const_iterator child = node_->children_.begin(), cend = node_->children_.end(); for (; child != cend; ++child) { if (!(*child)->name_.compare(_T("table"))) { Table::Ptr t = parse_table(*child); reg.add_table(t); } else if (!(*child)->name_.compare(_T("relation"))) { Relation::Ptr r = parse_relation(*child); if (shptr_get(r)) reg.add_relation(r); } else throw ParseError(String(_T("Unknown element '")) + (*child)->name_ + _T("' found during parse of element 'schema'")); } }
void ProjectionOperator::updateSchema(){ assert(projectionClauses.size() == expressions.size()); Schema *old = codegen::getSchema(); Schema *schema = new Schema(old->getTableName()); schema->setTuples(old->getTuples()); for(int i = 0; i < expressions.size(); i++){ std::size_t pos = expressions[i].find(" AS "); //TODO check the random column name assignment std::string colName = pos == std::string::npos ? "default_" + std::to_string(i) : expressions[i].substr(0, pos); updateExpression(projectionClauses[i], old->getColumnMap(), old->getTableName()); schema->addAttribute(colName, projectionClauses[i]->getDataType(), projectionClauses[i]); } codegen::setSchema(schema); }
int main(int argc, char* argv[]){ //test1(); TTableContext Context; // create scheme Schema PostS; PostS.Add(TPair<TStr,TAttrType>("Id", atInt)); PostS.Add(TPair<TStr,TAttrType>("OwnerUserId", atInt)); PostS.Add(TPair<TStr,TAttrType>("AcceptedAnswerId", atInt)); PostS.Add(TPair<TStr,TAttrType>("CreationDate", atStr)); PostS.Add(TPair<TStr,TAttrType>("Score", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); RelevantCols.Add(4); PTable P = TTable::LoadSS("Posts", PostS, "/dfs/ilfs2/0/ringo/StackOverflow_2/posts.tsv", Context, RelevantCols); printf("Load done\n"); TStrV cols; cols.Add("OwnerUserId"); struct timeval begin, end; gettimeofday(&begin, NULL); P->Aggregate(cols, aaSum, "Score", "Sum"); gettimeofday(&end, NULL); double diff = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec); printf("Elapsed time:%.3lfs\n", diff / 1000000); if (atoi(argv[1]) == 0) return 0; P->SaveSS("tests/p3.txt"); return 0; }
int main(){ TTableContext Context; // create scheme Schema AnimalS; AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); PTable P = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols); P->SaveSS("tests/p1.txt"); TStrV cols; cols.Add("Size"); cols.Add("Number"); TVec<PTable> R = P->SpliceByGroup(cols); for (TInt i = 0; i < R.Len(); i++) { TStr fn = i.GetStr(); R[i]->SaveSS("tests/sznumber" + fn + ".txt"); } P->Unique(cols, true); P->SaveSS("tests/p2.txt"); TStrV group1; group1.Add("Location"); P->Group(group1, "LocationGroup"); P->SaveSS("tests/p3.txt"); return 0; }
int MrsidReader::SchemaToPointInfo(const Schema &schema, LizardTech::PointInfo &pointInfo) const { schema::index_by_index const& dims = schema.getDimensions().get<schema::index>(); pointInfo.init(dims.size()); for (unsigned int idx=0; idx<dims.size(); idx++) { Dimension const& dim = dims[idx]; std::string name = dim.getName(); if (boost::iequals(dim.getName(),"EdgeOfFlightLine")) name = CHANNEL_NAME_EdgeFlightLine; if (boost::iequals(dim.getName(), "Classification")) name = CHANNEL_NAME_ClassId; if (boost::iequals(dim.getName(), "ScanAngleRank")) name = CHANNEL_NAME_ScanAngle; if (boost::iequals(dim.getName(), "ScanDirectionFlag")) name = CHANNEL_NAME_ScanDir; if (boost::iequals(dim.getName(), "Time")) name = CHANNEL_NAME_GPSTime; if (boost::iequals(dim.getName(), "PointSourceId")) name = CHANNEL_NAME_SourceId; if (boost::iequals(dim.getName(), "ReturnNumber")) name = CHANNEL_NAME_ReturnNum; if (boost::iequals(dim.getName(), "NumberOfReturns")) name = CHANNEL_NAME_NumReturns; if (dim.getInterpretation() == dimension::Float) { if (dim.getByteSize() == 8) pointInfo.getChannel(idx).init(name.c_str(), LizardTech::DATATYPE_FLOAT64, 64); if (dim.getByteSize() == 4) pointInfo.getChannel(idx).init(name.c_str(), LizardTech::DATATYPE_FLOAT32, 32); } if (dim.getInterpretation() == dimension::SignedInteger) { if (dim.getByteSize() == 8) pointInfo.getChannel(idx).init(name.c_str(), LizardTech::DATATYPE_SINT64, 64); if (dim.getByteSize() == 4) pointInfo.getChannel(idx).init(name.c_str(), LizardTech::DATATYPE_SINT32, 32); if (dim.getByteSize() == 2) pointInfo.getChannel(idx).init(name.c_str(), LizardTech::DATATYPE_SINT16, 16); if (dim.getByteSize() == 1) pointInfo.getChannel(idx).init(name.c_str(), LizardTech::DATATYPE_SINT8, 8); } if (dim.getInterpretation() == dimension::UnsignedInteger) { if (dim.getByteSize() == 8) pointInfo.getChannel(idx).init(name.c_str(), LizardTech::DATATYPE_UINT64, 64); if (dim.getByteSize() == 4) pointInfo.getChannel(idx).init(name.c_str(), 
LizardTech::DATATYPE_UINT32, 32); if (dim.getByteSize() == 2) pointInfo.getChannel(idx).init(name.c_str(), LizardTech::DATATYPE_UINT16, 16); if (dim.getByteSize() == 1) pointInfo.getChannel(idx).init(name.c_str(), LizardTech::DATATYPE_UINT8, 8); } } return 0; //bug, do error checking }
// Builds the JSON document describing one record that carries a geometry.
//
// The output has the form
//   {"count":1,"columns":[<field json>,...],
//    "data":[{"<geomFieldName>":"<wkb>","amigo_id":"<id>"}],"is_new": false}
// where each column entry is the `json` member of a schema field and <id> is
// obtained from UUIDGenerator::get(). `geomFieldName` and `wkb` are inserted
// verbatim, without escaping.
std::string SchemaHelper::getNewRecordWithGeometryJSON(Schema schema, const std::string &geomFieldName, const std::string &wkb)
{
    std::stringstream out;
    out << "{\"count\":1,";

    // Column descriptions, comma separated.
    out << "\"columns\":[";
    bool isFirstColumn = true;
    for (const auto& field : *schema)
    {
        if (!isFirstColumn)
            out << ",";
        isFirstColumn = false;
        out << field->json;
    }
    out << "],";

    // A single data row: the geometry value followed by the generated id.
    out << "\"data\":[{";
    out << "\"" << geomFieldName << "\":\"" << wkb << "\",";
    const std::string amigoId = UUIDGenerator::get();
    out << "\"amigo_id\":\"" << amigoId << "\"}],";

    out << "\"is_new\": false}";
    return out.str();
}
// Tests parallel select function. TEST(TTable, ParallelSelect) { TTableContext Context; // TODO: Change this to point to a local copy of the LiveJournal table binary. // char srcfile[100] = "/dfs/ilfs2/0/ringo/benchmarks/soc-LiveJournal1.table"; Schema LJS; LJS.Add(TPair<TStr,TAttrType>("Src", atInt)); LJS.Add(TPair<TStr,TAttrType>("Dst", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); PTable T1 = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols); EXPECT_EQ(499, T1->GetNumRows().Val); EXPECT_EQ(499, T1->GetNumValidRows().Val); PTable T2 = TTable::New(T1->GetSchema(), &Context); T1->SelectAtomicIntConst("Src", 88, LT, T2); EXPECT_EQ(196, T2->GetNumRows().Val); EXPECT_EQ(196, T2->GetNumValidRows().Val); }
// Builds a location index from the records in "<directoryName>/quadtree/1K",
// commits it and saves it to `directoryName`.
//
// NOTE(review): the quadtree read view and its root are fetched but never
// used, and the CacheManager allocated here is not deleted in this function
// -- confirm whether IndexMetaData takes ownership of it.
void Serialize(string directoryName)
{
    // Schema: integer primary key plus two searchable text attributes.
    Schema *schema = Schema::create(LocationIndex);
    schema->setPrimaryKey("list_id"); // integer, by default not searchable
    schema->setSearchableAttribute("title", 2); // searchable text
    schema->setSearchableAttribute("address", 7); // searchable text

    // Analyzer
    Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, NULL, "");

    // Index metadata: merge and histogram update settings.
    const unsigned mergeEveryNSeconds = 3;
    const unsigned mergeEveryMWrites = 5;
    const unsigned updateHistogramEveryPMerges = 1;
    const unsigned updateHistogramEveryQWrites = 5;
    CacheManager *cache = new CacheManager(134217728);
    IndexMetaData *indexMetaData = new IndexMetaData(cache,
            mergeEveryNSeconds, mergeEveryMWrites,
            updateHistogramEveryPMerges, updateHistogramEveryQWrites,
            directoryName);

    Indexer *indexer = Indexer::create(indexMetaData, analyzer, schema);

    const string recordsFile = directoryName + "/quadtree/1K";
    readRecordsFromFile(indexer, schema, analyzer, recordsFile);

    boost::shared_ptr<QuadTreeRootNodeAndFreeLists> quadtree_ReadView;
    quadtree_ReadView = dynamic_cast<IndexReaderWriter *>(indexer)->getQuadTree_ReadView();
    QuadTreeNode *qt = quadtree_ReadView->root;

    // serialize the index
    indexer->commit();
    indexer->save(directoryName);

    // Release what was allocated above, indexer first.
    delete indexer;
    delete indexMetaData;
    delete analyzer;
    delete schema;
}
// Append subsets of columns in the given schemas. Schema *Schema::AppendSchemaPtrList( const std::vector<Schema *> &schema_list, const std::vector<std::vector<oid_t>> &subsets) { PL_ASSERT(schema_list.size() == subsets.size()); std::vector<Column> columns; for (unsigned int i = 0; i < schema_list.size(); i++) { Schema *schema = schema_list[i]; const std::vector<oid_t> &subset = subsets[i]; unsigned int column_count = schema->GetColumnCount(); for (oid_t column_itr = 0; column_itr < column_count; column_itr++) { // If column exists in set. if (std::find(subset.begin(), subset.end(), column_itr) != subset.end()) { columns.push_back(schema->columns[column_itr]); } } } Schema *ret_schema = new Schema(columns); return ret_schema; }
void ReadableIndex::encodeIndexKey(const Schema& schema, byte* key, size_t keyLen) const { // unordered index need not to encode index key assert(m_isOrdered); // m_isIndexKeyByteLex is just a common encoding // // some index may use a custom encoding method, in this case, // it just ignore m_isIndexKeyByteLex // if (m_isIndexKeyByteLex) { assert(schema.m_canEncodeToLexByteComparable); schema.byteLexConvert(key, keyLen); } }
// Tests parallel join function. TEST(TTable, ParallelJoin) { TTableContext Context; Schema LJS; LJS.Add(TPair<TStr,TAttrType>("Src", atInt)); LJS.Add(TPair<TStr,TAttrType>("Dst", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); PTable T1 = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols); EXPECT_EQ(499, T1->GetNumRows().Val); EXPECT_EQ(499, T1->GetNumValidRows().Val); PTable T2 = TTable::LoadSS(LJS, "table/soc-LiveJournal1_small.txt", &Context, RelevantCols); EXPECT_EQ(499, T2->GetNumRows().Val); EXPECT_EQ(499, T2->GetNumValidRows().Val); PTable P = T1->Join("Src", T2, "Dst"); EXPECT_EQ(24, P->GetNumRows().Val); EXPECT_EQ(24, P->GetNumValidRows().Val); }
std::unique_ptr<std::vector<char>> Compression::compress( const char* data, const std::size_t size, const Schema& schema) { CompressionStream compressionStream(size); pdal::LazPerfCompressor<CompressionStream> compressor( compressionStream, schema.pdalLayout().dimTypes()); compressor.compress(data, size); compressor.done(); return compressionStream.data(); }
void test1() { TTableContext Context; // create scheme Schema AnimalS; AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr)); AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt)); TIntV RelevantCols; RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); PTable P = TTable::LoadSS("Animals", AnimalS, "tests/s.txt", Context, RelevantCols); printf("Load done\n"); TStrV cols; cols.Add("Size"); cols.Add("Number"); struct timeval begin, end; gettimeofday(&begin, NULL); //P->Unique(cols); P->Group(cols, "SizeNumberGroup"); gettimeofday(&end, NULL); double diff = (end.tv_sec * 1000000 + end.tv_usec) - (begin.tv_sec * 1000000 + begin.tv_usec); printf("Elapsed time:%.3lfs\n", diff / 1000000); P->SaveSS("tests/p3.txt"); }
//---------------------------------------------------------------------------// void Schema::set(const Schema &schema) { bool init_children = false; index_t dt_id = schema.m_dtype.id(); if (dt_id == DataType::OBJECT_ID) { init_object(); init_children = true; object_map() = schema.object_map(); object_order() = schema.object_order(); } else if (dt_id == DataType::LIST_ID) { init_list(); init_children = true; } else { m_dtype = schema.m_dtype; } if (init_children) { std::vector<Schema*> &my_children = children(); const std::vector<Schema*> &their_children = schema.children(); for (index_t i = 0; i < (index_t)their_children.size(); i++) { Schema *child_schema = new Schema(*their_children[i]); child_schema->m_parent = this; my_children.push_back(child_schema); } } }
bool SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema, bool phrases) const { assert(isValid()); const Schema::IndexField &newField = getSchema().getIndexField(getIndex()); const vespalib::string &fieldName = newField.getName(); uint32_t oldFieldId = oldSchema.getIndexFieldId(fieldName); if (oldFieldId == Schema::UNKNOWN_FIELD_ID) return false; if (phrases) { IndexIterator oldIterator(oldSchema, oldFieldId); IndexSettings settings = oldIterator.getIndexSettings(); if (!settings.hasPhrases()) return false; } const Schema::IndexField &oldField = oldSchema.getIndexField(oldFieldId); if (oldField.getDataType() != newField.getDataType() || oldField.getCollectionType() != newField.getCollectionType()) return false; return true; }
// Function to read in a table of edges PTable AddEdgeTable(TTableContext& Context) { char FileName[200]; int ColCnt; int Reverse; printf("Adding Edge Table\n"); printf("Enter filename, number of columns (>= 2), and whether reverse? (reverse = 1, not reverse = 0\n"); scanf("%s %d %d", FileName, &ColCnt, &Reverse); Schema EdgeScm; if (Reverse == 1) { EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr)); EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr)); } else { EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr)); EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr)); } for (TInt i = 1; i < ColCnt-1; i++) { TStr ColName = "Attribute" + i.GetStr(); EdgeScm.Add(TPair<TStr, TAttrType>(ColName, atStr)); } TStr FName(FileName); PTable T = TTable::LoadSS(EdgeScm, FName, Context); return T; }
// Configures the filter from its configuration node.
//
// Reads "field" (the column to filter on), "op" (the comparison operator, as
// text) and "value" (the constant to compare against, as text). The
// comparator is created for the column's spec, and the constant is parsed
// into `value` through a one-column schema built from that same spec.
void Filter::init(libconfig::Config& root, libconfig::Setting& cfg)
{
	MapWrapper::init(root, cfg); //< calls Filter::mapinit below

	fieldno = cfg["field"];

	// Read column spec from input and create comparator.
	//
	ColumnSpec cs = schema.get(fieldno);

	// The setting is read into a local string first and then copied into opstr.
	string configuredOp = cfg["op"];
	opstr = configuredOp;

	Comparator::Comparison compop = Comparator::parseString(opstr);
	comparator = Schema::createComparator(schema, fieldno, cs, compop);

	// Create dummy schema and parse input to create comparator.
	//
	const char* valueText = cfg["value"];

	Schema dummyschema;
	dummyschema.add(cs);

	// The parsed constant must fit in the fixed-size value buffer.
	dbgassert(sizeof(value) == FILTERMAXWIDTH);
	dbgassert(dummyschema.getTupleSize() <= sizeof(value));
	dbgassert(dummyschema.columns() == 1);

	dummyschema.parseTuple(value, &valueText);
}
// Adds a PropertiesConstraint to `schema` describing four properties:
//   category     one of "album", "book", "other", "video"
//   description  a string, with no length constraints
//   price        a number, with a MinimumConstraint of 0.0 (created with
//                `true` as its second argument)
//   title        a string between 1 and 200 characters long
// The constraint is created with these property schemas and an empty
// pattern-properties map.
void addPropertiesConstraint(Schema &schema)
{
    PropertiesConstraint::PropertySchemaMap propertySchemaMap;
    PropertiesConstraint::PropertySchemaMap patternPropertiesSchemaMap;

    // 'category': must be one of a fixed set of values.
    Schema &categorySchema = propertySchemaMap["category"];
    EnumConstraint::Values allowedCategories;
    allowedCategories.push_back(new RapidJsonFrozenValue("album"));
    allowedCategories.push_back(new RapidJsonFrozenValue("book"));
    allowedCategories.push_back(new RapidJsonFrozenValue("other"));
    allowedCategories.push_back(new RapidJsonFrozenValue("video"));
    categorySchema.addConstraint(new EnumConstraint(allowedCategories));

    // 'description': any string.
    Schema &descriptionSchema = propertySchemaMap["description"];
    descriptionSchema.addConstraint(new TypeConstraint(TypeConstraint::kString));

    // 'price': a number with a lower bound of zero.
    Schema &priceSchema = propertySchemaMap["price"];
    priceSchema.addConstraint(new MinimumConstraint(0.0, true));
    priceSchema.addConstraint(new TypeConstraint(TypeConstraint::kNumber));

    // 'title': a string of 1 to 200 characters.
    Schema &titleSchema = propertySchemaMap["title"];
    titleSchema.addConstraint(new MaxLengthConstraint(200));
    titleSchema.addConstraint(new MinLengthConstraint(1));
    titleSchema.addConstraint(new TypeConstraint(TypeConstraint::kString));

    // Attach the property schemas defined above; no pattern properties.
    schema.addConstraint(new PropertiesConstraint(
            propertySchemaMap,
            patternPropertiesSchemaMap));
}
int main(){ TTableContext Context; // Case 1: Euclidean Distance Schema BuildingS; BuildingS.Add(TPair<TStr,TAttrType>("Building", atStr)); BuildingS.Add(TPair<TStr,TAttrType>("X", atInt)); BuildingS.Add(TPair<TStr,TAttrType>("Y", atInt)); // create table PTable TBuildings = TTable::LoadSS("Buildings", BuildingS, "tests/buildings.txt", Context, '\t', false); TStrV Cols; Cols.Add("X"); Cols.Add("Y"); // Find all buildings within 5 Euc Distance of each other. PTable BuildingJointTable = TBuildings->SelfSimJoin(Cols, "Euclidean_Distance", L2Norm, 5.0); BuildingJointTable->SaveSS("tests/buildings.out.txt"); // Case2 : Haversine distance Schema PlaceS; PlaceS.Add(TPair<TStr,TAttrType>("Name", atStr)); PlaceS.Add(TPair<TStr,TAttrType>("Location", atStr)); PlaceS.Add(TPair<TStr,TAttrType>("Latitude", atFlt)); PlaceS.Add(TPair<TStr,TAttrType>("Longitude", atFlt)); // create table PTable TPlaces = TTable::LoadSS("Places", PlaceS, "tests/places.txt", Context, '\t', false); Cols.Clr(); Cols.Add("Latitude"); Cols.Add("Longitude"); PTable PlacesJointTable = TPlaces->SelfSimJoin(Cols, "Distance",Haversine, 1000.0); TStrV ProjectionV; ProjectionV.Add("Places_1.Name"); ProjectionV.Add("Places_1.Location"); ProjectionV.Add("Places_2.Name"); ProjectionV.Add("Places_2.Location"); ProjectionV.Add("Distance"); PlacesJointTable->ProjectInPlace(ProjectionV); PlacesJointTable->SelectAtomic("Places_1.Name", "Places_2.Name", NEQ); PlacesJointTable->SaveSS("tests/places.out.txt"); printf("Saved buildings.out.txt and places.out.txt\n"); return 0; }