Exemple #1
0
/**
 * Records every field of a tuple in ColumnValueMap (declared outside this
 * function).
 *
 * Each field name in the tuple's schema must have the form
 * "<table>.<column>". The name is split at its first '.', and the field's
 * type and value are assigned to ColumnValueMap under the resulting
 * (table, column) pair.
 *
 * @param t tuple whose fields are recorded
 * @throws string when a field name contains no '.'
 */
void MapTupleValues(Tuple t)
{
	Schema tupleSchema = t.getSchema();
	vector<string> columnNames = tupleSchema.getFieldNames();
	for(vector<string>::const_iterator iter = columnNames.begin(); iter != columnNames.end(); ++iter)
	{
		const string& qualifiedName = *iter;
		// Keep the position in the string's own unsigned size type so the
		// comparison with npos does not rely on an int round-trip.
		const string::size_type dotPos = qualifiedName.find_first_of('.');
		if(dotPos == string::npos) throw string("Error in column Mapping");
		const string tableName = qualifiedName.substr(0, dotPos);
		// Everything after the first '.' is the column name.
		const string colName = qualifiedName.substr(dotPos + 1);
		FIELD_TYPE ft = tupleSchema.getFieldType(qualifiedName);
		Field value = t.getField(qualifiedName);
		ColumnValueMap[TableColumnPair(tableName, colName)] = Value(ft, value);
	}
}
Exemple #2
0
/**
 * Ordering predicate over two tuples, keyed on the columns in fieldNames.
 *
 * A non-null tuple orders before a null one; a null first operand never
 * orders before anything. For two non-null tuples the columns listed in
 * fieldNames are compared in order: INT columns numerically, every other
 * column as a string. The first column that differs decides the result.
 *
 * @return true when `first` orders strictly before `second`
 * @throws string when the two tuples do not share the same schema
 */
bool CompareTuple::operator()(Tuple& first, Tuple& second)
{
	// Covers both "both null" and "only first null".
	if(first.isNull())
		return false;
	// Here first is non-null, so a null second sorts after it.
	if(second.isNull())
		return true;

	Schema lhsSchema = first.getSchema();
	Schema rhsSchema = second.getSchema();
	if(lhsSchema != rhsSchema)
		throw string("Tuple Schema Comparision Error!!!! - In PUtility::ComapreTuple::Opeartor()");

	for(vector<string>::iterator it = fieldNames.begin(); it != fieldNames.end(); ++it)
	{
		string column = *it;
		if(lhsSchema.getFieldType(column) == INT)
		{
			int lhs = first.getField(column).integer;
			int rhs = second.getField(column).integer;
			if(lhs != rhs)
				return lhs < rhs;
			continue;
		}

		// Any non-INT column is handled as a string (STR20).
		string lhs = *(first.getField(column).str);
		string rhs = *(second.getField(column).str);
		int order = lhs.compare(rhs);
		if(order != 0)
			return order < 0;
		// Equal on this column: fall through to the next one.
	}
	// Equal on every key column.
	return false;
}
int main(int argc, char** argv){
  TBool debug = false;
  TStr TagsFnm = "/lfs/madmax4/0/yonathan/tags_200000";
  if(debug){ TagsFnm = "/lfs/madmax4/0/yonathan/tags_small";}
  Schema TagS; 
  TagS.Add(TPair<TStr,TAttrType>("UserId", atInt));
  TTableContext Context;
  TTable::SetMP(false);
	
  float ft_max;
  float mu_max;
	
  timeval timer0;
  gettimeofday(&timer0, NULL);
  double t1 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
  PTable Tags = TTable::LoadSS(TagS, TagsFnm + ".tsv", Context);
  gettimeofday(&timer0, NULL);
  double t2 = timer0.tv_sec + (timer0.tv_usec/1000000.0);
  printf("Time to load tags table: %f\n", t2 - t1);
  printf("Table Size:\n");
  Tags->PrintSize();
  Tags->PrintContextSize();
  getmaxcpumem(&ft_max, &mu_max);
  printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
  printf("\n");
	
  timeval timer1;
  gettimeofday(&timer1, NULL);
  t1 = timer1.tv_sec + (timer1.tv_usec/1000000.0);
  PTable TagsJoinTag = Tags->SelfJoin("Tag");
  gettimeofday(&timer1, NULL);
  t2 = timer1.tv_sec + (timer1.tv_usec/1000000.0);
  printf("Time to join on tags column: %f\n", t2 - t1);
  printf("Table Size:\n");
  TagsJoinTag->PrintSize();
  if(debug){ TagsJoinTag->SaveSS(TagsFnm + "_join_tag.tsv");}
  getmaxcpumem(&ft_max, &mu_max);
  printf("time: %0.3f seconds, memory: %0.3f MB\n", ft_max, mu_max);
  printf("\n");
	
  return 0;
}
Exemple #4
0
// Checks TSnap::ToGraphMP: loads a two-column (Src, Dst) table from the small
// LiveJournal sample and verifies the node and edge counts of the graph
// built from those two columns.
TEST(TTable, ToGraphMP) {
  TTableContext Ctx;

  Schema EdgeSchema;
  EdgeSchema.Add(TPair<TStr,TAttrType>("Src", atInt));
  EdgeSchema.Add(TPair<TStr,TAttrType>("Dst", atInt));

  TIntV UsedCols;
  UsedCols.Add(0);
  UsedCols.Add(1);

  PTable T1 = TTable::LoadSS(EdgeSchema, "table/soc-LiveJournal1_small.txt", &Ctx, UsedCols);

  EXPECT_EQ(499, T1->GetNumRows().Val);
  EXPECT_EQ(499, T1->GetNumValidRows().Val);

  // The first two schema entries name the source and destination columns.
  TVec<TPair<TStr, TAttrType> > Cols = T1->GetSchema();
  const TStr& SrcCol = Cols[0].GetVal1();
  const TStr& DstCol = Cols[1].GetVal1();
  PNGraphMP Graph = TSnap::ToGraphMP<PNGraphMP>(T1, SrcCol, DstCol);

  EXPECT_EQ(689,Graph->GetNodes());
  EXPECT_EQ(499,Graph->GetEdges());
  EXPECT_EQ(1,Graph->IsOk());
}
bool
ShowPostingListSubApp::readDocIdLimit(const Schema &schema)
{
    TuneFileSeqRead tuneFileRead;
    if (_dm.readDocIdLimit(_indexDir))
        return true;
    uint32_t numIndexFields = schema.getNumIndexFields();
    for (uint32_t fieldId = 0; fieldId < numIndexFields; ++fieldId) {
        const Schema::IndexField &field = schema.getIndexField(fieldId);
        if (field.getDataType() == DataType::STRING) {
            FieldReader fr;
            if (!fr.open(_indexDir + "/" + field.getName() + "/",
                         tuneFileRead))
                continue;
            _dm.setup(fr.getDocIdLimit());
            return true;
        }
    }
    return false;
}
Exemple #6
0
std::unique_ptr<std::vector<char>> Compression::decompress(
        const std::vector<char>& data,
        const Schema& nativeSchema,
        const Schema* const wantedSchema,
        const std::size_t numPoints)
{
    if (!wantedSchema || *wantedSchema == nativeSchema)
    {
        return decompress(data, nativeSchema, numPoints);
    }

    // Get decompressor in the native schema.
    DecompressionStream decompressionStream(data);
    pdal::LazPerfDecompressor<DecompressionStream> decompressor(
            decompressionStream,
            nativeSchema.pdalLayout().dimTypes());

    // Allocate room for a single point in the native schema.
    std::vector<char> nativePoint(nativeSchema.pointSize());
    BinaryPointTable table(nativeSchema, nativePoint.data());
    pdal::PointRef pointRef(table, 0);

    // Get our result space, in the desired schema, ready.
    std::unique_ptr<std::vector<char>> decompressed(
            new std::vector<char>(numPoints * wantedSchema->pointSize(), 0));
    char* pos(decompressed->data());
    const char* end(pos + decompressed->size());

    while (pos < end)
    {
        decompressor.decompress(nativePoint.data(), nativePoint.size());

        for (const auto& d : wantedSchema->dims())
        {
            pointRef.getField(pos, d.id(), d.type());
            pos += d.size();
        }
    }

    return decompressed;
}
Exemple #7
0
/**
 * Updates the X, Y and Z dimensions of `schema` with the scale and offset to
 * use after reprojection, and returns a copy of the schema.
 *
 * Dimension names come from the "x_dim"/"y_dim"/"z_dim" options (defaults
 * "X", "Y", "Z"). Offsets and scales come from the "offset_*"/"scale_*"
 * options, defaulting to the values of the existing dimensions. The offsets
 * are passed through reprojectOffsets() before being applied
 * (presumably modified in place; confirm against its signature).
 *
 * @param schema schema handed to setDimension() for each axis
 * @return the schema, by value
 */
Schema InPlaceReprojection::alterSchema(Schema& schema)
{
    const std::string xDimName = getOptions().getValueOrDefault<std::string>("x_dim", "X");
    const std::string yDimName = getOptions().getValueOrDefault<std::string>("y_dim", "Y");
    const std::string zDimName = getOptions().getValueOrDefault<std::string>("z_dim", "Z");

    log()->get(logDEBUG2) << "x_dim '" << xDimName <<"' requested" << std::endl;
    log()->get(logDEBUG2) << "y_dim '" << yDimName <<"' requested" << std::endl;
    log()->get(logDEBUG2) << "z_dim '" << zDimName <<"' requested" << std::endl;

    Dimension const& xDim = schema.getDimension(xDimName);
    Dimension const& yDim = schema.getDimension(yDimName);
    Dimension const& zDim = schema.getDimension(zDimName);

    log()->get(logDEBUG3) << "Fetched x_name: " << xDim;
    log()->get(logDEBUG3) << "Fetched y_name: " << yDim;
    log()->get(logDEBUG3) << "Fetched z_name: " << zDim;

    // Offsets: explicit option wins, otherwise the dimension's current value.
    double xOffset = getOptions().getValueOrDefault<double>("offset_x", xDim.getNumericOffset());
    double yOffset = getOptions().getValueOrDefault<double>("offset_y", yDim.getNumericOffset());
    double zOffset = getOptions().getValueOrDefault<double>("offset_z", zDim.getNumericOffset());

    log()->floatPrecision(8);

    log()->get(logDEBUG2) << "original offset x,y: " << xOffset <<"," << yOffset << std::endl;
    reprojectOffsets(xOffset, yOffset, zOffset);
    log()->get(logDEBUG2) << "reprojected offset x,y: " << xOffset <<"," << yOffset << std::endl;

    // Scales follow the same option-or-current-value rule.
    double xScale = getOptions().getValueOrDefault<double>("scale_x", xDim.getNumericScale());
    double yScale = getOptions().getValueOrDefault<double>("scale_y", yDim.getNumericScale());
    double zScale = getOptions().getValueOrDefault<double>("scale_z", zDim.getNumericScale());

    setDimension(xDimName, m_old_x_id, m_new_x_id, schema, xScale, xOffset);
    setDimension(yDimName, m_old_y_id, m_new_y_id, schema, yScale, yOffset);
    setDimension(zDimName, m_old_z_id, m_new_z_id, schema, zScale, zOffset);

    return schema;
}
void MappingXMLParser::parse() {
    QDomNodeList modules = documentElement().elementsByTagName("module");
    for (int mi = 0; mi != modules.size(); mi++) {
        QDomElement mE = modules.item(mi).toElement();
        Q_ASSERT(mE.hasAttribute("name"));
        QString moduleName = nameAttribute(modules.item(mi));
        QDomNodeList classes = mE.elementsByTagName("class");
        for (int ci = 0; ci != classes.size(); ci++) {
            QDomElement cE = classes.item(ci).toElement();
            Q_ASSERT(cE.hasAttribute("name"));
            Q_ASSERT(cE.hasAttribute("table_name"));
            Q_ASSERT(cE.hasAttribute("manager_class_name"));

            QString qualifiedTableName = cE.attribute("table_name");
            QString schemaName = qualifiedTableName.split(".")[0];
            QString tableName = qualifiedTableName.split(".")[1];
            QString className = nameAttribute(classes.item(ci));
            QString managerClassName = cE.attribute("manager_class_name");

            DataManager* manager = createManager(getApp(), moduleName,
                    className,
                    schemaName,
                    tableName,
                    managerClassName);
            QDomNodeList pl = cE.elementsByTagName("property");
            Schema* schema = getApp()->databaseModel()->schema(schemaName);
            Q_CHECK_PTR(schema);
            Table* table = schema->table(tableName);
            Q_CHECK_PTR(table);
            for (int i = 0; i != pl.size(); i++) {
                QDomElement pe = pl.item(i).toElement();
                Q_ASSERT(pe.hasAttribute("property_name"));
                Q_ASSERT(pe.hasAttribute("column_name"));
                TableColumn* col = table->column(pe.attribute("column_name"));
                Q_CHECK_PTR(col);
                qDebug() << QString("'%1' --> '%2'").arg(pe.attribute("property_name")).arg(col->pathName());
                (void) new Property(manager->mapping(), pe.attribute("property_name"), col);
            }
        }
    }
}
Exemple #9
0
//---------------------------------------------------------------------------//
bool
Schema::compatible(const Schema &s) const
{
    index_t dt_id   = m_dtype.id();
    index_t s_dt_id = s.dtype().id();

    if(dt_id != s_dt_id)
        return false;
    
    bool res = true;
    
    if(dt_id == DataType::OBJECT_ID)
    {
        // each of s's entries that match paths must have dtypes that match
        
        std::map<std::string, index_t>::const_iterator itr;
        
        for(itr  = s.object_map().begin(); 
            itr != s.object_map().end() && res;
            itr++)
        {
            // make sure we actually have the path
            if(has_path(itr->first))
            {
                // use index to fetch the child from the other schema
                const Schema &s_chld = s.child(itr->second);
                // fetch our child by name
                const Schema &chld = fetch_child(itr->first);
                // do compat check
                res = chld.compatible(s_chld);
            }
        }
    }
    else if(dt_id == DataType::LIST_ID) 
    {
        // each of s's entries dtypes must match
        index_t s_n_chd = s.number_of_children();
        
        // can't be compatible in this case
        if(number_of_children() < s_n_chd)
            return false;

        const std::vector<Schema*> &s_lst = s.children();
        const std::vector<Schema*> &lst   = children();

        for(index_t i = 0; i < s_n_chd && res; i++)
        {
            res = lst[i]->compatible(*s_lst[i]);
        }
    }
    else
    {
        res = m_dtype.compatible(s.dtype());
    }
    return res;
}
Exemple #10
0
/**
 * Builds the JSON text describing one new record for the given schema.
 *
 * The result has the form
 *   {"count":1,"columns":[<field json>,...],"data":[{"amigo_id":"<id>"}],"is_new": true}
 * where each column entry is the field's `json` member and <id> is a value
 * obtained from UUIDGenerator::get().
 *
 * @param schema fields whose JSON descriptions become the "columns" array
 * @return the assembled JSON string
 */
std::string SchemaHelper::getNewRecordJSON(Schema schema)
{
	std::stringstream out;
	out << "{\"count\":1," << "\"columns\":[";

	bool needComma = false;
	for(const auto& field : *schema)
	{
		if(needComma)
			out << ",";
		out << field->json;
		needComma = true;
	}

	out << "]," << "\"data\":[";
	const std::string amigo_id = UUIDGenerator::get();
	out << "{\"amigo_id\":\"" << amigo_id << "\"}],";
	out << "\"is_new\": true}";
	return out.str();
}
Exemple #11
0
/**
 * Parses a schema out of a JSON document.
 *
 * The document root must be an object with a "schema" member that is an
 * array; every array element is turned into a field with parseField(),
 * together with its index in the array.
 *
 * @param json JSON text to parse
 * @return the parsed schema, or nullptr when the text is not valid JSON, the
 *         root is not an object, "schema" is missing, or "schema" is not an
 *         array
 */
Schema SchemaHelper::parseSchema(const std::string &json)
{
    Document document;
    if(document.Parse<0>(json.c_str()).HasParseError())
        return nullptr;

    // Looking up a member that does not exist, or indexing a non-object
    // root by name, violates RapidJSON's preconditions, so check first.
    if(!document.IsObject() || !document.HasMember("schema"))
        return nullptr;

    const Value& columns = document["schema"];
    if(!columns.IsArray())
        return nullptr;

    Schema schema = std::make_shared<std::vector<Field> >();
    schema->reserve(columns.Size());
    for (rapidjson::SizeType i = 0; i < columns.Size(); i++)
    {
        schema->push_back(SchemaHelper::parseField(columns[i], i));
    }

    return schema;
}
Exemple #12
0
/**
 * Populates `reg` from the element tree rooted at node_.
 *
 * The root element must be named "schema". Each "table" child is parsed and
 * added as a table; each "relation" child is parsed and added as a relation
 * when parse_relation() yields a non-null pointer.
 *
 * @param reg schema that receives the parsed tables and relations
 * @throws ParseError when the root is not "schema" or a child element is
 *         neither "table" nor "relation"
 */
void MetaDataConfig::parse(Schema &reg)
{
    const bool root_is_schema = (node_->name_.compare(_T("schema")) == 0);
    if (!root_is_schema) {
        throw ParseError(String(_T("Unknown element '")) + node_->name_ + 
                _T("' found during parse of root element, 'schema' expected"));
    }

    ElementTree::Elements::const_iterator it = node_->children_.begin();
    const ElementTree::Elements::const_iterator last = node_->children_.end();
    while (it != last) {
        if ((*it)->name_.compare(_T("table")) == 0) {
            Table::Ptr table = parse_table(*it);
            reg.add_table(table);
        }
        else if ((*it)->name_.compare(_T("relation")) == 0) {
            Relation::Ptr relation = parse_relation(*it);
            // a null relation is silently dropped
            if (shptr_get(relation))
                reg.add_relation(relation);
        }
        else {
            throw ParseError(String(_T("Unknown element '")) + (*it)->name_ +
                    _T("' found during parse of element 'schema'"));
        }
        ++it;
    }
}
/**
 * Replaces the current codegen schema with one describing this projection.
 *
 * A new Schema is created with the old schema's table name and tuples. For
 * every projection expression the matching clause is updated against the
 * old schema's column map and then added as an attribute of the new schema.
 * The attribute name is the text before " AS " when the expression contains
 * it, otherwise "default_<index>".
 */
void ProjectionOperator::updateSchema(){
	assert(projectionClauses.size() == expressions.size());
	// NOTE(review): the previous schema object is not released here; whether
	// codegen::setSchema() takes care of it is not visible from this function.
	Schema *old = codegen::getSchema();
	Schema *schema = new Schema(old->getTableName());
	schema->setTuples(old->getTuples());
	// Index with the container's unsigned size type to avoid a
	// signed/unsigned comparison against expressions.size().
	for(std::size_t i = 0; i < expressions.size(); ++i){
		const std::size_t pos = expressions[i].find(" AS ");
		//TODO check the random column name assignment
		// NOTE(review): substr(0, pos) keeps the text *before* " AS ", i.e. the
		// expression rather than the alias that follows it; confirm this is intended.
		const std::string colName = pos == std::string::npos ? "default_" + std::to_string(i) : expressions[i].substr(0, pos);
		updateExpression(projectionClauses[i], old->getColumnMap(), old->getTableName());
		schema->addAttribute(colName, projectionClauses[i]->getDataType(), projectionClauses[i]);
	}
	codegen::setSchema(schema);
}
Exemple #14
0
/**
 * Timing driver for TTable::Aggregate on the StackOverflow posts table.
 *
 * Loads five columns of posts.tsv, calls Aggregate(cols, aaSum, "Score",
 * "Sum") with cols = {"OwnerUserId"}, and prints the elapsed time. The
 * resulting table is saved to tests/p3.txt only when the first command-line
 * argument is a non-zero integer; with no argument, or with "0", nothing is
 * saved.
 */
int main(int argc, char* argv[]){
  //test1();
  TTableContext Context;

  // create scheme
  Schema PostS;
  PostS.Add(TPair<TStr,TAttrType>("Id", atInt));
  PostS.Add(TPair<TStr,TAttrType>("OwnerUserId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("AcceptedAnswerId", atInt));
  PostS.Add(TPair<TStr,TAttrType>("CreationDate", atStr));
  PostS.Add(TPair<TStr,TAttrType>("Score", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0); RelevantCols.Add(1); RelevantCols.Add(2); RelevantCols.Add(3); RelevantCols.Add(4);

  PTable P = TTable::LoadSS("Posts", PostS, "/dfs/ilfs2/0/ringo/StackOverflow_2/posts.tsv", Context, RelevantCols);
  printf("Load done\n");

  TStrV cols;
  cols.Add("OwnerUserId");

  struct timeval begin, end;
  gettimeofday(&begin, NULL);
  P->Aggregate(cols, aaSum, "Score", "Sum");
  gettimeofday(&end, NULL);

  // Subtract first and scale in double so that seconds * 1e6 cannot
  // overflow where tv_sec is a 32-bit integer.
  const double diff = (end.tv_sec - begin.tv_sec) * 1000000.0 + (end.tv_usec - begin.tv_usec);
  printf("Elapsed time:%.3lfs\n", diff / 1000000);

  // argv[1] only exists when an argument was passed; a missing argument is
  // treated like "0" instead of handing a null pointer to atoi().
  if (argc < 2 || atoi(argv[1]) == 0) return 0;

  P->SaveSS("tests/p3.txt");

  return 0;
}
Exemple #15
0
int main(){
  TTableContext Context;
  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  RelevantCols.Add(3);

  PTable P = TTable::LoadSS("Animals", AnimalS, "tests/animals.txt", Context, RelevantCols);

  P->SaveSS("tests/p1.txt");

  TStrV cols;
  cols.Add("Size");
  cols.Add("Number");

  TVec<PTable> R = P->SpliceByGroup(cols);
  for (TInt i = 0; i < R.Len(); i++) {
    TStr fn = i.GetStr();
    R[i]->SaveSS("tests/sznumber" + fn + ".txt");
  }

  P->Unique(cols, true);

  P->SaveSS("tests/p2.txt");

  TStrV group1;
  group1.Add("Location");
  P->Group(group1, "LocationGroup");

  P->SaveSS("tests/p3.txt");

  return 0;
}
Exemple #16
0
/**
 * Initialises `pointInfo` with one channel per dimension of `schema`.
 *
 * Eight well-known dimension names are matched case-insensitively and
 * replaced by the corresponding CHANNEL_NAME_* constant; all other names are
 * used unchanged. The channel data type is chosen from the dimension's
 * interpretation (float, signed or unsigned integer) and byte size. A
 * dimension with any other interpretation or size leaves its channel
 * uninitialised by this function.
 *
 * @param schema    source of the dimensions
 * @param pointInfo receives the channel layout
 * @return always 0
 */
int MrsidReader::SchemaToPointInfo(const Schema &schema, LizardTech::PointInfo &pointInfo) const
{
    schema::index_by_index const& dims = schema.getDimensions().get<schema::index>();

    pointInfo.init(dims.size());
    for (unsigned int idx = 0; idx < dims.size(); ++idx)
    {
        Dimension const& dim = dims[idx];
        std::string const sourceName = dim.getName();

        // The candidate names are all different, so at most one branch applies.
        std::string name = sourceName;
        if (boost::iequals(sourceName, "EdgeOfFlightLine"))
            name = CHANNEL_NAME_EdgeFlightLine;
        else if (boost::iequals(sourceName, "Classification"))
            name = CHANNEL_NAME_ClassId;
        else if (boost::iequals(sourceName, "ScanAngleRank"))
            name = CHANNEL_NAME_ScanAngle;
        else if (boost::iequals(sourceName, "ScanDirectionFlag"))
            name = CHANNEL_NAME_ScanDir;
        else if (boost::iequals(sourceName, "Time"))
            name = CHANNEL_NAME_GPSTime;
        else if (boost::iequals(sourceName, "PointSourceId"))
            name = CHANNEL_NAME_SourceId;
        else if (boost::iequals(sourceName, "ReturnNumber"))
            name = CHANNEL_NAME_ReturnNum;
        else if (boost::iequals(sourceName, "NumberOfReturns"))
            name = CHANNEL_NAME_NumReturns;

        const char* const channel = name.c_str();

        if (dim.getInterpretation() == dimension::Float)
        {
            switch (dim.getByteSize())
            {
            case 8:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_FLOAT64, 64);
                break;
            case 4:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_FLOAT32, 32);
                break;
            default:
                break;
            }
        }
        else if (dim.getInterpretation() == dimension::SignedInteger)
        {
            switch (dim.getByteSize())
            {
            case 8:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_SINT64, 64);
                break;
            case 4:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_SINT32, 32);
                break;
            case 2:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_SINT16, 16);
                break;
            case 1:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_SINT8, 8);
                break;
            default:
                break;
            }
        }
        else if (dim.getInterpretation() == dimension::UnsignedInteger)
        {
            switch (dim.getByteSize())
            {
            case 8:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_UINT64, 64);
                break;
            case 4:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_UINT32, 32);
                break;
            case 2:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_UINT16, 16);
                break;
            case 1:
                pointInfo.getChannel(idx).init(channel, LizardTech::DATATYPE_UINT8, 8);
                break;
            default:
                break;
            }
        }
    }
    return 0; //bug, do error checking
}
Exemple #17
0
/**
 * Builds the JSON text describing one record that carries a geometry value.
 *
 * The result has the form
 *   {"count":1,"columns":[<field json>,...],
 *    "data":[{"<geomFieldName>":"<wkb>","amigo_id":"<id>"}],"is_new": false}
 * (without the line break), where each column entry is the field's `json`
 * member and <id> comes from UUIDGenerator::get(). The field name and the
 * geometry text are inserted verbatim, without escaping.
 *
 * @param schema        fields whose JSON descriptions become "columns"
 * @param geomFieldName key under which the geometry is stored
 * @param wkb           geometry text stored under that key
 * @return the assembled JSON string
 */
std::string SchemaHelper::getNewRecordWithGeometryJSON(Schema schema,  const std::string &geomFieldName, const std::string &wkb)
{
    std::stringstream out;
    out << "{\"count\":1," << "\"columns\":[";

    bool needComma = false;
    for(const auto& field : *schema)
    {
        if(needComma)
            out << ",";
        out << field->json;
        needComma = true;
    }

    out << "]," << "\"data\":[{";
    out << "\"" << geomFieldName << "\":\"" << wkb << "\",";
    const std::string amigo_id = UUIDGenerator::get();
    out << "\"amigo_id\":\"" << amigo_id << "\"}],";
    out << "\"is_new\": false}";
    return out.str();
}
Exemple #18
0
// Checks SelectAtomicIntConst: loads the (Src, Dst) table from the small
// LiveJournal sample, selects the rows with Src < 88 into a second table and
// verifies the row counts of both tables.
TEST(TTable, ParallelSelect) {
  TTableContext Ctx;

  // TODO: Change this to point to a local copy of the LiveJournal table binary.
  // char srcfile[100] = "/dfs/ilfs2/0/ringo/benchmarks/soc-LiveJournal1.table";

  Schema EdgeSchema;
  EdgeSchema.Add(TPair<TStr,TAttrType>("Src", atInt));
  EdgeSchema.Add(TPair<TStr,TAttrType>("Dst", atInt));

  TIntV UsedCols;
  UsedCols.Add(0);
  UsedCols.Add(1);

  PTable T1 = TTable::LoadSS(EdgeSchema, "table/soc-LiveJournal1_small.txt", &Ctx, UsedCols);

  EXPECT_EQ(499, T1->GetNumRows().Val);
  EXPECT_EQ(499, T1->GetNumValidRows().Val);

  // T2 starts out empty with T1's schema and receives the selected rows.
  PTable T2 = TTable::New(T1->GetSchema(), &Ctx);
  T1->SelectAtomicIntConst("Src", 88, LT, T2);

  EXPECT_EQ(196, T2->GetNumRows().Val);
  EXPECT_EQ(196, T2->GetNumValidRows().Val);
}
Exemple #19
0
/**
 * Builds a location index from the records in "<directoryName>/quadtree/1K"
 * and saves it under directoryName.
 *
 * The schema has "list_id" as primary key and "title" and "address" as
 * searchable attributes. After the records are read, the index is committed
 * and saved, and the indexer, index metadata, analyzer and schema are
 * deleted.
 *
 * @param directoryName directory holding the input and receiving the index
 */
void Serialize(string directoryName)
{
	// Schema: one primary key and two searchable text attributes.
	Schema *schema = Schema::create(LocationIndex);
	schema->setPrimaryKey("list_id");
	schema->setSearchableAttribute("title", 2);
	schema->setSearchableAttribute("address", 7);

	Analyzer *analyzer = new Analyzer(NULL, NULL, NULL, NULL, "");

	// Merge / histogram settings passed to the index metadata.
	unsigned mergeSeconds = 3;
	unsigned mergeWrites = 5;
	unsigned histogramMerges = 1;
	unsigned histogramWrites = 5;
	// 134217728 bytes
	CacheManager *cache = new CacheManager(128 * 1024 * 1024);
	IndexMetaData *metaData = new IndexMetaData(cache,
			mergeSeconds, mergeWrites,
			histogramMerges, histogramWrites,
			directoryName);

	Indexer *indexer = Indexer::create(metaData, analyzer, schema);

	readRecordsFromFile(indexer, schema, analyzer, directoryName+"/quadtree/1K");

	// Fetch the quad tree read view and its root; the root is not used
	// further in this function.
	boost::shared_ptr<QuadTreeRootNodeAndFreeLists> readView;
	readView = dynamic_cast<IndexReaderWriter *>(indexer)->getQuadTree_ReadView();
	QuadTreeNode *qt = readView->root;

	// Persist the index.
	indexer->commit();
	indexer->save(directoryName);

	// NOTE(review): `cache` is not deleted here; presumably it is owned
	// elsewhere (confirm against IndexMetaData).
	delete indexer;
	delete metaData;
	delete analyzer;
	delete schema;
}
Exemple #20
0
// Builds a new schema from selected columns of several schemas.
//
// schema_list[i] contributes the columns whose indexes appear in subsets[i].
// Columns are appended schema by schema and, within a schema, in ascending
// column index (not in the order listed in the subset); an index listed more
// than once is still appended only once. The caller receives a schema
// allocated with new.
Schema *Schema::AppendSchemaPtrList(
    const std::vector<Schema *> &schema_list,
    const std::vector<std::vector<oid_t>> &subsets) {
  PL_ASSERT(schema_list.size() == subsets.size());

  std::vector<Column> selected;
  for (unsigned int idx = 0; idx < schema_list.size(); idx++) {
    Schema *source = schema_list[idx];
    const std::vector<oid_t> &wanted = subsets[idx];
    const unsigned int total = source->GetColumnCount();

    for (oid_t col = 0; col < total; col++) {
      const bool chosen =
          std::find(wanted.begin(), wanted.end(), col) != wanted.end();
      if (!chosen) {
        continue;
      }
      selected.push_back(source->columns[col]);
    }
  }

  return new Schema(selected);
}
Exemple #21
0
/**
 * Encodes an index key in place for an ordered index.
 *
 * Only ordered indexes encode their keys, which is asserted. When
 * m_isIndexKeyByteLex is set, the key is converted with
 * schema.byteLexConvert(); otherwise it is left untouched (an index using
 * its own encoding is expected to ignore the flag).
 *
 * @param schema schema used for the conversion
 * @param key    key bytes, converted in place
 * @param keyLen length of the key in bytes
 */
void ReadableIndex::encodeIndexKey(const Schema& schema, byte* key, size_t keyLen) const {
	assert(m_isOrdered);

	// Nothing to do unless the common byte-lex encoding is in use.
	if (!m_isIndexKeyByteLex) {
		return;
	}

	assert(schema.m_canEncodeToLexByteComparable);
	schema.byteLexConvert(key, keyLen);
}
Exemple #22
0
// Checks Join: loads the (Src, Dst) table from the small LiveJournal sample
// twice, joins the first table's "Src" column with the second table's "Dst"
// column and verifies the row counts of the inputs and of the result.
TEST(TTable, ParallelJoin) {
  TTableContext Ctx;

  Schema EdgeSchema;
  EdgeSchema.Add(TPair<TStr,TAttrType>("Src", atInt));
  EdgeSchema.Add(TPair<TStr,TAttrType>("Dst", atInt));

  TIntV UsedCols;
  UsedCols.Add(0);
  UsedCols.Add(1);

  PTable T1 = TTable::LoadSS(EdgeSchema, "table/soc-LiveJournal1_small.txt", &Ctx, UsedCols);

  EXPECT_EQ(499, T1->GetNumRows().Val);
  EXPECT_EQ(499, T1->GetNumValidRows().Val);

  PTable T2 = TTable::LoadSS(EdgeSchema, "table/soc-LiveJournal1_small.txt", &Ctx, UsedCols);

  EXPECT_EQ(499, T2->GetNumRows().Val);
  EXPECT_EQ(499, T2->GetNumValidRows().Val);

  PTable P = T1->Join("Src", T2, "Dst");

  EXPECT_EQ(24, P->GetNumRows().Val);
  EXPECT_EQ(24, P->GetNumValidRows().Val);
}
Exemple #23
0
std::unique_ptr<std::vector<char>> Compression::compress(
        const char* data,
        const std::size_t size,
        const Schema& schema)
{
    CompressionStream compressionStream(size);
    pdal::LazPerfCompressor<CompressionStream> compressor(
            compressionStream,
            schema.pdalLayout().dimTypes());

    compressor.compress(data, size);
    compressor.done();

    return compressionStream.data();
}
Exemple #24
0
/**
 * Times TTable::Group on (Size, Number) for the table in tests/s.txt.
 *
 * Loads the four-column animals table, groups it into "SizeNumberGroup",
 * prints the elapsed time and saves the table to tests/p3.txt.
 */
void test1() {
  TTableContext Context;

  // create scheme
  Schema AnimalS;
  AnimalS.Add(TPair<TStr,TAttrType>("Animal", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Size", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Location", atStr));
  AnimalS.Add(TPair<TStr,TAttrType>("Number", atInt));
  TIntV RelevantCols;
  RelevantCols.Add(0);
  RelevantCols.Add(1);
  RelevantCols.Add(2);
  RelevantCols.Add(3);

  PTable P = TTable::LoadSS("Animals", AnimalS, "tests/s.txt", Context, RelevantCols);
  printf("Load done\n");

  TStrV cols;
  cols.Add("Size");
  cols.Add("Number");

  struct timeval begin, end;
  gettimeofday(&begin, NULL);

  //P->Unique(cols);
  P->Group(cols, "SizeNumberGroup");

  gettimeofday(&end, NULL);

  // Subtract first and scale in double so that seconds * 1e6 cannot
  // overflow where tv_sec is a 32-bit integer.
  const double diff = (end.tv_sec - begin.tv_sec) * 1000000.0 + (end.tv_usec - begin.tv_usec);
  printf("Elapsed time:%.3lfs\n", diff / 1000000);

  P->SaveSS("tests/p3.txt");

}
Exemple #25
0
//---------------------------------------------------------------------------//
void 
Schema::set(const Schema &schema)
{
    bool init_children = false;
    index_t dt_id = schema.m_dtype.id();
    if (dt_id == DataType::OBJECT_ID)
    {
       init_object();
       init_children = true;

       object_map() = schema.object_map();
       object_order() = schema.object_order();
    } 
    else if (dt_id == DataType::LIST_ID)
    {
       init_list();
       init_children = true;
    }
    else 
    {
        m_dtype = schema.m_dtype;
    }

    
    if (init_children) 
    {
       std::vector<Schema*> &my_children = children();
       const std::vector<Schema*> &their_children = schema.children();
       for (index_t i = 0; i < (index_t)their_children.size(); i++) 
       {
           Schema *child_schema = new Schema(*their_children[i]);
           child_schema->m_parent = this;
           my_children.push_back(child_schema);
       }
    }
}
Exemple #26
0
bool
SchemaUtil::IndexIterator::hasMatchingOldFields(const Schema &oldSchema,
        bool phrases) const
{
    assert(isValid());
    const Schema::IndexField &newField =
        getSchema().getIndexField(getIndex());
    const vespalib::string &fieldName = newField.getName();
    uint32_t oldFieldId = oldSchema.getIndexFieldId(fieldName);
    if (oldFieldId == Schema::UNKNOWN_FIELD_ID)
        return false;
    if (phrases) {
        IndexIterator oldIterator(oldSchema, oldFieldId);
        IndexSettings settings = oldIterator.getIndexSettings();
        if (!settings.hasPhrases())
            return false;
    }
    const Schema::IndexField &oldField =
        oldSchema.getIndexField(oldFieldId);
    if (oldField.getDataType() != newField.getDataType() ||
        oldField.getCollectionType() != newField.getCollectionType())
        return false;
    return true;
}
Exemple #27
0
// Reads an edge table whose description is entered on standard input.
//
// Prompts for a file name, a column count and a reverse flag. The schema
// starts with the string columns SrcID and DstID (swapped when the flag is
// 1), followed by string columns Attribute1 .. Attribute<ColCnt-2>. The file
// is then loaded with TTable::LoadSS using the given context.
//
// The file name is limited to 199 characters. If fewer than three values can
// be read, a warning is written to stderr and the values that were not read
// keep their defaults (empty file name, 2 columns, not reversed).
PTable AddEdgeTable(TTableContext& Context) {
  char FileName[200] = "";
  int ColCnt = 2;
  int Reverse = 0;
  printf("Adding Edge Table\n");
  printf("Enter filename, number of columns (>= 2), and whether reverse? (reverse = 1, not reverse = 0\n");
  // %199s bounds the read to the buffer (199 characters plus terminator);
  // the return value says how many of the three fields were converted.
  if (scanf("%199s %d %d", FileName, &ColCnt, &Reverse) != 3) {
    fprintf(stderr, "Warning: could not read filename, column count and reverse flag; using defaults for missing values\n");
  }
  Schema EdgeScm;
  if (Reverse == 1) {
    EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr));
    EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr));
  }
  else {
    EdgeScm.Add(TPair<TStr, TAttrType>("SrcID", atStr));
    EdgeScm.Add(TPair<TStr, TAttrType>("DstID", atStr));
  }
  // Remaining columns beyond the two endpoint columns.
  for (TInt i = 1; i < ColCnt-1; i++) {
    TStr ColName = "Attribute" + i.GetStr();
    EdgeScm.Add(TPair<TStr, TAttrType>(ColName, atStr));
  }
  TStr FName(FileName);
  PTable T = TTable::LoadSS(EdgeScm, FName, Context);
  return T;
}
Exemple #28
0
/**
 * Configures the filter from its configuration node.
 *
 * Reads "field" (the column to filter on), "op" (the comparison, parsed by
 * Comparator::parseString) and "value" (the constant to compare against).
 * A comparator is created for the chosen column, and the constant is parsed
 * into the `value` member using a one-column schema built from that column's
 * spec.
 *
 * @param root whole configuration, forwarded to MapWrapper::init
 * @param cfg  this operator's configuration node
 */
void Filter::init(libconfig::Config& root, libconfig::Setting& cfg)
{
	MapWrapper::init(root, cfg);	//< calls Filter::mapinit below

	fieldno = cfg["field"];

	// Column spec of the filtered field, and the comparator built from it.
	//
	ColumnSpec filterColumn = schema.get(fieldno);
	string opText = cfg["op"];
	opstr = opText;
	Comparator::Comparison parsedOp = Comparator::parseString(opstr);
	comparator = Schema::createComparator(schema, fieldno, filterColumn, parsedOp);

	// Parse the constant through a schema holding only the filtered column.
	//
	const char* literal = cfg["value"];
	Schema dummyschema;
	dummyschema.add(filterColumn);
	dbgassert(sizeof(value) == FILTERMAXWIDTH);
	dbgassert(dummyschema.getTupleSize() <= sizeof(value));
	dbgassert(dummyschema.columns() == 1);
	dummyschema.parseTuple(value, &literal);
}
Exemple #29
0
/**
 * Adds a PropertiesConstraint to `schema` describing four properties:
 *
 *  - category:    one of "album", "book", "other", "video"
 *  - description: a string
 *  - price:       a number, with MinimumConstraint(0.0, true)
 *  - title:       a string of 1 to 200 characters
 *
 * No pattern properties are defined.
 *
 * @param schema schema that receives the constraint
 */
void addPropertiesConstraint(Schema &schema)
{
    PropertiesConstraint::PropertySchemaMap propertySchemaMap;
    PropertiesConstraint::PropertySchemaMap patternPropertiesSchemaMap;

    // 'category': restricted to a fixed set of values.
    Schema &categorySchema = propertySchemaMap["category"];
    EnumConstraint::Values categoryValues;
    categoryValues.push_back(new RapidJsonFrozenValue("album"));
    categoryValues.push_back(new RapidJsonFrozenValue("book"));
    categoryValues.push_back(new RapidJsonFrozenValue("other"));
    categoryValues.push_back(new RapidJsonFrozenValue("video"));
    categorySchema.addConstraint(new EnumConstraint(categoryValues));

    // 'description': any string, no length limits.
    Schema &descriptionSchema = propertySchemaMap["description"];
    descriptionSchema.addConstraint(new TypeConstraint(TypeConstraint::kString));

    // 'price': a number bounded below by zero.
    Schema &priceSchema = propertySchemaMap["price"];
    priceSchema.addConstraint(new MinimumConstraint(0.0, true));
    priceSchema.addConstraint(new TypeConstraint(TypeConstraint::kNumber));

    // 'title': a string between 1 and 200 characters long.
    Schema &titleSchema = propertySchemaMap["title"];
    titleSchema.addConstraint(new MaxLengthConstraint(200));
    titleSchema.addConstraint(new MinLengthConstraint(1));
    titleSchema.addConstraint(new TypeConstraint(TypeConstraint::kString));

    // Attach the property schemas above; the pattern-property map is empty.
    schema.addConstraint(new PropertiesConstraint(
        propertySchemaMap, patternPropertiesSchemaMap));
}
Exemple #30
0
int main(){
  TTableContext Context;
  // Case 1: Euclidean Distance
  Schema BuildingS;
  BuildingS.Add(TPair<TStr,TAttrType>("Building", atStr));
  BuildingS.Add(TPair<TStr,TAttrType>("X", atInt));
  BuildingS.Add(TPair<TStr,TAttrType>("Y", atInt));

  // create table
  PTable TBuildings = TTable::LoadSS("Buildings", BuildingS, "tests/buildings.txt", Context, '\t', false);

	TStrV Cols;
	Cols.Add("X");
	Cols.Add("Y");

	// Find all buildings within 5 Euc Distance of each other.
	PTable BuildingJointTable = TBuildings->SelfSimJoin(Cols, "Euclidean_Distance", L2Norm, 5.0);
	BuildingJointTable->SaveSS("tests/buildings.out.txt");

  // Case2 : Haversine distance 
  Schema PlaceS;
  PlaceS.Add(TPair<TStr,TAttrType>("Name", atStr));
  PlaceS.Add(TPair<TStr,TAttrType>("Location", atStr));
  PlaceS.Add(TPair<TStr,TAttrType>("Latitude", atFlt));
  PlaceS.Add(TPair<TStr,TAttrType>("Longitude", atFlt));

  // create table
  PTable TPlaces = TTable::LoadSS("Places", PlaceS, "tests/places.txt", Context, '\t', false);

	Cols.Clr();
	Cols.Add("Latitude");
	Cols.Add("Longitude");

	PTable PlacesJointTable = TPlaces->SelfSimJoin(Cols, "Distance",Haversine, 1000.0);

	TStrV ProjectionV;
	ProjectionV.Add("Places_1.Name");
	ProjectionV.Add("Places_1.Location");	
	ProjectionV.Add("Places_2.Name");
	ProjectionV.Add("Places_2.Location");
	ProjectionV.Add("Distance");
	PlacesJointTable->ProjectInPlace(ProjectionV);
	PlacesJointTable->SelectAtomic("Places_1.Name", "Places_2.Name", NEQ);
	PlacesJointTable->SaveSS("tests/places.out.txt");

	printf("Saved buildings.out.txt and places.out.txt\n");
  return 0;
}