//---------------------------------------------------------------------------
void dumpFacts(ofstream& out,TempFile& rawFacts,const string& name)
// Dump the facts
{
    // Sort the facts
    TempFile sortedFacts(rawFacts.getBaseFile());
    Sorter::sort(rawFacts,sortedFacts,skipTriple,compareTriple,true);

    // Dump the facts
    {
        unlink("facts.sql");
        ofstream out("facts.sql");
        MemoryMappedFile in;
        in.open(sortedFacts.getFile().c_str());
        const Triple* triplesBegin=reinterpret_cast<const Triple*>(in.getBegin());
        const Triple* triplesEnd=reinterpret_cast<const Triple*>(in.getEnd());
        for (const Triple* iter=triplesBegin,*limit=triplesEnd; iter!=limit; ++iter)
            out << (*iter).subject << "\t" << (*iter).predicate << "\t" << (*iter).object << std::endl;
    }

    // And write the copy statement
    out << "drop schema if exists " << name << " cascade;" << endl;
    out << "create schema " << name << ";" << endl;
    out << "create table " << name << ".facts(subject int not null, predicate int not null, object int not null);" << endl;
    out << "copy " << name << ".facts from 'facts.sql';" << endl;

    // Create indices
    out << "create index facts_spo on " << name << ".facts (subject, predicate, object);" << endl;
    out << "create index facts_pso on " << name << ".facts (predicate, subject, object);" << endl;
    out << "create index facts_pos on " << name << ".facts (predicate, object, subject);" << endl;
}
// Example #2
// 0
//---------------------------------------------------------------------------
int main(int argc,char* argv[])
// Reads the id file given as argv[1] and writes one text line per record to
// "yago_aftertest.txt". A record is twelve consecutive ids (four groups of
// three); the line holds the 1st id of the first group, the 2nd id of the
// second group and the 3rd id of the third group. Returns 1 on a usage error.
{
   // Check the arguments
   if (argc<2) {
      cerr <<  "usage: " << argv[0] << " <tmp file> " << endl;
      return 1;
   }

   // Parse the input
   {
      ofstream out("yago_aftertest.txt");

      MemoryMappedFile in;
      ensure(in.open(argv[1]));
      const char* iter=in.getBegin();
      const char* const limit=in.getEnd();

      // `<` rather than `!=`: if the file does not end exactly on a record
      // boundary the cursor steps over `limit` and `!=` would never stop.
      while (iter<limit) {
         // Read the whole record; the ids of the fourth group are consumed
         // only to advance the cursor.
         uint64_t ids[12];
         for (unsigned index=0;index<12;++index)
            iter=TempFile::readId(iter,ids[index]);

         // The ids are deliberately truncated to unsigned, as before
         const unsigned node=static_cast<unsigned>(ids[0]);
         const unsigned dir=static_cast<unsigned>(ids[4]);
         const unsigned selectivity=static_cast<unsigned>(ids[8]);

         // '\n' instead of endl: no flush per record, the stream is flushed
         // when it goes out of scope below
         out << node << ' ' << dir << ' ' << selectivity << '\n';
      }
   }

   // NOTE(review): this looks like leftover debugging output. The constant
   // does not fit into a char; the cast makes the truncation explicit
   // (with an 8-bit char the printed value is 64). Kept so that the output
   // on stderr stays unchanged.
   const char a=static_cast<char>(1000000);
   cerr<<static_cast<unsigned>(a)<<endl;

   cout << "Done." << endl;
   return 0;
}
// Example #3
// 0
// Inserts all distinct triples of rawFacts into the bitmap twice and collects
// the per-key statistics.
//
// Pass 1 sorts rawFacts with compare123 into "./SortByS" and inserts each
// triple as (predicate, subject, object) with flag 0. Pass 2 sorts rawFacts
// with compare321 into "./SortByO" and inserts each triple as
// (predicate, object, subject) with flag 1. Adjacent identical triples are
// inserted only once.
//
// While scanning, count0 is the number of distinct triples seen for the
// current leading id (subject in pass 1, object in pass 2) and count1 the
// number for the current (leading id, predicate) pair. Finished groups are
// reported to statBuffer[0]/[2] in pass 1 and statBuffer[1]/[3] in pass 2.
//
// rawFacts and both sorted temp files are discarded at the end. The `facts`
// parameter is not used. Always returns OK.
Status TripleBitBuilder::resolveTriples(TempFile& rawFacts, TempFile& facts) {
	cerr<<"Sort by Subject"<<endl;
	ID subjectID, objectID, predicateID;

	ID lastSubject = 0, lastObject = 0, lastPredicate = 0;
	unsigned count0 = 0, count1 = 0;
	TempFile sortedBySubject("./SortByS"), sortedByObject("./SortByO");
	Sorter::sort(rawFacts, sortedBySubject, skipIdIdId, compare123);
	{
		//insert into chunk
		sortedBySubject.close();
		MemoryMappedFile mappedIn;
		// The open must not live inside assert(): with NDEBUG the whole
		// expression would be removed and the file would never be mapped.
		const bool opened = mappedIn.open(sortedBySubject.getFile().c_str());
		assert(opened);
		(void)opened;
		const char* reader = mappedIn.getBegin(), *limit = mappedIn.getEnd();

		// Nothing to read (and nothing to report) for an empty file
		if (reader < limit) {
			// The first triple starts the first group
			loadTriple(reader, subjectID, predicateID, objectID);
			lastSubject = subjectID; lastPredicate = predicateID; lastObject = objectID;
			reader = skipIdIdId(reader);
			bool v = generateXY(subjectID, objectID);
			bitmap->insertTriple(predicateID, subjectID, objectID, v, 0);
			count0 = count1 = 1;

			while (reader < limit) {
				loadTriple(reader, subjectID, predicateID, objectID);
				// Skip exact duplicates of the previous triple
				if(lastSubject == subjectID && lastPredicate == predicateID && lastObject == objectID) {
					reader = skipIdIdId(reader);
					continue;
				}

				if ( subjectID != lastSubject ) {
					// New subject: report the finished subject and its last predicate group
					((OneConstantStatisticsBuffer*)statBuffer[0])->addStatis(lastSubject, count0);
					statBuffer[2]->addStatis(lastSubject, lastPredicate, count1);
					lastPredicate = predicateID;
					lastSubject = subjectID;
					lastObject = objectID;
					count0 = count1 = 1;
				} else if ( predicateID != lastPredicate ) {
					// Same subject, new predicate: report the finished pair only
					statBuffer[2]->addStatis(lastSubject, lastPredicate, count1);
					lastPredicate = predicateID;
					lastObject = objectID;
					count0++; count1 = 1;
				} else {
					count0++; count1++;
					lastObject = objectID;
				}

				// Advance with the same record-skip function that was handed
				// to the sorter instead of a hard-coded byte count
				reader = skipIdIdId(reader);
				v = generateXY(subjectID, objectID);
				//0 indicate the triple is sorted by subjects' id;
				bitmap->insertTriple(predicateID, subjectID, objectID, v, 0);
			}

			// Groups are only reported when the next one begins, so the last
			// subject and its last predicate group have to be reported here.
			((OneConstantStatisticsBuffer*)statBuffer[0])->addStatis(lastSubject, count0);
			statBuffer[2]->addStatis(lastSubject, lastPredicate, count1);
		}
		mappedIn.close();
	}

	bitmap->flush();
	((OneConstantStatisticsBuffer*)statBuffer[0])->flush();
	((TwoConstantStatisticsBuffer*)statBuffer[2])->flush();

	//sort
	cerr << "Sort by Object" << endl;
	Sorter::sort(rawFacts, sortedByObject, skipIdIdId, compare321);
	{
		//insert into chunk
		sortedByObject.close();
		MemoryMappedFile mappedIn;
		// See above: keep the side effect outside of assert()
		const bool opened = mappedIn.open(sortedByObject.getFile().c_str());
		assert(opened);
		(void)opened;
		const char* reader = mappedIn.getBegin(), *limit = mappedIn.getEnd();

		// Nothing to read (and nothing to report) for an empty file
		if (reader < limit) {
			// The first triple starts the first group
			loadTriple(reader, subjectID, predicateID, objectID);
			lastSubject = subjectID; lastPredicate = predicateID; lastObject = objectID;
			reader = skipIdIdId(reader);
			bool v = generateXY(objectID, subjectID);
			bitmap->insertTriple(predicateID, objectID, subjectID, v, 1);
			count0 = count1 = 1;

			while (reader < limit) {
				loadTriple(reader, subjectID, predicateID, objectID);
				// Skip exact duplicates of the previous triple
				if(lastSubject == subjectID && lastPredicate == predicateID && lastObject == objectID) {
					reader = skipIdIdId(reader);
					continue;
				}

				if ( objectID != lastObject ) {
					// New object: report the finished object and its last predicate group
					((OneConstantStatisticsBuffer*)statBuffer[1])->addStatis(lastObject, count0);
					statBuffer[3]->addStatis(lastObject, lastPredicate, count1);
					lastPredicate = predicateID;
					lastObject = objectID;
					lastSubject = subjectID;
					count0 = count1 = 1;
				} else if ( predicateID != lastPredicate ) {
					// Same object, new predicate: report the finished pair only
					statBuffer[3]->addStatis(lastObject, lastPredicate, count1);
					lastPredicate = predicateID;
					lastSubject = subjectID;
					count0++; count1 = 1;
				} else {
					lastSubject = subjectID;
					count0++; count1++;
				}
				reader = skipIdIdId(reader);
				v = generateXY(objectID, subjectID);
				// 1 indicate the triple is sorted by objects' id;
				bitmap->insertTriple(predicateID, objectID, subjectID, v, 1);
			}

			// Report the last object and its last predicate group, which no
			// following triple would otherwise trigger.
			((OneConstantStatisticsBuffer*)statBuffer[1])->addStatis(lastObject, count0);
			statBuffer[3]->addStatis(lastObject, lastPredicate, count1);
		}
		mappedIn.close();
	}

	bitmap->flush();
	((OneConstantStatisticsBuffer*)statBuffer[1])->flush();
	((TwoConstantStatisticsBuffer*)statBuffer[3])->flush();
	rawFacts.discard();
	sortedByObject.discard();
	sortedBySubject.discard();

	return OK;
}
//---------------------------------------------------------------------------
static void buildDictionary(TempFile& rawStrings,TempFile& stringTable,TempFile& stringIds,map<unsigned,unsigned>& subTypes)
   // Build the dictionary
{
   cerr << "Building the dictionary..." << endl;

   // Sort the strings to resolve duplicates
   TempFile sortedStrings(rawStrings.getBaseFile());
   Sorter::sort(rawStrings,sortedStrings,skipStringIdId,compareStringIdId);
   rawStrings.discard();

   // Build the id map and the string list
   TempFile rawIdMap(rawStrings.getBaseFile()),stringList(rawStrings.getBaseFile());
   {
      MemoryMappedFile strings;
      ensure(strings.open(sortedStrings.getFile().c_str()));
      uint64_t lastId=0; unsigned lastLen=0; const char* lastStr=0; uint64_t lastType=0;
      for (const char* iter=strings.getBegin(),*limit=strings.getEnd();iter!=limit;) {
         // Read the entry
         unsigned stringLen; const char* stringStart;
         iter=TempFile::readString(iter,stringLen,stringStart);
         uint64_t id,type;
         iter=TempFile::readId(iter,type);
         iter=TempFile::readId(iter,id);

         // A new one?
         if ((!lastStr)||(stringLen!=lastLen)||(memcmp(lastStr,stringStart,stringLen)!=0)||(type!=lastType)) {
            stringList.writeId(id);
            stringList.writeString(stringLen,stringStart);
            stringList.writeId(type);
            rawIdMap.writeId(id);
            rawIdMap.writeId(id);
            lastId=id; lastLen=stringLen; lastStr=stringStart; lastType=type;
         } else {
            rawIdMap.writeId(lastId);
            rawIdMap.writeId(id);
         }
      }
   }
   sortedStrings.discard();

   // Sort the string list
   Sorter::sort(stringList,stringTable,skipIdStringId,compareId);
   stringList.discard();

   // Sort the ID map
   TempFile idMap(rawStrings.getBaseFile());
   Sorter::sort(rawIdMap,idMap,skipIdId,compareId);
   rawIdMap.discard();

   // Construct new ids
   TempFile newIds(rawStrings.getBaseFile());
   {
      MemoryMappedFile in;
      ensure(in.open(idMap.getFile().c_str()));
      uint64_t lastId=0,newId=0;
      for (const char* iter=in.getBegin(),*limit=in.getEnd();iter!=limit;) {
         uint64_t firstId,currentId;
         iter=TempFile::readId(iter,firstId);
         iter=TempFile::readId(iter,currentId);
         if (firstId!=lastId) {
            ++newId;
            lastId=firstId;
         }
         newIds.writeId(currentId);
         newIds.writeId(newId);
         if (subTypes.count(currentId))
            subTypes[currentId]=newId;
      }
   }

   // And a final sort
   Sorter::sort(newIds,stringIds,skipIdId,compareValue);
   newIds.discard();

   // Resolve the subtypes if necessary
   if (!subTypes.empty()) {
      TempFile fixedTypes(rawStrings.getBaseFile());
      MemoryMappedFile in;
      ensure(in.open(stringTable.getFile().c_str()));
      for (const char* iter=in.getBegin(),*limit=in.getEnd();iter!=limit;) {
         uint64_t id,typeInfo;
         const char* value; unsigned valueLen;
         iter=TempFile::readId(TempFile::readString(TempFile::readId(iter,id),valueLen,value),typeInfo);
         unsigned type=typeInfo&0xFF,subType=(typeInfo>>8);
         if (Type::hasSubType(static_cast<Type::ID>(type))) {
            assert(subTypes.count(subType));
            typeInfo=type|(subTypes[subType]<<8);
         } else {
            assert(subType==0);
         }
         fixedTypes.writeId(id);
         fixedTypes.writeString(valueLen,value);
         fixedTypes.writeId(typeInfo);
      }

      fixedTypes.close();
      fixedTypes.swap(stringTable);
   }