/**
 * Approximate k-nearest-neighbour search over all stored signatures.
 *
 * Builds the candidate list [0, nSignatures), partially orders it with
 * nth_element under the permuted-signature comparator, and returns the
 * best `beam` indices (in unspecified relative order).
 *
 * @param permutations bit permutation handed to comparePermutedSignatures
 * @param beam         number of candidate indices requested
 * @return reference to a heap-allocated vector of size `beam`; the CALLER
 *         owns it and must `delete &result`. (Interface kept for backward
 *         compatibility — returning by value would be the modern choice.)
 *
 * Fixes vs. previous revision:
 *  - removed leftover debug output, including a hard-coded
 *    getSignature(60996) that read a fixed signature index and misbehaved
 *    on databases with fewer signatures;
 *  - clamped the selection point: nth_element/copy with begin()+beam is
 *    undefined behaviour when beam > nSignatures.
 */
vector< int > & HashedDB::fastKNNSearch1 ( vector<int>& permutations, int beam )
{
    vector<int> sortBuffer ( nSignatures );
    vector<int> *result = new vector<int> ( beam );

    // Candidate set: every signature index in the database.
    for ( int i = 0; i < nSignatures; i++ )
        sortBuffer[i] = i;

    // Guard: never select past the end of the candidate list.
    const int k = ( beam < nSignatures ) ? beam : nSignatures;

    // O(n) partial selection: after this call the first k entries are the
    // k best under the permuted comparator.
    nth_element ( sortBuffer.begin(), sortBuffer.begin() + k, sortBuffer.end(),
                  ( comparePermutedSignatures ( permutations, *this ) ) );

    // Any slots beyond k (only when beam > nSignatures) stay value-initialized.
    copy ( sortBuffer.begin(), sortBuffer.begin() + k, (*result).begin() );
    return *result;
}
void unify() { #if 0 uint64_t const uin = pc-pa; #endif sortBuffer(); TripleEdge prevtrip; TripleEdge * po = pa; for ( TripleEdge const * pp = pa; pp != pc; ++pp ) { TripleEdge const & T = *pp; if ( (T.a != prevtrip.a) || (T.b != prevtrip.b) ) { if ( prevtrip.a != prevtrip.b ) *(po++) = prevtrip; prevtrip = T; } else { prevtrip.c += T.c; } } if ( prevtrip.a != prevtrip.b ) *(po++) = prevtrip; pc = po; #if 0 uint64_t const uout = pc-pa; std::cerr << "uin=" << uin << " uout=" << uout << std::endl; #endif }
/*
 * Writes the records of `infile` to `outfile` with duplicates (compared on
 * `field`) removed, choosing a strategy by relation size:
 *   - fits in nmem_blocks-1 blocks  -> in-memory hashing (hashElimination)
 *   - exactly nmem_blocks blocks    -> useFirstBlock
 *   - larger                        -> external merge sort whose final merge
 *                                      pass emits each value only once
 * nunique/nios are output counters: number of unique values written and
 * number of block I/Os performed.
 */
void EliminateDuplicates(char *infile, unsigned char field, block_t *buffer, unsigned int nmem_blocks, char *outfile, unsigned int *nunique, unsigned int *nios) {
    // The merge phase needs >= 2 input blocks plus 1 output block.
    if (nmem_blocks < 3) {
        printf("At least 3 blocks are required.");
        return;
    }
    // empties the buffer
    emptyBuffer(buffer, nmem_blocks);
    uint memSize = nmem_blocks - 1; // blocks usable for input (one reserved for output)
    *nunique = 0;
    *nios = 0;
    uint fileSize = getSize(infile); // relation size in blocks
    // if the relation fits on the buffer and leaves one block free for output,
    // loads it to the buffer and eliminates duplicates using hashing
    if (fileSize <= memSize) {
        hashElimination(infile, fileSize, outfile, field, buffer, memSize, nunique, nios);
    } else if (fileSize == nmem_blocks) {
        // if the relation completely fits the buffer, calls useFirstBlock
        useFirstBlock(infile, outfile, field, buffer, nmem_blocks, nunique, nios);
    } else {
        // if the relation is larger than the buffer, then sort it using mergesort,
        // BUT during the final merging (during last pass) write to the output
        // only one time each value
        // the following code is similar to that of MergeSort:
        int input, output;
        // two temp files ping-ponged between passes (names differ in last char)
        char tmpFile1[] = ".ed1";
        char tmpFile2[] = ".ed2";
        uint fullSegments = fileSize / nmem_blocks;      // whole buffer-sized segments
        uint remainingSegment = fileSize % nmem_blocks;  // size of the trailing partial segment
        input = open(infile, O_RDONLY, S_IRWXU);
        output = open(tmpFile1, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);
        uint nSortedSegs = 0;
        uint segmentSize = nmem_blocks;
        // Pass 0: read each segment, sort it in memory, write it out.
        for (uint i = 0; i <= fullSegments; i++) {
            if (fullSegments == i) {
                // last (partial) segment, or nothing left at all
                if (remainingSegment != 0) {
                    segmentSize = remainingSegment;
                } else {
                    break;
                }
            }
            (*nios) += readBlocks(input, buffer, segmentSize);
            // sortBuffer returning false means the segment produced no output
            // (it is then not counted as a sorted segment)
            if (sortBuffer(buffer, segmentSize, field)) {
                (*nios) += writeBlocks(output, buffer, segmentSize);
                nSortedSegs += 1;
            }
        }
        close(input);
        close(output);
        segmentSize = nmem_blocks;
        uint lastSegmentSize; // size (in blocks) of the final, possibly partial, segment
        if (remainingSegment == 0) {
            lastSegmentSize = nmem_blocks;
        } else {
            lastSegmentSize = remainingSegment;
        }
        buffer[memSize].valid = true; // mark the dedicated output block usable
        // Merge passes: fold memSize sorted segments into one per iteration
        // until a single sorted (and deduplicated) run remains.
        while (nSortedSegs != 1) {
            input = open(tmpFile1, O_RDONLY, S_IRWXU);
            output = open(tmpFile2, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);
            uint newSortedSegs = 0;
            uint fullMerges = nSortedSegs / memSize;   // merges using all memSize inputs
            uint lastMergeSegs = nSortedSegs % memSize; // inputs of the final smaller merge
            uint *blocksLeft = (uint*) malloc(memSize * sizeof (uint)); // unread blocks per segment
            uint segsToMerge = memSize;
            bool lastMerge = false;
            for (uint mergeCounter = 0; mergeCounter <= fullMerges; mergeCounter++) {
                uint firstSegOffset = mergeCounter * memSize * segmentSize; // block offset of this merge's first segment
                if (mergeCounter == fullMerges - 1 && lastMergeSegs == 0) {
                    // segments divide evenly: the last full merge is also the
                    // one containing the (shorter) final segment
                    lastMerge = true;
                } else if (mergeCounter == fullMerges) {
                    if (lastMergeSegs != 0) {
                        // leftover merge with fewer than memSize segments
                        segsToMerge = lastMergeSegs;
                        lastMerge = true;
                    } else {
                        break;
                    }
                }
                // prime each segment with its first block
                for (uint i = 0; i < segsToMerge; i++) {
                    (*nios) += preadBlocks(input, buffer + i, (firstSegOffset + i * segmentSize), 1);
                    blocksLeft[i] = segmentSize - 1;
                }
                if (lastMerge) {
                    // the final segment is shorter than the others
                    blocksLeft[segsToMerge - 1] = lastSegmentSize - 1;
                }
                // nSortedSegs <= memSize means this is the final pass: then
                // mergeElimination suppresses duplicate values on output
                (*nios) += mergeElimination(input, output, buffer, memSize, segsToMerge, blocksLeft, segmentSize, firstSegOffset, field, nSortedSegs <= memSize, lastMerge, nunique);
                newSortedSegs += 1;
            }
            free(blocksLeft);
            // recompute the size of the new trailing segment for the next pass
            if (lastMergeSegs == 0) {
                lastSegmentSize = (memSize - 1) * segmentSize + lastSegmentSize;
            } else {
                lastSegmentSize = (lastMergeSegs - 1) * segmentSize + lastSegmentSize;
            }
            segmentSize *= memSize; // each pass grows segments by a factor of memSize
            nSortedSegs = newSortedSegs;
            close(input);
            close(output);
            // swap temp-file roles by exchanging the distinguishing character
            char tmp = tmpFile1[3];
            tmpFile1[3] = tmpFile2[3];
            tmpFile2[3] = tmp;
        }
        rename(tmpFile1, outfile);
        remove(tmpFile2);
    }
}
/* * infile: filename of the input file * outfile: filename of the output file * field: which field will be used for sorting * buffer: the buffer used * nmem_blocks: size of buffer * nunique: number of unique values * nios: number of ios * * when the input file size is equal to buffer, the whole file is loaded and * sorted. then the first block is used as output where only unique values are * written */ void useFirstBlock(char *infile, char *outfile, unsigned char field, block_t *buffer, uint nmem_blocks, uint *nunique, uint *nios) { int out = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU); (*nios) += readBlocks(infile, buffer, nmem_blocks); if (sortBuffer(buffer, nmem_blocks, field)) { // all the unique values of the first block are shifted to the start // of it. the rest are marked as invalid recordPtr i = newPtr(1); recordPtr j = newPtr(1); (*nunique) += 1; buffer[0].nreserved = 1; for (; j.block < 1; incr(j)) { record_t record = getRecord(buffer, j); if (record.valid && compareRecords(record, getRecord(buffer, i - 1), field) != 0) { setRecord(buffer, record, i); (*nunique) += 1; incr(i); buffer[0].nreserved += 1; } } j = newPtr(i, 0); for (; j.block < 1; incr(j)) { buffer[j.block].entries[j.record].valid = false; } record_t *lastRecordAdded = (record_t*) malloc(sizeof (record_t)); record_t lastUnique = getRecord(buffer, i - 1); memcpy(lastRecordAdded, &lastUnique, sizeof (record_t)); // if the first block is full after the shifting (meaning that all its // values were actually unique), writes it to the outfile and empties it if (buffer[0].nreserved == MAX_RECORDS_PER_BLOCK) { i.block -= 1; (*nios) += writeBlocks(out, buffer, 1); emptyBlock(buffer); buffer[0].blockid += 1; } // write the unique values of the other blocks to the first one. if it // becomes full writes it to outfile and empties it. at the end, if it // has records not writtend yet, writes them to the outfile as well. 
j = newPtr(MAX_RECORDS_PER_BLOCK); while (buffer[j.block].valid && j.block < nmem_blocks) { record_t record = getRecord(buffer, j); if (!record.valid) { break; } if (compareRecords(record, (*lastRecordAdded), field) != 0) { setRecord(buffer, record, i); memcpy(lastRecordAdded, &record, sizeof (record_t)); (*nunique) += 1; incr(i); buffer[0].nreserved += 1; } if (buffer[0].nreserved == MAX_RECORDS_PER_BLOCK) { i.block -= 1; (*nios) += writeBlocks(out, buffer, 1); emptyBlock(buffer); buffer[0].blockid += 1; } incr(j); } if (buffer[0].nreserved != 0) { (*nios) += writeBlocks(out, buffer, 1); } free(lastRecordAdded); } close(out); }
// Benchmarks the "FillInt4Kernel" primitive in three timed passes over the
// same randomly keyed input, then appends throughput/latency report lines to
// the demo's on-screen text buffer. The pass labels come from the report
// strings below: LoadStore, GenHistog, FullSort.
//
// NOTE(review): the meaning of constData.w (0/1/2 selects the pass) is
// implemented inside AdlPrimitivesDemoKernel, which is not visible here —
// confirm the pass semantics against the kernel source.
void AdlPrimitivesDemo::test( Buffer<int2>& buf, int size, Stopwatch& sw )
{
    Kernel* kernel = KernelManager::query( m_deviceData, "..\\..\\AdlDemos\\TestBed\\Demos\\AdlPrimitivesDemoKernel", "FillInt4Kernel" );

    Buffer<int4> constBuffer( m_deviceData, 1, BufferBase::BUFFER_CONST );
    // one workgroup covers 128 threads * 4 elements each
    int numGroups = (size+128*4-1)/(128*4);
    // scratch buffers: 16 u32 entries per workgroup
    Buffer<u32> workBuffer0( m_deviceData, numGroups*(16) );
    Buffer<u32> workBuffer1( m_deviceData, numGroups*(16) );
    Buffer<int2> sortBuffer( m_deviceData, size );

    {
        // fill the device sort buffer with (random 4-bit key, original index) pairs
        int2* host = new int2[size];
        for(int i=0; i<size; i++)
        {
            host[i] = make_int2( getRandom(0, 0xf), i );
        }
        sortBuffer.write( host, size );
        DeviceUtils::waitForCompletion( m_deviceData );
        delete [] host;
    }

    // kernel constants: x = element count, z = workgroup count,
    // w = pass selector (see NOTE above)
    int4 constData;
    {
        constData.x = size;
        constData.y = 0;
        constData.z = numGroups;
        constData.w = 0;
    }

    sw.start();
    int nThreads = size/4; // 4 elements handled per thread

    {
        // pass 0 (w == 0): "LoadStore"
        BufferInfo bInfo[] = { BufferInfo( &buf ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) };

        Launcher launcher( m_deviceData, kernel );
        launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
        launcher.setConst( constBuffer, constData );
        launcher.launch1D( nThreads, 128 );
    }
    sw.split();

    {
        // pass 1 (w == 1): "GenHistog"
        constData.w = 1;
        int nThreads = size/4;
        BufferInfo bInfo[] = { BufferInfo( &buf ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) };

        Launcher launcher( m_deviceData, kernel );
        launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
        launcher.setConst( constBuffer, constData );
        launcher.launch1D( nThreads, 128 );
    }
    sw.split();

    {
        // pass 2 (w == 2): "FullSort" — note this pass reads sortBuffer,
        // not buf, unlike the two passes above
        constData.w = 2;
        int nThreads = size/4;
        BufferInfo bInfo[] = { BufferInfo( &sortBuffer ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) };

        Launcher launcher( m_deviceData, kernel );
        launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
        launcher.setConst( constBuffer, constData );
        launcher.launch1D( nThreads, 128 );
    }
    sw.stop();

    {
        // sanity check: only the first 128*4 entries of buf are verified to
        // be non-decreasing in x (one workgroup's worth, not the full range)
        int2* host = new int2[size];
        buf.read( host, size );
        DeviceUtils::waitForCompletion( m_deviceData );
        for(int i=0; i<128*4-1; i++)
        {
            ADLASSERT( host[i].x <= host[i+1].x );
        }
        delete [] host;
    }

    {
        // report: t[] holds the three split times in ms;
        // bandwidth figures assume the byte counts in the comments below
        float t[3];
        sw.getMs(t, 3);
        // (byte * nElems)
        sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "LoadStore: %3.2fGB/s (%3.2fns)", (4*8*2)*nThreads/t[0]/1000/1000, t[0]*1000.f);
        sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "GenHistog: %3.2fGB/s (%3.2fns)", (4*(8*2+2))*nThreads/t[1]/1000/1000, t[1]*1000.f);
        sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "FullSort: %3.2fGB/s (%3.2fns)", (4*(8*2+2))*nThreads/t[2]/1000/1000, t[2]*1000.f);
    }
}