Beispiel #1
0
/// Approximate k-NN search: partially orders all signature indices under the
/// permuted-signature comparison and returns the best `beam` candidates.
///
/// @param permutations  bit/column permutation applied by the comparator
/// @param beam          number of candidate indices to return (must be
///                      <= nSignatures; not validated here)
/// @return heap-allocated vector of the `beam` best signature indices.
///
/// NOTE(review): the result is allocated with `new` and returned by
/// reference, so the CALLER owns it and must `delete &result` (or it leaks).
/// The signature cannot be changed without touching all callers.
///
/// Fix: removed leftover debug output. The old code called
/// getSignature(60996) unconditionally — an out-of-bounds access whenever
/// nSignatures <= 60996 — and the `if (signatureLength > 0)` guard only
/// covered the first of the two debug statements.
vector< int > & HashedDB::fastKNNSearch1 ( vector<int>& permutations, int beam )
{
  vector<int> sortBuffer ( nSignatures );
  vector<int> *result = new vector<int> ( beam );
  for ( int i = 0; i < nSignatures; i++ )
    sortBuffer[i] = i;

  // nth_element puts the `beam` smallest indices (under the permuted
  // comparison) in the first `beam` slots; their relative order is
  // unspecified, which is sufficient for a candidate beam.
  nth_element ( sortBuffer.begin(),
      sortBuffer.begin() + beam,
      sortBuffer.end(),
      (comparePermutedSignatures ( permutations, *this ) ) );

  copy ( sortBuffer.begin(), sortBuffer.begin() + beam, (*result).begin() );
  return *result;
}
Beispiel #2
0
			// Collapses runs of TripleEdge entries in [pa, pc) that share the same
			// (a, b) key into a single entry whose c field is the sum of the run's
			// c values, then drops self-edges (a == b). Works in place: rewrites
			// the surviving entries to the front of the buffer and shrinks pc.
			void unify()
			{
				#if 0
				uint64_t const uin = pc-pa;
				#endif
			
				// Sort first so that entries with equal (a, b) become adjacent.
				sortBuffer();

				// prevtrip starts default-constructed; the `a != b` guard below
				// keeps this initial sentinel from being emitted (assumes the
				// default TripleEdge has a == b — TODO confirm with the class def).
				TripleEdge prevtrip;
				TripleEdge * po = pa;

				for ( TripleEdge const * pp = pa; pp != pc; ++pp )
				{
					TripleEdge const & T = *pp;
									
					if ( (T.a != prevtrip.a) || (T.b != prevtrip.b) )
					{
						// New key: flush the accumulated previous run, unless it
						// is a self-edge (or the initial sentinel).
						if ( prevtrip.a != prevtrip.b )
							*(po++) = prevtrip;

						prevtrip = T;
					}
					else
					{
						// Same key: accumulate the multiplicity/weight.
						prevtrip.c += T.c;
					}
				}
				// Flush the final run (same self-edge/sentinel guard as above).
				if ( prevtrip.a != prevtrip.b )
					*(po++) = prevtrip;
					
				// po is one past the last surviving entry; shrink the range.
				pc = po;
				
				#if 0
				uint64_t const uout = pc-pa;
				
				std::cerr << "uin=" << uin << " uout=" << uout << std::endl;			
				#endif
			}
/*
 * Removes duplicate records from infile (compared on `field`) and writes the
 * unique records to outfile, using `buffer` (nmem_blocks blocks) as working
 * memory. *nunique receives the number of unique values, *nios the number of
 * block I/O operations performed.
 *
 * Strategy: if the input fits in memory, deduplicate directly (hashing or
 * in-place sort); otherwise run an external merge sort whose final merge
 * pass emits each value only once.
 */
void EliminateDuplicates(char *infile, unsigned char field, block_t *buffer, unsigned int nmem_blocks, char *outfile, unsigned int *nunique, unsigned int *nios) {

    /* need at least input + output + one spare block to merge */
    if (nmem_blocks < 3) {
        printf("At least 3 blocks are required.");
        return;
    }

    // empties the buffer
    emptyBuffer(buffer, nmem_blocks);
    uint memSize = nmem_blocks - 1;  /* one block is reserved for output */
    *nunique = 0;
    *nios = 0;

    uint fileSize = getSize(infile);  /* size in blocks */

    // if the relation fits on the buffer and leaves one block free for output,
    // loads it to the buffer and eliminates duplicates using hashing
    if (fileSize <= memSize) {
        hashElimination(infile, fileSize, outfile, field, buffer, memSize, nunique, nios);
    } else if (fileSize == nmem_blocks) {
        // if the relation completely fits the buffer, calls useFirstBlock
        useFirstBlock(infile, outfile, field, buffer, nmem_blocks, nunique, nios);
    } else {
        // if the relation is larger than the buffer, then sort it using mergesort,
        // BUT during the final merging (during last pass) write to the output
        // only one time each value

        // the following code is similar to that of MergeSort:

        int input, output;
        char tmpFile1[] = ".ed1";
        char tmpFile2[] = ".ed2";

        /* split the input into buffer-sized segments; the last one may be short */
        uint fullSegments = fileSize / nmem_blocks;
        uint remainingSegment = fileSize % nmem_blocks;

        /* NOTE(review): open()/malloc() results are not checked for failure */
        input = open(infile, O_RDONLY, S_IRWXU);
        output = open(tmpFile1, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);

        /* pass 0: read each segment, sort it in memory, write it back */
        uint nSortedSegs = 0;
        uint segmentSize = nmem_blocks;
        for (uint i = 0; i <= fullSegments; i++) {
            if (fullSegments == i) {
                if (remainingSegment != 0) {
                    segmentSize = remainingSegment;  /* final short segment */
                } else {
                    break;
                }
            }
            (*nios) += readBlocks(input, buffer, segmentSize);
            if (sortBuffer(buffer, segmentSize, field)) {
                (*nios) += writeBlocks(output, buffer, segmentSize);
                nSortedSegs += 1;
            }
        }
        close(input);
        close(output);

        segmentSize = nmem_blocks;
        uint lastSegmentSize;
        if (remainingSegment == 0) {
            lastSegmentSize = nmem_blocks;
        } else {
            lastSegmentSize = remainingSegment;
        }

        /* the last buffer block serves as the merge output block */
        buffer[memSize].valid = true;
        /* merge passes: each pass merges up to memSize sorted segments at a
         * time, ping-ponging between tmpFile1 and tmpFile2, until one sorted
         * run remains */
        while (nSortedSegs != 1) {
            input = open(tmpFile1, O_RDONLY, S_IRWXU);
            output = open(tmpFile2, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);

            uint newSortedSegs = 0;
            uint fullMerges = nSortedSegs / memSize;
            uint lastMergeSegs = nSortedSegs % memSize;
            uint *blocksLeft = (uint*) malloc(memSize * sizeof (uint));
            uint segsToMerge = memSize;
            bool lastMerge = false;

            for (uint mergeCounter = 0; mergeCounter <= fullMerges; mergeCounter++) {
                uint firstSegOffset = mergeCounter * memSize * segmentSize;

                /* detect the merge that consumes the final (possibly short)
                 * segment so mergeElimination can treat it specially */
                if (mergeCounter == fullMerges - 1 && lastMergeSegs == 0) {
                    lastMerge = true;
                } else if (mergeCounter == fullMerges) {
                    if (lastMergeSegs != 0) {
                        segsToMerge = lastMergeSegs;
                        lastMerge = true;
                    } else {
                        break;
                    }
                }

                /* prime each segment's first block into the buffer */
                for (uint i = 0; i < segsToMerge; i++) {
                    (*nios) += preadBlocks(input, buffer + i, (firstSegOffset + i * segmentSize), 1);
                    blocksLeft[i] = segmentSize - 1;
                }

                if (lastMerge) {
                    blocksLeft[segsToMerge - 1] = lastSegmentSize - 1;
                }

                /* `nSortedSegs <= memSize` means this is the final pass, in
                 * which mergeElimination drops duplicates on output */
                (*nios) += mergeElimination(input, output, buffer, memSize, segsToMerge, blocksLeft, segmentSize, firstSegOffset, field, nSortedSegs <= memSize, lastMerge, nunique);
                newSortedSegs += 1;
            }
            free(blocksLeft);

            /* recompute the size of the (possibly short) final run */
            if (lastMergeSegs == 0) {
                lastSegmentSize = (memSize - 1) * segmentSize + lastSegmentSize;
            } else {
                lastSegmentSize = (lastMergeSegs - 1) * segmentSize + lastSegmentSize;
            }
            segmentSize *= memSize;
            nSortedSegs = newSortedSegs;
            close(input);
            close(output);

            /* swap the roles of the temp files by exchanging the digit
             * (".ed1" <-> ".ed2") */
            char tmp = tmpFile1[3];
            tmpFile1[3] = tmpFile2[3];
            tmpFile2[3] = tmp;
        }
        rename(tmpFile1, outfile);
        remove(tmpFile2);
    }
}
/*
 * infile: filename of the input file
 * outfile: filename of the output file
 * field: which field will be used for sorting
 * buffer: the buffer used
 * nmem_blocks: size of buffer
 * nunique: number of unique values
 * nios: number of ios
 * 
 * when the input file size equals the buffer size, the whole file is loaded
 * and sorted. Then the first block is reused as an output block into which
 * only the unique values are written.
 */
/* Deduplicates a file that exactly fills the buffer: sorts it in memory,
 * then compacts unique records into block 0, flushing block 0 to outfile
 * whenever it fills up.
 * NOTE(review): readBlocks is passed the filename here, while the caller
 * EliminateDuplicates passes it an fd — confirm which overload/signature
 * is intended. open() result is also unchecked. */
void useFirstBlock(char *infile, char *outfile, unsigned char field, block_t *buffer, uint nmem_blocks, uint *nunique, uint *nios) {
    int out = open(outfile, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);
    (*nios) += readBlocks(infile, buffer, nmem_blocks);
    if (sortBuffer(buffer, nmem_blocks, field)) {
        // all the unique values of the first block are shifted to the start
        // of it. the rest are marked as invalid
        // i = write cursor, j = read cursor; record 0 is always kept, so
        // both start at record 1 and the first record counts as unique.
        recordPtr i = newPtr(1);
        recordPtr j = newPtr(1);
        (*nunique) += 1;
        buffer[0].nreserved = 1;
        for (; j.block < 1; incr(j)) {
            record_t record = getRecord(buffer, j);
            // keep a record only if it differs from the last one kept
            // (input is sorted, so duplicates are adjacent)
            if (record.valid && compareRecords(record, getRecord(buffer, i - 1), field) != 0) {
                setRecord(buffer, record, i);
                (*nunique) += 1;
                incr(i);
                buffer[0].nreserved += 1;
            }
        }

        // invalidate the tail of block 0 left behind by the compaction
        j = newPtr(i, 0);
        for (; j.block < 1; incr(j)) {
            buffer[j.block].entries[j.record].valid = false;
        }

        // remember the last unique record written, so dedup can continue
        // across the block-0 flush below
        record_t *lastRecordAdded = (record_t*) malloc(sizeof (record_t));
        record_t lastUnique = getRecord(buffer, i - 1);
        memcpy(lastRecordAdded, &lastUnique, sizeof (record_t));
        // if the first block is full after the shifting (meaning that all its
        // values were actually unique), writes it to the outfile and empties it
        if (buffer[0].nreserved == MAX_RECORDS_PER_BLOCK) {
            i.block -= 1;  // keep the write cursor inside block 0 after reset
            (*nios) += writeBlocks(out, buffer, 1);
            emptyBlock(buffer);
            buffer[0].blockid += 1;
        }

        // write the unique values of the other blocks to the first one. if it
        // becomes full writes it to outfile and empties it. at the end, if it
        // has records not written yet, writes them to the outfile as well.
        j = newPtr(MAX_RECORDS_PER_BLOCK);
        while (buffer[j.block].valid && j.block < nmem_blocks) {
            record_t record = getRecord(buffer, j);
            if (!record.valid) {
                break;  // sorted input: first invalid record ends the data
            }
            if (compareRecords(record, (*lastRecordAdded), field) != 0) {
                setRecord(buffer, record, i);
                memcpy(lastRecordAdded, &record, sizeof (record_t));
                (*nunique) += 1;
                incr(i);
                buffer[0].nreserved += 1;
            }
            if (buffer[0].nreserved == MAX_RECORDS_PER_BLOCK) {
                i.block -= 1;  // as above: reset cursor into the fresh block 0
                (*nios) += writeBlocks(out, buffer, 1);
                emptyBlock(buffer);
                buffer[0].blockid += 1;
            }
            incr(j);
        }
        // flush any remaining records in the partially-filled output block
        if (buffer[0].nreserved != 0) {
            (*nios) += writeBlocks(out, buffer, 1);
        }
        free(lastRecordAdded);
    }
    close(out);
}
// Benchmark driver: runs the FillInt4Kernel three times over `buf` with
// constData.w = 0, 1, 2 (apparently selecting a pass/mode inside the kernel),
// timing each pass with the stopwatch, then spot-checks ordering and reports
// throughput. `kernel` is obtained from KernelManager::query — presumably
// cached/owned by the manager, not freed here (TODO confirm).
void AdlPrimitivesDemo::test( Buffer<int2>& buf, int size, Stopwatch& sw )
{
	Kernel* kernel = KernelManager::query( m_deviceData, "..\\..\\AdlDemos\\TestBed\\Demos\\AdlPrimitivesDemoKernel", "FillInt4Kernel" );
	Buffer<int4> constBuffer( m_deviceData, 1, BufferBase::BUFFER_CONST );


	// one work-group covers 128 threads * 4 elements each
	int numGroups = (size+128*4-1)/(128*4);
	Buffer<u32> workBuffer0( m_deviceData, numGroups*(16) );
	Buffer<u32> workBuffer1( m_deviceData, numGroups*(16) );

	// fill the device-side sort buffer with (random 4-bit key, original index)
	Buffer<int2> sortBuffer( m_deviceData, size );
	{
		int2* host = new int2[size];
		for(int i=0; i<size; i++)
		{
			host[i] = make_int2( getRandom(0, 0xf), i );
		}
		sortBuffer.write( host, size );
		DeviceUtils::waitForCompletion( m_deviceData );
		delete [] host;
	}

	// kernel constants: x = element count, z = group count, w = pass selector
	int4 constData;
	{
		constData.x = size;
		constData.y = 0;
		constData.z = numGroups;
		constData.w = 0;
	}

	sw.start();

	// pass 0 (w == 0): load/store benchmark on `buf`
	int nThreads = size/4;  // each thread processes 4 elements
	{
		BufferInfo bInfo[] = { BufferInfo( &buf ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) };
		Launcher launcher( m_deviceData, kernel );
		launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
		launcher.setConst( constBuffer, constData );
		launcher.launch1D( nThreads, 128 );
	}

	sw.split();

	// pass 1 (w == 1): histogram generation on `buf`
	{
		constData.w = 1;
		int nThreads = size/4;
		BufferInfo bInfo[] = { BufferInfo( &buf ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) };
		Launcher launcher( m_deviceData, kernel );
		launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
		launcher.setConst( constBuffer, constData );
		launcher.launch1D( nThreads, 128 );
	}

	sw.split();

	// pass 2 (w == 2): full sort, reading from sortBuffer instead of buf
	{
		constData.w = 2;
		int nThreads = size/4;
		BufferInfo bInfo[] = { BufferInfo( &sortBuffer ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) };
		Launcher launcher( m_deviceData, kernel );
		launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
		launcher.setConst( constBuffer, constData );
		launcher.launch1D( nThreads, 128 );
	}

	sw.stop();

	// validation: read back `buf` and assert non-decreasing keys.
	// NOTE(review): only the first 128*4 entries are checked, not all
	// `size` — confirm whether that partial check is intentional.
	{
		int2* host = new int2[size];
		buf.read( host, size );
		DeviceUtils::waitForCompletion( m_deviceData );

		for(int i=0; i<128*4-1; i++)
		{
			ADLASSERT( host[i].x <= host[i+1].x );
		}

		delete [] host;
	}

	// report bandwidth (bytes moved per kernel * threads / time) and latency
	{
		float t[3];
		sw.getMs(t, 3);
		//	(byte * nElems)
		sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "LoadStore: %3.2fGB/s (%3.2fns)", (4*8*2)*nThreads/t[0]/1000/1000, t[0]*1000.f);		
		sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "GenHistog: %3.2fGB/s (%3.2fns)", (4*(8*2+2))*nThreads/t[1]/1000/1000, t[1]*1000.f);		
		sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "FullSort: %3.2fGB/s (%3.2fns)", (4*(8*2+2))*nThreads/t[2]/1000/1000, t[2]*1000.f);		
	}
}