Ejemplo n.º 1
0
/*
 * Walk the channel list once per input array and group the channels that
 * source that array into batches using add_to_batch().
 *
 * xdf:    structure whose channels list, narrays and array_stride are read.
 *         When assign is non-zero, xdf->batch is written as well.
 * assign: zero     -> only count batches, using a scratch batch on the stack;
 *         non-zero -> store every batch in xdf->batch (advancing one element
 *                     per new batch) and call link_batches() at the end.
 *                     NOTE(review): this presumes xdf->batch already has room
 *                     for all batches (presumably sized from a previous
 *                     counting call) -- confirm against the caller.
 *
 * Returns the number of batches, or -1 if a channel refers to an array index
 * that is not below xdf->narrays or if the channel's data would extend past
 * the stride of its array. The count starts at 1, so the result is never 0.
 */
static int compute_batches(struct xdf* xdf, int assign)
{
	struct data_batch curr, *currb;
	unsigned int nbatch = 1, iarr, foff, dlen;
	const struct xdfch* ch;

	currb = assign ? xdf->batch : &curr;
	reset_batch(currb, 0, 0);

	for (iarr=0; iarr < xdf->narrays; iarr++) {
		// Running sum of the data sizes of the channels seen so far
		// in this pass (presumably the offset inside a file record)
		foff = 0;
		
		// Scan channels in order to find different batches
		for (ch=xdf->channels; ch; ch=ch->next) {
			// Channels with a negative array index are ignored
			if (ch->iarray < 0)
				continue;
			dlen = xdf_get_datasize(ch->inmemtype);

			// Consistency checks: valid array indices are
			// 0..narrays-1, so an index equal to narrays must be
			// rejected before it is used to read array_stride[]
			if ((unsigned int)ch->iarray >= xdf->narrays
			    || ch->offset + dlen > xdf->array_stride[ch->iarray])
				return -1;

			// Linearize the processing of channel sourcing
			// the same input array: when the channel cannot be
			// appended to the current batch (add_to_batch()
			// returns 0), start a new batch with it
			if ((iarr == (unsigned int)ch->iarray)
			   && !add_to_batch(currb, ch, foff)) {
				nbatch++;
				if (assign)
					currb++;
				reset_batch(currb, iarr, foff);
				add_to_batch(currb, ch, foff);
			}
			foff += dlen;
		}
	}
	if (assign)
		link_batches(xdf, nbatch);

	return nbatch;
}
Ejemplo n.º 2
0
/*
 * Abort the program with a message naming the failed step when a CUDA
 * runtime call did not return cudaSuccess.
 */
static void
cuda_check(cudaError_t err, const char *what)
{
	if (err != cudaSuccess) {
		fprintf(stderr, "%s failed: %s\n", what,
		    cudaGetErrorString(err));
		exit(EXIT_FAILURE);
	}
}

/*
 * Benchmark driver: fills a batch with BATCH_SIZE random keys, hashes them
 * through CUDAhash() and appends the elapsed time to log.txt.
 *
 * The batch tables (batchKeys, nKeys, batchIndex, hashedKeys), their device
 * counterparts (d_keys, d_len, d_index, d_res) and batchNo are declared
 * outside this function.
 *
 * When EXEC_MEM is defined the timed region covers the host<->device copies
 * and the CUDAhash() call; otherwise it covers only CUDAhash() followed by
 * a device synchronization.
 *
 * Returns 0 on success, EXIT_FAILURE if log.txt cannot be opened. Allocation
 * or CUDA failures terminate the process with EXIT_FAILURE.
 */
int
main()
{
	int i;
	int status = 0;
	unsigned long elapsed;
	struct timeval start, stop;
	FILE *fd;
	char *key;

	cuda_check(cudaSetDevice(0), "cudaSetDevice");

	/* Allocate memory */
	if ((key = (char *)malloc(40 * sizeof(char))) == NULL) {
		printf("Malloc failed!\n");
		exit(EXIT_FAILURE);
	}

	/* Host-side batch tables, allocated with cudaMallocHost */
	cuda_check(cudaMallocHost((void **) &batchKeys,
	    ((BATCH_SIZE + 1) * MAX_LEN_ALIGNED) * sizeof(char)),
	    "cudaMallocHost(batchKeys)");
	cuda_check(cudaMallocHost((void **) &nKeys,
	    BATCH_SIZE * sizeof(size_t)), "cudaMallocHost(nKeys)");
	cuda_check(cudaMallocHost((void **) &batchIndex,
	    (BATCH_SIZE + 1) * sizeof(int)), "cudaMallocHost(batchIndex)");
	cuda_check(cudaMallocHost((void **) &hashedKeys,
	    BATCH_SIZE * sizeof(uint32_t)), "cudaMallocHost(hashedKeys)");

	/* Device-side mirrors of the tables above */
	cuda_check(cudaMalloc((void **) &d_keys,
	    ((BATCH_SIZE + 1) * MAX_LEN_ALIGNED) * sizeof(char)),
	    "cudaMalloc(d_keys)");
	cuda_check(cudaMalloc((void **) &d_len,
	    BATCH_SIZE * sizeof(size_t)), "cudaMalloc(d_len)");
	cuda_check(cudaMalloc((void **) &d_index,
	    (BATCH_SIZE + 1) * sizeof(int)), "cudaMalloc(d_index)");
	cuda_check(cudaMalloc((void **) &d_res,
	    BATCH_SIZE * sizeof(uint32_t)), "cudaMalloc(d_res)");

	/* Create 'BATCH_SIZE' number of random keys
	 * and add them to batch table
	 */
	batchNo = 0;
	batchIndex[0] = 0;
	for (i = 0; i < BATCH_SIZE; i++) {
		gen_random(key, 30);
		add_to_batch(key, 30);
	}

	/* Start Time (execution + memory) */
#ifdef EXEC_MEM
	gettimeofday(&start, NULL);
#endif // EXEC_MEM

	/* MemCpy Host -> Device.
	 * Only the used part of the key buffer is copied: the offset of the
	 * last key plus its strlen() (this relies on that key being
	 * NUL-terminated inside batchKeys -- presumably done by
	 * add_to_batch(); confirm).
	 */
	cuda_check(cudaMemcpy(d_keys, batchKeys, (batchIndex[BATCH_SIZE-1] +
	    strlen(&batchKeys[batchIndex[BATCH_SIZE - 1]])) * sizeof(char),
	    cudaMemcpyHostToDevice), "cudaMemcpy(d_keys)");
	cuda_check(cudaMemcpy(d_len, nKeys, BATCH_SIZE * sizeof(size_t),
	    cudaMemcpyHostToDevice), "cudaMemcpy(d_len)");
	cuda_check(cudaMemcpy(d_index, batchIndex, BATCH_SIZE * sizeof(int),
	    cudaMemcpyHostToDevice), "cudaMemcpy(d_index)");

	/* Start Time (execution only) */
#ifndef EXEC_MEM
	gettimeofday(&start, NULL);
#endif // EXEC_MEM

	/* Call the kernel (CUDAhash is defined elsewhere; presumably it
	 * performs the kernel launch, so pick up any launch error here)
	 */
	CUDAhash(d_keys, d_index, d_len, d_res);
	cuda_check(cudaGetLastError(), "CUDAhash");

	/* Stop Time (execution only): wait for the device before reading
	 * the clock
	 */
#ifndef EXEC_MEM
	cuda_check(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
	gettimeofday(&stop, NULL);
#endif // EXEC_MEM

	/* MemCpy Device -> Host */
	cuda_check(cudaMemcpy(hashedKeys, d_res,
	    BATCH_SIZE * sizeof(uint32_t), cudaMemcpyDeviceToHost),
	    "cudaMemcpy(hashedKeys)");

	/* Stop Time (execution + memory) */
#ifdef EXEC_MEM
	gettimeofday(&stop, NULL);
#endif // EXEC_MEM

	/* Elapsed time between start and stop, in the unit implied by USECS */
	elapsed = (unsigned long)(((stop.tv_sec * USECS) + stop.tv_usec) -
	    ((start.tv_sec * USECS) + start.tv_usec));

#ifdef DEBUG
	for (i = 0; i < BATCH_SIZE; i++) {
		printf("%s\n", &batchKeys[batchIndex[i]]);
		printf("%u\n", hashedKeys[i]);
	}
#endif // DEBUG

	/* Print Time: elapsed value, then BATCH_SIZE divided by
	 * (elapsed / 1000000) and by 1000
	 */
	fd = fopen("log.txt", "a+");
	if (fd == NULL) {
		/* Do not write through a NULL stream; report and carry on
		 * to the cleanup below
		 */
		perror("log.txt");
		status = EXIT_FAILURE;
	} else {
		fprintf(fd, "%lu", elapsed);
		fprintf(fd, "\t%1.f\n", ((double)BATCH_SIZE /
		    ((double)elapsed / 1000000)) / 1000);
		fclose(fd);
	}

#ifdef DEBUG
	printf("Time: %lu \n", elapsed);
#endif // DEBUG

	/* Free memory: buffers obtained from cudaMallocHost must be released
	 * with cudaFreeHost, device buffers with cudaFree
	 */
	cuda_check(cudaFreeHost(batchKeys), "cudaFreeHost(batchKeys)");
	cuda_check(cudaFreeHost(nKeys), "cudaFreeHost(nKeys)");
	cuda_check(cudaFreeHost(hashedKeys), "cudaFreeHost(hashedKeys)");
	cuda_check(cudaFreeHost(batchIndex), "cudaFreeHost(batchIndex)");
	cuda_check(cudaFree(d_keys), "cudaFree(d_keys)");
	cuda_check(cudaFree(d_len), "cudaFree(d_len)");
	cuda_check(cudaFree(d_res), "cudaFree(d_res)");
	cuda_check(cudaFree(d_index), "cudaFree(d_index)");
	free(key);

	return status;
}