static int compute_batches(struct xdf* xdf, int assign) { struct data_batch curr, *currb; unsigned int nbatch = 1, iarr, foff, dlen; const struct xdfch* ch; currb = assign ? xdf->batch : &curr; reset_batch(currb, 0, 0); for (iarr=0; iarr < xdf->narrays; iarr++) { foff = 0; // Scan channels in order to find different batches for (ch=xdf->channels; ch; ch=ch->next) { if (ch->iarray < 0) continue; dlen = xdf_get_datasize(ch->inmemtype); // Consistency checks if ((unsigned int)ch->iarray > xdf->narrays || ch->offset + dlen > xdf->array_stride[ch->iarray]) return -1; // Linearize the processing of channel sourcing // the same input array if ((iarr == (unsigned int)ch->iarray) && !add_to_batch(currb, ch, foff)) { nbatch++; if (assign) currb++; reset_batch(currb, iarr, foff); add_to_batch(currb, ch, foff); } foff += dlen; } } if (assign) link_batches(xdf, nbatch); return nbatch; }
/* Print a diagnostic and terminate if a CUDA runtime call did not succeed. */
static void die_on_cuda_error(cudaError_t status, const char *what)
{
	if (status == cudaSuccess)
		return;
	fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
	exit(EXIT_FAILURE);
}

/*
 * Benchmark driver.
 *
 * Fills the batch table with BATCH_SIZE random 30-character keys, copies the
 * batch to the device, runs CUDAhash() on it, copies the hashes back and
 * appends one line to "log.txt": elapsed microseconds, a tab, then a
 * throughput figure derived from BATCH_SIZE and the elapsed time.
 *
 * With EXEC_MEM defined the timed region covers the host<->device copies and
 * the hash call; otherwise it covers only the hash call followed by
 * cudaDeviceSynchronize().
 *
 * Returns 0 on success, EXIT_FAILURE if the log file cannot be opened. Any
 * failing CUDA runtime call terminates the process with a diagnostic.
 */
int main()
{
	int i;
	int status = 0;
	struct timeval start, stop;
	unsigned long elapsed_us;
	FILE *fd;
	char *key;

	die_on_cuda_error(cudaSetDevice(0), "cudaSetDevice");

	/* Allocate memory */
	if ((key = (char *)malloc(40 * sizeof(char))) == NULL) {
		printf("Malloc failed!\n");
		exit(EXIT_FAILURE);
	}

	/* Pinned host buffers */
	die_on_cuda_error(cudaMallocHost((void **) &batchKeys, ((BATCH_SIZE + 1) * MAX_LEN_ALIGNED) * sizeof(char)), "cudaMallocHost(batchKeys)");
	die_on_cuda_error(cudaMallocHost((void **) &nKeys, BATCH_SIZE * sizeof(size_t)), "cudaMallocHost(nKeys)");
	die_on_cuda_error(cudaMallocHost((void **) &batchIndex, (BATCH_SIZE + 1) * sizeof(int)), "cudaMallocHost(batchIndex)");
	die_on_cuda_error(cudaMallocHost((void **) &hashedKeys, BATCH_SIZE * sizeof(uint32_t)), "cudaMallocHost(hashedKeys)");

	/* Device buffers mirroring the host ones */
	die_on_cuda_error(cudaMalloc((void **) &d_keys, ((BATCH_SIZE + 1) * MAX_LEN_ALIGNED) * sizeof(char)), "cudaMalloc(d_keys)");
	die_on_cuda_error(cudaMalloc((void **) &d_len, BATCH_SIZE * sizeof(size_t)), "cudaMalloc(d_len)");
	die_on_cuda_error(cudaMalloc((void **) &d_index, (BATCH_SIZE + 1) * sizeof(int)), "cudaMalloc(d_index)");
	die_on_cuda_error(cudaMalloc((void **) &d_res, BATCH_SIZE * sizeof(uint32_t)), "cudaMalloc(d_res)");

	/* Create 'BATCH_SIZE' number of random keys
	 * and add them to batch table */
	batchNo = 0;
	batchIndex[0] = 0;
	for (i = 0; i < BATCH_SIZE; i++) {
		gen_random(key, 30);
		add_to_batch(key, 30);
	}

	/* Start Time (execution + memory) */
#ifdef EXEC_MEM
	gettimeofday(&start, NULL);
#endif // EXEC_MEM

	/* MemCpy Host -> Device.
	 * Only the used part of the key buffer is sent: the start offset of
	 * the last key plus its string length. */
	die_on_cuda_error(cudaMemcpy(d_keys, batchKeys, (batchIndex[BATCH_SIZE-1] + strlen(&batchKeys[batchIndex[BATCH_SIZE - 1]])) * sizeof(char), cudaMemcpyHostToDevice), "cudaMemcpy(d_keys)");
	die_on_cuda_error(cudaMemcpy(d_len, nKeys, BATCH_SIZE * sizeof(size_t), cudaMemcpyHostToDevice), "cudaMemcpy(d_len)");
	die_on_cuda_error(cudaMemcpy(d_index, batchIndex, BATCH_SIZE * sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(d_index)");

	/* Start Time (execution only) */
#ifndef EXEC_MEM
	gettimeofday(&start, NULL);
#endif // EXEC_MEM

	/* Call the kernel */
	CUDAhash(d_keys, d_index, d_len, d_res);

	/* Stop Time (execution only): wait for the device before reading
	 * the clock */
#ifndef EXEC_MEM
	die_on_cuda_error(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
	gettimeofday(&stop, NULL);
#endif // EXEC_MEM

	/* Surface any error recorded by the hash launch */
	die_on_cuda_error(cudaGetLastError(), "CUDAhash");

	/* MemCpy Device -> Host */
	die_on_cuda_error(cudaMemcpy(hashedKeys, d_res, BATCH_SIZE * sizeof(uint32_t), cudaMemcpyDeviceToHost), "cudaMemcpy(hashedKeys)");

	/* Stop Time (execution + memory) */
#ifdef EXEC_MEM
	gettimeofday(&stop, NULL);
#endif // EXEC_MEM

#ifdef DEBUG
	for (i = 0; i < BATCH_SIZE; i++) {
		printf("%s\n", &batchKeys[batchIndex[i]]);
		printf("%u\n", hashedKeys[i]);
	}
#endif // DEBUG

	/* Elapsed time, computed once and reused for every report below */
	elapsed_us = (unsigned long)(((stop.tv_sec * USECS) + stop.tv_usec) - ((start.tv_sec * USECS) + start.tv_usec));

	/* Print Time */
	fd = fopen("log.txt", "a+");
	if (fd == NULL) {
		/* Report instead of passing NULL to fprintf */
		perror("log.txt");
		status = EXIT_FAILURE;
	} else {
		fprintf(fd, "%lu", elapsed_us);
		fprintf(fd, "\t%1.f\n", ((double)BATCH_SIZE / ((double)elapsed_us / 1000000 )) / 1000);
		fclose(fd);
	}

#ifdef DEBUG
	printf("Time: %lu \n", elapsed_us);
#endif // DEBUG

	/* Free memory. Buffers obtained from cudaMallocHost must be released
	 * with cudaFreeHost; cudaFree is only for device allocations. */
	free(key);
	cudaFreeHost(batchKeys);
	cudaFreeHost(nKeys);
	cudaFreeHost(hashedKeys);
	cudaFreeHost(batchIndex);
	cudaFree(d_keys);
	cudaFree(d_len);
	cudaFree(d_res);
	cudaFree(d_index);

	return status;
}