/**
 * Allocate a buffer holding num_pages pages and fill every word of it with
 * a value obtained from common_rand().
 *
 * @param num_pages  number of pages to generate
 * @param page_size  size of one page; it is divided by 4 to obtain the number
 *                   of words per page (presumably page_size is in bytes and a
 *                   word is 4 bytes wide - TODO confirm against callers)
 * @return pointer to num_pages * (page_size / 4) words obtained from
 *         int_new_array(). NOTE(review): ownership presumably passes to the
 *         caller - confirm how int_new_array() memory must be released.
 */
unsigned int* rand_crc(unsigned int num_pages,unsigned int page_size)
{
    unsigned int i,j,num_words;
    unsigned int* page;

    num_words = page_size / 4;

    /* The message passed to int_new_array() names this function so that an
     * allocation failure is reported against the right call site. */
    page = int_new_array(num_pages*num_words,"crc_formats.rand_crc() - Heap Overflow! Cannot allocate space for page");

    /* Pages are stored back to back: word i of page j is page[j*num_words+i]. */
    for(j=0; j<num_pages; j++)
    {
        for(i=0; i<num_words; i++)
        {
            page[j*num_words+i] = common_rand();
        }
    }
    return page;
}
void runTest( int argc, char** argv) { int penalty,idx, index; int *input_itemsets, *output_itemsets, *reference; int size; double t1, t2; int i,j,k,l,c; int nw, n, w, traceback; int new_nw, new_w, new_n; int *input_seq_1, *input_seq_2, *aligned_seq_1, *aligned_seq_2; int input_seq_1_size = 2; int input_seq_2_size = 2; int aligned_index_1 = 0; int aligned_index_2 = 0; int aligned_seq_size = 0; int input_index_1 = 0; int input_index_2 = 0; int nb_possible_seq_items = 10; int w_limit = 0; int n_limit = 0; int print_results = 0; int print_intermediary_results = 0; int use_parallelizable_version = 1; int expected_aligned_seq_1_size = strlen(expected_aligned_seq_1_chars); int expected_aligned_seq_2_size = strlen(expected_aligned_seq_2_chars); int* expected_aligned_seq_1 = to_int_values(expected_aligned_seq_1_chars); int* expected_aligned_seq_2 = to_int_values(expected_aligned_seq_2_chars); cl_mem matrix_d, reference_d; cl_int errcode; penalty = 1; while((c = getopt(argc, argv, "n:g:p:vsih")) != -1) { switch(c) { case 'n': // The rest of the implementation requires max_rows and max_cols to be equal // Size of the first sequence to be generated input_seq_1_size = atoi(optarg); // Size of the second sequence to be generated input_seq_2_size = atoi(optarg); break; case 'g': // Penalty cost for introducing a gap instead of matching to another // item penalty = atoi(optarg); break; case 'p': // Number of different items to generate nb_possible_seq_items = atoi(optarg); if (nb_possible_seq_items < 1 || nb_possible_seq_items > 24) { fprintf(stderr, "The number of different items to generate should be between 1 and 24.\n"); } break; case 'v': // Verbose? print_results = 1; break; case 'i': print_intermediary_results = 1; print_results = 1; break; case 's': // Sequential version? 
use_parallelizable_version = 0; break; case 'h': // Help usage(argc, argv); break; default: usage(argc,argv); } } // Increase size by one to reserve space for the dynamic programming // base cases, where only gaps are used max_rows = input_seq_1_size + 1; max_cols = input_seq_2_size + 1; // To precompute substition costs for every pair of items. // Data is aligned with corresponding values in input_itemsets reference = (int *)malloc( max_rows * max_cols * sizeof(int) ); // To store the dynamic programming results input_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) ); // To store the first and second sequences to be matched. Start at 1 to // align the data with input_itemsets input_seq_1 = (int *)malloc(max_rows * sizeof(int)); input_seq_2 = (int *)malloc(max_cols * sizeof(int)); // To store the aligned sequences after matching.The aligned sequences use up // to the sum of items of both individual sequence, with the worst // case being when gaps are introduced for every item. aligned_seq_size = input_seq_1_size + input_seq_2_size; aligned_seq_1 = (int *)malloc(aligned_seq_size * sizeof(int)); aligned_seq_2 = (int *)malloc(aligned_seq_size * sizeof(int)); if (!input_itemsets || !input_seq_1 || !input_seq_2 || !aligned_seq_1 || !aligned_seq_2) { fprintf(stderr, "ERROR: can not allocate memory"); exit(1); } // Initialize memory to zero for (i=0; i<max_rows; i++){ for (j=0; j<max_cols; j++){ input_itemsets[input_index(i,j)] = 0; } } // Initialize the aligned data to be all gaps for (i=0; i<aligned_seq_size; ++i) { aligned_seq_1[i] = -1; aligned_seq_2[i] = -1; } // Generate two random sequences to align. for(i=1; i<max_rows; i++){ input_seq_1[i] = abs(common_rand()) % nb_possible_seq_items; } for(j=1; j<max_cols; j++){ input_seq_2[j] = abs(common_rand()) % nb_possible_seq_items; } if (print_results) fprintf(stderr, "Computing dynamic programming results\n"); t1 = gettime(); // Precompute substitution costs for every pair of sequence item. 
Start // storing substitution costs at (1,1) to align the reference table values // with the corresponding dynamic programming results for (i = 1 ; i < max_rows; i++){ for (j = 1 ; j < max_cols; j++){ reference[input_index(i,j)] = blosum62[input_seq_1[i]][input_seq_2[j]]; } } // Set cost for dynamic programming base cases, when only gaps are used. // (0,0) has a cost of 0 (no gap), // all others incur a cost of 'penalty' for each skipped item. for(i = 1; i< max_rows ; i++) input_itemsets[input_index(i,0)] = -i * penalty; for(j = 1; j< max_cols ; j++) input_itemsets[input_index(0,j)] = -j * penalty; cl_program clProgram; cl_kernel clKernel_nw1; cl_kernel clKernel_nw2; FILE *kernelFile; char *kernelSource; size_t kernelLength; kernelFile = fopen("needle_kernel.cl", "r"); fseek(kernelFile, 0, SEEK_END); kernelLength = (size_t) ftell(kernelFile); kernelSource = (char *) malloc(sizeof(char)*kernelLength); rewind(kernelFile); fread((void *) kernelSource, kernelLength, 1, kernelFile); fclose(kernelFile); clProgram = clCreateProgramWithSource(context, 1, (const char **) &kernelSource, &kernelLength, &errcode); CHKERR(errcode, "Failed to create program with source!"); free(kernelSource); errcode = clBuildProgram(clProgram, 1, &device_id, NULL, NULL, NULL); if (errcode == CL_BUILD_PROGRAM_FAILURE) { char *log; size_t logLength; errcode = clGetProgramBuildInfo(clProgram, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength); log = (char *) malloc(sizeof(char)*logLength); errcode = clGetProgramBuildInfo(clProgram, device_id, CL_PROGRAM_BUILD_LOG, logLength, (void *) log, NULL); fprintf(stderr, "Kernel build error! 
Log:\n%s", log); free(log); return; } CHKERR(errcode, "Failed to get program build info!"); clKernel_nw1 = clCreateKernel(clProgram, "needle_opencl_shared_1", &errcode); CHKERR(errcode, "Failed to create kernel!"); clKernel_nw2 = clCreateKernel(clProgram, "needle_opencl_shared_2", &errcode); CHKERR(errcode, "Failed to create kernel!"); size = max_cols * max_rows; reference_d = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int)*size, NULL, &errcode); CHKERR(errcode, "Failed to create buffer!"); matrix_d = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*size, NULL, &errcode); CHKERR(errcode, "Failed to create buffer!"); errcode = clEnqueueWriteBuffer(commands, reference_d, CL_TRUE, 0, sizeof(int)*size, (void *) reference, 0, NULL, &ocdTempEvent); clFinish(commands); CHKERR(errcode, "Failed to enqueue write buffer!"); errcode = clEnqueueWriteBuffer(commands, matrix_d, CL_TRUE, 0, sizeof(int)*size, (void *) input_itemsets, 0, NULL, &ocdTempEvent); clFinish(commands); CHKERR(errcode, "Failed to enqueue write buffer!"); size_t localWorkSize[2] = {BLOCK_SIZE, 1}; //BLOCK_SIZE work items per work-group in 1D only. size_t globalWorkSize[2]; int block_width = ( max_cols - 1 )/BLOCK_SIZE; //process top-left matrix //Does what the 1st kernel loop does in a higher (block) level. i.e., takes care of blocks of BLOCK_SIZExBLOCK_SIZE in a wave-front pattern upwards //the main anti-diagonal (on block-level). //Each iteration takes care of 1, 2, 3, ... blocks that can be computed in parallel w/o dependencies //E.g. first block [0][0], then blocks [0][1] and [1][0], then [0][2], [1][1], [2][0], etc. for(i = 1 ; i <= block_width ; i++){ globalWorkSize[0] = i*localWorkSize[0]; //i.e., for 1st iteration BLOCK_SIZE total (=1 W.G.), for 2nd iteration 2*BLOCK_SIZE total work items // (=2 W.G.) 
globalWorkSize[1] = localWorkSize[1]; errcode = clSetKernelArg(clKernel_nw1, 0, sizeof(cl_mem), (void *) &reference_d); errcode |= clSetKernelArg(clKernel_nw1, 1, sizeof(cl_mem), (void *) &matrix_d); errcode |= clSetKernelArg(clKernel_nw1, 2, sizeof(int), (void *) &max_cols); errcode |= clSetKernelArg(clKernel_nw1, 3, sizeof(int), (void *) &penalty); errcode |= clSetKernelArg(clKernel_nw1, 4, sizeof(int), (void *) &i); errcode |= clSetKernelArg(clKernel_nw1, 5, sizeof(int), (void *) &block_width); CHKERR(errcode, "Failed to set kernel arguments!"); errcode = clEnqueueNDRangeKernel(commands, clKernel_nw1, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent); clFinish(commands); CHKERR(errcode, "Failed to enqueue kernel!"); } //process bottom-right matrix //Does what the 2nd kernel loop does in a higher (block) level. i.e., takes care of blocks of BLOCK_SIZExBLOCK_SIZE in a wave-front pattern downwards //the main anti-diagonal. //Each iteration takes care of ..., 3, 2, 1 blocks that can be computed in parallel w/o dependencies for(i = block_width - 1 ; i >= 1 ; i--){ globalWorkSize[0] = i*localWorkSize[0]; globalWorkSize[1] = localWorkSize[1]; errcode = clSetKernelArg(clKernel_nw2, 0, sizeof(cl_mem), (void *) &reference_d); errcode |= clSetKernelArg(clKernel_nw2, 1, sizeof(cl_mem), (void *) &matrix_d); errcode |= clSetKernelArg(clKernel_nw2, 2, sizeof(int), (void *) &max_cols); errcode |= clSetKernelArg(clKernel_nw2, 3, sizeof(int), (void *) &penalty); errcode |= clSetKernelArg(clKernel_nw2, 4, sizeof(int), (void *) &i); errcode |= clSetKernelArg(clKernel_nw2, 5, sizeof(int), (void *) &block_width); CHKERR(errcode, "Failed to set kernel arguments!"); errcode = clEnqueueNDRangeKernel(commands, clKernel_nw2, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent); clFinish(commands); CHKERR(errcode, "Failed to enqueue kernel!"); } errcode = clEnqueueReadBuffer(commands, matrix_d, CL_TRUE, 0, sizeof(float)*size, (void *) input_itemsets, 0, NULL, 
&ocdTempEvent); clFinish(commands); CHKERR(errcode, "Failed to enqueue read buffer!"); clReleaseMemObject(reference_d); clReleaseMemObject(matrix_d); clReleaseKernel(clKernel_nw1); clReleaseKernel(clKernel_nw2); clReleaseProgram(clProgram); clReleaseCommandQueue(commands); clReleaseContext(context); t2 = gettime(); // Reconstruct the aligned sequences starting from the last items of each // sequence. aligned_index_1 = aligned_seq_size - 1; aligned_index_2 = aligned_seq_size - 1; if (print_results) fprintf(stderr, "Trace solution back\n"); // Start tracing through the results from the last computed value, when all // items have been exhausted for both sequences (in the right bottom corner), // up to the beginning of both sequences (on the top left corner). for (i = max_rows - 1, j = max_cols - 1; !(i==0 && j==0);){ // Recompute which of the previous values, relative to the current position, led // to our current maximum value if ( i > 0 && j > 0 ){ nw = input_itemsets[input_index(i-1,j-1)] + reference[input_index(i,j)]; w = input_itemsets[input_index(i,j-1)] - penalty; n = input_itemsets[input_index(i-1,j)] - penalty; n_limit = 0; w_limit = 0; traceback = maximum(nw, w, n); } else if ( i == 0 ){ n_limit = 1; w_limit = 0; } else if ( j == 0 ){ n_limit = 0; w_limit = 1; } else{ fprintf(stderr, "ERROR\n"); exit(1); } if(n_limit == 0 && w_limit == 0 && traceback == nw) { // Add the matching items to each of the aligned sequences // and move iterators to the previous items aligned_seq_1[aligned_index_1--] = input_seq_1[i--]; aligned_seq_2[aligned_index_2--] = input_seq_2[j--]; } else if(n_limit == 1 || traceback == w) { // Introduce a gap in the first aligned sequence, // add the corresponding item in the second sequence, // and move the second iterator aligned_index_1--; aligned_seq_2[aligned_index_2--] = input_seq_2[j--]; } else if(w_limit == 1 || traceback == n) { // Introduce a gap in the second aligned sequence, // add the corresponding item in the first sequence, 
// and move the first iterator aligned_index_2--; aligned_seq_1[aligned_index_1--] = input_seq_1[i--]; } else { fprintf(stderr, "ERROR\n"); exit(1); } } if (print_results) { // Print the input sequences and the resulting aligned sequences. // Convert the integer values for items to characters for legibility. fprintf(stderr, "Input Seq 1 :"); for (i=1; i < max_rows; ++i) { fprintf(stderr, "%c", to_char(input_seq_1[i])); } fprintf(stderr, "\n"); fprintf(stderr, "Input Seq 2 :"); for (j=1; j < max_cols; ++j) { fprintf(stderr, "%c", to_char(input_seq_2[j])); } fprintf(stderr, "\n"); fprintf(stderr, "Aligned Seq 1:"); for (i=0; i < aligned_seq_size; ++i) { fprintf(stderr, "%c", to_char(aligned_seq_1[i])); } fprintf(stderr, "\n"); fprintf(stderr, "Aligned Seq 2:"); for (j=0; j < aligned_seq_size; ++j) { fprintf(stderr, "%c", to_char(aligned_seq_2[j])); } fprintf(stderr, "\n"); if (print_intermediary_results) { for (i=0; i<max_rows; ++i) { for (j=0; j<max_cols; ++j) { fprintf(stderr, "%c%.2d ", input_itemsets[input_index(i,j)] >= 0 ? 
'+' : '-', abs(input_itemsets[input_index(i,j)])); } fprintf(stderr, "\n"); } } } if (input_seq_1_size == 4096 && input_seq_2_size == 4096 && penalty == 1 && nb_possible_seq_items == 10) { if (!seq_equal(aligned_seq_1, expected_aligned_seq_1, aligned_seq_size, expected_aligned_seq_1_size)) { fprintf(stderr, "ERROR: the aligned sequence 1 is different from the values expected.\n"); exit(1); } if (!seq_equal(aligned_seq_2, expected_aligned_seq_2, aligned_seq_size, expected_aligned_seq_2_size)) { fprintf(stderr, "ERROR: the aligned sequence 2 is different from the values expected.\n"); exit(1); } } else { fprintf(stderr, "WARNING: No self-checking for dimension '%d', penalty '%d', and number of possible items '%d'\n", input_seq_1_size, penalty, nb_possible_seq_items ); } free(reference); free(input_itemsets); free(input_seq_1); free(input_seq_2); free(aligned_seq_1); free(aligned_seq_2); free(expected_aligned_seq_1); free(expected_aligned_seq_2); printf("{ \"status\": %d, \"options\": \"-n %d -g %d\", \"time\": %f }\n", 1, input_seq_1_size, penalty, t2-t1); }
/**
 * Draw a value from common_rand() and map it into the interval that starts
 * at LB and spans (HB - LB + 1) values.
 *
 * The bounds are validated through check(): both must be non-negative and
 * the interval must contain at least one value. What happens on a failed
 * check is decided by check() itself (the message suggests it terminates
 * the program - confirm against its definition).
 *
 * @param LB  lower bound of the interval
 * @param HB  upper bound of the interval
 * @return (common_rand() % (HB - LB + 1)) + LB
 */
int gen_rand(const int LB, const int HB)
{
    const int span = HB - LB + 1;
    const int bounds_ok = (HB >= 0 && LB >= 0 && span > 0);

    check(bounds_ok,"sparse_formats.gen_rand() - Invalid Bound(s). Exiting...");

    return LB + (common_rand() % span);
}