Beispiel #1
0
/*
 * Allocate one contiguous buffer of num_pages * (page_size / 4) words and
 * fill every word with a value obtained from common_rand().
 *
 * num_pages: number of pages to generate.
 * page_size: size of one page; it is divided by 4 to obtain the number of
 *            words per page, so any remainder of page_size / 4 is dropped.
 *            (Presumably page_size is in bytes and words are 4 bytes wide -
 *            TODO confirm against the callers.)
 *
 * Returns the pointer handed back by int_new_array(). Page j occupies the
 * words [j*num_words, (j+1)*num_words) of the returned buffer.
 * NOTE(review): ownership/freeing rules depend on how int_new_array()
 * allocates - confirm against its definition.
 *
 * The element count num_pages*num_words is computed in unsigned int
 * arithmetic and therefore wraps for very large inputs.
 */
unsigned int* rand_crc(unsigned int num_pages,unsigned int page_size)
{
	unsigned int i,j,num_words;
	unsigned int* page;

	num_words = page_size / 4;
	/* The message names this function so that an allocation failure is
	 * attributed to the right routine. */
	page = int_new_array(num_pages*num_words,"crc_formats.rand_crc() - Heap Overflow! Cannot allocate space for page");
	for(j=0; j<num_pages; j++)
	{
		/* Fill page j word by word, in increasing index order. */
		for(i=0; i<num_words; i++) {
			page[j*num_words+i] = common_rand();
		}
	}
	return page;
}
Beispiel #2
0
void runTest( int argc, char** argv) {

    int penalty,idx, index;
    int *input_itemsets, *output_itemsets, *reference;
    int size;
    double t1, t2;
    int i,j,k,l,c;
    int nw, n, w, traceback;
    int new_nw, new_w, new_n;

    int *input_seq_1, *input_seq_2, *aligned_seq_1, *aligned_seq_2;
    int input_seq_1_size = 2;
    int input_seq_2_size = 2;
    int aligned_index_1 = 0;
    int aligned_index_2 = 0;
    int aligned_seq_size = 0;
    int input_index_1 = 0;
    int input_index_2 = 0;
    int nb_possible_seq_items = 10;
    int w_limit = 0;
    int n_limit = 0;
    int print_results = 0;
    int print_intermediary_results = 0;
    int use_parallelizable_version = 1;

    int expected_aligned_seq_1_size = strlen(expected_aligned_seq_1_chars);
    int expected_aligned_seq_2_size = strlen(expected_aligned_seq_2_chars);
    int* expected_aligned_seq_1 = to_int_values(expected_aligned_seq_1_chars);
    int* expected_aligned_seq_2 = to_int_values(expected_aligned_seq_2_chars);

    cl_mem matrix_d, reference_d;
  	cl_int errcode;
    penalty = 1;

    while((c = getopt(argc, argv, "n:g:p:vsih")) != -1) {
        switch(c) {
        case 'n':
            // The rest of the implementation requires max_rows and max_cols to be equal
            // Size of the first sequence to be generated
            input_seq_1_size = atoi(optarg);
            // Size of the second sequence to be generated
            input_seq_2_size = atoi(optarg);
            break;
        case 'g':
            // Penalty cost for introducing a gap instead of matching to another
            // item
            penalty = atoi(optarg);
            break;
        case 'p':
            // Number of different items to generate
            nb_possible_seq_items = atoi(optarg);
            if (nb_possible_seq_items < 1 || nb_possible_seq_items > 24) {
                fprintf(stderr, "The number of different items to generate should be between 1 and 24.\n");
            }
            break;
        case 'v':
            // Verbose?
            print_results = 1;
            break;
        case 'i':
            print_intermediary_results = 1;
            print_results = 1;
            break;
        case 's':
            // Sequential version?
            use_parallelizable_version = 0;
            break;
        case 'h':
            // Help
            usage(argc, argv);
            break;
        default:
            usage(argc,argv);
        }
    }

    // Increase size by one to reserve space for the dynamic programming
    // base cases, where only gaps are used
    max_rows = input_seq_1_size + 1;
    max_cols = input_seq_2_size + 1;

    // To precompute substition costs for every pair of items.
    // Data is aligned with corresponding values in input_itemsets
    reference = (int *)malloc( max_rows * max_cols * sizeof(int) );

    // To store the dynamic programming results
    input_itemsets = (int *)malloc( max_rows * max_cols * sizeof(int) );

    // To store the first and second sequences to be matched. Start at 1 to
    // align the data with input_itemsets
    input_seq_1 = (int *)malloc(max_rows * sizeof(int));
    input_seq_2 = (int *)malloc(max_cols * sizeof(int));

    // To store the aligned sequences after matching.The aligned sequences use up
    // to the sum of items of both individual sequence, with the worst
    // case being when gaps are introduced for every item.
    aligned_seq_size = input_seq_1_size + input_seq_2_size;
    aligned_seq_1 = (int *)malloc(aligned_seq_size * sizeof(int));
    aligned_seq_2 = (int *)malloc(aligned_seq_size * sizeof(int));

    if (!input_itemsets || !input_seq_1 || !input_seq_2 ||
        !aligned_seq_1  || !aligned_seq_2) {
        fprintf(stderr, "ERROR: can not allocate memory");
        exit(1);
    }

    // Initialize memory to zero
    for (i=0; i<max_rows; i++){
        for (j=0; j<max_cols; j++){
            input_itemsets[input_index(i,j)] = 0;
        }
    }

    // Initialize the aligned data to be all gaps
    for (i=0; i<aligned_seq_size; ++i) {
        aligned_seq_1[i] = -1;
        aligned_seq_2[i] = -1;
    }

    // Generate two random sequences to align.
    for(i=1; i<max_rows; i++){
        input_seq_1[i] = abs(common_rand()) % nb_possible_seq_items;
    }
    for(j=1; j<max_cols; j++){
        input_seq_2[j] = abs(common_rand()) % nb_possible_seq_items;
    }

    if (print_results) fprintf(stderr, "Computing dynamic programming results\n");
    t1 = gettime();
    // Precompute substitution costs for every pair of sequence item.  Start
    // storing substitution costs at (1,1) to align the reference table values
    // with the corresponding dynamic programming results
    for (i = 1 ; i < max_rows; i++){
        for (j = 1 ; j < max_cols; j++){
            reference[input_index(i,j)] = blosum62[input_seq_1[i]][input_seq_2[j]];
        }
    }

    // Set cost for dynamic programming base cases, when only gaps are used.
    // (0,0) has a cost of 0 (no gap),
    // all others incur a cost of 'penalty' for each skipped item.
    for(i = 1; i< max_rows ; i++)
        input_itemsets[input_index(i,0)] = -i * penalty;
    for(j = 1; j< max_cols ; j++)
        input_itemsets[input_index(0,j)] = -j * penalty;


    cl_program clProgram;
    cl_kernel clKernel_nw1;
    cl_kernel clKernel_nw2;

    FILE *kernelFile;
    char *kernelSource;
    size_t kernelLength;

    kernelFile = fopen("needle_kernel.cl", "r");
    fseek(kernelFile, 0, SEEK_END);
    kernelLength = (size_t) ftell(kernelFile);
    kernelSource = (char *) malloc(sizeof(char)*kernelLength);
    rewind(kernelFile);
    fread((void *) kernelSource, kernelLength, 1, kernelFile);
    fclose(kernelFile);

    clProgram = clCreateProgramWithSource(context, 1, (const char **) &kernelSource, &kernelLength, &errcode);
    CHKERR(errcode, "Failed to create program with source!");

    free(kernelSource);

    errcode = clBuildProgram(clProgram, 1, &device_id, NULL, NULL, NULL);
    if (errcode == CL_BUILD_PROGRAM_FAILURE)
    {
      char *log;
      size_t logLength;
      errcode = clGetProgramBuildInfo(clProgram, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength);
      log = (char *) malloc(sizeof(char)*logLength);
      errcode = clGetProgramBuildInfo(clProgram, device_id, CL_PROGRAM_BUILD_LOG, logLength, (void *) log, NULL);
      fprintf(stderr, "Kernel build error! Log:\n%s", log);
      free(log);
      return;
    }
    CHKERR(errcode, "Failed to get program build info!");

    clKernel_nw1 = clCreateKernel(clProgram, "needle_opencl_shared_1", &errcode);
    CHKERR(errcode, "Failed to create kernel!");
    clKernel_nw2 = clCreateKernel(clProgram, "needle_opencl_shared_2", &errcode);
    CHKERR(errcode, "Failed to create kernel!");

    size = max_cols * max_rows;
    reference_d = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int)*size, NULL, &errcode);
    CHKERR(errcode, "Failed to create buffer!");
    matrix_d = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*size, NULL, &errcode);
    CHKERR(errcode, "Failed to create buffer!");

    errcode = clEnqueueWriteBuffer(commands, reference_d, CL_TRUE, 0, sizeof(int)*size, (void *) reference, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    CHKERR(errcode, "Failed to enqueue write buffer!");

    errcode = clEnqueueWriteBuffer(commands, matrix_d, CL_TRUE, 0, sizeof(int)*size, (void *) input_itemsets, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    CHKERR(errcode, "Failed to enqueue write buffer!");

    size_t localWorkSize[2] = {BLOCK_SIZE, 1}; //BLOCK_SIZE work items per work-group in 1D only.
    size_t globalWorkSize[2];
    int block_width = ( max_cols - 1 )/BLOCK_SIZE;

    //process top-left matrix
    //Does what the 1st kernel loop does in a higher (block) level. i.e., takes care of blocks of BLOCK_SIZExBLOCK_SIZE in a wave-front pattern upwards
    //the main anti-diagonal (on block-level).
    //Each iteration takes care of 1, 2, 3, ... blocks that can be computed in parallel w/o dependencies
    //E.g. first block [0][0], then blocks [0][1] and [1][0], then [0][2], [1][1], [2][0], etc.
    for(i = 1 ; i <= block_width ; i++){
      globalWorkSize[0] = i*localWorkSize[0]; //i.e., for 1st iteration BLOCK_SIZE total (=1 W.G.), for 2nd iteration 2*BLOCK_SIZE total work items
      // (=2 W.G.)
      globalWorkSize[1] = localWorkSize[1];
      errcode = clSetKernelArg(clKernel_nw1, 0, sizeof(cl_mem), (void *) &reference_d);
      errcode |= clSetKernelArg(clKernel_nw1, 1, sizeof(cl_mem), (void *) &matrix_d);
      errcode |= clSetKernelArg(clKernel_nw1, 2, sizeof(int), (void *) &max_cols);
      errcode |= clSetKernelArg(clKernel_nw1, 3, sizeof(int), (void *) &penalty);
      errcode |= clSetKernelArg(clKernel_nw1, 4, sizeof(int), (void *) &i);
      errcode |= clSetKernelArg(clKernel_nw1, 5, sizeof(int), (void *) &block_width);
      CHKERR(errcode, "Failed to set kernel arguments!");
      errcode = clEnqueueNDRangeKernel(commands, clKernel_nw1, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent);
      clFinish(commands);
      CHKERR(errcode, "Failed to enqueue kernel!");
    }

    //process bottom-right matrix
    //Does what the 2nd kernel loop does in a higher (block) level. i.e., takes care of blocks of BLOCK_SIZExBLOCK_SIZE in a wave-front pattern downwards
    //the main anti-diagonal.
    //Each iteration takes care of ..., 3, 2, 1 blocks that can be computed in parallel w/o dependencies
    for(i = block_width - 1  ; i >= 1 ; i--){
      globalWorkSize[0] = i*localWorkSize[0];
      globalWorkSize[1] = localWorkSize[1];
      errcode = clSetKernelArg(clKernel_nw2, 0, sizeof(cl_mem), (void *) &reference_d);
      errcode |= clSetKernelArg(clKernel_nw2, 1, sizeof(cl_mem), (void *) &matrix_d);
      errcode |= clSetKernelArg(clKernel_nw2, 2, sizeof(int), (void *) &max_cols);
      errcode |= clSetKernelArg(clKernel_nw2, 3, sizeof(int), (void *) &penalty);
      errcode |= clSetKernelArg(clKernel_nw2, 4, sizeof(int), (void *) &i);
      errcode |= clSetKernelArg(clKernel_nw2, 5, sizeof(int), (void *) &block_width);
      CHKERR(errcode, "Failed to set kernel arguments!");
      errcode = clEnqueueNDRangeKernel(commands, clKernel_nw2, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent);
      clFinish(commands);
      CHKERR(errcode, "Failed to enqueue kernel!");
    }

    errcode = clEnqueueReadBuffer(commands, matrix_d, CL_TRUE, 0, sizeof(float)*size, (void *) input_itemsets, 0, NULL, &ocdTempEvent);
    clFinish(commands);
    CHKERR(errcode, "Failed to enqueue read buffer!");

    clReleaseMemObject(reference_d);
    clReleaseMemObject(matrix_d);
    clReleaseKernel(clKernel_nw1);
    clReleaseKernel(clKernel_nw2);
    clReleaseProgram(clProgram);
    clReleaseCommandQueue(commands);
    clReleaseContext(context);

    t2 = gettime();

    // Reconstruct the aligned sequences starting from the last items of each
    // sequence.
    aligned_index_1 = aligned_seq_size - 1;
    aligned_index_2 = aligned_seq_size - 1;

    if (print_results) fprintf(stderr, "Trace solution back\n");
    // Start tracing through the results from the last computed value, when all
    // items have been exhausted for both sequences (in the right bottom corner),
    // up to the beginning of both sequences (on the top left corner).
    for (i = max_rows - 1,  j = max_cols - 1; !(i==0 && j==0);){
        // Recompute which of the previous values, relative to the current position, led
        // to our current maximum value
        if ( i > 0 && j > 0 ){
            nw = input_itemsets[input_index(i-1,j-1)] + reference[input_index(i,j)];
            w  = input_itemsets[input_index(i,j-1)] - penalty;
            n  = input_itemsets[input_index(i-1,j)] - penalty;
            n_limit = 0;
            w_limit = 0;
            traceback = maximum(nw, w, n);
        } else if ( i == 0 ){
            n_limit = 1;
            w_limit = 0;
        } else if ( j == 0 ){
            n_limit = 0;
            w_limit = 1;
        } else{ fprintf(stderr, "ERROR\n"); exit(1); }

        if(n_limit == 0 && w_limit == 0 && traceback == nw) {
            // Add the matching items to each of the aligned sequences
            // and move iterators to the previous items
            aligned_seq_1[aligned_index_1--] = input_seq_1[i--];
            aligned_seq_2[aligned_index_2--] = input_seq_2[j--];
        }
        else if(n_limit == 1 || traceback == w) {
            // Introduce a gap in the first aligned sequence,
            // add the corresponding item in the second sequence,
            // and move the second iterator
            aligned_index_1--;
            aligned_seq_2[aligned_index_2--] = input_seq_2[j--];
        }
        else if(w_limit == 1 || traceback == n) {
            // Introduce a gap in the second aligned sequence,
            // add the corresponding item in the first sequence,
            // and move the first iterator
            aligned_index_2--;
            aligned_seq_1[aligned_index_1--] = input_seq_1[i--];
        } else { fprintf(stderr, "ERROR\n"); exit(1); }
    }

    if (print_results) {
        // Print the input sequences and the resulting aligned sequences.
        // Convert the integer values for items to characters for legibility.
        fprintf(stderr, "Input Seq 1  :");
        for (i=1; i < max_rows; ++i) {
            fprintf(stderr, "%c", to_char(input_seq_1[i]));
        }
        fprintf(stderr, "\n");

        fprintf(stderr, "Input Seq 2  :");
        for (j=1; j < max_cols; ++j) {
            fprintf(stderr, "%c", to_char(input_seq_2[j]));
        }
        fprintf(stderr, "\n");

        fprintf(stderr, "Aligned Seq 1:");
        for (i=0; i < aligned_seq_size; ++i) {
            fprintf(stderr, "%c", to_char(aligned_seq_1[i]));
        }
        fprintf(stderr, "\n");
        fprintf(stderr, "Aligned Seq 2:");
        for (j=0; j < aligned_seq_size; ++j) {
            fprintf(stderr, "%c", to_char(aligned_seq_2[j]));
        }
        fprintf(stderr, "\n");

        if (print_intermediary_results) {
            for (i=0; i<max_rows; ++i) {
                for (j=0; j<max_cols; ++j) {
                    fprintf(stderr, "%c%.2d ", input_itemsets[input_index(i,j)] >= 0 ? '+' : '-', abs(input_itemsets[input_index(i,j)]));
                }
                fprintf(stderr, "\n");
            }
        }
    }

    if (input_seq_1_size == 4096 && input_seq_2_size == 4096 && penalty == 1 && nb_possible_seq_items == 10) {
        if (!seq_equal(aligned_seq_1, expected_aligned_seq_1, aligned_seq_size, expected_aligned_seq_1_size)) {
            fprintf(stderr, "ERROR: the aligned sequence 1 is different from the values expected.\n");
            exit(1);
        }
        if (!seq_equal(aligned_seq_2, expected_aligned_seq_2, aligned_seq_size, expected_aligned_seq_2_size)) {
            fprintf(stderr, "ERROR: the aligned sequence 2 is different from the values expected.\n");
            exit(1);
        }
    } else {
        fprintf(stderr,
            "WARNING: No self-checking for dimension '%d', penalty '%d', and number of possible items '%d'\n",
            input_seq_1_size,
            penalty,
            nb_possible_seq_items
            );
    }

    free(reference);
    free(input_itemsets);
    free(input_seq_1);
    free(input_seq_2);
    free(aligned_seq_1);
    free(aligned_seq_2);
    free(expected_aligned_seq_1);
    free(expected_aligned_seq_2);

    printf("{ \"status\": %d, \"options\": \"-n %d -g %d\", \"time\": %f }\n", 1, input_seq_1_size, penalty, t2-t1);
}
Beispiel #3
0
//unsigned long gen_rand(const long LB, const long HB) {
/*
 * Draw a value in [LB, HB] from common_rand().
 *
 * LB: lower bound, must be >= 0.
 * HB: upper bound, must be >= 0 and not smaller than LB.
 *
 * The bounds are validated through check() with an "Invalid Bound(s)"
 * message; what check() does on failure is defined elsewhere.
 * Returns LB plus the remainder of common_rand() divided by the number of
 * values in the interval.
 */
int gen_rand(const int LB, const int HB) {
    /* Number of distinct values in the closed interval [LB, HB]. */
    const int span = HB - LB + 1;

    check(!(HB < 0 || LB < 0 || span <= 0),
          "sparse_formats.gen_rand() - Invalid Bound(s). Exiting...");

    return LB + (common_rand() % span);
}