コード例 #1
0
ファイル: apply.c プロジェクト: JSefara/foma
void apply_index(struct apply_handle *h, int inout, int densitycutoff, int mem_limit, int flags_only) {
    struct fsm_state *fsm;
    unsigned int cnt = 0;
    int i, j, maxtrans, numtrans, laststate, sym;
    fsm = h->gstates;

    struct apply_state_index **indexptr, *iptr, *tempiptr;

    struct pre_index {
	int state_no;
	struct pre_index *next;
    } *pre_index, *tp, *tpp;
    if (flags_only && !h->has_flags) {
	return;
    }
    /* get numtrans */
    for (i=0, laststate = 0, maxtrans = 0, numtrans = 0; (fsm+i)->state_no != -1; i++) {
	if ((fsm+i)->state_no != laststate) {
	    maxtrans = numtrans > maxtrans ? numtrans : maxtrans;
	    numtrans = 0;
	}
	if ((fsm+i)->target != -1) {
	    numtrans++;
	}
	laststate = (fsm+i)->state_no;
    }

    pre_index = xxcalloc(maxtrans+1, sizeof(struct pre_index));
    for (i = 0; i <= maxtrans; i++) {
	(pre_index+i)->state_no = -1;
    }

    /* We create an array of states, indexed by how many transitions they have */
    /* so that later, we can traverse them in order densest first, in case we  */
    /* only want to index to some predefined maximum memory usage.             */

    for (i = 0, laststate = 0, maxtrans = 0, numtrans = 0; (fsm+i)->state_no != -1; i++) {
	if ((fsm+i)->state_no != laststate) {
	    if ((pre_index+numtrans)->state_no == -1) {
		(pre_index+numtrans)->state_no = laststate;
	    } else {
		tp = xxcalloc(1, sizeof(struct pre_index));
		tp->state_no = laststate;
		tp->next = (pre_index+numtrans)->next;
		(pre_index+numtrans)->next = tp;
	    }
	    maxtrans = numtrans > maxtrans ? numtrans : maxtrans;
	    numtrans = 0;
	}
	if ((fsm+i)->target != -1) {
	    numtrans++;
	}
	laststate = (fsm+i)->state_no;
    }
    indexptr = NULL;
    cnt += round_up_to_power_of_two(h->last_net->statecount*sizeof(struct apply_state_index *));

    if (cnt > mem_limit) {
	cnt -= round_up_to_power_of_two(h->last_net->statecount*sizeof(struct apply_state_index *));
	goto memlimitnoindex;
    }

    indexptr = xxcalloc(h->last_net->statecount, sizeof(struct apply_state_index *));

    if (h->has_flags && flags_only) {
	/* Mark states that have flags */
	if (!(h->flagstates)) {
	    apply_mark_flagstates(h);
	}
    }

    for (i = maxtrans; i >= 0; i--) {
	for (tp = pre_index+i; tp != NULL; tp = tp->next) {
	    if (tp->state_no >= 0) {
		if (i < densitycutoff) {
		    if (!(h->has_flags && flags_only && BITTEST(h->flagstates, tp->state_no))) {
			continue;
		    }
		}
		cnt += round_up_to_power_of_two(h->sigma_size*sizeof(struct apply_state_index));
		if (cnt > mem_limit) {
		    cnt -= round_up_to_power_of_two(h->sigma_size*sizeof(struct apply_state_index));
		    goto memlimit;
		}
		*(indexptr + tp->state_no) = xxmalloc(h->sigma_size*sizeof(struct apply_state_index));

		/* We make the tail of all index linked lists point to the index  */
		/* for EPSILON, so that we automatically when EPSILON transitions */
		/* also when traversing an index.                                 */

		for (j = 0; j < h->sigma_size; j++) {
		    (*(indexptr + tp->state_no) + j)->fsmptr = -1;
		    if (j == EPSILON)
			(*(indexptr + tp->state_no) + j)->next = NULL;
		    else
			(*(indexptr + tp->state_no) + j)->next = (*(indexptr + tp->state_no)); /* all tails point to epsilon */		    
		}
	    }
	}
    }

 memlimit:

    for (i=0; (fsm+i)->state_no != -1; i++) {
	iptr = *(indexptr + (fsm+i)->state_no);
	if (iptr == NULL || (fsm+i)->target == -1) {
	    continue;
	}
	sym = inout == APPLY_INDEX_INPUT ? (fsm+i)->in : (fsm+i)->out;

	if (h->has_flags && (h->flag_lookup+sym)->type) {
	    sym = EPSILON;
	}
	if (sym == UNKNOWN) {  /* We make the index of UNKNOWN point to IDENTITY */
	    sym = IDENTITY;    /* since these are really the same symbol         */
	}
	if ((iptr+sym)->fsmptr == -1) {
	    (iptr+sym)->fsmptr = i;
	} else {
	    cnt += round_up_to_power_of_two(sizeof(struct apply_state_index));
	    tempiptr = xxcalloc(1, sizeof(struct apply_state_index));

	    tempiptr->next = (iptr+sym)->next;
	    tempiptr->fsmptr =  i;
	    (iptr+sym)->next = tempiptr;
	}
    }

    /* Free preindex */

 memlimitnoindex:

    for (i = maxtrans; i >= 0; i--) {
	for (tp = (pre_index+i)->next; tp != NULL; tp = tpp) {
	    tpp = tp->next;
	    xxfree(tp);
	}
    }
    xxfree(pre_index);

    if (inout == APPLY_INDEX_INPUT) {
	h->index_in = indexptr;
    } else {
	h->index_out = indexptr;
    }
}
コード例 #2
0
ファイル: main.c プロジェクト: XanClic/transalign-killer
int main(int argc, char *argv[])
{
    int ret = 0;


    if (argc < 2)
    {
        fprintf(stderr, "Usage: transalign_killer [--cldev=x.y] <input file>\n");
        fprintf(stderr, "  --cldev=x.y: x specifies the platform index, y the device index.\n");
        return 1;
    }


    long seq_length;
    char *sequence = load_text(argv[argc - 1], &seq_length);
    if (!sequence)
        return 1;

    seq_length--; // Cut final 0 byte

    // FIXME: All the following code relies on seq_length being a multiple of BASE.

    long round_seq_length = round_up_to_power_of_two(seq_length, BASE_EXP);

    long res_length = 0;
    for (long len = round_seq_length / BASE; len; len /= BASE)
        res_length += len;


    // Use some random index to be searched for here
    unsigned letter_index = seq_length / 2;


    // Select an OpenCL device
    cl_device_id dev = select_device(argc - 1, argv);
    if (!dev)
        return 1;

    // Initialize the OpenCL st...ack
    cl_context ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, NULL);
    cl_command_queue queue = clCreateCommandQueue(ctx, dev, 0, NULL);

    // Load the OpenCL kernesl
    char *prog_src = load_text("trans.cl", NULL);
    if (!prog_src)
        return 1;
    cl_program prog = clCreateProgramWithSource(ctx, 1, (const char **)&prog_src, NULL, NULL);
    free(prog_src);

    // Build them
    clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
    cl_kernel k_iadd = clCreateKernel(prog, "k_iadd", NULL); // initial addition
    cl_kernel k_cadd = clCreateKernel(prog, "k_cadd", NULL); // consecutive addition
    assert(k_iadd);
    assert(k_cadd);


    // Create the result buffer
    unsigned *result = malloc(res_length * sizeof(unsigned));
    cl_mem result_gpu = clCreateBuffer(ctx, CL_MEM_READ_WRITE | HOST_PTR_POLICY, res_length * sizeof(unsigned), result, NULL);


    clock_start();

    /*** START OF ROCKET SCIENCE LEVEL RUNTIME-TIME INTENSIVE STUFF ***/

    // Bandwidth intensive stuff goes here

    // Copy the sequence to the video memory (or, generally speaking, the OpenCL device)
    cl_mem seq_gpu = clCreateBuffer(ctx, CL_MEM_READ_WRITE | HOST_PTR_POLICY, seq_length * sizeof(char), sequence, NULL);

    long bw1_time = clock_delta();


    // GPU intensive stuff goes here

    /**
     * First, transform every - and \0 into a 0 and every other character into a
     * 1. Then, add consecutive fields (BASE fields) together and store them at
     * the beginning of the result buffer.
     */
    clSetKernelArg(k_iadd, 0, sizeof(result_gpu), &result_gpu);
    clSetKernelArg(k_iadd, 1, sizeof(seq_gpu), &seq_gpu);
    clSetKernelArg(k_iadd, 2, sizeof(unsigned), &(unsigned){seq_length});