コード例 #1
0
/*
 * Create a window widget for an OCL program and register it.
 *
 * A new window is requested from widget_window_new() with the arguments
 * (100, 100, filename), the widget_window_ocl_* handlers are installed on it,
 * and a program is built from `filename` via ocl_build().  The window and the
 * program are then linked to each other (window->data2 <-> program->window)
 * before the window is handed to widget_add().
 *
 * Returns the newly created widget.
 */
widget* spawn_ocl(char* filename)
{
	widget *win = widget_window_new(100, 100, filename);
	OCLPROGRAM *prog;

	/* Install the OCL-specific window handlers. */
	win->free = widget_window_ocl_free;
	win->onclick = widget_window_ocl_onclick;
	win->release = widget_window_ocl_release;
	win->draw = widget_window_ocl_draw;

	/* Build the program and cross-link it with its window. */
	prog = ocl_build(filename);
	prog->window = win;
	win->data2 = prog;

	widget_add(win);
	return win;
}
コード例 #2
0
ファイル: cofact.c プロジェクト: pstach/gls
/*
 * Checked allocator for cofact_init(): returns `count * size` zeroed bytes,
 * or prints a diagnostic and aborts when the allocation fails.
 */
static void *cofact_xcalloc(size_t count, size_t size)
{
	void *mem = calloc(count, size);

	if(mem == NULL && count != 0 && size != 0)
	{
		fprintf(stderr, "cofact_init: out of memory\n");
		abort();
	}
	return mem;
}

/*
 * Fill one slot of the cofact_algos table.
 * `algo` and `plan_ptr` are evaluated more than once: pass only
 * side-effect-free expressions.
 */
#define COFACT_SET_SLOT(size_idx, algo, fn, plan_ptr) \
	do { \
		cofact_algos[(size_idx)][(algo)].process = (fn); \
		cofact_algos[(size_idx)][(algo)].plan = (plan_ptr); \
		cofact_algos[(size_idx)][(algo)].algo_idx = (algo); \
	} while(0)

/*
 * Install algorithm number `algo` of the given family (pm1, pp1 or ecm) in
 * every size row, all rows sharing the same plan pointer.  The row -> function
 * mapping differs between the OpenCL and the plain build.
 */
#if USE_OPENCL
#define COFACT_FAMILY_ROWS 9
#define COFACT_SET_FAMILY(algo, family, plan_ptr) \
	do { \
		COFACT_SET_SLOT(0, algo, family##_ul32_process_ocl, plan_ptr); \
		COFACT_SET_SLOT(1, algo, family##_ul64_process_ocl, plan_ptr); \
		COFACT_SET_SLOT(2, algo, family##_ul96_process_ocl, plan_ptr); \
		COFACT_SET_SLOT(3, algo, family##_ul128_process_ocl, plan_ptr); \
		COFACT_SET_SLOT(4, algo, family##_ul160_process_ocl, plan_ptr); \
		COFACT_SET_SLOT(5, algo, family##_ul192_process_ocl, plan_ptr); \
		COFACT_SET_SLOT(6, algo, family##_ul224_process_ocl, plan_ptr); \
		COFACT_SET_SLOT(7, algo, family##_ul256_process_ocl, plan_ptr); \
		COFACT_SET_SLOT(8, algo, family##_mpz_process, plan_ptr); \
	} while(0)

/* Format of the OpenCL build options; used once to size and once to fill. */
#define COFACT_BUILD_OPTS_FMT \
	"%s -D PP1_STAGE2_XJ_LEN=%d -D ECM_COMMONZ_T_LEN=%d -D ECM_STAGE2_PID_LEN=%d -D ECM_STAGE2_PJ_LEN=%d"

/* Compute the three stage-2 buffer lengths implied by one ECM plan. */
static void cofact_ecm_stage2_lens(const ecm_plan_t *plan, int *commonz_t_len, int *pid_len, int *pj_len)
{
	*commonz_t_len = (plan->stage2.n_S1) + (plan->stage2.i1 - plan->stage2.i0 - ((plan->stage2.i0 == 0) ? 1 : 0));
	*pid_len = plan->stage2.i1 - plan->stage2.i0;
	*pj_len = plan->stage2.n_S1;
}

/* Raise the running maxima to cover the stage-2 lengths of one more ECM plan. */
static void cofact_ecm_stage2_lens_max(const ecm_plan_t *plan, int *commonz_t_len, int *pid_len, int *pj_len)
{
	int commonz, pid, pj;

	cofact_ecm_stage2_lens(plan, &commonz, &pid, &pj);
	if(commonz > *commonz_t_len)
		*commonz_t_len = commonz;
	if(pid > *pid_len)
		*pid_len = pid;
	if(pj > *pj_len)
		*pj_len = pj;
}
#else /* USE_OPENCL */
#define COFACT_FAMILY_ROWS 3
#define COFACT_SET_FAMILY(algo, family, plan_ptr) \
	do { \
		COFACT_SET_SLOT(0, algo, family##_ul64_process, plan_ptr); \
		COFACT_SET_SLOT(1, algo, family##_ul128_process, plan_ptr); \
		COFACT_SET_SLOT(2, algo, family##_mpz_process, plan_ptr); \
	} while(0)
#endif /* USE_OPENCL */

/*
 * Build the cofactorisation algorithm table.
 *
 * Resets cofact_queue, copies cfg->lpb into cand_lpb, and asks nb_curves()
 * (with cfg->lpb[APOLY_IDX]) for the number n of extra ECM curves.  The table
 * cofact_algos then gets COFACT_SIZES rows of n_cofact_algos = n + 3 entries,
 * every entry owning a queue from queue_alloc().  Columns are:
 *
 *   0        pm1, plan (315, 2205)
 *   1        pp1, plan (525, 3255)
 *   2        ecm, plan (105, 3255, MONTY12, 2)
 *   3        ecm, plan (315, 5355, BRENT12, 11)        -- only when n > 0
 *   4..n+2   ecm, MONTY12, bounds from the sqrt(B1) heuristic below
 *
 * One plan is allocated per column and the same pointer is stored in every
 * row of that column.
 *
 * With USE_OPENCL the function also tracks the largest stage-2 lengths over
 * all plans and passes them as -D definitions when building the kernel
 * source through ocl_build().
 *
 * Allocation failures abort the process with a diagnostic.
 *
 * cfg: configuration; only cfg->lpb is read here.
 */
void cofact_init(gls_config_t cfg)
{
	unsigned int i, j, n;
	pm1_plan_t *pm1_plan;
	pp1_plan_t *pp1_plan;
	ecm_plan_t *ecm_plan;
#if USE_OPENCL
	int pp1_stage2_xj_len = 0;
	int ecm_commonz_t_len = 0;
	int ecm_stage2_pid_len = 0;
	int ecm_stage2_pj_len = 0;
#endif /* USE_OPENCL */

	/* The family macros write rows 0..COFACT_FAMILY_ROWS-1 unconditionally. */
	assert(COFACT_SIZES >= COFACT_FAMILY_ROWS);

	cofact_queue = NULL;
	/*
	 * The original also tracked the largest lpb in a local that was read
	 * before being initialised and never used afterwards; that dead (and
	 * undefined) computation has been dropped.
	 */
	for(i = 0; i < sizeof(cfg->lpb) / sizeof(cfg->lpb[0]); i++)
		cand_lpb[i] = cfg->lpb[i];

	n = nb_curves(cfg->lpb[APOLY_IDX]);
	n_cofact_algos = n + 3;

	cofact_algos = cofact_xcalloc(COFACT_SIZES, sizeof *cofact_algos);
	for(i = 0; i < COFACT_SIZES; i++)
	{
		cofact_algos[i] = cofact_xcalloc(n_cofact_algos, sizeof *cofact_algos[i]);
		for(j = 0; j < n_cofact_algos; j++)
			cofact_algos[i][j].queue = queue_alloc();
	}

	/* pm1 */
	pm1_plan = cofact_xcalloc(1, sizeof *pm1_plan);
	pm1_plan_init(pm1_plan, 315, 2205);
	COFACT_SET_FAMILY(0, pm1, pm1_plan);
#if USE_OPENCL
	pp1_stage2_xj_len = pm1_plan->stage2.n_S1;
#endif /* USE_OPENCL */

	/* pp1 */
	pp1_plan = cofact_xcalloc(1, sizeof *pp1_plan);
	pp1_plan_init(pp1_plan, 525, 3255);
	COFACT_SET_FAMILY(1, pp1, pp1_plan);
#if USE_OPENCL
	pp1_stage2_xj_len = MAX(pp1_stage2_xj_len, pp1_plan->stage2.n_S1);
#endif /* USE_OPENCL */

	/* ecm */
	ecm_plan = cofact_xcalloc(1, sizeof *ecm_plan);
	ecm_plan_init(ecm_plan, 105, 3255, MONTY12, 2);
	COFACT_SET_FAMILY(2, ecm, ecm_plan);
#if USE_OPENCL
	cofact_ecm_stage2_lens(ecm_plan, &ecm_commonz_t_len, &ecm_stage2_pid_len, &ecm_stage2_pj_len);
#endif /* USE_OPENCL */

	if(n > 0)
	{
		ecm_plan = cofact_xcalloc(1, sizeof *ecm_plan);
		ecm_plan_init(ecm_plan, 315, 5355, BRENT12, 11);
		COFACT_SET_FAMILY(3, ecm, ecm_plan);
#if USE_OPENCL
		cofact_ecm_stage2_lens_max(ecm_plan, &ecm_commonz_t_len, &ecm_stage2_pid_len, &ecm_stage2_pj_len);
#endif /* USE_OPENCL */
	}

	/* heuristic strategy where B1 is increased by sqrt(B1) at each curve */
	double B1 = 105.0;
	for(i = 4; i < n + 3; i++)
	{
		double B2;
		unsigned int k;

		B1 += sqrt(B1);
		B2 = 17.0 * B1;
		/* we round B2 to (2k+1)*105, thus k is the integer nearest to B2/210-0.5 */
		k = (unsigned int) (B2 / 210.0);

		ecm_plan = cofact_xcalloc(1, sizeof *ecm_plan);
		ecm_plan_init(ecm_plan, (unsigned int) B1, (2 * k + 1) * 105, MONTY12, i - 1);
		COFACT_SET_FAMILY(i, ecm, ecm_plan);
#if USE_OPENCL
		cofact_ecm_stage2_lens_max(ecm_plan, &ecm_commonz_t_len, &ecm_stage2_pid_len, &ecm_stage2_pj_len);
#endif /* USE_OPENCL */
	}
	/*
	 * With n == 0 the loop never runs and i stays at 4 while only 3 columns
	 * exist, so the column-count check is only meaningful for n > 0.
	 */
	assert(n == 0 || i == n_cofact_algos);

#if USE_OPENCL
	/* Construct build arguments */
	const char *config_mp_source = "las/ocl/las.cl";            /* File name of kernel source */
	char *build_opts;
	int build_opts_len;

	/* First pass only measures the formatted length. */
	build_opts_len = snprintf(NULL, 0, COFACT_BUILD_OPTS_FMT,
							  ocl_state.buildopts,
							  pp1_stage2_xj_len,
							  ecm_commonz_t_len,
							  ecm_stage2_pid_len,
							  ecm_stage2_pj_len);
	if(build_opts_len < 0)
	{
		fprintf(stderr, "cofact_init: cannot format OpenCL build options\n");
		abort();
	}

	build_opts_len++; /* snprintf does not include null byte in ret value */
	build_opts = cofact_xcalloc(1, (size_t) build_opts_len);

	snprintf(build_opts, (size_t) build_opts_len, COFACT_BUILD_OPTS_FMT,
			 ocl_state.buildopts,
			 pp1_stage2_xj_len,
			 ecm_commonz_t_len,
			 ecm_stage2_pid_len,
			 ecm_stage2_pj_len);
	printf("build_opts=%s\n", build_opts);

	/* Now do the build */
	ocl_build(&ocl_state, config_mp_source, build_opts);

	free(build_opts);
#endif /* USE_OPENCL */
}

#if USE_OPENCL
#undef COFACT_BUILD_OPTS_FMT
#endif /* USE_OPENCL */
#undef COFACT_SET_FAMILY
#undef COFACT_FAMILY_ROWS
#undef COFACT_SET_SLOT