widget* spawn_ocl(char* filename) { widget *w = widget_window_new(100, 100, filename); w->draw = widget_window_ocl_draw; w->release = widget_window_ocl_release; w->onclick = widget_window_ocl_onclick; w->free = widget_window_ocl_free; OCLPROGRAM *p = ocl_build(filename); w->data2 = p; p->window = w; widget_add(w); return w; }
/*
 * Allocate `size` bytes, terminating the program with a diagnostic when the
 * allocation fails, so callers never dereference a NULL result.
 */
static void *cofact_xmalloc(size_t size)
{
    void *mem = malloc(size);

    if (mem == NULL && size != 0) {
        fprintf(stderr, "cofact_init: out of memory (%zu bytes)\n", size);
        abort();
    }
    return mem;
}

#if USE_OPENCL
/*
 * Compute, for one ECM plan, the three stage-2 lengths that are passed to the
 * OpenCL build as -D ECM_COMMONZ_T_LEN / ECM_STAGE2_PID_LEN / ECM_STAGE2_PJ_LEN.
 */
static void cofact_ecm_stage2_lens(const ecm_plan_t *plan, int *commonz_t_len,
                                   int *pid_len, int *pj_len)
{
    *commonz_t_len = (plan->stage2.n_S1)
        + (plan->stage2.i1 - plan->stage2.i0
           - ((plan->stage2.i0 == 0) ? 1 : 0));
    *pid_len = plan->stage2.i1 - plan->stage2.i0;
    *pj_len = plan->stage2.n_S1;
}

/*
 * Raise each running maximum to at least the corresponding length required by
 * `plan`.
 */
static void cofact_ecm_merge_lens(const ecm_plan_t *plan, int *commonz_t_len,
                                  int *pid_len, int *pj_len)
{
    int plan_commonz_t_len, plan_pid_len, plan_pj_len;

    cofact_ecm_stage2_lens(plan, &plan_commonz_t_len, &plan_pid_len,
                           &plan_pj_len);
    *commonz_t_len = MAX(*commonz_t_len, plan_commonz_t_len);
    *pid_len = MAX(*pid_len, plan_pid_len);
    *pj_len = MAX(*pj_len, plan_pj_len);
}
#endif /* USE_OPENCL */

/* Fill one slot of cofact_algos: processing routine, plan and algorithm index.
   `algo` and `shared_plan` are evaluated several times: pass plain variables
   or constants only. */
#define COFACT_SET(size_idx, algo, fn, shared_plan)                     \
    do {                                                                \
        cofact_algos[(size_idx)][(algo)].process = (fn);                \
        cofact_algos[(size_idx)][(algo)].plan = (shared_plan);          \
        cofact_algos[(size_idx)][(algo)].algo_idx = (algo);             \
    } while (0)

/* Install algorithm column `algo` for every size row; all rows share one plan
   and differ only in the <family>_<size>_process routine they call. */
#if USE_OPENCL
#define COFACT_SET_FAMILY(algo, family, shared_plan)                    \
    do {                                                                \
        COFACT_SET(0, algo, family##_ul32_process_ocl, shared_plan);    \
        COFACT_SET(1, algo, family##_ul64_process_ocl, shared_plan);    \
        COFACT_SET(2, algo, family##_ul96_process_ocl, shared_plan);    \
        COFACT_SET(3, algo, family##_ul128_process_ocl, shared_plan);   \
        COFACT_SET(4, algo, family##_ul160_process_ocl, shared_plan);   \
        COFACT_SET(5, algo, family##_ul192_process_ocl, shared_plan);   \
        COFACT_SET(6, algo, family##_ul224_process_ocl, shared_plan);   \
        COFACT_SET(7, algo, family##_ul256_process_ocl, shared_plan);   \
        COFACT_SET(8, algo, family##_mpz_process, shared_plan);         \
    } while (0)
#else /* USE_OPENCL */
#define COFACT_SET_FAMILY(algo, family, shared_plan)                    \
    do {                                                                \
        COFACT_SET(0, algo, family##_ul64_process, shared_plan);        \
        COFACT_SET(1, algo, family##_ul128_process, shared_plan);       \
        COFACT_SET(2, algo, family##_mpz_process, shared_plan);         \
    } while (0)
#endif /* USE_OPENCL */

/* Format of the OpenCL build options: the configured options followed by the
   buffer lengths derived from the plans. */
#define COFACT_BUILD_OPTS_FMT \
    "%s -D PP1_STAGE2_XJ_LEN=%d -D ECM_COMMONZ_T_LEN=%d -D ECM_STAGE2_PID_LEN=%d -D ECM_STAGE2_PJ_LEN=%d"

/*
 * Initialise the cofactorisation tables from `cfg`.
 *
 * - Resets cofact_queue and copies cfg->lpb[] into cand_lpb[].
 * - Sets n = nb_curves(cfg->lpb[APOLY_IDX]) and n_cofact_algos = n + 3.
 * - Allocates cofact_algos as COFACT_SIZES rows of n_cofact_algos zeroed
 *   entries, each with its own queue from queue_alloc().
 * - Fills the columns: 0 = pm1, 1 = pp1, 2 = ecm (MONTY12), 3 = ecm (BRENT12,
 *   only when n > 0), 4 .. n+2 = ecm (MONTY12) with B1 growing by sqrt(B1)
 *   per curve.  Every row of a column shares the plan allocated for it.
 * - With USE_OPENCL, additionally tracks the largest stage-2 lengths over all
 *   plans and builds "las/ocl/las.cl" with them passed as -D options.
 *
 * Allocation or formatting failures abort the program.
 *
 * NOTE(review): the rows written are 0..8 with USE_OPENCL and 0..2 without;
 * COFACT_SIZES is assumed to be at least that large -- confirm.
 */
void cofact_init(gls_config_t cfg)
{
    unsigned int i, j, n;
    void *plan;

    cofact_queue = NULL;

    /* The previous version also accumulated a maximum of cfg->lpb[] into an
       uninitialised local whose value was never used; that read is gone. */
    for (i = 0; i < sizeof(cfg->lpb) / sizeof(cfg->lpb[0]); i++)
        cand_lpb[i] = cfg->lpb[i];

    n = nb_curves(cfg->lpb[APOLY_IDX]);
    n_cofact_algos = n + 3;

    cofact_algos = cofact_xmalloc(COFACT_SIZES * sizeof(cofact_algo_t *));
    for (i = 0; i < COFACT_SIZES; i++) {
        cofact_algos[i] = cofact_xmalloc(n_cofact_algos * sizeof(cofact_algo_t));
        memset(cofact_algos[i], 0, n_cofact_algos * sizeof(cofact_algo_t));
        for (j = 0; j < n_cofact_algos; j++)
            cofact_algos[i][j].queue = queue_alloc();
    }

#if USE_OPENCL
    /* Running maxima of the buffer lengths the kernels are compiled for. */
    int pp1_stage2_xj_len = 0;
    int ecm_commonz_t_len = 0;
    int ecm_stage2_pid_len = 0;
    int ecm_stage2_pj_len = 0;
#endif

    /* pm1 */
    plan = cofact_xmalloc(sizeof(pm1_plan_t));
    pm1_plan_init(plan, 315, 2205);
    COFACT_SET_FAMILY(0, pm1, plan);
#if USE_OPENCL
    pp1_stage2_xj_len = ((pm1_plan_t *)plan)->stage2.n_S1;
#endif

    /* pp1 */
    plan = cofact_xmalloc(sizeof(pp1_plan_t));
    pp1_plan_init(plan, 525, 3255);
    COFACT_SET_FAMILY(1, pp1, plan);
#if USE_OPENCL
    pp1_stage2_xj_len = MAX(pp1_stage2_xj_len,
                            ((pp1_plan_t *)plan)->stage2.n_S1);
#endif

    /* ecm */
    plan = cofact_xmalloc(sizeof(ecm_plan_t));
    ecm_plan_init(plan, 105, 3255, MONTY12, 2);
    COFACT_SET_FAMILY(2, ecm, plan);
#if USE_OPENCL
    /* First ECM plan: its lengths seed the running maxima directly. */
    cofact_ecm_stage2_lens(plan, &ecm_commonz_t_len, &ecm_stage2_pid_len,
                           &ecm_stage2_pj_len);
#endif

    if (n > 0) {
        plan = cofact_xmalloc(sizeof(ecm_plan_t));
        ecm_plan_init(plan, 315, 5355, BRENT12, 11);
        COFACT_SET_FAMILY(3, ecm, plan);
#if USE_OPENCL
        cofact_ecm_merge_lens(plan, &ecm_commonz_t_len, &ecm_stage2_pid_len,
                              &ecm_stage2_pj_len);
#endif
    }

    /* heuristic strategy where B1 is increased by sqrt(B1) at each curve */
    double B1 = 105.0;
    for (i = 4; i < n + 3; i++) {
        double B2;
        unsigned int k;

        B1 += sqrt(B1);
        B2 = 17.0 * B1;
        /* we round B2 to (2k+1)*105, thus k is the integer nearest to
           B2/210-0.5 */
        k = (unsigned int)(B2 / 210.0);

        plan = cofact_xmalloc(sizeof(ecm_plan_t));
        ecm_plan_init(plan, (unsigned int) B1, (2 * k + 1) * 105, MONTY12,
                      i - 1);
        COFACT_SET_FAMILY(i, ecm, plan);
#if USE_OPENCL
        cofact_ecm_merge_lens(plan, &ecm_commonz_t_len, &ecm_stage2_pid_len,
                              &ecm_stage2_pj_len);
#endif
    }
    /* With n == 0 the loop never runs and i stays at 4 while only 3 columns
       exist, so the column count can only be cross-checked when n > 0. */
    assert(n == 0 || i == n_cofact_algos);

#if USE_OPENCL
    {
        /* Construct build arguments */
        const char *config_mp_source = "las/ocl/las.cl"; /* File name of kernel source */
        char *build_opts;
        size_t build_opts_size;
        int build_opts_len = snprintf(NULL, 0, COFACT_BUILD_OPTS_FMT,
                                      ocl_state.buildopts, pp1_stage2_xj_len,
                                      ecm_commonz_t_len, ecm_stage2_pid_len,
                                      ecm_stage2_pj_len);

        if (build_opts_len < 0) {
            fprintf(stderr, "cofact_init: cannot format OpenCL build options\n");
            abort();
        }
        /* snprintf does not include null byte in ret value */
        build_opts_size = (size_t)build_opts_len + 1;
        build_opts = cofact_xmalloc(build_opts_size);
        snprintf(build_opts, build_opts_size, COFACT_BUILD_OPTS_FMT,
                 ocl_state.buildopts, pp1_stage2_xj_len, ecm_commonz_t_len,
                 ecm_stage2_pid_len, ecm_stage2_pj_len);
        printf("build_opts=%s\n", build_opts);

        /* Now do the build */
        ocl_build(&ocl_state, config_mp_source, build_opts);
        free(build_opts);
    }
#endif /* USE_OPENCL */

    return;
}

#undef COFACT_BUILD_OPTS_FMT
#undef COFACT_SET_FAMILY
#undef COFACT_SET