/*
 * Release a per-thread workspace together with the buffers it owns.
 *
 * The `oper` and `proj` members are released through dmatrix_free(); the
 * `quad`, `c_lup`, `s_lup` and `h_lup` members are each released with a
 * single free() call, and finally the workspace struct itself is freed.
 * NOTE(review): the lookup buffers are assumed to be flat allocations since
 * only one free() is issued per member -- confirm against pmaxlike_wkalloc().
 *
 * Passing NULL is a no-op (same convention as free()), so callers can use
 * this unconditionally on cleanup paths.
 */
EXT void pmaxlike_wkfree (pmaxlike_wk* wk)
{
    if (wk == NULL)
        return;

    dmatrix_free(wk->oper);
    dmatrix_free(wk->proj);

    free(wk->quad);
    free(wk->c_lup);
    free(wk->s_lup);
    free(wk->h_lup);

    free(wk);
}
/*
 * Timing harness: multiplies an n-by-n matrix by itself, first with
 * dmatrix_mmul() and then with dmatrix_mmul_strassen(), and prints the
 * elapsed time reported by the process CPU-time timer for each call.
 *
 *   n     - row/column count of the square test matrix
 *   p     - not used by this routine
 *   print - when non-zero, the input matrix and both products are printed
 */
void test_dmatrix_power_uint(int n, uint p, int print)
{
    pTimer stopwatch;
    double **src;
    double **dst;
    const double **src_ro;

    (void) p;

    ptimer_init(&stopwatch, CLOCK_PROCESS_CPUTIME_ID);

    /* Build the input matrix. */
    src = doubleAA_malloc(n, n);
    dmatrix_ramp(src, n, n, 0.0, 2 * n, 1.0);
    src_ro = (const double **) src;

    if (print) {
        dmatrix_print(src_ro, n, n);
        puts("");
    }

    dst = doubleAA_malloc(n, n);

    /* Plain multiplication. */
    ptimer_tic(&stopwatch);
    dmatrix_mmul(dst, src_ro, src_ro, n, n, n);
    ptimer_toc(&stopwatch);

    printf("dmatrix_mmul: ");
    siunit_show(ptimer_get_sec(&stopwatch));
    puts("\n");

    if (print) {
        dmatrix_print((const double **) dst, n, n);
        puts("");
    }

    /* Strassen multiplication, written into the same output buffer. */
    ptimer_tic(&stopwatch);
    dmatrix_mmul_strassen(dst, src_ro, src_ro, n, n, n);
    ptimer_toc(&stopwatch);

    printf("dmatrix_mmul_strassen: ");
    siunit_show(ptimer_get_sec(&stopwatch));
    puts("\n");

    if (print) {
        dmatrix_print((const double **) dst, n, n);
    }

    dmatrix_free(src);
    dmatrix_free(dst);
}
/*
 * Run J further iterations of the threaded maximum-likelihood update on
 * max->state and report how much the state moved in each iteration.
 *
 * Per iteration:
 *   1. the current state is copied into a scratch matrix `last`;
 *   2. NUM_THREADS workers run pmaxlike_thread_getproj() on their own
 *      workspace and are joined;
 *   3. the workers' `oper` matrices are summed element-wise, scaled by
 *      1 / max->data->samples, and stored in `operator`;
 *   4. `operator` is passed through dmatrix_transpose(), the state is
 *      replaced by (operator * state) * operator and then dmatrix_renorm()ed;
 *   5. dmatrix_dist_max(state, last) is recorded.
 *
 * max->iteration is advanced by J.  If max->print is set, each recorded
 * value is printed; if max->tester is non-NULL it is called with the
 * iteration index and the state.
 *
 * Returns a malloc()ed array of J values (one per iteration) that the caller
 * must free(), or NULL if that array could not be allocated (in which case
 * nothing has been modified).  J <= 0 performs no iterations.
 */
EXT Real* pmaxlike_margi_iteration (maxlike_margi_wk* max, int J)
{
    int a, i, j, k;
    int K = max->data->samples;
    int N = max->state->n;
    int M = N + 1;
    int iter, end;
    Real re, im;
    Real norm = 1.0 / (max->data->samples);
    dmatrix* state = max->state;
    dmatrix* operator;
    dmatrix* prod;
    dmatrix* last;
    Real* err;
    void *status;
    pthread_t threads[NUM_THREADS];
    pthread_attr_t attr;
    pmaxlike_wk* data[NUM_THREADS];

    /* Allocate the result first so a failure leaves everything untouched. */
    err = malloc(J * sizeof *err);
    if (err == NULL && J > 0)
        return NULL;

    operator = dmatrix_alloc(max->state->n);
    prod     = dmatrix_alloc(max->state->n);
    last     = dmatrix_alloc(max->state->n);

    iter = max->iteration;
    max->iteration += J;
    end = max->iteration;

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    /*
     * One workspace per worker, each built from consecutive multiples of
     * K / NUM_THREADS.
     * NOTE(review): if the first two arguments of pmaxlike_wkalloc() are
     * sample-range bounds, the trailing K % NUM_THREADS samples are handed to
     * no worker while `norm` still divides by K -- confirm this is intended.
     */
    j = K / NUM_THREADS;
    #pragma unroll
    for (a = 0; a < NUM_THREADS; a++) {
        data[a] = pmaxlike_wkalloc(a * j, (a + 1) * j, max->data, N);
        data[a]->state = state;
    }

    k = 0;
    while (iter < end) {
        /* Snapshot the state so the change can be measured afterwards. */
        for (i = 0; i < M; i++) {
            for (j = 0; j < M; j++) {
                last->re[i][j] = state->re[i][j];
                last->im[i][j] = state->im[i][j];
            }
        }

        /* NOTE(review): pthread_create() results are not checked. */
        #pragma unroll
        for (a = 0; a < NUM_THREADS; a++)
            pthread_create(&threads[a], &attr, pmaxlike_thread_getproj, (void *) data[a]);
        #pragma unroll
        for (a = 0; a < NUM_THREADS; a++)
            pthread_join(threads[a], &status);

        /* Reduce the per-worker partial operators into one scaled matrix. */
        for (i = 0; i < M; i++) {
            for (j = 0; j < M; j++) {
                re = 0.0;
                im = 0.0;
                #pragma unroll
                for (a = 0; a < NUM_THREADS; a++)
                    re += data[a]->oper->re[i][j];
                #pragma unroll
                for (a = 0; a < NUM_THREADS; a++)
                    im += data[a]->oper->im[i][j];
                operator->re[i][j] = norm * re;
                operator->im[i][j] = norm * im;
            }
        }

        dmatrix_transpose(operator);
        dmatrix_prod(prod, operator, state);
        dmatrix_prod(state, prod, operator);
        dmatrix_renorm(state);

        err[k] = dmatrix_dist_max(state, last);

        /* Report the value just stored; k is advanced only afterwards so the
         * read never touches an unwritten (or past-the-end) element. */
        if (max->print)
            printf("err(%i):%f\n", iter, err[k]);
        if (max->tester != NULL)
            (max->tester)(iter, state);

        k++;
        iter++;
    }

    dmatrix_free(operator);
    dmatrix_free(prod);
    dmatrix_free(last);

    #pragma unroll
    for (a = 0; a < NUM_THREADS; a++)
        pmaxlike_wkfree( data[a] );

    pthread_attr_destroy(&attr);

    return err;
}
/*
 * Run J further iterations of the single-threaded maximum-likelihood update
 * on max->state and report how much the state moved in each iteration.
 *
 * Per iteration:
 *   1. the current state is copied into a scratch matrix `last`;
 *   2. `operator` is zeroed, then for every (theta, x) whose entry
 *      max->data->val[theta][x] is non-zero, the sum over (i, j) of
 *      proj_re * state.re - proj_im * state.im is formed and the projector
 *      for that (theta, x) is added to `operator` with weight
 *      val[theta][x] / sum;
 *   3. `operator` is passed through dmatrix_transpose(), the state is
 *      replaced by (operator * state) * operator and then dmatrix_renorm()ed;
 *   4. dmatrix_dist_max(state, last) is recorded.
 *
 * max->iteration is advanced by J.  If max->print is set, each recorded
 * value is printed; if max->tester is non-NULL it is called with the
 * iteration index and the state.  A progress line is printed whenever the
 * iteration counter reaches a multiple of 50.
 *
 * Returns a malloc()ed array of J values (one per iteration) that the caller
 * must free(), or NULL if that array could not be allocated (in which case
 * nothing has been modified).  J <= 0 performs no iterations.
 */
EXT Real* pmaxlike_iteration (maxlike_wk* max, int J)
{
    int i, j, k, theta, x;
    int M = max->state->n + 1;
    int T = max->data->gd->x->n_val;
    int X = max->data->gd->y->n_val;
    int iter, end;
    Real trace, reg, weight;
    dmatrix* state = max->state;
    dmatrix* operator;
    dmatrix* prod;
    dmatrix* last;
    Real* err;

    /* Allocate the result first so a failure leaves everything untouched. */
    err = malloc(J * sizeof *err);
    if (err == NULL && J > 0)
        return NULL;

    operator = dmatrix_alloc(max->state->n);
    prod     = dmatrix_alloc(max->state->n);
    last     = dmatrix_alloc(max->state->n);

    iter = max->iteration;
    max->iteration += J;
    end = max->iteration;

    k = 0;
    while (iter < end) {
        /* Snapshot the state so the change can be measured afterwards. */
        for (i = 0; i < M; i++) {
            for (j = 0; j < M; j++) {
                last->re[i][j] = state->re[i][j];
                last->im[i][j] = state->im[i][j];
            }
        }

        dmatrix_to_zero(operator);

        for (theta = 0; theta < T; theta++) {
            for (x = 0; x < X; x++) {
                reg = max->data->val[theta][x];
                if (reg == 0.0)
                    continue;   /* entries equal to zero contribute nothing */

                trace = 0.0;
                for (i = 0; i < M; i++) {
                    for (j = 0; j < M; j++) {
                        trace += max->proj_re[theta][x][i][j] * state->re[i][j]
                               - max->proj_im[theta][x][i][j] * state->im[i][j];
                    }
                }

                weight = reg / trace;
                for (i = 0; i < M; i++) {
                    for (j = 0; j < M; j++) {
                        operator->re[i][j] += weight * max->proj_re[theta][x][i][j];
                        operator->im[i][j] += weight * max->proj_im[theta][x][i][j];
                    }
                }
            }
        }

        dmatrix_transpose(operator);
        dmatrix_prod(prod, operator, state);
        dmatrix_prod(state, prod, operator);
        dmatrix_renorm(state);

        err[k] = dmatrix_dist_max(state, last);

        /* Report the value just stored; k is advanced only afterwards so the
         * read never touches an unwritten (or past-the-end) element. */
        if (max->print)
            printf("err(%i):%f\n", iter, err[k]);
        if (max->tester != NULL)
            (max->tester)(iter, state);

        k++;
        iter++;

        if (iter % 50 == 0)
            printf("%i iterations done\n", iter);
    }

    dmatrix_free(operator);
    dmatrix_free(prod);
    dmatrix_free(last);

    return err;
}
/*
 * Fill max->proj_re / max->proj_im, indexed [theta][x][i][j], for every
 * theta in [0, T), every bin x in [0, X) and every index pair i, j in
 * [0, M), where T = gd->x->n_val, X = gd->y->n_val and M = state->n + 1.
 *
 * The work is done in three passes:
 *   1. For each pair j <= i, products harmonics[i][q] * harmonics[j][q] are
 *      accumulated along a fine grid; whenever the grid value reaches the
 *      current bin threshold, the sum is scaled by the grid resolution,
 *      multiplied by cos((i-j)*angle) / sin((i-j)*angle) for each theta, and
 *      stored in the current bin.  Bins are filled starting at n_val / 2.
 *   2. Bins from n_val / 2 upward are copied onto bins counted downward from
 *      n_val / 2, with the sign flipped when i + j is odd.  The first step
 *      has source and destination equal, so that bin is rewritten in place.
 *   3. Entry (j, i) is set from entry (i, j) with the imaginary part negated,
 *      for every j <= i.
 *
 * All temporaries (tables, grid, harmonic workspace, scratch matrix) are
 * released before returning.
 */
EXT void maxlike_precalculate (maxlike_wk* max)
{
#define H_Amp 10.0
#define H_Step 65535
#define H_Rez (H_Amp/H_Step)

    int i = 0, j = 0, theta = 0, bin = 0, mirror = 0, q = 0;
    int M = max->state->n + 1;
    int T = max->data->gd->x->n_val;
    int X = max->data->gd->y->n_val;
    int Q = H_Step;
    int sign = 1;
    Real sum, edge;
    Real bin_width = max->data->gd->y->resolution;

    range* grid = range_single(H_Step, H_Amp);
    hermite_workspace* herm = hermite_alloc(max->state->n, grid);
    hermite_calculate(herm);

    /* Tables of cos(i * angle) and sin(i * angle), indexed [i][theta]. */
    Real** cos_tab = (Real**) malloc( sizeof(Real*) * M );
    Real** sin_tab = (Real**) malloc( sizeof(Real*) * M );

    for (i = 0; i != M; i++) {
        cos_tab[i] = (Real*) malloc( sizeof(Real) * T );
        sin_tab[i] = (Real*) malloc( sizeof(Real) * T );
        for (theta = 0; theta != T; theta++) {
            cos_tab[i][theta] = cos( i * (max->data->gd->x->val[theta]) );
            sin_tab[i][theta] = sin( i * (max->data->gd->x->val[theta]) );
        }
    }

    /* Pass 1: accumulate harmonic products bin by bin for every j <= i. */
    for (i = 0; i != M; i++) {
        for (j = 0; j != i + 1; j++) {
            sum  = 0.0;
            edge = bin_width / 2;
            bin  = (max->data->gd->y->n_val) / 2;

            /* Stop when either the bins or the fine grid are exhausted. */
            for (q = 0; bin != X && q != Q; q++) {
                sum += herm->harmonics[i][q] * herm->harmonics[j][q];

                if (grid->val[q] >= edge) {
                    /* Bin boundary reached: flush the sum into this bin. */
                    edge += bin_width;
                    sum  *= grid->resolution;
                    for (theta = 0; theta != T; theta++) {
                        max->proj_re[theta][bin][i][j] = cos_tab[i - j][theta] * sum;
                        max->proj_im[theta][bin][i][j] = sin_tab[i - j][theta] * sum;
                    }
                    bin++;
                    sum = 0.0;
                }
            }
        }
    }

    /* Pass 2: reflect bins about n_val / 2, odd i + j changes sign. */
    for (theta = 0; theta != T; theta++) {
        mirror = (max->data->gd->y->n_val) / 2;
        for (bin = mirror; bin != X; bin++, mirror--) {
            for (i = 0; i != M; i++) {
                for (j = 0; j != i + 1; j++) {
                    if ((i + j) % 2)
                        sign = -1;
                    else
                        sign = 1;
                    max->proj_re[theta][mirror][i][j] = sign * max->proj_re[theta][bin][i][j];
                    max->proj_im[theta][mirror][i][j] = sign * max->proj_im[theta][bin][i][j];
                }
            }
        }
    }

    /* Pass 3: fill (j, i) from (i, j) with the imaginary part negated,
     * going through a scratch matrix. */
    dmatrix* scratch = dmatrix_alloc(M - 1);

    for (theta = 0; theta != T; theta++) {
        for (bin = 0; bin != X; bin++) {
            for (i = 0; i != M; i++) {
                for (j = 0; j != i + 1; j++) {
                    scratch->re[i][j] = max->proj_re[theta][bin][i][j];
                    scratch->im[i][j] = max->proj_im[theta][bin][i][j];
                }
            }
            for (i = 0; i != M; i++) {
                for (j = 0; j != i + 1; j++) {
                    max->proj_re[theta][bin][j][i] =  scratch->re[i][j];
                    max->proj_im[theta][bin][j][i] = -scratch->im[i][j];
                }
            }
        }
    }

    dmatrix_free(scratch);

    for (i = 0; i < M; i++) {
        free( cos_tab[i] );
        free( sin_tab[i] );
    }
    free(cos_tab);
    free(sin_tab);

    hermite_free(herm);
    range_free(grid);

#undef H_Amp
#undef H_Step
#undef H_Rez
}