/* * Display all the results. * * This function assumes curses is initialized. */ static void refresh_listing(void) { int top_offset, left_offset; unsigned int len = results_visible_length(); struct result *result; if (len >= window_height || len >= RESULTS_MAX_LEN) { wmove(screen, window_height / 2, (window_width - sizeof(MSG_TOO_MANY) - 1) / 2); waddstr(screen, MSG_TOO_MANY); refresh(); return; } top_offset = (window_height - len) / 2; if (window_width < get_max_length()) { left_offset = 0; } else { left_offset = (window_width - get_max_length()) / 2; } /* * Place the lines on screen. Since curses will automatically wrap * longer lines, we need to force a new-line on lines following them. */ for (unsigned int i = 0; i < ARRAY_LENGTH(&results); i++) { result = ARRAY_ITEM(&results, i); if (!result->visible) continue; wmove(screen, top_offset, left_offset); waddstr(screen, result->mbs_value); if (result->wcs_len > window_width) { top_offset += (result->wcs_len / window_width); } top_offset++; } refresh(); }
/*
 * Launch the gapmis kernel for the patterns in `p` against the texts in `t`.
 *
 * Steps, in order:
 *   1. build the host vectors (encoded texts and patterns, argument block,
 *      text lengths, the two negated gap penalties, and two zeroed work
 *      vectors of hproVecLen / dproVecLen ints);
 *   2. create one device buffer per host vector and upload it, plus one
 *      output buffer of pats * pBlockSize floats;
 *   3. bind the buffers to `kernel`, enqueue it with a global size of
 *      pBlockSize * pats and a local size of pBlockSize, and wait for it;
 *   4. read pats * pBlockSize floats back into `scores`.
 *
 * p, t    sequence arrays; their length is obtained only through
 *         get_number_of_sequences(), so they are presumably NULL-terminated
 *         (TODO confirm against that helper).
 * in      alignment parameters; scoring_matrix, max_gap, gap_open_pen and
 *         gap_extend_pen are read.
 * scores  output buffer; pats * pBlockSize is passed as the element count
 *         when reading the results back.
 *
 * Returns 1 on success. On failure returns 0 and sets errno to MALLOC
 * (host allocation), GPUMALLOC (device allocation / transfer), GPUERROR
 * (clFinish) or KERNEL (enqueue). Every host buffer and every device buffer
 * created here is released on all paths, including the failure paths.
 */
unsigned int kernel_launch (cl_kernel kernel, cl_context context, cl_command_queue cmd_queue, const char ** p, const char ** t, const struct gapmis_params * in, float * scores)
 {
   int          error = 1;   /* status written by the device helper functions */
   int          code  = 0;   /* errno value to report when the launch fails */
   unsigned int ok    = 0;   /* becomes 1 only after the scores were read back */
   cl_int       err;

   unsigned int pats       = get_number_of_sequences (p);
   unsigned int txts       = get_number_of_sequences (t);
   unsigned int maxTxtLen  = get_max_length (txts, t);
   unsigned int maxPatLen  = get_max_length (pats, p);
   unsigned int pBlockSize = get_pblock_size (txts, 32);
   unsigned int hproVecLen = pats * pBlockSize * (maxTxtLen + 1);
   unsigned int dproVecLen = pats * pBlockSize * (maxPatLen + 1);

   /* WorkSizeGlobal is the total number of threads of the device */
   size_t WorkSizeGlobal[] = { pBlockSize * pats };
   /* WorkSizeLocal is the number of threads per group */
   size_t WorkSizeLocal[]  = { pBlockSize };

   /* host buffers */
   unsigned int * txtsVec    = calloc (maxTxtLen * pBlockSize, sizeof (unsigned int));
   unsigned int * patsVec    = calloc (maxPatLen * pats, sizeof (unsigned int));
   int          * argsVec    = malloc (sizeof (int) * (pats + 7));
   int          * txtsLenVec = calloc (pBlockSize, sizeof (int));
   float        * pensVec    = malloc (sizeof (float) * 2);
   int          * hproVec    = calloc (hproVecLen, sizeof (int));
   int          * dproVec    = calloc (dproVecLen, sizeof (int));

   /*
    * Device buffers. They start out NULL and are only non-NULL once
    * successfully created, so the cleanup code knows what to release.
    */
   cl_mem txtsVec_device    = NULL;
   cl_mem patsVec_device    = NULL;
   cl_mem argsVec_device    = NULL;
   cl_mem txtsLenVec_device = NULL;
   cl_mem pensVec_device    = NULL;
   cl_mem hproVec_device    = NULL;
   cl_mem dproVec_device    = NULL;
   cl_mem scrsVec_device    = NULL;

   if ( patsVec == NULL || txtsVec == NULL || argsVec == NULL || pensVec == NULL ||
        txtsLenVec == NULL || hproVec == NULL || dproVec == NULL )
    {
      code = MALLOC;
      goto cleanup;
    }

   fill_txtsVec (txts, pBlockSize, t, txtsVec, in->scoring_matrix);
   fill_patsVec (pats, maxPatLen, p, patsVec, in->scoring_matrix);
   fill_argsVec (pats, txts, p, in->max_gap, pBlockSize, maxPatLen, maxTxtLen, argsVec);
   fill_txtsLenVec (txts, t, txtsLenVec);

   /* the penalties are handed to the kernel negated */
   pensVec[0] = - in -> gap_open_pen;
   pensVec[1] = - in -> gap_extend_pen;

   /* GPU malloc, followed by the copy from CPU to GPU memory, per buffer */
   txtsVec_device = malloc_device (context, (maxTxtLen * pBlockSize) * sizeof (unsigned int), &error);
   if ( error ) { txtsVec_device = NULL; code = GPUMALLOC; goto cleanup; }
   init_device_mem_uint (context, cmd_queue, txtsVec_device, txtsVec, maxTxtLen * pBlockSize, &error);
   if ( error ) { code = GPUMALLOC; goto cleanup; }

   patsVec_device = malloc_device (context, (maxPatLen * pats) * sizeof (unsigned int), &error);
   if ( error ) { patsVec_device = NULL; code = GPUMALLOC; goto cleanup; }
   init_device_mem_uint (context, cmd_queue, patsVec_device, patsVec, maxPatLen * pats, &error);
   if ( error ) { code = GPUMALLOC; goto cleanup; }

   argsVec_device = malloc_device (context, (pats + 7) * sizeof (int), &error);
   if ( error ) { argsVec_device = NULL; code = GPUMALLOC; goto cleanup; }
   init_device_mem_int (context, cmd_queue, argsVec_device, argsVec, pats + 7, &error);
   if ( error ) { code = GPUMALLOC; goto cleanup; }

   txtsLenVec_device = malloc_device (context, pBlockSize * sizeof (int), &error);
   if ( error ) { txtsLenVec_device = NULL; code = GPUMALLOC; goto cleanup; }
   init_device_mem_int (context, cmd_queue, txtsLenVec_device, txtsLenVec, pBlockSize, &error);
   if ( error ) { code = GPUMALLOC; goto cleanup; }

   pensVec_device = malloc_device (context, 2 * sizeof (float), &error);
   if ( error ) { pensVec_device = NULL; code = GPUMALLOC; goto cleanup; }
   init_device_mem_float (context, cmd_queue, pensVec_device, pensVec, 2, &error);
   if ( error ) { code = GPUMALLOC; goto cleanup; }

   hproVec_device = malloc_device (context, hproVecLen * sizeof (int), &error);
   if ( error ) { hproVec_device = NULL; code = GPUMALLOC; goto cleanup; }
   init_device_mem_int (context, cmd_queue, hproVec_device, hproVec, hproVecLen, &error);
   if ( error ) { code = GPUMALLOC; goto cleanup; }

   dproVec_device = malloc_device (context, dproVecLen * sizeof (int), &error);
   if ( error ) { dproVec_device = NULL; code = GPUMALLOC; goto cleanup; }
   init_device_mem_int (context, cmd_queue, dproVec_device, dproVec, dproVecLen, &error);
   if ( error ) { code = GPUMALLOC; goto cleanup; }

   /* output buffer: written by the kernel, never uploaded */
   scrsVec_device = malloc_device (context, (pats * pBlockSize) * sizeof (float), &error);
   if ( error ) { scrsVec_device = NULL; code = GPUMALLOC; goto cleanup; }

   /* wait for all uploads to complete */
   err = clFinish (cmd_queue);
   if ( err != CL_SUCCESS ) { code = GPUERROR; goto cleanup; }

   /* connect the input arguments of the kernel with the corresponding mem */
   set_kernel_arguments (kernel, cmd_queue, patsVec_device, txtsVec_device, argsVec_device,
                         txtsLenVec_device, pensVec_device, hproVec_device, dproVec_device,
                         scrsVec_device);

   /* synchronisation */
   err = clFinish (cmd_queue);
   if ( err != CL_SUCCESS ) { code = GPUERROR; goto cleanup; }

   /* kernel enters the command queue using WorkSizeGlobal and WorkSizeLocal */
   err = clEnqueueNDRangeKernel (cmd_queue, kernel, 1, NULL, WorkSizeGlobal, WorkSizeLocal, 0, NULL, NULL);
   /* the status of the enqueue is in `err`, not in the helpers' `error` flag */
   if ( err != CL_SUCCESS ) { code = KERNEL; goto cleanup; }

   /* finalise the kernel */
   err = clFinish (cmd_queue);
   if ( err != CL_SUCCESS ) { code = GPUERROR; goto cleanup; }

   /* return the results from the GPU to the CPU */
   read_device_mem_float (cmd_queue, pats * pBlockSize, scores, scrsVec_device, &error);
   if ( error ) { code = GPUMALLOC; goto cleanup; }

   ok = 1;

 cleanup:
   /* deallocation; free(NULL) is a no-op, device handles are checked */
   free (txtsVec);
   free (patsVec);
   free (argsVec);
   free (txtsLenVec);
   free (pensVec);
   free (hproVec);
   free (dproVec);

   if ( patsVec_device    != NULL ) clReleaseMemObject (patsVec_device);
   if ( txtsVec_device    != NULL ) clReleaseMemObject (txtsVec_device);
   if ( argsVec_device    != NULL ) clReleaseMemObject (argsVec_device);
   if ( txtsLenVec_device != NULL ) clReleaseMemObject (txtsLenVec_device);
   if ( pensVec_device    != NULL ) clReleaseMemObject (pensVec_device);
   if ( hproVec_device    != NULL ) clReleaseMemObject (hproVec_device);
   if ( dproVec_device    != NULL ) clReleaseMemObject (dproVec_device);
   if ( scrsVec_device    != NULL ) clReleaseMemObject (scrsVec_device);

   /* set errno last so that nothing in the cleanup can overwrite it */
   if ( ! ok )
     errno = code;

   return ( ok );
 }
unsigned int gapmis_one_to_many_opt_gpu ( const char * p1, const char ** t, const struct gapmis_params * in, struct gapmis_align * out ) { const char * p[] = { p1, NULL}; if ( in -> scoring_matrix > 1 ) { errno = MATRIX; return ( 0 ); } unsigned int pats = get_number_of_sequences (p); unsigned int txts = get_number_of_sequences (t); unsigned int maxPatLen = get_max_length (pats, p); unsigned int minTxtLen = get_min_length (txts, t); if (check_sequences(pats,p,in->scoring_matrix)==0) { errno = BADCHAR; return ( 0 ); } if (check_sequences(txts,t,in->scoring_matrix)==0) { errno = BADCHAR; return ( 0 ); } if(maxPatLen > minTxtLen) { errno = LENGTH; return ( 0 ); } if ( in -> max_gap >= minTxtLen ) { errno = MAXGAP; return ( 0 ); } int err = -1; /* get the GPU id */ cl_platform_id gpu_id = get_gpu_id(&err); if(err) { errno = NOGPU; return ( 0 ); } /* get the device id */ cl_device_id dev_id = get_dev_id(gpu_id, &err); if(err) { errno = NOGPU; return ( 0 ); } /* create the context using dev_id */ cl_context context = create_context(dev_id, &err); if(err) { errno = GPUERROR; return ( 0 ); } /* create a list with the commands to be executed by GPU */ cl_command_queue cmd_queue = create_cmd_queue (dev_id, context, &err); if(err) { errno = GPUERROR; return ( 0 ); } /* create a kernel */ cl_kernel kernel; /* load the kernel ``kernel_dna.cl'' with name ``gapmis_kernel''*/ if(in->scoring_matrix==0) kernel = load_kernel ("kernel_dna.cl", "gapmis_kernel", dev_id, context, &err); else kernel = load_kernel ("kernel_pro.cl", "gapmis_kernel", dev_id, context, &err); if(err) { errno = KERNEL; return ( 0 ); } const unsigned int patGroupSize = 1; const unsigned int txtGroupSize = 768; unsigned int i, j; unsigned int patGroups = get_number_of_groups (pats, patGroupSize); unsigned int txtGroups = get_number_of_groups (txts, txtGroupSize); const char * groupPatterns[patGroupSize+1]; set_null (groupPatterns, patGroupSize+1); const char * groupTexts[txtGroupSize+1]; set_null (groupTexts, 
txtGroupSize+1); float * groupScores; groupScores = calloc (patGroupSize*txtGroupSize, sizeof(float) ); int groupMatch [patGroupSize]; float groupMatchScores [patGroupSize]; set_invalid(groupMatch,patGroupSize); set_minimum(groupMatchScores,patGroupSize); for(i=0;i<patGroups;i++) { set_null (groupPatterns, patGroupSize+1); initialize_pointers (groupPatterns,i,patGroupSize,p,pats); set_invalid(groupMatch,patGroupSize); set_minimum(groupMatchScores,patGroupSize); for(j=0;j<txtGroups;j++) { set_null (groupTexts, txtGroupSize+1); initialize_pointers (groupTexts,j,txtGroupSize,t,txts); if( ! ( kernel_launch (kernel, context, cmd_queue, groupPatterns, groupTexts, in, groupScores) )) return ( 0 ); update_group_match (groupScores,groupMatch,groupMatchScores,patGroupSize,txtGroupSize, pats, txts, i, j); } for(j=0;j<patGroupSize;j++) { if(i*patGroupSize+j<pats) { groupPatterns[0] = p[i*patGroupSize+j]; groupPatterns[1] = NULL; groupTexts[0] = t[groupMatch[j]]; groupTexts[1] = NULL; if( !( kernel_launch_l (kernel, context, cmd_queue, groupPatterns, groupTexts, in, groupScores,&out[i*patGroupSize+j] ) ) ) return ( 0 ); } } } free ( groupScores ); clReleaseContext ( context ); clReleaseCommandQueue ( cmd_queue ); clReleaseKernel(kernel); return ( 1 ); }
vector<PathData> M2MFstAligner::write_alignment(const VectorFst<LogArc> &ifst, int nbest) {
    // Generic alignment generator.
    //
    // Maps the log-semiring lattice `ifst` to a StdArc lattice, re-weights
    // every arc, extracts the `nbest` shortest paths and returns them as
    // PathData entries. An empty vector is returned when the shortest-path
    // result has no states.
    VectorFst<StdArc> decodable;
    Map(ifst, &decodable, LogToStdMapper());

    // Heuristic re-weighting applied to every arc before decoding:
    //   1. the model weight of the arc's input label is multiplied by the
    //      value get_max_length() reports for that label's symbol (the
    //      multiplier for multi-token substrings);
    //   2. arcs for which get_max_length() reports -1 are set to 999;
    //   3. weights equal to LogWeight::Zero() ('Infinity') and NaN weights
    //      are also set to 999, a finite stand-in that lets ShortestPath
    //      produce something no matter what.
    // This favours 1-to-1 links: it yields worse 1-best alignments but a
    // more flexible 1-best corpus for small training sets. Training the
    // joint n-gram model on the alignment lattices avoids that trade-off.
    //
    // NOTE: this is going to fail if a new test item contains an alignment
    //  that never occurred in the original model.
    StateIterator<VectorFst<StdArc> > states(decodable);
    while (!states.Done()) {
        StdArc::StateId state = states.Value();

        MutableArcIterator<VectorFst<StdArc> > arcs(&decodable, state);
        while (!arcs.Done()) {
            StdArc arc = arcs.Value();

            const int span = get_max_length(isyms->Find(arc.ilabel));
            if (span != -1) {
                // Always penalise m-to-1 / 1-to-m links; for larger corpora
                // the plain model weight would probably be sufficient.
                arc.weight = alignment_model[arc.ilabel].Value() * span;
            } else {
                arc.weight = 999;
            }

            // 'Infinity' -> large finite weight.
            if (arc.weight == LogWeight::Zero())
                arc.weight = 999;
            // NaN is the only value that differs from itself.
            if (arc.weight != arc.weight)
                arc.weight = 999;

            arcs.SetValue(arc);
            arcs.Next();
        }

        states.Next();
    }

    VectorFst<StdArc> best_paths;
    ShortestPath(decodable, &best_paths, nbest);
    RmEpsilon(&best_paths);

    // Skip empty results. This should only happen when
    //   1. seq1_del=false && len(seq1)<len(seq2), or
    //   2. seq2_del=false && len(seq1)>len(seq2).
    // In both cases a 'skip' would be needed to guarantee at least one
    // valid alignment path through seq1*seq2, but the user parameters did
    // not allow it. It would probably be better to insert these where
    // necessary during initialization, regardless of user preferences.
    if (best_paths.NumStates() == 0) {
        return vector<PathData>();
    }

    FstPathFinder finder(skipSeqs);
    finder.isyms = isyms;
    finder.findAllStrings(best_paths);

    return finder.paths;
}
/* update the momenta with the gauge force */ void QOP_symanzik_1loop_gauge_force(QOP_info_t *info, QOP_GaugeField *gauge, QOP_Force *force, QOP_gauge_coeffs_t *coeffs, Real eps) { register int i,dir; register site *st; su3_matrix tmat1; register Real eb3; /* Note: eps now includes eps*beta */ register su3_matrix* momentum; su3_matrix *staple, *tempmat1; /* lengths of various kinds of loops */ int *loop_length = get_loop_length(); /* number of rotations/reflections for each kind */ int *loop_num = get_loop_num(); /* table of directions, 1 for each kind of loop */ int ***loop_table = get_loop_table(); /* table of coefficients in action, for various "representations" (actually, powers of the trace) */ Real **loop_coeff = get_loop_coeff(); /* We make our own */ int max_length = get_max_length(); /* For Symanzik 1 loop! */ int nloop = get_nloop(); int nreps = get_nreps(); su3_matrix *forwardlink[4]; su3_matrix *tmpmom[4]; int nflop = 153004; /* For Symanzik1 action */ Real final_flop; double dtime; int j,k; int *dirs,length; int *path_dir,path_length; int ln,iloop; Real action,act2,new_term; int ncount; char myname[] = "imp_gauge_force"; dtime=-dclock(); info->status = QOP_FAIL; /* Parity requirements */ if(gauge->evenodd != QOP_EVENODD || force->evenodd != QOP_EVENODD ) { printf("QOP_asqtad_force: Bad parity gauge %d force %d\n", gauge->evenodd, force->evenodd); return; } /* Map field pointers to local static pointers */ FORALLUPDIR(dir){ forwardlink[dir] = gauge->g + dir*sites_on_node; tmpmom[dir] = force->f + dir*sites_on_node; } /* Check loop coefficients */ if(coeffs->plaquette != loop_coeff[0][0] || coeffs->rectangle != loop_coeff[1][0] || coeffs->parallelogram != loop_coeff[2][0]) { printf("%s(%d): Path coeffs don't match\n",myname,this_node); return; } /* Allocate arrays according to action */ dirs = (int *)malloc(max_length*sizeof(int)); if(dirs == NULL){ printf("%s(%d): Can't malloc dirs\n",myname,this_node); return; } path_dir = (int 
*)malloc(max_length*sizeof(int)); if(path_dir == NULL){ printf("%s(%d): Can't malloc path_dir\n",myname,this_node); return; } staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); return; } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); return; } eb3 = eps/3.0; /* Loop over directions, update mom[dir] */ for(dir=XUP; dir<=TUP; dir++){ FORALLSITES(i,st)for(j=0;j<3;j++)for(k=0;k<3;k++){ staple[i].e[j][k]=cmplx(0.0,0.0); } END_LOOP ncount=0; for(iloop=0;iloop<nloop;iloop++){ length=loop_length[iloop]; for(ln=0;ln<loop_num[iloop];ln++){ /**printf("UPD: "); printpath( loop_table[iloop][ln], length );**/ /* set up dirs. we are looking at loop starting in "XUP" direction, rotate so it starts in "dir" direction. */ for(k=0;k<length;k++){ if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){ dirs[k]=(dir+loop_table[iloop][ln][k] )% 4; } else { dirs[k]=OPP_DIR( (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 ); } } path_length= length-1; /* generalized "staple" */ /* check for links in direction of momentum to be updated, each such link gives a contribution. Note the direction of the path - opposite the link. 
*/ for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) { if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[j] = dirs[(k+j+1)%length]; } if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[path_length-1-j] = OPP_DIR(dirs[(k+j+1)%length]); } /**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/ path_product(path_dir,path_length, tempmat1); /* We took the path in the other direction from our old convention in order to get it to end up "at our site", so now take adjoint */ /* then compute "single_action" contribution to staple */ FORALLSITES(i,st){ su3_adjoint( &(tempmat1[i]), &tmat1 ); /* first we compute the fundamental term */ new_term = loop_coeff[iloop][0]; /* now we add in the higher representations */ if(nreps > 1){ node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0); act2=1.0; action = 3.0 - realtrace_su3(forwardlink[dir]+i, &tmat1 ); for(j=1;j<nreps;j++){ act2 *= action; new_term += loop_coeff[iloop][j]*act2*(Real)(j+1); } } /* end if nreps > 1 */ scalar_mult_add_su3_matrix( &(staple[i]), &tmat1, new_term, &(staple[i]) ); } END_LOOP ncount++; } /* k (location in path) */ } /* ln */ } /* iloop */ /* Now multiply the staple sum by the link, then update momentum */ FORALLSITES(i,st){ mult_su3_na( forwardlink[dir]+i, &(staple[i]), &tmat1 ); momentum = tmpmom[dir] + i; scalar_mult_sub_su3_matrix( momentum, &tmat1, eb3, momentum ); } END_LOOP
/* update the momenta with the gauge force */ void imp_gauge_force_cpu( Real eps, field_offset mom_off ){ register int i,dir; register site *st; su3_matrix tmat1,tmat2; register Real eb3; register anti_hermitmat* momentum; su3_matrix *staple, *tempmat1; /* lengths of various kinds of loops */ int *loop_length = get_loop_length(); /* number of rotations/reflections for each kind */ int *loop_num = get_loop_num(); /* table of directions, 1 for each kind of loop */ int ***loop_table = get_loop_table(); /* table of coefficients in action, for various "representations" (actually, powers of the trace) */ Real **loop_coeff = get_loop_coeff(); int max_length = get_max_length(); int nloop = get_nloop(); int nreps = get_nreps(); #ifdef GFTIME int nflop = 153004; /* For Symanzik1 action */ double dtime; #endif int j,k; int *dirs,length; int *path_dir,path_length; int ln,iloop; Real action,act2,new_term; int ncount; char myname[] = "imp_gauge_force"; #ifdef GFTIME dtime=-dclock(); #endif dirs = (int *)malloc(max_length*sizeof(int)); if(dirs == NULL){ printf("%s(%d): Can't malloc dirs\n",myname,this_node); terminate(1); } path_dir = (int *)malloc(max_length*sizeof(int)); if(path_dir == NULL){ printf("%s(%d): Can't malloc path_dir\n",myname,this_node); terminate(1); } staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(staple == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); terminate(1); } tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix)); if(tempmat1 == NULL){ printf("%s(%d): Can't malloc temporary\n",myname,this_node); terminate(1); } eb3 = eps*beta/3.0; /* Loop over directions, update mom[dir] */ for(dir=XUP; dir<=TUP; dir++){ FORALLSITES(i,st)for(j=0;j<3;j++)for(k=0;k<3;k++){ staple[i].e[j][k]=cmplx(0.0,0.0); } END_LOOP ncount=0; for(iloop=0;iloop<nloop;iloop++){ length=loop_length[iloop]; for(ln=0;ln<loop_num[iloop];ln++){ /**printf("UPD: "); printpath( loop_table[iloop][ln], length );**/ /* set up dirs. 
we are looking at loop starting in "XUP" direction, rotate so it starts in "dir" direction. */ for(k=0;k<length;k++){ if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){ dirs[k]=(dir+loop_table[iloop][ln][k] )% 4; } else { dirs[k]=OPP_DIR( (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 ); } } path_length= length-1; /* generalized "staple" */ /* check for links in direction of momentum to be updated, each such link gives a contribution. Note the direction of the path - opposite the link. */ for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) { if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[j] = dirs[(k+j+1)%length]; } if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) { path_dir[path_length-1-j] = OPP_DIR(dirs[(k+j+1)%length]); } /**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/ path_product(path_dir,path_length, tempmat1); /* We took the path in the other direction from our old convention in order to get it to end up "at our site", so now take adjoint */ /* then compute "single_action" contribution to staple */ FORALLSITES(i,st){ su3_adjoint( &(tempmat1[i]), &tmat1 ); /* first we compute the fundamental term */ new_term = loop_coeff[iloop][0]; /* now we add in the higher representations */ if(nreps > 1){ node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0); act2=1.0; action = 3.0 - realtrace_su3(&(st->link[dir]), &tmat1 ); for(j=1;j<nreps;j++){ act2 *= action; new_term += loop_coeff[iloop][j]*act2*(Real)(j+1); } } /* end if nreps > 1 */ scalar_mult_add_su3_matrix( &(staple[i]), &tmat1, new_term, &(staple[i]) ); } END_LOOP ncount++; } /* k (location in path) */ } /* ln */ } /* iloop */ /* Now multiply the staple sum by the link, then update momentum */ FORALLSITES(i,st){ mult_su3_na( &(st->link[dir]), &(staple[i]), &tmat1 ); momentum = (anti_hermitmat *)F_PT(st,mom_off); uncompress_anti_hermitian( &momentum[dir], &tmat2 ); scalar_mult_sub_su3_matrix( &tmat2, &tmat1, eb3, &(staple[i]) ); 
make_anti_hermitian( &(staple[i]), &momentum[dir] ); } END_LOOP
void Arm::solve(Point3f goal_point, int life_count) {
    // Iteratively rotates the segments so that the end effector approaches
    // goal_point (given in the same frame as `base`; it is made relative to
    // `base` below and clamped to the arm's maximum length).
    //
    // Three error values are tracked:
    //   prev_err / curr_err - error before / after the most recent step,
    //                         used while halving a step;
    //   last_err            - error at the end of the last accepted
    //                         iteration; an iteration that does not beat it
    //                         is reverted and the solve stops.
    float prev_err, curr_err, last_err = 9999;
    Point3f current_point;
    int max_iterations = 200;
    int count = 0;
    float err_margin = 0.01;

    goal_point -= base;
    if (goal_point.norm() > get_max_length()) {
        goal_point = goal_point.normalized() * get_max_length();
    }

    current_point = calculate_end_effector();

    // all three errors start out as the initial distance to the goal
    prev_err = (goal_point - current_point).norm();
    curr_err = prev_err;
    last_err = curr_err;

    // keep iterating until the end effector is within err_margin of the goal
    while (curr_err > err_margin) {
        // offset still to be covered
        Vector3f dP = goal_point - current_point;

        int segment_size = segments.size();

        // The jacobian is assembled transposed: three rows per segment, one
        // for each of its right / up / z axes (easier to fill with Eigen).
        MatrixXf jac_t(3*segment_size, 3);
        for (int s = 0; s < segment_size; ++s) {
            Matrix<float, 1, 3> row_theta = compute_jacovian_segment(s, goal_point, segments[s]->get_right());
            Matrix<float, 1, 3> row_phi = compute_jacovian_segment(s, goal_point, segments[s]->get_up());
            Matrix<float, 1, 3> row_z = compute_jacovian_segment(s, goal_point, segments[s]->get_z());

            for (int c = 0; c < 3; ++c) {
                jac_t(3*s, c) = row_theta(0, c);
                jac_t(3*s + 1, c) = row_phi(0, c);
                jac_t(3*s + 2, c) = row_z(0, c);
            }
        }

        // final jacobian and its pseudo-inverse
        MatrixXf jac = jac_t.transpose();
        MatrixXf pinv_jac = pseudoInverse(jac);

        // angle changes, three consecutive entries per segment
        VectorXf changes = pinv_jac * dP;

        cout << "changes: " << changes << endl;

        for (int s = 0; s < segment_size; ++s) {
            // remember the transformation so the step can be undone
            segments[s]->save_transformation();

            // theta, phi and z angle changes, in that order
            segments[s]->apply_angle_change(changes[3*s], segments[s]->get_right());
            segments[s]->apply_angle_change(changes[3*s + 1], segments[s]->get_up());
            segments[s]->apply_angle_change(changes[3*s + 2], segments[s]->get_z());
        }

        // measure the error after the step
        current_point = calculate_end_effector();

        prev_err = curr_err;
        curr_err = (goal_point - current_point).norm();

        int halving_count = 0;

        cout << "curr err: " << curr_err << " || prev err: " << prev_err << " || last err: " << last_err << endl;

        // Overshoot guard: while the step made things worse than the last
        // accepted iteration, undo it and retry with half the step.
        while (curr_err > last_err) {
            // undo the step
            for (int s = 0; s < segment_size; ++s) {
                segments[s]->load_transformation();
            }
            current_point = calculate_end_effector();

            changes *= 0.5;

            // reapply the halved step
            for (int s = 0; s < segment_size; ++s) {
                segments[s]->save_transformation();

                segments[s]->apply_angle_change(changes[3*s], segments[s]->get_right());
                segments[s]->apply_angle_change(changes[3*s + 1], segments[s]->get_up());
                segments[s]->apply_angle_change(changes[3*s + 2], segments[s]->get_z());
            }

            // compute the end effector and measure the error again
            current_point = calculate_end_effector();
            prev_err = curr_err;
            curr_err = (goal_point - current_point).norm();

            cout << "|half| curr err: " << curr_err << " || prev err: " << prev_err << endl;

            // give up halving once the counter exceeds 100
            if (++halving_count > 100)
                break;
        }

        // Halving did not help: restore the last accepted pose and stop.
        if (curr_err > last_err) {
            for (int s = 0; s < segment_size; ++s) {
                segments[s]->load_last_transformation();
            }
            current_point = calculate_end_effector();
            curr_err = (goal_point - current_point).norm();
            cout << "curr iteration not better than last, reverting" << endl;
            cout << "curr err: " << curr_err << " || last err: " << last_err << endl;
            break;
        }

        // Accept the iteration: record the pose as the new "last" one.
        for (int s = 0; s < segment_size; ++s) {
            segments[s]->save_last_transformation();
        }
        cout << "curr err: " << curr_err << " || last err: " << last_err << endl;
        last_err = curr_err;
        cout << "last_err is now : " << last_err << endl;

        // make sure we don't loop forever
        ++count;
        if (count > max_iterations) {
            break;
        }
    }

    // life_count is currently unused: the recursive retry for solutions
    // that end above err_margin is disabled.
    cout << "final error: " << curr_err << endl;
}