Exemplo n.º 1
0
Arquivo: pager.c Projeto: tamentis/mdp
/*
 * Display all the results.
 *
 * This function assumes curses is initialized.
 *
 * When the number of visible results reaches the window height or
 * RESULTS_MAX_LEN, MSG_TOO_MANY is drawn centered in the window and nothing
 * else is shown. Otherwise every visible result is drawn, the whole listing
 * being centered vertically and (when it fits) horizontally.
 */
static void
refresh_listing(void)
{
	int top_offset, left_offset;
	unsigned int len = results_visible_length();
	struct result *result;

	if (len >= window_height || len >= RESULTS_MAX_LEN) {
		/*
		 * sizeof counts the terminating NUL, so the visible text is
		 * one character shorter. The column is clamped to 0 so that a
		 * window narrower than the message cannot make the unsigned
		 * subtraction wrap around to a huge, invalid column.
		 */
		unsigned int msg_len = (unsigned int)(sizeof(MSG_TOO_MANY) - 1);
		int msg_col = 0;

		if (window_width > 0 && (unsigned int)window_width > msg_len)
			msg_col = (int)(((unsigned int)window_width - msg_len) / 2);

		wmove(screen, window_height / 2, msg_col);
		waddstr(screen, MSG_TOO_MANY);
		refresh();
		return;
	}

	/* Center the block of results vertically. */
	top_offset = (window_height - len) / 2;

	/* Center horizontally unless the longest line is wider than the window. */
	if (window_width < get_max_length()) {
		left_offset = 0;
	} else {
		left_offset = (window_width - get_max_length()) / 2;
	}

	/*
	 * Place the lines on screen. Since curses will automatically wrap
	 * longer lines, we need to force a new-line on lines following them.
	 */
	for (unsigned int i = 0; i < ARRAY_LENGTH(&results); i++) {
		result = ARRAY_ITEM(&results, i);

		if (!result->visible)
			continue;

		wmove(screen, top_offset, left_offset);
		waddstr(screen, result->mbs_value);

		/* Skip the extra rows consumed by a wrapped line. */
		if (result->wcs_len > window_width) {
			top_offset += (result->wcs_len / window_width);
		}

		top_offset++;
	}

	refresh();
}
Exemplo n.º 2
0
/*
 * Run the GapMis kernel once for the NULL-terminated pattern list `p` against
 * the NULL-terminated text list `t`.
 *
 * Host-side vectors are built from the sequences, copied into freshly
 * allocated device buffers, the kernel is enqueued with one work-group of
 * pBlockSize work-items per pattern, and pats*pBlockSize floats are read back
 * into `scores`.
 *
 * Returns 1 on success. Returns 0 on failure with errno set to one of
 * MALLOC (host allocation), GPUMALLOC (device allocation / transfer),
 * GPUERROR (clFinish failed) or KERNEL (the kernel could not be enqueued).
 * All host and device buffers created here are released on every path.
 */
unsigned int kernel_launch (cl_kernel kernel, cl_context context, cl_command_queue cmd_queue, const char ** p, const char ** t, const struct gapmis_params * in, float * scores)
{
	unsigned int	ok = 0;
	int		error = 1;
	int		saved_errno;
	cl_int		err;

	unsigned int	pats = get_number_of_sequences (p);
	unsigned int	txts = get_number_of_sequences (t);

	unsigned int	maxTxtLen = get_max_length (txts, t);
	unsigned int	maxPatLen = get_max_length (pats, p);
	unsigned int	pBlockSize = get_pblock_size (txts,32);
	unsigned int	hproVecLen = pats * pBlockSize * (maxTxtLen + 1);
	unsigned int	dproVecLen = pats * pBlockSize * (maxPatLen + 1);

	/* Host-side staging buffers. */
	unsigned int * txtsVec = calloc(maxTxtLen*pBlockSize, sizeof(unsigned int));
	unsigned int * patsVec = calloc(maxPatLen*pats, sizeof(unsigned int));
	         int * argsVec = malloc(sizeof(int)*(pats+7));
	         int * txtsLenVec = calloc(pBlockSize,sizeof(int));
	       float * pensVec = malloc(sizeof(float)*2);
	         int * hproVec = calloc(hproVecLen,sizeof(int));
	         int * dproVec = calloc(dproVecLen,sizeof(int));

	/* Device buffers; NULL means "not created", so cleanup can skip them. */
	cl_mem txtsVec_device = NULL;
	cl_mem patsVec_device = NULL;
	cl_mem argsVec_device = NULL;
	cl_mem txtsLenVec_device = NULL;
	cl_mem pensVec_device = NULL;
	cl_mem hproVec_device = NULL;
	cl_mem dproVec_device = NULL;
	cl_mem scrsVec_device = NULL;

	size_t WorkSizeGlobal[1];
	size_t WorkSizeLocal[1];

	if(patsVec==NULL   || txtsVec==NULL      || argsVec==NULL ||
	   pensVec == NULL || txtsLenVec == NULL || hproVec==NULL ||
	   dproVec==NULL)
	{
		errno = MALLOC;
		goto cleanup;
	}

	fill_txtsVec (txts, pBlockSize, t, txtsVec, in->scoring_matrix);
	fill_patsVec (pats, maxPatLen, p, patsVec, in->scoring_matrix);
	fill_argsVec (pats, txts, p, in->max_gap, pBlockSize, maxPatLen, maxTxtLen, argsVec);
	fill_txtsLenVec (txts, t, txtsLenVec);

	/* The kernel receives the penalties negated. */
	pensVec[0] = - in -> gap_open_pen;
	pensVec[1] = - in -> gap_extend_pen;

	/*
	 * For each input: allocate the device buffer, then copy the host data
	 * into it. A handle is reset to NULL when its allocation reports an
	 * error so that cleanup never releases a handle that was not created.
	 */
	txtsVec_device = malloc_device (context, (maxTxtLen*pBlockSize)*sizeof(unsigned int), &error);
	if(error)
	{
		txtsVec_device = NULL;
		errno = GPUMALLOC;
		goto cleanup;
	}

	init_device_mem_uint (context, cmd_queue, txtsVec_device, txtsVec, maxTxtLen*pBlockSize, &error);
	if(error)
	{
		errno = GPUMALLOC;
		goto cleanup;
	}

	patsVec_device = malloc_device (context, (maxPatLen*pats)*sizeof(unsigned int), &error);
	if(error)
	{
		patsVec_device = NULL;
		errno = GPUMALLOC;
		goto cleanup;
	}

	init_device_mem_uint (context, cmd_queue, patsVec_device, patsVec,maxPatLen*pats, &error);
	if(error)
	{
		errno = GPUMALLOC;
		goto cleanup;
	}

	argsVec_device = malloc_device (context, (pats+7)*sizeof(int), &error);
	if(error)
	{
		argsVec_device = NULL;
		errno = GPUMALLOC;
		goto cleanup;
	}

	init_device_mem_int (context, cmd_queue, argsVec_device, argsVec, pats+7, &error);
	if(error)
	{
		errno = GPUMALLOC;
		goto cleanup;
	}

	txtsLenVec_device = malloc_device (context, pBlockSize*sizeof(int), &error);
	if(error)
	{
		txtsLenVec_device = NULL;
		errno = GPUMALLOC;
		goto cleanup;
	}

	init_device_mem_int (context, cmd_queue, txtsLenVec_device, txtsLenVec, pBlockSize, &error);
	if(error)
	{
		errno = GPUMALLOC;
		goto cleanup;
	}

	pensVec_device = malloc_device (context, 2*sizeof(float), &error);
	if(error)
	{
		pensVec_device = NULL;
		errno = GPUMALLOC;
		goto cleanup;
	}

	init_device_mem_float (context, cmd_queue, pensVec_device, pensVec, 2, &error);
	if(error)
	{
		errno = GPUMALLOC;
		goto cleanup;
	}

	hproVec_device = malloc_device (context, hproVecLen*sizeof(int), &error);
	if(error)
	{
		hproVec_device = NULL;
		errno = GPUMALLOC;
		goto cleanup;
	}

	init_device_mem_int (context, cmd_queue, hproVec_device, hproVec, hproVecLen, &error);
	if(error)
	{
		errno = GPUMALLOC;
		goto cleanup;
	}

	dproVec_device = malloc_device (context, dproVecLen*sizeof(int), &error);
	if(error)
	{
		dproVec_device = NULL;
		errno = GPUMALLOC;
		goto cleanup;
	}

	init_device_mem_int (context, cmd_queue, dproVec_device, dproVec, dproVecLen, &error);
	if(error)
	{
		errno = GPUMALLOC;
		goto cleanup;
	}

	/* Output buffer: one score per (pattern, block slot); not initialised. */
	scrsVec_device = malloc_device (context, (pats*pBlockSize)*sizeof(float), &error);
	if(error)
	{
		scrsVec_device = NULL;
		errno = GPUMALLOC;
		goto cleanup;
	}

	/* Wait for the transfers queued above. */
	err = clFinish(cmd_queue);
	if(err != CL_SUCCESS)
	{
		errno = GPUERROR;
		goto cleanup;
	}

	/* connect the input arguments of the kernel with the corresponding mem */
	set_kernel_arguments (kernel, cmd_queue, patsVec_device, txtsVec_device, argsVec_device, txtsLenVec_device, pensVec_device, hproVec_device, dproVec_device, scrsVec_device);

	/* synchronisation */
	err = clFinish(cmd_queue);
	if(err != CL_SUCCESS)
	{
		errno = GPUERROR;
		goto cleanup;
	}

	/* WorkSizeGlobal is the total number of threads of the device */
	WorkSizeGlobal[0] = pBlockSize * pats;
	/* WorkSizeLocal is the number of threads per group */
	WorkSizeLocal[0] = pBlockSize;

	/*
	 * kernel enters the command queue using WorkSizeGlobal and WorkSizeLocal.
	 * The enqueue status is returned in `err`; checking `error` here (as the
	 * previous code did) could never detect a failed enqueue.
	 */
	err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, WorkSizeGlobal, WorkSizeLocal, 0, NULL, NULL);
	if(err != CL_SUCCESS)
	{
		errno = KERNEL;
		goto cleanup;
	}

	/* finalise the kernel */
	err = clFinish(cmd_queue);
	if(err != CL_SUCCESS)
	{
		errno = GPUERROR;
		goto cleanup;
	}

	/* return the results from the GPU to the CPU */
	read_device_mem_float (cmd_queue, pats*pBlockSize, scores, scrsVec_device, &error);
	if(error)
	{
		errno = GPUMALLOC;
		goto cleanup;
	}

	ok = 1;

cleanup:
	/* Keep the error code set above intact while releasing resources. */
	saved_errno = errno;

	free (txtsVec);
	free (patsVec);
	free (argsVec);
	free (txtsLenVec);
	free (pensVec);
	free (hproVec);
	free (dproVec);

	if (patsVec_device != NULL)
		clReleaseMemObject(patsVec_device);
	if (txtsVec_device != NULL)
		clReleaseMemObject(txtsVec_device);
	if (argsVec_device != NULL)
		clReleaseMemObject(argsVec_device);
	if (txtsLenVec_device != NULL)
		clReleaseMemObject(txtsLenVec_device);
	if (pensVec_device != NULL)
		clReleaseMemObject(pensVec_device);
	if (hproVec_device != NULL)
		clReleaseMemObject(hproVec_device);
	if (dproVec_device != NULL)
		clReleaseMemObject(dproVec_device);
	if (scrsVec_device != NULL)
		clReleaseMemObject(scrsVec_device);

	errno = saved_errno;

	return ( ok );
}
Exemplo n.º 3
0
/*
 * Score every pattern group against every text group on the GPU, then
 * compute the alignment of each pattern with its best-scoring text.
 *
 * `p`/`pats` and `t`/`txts` are the pattern and text lists with their
 * element counts; results are written to out[pattern index].
 *
 * Returns 1 on success. Returns 0 on failure, with errno set to MALLOC when
 * the score buffer cannot be allocated, or left as set by kernel_launch /
 * kernel_launch_l when one of those fails. The score buffer is always freed.
 *
 * NOTE(review): groupMatch[j] is used to index `t` without checking for the
 * value stored by set_invalid -- confirm a match is always found.
 */
static unsigned int gapmis_one_to_many_run_groups ( cl_kernel kernel, cl_context context, cl_command_queue cmd_queue, const char ** p, unsigned int pats, const char ** t, unsigned int txts, const struct gapmis_params * in, struct gapmis_align * out )
{
	const unsigned int patGroupSize = 1;
	const unsigned int txtGroupSize = 768;
	unsigned int i, j;
	unsigned int ok = 1;
	int saved_errno;
	unsigned int patGroups = get_number_of_groups (pats, patGroupSize);
	unsigned int txtGroups = get_number_of_groups (txts, txtGroupSize);

	/* NULL-terminated windows over the pattern and text lists. */
	const char * groupPatterns[patGroupSize+1];
	const char * groupTexts[txtGroupSize+1];

	/* Best text index and its score for each pattern of the current group. */
	int groupMatch [patGroupSize];
	float groupMatchScores [patGroupSize];

	float * groupScores = calloc (patGroupSize*txtGroupSize, sizeof(float) );

	if ( groupScores == NULL )
	{
		errno = MALLOC;
		return ( 0 );
	}

	set_null (groupPatterns, patGroupSize+1);
	set_null (groupTexts, txtGroupSize+1);
	set_invalid(groupMatch,patGroupSize);
	set_minimum(groupMatchScores,patGroupSize);

	for(i=0; ok && i<patGroups; i++)
	{
		set_null (groupPatterns, patGroupSize+1);
		initialize_pointers (groupPatterns,i,patGroupSize,p,pats);
		set_invalid(groupMatch,patGroupSize);
		set_minimum(groupMatchScores,patGroupSize);

		/* Pass 1: score this pattern group against every text group. */
		for(j=0; ok && j<txtGroups; j++)
		{
			set_null (groupTexts, txtGroupSize+1);
			initialize_pointers (groupTexts,j,txtGroupSize,t,txts);

			if( ! ( kernel_launch (kernel, context, cmd_queue, groupPatterns, groupTexts, in, groupScores) ))
			{
				ok = 0;
				break;
			}

			update_group_match (groupScores,groupMatch,groupMatchScores,patGroupSize,txtGroupSize, pats, txts, i, j);
		}

		/* Pass 2: align each pattern with the text selected in pass 1. */
		for(j=0; ok && j<patGroupSize; j++)
		{
			if(i*patGroupSize+j<pats)
			{
				groupPatterns[0] = p[i*patGroupSize+j];
				groupPatterns[1] = NULL;

				groupTexts[0] = t[groupMatch[j]];
				groupTexts[1] = NULL;

				if( !( kernel_launch_l (kernel, context, cmd_queue, groupPatterns, groupTexts, in, groupScores,&out[i*patGroupSize+j] ) ) )
					ok = 0;
			}
		}
	}

	/* Do not let free() disturb the error code reported to the caller. */
	saved_errno = errno;
	free ( groupScores );
	errno = saved_errno;

	return ( ok );
}

/*
 * Align the single pattern `p1` against the NULL-terminated text list `t` on
 * the GPU, writing the alignment with the best-scoring text to `out`.
 *
 * Returns 1 on success. Returns 0 on failure with errno set to:
 *   MATRIX   - in->scoring_matrix is greater than 1
 *   BADCHAR  - check_sequences rejected the pattern or a text
 *   LENGTH   - the pattern is longer than the shortest text
 *   MAXGAP   - in->max_gap is not smaller than the shortest text
 *   NOGPU    - no GPU platform / device could be obtained
 *   GPUERROR - the context or the command queue could not be created
 *   KERNEL   - the kernel could not be loaded
 * or to whatever the kernel launch helpers reported. The OpenCL context,
 * command queue and kernel created here are released on every path.
 */
unsigned int gapmis_one_to_many_opt_gpu ( const char * p1, const char ** t, const struct gapmis_params * in, struct gapmis_align * out )
{
	const char * p[] = { p1, NULL};
	unsigned int ok = 0;
	int err = -1;
	int saved_errno;
	unsigned int pats, txts, maxPatLen, minTxtLen;
	cl_platform_id gpu_id;
	cl_device_id dev_id;
	cl_context context = NULL;
	cl_command_queue cmd_queue = NULL;
	cl_kernel kernel = NULL;

	/* ---- argument validation: nothing is held yet, plain returns ---- */

	if ( in -> scoring_matrix > 1 )
	{
		errno = MATRIX;
		return ( 0 );
	}

	pats = get_number_of_sequences (p);
	txts = get_number_of_sequences (t);
	maxPatLen = get_max_length (pats, p);
	minTxtLen = get_min_length (txts, t);

	if (check_sequences(pats,p,in->scoring_matrix)==0)
	{
		errno = BADCHAR;
		return ( 0 );
	}

	if (check_sequences(txts,t,in->scoring_matrix)==0)
	{
		errno = BADCHAR;
		return ( 0 );
	}

	if(maxPatLen > minTxtLen)
	{
		errno = LENGTH;
		return ( 0 );
	}

	if ( in -> max_gap >= minTxtLen )
	{
		errno = MAXGAP;
		return ( 0 );
	}

	/* ---- OpenCL setup ---- */

	/* get the GPU id */
	gpu_id = get_gpu_id(&err);
	if(err)
	{
		errno = NOGPU;
		return ( 0 );
	}

	/* get the device id */
	dev_id = get_dev_id(gpu_id, &err);
	if(err)
	{
		errno = NOGPU;
		return ( 0 );
	}

	/* create the context using dev_id */
	context = create_context(dev_id, &err);
	if(err)
	{
		errno = GPUERROR;
		return ( 0 );
	}

	/* create a list with the commands to be executed by GPU */
	cmd_queue = create_cmd_queue (dev_id, context, &err);
	if(err)
	{
		cmd_queue = NULL;
		errno = GPUERROR;
		goto cleanup;
	}

	/* load ``gapmis_kernel'' from the DNA or the protein kernel source */
	if(in->scoring_matrix==0)
		kernel = load_kernel ("kernel_dna.cl", "gapmis_kernel", dev_id, context, &err);
	else
		kernel = load_kernel ("kernel_pro.cl", "gapmis_kernel", dev_id, context, &err);

	if(err)
	{
		kernel = NULL;
		errno = KERNEL;
		goto cleanup;
	}

	ok = gapmis_one_to_many_run_groups (kernel, context, cmd_queue, p, pats, t, txts, in, out);

cleanup:
	/* Release in reverse order of creation, preserving the reported errno. */
	saved_errno = errno;

	if (kernel != NULL)
		clReleaseKernel(kernel);
	if (cmd_queue != NULL)
		clReleaseCommandQueue ( cmd_queue );
	clReleaseContext ( context );

	errno = saved_errno;

	return ( ok );
}
Exemplo n.º 4
0
vector<PathData> M2MFstAligner::write_alignment(const VectorFst<LogArc> &ifst,
        int nbest)
{
    // Generic alignment generator: map the lattice to StdArc, re-weight
    // every arc, then extract the nbest shortest paths.
    VectorFst<StdArc> fst;
    Map(ifst, &fst, LogToStdMapper());

    // Weight given to arcs that should effectively never be chosen. It
    // replaces 'Infinity' with a large finite value so that ShortestPath
    // still produces something no matter what.
    const int kBlockedWeight = 999;

    // Heuristic re-weighting applied prior to decoding:
    //  1. The model weight of an arc is multiplied by the value that
    //     get_max_length() returns for its input symbol.  This is the
    //     'maxl' heuristic inherited from m2m-aligner: it penalizes
    //     m-to-1 / 1-to-m links.  It produces WORSE 1-best alignments but
    //     a more flexible 1-best corpus (fewer 'chunks'), which gives
    //     better joint n-gram models for small training corpora.  For
    //     larger corpora it is probably unnecessary; training on the
    //     alignment lattices also avoids the problem.
    //  2. Arcs for which get_max_length() returns -1 get kBlockedWeight.
    //  3. Arcs whose weight is LogWeight::Zero() or NaN get kBlockedWeight.
    // NOTE: this is going to fail if we encounter any alignments in a new
    //  test item that never occurred in the original model.
    StateIterator<VectorFst<StdArc> > states(fst);
    while (!states.Done()) {
        const StdArc::StateId state = states.Value();

        MutableArcIterator<VectorFst<StdArc> > arcs(&fst, state);
        while (!arcs.Done()) {
            StdArc arc = arcs.Value();
            const int maxl = get_max_length(isyms->Find(arc.ilabel));

            if (maxl != -1) {
                arc.weight = alignment_model[arc.ilabel].Value() * maxl;
            } else {
                arc.weight = kBlockedWeight;
            }

            // A value that differs from itself is NaN.
            if (arc.weight == LogWeight::Zero() || arc.weight != arc.weight) {
                arc.weight = kBlockedWeight;
            }

            arcs.SetValue(arc);
            arcs.Next();
        }

        states.Next();
    }

    VectorFst<StdArc> shortest;
    ShortestPath(fst, &shortest, nbest);
    RmEpsilon(&shortest);

    // Skip empty results.  This should only happen when
    //  1. seq1_del=false && len(seq1)<len(seq2), or
    //  2. seq2_del=false && len(seq1)>len(seq2).
    // In both cases a 'skip' would be needed to guarantee at least one
    // valid alignment path through seq1*seq2, but the user parameters did
    // not allow it.  It would probably be better to insert these where
    // necessary during initialization, regardless of user preferences.
    if (shortest.NumStates() == 0) {
        return vector<PathData>();
    }

    // Enumerate the surviving paths and hand them back to the caller.
    FstPathFinder pathfinder(skipSeqs);
    pathfinder.isyms = isyms;
    pathfinder.findAllStrings(shortest);
    return pathfinder.paths;
}
Exemplo n.º 5
0
/* update the momenta with the gauge force */
void QOP_symanzik_1loop_gauge_force(QOP_info_t *info, QOP_GaugeField *gauge, 
		    QOP_Force *force, QOP_gauge_coeffs_t *coeffs, Real eps)
{
    register int i,dir;
    register site *st;
    su3_matrix tmat1;
    register Real eb3;    /* Note: eps now includes eps*beta */
    register su3_matrix* momentum;
    su3_matrix *staple, *tempmat1;

    /* lengths of various kinds of loops */
    int *loop_length = get_loop_length();
    /* number of rotations/reflections  for each kind */
    int *loop_num = get_loop_num();
    /* table of directions, 1 for each kind of loop */
    int ***loop_table = get_loop_table();
    /* table of coefficients in action, for various "representations"
	(actually, powers of the trace) */
    Real **loop_coeff = get_loop_coeff(); /* We make our own */
    int max_length = get_max_length(); /* For Symanzik 1 loop! */
    int nloop = get_nloop();
    int nreps = get_nreps();
    su3_matrix *forwardlink[4];
    su3_matrix *tmpmom[4];

    int nflop = 153004;  /* For Symanzik1 action */
    Real final_flop;
    double dtime;
    int j,k;
    int *dirs,length;
    int *path_dir,path_length;

    int ln,iloop;
    Real action,act2,new_term;

    int ncount;
    char myname[] = "imp_gauge_force";

    dtime=-dclock();

    info->status = QOP_FAIL;

    /* Parity requirements */
    if(gauge->evenodd != QOP_EVENODD ||
       force->evenodd != QOP_EVENODD
       )
      {
	printf("QOP_asqtad_force: Bad parity gauge %d force %d\n",
	       gauge->evenodd, force->evenodd);
	return;
      }

    /* Map field pointers to local static pointers */
    
    FORALLUPDIR(dir){
      forwardlink[dir] = gauge->g + dir*sites_on_node;
      tmpmom[dir]  = force->f + dir*sites_on_node;
    }
    /* Check loop coefficients */

    if(coeffs->plaquette != loop_coeff[0][0] ||
       coeffs->rectangle != loop_coeff[1][0] ||
       coeffs->parallelogram != loop_coeff[2][0])
      {
	printf("%s(%d): Path coeffs don't match\n",myname,this_node);
	return;
      }

    /* Allocate arrays according to action */
    dirs = (int *)malloc(max_length*sizeof(int));
    if(dirs == NULL){
      printf("%s(%d): Can't malloc dirs\n",myname,this_node);
      return;
    }

    path_dir = (int *)malloc(max_length*sizeof(int));
    if(path_dir == NULL){
      printf("%s(%d): Can't malloc path_dir\n",myname,this_node);
      return;
    }
    staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
    if(staple == NULL){
      printf("%s(%d): Can't malloc temporary\n",myname,this_node);
      return;
    }

    tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
    if(tempmat1 == NULL){
      printf("%s(%d): Can't malloc temporary\n",myname,this_node);
      return;
    }

    eb3 = eps/3.0;

    /* Loop over directions, update mom[dir] */
    for(dir=XUP; dir<=TUP; dir++){

	FORALLSITES(i,st)for(j=0;j<3;j++)for(k=0;k<3;k++){
			staple[i].e[j][k]=cmplx(0.0,0.0);
	} END_LOOP

	ncount=0;
	for(iloop=0;iloop<nloop;iloop++){
	    length=loop_length[iloop];
	    for(ln=0;ln<loop_num[iloop];ln++){
/**printf("UPD:  "); printpath( loop_table[iloop][ln], length );**/
		/* set up dirs.  we are looking at loop starting in "XUP"
		   direction, rotate so it starts in "dir" direction. */
		for(k=0;k<length;k++){
                    if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){
                	dirs[k]=(dir+loop_table[iloop][ln][k] )% 4;
		    }
            	    else {
                        dirs[k]=OPP_DIR(
			    (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 );
		    }
		}

		path_length= length-1;  /* generalized "staple" */

		/* check for links in direction of momentum to be
		   updated, each such link gives a contribution. Note
		   the direction of the path - opposite the link. */
		for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) {
		    if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) {
			path_dir[j] = dirs[(k+j+1)%length];
		    }
		    if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) {
			path_dir[path_length-1-j] =
			    OPP_DIR(dirs[(k+j+1)%length]);
		    }
/**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/
		    path_product(path_dir,path_length, tempmat1);

		    /* We took the path in the other direction from our
			old convention in order to get it to end up
			"at our site", so now take adjoint */
		    /* then compute "single_action" contribution to
			staple */
		    FORALLSITES(i,st){
			su3_adjoint( &(tempmat1[i]), &tmat1 );
			/* first we compute the fundamental term */
			new_term = loop_coeff[iloop][0];

			/* now we add in the higher representations */
			if(nreps > 1){
node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0);
			    act2=1.0;
			    action = 3.0 - realtrace_su3(forwardlink[dir]+i,
			      &tmat1 ); 

			    for(j=1;j<nreps;j++){
				act2 *= action;
				new_term +=
				    loop_coeff[iloop][j]*act2*(Real)(j+1);
			    }
			}  /* end if nreps > 1 */

			scalar_mult_add_su3_matrix( &(staple[i]), &tmat1,
				new_term, &(staple[i]) );

		    } END_LOOP

		    ncount++;

		} /* k (location in path) */
	    } /* ln */
	} /* iloop */

	/* Now multiply the staple sum by the link, then update momentum */
	FORALLSITES(i,st){
	    mult_su3_na( forwardlink[dir]+i, &(staple[i]), &tmat1 );
	    momentum = tmpmom[dir] + i;
	    scalar_mult_sub_su3_matrix( momentum, &tmat1,
		eb3, momentum );
	} END_LOOP
Exemplo n.º 6
0
/* update the momenta with the gauge force */
void imp_gauge_force_cpu( Real eps, field_offset mom_off ){
    register int i,dir;
    register site *st;
    su3_matrix tmat1,tmat2;
    register Real eb3;
    register anti_hermitmat* momentum;
    su3_matrix *staple, *tempmat1;

    /* lengths of various kinds of loops */
    int *loop_length = get_loop_length();
    /* number of rotations/reflections  for each kind */
    int *loop_num = get_loop_num();
    /* table of directions, 1 for each kind of loop */
    int ***loop_table = get_loop_table();
    /* table of coefficients in action, for various "representations"
	(actually, powers of the trace) */
    Real **loop_coeff = get_loop_coeff();
    int max_length = get_max_length();
    int nloop = get_nloop();
    int nreps = get_nreps();

#ifdef GFTIME
    int nflop = 153004;  /* For Symanzik1 action */
    double dtime;
#endif
    int j,k;
    int *dirs,length;
    int *path_dir,path_length;

    int ln,iloop;
    Real action,act2,new_term;

    int ncount;
    char myname[] = "imp_gauge_force";

#ifdef GFTIME
    dtime=-dclock();
#endif

    dirs = (int *)malloc(max_length*sizeof(int));
    if(dirs == NULL){
      printf("%s(%d): Can't malloc dirs\n",myname,this_node);
      terminate(1);
    }
    path_dir = (int *)malloc(max_length*sizeof(int));
    if(path_dir == NULL){
      printf("%s(%d): Can't malloc path_dir\n",myname,this_node);
      terminate(1);
    }
    staple = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
    if(staple == NULL){
      printf("%s(%d): Can't malloc temporary\n",myname,this_node);
      terminate(1);
    }

    tempmat1 = (su3_matrix *)special_alloc(sites_on_node*sizeof(su3_matrix));
    if(tempmat1 == NULL){
      printf("%s(%d): Can't malloc temporary\n",myname,this_node);
      terminate(1);
    }

    eb3 = eps*beta/3.0;

    /* Loop over directions, update mom[dir] */
    for(dir=XUP; dir<=TUP; dir++){

	FORALLSITES(i,st)for(j=0;j<3;j++)for(k=0;k<3;k++){
			staple[i].e[j][k]=cmplx(0.0,0.0);
	} END_LOOP

	ncount=0;
	for(iloop=0;iloop<nloop;iloop++){
	    length=loop_length[iloop];
	    for(ln=0;ln<loop_num[iloop];ln++){
/**printf("UPD:  "); printpath( loop_table[iloop][ln], length );**/
		/* set up dirs.  we are looking at loop starting in "XUP"
		   direction, rotate so it starts in "dir" direction. */
		for(k=0;k<length;k++){
                    if( GOES_FORWARDS(loop_table[iloop][ln][k]) ){
                	dirs[k]=(dir+loop_table[iloop][ln][k] )% 4;
		    }
            	    else {
                        dirs[k]=OPP_DIR(
			    (dir+OPP_DIR(loop_table[iloop][ln][k]))%4 );
		    }
		}

		path_length= length-1;  /* generalized "staple" */

		/* check for links in direction of momentum to be
		   updated, each such link gives a contribution. Note
		   the direction of the path - opposite the link. */
		for(k=0;k<length;k++)if( dirs[k]==dir||dirs[k]==OPP_DIR(dir)) {
		    if( GOES_FORWARDS(dirs[k]) ) for(j=0;j<path_length;j++) {
			path_dir[j] = dirs[(k+j+1)%length];
		    }
		    if( GOES_BACKWARDS(dirs[k]) ) for(j=0;j<path_length;j++) {
			path_dir[path_length-1-j] =
			    OPP_DIR(dirs[(k+j+1)%length]);
		    }
/**if(dir==XUP)printf("X_UPDATE PATH: "); printpath( path_dir, path_length );**/
		    path_product(path_dir,path_length, tempmat1);

		    /* We took the path in the other direction from our
			old convention in order to get it to end up
			"at our site", so now take adjoint */
		    /* then compute "single_action" contribution to
			staple */
		    FORALLSITES(i,st){
			su3_adjoint( &(tempmat1[i]), &tmat1 );
			/* first we compute the fundamental term */
			new_term = loop_coeff[iloop][0];

			/* now we add in the higher representations */
			if(nreps > 1){
node0_printf("WARNING: THIS CODE IS NOT TESTED\n"); exit(0);
			    act2=1.0;
			    action = 3.0 - realtrace_su3(&(st->link[dir]),
				&tmat1 ); 

			    for(j=1;j<nreps;j++){
				act2 *= action;
				new_term +=
				    loop_coeff[iloop][j]*act2*(Real)(j+1);
			    }
			}  /* end if nreps > 1 */

			scalar_mult_add_su3_matrix( &(staple[i]), &tmat1,
				new_term, &(staple[i]) );

		    } END_LOOP

		    ncount++;

		} /* k (location in path) */
	    } /* ln */
	} /* iloop */

	/* Now multiply the staple sum by the link, then update momentum */
	FORALLSITES(i,st){
	    mult_su3_na( &(st->link[dir]), &(staple[i]), &tmat1 );
	    momentum = (anti_hermitmat *)F_PT(st,mom_off);
	    uncompress_anti_hermitian( &momentum[dir], &tmat2 );
	    scalar_mult_sub_su3_matrix( &tmat2, &tmat1,
		eb3, &(staple[i]) );
	    make_anti_hermitian( &(staple[i]), &momentum[dir] );
	} END_LOOP
Exemplo n.º 7
0
void Arm::solve(Point3f goal_point, int life_count) {
    // Iteratively moves the arm's end effector towards goal_point using the
    // pseudo-inverse of the Jacobian, with step halving whenever a step makes
    // the error worse than at the end of the previous iteration.
    //
    // NOTE: life_count is not read by this function; it was only used by a
    // recursive retry that is no longer active.
    //
    // Error bookkeeping:
    //   curr_err - error after the most recent step
    //   prev_err - error before the most recent step (only printed)
    //   last_err - error at the end of the last accepted iteration
    const int max_iterations = 200;
    const float err_margin = 0.01;
    Point3f current_point;

    // Work relative to the base and pull an unreachable goal back onto the
    // sphere of radius get_max_length().
    goal_point -= base;
    if (goal_point.norm() > get_max_length()) {
        goal_point = goal_point.normalized() * get_max_length();
    }

    current_point = calculate_end_effector();

    float curr_err = (goal_point - current_point).norm();
    float prev_err = curr_err;
    float last_err = curr_err;

    int count = 0;

    // Iterate until the end effector is within err_margin of the goal.
    while (curr_err > err_margin) {
        // Offset still to be covered by the end effector.
        const Vector3f dP = goal_point - current_point;

        const int segment_size = segments.size();

        // Build the transposed Jacobian: three rows (theta, phi, z) per
        // segment, which is the easier layout to fill with Eigen.
        MatrixXf jac_t(3*segment_size, 3);
        for (int s = 0; s < segment_size; ++s) {
            const int row = 3*s;

            Matrix<float, 1, 3> row_theta = compute_jacovian_segment(s, goal_point, segments[s]->get_right());
            Matrix<float, 1, 3> row_phi = compute_jacovian_segment(s, goal_point, segments[s]->get_up());
            Matrix<float, 1, 3> row_z = compute_jacovian_segment(s, goal_point, segments[s]->get_z());

            jac_t.row(row) = row_theta;
            jac_t.row(row + 1) = row_phi;
            jac_t.row(row + 2) = row_z;
        }

        // The actual Jacobian and its pseudo-inverse.
        MatrixXf jac = jac_t.transpose();

        MatrixXf pinv_jac(3*segment_size, 3);
        pinv_jac = pseudoInverse(jac);

        // One angle delta per (segment, axis), in the same order as the rows.
        Matrix<float, Dynamic, 1> changes = pinv_jac * dP;

        cout << "changes: " << changes << endl;

        // Apply the full step, remembering each segment's prior state.
        for (int s = 0; s < segment_size; ++s) {
            const int row = 3*s;

            segments[s]->save_transformation();

            segments[s]->apply_angle_change(changes[row], segments[s]->get_right());
            segments[s]->apply_angle_change(changes[row + 1], segments[s]->get_up());
            segments[s]->apply_angle_change(changes[row + 2], segments[s]->get_z());
        }

        current_point = calculate_end_effector();

        prev_err = curr_err;
        curr_err = (goal_point - current_point).norm();

        cout << "curr err: " << curr_err << " || prev err: " << prev_err << " || last err: " << last_err << endl;

        // If the step overshot, undo it and retry with half the step, up to
        // a bounded number of halvings.
        int halvings = 0;
        while (curr_err > last_err) {
            for (int s = 0; s < segment_size; ++s) {
                segments[s]->load_transformation();
            }
            current_point = calculate_end_effector();

            changes *= 0.5;

            for (int s = 0; s < segment_size; ++s) {
                const int row = 3*s;

                segments[s]->save_transformation();

                segments[s]->apply_angle_change(changes[row], segments[s]->get_right());
                segments[s]->apply_angle_change(changes[row + 1], segments[s]->get_up());
                segments[s]->apply_angle_change(changes[row + 2], segments[s]->get_z());
            }

            current_point = calculate_end_effector();
            prev_err = curr_err;
            curr_err = (goal_point - current_point).norm();

            cout << "|half| curr err: " << curr_err << " || prev err: " << prev_err << endl;

            if (++halvings > 100)
                break;
        }

        // Halving did not help: restore the last accepted pose and give up.
        if (curr_err > last_err) {
            for (int s = 0; s < segment_size; ++s) {
                segments[s]->load_last_transformation();
            }
            current_point = calculate_end_effector();
            curr_err = (goal_point - current_point).norm();
            cout << "curr iteration not better than last, reverting" << endl;
            cout << "curr err: " << curr_err << " || last err: " << last_err << endl;
            break;
        }

        // The step was accepted: record the pose as the new fallback.
        for (int s = 0; s < segment_size; ++s) {
            segments[s]->save_last_transformation();
        }
        cout << "curr err: " << curr_err << " || last err: " << last_err << endl;
        last_err = curr_err;
        cout << "last_err is now : " << last_err << endl;

        // Bound the number of iterations so the loop always terminates.
        if (++count > max_iterations) {
            break;
        }
    }

    cout << "final error: " << curr_err << endl;
}