示例#1
0
// Record one log entry on behalf of an explicitly named task.
//
// When isTaskPtrNull() reports true (the original comment describes this as
// "the log has not been initialized") the entry is formatted and printed
// directly, without any facility/priority filtering, and OS_SUCCESS is
// returned.  Otherwise the entry is handed to vadd() only when willLog()
// accepts it; a rejected entry leaves the result at OS_UNSPECIFIED.
OsStatus OsSysLog::add(const char*            taskName,
                       const int              taskId,
                       const OsSysLogFacility facility,
                       const OsSysLogPriority priority,
                       const char*            format,
                                              ...)
{
   if (!isTaskPtrNull())
   {
      // Filter first so that rejected entries never touch the
      // variable arguments.
      if (!willLog(facility, priority))
      {
         return OS_UNSPECIFIED;
      }

      va_list args;
      va_start(args, format);
      const OsStatus status =
         vadd(taskName, taskId, facility, priority, format, args);
      va_end(args);
      return status;
   }

   // Direct-print path: flatten the variable arguments into one string
   UtlString message ;
   va_list args;
   va_start(args, format);
   myvsprintf(message, format, args) ;
   message = escape(message) ;
   va_end(args);

   // A missing task name is shown as an empty string
   const char* shownName = (taskName == NULL) ? "" : taskName ;

   osPrintf("%s %s %s 0x%08X %s\n",
         OsSysLog::sFacilityNames[facility],
         OsSysLog::sPriorityNames[priority],
         shownName,
         taskId,
         message.data()) ;

   return OS_SUCCESS ;
}
示例#2
0
文件: camera_setup.c 项目: SN9NV/RT
/*
** Derives the view-plane description stored in e->camera from the camera's
** loc, dir and up vectors and the e->x by e->y size: the two in-plane unit
** vectors (u, v), the plane corner point (l) and the per-step sizes
** (stepx, stepy).
*/
void		setup_camera_plane(t_env *e)
{
	t_vector	back;
	t_vector	centre;
	t_vector	half_u;
	t_vector	half_v;
	double		width;
	double		height;

	height = 18.0 * ARBITRARY_NUMBER / 35.0;
	width = height * (double)e->x / (double)e->y;
	back = vsub(e->camera.loc, e->camera.dir);
	back = vunit(back);
	e->camera.u = vunit(vcross(e->camera.up, back));
	e->camera.v = vunit(vcross(back, e->camera.u));
	centre = vsub(e->camera.loc, vmult(back, ARBITRARY_NUMBER));
	half_u = vmult(e->camera.u, width / 2.0);
	half_v = vmult(e->camera.v, height / 2.0);
	e->camera.l = vadd(vsub(centre, half_u), half_v);
	e->camera.stepx = width / (double)e->x;
	e->camera.stepy = height / (double)e->y;
}
示例#3
0
// Record one log entry on behalf of the calling task.
//
// The task name and id are looked up from OsTask::getCurrentTask(); when no
// task object is available the name "Anon" is used.  Unlike the overload
// that takes an explicit task name, nothing is printed when isTaskPtrNull()
// is true: the call simply reports OS_SUCCESS.  An entry rejected by
// willLog() yields OS_UNSPECIFIED; otherwise the result of vadd() is returned.
OsStatus OsSysLog::add(const OsSysLogFacility facility,
                       const OsSysLogPriority priority,
                       const char*            format,
                                              ...)
{
   // No task pointer: accept the entry silently
   if (isTaskPtrNull())
   {
      return OS_SUCCESS ;
   }

   // Filtered out: leave the status unspecified
   if (!willLog(facility, priority))
   {
      return OS_UNSPECIFIED ;
   }

   UtlString  callerName ;
   OsTaskId_t callerId = 0 ;

   va_list args;
   va_start(args, format);

   OsTaskBase* pCurrent = OsTask::getCurrentTask() ;
   if (pCurrent == NULL)
   {
      // TODO: should get abstracted into a OsTaskBase method
#ifdef __pingtel_on_posix__
      OsTaskLinux::getCurrentTaskId(callerId );
#endif
      callerName = "Anon";
      // OsTask::getIdString_d(callerName, callerId);
   }
   else
   {
      callerName = pCurrent->getName() ;
      pCurrent->id(callerId) ;
   }

   OsStatus status = vadd(callerName.data(), callerId, facility, priority, format, args);
   va_end(args);

   return status;
}
示例#4
0
// Refreshes position, velocity and velocity history for every obstacle that
// is neither of type KX_OBSTACLE_NAV_MESH nor of shape KX_OBSTACLE_SEGMENT.
void KX_ObstacleSimulation::UpdateObstacles()
{
	for (size_t idx = 0; idx < m_obstacles.size(); ++idx)
	{
		KX_Obstacle* obs = m_obstacles[idx];

		const bool skip = (obs->m_type == KX_OBSTACLE_NAV_MESH)
		               || (obs->m_shape == KX_OBSTACLE_SEGMENT);
		if (!skip)
		{
			// Pull the current state from the game object (x/y only for velocity).
			obs->m_pos = obs->m_gameObj->NodeGetWorldPosition();
			obs->vel[0] = obs->m_gameObj->GetLinearVelocity().x();
			obs->vel[1] = obs->m_gameObj->GetLinearVelocity().y();

			// Store the sample in the history ring and advance its head.
			vcpy(&obs->hvel[obs->hhead*2], obs->vel);
			obs->hhead = (obs->hhead+1) % VEL_HIST_SIZE;

			// Perceived velocity = mean of all VEL_HIST_SIZE history slots.
			vset(obs->pvel,0,0);
			for (int slot = 0; slot < VEL_HIST_SIZE; slot++)
			{
				vadd(obs->pvel, obs->pvel, &obs->hvel[slot*2]);
			}
			vscale(obs->pvel, obs->pvel, 1.0f/VEL_HIST_SIZE);
		}
	}
}
示例#5
0
// Modified by Rick: takes an extra `end` out-parameter compared with the
// earlier intersect(dynent *d, vec &from, vec &to).
//
// Tests whether the line segment from..to hits entity d's bounding box.
// The point of the segment nearest to d->o is located (clamped to the two
// end points) and compared against d->radius in x and y and against
// d->aboveeye / d->eyeheight in z.  On a hit, the tested point is copied to
// *end when `end` is non-null and true is returned; otherwise false.
bool intersect(dynent *d, vec &from, vec &to, vec *end)   // if lineseg hits entity bounding box
{
    vec seg = to, rel = d->o;
    vsub(seg, from);
    vsub(rel, from);

    vec *closest = &from;
    float along = dotprod(rel, seg);

    if(!(along<=0))
    {
        float seglen2 = dotprod(seg, seg);
        if(seglen2<=along)
        {
            closest = &to;
        }
        else
        {
            // Project onto the segment: from + seg * (along / seglen2)
            float frac = along/seglen2;
            vmul(seg, frac);
            vadd(seg, from);
            closest = &seg;
        }
    }

    const bool inX = closest->x <= d->o.x+d->radius
                  && closest->x >= d->o.x-d->radius;
    const bool inY = closest->y <= d->o.y+d->radius
                  && closest->y >= d->o.y-d->radius;
    const bool inZ = closest->z <= d->o.z+d->aboveeye
                  && closest->z >= d->o.z-d->eyeheight;

    if(!(inX && inY && inZ)) return false;

    if(end) *end = *closest;
    return true;
}
// Applies `operation` element-wise to vec1 and vec2 on an OpenCL GPU device.
//
// The kernel source is assembled as kernelStart + operation + kernelEnd and
// its entry point is named "vadd"; the global work size is vec1.size().
// NOTE(review): the platform used is index 1, i.e. the SECOND entry returned
// by cl::Platform::get, and neither the platform list nor the device list is
// checked for being long enough -- confirm this is intended for the target
// machines.
NumList openCLMath(NumList vec1,
                   NumList vec2, 
                   const std::string& operation) {
    // Enumerate the OpenCL platforms present on the host
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);

    // Ask platform index 1 for its GPU devices
    std::vector<cl::Device> gpuDevices;
    platforms[1].getDevices(CL_DEVICE_TYPE_GPU, &gpuDevices);

    // One context over all those devices; the command queue is bound to
    // the first device only.
    cl::Context context(gpuDevices);
    cl::CommandQueue queue(context, gpuDevices[0]);

    // Compile the generated source and fetch its "vadd" kernel
    const std::string kernelSource = kernelStart + operation + kernelEnd;
    cl::Kernel kernel = getKernel(gpuDevices, context, kernelSource, "vadd");

    // Wrap the kernel in a functor covering one work-item per element
    cl::KernelFunctor launcher(kernel, queue, cl::NullRange,
                               cl::NDRange(vec1.size()), cl::NullRange);

    // Execute and collect the output
    return mathViaOpenCL(context, queue, kernel, launcher, vec1, vec2);
}
示例#7
0
文件: refract.c 项目: SN9NV/RT
/*
** Prepares the refracted ray for a primitive hit: the ray origin becomes the
** hit point (e->ray.loc + e->ray.dir * e->t) and the surface normal there is
** fetched. When RAY_INSIDE is set in refract->flags the normal is negated
** and re-normalised first. If refract_prim() succeeds the RAY_INSIDE flag is
** toggled, otherwise set_reflect_ray() is used instead.
*/
static void	set_refract_ray_prim(t_env *e, t_env *refract)
{
	t_vector	normal;
	int			was_inside;

	refract->ray.loc = vadd(e->ray.loc, vmult(e->ray.dir, e->t));
	normal = get_normal(e, refract->ray.loc);
	was_inside = ((refract->flags & RAY_INSIDE) != 0);
	if (was_inside)
		normal = vunit(vsub((t_vector){0.0, 0.0, 0.0}, normal));
	if (!refract_prim(e, refract, normal))
	{
		set_reflect_ray(e, refract);
		return ;
	}
	if (was_inside)
		refract->flags &= ~RAY_INSIDE;
	else
		refract->flags |= RAY_INSIDE;
}
示例#8
0
// Writes the unit's inputs 1..N onto consecutive audio buses, starting at
// the bus index read from input 0.
//
// For each channel: if the bus's "touched" stamp already equals the world's
// current buffer counter, the input is added to the bus contents (vadd);
// otherwise the input is copied over the bus (vcopy) and the stamp is set.
// Each channel is processed between ACQUIRE_BUS_AUDIO / RELEASE_BUS_AUDIO.
//
// unit          - the IOUnit whose inputs are written out
// inNumSamples  - number of samples passed to vadd / vcopy per channel
void vOut_next_a(IOUnit *unit, int inNumSamples)
{
	//Print("Out_next_a %d\n", unit->mNumInputs);
	World *world = unit->mWorld;
	int bufLength = world->mBufLength;
	// Input 0 is the bus index, so the remaining inputs are the channels.
	int numChannels = unit->mNumInputs - 1;

	float fbusChannel = ZIN0(0);
	// Re-resolve the bus pointers only when the requested bus index changed.
	if (fbusChannel != unit->m_fbusChannel) {
		unit->m_fbusChannel = fbusChannel;
		int busChannel = (int)fbusChannel;
		int lastChannel = busChannel + numChannels;

		// Only accept the new index when the whole channel range lies inside
		// the audio buses; otherwise m_bus / m_busTouched keep their
		// previous values.
		if (!(busChannel < 0 || lastChannel > (int)world->mNumAudioBusChannels)) {
			unit->m_bus = world->mAudioBus + (busChannel * bufLength);
			unit->m_busTouched = world->mAudioBusTouched + busChannel;
		}
	}

	float *out = unit->m_bus;
	int32 *touched = unit->m_busTouched;
	int32 bufCounter = unit->mWorld->mBufCounter;
	// `out` advances by one bus (bufLength samples) per channel.
	for (int i=0; i<numChannels; ++i, out+=bufLength) {
		ACQUIRE_BUS_AUDIO((int32)fbusChannel + i);
		float *in = IN(i+1);
		if (touched[i] == bufCounter)
		{
			// Bus already stamped for this buffer: mix into it.
			vadd(out, out, in, inNumSamples);
		}
		else
		{
			// Bus not yet stamped for this buffer: overwrite and stamp it.
			vcopy(out, in, inNumSamples);
			touched[i] = bufCounter;
		}
		//Print("out %d %g %g\n", i, in[0], out[0]);
		RELEASE_BUS_AUDIO((int32)fbusChannel + i);
	}
}
示例#9
0
文件: cpt.c 项目: colinbouvry/minuit
/*
 * Returns a newly mem_malloc'ed array of divisions*3 floats holding
 * `divisions` points (x, y, z triples) spaced evenly in angle on a circle of
 * the given radius around `center`. Each point is center plus
 * radius * (cos a, sin a, 0). The caller owns the returned buffer.
 */
float *calc_circle(float center[3],double radius,int divisions)
{
	float *points = (float *)mem_malloc(sizeof(float)*divisions*3);
	double step = (double)((double)(_PI * 2) / divisions);
	int k;

	for (k = 0; k < divisions; k++)
	{
		float dir[3];
		float pt[3];
		float *dst = points + 3 * k;

		/* unit direction for this angle, scaled to the radius */
		vset(dir,cos(k*step),sin(k*step),0);
		vmul(dir,(float)radius);

		/* offset from the centre, then store the triple */
		vadd(pt,center,dir);
		dst[0] = pt[0];
		dst[1] = pt[1];
		dst[2] = pt[2];
	}

	return points;
}
示例#10
0
文件: intersect_disk.c 项目: SN9NV/RT
/*
** Ray / disk intersection. Returns 1 and stores the ray parameter in *t when
** the ray meets the disk's plane at a parameter greater than EPSILON and the
** hit point lies within o->radius of o->loc; returns 0 otherwise (including
** when the ray direction has a zero dot product with the disk normal).
*/
int		intersect_disk(t_ray *r, t_prim *o, double *t)
{
	t_vector	hit;
	double		denom;
	double		num;
	double		dist;

	denom = vdot(r->dir, o->normal);
	if (denom == 0)
		return (0);
	num = vdot(o->loc, o->normal) - vdot(r->loc, o->normal);
	dist = num / denom;
	if (!(dist > EPSILON))
		return (0);
	hit = vadd(r->loc, vmult(r->dir, dist));
	if (!(vnormalize(vsub(hit, o->loc)) <= o->radius))
		return (0);
	*t = dist;
	return (1);
}
示例#11
0
文件: skin.c 项目: Lenbok/dormin
/* OCaml stub: loads one animation pose into the global skin state.

   anim_v is indexed with Field (anim_v, i) and each element is read with
   Double_field at indices 0..3, i.e. it is treated as an array of float
   arrays with at least four entries each. Those four values are stored in
   each bone's `aq`. A second pass then fills each bone's `amq` / `amv` from
   its parent's and writes a matrix into the matching `abones` entry.
   Returns unit. */
CAMLprim value ml_skin_set_anim (value anim_v)
{
    int i;
    CAMLparam1 (anim_v);
    CAMLlocal1 (floats_v);
    State *s = &glob_state;
    /* Both arrays are walked starting at index 1; entry 0 is never written
       here (presumably a root placeholder -- TODO confirm). */
    struct bone *b = s->bones + 1;
    struct abone *ab = s->abones + 1;

    /* Pass 1: copy the four components for bone i+1 from anim_v[i]. */
    for (i = 0; i < s->num_bones; ++i, ++b) {
        floats_v = Field (anim_v, i);
        b->aq[0] = Double_field (floats_v, 0);
        b->aq[1] = Double_field (floats_v, 1);
        b->aq[2] = Double_field (floats_v, 2);
        b->aq[3] = Double_field (floats_v, 3);
    }

    /* Pass 2: combine each bone with its parent's accumulated values.
       Reading parent->amq / parent->amv here assumes parents are stored
       before their children -- TODO confirm. */
    b = s->bones + 1;
    for (i = 0; i < s->num_bones; ++i, ++b, ++ab) {
        float v[4], v1[4], q[4], q1[4];
        struct bone *parent = &s->bones[b->parent];

        qapply (v, parent->amq, b->v);
        qcompose (b->amq, b->aq, parent->amq);
        vadd (b->amv, v, parent->amv);

        /* q = conjugate(b->mq) composed with b->amq */
        qconjugate (q1, b->mq);
        qcompose (q, q1, b->amq);

        /* v1 = b->amv - (q applied to b->mv); (q, v1) become the matrix. */
        qapply (v, q, b->mv);
        vsub (v1, b->amv, v);
        q2matrixt (ab->cm, q, v1);
    }

    CAMLreturn (Val_unit);
}
示例#12
0
/*
 * Computes the refracted ray for a sphere hit.
 *
 * object      - its priv pointer is read as a Sphere (refractive,
 *               refract_attenuation)
 * ray         - incoming ray; only ray->front is used
 * reflect     - not referenced in this function
 * pt          - hit point; becomes refract->pos
 * n           - surface normal at the hit point
 * refract     - out: front and pos of the refracted ray
 * attenuation - out: set to the sphere's refract_attenuation
 *
 * Returns 1 when a refracted ray was produced, 0 on total internal
 * reflection (in which case refract and attenuation are left untouched).
 */
int sphere_refraction_func(const struct TObject *object, const Ray *ray, const Ray *reflect, const Vector *pt,
                           const Vector *n, Ray *refract, Vector *attenuation) {

  // Angle of incidence i

  const Sphere *sp = (const Sphere*)object->priv;
  double cosi = dot(n, &ray->front) / modulation(n) / modulation(&ray->front);
  double sini = sqrt(1 - cosi*cosi);

  double sinr;
  // NOTE(review): vin is assigned here and in the else branch but never read
  // afterwards; the final direction below is built from `nn` (a normalized
  // copy of n) in both cases -- confirm whether vin was meant to be used.
  Vector vin = *n;
  if (cosi < 0) {
    // From air into the medium
    sinr = sini / sp->refractive;
  }

  else {
    // From the medium into air
    sinr = sini * sp->refractive;
    vin = rmul(n, -1);
    // Total internal reflection
    if (sinr >= 1)
      return 0;
  }
  // Refraction angle
  double r = asin(sinr);

  // (n x front) x n, normalized, is used as the in-plane direction
  // perpendicular to n.
  Vector left = vcross(vcross(*n, ray->front), *n);
  normalize(&left);
  Vector nn = *n;
  normalize(&nn);

  // Refracted direction = left * sin(r) + nn * cos(r)
  refract->front = vadd(vrmul(left, sin(r)), vrmul(nn, cos(r)));
  refract->pos = *pt;
  *attenuation = sp->refract_attenuation;
  return 1;
}
示例#13
0
文件: plan9.c 项目: icattlecoder/go
// genrun is the generic run implementation.
//
// It forks a child that runs the command in argv (argv->p[0] is the program;
// a non-absolute name is rewritten to /bin/<name>), optionally in directory
// dir, and records the child in the bg table.
//
//	b    - if non-nil, it is reset and the child's standard output and
//	       standard error are read into it through a pipe
//	dir  - if non-nil, the child changes to this directory before exec
//	mode - stored in the bg entry for the child
//	argv - command and arguments; a nil terminator is appended in the child
//	wait - if nonzero, bgwait is called before returning
static void
genrun(Buf *b, char *dir, int mode, Vec *argv, int wait)
{
	int i, p[2], pid;
	Buf b1, cmd;
	char *q;

	// Make room in the background table before starting another command.
	while(nbg >= maxnbg)
		bgwait1();

	binit(&b1);
	binit(&cmd);

	// Rewrite a non-absolute program name to /bin/<name>,
	// replacing the string held in argv.
	if(!isabs(argv->p[0])) {
		bpathf(&b1, "/bin/%s", argv->p[0]);
		free(argv->p[0]);
		argv->p[0] = xstrdup(bstr(&b1));
	}

	// Generate a copy of the command to show in a log.
	// Substitute $WORK for the work directory.
	for(i=0; i<argv->len; i++) {
		if(i > 0)
			bwritestr(&cmd, " ");
		q = argv->p[i];
		if(workdir != nil && hasprefix(q, workdir)) {
			bwritestr(&cmd, "$WORK");
			q += strlen(workdir);
		}
		bwritestr(&cmd, q);
	}
	if(vflag > 1)
		errprintf("%s\n", bstr(&cmd));

	// The pipe is only created (and p only initialized) when output
	// is being captured.
	if(b != nil) {
		breset(b);
		if(pipe(p) < 0)
			fatal("pipe");
	}

	switch(pid = fork()) {
	case -1:
		fatal("fork");
	case 0:
		// Child: when capturing, close fd 0 and the read end, and
		// point fds 1 and 2 at the write end of the pipe.
		if(b != nil) {
			close(0);
			close(p[0]);
			dup(p[1], 1);
			dup(p[1], 2);
			if(p[1] > 2)
				close(p[1]);
		}
		if(dir != nil) {
			if(chdir(dir) < 0) {
				fprint(2, "chdir: %r\n");
				_exits("chdir");
			}
		}
		// Terminate the argument vector with nil before exec.
		vadd(argv, nil);
		exec(argv->p[0], argv->p);
		// Only reached if exec failed.
		fprint(2, "%s\n", bstr(&cmd));
		fprint(2, "exec: %r\n");
		_exits("exec");
	}
	// Parent: close the write end and read the child's output into b.
	if(b != nil) {
		close(p[1]);
		breadfrom(b, p[0]);
		close(p[0]);
	}

	// Record the child in the background table; the table entry takes
	// the command string out of cmd (btake).
	if(nbg < 0)
		fatal("bad bookkeeping");
	bg[nbg].pid = pid;
	bg[nbg].mode = mode;
	bg[nbg].cmd = btake(&cmd);
	bg[nbg].b = b;
	nbg++;
	
	if(wait)
		bgwait();

	bfree(&cmd);
	bfree(&b1);
}
示例#14
0
文件: unix.c 项目: pnasrat/gopower
// genrun is the generic run implementation.
//
// It forks a child that runs the command in argv via execvp, optionally in
// directory dir, and records the child in the bg table.
//
//	b    - if non-nil, it is reset and the child's standard output and
//	       standard error are read into it through a pipe
//	dir  - if non-nil, the child changes to this directory before exec
//	mode - stored in the bg entry for the child
//	argv - command and arguments; a nil terminator is appended in the child
//	wait - if nonzero, bgwait is called before returning
static void
genrun(Buf *b, char *dir, int mode, Vec *argv, int wait)
{
	int i, p[2], pid;
	Buf cmd;
	char *q;

	// Make room in the background table before starting another command.
	while(nbg >= maxnbg)
		bgwait1();

	// Generate a copy of the command to show in a log.
	// Substitute $WORK for the work directory.
	binit(&cmd);
	for(i=0; i<argv->len; i++) {
		if(i > 0)
			bwritestr(&cmd, " ");
		q = argv->p[i];
		if(workdir != nil && hasprefix(q, workdir)) {
			bwritestr(&cmd, "$WORK");
			q += strlen(workdir);
		}
		bwritestr(&cmd, q);
	}
	// NOTE(review): the vflag guard is commented out, so the command line
	// is printed on every call regardless of verbosity.
	//if(vflag > 1)
		xprintf("%s\n", bstr(&cmd));

	// The pipe is only created (and p only initialized) when output
	// is being captured.
	if(b != nil) {
		breset(b);
		if(pipe(p) < 0)
			fatal("pipe: %s", strerror(errno));
	}

	switch(pid = fork()) {
	case -1:
		fatal("fork: %s", strerror(errno));
	case 0:
		// Child: when capturing, close stdin and the read end, and
		// point stdout and stderr at the write end of the pipe.
		if(b != nil) {
			close(0);
			close(p[0]);
			dup2(p[1], 1);
			dup2(p[1], 2);
			if(p[1] > 2)
				close(p[1]);
		}
		if(dir != nil) {
			if(chdir(dir) < 0) {
				fprintf(stderr, "chdir %s: %s\n", dir, strerror(errno));
				_exit(1);
			}
		}
		// Terminate the argument vector with nil before exec.
		vadd(argv, nil);
		execvp(argv->p[0], argv->p);
		// Only reached if execvp failed.
		fprintf(stderr, "%s\n", bstr(&cmd));
		fprintf(stderr, "exec %s: %s\n", argv->p[0], strerror(errno));
		_exit(1);
	}
	// Parent: close the write end and read the child's output into b.
	if(b != nil) {
		close(p[1]);
		breadfrom(b, p[0]);
		close(p[0]);
	}

	// Record the child in the background table; the table entry takes
	// the command string out of cmd (btake).
	if(nbg < 0)
		fatal("bad bookkeeping");
	bg[nbg].pid = pid;
	bg[nbg].mode = mode;
	bg[nbg].cmd = btake(&cmd);
	bg[nbg].b = b;
	nbg++;
	
	if(wait)
		bgwait();

	bfree(&cmd);
}
示例#15
0
 /**
  * Runs the experiment over all `nfolds` folds.
  *
  * For each fold: fetches the train/test split, re-initialises the HMM with
  * random parameters, distributes the sequences over the worker threads and
  * runs `EMiterations` EM iterations (E-step on the workers, statistics
  * summed into NU / N / M, then the M-step on the model).  Depending on
  * `testing_strat`, the workers additionally compute train/test
  * cross-entropies after selected iterations.  At the end of the fold the
  * model parameters are saved and one JSON-like record is appended to
  * `logfile`; progress lines go to `logprogressfile`.
  *
  * If no evaluation ran during a fold (EM_test_counter stays 0), the
  * "entropy" field of the record is written as `null` instead of reading
  * EM_te_entropy[-1].
  */
 void HMMExperiment::run() {
     
     for (int ifold = 0; ifold < nfolds; ifold++) {
         folds->get_fold(ifold, tr_set, &tr_size, te_set, &te_size);
         logprogressfile << "ifold: " << ifold << std::endl;
         
         // Wall-clock start of this fold, kept for the log record.
         auto start_t = std::time(nullptr);
         auto start_time = *std::localtime(&start_t);
         char start_time_str [80];
         strftime(start_time_str, 80, "%F %X", &start_time);
         
         hmm->init_random_parameters();
         
         // subdivide sequences into worker threads
         assign_sequences_to_workers(tr_size, tr_set, te_size, te_set);
         
         // learn parameters
         double absdif;
         EM_test_counter = 0;
         for(int it = 0; it < EMiterations; it++) {
             
             // E-step: start every worker, then wait for all of them.
             for(HMMWorkerThread & w : workers)
                 w.run_Estep();
             for(HMMWorkerThread & w : workers)
                 w.join();
             
             // Reset the accumulators and sum the per-worker statistics.
             EM_loglik[it] = 0;
             memset(NU, 0, hs*sizeof(double));
             memset(N, 0, hs*hs*sizeof(double));
             memset(M, 0, hs*os*sizeof(double));
             for(HMMWorkerThread & w : workers) {
                 EM_loglik[it] += w.loglik;
                 vadd(NU, 1, w.NU, 1, NU, 1, hs);
                 vadd(N, 1, w.N, 1, N, 1, hs*hs);
                 vadd(M, 1, w.M, 1, M, 1, hs*os);
             }
             
             hmm->Mstep(N, M, NU, Naux1, Maux, &absdif);
             
             auto now = std::time(nullptr);
             auto now_ = *std::localtime(&now);
             char now_str [80];
             strftime(now_str, 80, "%F %X", &now_);
             logprogressfile << now_str << " ";
             
             logprogressfile << "EM iteration " << it << ", loglik = " << EM_loglik[it] << std::endl;
             
             // Evaluate after every iteration, after odd iterations only,
             // or after the last iteration only, depending on the strategy.
             if((testing_strat == TestingStrategy::test_every) ||
                ((testing_strat == TestingStrategy::test_odd) && (it % 2 == 1)) ||
                ((testing_strat == TestingStrategy::test_last) && (it == EMiterations-1))) {
                 
                 if(prediction_type == PredictionType::viterbi){
                     for(HMMWorkerThread & w : workers)
                         w.run_crossentropy_viterbi();
                 } else if(prediction_type == PredictionType::posterior) {
                     for(HMMWorkerThread & w : workers)
                         w.run_crossentropy_posterior();
                 }
                 
                 for(HMMWorkerThread & w : workers)
                     w.join();
                 
                 double tr_entropy = 0;
                 double te_entropy = 0;
                 
                 int tr_count = 0;
                 int te_count = 0;
                 for(HMMWorkerThread & w : workers) {
                     tr_entropy += w.tr_entropy;
                     tr_count += w.tr_count;
                     te_entropy += w.te_entropy;
                     te_count += w.te_count;
                 }
                 
                 // Per-count averages over all workers.
                 EM_tr_entropy[EM_test_counter] = tr_entropy / tr_count;
                 EM_te_entropy[EM_test_counter] = te_entropy / te_count;
                 EM_test_it[EM_test_counter] = it;
                 
                 now = std::time(nullptr);
                 now_ = *std::localtime(&now);
                 strftime(now_str, 80, "%F %X", &now_);
                 logprogressfile << now_str << " ";
                 
                 logprogressfile << "tr_entropy: " << EM_tr_entropy[EM_test_counter] <<
                 ", te_entropy: " << EM_te_entropy[EM_test_counter] << std::endl;
                 EM_test_counter++;
                 
             }
         }
         
         // hmm->print_parameters();
         std::string filename = model_filename();
         hmm->save_parameters(filename);
         
         // LOG
         auto end_t = std::time(nullptr);
         auto end_time = *std::localtime(&end_t);
         char end_time_str [80];
         strftime(end_time_str, 80, "%F %X", &end_time);
         
         auto duration = std::difftime(end_t, start_t);
         logfile << "{" << std::endl;
         logfile << "\"start_time\": \"" << start_time_str << "\", " << std::endl;
         logfile << "\"end_time\": \"" << end_time_str << "\", " << std::endl;
         logfile << "\"duration\": " << duration << ", " << std::endl;
         logfile << "\"Dataset\": \"" << d->filename << "\", " << std::endl;
         logfile << "\"ifold\": " << ifold << ", " << std::endl;
         logfile << "\"nfold\": " << folds->nfolds << ", " << std::endl;
         logfile << "\"nseq\": " << folds->n << ", " << std::endl;
         logfile << "\"EMiterations\": " << EMiterations << ", " << std::endl;
         logfile << "\"EM_loglik\": " << "[" << v_to_str(EMiterations, EM_loglik) << "], " << std::endl;
         logfile << "\"EM_te_entropy\": " << "[" << v_to_str(EM_test_counter, EM_te_entropy) << "], " << std::endl;
         logfile << "\"EM_tr_entropy\": " << "[" << v_to_str(EM_test_counter, EM_tr_entropy) << "], " << std::endl;
         logfile << "\"EM_test_it\": " << "[" << v_to_str(EM_test_counter, EM_test_it) << "], " << std::endl;
         logfile << "\"hs\": " << hs << ", " << std::endl;
         logfile << "\"nworkers\": " << nworkers << ", " << std::endl;
         // The final entropy only exists if at least one evaluation ran in
         // this fold; otherwise index EM_test_counter - 1 would be -1.
         logfile << "\"entropy\": ";
         if (EM_test_counter > 0) {
             logfile << EM_te_entropy[EM_test_counter - 1];
         } else {
             logfile << "null";
         }
         logfile << ", " << std::endl;
         logfile << "\"fn_params\": \"" << filename << "\", " << std::endl;
         logfile << "}," << std::endl;
         logfile << std::endl;
         
     }  // for ifold
 }
示例#16
0
// mkzasm writes zasm_$GOOS_$GOARCH.h,
// which contains struct offsets for use by
// assembly files.  It also writes a copy to the work space
// under the name zasm_GOOS_GOARCH.h (no expansion).
//
//	dir  - directory in which the C compiler is run on proc.c
//	file - path of the header to write
//
// The header starts with the zasmhdr entry whose goarch and goos are
// prefixes of the current goarch and goos; it is a fatal error if no
// entry matches.
// 
void
mkzasm(char *dir, char *file)
{
	int i, n;
	char *aggr, *p;
	Buf in, b, out;
	Vec argv, lines, fields;

	binit(&in);
	binit(&b);
	binit(&out);
	vinit(&argv);
	vinit(&lines);
	vinit(&fields);
	
	bwritestr(&out, "// auto generated by go tool dist\n\n");
	// Emit the first fixed header whose goarch/goos prefix-match the target.
	for(i=0; i<nelem(zasmhdr); i++) {
		if(hasprefix(goarch, zasmhdr[i].goarch) && hasprefix(goos, zasmhdr[i].goos)) {
			bwritestr(&out, zasmhdr[i].hdr);
			goto ok;
		}
	}
	fatal("unknown $GOOS/$GOARCH in mkzasm");
ok:

	// Run 6c -DGOOS_goos -DGOARCH_goarch -Iworkdir -a proc.c
	// to get acid [sic] output.
	// Buffer b is reused for every formatted argument; this presumably
	// relies on vadd keeping its own copy of the string -- confirm.
	vreset(&argv);
	vadd(&argv, bpathf(&b, "%s/%sc", tooldir, gochar));
	vadd(&argv, bprintf(&b, "-DGOOS_%s", goos));
	vadd(&argv, bprintf(&b, "-DGOARCH_%s", goarch));
	vadd(&argv, bprintf(&b, "-I%s", workdir));
	vadd(&argv, "-a");
	vadd(&argv, "proc.c");
	runv(&in, dir, CheckExit, &argv);
	
	// Convert input like
	//	aggr G
	//	{
	//		Gobuf 24 sched;
	//		'Y' 48 stack0;
	//	}
	// into output like
	//	#define g_sched 24
	//	#define g_stack0 48
	//
	// aggr is the #define prefix of the aggregate currently being read,
	// or nil while outside a recognized aggregate.
	aggr = nil;
	splitlines(&lines, bstr(&in));
	for(i=0; i<lines.len; i++) {
		splitfields(&fields, lines.p[i]);
		if(fields.len == 2 && streq(fields.p[0], "aggr")) {
			if(streq(fields.p[1], "G"))
				aggr = "g";
			else if(streq(fields.p[1], "M"))
				aggr = "m";
			else if(streq(fields.p[1], "Gobuf"))
				aggr = "gobuf";
			else if(streq(fields.p[1], "WinCall"))
				aggr = "wincall";
		}
		// A closing brace ends the current aggregate.
		if(hasprefix(lines.p[i], "}"))
			aggr = nil;
		// Member lines are tab-indented; the last field is the name
		// (with a trailing ';' stripped) and the one before it the offset.
		if(aggr && hasprefix(lines.p[i], "\t") && fields.len >= 2) {
			n = fields.len;
			p = fields.p[n-1];
			if(p[xstrlen(p)-1] == ';')
				p[xstrlen(p)-1] = '\0';
			bwritestr(&out, bprintf(&b, "#define %s_%s %s\n", aggr, fields.p[n-1], fields.p[n-2]));
		}
	}
	
	// Write both to file and to workdir/zasm_GOOS_GOARCH.h.
	writefile(&out, file, 0);
	writefile(&out, bprintf(&b, "%s/zasm_GOOS_GOARCH.h", workdir), 0);

	bfree(&in);
	bfree(&b);
	bfree(&out);
	vfree(&argv);
	vfree(&lines);
	vfree(&fields);
}
示例#17
0
static void
render(unsigned char *img, int comps, int w, int h, int nsubsamples)
{
    int x, y;
    int u, v;

    //float *fimg = (float *)malloc(sizeof(float) * w * h * 3);
    vec *fimg = (vec *)malloc(sizeof(vec) * w * h);
    memset((void *)fimg, 0, sizeof(vec) * w * h);

    for (y = 0; y < h; y++) {
        for (x = 0; x < w; x++) {
            
            for (v = 0; v < nsubsamples; v++) {
                for (u = 0; u < nsubsamples; u++) {
                    float px = (x + (u / (float)nsubsamples) - (w / 2.0)) / (w / 2.0);
                    float py = -(y + (v / (float)nsubsamples) - (h / 2.0)) / (h / 2.0);

                    Ray ray;

                    ray.org.x = 0.0;
                    ray.org.y = 0.0;
                    ray.org.z = 0.0;

                    ray.dir.x = px;
                    ray.dir.y = py;
                    ray.dir.z = -1.0;
                    vnormalize(&(ray.dir));

                    Isect isect;
                    isect.t   = 1.0e+17;
                    isect.hit = 0;

                    ray_sphere_intersect(&isect, &ray, &spheres[0]);
                    ray_sphere_intersect(&isect, &ray, &spheres[1]);
                    ray_sphere_intersect(&isect, &ray, &spheres[2]);
                    ray_plane_intersect (&isect, &ray, &plane);

                    if (isect.hit) {
                        vec col;
                        ambient_occlusion(&col, &isect);

                        vadd(&fimg[y * w + x], fimg[y * w + x], col);
/*
                        fimg[y * w + x].x += col.x;
                        fimg[y * w + x].y += col.y;
                        fimg[y * w + x].z += col.z;
*/
                    }

                }
            }

            vdivs(&fimg[y * w + x], fimg[y * w + x], (float)(nsubsamples * nsubsamples));
/*
            fimg[y * w + x].x /= (float)(nsubsamples * nsubsamples);
            fimg[y * w + x].y /= (float)(nsubsamples * nsubsamples);
            fimg[y * w + x].z /= (float)(nsubsamples * nsubsamples);
*/
            img[comps * (y * w + x) + 0] = clamp(fimg[y * w + x].x);
            img[comps * (y * w + x) + 1] = clamp(fimg[y * w + x].y);
            img[comps * (y * w + x) + 2] = clamp(fimg[y * w + x].z);
        }
    }
}
示例#18
0
// mkzruntimedefs writes zruntime_defs_$GOOS_$GOARCH.h,
// which contains Go struct definitions equivalent to the C ones.
// Mostly we just write the output of 6c -q to the file.
// However, we run it on multiple files, so we have to delete
// the duplicated definitions, and we don't care about the funcs
// and consts, so we delete those too.
//
//	dir  - directory in which the C compiler is run
//	file - path of the output file
// 
void
mkzruntimedefs(char *dir, char *file)
{
	int i, skip;
	char *p;
	Buf in, b, out;
	Vec argv, lines, fields, seen;
	
	binit(&in);
	binit(&b);
	binit(&out);
	vinit(&argv);
	vinit(&lines);
	vinit(&fields);
	vinit(&seen);
	
	// Fixed preamble of the generated Go file.
	bwritestr(&out, "// auto generated by go tool dist\n"
		"\n"
		"package runtime\n"
		"import \"unsafe\"\n"
		"var _ unsafe.Pointer\n"
		"\n"
	);

	
	// Run 6c -DGOOS_goos -DGOARCH_goarch -Iworkdir -q
	// on each of the runtimedefs C files.
	vadd(&argv, bpathf(&b, "%s/%sc", tooldir, gochar));
	vadd(&argv, bprintf(&b, "-DGOOS_%s", goos));
	vadd(&argv, bprintf(&b, "-DGOARCH_%s", goarch));
	vadd(&argv, bprintf(&b, "-I%s", workdir));
	vadd(&argv, "-q");
	// The last argument is a placeholder slot: it is pointed at each
	// runtimedefs entry in turn and the original pointer is put back
	// afterwards (presumably so vfree releases what vadd stored -- confirm).
	vadd(&argv, "");
	p = argv.p[argv.len-1];
	for(i=0; i<nelem(runtimedefs); i++) {
		argv.p[argv.len-1] = runtimedefs[i];
		runv(&b, dir, CheckExit, &argv);
		bwriteb(&in, &b);
	}
	argv.p[argv.len-1] = p;
		
	// Process the aggregate output.
	// skip is nonzero while inside the body of a duplicate multiline decl.
	skip = 0;
	splitlines(&lines, bstr(&in));
	for(i=0; i<lines.len; i++) {
		p = lines.p[i];
		// Drop comment, func, and const lines.
		if(hasprefix(p, "//") || hasprefix(p, "const") || hasprefix(p, "func"))
			continue;
		
		// Note beginning of type or var decl, which can be multiline.
		// Remove duplicates.  The linear check of seen here makes the
		// whole processing quadratic in aggregate, but there are only
		// about 100 declarations, so this is okay (and simple).
		if(hasprefix(p, "type ") || hasprefix(p, "var ")) {
			splitfields(&fields, p);
			if(fields.len < 2)
				continue;
			if(find(fields.p[1], seen.p, seen.len) >= 0) {
				if(streq(fields.p[fields.len-1], "{"))
					skip = 1;  // skip until }
				continue;
			}
			vadd(&seen, fields.p[1]);
		}
		if(skip) {
			if(hasprefix(p, "}"))
				skip = 0;
			continue;
		}
		
		bwritestr(&out, p);
	}
	
	writefile(&out, file, 0);

	bfree(&in);
	bfree(&b);
	bfree(&out);
	vfree(&argv);
	vfree(&lines);
	vfree(&fields);
	vfree(&seen);
}
示例#19
0
文件: gephysics.c 项目: drewet/libge
/*
 * Adds `vec` to the force stored in `state`.
 * vadd is called with a leading argument of 2, presumably the number of
 * vectors that follow -- confirm against vadd's declaration.
 */
void geRK4ApplyForce(ge_RK4State* state, ge_Vector3d vec){
	state->force = vadd(2, state->force, vec);
}
示例#20
0
/*
 * In-place quaternion addition: a += b.
 * The s members are added directly; the v members are combined through
 * the two-argument vadd(a->v, b->v).
 */
void qadd(quat *a, quat *b)
{
	/* s part */
	a->s = a->s + b->s;

	/* v part */
	vadd(a->v, b->v);
}
示例#21
0
文件: kkt.c 项目: jmakovicka/ecos
/**
 * Solves the permuted KKT system and returns the unpermuted search directions.
 *
 * On entry, the factorization of the permuted KKT matrix, PKPt,
 * is assumed to be up to date (call kkt_factor beforehand to achieve this).
 * The right hand side, Pb, is assumed to be already permuted.
 *
 * On exit, the resulting search directions are written into dx, dy and dz,
 * where these variables are permuted back to the original ordering.
 *
 * Up to nitref (the function argument) iterative refinement steps are
 * applied to solve the linear system. Refinement stops early when the
 * error drops below bnorm*LINSYSACC, when the improvement over the previous
 * step is smaller than the factor IRERRFACT, or when a step increased the
 * error (that step is then undone).
 *
 * Parameters:
 *   KKT     factorization (L, D, Pinv, PKPt) and work vectors work1..work6
 *   A, G    sparse matrices used to form the residuals; A may be NULL for
 *           the dx residual
 *   Pb      permuted right hand side
 *   dx, dy, dz  output search directions
 *   n, p, m sizes of the x, y and z parts (the residual vector is laid out
 *           as [ex (n) | ey (p) | ez (MTILDE)])
 *   C       cone description (lpc, soc, nsoc)
 *   isinit  0: ez is updated with scale2add(truez, ez, C);
 *           otherwise: with vadd(MTILDE, truez, ez)
 *   nitref  maximum number of refinement steps
 *
 * Returns the number of iterative refinement steps really taken.
 */
idxint kkt_solve(kkt* KKT, spmat* A, spmat* G, pfloat* Pb, pfloat* dx, pfloat* dy, pfloat* dz, idxint n, idxint p, idxint m, cone* C, idxint isinit, idxint nitref)
{
	
/* MTILDE is the length of the z part of the permuted system: in CONEMODE 0
 * each second-order cone contributes two extra entries. */
#if CONEMODE == 0
#define MTILDE (m+2*C->nsoc)
#else
#define MTILDE (m)
#endif
    
    idxint i, k, l, j, kk, kItRef;
#if (defined STATICREG) && (STATICREG > 0)
	idxint dzoffset;
#endif
	idxint*  Pinv = KKT->Pinv;
	pfloat*    Px = KKT->work1;
	pfloat*   dPx = KKT->work2;
	pfloat*     e = KKT->work3;
    pfloat*    Pe = KKT->work4;
    pfloat* truez = KKT->work5;
    pfloat*   Gdx = KKT->work6;
    /* ex, ey, ez are views into the single residual vector e */
    pfloat* ex = e;
    pfloat* ey = e + n;
    pfloat* ez = e + n+p;
    pfloat bnorm = 1.0 + norminf(Pb, n+p+MTILDE);
    pfloat nex = 0;
    pfloat ney = 0;
    pfloat nez = 0;
    pfloat nerr;
    /* nerr_prev is first written at the end of iteration 0 and only read
     * when kItRef > 0 */
    pfloat nerr_prev;
    pfloat error_threshold = bnorm*LINSYSACC;
    idxint nK = KKT->PKPt->n;

	/* forward - diagonal - backward solves: Px holds solution */
	LDL_lsolve2(nK, Pb, KKT->L->jc, KKT->L->ir, KKT->L->pr, Px );		
	LDL_dsolve(nK, Px, KKT->D);
	LDL_ltsolve(nK, Px, KKT->L->jc, KKT->L->ir, KKT->L->pr);
    
#if PRINTLEVEL > 2
    if( p > 0 ){
        PRINTTEXT("\nIR: it  ||ex||   ||ey||   ||ez|| (threshold: %4.2e)\n", error_threshold);
        PRINTTEXT("    --------------------------------------------------\n");
    } else {
        PRINTTEXT("\nIR: it  ||ex||   ||ez|| (threshold: %4.2e)\n", error_threshold);
        PRINTTEXT("    -----------------------------------------\n");
    }
#endif
    
	/* iterative refinement */
	for( kItRef=0; kItRef <= nitref; kItRef++ ){
        
        /* unpermute x & copy into arrays */
        unstretch(n, p, C, Pinv, Px, dx, dy, dz);
        
		/* compute error term */
        /* k runs over the entries of Pb (through Pinv) across all three parts */
        k=0; j=0;
        
		/* 1. error on dx*/
#if (defined STATICREG) && (STATICREG > 0)
		/* ex = bx - A'*dy - G'*dz - DELTASTAT*dx */
        for( i=0; i<n; i++ ){ ex[i] = Pb[Pinv[k++]] - DELTASTAT*dx[i]; }
#else
		/* ex = bx - A'*dy - G'*dz */
		for( i=0; i<n; i++ ){ ex[i] = Pb[Pinv[k++]]; }
#endif
        if(A) sparseMtVm(A, dy, ex, 0, 0);
        sparseMtVm(G, dz, ex, 0, 0);
        nex = norminf(ex,n);
        	
        /* error on dy */
        if( p > 0 ){
#if (defined STATICREG) && (STATICREG > 0)
			/* ey = by - A*dx + DELTASTAT*dy */
            for( i=0; i<p; i++ ){ ey[i] = Pb[Pinv[k++]] + DELTASTAT*dy[i]; }
#else
			/* ey = by - A*dx */
			for( i=0; i<p; i++ ){ ey[i] = Pb[Pinv[k++]]; }
#endif
            sparseMV(A, dx, ey, -1, 0);
            ney = norminf(ey,p);            
        }
        
        
		/* --> 3. ez = bz - G*dx + V*dz_true */
        /* kk indexes ez, j indexes Gdx */
        kk = 0; j=0; 
#if (defined STATICREG) && (STATICREG > 0)		
		dzoffset=0;
#endif
        sparseMV(G, dx, Gdx, 1, 1);
        /* entries belonging to the LP cone */
        for( i=0; i<C->lpc->p; i++ ){
#if (defined STATICREG) && (STATICREG > 0)
            ez[kk++] = Pb[Pinv[k++]] - Gdx[j++] + DELTASTAT*dz[dzoffset++];
#else
			ez[kk++] = Pb[Pinv[k++]] - Gdx[j++];
#endif
        }
        /* entries belonging to each second-order cone; with static
         * regularization the last entry of a cone gets -DELTASTAT instead
         * of +DELTASTAT */
        for( l=0; l<C->nsoc; l++ ){
            for( i=0; i<C->soc[l].p; i++ ){
#if (defined STATICREG) && (STATICREG > 0) 				
                ez[kk++] = i<(C->soc[l].p-1) ? Pb[Pinv[k++]] - Gdx[j++] + DELTASTAT*dz[dzoffset++] : Pb[Pinv[k++]] - Gdx[j++] - DELTASTAT*dz[dzoffset++];
#else
				ez[kk++] = Pb[Pinv[k++]] - Gdx[j++];
#endif
            }
#if CONEMODE == 0
            /* the two extra entries per cone carry zero residual */
            ez[kk] = 0;
            ez[kk+1] = 0;
            k += 2;
            kk += 2;
#endif
        }
        /* truez: the z part of the current solution in permuted ordering */
        for( i=0; i<MTILDE; i++) { truez[i] = Px[Pinv[n+p+i]]; }
        if( isinit == 0 ){
            scale2add(truez, ez, C);
        } else {
            vadd(MTILDE, truez, ez);
        }
        nez = norminf(ez,MTILDE);
        
        
#if PRINTLEVEL > 2
        if( p > 0 ){
            PRINTTEXT("    %2d  %3.1e  %3.1e  %3.1e\n", (int)kItRef, nex, ney, nez);
        } else {
            PRINTTEXT("    %2d  %3.1e  %3.1e\n", (int)kItRef, nex, nez);
        }
#endif
        
        /* maximum error (infinity norm of e) */
        nerr = MAX( nex, nez);
        if( p > 0 ){ nerr = MAX( nerr, ney ); }
        
        /* CHECK WHETHER REFINEMENT BROUGHT DECREASE - if not undo and quit! */
        if( kItRef > 0 && nerr > nerr_prev ){
            /* undo refinement */
            for( i=0; i<nK; i++ ){ Px[i] -= dPx[i]; }
            /* the undone step does not count towards the returned total */
            kItRef--;
            break;
        }
        
        /* CHECK WHETHER TO REFINE AGAIN */
        if( kItRef == nitref || ( nerr < error_threshold ) || ( kItRef > 0 && nerr_prev < IRERRFACT*nerr ) ){
            break;
        }
        nerr_prev = nerr;
        
        /* permute */
        for( i=0; i<nK; i++) { Pe[Pinv[i]] = e[i]; }
        
        /* forward - diagonal - backward solves: dPx holds solution */
        LDL_lsolve2(nK, Pe, KKT->L->jc, KKT->L->ir, KKT->L->pr, dPx);
        LDL_dsolve(nK, dPx, KKT->D);
        LDL_ltsolve(nK, dPx, KKT->L->jc, KKT->L->ir, KKT->L->pr);
        
        /* add refinement to Px */
        for( i=0; i<nK; i++ ){ Px[i] += dPx[i]; }
	}

#if PRINTLEVEL > 2
    PRINTTEXT("\n");
#endif
    
	/* copy solution out into the different arrays, permutation included */
	unstretch(n, p, C, Pinv, Px, dx, dy, dz);
    
    return kItRef;
}
示例#22
0
static void simulate(void)
{
   int sh, i, j, pl, pl2, actp;
   double l;
   Vec2d v;
   
   for(i = 0; i < conf.segmentSteps; ++i)
   {
      for(pl = 0; pl < conf.maxPlayers; ++pl)
      {
         SimPlayer* p = &(player[pl]);
         if(p->watch) continue;
         if(!p->active) continue;
         for(sh = 0; sh < conf.numShots; ++sh)
         {
            SimShot* s = &(p->shot[sh]);
            SimMissile* m = &(s->missile);
            if(!m->live) continue;
            for(j = 0; j < conf.numPlanets; ++j)
            {
               v = vsub(planet[j].position, m->position);
               l = length(v);

               if (l <= planet[j].radius)
               {
                  planetHit(s);
               }

               v = vdiv(v, l);
               v = vmul(v, planet[j].mass / (l * l));
               v = vdiv(v, conf.segmentSteps);

               m->speed = vadd(m->speed, v);
            }
            v = vdiv(m->speed, conf.segmentSteps);
            m->position = vadd(m->position, v);

            for(pl2 = 0; pl2 < conf.maxPlayers; ++pl2)
            {
               if(!player[pl2].active) continue;
               l = distance(player[pl2].position, m->position);

               if (  (l <= conf.playerDiameter)
                  && (m->leftSource == 1)
                  )
               {
		  if(conf.debug & 1) printf("l = %.5f playerDiameter = %.5f missile.x = %.5f missile.y = %.5f player.x = %5f player.y = %5f\n",l,conf.playerDiameter,m->position.x,m->position.y,player[pl2].position.x,player[pl2].position.y);
                  playerHit(s, pl, pl2);
               }

               if (  (l > (conf.playerDiameter + 1))
                  && (pl2 == pl)
                  )
               {
                  m->leftSource = 1;
               }
            }

            if (  (m->position.x < -conf.marginleft)
               || (m->position.x > conf.battlefieldW + conf.marginright)
               || (m->position.y < -conf.margintop)
               || (m->position.y > conf.battlefieldH + conf.marginbottom)
               )
            {
               wallHit(s);
            }
         }
      }
   }
   for(pl = 0, actp = 0; pl < conf.maxPlayers; ++pl) actp += player[pl].active;  
   for(pl = 0; pl < conf.maxPlayers; ++pl)
   {
      SimPlayer* p = &(player[pl]);
      if(!p->active) continue;
      if(p->watch) continue;
      if(p->timeout) p->timeout--;
      if(p->valid || actp == 1) p->timeout = conf.timeout;
      for(sh = 0; sh < conf.numShots; ++sh)
      {
         SimShot* s = &(p->shot[sh]);
         if(!s->missile.live) continue;
         p->timeout = conf.timeout;
         player[currentPlayer].timeoutcnt = 0;
         s->dot[s->length++] = d2f(s->missile.position);
         if(s->length == conf.maxSegments)
         {
            s->missile.live = 0;
            allSendShotFinished(s);
         }
      }
   }
}
示例#23
0
void
oneDynamicsFrame(struct part *part,
                 int iters,
                 struct xyz *averagePositions,
                 struct xyz **pOldPositions,
                 struct xyz **pNewPositions,
                 struct xyz **pPositions,
                 struct xyz *force)
{
    int j;
    int loop;
    double deltaTframe;
    struct xyz f;
    struct xyz *tmp;
    struct jig *jig;
    
    struct xyz *oldPositions = *pOldPositions;
    struct xyz *newPositions = *pNewPositions;
    struct xyz *positions = *pPositions;

    // wware 060109  python exception handling
    NULLPTR(part);
    NULLPTR(averagePositions);
    NULLPTR(oldPositions);
    NULLPTR(newPositions);
    NULLPTR(positions);

    iters = max(iters,1);
    
    deltaTframe = 1.0/iters;
    
    for (j=0; j<part->num_atoms; j++) {
	vsetc(averagePositions[j],0.0);
    }
    
    // See http://www.nanoengineer-1.net/mediawiki/index.php?title=Verlet_integration
    // for a discussion of how dynamics is done in the simulator.

    // we want:
    // x(t+dt) = 2x(t) - x(t-dt) + A dt^2
    // or:
    // newPositions = 2 * positions - oldPositions + A dt^2
    
    // wware 060110  don't handle Interrupted with the BAIL mechanism
    for (loop=0; loop < iters && !Interrupted; loop++) {

	_last_iteration = loop == iters - 1;
        
	Iteration++;
	
	// wware 060109  python exception handling
        updateVanDerWaals(part, NULL, positions); BAIL();
	calculateGradient(part, positions, force); BAIL();
	
        /* first, for each atom, find non-accelerated new pos  */
        /* Atom moved from oldPositions to positions last time,
           now we move it the same amount from positions to newPositions */
        for (j=0; j<part->num_atoms; j++) {
            // f = positions - oldPositions
            vsub2(f,positions[j],oldPositions[j]);
            // newPositions = positions + f
            // or:
            // newPositions = 2 * positions - oldPositions
            vadd2(newPositions[j],positions[j],f);
            // after this, we will need to add A dt^2 to newPositions
        }
	
	// pre-force jigs
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];
	    // wware 060109  python exception handling
	    NULLPTR(jig);
	    switch (jig->type) {
	    case LinearMotor:
		jigLinearMotor(jig, positions, newPositions, force, deltaTframe);
		break;
	    default:
		break;
	    }
	}
	
	/* convert forces to accelerations, giving new positions */
	//FoundKE = 0.0;		/* and add up total KE */
	for (j=0; j<part->num_atoms; j++) {
            // to complete Verlet integration, this needs to do:
            // newPositions += A dt^2
            //
            // force[] is in pN, mass is in g, Dt in seconds, f in pm
	    vmul2c(f,force[j],part->atoms[j]->inverseMass); // inverseMass = Dt*Dt/mass

            // XXX: 0.15 probably needs a scaling by Dt
            // 0.15 = deltaX
            // keMax = m v^2 / 2
            // v^2 = 2 keMax / m
            // v = deltaX / Dt = sqrt(2 keMax / m)
            // deltaX = Dt sqrt(2 keMax / m)

            // We probably don't want to do this, because a large raw
            // velocity isn't a problem, it's just when that creates a
            // high force between atoms that it becomes a problem.  We
            // check that elsewhere.
            
	    //if (!ExcessiveEnergyWarning && vlen(f)>0.15) { // 0.15 is just below H flyaway
            // WARNING3("Excessive force %.6f in iteration %d on atom %d -- further warnings suppressed", vlen(f), Iteration, j+1);
            // ExcessiveEnergyWarningThisFrame++;
            //}
	    
	    vadd(newPositions[j],f);
	    
	    //vsub2(f, newPositions[j], positions[j]);
	    //ff = vdot(f, f);
	    //FoundKE += atom[j].energ * ff;
	}
	
	// Jigs are executed in the following order: motors,
	// thermostats, grounds, measurements.  Motions from each
	// motor are added together, then thermostats operate on the
	// motor output.  Grounds override anything that moves atoms.
	// Measurements happen after all things that could affect
	// positions, including grounds.

        // motors
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];
	    
	    if (jig->type == RotaryMotor) {
		jigMotor(jig, deltaTframe, positions, newPositions, force);
            }
            // LinearMotor handled in preforce above
	}

        // thermostats
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];
	    
	    if (jig->type == Thermostat) {
		jigThermostat(jig, deltaTframe, positions, newPositions);
	    }
	}

        // grounds
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];
	    
	    if (jig->type == Ground) {
		jigGround(jig, deltaTframe, positions, newPositions, force);
            }
	}

        // measurements
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];
	    
	    switch (jig->type) {
	    case Thermometer:
		jigThermometer(jig, deltaTframe, positions, newPositions);
		break;
	    case DihedralMeter:
		jigDihedral(jig, newPositions);
		break;
	    case AngleMeter:
		jigAngle(jig, newPositions);
		break;
	    case RadiusMeter:
		jigRadius(jig, newPositions);
		break;
            default:
		break;
	    }
	}
	for (j=0; j<part->num_atoms; j++) {
	    vadd(averagePositions[j],newPositions[j]);
	}
	
	tmp=oldPositions; oldPositions=positions; positions=newPositions; newPositions=tmp;
        if (ExcessiveEnergyWarningThisFrame > 0) {
            ExcessiveEnergyWarning = 1;
        }
    }
    
    for (j=0; j<part->num_atoms; j++) {
	vmulc(averagePositions[j],deltaTframe);
    }
    *pOldPositions = oldPositions;
    *pNewPositions = newPositions;
    *pPositions = positions;
}
示例#24
0
// Applies one pass of contact impulses to the two bodies of `arb`, handling
// every contact in arb->contacts in turn.
//
// Two-lane vectors (cpFloatx2_t) are used in two ways: as 2D (x, y) values,
// and to pack a pair of scalars (body a / body b, or the bias / normal terms)
// so that both are produced by one operation.
//
// Writes, per contact: v, w, v_bias and w_bias of both bodies, and jBias,
// jnAcc and jtAcc of the contact.
//
// NOTE(review): vld/vst/vmake/vrev/vpadd/vmul_n/... are wrappers defined
// elsewhere (presumably over NEON intrinsics). The lane-level remarks below
// assume vrev swaps the two lanes and vpadd is a pairwise add - confirm
// against their definitions.
static void
cpArbiterApplyImpulse_NEON(cpArbiter *arb)
{
    cpBody *a = arb->body_a;
    cpBody *b = arb->body_b;
    cpFloatx2_t surface_vr = vld((cpFloat_t *)&arb->surface_vr);
    cpFloatx2_t n = vld((cpFloat_t *)&arb->n);
    cpFloat_t friction = arb->u;

    int numContacts = arb->count;
    struct cpContact *contacts = arb->contacts;
    for(int i=0; i<numContacts; i++) {
        struct cpContact *con = contacts + i;
        cpFloatx2_t r1 = vld((cpFloat_t *)&con->r1);
        cpFloatx2_t r2 = vld((cpFloat_t *)&con->r2);

        // r1p/r2p: lane-reversed r1/r2 multiplied by (-1, 1); presumably the
        // perpendiculars (-y, x) of r1 and r2.
        cpFloatx2_t perp = vmake(-1.0, 1.0);
        cpFloatx2_t r1p = vmul(vrev(r1), perp);
        cpFloatx2_t r2p = vmul(vrev(r2), perp);

        // Body state is reloaded on every iteration because the stores at the
        // bottom of the loop update it after each contact.
        cpFloatx2_t vBias_a = vld((cpFloat_t *)&a->v_bias);
        cpFloatx2_t vBias_b = vld((cpFloat_t *)&b->v_bias);
        // wBias packs both bodies: lane 0 = a->w_bias, lane 1 = b->w_bias.
        cpFloatx2_t wBias = vmake(a->w_bias, b->w_bias);

        // Bias velocity of each body at the contact (linear + angular part),
        // and their difference b - a.
        cpFloatx2_t vb1 = vadd(vBias_a, vmul_n(r1p, vget_lane(wBias, 0)));
        cpFloatx2_t vb2 = vadd(vBias_b, vmul_n(r2p, vget_lane(wBias, 1)));
        cpFloatx2_t vbr = vsub(vb2, vb1);

        // Same computation for the regular velocities; w packs a->w / b->w.
        cpFloatx2_t v_a = vld((cpFloat_t *)&a->v);
        cpFloatx2_t v_b = vld((cpFloat_t *)&b->v);
        cpFloatx2_t w = vmake(a->w, b->w);
        cpFloatx2_t v1 = vadd(v_a, vmul_n(r1p, vget_lane(w, 0)));
        cpFloatx2_t v2 = vadd(v_b, vmul_n(r2p, vget_lane(w, 1)));
        cpFloatx2_t vr = vsub(v2, v1);

        // Presumably lane 0 = dot(vbr, n) and lane 1 = dot(vr, n).
        cpFloatx2_t vbn_vrn = vpadd(vmul(vbr, n), vmul(vr, n));

        // Bias and normal impulses computed together:
        // lane 0 uses (con->bias, con->jBias), lane 1 uses (-con->bounce, con->jnAcc).
        cpFloatx2_t v_offset = vmake(con->bias, -con->bounce);
        cpFloatx2_t jOld = vmake(con->jBias, con->jnAcc);
        cpFloatx2_t jbn_jn = vmul_n(vsub(v_offset, vbn_vrn), con->nMass);
        // Accumulate onto the stored impulses and clamp the totals at zero;
        // jApply is the change actually applied in this pass.
        jbn_jn = vmax(vadd(jOld, jbn_jn), vdup_n(0.0));
        cpFloatx2_t jApply = vsub(jbn_jn, jOld);

        // t: lane-reversed n multiplied by (-1, 1); presumably the tangent.
        // vrt: presumably dot(vr + surface_vr, t) in both lanes.
        cpFloatx2_t t = vmul(vrev(n), perp);
        cpFloatx2_t vrt_tmp = vmul(vadd(vr, surface_vr), t);
        cpFloatx2_t vrt = vpadd(vrt_tmp, vrt_tmp);

        // Friction impulse: lane 0 of jtOld holds con->jtAcc (the vector is
        // otherwise empty-initialised). jtMax is friction * jbn_jn with the
        // lanes reversed, so lane 0 presumably holds friction * (new jnAcc).
        // The accumulated value is clamped to [-jtMax, jtMax].
        cpFloatx2_t jtOld = {};
        jtOld = vset_lane(con->jtAcc, jtOld, 0);
        cpFloatx2_t jtMax = vrev(vmul_n(jbn_jn, friction));
        cpFloatx2_t jt = vmul_n(vrt, -con->tMass);
        jt = vmax(vneg(jtMax), vmin(vadd(jtOld, jt), jtMax));
        cpFloatx2_t jtApply = vsub(jt, jtOld);

        // i_inv packs (-a->i_inv, b->i_inv): the sign for body a is folded in
        // here so one vadd updates both angular terms with opposite signs.
        cpFloatx2_t i_inv = vmake(-a->i_inv, b->i_inv);
        cpFloatx2_t nperp = vmake(1.0, -1.0);

        // Apply the bias impulse (lane 0 of jApply) along n:
        // angular part via the packed products of r1/r2 with jBiasCross,
        // linear part subtracted from a and added to b.
        cpFloatx2_t jBias = vmul_n(n, vget_lane(jApply, 0));
        cpFloatx2_t jBiasCross = vmul(vrev(jBias), nperp);
        cpFloatx2_t biasCrosses = vpadd(vmul(r1, jBiasCross), vmul(r2, jBiasCross));
        wBias = vadd(wBias, vmul(i_inv, biasCrosses));

        vBias_a = vsub(vBias_a, vmul_n(jBias, a->m_inv));
        vBias_b = vadd(vBias_b, vmul_n(jBias, b->m_inv));

        // Apply the regular impulse j = n * (lane 1 of jApply) + t * (lane 0
        // of jtApply) in the same way.
        cpFloatx2_t j = vadd(vmul_n(n, vget_lane(jApply, 1)), vmul_n(t, vget_lane(jtApply, 0)));
        cpFloatx2_t jCross = vmul(vrev(j), nperp);
        cpFloatx2_t crosses = vpadd(vmul(r1, jCross), vmul(r2, jCross));
        w = vadd(w, vmul(i_inv, crosses));

        v_a = vsub(v_a, vmul_n(j, a->m_inv));
        v_b = vadd(v_b, vmul_n(j, b->m_inv));

        // Write everything back so the next contact sees the updated bodies.
        // TODO would moving these earlier help pipeline them better?
        vst((cpFloat_t *)&a->v_bias, vBias_a);
        vst((cpFloat_t *)&b->v_bias, vBias_b);
        vst_lane((cpFloat_t *)&a->w_bias, wBias, 0);
        vst_lane((cpFloat_t *)&b->w_bias, wBias, 1);

        vst((cpFloat_t *)&a->v, v_a);
        vst((cpFloat_t *)&b->v, v_b);
        vst_lane((cpFloat_t *)&a->w, w, 0);
        vst_lane((cpFloat_t *)&b->w, w, 1);

        // Store the accumulated (clamped) impulses back into the contact.
        vst_lane((cpFloat_t *)&con->jBias, jbn_jn, 0);
        vst_lane((cpFloat_t *)&con->jnAcc, jbn_jn, 1);
        vst_lane((cpFloat_t *)&con->jtAcc, jt, 0);
    }
}
示例#25
0
int main(void)
{
    std::vector<float> h_a(LENGTH);                // a vector 
    std::vector<float> h_b(LENGTH);                // b vector 	
    std::vector<float> h_c(LENGTH, 0xdeadbeef);    // c = a + b, from compute device

    cl::Buffer d_a;                        // device memory used for the input  a vector
    cl::Buffer d_b;                        // device memory used for the input  b vector
    cl::Buffer d_c;                       // device memory used for the output c vector

    // Fill vectors a and b with random float values
    int count = LENGTH;
    for(int i = 0; i < count; i++)
    {
        h_a[i]  = rand() / (float)RAND_MAX;
        h_b[i]  = rand() / (float)RAND_MAX;
    }

    try 
    {
    	// Create a context
        cl::Context context(DEVICE);

        // Load in kernel source, creating a program object for the context

        cl::Program program(context, util::loadProgram("vadd.cl"), true);

        // Get the command queue
        cl::CommandQueue queue(context);

        // Create the kernel functor
 
        cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int> vadd(program, "vadd");

        d_a   = cl::Buffer(context, h_a.begin(), h_a.end(), true);
        d_b   = cl::Buffer(context, h_b.begin(), h_b.end(), true);

        d_c  = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH);

        util::Timer timer;

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)), 
            d_a,
            d_b,
            d_c,
            count);

        queue.finish();

        double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;
        printf("\nThe kernels ran in %lf seconds\n", rtime);

        cl::copy(queue, d_c, h_c.begin(), h_c.end());

        // Test the results
        int correct = 0;
        float tmp;
        for(int i = 0; i < count; i++) {
            tmp = h_a[i] + h_b[i]; // expected value for d_c[i]
            tmp -= h_c[i];                      // compute errors
            if(tmp*tmp < TOL*TOL) {      // correct if square deviation is less 
                correct++;                         //  than tolerance squared
            }
            else {

                printf(
                    " tmp %f h_a %f h_b %f  h_c %f \n",
                    tmp, 
                    h_a[i], 
                    h_b[i], 
                    h_c[i]);
            }
        }

        // summarize results
        printf(
            "vector add to find C = A+B:  %d out of %d results were correct.\n", 
            correct, 
            count);
    }
    catch (cl::Error err) {
        std::cout << "Exception\n";
        std::cerr 
            << "ERROR: "
            << err.what()
            << "("
            << err_code(err.err())
           << ")"
           << std::endl;
    }
}
示例#26
0
文件: siv.c 项目: medsec/riv
/*
 * Counter-mode keystream encryption: ciphertext = plaintext XOR E(ctr XOR iv),
 * with ctr starting at `zero` and incremented by `one` per 16-byte block.
 *
 * ctx         supplies the expanded encryption key
 * iv          initial value; XORed with ctx->expanced_enc_key[0] up front
 *             (presumably so the AES helpers can skip the first key
 *             addition - confirm against aes_eight/aes_encrypt_n)
 * plaintext   input, len bytes
 * len         number of bytes to process; need not be a multiple of BLOCKLEN
 * ciphertext  output, len bytes
 *
 * Blocks are processed eight at a time; whatever is left (1..8 blocks) is
 * handled in a tail that uses load_partial/store_partial for a final block
 * shorter than BLOCKLEN, so no byte beyond `len` is read or written.
 */
static inline void counter_mode(riv_context_t* ctx, 
                                __m128i iv, 
                                __m128i* plaintext, 
                                uint64_t len, 
                                __m128i* ciphertext)
{
    __m128i ctr = zero;
    __m128i states[8];
    unsigned int i, k, num_blocks, num_chunks, lastblock, remaining_blocks;

    num_blocks = len / BLOCKLEN;   // len / 16
    lastblock = len % BLOCKLEN; // len mod 16

    if (lastblock != 0) {
        num_blocks++;
    }

    num_chunks = num_blocks >> 3;
    remaining_blocks = num_blocks % 8;

    // A partial final block must never go through the 8-block fast path:
    // xor_eight reads and writes eight full blocks and would run past the
    // end of both buffers. If the partial block would be the eighth block of
    // the last chunk, move that whole chunk into the tail instead.
    // (lastblock != 0 and remaining_blocks == 0 imply num_chunks >= 1.)
    if (lastblock != 0 && remaining_blocks == 0) {
        num_chunks--;
        remaining_blocks = 8;
    }

    iv  = vxor(iv, ctx->expanced_enc_key[0]);
    k = 0;

    // Fast path: eight full blocks per iteration.
    for(i = 0; i != num_chunks; i++) {
        states[0] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[1] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[2] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[3] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[4] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[5] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[6] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[7] = vxor(ctr,iv); ctr = vadd(ctr,one);

        aes_eight(states, ctx->expanced_enc_key);

        xor_eight(ciphertext, states, plaintext, k);
        k += 8;
    }

    // Tail: 1..8 blocks, the last of which may be partial.
    if (remaining_blocks != 0) {
        k = num_chunks * 8; // position
        ciphertext += k;
        plaintext += k;
        
        for(i = 0; i < remaining_blocks; i++) {
            states[i] = vxor(ctr, iv); ctr = vadd(ctr, one);
        }
        
        // Use the same eight-block primitive as the fast path when the tail
        // holds a full group of eight; aes_encrypt_n covers the other sizes.
        if (remaining_blocks == 8) {
            aes_eight(states, ctx->expanced_enc_key);
        } else {
            aes_encrypt_n(states, remaining_blocks, ctx->expanced_enc_key);
        }
        
        // All tail blocks except the last one are full blocks.
        for (i = 0; i < remaining_blocks-1; i++) {
            ciphertext[i] = vxor(states[i], plaintext[i]);
        }
        
        if (lastblock == 0) { // Last block is full
            ciphertext[i] = vxor(states[i], plaintext[i]);
        } else {
            // Last block is partial: touch only `lastblock` bytes.
            store_partial(ciphertext+i, 
                vxor(
                    load_partial((const void*)(plaintext+i), lastblock), 
                    states[i]
                ), lastblock
            );
        }
    }
}
示例#27
0
文件: build.c 项目: rosrad/go-rep
// install installs the library, package, or binary associated with dir,
// which is relative to $GOROOT/src.
//
// Outline of the steps below:
//   1. special cases (misc/..., missing cmd/prof) and one-time setup of
//      the gcc/ld argument vectors;
//   2. start the link command line and record the target in link.p[targ];
//   3. collect the source files (directory listing plus deptab entries);
//   4. decide whether the target is stale; if not, stop;
//   5. generate files, compile .c/.s files, compile Go files, link or pack;
//   6. remove intermediate files listed in clean and free all buffers.
// Scratch buffers b and b1 are reused throughout, so the order of the
// bprintf/bpathf calls matters.
static void
install(char *dir)
{
	char *name, *p, *elem, *prefix, *exe;
	bool islib, ispkg, isgo, stale, ispackcmd;
	Buf b, b1, path;
	Vec compile, files, link, go, missing, clean, lib, extra;
	Time ttarg, t;
	int i, j, k, n, doclean, targ;

	if(vflag) {
		if(!streq(goos, gohostos) || !streq(goarch, gohostarch))
			errprintf("%s (%s/%s)\n", dir, goos, goarch);
		else
			errprintf("%s\n", dir);
	}

	// Everything initialized here is released at the out: label.
	binit(&b);
	binit(&b1);
	binit(&path);
	vinit(&compile);
	vinit(&files);
	vinit(&link);
	vinit(&go);
	vinit(&missing);
	vinit(&clean);
	vinit(&lib);
	vinit(&extra);


	// path = full path to dir.
	bpathf(&path, "%s/src/%s", goroot, dir);
	name = lastelem(dir);

	// For misc/prof, copy into the tool directory and we're done.
	if(hasprefix(dir, "misc/")) {
		copy(bpathf(&b, "%s/%s", tooldir, name),
			bpathf(&b1, "%s/misc/%s", goroot, name), 1);
		goto out;
	}

	// For release, cmd/prof is not included.
	if((streq(dir, "cmd/prof")) && !isdir(bstr(&path))) {
		if(vflag > 1)
			errprintf("skipping %s - does not exist\n", dir);
		goto out;
	}

	// set up gcc command line on first run.
	if(gccargs.len == 0) {
		bprintf(&b, "%s %s", defaultcc, defaultcflags);
		splitfields(&gccargs, bstr(&b));
		for(i=0; i<nelem(proto_gccargs); i++)
			vadd(&gccargs, proto_gccargs[i]);
		// proto_gccargs2 is only added when no default C flags were given.
		if(defaultcflags[0] == '\0') {
			for(i=0; i<nelem(proto_gccargs2); i++)
				vadd(&gccargs, proto_gccargs2[i]);
		}
		if(contains(gccargs.p[0], "clang")) {
			// disable ASCII art in clang errors, if possible
			vadd(&gccargs, "-fno-caret-diagnostics");
			// clang is too smart about unused command-line arguments
			vadd(&gccargs, "-Qunused-arguments");
		}
		// disable word wrapping in error messages
		vadd(&gccargs, "-fmessage-length=0");
		if(streq(gohostos, "darwin")) {
			// golang.org/issue/5261
			vadd(&gccargs, "-mmacosx-version-min=10.6");
		}
	}
	// Likewise set up the linker flags once, if any were configured.
	if(ldargs.len == 0 && defaultldflags[0] != '\0') {
		bprintf(&b, "%s", defaultldflags);
		splitfields(&ldargs, bstr(&b));
	}

	// Classify the target by its directory name.
	islib = hasprefix(dir, "lib") || streq(dir, "cmd/cc") || streq(dir, "cmd/gc");
	ispkg = hasprefix(dir, "pkg");
	isgo = ispkg || streq(dir, "cmd/go") || streq(dir, "cmd/cgo");

	exe = "";
	if(streq(gohostos, "windows"))
		exe = ".exe";

	// Start final link command line.
	// Note: code below knows that link.p[targ] is the target.
	ispackcmd = 0;
	if(islib) {
		// C library.
		vadd(&link, "ar");
		if(streq(gohostos, "plan9"))
			vadd(&link, "rc");
		else
			vadd(&link, "rsc");
		prefix = "";
		if(!hasprefix(name, "lib"))
			prefix = "lib";
		targ = link.len;
		vadd(&link, bpathf(&b, "%s/pkg/obj/%s_%s/%s%s.a", goroot, gohostos, gohostarch, prefix, name));
	} else if(ispkg) {
		// Go library (package).
		ispackcmd = 1;
		vadd(&link, "pack"); // program name - unused here, but all the other cases record one
		// dir+4 skips the leading "pkg/"; make sure the parent directory
		// of the package archive exists.
		p = bprintf(&b, "%s/pkg/%s_%s/%s", goroot, goos, goarch, dir+4);
		*xstrrchr(p, '/') = '\0';
		xmkdirall(p);
		targ = link.len;
		vadd(&link, bpathf(&b, "%s/pkg/%s_%s/%s.a", goroot, goos, goarch, dir+4));
	} else if(streq(dir, "cmd/go") || streq(dir, "cmd/cgo")) {
		// Go command.
		vadd(&link, bpathf(&b, "%s/%sl", tooldir, gochar));
		vadd(&link, "-o");
		elem = name;
		// cmd/go is installed under the name go_bootstrap.
		if(streq(elem, "go"))
			elem = "go_bootstrap";
		targ = link.len;
		vadd(&link, bpathf(&b, "%s/%s%s", tooldir, elem, exe));
	} else {
		// C command. Use gccargs and ldargs.
		if(streq(gohostos, "plan9")) {
			vadd(&link, bprintf(&b, "%sl", gohostchar));
			vadd(&link, "-o");
			targ = link.len;
			vadd(&link, bpathf(&b, "%s/%s", tooldir, name));
		} else {
			vcopy(&link, gccargs.p, gccargs.len);
			vcopy(&link, ldargs.p, ldargs.len);
			if(sflag)
				vadd(&link, "-static");
			vadd(&link, "-o");
			targ = link.len;
			vadd(&link, bpathf(&b, "%s/%s%s", tooldir, name, exe));
			if(streq(gohostarch, "amd64"))
				vadd(&link, "-m64");
			else if(streq(gohostarch, "386"))
				vadd(&link, "-m32");
		}
	}
	// Modification time of the existing target, compared against the
	// sources and libraries below.
	ttarg = mtime(link.p[targ]);

	// Gather files that are sources for this target.
	// Everything in that directory, and any target-specific
	// additions.
	xreaddir(&files, bstr(&path));

	// Remove files beginning with . or _,
	// which are likely to be editor temporary files.
	// This is the same heuristic build.ScanDir uses.
	// There do exist real C files beginning with _,
	// so limit that check to just Go files.
	n = 0;
	for(i=0; i<files.len; i++) {
		p = files.p[i];
		if(hasprefix(p, ".") || (hasprefix(p, "_") && hassuffix(p, ".go")))
			xfree(p);
		else
			files.p[n++] = p;
	}
	files.len = n;

	// Apply the dependency table entries whose prefix matches dir
	// (exactly, or as a directory prefix when the entry ends in "/").
	// After $GOROOT/$GOOS/$GOARCH substitution each dependency is one of:
	//   *.a      a library, recorded in lib;
	//   dir/*    every file of a subdirectory, added to files;
	//   -prefix  remove files starting with prefix from files;
	//   other    an additional source file.
	for(i=0; i<nelem(deptab); i++) {
		if(streq(dir, deptab[i].prefix) ||
		   (hassuffix(deptab[i].prefix, "/") && hasprefix(dir, deptab[i].prefix))) {
			for(j=0; (p=deptab[i].dep[j])!=nil; j++) {
				breset(&b1);
				bwritestr(&b1, p);
				bsubst(&b1, "$GOROOT", goroot);
				bsubst(&b1, "$GOOS", goos);
				bsubst(&b1, "$GOARCH", goarch);
				p = bstr(&b1);
				if(hassuffix(p, ".a")) {
					vadd(&lib, bpathf(&b, "%s", p));
					continue;
				}
				if(hassuffix(p, "/*")) {
					// b.len -= 2 drops the trailing "/*" in both uses.
					bpathf(&b, "%s/%s", bstr(&path), p);
					b.len -= 2;
					xreaddir(&extra, bstr(&b));
					bprintf(&b, "%s", p);
					b.len -= 2;
					for(k=0; k<extra.len; k++)
						vadd(&files, bpathf(&b1, "%s/%s", bstr(&b), extra.p[k]));
					continue;
				}
				if(hasprefix(p, "-")) {
					p++;
					n = 0;
					for(k=0; k<files.len; k++) {
						if(hasprefix(files.p[k], p))
							xfree(files.p[k]);
						else
							files.p[n++] = files.p[k];
					}
					files.len = n;
					continue;
				}
				vadd(&files, p);
			}
		}
	}
	vuniq(&files);

	// Convert to absolute paths.
	for(i=0; i<files.len; i++) {
		if(!isabs(files.p[i])) {
			bpathf(&b, "%s/%s", bstr(&path), files.p[i]);
			xfree(files.p[i]);
			files.p[i] = btake(&b);
		}
	}

	// Is the target up-to-date?
	// The loop also filters files in place: entries without a suffix from
	// depsuffix are dropped, as are existing non-.a files that shouldbuild
	// rejects. Files with mtime 0 are kept and recorded in missing.
	stale = rebuildall;
	n = 0;
	for(i=0; i<files.len; i++) {
		p = files.p[i];
		for(j=0; j<nelem(depsuffix); j++)
			if(hassuffix(p, depsuffix[j]))
				goto ok;
		xfree(files.p[i]);
		continue;
	ok:
		t = mtime(p);
		if(t != 0 && !hassuffix(p, ".a") && !shouldbuild(p, dir)) {
			xfree(files.p[i]);
			continue;
		}
		if(hassuffix(p, ".go"))
			vadd(&go, p);
		if(t > ttarg)
			stale = 1;
		if(t == 0) {
			vadd(&missing, p);
			files.p[n++] = files.p[i];
			continue;
		}
		files.p[n++] = files.p[i];
	}
	files.len = n;

	// If there are no files to compile, we're done.
	if(files.len == 0)
		goto out;
	
	// A library newer than the target also makes the target stale.
	for(i=0; i<lib.len && !stale; i++)
		if(mtime(lib.p[i]) > ttarg)
			stale = 1;

	if(!stale)
		goto out;

	// For package runtime, copy some files into the work space.
	if(streq(dir, "pkg/runtime")) {
		copy(bpathf(&b, "%s/arch_GOARCH.h", workdir),
			bpathf(&b1, "%s/arch_%s.h", bstr(&path), goarch), 0);
		copy(bpathf(&b, "%s/defs_GOOS_GOARCH.h", workdir),
			bpathf(&b1, "%s/defs_%s_%s.h", bstr(&path), goos, goarch), 0);
		p = bpathf(&b1, "%s/signal_%s_%s.h", bstr(&path), goos, goarch);
		if(isfile(p))
			copy(bpathf(&b, "%s/signal_GOOS_GOARCH.h", workdir), p, 0);
		copy(bpathf(&b, "%s/os_GOOS.h", workdir),
			bpathf(&b1, "%s/os_%s.h", bstr(&path), goos), 0);
		copy(bpathf(&b, "%s/signals_GOOS.h", workdir),
			bpathf(&b1, "%s/signals_%s.h", bstr(&path), goos), 0);
	}

	// Generate any missing files; regenerate existing ones.
	for(i=0; i<files.len; i++) {
		p = files.p[i];
		elem = lastelem(p);
		for(j=0; j<nelem(gentab); j++) {
			if(gentab[j].gen == nil)
				continue;
			if(hasprefix(elem, gentab[j].nameprefix)) {
				if(vflag > 1)
					errprintf("generate %s\n", p);
				gentab[j].gen(bstr(&path), p);
				// Do not add generated file to clean list.
				// In pkg/runtime, we want to be able to
				// build the package with the go tool,
				// and it assumes these generated files already
				// exist (it does not know how to build them).
				// The 'clean' command can remove
				// the generated files.
				goto built;
			}
		}
		// Did not rebuild p.
		if(find(p, missing.p, missing.len) >= 0)
			fatal("missing file %s", p);
	built:;
	}

	// One more copy for package runtime.
	// The last batch was required for the generators.
	// This one is generated.
	if(streq(dir, "pkg/runtime")) {
		copy(bpathf(&b, "%s/zasm_GOOS_GOARCH.h", workdir),
			bpathf(&b1, "%s/zasm_%s_%s.h", bstr(&path), goos, goarch), 0);
	}

	// Generate .c files from .goc files.
	if(streq(dir, "pkg/runtime")) {
		for(i=0; i<files.len; i++) {
			p = files.p[i];
			if(!hassuffix(p, ".goc"))
				continue;
			// b = path/zp but with _goos_goarch.c instead of .goc
			bprintf(&b, "%s%sz%s", bstr(&path), slash, lastelem(p));
			b.len -= 4;
			bwritef(&b, "_%s_%s.c", goos, goarch);
			goc2c(p, bstr(&b));
			vadd(&files, bstr(&b));
		}
		vuniq(&files);
	}

	if((!streq(goos, gohostos) || !streq(goarch, gohostarch)) && isgo) {
		// We've generated the right files; the go command can do the build.
		if(vflag > 1)
			errprintf("skip build for cross-compile %s\n", dir);
		goto nobuild;
	}

	// Compile the files.
	// Only .c and .s files are handled here; each compile is started with
	// bgrunv and the loop is followed by bgwait.
	for(i=0; i<files.len; i++) {
		if(!hassuffix(files.p[i], ".c") && !hassuffix(files.p[i], ".s"))
			continue;
		name = lastelem(files.p[i]);

		vreset(&compile);
		if(!isgo) {
			// C library or tool.
			if(streq(gohostos, "plan9")) {
				vadd(&compile, bprintf(&b, "%sc", gohostchar));
				vadd(&compile, "-FTVwp");
				vadd(&compile, "-DPLAN9");
				vadd(&compile, "-D__STDC__=1");
				vadd(&compile, "-D__SIZE_TYPE__=ulong"); // for GNU Bison
				vadd(&compile, bpathf(&b, "-I%s/include/plan9", goroot));
				vadd(&compile, bpathf(&b, "-I%s/include/plan9/%s", goroot, gohostarch));
			} else {
				vcopy(&compile, gccargs.p, gccargs.len);
				vadd(&compile, "-c");
				if(streq(gohostarch, "amd64"))
					vadd(&compile, "-m64");
				else if(streq(gohostarch, "386"))
					vadd(&compile, "-m32");
	
				vadd(&compile, "-I");
				vadd(&compile, bpathf(&b, "%s/include", goroot));
			}

			if(streq(dir, "lib9"))
				vadd(&compile, "-DPLAN9PORT");


			vadd(&compile, "-I");
			vadd(&compile, bstr(&path));

			// lib9/goos.c gets the default constants hard-coded.
			if(streq(name, "goos.c")) {
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOOS=\"%s\"", goos));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOARCH=\"%s\"", goarch));
				bprintf(&b1, "%s", goroot_final);
				bsubst(&b1, "\\", "\\\\");  // turn into C string
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOROOT=\"%s\"", bstr(&b1)));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOVERSION=\"%s\"", goversion));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOARM=\"%s\"", goarm));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GO386=\"%s\"", go386));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GO_EXTLINK_ENABLED=\"%s\"", goextlinkenabled));
			}

			// gc/lex.c records the GOEXPERIMENT setting used during the build.
			if(streq(name, "lex.c")) {
				xgetenv(&b, "GOEXPERIMENT");
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b1, "GOEXPERIMENT=\"%s\"", bstr(&b)));
			}
		} else {
			// Supporting files for a Go package.
			if(hassuffix(files.p[i], ".s"))
				vadd(&compile, bpathf(&b, "%s/%sa", tooldir, gochar));
			else {
				vadd(&compile, bpathf(&b, "%s/%sc", tooldir, gochar));
				vadd(&compile, "-F");
				vadd(&compile, "-V");
				vadd(&compile, "-w");
			}
			vadd(&compile, "-I");
			vadd(&compile, workdir);
			vadd(&compile, "-I");
			vadd(&compile, bprintf(&b, "%s/pkg/%s_%s", goroot, goos, goarch));
			vadd(&compile, "-D");
			vadd(&compile, bprintf(&b, "GOOS_%s", goos));
			vadd(&compile, "-D");
			vadd(&compile, bprintf(&b, "GOARCH_%s", goarch));
			vadd(&compile, "-D");
			vadd(&compile, bprintf(&b, "GOOS_GOARCH_%s_%s", goos, goarch));
		}

		// b = output object path; by default it lives in workdir and is
		// removed again at the end (doclean).
		bpathf(&b, "%s/%s", workdir, lastelem(files.p[i]));
		doclean = 1;
		if(!isgo && streq(gohostos, "darwin")) {
			// To debug C programs on OS X, it is not enough to say -ggdb
			// on the command line.  You have to leave the object files
			// lying around too.  Leave them in pkg/obj/, which does not
			// get removed when this tool exits.
			bpathf(&b1, "%s/pkg/obj/%s", goroot, dir);
			xmkdirall(bstr(&b1));
			bpathf(&b, "%s/%s", bstr(&b1), lastelem(files.p[i]));
			doclean = 0;
		}

		// Change the last character of the output file (which was c or s).
		if(streq(gohostos, "plan9"))
			b.p[b.len-1] = gohostchar[0];
		else
			b.p[b.len-1] = 'o';
		vadd(&compile, "-o");
		vadd(&compile, bstr(&b));
		vadd(&compile, files.p[i]);
		bgrunv(bstr(&path), CheckExit, &compile);

		// The object file becomes an input of the final link command.
		vadd(&link, bstr(&b));
		if(doclean)
			vadd(&clean, bstr(&b));
	}
	bgwait();

	if(isgo) {
		// The last loop was compiling individual files.
		// Hand the Go files to the compiler en masse.
		vreset(&compile);
		vadd(&compile, bpathf(&b, "%s/%sg", tooldir, gochar));

		bpathf(&b, "%s/_go_.a", workdir);
		vadd(&compile, "-pack");
		vadd(&compile, "-o");
		vadd(&compile, bstr(&b));
		vadd(&clean, bstr(&b));
		if(!ispackcmd)
			vadd(&link, bstr(&b));

		vadd(&compile, "-p");
		if(hasprefix(dir, "pkg/"))
			vadd(&compile, dir+4);
		else
			vadd(&compile, "main");

		if(streq(dir, "pkg/runtime"))
			vadd(&compile, "-+");

		vcopy(&compile, go.p, go.len);

		runv(nil, bstr(&path), CheckExit, &compile);

		// For packages, dopack is called with the target, the _go_.a
		// path still held in b, and the link entries after the target;
		// no separate link step is run.
		if(ispackcmd) {
			xremove(link.p[targ]);
			dopack(link.p[targ], bstr(&b), &link.p[targ+1], link.len - (targ+1));
			goto nobuild;
		}
	}

	if(!islib && !isgo) {
		// C binaries need the libraries explicitly, and -lm.
		vcopy(&link, lib.p, lib.len);
		if(!streq(gohostos, "plan9"))
			vadd(&link, "-lm");
	}

	// Remove target before writing it.
	xremove(link.p[targ]);

	runv(nil, nil, CheckExit, &link);

nobuild:
	// In package runtime, we install runtime.h and cgocall.h too,
	// for use by cgo compilation.
	if(streq(dir, "pkg/runtime")) {
		copy(bpathf(&b, "%s/pkg/%s_%s/cgocall.h", goroot, goos, goarch),
			bpathf(&b1, "%s/src/pkg/runtime/cgocall.h", goroot), 0);
		copy(bpathf(&b, "%s/pkg/%s_%s/runtime.h", goroot, goos, goarch),
			bpathf(&b1, "%s/src/pkg/runtime/runtime.h", goroot), 0);
	}


out:
	// Common exit path: remove the intermediate files recorded in clean,
	// then release every buffer and vector initialized at the top.
	for(i=0; i<clean.len; i++)
		xremove(clean.p[i]);

	bfree(&b);
	bfree(&b1);
	bfree(&path);
	vfree(&compile);
	vfree(&files);
	vfree(&link);
	vfree(&go);
	vfree(&missing);
	vfree(&clean);
	vfree(&lib);
	vfree(&extra);
}
示例#28
0
/* Test entry point: its only action is a call to vadd with no arguments. */
void test_add(void)
{
    vadd();
}
示例#29
0
文件: vector.c 项目: lrr-tum/dbrew
/*
 * Benchmark driver comparing naive, un-rewritten and dbrew-rewritten
 * versions of vcopy, vadd and vjacobi_1d.
 *
 * Leading options (parsing stops at the first unrecognized argument):
 *   -v    increase verbosity by one (decode the rewritten functions)
 *   -vv   increase verbosity by two (also enable dbrew_verbose)
 *   -n    generate the rewritten variants but do not run them
 *   -c    run only the vcopy benchmark
 *   -a    run only the vadd benchmark
 *   -j    run only the vjacobi_1d benchmark
 * Positional arguments:
 *   len   array length in thousands of doubles (default 10000)
 *   iters number of iterations per variant (default 20)
 *
 * Returns 0 on success, 1 if the arrays cannot be allocated.
 */
int main(int argc, char* argv[])
{
    double t1, t2, t3, t4, t5;
    double sum1, sum2, sum3, sum4;
    int arg = 1, len = 0, iters = 0, verb = 0, run = 1;
    int do_vcopy = 1, do_vadd = 1, do_vjacobi = 1;
    while(argc>arg) {
        if      (strcmp(argv[arg],"-v")==0)  verb++;
        else if (strcmp(argv[arg],"-vv")==0) verb+=2;
        else if (strcmp(argv[arg],"-n")==0)  run = 0;
        else if (strcmp(argv[arg],"-c")==0)  do_vadd = 0,  do_vjacobi = 0;
        else if (strcmp(argv[arg],"-a")==0)  do_vcopy = 0, do_vjacobi = 0;
        else if (strcmp(argv[arg],"-j")==0)  do_vcopy = 0, do_vadd = 0;
        else
            break;
        arg++;
    }
    if (argc>arg) { len   = atoi(argv[arg]); arg++; }
    if (argc>arg) { iters = atoi(argv[arg]); arg++; }
    // Fall back to the defaults for missing, unparsable or negative values.
    if (len <= 0) len = 10000;
    if (iters <= 0) iters = 20;
    // NOTE(review): this multiplication overflows int for len > INT_MAX/1000.
    len = len * 1000;

    printf("Alloc/init 3 double arrays of length %d ...\n", len);
    double* a = (double*) malloc(len * sizeof(double));
    double* b = (double*) malloc(len * sizeof(double));
    double* c = (double*) malloc(len * sizeof(double));
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "Error: cannot allocate 3 double arrays of length %d\n",
                len);
        free(a);
        free(b);
        free(c);
        return 1;
    }
    for(int i = 0; i<len; i++) {
        a[i] = 1.0;
        b[i] = (double) (i % 20);
        c[i] = 3.0;
    }

    // Generate vectorized variants & run against naive/original

    // The 32-byte vector variants are only generated and run on AVX builds.
#if __AVX__
    bool do32 = true;
#else
    bool do32 = false;
#endif

    // vcopy

    if (do_vcopy) {
        vcopy_t vcopy16, vcopy32;

        Rewriter* rc16 = dbrew_new();
        if (verb>1) dbrew_verbose(rc16, true, true, true);
        dbrew_set_function(rc16, (uint64_t) vcopy);
        dbrew_config_parcount(rc16, 3);
        dbrew_config_force_unknown(rc16, 0);
        dbrew_set_vectorsize(rc16, 16);
        vcopy16 = (vcopy_t) dbrew_rewrite(rc16, a, b, len);
        if (verb) decode_func(rc16, "vcopy16");

        if (do32) {
            Rewriter* rc32 = dbrew_new();
            if (verb>1) dbrew_verbose(rc32, true, true, true);
            dbrew_set_function(rc32, (uint64_t) vcopy);
            dbrew_config_parcount(rc32, 3);
            dbrew_config_force_unknown(rc32, 0);
            dbrew_set_vectorsize(rc32, 32);
            vcopy32 = (vcopy_t) dbrew_rewrite(rc32, a, b, len);
            if (verb) decode_func(rc32, "vcopy32");
        }

        printf("Running %d iterations of vcopy ...\n", iters);
        t1 = wtime();
        for(int iter = 0; iter < iters; iter++)
            naive_vcopy(a, b, len);
        t2 = wtime();
        for(int iter = 0; iter < iters; iter++)
            vcopy(a, b, len);
        t3 = wtime();
        if (run)
            for(int iter = 0; iter < iters; iter++)
                vcopy16(a, b, len);
        t4 = wtime();
        // vcopy32 is only assigned when do32 is set, so keep the call guarded.
        if (do32 && run)
            for(int iter = 0; iter < iters; iter++)
                vcopy32(a, b, len);
        t5 = wtime();
        printf("  naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s",
               t2-t1, t3-t2, t4-t3);
        if (do32)
            printf(", rewritten-32: %.3f s", t5-t4);
        printf("\n");
    }


    // vadd

    if (do_vadd) {
        vadd_t vadd16, vadd32;

        Rewriter* ra16 = dbrew_new();
        if (verb>1) dbrew_verbose(ra16, true, true, true);
        dbrew_set_function(ra16, (uint64_t) vadd);
        dbrew_config_parcount(ra16, 4);
        dbrew_config_force_unknown(ra16, 0);
        dbrew_set_vectorsize(ra16, 16);
        vadd16 = (vadd_t) dbrew_rewrite(ra16, a, b, c, len);
        if (verb) decode_func(ra16, "vadd16");

        if (do32) {
            Rewriter* ra32 = dbrew_new();
            if (verb>1) dbrew_verbose(ra32, true, true, true);
            dbrew_set_function(ra32, (uint64_t) vadd);
            dbrew_config_parcount(ra32, 4);
            dbrew_config_force_unknown(ra32, 0);
            dbrew_set_vectorsize(ra32, 32);
            vadd32 = (vadd_t) dbrew_rewrite(ra32, a, b, c, len);
            if (verb) decode_func(ra32, "vadd32");
        }

        // Each timed phase also sums array a, giving a checksum per variant.
        sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0;
        printf("Running %d iterations of vadd ...\n", iters);
        t1 = wtime();
        for(int iter = 0; iter < iters; iter++)
            naive_vadd(a, b, c, len);
        for(int i = 0; i < len; i++) sum1 += a[i];
        t2 = wtime();
        for(int iter = 0; iter < iters; iter++)
            vadd(a, b, c, len);
        for(int i = 0; i < len; i++) sum2 += a[i];
        t3 = wtime();
        if (run)
            for(int iter = 0; iter < iters; iter++)
                vadd16(a, b, c, len);
        for(int i = 0; i < len; i++) sum3 += a[i];
        t4 = wtime();
        if (do32 && run)
            for(int iter = 0; iter < iters; iter++)
                vadd32(a, b, c, len);
        for(int i = 0; i < len; i++) sum4 += a[i];
        t5 = wtime();

        printf("  naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s",
               t2-t1, t3-t2, t4-t3);
        if (do32)
            printf(", rewritten-32: %.3f s", t5-t4);
        printf("\n");
        // Report the 32-byte checksum only when that variant exists,
        // mirroring the timing line above.
        printf("  sum naive: %f, sum rewritten-16: %f", sum1, sum3);
        if (do32)
            printf(", sum rewritten-32: %f", sum4);
        printf("\n");
    }


    // vjacobi_1d

    if (do_vjacobi) {
        vcopy_t vjacobi_1d16, vjacobi_1d32;

        Rewriter* rj16 = dbrew_new();
        if (verb>1) dbrew_verbose(rj16, true, true, true);
        dbrew_set_function(rj16, (uint64_t) vjacobi_1d);
        dbrew_config_parcount(rj16, 3);
        dbrew_config_force_unknown(rj16, 0);
        dbrew_set_vectorsize(rj16, 16);
        vjacobi_1d16 = (vcopy_t) dbrew_rewrite(rj16, a, b, len);
        if (verb) decode_func(rj16, "vjacobi_1d16");

        if (do32) {
            Rewriter* rj32 = dbrew_new();
            if (verb>1) dbrew_verbose(rj32, true, true, true);
            dbrew_set_function(rj32, (uint64_t) vjacobi_1d);
            dbrew_config_parcount(rj32, 3);
            dbrew_config_force_unknown(rj32, 0);
            dbrew_set_vectorsize(rj32, 32);
            vjacobi_1d32 = (vcopy_t) dbrew_rewrite(rj32, a, b, len);
            if (verb) decode_func(rj32, "vjacobi_1d32");
        }

        // The kernels are handed the interior of the arrays (a+1, b+1, len-2).
        sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0;
        printf("Running %d iterations of vjacobi_1d ...\n", iters);
        t1 = wtime();
        for(int iter = 0; iter < iters; iter++)
            naive_vjacobi_1d(a+1, b+1, len-2);
        for(int i = 0; i < len; i++) sum1 += a[i];
        t2 = wtime();
        for(int iter = 0; iter < iters; iter++)
            vjacobi_1d(a+1, b+1, len-2);
        for(int i = 0; i < len; i++) sum2 += a[i];
        t3 = wtime();
        if (run)
            for(int iter = 0; iter < iters; iter++)
                vjacobi_1d16(a+1, b+1, len-2);
        for(int i = 0; i < len; i++) sum3 += a[i];
        t4 = wtime();
        if (do32 && run)
            for(int iter = 0; iter < iters; iter++)
                vjacobi_1d32(a+1, b+1, len-2);
        for(int i = 0; i < len; i++) sum4 += a[i];
        t5 = wtime();
        printf("  naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s",
               t2-t1, t3-t2, t4-t3);
        if (do32)
            printf(", rewritten-32: %.3f s", t5-t4);
        printf("\n");
        // Report the 32-byte checksum only when that variant exists,
        // mirroring the timing line above.
        printf("  sum naive: %f, sum rewritten-16: %f", sum1, sum3);
        if (do32)
            printf(", sum rewritten-32: %f", sum4);
        printf("\n");
    }

    free(a);
    free(b);
    free(c);
    return 0;
}
示例#30
0
/*
 * Partition `length` elements into equivalence sets.
 *
 * For every element four quantities are accumulated from its position
 * (elements[k]->v) and mass (elements[k]->m): a sum of weighted distances,
 * the sum of their squares, a weighted vector sum and a weighted sum of
 * plane projections. Two elements end up in the same set when the relative
 * differences of all four quantities are below thresholds->equivalence and
 * their `n` fields are equal.
 *
 * Outputs:
 *   pelements  receives the element pointers grouped set by set; each
 *              returned set's `elements` field points into this array.
 *   *es        array of *esl sets allocated here with calloc and not freed
 *              by this function.
 *   *esl       number of sets found.
 *
 * `elements` and `pelements` may be the same array; a temporary copy of the
 * input order is then used while regrouping.
 *
 * Always returns MSYM_SUCCESS.
 * NOTE(review): none of the allocations are checked for failure.
 * NOTE(review): vadd/vsub/vscale/vproj_plane/vnorm2 appear to write their
 * result into the last argument - confirm against their definitions.
 */
msym_error_t partitionEquivalenceSets(int length, msym_element_t *elements[length], msym_element_t *pelements[length], msym_geometry_t g, int *esl, msym_equivalence_set_t **es, msym_thresholds_t *thresholds) {

    const int degenerate = geometryDegenerate(g);
    int setCount = 0;

    double *distSum = calloc(length, sizeof(double));
    double *distSqSum = calloc(length, sizeof(double));

    int *owner = calloc(length, sizeof(int));   // index of the set each element belongs to
    int *setSize = calloc(length, sizeof(int)); // number of members per set index

    double (*vecSum)[3] = calloc(length, sizeof(double[3]));
    double (*projSum)[3] = calloc(length, sizeof(double[3]));

    double (*pos)[3] = calloc(length, sizeof(double[3]));
    double *mass = calloc(length, sizeof(double));

    // Work on local copies of the positions and masses.
    for(int k = 0; k < length; k++){
        vcopy(elements[k]->v, pos[k]);
        mass[k] = elements[k]->m;
    }

    // Pairwise contributions; every unordered pair (p,q) is visited once.
    for(int p = 0; p < length; p++){
        for(int q = p + 1; q < length; q++){
            double pairWeight = mass[p]*mass[q]/(mass[p]+mass[q]);
            double separation;
            double tmp[3];
            double projP[3], projQ[3];

            vnorm2(pos[p], tmp);
            vproj_plane(pos[q], tmp, projP);
            vscale(pairWeight, projP, projP);
            vadd(projP, projSum[p], projSum[p]);

            vnorm2(pos[q], tmp);
            vproj_plane(pos[p], tmp, projQ);
            vscale(pairWeight, projQ, projQ);
            vadd(projQ, projSum[q], projSum[q]);

            vsub(pos[q], pos[p], tmp);

            separation = vabs(tmp);

            vscale(pairWeight/separation, tmp, tmp);

            vadd(tmp, vecSum[p], vecSum[p]);
            vsub(vecSum[q], tmp, vecSum[q]);

            // sqrt(I) of a diatomic molecule about an axis perpendicular to
            // the bond, with the origin at the center of mass.
            double pairTerm = pairWeight*separation;
            distSum[p] += pairTerm;
            distSum[q] += pairTerm;

            distSqSum[p] += SQR(pairTerm);
            distSqSum[q] += SQR(pairTerm);
        }
        vsub(pos[p], vecSum[p], vecSum[p]);

    }

    // Per-element contribution built from the element's own position.
    for(int k = 0; k < length; k++){

        double scaled[3];
        double halfMass = mass[k]/2.0;
        double radius = vabs(elements[k]->v);
        double selfTerm = halfMass*radius;
        vscale(halfMass, elements[k]->v, scaled);
        vsub(vecSum[k], scaled, vecSum[k]);

        // Adding the initial vector makes the plane projection unable to
        // tell certain structures apart, but leaving it out causes huge
        // cancellation errors for degenerate point groups; large masses also
        // break the equivalence check when this is 0.
        if(degenerate) vadd(projSum[k], scaled, projSum[k]);

        distSum[k] += selfTerm;
        distSqSum[k] += SQR(selfTerm);
    }

    // Greedy matching. From here on distSum doubles as a marker: a negative
    // entry means the element is already assigned. A matched member stores
    // the negated worst relative error (or -1.0 when that error is not
    // positive); a set's first element stores -1.0.
    for(int lead = 0; lead < length; lead++){
        if(!(distSum[lead] >= 0.0)) continue;

        owner[lead] = lead;
        for(int cand = lead + 1; cand < length; cand++){
            if(!(distSum[cand] >= 0.0)) continue;

            double vecLead = vabs(vecSum[lead]);
            double vecCand = vabs(vecSum[cand]);
            double projLead = vabs(projSum[lead]);
            double projCand = vabs(projSum[cand]);

            double relProj = 0.0;
            double relVec = fabs((vecLead)-(vecCand))/((vecLead)+(vecCand));
            double relDist = fabs((distSum[lead])-(distSum[cand]))/((distSum[lead])+(distSum[cand]));
            double relSq = fabs((distSqSum[lead])-(distSqSum[cand]))/((distSqSum[lead])+(distSqSum[cand]));

            // The projection error is left at 0 when both lengths are below
            // the zero threshold.
            if(!(projLead < thresholds->zero) || !(projCand < thresholds->zero)){
                relProj = fabs((projLead)-(projCand))/((projLead)+(projCand));
            }

            double worst = fmax(relVec,fmax(relProj,fmax(relDist, relSq)));

            if(worst < thresholds->equivalence && elements[lead]->n == elements[cand]->n){
                if(worst > 0.0){
                    distSum[cand] = -worst;
                } else {
                    distSum[cand] = -1.0;
                }
                owner[cand] = lead;
            }
        }
        distSum[lead] = -1.0;
    }

    // Count the members of each set and the number of distinct sets.
    for(int k = 0; k < length; k++){
        int setIdx = owner[k];
        if(setSize[setIdx] == 0) setCount++;
        setSize[setIdx]++;
    }

    msym_equivalence_set_t *sets = calloc(setCount, sizeof(msym_equivalence_set_t));
    msym_element_t **input = elements;
    msym_element_t **cursor = pelements;

    // In-place use: keep a copy of the original order, because pelements is
    // overwritten while it is still being read.
    if(elements == pelements){
        input = malloc(sizeof(msym_element_t *[length]));
        memcpy(input, elements, sizeof(msym_element_t *[length]));
    }

    // Lay the sets out back to back in pelements.
    for(int setIdx = 0, out = 0; setIdx < length; setIdx++){
        if(!(setSize[setIdx] > 0)) continue;

        int filled = 0;
        sets[out].elements = cursor;
        sets[out].length = setSize[setIdx];
        for(int k = 0; k < length; k++){
            if(owner[k] != setIdx) continue;

            // A stored value of -1.0 (or below) contributes no error.
            double err = 0.0;
            if(distSum[k] > -1.0) err = fabs(distSum[k]);
            sets[out].err = fmax(sets[out].err, err);
            sets[out].elements[filled++] = input[k];
        }
        cursor += setSize[setIdx];
        out++;
    }

    if(elements == pelements){
        free(input);
    }
    free(mass);
    free(pos);
    free(distSqSum);
    free(distSum);
    free(owner);
    free(setSize);
    free(vecSum);
    free(projSum);
    *es = sets;
    *esl = setCount;
    return MSYM_SUCCESS;
}