// Add a log entry OsStatus OsSysLog::add(const char* taskName, const int taskId, const OsSysLogFacility facility, const OsSysLogPriority priority, const char* format, ...) { OsStatus rc = OS_UNSPECIFIED; // If the log has not been initialized, print everything if (isTaskPtrNull()) { // Convert the variable arguments into a single string UtlString data ; va_list ap; va_start(ap, format); myvsprintf(data, format, ap) ; data = escape(data) ; va_end(ap); // Display all of the data osPrintf("%s %s %s 0x%08X %s\n", OsSysLog::sFacilityNames[facility], OsSysLog::sPriorityNames[priority], (taskName == NULL) ? "" : taskName, taskId, data.data()) ; rc = OS_SUCCESS ; } // Otherwise make sure we want to handle the log entry before we process // the variable arguments. else { if (willLog(facility, priority)) { va_list ap; va_start(ap, format); rc = vadd(taskName, taskId, facility, priority, format, ap); va_end(ap); } } return rc; }
/*
** Derives the camera basis (u, v), the image-plane corner `l` and the
** per-pixel steps from the camera location, direction and up vector.
** The plane height is 18 * ARBITRARY_NUMBER / 35 and the width follows
** from the e->x / e->y aspect ratio; the plane is centred
** ARBITRARY_NUMBER units from the camera location along -back.
*/
void	setup_camera_plane(t_env *e)
{
	t_vector	back;
	t_vector	center;
	t_vector	left_edge;
	double		plane_w;
	double		plane_h;

	plane_h = 18.0 * ARBITRARY_NUMBER / 35.0;
	plane_w = plane_h * (double)e->x / (double)e->y;
	back = vunit(vsub(e->camera.loc, e->camera.dir));
	e->camera.u = vunit(vcross(e->camera.up, back));
	e->camera.v = vunit(vcross(back, e->camera.u));
	center = vsub(e->camera.loc, vmult(back, ARBITRARY_NUMBER));
	left_edge = vsub(center, vmult(e->camera.u, plane_w / 2.0));
	e->camera.l = vadd(left_edge, vmult(e->camera.v, plane_h / 2.0));
	e->camera.stepx = plane_w / (double)e->x;
	e->camera.stepy = plane_h / (double)e->y;
}
// Add a log entry OsStatus OsSysLog::add(const OsSysLogFacility facility, const OsSysLogPriority priority, const char* format, ...) { OsStatus rc = OS_UNSPECIFIED; // If the log has not been initialized, print everything if (!isTaskPtrNull()) { if (willLog(facility, priority)) { UtlString taskName ; OsTaskId_t taskId = 0 ; va_list ap; va_start(ap, format); OsTaskBase* pBase = OsTask::getCurrentTask() ; if (pBase != NULL) { taskName = pBase->getName() ; pBase->id(taskId) ; } else { // TODO: should get abstracted into a OsTaskBase method #ifdef __pingtel_on_posix__ OsTaskLinux::getCurrentTaskId(taskId ); #endif taskName = "Anon"; // OsTask::getIdString_d(taskName, taskId); } rc = vadd(taskName.data(), taskId, facility, priority, format, ap); va_end(ap); } } else rc = OS_SUCCESS ; return rc; }
void KX_ObstacleSimulation::UpdateObstacles() { for (size_t i=0; i<m_obstacles.size(); i++) { if (m_obstacles[i]->m_type==KX_OBSTACLE_NAV_MESH || m_obstacles[i]->m_shape==KX_OBSTACLE_SEGMENT) continue; KX_Obstacle* obs = m_obstacles[i]; obs->m_pos = obs->m_gameObj->NodeGetWorldPosition(); obs->vel[0] = obs->m_gameObj->GetLinearVelocity().x(); obs->vel[1] = obs->m_gameObj->GetLinearVelocity().y(); // Update velocity history and calculate perceived (average) velocity. vcpy(&obs->hvel[obs->hhead*2], obs->vel); obs->hhead = (obs->hhead+1) % VEL_HIST_SIZE; vset(obs->pvel,0,0); for (int j = 0; j < VEL_HIST_SIZE; ++j) vadd(obs->pvel, obs->pvel, &obs->hvel[j*2]); vscale(obs->pvel, obs->pvel, 1.0f/VEL_HIST_SIZE); } }
// Modified by Rick //bool intersect(dynent *d, vec &from, vec &to) // if lineseg hits entity bounding box bool intersect(dynent *d, vec &from, vec &to, vec *end) // if lineseg hits entity bounding box { vec v = to, w = d->o, *p; vsub(v, from); vsub(w, from); float c1 = dotprod(w, v); if(c1<=0) p = &from; else { float c2 = dotprod(v, v); if(c2<=c1) p = &to; else { float f = c1/c2; vmul(v, f); vadd(v, from); p = &v; }; }; /* Modified by Rick return p->x <= d->o.x+d->radius && p->x >= d->o.x-d->radius && p->y <= d->o.y+d->radius && p->y >= d->o.y-d->radius && p->z <= d->o.z+d->aboveeye && p->z >= d->o.z-d->eyeheight;*/ if( p->x <= d->o.x+d->radius && p->x >= d->o.x-d->radius && p->y <= d->o.y+d->radius && p->y >= d->o.y-d->radius && p->z <= d->o.z+d->aboveeye && p->z >= d->o.z-d->eyeheight) { if (end) *end = *p; return true; } return false; };
/**
 * Applies `operation` element-wise to vec1 and vec2 on an OpenCL GPU device.
 *
 * The kernel source is assembled as kernelStart + operation + kernelEnd and
 * its entry point is named "vadd"; the global work size is vec1.size().
 *
 * @param vec1       first operand
 * @param vec2       second operand
 * @param operation  text spliced into the kernel source between kernelStart
 *                   and kernelEnd
 * @return the result of mathViaOpenCL(), or an empty NumList when the host
 *         exposes no OpenCL platform or the chosen platform has no GPU device
 */
NumList openCLMath(NumList vec1, NumList vec2, const std::string& operation) {
    // Discover the OpenCL platforms available on the host.
    std::vector<cl::Platform> platformList;
    cl::Platform::get(&platformList);
    if (platformList.empty()) {
        // Nothing to run on; previously this indexed past the end of the list.
        return NumList();
    }

    // This code has always used platform index 1, which is out of bounds on
    // a host with a single platform. Keep index 1 when it exists so existing
    // multi-platform setups behave the same, otherwise use the first platform.
    const std::vector<cl::Platform>::size_type platformIndex =
        (platformList.size() > 1) ? 1 : 0;

    // Query the GPU devices of the chosen platform.
    std::vector<cl::Device> deviceList;
    platformList[platformIndex].getDevices(CL_DEVICE_TYPE_GPU, &deviceList);
    if (deviceList.empty()) {
        // No GPU on this platform; deviceList[0] below would be invalid.
        return NumList();
    }

    // Create a context for the devices and use the first device to create
    // a command-queue.
    cl::Context context = cl::Context(deviceList);
    cl::CommandQueue queue = cl::CommandQueue(context, deviceList[0]);

    // Build and get kernel for the devices
    cl::Kernel kernel = getKernel(deviceList, context, kernelStart + operation + kernelEnd, "vadd");

    // Build a kernel functor
    cl::KernelFunctor vadd(kernel, queue, cl::NullRange, cl::NDRange(vec1.size()), cl::NullRange);

    // Run the kernel and obtain results
    return mathViaOpenCL(context, queue, kernel, vadd, vec1, vec2);
}
/*
** Sets up the refracted ray for a primitive hit.
** The new ray starts at the hit point e->ray.loc + e->ray.dir * e->t.
** When the ray is currently flagged RAY_INSIDE the surface normal is
** flipped before calling refract_prim(). If refract_prim() succeeds the
** RAY_INSIDE flag is toggled, otherwise set_reflect_ray() is used instead.
*/
static void	set_refract_ray_prim(t_env *e, t_env *refract)
{
	t_vector	normal;
	int			was_inside;

	refract->ray.loc = vadd(e->ray.loc, vmult(e->ray.dir, e->t));
	normal = get_normal(e, refract->ray.loc);
	was_inside = (refract->flags & RAY_INSIDE) ? 1 : 0;
	if (was_inside)
		normal = vunit(vsub((t_vector){0.0, 0.0, 0.0}, normal));
	if (!refract_prim(e, refract, normal))
	{
		set_reflect_ray(e, refract);
		return ;
	}
	if (was_inside)
		refract->flags &= ~RAY_INSIDE;
	else
		refract->flags |= RAY_INSIDE;
}
// Writes the unit's audio inputs 1..N onto consecutive audio buses starting
// at the bus index read from input 0. A bus already touched during the
// current buffer cycle (touched[i] == mBufCounter) is summed into; otherwise
// it is overwritten and marked as touched.
void vOut_next_a(IOUnit *unit, int inNumSamples)
{
	World *world = unit->mWorld;
	const int samplesPerBus = world->mBufLength;
	const int channelCount = unit->mNumInputs - 1;

	// Re-resolve the bus pointers only when the requested bus index changed.
	float fbusChannel = ZIN0(0);
	if (fbusChannel != unit->m_fbusChannel) {
		unit->m_fbusChannel = fbusChannel;
		const int firstChannel = (int)fbusChannel;
		const int endChannel = firstChannel + channelCount;
		const bool inRange = firstChannel >= 0
			&& endChannel <= (int)world->mNumAudioBusChannels;
		if (inRange) {
			unit->m_bus = world->mAudioBus + (firstChannel * samplesPerBus);
			unit->m_busTouched = world->mAudioBusTouched + firstChannel;
		}
	}

	float *dst = unit->m_bus;
	int32 *touchStamp = unit->m_busTouched;
	const int32 currentStamp = unit->mWorld->mBufCounter;

	for (int ch = 0; ch < channelCount; ++ch) {
		ACQUIRE_BUS_AUDIO((int32)fbusChannel + ch);
		float *src = IN(ch + 1);
		if (touchStamp[ch] != currentStamp) {
			// First writer this cycle: overwrite and mark.
			vcopy(dst, src, inNumSamples);
			touchStamp[ch] = currentStamp;
		} else {
			// Bus already written this cycle: mix in.
			vadd(dst, dst, src, inNumSamples);
		}
		RELEASE_BUS_AUDIO((int32)fbusChannel + ch);
		dst += samplesPerBus;
	}
}
/*
 * Computes `divisions` points on a circle of the given radius around
 * `center`, lying in the plane z = center[2] (the offset has z == 0).
 *
 * center     circle centre (x, y, z)
 * radius     circle radius
 * divisions  number of points; point i lies at angle i * 2*PI / divisions
 *
 * Returns a buffer of divisions*3 floats (x, y, z per point) obtained from
 * mem_malloc(), owned by the caller, or a null pointer when the allocation
 * fails.
 */
float *calc_circle(float center[3],double radius,int divisions)
{
	float *points = (float *)mem_malloc(sizeof(float)*divisions*3);
	int i;
	int j=0;
	double delta = (double)((double)(_PI * 2) / divisions);

	/* Do not write through a failed allocation. */
	if (!points)
		return points;

	for (i=0;i<divisions;i++)
	{
		float r[3];
		float result[3];

		/* Offset from the centre for this step, scaled to the radius. */
		vset(r,cos(i*delta),sin(i*delta),0);
		vmul(r,(float)radius);
		vadd(result,center,r);

		points[j]=result[0];
		points[j+1]=result[1];
		points[j+2]=result[2];
		j+=3;
	}
	return points;
}
/*
** Ray/disk intersection test.
** Intersects the ray with the plane through o->loc with normal o->normal,
** then accepts the hit when the value returned by vnormalize() for
** (hit - o->loc) does not exceed o->radius.
** Returns 1 and stores the ray parameter in *t on a hit, 0 otherwise
** (ray parallel to the plane, or hit parameter not greater than EPSILON).
*/
int		intersect_disk(t_ray *r, t_prim *o, double *t)
{
	t_vector	hit;
	double		denom;
	double		numer;
	double		dist;

	denom = vdot(r->dir, o->normal);
	if (denom == 0)
		return (0);
	numer = vdot(o->loc, o->normal) - vdot(r->loc, o->normal);
	dist = numer / denom;
	if (!(dist > EPSILON))
		return (0);
	hit = vadd(r->loc, vmult(r->dir, dist));
	if (!(vnormalize(vsub(hit, o->loc)) <= o->radius))
		return (0);
	*t = dist;
	return (1);
}
/*
 * OCaml stub: loads one 4-component float entry per bone from anim_v into
 * bone->aq, then recomputes every bone's amq/amv from its parent and writes
 * the resulting matrix into the matching abone's cm.
 *
 * Entry i of anim_v is applied to s->bones[i + 1]; slot 0 is never written
 * here. Bones are processed in index order and each one reads its parent's
 * amq/amv. Returns unit.
 */
CAMLprim value ml_skin_set_anim (value anim_v)
{
    int i, k;
    CAMLparam1 (anim_v);
    CAMLlocal1 (floats_v);
    State *s = &glob_state;

    /* Pass 1: copy the per-bone values out of the OCaml float arrays. */
    for (i = 0; i < s->num_bones; ++i) {
        struct bone *b = &s->bones[i + 1];

        floats_v = Field (anim_v, i);
        for (k = 0; k < 4; ++k) {
            b->aq[k] = Double_field (floats_v, k);
        }
    }

    /* Pass 2: compose with the parent and build the per-bone matrix. */
    for (i = 0; i < s->num_bones; ++i) {
        struct bone *b = &s->bones[i + 1];
        struct abone *ab = &s->abones[i + 1];
        struct bone *parent = &s->bones[b->parent];
        float moved[4], offset[4], delta[4], inv_mq[4];

        qapply (moved, parent->amq, b->v);
        qcompose (b->amq, b->aq, parent->amq);
        vadd (b->amv, moved, parent->amv);

        qconjugate (inv_mq, b->mq);
        qcompose (delta, inv_mq, b->amq);
        qapply (moved, delta, b->mv);
        vsub (offset, b->amv, moved);
        q2matrixt (ab->cm, delta, offset);
    }

    CAMLreturn (Val_unit);
}
/*
 * Computes the refracted ray for a sphere hit.
 *
 * The sine of the refraction angle is derived from the sine of the incidence
 * angle and sp->refractive: divided when the cosine between n and the ray
 * direction is negative, multiplied otherwise. The refracted direction is
 * built from the normalized normal and an in-plane perpendicular vector.
 *
 * Returns 1 and fills *refract and *attenuation, or 0 when the
 * medium-to-air branch yields sin(r) >= 1 (total internal reflection).
 */
int sphere_refraction_func(const struct TObject *object, const Ray *ray, const Ray *reflect, const Vector *pt, const Vector *n, Ray *refract, Vector *attenuation)
{
    const Sphere *sphere = (const Sphere*)object->priv;

    // Incidence angle i
    double cos_in = dot(n, &ray->front) / modulation(n) / modulation(&ray->front);
    double sin_in = sqrt(1 - cos_in*cos_in);
    double sin_out;

    // NOTE(review): vin is assigned but never read afterwards.
    Vector vin = *n;

    if (!(cos_in < 0)) {
        // Medium to air
        sin_out = sin_in * sphere->refractive;
        vin = rmul(n, -1);
        // Total internal reflection
        if (sin_out >= 1)
            return 0;
    } else {
        // Air to medium
        sin_out = sin_in / sphere->refractive;
    }

    double angle = asin(sin_out);

    Vector unit_n = *n;
    normalize(&unit_n);

    Vector left = vcross(vcross(*n, ray->front), *n);
    normalize(&left);

    Vector tangential = vrmul(left, sin(angle));
    Vector along_n = vrmul(unit_n, cos(angle));
    refract->front = vadd(tangential, along_n);
    refract->pos = *pt;

    *attenuation = sphere->refract_attenuation;
    return 1;
}
// genrun is the generic run implementation. static void genrun(Buf *b, char *dir, int mode, Vec *argv, int wait) { int i, p[2], pid; Buf b1, cmd; char *q; while(nbg >= maxnbg) bgwait1(); binit(&b1); binit(&cmd); if(!isabs(argv->p[0])) { bpathf(&b1, "/bin/%s", argv->p[0]); free(argv->p[0]); argv->p[0] = xstrdup(bstr(&b1)); } // Generate a copy of the command to show in a log. // Substitute $WORK for the work directory. for(i=0; i<argv->len; i++) { if(i > 0) bwritestr(&cmd, " "); q = argv->p[i]; if(workdir != nil && hasprefix(q, workdir)) { bwritestr(&cmd, "$WORK"); q += strlen(workdir); } bwritestr(&cmd, q); } if(vflag > 1) errprintf("%s\n", bstr(&cmd)); if(b != nil) { breset(b); if(pipe(p) < 0) fatal("pipe"); } switch(pid = fork()) { case -1: fatal("fork"); case 0: if(b != nil) { close(0); close(p[0]); dup(p[1], 1); dup(p[1], 2); if(p[1] > 2) close(p[1]); } if(dir != nil) { if(chdir(dir) < 0) { fprint(2, "chdir: %r\n"); _exits("chdir"); } } vadd(argv, nil); exec(argv->p[0], argv->p); fprint(2, "%s\n", bstr(&cmd)); fprint(2, "exec: %r\n"); _exits("exec"); } if(b != nil) { close(p[1]); breadfrom(b, p[0]); close(p[0]); } if(nbg < 0) fatal("bad bookkeeping"); bg[nbg].pid = pid; bg[nbg].mode = mode; bg[nbg].cmd = btake(&cmd); bg[nbg].b = b; nbg++; if(wait) bgwait(); bfree(&cmd); bfree(&b1); }
// genrun is the generic run implementation. static void genrun(Buf *b, char *dir, int mode, Vec *argv, int wait) { int i, p[2], pid; Buf cmd; char *q; while(nbg >= maxnbg) bgwait1(); // Generate a copy of the command to show in a log. // Substitute $WORK for the work directory. binit(&cmd); for(i=0; i<argv->len; i++) { if(i > 0) bwritestr(&cmd, " "); q = argv->p[i]; if(workdir != nil && hasprefix(q, workdir)) { bwritestr(&cmd, "$WORK"); q += strlen(workdir); } bwritestr(&cmd, q); } //if(vflag > 1) xprintf("%s\n", bstr(&cmd)); if(b != nil) { breset(b); if(pipe(p) < 0) fatal("pipe: %s", strerror(errno)); } switch(pid = fork()) { case -1: fatal("fork: %s", strerror(errno)); case 0: if(b != nil) { close(0); close(p[0]); dup2(p[1], 1); dup2(p[1], 2); if(p[1] > 2) close(p[1]); } if(dir != nil) { if(chdir(dir) < 0) { fprintf(stderr, "chdir %s: %s\n", dir, strerror(errno)); _exit(1); } } vadd(argv, nil); execvp(argv->p[0], argv->p); fprintf(stderr, "%s\n", bstr(&cmd)); fprintf(stderr, "exec %s: %s\n", argv->p[0], strerror(errno)); _exit(1); } if(b != nil) { close(p[1]); breadfrom(b, p[0]); close(p[0]); } if(nbg < 0) fatal("bad bookkeeping"); bg[nbg].pid = pid; bg[nbg].mode = mode; bg[nbg].cmd = btake(&cmd); bg[nbg].b = b; nbg++; if(wait) bgwait(); bfree(&cmd); }
/**
 * Runs the whole experiment: for every fold, trains the HMM with EM using
 * the worker threads, optionally evaluates cross-entropy on the train/test
 * split during training, saves the learned parameters and appends one
 * JSON-like record to `logfile`. Progress lines go to `logprogressfile`.
 */
void HMMExperiment::run() {
  for (int ifold = 0; ifold < nfolds; ifold++) {
    folds->get_fold(ifold, tr_set, &tr_size, te_set, &te_size);
    logprogressfile << "ifold: " << ifold << std::endl;

    // Remember when this fold started, for the log record below.
    auto start_t = std::time(nullptr);
    auto start_time = *std::localtime(&start_t);
    char start_time_str [80];
    strftime(start_time_str, 80, "%F %X", &start_time);

    hmm->init_random_parameters();

    // subdivide sequences into worker threads
    assign_sequences_to_workers(tr_size, tr_set, te_size, te_set);

    // learn parameters
    double absdif;
    EM_test_counter = 0;
    for(int it = 0; it < EMiterations; it++) {
      // E-step: start every worker, then wait for all of them.
      for(HMMWorkerThread & w : workers) w.run_Estep();
      for(HMMWorkerThread & w : workers) w.join();

      // Reset the accumulators, then sum the per-worker results into them.
      EM_loglik[it] = 0;
      memset(NU, 0, hs*sizeof(double));
      memset(N, 0, hs*hs*sizeof(double));
      memset(M, 0, hs*os*sizeof(double));
      for(HMMWorkerThread & w : workers) {
        EM_loglik[it] += w.loglik;
        // presumably vadd(a, stride, b, stride, out, stride, count) computes
        // out = a + b element-wise -- TODO confirm against its definition
        vadd(NU, 1, w.NU, 1, NU, 1, hs);
        vadd(N, 1, w.N, 1, N, 1, hs*hs);
        vadd(M, 1, w.M, 1, M, 1, hs*os);
      }

      // M-step on the accumulated values.
      hmm->Mstep(N, M, NU, Naux1, Maux, &absdif);

      auto now = std::time(nullptr);
      auto now_ = *std::localtime(&now);
      char now_str [80];
      strftime(now_str, 80, "%F %X", &now_);
      logprogressfile << now_str << " ";
      logprogressfile << "EM iteration " << it << ", loglik = " << EM_loglik[it] << std::endl;

      // Evaluate on every iteration, on odd iterations only, or on the last
      // iteration only, depending on the testing strategy.
      if((testing_strat == TestingStrategy::test_every) ||
         ((testing_strat == TestingStrategy::test_odd) && (it % 2 == 1)) ||
         ((testing_strat == TestingStrategy::test_last) && (it == EMiterations-1))) {
        if(prediction_type == PredictionType::viterbi){
          for(HMMWorkerThread & w : workers) w.run_crossentropy_viterbi();
        } else if(prediction_type == PredictionType::posterior) {
          for(HMMWorkerThread & w : workers) w.run_crossentropy_posterior();
        }
        for(HMMWorkerThread & w : workers) w.join();

        // Combine the per-worker entropy sums and counts.
        double tr_entropy = 0;
        double te_entropy = 0;
        int tr_count = 0;
        int te_count = 0;
        for(HMMWorkerThread & w : workers) {
          tr_entropy += w.tr_entropy;
          tr_count += w.tr_count;
          te_entropy += w.te_entropy;
          te_count += w.te_count;
        }
        // NOTE(review): a zero tr_count/te_count makes these divisions
        // yield inf/NaN; nothing here guards against it.
        EM_tr_entropy[EM_test_counter] = tr_entropy / tr_count;
        EM_te_entropy[EM_test_counter] = te_entropy / te_count;
        EM_test_it[EM_test_counter] = it;

        now = std::time(nullptr);
        now_ = *std::localtime(&now);
        strftime(now_str, 80, "%F %X", &now_);
        logprogressfile << now_str << " ";
        logprogressfile << "tr_entropy: " << EM_tr_entropy[EM_test_counter] << ", te_entropy: " << EM_te_entropy[EM_test_counter] << std::endl;
        EM_test_counter++;
      }
    }
    // hmm->print_parameters();

    std::string filename = model_filename();
    hmm->save_parameters(filename);

    // LOG
    auto end_t = std::time(nullptr);
    auto end_time = *std::localtime(&end_t);
    char end_time_str [80];
    strftime(end_time_str, 80, "%F %X", &end_time);
    auto duration = std::difftime(end_t, start_t);

    // NOTE(review): the record written below ends its last field with a
    // trailing comma before "}", so it is not strict JSON.
    logfile << "{" << std::endl;
    logfile << "\"start_time\": \"" << start_time_str << "\", " << std::endl;
    logfile << "\"end_time\": \"" << end_time_str << "\", " << std::endl;
    logfile << "\"duration\": " << duration << ", " << std::endl;
    logfile << "\"Dataset\": \"" << d->filename << "\", " << std::endl;
    logfile << "\"ifold\": " << ifold << ", " << std::endl;
    logfile << "\"nfold\": " << folds->nfolds << ", " << std::endl;
    logfile << "\"nseq\": " << folds->n << ", " << std::endl;
    logfile << "\"EMiterations\": " << EMiterations << ", " << std::endl;
    logfile << "\"EM_loglik\": " << "[" << v_to_str(EMiterations, EM_loglik) << "], " << std::endl;
    logfile << "\"EM_te_entropy\": " << "[" << v_to_str(EM_test_counter, EM_te_entropy) << "], " << std::endl;
    logfile << "\"EM_tr_entropy\": " << "[" << v_to_str(EM_test_counter, EM_tr_entropy) << "], " << std::endl;
    logfile << "\"EM_test_it\": " << "[" << v_to_str(EM_test_counter, EM_test_it) << "], " << std::endl;
    logfile << "\"hs\": " << hs << ", " << std::endl;
    logfile << "\"nworkers\": " << nworkers << ", " << std::endl;
    // NOTE(review): when no evaluation ran in this fold EM_test_counter is 0
    // and the next line reads EM_te_entropy[-1].
    logfile << "\"entropy\": " << EM_te_entropy[EM_test_counter - 1] << ", " << std::endl;
    logfile << "\"fn_params\": \"" << filename << "\", " << std::endl;
    logfile << "}," << std::endl;
    logfile << std::endl;
  } // for ifold
}
// mkzasm writes zasm_$GOOS_$GOARCH.h,
// which contains struct offsets for use by
// assembly files.  It also writes a copy to the work space
// under the name zasm_GOOS_GOARCH.h (no expansion).
//
// dir is the directory in which the compiler is run on proc.c;
// file is the path of the header to write.
void
mkzasm(char *dir, char *file)
{
	int i, n;
	char *aggr, *p;
	Buf in, b, out;
	Vec argv, lines, fields;

	binit(&in);
	binit(&b);
	binit(&out);
	vinit(&argv);
	vinit(&lines);
	vinit(&fields);

	bwritestr(&out, "// auto generated by go tool dist\n\n");
	// Emit the fixed header text of the first zasmhdr entry whose
	// goarch and goos are prefixes of the current goarch and goos.
	for(i=0; i<nelem(zasmhdr); i++) {
		if(hasprefix(goarch, zasmhdr[i].goarch) && hasprefix(goos, zasmhdr[i].goos)) {
			bwritestr(&out, zasmhdr[i].hdr);
			goto ok;
		}
	}
	fatal("unknown $GOOS/$GOARCH in mkzasm");
ok:

	// Run 6c -DGOOS_goos -DGOARCH_goarch -Iworkdir -a proc.c
	// to get acid [sic] output.
	vreset(&argv);
	vadd(&argv, bpathf(&b, "%s/%sc", tooldir, gochar));
	vadd(&argv, bprintf(&b, "-DGOOS_%s", goos));
	vadd(&argv, bprintf(&b, "-DGOARCH_%s", goarch));
	vadd(&argv, bprintf(&b, "-I%s", workdir));
	vadd(&argv, "-a");
	vadd(&argv, "proc.c");
	runv(&in, dir, CheckExit, &argv);

	// Convert input like
	//	aggr G
	//	{
	//		Gobuf 24 sched;
	//		'Y' 48 stack0;
	//	}
	// into output like
	//	#define g_sched 24
	//	#define g_stack0 48
	//
	aggr = nil;
	splitlines(&lines, bstr(&in));
	for(i=0; i<lines.len; i++) {
		splitfields(&fields, lines.p[i]);
		// An "aggr NAME" line selects the #define prefix for the
		// aggregates we care about; other aggregates leave aggr unchanged.
		if(fields.len == 2 && streq(fields.p[0], "aggr")) {
			if(streq(fields.p[1], "G"))
				aggr = "g";
			else if(streq(fields.p[1], "M"))
				aggr = "m";
			else if(streq(fields.p[1], "Gobuf"))
				aggr = "gobuf";
			else if(streq(fields.p[1], "WinCall"))
				aggr = "wincall";
		}
		// A closing brace ends the current aggregate.
		if(hasprefix(lines.p[i], "}"))
			aggr = nil;
		// Inside a selected aggregate, a tab-indented line is a member:
		// last field is the name (strip the ';'), the one before it the offset.
		if(aggr && hasprefix(lines.p[i], "\t") && fields.len >= 2) {
			n = fields.len;
			p = fields.p[n-1];
			if(p[xstrlen(p)-1] == ';')
				p[xstrlen(p)-1] = '\0';
			bwritestr(&out, bprintf(&b, "#define %s_%s %s\n", aggr, fields.p[n-1], fields.p[n-2]));
		}
	}

	// Write both to file and to workdir/zasm_GOOS_GOARCH.h.
	writefile(&out, file, 0);
	writefile(&out, bprintf(&b, "%s/zasm_GOOS_GOARCH.h", workdir), 0);

	bfree(&in);
	bfree(&b);
	bfree(&out);
	vfree(&argv);
	vfree(&lines);
	vfree(&fields);
}
static void render(unsigned char *img, int comps, int w, int h, int nsubsamples) { int x, y; int u, v; //float *fimg = (float *)malloc(sizeof(float) * w * h * 3); vec *fimg = (vec *)malloc(sizeof(vec) * w * h); memset((void *)fimg, 0, sizeof(vec) * w * h); for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { for (v = 0; v < nsubsamples; v++) { for (u = 0; u < nsubsamples; u++) { float px = (x + (u / (float)nsubsamples) - (w / 2.0)) / (w / 2.0); float py = -(y + (v / (float)nsubsamples) - (h / 2.0)) / (h / 2.0); Ray ray; ray.org.x = 0.0; ray.org.y = 0.0; ray.org.z = 0.0; ray.dir.x = px; ray.dir.y = py; ray.dir.z = -1.0; vnormalize(&(ray.dir)); Isect isect; isect.t = 1.0e+17; isect.hit = 0; ray_sphere_intersect(&isect, &ray, &spheres[0]); ray_sphere_intersect(&isect, &ray, &spheres[1]); ray_sphere_intersect(&isect, &ray, &spheres[2]); ray_plane_intersect (&isect, &ray, &plane); if (isect.hit) { vec col; ambient_occlusion(&col, &isect); vadd(&fimg[y * w + x], fimg[y * w + x], col); /* fimg[y * w + x].x += col.x; fimg[y * w + x].y += col.y; fimg[y * w + x].z += col.z; */ } } } vdivs(&fimg[y * w + x], fimg[y * w + x], (float)(nsubsamples * nsubsamples)); /* fimg[y * w + x].x /= (float)(nsubsamples * nsubsamples); fimg[y * w + x].y /= (float)(nsubsamples * nsubsamples); fimg[y * w + x].z /= (float)(nsubsamples * nsubsamples); */ img[comps * (y * w + x) + 0] = clamp(fimg[y * w + x].x); img[comps * (y * w + x) + 1] = clamp(fimg[y * w + x].y); img[comps * (y * w + x) + 2] = clamp(fimg[y * w + x].z); } } }
// mkzruntimedefs writes zruntime_defs_$GOOS_$GOARCH.h,
// which contains Go struct definitions equivalent to the C ones.
// Mostly we just write the output of 6c -q to the file.
// However, we run it on multiple files, so we have to delete
// the duplicated definitions, and we don't care about the funcs
// and consts, so we delete those too.
//
// dir is the directory in which the compiler is run;
// file is the path of the file to write.
void
mkzruntimedefs(char *dir, char *file)
{
	int i, skip;
	char *p;
	Buf in, b, out;
	Vec argv, lines, fields, seen;

	binit(&in);
	binit(&b);
	binit(&out);
	vinit(&argv);
	vinit(&lines);
	vinit(&fields);
	vinit(&seen);

	bwritestr(&out, "// auto generated by go tool dist\n"
		"\n"
		"package runtime\n"
		"import \"unsafe\"\n"
		"var _ unsafe.Pointer\n"
		"\n"
	);

	// Run 6c -DGOOS_goos -DGOARCH_goarch -Iworkdir -q
	// on each of the runtimedefs C files.
	vadd(&argv, bpathf(&b, "%s/%sc", tooldir, gochar));
	vadd(&argv, bprintf(&b, "-DGOOS_%s", goos));
	vadd(&argv, bprintf(&b, "-DGOARCH_%s", goarch));
	vadd(&argv, bprintf(&b, "-I%s", workdir));
	vadd(&argv, "-q");
	// Placeholder last argument: its slot is overwritten with each file
	// name below, and the original entry is put back afterwards.
	vadd(&argv, "");
	p = argv.p[argv.len-1];
	for(i=0; i<nelem(runtimedefs); i++) {
		argv.p[argv.len-1] = runtimedefs[i];
		runv(&b, dir, CheckExit, &argv);
		bwriteb(&in, &b);
	}
	argv.p[argv.len-1] = p;

	// Process the aggregate output.
	skip = 0;
	splitlines(&lines, bstr(&in));
	for(i=0; i<lines.len; i++) {
		p = lines.p[i];
		// Drop comment, func, and const lines.
		if(hasprefix(p, "//") || hasprefix(p, "const") || hasprefix(p, "func"))
			continue;

		// Note beginning of type or var decl, which can be multiline.
		// Remove duplicates.  The linear check of seen here makes the
		// whole processing quadratic in aggregate, but there are only
		// about 100 declarations, so this is okay (and simple).
		if(hasprefix(p, "type ") || hasprefix(p, "var ")) {
			splitfields(&fields, p);
			if(fields.len < 2)
				continue;
			if(find(fields.p[1], seen.p, seen.len) >= 0) {
				if(streq(fields.p[fields.len-1], "{"))
					skip = 1;  // skip until }
				continue;
			}
			vadd(&seen, fields.p[1]);
		}
		// Inside a duplicate multiline declaration: drop lines up to and
		// including the closing brace.
		if(skip) {
			if(hasprefix(p, "}"))
				skip = 0;
			continue;
		}

		bwritestr(&out, p);
	}

	writefile(&out, file, 0);

	bfree(&in);
	bfree(&b);
	bfree(&out);
	vfree(&argv);
	vfree(&lines);
	vfree(&fields);
	vfree(&seen);
}
/*
 * Adds `vec` to the force stored in `state`.
 * The first argument of vadd() is 2 here, presumably the number of vectors
 * being summed -- TODO confirm against vadd's definition.
 */
void geRK4ApplyForce(ge_RK4State* state, ge_Vector3d vec){
	state->force = vadd(2, state->force, vec);
}
/*
 * In-place quaternion addition: a += b.
 * The `v` parts are combined with vadd(), the `s` parts are summed directly.
 */
void qadd(quat *a, quat *b)
{
    vadd(a->v, b->v);
    a->s = a->s + b->s;
}
/**
 * Solves the permuted KKT system and returns the unpermuted search directions.
 *
 * On entry, the factorization of the permuted KKT matrix, PKPt,
 * is assumed to be up to date (call kkt_factor beforehand to achieve this).
 * The right hand side, Pb, is assumed to be already permuted.
 *
 * On exit, the resulting search directions are written into dx, dy and dz,
 * where these variables are permuted back to the original ordering.
 *
 * Up to nitref iterative refinement steps are applied to solve the linear
 * system. Refinement stops early when the residual (infinity norm) drops
 * below (1 + ||Pb||_inf) * LINSYSACC, when a step does not reduce the
 * residual by at least the factor IRERRFACT, or when a step increases the
 * residual (that step is then undone).
 *
 * Work arrays KKT->work1..work6 are overwritten. isinit selects how the
 * dz-related residual term is formed: scale2add() when 0, vadd() otherwise.
 *
 * Returns the number of iterative refinement steps really taken.
 */
idxint kkt_solve(kkt* KKT, spmat* A, spmat* G, pfloat* Pb, pfloat* dx, pfloat* dy, pfloat* dz, idxint n, idxint p, idxint m, cone* C, idxint isinit, idxint nitref)
{

/* Length of the (possibly expanded) conic part of the KKT system.
 * NOTE(review): this macro is defined inside the function and not #undef'd. */
#if CONEMODE == 0
#define MTILDE (m+2*C->nsoc)
#else
#define MTILDE (m)
#endif

    idxint i, k, l, j, kk, kItRef;
#if (defined STATICREG) && (STATICREG > 0)
    idxint dzoffset;
#endif
    idxint* Pinv = KKT->Pinv;
    pfloat* Px = KKT->work1;      /* current (permuted) solution */
    pfloat* dPx = KKT->work2;     /* refinement correction */
    pfloat* e = KKT->work3;       /* residual, unpermuted: [ex; ey; ez] */
    pfloat* Pe = KKT->work4;      /* residual, permuted */
    pfloat* truez = KKT->work5;
    pfloat* Gdx = KKT->work6;
    pfloat* ex = e;
    pfloat* ey = e + n;
    pfloat* ez = e + n+p;
    pfloat bnorm = 1.0 + norminf(Pb, n+p+MTILDE);
    pfloat nex = 0;
    pfloat ney = 0;
    pfloat nez = 0;
    pfloat nerr;
    pfloat nerr_prev;             /* only read once kItRef > 0, i.e. after it was set */
    pfloat error_threshold = bnorm*LINSYSACC;
    idxint nK = KKT->PKPt->n;

    /* forward - diagonal - backward solves: Px holds solution */
    LDL_lsolve2(nK, Pb, KKT->L->jc, KKT->L->ir, KKT->L->pr, Px );
    LDL_dsolve(nK, Px, KKT->D);
    LDL_ltsolve(nK, Px, KKT->L->jc, KKT->L->ir, KKT->L->pr);

#if PRINTLEVEL > 2
    if( p > 0 ){
        PRINTTEXT("\nIR: it ||ex|| ||ey|| ||ez|| (threshold: %4.2e)\n", error_threshold);
        PRINTTEXT(" --------------------------------------------------\n");
    } else {
        PRINTTEXT("\nIR: it ||ex|| ||ez|| (threshold: %4.2e)\n", error_threshold);
        PRINTTEXT(" -----------------------------------------\n");
    }
#endif

    /* iterative refinement */
    for( kItRef=0; kItRef <= nitref; kItRef++ ){

        /* unpermute x & copy into arrays */
        unstretch(n, p, C, Pinv, Px, dx, dy, dz);

        /* compute error term */
        k=0; j=0;

        /* 1. error on dx*/
#if (defined STATICREG) && (STATICREG > 0)
        /* ex = bx - A'*dy - G'*dz - DELTASTAT*dx */
        for( i=0; i<n; i++ ){ ex[i] = Pb[Pinv[k++]] - DELTASTAT*dx[i]; }
#else
        /* ex = bx - A'*dy - G'*dz */
        for( i=0; i<n; i++ ){ ex[i] = Pb[Pinv[k++]]; }
#endif
        if(A) sparseMtVm(A, dy, ex, 0, 0);
        sparseMtVm(G, dz, ex, 0, 0);
        nex = norminf(ex,n);

        /* 2. error on dy (only when there are equality constraints) */
        if( p > 0 ){
#if (defined STATICREG) && (STATICREG > 0)
            /* ey = by - A*dx + DELTASTAT*dy */
            for( i=0; i<p; i++ ){ ey[i] = Pb[Pinv[k++]] + DELTASTAT*dy[i]; }
#else
            /* ey = by - A*dx */
            for( i=0; i<p; i++ ){ ey[i] = Pb[Pinv[k++]]; }
#endif
            sparseMV(A, dx, ey, -1, 0);
            ney = norminf(ey,p);
        }

        /* --> 3. ez = bz - G*dx + V*dz_true */
        kk = 0; j=0;
#if (defined STATICREG) && (STATICREG > 0)
        dzoffset=0;
#endif
        sparseMV(G, dx, Gdx, 1, 1);
        /* LP cone part */
        for( i=0; i<C->lpc->p; i++ ){
#if (defined STATICREG) && (STATICREG > 0)
            ez[kk++] = Pb[Pinv[k++]] - Gdx[j++] + DELTASTAT*dz[dzoffset++];
#else
            ez[kk++] = Pb[Pinv[k++]] - Gdx[j++];
#endif
        }
        /* second-order cone parts; with static regularization the last
         * entry of each cone gets the opposite sign */
        for( l=0; l<C->nsoc; l++ ){
            for( i=0; i<C->soc[l].p; i++ ){
#if (defined STATICREG) && (STATICREG > 0)
                ez[kk++] = i<(C->soc[l].p-1) ? Pb[Pinv[k++]] - Gdx[j++] + DELTASTAT*dz[dzoffset++] : Pb[Pinv[k++]] - Gdx[j++] - DELTASTAT*dz[dzoffset++];
#else
                ez[kk++] = Pb[Pinv[k++]] - Gdx[j++];
#endif
            }
#if CONEMODE == 0
            /* two extra entries per second-order cone are zeroed */
            ez[kk] = 0;
            ez[kk+1] = 0;
            k += 2;
            kk += 2;
#endif
        }
        for( i=0; i<MTILDE; i++) { truez[i] = Px[Pinv[n+p+i]]; }
        if( isinit == 0 ){
            scale2add(truez, ez, C);
        } else {
            vadd(MTILDE, truez, ez);
        }
        nez = norminf(ez,MTILDE);

#if PRINTLEVEL > 2
        if( p > 0 ){
            PRINTTEXT(" %2d %3.1e %3.1e %3.1e\n", (int)kItRef, nex, ney, nez);
        } else {
            PRINTTEXT(" %2d %3.1e %3.1e\n", (int)kItRef, nex, nez);
        }
#endif

        /* maximum error (infinity norm of e) */
        nerr = MAX( nex, nez);
        if( p > 0 ){ nerr = MAX( nerr, ney ); }

        /* CHECK WHETHER REFINEMENT BROUGHT DECREASE - if not undo and quit! */
        if( kItRef > 0 && nerr > nerr_prev ){
            /* undo refinement */
            for( i=0; i<nK; i++ ){ Px[i] -= dPx[i]; }
            kItRef--;
            break;
        }

        /* CHECK WHETHER TO REFINE AGAIN */
        if( kItRef == nitref || ( nerr < error_threshold ) || ( kItRef > 0 && nerr_prev < IRERRFACT*nerr ) ){
            break;
        }
        nerr_prev = nerr;

        /* permute */
        for( i=0; i<nK; i++) { Pe[Pinv[i]] = e[i]; }

        /* forward - diagonal - backward solves: dPx holds solution */
        LDL_lsolve2(nK, Pe, KKT->L->jc, KKT->L->ir, KKT->L->pr, dPx);
        LDL_dsolve(nK, dPx, KKT->D);
        LDL_ltsolve(nK, dPx, KKT->L->jc, KKT->L->ir, KKT->L->pr);

        /* add refinement to Px */
        for( i=0; i<nK; i++ ){ Px[i] += dPx[i]; }
    }

#if PRINTLEVEL > 2
    PRINTTEXT("\n");
#endif

    /* copy solution out into the different arrays, permutation included */
    unstretch(n, p, C, Pinv, Px, dx, dy, dz);

    return kItRef;
}
/*
 * Advances the simulation by one segment.
 *
 * Phase 1 runs conf.segmentSteps sub-steps: every live missile of every
 * active, non-watching player is accelerated towards each planet, moved,
 * and checked against planets, players and the battlefield margins.
 * Phase 2 updates the player timeouts and appends the current missile
 * position to each live shot, finishing shots that reach conf.maxSegments.
 */
static void simulate(void)
{
  int sh, i, j, pl, pl2, actp;
  double l;
  Vec2d v;

  for(i = 0; i < conf.segmentSteps; ++i)
  {
    for(pl = 0; pl < conf.maxPlayers; ++pl)
    {
      SimPlayer* p = &(player[pl]);
      if(p->watch) continue;
      if(!p->active) continue;
      for(sh = 0; sh < conf.numShots; ++sh)
      {
        SimShot* s = &(p->shot[sh]);
        SimMissile* m = &(s->missile);
        if(!m->live) continue;

        /* Accumulate the pull of every planet: direction v/l, magnitude
         * mass / l^2, scaled down by the number of sub-steps. */
        for(j = 0; j < conf.numPlanets; ++j)
        {
          v = vsub(planet[j].position, m->position);
          l = length(v);

          if (l <= planet[j].radius)
          {
            planetHit(s);
          }

          /* NOTE(review): this still runs after planetHit(), and l == 0
           * would make the divisions below divide by zero. */
          v = vdiv(v, l);
          v = vmul(v, planet[j].mass / (l * l));
          v = vdiv(v, conf.segmentSteps);

          m->speed = vadd(m->speed, v);
        }

        /* Move the missile by one sub-step of its speed. */
        v = vdiv(m->speed, conf.segmentSteps);
        m->position = vadd(m->position, v);

        for(pl2 = 0; pl2 < conf.maxPlayers; ++pl2)
        {
          if(!player[pl2].active) continue;
          l = distance(player[pl2].position, m->position);

          /* A player can only be hit once the missile has left its source. */
          if ( (l <= conf.playerDiameter) && (m->leftSource == 1) )
          {
            if(conf.debug & 1) printf("l = %.5f playerDiameter = %.5f missile.x = %.5f missile.y = %.5f player.x = %5f player.y = %5f\n",l,conf.playerDiameter,m->position.x,m->position.y,player[pl2].position.x,player[pl2].position.y);
            playerHit(s, pl, pl2);
          }

          /* Mark the missile as having left the player who fired it. */
          if ( (l > (conf.playerDiameter + 1)) && (pl2 == pl) )
          {
            m->leftSource = 1;
          }
        }

        /* Outside the battlefield plus margins counts as a wall hit. */
        if ( (m->position.x < -conf.marginleft)
             || (m->position.x > conf.battlefieldW + conf.marginright)
             || (m->position.y < -conf.margintop)
             || (m->position.y > conf.battlefieldH + conf.marginbottom) )
        {
          wallHit(s);
        }
      }
    }
  }

  /* Count the active players. */
  for(pl = 0, actp = 0; pl < conf.maxPlayers; ++pl) actp += player[pl].active;

  for(pl = 0; pl < conf.maxPlayers; ++pl)
  {
    SimPlayer* p = &(player[pl]);
    if(!p->active) continue;
    if(p->watch) continue;

    /* Count the timeout down; it is reset while the player is valid, is
     * the only active player, or still has a live shot. */
    if(p->timeout) p->timeout--;
    if(p->valid || actp == 1) p->timeout = conf.timeout;
    for(sh = 0; sh < conf.numShots; ++sh)
    {
      SimShot* s = &(p->shot[sh]);
      if(!s->missile.live) continue;
      p->timeout = conf.timeout;
      player[currentPlayer].timeoutcnt = 0;

      /* Record the segment end point; stop the shot when it is full. */
      s->dot[s->length++] = d2f(s->missile.position);
      if(s->length == conf.maxSegments)
      {
        s->missile.live = 0;
        allSendShotFinished(s);
      }
    }
  }
}
/*
 * Advances the dynamics simulation of `part` by one frame made of `iters`
 * Verlet iterations (at least one).
 *
 *   part              the part being simulated
 *   iters             iterations in this frame; values below 1 become 1
 *   averagePositions  out: per-atom mean of the positions computed during
 *                     the frame (sum of newPositions times 1/iters)
 *   pOldPositions,
 *   pNewPositions,
 *   pPositions        in/out: the three position buffers; they are rotated
 *                     after every iteration and the final assignment is
 *                     written back through these pointers
 *   force             scratch/out: per-atom force from calculateGradient()
 *
 * The iteration loop stops early when Interrupted is set. NULLPTR() and
 * BAIL() are the file's python-exception handling macros; their exact exit
 * behaviour is defined elsewhere.
 */
void
oneDynamicsFrame(struct part *part,
                 int iters,
                 struct xyz *averagePositions,
                 struct xyz **pOldPositions,
                 struct xyz **pNewPositions,
                 struct xyz **pPositions,
                 struct xyz *force)
{
    int j;
    int loop;
    double deltaTframe;
    struct xyz f;
    struct xyz *tmp;
    struct jig *jig;

    struct xyz *oldPositions = *pOldPositions;
    struct xyz *newPositions = *pNewPositions;
    struct xyz *positions = *pPositions;

    // wware 060109  python exception handling
    NULLPTR(part);
    NULLPTR(averagePositions);
    NULLPTR(oldPositions);
    NULLPTR(newPositions);
    NULLPTR(positions);

    iters = max(iters,1);

    // Weight of one iteration within the frame.
    deltaTframe = 1.0/iters;

    for (j=0; j<part->num_atoms; j++) {
	vsetc(averagePositions[j],0.0);
    }

    // See http://www.nanoengineer-1.net/mediawiki/index.php?title=Verlet_integration
    // for a discussion of how dynamics is done in the simulator.

    // we want:
    // x(t+dt) = 2x(t) - x(t-dt) + A dt^2
    // or:
    // newPositions = 2 * positions - oldPositions + A dt^2

    // wware 060110  don't handle Interrupted with the BAIL mechanism
    for (loop=0; loop < iters && !Interrupted; loop++) {

        _last_iteration = loop == iters - 1;

	Iteration++;

	// wware 060109  python exception handling
        updateVanDerWaals(part, NULL, positions); BAIL();
	calculateGradient(part, positions, force); BAIL();

	/* first, for each atom, find non-accelerated new pos  */
	/* Atom moved from oldPositions to positions last time,
	   now we move it the same amount from positions to newPositions */
	for (j=0; j<part->num_atoms; j++) {
            // f = positions - oldPositions
	    vsub2(f,positions[j],oldPositions[j]);
            // newPositions = positions + f
            // or:
            // newPositions = 2 * positions - oldPositions
	    vadd2(newPositions[j],positions[j],f);
            // after this, we will need to add A dt^2 to newPositions
	}

	// pre-force jigs
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];
	    // wware 060109  python exception handling
	    NULLPTR(jig);
	    switch (jig->type) {
	    case LinearMotor:
		jigLinearMotor(jig, positions, newPositions, force, deltaTframe);
		break;
            default:
		break;
	    }
	}

	/* convert forces to accelerations, giving new positions */
	//FoundKE = 0.0;		/* and add up total KE */
	for (j=0; j<part->num_atoms; j++) {
            // to complete Verlet integration, this needs to do:
            // newPositions += A dt^2
            //
            // force[] is in pN, mass is in g, Dt in seconds, f in pm
	    vmul2c(f,force[j],part->atoms[j]->inverseMass); // inverseMass = Dt*Dt/mass

            // XXX: 0.15 probably needs a scaling by Dt
            // 0.15 = deltaX
            // keMax = m v^2 / 2
            // v^2 = 2 keMax / m
            // v = deltaX / Dt = sqrt(2 keMax / m)
            // deltaX = Dt sqrt(2 keMax / m)

            // We probably don't want to do this, because a large raw
            // velocity isn't a problem, it's just when that creates a
            // high force between atoms that it becomes a problem.  We
            // check that elsewhere.
	    //if (!ExcessiveEnergyWarning && vlen(f)>0.15) { // 0.15 is just below H flyaway
            // WARNING3("Excessive force %.6f in iteration %d on atom %d -- further warnings suppressed", vlen(f), Iteration, j+1);
            // ExcessiveEnergyWarningThisFrame++;
            //}

	    vadd(newPositions[j],f);

	    //vsub2(f, newPositions[j], positions[j]);
	    //ff = vdot(f, f);
	    //FoundKE += atom[j].energ * ff;
	}

        // Jigs are executed in the following order: motors,
        // thermostats, grounds, measurements.  Motions from each
        // motor are added together, then thermostats operate on the
        // motor output.  Grounds override anything that moves atoms.
        // Measurements happen after all things that could affect
        // positions, including grounds.

        // motors
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];

            if (jig->type == RotaryMotor) {
		jigMotor(jig, deltaTframe, positions, newPositions, force);
            }
            // LinearMotor handled in preforce above
	}

        // thermostats
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];

            if (jig->type == Thermostat) {
		jigThermostat(jig, deltaTframe, positions, newPositions);
            }
	}

        // grounds
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];

            if (jig->type == Ground) {
		jigGround(jig, deltaTframe, positions, newPositions, force);
            }
	}

        // measurements
	for (j=0;j<part->num_jigs;j++) {	/* for each jig */
	    jig = part->jigs[j];

	    switch (jig->type) {
	    case Thermometer:
		jigThermometer(jig, deltaTframe, positions, newPositions);
		break;
	    case DihedralMeter:
		jigDihedral(jig, newPositions);
		break;
	    case AngleMeter:
		jigAngle(jig, newPositions);
		break;
	    case RadiusMeter:
		jigRadius(jig, newPositions);
		break;
            default:
		break;
	    }
	}

	// Accumulate this iteration's result for the frame average.
	for (j=0; j<part->num_atoms; j++) {
	    vadd(averagePositions[j],newPositions[j]);
	}

	// Rotate the buffers: positions become old, new become current,
	// and the previous old buffer is reused for the next iteration.
	tmp=oldPositions;
	oldPositions=positions;
	positions=newPositions;
	newPositions=tmp;

        if (ExcessiveEnergyWarningThisFrame > 0) {
            ExcessiveEnergyWarning = 1;
        }
    }

    // Turn the accumulated sum into an average.
    // NOTE(review): the scale is always 1/iters, even when the loop above
    // stopped early because Interrupted was set.
    for (j=0; j<part->num_atoms; j++) {
	vmulc(averagePositions[j],deltaTframe);
    }

    // Hand the rotated buffers back to the caller.
    *pOldPositions = oldPositions;
    *pNewPositions = newPositions;
    *pPositions = positions;
}
/*
 * Run one impulse-solver pass over every contact of `arb`, using the
 * two-lane cpFloatx2_t helpers (vld/vst/vmake/vmul/vpadd/...).
 *
 * For each contact the routine:
 *   1. loads both bodies' linear/angular velocities and bias velocities,
 *   2. computes the relative velocity (and relative bias velocity) at the
 *      contact point and projects both onto the collision normal,
 *   3. accumulates the bias and normal impulses in one vector
 *      (lane 0 = jBias, lane 1 = jnAcc) and clamps both lanes to >= 0,
 *   4. accumulates the tangential impulse and clamps it to +/- (jn * arb->u),
 *   5. applies the impulse deltas to the bodies and writes everything back.
 *
 * State is re-loaded from the bodies and stored back on every iteration, so
 * each contact sees the velocities produced by the previous one.
 */
static void
cpArbiterApplyImpulse_NEON(cpArbiter *arb)
{
	cpBody *bodyA = arb->body_a;
	cpBody *bodyB = arb->body_b;

	const int contactCount = arb->count;
	struct cpContact *contactList = arb->contacts;
	cpFloat_t mu = arb->u;

	cpFloatx2_t normal = vld((cpFloat_t *)&arb->n);
	cpFloatx2_t surfaceVel = vld((cpFloat_t *)&arb->surface_vr);

	// Sign masks and the tangent depend only on the arbiter, so build them once.
	// vrev(x) * (-1, 1) is used below as the 2D "perpendicular" of x;
	// vrev(x) * (1, -1) sets up the terms whose pairwise sum is a 2D cross product.
	cpFloatx2_t negPos = vmake(-1.0, 1.0);
	cpFloatx2_t posNeg = vmake(1.0, -1.0);
	cpFloatx2_t tangent = vmul(vrev(normal), negPos);

	for(int idx = 0; idx < contactCount; idx++){
		struct cpContact *contact = &contactList[idx];

		// Contact offsets from each body and their perpendiculars.
		cpFloatx2_t rA = vld((cpFloat_t *)&contact->r1);
		cpFloatx2_t rB = vld((cpFloat_t *)&contact->r2);
		cpFloatx2_t rAperp = vmul(vrev(rA), negPos);
		cpFloatx2_t rBperp = vmul(vrev(rB), negPos);

		// Relative bias velocity at the contact point.
		// The angular terms are packed as (body A, body B) in one vector.
		cpFloatx2_t biasVelA = vld((cpFloat_t *)&bodyA->v_bias);
		cpFloatx2_t biasVelB = vld((cpFloat_t *)&bodyB->v_bias);
		cpFloatx2_t biasAngVel = vmake(bodyA->w_bias, bodyB->w_bias);

		cpFloatx2_t biasPointVelA = vadd(biasVelA, vmul_n(rAperp, vget_lane(biasAngVel, 0)));
		cpFloatx2_t biasPointVelB = vadd(biasVelB, vmul_n(rBperp, vget_lane(biasAngVel, 1)));
		cpFloatx2_t biasRelVel = vsub(biasPointVelB, biasPointVelA);

		// Relative (real) velocity at the contact point.
		cpFloatx2_t velA = vld((cpFloat_t *)&bodyA->v);
		cpFloatx2_t velB = vld((cpFloat_t *)&bodyB->v);
		cpFloatx2_t angVel = vmake(bodyA->w, bodyB->w);

		cpFloatx2_t pointVelA = vadd(velA, vmul_n(rAperp, vget_lane(angVel, 0)));
		cpFloatx2_t pointVelB = vadd(velB, vmul_n(rBperp, vget_lane(angVel, 1)));
		cpFloatx2_t relVel = vsub(pointVelB, pointVelA);

		// Lane 0: bias velocity along the normal, lane 1: velocity along the normal.
		cpFloatx2_t normalVels = vpadd(vmul(biasRelVel, normal), vmul(relVel, normal));

		// Bias and normal impulses are solved together, one per lane,
		// and the accumulated values are clamped so they never go negative.
		cpFloatx2_t targets = vmake(contact->bias, -contact->bounce);
		cpFloatx2_t jnPrev = vmake(contact->jBias, contact->jnAcc);
		cpFloatx2_t jnDelta = vmul_n(vsub(targets, normalVels), contact->nMass);
		cpFloatx2_t jnNew = vmax(vadd(jnPrev, jnDelta), vdup_n(0.0));
		cpFloatx2_t jnApplied = vsub(jnNew, jnPrev);

		// Tangential velocity (including the surface velocity), in both lanes.
		cpFloatx2_t tangentTerms = vmul(vadd(relVel, surfaceVel), tangent);
		cpFloatx2_t tangentVel = vpadd(tangentTerms, tangentTerms);

		// Friction impulse: only lane 0 is meaningful. After the vrev, lane 0 of
		// the limit holds (clamped jn) * mu, and jt is clamped to +/- that limit.
		cpFloatx2_t jtPrev = {};
		jtPrev = vset_lane(contact->jtAcc, jtPrev, 0);
		cpFloatx2_t jtLimit = vrev(vmul_n(jnNew, mu));
		cpFloatx2_t jtDelta = vmul_n(tangentVel, -contact->tMass);
		cpFloatx2_t jtNew = vmax(vneg(jtLimit), vmin(vadd(jtPrev, jtDelta), jtLimit));
		cpFloatx2_t jtApplied = vsub(jtNew, jtPrev);

		// Body A receives the opposite impulse, hence the negated inverse inertia.
		cpFloatx2_t invInertia = vmake(-bodyA->i_inv, bodyB->i_inv);

		// Apply the bias impulse (lane 0 of the applied normal impulses).
		cpFloatx2_t biasImpulse = vmul_n(normal, vget_lane(jnApplied, 0));
		cpFloatx2_t biasImpulseX = vmul(vrev(biasImpulse), posNeg);
		cpFloatx2_t biasTorques = vpadd(vmul(rA, biasImpulseX), vmul(rB, biasImpulseX));
		biasAngVel = vadd(biasAngVel, vmul(invInertia, biasTorques));
		biasVelA = vsub(biasVelA, vmul_n(biasImpulse, bodyA->m_inv));
		biasVelB = vadd(biasVelB, vmul_n(biasImpulse, bodyB->m_inv));

		// Apply the real impulse: normal part (lane 1) plus friction part.
		cpFloatx2_t impulse = vadd(vmul_n(normal, vget_lane(jnApplied, 1)), vmul_n(tangent, vget_lane(jtApplied, 0)));
		cpFloatx2_t impulseX = vmul(vrev(impulse), posNeg);
		cpFloatx2_t torques = vpadd(vmul(rA, impulseX), vmul(rB, impulseX));
		angVel = vadd(angVel, vmul(invInertia, torques));
		velA = vsub(velA, vmul_n(impulse, bodyA->m_inv));
		velB = vadd(velB, vmul_n(impulse, bodyB->m_inv));

		// Write everything back (same order as always: bias state, velocities,
		// then the accumulated impulses on the contact).
		// TODO would moving these earlier help pipeline them better?
		vst((cpFloat_t *)&bodyA->v_bias, biasVelA);
		vst((cpFloat_t *)&bodyB->v_bias, biasVelB);
		vst_lane((cpFloat_t *)&bodyA->w_bias, biasAngVel, 0);
		vst_lane((cpFloat_t *)&bodyB->w_bias, biasAngVel, 1);

		vst((cpFloat_t *)&bodyA->v, velA);
		vst((cpFloat_t *)&bodyB->v, velB);
		vst_lane((cpFloat_t *)&bodyA->w, angVel, 0);
		vst_lane((cpFloat_t *)&bodyB->w, angVel, 1);

		vst_lane((cpFloat_t *)&contact->jBias, jnNew, 0);
		vst_lane((cpFloat_t *)&contact->jnAcc, jnNew, 1);
		vst_lane((cpFloat_t *)&contact->jtAcc, jtNew, 0);
	}
}
int main(void) { std::vector<float> h_a(LENGTH); // a vector std::vector<float> h_b(LENGTH); // b vector std::vector<float> h_c(LENGTH, 0xdeadbeef); // c = a + b, from compute device cl::Buffer d_a; // device memory used for the input a vector cl::Buffer d_b; // device memory used for the input b vector cl::Buffer d_c; // device memory used for the output c vector // Fill vectors a and b with random float values int count = LENGTH; for(int i = 0; i < count; i++) { h_a[i] = rand() / (float)RAND_MAX; h_b[i] = rand() / (float)RAND_MAX; } try { // Create a context cl::Context context(DEVICE); // Load in kernel source, creating a program object for the context cl::Program program(context, util::loadProgram("vadd.cl"), true); // Get the command queue cl::CommandQueue queue(context); // Create the kernel functor cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int> vadd(program, "vadd"); d_a = cl::Buffer(context, h_a.begin(), h_a.end(), true); d_b = cl::Buffer(context, h_b.begin(), h_b.end(), true); d_c = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH); util::Timer timer; vadd( cl::EnqueueArgs( queue, cl::NDRange(count)), d_a, d_b, d_c, count); queue.finish(); double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0; printf("\nThe kernels ran in %lf seconds\n", rtime); cl::copy(queue, d_c, h_c.begin(), h_c.end()); // Test the results int correct = 0; float tmp; for(int i = 0; i < count; i++) { tmp = h_a[i] + h_b[i]; // expected value for d_c[i] tmp -= h_c[i]; // compute errors if(tmp*tmp < TOL*TOL) { // correct if square deviation is less correct++; // than tolerance squared } else { printf( " tmp %f h_a %f h_b %f h_c %f \n", tmp, h_a[i], h_b[i], h_c[i]); } } // summarize results printf( "vector add to find C = A+B: %d out of %d results were correct.\n", correct, count); } catch (cl::Error err) { std::cout << "Exception\n"; std::cerr << "ERROR: " << err.what() << "(" << err_code(err.err()) << ")" << std::endl; } }
/*
 * Counter-mode keystream encryption of `len` bytes.
 *
 * The keystream block for index c is produced from (c XOR iv XOR round key 0)
 * run through aes_eight / aes_encrypt_n with the expanded encryption key, and
 * is XORed with the plaintext block to give the ciphertext block.
 *
 * ctx        - context providing expanced_enc_key (field name as declared).
 * iv         - initial value mixed into every counter block.
 * plaintext  - input, read as 16-byte blocks; the last block may be partial.
 * len        - number of BYTES to process.
 * ciphertext - output, written as 16-byte blocks; only `len` bytes are written.
 *
 * Blocks are processed eight at a time while eight FULL blocks are available;
 * whatever is left (1..8 blocks, the last one possibly partial) goes through
 * the tail path, which uses load_partial/store_partial for a partial block.
 *
 * Fix: previously a trailing partial block that happened to be the 8th block
 * of a chunk (len/16 % 8 == 7 with len % 16 != 0) was handled by xor_eight as
 * a full block, reading and writing up to 15 bytes past the end of the
 * buffers. Chunks are now formed from full blocks only, so the partial block
 * always reaches the tail path.
 *
 * NOTE(review): block counts are kept in `unsigned int` as before, so a `len`
 * whose block count does not fit would be truncated -- confirm callers never
 * pass such lengths.
 */
static inline void counter_mode(riv_context_t* ctx, __m128i iv, __m128i* plaintext, uint64_t len, __m128i* ciphertext)
{
    __m128i ctr = zero;
    __m128i states[8];
    unsigned int i, k, full_blocks, num_blocks, num_chunks, lastblock, remaining_blocks;

    full_blocks = len / BLOCKLEN; // len / 16
    lastblock = len % BLOCKLEN;   // len mod 16

    num_blocks = full_blocks;
    if (lastblock != 0) {
        num_blocks++;
    }

    // Only groups of eight FULL blocks take the fast path; a partial block
    // is never part of a chunk.
    num_chunks = full_blocks >> 3;
    remaining_blocks = num_blocks - (num_chunks << 3); // 0..8

    // Fold the first round key into the IV once, up front.
    iv = vxor(iv, ctx->expanced_enc_key[0]);
    k = 0;

    for(i = 0; i != num_chunks; i++) {
        states[0] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[1] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[2] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[3] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[4] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[5] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[6] = vxor(ctr,iv); ctr = vadd(ctr,one);
        states[7] = vxor(ctr,iv); ctr = vadd(ctr,one);

        aes_eight(states, ctx->expanced_enc_key);
        xor_eight(ciphertext, states, plaintext, k);
        k += 8;
    }

    if (remaining_blocks != 0) {
        k = num_chunks * 8; // position
        ciphertext += k;
        plaintext += k;

        for(i = 0; i < remaining_blocks; i++) {
            states[i] = vxor(ctr, iv); ctr = vadd(ctr, one);
        }

        // A tail of exactly eight blocks (seven full + one partial) uses the
        // same eight-block primitive as the chunk loop, so the keystream is
        // identical to what the chunk path would have produced.
        if (remaining_blocks == 8) {
            aes_eight(states, ctx->expanced_enc_key);
        } else {
            aes_encrypt_n(states, remaining_blocks, ctx->expanced_enc_key);
        }

        // All tail blocks except the last are full blocks.
        for (i = 0; i < remaining_blocks-1; i++) {
            ciphertext[i] = vxor(states[i], plaintext[i]);
        }

        // i now indexes the last block of the message.
        if (lastblock == 0) {
            // Last block is full
            ciphertext[i] = vxor(states[i], plaintext[i]);
        } else {
            // Last block is partial: touch only `lastblock` bytes.
            store_partial(ciphertext+i,
                vxor(
                    load_partial((const void*)(plaintext+i), lastblock),
                    states[i]
                ),
                lastblock
            );
        }
    }
}
// install installs the library, package, or binary associated with dir,
// which is relative to $GOROOT/src.
//
// Outline of what the body does, in order:
//   1. Early exits: anything under misc/ is copied into the tool directory;
//      cmd/prof is skipped when its directory does not exist.
//   2. On first use, build the shared gccargs / ldargs argument vectors.
//   3. Classify dir (C library, Go package, Go command, C command) and start
//      the final link/pack command line; link.p[targ] is the output file.
//   4. Collect the source file list (directory contents plus deptab
//      additions and removals), make paths absolute, and decide whether the
//      target is stale by comparing modification times.
//   5. If stale: copy/generate support files, compile each .c/.s file in the
//      background, compile the Go files in one invocation, then link or pack.
//   6. Always: remove the files recorded in `clean` and free every Buf/Vec.
//
// b and b1 are scratch buffers that are overwritten throughout; values that
// must survive are copied (vadd stores them in a Vec) before the next reuse.
static void
install(char *dir)
{
	char *name, *p, *elem, *prefix, *exe;
	bool islib, ispkg, isgo, stale, ispackcmd;
	Buf b, b1, path;
	Vec compile, files, link, go, missing, clean, lib, extra;
	Time ttarg, t;
	int i, j, k, n, doclean, targ;

	// With -v, announce the directory; include the target OS/arch when
	// it differs from the host (i.e. when cross-compiling).
	if(vflag) {
		if(!streq(goos, gohostos) || !streq(goarch, gohostarch))
			errprintf("%s (%s/%s)\n", dir, goos, goarch);
		else
			errprintf("%s\n", dir);
	}

	binit(&b);
	binit(&b1);
	binit(&path);
	vinit(&compile);
	vinit(&files);
	vinit(&link);
	vinit(&go);
	vinit(&missing);
	vinit(&clean);
	vinit(&lib);
	vinit(&extra);

	// path = full path to dir.
	bpathf(&path, "%s/src/%s", goroot, dir);
	name = lastelem(dir);

	// For anything under misc/ (e.g. misc/prof), copy into the tool
	// directory and we're done. Note the source is $GOROOT/misc/<name>,
	// not a path under $GOROOT/src.
	if(hasprefix(dir, "misc/")) {
		copy(bpathf(&b, "%s/%s", tooldir, name),
			bpathf(&b1, "%s/misc/%s", goroot, name), 1);
		goto out;
	}

	// For release, cmd/prof is not included.
	if((streq(dir, "cmd/prof")) && !isdir(bstr(&path))) {
		if(vflag > 1)
			errprintf("skipping %s - does not exist\n", dir);
		goto out;
	}

	// set up gcc command line on first run.
	// (gccargs is shared state; it is only filled while still empty.)
	if(gccargs.len == 0) {
		bprintf(&b, "%s %s", defaultcc, defaultcflags);
		splitfields(&gccargs, bstr(&b));
		for(i=0; i<nelem(proto_gccargs); i++)
			vadd(&gccargs, proto_gccargs[i]);
		// The second prototype list is added only when no default
		// C flags were configured.
		if(defaultcflags[0] == '\0') {
			for(i=0; i<nelem(proto_gccargs2); i++)
				vadd(&gccargs, proto_gccargs2[i]);
		}
		if(contains(gccargs.p[0], "clang")) {
			// disable ASCII art in clang errors, if possible
			vadd(&gccargs, "-fno-caret-diagnostics");
			// clang is too smart about unused command-line arguments
			vadd(&gccargs, "-Qunused-arguments");
		}
		// disable word wrapping in error messages
		vadd(&gccargs, "-fmessage-length=0");
		if(streq(gohostos, "darwin")) {
			// golang.org/issue/5261
			vadd(&gccargs, "-mmacosx-version-min=10.6");
		}
	}
	// Likewise build the linker flags once, and only if any were configured.
	if(ldargs.len == 0 && defaultldflags[0] != '\0') {
		bprintf(&b, "%s", defaultldflags);
		splitfields(&ldargs, bstr(&b));
	}

	// Classify the target. isgo covers Go packages and the two commands
	// that are written in Go (cmd/go, cmd/cgo).
	islib = hasprefix(dir, "lib") || streq(dir, "cmd/cc") || streq(dir, "cmd/gc");
	ispkg = hasprefix(dir, "pkg");
	isgo = ispkg || streq(dir, "cmd/go") || streq(dir, "cmd/cgo");

	exe = "";
	if(streq(gohostos, "windows"))
		exe = ".exe";

	// Start final link command line.
	// Note: code below knows that link.p[targ] is the target.
	ispackcmd = 0;
	if(islib) {
		// C library.
		vadd(&link, "ar");
		if(streq(gohostos, "plan9"))
			vadd(&link, "rc");
		else
			vadd(&link, "rsc");
		// Make sure the archive name starts with "lib".
		prefix = "";
		if(!hasprefix(name, "lib"))
			prefix = "lib";
		targ = link.len;
		vadd(&link, bpathf(&b, "%s/pkg/obj/%s_%s/%s%s.a", goroot, gohostos, gohostarch, prefix, name));
	} else if(ispkg) {
		// Go library (package).
		ispackcmd = 1;
		vadd(&link, "pack"); // program name - unused here, but all the other cases record one
		// dir+4 skips the leading "pkg/". Truncate at the last '/'
		// to get the directory that will hold the .a file and create it.
		p = bprintf(&b, "%s/pkg/%s_%s/%s", goroot, goos, goarch, dir+4);
		*xstrrchr(p, '/') = '\0';
		xmkdirall(p);
		targ = link.len;
		vadd(&link, bpathf(&b, "%s/pkg/%s_%s/%s.a", goroot, goos, goarch, dir+4));
	} else if(streq(dir, "cmd/go") || streq(dir, "cmd/cgo")) {
		// Go command.
		vadd(&link, bpathf(&b, "%s/%sl", tooldir, gochar));
		vadd(&link, "-o");
		// The go command itself is installed under the name go_bootstrap.
		elem = name;
		if(streq(elem, "go"))
			elem = "go_bootstrap";
		targ = link.len;
		vadd(&link, bpathf(&b, "%s/%s%s", tooldir, elem, exe));
	} else {
		// C command. Use gccargs and ldargs.
		if(streq(gohostos, "plan9")) {
			vadd(&link, bprintf(&b, "%sl", gohostchar));
			vadd(&link, "-o");
			targ = link.len;
			vadd(&link, bpathf(&b, "%s/%s", tooldir, name));
		} else {
			vcopy(&link, gccargs.p, gccargs.len);
			vcopy(&link, ldargs.p, ldargs.len);
			if(sflag)
				vadd(&link, "-static");
			vadd(&link, "-o");
			targ = link.len;
			vadd(&link, bpathf(&b, "%s/%s%s", tooldir, name, exe));
			if(streq(gohostarch, "amd64"))
				vadd(&link, "-m64");
			else if(streq(gohostarch, "386"))
				vadd(&link, "-m32");
		}
	}
	// Modification time of the current target, compared against each
	// source below to decide staleness.
	ttarg = mtime(link.p[targ]);

	// Gather files that are sources for this target.
	// Everything in that directory, and any target-specific
	// additions.
	xreaddir(&files, bstr(&path));

	// Remove files beginning with . or _,
	// which are likely to be editor temporary files.
	// This is the same heuristic build.ScanDir uses.
	// There do exist real C files beginning with _,
	// so limit that check to just Go files.
	n = 0;
	for(i=0; i<files.len; i++) {
		p = files.p[i];
		if(hasprefix(p, ".") || (hasprefix(p, "_") && hassuffix(p, ".go")))
			xfree(p);
		else
			files.p[n++] = p;
	}
	files.len = n;

	// Apply deptab entries whose prefix equals dir, or whose prefix ends
	// in "/" and is a prefix of dir. Each dependency string is expanded
	// ($GOROOT, $GOOS, $GOARCH) and then interpreted by its shape:
	//   *.a      - a library, recorded in lib
	//   dir/*    - every entry of that directory is added to files
	//   -prefix  - files whose name starts with prefix are removed
	//   other    - added to files as is
	for(i=0; i<nelem(deptab); i++) {
		if(streq(dir, deptab[i].prefix) ||
		   (hassuffix(deptab[i].prefix, "/") && hasprefix(dir, deptab[i].prefix))) {
			for(j=0; (p=deptab[i].dep[j])!=nil; j++) {
				breset(&b1);
				bwritestr(&b1, p);
				bsubst(&b1, "$GOROOT", goroot);
				bsubst(&b1, "$GOOS", goos);
				bsubst(&b1, "$GOARCH", goarch);
				p = bstr(&b1);
				if(hassuffix(p, ".a")) {
					vadd(&lib, bpathf(&b, "%s", p));
					continue;
				}
				if(hassuffix(p, "/*")) {
					// b.len -= 2 strips the trailing "/*":
					// first to list the directory, then to
					// form the relative prefix for each entry.
					bpathf(&b, "%s/%s", bstr(&path), p);
					b.len -= 2;
					xreaddir(&extra, bstr(&b));
					bprintf(&b, "%s", p);
					b.len -= 2;
					for(k=0; k<extra.len; k++)
						vadd(&files, bpathf(&b1, "%s/%s", bstr(&b), extra.p[k]));
					continue;
				}
				if(hasprefix(p, "-")) {
					// Skip the '-' and drop every file that
					// starts with the remaining text.
					p++;
					n = 0;
					for(k=0; k<files.len; k++) {
						if(hasprefix(files.p[k], p))
							xfree(files.p[k]);
						else
							files.p[n++] = files.p[k];
					}
					files.len = n;
					continue;
				}
				vadd(&files, p);
			}
		}
	}
	vuniq(&files);

	// Convert to absolute paths.
	for(i=0; i<files.len; i++) {
		if(!isabs(files.p[i])) {
			bpathf(&b, "%s/%s", bstr(&path), files.p[i]);
			xfree(files.p[i]);
			files.p[i] = btake(&b);
		}
	}

	// Is the target up-to-date?
	// While checking, also filter the list in place: keep only files
	// with a suffix listed in depsuffix, and drop existing non-.a files
	// that shouldbuild rejects for this dir.
	stale = rebuildall;
	n = 0;
	for(i=0; i<files.len; i++) {
		p = files.p[i];
		for(j=0; j<nelem(depsuffix); j++)
			if(hassuffix(p, depsuffix[j]))
				goto ok;
		xfree(files.p[i]);
		continue;
	ok:
		t = mtime(p);
		if(t != 0 && !hassuffix(p, ".a") && !shouldbuild(p, dir)) {
			xfree(files.p[i]);
			continue;
		}
		if(hassuffix(p, ".go"))
			vadd(&go, p);
		if(t > ttarg)
			stale = 1;
		// mtime of 0 is treated as "file does not exist (yet)":
		// remember it so the generation step can verify it was produced.
		if(t == 0) {
			vadd(&missing, p);
			files.p[n++] = files.p[i];
			continue;
		}
		files.p[n++] = files.p[i];
	}
	files.len = n;

	// If there are no files to compile, we're done.
	if(files.len == 0)
		goto out;

	// A library dependency newer than the target also makes it stale.
	for(i=0; i<lib.len && !stale; i++)
		if(mtime(lib.p[i]) > ttarg)
			stale = 1;

	if(!stale)
		goto out;

	// For package runtime, copy some files into the work space.
	// The OS/arch-specific headers are copied under generic
	// GOOS/GOARCH names.
	if(streq(dir, "pkg/runtime")) {
		copy(bpathf(&b, "%s/arch_GOARCH.h", workdir),
			bpathf(&b1, "%s/arch_%s.h", bstr(&path), goarch), 0);
		copy(bpathf(&b, "%s/defs_GOOS_GOARCH.h", workdir),
			bpathf(&b1, "%s/defs_%s_%s.h", bstr(&path), goos, goarch), 0);
		// The signal header is optional: copied only if it exists.
		p = bpathf(&b1, "%s/signal_%s_%s.h", bstr(&path), goos, goarch);
		if(isfile(p))
			copy(bpathf(&b, "%s/signal_GOOS_GOARCH.h", workdir), p, 0);
		copy(bpathf(&b, "%s/os_GOOS.h", workdir),
			bpathf(&b1, "%s/os_%s.h", bstr(&path), goos), 0);
		copy(bpathf(&b, "%s/signals_GOOS.h", workdir),
			bpathf(&b1, "%s/signals_%s.h", bstr(&path), goos), 0);
	}

	// Generate any missing files; regenerate existing ones.
	// A file is generated by the first gentab entry (with a generator)
	// whose nameprefix matches the file's last path element.
	for(i=0; i<files.len; i++) {
		p = files.p[i];
		elem = lastelem(p);
		for(j=0; j<nelem(gentab); j++) {
			if(gentab[j].gen == nil)
				continue;
			if(hasprefix(elem, gentab[j].nameprefix)) {
				if(vflag > 1)
					errprintf("generate %s\n", p);
				gentab[j].gen(bstr(&path), p);
				// Do not add generated file to clean list.
				// In pkg/runtime, we want to be able to
				// build the package with the go tool,
				// and it assumes these generated files already
				// exist (it does not know how to build them).
				// The 'clean' command can remove
				// the generated files.
				goto built;
			}
		}
		// Did not rebuild p.
		// If it was recorded as missing, nothing can produce it.
		if(find(p, missing.p, missing.len) >= 0)
			fatal("missing file %s", p);
	built:;
	}

	// One more copy for package runtime.
	// The last batch was required for the generators.
	// This one is generated.
	if(streq(dir, "pkg/runtime")) {
		copy(bpathf(&b, "%s/zasm_GOOS_GOARCH.h", workdir),
			bpathf(&b1, "%s/zasm_%s_%s.h", bstr(&path), goos, goarch), 0);
	}

	// Generate .c files from .goc files.
	// The generated files are appended to `files` while it is being
	// iterated; they end in .c, so the .goc check skips them.
	if(streq(dir, "pkg/runtime")) {
		for(i=0; i<files.len; i++) {
			p = files.p[i];
			if(!hassuffix(p, ".goc"))
				continue;
			// b = path/zp but with _goos_goarch.c instead of .goc
			bprintf(&b, "%s%sz%s", bstr(&path), slash, lastelem(p));
			b.len -= 4;
			bwritef(&b, "_%s_%s.c", goos, goarch);
			goc2c(p, bstr(&b));
			vadd(&files, bstr(&b));
		}
		vuniq(&files);
	}

	if((!streq(goos, gohostos) || !streq(goarch, gohostarch)) && isgo) {
		// We've generated the right files; the go command can do the build.
		if(vflag > 1)
			errprintf("skip build for cross-compile %s\n", dir);
		goto nobuild;
	}

	// Compile the files.
	// Only .c and .s files are compiled here, one background job each;
	// every object file produced is appended to the link command.
	for(i=0; i<files.len; i++) {
		if(!hassuffix(files.p[i], ".c") && !hassuffix(files.p[i], ".s"))
			continue;
		name = lastelem(files.p[i]);

		vreset(&compile);
		if(!isgo) {
			// C library or tool.
			if(streq(gohostos, "plan9")) {
				vadd(&compile, bprintf(&b, "%sc", gohostchar));
				vadd(&compile, "-FTVwp");
				vadd(&compile, "-DPLAN9");
				vadd(&compile, "-D__STDC__=1");
				vadd(&compile, "-D__SIZE_TYPE__=ulong"); // for GNU Bison
				vadd(&compile, bpathf(&b, "-I%s/include/plan9", goroot));
				vadd(&compile, bpathf(&b, "-I%s/include/plan9/%s", goroot, gohostarch));
			} else {
				vcopy(&compile, gccargs.p, gccargs.len);
				vadd(&compile, "-c");
				if(streq(gohostarch, "amd64"))
					vadd(&compile, "-m64");
				else if(streq(gohostarch, "386"))
					vadd(&compile, "-m32");

				vadd(&compile, "-I");
				vadd(&compile, bpathf(&b, "%s/include", goroot));
			}

			if(streq(dir, "lib9"))
				vadd(&compile, "-DPLAN9PORT");

			vadd(&compile, "-I");
			vadd(&compile, bstr(&path));

			// lib9/goos.c gets the default constants hard-coded.
			if(streq(name, "goos.c")) {
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOOS=\"%s\"", goos));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOARCH=\"%s\"", goarch));
				bprintf(&b1, "%s", goroot_final);
				bsubst(&b1, "\\", "\\\\"); // turn into C string
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOROOT=\"%s\"", bstr(&b1)));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOVERSION=\"%s\"", goversion));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GOARM=\"%s\"", goarm));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GO386=\"%s\"", go386));
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b, "GO_EXTLINK_ENABLED=\"%s\"", goextlinkenabled));
			}

			// gc/lex.c records the GOEXPERIMENT setting used during the build.
			if(streq(name, "lex.c")) {
				xgetenv(&b, "GOEXPERIMENT");
				vadd(&compile, "-D");
				vadd(&compile, bprintf(&b1, "GOEXPERIMENT=\"%s\"", bstr(&b)));
			}
		} else {
			// Supporting files for a Go package.
			// .s files use the <gochar>a tool, everything else <gochar>c.
			if(hassuffix(files.p[i], ".s"))
				vadd(&compile, bpathf(&b, "%s/%sa", tooldir, gochar));
			else {
				vadd(&compile, bpathf(&b, "%s/%sc", tooldir, gochar));
				vadd(&compile, "-F");
				vadd(&compile, "-V");
				vadd(&compile, "-w");
			}
			vadd(&compile, "-I");
			vadd(&compile, workdir);
			vadd(&compile, "-I");
			vadd(&compile, bprintf(&b, "%s/pkg/%s_%s", goroot, goos, goarch));
			vadd(&compile, "-D");
			vadd(&compile, bprintf(&b, "GOOS_%s", goos));
			vadd(&compile, "-D");
			vadd(&compile, bprintf(&b, "GOARCH_%s", goarch));
			vadd(&compile, "-D");
			vadd(&compile, bprintf(&b, "GOOS_GOARCH_%s_%s", goos, goarch));
		}

		// b = object file path, by default in the work directory
		// (and therefore scheduled for removal).
		bpathf(&b, "%s/%s", workdir, lastelem(files.p[i]));
		doclean = 1;
		if(!isgo && streq(gohostos, "darwin")) {
			// To debug C programs on OS X, it is not enough to say -ggdb
			// on the command line. You have to leave the object files
			// lying around too. Leave them in pkg/obj/, which does not
			// get removed when this tool exits.
			bpathf(&b1, "%s/pkg/obj/%s", goroot, dir);
			xmkdirall(bstr(&b1));
			bpathf(&b, "%s/%s", bstr(&b1), lastelem(files.p[i]));
			doclean = 0;
		}

		// Change the last character of the output file (which was c or s).
		if(streq(gohostos, "plan9"))
			b.p[b.len-1] = gohostchar[0];
		else
			b.p[b.len-1] = 'o';
		vadd(&compile, "-o");
		vadd(&compile, bstr(&b));
		vadd(&compile, files.p[i]);
		bgrunv(bstr(&path), CheckExit, &compile);

		vadd(&link, bstr(&b));
		if(doclean)
			vadd(&clean, bstr(&b));
	}
	// Wait for all background compiles started above.
	bgwait();

	if(isgo) {
		// The last loop was compiling individual files.
		// Hand the Go files to the compiler en masse.
		vreset(&compile);
		vadd(&compile, bpathf(&b, "%s/%sg", tooldir, gochar));

		// The compiler output is a temporary archive in the work
		// directory; it is always cleaned up afterwards.
		bpathf(&b, "%s/_go_.a", workdir);
		vadd(&compile, "-pack");
		vadd(&compile, "-o");
		vadd(&compile, bstr(&b));
		vadd(&clean, bstr(&b));
		// For commands the archive is a link input; for packages it
		// is passed to dopack separately below.
		if(!ispackcmd)
			vadd(&link, bstr(&b));

		// Package import path: dir without "pkg/", or "main" for commands.
		vadd(&compile, "-p");
		if(hasprefix(dir, "pkg/"))
			vadd(&compile, dir+4);
		else
			vadd(&compile, "main");

		if(streq(dir, "pkg/runtime"))
			vadd(&compile, "-+");

		vcopy(&compile, go.p, go.len);

		runv(nil, bstr(&path), CheckExit, &compile);

		if(ispackcmd) {
			// Packages are finished by dopack rather than by
			// running the link command: the target is replaced
			// using the compiled archive plus the object files
			// that follow the target in `link`.
			xremove(link.p[targ]);
			dopack(link.p[targ], bstr(&b), &link.p[targ+1], link.len - (targ+1));
			goto nobuild;
		}
	}

	if(!islib && !isgo) {
		// C binaries need the libraries explicitly, and -lm.
		vcopy(&link, lib.p, lib.len);
		if(!streq(gohostos, "plan9"))
			vadd(&link, "-lm");
	}

	// Remove target before writing it.
	xremove(link.p[targ]);

	runv(nil, nil, CheckExit, &link);

nobuild:
	// In package runtime, we install runtime.h and cgocall.h too,
	// for use by cgo compilation.
	if(streq(dir, "pkg/runtime")) {
		copy(bpathf(&b, "%s/pkg/%s_%s/cgocall.h", goroot, goos, goarch),
			bpathf(&b1, "%s/src/pkg/runtime/cgocall.h", goroot), 0);
		copy(bpathf(&b, "%s/pkg/%s_%s/runtime.h", goroot, goos, goarch),
			bpathf(&b1, "%s/src/pkg/runtime/runtime.h", goroot), 0);
	}

out:
	// Common exit path: delete temporary outputs and release all
	// buffers and vectors initialised at the top.
	for(i=0; i<clean.len; i++)
		xremove(clean.p[i]);

	bfree(&b);
	bfree(&b1);
	bfree(&path);
	vfree(&compile);
	vfree(&files);
	vfree(&link);
	vfree(&go);
	vfree(&missing);
	vfree(&clean);
	vfree(&lib);
	vfree(&extra);
}
/* Test entry point: takes no arguments and simply calls vadd() with none.
 * vadd is declared outside this view; what it operates on is not visible here. */
void test_add (void) { vadd (); }
int main(int argc, char* argv[]) { double t1, t2, t3, t4, t5; double sum1, sum2, sum3, sum4; int arg = 1, len = 0, iters = 0, verb = 0, run = 1; int do_vcopy = 1, do_vadd = 1, do_vjacobi = 1; while(argc>arg) { if (strcmp(argv[arg],"-v")==0) verb++; else if (strcmp(argv[arg],"-vv")==0) verb+=2; else if (strcmp(argv[arg],"-n")==0) run = 0; else if (strcmp(argv[arg],"-c")==0) do_vadd = 0, do_vjacobi = 0; else if (strcmp(argv[arg],"-a")==0) do_vcopy = 0, do_vjacobi = 0; else if (strcmp(argv[arg],"-j")==0) do_vcopy = 0, do_vadd = 0; else break; arg++; } if (argc>arg) { len = atoi(argv[arg]); arg++; } if (argc>arg) { iters = atoi(argv[arg]); arg++; } if (len == 0) len = 10000; if (iters == 0) iters = 20; len = len * 1000; printf("Alloc/init 3 double arrays of length %d ...\n", len); double* a = (double*) malloc(len * sizeof(double)); double* b = (double*) malloc(len * sizeof(double)); double* c = (double*) malloc(len * sizeof(double)); for(int i = 0; i<len; i++) { a[i] = 1.0; b[i] = (double) (i % 20); c[i] = 3.0; } // Generate vectorized variants & run against naive/original #if __AVX__ bool do32 = true; #else bool do32 = false; #endif // vcopy if (do_vcopy) { vcopy_t vcopy16, vcopy32; Rewriter* rc16 = dbrew_new(); if (verb>1) dbrew_verbose(rc16, true, true, true); dbrew_set_function(rc16, (uint64_t) vcopy); dbrew_config_parcount(rc16, 3); dbrew_config_force_unknown(rc16, 0); dbrew_set_vectorsize(rc16, 16); vcopy16 = (vcopy_t) dbrew_rewrite(rc16, a, b, len); if (verb) decode_func(rc16, "vcopy16"); if (do32) { Rewriter* rc32 = dbrew_new(); if (verb>1) dbrew_verbose(rc32, true, true, true); dbrew_set_function(rc32, (uint64_t) vcopy); dbrew_config_parcount(rc32, 3); dbrew_config_force_unknown(rc32, 0); dbrew_set_vectorsize(rc32, 32); vcopy32 = (vcopy_t) dbrew_rewrite(rc32, a, b, len); if (verb) decode_func(rc32, "vcopy32"); } printf("Running %d iterations of vcopy ...\n", iters); t1 = wtime(); for(int iter = 0; iter < iters; iter++) naive_vcopy(a, b, len); t2 = wtime(); 
for(int iter = 0; iter < iters; iter++) vcopy(a, b, len); t3 = wtime(); if (run) for(int iter = 0; iter < iters; iter++) vcopy16(a, b, len); t4 = wtime(); if (do32 && run) for(int iter = 0; iter < iters; iter++) vcopy32(a, b, len); t5 = wtime(); printf(" naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s", t2-t1, t3-t2, t4-t3); if (do32) printf(", rewritten-32: %.3f s", t5-t4); printf("\n"); } // vadd if (do_vadd) { vadd_t vadd16, vadd32; Rewriter* ra16 = dbrew_new(); if (verb>1) dbrew_verbose(ra16, true, true, true); dbrew_set_function(ra16, (uint64_t) vadd); dbrew_config_parcount(ra16, 4); dbrew_config_force_unknown(ra16, 0); dbrew_set_vectorsize(ra16, 16); vadd16 = (vadd_t) dbrew_rewrite(ra16, a, b, c, len); if (verb) decode_func(ra16, "vadd16"); if (do32) { Rewriter* ra32 = dbrew_new(); if (verb>1) dbrew_verbose(ra32, true, true, true); dbrew_set_function(ra32, (uint64_t) vadd); dbrew_config_parcount(ra32, 4); dbrew_config_force_unknown(ra32, 0); dbrew_set_vectorsize(ra32, 32); vadd32 = (vadd_t) dbrew_rewrite(ra32, a, b, c, len); if (verb) decode_func(ra32, "vadd32"); } sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0; printf("Running %d iterations of vadd ...\n", iters); t1 = wtime(); for(int iter = 0; iter < iters; iter++) naive_vadd(a, b, c, len); for(int i = 0; i < len; i++) sum1 += a[i]; t2 = wtime(); for(int iter = 0; iter < iters; iter++) vadd(a, b, c, len); for(int i = 0; i < len; i++) sum2 += a[i]; t3 = wtime(); if (run) for(int iter = 0; iter < iters; iter++) vadd16(a, b, c, len); for(int i = 0; i < len; i++) sum3 += a[i]; t4 = wtime(); if (do32 && run) for(int iter = 0; iter < iters; iter++) vadd32(a, b, c, len); for(int i = 0; i < len; i++) sum4 += a[i]; t5 = wtime(); printf(" naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s", t2-t1, t3-t2, t4-t3); if (do32) printf(", rewritten-32: %.3f s", t5-t4); printf("\n"); printf(" sum naive: %f, sum rewritten-16: %f, sum rewritten-16: %f\n", sum1, sum3, sum4); } // vjacobi_1d if (do_vjacobi) { 
vcopy_t vjacobi_1d16, vjacobi_1d32; Rewriter* rj16 = dbrew_new(); if (verb>1) dbrew_verbose(rj16, true, true, true); dbrew_set_function(rj16, (uint64_t) vjacobi_1d); dbrew_config_parcount(rj16, 3); dbrew_config_force_unknown(rj16, 0); dbrew_set_vectorsize(rj16, 16); vjacobi_1d16 = (vcopy_t) dbrew_rewrite(rj16, a, b, len); if (verb) decode_func(rj16, "vjacobi_1d16"); if (do32) { Rewriter* rj32 = dbrew_new(); if (verb>1) dbrew_verbose(rj32, true, true, true); dbrew_set_function(rj32, (uint64_t) vjacobi_1d); dbrew_config_parcount(rj32, 3); dbrew_config_force_unknown(rj32, 0); dbrew_set_vectorsize(rj32, 32); vjacobi_1d32 = (vcopy_t) dbrew_rewrite(rj32, a, b, len); if (verb) decode_func(rj32, "vjacobi_1d32"); } sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0; printf("Running %d iterations of vjacobi_1d ...\n", iters); t1 = wtime(); for(int iter = 0; iter < iters; iter++) naive_vjacobi_1d(a+1, b+1, len-2); for(int i = 0; i < len; i++) sum1 += a[i]; t2 = wtime(); for(int iter = 0; iter < iters; iter++) vjacobi_1d(a+1, b+1, len-2); for(int i = 0; i < len; i++) sum2 += a[i]; t3 = wtime(); if (run) for(int iter = 0; iter < iters; iter++) vjacobi_1d16(a+1, b+1, len-2); for(int i = 0; i < len; i++) sum3 += a[i]; t4 = wtime(); if (do32 && run) for(int iter = 0; iter < iters; iter++) vjacobi_1d32(a+1, b+1, len-2); for(int i = 0; i < len; i++) sum4 += a[i]; t5 = wtime(); printf(" naive: %.3f s, un-rewritten: %.3f s, rewritten-16: %.3f s", t2-t1, t3-t2, t4-t3); if (do32) printf(", rewritten-32: %.3f s", t5-t4); printf("\n"); printf(" sum naive: %f, sum rewritten-16: %f, sum rewritten-16: %f\n", sum1, sum3, sum4); } }
msym_error_t partitionEquivalenceSets(int length, msym_element_t *elements[length], msym_element_t *pelements[length], msym_geometry_t g, int *esl, msym_equivalence_set_t **es, msym_thresholds_t *thresholds) { int ns = 0, gd = geometryDegenerate(g); double *e = calloc(length,sizeof(double)); double *s = calloc(length,sizeof(double)); int *sp = calloc(length,sizeof(int)); //set partition int *ss = calloc(length,sizeof(int)); //set size double (*ev)[3] = calloc(length,sizeof(double[3])); double (*ep)[3] = calloc(length,sizeof(double[3])); double (*vec)[3] = calloc(length, sizeof(double[3])); double *m = calloc(length, sizeof(double)); for(int i = 0;i < length;i++){ vcopy(elements[i]->v, vec[i]); m[i] = elements[i]->m; } for(int i=0; i < length; i++){ for(int j = i+1; j < length;j++){ double w = m[i]*m[j]/(m[i]+m[j]); double dist; double v[3]; double proji[3], projj[3]; vnorm2(vec[i],v); vproj_plane(vec[j], v, proji); vscale(w, proji, proji); vadd(proji,ep[i],ep[i]); vnorm2(vec[j],v); vproj_plane(vec[i], v, projj); vscale(w, projj, projj); vadd(projj,ep[j],ep[j]); vsub(vec[j],vec[i],v); dist = vabs(v); vscale(w/dist,v,v); vadd(v,ev[i],ev[i]); vsub(ev[j],v,ev[j]); double dij = w*dist; //This is sqrt(I) for a diatomic molecule along an axis perpendicular to the bond with O at center of mass. e[i] += dij; e[j] += dij; s[i] += SQR(dij); s[j] += SQR(dij); } vsub(vec[i],ev[i],ev[i]); } for(int i = 0; i < length; i++){ double v[3]; double w = m[i]/2.0; double dist = vabs(elements[i]->v); double dii = w*dist; vscale(w,elements[i]->v,v); vsub(ev[i],v,ev[i]); // Plane projection can't really differentiate certain types of structures when we add the initial vector, // but not doing so will result in huge cancellation errors on degenerate point groups, // also large masses will mess up the eq check when this is 0. 
if(gd) vadd(ep[i],v,ep[i]); e[i] += dii; s[i] += SQR(dii); } for(int i = 0; i < length; i++){ if(e[i] >= 0.0){ sp[i] = i; for(int j = i+1; j < length;j++){ if(e[j] >= 0.0){ double vabsevi = vabs(ev[i]), vabsevj = vabs(ev[j]), vabsepi = vabs(ep[i]), vabsepj = vabs(ep[j]); double eep = 0.0, eev = fabs((vabsevi)-(vabsevj))/((vabsevi)+(vabsevj)), ee = fabs((e[i])-(e[j]))/((e[i])+(e[j])), es = fabs((s[i])-(s[j]))/((s[i])+(s[j])); if(!(vabsepi < thresholds->zero && vabsepj < thresholds->zero)){ eep = fabs((vabsepi)-(vabsepj))/((vabsepi)+(vabsepj)); } double max = fmax(eev,fmax(eep,fmax(ee, es))); if(max < thresholds->equivalence && elements[i]->n == elements[j]->n){ e[j] = max > 0.0 ? -max : -1.0; sp[j] = i; } } } e[i] = -1.0; } } for(int i = 0; i < length;i++){ int j = sp[i]; ns += (ss[j] == 0); ss[j]++; } msym_equivalence_set_t *eqs = calloc(ns,sizeof(msym_equivalence_set_t)); msym_element_t **lelements = elements; msym_element_t **pe = pelements; if(elements == pelements){ lelements = malloc(sizeof(msym_element_t *[length])); memcpy(lelements, elements, sizeof(msym_element_t *[length])); } for(int i = 0, ni = 0; i < length;i++){ if(ss[i] > 0){ int ei = 0; eqs[ni].elements = pe; eqs[ni].length = ss[i]; for(int j = 0; j < length;j++){ if(sp[j] == i){ double err = (e[j] > -1.0) ? fabs(e[j]) : 0.0; eqs[ni].err = fmax(eqs[ni].err,err); eqs[ni].elements[ei++] = lelements[j]; } } pe += ss[i]; ni++; } } if(elements == pelements){ free(lelements); } free(m); free(vec); free(s); free(e); free(sp); free(ss); free(ev); free(ep); *es = eqs; *esl = ns; return MSYM_SUCCESS; }