void invertQuda(void *hp_x, void *hp_b, QudaInvertParam *param) { // check the gauge fields have been created cudaGaugeField *cudaGauge = checkGauge(param); checkInvertParam(param); if (param->cuda_prec_sloppy != param->prec_precondition && param->inv_type_precondition != QUDA_INVALID_INVERTER) errorQuda("Sorry, cannot yet use different sloppy and preconditioner precisions"); verbosity = param->verbosity; bool pc_solve = (param->solve_type == QUDA_DIRECT_PC_SOLVE || param->solve_type == QUDA_NORMEQ_PC_SOLVE); bool pc_solution = (param->solution_type == QUDA_MATPC_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION); param->spinorGiB = cudaGauge->VolumeCB() * spinorSiteSize; if (!pc_solve) param->spinorGiB *= 2; param->spinorGiB *= (param->cuda_prec == QUDA_DOUBLE_PRECISION ? sizeof(double) : sizeof(float)); if (param->preserve_source == QUDA_PRESERVE_SOURCE_NO) { param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 5 : 7)/(double)(1<<30); } else { param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 8 : 9)/(double)(1<<30); } param->secs = 0; param->gflops = 0; param->iter = 0; // create the dirac operator DiracParam diracParam; createDirac(diracParam, *param, pc_solve); Dirac &dirac = *d; Dirac &diracSloppy = *dSloppy; Dirac &diracPre = *dPre; cpuColorSpinorField *h_b = NULL; cpuColorSpinorField *h_x = NULL; cudaColorSpinorField *b = NULL; cudaColorSpinorField *x = NULL; cudaColorSpinorField *in = NULL; cudaColorSpinorField *out = NULL; const int *X = cudaGauge->X(); // wrap CPU host side pointers ColorSpinorParam cpuParam(hp_b, *param, X, pc_solution); h_b = new cpuColorSpinorField(cpuParam); cpuParam.v = hp_x; h_x = new cpuColorSpinorField(cpuParam); // download source ColorSpinorParam cudaParam(cpuParam, *param); cudaParam.create = QUDA_COPY_FIELD_CREATE; b = new cudaColorSpinorField(*h_b, cudaParam); if (param->use_init_guess == QUDA_USE_INIT_GUESS_YES) { // download initial guess x = new cudaColorSpinorField(*h_x, cudaParam); // solution } else { // zero initial guess cudaParam.create = QUDA_ZERO_FIELD_CREATE; x = new cudaColorSpinorField(cudaParam); // solution } if (param->verbosity >= QUDA_VERBOSE) { double nh_b = norm2(*h_b); double nb = norm2(*b); printfQuda("Source: CPU = %f, CUDA copy = %f\n", nh_b, nb); } tuneDirac(*param, pc_solution ? *x : x->Even()); dirac.prepare(in, out, *x, *b, param->solution_type); if (param->verbosity >= QUDA_VERBOSE) { double nin = norm2(*in); printfQuda("Prepared source = %f\n", nin); } massRescale(param->dslash_type, diracParam.kappa, param->solution_type, param->mass_normalization, *in); switch (param->inv_type) { case QUDA_CG_INVERTER: if (param->solution_type != QUDA_MATDAG_MAT_SOLUTION && param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION) { copyCuda(*out, *in); dirac.Mdag(*in, *out); } { DiracMdagM m(dirac), mSloppy(diracSloppy); CG cg(m, mSloppy, *param); cg(*out, *in); } break; case QUDA_BICGSTAB_INVERTER: if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) { DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre); BiCGstab bicg(m, mSloppy, mPre, *param); bicg(*out, *in); copyCuda(*in, *out); } { DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre); BiCGstab bicg(m, mSloppy, mPre, *param); bicg(*out, *in); } break; case QUDA_GCR_INVERTER: if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) { DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre); GCR gcr(m, mSloppy, mPre, *param); gcr(*out, *in); copyCuda(*in, *out); } { DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre); GCR gcr(m, mSloppy, mPre, *param); gcr(*out, *in); } break; default: errorQuda("Inverter type %d not implemented", param->inv_type); } if (param->verbosity >= QUDA_VERBOSE){ double nx = norm2(*x); printfQuda("Solution = %f\n",nx); } dirac.reconstruct(*x, *b, param->solution_type); x->saveCPUSpinorField(*h_x); // since this is a reference, this won't work: h_x = x; if (param->verbosity >= QUDA_VERBOSE){ double nx = norm2(*x); double nh_x = norm2(*h_x); printfQuda("Reconstructed: CUDA solution = %f, CPU copy = %f\n", nx, nh_x); } if (!param->preserve_dirac) { delete d; delete dSloppy; delete dPre; diracCreation = false; diracTune = false; } delete h_b; delete h_x; delete b; delete x; return; }
// get optical flow field descriptor void optiflowDescriptor( int gid, int vid, vector<int>&label, vector<vector<float> > &pfeat, vector<vector<float> > &nfeat) { bool useDenseOF = USEDENSE; int flen = 50; vector<int> mos; ldLabel(gid, mos); char vname[512]; Point2i isize; for(int v = vid; v < vid+1; ++v){ sprintf(vname, "/home/fengzy/Projects/XProject/dataset/Set%.02d/video/%d.avi", gid, v); CvCapture *cap = cvCaptureFromFile(vname); if(!cap) continue; vector<vector<Point2f> > flo(flen); int width = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH); int height= cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT); vector<Point2f> densePt; densePt.reserve( width * height); for ( int h = 0; h <height; ++h) for ( int w = 0; w < width; ++w) densePt.push_back( Point2f(w,h)); IplImage *pre, *nex; nex= cvQueryFrame(cap); pre = cvCreateImage(cvGetSize(nex), 8, 3); for(int i = 1; i <= flen; ++i){ printf("[%d/%d]page\n",i,flen); cvCopy(nex, pre); nex = cvQueryFrame(cap); if(!nex) break; Mat mPre(pre), mNex(nex), mask; Point2i imgsize = SamplingOpticalFlow(mPre,mNex,mask,densePt, flo[i-1]); if(!isize.x) isize=imgsize; } // create histogram vector<float> floHist(flo.size()); //// remove the 1st and last frame's optical flow. char buff[512]; sprintf(buff,"/home/fengzy/Projects/XProject/dataset/Set%.02d/feature/%d.txt",gid,v); FILE *fp = fopen(buff,"w+"); vector<float> preFeat; vector<vector<float> >derivHist; for ( unsigned int nlen = 0; nlen < flo.size(); ++nlen) { vector<float> hist[4]; float count[4] = {0}; for( int i = 0; i < 4; ++i) {hist[i] = vector<float>(9, 0);} for ( unsigned int ne = 0; ne < flo[nlen].size(); ++ne){ // // convert into angle float angle = 0, flolen = 1; if ( flo[nlen][ne].y ){ angle = tan2g(flo[nlen][ne].x, flo[nlen][ne].y); angle = angle > 0 ? angle : 360 + angle; // use flo length as weight flolen = floLen( flo[nlen][ne].x, flo[nlen][ne].y); flolen = flolen == 0 ? 1.0f : flolen; } int iy = ne/isize.x, ix = ne%isize.x; int indx = iy*2/isize.y + ix*2/isize.x; int inda = floor(angle/45.0f); if (int(angle) == 360) inda = 7; if(indx >= 4 || inda >= 8) int db = 1; hist[0][inda] += flolen; count[0] += flolen; } vector<float> curFeat; for ( int i = 0; i < 1; ++i) { // normalize if(!count[i]) count[i] = 1; transform(hist[i].begin(), hist[i].end(), hist[i].begin(),bind2nd( multiplies<float>(), float(1)/*/(count[i])*/ )); hist[i].back() = count[i]; floHist.insert( floHist.end(), hist[i].begin(), hist[i].end()); curFeat.insert(curFeat.end(), hist[i].begin(), hist[i].end()); for(int j = 0; j < hist[i].size(); ++j) fprintf(fp,"%.08lf\t",hist[i][j]); } if( nlen) derivHist.push_back(difFeat(curFeat, preFeat)); preFeat = curFeat; fprintf(fp,"\n"); } for(int nf = 0; nf < derivHist.size(); ++nf){ for(int ne = 0; ne < derivHist[nf].size(); ++ne) fprintf(fp,"%.08lf\t", derivHist[nf][ne]); fprintf(fp,"\n"); } fclose(fp); if(mos[v] >= 0 && mos[v] <= 50) pfeat.push_back(floHist); // for each video else nfeat.push_back(floHist); } }