Example #1
0
// Solve a linear system involving the Dirac operator configured by `param`,
// using the inverter selected by param->inv_type (CG, BiCGstab or GCR), and
// copy the solution back into the host field wrapping hp_x.
//
// hp_x  : host pointer wrapped as the solution field. It is read as the
//         initial guess when param->use_init_guess == QUDA_USE_INIT_GUESS_YES
//         and is the destination of x->saveCPUSpinorField() at the end.
// hp_b  : host pointer wrapped as the source (right-hand side) field.
// param : solver configuration. This function also writes to it: spinorGiB is
//         set to a memory estimate, and secs, gflops and iter are reset to 0
//         before the solve (presumably the solvers accumulate into them, since
//         they receive *param -- confirm against the solver classes).
//
// NOTE(review): d, dSloppy, dPre, diracCreation, diracTune and verbosity are
// not declared in this function; they are presumably file-scope state shared
// with createDirac()/tuneDirac() -- confirm.
void invertQuda(void *hp_x, void *hp_b, QudaInvertParam *param)
{
  // check the gauge fields have been created
  cudaGaugeField *cudaGauge = checkGauge(param);

  checkInvertParam(param);
  // A preconditioner precision different from the sloppy precision is only
  // rejected when a preconditioner inverter is actually selected.
  if (param->cuda_prec_sloppy != param->prec_precondition && 
      param->inv_type_precondition != QUDA_INVALID_INVERTER)
    errorQuda("Sorry, cannot yet use different sloppy and preconditioner precisions");

  verbosity = param->verbosity;

  // pc_solve describes the system being solved, pc_solution the requested
  // solution type; both flags are derived purely from the enum values below.
  bool pc_solve = (param->solve_type == QUDA_DIRECT_PC_SOLVE ||
		   param->solve_type == QUDA_NORMEQ_PC_SOLVE);

  bool pc_solution = (param->solution_type == QUDA_MATPC_SOLUTION ||
		      param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION);

  // Spinor memory estimate, built up in place:
  //   VolumeCB * spinorSiteSize, doubled unless pc_solve is set,
  //   times the element size for cuda_prec,
  //   times a per-inverter multiplier, divided by 2^30 (bytes -> GiB).
  // NOTE(review): the multipliers 5/7 and 8/9 presumably count how many spinor
  // fields each solver keeps alive -- confirm against the solver code.
  param->spinorGiB = cudaGauge->VolumeCB() * spinorSiteSize;
  if (!pc_solve) param->spinorGiB *= 2;
  param->spinorGiB *= (param->cuda_prec == QUDA_DOUBLE_PRECISION ? sizeof(double) : sizeof(float));
  if (param->preserve_source == QUDA_PRESERVE_SOURCE_NO) {
    param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 5 : 7)/(double)(1<<30);
  } else {
    param->spinorGiB *= (param->inv_type == QUDA_CG_INVERTER ? 8 : 9)/(double)(1<<30);
  }

  // reset the performance counters reported back through param
  param->secs = 0;
  param->gflops = 0;
  param->iter = 0;

  // create the dirac operator
  DiracParam diracParam;
  createDirac(diracParam, *param, pc_solve);
  // Local aliases for the full-precision, sloppy and preconditioner operators.
  Dirac &dirac = *d;
  Dirac &diracSloppy = *dSloppy;
  Dirac &diracPre = *dPre;

  cpuColorSpinorField *h_b = NULL;
  cpuColorSpinorField *h_x = NULL;
  cudaColorSpinorField *b = NULL;
  cudaColorSpinorField *x = NULL;
  // in/out are NULL here and dereferenced after dirac.prepare(), so prepare()
  // must assign them (presumably it takes them by reference -- confirm).
  cudaColorSpinorField *in = NULL;
  cudaColorSpinorField *out = NULL;

  const int *X = cudaGauge->X();

  // wrap CPU host side pointers
  // The same cpuParam is reused for both fields; only the data pointer differs.
  ColorSpinorParam cpuParam(hp_b, *param, X, pc_solution);
  h_b = new cpuColorSpinorField(cpuParam);
  cpuParam.v = hp_x;
  h_x = new cpuColorSpinorField(cpuParam);
    
  // download source
  ColorSpinorParam cudaParam(cpuParam, *param);     
  cudaParam.create = QUDA_COPY_FIELD_CREATE;
  b = new cudaColorSpinorField(*h_b, cudaParam); 

  if (param->use_init_guess == QUDA_USE_INIT_GUESS_YES) { // download initial guess
    x = new cudaColorSpinorField(*h_x, cudaParam); // solution  
  } else { // zero initial guess
    cudaParam.create = QUDA_ZERO_FIELD_CREATE;
    x = new cudaColorSpinorField(cudaParam); // solution
  }
    
  // Diagnostic: compare the norm of the host source with that of its copy.
  if (param->verbosity >= QUDA_VERBOSE) {
    double nh_b = norm2(*h_b);
    double nb = norm2(*b);
    printfQuda("Source: CPU = %f, CUDA copy = %f\n", nh_b, nb);
  }

  // Tune on the full field for a preconditioned solution type, otherwise on
  // the even part of x.
  tuneDirac(*param, pc_solution ? *x : x->Even());

  // Select the fields the solver will actually operate on.
  dirac.prepare(in, out, *x, *b, param->solution_type);
  if (param->verbosity >= QUDA_VERBOSE) {
    double nin = norm2(*in);
    printfQuda("Prepared source = %f\n", nin);   
  }

  massRescale(param->dslash_type, diracParam.kappa, param->solution_type, param->mass_normalization, *in);

  switch (param->inv_type) {
  case QUDA_CG_INVERTER:
    // CG always runs on MdagM. Unless a MdagM-type solution was requested, the
    // source is first replaced by Mdag applied to it (out is used as scratch).
    if (param->solution_type != QUDA_MATDAG_MAT_SOLUTION && param->solution_type != QUDA_MATPCDAG_MATPC_SOLUTION) {
      copyCuda(*out, *in);
      dirac.Mdag(*in, *out);
    }
    {
      DiracMdagM m(dirac), mSloppy(diracSloppy);
      CG cg(m, mSloppy, *param);
      cg(*out, *in);
    }
    break;
  case QUDA_BICGSTAB_INVERTER:
    // For MdagM-type solutions BiCGstab is run twice: first with Mdag, whose
    // result becomes the source of the second solve with M.
    if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) {
      DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre);
      BiCGstab bicg(m, mSloppy, mPre, *param);
      bicg(*out, *in);
      copyCuda(*in, *out);
    }
    {
      DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre);
      BiCGstab bicg(m, mSloppy, mPre, *param);
      bicg(*out, *in);
    }
    break;
  case QUDA_GCR_INVERTER:
    // Same two-stage scheme as the BiCGstab case, using GCR.
    if (param->solution_type == QUDA_MATDAG_MAT_SOLUTION || param->solution_type == QUDA_MATPCDAG_MATPC_SOLUTION) {
      DiracMdag m(dirac), mSloppy(diracSloppy), mPre(diracPre);
      GCR gcr(m, mSloppy, mPre, *param);
      gcr(*out, *in);
      copyCuda(*in, *out);
    }
    {
      DiracM m(dirac), mSloppy(diracSloppy), mPre(diracPre);
      GCR gcr(m, mSloppy, mPre, *param);
      gcr(*out, *in);
    }
    break;
  default:
    errorQuda("Inverter type %d not implemented", param->inv_type);
  }
  
  // Norm of x as left by the solver, before reconstruct() is applied.
  if (param->verbosity >= QUDA_VERBOSE){
   double nx = norm2(*x);
   printfQuda("Solution = %f\n",nx);
  }
  dirac.reconstruct(*x, *b, param->solution_type);
  
  x->saveCPUSpinorField(*h_x); // since this is a reference, this won't work: h_x = x;
  
  if (param->verbosity >= QUDA_VERBOSE){
    double nx = norm2(*x);
    double nh_x = norm2(*h_x);
    printfQuda("Reconstructed: CUDA solution = %f, CPU copy = %f\n", nx, nh_x);
  }
  
  // Unless the caller asked to keep them, destroy the operators and clear the
  // creation/tuning flags.
  if (!param->preserve_dirac) {
    delete d;
    delete dSloppy;
    delete dPre;
    diracCreation = false;
    diracTune = false;
  }  

  // in/out are not deleted here.
  // NOTE(review): presumably they alias x/b or storage owned by dirac -- confirm.
  delete h_b;
  delete h_x;
  delete b;
  delete x;
  
  return;
}
// get optical flow field descriptor
void optiflowDescriptor( int gid, int vid, vector<int>&label, vector<vector<float> > &pfeat, 
        vector<vector<float> > &nfeat)
{
	bool useDenseOF = USEDENSE;
    int flen = 50;
    vector<int> mos;
    ldLabel(gid, mos);
    char vname[512];
    Point2i isize;
 
    for(int v = vid; v < vid+1; ++v){
        sprintf(vname, "/home/fengzy/Projects/XProject/dataset/Set%.02d/video/%d.avi", gid, v);
        CvCapture *cap = cvCaptureFromFile(vname);
        if(!cap) continue;

        vector<vector<Point2f> > flo(flen);

        int width = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH);
        int height= cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT);
        vector<Point2f> densePt; densePt.reserve( width * height);
        for ( int h = 0; h <height; ++h)
            for ( int w = 0; w < width; ++w)
                densePt.push_back( Point2f(w,h));

        IplImage *pre, *nex;
        nex= cvQueryFrame(cap); pre = cvCreateImage(cvGetSize(nex), 8, 3);
        for(int i = 1; i <= flen; ++i){
            printf("[%d/%d]page\n",i,flen);
            cvCopy(nex, pre);
            nex = cvQueryFrame(cap);
            if(!nex) break;
            Mat mPre(pre), mNex(nex), mask;

            Point2i imgsize = SamplingOpticalFlow(mPre,mNex,mask,densePt, flo[i-1]);
            if(!isize.x) isize=imgsize;
        }
        //	 create histogram
        vector<float> floHist(flo.size()); //// remove the 1st and last frame's optical flow.
        char buff[512];
        sprintf(buff,"/home/fengzy/Projects/XProject/dataset/Set%.02d/feature/%d.txt",gid,v);
        FILE *fp = fopen(buff,"w+");

        vector<float> preFeat;
        vector<vector<float> >derivHist;
        for ( unsigned int nlen = 0; nlen < flo.size(); ++nlen)
        {
            vector<float> hist[4]; float count[4] = {0};
            for( int i = 0; i < 4; ++i) {hist[i] = vector<float>(9, 0);}

            for ( unsigned int ne = 0; ne < flo[nlen].size(); ++ne){
    //			// convert into angle
                float angle = 0, flolen = 1;
                if ( flo[nlen][ne].y ){
                    angle = tan2g(flo[nlen][ne].x, flo[nlen][ne].y);
                    angle = angle > 0 ? angle : 360 + angle;
                    // use flo length as weight
                    flolen = floLen( flo[nlen][ne].x, flo[nlen][ne].y);
                    flolen = flolen == 0 ? 1.0f : flolen;
                }
                int iy = ne/isize.x, ix = ne%isize.x;
                int indx = iy*2/isize.y + ix*2/isize.x;
                int inda = floor(angle/45.0f);
                if (int(angle) == 360) inda = 7;
                if(indx >= 4 || inda >= 8)
                    int db = 1;
                hist[0][inda] += flolen;
                count[0] += flolen;
            }
            vector<float> curFeat; 
            for ( int i = 0; i < 1; ++i)
            {
                // normalize
                if(!count[i]) count[i] = 1;
                transform(hist[i].begin(), hist[i].end(), hist[i].begin(),bind2nd( multiplies<float>(), float(1)/*/(count[i])*/ ));
                hist[i].back() = count[i];
                floHist.insert( floHist.end(), hist[i].begin(), hist[i].end());
                curFeat.insert(curFeat.end(), hist[i].begin(), hist[i].end());
                for(int j = 0; j < hist[i].size(); ++j)
                fprintf(fp,"%.08lf\t",hist[i][j]);
            }
            if( nlen) derivHist.push_back(difFeat(curFeat, preFeat));
            preFeat = curFeat;
            fprintf(fp,"\n");
        }
        for(int nf = 0; nf < derivHist.size(); ++nf){
            for(int ne = 0; ne < derivHist[nf].size(); ++ne)
                fprintf(fp,"%.08lf\t", derivHist[nf][ne]);
            fprintf(fp,"\n");
        }
        fclose(fp);
        
        if(mos[v] >= 0 && mos[v] <= 50) 
            pfeat.push_back(floHist); // for each video
        else nfeat.push_back(floHist);
    }
}