// Determines the total number of computations needed by RBC to // find the the NNS. This function is useful mainly for // evaluating the effectiveness of the RBC. void searchStats(matrix q, matrix x, matrix r, rep *ri, double *avgDists){ unint i, j; unint *repID = (unint*)calloc(q.pr, sizeof(*repID)); real *dToReps = (real*)calloc(q.pr, sizeof(*dToReps)); brutePar(r,q,repID,dToReps); //for each q, need to determine which reps to examine size_t numAdded=0; size_t totalComp=0; #pragma omp parallel for private(j) reduction(+:numAdded,totalComp) for(i=0; i<q.r; i++){ for(j=0; j<r.r; j++ ){ real temp = distVec( q, r, i, j ); //dToRep[i] is current UB on dist to i's NN //temp - ri[j].radius is LB to dist belonging to rep j if( dToReps[i] >= temp - ri[j].radius && temp <= 3.0*dToReps[i] ){ numAdded++; totalComp+=ri[j].len; } } } *avgDists = ((double)totalComp)/((double)q.r); free(repID); free(dToReps); }
//This is a still crude, but doesn't ignore the cache at least void bruteMap2(matrix X, matrix Q, rep *ri, unint* qMap, unint *NNs, real *dToNNs){ unint i, j, k; #pragma omp parallel for private(j,k) for( i=0; i<Q.pr/CL; i++ ){ unint row = i*CL; for(j=0; j<CL; j++){ dToNNs[row+j] = MAX_REAL; NNs[row+j] = 0; } real temp; rep rt[CL]; unint maxLen = 0; for(j=0; j<CL; j++){ rt[j] = ri[qMap[row+j]]; maxLen = MAX(maxLen, rt[j].len); } for(k=0; k<maxLen; k++ ){ for(j=0; j<CL; j++ ){ if( k<rt[j].len ){ temp = distVec( Q, X, row+j, rt[j].lr[k] ); if( temp < dToNNs[row+j]){ NNs[row+j] = rt[j].lr[k]; dToNNs[row+j] = temp; } } } } } }
/*
 * bruteKDists: brute-force k-NN by full sort.
 *
 * For every query row i of q, all distances to the rows of x are computed,
 * an index permutation that sorts them is obtained from
 * gsl_sort_float_index(), and the first k entries are copied out:
 *   NNs[i][j] - index of the j-th entry of the permutation
 *   D[i][j]   - the corresponding distance
 * Scratch storage of q.pr rows by x.pr columns is allocated and released
 * inside the call.
 */
void bruteKDists(matrix x, matrix q, size_t **NNs, real **D, unint k){
  int qi, xi;
  float **dist = (float**)calloc(q.pr, sizeof(*dist));
  size_t **order = (size_t**)calloc(q.pr, sizeof(*order));

  for( qi=0; qi<q.pr; qi++ ){
    dist[qi] = (float*)calloc(x.pr, sizeof(**dist));
    order[qi] = (size_t*)calloc(x.pr, sizeof(**order));
  }

#pragma omp parallel for private(xi)
  for( qi=0; qi<q.r; qi++ ){
    float *dRow = dist[qi];
    size_t *perm = order[qi];

    for( xi=0; xi<x.r; xi++ )
      dRow[xi] = distVec( q, x, qi, xi );

    gsl_sort_float_index(perm, dRow, 1, x.r);

    for( xi=0; xi<k; xi++ ){
      size_t nn = perm[xi];
      NNs[qi][xi] = nn;
      D[qi][xi] = dRow[nn];
    }
  }

  for( qi=0; qi<q.pr; qi++ ){
    free(order[qi]);
    free(dist[qi]);
  }
  free(order);
  free(dist);
}
void CMixtureGaussianKernel::InitAdaptive(const CVisDMatrix& vCentralData, int nWithin2Sigma) { m_nCluster = vCentralData.NRows(); m_vData = vCentralData; nWithin2Sigma++; assert((m_nCluster > nWithin2Sigma)&&(nWithin2Sigma>2)); // Estimate sigma double Sigma; { double avgDist = 0; for (int iStep = 0; iStep < nWithin2Sigma; iStep++ ) { CVisDVector distVec(m_nCluster); // Random choose center int iChoose = MyRand() * (m_nCluster - 1); for (int iData = 0; iData < m_nCluster; iData++ ) { CVisDVector tempVec = vCentralData.Row(iChoose); tempVec = tempVec - vCentralData.Row(iData); double dist = sqrt(tempVec * tempVec); distVec[iData] = dist; } // Find the nWidthin2Sigma-th minvalue double fMax = VisDVectorMaxData(distVec); double fMin; for ( int iMin = 0; iMin < nWithin2Sigma; iMin++ ) { int iTemp = VisDVectorMinIndex(distVec); fMin = distVec[iTemp]; distVec[iTemp] = fMax; } avgDist += fMin; } avgDist /= nWithin2Sigma; Sigma = avgDist / 2; } m_nDataDimesion = vCentralData.NCols(); m_vMixingWeight.Resize(m_nCluster); double weight = (double)1.0 / m_nCluster; for (int iCluster = 0; iCluster < m_nCluster ; iCluster++ ) { m_vMixingWeight[iCluster] = weight; } m_vClusterModel.resize(m_nCluster); for (iCluster = 0; iCluster < m_nCluster; iCluster++ ) { m_vClusterModel[iCluster].SetModel(m_nDataDimesion, vCentralData.Row(iCluster), Sigma); } }
// Checks distVec() and distVec_squared() on two LLColor3 values against the
// Euclidean distance (and its square) computed directly from the components.
void v3color_object::test<19>()
{
	F32 r1 = 1.f;
	F32 g1 = 2.f;
	F32 b1 = 1.2f;
	F32 r2 = -2.3f;
	F32 g2 = 1.11f;
	F32 b2 = 1234.234f;

	LLColor3 llcolor3(r1, g1, b1);
	LLColor3 llcolor3a(r2, g2, b2);

	// Reference value shared by both checks.
	const F32 expected_sq = (r1-r2)*(r1-r2) + (g1-g2)*(g1-g2) + (b1-b2)*(b1-b2);

	F32 val = distVec(llcolor3, llcolor3a);
	ensure("distVec failed ", is_approx_equal(fsqrtf(expected_sq), val));

	F32 val1 = distVec_squared(llcolor3, llcolor3a);
	ensure("distVec_squared failed ", is_approx_equal(expected_sq, val1));
}
// Exact k-NN search with the RBC. This version works better on computers // with a high core count (say > 4) void searchExactManyCoresK(matrix q, matrix x, matrix r, rep *ri, unint **NNs, real **dNNs, unint K){ unint i, j, k; unint **repID = (unint**)calloc(q.pr, sizeof(*repID)); for(i=0; i<q.pr; i++) repID[i] = (unint*)calloc(K, sizeof(**repID)); real **dToReps = (real**)calloc(q.pr, sizeof(*dToReps)); for(i=0; i<q.pr; i++) dToReps[i] = (real*)calloc(K, sizeof(**dToReps)); intList *toSearch = (intList*)calloc(r.pr, sizeof(*toSearch)); for(i=0;i<r.pr;i++) createList(&toSearch[i]); bruteKHeap(r,q,repID,dToReps,K); #pragma omp parallel for private(j,k) for(i=0; i<r.pr/CL; i++){ unint row = CL*i; real temp[CL]; for(j=0; j<q.r; j++ ){ for(k=0; k<CL; k++){ temp[k] = distVec( q, r, j, row+k ); } for(k=0; k<CL; k++){ //dToRep[j] is current UB on dist to j's NN //temp - ri[i].radius is LB to dist belonging to rep i if( row+k<r.r && 3.0*dToReps[j][K-1] >= temp[k] && dToReps[j][K-1] >= temp[k] - ri[row+k].radius) addToList(&toSearch[row+k], j); //query j needs to search rep } } for(j=0;j<CL;j++){ if(row+j<r.r){ while(toSearch[row+j].len % CL != 0) addToList(&toSearch[row+j],DUMMY_IDX); } } } bruteListK(x,q,ri,toSearch,r.r,NNs,dToReps,K); for(i=0; i<q.r; i++){ for(j=0; j<K; j++) dNNs[i][j]=dToReps[i][j]; } for(i=0;i<q.pr;i++) free(dToReps[i]); free(dToReps); for(i=0;i<r.pr;i++) destroyList(&toSearch[i]); free(toSearch); for(i=0;i<q.pr;i++) free(repID[i]); free(repID); }
/*
 * rangeCount: for each query row i of Q, stores in counts[i] the number of
 * rows of X whose distance to the query is strictly less than ranges[i].
 */
void rangeCount(matrix X, matrix Q, real *ranges, unint *counts){
  unint qi, xi;

#pragma omp parallel for private(xi)
  for( qi=0; qi<Q.r; qi++ ){
    unint inside = 0;

    for( xi=0; xi<X.r; xi++ ){
      real d = distVec( Q, X, qi, xi );
      if( d < ranges[qi] )
        inside++;
    }
    counts[qi] = inside;
  }
}
// A basic implementation of brute force k-NN search. This does not // use a heap. Instead it computes all distances and then sorts. void bruteK(matrix x, matrix q, unint **NNs, float **dToNNs, unint k) { int i, j, l; int nt = omp_get_max_threads(); float ***d; size_t ***t; d = (float***)calloc(nt, sizeof(*d)); t = (size_t***)calloc(nt, sizeof(*t)); for(i=0; i<nt; i++) { d[i] = (float**)calloc(CL, sizeof(**d)); t[i] = (size_t**)calloc(CL, sizeof(**t)); for(j=0; j<CL; j++) { d[i][j] = (float*)calloc(x.pr, sizeof(***d)); t[i][j] = (size_t*)calloc(x.pr, sizeof(***t)); } } #pragma omp parallel for private(j,l) shared(d,t,k) for( i=0; i<q.pr/CL; i++) { int row = i*CL; int tn = omp_get_thread_num(); //thread num for( j=0; j<x.r; j++) { for( l=0; l<CL; l++) { d[tn][l][j] = distVec( q, x, row+l, j); } } for(l=0; l<CL; l++) gsl_sort_float_smallest_index(t[tn][l], k, d[tn][l], 1, x.r); for(l=0; l<CL; l++) { if(row+l<q.r) { for(j=0; j<k; j++) { NNs[row+l][j] = (unint)t[tn][l][j]; dToNNs[row+l][j] = d[tn][l][t[tn][l][j]]; } } } } for(i=0; i<nt; i++) { for(j=0; j<CL; j++) { free(d[i][j]); free(t[i][j]); } free(d[i]); free(t[i]); } free(t); free(d); }
/*
 * brute: serial brute-force 1-NN search.  For each query row i of Q, every
 * row of X is examined; NNs[i] receives the index of the closest row and
 * dToNNs[i] its distance.  If X has no rows the outputs are 0 and MAX_REAL.
 * Ties keep the lowest index, since only strictly smaller distances replace
 * the current best.
 */
void brute(matrix X, matrix Q, unint *NNs, real *dToNNs){
  unint qi, xi;

  for( qi=0; qi<Q.r; qi++ ){
    real best = MAX_REAL;
    unint bestIdx = 0;

    for( xi=0; xi<X.r; xi++ ){
      real d = distVec( Q, X, qi, xi );
      if( d < best ){
        bestIdx = xi;
        best = d;
      }
    }

    NNs[qi] = bestIdx;
    dToNNs[qi] = best;
  }
}
// Exact 1-NN search with the RBC. This version works better on computers // with a high core count (say > 4) void searchExactManyCores(matrix q, matrix x, matrix r, rep *ri, unint *NNs, real *dToNNs){ unint i, j, k; unint *repID = (unint*)calloc(q.pr, sizeof(*repID)); real *dToReps = (real*)calloc(q.pr, sizeof(*dToReps)); intList *toSearch = (intList*)calloc(r.pr, sizeof(*toSearch)); for(i=0;i<r.pr;i++) createList(&toSearch[i]); brutePar(r,q,repID,dToReps); #pragma omp parallel for private(j,k) for(i=0; i<r.pr/CL; i++){ unint row = CL*i; real temp[CL]; for(j=0; j<q.r; j++ ){ for(k=0; k<CL; k++){ temp[k] = distVec( q, r, j, row+k ); } for(k=0; k<CL; k++){ //dToRep[j] is current UB on dist to j's NN //temp - ri[i].radius is LB to dist belonging to rep i if( row+k<r.r && dToReps[j] >= temp[k] - ri[row+k].radius && 3.0*dToReps[j] >= temp[k] ) addToList(&toSearch[row+k], j); //need to search rep } } for(j=0;j<CL;j++){ if(row+j<r.r){ while(toSearch[row+j].len % CL != 0) addToList(&toSearch[row+j],DUMMY_IDX); } } } //Most of the time is spent in this method bruteList(x,q,ri,toSearch,r.r,NNs,dToReps); for(i=0; i<q.r; i++) dToNNs[i] = dToReps[i]; for(i=0;i<r.pr;i++) destroyList(&toSearch[i]); free(toSearch); free(repID); free(dToReps); }
// Performs a range count using brute force. void rangeCount(matrix X, matrix Q, float *ranges, unint *counts) { float temp; unint i, j, k; #pragma omp parallel for private(j,k,temp) shared(counts,ranges) for( i=0; i<Q.pr/CL; i++ ) { unint row = i*CL; for( j=0; j<CL; j++) counts[row+j] = 0; for(k=0; k<X.r; k++ ) { for( j=0; j<CL; j++) { temp = distVec( Q, X, row+j, k ); counts[row+j] += ( temp < ranges[row+j] ); } } } }
//This is a very crude implementation without any reordering of q void bruteMap(matrix X, matrix Q, rep *ri, unint* qMap, unint *NNs, real *dToNNs){ real temp; unint i, j; #pragma omp parallel for private(j,temp) for( i=0; i<Q.r; i++ ){ dToNNs[i] = MAX_REAL; NNs[i] = 0; rep rt = ri[qMap[i]]; for(j=0; j<rt.len; j++ ){ temp = distVec( Q, X, i, rt.lr[j] ); if( temp < dToNNs[i]){ NNs[i] = rt.lr[j]; dToNNs[i] = temp; } } } }
// Most basic implementation of parallel brute force search. Not very fast. void brutePar(matrix X, matrix Q, unint *NNs, real *dToNNs){ real temp; int i, j; #pragma omp parallel for private(j,temp) for( i=0; i<Q.r; i++ ){ dToNNs[i] = MAX_REAL; NNs[i] = 0; for(j=0; j<X.r; j++ ){ temp = distVec( Q, X, i, j ); if( temp < dToNNs[i]){ NNs[i] = j; dToNNs[i] = temp; } } } }
/*
 * bruteList: 1-NN search driven by per-representative query lists.
 *
 * toSearch[p] lists the queries that must examine representative p's points
 * (ri[p].lr[0..len-1]).  Each list is assumed to be padded with DUMMY_IDX to
 * a multiple of CL and is consumed CL entries at a time.  For every group a
 * per-lane best candidate is found without synchronisation and then merged
 * into the shared NNs/dToNNs arrays inside an OpenMP critical section,
 * because several representatives may update the same query.
 *
 * NNs and dToNNs are reset (0 / MAX_REAL) for all Q.r queries on entry.
 */
void bruteList(matrix X, matrix Q, rep *ri, intList *toSearch, unint numReps, unint *NNs, real *dToNNs){
  real dist;
  unint qi, repIdx, blk, pt, lane;

  for(qi=0; qi<Q.r; qi++){
    dToNNs[qi] = MAX_REAL;
    NNs[qi] = 0;
  }

#pragma omp parallel for private(blk,pt,lane,dist)
  for( repIdx=0; repIdx<numReps; repIdx++ ){
    rep owned = ri[repIdx];

    for( blk=0; blk<toSearch[repIdx].len/CL; blk++ ){  /* list is assumed padded */
      unint base = blk*CL;
      unint qIdx[CL];
      unint bestIdx[CL];
      real bestDist[CL];

      for(lane=0; lane<CL; lane++){
        qIdx[lane] = toSearch[repIdx].x[base+lane];
        bestDist[lane] = MAX_REAL;
      }

      for(pt=0; pt<owned.len; pt++){
        for(lane=0; lane<CL; lane++){
          if( qIdx[lane]==DUMMY_IDX )
            continue;  /* padding entry */

          dist = distVec( Q, X, qIdx[lane], owned.lr[pt] );
          if( dist < bestDist[lane] ){
            bestIdx[lane] = owned.lr[pt];
            bestDist[lane] = dist;
          }
        }
      }

      /* Merge this group's candidates into the shared result arrays. */
#pragma omp critical
      {
        for(lane=0; lane<CL; lane++){
          unint target = qIdx[lane];
          if( target!=DUMMY_IDX && bestDist[lane] < dToNNs[target] ){
            NNs[target] = bestIdx[lane];
            dToNNs[target] = bestDist[lane];
          }
        }
      }
    }
  }
}
/*
 * evalApprox: prints how good a set of (approximate) NN answers is.
 *
 * For each query i the distance to its reported neighbour x[NNs[i]] is used
 * as a radius; rangeCount() then counts how many rows of x lie strictly
 * closer than that.  The mean of those counts over the q.r queries is
 * printed.
 */
void evalApprox(matrix q, matrix x, unint *NNs){
  unint qi;
  double closerSum = 0.0;
  real *ranges = (real*)calloc(q.pr, sizeof(*ranges));
  unint *counts = (unint*)calloc(q.pr, sizeof(*counts));

  for(qi=0; qi<q.r; qi++)
    ranges[qi] = distVec(q, x, qi, NNs[qi]);

  rangeCount(x, q, ranges, counts);

  for(qi=0; qi<q.r; qi++)
    closerSum += ((double)counts[qi]);
  closerSum /= q.r;

  printf("average num closer = %6.5f \n", closerSum);

  free(counts);
  free(ranges);
}
/*
 * searchExact2: exact 1-NN search with the RBC, instrumented with wall-clock
 * timing of its three phases (each printed with a "[SE]" prefix):
 *   1. brutePar2(r,q,...)  - per-query distance to the representative set;
 *   2. list construction   - query j joins representative i's list when
 *                            dToReps[j] >= dist(j,i) - ri[i].radius, and each
 *                            list is padded with DUMMY_IDX to a multiple of CL;
 *   3. bruteListRev(...)   - the search over the lists, writing NNs.
 */
void searchExact2(matrix q, matrix x, matrix r, rep *ri, unint *NNs){
  unint repIdx, qi;
  struct timeval tStart, tStop;
  unint *repID = (unint*)calloc(q.pr, sizeof(*repID));
  real *dToReps = (real*)calloc(q.pr, sizeof(*dToReps));
  intList *toSearch = (intList*)calloc(r.pr, sizeof(*toSearch));

  for(repIdx=0; repIdx<r.pr; repIdx++)
    createList(&toSearch[repIdx]);

  /* Phase 1 */
  gettimeofday(&tStart, NULL);
  brutePar2(r, q, repID, dToReps);
  gettimeofday(&tStop, NULL);
  printf("[SE]brutePar2 time elapsed = %6.4f \n", timeDiff(tStart,tStop) );

  /* Phase 2 */
  gettimeofday(&tStart, NULL);
#pragma omp parallel for private(qi)
  for(repIdx=0; repIdx<r.r; repIdx++){
    for(qi=0; qi<q.r; qi++){
      real dQR = distVec( q, r, qi, repIdx );
      /* dToReps[qi] is the current upper bound on the NN distance;
         dQR - radius is a lower bound for anything owned by this rep. */
      if( dToReps[qi] >= dQR - ri[repIdx].radius )
        addToList(&toSearch[repIdx], qi);  /* this rep must be searched */
    }
    while( toSearch[repIdx].len % CL != 0 )
      addToList(&toSearch[repIdx], DUMMY_IDX);
  }
  gettimeofday(&tStop, NULL);
  printf("[SE]loop time elapsed = %6.4f \n", timeDiff(tStart,tStop) );

  /* Phase 3 */
  gettimeofday(&tStart, NULL);
  bruteListRev(x, q, ri, toSearch, r.r, NNs, dToReps);
  gettimeofday(&tStop, NULL);
  printf("[SE]bruteListRev time elapsed = %6.4f \n", timeDiff(tStart,tStop) );

  for(repIdx=0; repIdx<r.pr; repIdx++)
    destroyList(&toSearch[repIdx]);
  free(toSearch);
  free(dToReps);
  free(repID);
}
/*
 * bruteList (per-query lists): toSearch[i] lists the representatives that
 * query i must examine.  For each listed representative all of its points
 * (lr[0..len-1]) are compared with the query; the closest point overall is
 * stored in NNs[i] and its distance in dToNNs[i].  A query with nothing to
 * search ends with NNs[i] = 0 and dToNNs[i] = MAX_REAL.
 */
void bruteList(matrix X, matrix Q, rep *ri, intList *toSearch, unint *NNs, real *dToNNs){
  real d;
  unint qi, slot, pos;

#pragma omp parallel for private(slot,pos,d)
  for( qi=0; qi<Q.r; qi++ ){
    dToNNs[qi] = MAX_REAL;
    NNs[qi] = 0;

    for( slot=0; slot<toSearch[qi].len; slot++ ){
      rep owner = ri[ toSearch[qi].x[slot] ];

      for( pos=0; pos<owner.len; pos++ ){
        d = distVec( Q, X, qi, owner.lr[pos] );
        if( !(d < dToNNs[qi]) )
          continue;
        NNs[qi] = owner.lr[pos];
        dToNNs[qi] = d;
      }
    }
  }
}
/*
 * bruteCache: serial brute-force 1-NN search that handles CL queries at a
 * time.  For each row of X the distances to the CL queries of the current
 * group are computed first and then compared against the running minima.
 *
 * All Q.pr/CL*CL query slots are processed, so NNs and dToNNs are assumed to
 * hold at least that many entries.
 */
void bruteCache(matrix X, matrix Q, unint *NNs, real *dToNNs){
  real dist[CL];
  int grp, xi, lane, base;

  for( grp=0; grp<Q.pr/CL; grp++ ){
    base = grp*CL;

    for( lane=0; lane<CL; lane++ ){
      dToNNs[base+lane] = MAX_REAL;
      NNs[base+lane] = 0;
    }

    for( xi=0; xi<X.r; xi++ ){
      for( lane=0; lane<CL; lane++ )
        dist[lane] = distVec( Q, X, base+lane, xi );

      for( lane=0; lane<CL; lane++ ){
        if( !(dist[lane] < dToNNs[base+lane]) )
          continue;
        NNs[base+lane] = xi;
        dToNNs[base+lane] = dist[lane];
      }
    }
  }
}
// Performs a brute force NN search, but only between queries and points // belonging to each query's nearest representative. This method is used by // the one-shot algorithm. void bruteMap(matrix X, matrix Q, rep *ri, unint* qMap, unint *NNs, float *dToNNs) { unint i, j, k; //Sort the queries, so that queries matched to a particular representative //will be processed together, improving cache performance. size_t *qSort = (size_t*)calloc(Q.pr, sizeof(*qSort)); gsl_sort_uint_index(qSort,qMap,1,Q.r); #pragma omp parallel for private(j,k) schedule(static) for( i=0; i<Q.pr/CL; i++ ) { unint row = i*CL; for(j=0; j<CL; j++) { dToNNs[qSort[row+j]] = MAX_float; } float temp; rep rt[CL]; unint maxLen = 0; for(j=0; j<CL; j++) { rt[j] = ri[qMap[qSort[row+j]]]; maxLen = MAX(maxLen, rt[j].len); } for(k=0; k<maxLen; k++ ) { for(j=0; j<CL; j++ ) { if( k<rt[j].len ) { temp = distVec( Q, X, qSort[row+j], rt[j].lr[k] ); //change to LB if( temp < dToNNs[qSort[row+j]]) { NNs[qSort[row+j]] = rt[j].lr[k]; dToNNs[qSort[row+j]] = temp; } } } } } free(qSort); }
int main(int argc, char *argv[]) { // This little trick lets us print to std::cout only if a (dummy) command-line argument is provided. int iprint = argc - 1; Teuchos::RCP<std::ostream> outStream; Teuchos::oblackholestream bhs; // outputs nothing /*** Initialize communicator. ***/ Teuchos::GlobalMPISession mpiSession (&argc, &argv, &bhs); Teuchos::RCP<const Teuchos::Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform().getComm(); const int myRank = comm->getRank(); if ((iprint > 0) && (myRank == 0)) { outStream = Teuchos::rcp(&std::cout, false); } else { outStream = Teuchos::rcp(&bhs, false); } int errorFlag = 0; // *** Example body. try { /*** Read in XML input ***/ std::string filename = "input.xml"; Teuchos::RCP<Teuchos::ParameterList> parlist = Teuchos::rcp( new Teuchos::ParameterList() ); Teuchos::updateParametersFromXmlFile( filename, parlist.ptr() ); filename = "SROMinput.xml"; Teuchos::RCP<Teuchos::ParameterList> SROMlist = Teuchos::rcp( new Teuchos::ParameterList() ); Teuchos::updateParametersFromXmlFile( filename, SROMlist.ptr() ); /*** Initialize main data structure. ***/ Teuchos::RCP<StefanBoltzmannData<RealT> > data = Teuchos::rcp(new StefanBoltzmannData<RealT>(comm, parlist, outStream)); /*** Build vectors and dress them up as ROL vectors. 
***/ Teuchos::RCP<const Tpetra::Map<> > vecmap_u = data->getMatA()->getDomainMap(); Teuchos::RCP<const Tpetra::Map<> > vecmap_z = data->getMatB()->getDomainMap(); Teuchos::RCP<const Tpetra::Map<> > vecmap_c = data->getMatA()->getRangeMap(); Teuchos::RCP<Tpetra::MultiVector<> > u_rcp = Teuchos::rcp(new Tpetra::MultiVector<>(vecmap_u, 1, true)); Teuchos::RCP<Tpetra::MultiVector<> > z_rcp = Teuchos::rcp(new Tpetra::MultiVector<>(vecmap_z, 1, true)); Teuchos::RCP<Tpetra::MultiVector<> > c_rcp = Teuchos::rcp(new Tpetra::MultiVector<>(vecmap_c, 1, true)); Teuchos::RCP<Tpetra::MultiVector<> > du_rcp = Teuchos::rcp(new Tpetra::MultiVector<>(vecmap_u, 1, true)); Teuchos::RCP<Tpetra::MultiVector<> > dz_rcp = Teuchos::rcp(new Tpetra::MultiVector<>(vecmap_z, 1, true)); Teuchos::RCP<Tpetra::MultiVector<> > Eu_rcp = Teuchos::rcp(new Tpetra::MultiVector<>(vecmap_u, 1, true)); Teuchos::RCP<Tpetra::MultiVector<> > Vu_rcp = Teuchos::rcp(new Tpetra::MultiVector<>(vecmap_u, 1, true)); // Set all values to 1 in u, z and c. u_rcp->putScalar(1.0); z_rcp->putScalar(1.0); c_rcp->putScalar(1.0); // Randomize d vectors. du_rcp->randomize(); dz_rcp->randomize(); // Create ROL::TpetraMultiVectors. Teuchos::RCP<ROL::Vector<RealT> > up = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(u_rcp)); Teuchos::RCP<ROL::Vector<RealT> > zp = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(z_rcp)); Teuchos::RCP<ROL::Vector<RealT> > cp = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(c_rcp)); Teuchos::RCP<ROL::Vector<RealT> > dup = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(du_rcp)); Teuchos::RCP<ROL::Vector<RealT> > dzp = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(dz_rcp)); Teuchos::RCP<ROL::Vector<RealT> > Eup = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(Eu_rcp)); Teuchos::RCP<ROL::Vector<RealT> > Vup = Teuchos::rcp(new ROL::TpetraMultiVector<RealT>(Vu_rcp)); // Create ROL SimOpt vectors. 
ROL::Vector_SimOpt<RealT> x(up,zp); ROL::Vector_SimOpt<RealT> d(dup,dzp); /*** Build objective function, constraint and reduced objective function. ***/ Teuchos::RCP<ROL::Objective_SimOpt<RealT> > obj = Teuchos::rcp(new Objective_PDEOPT_StefanBoltzmann<RealT>(data, parlist)); Teuchos::RCP<ROL::EqualityConstraint_SimOpt<RealT> > con = Teuchos::rcp(new EqualityConstraint_PDEOPT_StefanBoltzmann<RealT>(data, parlist)); Teuchos::RCP<ROL::Reduced_Objective_SimOpt<RealT> > objReduced = Teuchos::rcp(new ROL::Reduced_Objective_SimOpt<RealT>(obj, con, up, zp, up)); /*** Build stochastic functionality. ***/ int sdim = parlist->sublist("Problem").get("Stochastic Dimension",4); // Build batch manager Teuchos::RCP<ROL::BatchManager<RealT> > bman = Teuchos::rcp(new ROL::BatchManager<RealT>()); // Build sampler std::vector<Teuchos::RCP<ROL::Distribution<RealT> > > distVec(sdim); Teuchos::ParameterList distList; distList.sublist("SOL").sublist("Distribution").set("Name","Uniform"); distList.sublist("SOL").sublist("Distribution").sublist("Uniform").set("Lower Bound",-1.0); distList.sublist("SOL").sublist("Distribution").sublist("Uniform").set("Upper Bound", 1.0); for (int d = 0; d < sdim; d++) { distVec[d] = ROL::DistributionFactory<RealT>(distList); } Teuchos::RCP<ROL::SampleGenerator<RealT> > sampler = Teuchos::rcp(new ROL::SROMGenerator<RealT>(*SROMlist,bman,distVec)); // Build stochastic problem ROL::StochasticProblem<RealT> opt(*parlist,objReduced,sampler,zp); /*** Check functional interface. 
***/ std::vector<RealT> par(sdim,1.0); obj->setParameter(par); con->setParameter(par); objReduced->setParameter(par); obj->checkGradient(x,d,true,*outStream); obj->checkHessVec(x,d,true,*outStream); con->checkApplyJacobian(x,d,*up,true,*outStream); con->checkApplyAdjointHessian(x,*dup,d,x,true,*outStream); con->checkAdjointConsistencyJacobian(*dup,d,x,true,*outStream); con->checkInverseJacobian_1(*up,*up,*up,*zp,true,*outStream); con->checkInverseAdjointJacobian_1(*up,*up,*up,*zp,true,*outStream); objReduced->checkGradient(*zp,*dzp,true,*outStream); objReduced->checkHessVec(*zp,*dzp,true,*outStream); opt.checkObjectiveGradient(*dzp,true,*outStream); opt.checkObjectiveHessVec(*dzp,true,*outStream); /*** Solve optimization problem. ***/ ROL::Algorithm<RealT> algo_tr("Trust Region",*parlist,false); zp->zero(); // set zero initial guess algo_tr.run(opt, true, *outStream); *outStream << " Solution Statistic: S(z) = " << opt.getSolutionStatistic() << "\n"; data->outputTpetraVector(z_rcp, "control.txt"); RealT w = 0.0, tol = 1.e-8; ROL::Elementwise::Power<RealT> sqr(2.0); Eup->zero(); Vup->zero(); Teuchos::RCP<ROL::Vector<RealT> > up2 = up->clone(); for (int i = 0; i < sampler->numMySamples(); i++) { // Get samples and weights par = sampler->getMyPoint(i); w = sampler->getMyWeight(i); // Solve state equation at current sample con->setParameter(par); con->solve(*cp,*up,*zp,tol); // Accumulate expected value Eup->axpy(w,*up); // Accumulate variance up2->set(*up); up2->applyUnary(sqr); Vup->axpy(w,*up2); } up2->set(*Eup); up2->applyUnary(sqr); Vup->axpy(-1.0,*up2); if (sampler->numMySamples() > 1) { Vup->scale((RealT)sampler->numMySamples()/(RealT)(sampler->numMySamples()-1)); } data->outputTpetraVector(Eu_rcp, "expected_value_state.txt"); data->outputTpetraVector(Vu_rcp, "variance_state.txt"); } catch (std::logic_error err) { *outStream << err.what() << "\n"; errorFlag = -1000; }; // end try if (errorFlag != 0) std::cout << "End Result: TEST FAILED\n"; else std::cout << 
"End Result: TEST PASSED\n"; return 0; }
// Computes, for every vertex and every hyperedge, a hop distance to the set
// of modified vertices.
//
// Result layout (size _nVertices + _edges.size()):
//   [0, _nVertices)           - distance of each vertex
//   [_nVertices, end)         - distance of each hyperedge
// The value 100 means "unreachable / not computed".  Modified vertices get 0.
//
// Method: an all-pairs shortest-path pass over nEdges+1 nodes - the
// hyperedges plus one virtual node (index nEdges) standing for the modified
// set.  pathBuf is the strict lower triangle of the symmetric distance
// matrix, pair (i, j) with i > j stored at i*(i-1)/2 + j.
//   - two hyperedges sharing at least one vertex start at distance 2;
//   - a hyperedge containing a modified vertex is at distance 1 from the
//     virtual node.
// A vertex's distance is then min(current, distance of a containing edge + 1).
//
// NOTE(review): set_intersection requires both ranges to be sorted; this
// assumes every hyperedge keeps its vertex ids in ascending order - confirm.
//
// @param modifySet  indices of the modified vertices
// @return           the distance vector described above
vector<unsigned> Hypergraph::getDistToModification(const vector<int> &modifySet) {
  const unsigned kUnreachable = 100;

  int nTotal = _nVertices+_edges.size();
  vector<unsigned> distVec (nTotal);
  distVec.assign(nTotal, kUnreachable);
  const int nModified = static_cast<int>(modifySet.size());
  for (int i=0; i<nModified; ++i) {
    distVec[modifySet[i]] = 0;
  }

  int nEdges = _edges.size();
  vector<unsigned> pathBuf (nEdges*(nEdges+1)/2);
  pathBuf.assign(nEdges*(nEdges+1)/2, kUnreachable);

  /*initialization*/
  for (int i=0; i<nEdges; ++i) {
    // Edge-edge adjacency: distance 2 when the two hyperedges share a vertex.
    for (int j=0; j<i; ++j) {
      HyperEdge v(_edges[i].size()+_edges[j].size());
      // Fixed: the first range previously ended at _edges[i].begin(), i.e. it
      // was empty, so no intersection was ever detected.
      HyperEdge::iterator vIt = set_intersection(_edges[i].begin(), _edges[i].end(),
                                                 _edges[j].begin(), _edges[j].end(),
                                                 v.begin());
      if (vIt!=v.begin()) {
        pathBuf[i*(i-1)/2+j] = 2;
      }
    }
    // Edge-to-modified-set adjacency: distance 1 when the hyperedge contains
    // any modified vertex.
    for (int j=0; j<nModified; ++j) {
      int modifiedIdx = modifySet[j];
      HyperEdge::iterator it = find(_edges[i].begin(), _edges[i].end(), modifiedIdx);
      if (it!=_edges[i].end()) {
        pathBuf[nEdges*(nEdges-1)/2+i] = 1;
        break;
      }
    }
  }

  // Relax every pair (i, j) through every intermediate node k.
  for (int k=0; k<nEdges+1; ++k) {
    for (int i=0; i<nEdges+1; ++i) {
      if (i==k) {
        continue;
      }
      for (int j=0; j<i; ++j) {
        if (j==k) {
          continue;
        }
        int idxIJ = i*(i-1)/2+j;
        int idxIK = i>k?i*(i-1)/2+k:k*(k-1)/2+i;
        int idxJK = j>k?j*(j-1)/2+k:k*(k-1)/2+j;
        pathBuf[idxIJ] = min(pathBuf[idxIJ], pathBuf[idxIK]+pathBuf[idxJK]);
      }
    }
  }

  // Read off each hyperedge's distance to the virtual node and propagate it,
  // plus one hop, to the vertices the hyperedge contains.
  for (int i=0; i<nEdges; ++i) {
    distVec[i+_nVertices] = pathBuf[nEdges*(nEdges-1)/2+i];
    const int edgeSize = static_cast<int>(_edges[i].size());
    for (int j=0; j<edgeSize; ++j) {
      distVec[_edges[i][j]] = min(distVec[i+_nVertices]+1, distVec[_edges[i][j]]);
    }
  }
  return distVec;
}
int main(int argc, char* argv[]) { Teuchos::GlobalMPISession mpiSession(&argc, &argv); Teuchos::RCP<const Teuchos::Comm<int> > commptr = Teuchos::DefaultComm<int>::getComm(); // This little trick lets us print to std::cout only if a (dummy) command-line argument is provided. int iprint = argc - 1; Teuchos::RCP<std::ostream> outStream; Teuchos::oblackholestream bhs; // outputs nothing if (iprint > 0 && commptr->getRank() == 0) outStream = Teuchos::rcp(&std::cout, false); else outStream = Teuchos::rcp(&bhs, false); int errorFlag = 0; try { /**********************************************************************************************/ /************************* CONSTRUCT SOL COMPONENTS *******************************************/ /**********************************************************************************************/ // Set random seed srand(123456789); // Build samplers size_t dimension = 1; // Initialize distribution Teuchos::RCP<ROL::Distribution<RealT> > dist; std::vector<Teuchos::RCP<ROL::Distribution<RealT> > > distVec(dimension); Teuchos::ParameterList Dlist; Dlist.sublist("SOL").sublist("Distribution").set("Name","Beta"); RealT alpha = 1., beta = 4.; // Fill moment vector and initial guess for (size_t d = 0; d < dimension; d++) { // Build distribution for dimension d alpha++; beta++; Dlist.sublist("SOL").sublist("Distribution").sublist("Beta").set("Shape 1",alpha); Dlist.sublist("SOL").sublist("Distribution").sublist("Beta").set("Shape 2",beta); dist = ROL::DistributionFactory<RealT>(Dlist); distVec[d] = ROL::DistributionFactory<RealT>(Dlist); } // Get ROL parameterlist std::string filename = "input_04.xml"; Teuchos::RCP<Teuchos::ParameterList> parlist = Teuchos::rcp( new Teuchos::ParameterList() ); Teuchos::updateParametersFromXmlFile( filename, parlist.ptr() ); Teuchos::ParameterList &list = parlist->sublist("SOL").sublist("Sample Generator").sublist("SROM"); Teuchos::Array<int> moments = Teuchos::getArrayFromStringParameter<int>(list,"Moments"); 
size_t numMoments = static_cast<size_t>(moments.size()); std::clock_t timer = std::clock(); Teuchos::RCP<ROL::BatchManager<RealT> > bman = Teuchos::rcp(new ROL::TeuchosBatchManager<RealT,int>(commptr)); Teuchos::RCP<ROL::SampleGenerator<RealT> > sampler = Teuchos::rcp(new ROL::SROMGenerator<RealT>(*parlist,bman,distVec)); *outStream << std::endl << "Sample Time: " << (std::clock()-timer)/(RealT)CLOCKS_PER_SEC << " seconds" << std::endl; RealT val = 0., error = 0., data = 0., sum = 0.; *outStream << std::endl; *outStream << std::scientific << std::setprecision(11); *outStream << std::right << std::setw(20) << "Computed Moment" << std::setw(20) << "True Moment" << std::setw(20) << "Relative Error" << std::endl; for (size_t m = 0; m < numMoments; m++) { for (size_t d = 0; d < dimension; d++) { val = 0.; data = distVec[d]->moment(moments[m]); for (size_t k = 0; k < (size_t)sampler->numMySamples(); k++) { val += sampler->getMyWeight(k)*std::pow((sampler->getMyPoint(k))[d],moments[m]); } bman->sumAll(&val,&sum,1); error = std::abs(sum-data)/std::abs(data); if ( error > 1.e-1 ) { errorFlag++; } *outStream << std::right << std::setw(20) << sum << std::setw(20) << data << std::setw(20) << error << std::endl; } } *outStream << std::endl; // std::ofstream file; // std::stringstream name; // name << "samples." 
<< commptr->getRank() << ".txt"; // file.open(name.str().c_str()); // for (size_t k = 0; k < (size_t)sampler->numMySamples(); k++) { // for (size_t d = 0; d < dimension; d++) { // file << std::setprecision(std::numeric_limits<RealT>::digits10) // << std::scientific // << (sampler->getMyPoint(k))[d]; // file << " "; // } // file << std::setprecision(std::numeric_limits<RealT>::digits10) // << std::scientific // << sampler->getMyWeight(k) << std::endl; // } // file.close(); // commptr->barrier(); } catch (std::logic_error err) { *outStream << err.what() << "\n"; errorFlag = -1000; }; // end try if (errorFlag != 0) std::cout << "End Result: TEST FAILED\n"; else std::cout << "End Result: TEST PASSED\n"; return 0; }
//Exact 1-NN search with the RBC. void searchExact(matrix q, matrix x, matrix r, rep *ri, unint *NNs, real *dToNNs){ unint i, j, k; unint *repID = (unint*)calloc(q.pr, sizeof(*repID)); real *dToReps = (real*)calloc(q.pr, sizeof(*dToReps)); intList *toSearch = (intList*)calloc(r.pr, sizeof(*toSearch)); for(i=0;i<r.pr;i++) createList(&toSearch[i]); int nt = omp_get_max_threads(); float ***d; //d is indexed by: thread, cache line #, rep # d = (float***)calloc(nt, sizeof(*d)); for(i=0; i<nt; i++){ d[i] = (float**)calloc(CL, sizeof(**d)); for(j=0; j<CL; j++){ d[i][j] = (float*)calloc(r.pr, sizeof(***d)); } } #pragma omp parallel for private(j,k) //schedule(dynamic) for(i=0; i<q.pr/CL; i++){ unint row = i*CL; unint tn = omp_get_thread_num(); unint minID[CL]; real minDist[CL]; for(j=0;j<CL;j++) minDist[j] = MAX_REAL; for( j=0; j<r.r; j++ ){ for(k=0; k<CL; k++){ d[tn][k][j] = distVec(q, r, row+k, j); if(d[tn][k][j] < minDist[k]){ minDist[k] = d[tn][k][j]; //gamma minID[k] = j; } } } for( j=0; j<CL; j++ ) dToReps[row+j] = minDist[j]; for(j=0; j<r.r; j++ ){ for(k=0; k<CL; k++ ){ real temp = d[tn][k][j]; if( row + k<q.r && minDist[k] >= temp - ri[j].radius && 3.0*minDist[k] >= temp ){ #pragma omp critical { addToList(&toSearch[j], row+k); } } } } } for(i=0; i<r.r; i++){ while(toSearch[i].len % CL != 0) addToList(&toSearch[i],DUMMY_IDX); } bruteList(x,q,ri,toSearch,r.r,NNs,dToReps); for(i=0; i<q.r; i++) dToNNs[i] = dToReps[i]; for(i=0;i<r.pr;i++) destroyList(&toSearch[i]); free(toSearch); free(repID); free(dToReps); for(i=0; i<nt; i++){ for(j=0; j<CL; j++) free(d[i][j]); free(d[i]); } free(d); }
//Exact k-NN search with the RBC void searchExactK(matrix q, matrix x, matrix r, rep *ri, unint **NNs, real **dNNs, unint K){ unint i, j, k; unint *repID = (unint*)calloc(q.pr, sizeof(*repID)); real **dToReps = (real**)calloc(q.pr, sizeof(*dToReps)); for(i=0; i<q.pr; i++) dToReps[i] = (real*)calloc(K, sizeof(**dToReps)); intList *toSearch = (intList*)calloc(r.pr, sizeof(*toSearch)); for(i=0;i<r.pr;i++) createList(&toSearch[i]); int nt = omp_get_max_threads(); float ***d; //d is indexed by: thread, cache line #, rep # d = (float***)calloc(nt, sizeof(*d)); for(i=0; i<nt; i++){ d[i] = (float**)calloc(CL, sizeof(**d)); for(j=0; j<CL; j++){ d[i][j] = (float*)calloc(r.pr, sizeof(***d)); } } heap **hp; hp = (heap**)calloc(nt, sizeof(*hp)); for(i=0; i<nt; i++){ hp[i] = (heap*)calloc(CL, sizeof(**hp)); for(j=0; j<CL; j++) createHeap(&hp[i][j],K); } #pragma omp parallel for private(j,k) for(i=0; i<q.pr/CL; i++){ unint row = i*CL; unint tn = omp_get_thread_num(); heapEl newEl; for( j=0; j<r.r; j++ ){ for(k=0; k<CL; k++){ d[tn][k][j] = distVec(q, r, row+k, j); if( d[tn][k][j] < hp[tn][k].h[0].val ){ newEl.id = j; newEl.val = d[tn][k][j]; replaceMax( &hp[tn][k], newEl ); } } } for(j=0; j<r.r; j++ ){ for(k=0; k<CL; k++ ){ real minDist = hp[tn][k].h[0].val; real temp = d[tn][k][j]; if( row + k<q.r && minDist >= temp - ri[j].radius && 3.0*minDist >= temp ){ #pragma omp critical { addToList(&toSearch[j], row+k); } } } } for(j=0; j<CL; j++) reInitHeap(&hp[tn][j]); } for(i=0; i<r.r; i++){ while(toSearch[i].len % CL != 0) addToList(&toSearch[i],DUMMY_IDX); } bruteListK(x,q,ri,toSearch,r.r,NNs,dNNs,K); //clean-up for(i=0; i<nt; i++){ for(j=0; j<CL; j++) destroyHeap(&hp[i][j]); free(hp[i]); } free(hp); for(i=0;i<r.pr;i++) destroyList(&toSearch[i]); free(toSearch); free(repID); for(i=0;i<q.pr; i++) free(dToReps[i]); free(dToReps); for(i=0; i<nt; i++){ for(j=0; j<CL; j++) free(d[i][j]); free(d[i]); } free(d); }