int main(int argc, char **argv) { std::string varPathStr; std::string varNameStr1; std::string varNameStr2; std::string varNameStr3; parseArgs(argc, argv); std::vector<double> beginList; std::vector<double> endList; std::vector<double> strideList; beginList.resize(dimension); endList.resize(dimension); strideList.resize(dimension); if (datafile.empty() || condstring == 0 || dimension == 0 ) { std::cerr << "Usage:\n" << *argv << " -f data-file-name" << " -q query-conditions-in-a-single-string" << " -x histogram-dimension" << " -y begin" << " -e end" << " -s stride" << " [-i index-file-name]" << " [-g log-file-name]" << " [-n name-of-variable]" << " [-p path-of-variable]" << " [-m file model [HDF5(default), H5PART, NETCDF, PNETCDF]" << " [-b use-boundingbox-data-selection]" << " [-v verboseness]" << " [-l mpi-subarray-length]" //<< "\n e.g: ./histogram -f h5uc-data-index.h5 -q 'px < 0.3' -n y -p TimeStep2 -x 1\n" << "\n e.g: ./histogram -f h5uc-data.h5 -i indexfile -q 'px<0.3 && py>0' -x 2 -n py,pz;" << " -y '0,-0.5;' -s '0.1,0.02;' -e '1,0;' -p TimeStep2\n\n" << "\tFor More detailed usage description and examples, please see file GUIDE" << std::endl; return -1; } #ifndef FQ_NOMPI MPI_Init(&argc, &argv); int mpi_size, mpi_rank; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); #endif ibis::gParameters().add(FQ_REPORT_STATISTIC, "true"); ibis::horometer totTimer; totTimer.start(); FQ::FileFormat model = FQ::FQ_HDF5; if (fileModel != 0) { std::string format = fileModel; if (format.compare("HDF5") == 0) { model = FQ::FQ_HDF5; } else if (format.compare("H5PART") == 0) { model = FQ::FQ_H5Part; } else if (format.compare("NETCDF") == 0) { model = FQ::FQ_NetCDF; } else if (format.compare("PNETCDF") == 0) { model = FQ::FQ_pnetCDF; } } if (! indexfile.empty()) { if (verboseness > 1) std::cout << "DEBUG: using indexfile \"" << indexfile.c_str() << "\" ... \n"; } if (varPath != 0) { if (verboseness > 1) std::cout << "Debug: use variable path \"" << varPath << "\"\n"; varPathStr = varPath; } // std::cout << "varName:" << varName << " begin:" << begin << " end:" << end << " stride:" << stride << std::endl; varName1 = strtok(varName, ",;"); if (dimension>1) varName2 = strtok(NULL, ",;"); if (dimension>2) varName3 = strtok(NULL, ",;"); begin1 = atof(strtok(begin, ",;")); if (dimension>1) begin2 = atof(strtok(NULL, ",;")); if (dimension>2) begin3 = atof(strtok(NULL, ",;")); end1 = atof(strtok(end, ",;")); if (dimension>1) end2 = atof(strtok(NULL, ",;")); if (dimension>2) end3 = atof(strtok(NULL, ",;")); stride1 = atof(strtok(stride, ",;")); if (dimension>1) stride2 = atof(strtok(NULL, ",;")); if (dimension>2) stride3 = atof(strtok(NULL, ",;")); //std::cout << "varName is " << varName1 << ", " << varName2 << ", " << varName3 << std::endl; //std::cout << "begin is " << begin1 << ", " << begin2 << ", " << begin3 << std::endl; //std::cout << "end is " << end1 << ", " << end2 << ", " << end3 << std::endl; //std::cout << "stride is " << stride1 << ", " << stride2 << ", " << stride3 << std::endl; if (varName1!=0) varNameStr1 = varName1; if (dimension>1 && varName2!=0) varNameStr2 = varName2; if (dimension>2 && varName3!=0) varNameStr3 = varName3; /* if (mpi_rank==0) { unsigned int dims1 = static_cast<uint32_t>(1+floor((end1-begin1)/stride1)); unsigned int dims2 = static_cast<uint32_t>(1+floor((end2-begin2)/stride2)); unsigned int dims3 = static_cast<uint32_t>(1+floor((end3-begin3)/stride3)); std::cout << "dims1 * dims2 * dims3 = " << dims1 << " * " << dims2 << " * " << dims3 << std::endl; } */ if (logfile.str().empty() != true) { #ifndef FQ_NOMPI logfile << mpi_rank << ".log"; #endif if (verboseness > 1) std::cout << "Debug: using logfile \"" << logfile.str().c_str() << "\"\n"; } if (verboseness >1) { std::cout << "open the file handler" << std::endl; } // open the named file QueryProcessor* queryProcessor = new QueryProcessor(datafile, model, indexfile, verboseness, "", logfile.str().c_str()); // the file handler if (queryProcessor->isValid() == false) { if (verboseness > 0) { std::cout << "ERROR: failed to initiate the QueryProcessor object for file \"" << datafile.c_str() << "\" ...\n"; std::cout << "REPORT: failed to complete processing query" << std::endl; } delete(queryProcessor); #ifndef FQ_NOMPI MPI_Finalize(); #endif return -1; } uint64_t hits = 0; // getNumHits ibis::horometer timer; timer.start(); hits = queryProcessor->getNumHits(condstring, varPathStr, mpi_dim, mpi_len); timer.stop(); if (verboseness > 1) std::cout << "Debug: conditions \"" << condstring << "\" number of hits " << hits << std::endl;; if (hits == 0) { if (verboseness > 1) { std::cout << "Warning -- No element is seleteced ==>" << " the rest of the test is skipped!" << std::endl; } if (verboseness > 0) { #ifndef FQ_NOMPI if (mpi_rank==0) { #endif std::cout << "REPORT: successfully completed processing query with " << hits << " hits" << std::endl; #ifndef FQ_NOMPI } #endif } delete(queryProcessor); #ifndef FQ_NOMPI MPI_Finalize(); #endif return hits; } // executeQuery std::vector<uint64_t> coords; std::vector<uint32_t> counts; bool herr = true; // if (mpi_rank==0) std::cout<<"histogram starting..."<<std::endl; if (varPath != 0) { //coords.reserve(hits*dims.size()); // hits1 = queryProcessor->executeQuery((char*)condstring, coords, varPathStr, FQ::POINTS_SELECTION, mpi_dim, mpi_len); if (dimension==1) { //counts.assign(static_cast<uint32_t>(1+floor(end1-begin1)/stride1), 0); herr = queryProcessor->get1DHistogram ((char*) condstring, varNameStr1, varPathStr, begin1, end1, stride1, counts, mpi_dim, mpi_len); } else if (dimension==2) { // if (mpi_rank==0) std::cout << "in 2Dhistogram" << std::endl; //counts.assign(static_cast<uint32_t>(1+floor(end1-begin1)/stride1)* // static_cast<uint32_t>(1+floor(end2-begin2)/stride2), 0); herr = queryProcessor->get2DHistogram ((char*) condstring, varPathStr, varNameStr1, begin1, end1, stride1, varNameStr2, begin2, end2, stride2, counts, mpi_dim, mpi_len); // if (mpi_rank==0) std::cout << "out 2Dhistogram" << std::endl; } else if (dimension==3) { herr = queryProcessor->get3DHistogram ((char*) condstring, varPathStr, varNameStr1, begin1, end1, stride1, varNameStr2, begin2, end2, stride2, varNameStr3, begin3, end3, stride3, counts, mpi_dim, mpi_len); } if (! herr) { LOGGER(ibis::gVerbose >= 0) << *argv << " failed to compute the histogram"; return -2; } /************************/ /* verify part */ /************************/ if (verification) { //verify the Histogram //if (mpi_rank==0) std::cout << "starting verify the histogram..." << std::endl; uint64_t len = 1; if (len) { //std::cout<<"Warning: May use too large memory. Can only check sum.\n"; } else { /* double data[len]; // std::vector<uint32_t> temp_counts; temp_counts.assign(static_cast<uint32_t>(1+floor(end-begin)/stride), 0); bool verr = true; //std::cout << "starting getData...." << std::endl; verr = queryProcessor->getData(varNameStr, &data[0], varPathStr); #ifndef FQ_NOMPI if (mpi_rank==0) { #endif if (len<=1000000) { //std::cout << "getData success" << std::endl; std::cout << "temp Histogram" << std::endl; // copy from fasbit parth.cpp get1DHistogram if (len != 0) { for (uint32_t i = 0; i < len; ++ i) { ++ temp_counts[static_cast<uint32_t>((data[i] - begin) / stride)]; } } // std::cout << "temp Histogram" << std::endl; std::cout << "temp_counts.size is "<< temp_counts.size() << std::endl; for (int i=0; i<temp_counts.size(); i++) { std::cout << "[" << begin+i*stride << ", " << begin+(i+1)*stride << "]:\t" << temp_counts[i] << std::endl; } std::cout << "test Histogram" << std::endl; std::cout << "counts.size is "<< counts.size() << std::endl; for (int i=0; i<counts.size(); i++) { std::cout << "[" << begin+i*stride << ", " << begin+(i+1)*stride << "]:\t" << counts[i] << std::endl; } // verify two histogram vectors if (counts!=temp_counts) { std::cout << "ERROR:Vector is not match.Histogram fail." << std::endl; } else { std::cout << "histogram success" << std::endl; } } #ifndef FQ_NOMPI } #endif */ } //unsigned int hits = 0 ; //hits = queryProcessor->getNumHits(condstring, varPathStr, mpi_dim, mpi_len); uint64_t hits1 = 0; for (int i=0; i<counts.size(); i++) { hits1 += counts[i]; } if (hits1 != hits) { std::cout<<"Error:\tcheck sum failed. Num of Hit is " << hits << ",and histogram number is " << hits1<<std::endl; } else std::cout<<"verification result is correct.\n"; } std::fstream histogramFile; #ifndef FQ_NOMPI if (mpi_rank==0) { #endif if (dimension==1) { std::fstream file; //char fileName[100]=""; //char path[]="/global/homes/v/vidcina/fq/example/"; //fileName<<dimension<<"D"<<"histogram["<<begin1<<":"<<stride1<<":"<<end1<<"].out"; //sprintf(fileName, "%s%d%s%d%s%d%s%d%s", path, dimension, "Dhistogram[", begin1, ":", stride1, ":", end1, "].out"); //std::string temp=""; //temp.push_back(fileName.str()); std::ostringstream fileName; fileName << hist_path << "_"<< dimension << "D" << "histogram[" << begin1 << ":" << stride1 << ":" <<end1 << "].out"; std::string str = fileName.str(); const char* chr = str.c_str(); file.open(chr, std::ios::out); if ( file.fail() ) { std::cout << str << std::endl; std::cout << "openFile fail" << std::endl; } else { for (int i=0; i<counts.size(); i++) { file << begin1+i*stride1 << "\t" << begin1+(i+1)*stride1 << "\t" << counts[i] << std::endl; } } //histogramFile.close(); } else if (dimension==2) { std::cout << "2DHistogram " << "Variable1 "<< varName1 << " begin " << begin1 << " to " << end1 <<" stride is " << stride1 << "Variable2 "<< varName2 << " begin " << begin2 << " to " << end2 <<" stride is " << stride2 << std::endl ; std::cout << "counts.size is "<< counts.size() << std::endl; unsigned int imax = static_cast<uint32_t>(1+floor((end1-begin1)/stride1)); unsigned int jmax = static_cast<uint32_t>(1+floor((end2-begin2)/stride2)); for (unsigned int i=0; i<imax; i++) { for (unsigned int j=0; j<jmax; j++) { std::cout << "[" << begin1+i*stride1 << ", " << begin1+(i+1)*stride1 << "), [" << begin2+j*stride2 << ", " << begin2+(j+1)*stride2 << "):\t" << counts[i*jmax+j] << std::endl; } } } else if (dimension==3) { std::cout << "3DHistogram " << "Variable1 "<< varName1 << " begin " << begin1 << " to " << end1 << " stride is " << stride1 << "Variable2 "<< varName2 << " begin " << begin2 << " to " << end2 << " stride is " << stride2 << "Variable3 "<< varName3 << " begin " << begin3 << " to " << end2 << " stride is " << stride3 << std::endl ; std::cout << "counts.size is "<< counts.size() << std::endl; unsigned int imax = static_cast<uint32_t> (1+floor((end1-begin1)/stride1)); unsigned int jmax = static_cast<uint32_t> (1+floor((end2-begin2)/stride2)); unsigned int kmax = static_cast<uint32_t> (1+floor((end3-begin3)/stride3)); #ifndef FQ_NOMPI if (mpi_rank==0 && imax*jmax*kmax!=counts.size()) { std::cout<<"ERROR: counts.size not match."<<std::endl; delete(queryProcessor); MPI_Finalize(); return 0; } #endif for (unsigned int i=0; i<imax; i++) { for (unsigned int j=0; j<jmax; j++) { for (unsigned int k=0; k<kmax; k++) { if (easyToShow) { if (counts[i*jmax*kmax + j*kmax + k]!=0) { std::cout << "[" << begin1+i*stride1 << ", " << begin1+(i+1)*stride1 << "), [" << begin2+j*stride2 << ", " << begin2+(j+1)*stride2 << "), [" << begin3+k*stride3 << ", " << begin3+(k+1)*stride3 << "):\t" << counts[i*jmax*kmax + j*kmax + k] << std::endl; } } else { std::cout << "[" << begin1+i*stride1 << ", " << begin1+(i+1)*stride1 << "), [" << begin2+j*stride2 << ", " << begin2+(j+1)*stride2 << "), [" << begin3+k*stride3 << ", " << begin3+(k+1)*stride3 << "):\t" << counts[i*jmax*kmax + j*kmax + k] << std::endl; } } } } std::cout << "successfuly printed histogram" << std::endl; } #ifndef FQ_NOMPI } #endif // }//end else }//end if(!varPath) // MPI_Barrier(MPI_COMM_WORLD); /* if (hits != hits1) { std::cout << "Error -- number of hits does not match!" << std::endl; std::cout << "REPORT: failed to complete processing query" << std::endl; delete(queryProcessor); #ifndef FQ_NOMPI MPI_Finalize(); #endif return -1; } */ if (verboseness > 0) { #ifndef FQ_NOMPI if (mpi_rank==0) { #endif std::cout << "REPORT: successfully completed get1DHistogram with " << counts.size() << " histogram size" << std::endl; #ifndef FQ_NOMPI } #endif } delete(queryProcessor); #ifndef FQ_NOMPI MPI_Finalize(); #endif totTimer.stop(); LOGGER(FastQuery::reportTiming()) << "Statistic\thistogram::totTimer\t" << totTimer.CPUTime() << "\t" << totTimer.realTime() << "\t"; return hits; } // main
int main(int argc, char **argv) { FILE *output; ibis::horometer timer1,timer2; timer1.start(); timer2.start(); int CHOP=1; const rlim_t STACK_SIZE = 1000*1024*1024; struct rlimit rl; rl.rlim_cur = STACK_SIZE; int ret = setrlimit(RLIMIT_STACK,&rl); map<int, string> Calls; Calls[0]="AA"; Calls[1]="AT"; Calls[2]="AC"; Calls[3]="AG"; Calls[4]="AN"; Calls[5]="TA"; Calls[6]="TT"; Calls[7]="TC"; Calls[8]="TG"; Calls[9]="TN"; Calls[10]="CA"; Calls[11]="CT"; Calls[12]="CC"; Calls[13]="CG"; Calls[14]="CN"; Calls[15]="GA"; Calls[16]="GT"; Calls[17]="GC"; Calls[18]="GG"; Calls[19]="GN"; Calls[20]="NA"; Calls[21]="NT"; Calls[22]="NC"; Calls[23]="NG"; Calls[24]="NN"; Calls[25]="--"; parseArgs(argc, argv); if(datafile.empty() || pos.empty() || varName.empty() || varPath.empty() || outfile.empty()){ std::cerr << "Usage:\n" << *argv << " -f data-file-name" << " -n variable-name" << " -p variable-path" << " -r ref&row indices ('x|y:z' where x=ref sample,x:y as row range) " << " -r snp bounds (x:y, where x=start, y=end)" << " -d variable-dimension (e.g. 2:2)" << " -o output-file" << std::endl; } #ifndef FQ_NOMPI MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); #endif FQ::FileFormat model = FQ::FQ_HDF5; bool berr = true; QueryProcessor* queryProcessor = new QueryProcessor(datafile, model, "", 0, "",""); if (queryProcessor->isValid() == false) { printf("ERROR: Failed to initiate query processor for file.\n"); berr = false; } string variable; vector<uint64_t> dims; FQ::DataType type; if (! queryProcessor->getVariableInfo(varName, variable, dims, &type, varPath)) { printf("ERROR: Failed to get the information for variable\n"); berr = false; } else { if(dims.size()!=2){ /*dims is derived from the data*/ printf("ERROR: The data has an invalid dimension. SNP data should be in 2D matrix only.\n"); berr=false; } string str,param; vector<uint64_t> sample; vector<uint64_t> row; //snps int prePos = 0, idx = 0,blocksize=1; FastQuery* fq = new FastQuery(datafile, model, "", 0, "",""); int *refdata=NULL, *data=NULL, *consensus=NULL; ostringstream paramtemp,ref; /*Get the index for reference sample and subrows*/ idx = pos.find('|',prePos); if(idx!=pos.npos && idx!=pos.length()-1){ str = pos.substr(prePos,idx - prePos); sample.push_back(atoi(str.c_str())); /*get the ref. sample index*/ prePos=idx+1; idx=pos.find(':',prePos); if(idx!=pos.npos){ /*indicates multiple comparison*/ if(idx==pos.length()-1){ /*string ends with ':'*/ printf("ERROR: Incomplete indices specified for sample comparison.\n"); return 0; } str = pos.substr(prePos,idx - prePos); sample.push_back(atoi(str.c_str())); /*get the start index*/ prePos=idx+1; str = pos.substr(prePos,pos.length() - prePos); sample.push_back(atoi(str.c_str())); /*get the end index*/ if(sample[2]<0 || sample[2]>=dims[1] || sample[1]>=sample[2]){ printf("ERROR: Indices out of bounds/invalid range.\n"); return 0; } }else{ str = pos.substr(prePos,idx - prePos); sample.push_back(atoi(str.c_str())); /*get the index of another sample*/ } if(sample[0]<0 || sample[0]>=dims[1] || sample[1]<0 || sample[1]>=dims[1]){ printf("ERROR: Indices out of bounds.\n"); return 0; } }else{ printf("ERROR: Invalid indices specified for sample comparison.\n"); return 0; } /*Get the SNP bounds for subset sample*/ if(!snpbound.empty()){ idx=snpbound.find(':',0); if(idx==snpbound.npos || idx==snpbound.length()-1){ printf("ERROR: Invalid SNP bounds."); return 1; } str = snpbound.substr(0,idx); row.push_back(atoi(str.c_str())); str= snpbound.substr(idx+1,snpbound.length()-idx+1); row.push_back(atoi(str.c_str())); if(row[0]<0 || row[1]>=dims[0] || row[1]<0 || row[0]>=row[1]){ printf("ERROR: Invalid SNP bounds.\n"); return 0; } dims[0]=row[1]-row[0]+1; /*dims is now the SNP bounds for subregion*/ } if(sample.size()==3){ blocksize=sample[2]-sample[1]+1; /*block of data*/ } if((dims[0]+(dims[0]*blocksize)+dims[0])*sizeof(int)>(1000*1024*1024)){ //printf("Error: Insufficient memory to handle huge block.\nREPORT: Failed to complete comparing data.\n"); //return 1; printf("\nData is greater than the available/alloted memory space.\n"); CHOP=10; } printf("Running with %d thread/s.\n",NUMTHREADS); int rowchunk=dims[0]/CHOP, offset; /*CHOP is 1 if data fits in memory*/ float com_time=0; ostringstream outtext; pthread_t threads[NUMTHREADS]; threadData *thread_data = (threadData*)malloc(NUMTHREADS*sizeof(threadData)); refdata=(int*)malloc(rowchunk*sizeof(int)); data=(int*)malloc((rowchunk*blocksize)*sizeof(int)); consensus=(int*)calloc(rowchunk,sizeof(int)); output=fopen(outfile.c_str(),"w"); /*Print output header*/ if(sample.size()==3) fprintf(output,"Reference:%lu\nBlock samples:%lu-%lu\n",sample[0],sample[1],sample[2]); else fprintf(output,"Reference:%lu\nSample:%lu\n",sample[0],sample[1]); fprintf(output,"SNPIdx\tRef\t"); for(int i=sample[1];i<sample[1]+blocksize;i++){ fprintf(output,"%d\t",i); } fprintf(output,"\n"); if(!snpbound.empty()){ /*set start position if bounded*/ offset=(int)row[0]; } int rem=0; if(dims[0]%CHOP!=0){ rem = dims[0]%CHOP; CHOP++; /*another chunk for the remainder*/ } for(int h=0;h<CHOP;h++){ if(h+1==CHOP && rem!=0){ rowchunk=rem; free(data); free(refdata); free(consensus); data=(int*)malloc((rowchunk*blocksize)*sizeof(int)); refdata=(int*)malloc(rowchunk*sizeof(int)); consensus=(int*)calloc(rowchunk,sizeof(int)); printf("Remainder chunk.%d\n",rowchunk); } if(!snpbound.empty()){ ref << variable << "[" << offset <<":"<< offset+rowchunk << "," << sample[0] << "]"; if(sample.size()==3) paramtemp << variable << "[" << offset <<":"<< offset+rowchunk << "," << sample[1] << ":" << sample[2]+1 <<"]"; else paramtemp << variable << "[" << offset <<":"<< offset+rowchunk << "," << sample[1] << "]"; }else{ ref << variable << "[:," << sample[0] << "]"; if(sample.size()==3) paramtemp << variable << "[:," << sample[1] << ":" << sample[2]+1 << "]"; else paramtemp << variable << "[:," << sample[1] << "]"; } param = ref.str(); fq->getData(param,refdata); param = paramtemp.str(); cout << h << "Ref:"<< ref.str() <<"\tParam:"<<paramtemp.str()<<"\n\n"; fq->getData(param,data); /*param=var[:,0:2]*/ /*printf("Reference Row:\n"); for(int i=0; i<rowchunk;i++) printf("%d ",refdata[i]); printf("\n\n"); printf("Comparison Row Block:\n"); for(int x=0;x<(blocksize*rowchunk);x++){ printf("%d ",data[x]); if((x+1)%blocksize==0) printf("\n\n"); }*/ /*THREADING of the comparison*/ for(int i=0;i<NUMTHREADS; i++){ thread_data[i].refdata = refdata; thread_data[i].data = data; thread_data[i].consensus=consensus; thread_data[i].snpcount = rowchunk; thread_data[i].blocksize = blocksize; thread_data[i].tid = i; thread_data[i].tcount = NUMTHREADS; pthread_create(&threads[i],NULL,compareSample, (void*) &thread_data[i]); } for(int i=0;i<NUMTHREADS;i++){ pthread_join(threads[i],NULL); } /*RESULT printing*/ timer1.stop(); for(int i=0;i<rowchunk;i++){ if(consensus[i]==1){ outtext << i+offset << "\t" << Calls[refdata[i]] << "\t"; for(int x=0;x<blocksize;x++){ outtext << Calls[data[i*blocksize+x]] << "\t"; } fprintf(output,"%s\n",outtext.str().c_str()); outtext.str(""); } } offset+=rowchunk; paramtemp.str(""); ref.str(""); outtext.str(""); timer1.resume(); } timer1.stop(); printf("Comparison Time:%f\n", timer1.realTime()); free(data); free(refdata); free(consensus); free(thread_data); fclose(output); } delete(queryProcessor); #ifndef FQ_NOMPI MPI_Finalize(); #endif timer2.stop(); if (berr) { printf("REPORT: Successfully completed comparing data.\n Total time elapsed:%f\n", timer2.realTime()); return 0; } else { printf("REPORT: Failed to complete comparing data.\n"); return -1; } }