int main(int argc, char **argv) { FILE *output; ibis::horometer timer1,timer2; timer1.start(); timer2.start(); int CHOP=1; const rlim_t STACK_SIZE = 1000*1024*1024; struct rlimit rl; rl.rlim_cur = STACK_SIZE; int ret = setrlimit(RLIMIT_STACK,&rl); map<int, string> Calls; Calls[0]="AA"; Calls[1]="AT"; Calls[2]="AC"; Calls[3]="AG"; Calls[4]="AN"; Calls[5]="TA"; Calls[6]="TT"; Calls[7]="TC"; Calls[8]="TG"; Calls[9]="TN"; Calls[10]="CA"; Calls[11]="CT"; Calls[12]="CC"; Calls[13]="CG"; Calls[14]="CN"; Calls[15]="GA"; Calls[16]="GT"; Calls[17]="GC"; Calls[18]="GG"; Calls[19]="GN"; Calls[20]="NA"; Calls[21]="NT"; Calls[22]="NC"; Calls[23]="NG"; Calls[24]="NN"; Calls[25]="--"; parseArgs(argc, argv); if(datafile.empty() || pos.empty() || varName.empty() || varPath.empty() || outfile.empty()){ std::cerr << "Usage:\n" << *argv << " -f data-file-name" << " -n variable-name" << " -p variable-path" << " -r ref&row indices ('x|y:z' where x=ref sample,x:y as row range) " << " -r snp bounds (x:y, where x=start, y=end)" << " -d variable-dimension (e.g. 2:2)" << " -o output-file" << std::endl; } #ifndef FQ_NOMPI MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); #endif FQ::FileFormat model = FQ::FQ_HDF5; bool berr = true; QueryProcessor* queryProcessor = new QueryProcessor(datafile, model, "", 0, "",""); if (queryProcessor->isValid() == false) { printf("ERROR: Failed to initiate query processor for file.\n"); berr = false; } string variable; vector<uint64_t> dims; FQ::DataType type; if (! queryProcessor->getVariableInfo(varName, variable, dims, &type, varPath)) { printf("ERROR: Failed to get the information for variable\n"); berr = false; } else { if(dims.size()!=2){ /*dims is derived from the data*/ printf("ERROR: The data has an invalid dimension. SNP data should be in 2D matrix only.\n"); berr=false; } string str,param; vector<uint64_t> sample; vector<uint64_t> row; //snps int prePos = 0, idx = 0,blocksize=1; FastQuery* fq = new FastQuery(datafile, model, "", 0, "",""); int *refdata=NULL, *data=NULL, *consensus=NULL; ostringstream paramtemp,ref; /*Get the index for reference sample and subrows*/ idx = pos.find('|',prePos); if(idx!=pos.npos && idx!=pos.length()-1){ str = pos.substr(prePos,idx - prePos); sample.push_back(atoi(str.c_str())); /*get the ref. sample index*/ prePos=idx+1; idx=pos.find(':',prePos); if(idx!=pos.npos){ /*indicates multiple comparison*/ if(idx==pos.length()-1){ /*string ends with ':'*/ printf("ERROR: Incomplete indices specified for sample comparison.\n"); return 0; } str = pos.substr(prePos,idx - prePos); sample.push_back(atoi(str.c_str())); /*get the start index*/ prePos=idx+1; str = pos.substr(prePos,pos.length() - prePos); sample.push_back(atoi(str.c_str())); /*get the end index*/ if(sample[2]<0 || sample[2]>=dims[1] || sample[1]>=sample[2]){ printf("ERROR: Indices out of bounds/invalid range.\n"); return 0; } }else{ str = pos.substr(prePos,idx - prePos); sample.push_back(atoi(str.c_str())); /*get the index of another sample*/ } if(sample[0]<0 || sample[0]>=dims[1] || sample[1]<0 || sample[1]>=dims[1]){ printf("ERROR: Indices out of bounds.\n"); return 0; } }else{ printf("ERROR: Invalid indices specified for sample comparison.\n"); return 0; } /*Get the SNP bounds for subset sample*/ if(!snpbound.empty()){ idx=snpbound.find(':',0); if(idx==snpbound.npos || idx==snpbound.length()-1){ printf("ERROR: Invalid SNP bounds."); return 1; } str = snpbound.substr(0,idx); row.push_back(atoi(str.c_str())); str= snpbound.substr(idx+1,snpbound.length()-idx+1); row.push_back(atoi(str.c_str())); if(row[0]<0 || row[1]>=dims[0] || row[1]<0 || row[0]>=row[1]){ printf("ERROR: Invalid SNP bounds.\n"); return 0; } dims[0]=row[1]-row[0]+1; /*dims is now the SNP bounds for subregion*/ } if(sample.size()==3){ blocksize=sample[2]-sample[1]+1; /*block of data*/ } if((dims[0]+(dims[0]*blocksize)+dims[0])*sizeof(int)>(1000*1024*1024)){ //printf("Error: Insufficient memory to handle huge block.\nREPORT: Failed to complete comparing data.\n"); //return 1; printf("\nData is greater than the available/alloted memory space.\n"); CHOP=10; } printf("Running with %d thread/s.\n",NUMTHREADS); int rowchunk=dims[0]/CHOP, offset; /*CHOP is 1 if data fits in memory*/ float com_time=0; ostringstream outtext; pthread_t threads[NUMTHREADS]; threadData *thread_data = (threadData*)malloc(NUMTHREADS*sizeof(threadData)); refdata=(int*)malloc(rowchunk*sizeof(int)); data=(int*)malloc((rowchunk*blocksize)*sizeof(int)); consensus=(int*)calloc(rowchunk,sizeof(int)); output=fopen(outfile.c_str(),"w"); /*Print output header*/ if(sample.size()==3) fprintf(output,"Reference:%lu\nBlock samples:%lu-%lu\n",sample[0],sample[1],sample[2]); else fprintf(output,"Reference:%lu\nSample:%lu\n",sample[0],sample[1]); fprintf(output,"SNPIdx\tRef\t"); for(int i=sample[1];i<sample[1]+blocksize;i++){ fprintf(output,"%d\t",i); } fprintf(output,"\n"); if(!snpbound.empty()){ /*set start position if bounded*/ offset=(int)row[0]; } int rem=0; if(dims[0]%CHOP!=0){ rem = dims[0]%CHOP; CHOP++; /*another chunk for the remainder*/ } for(int h=0;h<CHOP;h++){ if(h+1==CHOP && rem!=0){ rowchunk=rem; free(data); free(refdata); free(consensus); data=(int*)malloc((rowchunk*blocksize)*sizeof(int)); refdata=(int*)malloc(rowchunk*sizeof(int)); consensus=(int*)calloc(rowchunk,sizeof(int)); printf("Remainder chunk.%d\n",rowchunk); } if(!snpbound.empty()){ ref << variable << "[" << offset <<":"<< offset+rowchunk << "," << sample[0] << "]"; if(sample.size()==3) paramtemp << variable << "[" << offset <<":"<< offset+rowchunk << "," << sample[1] << ":" << sample[2]+1 <<"]"; else paramtemp << variable << "[" << offset <<":"<< offset+rowchunk << "," << sample[1] << "]"; }else{ ref << variable << "[:," << sample[0] << "]"; if(sample.size()==3) paramtemp << variable << "[:," << sample[1] << ":" << sample[2]+1 << "]"; else paramtemp << variable << "[:," << sample[1] << "]"; } param = ref.str(); fq->getData(param,refdata); param = paramtemp.str(); cout << h << "Ref:"<< ref.str() <<"\tParam:"<<paramtemp.str()<<"\n\n"; fq->getData(param,data); /*param=var[:,0:2]*/ /*printf("Reference Row:\n"); for(int i=0; i<rowchunk;i++) printf("%d ",refdata[i]); printf("\n\n"); printf("Comparison Row Block:\n"); for(int x=0;x<(blocksize*rowchunk);x++){ printf("%d ",data[x]); if((x+1)%blocksize==0) printf("\n\n"); }*/ /*THREADING of the comparison*/ for(int i=0;i<NUMTHREADS; i++){ thread_data[i].refdata = refdata; thread_data[i].data = data; thread_data[i].consensus=consensus; thread_data[i].snpcount = rowchunk; thread_data[i].blocksize = blocksize; thread_data[i].tid = i; thread_data[i].tcount = NUMTHREADS; pthread_create(&threads[i],NULL,compareSample, (void*) &thread_data[i]); } for(int i=0;i<NUMTHREADS;i++){ pthread_join(threads[i],NULL); } /*RESULT printing*/ timer1.stop(); for(int i=0;i<rowchunk;i++){ if(consensus[i]==1){ outtext << i+offset << "\t" << Calls[refdata[i]] << "\t"; for(int x=0;x<blocksize;x++){ outtext << Calls[data[i*blocksize+x]] << "\t"; } fprintf(output,"%s\n",outtext.str().c_str()); outtext.str(""); } } offset+=rowchunk; paramtemp.str(""); ref.str(""); outtext.str(""); timer1.resume(); } timer1.stop(); printf("Comparison Time:%f\n", timer1.realTime()); free(data); free(refdata); free(consensus); free(thread_data); fclose(output); } delete(queryProcessor); #ifndef FQ_NOMPI MPI_Finalize(); #endif timer2.stop(); if (berr) { printf("REPORT: Successfully completed comparing data.\n Total time elapsed:%f\n", timer2.realTime()); return 0; } else { printf("REPORT: Failed to complete comparing data.\n"); return -1; } }