Ejemplo n.º 1
0
// method to test the constructor
void QueryTest::testConstructor() {  
// create a query object
QueryProcessor myQuery = QueryProcessor();

CPPUNIT_ASSERT_EQUAL(0 , myQuery.insertNode("test", "test", -1));

}
void QueryProcessor::displayFrequency(IndexInterface* &myIndex, DocIndex &dIndex, DocParse &parser)
{
    QueryProcessor qProcessor;
    //queryString = qProcessor.search(myIndex, finalPageList, frequency);
    qProcessor.first();
    while (queryString != "EXIT")
    {

        queryString = qProcessor.search(myIndex, finalPageList, frequency);
        if(queryString == "EXIT")
        {
            break;
        }
        if(queryString != " ")
        {
            if(finalPageList.size() == 0 || frequency.size() == 0)
            {
                cout << "No Results Returned. Please Search Again" << endl;
            }
            else
            {
                cout << endl;
                cout << "Here are the Most Relevant Results:" << endl;
                for(int i = 0; i < finalPageList.size(); i++)
                {
                    if(i == 15)
                        break;
                    dIndex.getPageInfo(finalPageList[i], title, author, date);
                    cout << i+1 << ". Title: \"" << title << "\" by: " << author << " Date: " << date << "  TF/IDF: " << frequency[i] << endl;
                    cout << endl;
                    cout << endl;
                }
                cout << "1. Search Again" << endl;
                cout << "2. Expand an Article" << endl;
                cout << "Please choose an option from above: ";
                int choice;
                cin >> choice;
                while(choice < 1 || choice > 2)
                {
                    cout << "Error: please enter a valid choice: ";
                    cin >> choice;
                }
                if (choice == 2)
                {
                    cout << "To expand an article, enter it's corresponding number: ";
                    cin>>choice;
                    if (choice > 15 || choice < 1)
                    {
                        cout << "Invalid, enter a valid number: ";
                        cin >> choice;
                    }
                    cout << "Please Hold. The page contents will appear soon... " << endl;
                    parser.displayPageContents(finalPageList[choice - 1], dIndex);
                    qProcessor.first();
                }
                else
                {
                    qProcessor.first();
                }
            }
Ejemplo n.º 3
0
/// Reads the values of `variable` into the caller-provided buffer `data`.
///
/// @param queryProcessor  processor used to fetch the data
/// @param data            destination buffer; must be non-null
/// @param len             not used by this function
/// @param variable        name of the variable to read
/// @param filename        file name, used only in the error message
/// @return true when the processor reports success, false otherwise (an
///         error is logged when ibis::gVerbose > 0).
bool getData(QueryProcessor &queryProcessor, E *data,  uint64_t len, 
	     std::string &variable, std::string &filename)
{
    E *const destination = &data[0];
    const bool fetched = queryProcessor.getData(variable, destination);
    if (fetched) {
        return true;
    }

    LOGGER(ibis::gVerbose > 0)
        << "ERROR: Failed to get the data of variable \""
        << variable.c_str()
        << "\" from file \""
        << filename.c_str()
        << "\""
        << std::endl;
    return false;
}
Ejemplo n.º 4
0
/// Reads attribute `attrName` of variable `varName` into `data` and, when the
/// file-level flag `xport` is set, dumps the first `len` values to the log.
///
/// @param queryProcessor  processor used to fetch the attribute
/// @param data            destination buffer; must hold at least `len` values
/// @param len             number of values printed when `xport` is set
/// @param attrName        attribute name
/// @param varName         name of the variable owning the attribute
/// @param varPath         path of the variable inside the file
/// @param filename        file name, used only in log messages
/// @return true when the processor reports success, false otherwise (an
///         error is logged when ibis::gVerbose > 0).
bool getAttr(QueryProcessor &queryProcessor, E *data,  uint64_t len, 
	     std::string &attrName, std::string &varName,
	     std::string &varPath, std::string &filename)
{
    if (! queryProcessor.getAttribute(varName, attrName, &data[0],
				      varPath)) {
	// The closing quote after the attribute name was missing, which left
	// the message with unbalanced quotes.
	LOGGER(ibis::gVerbose > 0)
	    << "Failed to get the information for attribute \""
	    << attrName.c_str() << "\" of variable \""
	    << varName.c_str() << "\" from file \""
	    << filename.c_str() << "\"" << std::endl;
	return false;
    }
    if (xport) {
	ibis::util::logger lg;
	lg() << "Value of attribute \"" << attrName.c_str() 
	     << "\" of variable \"" << varName.c_str() 
	     << "\" from file \"" << filename.c_str() << "\"";
	for (uint64_t i = 0; i < len; i++) {
	    // Start every entry on its own line; without a separator the
	    // value of one entry ran straight into the index of the next.
	    lg() << "\n" << i << " " << data[i];
	}
    }
    return true;
}
Ejemplo n.º 5
0
// Evaluates this query node with the given processor.
//   IDENT : delegates to qp.DoQuery() on this node's rectangles.
//   AND   : runs the left child into `result`, the right child into a
//           scratch key set, then intersects `result` with the scratch set.
//   OR    : runs both children into the same `result`.
// Returns false as soon as any sub-query fails or for an unknown operator.
// NOTE(review): both AND and OR pass the caller's `result` to the left child;
// whether pre-existing keys in `result` are handled correctly depends on
// DoQuery/KeySet semantics not visible here -- confirm.
bool Query::
RunQuery( QueryProcessor &qp, KeySet &result )
{
	if( op == IDENT ) {
		return( qp.DoQuery( *rectangles, result ) );
	}

	if( op == AND ) {
		KeySet	rightKeys;
		if( !left->RunQuery( qp, result ) ) {
			return( false );
		}
		if( !right->RunQuery( qp, rightKeys ) ) {
			return( false );
		}
		result.Intersect( rightKeys );
		return( true );
	}

	if( op == OR ) {
		if( !left->RunQuery( qp, result ) ) {
			return( false );
		}
		return( right->RunQuery( qp, result ) );
	}

	return( false );
}
Ejemplo n.º 6
0
int main(){
	QueryProcessor testQuery;
	bool b = true;
	cout << "This is our query tester.  To test a query enter any combination of AND, OR, and NOT." << endl;
	cout << "There is three things you need to know when testing: " << endl;
	cout << "1.)  Remember to keep these 3 key words all capitalized." << endl;
	cout << "2.)  Keep every word you want to search separated by a space"  << endl;
	cout <<	"3.)  To stop testing type 'DONE'" << endl;

	cout << endl;
	while(b){
		cout << " What is your query: " << endl;
		string tempQuery;
		getline(cin,tempQuery);
		if(tempQuery == "DONE"){
			cout << "Exiting Query Test.." << endl;
			b = false;
		}
		else{
			testQuery.setQuery(tempQuery);
			testQuery.printQuery();
		}
	}
	cout << endl;
	cout << "Now we'll test our 'getter' methods" << endl;
	cout << endl;
	string * tempAnds = testQuery.getAnds();
	string * tempOrs = testQuery.getOrs();
	string * tempNots = testQuery.getNots();
	int numAndWords = testQuery.getNumAnds();
	int numOrWords = testQuery.getNumOrs();
	int numNotWords = testQuery.getNumNots();

	if(numAndWords == 0){
		cout << "There were no AND words." << endl;
		cout << endl;
	}else{
		cout << "The AND words in our query are: " << endl;
		for(int i = 0; i < numAndWords; i++){
			cout << "# " << i+1 << ": " << tempAnds[i] << endl;
		}
		cout << endl;
	}
	if(numOrWords == 0){
		cout << "There were no OR words." << endl;
		cout << endl;
	}else{
		cout << "The OR words in our query are: " << endl;
		for(int i = 0; i < numOrWords; i++){
			cout << "# " << i+1 << ": " << tempOrs[i] << endl;
		}
		cout << endl;
	}
	if(numNotWords == 0){
		cout << "There were no NOT words." << endl;
		cout << endl;
	}else{
		cout << "The NOT words in our query are: " << endl;
		for(int i = 0; i < numNotWords; i++){
			cout << "# " << i+1 << ": " << tempNots[i] << endl;
		}
		cout << endl;
	}


	return 0;
}
Ejemplo n.º 7
0
int main(int argc, char **argv) 
{
    std::string varPathStr;
    std::string varNameStr1;
    std::string varNameStr2;
    std::string varNameStr3;
    parseArgs(argc, argv);
    std::vector<double> beginList;
    std::vector<double> endList;
    std::vector<double> strideList;
    beginList.resize(dimension);
    endList.resize(dimension);
    strideList.resize(dimension);
    if (datafile.empty() || condstring == 0 || dimension == 0 ) {
	std::cerr << "Usage:\n" << *argv 
		  << " -f data-file-name" 
		  << " -q query-conditions-in-a-single-string"
		  << " -x histogram-dimension"
		  << " -y begin"
		  << " -e end"
		  << " -s stride"
		  << " [-i index-file-name]"
		  << " [-g log-file-name]"
		  << " [-n name-of-variable]"
		  << " [-p path-of-variable]" 
		  << " [-m file model [HDF5(default), H5PART, NETCDF, PNETCDF]"
		  << " [-b use-boundingbox-data-selection]"
		  << " [-v verboseness]"
		  << " [-l mpi-subarray-length]"
	    //<< "\n e.g:   ./histogram -f h5uc-data-index.h5 -q 'px < 0.3' -n y -p TimeStep2 -x 1\n"
		  <<   "\n e.g:   ./histogram -f h5uc-data.h5 -i indexfile -q 'px<0.3 && py>0' -x 2 -n py,pz;"
		  << " -y '0,-0.5;' -s '0.1,0.02;' -e '1,0;' -p TimeStep2\n\n"
		  << "\tFor More detailed usage description and examples, please see file GUIDE"
		  << std::endl;
	return -1;
    }

#ifndef FQ_NOMPI
    MPI_Init(&argc, &argv);
    int mpi_size, mpi_rank;
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
#endif

    ibis::gParameters().add(FQ_REPORT_STATISTIC, "true");
	
    ibis::horometer totTimer;
    totTimer.start();

    FQ::FileFormat model = FQ::FQ_HDF5;
    if (fileModel != 0) {
	std::string format = fileModel;
	if (format.compare("HDF5") == 0) {
	    model = FQ::FQ_HDF5;
	} 
	else if (format.compare("H5PART") == 0) {
	    model = FQ::FQ_H5Part;
	} 
	else if (format.compare("NETCDF") == 0) {
	    model = FQ::FQ_NetCDF;
	}
	else if (format.compare("PNETCDF") == 0) {
	    model = FQ::FQ_pnetCDF;
	}
    }

    if (! indexfile.empty()) {
	if (verboseness > 1) 
	    std::cout << "DEBUG: using indexfile \"" << indexfile.c_str() << "\" ... \n";
    }

    if (varPath != 0) {
	if (verboseness > 1)  std::cout << "Debug: use variable path \"" << varPath << "\"\n";
	varPathStr = varPath;
    }
	
    //	std::cout << "varName:" << varName << " begin:" << begin << " end:" << end << " stride:" << stride << std::endl;
    varName1 = strtok(varName, ",;");
    if (dimension>1) varName2 = strtok(NULL, ",;");
    if (dimension>2) varName3 = strtok(NULL, ",;");
    begin1 = atof(strtok(begin, ",;"));
    if (dimension>1) begin2 = atof(strtok(NULL, ",;"));
    if (dimension>2) begin3 = atof(strtok(NULL, ",;"));
    end1 = atof(strtok(end, ",;"));
    if (dimension>1) end2 = atof(strtok(NULL, ",;"));
    if (dimension>2) end3 = atof(strtok(NULL, ",;"));
    stride1 = atof(strtok(stride, ",;"));
    if (dimension>1) stride2 = atof(strtok(NULL, ",;"));
    if (dimension>2) stride3 = atof(strtok(NULL, ",;"));	

    //std::cout << "varName is " << varName1 << ", " << varName2 << ", " << varName3 << std::endl;
    //std::cout << "begin   is " << begin1 << ", " << begin2 << ", " << begin3 << std::endl;
    //std::cout << "end     is " << end1 << ", " << end2 << ", " << end3 << std::endl;
    //std::cout << "stride  is " << stride1 << ", " << stride2 << ", " << stride3 << std::endl;

    if (varName1!=0) varNameStr1 = varName1;
    if (dimension>1 && varName2!=0) varNameStr2 = varName2;
    if (dimension>2 && varName3!=0) varNameStr3 = varName3;
    /*	
	if (mpi_rank==0) {
	unsigned int dims1 = static_cast<uint32_t>(1+floor((end1-begin1)/stride1));
    	unsigned int dims2 = static_cast<uint32_t>(1+floor((end2-begin2)/stride2));
    	unsigned int dims3 = static_cast<uint32_t>(1+floor((end3-begin3)/stride3));
	std::cout << "dims1 * dims2 * dims3 = " << dims1 << " * " << dims2 << " * " << dims3 << std::endl;
	}
    */


    if (logfile.str().empty() != true) {
#ifndef FQ_NOMPI
	logfile << mpi_rank << ".log";
#endif
	if (verboseness > 1) std::cout << "Debug: using logfile \"" << logfile.str().c_str() << "\"\n";
    }
    if (verboseness >1) {
	std::cout << "open the file handler" << std::endl;
    }
    // open the named file
    QueryProcessor* queryProcessor = new QueryProcessor(datafile, model, indexfile, verboseness, "", logfile.str().c_str()); // the file handler

    if (queryProcessor->isValid() == false) {
	if (verboseness > 0) {
	    std::cout << "ERROR: failed to initiate the QueryProcessor object for file \"" 
		      << datafile.c_str() << "\" ...\n";
	    std::cout << "REPORT: failed to complete processing query" << std::endl;
	}
	delete(queryProcessor);
#ifndef FQ_NOMPI
	MPI_Finalize();
#endif
	return -1;
    }

    uint64_t hits = 0;
    // getNumHits
    ibis::horometer timer;
    timer.start();
    hits = queryProcessor->getNumHits(condstring, varPathStr, mpi_dim, mpi_len);
    timer.stop();
    if (verboseness > 1)
	std::cout << "Debug: conditions \"" << condstring 
		  << "\" number of hits " << hits << std::endl;;

    if (hits == 0) {
	if (verboseness > 1) {
	    std::cout << "Warning -- No element is seleteced ==>"
		      << " the rest of the test is skipped!" << std::endl;
	}
	if (verboseness > 0) {
#ifndef FQ_NOMPI        
	    if (mpi_rank==0) {
#endif
		std::cout << "REPORT: successfully completed processing query with " 
			  << hits << " hits" << std::endl;
#ifndef FQ_NOMPI        
	    }
#endif
	}
	delete(queryProcessor);
#ifndef FQ_NOMPI
	MPI_Finalize();
#endif
	return hits;
    }

    // executeQuery
    std::vector<uint64_t> coords;
    std::vector<uint32_t> counts;
    bool herr = true;
    //	if (mpi_rank==0) std::cout<<"histogram starting..."<<std::endl;	
    if (varPath != 0) {
	//coords.reserve(hits*dims.size());
	// hits1 = queryProcessor->executeQuery((char*)condstring, coords, varPathStr, FQ::POINTS_SELECTION, mpi_dim, mpi_len);
	if (dimension==1) { 
	    //counts.assign(static_cast<uint32_t>(1+floor(end1-begin1)/stride1), 0);
	    herr = queryProcessor->get1DHistogram
		((char*) condstring, varNameStr1, varPathStr, begin1, end1, stride1, 
		 counts, mpi_dim, mpi_len);
	} else if (dimension==2) {
	    //			if (mpi_rank==0) std::cout << "in 2Dhistogram" << std::endl;
	    //counts.assign(static_cast<uint32_t>(1+floor(end1-begin1)/stride1)*
	    //          static_cast<uint32_t>(1+floor(end2-begin2)/stride2), 0);
	    herr = queryProcessor->get2DHistogram
		((char*) condstring, varPathStr, varNameStr1, begin1, end1, stride1, 
		 varNameStr2, begin2, end2, stride2, 
		 counts, mpi_dim, mpi_len);       
	    //			if (mpi_rank==0) std::cout << "out 2Dhistogram" << std::endl;
	} else if (dimension==3) {
	    herr = queryProcessor->get3DHistogram
		((char*) condstring, varPathStr, 
		 varNameStr1, begin1, end1, stride1, 
		 varNameStr2, begin2, end2, stride2, 
		 varNameStr3, begin3, end3, stride3,
		 counts, mpi_dim, mpi_len);
	}
	if (! herr) {
	    LOGGER(ibis::gVerbose >= 0)
		<< *argv << " failed to compute the histogram";
	    return -2;
	}

	/************************/
	/*  verify part         */
	/************************/

	if (verification) {

	    //verify the Histogram
	    //if (mpi_rank==0) std::cout << "starting verify the histogram..." << std::endl;
	    uint64_t len = 1;
	    if (len) {
		//std::cout<<"Warning: May use too large memory. Can only check sum.\n";				
	    } else {
		/*				double data[len];
		//
		std::vector<uint32_t> temp_counts;
		temp_counts.assign(static_cast<uint32_t>(1+floor(end-begin)/stride), 0);
		bool verr = true;
		//std::cout << "starting getData...." << std::endl;
		verr = queryProcessor->getData(varNameStr, &data[0], varPathStr);

		#ifndef FQ_NOMPI  
		if (mpi_rank==0) {
		#endif
		if (len<=1000000) {
		//std::cout << "getData success" << std::endl;		
		std::cout << "temp Histogram" << std::endl;
		    
		// copy from fasbit parth.cpp get1DHistogram
		if (len != 0) {
		for (uint32_t i = 0; i < len; ++ i) {
		++ temp_counts[static_cast<uint32_t>((data[i] - begin) / stride)];
		}
		}
		//
		std::cout << "temp Histogram" << std::endl;
		std::cout << "temp_counts.size is "<< temp_counts.size() << std::endl;
		for (int i=0; i<temp_counts.size(); i++) {
		std::cout << "[" << begin+i*stride << ", " << begin+(i+1)*stride << "]:\t" << temp_counts[i] << std::endl;
		}       
		            
		std::cout << "test Histogram" << std::endl;
		std::cout << "counts.size is "<< counts.size() << std::endl;
		for (int i=0; i<counts.size(); i++) {
		std::cout << "[" << begin+i*stride << ", " << begin+(i+1)*stride << "]:\t" << counts[i] << std::endl;
		}
		// verify two histogram vectors	
		if (counts!=temp_counts) {
		std::cout << "ERROR:Vector is not match.Histogram fail." << std::endl;
		} else {
		std::cout << "histogram success" << std::endl;
		}
		}
		#ifndef FQ_NOMPI     
		} 
		#endif
		*/			}
	    //unsigned int hits = 0 ;
	    //hits = queryProcessor->getNumHits(condstring, varPathStr, mpi_dim, mpi_len);
	    uint64_t hits1 = 0;
	    for (int i=0; i<counts.size(); i++) {
		hits1 += counts[i];
	    }
	    if (hits1 != hits) {
		std::cout<<"Error:\tcheck sum failed. Num of Hit is " << hits << ",and histogram number is " << hits1<<std::endl; 
	    } else 
		std::cout<<"verification result is correct.\n";
	}
		
	std::fstream histogramFile;
#ifndef FQ_NOMPI        
	if (mpi_rank==0) {
#endif
	    if (dimension==1) {
		std::fstream file;
		//char fileName[100]="";
		//char path[]="/global/homes/v/vidcina/fq/example/";
		//fileName<<dimension<<"D"<<"histogram["<<begin1<<":"<<stride1<<":"<<end1<<"].out";
		//sprintf(fileName, "%s%d%s%d%s%d%s%d%s", path, dimension, "Dhistogram[", begin1, ":", stride1, ":", end1, "].out");
		//std::string temp="";
		//temp.push_back(fileName.str());
		std::ostringstream fileName;
		fileName << hist_path << "_"<< dimension << "D" << "histogram["
			 << begin1 << ":" << stride1 << ":" <<end1 << "].out";
		std::string str =  fileName.str();
		const char* chr = str.c_str();
		file.open(chr, std::ios::out);
		if ( file.fail() ) {
		    std::cout << str  << std::endl;
		    std::cout << "openFile fail" << std::endl;
		} else {
		    for (int i=0; i<counts.size(); i++) {
			file << begin1+i*stride1 << "\t" << begin1+(i+1)*stride1 << "\t" << counts[i] << std::endl;
		    }
		}
		//histogramFile.close();
	    } else if (dimension==2) {
		std::cout << "2DHistogram "
			  << "Variable1 "<< varName1 << " begin " << begin1
			  << " to " << end1 <<" stride is " << stride1
			  << "Variable2 "<< varName2 << " begin " << begin2
			  << " to " << end2 <<" stride is " << stride2
			  << std::endl ;
		std::cout << "counts.size is "<< counts.size() << std::endl;
		unsigned int imax = static_cast<uint32_t>(1+floor((end1-begin1)/stride1));
		unsigned int jmax = static_cast<uint32_t>(1+floor((end2-begin2)/stride2));
				
		for (unsigned int i=0; i<imax; i++) {
		    for (unsigned int j=0; j<jmax; j++) {
			std::cout << "[" << begin1+i*stride1 << ", "
				  << begin1+(i+1)*stride1 << "), ["
				  << begin2+j*stride2 << ", "
				  << begin2+(j+1)*stride2 << "):\t"
				  << counts[i*jmax+j] << std::endl;
		    }
		}
	    } else if (dimension==3) {
		std::cout << "3DHistogram " 
			  << "Variable1 "<< varName1 << " begin " << begin1
			  << " to " << end1 << " stride is " << stride1 
			  << "Variable2 "<< varName2 << " begin " << begin2
			  << " to " << end2 << " stride is " << stride2 
			  << "Variable3 "<< varName3 << " begin " << begin3
			  << " to " << end2 << " stride is " << stride3
			  << std::endl ;

		std::cout << "counts.size is "<< counts.size() << std::endl;
		unsigned int imax = static_cast<uint32_t>
		    (1+floor((end1-begin1)/stride1));
		unsigned int jmax = static_cast<uint32_t>
		    (1+floor((end2-begin2)/stride2));
		unsigned int kmax = static_cast<uint32_t>
		    (1+floor((end3-begin3)/stride3));
#ifndef FQ_NOMPI
		if (mpi_rank==0 && imax*jmax*kmax!=counts.size()) {
		    std::cout<<"ERROR: counts.size not match."<<std::endl;
		    delete(queryProcessor);
		    MPI_Finalize();
		    return 0;
		}
#endif	
		for (unsigned int i=0; i<imax; i++) {
		    for (unsigned int j=0; j<jmax; j++) {
			for (unsigned int k=0; k<kmax; k++) {
			    if (easyToShow) {
				if (counts[i*jmax*kmax + j*kmax + k]!=0) {
				    std::cout << "[" << begin1+i*stride1 << ", "
					      << begin1+(i+1)*stride1 << "), [" 
					      << begin2+j*stride2 << ", "
					      << begin2+(j+1)*stride2 << "), [" 
					      << begin3+k*stride3 << ", "
					      << begin3+(k+1)*stride3 << "):\t" 
					      << counts[i*jmax*kmax + j*kmax + k]
					      << std::endl;
				}
			    } else {
				std::cout << "[" << begin1+i*stride1 << ", "
					  << begin1+(i+1)*stride1 << "), [" 
					  << begin2+j*stride2 << ", "
					  << begin2+(j+1)*stride2 << "), [" 
					  << begin3+k*stride3 << ", "
					  << begin3+(k+1)*stride3 << "):\t" 
					  << counts[i*jmax*kmax + j*kmax + k]
					  << std::endl;
			    }	
			}
		    }
		}
		std::cout << "successfuly printed histogram" << std::endl;	

	    }

#ifndef FQ_NOMPI        
	} 
#endif
	//		}//end else

    }//end if(!varPath)

 
    //	MPI_Barrier(MPI_COMM_WORLD);
    /*
      if (hits != hits1) 
      {
      std::cout << "Error -- number of hits does not match!" << std::endl;
      std::cout << "REPORT: failed to complete processing query" << std::endl;
      delete(queryProcessor);
      #ifndef FQ_NOMPI
      MPI_Finalize();
      #endif
      return -1;
      }
    */
   
    if (verboseness > 0) {
#ifndef FQ_NOMPI        
	if (mpi_rank==0) {
#endif
	    std::cout << "REPORT: successfully completed get1DHistogram with " 
		      << counts.size() << " histogram size" << std::endl;
#ifndef FQ_NOMPI        
	}
#endif
    }
    delete(queryProcessor);
#ifndef FQ_NOMPI
    MPI_Finalize();
#endif
    totTimer.stop();
    LOGGER(FastQuery::reportTiming())
	<< "Statistic\thistogram::totTimer\t"
        << totTimer.CPUTime() << "\t" << totTimer.realTime()
        << "\t";
    return hits;
} // main
Ejemplo n.º 8
0
int main(int argc, char **argv) {
    FILE *output;
    ibis::horometer timer1,timer2;
    timer1.start(); 
    timer2.start();
    int CHOP=1;
    
    const rlim_t STACK_SIZE = 1000*1024*1024; 
    struct rlimit rl;
    rl.rlim_cur = STACK_SIZE;
    int ret = setrlimit(RLIMIT_STACK,&rl);

    map<int, string> Calls;
    Calls[0]="AA";
    Calls[1]="AT";
    Calls[2]="AC";
    Calls[3]="AG";
    Calls[4]="AN";
    Calls[5]="TA";
    Calls[6]="TT";
    Calls[7]="TC";
    Calls[8]="TG";
    Calls[9]="TN";
    Calls[10]="CA";
    Calls[11]="CT";
    Calls[12]="CC";
    Calls[13]="CG";
    Calls[14]="CN";
    Calls[15]="GA";
    Calls[16]="GT";
    Calls[17]="GC";
    Calls[18]="GG";
    Calls[19]="GN";
    Calls[20]="NA";
    Calls[21]="NT";
    Calls[22]="NC";
    Calls[23]="NG";
    Calls[24]="NN";
    Calls[25]="--";

    parseArgs(argc, argv);
    if(datafile.empty() || pos.empty() || varName.empty() || varPath.empty() || outfile.empty()){
		std::cerr << "Usage:\n" << *argv
                  << " -f data-file-name"
		  << " -n variable-name"
                  << " -p variable-path"
		  << " -r ref&row indices ('x|y:z' where x=ref sample,x:y as row range) "
		  << " -r snp bounds (x:y, where x=start, y=end)"
		  << " -d variable-dimension (e.g. 2:2)"
		  << " -o output-file"
                  << std::endl;
    }

#ifndef FQ_NOMPI
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
#endif

    FQ::FileFormat model = FQ::FQ_HDF5;
    bool berr = true;
    QueryProcessor* queryProcessor = new QueryProcessor(datafile, model, "", 0, "",""); 
	
    if (queryProcessor->isValid() == false) {
	printf("ERROR: Failed to initiate query processor for file.\n");
 	berr = false;
    }

    string variable;
    vector<uint64_t> dims;
    FQ::DataType type;
    if (! queryProcessor->getVariableInfo(varName, variable, dims, &type, varPath)) {
 	printf("ERROR: Failed to get the information for variable\n");
	berr = false;
    } else {
	if(dims.size()!=2){ /*dims is derived from the data*/
	    printf("ERROR: The data has an invalid dimension. SNP data should be in 2D matrix only.\n");
	    berr=false;
	}
	    	
	string str,param; 
	vector<uint64_t> sample;
	vector<uint64_t> row; //snps
	int prePos = 0, idx = 0,blocksize=1;
	FastQuery* fq = new FastQuery(datafile, model, "", 0, "",""); 
	int *refdata=NULL, *data=NULL, *consensus=NULL;
	ostringstream paramtemp,ref;

	/*Get the index for reference sample and subrows*/
	idx = pos.find('|',prePos);
	if(idx!=pos.npos && idx!=pos.length()-1){
	    str = pos.substr(prePos,idx - prePos); 
	    sample.push_back(atoi(str.c_str())); /*get the ref. sample index*/
	    prePos=idx+1;
	    idx=pos.find(':',prePos);
	    if(idx!=pos.npos){ /*indicates multiple comparison*/
		if(idx==pos.length()-1){ /*string ends with ':'*/
		    printf("ERROR: Incomplete indices specified for sample comparison.\n");
		    return 0;
		}
		str = pos.substr(prePos,idx - prePos); 
		sample.push_back(atoi(str.c_str())); /*get the start index*/
		prePos=idx+1;
		str = pos.substr(prePos,pos.length() - prePos); 
		sample.push_back(atoi(str.c_str())); /*get the end index*/
		if(sample[2]<0 || sample[2]>=dims[1] || sample[1]>=sample[2]){
		    printf("ERROR: Indices out of bounds/invalid range.\n");
		    return 0;
		}
	    }else{ 
		str = pos.substr(prePos,idx - prePos); 
		sample.push_back(atoi(str.c_str())); /*get the index of another sample*/	
	    }

	    if(sample[0]<0 || sample[0]>=dims[1] || sample[1]<0 || sample[1]>=dims[1]){
		printf("ERROR: Indices out of bounds.\n");
		return 0;
	    }
	}else{
	    printf("ERROR: Invalid indices specified for sample comparison.\n");
	    return 0;
	}

	/*Get the SNP bounds for subset sample*/
	if(!snpbound.empty()){
	    idx=snpbound.find(':',0);
	    if(idx==snpbound.npos || idx==snpbound.length()-1){
		printf("ERROR: Invalid SNP bounds.");
		return 1;
	    }
	    str = snpbound.substr(0,idx);
	    row.push_back(atoi(str.c_str()));
	    str= snpbound.substr(idx+1,snpbound.length()-idx+1);
	    row.push_back(atoi(str.c_str()));
	    if(row[0]<0 || row[1]>=dims[0] || row[1]<0 || row[0]>=row[1]){
	    	printf("ERROR: Invalid SNP bounds.\n");
	        return 0;
	    }
	    dims[0]=row[1]-row[0]+1; /*dims is now the SNP bounds for subregion*/
	}

	if(sample.size()==3){
	    blocksize=sample[2]-sample[1]+1; /*block of data*/
	}
	
        if((dims[0]+(dims[0]*blocksize)+dims[0])*sizeof(int)>(1000*1024*1024)){ 
	    //printf("Error: Insufficient memory to handle huge block.\nREPORT: Failed to complete comparing data.\n");
	    //return 1;
	    printf("\nData is greater than the available/alloted memory space.\n");
            CHOP=10;
     	}

        printf("Running with %d thread/s.\n",NUMTHREADS);
        int rowchunk=dims[0]/CHOP, offset; /*CHOP is 1 if data fits in memory*/
	float com_time=0;
	ostringstream outtext;
	pthread_t threads[NUMTHREADS];
	threadData *thread_data = (threadData*)malloc(NUMTHREADS*sizeof(threadData));
        refdata=(int*)malloc(rowchunk*sizeof(int)); 
	data=(int*)malloc((rowchunk*blocksize)*sizeof(int));
	consensus=(int*)calloc(rowchunk,sizeof(int));
        output=fopen(outfile.c_str(),"w");
        
        /*Print output header*/
	if(sample.size()==3) 
	    fprintf(output,"Reference:%lu\nBlock samples:%lu-%lu\n",sample[0],sample[1],sample[2]);
	else 
	    fprintf(output,"Reference:%lu\nSample:%lu\n",sample[0],sample[1]);
	fprintf(output,"SNPIdx\tRef\t");
	for(int i=sample[1];i<sample[1]+blocksize;i++){ 
	    fprintf(output,"%d\t",i);
	}
	fprintf(output,"\n");

	if(!snpbound.empty()){  /*set start position if bounded*/
	    offset=(int)row[0];
	}
	int rem=0;
        if(dims[0]%CHOP!=0){
            rem = dims[0]%CHOP;
	    CHOP++; /*another chunk for the remainder*/
	}

        for(int h=0;h<CHOP;h++){
	    if(h+1==CHOP && rem!=0){
		rowchunk=rem;
		free(data);
		free(refdata);
		free(consensus);
		data=(int*)malloc((rowchunk*blocksize)*sizeof(int));
		refdata=(int*)malloc(rowchunk*sizeof(int));
		consensus=(int*)calloc(rowchunk,sizeof(int));
		printf("Remainder chunk.%d\n",rowchunk);
	    }
            
	    if(!snpbound.empty()){
		ref << variable << "[" << offset <<":"<< offset+rowchunk << "," <<  sample[0] << "]";
		if(sample.size()==3)
		    paramtemp << variable << "[" << offset <<":"<< offset+rowchunk << "," << sample[1] << ":" << sample[2]+1  <<"]"; 
		else
		    paramtemp << variable << "[" << offset <<":"<< offset+rowchunk << "," << sample[1] << "]";
	    }else{
		ref << variable << "[:," << sample[0] << "]";
		if(sample.size()==3)
		    paramtemp << variable << "[:," << sample[1] << ":" << sample[2]+1 << "]";
		else
		    paramtemp << variable << "[:," << sample[1] << "]";
	    }
	
	    param = ref.str(); 
	    fq->getData(param,refdata); 
	    param = paramtemp.str();  
	    cout << h << "Ref:"<< ref.str() <<"\tParam:"<<paramtemp.str()<<"\n\n";
	    fq->getData(param,data); /*param=var[:,0:2]*/

            /*printf("Reference Row:\n");
	    for(int i=0; i<rowchunk;i++) printf("%d ",refdata[i]);
	    printf("\n\n");

	    printf("Comparison Row Block:\n");
	    for(int x=0;x<(blocksize*rowchunk);x++){
	    	printf("%d ",data[x]); 
	    	if((x+1)%blocksize==0) printf("\n\n");
	    }*/

            /*THREADING of the comparison*/	
	    for(int i=0;i<NUMTHREADS; i++){
	    	thread_data[i].refdata = refdata;
	    	thread_data[i].data = data;
	    	thread_data[i].consensus=consensus; 
	    	thread_data[i].snpcount = rowchunk;
	    	thread_data[i].blocksize = blocksize;
	    	thread_data[i].tid = i;
	    	thread_data[i].tcount = NUMTHREADS;
	    	pthread_create(&threads[i],NULL,compareSample, (void*) &thread_data[i]);
	    }

	    for(int i=0;i<NUMTHREADS;i++){
	    	pthread_join(threads[i],NULL);
	    }

            /*RESULT printing*/
            timer1.stop();
	    for(int i=0;i<rowchunk;i++){
	    	if(consensus[i]==1){
		    outtext << i+offset << "\t" << Calls[refdata[i]] << "\t";
		    for(int x=0;x<blocksize;x++){
		    	outtext << Calls[data[i*blocksize+x]] << "\t"; 
		    }
		    fprintf(output,"%s\n",outtext.str().c_str());
		    outtext.str("");
	    	}
			
	    }
 	    offset+=rowchunk;
	    paramtemp.str("");
	    ref.str("");
            outtext.str("");
	    timer1.resume();
        }
	timer1.stop();
	printf("Comparison Time:%f\n", timer1.realTime());
	
	
	free(data);
	free(refdata);
	free(consensus);
	free(thread_data);
	fclose(output);
    }	
	delete(queryProcessor);
#ifndef FQ_NOMPI
    MPI_Finalize();
#endif
    timer2.stop();
    if (berr) {
    	printf("REPORT: Successfully completed comparing data.\n Total time elapsed:%f\n", timer2.realTime());
    	return 0;	
    } else {
    	printf("REPORT: Failed to complete comparing data.\n");
    	return -1;	
    }
	
}
Ejemplo n.º 9
0
// Creates a QueryProcessor over this handler's index and hands it the
// given query string to start processing.
void IndexHandler::get_queries(string query)
{
    QueryProcessor queryRunner(*index);
    queryRunner.initiate_query(query);
}