/*
	This API takes the user's query text and looks up its word(s) in the inverted index.
	The candidate documents are then ranked using a tf-idf bag-of-words (BOW) model.
	*/
	// Builds the candidate document set for a query and hands it to Rank.
	//
	// word - raw query text; it is split into tokens by _wordBreaker->BreakEnglishText.
	// topK - forwarded unchanged to Rank.
	//
	// The candidate set is the intersection of the document sets of every token
	// that is present in _indexPtr->_wordIndex. Tokens that are absent from the
	// index are skipped (they do not empty the candidate set). If no token is
	// found at all, Rank is called with an empty candidate set.
	//
	// Returns the value produced by Rank(word_tokens, candSet, topK).
	std::vector<std::pair<int,double> > ServeIndex(const std::string& word,int topK)
	{
		//tokenize the user text (any normalization is up to the word breaker)
		std::vector<std::string>& word_tokens = _wordBreaker->BreakEnglishText(word.c_str());

		//generate the candidate document set
		std::set<int> candSet;
		bool foundAny = false;
		for(size_t i=0;i<word_tokens.size();++i)
		{
			boost::unordered_map<std::string,IndexEntry>::iterator itor = _indexPtr->_wordIndex.find(word_tokens[i]);

			//a token that is not indexed places no constraint on the result
			if( itor == _indexPtr->_wordIndex.end() )
				continue;

			const std::set<int>& docs = itor->second._docSet;

			if(!foundAny){
				//first token that was found seeds the candidate set
				candSet = docs;
				foundAny = true;
			} else{
				//keep only the documents that also contain this token
				std::set<int> common;
				set_intersection(candSet.begin(),candSet.end(),docs.begin(),docs.end(),inserter(common,common.begin()));
				//swap instead of clear + copy: O(1), the old contents die with 'common'
				candSet.swap(common);
			}
		}

		return Rank(word_tokens,candSet,topK);
	}
	/*
	This API reads a TSV file of 'documents' and generates an in-memory inverted index.
	The index is dumped to disk using the Serialize API.
	*/
	// Reads the file at docPath line by line and adds every line to the index.
	//
	// Each line is split with strtok on the characters in delim and must yield
	// exactly three fields. Field 1 and the tokens produced from field 2 by
	// _wordBreaker->BreakEnglishText are passed to AddDocToIndex; field 0 is not
	// used here. Note that strtok collapses consecutive delimiters, so an empty
	// field makes the line come up short.
	//
	// Document ids are assigned sequentially from 0 in file order, and
	// _totalDocs is set to the number of lines processed.
	//
	// Terminates the process with exit(-1) if the file cannot be opened or if a
	// line does not split into exactly three fields.
	void BuildIndex()
	{
		std::ifstream inFile(docPath.c_str(),std::ifstream::in);
		if(!inFile.good()){
			std::cerr<<"Unable to read file: " <<docPath<<std::endl;
			exit(-1);
		}

		//sync_with_stdio is a static member of ios_base: it configures the
		//standard streams, not inFile, so call it as such
		std::ios_base::sync_with_stdio(false);

		std::string line;
		std::vector<char> buffer;     //mutable, NUL-terminated copy of the line for strtok
		std::vector<char* > fields;   //pointers into 'buffer', one per field
		int docId = 0;

		while( getline(inFile,line))
		{
			if( docId % 10000 == 0)
				std::cerr<<"At document: " <<docId<<std::endl;

			//strtok writes NULs into its input; writing through line.c_str()
			//is undefined behaviour, so tokenize a private copy instead
			buffer.assign(line.begin(),line.end());
			buffer.push_back('\0');

			fields.clear();
			char* parts = strtok( &buffer[0] , delim);
			while( parts != NULL)
			{
				fields.push_back(parts);
				parts = strtok(NULL,delim);
			}

			//explicit check instead of assert: assert vanishes under NDEBUG and
			//fields[2] below would then read out of bounds on a malformed line
			if( fields.size() != 3 ){
				std::cerr<<"Malformed line for document " <<docId<<": expected 3 fields, found " <<fields.size()<<std::endl;
				exit(-1);
			}

			std::vector<std::string>& tokens = _wordBreaker->BreakEnglishText(fields[2]);

			AddDocToIndex(docId,fields[1],tokens);
			++docId;
		}
		_totalDocs = docId;
	}