Esempio n. 1
0
bool JsonClientServer::registerQuery(std::string jaqlQuery,  void (*callBackFunction)(Element& element) )
{
	//create client socket input to get data from server by socket, the schema information is used to check input
	ClientIOManager::getInstance()->addStreamInput(CLIENT_IP,CLIENT_PORT,callBackFunction);
	
	//generate register query command
	DocumentBuilder registerQueryCommandbuilder;
	registerQueryCommandbuilder.append("command_type","register_query");
	registerQueryCommandbuilder.append("query_content",jaqlQuery);
	DocumentBuilder outputSpecificationBuilder;
	outputSpecificationBuilder.append("output_type","socket_output");
	outputSpecificationBuilder.append("output_ip",CLIENT_IP);
	outputSpecificationBuilder.append("output_port",CLIENT_PORT);
	Document outputSpecificationDocument = outputSpecificationBuilder.obj();
	registerQueryCommandbuilder.appendObject("output_specification",outputSpecificationDocument.objdata(),outputSpecificationDocument.objsize());
	Document registerQueryCommandDocument = registerQueryCommandbuilder.obj();
	
	//register query on server 
	bool bl = sendCommand(serverIp,serverPort,registerQueryCommandDocument);
	return bl;
}
Esempio n. 2
0
//parse the RSS feed which is xml, get each items, generate element for new item and insert into elementList
void RssStreamInput::generateElementByRss(std::string& rssXmlContent)
{
	boost::property_tree::ptree pt;  
	stringstream ss; 
	ss << rssXmlContent;
	read_xml(ss, pt);
	boost::property_tree::ptree channelPt;
	channelPt = pt.get_child("rss.channel");
	std::string linkurl = channelPt.get_child("link").data();
	
	std::vector<std::string> titleVector; // save the titles in the RSS feed page
	std::vector<std::string> linkVector;  // save the links in the RSS feed page
	
	//mark if the newest title we processed is still in this RSS feed page  
	bool isProcessedNewestTitleStillInThisPage = false;

	std::string processedNewestTitle;
	std::map<std::string, std::string >::iterator it = this->urlTitleMap.find(linkurl);
	if(it != this->urlTitleMap.end())
	{
		processedNewestTitle = it->second;
	}
	else
	{
		processedNewestTitle = "";
	}

	BOOST_FOREACH(boost::property_tree::ptree::value_type &v1, channelPt)
	{
		if(v1.first=="item")
		{
			
			boost::property_tree::ptree itemPt = v1.second;
			//get the title and link for each item
			std::string title;
			std::string link;
			title = itemPt.get_child("title").data();
			link = itemPt.get_child("link").data();
			//save title and link 
			titleVector.push_back(title);
			linkVector.push_back(link);
			//see if any exists
			if(processedNewestTitle == title)
			{
				isProcessedNewestTitleStillInThisPage = true;
			}
		}
	}
	//if new titles exist, generate corresponding elements
	if(isProcessedNewestTitleStillInThisPage == false)
	{//no title has been processed, generate element for each one
		int number = titleVector.size();
		for( int i = number-1; i >=0 ; i--)//reverse scan, we scan from the oldest to the newest
		{
			std::string title = titleVector[i];
			std::string link = linkVector[i];
			DocumentBuilder builder;
			builder.append("title",title);
			builder.append("link",link);
			Document document = builder.obj();
			Element element;
			element.document = document;
			element.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier();
			element.timestamp = TimestampGenerator::getCurrentTime();
			this->rssElementList.push_back(element);
		}
	}
	else
	{//we have processed some titles in this page, generate element for the new titles
		int number = titleVector.size();
		bool isProccesdNewsetTitleMeet = false;
		for( int i = number-1; i >=0 ; i--)//reverse scan, we scan from the oldest to the newest
		{
			std::string title = titleVector[i];
			std::string link = linkVector[i];
			if(isProccesdNewsetTitleMeet == true)
			{// during the scan of the titles from the oldest to the newest, the one which we have processed before is passed
			 // then the titles after are all new titles, we should generate element for them each.
				DocumentBuilder builder;
				builder.append("title",title);
				builder.append("link",link);
				Document document = builder.obj();
				Element element;
				element.document = document;
				element.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier();
				element.timestamp = TimestampGenerator::getCurrentTime();
				element.masterTag = false;
				this->rssElementList.push_back(element);
			}
		
			if(title == processedNewestTitle)
			{
				isProccesdNewsetTitleMeet = true;
			}
		}
	}
	//update processedNewestTitle
	if(this->urlTitleMap[linkurl] != titleVector.front())
	{
		this->urlTitleMap[linkurl] = titleVector.front();
		isUrlTitleMapValueChanged = true;
	}
	
}
// Convert a date-time string such as "2008/10/1  0:00:00" (year/month/day,
// blank(s), hour:minute:second) into a time_t using mktime, i.e. interpreted
// in the process's local time zone with the DST flag forced to 0.
// Malformed input is not rejected: each piece goes through atoi.
static time_t parsePeopleFlowTime(const std::string& text)
{
	const std::string::size_type blank = text.find(' ');
	std::string date = text.substr(0, blank);
	std::string clock = text.substr(blank + 1); // atoi skips any extra leading blanks

	const std::string year = date.substr(0, date.find('/'));
	date = date.substr(date.find('/') + 1);
	const std::string month = date.substr(0, date.find('/'));
	const std::string day = date.substr(date.find('/') + 1);

	const std::string hour = clock.substr(0, clock.find(':'));
	clock = clock.substr(clock.find(':') + 1);
	const std::string minute = clock.substr(0, clock.find(':'));
	const std::string second = clock.substr(clock.find(':') + 1);

	struct tm t = {}; // zero every field so mktime never reads indeterminate values
	t.tm_year = atoi(year.c_str()) - 1900; // tm_year counts years since 1900
	t.tm_mon = atoi(month.c_str()) - 1;    // tm_mon counts months since January (0-11)
	t.tm_mday = atoi(day.c_str());
	t.tm_hour = atoi(hour.c_str());
	t.tm_min = atoi(minute.c_str());
	t.tm_sec = atoi(second.c_str());
	t.tm_isdst = 0;
	return mktime(&t);
}

// Read the next comma-separated record from the current people-flow file and
// fill `element` with a document built from it.
//
// Columns are consumed in this order: personid, version, trip number, time,
// longitude, latitude, gender, age, home, occupation, destination, two
// expanding coefficients, transportation (terminated by '\n'). Only personid,
// time, longitude, latitude, gender, age, home and occupation are stored.
//
// When the current file is exhausted the next file in peopleFlowFileList is
// opened; running past the last file trips the assert. boost::lexical_cast
// throws if personid is not an integer.
void PeopleFlowStreamInput::getNextElement(Element& element) 
{
	if(this->currentFileStream.peek()==EOF)
	{
		// Current file is exhausted: advance to the next one in the list.
		this->currentFileStream.close();
		this->currentPeopleFlowFileListIterator++;
		assert(this->currentPeopleFlowFileListIterator!= this->peopleFlowFileList.end());
		const std::string& peopleFlowFile = *this->currentPeopleFlowFileListIterator;
		// peek() at end-of-file leaves eofbit set, and pre-C++11 libraries do
		// not reset it in open(); clear it so reads on the new file succeed.
		this->currentFileStream.clear();
		this->currentFileStream.open(peopleFlowFile.c_str(), std::ifstream::in);
	}

	std::string value;

	std::string personid;
	std::getline(currentFileStream, personid, ',');

	std::getline(currentFileStream, value, ','); // version (unused)
	std::getline(currentFileStream, value, ','); // trip number (unused)

	std::string time;
	std::getline(currentFileStream, time, ',');
	const time_t timestamp = parsePeopleFlowTime(time);

	// Initialise the coordinates: a failed extraction leaves the target
	// untouched on pre-C++11 libraries.
	std::getline(currentFileStream, value, ',');
	float longitude = 0.0f;
	std::stringstream longitudeStream(value);
	longitudeStream >> longitude;

	std::getline(currentFileStream, value, ',');
	float latitude = 0.0f;
	std::stringstream latitudeStream(value);
	latitudeStream >> latitude;

	std::string gender;
	std::getline(currentFileStream, gender, ',');

	std::string age;
	std::getline(currentFileStream, age, ',');

	std::string home;
	std::getline(currentFileStream, home, ',');

	std::string occupation;
	std::getline(currentFileStream, occupation, ',');

	// Remaining columns are not stored but must be consumed so the stream is
	// positioned at the start of the next record.
	std::getline(currentFileStream, value, ',');  // destination
	std::getline(currentFileStream, value, ',');  // expanding coefficient
	std::getline(currentFileStream, value, ',');  // expanding coefficient 2
	std::getline(currentFileStream, value, '\n'); // transportation (end of record)

	//create document for this line
	DocumentBuilder documentBuilder;
	documentBuilder.append("personid",boost::lexical_cast<int>(personid) );
	documentBuilder.append("time",static_cast<long long>(timestamp));
	documentBuilder.append("longitude",longitude);
	documentBuilder.append("latitude",latitude);
	documentBuilder.append("gender",gender);
	documentBuilder.append("age",age);
	documentBuilder.append("home",home);
	documentBuilder.append("occupation",occupation);

	element.timestamp = static_cast<Timestamp>(timestamp);
	element.mark = PLUS_MARK;
	element.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier();
	element.document = documentBuilder.obj();
	element.masterTag = false;
}