示例#1
0
bool JsonClientServer::registerQuery(std::string jaqlQuery,  void (*callBackFunction)(Element& element) )
{
	//create client socket input to get data from server by socket, the schema information is used to check input
	ClientIOManager::getInstance()->addStreamInput(CLIENT_IP,CLIENT_PORT,callBackFunction);
	
	//generate register query command
	DocumentBuilder registerQueryCommandbuilder;
	registerQueryCommandbuilder.append("command_type","register_query");
	registerQueryCommandbuilder.append("query_content",jaqlQuery);
	DocumentBuilder outputSpecificationBuilder;
	outputSpecificationBuilder.append("output_type","socket_output");
	outputSpecificationBuilder.append("output_ip",CLIENT_IP);
	outputSpecificationBuilder.append("output_port",CLIENT_PORT);
	Document outputSpecificationDocument = outputSpecificationBuilder.obj();
	registerQueryCommandbuilder.appendObject("output_specification",outputSpecificationDocument.objdata(),outputSpecificationDocument.objsize());
	Document registerQueryCommandDocument = registerQueryCommandbuilder.obj();
	
	//register query on server 
	bool bl = sendCommand(serverIp,serverPort,registerQueryCommandDocument);
	return bl;
}
示例#2
0
//parse the RSS feed which is xml, get each items, generate element for new item and insert into elementList
void RssStreamInput::generateElementByRss(std::string& rssXmlContent)
{
	boost::property_tree::ptree pt;  
	stringstream ss; 
	ss << rssXmlContent;
	read_xml(ss, pt);
	boost::property_tree::ptree channelPt;
	channelPt = pt.get_child("rss.channel");
	std::string linkurl = channelPt.get_child("link").data();
	
	std::vector<std::string> titleVector; // save the titles in the RSS feed page
	std::vector<std::string> linkVector;  // save the links in the RSS feed page
	
	//mark if the newest title we processed is still in this RSS feed page  
	bool isProcessedNewestTitleStillInThisPage = false;

	std::string processedNewestTitle;
	std::map<std::string, std::string >::iterator it = this->urlTitleMap.find(linkurl);
	if(it != this->urlTitleMap.end())
	{
		processedNewestTitle = it->second;
	}
	else
	{
		processedNewestTitle = "";
	}

	BOOST_FOREACH(boost::property_tree::ptree::value_type &v1, channelPt)
	{
		if(v1.first=="item")
		{
			
			boost::property_tree::ptree itemPt = v1.second;
			//get the title and link for each item
			std::string title;
			std::string link;
			title = itemPt.get_child("title").data();
			link = itemPt.get_child("link").data();
			//save title and link 
			titleVector.push_back(title);
			linkVector.push_back(link);
			//see if any exists
			if(processedNewestTitle == title)
			{
				isProcessedNewestTitleStillInThisPage = true;
			}
		}
	}
	//if new titles exist, generate corresponding elements
	if(isProcessedNewestTitleStillInThisPage == false)
	{//no title has been processed, generate element for each one
		int number = titleVector.size();
		for( int i = number-1; i >=0 ; i--)//reverse scan, we scan from the oldest to the newest
		{
			std::string title = titleVector[i];
			std::string link = linkVector[i];
			DocumentBuilder builder;
			builder.append("title",title);
			builder.append("link",link);
			Document document = builder.obj();
			Element element;
			element.document = document;
			element.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier();
			element.timestamp = TimestampGenerator::getCurrentTime();
			this->rssElementList.push_back(element);
		}
	}
	else
	{//we have processed some titles in this page, generate element for the new titles
		int number = titleVector.size();
		bool isProccesdNewsetTitleMeet = false;
		for( int i = number-1; i >=0 ; i--)//reverse scan, we scan from the oldest to the newest
		{
			std::string title = titleVector[i];
			std::string link = linkVector[i];
			if(isProccesdNewsetTitleMeet == true)
			{// during the scan of the titles from the oldest to the newest, the one which we have processed before is passed
			 // then the titles after are all new titles, we should generate element for them each.
				DocumentBuilder builder;
				builder.append("title",title);
				builder.append("link",link);
				Document document = builder.obj();
				Element element;
				element.document = document;
				element.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier();
				element.timestamp = TimestampGenerator::getCurrentTime();
				element.masterTag = false;
				this->rssElementList.push_back(element);
			}
		
			if(title == processedNewestTitle)
			{
				isProccesdNewsetTitleMeet = true;
			}
		}
	}
	//update processedNewestTitle
	if(this->urlTitleMap[linkurl] != titleVector.front())
	{
		this->urlTitleMap[linkurl] = titleVector.front();
		isUrlTitleMapValueChanged = true;
	}
	
}
void GroupAggregationOperator::execution()
{

#ifdef DEBUG
	std::cout<<"===================operator begin================="<<std::endl;
	std::cout<<"operatorid : "<<this->getId()<<std::endl;
#endif
	assert(this->getInputQueueList().size()==1);
	assert(this->getOutputQueueList().size()==1);
	boost::shared_ptr<QueueEntity>inputQueue = this->getInputQueueList().front();
	boost::shared_ptr<QueueEntity>outputQueue = this->getOutputQueueList().front();
	
	while(1)
	{
		Element inputElement;
		if(outputQueue->isFull())
		{
			break;
		}
		if(inputQueue->isEmpty())
		{
#ifdef SMART
//			SmartScheduler::getInstance()->informOperatorSuspended(this );
#else
//			Scheduler::getInstance()->informOperatorSuspended(this );
#endif
			break;
		}
		inputQueue->dequeue(inputElement);

		if(inputElement.mark == PLUS_MARK)
		{
			
			Document& inputDocument = inputElement.document;
			assert(inputDocument.hasField(this->groupByAttributeName.c_str()));
			//std::cout<<inputDocument<<std::endl;
			if(this->relationSynopsis->isFull())
			{
				throw runtime_error("left synopsis is full");
			}
			//insert into the relation synopsis
			this->relationSynopsis->insertElement(inputElement);
			
			//deal with the groupmap, if the group already exists, insert the element. 
			//if not exists, create the group and insert the element.
			std::string groupByAttributeValue = inputDocument.getField(this->groupByAttributeName).valuestr();
			std::set<DocumentId>currentGroupSet;
			std::map<std::string, std::set<DocumentId> > ::iterator it = groupMap.find(groupByAttributeValue);
			if(it!=this->groupMap.end())
			{// group already exist
				 std::set<DocumentId>& groupSet =  it->second;
				 currentGroupSet = groupSet;
				 groupSet.insert(inputElement.id);
			}
			else
			{// group not exist
				std::set<DocumentId> groupSet;
				groupSet.insert(inputElement.id);
				this->groupMap.insert(make_pair(groupByAttributeValue,groupSet));
				currentGroupSet = groupSet;
			}
			//calculate the aggregation value
			boost::shared_ptr<SetElementIterator> elementIterator;
			elementIterator.reset(new SetElementIterator(this->relationSynopsis,currentGroupSet));
			elementIterator->initial();
			Element groupElement;
			double average=0,sum=0,count=0;
			while((!outputQueue->isFull())&&elementIterator->getNext(groupElement))
			{
				assert(groupElement.document.getField(this->aggregationAttributeName).isNumber());
				double dl = groupElement.document.getField(this->aggregationAttributeName).Number();
				sum += dl;
				count ++;
			}   
			average = sum/count;
			//generate the result element
			Document newDocument;
			DocumentBuilder newDocumentBuilder;
			newDocumentBuilder.appendElements(inputDocument);
			Element newElement;
			if(this->aggregateOperation==AVG)
			{
				newDocumentBuilder.appendNumber(this->resultAttributeName,average);
			}
			else if(this->aggregateOperation==SUM)
			{
				newDocumentBuilder.appendNumber(this->resultAttributeName,sum);
			}
			else if(this->aggregateOperation==COUNT)
			{
				newDocumentBuilder.appendNumber(this->resultAttributeName,count);
			}	
			newDocument = newDocumentBuilder.obj();
			newElement.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier();
			newElement.document = newDocument;
			newElement.mark = PLUS_MARK;
			newElement.timestamp = inputElement.timestamp;
			newElement.masterTag = inputElement.masterTag;
			//insert into lineage synopsis
			if(this->lineageSynopsis->isFull())
			{
				throw runtime_error("lineage synopsis is full");
			}
			Lineage lineage;
			lineage.lineageNumber = 1;
			lineage.lineageDocumentId[0] = inputElement.id;
			this->lineageSynopsis->insertLineage(lineage,newElement);

			//generate new output to the output queue
			if(outputQueue->isFull())
			{
				throw std::runtime_error("output queue is full");
			}
			outputQueue->enqueue(newElement);

		}
		else if(inputElement.mark  == MINUS_MARK)
		{
			//delete from relation synopsis
			relationSynopsis->deleteElement(inputElement);
			//delete from group map
			std::string groupByAttributeValue = inputElement.document.getField(this->groupByAttributeName).String();
			std::map<std::string, std::set<DocumentId> >::iterator it = this->groupMap.find(groupByAttributeValue);
			assert(it!= this->groupMap.end());
			std::set<DocumentId>& documentIdSet = it->second;
			documentIdSet.erase(inputElement.id);

			//delete from lineage synopsis
			Element elementInSynopsis;
			Lineage lineage;
			lineage.lineageNumber = 1;
			lineage.lineageDocumentId[0] = inputElement.id;
			int ret = lineageSynopsis->getAndDeleteElement(lineage,elementInSynopsis);
			if(ret==false)
			{
				//lineage not found, the minus tuple has already been generated
				//we needn't generate the minus tuple again
				continue;
				//throw std::runtime_error("lineage not exist");
			}
			//generate minus element
			Element outputElement;
			outputElement.mark = MINUS_MARK;
			outputElement.document = elementInSynopsis.document;
			outputElement.timestamp = inputElement.timestamp;
			outputElement.id = elementInSynopsis.id;
			outputElement.masterTag = inputElement.masterTag;
			outputQueue->enqueue(outputElement);
		}
	}
#ifdef DEBUG
	std::cout<<"===================operator over================="<<std::endl;
#endif
}
// Reads the next field of a people-flow record: everything up to (and
// consuming) the given delimiter. Returns "" when nothing could be read.
static std::string readPeopleFlowField(std::istream& input, char delimiter)
{
	std::string field;
	std::getline(input, field, delimiter);
	return field;
}

// Converts a time string such as "2008/10/1  0:00:00" into a time_t using
// mktime(), i.e. interpreted as local time with the DST flag forced to 0.
static time_t parsePeopleFlowTime(const std::string& text)
{
	// Split "date time" at the first blank; atoi() below skips any further
	// leading blanks in the time part.
	const std::string::size_type blankPos = text.find(' ');
	std::string yearMonthDay = text.substr(0, blankPos);
	std::string hourMinuteSecond = text.substr(blankPos + 1);

	const std::string year = yearMonthDay.substr(0, yearMonthDay.find('/'));
	yearMonthDay = yearMonthDay.substr(yearMonthDay.find('/') + 1);
	const std::string month = yearMonthDay.substr(0, yearMonthDay.find('/'));
	const std::string day = yearMonthDay.substr(yearMonthDay.find('/') + 1);

	const std::string hour = hourMinuteSecond.substr(0, hourMinuteSecond.find(':'));
	hourMinuteSecond = hourMinuteSecond.substr(hourMinuteSecond.find(':') + 1);
	const std::string minute = hourMinuteSecond.substr(0, hourMinuteSecond.find(':'));
	const std::string second = hourMinuteSecond.substr(hourMinuteSecond.find(':') + 1);

	struct tm t = {};                        // zero every field mktime may look at
	t.tm_year = atoi(year.c_str()) - 1900;   // tm_year counts from 1900
	t.tm_mon = atoi(month.c_str()) - 1;      // tm_mon is 0-based (0 = January)
	t.tm_mday = atoi(day.c_str());
	t.tm_hour = atoi(hour.c_str());
	t.tm_min = atoi(minute.c_str());
	t.tm_sec = atoi(second.c_str());
	t.tm_isdst = 0;
	return mktime(&t);
}

// Reads one record (one line) from the current people-flow file and fills
// `element` with a PLUS_MARK element built from it.
//
// A record consists of 14 fields: 13 comma-terminated fields followed by a
// last field that runs to the end of the line. In order: personid, version,
// trip number, time, longitude, latitude, gender, age, home, occupation,
// destination, two expanding coefficients, transportation. Only personid,
// time, longitude, latitude, gender, age, home and occupation are stored in
// the document; the element timestamp is the parsed record time.
//
// When the current file is exhausted the next file of peopleFlowFileList is
// opened; running past the last file is a precondition violation (assert).
// boost::lexical_cast throws if personid is not an integer.
void PeopleFlowStreamInput::getNextElement(Element& element) 
{
	if(this->currentFileStream.peek()==EOF)
	{
		this->currentFileStream.close();
		// peek() at end of file leaves eofbit set and pre-C++11 open() does
		// not reset it, which would make every read from the next file fail.
		this->currentFileStream.clear();
		this->currentPeopleFlowFileListIterator++;
		assert(this->currentPeopleFlowFileListIterator!= this->peopleFlowFileList.end());
		std::string peopleFlowFile = *this->currentPeopleFlowFileListIterator;
		this->currentFileStream.open(peopleFlowFile.c_str(), std::ifstream::in);
	}

	// Fields must be consumed in file order, including the ones not used.
	const std::string personid = readPeopleFlowField(this->currentFileStream, ',');
	readPeopleFlowField(this->currentFileStream, ',');   // version (unused)
	readPeopleFlowField(this->currentFileStream, ',');   // trip number (unused)

	//change time from "2008/10/1  0:00:00" to timestamp
	const time_t timestamp = parsePeopleFlowTime(readPeopleFlowField(this->currentFileStream, ','));

	// Kept as float so the same DocumentBuilder::append overload is selected;
	// initialised so a malformed field yields 0 instead of an indeterminate value.
	float longitude = 0.0f;
	std::stringstream longitudeStream(readPeopleFlowField(this->currentFileStream, ','));
	longitudeStream >> longitude;

	float latitude = 0.0f;
	std::stringstream latitudeStream(readPeopleFlowField(this->currentFileStream, ','));
	latitudeStream >> latitude;

	const std::string gender = readPeopleFlowField(this->currentFileStream, ',');
	const std::string age = readPeopleFlowField(this->currentFileStream, ',');
	const std::string home = readPeopleFlowField(this->currentFileStream, ',');
	const std::string occupation = readPeopleFlowField(this->currentFileStream, ',');
	readPeopleFlowField(this->currentFileStream, ',');   // destination (unused)
	readPeopleFlowField(this->currentFileStream, ',');   // expanding coefficient (unused)
	readPeopleFlowField(this->currentFileStream, ',');   // expanding coefficient 2 (unused)
	readPeopleFlowField(this->currentFileStream, '\n');  // transportation (unused), ends the record

	//create document for this line
	DocumentBuilder documentBuilder;
	documentBuilder.append("personid",boost::lexical_cast<int>(personid) );
	documentBuilder.append("time",static_cast<long long>(timestamp));
	documentBuilder.append("longitude",longitude);
	documentBuilder.append("latitude",latitude);
	documentBuilder.append("gender",gender);
	documentBuilder.append("age",age);
	documentBuilder.append("home",home);
	documentBuilder.append("occupation",occupation);
	Document document = documentBuilder.obj();

	element.timestamp = static_cast<Timestamp>(timestamp);
	element.mark = PLUS_MARK;
	element.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier();
	element.document = document;
	element.masterTag = false;
}