bool JsonClientServer::registerQuery(std::string jaqlQuery, void (*callBackFunction)(Element& element) ) { //create client socket input to get data from server by socket, the schema information is used to check input ClientIOManager::getInstance()->addStreamInput(CLIENT_IP,CLIENT_PORT,callBackFunction); //generate register query command DocumentBuilder registerQueryCommandbuilder; registerQueryCommandbuilder.append("command_type","register_query"); registerQueryCommandbuilder.append("query_content",jaqlQuery); DocumentBuilder outputSpecificationBuilder; outputSpecificationBuilder.append("output_type","socket_output"); outputSpecificationBuilder.append("output_ip",CLIENT_IP); outputSpecificationBuilder.append("output_port",CLIENT_PORT); Document outputSpecificationDocument = outputSpecificationBuilder.obj(); registerQueryCommandbuilder.appendObject("output_specification",outputSpecificationDocument.objdata(),outputSpecificationDocument.objsize()); Document registerQueryCommandDocument = registerQueryCommandbuilder.obj(); //register query on server bool bl = sendCommand(serverIp,serverPort,registerQueryCommandDocument); return bl; }
//parse the RSS feed which is xml, get each items, generate element for new item and insert into elementList void RssStreamInput::generateElementByRss(std::string& rssXmlContent) { boost::property_tree::ptree pt; stringstream ss; ss << rssXmlContent; read_xml(ss, pt); boost::property_tree::ptree channelPt; channelPt = pt.get_child("rss.channel"); std::string linkurl = channelPt.get_child("link").data(); std::vector<std::string> titleVector; // save the titles in the RSS feed page std::vector<std::string> linkVector; // save the links in the RSS feed page //mark if the newest title we processed is still in this RSS feed page bool isProcessedNewestTitleStillInThisPage = false; std::string processedNewestTitle; std::map<std::string, std::string >::iterator it = this->urlTitleMap.find(linkurl); if(it != this->urlTitleMap.end()) { processedNewestTitle = it->second; } else { processedNewestTitle = ""; } BOOST_FOREACH(boost::property_tree::ptree::value_type &v1, channelPt) { if(v1.first=="item") { boost::property_tree::ptree itemPt = v1.second; //get the title and link for each item std::string title; std::string link; title = itemPt.get_child("title").data(); link = itemPt.get_child("link").data(); //save title and link titleVector.push_back(title); linkVector.push_back(link); //see if any exists if(processedNewestTitle == title) { isProcessedNewestTitleStillInThisPage = true; } } } //if new titles exist, generate corresponding elements if(isProcessedNewestTitleStillInThisPage == false) {//no title has been processed, generate element for each one int number = titleVector.size(); for( int i = number-1; i >=0 ; i--)//reverse scan, we scan from the oldest to the newest { std::string title = titleVector[i]; std::string link = linkVector[i]; DocumentBuilder builder; builder.append("title",title); builder.append("link",link); Document document = builder.obj(); Element element; element.document = document; element.id = 
DocumentIdentifierGenerator::generateNewDocumentIdentifier(); element.timestamp = TimestampGenerator::getCurrentTime(); this->rssElementList.push_back(element); } } else {//we have processed some titles in this page, generate element for the new titles int number = titleVector.size(); bool isProccesdNewsetTitleMeet = false; for( int i = number-1; i >=0 ; i--)//reverse scan, we scan from the oldest to the newest { std::string title = titleVector[i]; std::string link = linkVector[i]; if(isProccesdNewsetTitleMeet == true) {// during the scan of the titles from the oldest to the newest, the one which we have processed before is passed // then the titles after are all new titles, we should generate element for them each. DocumentBuilder builder; builder.append("title",title); builder.append("link",link); Document document = builder.obj(); Element element; element.document = document; element.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier(); element.timestamp = TimestampGenerator::getCurrentTime(); element.masterTag = false; this->rssElementList.push_back(element); } if(title == processedNewestTitle) { isProccesdNewsetTitleMeet = true; } } } //update processedNewestTitle if(this->urlTitleMap[linkurl] != titleVector.front()) { this->urlTitleMap[linkurl] = titleVector.front(); isUrlTitleMapValueChanged = true; } }
void GroupAggregationOperator::execution() { #ifdef DEBUG std::cout<<"===================operator begin================="<<std::endl; std::cout<<"operatorid : "<<this->getId()<<std::endl; #endif assert(this->getInputQueueList().size()==1); assert(this->getOutputQueueList().size()==1); boost::shared_ptr<QueueEntity>inputQueue = this->getInputQueueList().front(); boost::shared_ptr<QueueEntity>outputQueue = this->getOutputQueueList().front(); while(1) { Element inputElement; if(outputQueue->isFull()) { break; } if(inputQueue->isEmpty()) { #ifdef SMART // SmartScheduler::getInstance()->informOperatorSuspended(this ); #else // Scheduler::getInstance()->informOperatorSuspended(this ); #endif break; } inputQueue->dequeue(inputElement); if(inputElement.mark == PLUS_MARK) { Document& inputDocument = inputElement.document; assert(inputDocument.hasField(this->groupByAttributeName.c_str())); //std::cout<<inputDocument<<std::endl; if(this->relationSynopsis->isFull()) { throw runtime_error("left synopsis is full"); } //insert into the relation synopsis this->relationSynopsis->insertElement(inputElement); //deal with the groupmap, if the group already exists, insert the element. //if not exists, create the group and insert the element. 
std::string groupByAttributeValue = inputDocument.getField(this->groupByAttributeName).valuestr(); std::set<DocumentId>currentGroupSet; std::map<std::string, std::set<DocumentId> > ::iterator it = groupMap.find(groupByAttributeValue); if(it!=this->groupMap.end()) {// group already exist std::set<DocumentId>& groupSet = it->second; currentGroupSet = groupSet; groupSet.insert(inputElement.id); } else {// group not exist std::set<DocumentId> groupSet; groupSet.insert(inputElement.id); this->groupMap.insert(make_pair(groupByAttributeValue,groupSet)); currentGroupSet = groupSet; } //calculate the aggregation value boost::shared_ptr<SetElementIterator> elementIterator; elementIterator.reset(new SetElementIterator(this->relationSynopsis,currentGroupSet)); elementIterator->initial(); Element groupElement; double average=0,sum=0,count=0; while((!outputQueue->isFull())&&elementIterator->getNext(groupElement)) { assert(groupElement.document.getField(this->aggregationAttributeName).isNumber()); double dl = groupElement.document.getField(this->aggregationAttributeName).Number(); sum += dl; count ++; } average = sum/count; //generate the result element Document newDocument; DocumentBuilder newDocumentBuilder; newDocumentBuilder.appendElements(inputDocument); Element newElement; if(this->aggregateOperation==AVG) { newDocumentBuilder.appendNumber(this->resultAttributeName,average); } else if(this->aggregateOperation==SUM) { newDocumentBuilder.appendNumber(this->resultAttributeName,sum); } else if(this->aggregateOperation==COUNT) { newDocumentBuilder.appendNumber(this->resultAttributeName,count); } newDocument = newDocumentBuilder.obj(); newElement.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier(); newElement.document = newDocument; newElement.mark = PLUS_MARK; newElement.timestamp = inputElement.timestamp; newElement.masterTag = inputElement.masterTag; //insert into lineage synopsis if(this->lineageSynopsis->isFull()) { throw runtime_error("lineage synopsis is 
full"); } Lineage lineage; lineage.lineageNumber = 1; lineage.lineageDocumentId[0] = inputElement.id; this->lineageSynopsis->insertLineage(lineage,newElement); //generate new output to the output queue if(outputQueue->isFull()) { throw std::runtime_error("output queue is full"); } outputQueue->enqueue(newElement); } else if(inputElement.mark == MINUS_MARK) { //delete from relation synopsis relationSynopsis->deleteElement(inputElement); //delete from group map std::string groupByAttributeValue = inputElement.document.getField(this->groupByAttributeName).String(); std::map<std::string, std::set<DocumentId> >::iterator it = this->groupMap.find(groupByAttributeValue); assert(it!= this->groupMap.end()); std::set<DocumentId>& documentIdSet = it->second; documentIdSet.erase(inputElement.id); //delete from lineage synopsis Element elementInSynopsis; Lineage lineage; lineage.lineageNumber = 1; lineage.lineageDocumentId[0] = inputElement.id; int ret = lineageSynopsis->getAndDeleteElement(lineage,elementInSynopsis); if(ret==false) { //lineage not found, the minus tuple has already been generated //we needn't generate the minus tuple again continue; //throw std::runtime_error("lineage not exist"); } //generate minus element Element outputElement; outputElement.mark = MINUS_MARK; outputElement.document = elementInSynopsis.document; outputElement.timestamp = inputElement.timestamp; outputElement.id = elementInSynopsis.id; outputElement.masterTag = inputElement.masterTag; outputQueue->enqueue(outputElement); } } #ifdef DEBUG std::cout<<"===================operator over================="<<std::endl; #endif }
// Converts a people-flow time field such as "2008/10/1 0:00:00"
// (year/month/day hour:minute:second) into a time_t using mktime(), i.e. the
// fields are interpreted as local time with tm_isdst = 0.
// Components that cannot be read are treated as 0.
static time_t parsePeopleFlowTime(const std::string& text)
{
    int year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0;
    char separator = 0;   // swallows the '/' and ':' between the numbers
    std::stringstream in(text);
    in >> year >> separator >> month >> separator >> day
       >> hour >> separator >> minute >> separator >> second;

    struct tm t = {};            // zero every field, including platform-specific ones
    t.tm_year = year - 1900;     // tm_year counts years since 1900
    t.tm_mon = month - 1;        // tm_mon is 0-based (0 = January)
    t.tm_mday = day;
    t.tm_hour = hour;
    t.tm_min = minute;
    t.tm_sec = second;
    t.tm_isdst = 0;
    return mktime(&t);
}

/**
 * Reads the next record from the current people-flow CSV file and fills
 * `element` from it.
 *
 * When the current file is exhausted the stream is closed and the next file
 * of peopleFlowFileList is opened; running out of files trips an assert.
 *
 * Each record is one line of 14 comma-separated fields, in this order:
 * personid, version, trip number, time, longitude, latitude, gender, age,
 * home, occupation, destination, expanding coefficient, expanding
 * coefficient 2, transportation. Only personid, time, longitude, latitude,
 * gender, age, home and occupation are put into the document; the other
 * fields are read and discarded.
 *
 * boost::lexical_cast throws if personid is not an integer.
 *
 * @param element  output: receives the document, timestamp (from the time
 *                 field), PLUS_MARK, a fresh id and masterTag = false.
 */
void PeopleFlowStreamInput::getNextElement(Element& element)
{
    if (this->currentFileStream.peek() == EOF)
    {
        this->currentFileStream.close();
        // Drop the eof/fail bits left by peek(); older library versions do
        // not reset them when the stream is reopened.
        this->currentFileStream.clear();
        this->currentPeopleFlowFileListIterator++;
        assert(this->currentPeopleFlowFileListIterator != this->peopleFlowFileList.end());
        std::string peopleFlowFile = *this->currentPeopleFlowFileListIterator;
        this->currentFileStream.open(peopleFlowFile.c_str(), std::ifstream::in);
    }

    std::string personid;
    std::string time;
    std::string longitudestr;
    std::string latitudestr;
    std::string gender;
    std::string age;
    std::string home;
    std::string occupation;
    std::string ignored;   // fields that are consumed but not used

    std::getline(this->currentFileStream, personid, ',');
    std::getline(this->currentFileStream, ignored, ',');      // version
    std::getline(this->currentFileStream, ignored, ',');      // trip number
    std::getline(this->currentFileStream, time, ',');
    std::getline(this->currentFileStream, longitudestr, ',');
    std::getline(this->currentFileStream, latitudestr, ',');
    std::getline(this->currentFileStream, gender, ',');
    std::getline(this->currentFileStream, age, ',');
    std::getline(this->currentFileStream, home, ',');
    std::getline(this->currentFileStream, occupation, ',');
    std::getline(this->currentFileStream, ignored, ',');      // destination
    std::getline(this->currentFileStream, ignored, ',');      // expanding coefficient
    std::getline(this->currentFileStream, ignored, ',');      // expanding coefficient 2
    std::getline(this->currentFileStream, ignored, '\n');     // transportation, ends the line

    // change time from "2008/10/1 0:00:00" to timestamp
    const time_t timestamp = parsePeopleFlowTime(time);

    // Coordinates default to 0 when the text is not a number.
    float longitude = 0.0f;
    std::stringstream longitudeStream(longitudestr);
    longitudeStream >> longitude;

    float latitude = 0.0f;
    std::stringstream latitudeStream(latitudestr);
    latitudeStream >> latitude;

    // create document for this line
    DocumentBuilder documentBuilder;
    documentBuilder.append("personid", boost::lexical_cast<int>(personid));
    documentBuilder.append("time", static_cast<long long>(timestamp));
    documentBuilder.append("longitude", longitude);
    documentBuilder.append("latitude", latitude);
    documentBuilder.append("gender", gender);
    documentBuilder.append("age", age);
    documentBuilder.append("home", home);
    documentBuilder.append("occupation", occupation);
    Document document = documentBuilder.obj();

    element.timestamp = static_cast<Timestamp>(timestamp);
    element.mark = PLUS_MARK;
    element.id = DocumentIdentifierGenerator::generateNewDocumentIdentifier();
    element.document = document;
    element.masterTag = false;
}