/**
 * Returns the set of column names for this column.
 *
 * When an AST is attached, the work is delegated to the getColumns()
 * overload that takes the AST; without an AST the result is an empty set.
 */
const stringSet Column::getColumns() const
{
	if (this->ast != NULL) {
		return getColumns(this->ast);
	}

	/* no AST attached, nothing to report */
	return stringSet();
}
void TableManager::aggregate(columnVector aggregateColumns, columnVector summaryColumns, Filter &filter) { std::vector<stringSet> colIntersect; stringSet aCols, partCols; /* get names (eXidYYY) of all aggregating columns */ for (auto col: aggregateColumns) { for (auto name: col->getColumns()) { aCols.insert(name); } } Table *table; ibis::partList parts; size_t size = 0; /* get names (eXidYYY) of all summary columns */ stringSet sCols; for (auto col: summaryColumns) { for (auto name: col->getColumns()) { int begin = name.find_first_of('(') + 1; int end = name.find_first_of(')'); std::string tmp = name.substr(begin, end-begin); if (tmp != "*") { /* ignore column * used for flows aggregation */ sCols.insert(tmp); } } } size = this->parts.size(); /* filter out parts without summary columns */ for (size_t i = 0; i < this->parts.size(); ++i) { stringSet partColumns; Utils::progressBar( "Aggregating [1/2] ", " ", size, i ); for (size_t j = 0; j < this->parts[i]->columnNames().size(); j++) { partColumns.insert(this->parts[i]->columnNames()[j]); } /* compute set difference */ stringSet difference; std::set_difference(sCols.begin(), sCols.end(), partColumns.begin(), partColumns.end(), std::inserter(difference, difference.begin())); /* When all summary columns are in current part, difference is empty */ if (difference.empty()) { parts.push_back(this->parts[i]); } else { std::cerr << "Ommiting part " << this->parts[i]->currentDataDir() << ", does not have column '" << *difference.begin() << "'" << std::endl; } } size = parts.size(); /* go over all parts and build vector of intersection between part columns and aggregation columns */ /* put together the parts that have same intersection - this ensures for example that ipv4 and ipv6 are aggregate separately by default */ for (size_t i = 0; i < parts.size(); i++) { /* put part columns to set */ for (size_t j = 0; j < parts[i]->columnNames().size(); j++) { partCols.insert(parts[i]->columnNames()[j]); } /* initialize intersection 
result stringSet */ colIntersect.push_back(stringSet()); /* make an intersection */ std::set_intersection(partCols.begin(), partCols.end(), aCols.begin(), aCols.end(), std::inserter(colIntersect[i], colIntersect[i].begin())); #ifdef DEBUG std::cerr << "Intersection has " << colIntersect[i].size() << " columns" << std::endl; std::cerr << "Intersect columns: "; for (stringSet::const_iterator it = colIntersect[i].begin(); it != colIntersect[i].end(); it++) { std::cerr << *it << ", "; } std::cerr << std::endl; #endif /* cleanup for next iteration */ partCols.clear(); Utils::progressBar( "Aggregating [2/2] ", " ", size, i ); } /* group parts with same intersection to one table */ ibis::partList pList; size_t partsCount = parts.size(); bool used[partsCount]; int iterPos = 0; /* initialise used array */ for (size_t i = 0; i < partsCount; i++) { used[i] = false; } /* go over all parts (theirs intersections), empty intersections are ignored */ for (std::vector<stringSet>::const_iterator outerIter = colIntersect.begin(); outerIter != colIntersect.end(); outerIter++) { /* work with current intersection only if it has not been used before */ /* empty intersections are allowed - we might want to show sum of data that do not match (or query sum(pkt)) */ if (used[iterPos]) { iterPos++; continue; } /* add current part */ used[iterPos] = true; pList.push_back(parts.at(iterPos)); int curPos = iterPos; /* add all parts that have same columns as current part and are not already used */ for (std::vector<stringSet>::const_iterator it = outerIter; it != colIntersect.end(); it++) { if (used[curPos]) { curPos++; continue; } /* compute set difference */ stringSet difference; std::set_symmetric_difference(outerIter->begin(), outerIter->end(), it->begin(), it->end(), std::inserter(difference, difference.begin())); /* When sets are equal, difference is empty */ if (difference.empty()) { /* check that parts have same column types for aggregate columns and issue a warning if not */ /* go over 
aggregate columns and check the types */ /* TODO check that this is not needed for summary columns */ for (stringSet::const_iterator strIter = (*outerIter).begin(); strIter != (*outerIter).end(); strIter++) { int pos1 = -1, pos2 = -1; /* find index of the column in both parts */ for (unsigned int i = 0; i< parts.at(iterPos)->columnTypes().size(); i++) { if (*strIter == parts.at(iterPos)->columnNames()[i]) { pos1 = i; break; } } for (unsigned int i = 0; i< parts.at(curPos)->columnTypes().size(); i++) { if (*strIter == parts.at(curPos)->columnNames()[i]) { pos2 = i; break; } } /* The columns with given name always exist, so there is no real need to check for '-1' value. This is merely done to avoid programming errors. */ if (pos1 < 0 || pos2 < 0) { std::cerr << "Error: an unexpected error occurred while verifying data types!" << std::endl; break; } /* test that data types are same */ if (parts.at(iterPos)->columnTypes()[pos1] != parts.at(curPos)->columnTypes()[pos2]) { std::cerr << "Warning: column '" << *strIter << "' has different data types in different parts! 
(" << parts.at(iterPos)->name() << ", " << parts.at(curPos)->name() << ")" << std::endl; } } /* add table to list */ pList.push_back(parts.at(curPos)); used[curPos] = true; } /* don't forget to increment the counter */ curPos++; } #ifdef DEBUG std::cerr << "Creating table from " << pList.size() << " part(s)" << std::endl; std::cerr << "[" << iterPos << "]Aggregate columns: "; for (stringSet::const_iterator it = outerIter->begin(); it != outerIter->end(); it++) { std::cerr << *it << ", "; } std::cerr << std::endl; #endif /* create table for each partList */ if (!outerIter->empty() || aggregateColumns.empty()) { table = new Table(pList); columnVector aggCols; for (auto col: aggregateColumns) { bool isThere = true; for (auto name: col->getColumns()) { if (outerIter->find(name) == outerIter->end()) { isThere = false; break; } } if (isThere) { aggCols.push_back(col); } } /* aggregate the table, use only present aggregation columns */ table->aggregateWithFunctions(aggCols, summaryColumns, filter); table->orderBy(this->orderColumns, this->orderAsc); this->tables.push_back(table); } /* and clear the part list */ pList.clear(); iterPos++; } }
/** * Добавляет узел в граф и строит связааные с ним зависимости(дуги) * Исходим из предположения, что в акутальных данных содержаться * детерминированныее данные, в том числе и скаляры-массивы. */ void Graph::addNode(const Node &_arg_) { //cout<<"node added"<<endl; stringSet local; // local edge vars stringSet other; // other vars to arg Node arg = _arg_; // will be changed arg.setActual(createActual(arg. getIn()), createActual(arg. getOut())); // check for return in statement bool containReturn = false; /* { cout<<"Node #"<<arg. getNumber()<<" statement: "; arg.getAction().print(cout, ""); cout<<"Node #"<<arg. getNumber()<<" (in):"<<arg.getActualIn()<<endl; cout<<"Node #"<<arg. getNumber()<<" (out):"<<arg.getActualOut()<<endl; } */ if (checkStatement(arg.getAction(), false, false) == -2) containReturn = true; /* * Если в предложении есть возврат, то это зависимость по управлению * и необходимо, что бы все действия были завершены до действия возврата * это позволит сохранить семантику исполнения программы. */ nodes_num++; // first phase stringSet foreverOut = arg. getActualOut(); // arg's out stringSet readOut = arg. getActualOut(); // current read for consistency stringSet readIn = arg. getActualIn(); // current read for in TNodeSet::iterator beg = nodes. begin(); TNodeSet::iterator end = nodes. end(); unsigned long layer = init_layer - 1; bool findDep = false; while (beg != end) { Node cur = (*--end); // forward dep if (isForwardDepend(readIn, cur. getActualOut())) { findDep = true; // form local createForwardTransition(cur. getActualOut(), readIn, local, other); //cout<<"Forward :"<<cur. getNumber()<<"->"<<arg. getNumber()<<":"<<local<<endl; // cur. actual(), readIn() - in // local, other, actual - out readIn = other; other. clear(); // для использования в ЛЮБОЙ следующий раз } else { // backward dep if (isBackwardDepend(foreverOut, cur. getActualIn())) { //cout<<"Backward :"<<cur. getNumber()<<"->"<<arg. 
getNumber()<<endl; findDep = true; } } // out dep if (isOutwardDepend(foreverOut, cur. getActualOut())) { findDep = true; if (arg.getNumber() == 3) { //cout<<"Extended info"<<endl; //cout<<"Forever out : "<<foreverOut<<endl; //cout<<"cur out Node #"<<cur. getNumber()<<": "<<cur.getActualOut()<<endl; //cout<<"readOut: "<<readOut<<endl; //cout<<"Local: "<<local<<endl; //cout<<"Node #"<<arg. getNumber()<<" (in):"<<arg.getActualIn()<<endl; //cout<<"Node #"<<arg. getNumber()<<" (out):"<<arg.getActualOut()<<endl; } createOutwardTransition(cur. getActualOut(), readOut, local, other); //cout<<"Outward :"<<cur. getNumber()<<"->"<<arg. getNumber()<<":"<<local<<endl; // cur. actual(), arg. getIn() - in // local, other, actual - inout readOut = other; other. clear(); } if (findDep) { // create edge Edge edge(arg. getNumber(), cur. getNumber(), packArrays(local)); //edge. print(cout); // cout if (!local. empty()) ; //cout<<"Transit :"<<local<<endl; addEdge(edge); local. clear(); arg.addIn(edge. getNumber()); (*end). addOut(edge. getNumber()); if (cur. getLayer() > layer) layer = cur. getLayer(); findDep = false; } else { if (containReturn) { // create empty edge Edge edge(arg. getNumber(), cur. getNumber(), stringSet()); addEdge(edge); arg.addIn(edge. getNumber()); (*end). addOut(edge. getNumber()); if (cur. getLayer() > layer) layer = cur. getLayer(); } // else - do nothing } } // insert node arg. setLayer(layer + 1); nodes. push_back(arg); }