Example #1
0
/*
 * Return the string set produced by the getColumns overload that takes this
 * column's AST; when the column has no AST an empty set is returned instead.
 */
const stringSet Column::getColumns() const
{
	/* an AST is attached: delegate to the overload working on it */
	if (this->ast != NULL) {
		return getColumns(this->ast);
	}

	/* no AST, nothing to report */
	return stringSet();
}
/**
 * Aggregate the managed parts and append the resulting tables to this->tables.
 *
 * The work is done in four steps:
 *  1. Collect the names returned by getColumns() of all aggregate columns and
 *     the names found inside the parentheses of all summary columns.
 *  2. Drop every part that lacks at least one summary column (a message
 *     naming the first missing column is written to stderr).
 *  3. For each remaining part compute the intersection of its column names
 *     with the aggregate column names.
 *  4. Group parts with identical intersections into one Table (so that e.g.
 *     ipv4 and ipv6 parts are aggregated separately), aggregate it using only
 *     the aggregate columns fully covered by the intersection, order it and
 *     store it in this->tables.
 *
 * A group with an empty intersection produces a table only when no aggregate
 * columns were requested at all.
 *
 * Ownership of the created Table objects is handed to this->tables.
 *
 * \param aggregateColumns columns to aggregate by
 * \param summaryColumns   summary columns; their names are expected to look
 *                         like "func(column)"
 * \param filter           filter forwarded to Table::aggregateWithFunctions
 */
void TableManager::aggregate(columnVector aggregateColumns, columnVector summaryColumns, Filter &filter)
{
	/* get names (eXidYYY) of all aggregating columns */
	stringSet aCols;
	for (const auto &col: aggregateColumns) {
		for (const auto &name: col->getColumns()) {
			aCols.insert(name);
		}
	}

	/* get names (eXidYYY) of all summary columns */
	stringSet sCols;
	for (const auto &col: summaryColumns) {
		for (const auto &name: col->getColumns()) {
			/*
			 * Take the text between '(' and ')'. Without '(' the text starts at
			 * the beginning of the name; without a ')' located at or after the
			 * start it runs to the end of the name.
			 */
			const std::string::size_type open = name.find('(');
			const std::string::size_type begin = (open == std::string::npos) ? 0 : open + 1;
			const std::string::size_type close = name.find(')');
			const std::string::size_type length =
					(close == std::string::npos || close < begin) ? std::string::npos : close - begin;

			const std::string tmp = name.substr(begin, length);
			if (tmp != "*") { /* ignore column * used for flows aggregation */
				sCols.insert(tmp);
			}
		}
	}

	/* filter out parts without summary columns */
	ibis::partList parts;
	size_t size = this->parts.size();
	for (size_t i = 0; i < size; ++i) {
		Utils::progressBar( "Aggregating [1/2]  ", "   ", size, i );

		/* fetch the column names once per part, not once per loop step */
		const auto &names = this->parts[i]->columnNames();
		stringSet partColumns;
		for (size_t j = 0; j < names.size(); j++) {
			partColumns.insert(names[j]);
		}

		/* compute set difference */
		stringSet difference;
		std::set_difference(sCols.begin(), sCols.end(), partColumns.begin(),
				partColumns.end(), std::inserter(difference, difference.begin()));

		/* When all summary columns are in current part, difference is empty */
		if (difference.empty()) {
			parts.push_back(this->parts[i]);
		} else {
			std::cerr << "Ommiting part " << this->parts[i]->currentDataDir() << ", does not have column '" << *difference.begin() << "'" << std::endl;
		}
	}

	/* go over all parts and build vector of intersection between part columns and aggregation columns */
	/* put together the parts that have same intersection - this ensures for example that ipv4 and ipv6 are aggregate separately by default */
	size = parts.size();
	std::vector<stringSet> colIntersect;
	colIntersect.reserve(size);
	for (size_t i = 0; i < size; i++) {
		/* put part columns to set */
		const auto &names = parts[i]->columnNames();
		stringSet partCols;
		for (size_t j = 0; j < names.size(); j++) {
			partCols.insert(names[j]);
		}

		/* make an intersection, stored at the same index as the part */
		colIntersect.push_back(stringSet());
		stringSet &intersection = colIntersect.back();
		std::set_intersection(partCols.begin(), partCols.end(), aCols.begin(),
				aCols.end(), std::inserter(intersection, intersection.begin()));

#ifdef DEBUG
		std::cerr << "Intersection has " << intersection.size() << " columns" << std::endl;

		std::cerr << "Intersect columns: ";
		for (const auto &colName: intersection) {
			std::cerr << colName << ", ";
		}
		std::cerr << std::endl;
#endif

		Utils::progressBar( "Aggregating [2/2]  ", "   ", size, i );
	}

	/* group parts with same intersection to one table */
	ibis::partList pList;
	/* one flag per part; a std::vector replaces the non-standard variable-length array */
	std::vector<bool> used(parts.size(), false);

	/* go over all parts (theirs intersections) */
	for (size_t iterPos = 0; iterPos < colIntersect.size(); iterPos++) {
		/* work with current intersection only if it has not been used before */
		/* empty intersections are allowed - we might want to show sum of data that do not match (or query sum(pkt)) */
		if (used[iterPos]) {
			continue;
		}

		const stringSet &groupCols = colIntersect[iterPos];

		/* add current part */
		used[iterPos] = true;
		pList.push_back(parts.at(iterPos));

		/* column metadata of the reference part, fetched once for the whole group */
		const auto &refNames = parts.at(iterPos)->columnNames();
		const auto &refTypes = parts.at(iterPos)->columnTypes();

		/* add all following parts that have same columns as current part and are not already used */
		for (size_t curPos = iterPos + 1; curPos < colIntersect.size(); curPos++) {
			if (used[curPos] || colIntersect[curPos] != groupCols) {
				continue;
			}

			const auto &curNames = parts.at(curPos)->columnNames();
			const auto &curTypes = parts.at(curPos)->columnTypes();

			/* check that parts have same column types for aggregate columns and issue a warning if not */
			/* go over aggregate columns and check the types */ /* TODO check that this is not needed for summary columns */
			for (const auto &colName: groupCols) {
				int pos1 = -1, pos2 = -1;

				/* find index of the column in both parts */
				for (unsigned int i = 0; i < refTypes.size(); i++) {
					if (colName == refNames[i]) {
						pos1 = i;
						break;
					}
				}
				for (unsigned int i = 0; i < curTypes.size(); i++) {
					if (colName == curNames[i]) {
						pos2 = i;
						break;
					}
				}

				/*
				   The columns with given name always exist, so there is no real need to check for '-1' value.
				   This is merely done to avoid programming errors.
				*/
				if (pos1 < 0 || pos2 < 0) {
					std::cerr << "Error: an unexpected error occurred while verifying data types!" << std::endl;
					break;
				}

				/* test that data types are same */
				if (refTypes[pos1] != curTypes[pos2]) {
					std::cerr << "Warning: column '" << colName << "' has different data types in different parts! ("
							<< parts.at(iterPos)->name() << ", " << parts.at(curPos)->name() << ")" << std::endl;
				}
			}

			/* add table to list */
			pList.push_back(parts.at(curPos));
			used[curPos] = true;
		}

#ifdef DEBUG
		std::cerr << "Creating table from " << pList.size() << " part(s)" << std::endl;

		std::cerr << "[" << iterPos << "]Aggregate columns: ";
		for (const auto &colName: groupCols) {
			std::cerr << colName << ", ";
		}
		std::cerr << std::endl;
#endif

		/* create table for each partList */
		if (!groupCols.empty() || aggregateColumns.empty()) {
			Table *table = new Table(pList);

			/* keep only aggregate columns whose every name is present in this group */
			columnVector aggCols;
			for (const auto &col: aggregateColumns) {
				bool isThere = true;
				for (const auto &name: col->getColumns()) {
					if (groupCols.find(name) == groupCols.end()) {
						isThere = false;
						break;
					}
				}
				if (isThere) {
					aggCols.push_back(col);
				}
			}

			/* aggregate the table, use only present aggregation columns */
			table->aggregateWithFunctions(aggCols, summaryColumns, filter);
			table->orderBy(this->orderColumns, this->orderAsc);
			this->tables.push_back(table);
		}

		/* and clear the part list */
		pList.clear();
	}
}
Example #3
0
/**
 * Adds a node to the graph and builds the dependencies (edges) associated
 * with it.
 * We work on the assumption that the "actual" data holds deterministic data,
 * including scalars/arrays.
 *
 * The already stored nodes are visited from the most recently added one back
 * to the first. For every stored node that the new node depends on (forward,
 * backward or outward dependency) an edge is created, registered via
 * addEdge(), recorded as an input of the new node and as an output of the
 * stored node. The new node is placed one layer above the highest layer among
 * the nodes it was connected to and is then appended to the node list.
 *
 * \param _arg_ node to add; it is copied, the caller's object is not modified
 */
void Graph::addNode(const Node &_arg_)
{
    stringSet local; // local edge vars
    stringSet other; // other vars to arg
    Node arg = _arg_; // will be changed
    arg.setActual(createActual(arg.getIn()), createActual(arg.getOut()));

    /*
     * Check for a return in the statement (checkStatement() yielding -2 is
     * treated as "contains a return").
     * If the statement contains a return, this is a control dependency: all
     * actions must be completed before the return is performed, which
     * preserves the execution semantics of the program.
     */
    const bool containReturn = (checkStatement(arg.getAction(), false, false) == -2);

    nodes_num++;

    // first phase
    stringSet foreverOut = arg.getActualOut(); // arg's out
    stringSet readOut = arg.getActualOut(); // current read for consistency
    stringSet readIn = arg.getActualIn(); // current read for in

    TNodeSet::iterator beg = nodes.begin();
    TNodeSet::iterator end = nodes.end();
    unsigned long layer = init_layer - 1;

    while (beg != end)
    {
        // Walk backwards; bind by reference so the stored node is not copied
        // on every step and so it can be updated in place below.
        Node &cur = *--end;
        bool findDep = false;

        if (isForwardDepend(readIn, cur.getActualOut()))
        {
            // forward dep
            findDep = true;
            // form local
            // in:  cur's actual out, readIn
            // out: local, other
            createForwardTransition(cur.getActualOut(), readIn, local, other);
            readIn = other;
            other.clear(); // so that it can be reused ANY next time
        }
        else if (isBackwardDepend(foreverOut, cur.getActualIn()))
        {
            // backward dep
            findDep = true;
        }

        // out dep
        if (isOutwardDepend(foreverOut, cur.getActualOut()))
        {
            findDep = true;
            // in:    cur's actual out, readOut
            // inout: local, other
            createOutwardTransition(cur.getActualOut(), readOut, local, other);
            readOut = other;
            other.clear();
        }

        if (findDep)
        {
            // create edge carrying the collected variables
            Edge edge(arg.getNumber(), cur.getNumber(), packArrays(local));
            addEdge(edge);
            local.clear();

            arg.addIn(edge.getNumber());
            // read the layer before touching the stored node
            if (cur.getLayer() > layer)
                layer = cur.getLayer();
            cur.addOut(edge.getNumber());
        }
        else if (containReturn)
        {
            // create empty edge: a return must wait for every earlier node
            Edge edge(arg.getNumber(), cur.getNumber(), stringSet());
            addEdge(edge);

            arg.addIn(edge.getNumber());
            // read the layer before touching the stored node
            if (cur.getLayer() > layer)
                layer = cur.getLayer();
            cur.addOut(edge.getNumber());
        } // else - do nothing
    }

    // insert node
    arg.setLayer(layer + 1);
    nodes.push_back(arg);
}