bool GroupBy::init()
{
	Tokenizer groupColsTokens;
	groupColsTokens.tokenize(upCast(_context)->getGroupCols(), ',');
	int numElems = groupColsTokens.getNumValidElems();
	for (int i=0; i < numElems; i++) {
		//if the item is a range, such as 3-5,
		//must split that as well.
		const QuickString &elem = groupColsTokens.getElem(i);

		if (strchr(elem.c_str(), '-')) {
			Tokenizer rangeElems;
			rangeElems.tokenize(elem, '-');
			int startNum = str2chrPos(rangeElems.getElem(0));
			int endNum = str2chrPos(rangeElems.getElem(1));
			for (int i=startNum; i <= endNum; i++) {
				_groupCols.push_back(i);
			}
		} else {
			_groupCols.push_back(str2chrPos(elem));
		}
	}
	_queryFRM = _context->getFile(0);
	_prevFields.resize(_groupCols.size());

	_prevRecord = getNextRecord();
	return true;
}
Exemple #2
0
void NewGenomeFile::loadGenomeFileIntoMap() {


	ifstream genFile(_genomeFileName.c_str());
	if (!genFile.good()) {
		cerr << "Error: Can't open genome file" << _genomeFileName << "Exiting..." << endl;
		exit(1);
	}
	string sLine;
	Tokenizer fieldTokens;
	CHRPOS chrSize = 0;
	string chrName;
	while (!genFile.eof()) {
		sLine.clear();
		chrSize = 0;
		chrName.clear();
		getline(genFile, sLine);
		int numFields = fieldTokens.tokenize(sLine.c_str());
		// allow use of .fai files.
		 if (numFields < 2) {
		 	continue;
		 }
		chrName = fieldTokens.getElem(0);
		chrSize = str2chrPos(fieldTokens.getElem(1));
		_maxId++;
		_chromSizeIds[chrName] = pair<CHRPOS, int>(chrSize, _maxId);
		_startOffsets.push_back(_genomeLength);
		_genomeLength += chrSize;
		_chromList.push_back(chrName);
	}
	if (_maxId == -1) {
		cerr << "Error: The genome file " << _genomeFileName << " has no valid entries. Exiting." << endl;
		exit(1);
	}
	// Special: BAM files can have unmapped reads, which show as no chromosome, or an empty chrom string.
	// Add in an empty chrom so these don't error.
	_maxId++;
	_chromSizeIds[""] = pair<CHRPOS, int>(0, _maxId);
	_chromList.push_back("");


	_startOffsets.push_back(_genomeLength); //insert the final length as the last element
	//to help with the lower_bound call in the projectOnGenome method.
	genFile.close();
}
Exemple #3
0
void NewGenomeFile::loadGenomeFileIntoMap() {


	ifstream genFile(_genomeFileName.c_str());
	if (!genFile.good()) {
		cerr << "Error: Can't open genome file" << _genomeFileName << "Exiting..." << endl;
		exit(1);
	}
	string sLine;
	Tokenizer fieldTokens;
	CHRPOS chrSize = 0;
	QuickString chrName;
	while (!genFile.eof()) {
		sLine.clear();
		chrSize = 0;
		chrName.clear();
		getline(genFile, sLine);
		int numFields = fieldTokens.tokenize(sLine.c_str());
		if (numFields != 2) {
			continue;
		}
		chrName = fieldTokens.getElem(0);
		chrSize = str2chrPos(fieldTokens.getElem(1));
		_maxId++;
		_chromSizeIds[chrName] = pair<CHRPOS, int>(chrSize, _maxId);
		_startOffsets.push_back(_genomeLength);
		_genomeLength += chrSize;
		_chromList.push_back(chrName);
	}
	if (_maxId == -1) {
		cerr << "Error: The genome file " << _genomeFileName << " has no valid entries. Exiting." << endl;
		exit(1);
	}
	_startOffsets.push_back(_genomeLength); //insert the final length as the last element
	//to help with the lower_bound call in the projectOnGenome method.
	genFile.close();
}
Exemple #4
0
bool KeyListOps::isValidColumnOps(FileRecordMgr *dbFile) {

	// Validates the comma-delimited column list (_columns) and operation list
	// (_operations) against the given database file. On success, _colOps holds
	// one (column number, operation code) pair per requested computation and
	// the function returns true. On any problem an error is written to cerr
	// and the function returns false.

	Tokenizer columnList;
	Tokenizer operationList;

	const int columnCount = columnList.tokenize(_columns, ',');
	const int operationCount = operationList.tokenize(_operations, ',');

	// Both lists must name at least one item.
	if (columnCount < 1 || operationCount < 1) {
		cerr << endl << "*****" << endl
				<< "***** ERROR: There must be at least one column and at least one operation named." << endl;
		return false;
	}

	// When both lists have several items they are paired one-to-one,
	// so their lengths must match.
	const bool bothAreLists = (columnCount > 1) && (operationCount > 1);
	if (bothAreLists && columnCount != operationCount) {
		cerr << endl << "*****" << endl
				<< "***** ERROR: There are " << columnCount <<" columns given, but there are " << operationCount << " operations." << endl;
		cerr << "\tPlease provide either a single operation that will be applied to all listed columns, " << endl;
		cerr << "\ta single column to which all operations will be applied," << endl;
		cerr << "\tor an operation for each column." << endl;
		return false;
	}

	// Number of (column, operation) pairs to build: the longer of the two lists.
	const int pairCount = (columnCount > operationCount) ? columnCount : operationCount;

	// A single column is reused for every operation, a single operation is
	// reused for every column; otherwise items are matched by position.
	for (int idx = 0; idx < pairCount; idx++) {
		int columnIdx = 0;
		if (columnCount > 1) {
			columnIdx = idx;
		}
		int column = str2chrPos(columnList.getElem(columnIdx));

		// The column number must be within 1 .. number of fields in the file.
		if (!(column >= 1 && column <= dbFile->getNumFields())) {
			cerr << endl << "*****" << endl  << "***** ERROR: Requested column " << column << ", but database file "
					<< dbFile->getFileName() << " only has fields 1 - " << dbFile->getNumFields() << "." << endl;
			return false;
		}

		int operationIdx = 0;
		if (operationCount > 1) {
			operationIdx = idx;
		}
		const QuickString &opName = operationList.getElem(operationIdx);
		OP_TYPES code = getOpCode(opName);
		if (code == INVALID) {
			cerr << endl << "*****" << endl
					<< "***** ERROR: " << opName << " is not a valid operation. " << endl;
			return false;
		}

		_colOps.push_back(pair<int, OP_TYPES>(column, code));
	}

	// Nothing more to check unless the database file is BAM.
	if (_dbFileType != FileRecordTypeChecker::BAM_FILE_TYPE) {
		return true;
	}

	// Tell the methods class we're dealing with BAM.
	_methods.setIsBam(true);

	// Column 2 of a BAM file is the flags field, which is currently not
	// supported, so any request for it is rejected.
	for (size_t pos = 0; pos < _colOps.size(); pos++) {
		if (_colOps[pos].first != 2) {
			continue;
		}
		cerr << endl << "*****" << endl << "***** ERROR: Requested column 2 of a BAM file, which is the Flags field." << endl;
		cerr << "             We currently do not support this, but may in future versions." << endl;
		return false;
	}

	return true;
}