Example #1
0
// Load all components of the index whose files share the prefix `indexPrefix`.
//
// Each file name is formed by appending a fixed suffix ("kinfo.bin",
// "rapidx.jfhash", "eqclass.bin", "eqlab.bin", "pos.bin", "txpnames.bin",
// "txplens.bin", "fwdjump.bin", "revjump.bin") directly to `indexPrefix`; no
// path separator is inserted here.
//
// Returns true when every component was opened and loaded. Returns false,
// after logging an error to the "stderrLog" logger, when a file cannot be
// opened or the hash payload is shorter than its header announces.
// NOTE(review): errors raised by the cereal archives or by the hash header
// parser are not caught in this function and propagate to the caller.
bool RapMapIndex::load(std::string& indexPrefix) {
    auto logger = spdlog::get("stderrLog");
    std::string kmerInfosName = indexPrefix + "kinfo.bin";
    std::string eqClassListName = indexPrefix + "eqclass.bin";
    std::string eqLabelListName = indexPrefix + "eqlab.bin";
    std::string posListName = indexPrefix + "pos.bin";
    std::string jfFileName = indexPrefix + "rapidx.jfhash";
    std::string txpNameFile = indexPrefix + "txpnames.bin";
    std::string txpLenFile = indexPrefix + "txplens.bin";
    std::string fwdJumpFile = indexPrefix + "fwdjump.bin";
    std::string revJumpFile = indexPrefix + "revjump.bin";

    // Load the kmer info list first --- this will
    // give us the # of unique k-mers
    std::ifstream kmerInfoStream(kmerInfosName, std::ios::binary);
    if (!kmerInfoStream.is_open()) {
        logger->error("could not open index file [{}]", kmerInfosName);
        return false;
    }
    {
        logger->info("loading k-mer info list . . .");
        ScopedTimer timer;
        cereal::BinaryInputArchive kmerInfoArchive(kmerInfoStream);
        kmerInfoArchive(kmerInfos);
        logger->info("done\n");
    }
    kmerInfoStream.close();

    size_t numDistinctKmers = kmerInfos.size();
    {
        ScopedTimer timer;
        logger->info("loading k-mer => id hash . . . ");
        // The hash file holds raw bytes, so it must be opened in binary mode.
        std::ifstream bis(jfFileName, std::ios::binary);
        if (!bis.is_open()) {
            logger->error("could not open index file [{}]", jfFileName);
            return false;
        }
        // Constructing the header consumes it from the stream; the hash
        // payload is read from the stream directly afterwards.
        const SpecialHeader bh(bis);
        const size_t sizeInBytes = bh.size_bytes();
        // Load the hash from file
        logger->info("\theader format = {}"      , bh.format());
        logger->info("\t# distinct k-mers = {}"  , numDistinctKmers);
        logger->info("\thash key len = {}"       , bh.key_len());
        logger->info("\tcounter len = {}"        , bh.counter_len());
        logger->info("\tmax reprobe offset = {}" , bh.max_reprobe());
        logger->info("\tsize in bytes = {}"      , sizeInBytes);

        // Allocate the actual storage
        rawHashMem.reset(new char[sizeInBytes]);
        bis.read(rawHashMem.get(), sizeInBytes);
        // A short read would leave part of the buffer uninitialised, so
        // remember the stream state before closing the file.
        const bool hashReadOk = static_cast<bool>(bis);
        // We can close the file now
        bis.close();
        if (!hashReadOk) {
            logger->error("could not read {} bytes of hash data from [{}]",
                          sizeInBytes, jfFileName);
            return false;
        }

        merHash.reset( new FileMerArray(rawHashMem.get(),
                    sizeInBytes,
                    bh.size(),
                    bh.key_len(),
                    bh.counter_len(),
                    bh.max_reprobe(),
                    bh.matrix()));
        // Set the key size
        rapmap::utils::my_mer::k(bh.key_len() / 2);
        logger->info("done");
    }


    std::ifstream eqClassStream(eqClassListName, std::ios::binary);
    if (!eqClassStream.is_open()) {
        logger->error("could not open index file [{}]", eqClassListName);
        return false;
    }
    {
        logger->info("loading eq classes . . . ");
        ScopedTimer timer;
        cereal::BinaryInputArchive eqClassArchive(eqClassStream);
        eqClassArchive(eqClassList);
        logger->info("[{}] classes", eqClassList.size());
        logger->info("done");
    }
    eqClassStream.close();

    std::ifstream eqLabelStream(eqLabelListName, std::ios::binary);
    if (!eqLabelStream.is_open()) {
        logger->error("could not open index file [{}]", eqLabelListName);
        return false;
    }
    {
        logger->info("loading eq class labels . . . ");
        ScopedTimer timer;
        cereal::BinaryInputArchive eqLabelArchive(eqLabelStream);
        eqLabelArchive(eqLabelList);
        logger->info("[{}] labels", eqLabelList.size());
        logger->info("done");
    }
    eqLabelStream.close();

    std::ifstream posStream(posListName, std::ios::binary);
    if (!posStream.is_open()) {
        logger->error("could not open index file [{}]", posListName);
        return false;
    }
    {
        logger->info("loading position list . . . ");
        ScopedTimer timer;
        cereal::BinaryInputArchive posArchive(posStream);
        posArchive(posList);
        logger->info("[{}] total k-mer positions", posList.size());
        logger->info("done");
    }
    posStream.close();

    std::ifstream txpNameStream(txpNameFile, std::ios::binary);
    if (!txpNameStream.is_open()) {
        logger->error("could not open index file [{}]", txpNameFile);
        return false;
    }
    {
        logger->info("loading transcript names ");
        ScopedTimer timer;
        cereal::BinaryInputArchive txpNameArchive(txpNameStream);
        txpNameArchive(txpNames);
        logger->info("[{}] transcripts in index ", txpNames.size());
        logger->info("done ");
    }
    txpNameStream.close();

    std::ifstream txpLenStream(txpLenFile, std::ios::binary);
    if (!txpLenStream.is_open()) {
        logger->error("could not open index file [{}]", txpLenFile);
        return false;
    }
    {
        logger->info("loading transcript lengths");
        ScopedTimer timer;
        cereal::BinaryInputArchive txpLenArchive(txpLenStream);
        txpLenArchive(txpLens);
        logger->info("[{}] transcripts in index", txpLens.size());
        logger->info("done ");
    }
    txpLenStream.close();

    std::ifstream fwdJumpStream(fwdJumpFile, std::ios::binary);
    if (!fwdJumpStream.is_open()) {
        logger->error("could not open index file [{}]", fwdJumpFile);
        return false;
    }
    {
        logger->info("loading forward jumps");
        ScopedTimer timer;
        cereal::BinaryInputArchive fwdJumpArchive(fwdJumpStream);
        fwdJumpArchive(fwdJumpTable);
        logger->info("[{}] forward jumps", fwdJumpTable.size());
        logger->info("done ");
    }
    fwdJumpStream.close();

    std::ifstream revJumpStream(revJumpFile, std::ios::binary);
    if (!revJumpStream.is_open()) {
        logger->error("could not open index file [{}]", revJumpFile);
        return false;
    }
    {
        // This block loads the reverse table; the message used to say "forward".
        logger->info("loading reverse jumps");
        ScopedTimer timer;
        cereal::BinaryInputArchive revJumpArchive(revJumpStream);
        revJumpArchive(revJumpTable);
        logger->info("[{}] reverse jumps", revJumpTable.size());
        logger->info("done ");
    }
    revJumpStream.close();
    return true;
}
Example #2
0
/**
 * Convert a LAS well-log file into an RMS well file, adding an x/y position
 * to every log sample from a separate position file.
 *
 * Position file: everything up to and including the first line that starts
 * with '-' is skipped. Each following line is split on whitespace; field 0 is
 * read as x, field 1 as y and field 5 as measured depth. Lines with fewer than
 * six fields are ignored.
 *
 * LAS file: the well name is the first quoted string on the line whose first
 * space-separated token is "WELL", with leading non-digit characters and all
 * spaces removed. The well coordinates are the last numeric tokens on the
 * "XWELL" and "YWELL" lines. These lines are searched for in that order. Log
 * names and data rows are then read from the section that follows the next
 * line starting with '~'.
 *
 * Output: a header (version, "undefined", well name and coordinates, log
 * count, one line per log) followed by one row per LAS data row, prefixed
 * with the x/y position for that row's first column. Positions between two
 * stations are linearly interpolated on measured depth.
 *
 * Any failure (a file cannot be opened, or the well name, a coordinate or the
 * position data are missing) is reported in a warning box owned by `parent`
 * and the function returns. The output file may already have been created or
 * truncated at that point.
 *
 * @param parent       parent widget for the warning dialogs.
 * @param lasFileName  path of the LAS file to read.
 * @param rmsFileName  path of the RMS file to write.
 * @param posFileName  path of the position file to read.
 */
void StandardStrings::convertLasToRms(QWidget *parent, const QString& lasFileName, const QString& rmsFileName, const QString& posFileName){
	//this assumes that MD/Z is the first log in the las file.
	QFile lasFile(lasFileName);
	QFile rmsFile(rmsFileName);
	QFile posFile(posFileName);
	if (!lasFile.open(QFile::ReadOnly | QFile::Text)) {
		QMessageBox::warning(parent, QString("Unable to open file"), QString("Could not open the file."), QMessageBox::Ok);
		return;
	}
	if (!posFile.open(QFile::ReadOnly | QFile::Text)) {
		QMessageBox::warning(parent, QString("Unable to open file"), QString("Could not open the file."), QMessageBox::Ok);
		return;
	}
	if (!rmsFile.open(QFile::WriteOnly | QFile::Text)) {
		QMessageBox::warning(parent, QString("Unable to open file"), QString("Could not open the file."), QMessageBox::Ok);
		return;
	}
	QString wellName;
	QTextStream lasStream(&lasFile);
	QTextStream rmsStream(&rmsFile);
	QTextStream posStream(&posFile);

	// Skip the position-file header: it ends with the first line starting with '-'.
	QString line = posStream.readLine();
	while(!line.isNull()){
		if(!line.isEmpty()){
			if(line[0]==QChar('-')){
				line = posStream.readLine();
				break;
			}
		}
		line = posStream.readLine();
	}
	QList<double> xList;
	QList<double> yList;
	QList<double> mdList;
	// Read the position rows. Blank or truncated rows (for example a trailing
	// empty line) have fewer than six fields and are skipped rather than indexed
	// out of range.
	while(!line.isNull()){
		const QStringList fields = line.simplified().split(" ");
		if(fields.size()>5){
			xList << fields[0].toDouble();
			yList << fields[1].toDouble();
			mdList << fields[5].toDouble();
		}
		line = posStream.readLine();
	}
	if(mdList.isEmpty()){
		// Without any station there is no position to attach to the log samples.
		QMessageBox::warning(parent, QString("No position data"), QString("Could not find any valid positions in the position file."), QMessageBox::Ok);
		return;
	}

	// Locate the WELL line and extract the well name from its quoted part.
	line = lasStream.readLine();
	while(!line.isNull()){
		if(line.split(" ").at(0)==QString("WELL")){
			const QStringList quotedParts = line.split("\"");
			if(quotedParts.size()>1){
				wellName = quotedParts.at(1);
			}
			// Drop everything in front of the first digit; an all-non-digit name
			// ends up empty and is reported below.
			while(!wellName.isEmpty() && !wellName[0].isDigit()){
				wellName.remove(0,1);
			}
			wellName.remove(" ");
			break;
		}
		line = lasStream.readLine();
	}
	if(wellName.isEmpty()){
		QMessageBox::warning(parent, QString("No well name"), QString("Could not find a valid well name."), QMessageBox::Ok);
		return;
	}

	// The search continues from the current line, so XWELL must not precede WELL.
	QString wellX;
	while(!line.isNull()){
		if(line.split(" ").at(0)==QString("XWELL")){
			const QStringList tokens = line.split(" ");
			// Keep the last token that parses as a number.
			for(int i=0;i<tokens.size();++i){
				if(!tokens[i].isEmpty()){
					bool ok = false;
					tokens[i].toDouble(&ok);
					if(ok){
						wellX = tokens[i];
					}
				}
			}
			break;
		}
		line = lasStream.readLine();
	}
	if(wellX.isEmpty()){
		QMessageBox::warning(parent, QString("No well x-coordinate"), QString("Could not find a valid well x-coordinate."), QMessageBox::Ok);
		return;
	}
	QString wellY;
	while(!line.isNull()){
		if(line.split(" ").at(0)==QString("YWELL")){
			const QStringList tokens = line.split(" ");
			// Keep the last token that parses as a number.
			for(int i=0;i<tokens.size();++i){
				if(!tokens[i].isEmpty()){
					bool ok = false;
					tokens[i].toDouble(&ok);
					if(ok){
						wellY = tokens[i];
					}
				}
			}
			break;
		}
		line = lasStream.readLine();
	}
	if(wellY.isEmpty()){
		QMessageBox::warning(parent, QString("No well y-coordinate"), QString("Could not find a valid well y-coordinate."), QMessageBox::Ok);
		return;
	}

	// Advance to the next section marker (a line starting with '~').
	while(!line.isNull()){
		if(!line.isEmpty()){
			if(line[0]==QChar('~')){
				lasStream.readLine();//read away the comments
				lasStream.readLine();
				break;
			}
		}
		line = lasStream.readLine();
	}
	// One more line is discarded, then log names are read up to the first empty
	// line. A log name is the text before the first '.', with spaces removed.
	QStringList logNames;
	lasStream.readLine();
	line = lasStream.readLine();
	while(!line.isEmpty()){
		logNames << line.split(".")[0].remove(" ");
		line = lasStream.readLine();
	}
	lasStream.readLine();//~ASCII
	line = lasStream.readLine();

	// Header of the RMS file; real numbers are written in fixed notation with
	// two decimals.
	rmsStream.setRealNumberNotation(QTextStream::FixedNotation);
	rmsStream.setRealNumberPrecision(2);
	rmsStream << 1.0 << endl;
	rmsStream << "undefined" << endl;
	rmsStream << wellName << " " << wellX << " " << wellY << endl;
	rmsStream << logNames.size() << endl;
	for(int i=0;i<logNames.size();++i){
		rmsStream << logNames[i] << "  " << "UNK lin" << endl;
	}

	// Data rows. minIndex tracks the last station found above the current depth
	// and is carried from row to row so the station list is not rescanned.
	int minIndex=0;
	bool first=true;
	while(!line.isEmpty()){
		if(!first){
			rmsStream << endl;
		}
		else{
			first=false;
		}
		const QStringList lineList = line.simplified().split(" ");
		const double depth = lineList.at(0).toDouble();
		// Default: the sample lies below every station, so use the last one.
		double x = xList.last();
		double y = yList.last();
		for(int j=minIndex;j<mdList.size();++j){
			if(mdList[j]==depth){
				x = xList[j];
				y = yList[j];
				break;
			} else if(mdList[j]<depth){
				minIndex=j;
			} else{
				// mdList[j] is the first station below the sample. When it is also
				// the reference station (sample above every station seen so far),
				// step forward to a station with a different depth to get a segment.
				bool overRule=false;
				while(mdList[j]==mdList[minIndex]){
					if(j<mdList.size()-1){
						++j;
					}
					else {
						overRule=true;
						break;
					}
				}
				if(overRule){
					// No station with a different depth exists; use the last one
					// as-is instead of dividing by a zero depth difference.
					x = xList[j];
					y = yList[j];
				}
				else{
					// Linear interpolation measured from the upper station
					// (minIndex) towards the lower station (j).
					const double fractionDifference=(depth-mdList[minIndex])/(mdList[j]-mdList[minIndex]);
					x = xList[minIndex]+fractionDifference*(xList[j]-xList[minIndex]);
					y = yList[minIndex]+fractionDifference*(yList[j]-yList[minIndex]);
				}
				break;
			}
		}
		rmsStream << x << " " << y;
		for(int i=0;i<lineList.size();++i){
			rmsStream << " " << lineList.at(i);
		}
		line = lasStream.readLine();
	}

}