void faToTwoBit(char *inFiles[], int inFileCount, char *outFile) /* Convert inFiles in fasta format to outfile in 2 bit * format. */ { struct twoBit *twoBitList = NULL, *twoBit; int i; struct hash *uniqHash = newHash(18); FILE *f; for (i=0; i<inFileCount; ++i) { char *fileName = inFiles[i]; struct lineFile *lf = lineFileOpen(fileName, TRUE); struct dnaSeq seq; ZeroVar(&seq); while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name)) { if (seq.size == 0) { warn("Skipping item %s which has no sequence.\n",seq.name); continue; } /* strip off version number */ if (stripVersion) { char *sp = NULL; sp = strchr(seq.name,'.'); if (sp != NULL) *sp = '\0'; } if (hashLookup(uniqHash, seq.name)) { if (!ignoreDups) errAbort("Duplicate sequence name %s", seq.name); else continue; } hashAdd(uniqHash, seq.name, NULL); if (noMask) faToDna(seq.dna, seq.size); else unknownToN(seq.dna, seq.size); twoBit = twoBitFromDnaSeq(&seq, !noMask); slAddHead(&twoBitList, twoBit); } lineFileClose(&lf); } slReverse(&twoBitList); f = mustOpen(outFile, "wb"); twoBitWriteHeader(twoBitList, f); for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next) { twoBitWriteOne(twoBit, f); } carefulClose(&f); }
int TwoBitRunner::faToTwoBit(std::map<std::string, std::string> inputCommands) { TwoBitSetUp setUp(inputCommands); std::string inputFilename = ""; std::string outFilename = ""; bool overWrite = false; bool trimNameAtWhitepsace = false; setUp.setOption(inputFilename, "--in,-i", "Input fasta filename, can be several files seperated by commas", true); setUp.setOption(outFilename, "--out,-o", "Name of an output file", true); setUp.setOption(overWrite, "--overWrite", "Whether to overwrite the file if one is given by --out"); setUp.setOption(overWrite, "--overWrite", "Whether to overwrite the file if one is given by --out"); setUp.setOption(trimNameAtWhitepsace, "--trimNameAtWhitepsace", "Whether to trim the names of the fasta records at the first whitespace"); setUp.finishSetUp(std::cout); cppprogutils::appendAsNeeded(outFilename, ".2bit"); std::ofstream out; //check if output file exists if (!overWrite && cppprogutils::fexists(outFilename)) { throw Exception(__PRETTY_FUNCTION__, "File " + outFilename + " already exists, use --overWrite to over write"); } //read in seqs std::vector<std::unique_ptr<FastaRecord>> seqs; auto toks = cppprogutils::tokenizeString(inputFilename, ","); for(const auto & fName : toks){ std::ifstream in(fName); std::unique_ptr<FastaRecord> seq; while (readNextFasta(in, seq, trimNameAtWhitepsace)) { seqs.emplace_back(std::move(seq)); } } out.open(outFilename, std::ios::binary | std::ios::out); //write out header twoBitWriteHeader(seqs, out); //write out sequences for (const auto & seq : seqs) { seq->twoBitWriteOne(out); } return 0; }
void twoBitMask(char *inName, char *maskName, char *outName) /* twoBitMask - apply masking to a .2bit file, creating a new .2bit file. */ { struct hash *tbHash = hashNew(20); struct hash *bitmapHash = hashNew(20); struct twoBit *twoBitList = NULL; struct twoBit *twoBit = NULL; FILE *f = NULL; if (! twoBitIsFile(inName)) { if (twoBitIsSpec(inName)) errAbort("Sorry, this works only on whole .2bit files, not specs."); else errAbort("Input %s does not look like a proper .2bit file.", inName); } twoBitList = slurpInput(inName, tbHash, bitmapHash); /* Read mask data into bitmapHash, store it in twoBits: */ if ((type && endsWith(type, "bed")) || endsWith(maskName, ".bed")) maskWithBed(maskName, tbHash, bitmapHash); else if ((type && endsWith(type, "out")) || endsWith(maskName, ".out")) maskWithOut(maskName, tbHash, bitmapHash); else errAbort("Sorry, maskFile must end in \".bed\" or \".out\"."); /* Create a new .2bit file, write it out from twoBits. */ f = mustOpen(outName, "wb"); twoBitWriteHeader(twoBitList, f); for (twoBit = twoBitList; twoBit != NULL; twoBit = twoBit->next) { twoBitWriteOne(twoBit, f); } carefulClose(&f); /* Don't bother freeing twoBitList and hashes here -- just exit. */ }