コード例 #1
0
ファイル: faToTwoBit.c プロジェクト: sktu/kentUtils
void faToTwoBit(char *inFiles[], int inFileCount, char *outFile)
/* Read the inFileCount fasta files named in inFiles and write every kept
 * record to outFile as one 2 bit file.
 *
 * Behavior is steered by three settings defined outside this function:
 *   stripVersion - cut each sequence name at its first '.'
 *   ignoreDups   - skip a repeated sequence name instead of aborting
 *   noMask       - selects faToDna() rather than unknownToN() for the bases,
 *                  and is passed (negated) to twoBitFromDnaSeq(). */
{
struct hash *seenNames = newHash(18);
struct twoBit *records = NULL, *rec;
FILE *out;
int fileIx;

for (fileIx = 0; fileIx < inFileCount; ++fileIx)
    {
    struct lineFile *lf = lineFileOpen(inFiles[fileIx], TRUE);
    struct dnaSeq seq;
    ZeroVar(&seq);
    while (faMixedSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name))
        {
        /* Records without any bases are reported and dropped. */
        if (seq.size == 0)
            {
            warn("Skipping item %s which has no sequence.\n",seq.name);
            continue;
            }

        /* Optionally truncate the name at its first '.' (version suffix). */
        if (stripVersion)
            {
            char *dot = strchr(seq.name, '.');
            if (dot != NULL)
                *dot = '\0';
            }

        /* A name may be stored only once; the check uses the name as it
         * stands after any truncation above. */
        if (hashLookup(seenNames, seq.name) != NULL)
            {
            if (ignoreDups)
                continue;
            errAbort("Duplicate sequence name %s", seq.name);
            }
        hashAdd(seenNames, seq.name, NULL);

        if (!noMask)
            unknownToN(seq.dna, seq.size);
        else
            faToDna(seq.dna, seq.size);

        rec = twoBitFromDnaSeq(&seq, !noMask);
        slAddHead(&records, rec);
        }
    lineFileClose(&lf);
    }

/* Records were pushed onto the head of the list, so reverse it before
 * writing. */
slReverse(&records);

out = mustOpen(outFile, "wb");
twoBitWriteHeader(records, out);
rec = records;
while (rec != NULL)
    {
    twoBitWriteOne(rec, out);
    rec = rec->next;
    }
carefulClose(&out);
}
コード例 #2
0
ファイル: TwoBitRunner.cpp プロジェクト: weng-lab/TwoBit
/**
 * @brief Convert one or more fasta files into a single .2bit file.
 *
 * Options are read from inputCommands through TwoBitSetUp:
 *   --in,-i                  comma separated list of input fasta files (required)
 *   --out,-o                 output file name (required); passed through
 *                            cppprogutils::appendAsNeeded with ".2bit"
 *   --overWrite              allow replacing an existing output file
 *   --trimNameAtWhitepsace   forwarded to readNextFasta for every record
 *
 * All records from all inputs are read into memory first, then the header
 * and the records are written in the order they were read.
 *
 * @param inputCommands command line arguments as key/value pairs
 * @return 0 once the output has been written
 * @throws Exception if the output already exists and --overWrite was not
 *         given, if an input file cannot be opened for reading, or if the
 *         output file cannot be opened for writing
 */
int TwoBitRunner::faToTwoBit(std::map<std::string, std::string> inputCommands) {
	TwoBitSetUp setUp(inputCommands);
	std::string inputFilename = "";
	std::string outFilename = "";
	bool overWrite = false;
	bool trimNameAtWhitepsace = false;
	setUp.setOption(inputFilename, "--in,-i", "Input fasta filename, can be several files seperated by commas", true);
	setUp.setOption(outFilename, "--out,-o",
			"Name of an output file", true);
	// registered once; the original registered this same option twice
	setUp.setOption(overWrite, "--overWrite",
			"Whether to overwrite the file if one is given by --out");
	setUp.setOption(trimNameAtWhitepsace, "--trimNameAtWhitepsace",
				"Whether to trim the names of the fasta records at the first whitespace");
	setUp.finishSetUp(std::cout);
	cppprogutils::appendAsNeeded(outFilename, ".2bit");
	//check if output file exists
	if (!overWrite && cppprogutils::fexists(outFilename)) {
		throw Exception(__PRETTY_FUNCTION__,
				"File " + outFilename
						+ " already exists, use --overWrite to over write");
	}
	//read in seqs
	std::vector<std::unique_ptr<FastaRecord>> seqs;
	auto toks = cppprogutils::tokenizeString(inputFilename, ",");
	for (const auto & fName : toks) {
		std::ifstream in(fName);
		// a file that cannot be opened must not be treated as an empty input
		if (!in.is_open()) {
			throw Exception(__PRETTY_FUNCTION__,
					std::string("Error in opening ") + fName + " for reading");
		}
		std::unique_ptr<FastaRecord> seq;
		while (readNextFasta(in, seq, trimNameAtWhitepsace)) {
			seqs.emplace_back(std::move(seq));
		}
	}
	// the output is opened only after every input has been read successfully
	std::ofstream out(outFilename, std::ios::binary | std::ios::out);
	if (!out.is_open()) {
		throw Exception(__PRETTY_FUNCTION__,
				"Error in opening " + outFilename + " for writing");
	}
	//write out header
	twoBitWriteHeader(seqs, out);
	//write out sequences
	for (const auto & seq : seqs) {
		seq->twoBitWriteOne(out);
	}
	return 0;
}
コード例 #3
0
void twoBitMask(char *inName, char *maskName, char *outName)
/* Load the .2bit file inName, apply the mask found in maskName and write the
 * result to outName as a new .2bit file.
 *
 * The mask reader is picked as follows: maskWithBed() when the file-level
 * 'type' setting ends in "bed" or maskName ends in ".bed"; otherwise
 * maskWithOut() when 'type' ends in "out" or maskName ends in ".out";
 * otherwise the program aborts. */
{
    struct hash *seqHash = hashNew(20);
    struct hash *maskBits = hashNew(20);
    struct twoBit *seqList = NULL;
    struct twoBit *cur = NULL;
    FILE *out = NULL;

    /* Only complete .2bit files are accepted as input. */
    if (!twoBitIsFile(inName))
        {
        if (!twoBitIsSpec(inName))
            errAbort("Input %s does not look like a proper .2bit file.", inName);
        else
            errAbort("Sorry, this works only on whole .2bit files, not specs.");
        }

    seqList = slurpInput(inName, seqHash, maskBits);

    /* Fill maskBits from the mask file and store the result in the twoBits. */
    if ((type != NULL && endsWith(type, "bed")) || endsWith(maskName, ".bed"))
        {
        maskWithBed(maskName, seqHash, maskBits);
        }
    else if ((type != NULL && endsWith(type, "out")) || endsWith(maskName, ".out"))
        {
        maskWithOut(maskName, seqHash, maskBits);
        }
    else
        {
        errAbort("Sorry, maskFile must end in \".bed\" or \".out\".");
        }

    /* Emit the header, then each record in list order. */
    out = mustOpen(outName, "wb");
    twoBitWriteHeader(seqList, out);
    cur = seqList;
    while (cur != NULL)
        {
        twoBitWriteOne(cur, out);
        cur = cur->next;
        }
    carefulClose(&out);

    /* The list and both hashes are intentionally not freed -- the caller is
     * expected to exit right after this returns. */
}