예제 #1
0
/**
 * Builds a trivial transcript-to-gene map from a file of transcript sequences
 * (FASTA, going by the function name).
 *
 * Every record produced by the parser contributes one transcript name: the
 * record's name up to, but not including, its first space character (the whole
 * name when it contains no space). The collected names are sorted, and every
 * transcript is assigned to index 0 of a one-element gene list holding "gene".
 *
 * @param transcriptsFile  path handed to the StreamingReadParser.
 * @return a TranscriptGeneMap built from the sorted transcript names, the
 *         single-entry gene list, and an all-zero transcript-to-gene index vector.
 */
TranscriptGeneMap transcriptToGeneMapFromFasta( const std::string& transcriptsFile ) {

    // A single placeholder gene that every transcript is attached to.
    NameVector geneNames {"gene"};
    NameVector transcriptNames;

    std::vector<bfs::path> inputPaths{transcriptsFile};
    StreamingReadParser parser(inputPaths);
    parser.start();

    ReadProducer<StreamingReadParser> producer(parser);

    // Pull records until the producer reports that none are left.
    ReadSeq* read;
    while (producer.nextRead(read)) {
      const std::string fullHeader(read->name, read->nlen);
      // find() yields npos when there is no space, and substr(0, npos)
      // then returns the entire header.
      const std::string::size_type firstSpace = fullHeader.find(' ');
      transcriptNames.emplace_back(fullHeader.substr(0, firstSpace));
      producer.finishedWithRead(read);
    }

    // Put the transcript names in sorted order.
    std::sort(transcriptNames.begin(), transcriptNames.end());

    // No real gene groupings exist here, so each transcript maps to gene 0.
    IndexVector t2g(transcriptNames.size(), 0);

    return TranscriptGeneMap(transcriptNames, geneNames, t2g);
}
예제 #2
0
    /**
     * Fills _genesToTranscripts, the reverse of _transcriptsToGenes.
     *
     * _genesToTranscripts is resized to one entry per gene name, then each
     * transcript index is appended to the entry of the gene it maps to. While
     * doing so, the gene with the most transcripts is tracked and reported on
     * std::cerr. The report is skipped when there are no transcripts, because
     * no gene holds a maximum in that case.
     */
    void _computeReverseMap() {

        _genesToTranscripts.resize( _geneNames.size(), {});

        size_t maxNumTrans = 0;
        // Initialized so it never holds an indeterminate value; it is only
        // used for the report once some transcript has set it.
        Index maxGene = 0;
        for ( size_t transcriptID = 0; transcriptID < _transcriptsToGenes.size(); ++transcriptID ) {
            // Look up the gene and its transcript list once per iteration.
            const auto gene = _transcriptsToGenes[transcriptID];
            auto& transcriptsOfGene = _genesToTranscripts[gene];
            transcriptsOfGene.push_back( transcriptID );
            if ( maxNumTrans < transcriptsOfGene.size() ) {
                maxNumTrans = transcriptsOfGene.size();
                maxGene = gene;
            }
        }

        // With zero transcripts there is no "largest" gene; indexing
        // _geneNames here would be meaningless (and out of range when the
        // gene list is empty as well).
        if ( maxNumTrans > 0 ) {
            std::cerr << "max # of transcripts in a gene was " << maxNumTrans << " in gene " << _geneNames[maxGene] << "\n";
        }
    }
예제 #3
0
 /// Returns the number of entries in _geneNames.
 Size numGenes() {
     const Size geneCount = _geneNames.size();
     return geneCount;
 }
예제 #4
0
 /// Returns the number of entries in _transcriptNames.
 Size numTranscripts() {
     const Size transcriptCount = _transcriptNames.size();
     return transcriptCount;
 }