Example #1
0
/**
 * Build a TranscriptGeneMap from the records of a transcript sequence file.
 *
 * Each record's name is taken to be its header up to (but excluding) the
 * first space character. The collected names are sorted, and every
 * transcript is assigned to the single placeholder gene "gene" (index 0).
 *
 * @param transcriptsFile  path of the sequence file to read
 * @return a map holding the sorted transcript names, the one placeholder
 *         gene name, and an all-zero transcript-to-gene index vector
 */
TranscriptGeneMap transcriptToGeneMapFromFasta( const std::string& transcriptsFile ) {
    // Set up the streaming parser over the single input file.
    std::vector<bfs::path> inputFiles{transcriptsFile};
    StreamingReadParser readParser(inputFiles);
    readParser.start();
    ReadProducer<StreamingReadParser> reads(readParser);

    // Collect one name per record; the loop step hands each read back to
    // the producer once its header has been copied out.
    NameVector names;
    for (ReadSeq* rec = nullptr; reads.nextRead(rec); reads.finishedWithRead(rec)) {
        const std::string rawHeader(rec->name, rec->nlen);
        // find() yields npos when there is no space, in which case substr
        // keeps the whole header.
        const auto nameEnd = rawHeader.find(' ');
        names.emplace_back(rawHeader.substr(0, nameEnd));
    }

    std::sort(names.begin(), names.end());

    // There are no real gene groupings here: a single dummy gene, and every
    // transcript points at it.
    NameVector genes {"gene"};
    IndexVector transcriptToGene(names.size(), 0);

    return TranscriptGeneMap(names, genes, transcriptToGene);
}
Example #2
0
 /**
  * Look up the index of a transcript by name.
  *
  * Uses a binary search (std::lower_bound) over _transcriptNames, so the
  * names are expected to be sorted in ascending order.
  *
  * @param tname  name of the transcript to search for
  * @return the position of tname within _transcriptNames, or INVALID when
  *         no stored name equals tname
  */
 Index findTranscriptID( const std::string &tname ) {
     const auto first = _transcriptNames.begin();
     const auto last = _transcriptNames.end();
     const auto it = std::lower_bound( first, last, tname );
     // lower_bound yields the first element that is NOT LESS than tname; that
     // is only a hit if it also compares equal. Without this check a missing
     // name would resolve to the index of whichever name sorts after it.
     if ( it == last || *it != tname ) {
         return INVALID;
     }
     return static_cast<Index>( std::distance( first, it ) );
 }