Vector *IntronSupportingEvidenceAdaptor_objectsFromStatementHandle(IntronSupportingEvidenceAdaptor *isea, 
                                                                   StatementHandle *sth,
                                                                   AssemblyMapper *assMapper,
                                                                   Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(isea->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(isea->dba);

  Vector *features = Vector_new();
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);
  
/* Unneccesary
  my %analysis_hash;
  my %sr_name_hash;
  my %sr_cs_hash;
*/
  

  
/* Unused
  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
*/

  long         destSliceStart;
  long         destSliceEnd;
  int          destSliceStrand;
  long         destSliceLength;
  //CoordSystem *destSliceCs;
  char *       destSliceSrName;
  IDType       destSliceSrId = 0;
  //AssemblyMapperAdaptor *asma;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    //??destSliceCs     = Slice_getCoordSystem(destSlice);
    destSliceSrName = Slice_getSeqRegionName(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
    //??asma            = DBAdaptor_getAssemblyMapperAdaptor(ea->dba);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType id =           row->getLongLongAt(row,0);
    IDType analysisId =   row->getLongLongAt(row,1);
    IDType seqRegionId =  row->getLongLongAt(row,2);
    long seqRegionStart = row->getLongAt(row,3);
    long seqRegionEnd =   row->getLongAt(row,4);
    int seqRegionStrand = row->getIntAt(row,5);
    char *hitName =       row->getStringAt(row,6);
    double score =        row->getDoubleAt(row,7);
    char *scoreType =     row->getStringAt(row,8);
    int spliceCanonical = row->getIntAt(row,9); 

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

/*
    // need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
*/
    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *iseSlice = slice;
    
    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    // 
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      //next FEATURE if (!defined($seq_region_id));
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      
/* Was - but identical if and else so why test???
      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      } else {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      }
*/
// Instead...
      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      iseSlice = IDHash_getValue(sliceHash, seqRegionId);
    }


    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is foward strand, nothing needs doing
    // 
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
       
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      iseSlice = destSlice;
    }
    
    IntronSupportingEvidence *ise = IntronSupportingEvidence_new();

    IntronSupportingEvidence_setStart             (ise, seqRegionStart);
    IntronSupportingEvidence_setEnd               (ise, seqRegionEnd);
    IntronSupportingEvidence_setStrand            (ise, seqRegionStrand);
    IntronSupportingEvidence_setSlice             (ise, iseSlice);
    IntronSupportingEvidence_setAnalysis          (ise, analysis);
    IntronSupportingEvidence_setAdaptor           (ise, (BaseAdaptor *)isea);
    IntronSupportingEvidence_setDbID              (ise, id);
    IntronSupportingEvidence_setHitName           (ise, hitName);
    IntronSupportingEvidence_setScore             (ise, score);
    IntronSupportingEvidence_setScoreType         (ise, scoreType);
    IntronSupportingEvidence_setIsSpliceCanonical(ise, spliceCanonical);

    Vector_addElement(features, ise);
  }
  
  return features;
}
Beispiel #2
0
/*
=head2 _objs_from_sth

  Arg [1]    : DBI:st $sth 
               An executed DBI statement handle
  Arg [2]    : (optional) Bio::EnsEMBL::Mapper $mapper 
               An mapper to be used to convert contig coordinates
               to assembly coordinates.
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice to map the prediction transcript to.   
  Example    : $p_transcripts = $self->_objs_from_sth($sth);
  Description: Creates a list of Prediction transcripts from an executed DBI
               statement handle.  The columns retrieved via the statement 
               handle must be in the same order as the columns defined by the
               _columns method.  If the slice argument is provided then the
               the prediction transcripts will be in returned in the coordinate
               system of the $slice argument.  Otherwise the prediction 
               transcripts will be returned in the RawContig coordinate system.
  Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
  Exceptions : none
  Caller     : superclass generic_fetch
  Status     : Stable

=cut
*/
Vector *PredictionTranscriptAdaptor_objectsFromStatementHandle(PredictionTranscriptAdaptor *pta, 
                                                               StatementHandle *sth, 
                                                               AssemblyMapper *assMapper, 
                                                               Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(pta->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(pta->dba);

  Vector *pTranscripts = Vector_new();
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);

  long         destSliceStart;
  long         destSliceEnd;
  int          destSliceStrand;
  long         destSliceLength;
  char *       destSliceSrName;
  IDType       destSliceSrId = 0;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    destSliceSrName = Slice_getSeqRegionName(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType predictionTranscriptId = row->getLongLongAt(row,0);
    IDType seqRegionId            = row->getLongLongAt(row,1);
    long seqRegionStart           = row->getLongAt(row,2);
    long seqRegionEnd             = row->getLongAt(row,3);
    int seqRegionStrand           = row->getIntAt(row,4);
    IDType analysisId             = row->getLongLongAt(row,5);
    char *displayLabel            = row->getStringAt(row,6);

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *ptSlice = slice;

    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    //
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      ptSlice = IDHash_getValue(sliceHash, seqRegionId);
    }

    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is foward strand, nothing needs doing
    //
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      ptSlice = destSlice;
    }
    
    // Finally, create the new PredictionTranscript.
    PredictionTranscript *pt = PredictionTranscript_new();

    PredictionTranscript_setStart       (pt, seqRegionStart);
    PredictionTranscript_setEnd         (pt, seqRegionEnd);
    PredictionTranscript_setStrand      (pt, seqRegionStrand);
    PredictionTranscript_setSlice       (pt, ptSlice);
    PredictionTranscript_setAnalysis    (pt, analysis);
    PredictionTranscript_setAdaptor     (pt, (BaseAdaptor *)pta);
    PredictionTranscript_setDbID        (pt, predictionTranscriptId);
    PredictionTranscript_setDisplayLabel(pt, displayLabel);

    Vector_addElement(pTranscripts, pt);
  }

  IDHash_free(sliceHash, NULL);
  return pTranscripts;
}
Beispiel #3
0
int calcCoverage(char *fName, Slice *slice, htsFile *in, hts_idx_t *idx, int flags) {
  int  ref;
  int  begRange;
  int  endRange;
  char region[1024];
  char region_name[512];


  if (Slice_getChrStart(slice) != 1) {
    fprintf(stderr, "Currently only allow a slice start position of 1\n");
    return 1;
  }
  if (flags & M_UCSC_NAMING) {
    sprintf(region,"chr%s", Slice_getSeqRegionName(slice));
  } else {
    sprintf(region,"%s", Slice_getSeqRegionName(slice));
  }
  bam_hdr_t *header = bam_hdr_init();
  header = bam_hdr_read(in->fp.bgzf);
  ref = bam_name2id(header, region);
  if (ref < 0) {
    fprintf(stderr, "Invalid region %s\n", region);
    exit(1);
  }
  sprintf(region,"%s:%ld-%ld", region_name,
                             Slice_getSeqRegionStart(slice),
                             Slice_getSeqRegionEnd(slice));
  if (hts_parse_reg(region, &begRange, &endRange) == NULL) {
    fprintf(stderr, "Could not parse %s\n", region);
    exit(2);
  }
  bam_hdr_destroy(header);


  hts_itr_t *iter = sam_itr_queryi(idx, ref, begRange, endRange);
  bam1_t *b = bam_init1();

  Coverage *coverage = calloc(Slice_getLength(slice),sizeof(Coverage));

  long counter = 0;
  long overlapping = 0;
  long bad = 0;
  int startIndex = 0;
  while (bam_itr_next(in, iter, b) >= 0) {
    if (b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)) {
      bad++;
      continue;
    }

    int end;
    //end = bam_calend(&b->core, bam1_cigar(b));
    end = bam_endpos(b);

    // There is a special case for reads which have zero length and start at begRange (so end at begRange ie. before the first base we're interested in).
    // That is the reason for the || end == begRange test
    if (end == begRange) {
      continue;
    }
    counter++;

    if (!(counter%1000000)) {
      if (verbosity > 1) { printf("."); }
      fflush(stdout);
    }

// Remember: b->core.pos is zero based!
    int cigInd;
    int refPos;
    int readPos;
    uint32_t *cigar = bam_get_cigar(b);
    for (cigInd = readPos = 0, refPos = b->core.pos; cigInd < b->core.n_cigar; ++cigInd) {
      int k;
      int lenCigBlock = cigar[cigInd]>>4;
      int op          = cigar[cigInd]&0xf;

      if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
        for (k = 0; k < lenCigBlock; ++k) {
          //if (ref[refPos+k] == 0) break; // out of boundary
          coverage[refPos+k].coverage++;
        }
        if (k < lenCigBlock) break;
        refPos += lenCigBlock; readPos += lenCigBlock;
      } else if (op == BAM_CDEL) {
        for (k = 0; k < lenCigBlock; ++k) {
        //  if (ref[refPos+k] == 0) break;
          coverage[refPos+k].coverage++;
        }
        if (k < lenCigBlock) break;
        refPos += lenCigBlock;
      } else if (op == BAM_CSOFT_CLIP) {
        readPos += lenCigBlock;
      } else if (op == BAM_CHARD_CLIP) {
      } else if (op == BAM_CINS) {
         readPos += lenCigBlock;
      } else if (op == BAM_CREF_SKIP) {
         refPos += lenCigBlock;
      }
    }

#ifdef DONE
    int j;
    int done = 0;
    int hadOverlap = 0;
    
    for (j=startIndex; j < Vector_getNumElement(genes) && !done; j++) {
      Gene *gene = Vector_getElementAt(genes,j); 
      if (!gene) {
        continue;
      }
// Remember: b->core.pos is zero based!
      if (b->core.pos < Gene_getEnd(gene) && end >= Gene_getStart(gene)) {
        int k;

        int doneGene = 0;
        for (k=0; k<Gene_getTranscriptCount(gene) && !doneGene; k++) {
          Transcript *trans = Gene_getTranscriptAt(gene,k);

          if (b->core.pos < Transcript_getEnd(trans) && end >= Transcript_getStart(trans)) {
            int m;
     
            for (m=0; m<Transcript_getExonCount(trans) && !doneGene; m++) {
              Exon *exon = Transcript_getExonAt(trans,m);

              if (b->core.pos < Exon_getEnd(exon) && end >= Exon_getStart(exon)) {

                // Only count as overlapping once (could be that a read overlaps more than one gene)
                if (!hadOverlap) {
                  overlapping++;
                  hadOverlap = 1;
                }

                gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
                gs->score++;
                
                doneGene = 1;
              }
            }
          }
        }
      } else if (Gene_getStart(gene) > end) {
        done = 1;
      } else if (Gene_getEnd(gene) < b->core.pos+1) {
        gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
        printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene), 
                                          Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "", 
                                          gs->score);

        if (verbosity > 1) { 
          printf("Removing gene %s (index %d) with extent %d to %d\n", 
                 Gene_getStableId(gene), 
                 gs->index,
                 Gene_getStart(gene),
                 Gene_getEnd(gene));
        }
        Vector_setElementAt(genes,j,NULL);

        // Magic (very important for speed) - move startIndex to first non null gene
        int n;
        startIndex = 0;
        for (n=0;n<Vector_getNumElement(genes);n++) {
          void *v = Vector_getElementAt(genes,n);

          if (v != NULL) {
            break;
          }
          startIndex++;
        }
        if (verbosity > 1) { 
          printf("startIndex now %d\n",startIndex);
        }
      }
    }
#endif
  }
  if (verbosity > 1) { printf("\n"); }

#ifdef DONE
// Print out read counts for what ever's left in the genes array
  int n;
  for (n=0;n<Vector_getNumElement(genes);n++) {
    Gene *gene = Vector_getElementAt(genes,n);

    if (gene != NULL) {
      gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
      printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene), 
                                        Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "", 
                                        gs->score);
    }

  }
#endif

  printf("Read %ld reads. Number of bad reads (unmapped, qc fail, secondary, dup) %ld\n", counter, bad);

  long i;
  for (i=0; i< Slice_getLength(slice); i++) {
    printf("%ld %ld\n", i+1, coverage[i].coverage);
  }

  sam_itr_destroy(iter);
  bam_destroy1(b);


  return 1;
}
Beispiel #4
0
Vector *PredictionTranscriptAdaptor_fetchAllBySlice(PredictionTranscriptAdaptor *pta, Slice *slice, char *logicName, int loadExons) {

  //my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name);
  Vector *transcripts = BaseFeatureAdaptor_fetchAllBySlice((BaseFeatureAdaptor *)pta, slice, logicName);

  // if there are 0 or 1 transcripts still do lazy-loading
  if ( ! loadExons || Vector_getNumElement(transcripts) < 2 ) {
    return transcripts;
  }

  // preload all of the exons now, instead of lazy loading later
  // faster than 1 query per transcript

  // get extent of region spanned by transcripts
  long minStart =  2000000000;
  long maxEnd   = -2000000000;

  int i;
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    if (PredictionTranscript_getSeqRegionStart((SeqFeature*)t) < minStart) {
      minStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)t);
    }
    if (PredictionTranscript_getSeqRegionEnd((SeqFeature*)t) > maxEnd) {
      maxEnd = PredictionTranscript_getSeqRegionEnd((SeqFeature*)t);
    }
  }

  Slice *extSlice;

  if (minStart >= Slice_getStart(slice) && maxEnd <= Slice_getEnd(slice)) {
    extSlice = slice;
  } else {
    SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba);
    extSlice = SliceAdaptor_fetchByRegion(sa, Slice_getCoordSystemName(slice), Slice_getSeqRegionName(slice),
                                          minStart, maxEnd, Slice_getStrand(slice), CoordSystem_getVersion(Slice_getCoordSystem(slice)), 0);
  }

  // associate exon identifiers with transcripts
  IDHash *trHash = IDHash_new(IDHASH_MEDIUM);
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    if ( ! IDHash_contains(trHash, PredictionTranscript_getDbID(t))) {
      IDHash_add(trHash, PredictionTranscript_getDbID(t), t);
    }
  }

  IDType *uniqueIds = IDHash_getKeys(trHash);

  char tmpStr[1024];
  char *qStr = NULL;
  if ((qStr = (char *)calloc(655500,sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating qStr\n");
    return transcripts;
  }

  int lenNum;
  int endPoint = sprintf(qStr, "SELECT prediction_transcript_id, prediction_exon_id, exon_rank FROM prediction_exon WHERE  prediction_transcript_id IN (");
  for (i=0; i<IDHash_getNumValues(trHash); i++) {
    if (i!=0) {
      qStr[endPoint++] = ',';
      qStr[endPoint++] = ' ';
    }
    lenNum = sprintf(tmpStr,IDFMTSTR,uniqueIds[i]);
    memcpy(&(qStr[endPoint]), tmpStr, lenNum);
    endPoint+=lenNum;
  }
  qStr[endPoint++] = ')';
  qStr[endPoint] = '\0';

  free(uniqueIds);

  StatementHandle *sth = pta->prepare((BaseAdaptor *)pta,qStr,strlen(qStr));
  sth->execute(sth);

  IDHash *exTrHash = IDHash_new(IDHASH_MEDIUM);
  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType trId = row->getLongLongAt(row,0);
    IDType exId = row->getLongLongAt(row,1);
    int    rank = row->getIntAt(row,2);

    if (! IDHash_contains(exTrHash, exId)) {
      Vector *vec = Vector_new();
      Vector_setFreeFunc(vec, PredictionTranscriptRankPair_free);
      IDHash_add(exTrHash, exId, vec);
    }
    Vector *exVec = IDHash_getValue(exTrHash, exId);
    PredictionTranscriptRankPair *trp = PredictionTranscriptRankPair_new(IDHash_getValue(trHash, trId), rank);
    Vector_addElement(exVec, trp);
  }

  IDHash_free(trHash, NULL);

  sth->finish(sth);

  PredictionExonAdaptor *pea = DBAdaptor_getPredictionExonAdaptor(pta->dba);
  Vector *exons = PredictionExonAdaptor_fetchAllBySlice(pea, extSlice);

  // move exons onto transcript slice, and add them to transcripts
  for (i=0; i<Vector_getNumElement(exons); i++) {
    PredictionExon *ex = Vector_getElementAt(exons, i);

  // Perl didn't have this line - it was in GeneAdaptor version so I think I'm going to keep it
    if (!IDHash_contains(exTrHash, PredictionExon_getDbID(ex))) continue;

    PredictionExon *newEx;
    if (slice != extSlice) {
      newEx = (PredictionExon*)PredictionExon_transfer((SeqFeature*)ex, slice);
      if (newEx == NULL) {
        fprintf(stderr, "Unexpected. Exon could not be transferred onto PredictionTranscript slice.\n");
        exit(1);
      }
    } else {
      newEx = ex;
    }

    Vector *exVec = IDHash_getValue(exTrHash, PredictionExon_getDbID(newEx));
    int j;
    for (j=0; j<Vector_getNumElement(exVec); j++) {
      PredictionTranscriptRankPair *trp = Vector_getElementAt(exVec, j);
      PredictionTranscript_addExon(trp->transcript, newEx, &trp->rank);
    }
  }

  IDHash_free(exTrHash, Vector_free);
  free(qStr);

  return transcripts;
}