Example #1
0
// New
Vector *Translation_getAllSeqEdits(Translation *translation) {
  char *edits[] = { "initial_met", "_selenocysteine", "amino_acid_sub", NULL };

  Vector *seqEds = Vector_new();

  char **editP = edits;
  while (*editP) {
    char *edit = *editP;

    Vector *attribs = Translation_getAllAttributes(translation, edit);

    // convert attributes to SeqEdit objects
    int i;
    for (i=0; i<Vector_getNumElement(attribs); i++) {
      Attribute *attrib = Vector_getElementAt(attribs, i);
      SeqEdit *seqEd = SeqEdit_newFromAttribute(attrib);

      Vector_addElement(seqEds, seqEd);
    }

    Vector_free(attribs);
    editP++;
  }

  return seqEds;
}
Example #2
0
// NIY:
// Because this can filter the results the vector that gets returned must be freeable - so for now
// make a copy of the translation->attributes vector if returning unfiltered so behaviour is 
// consistent. Long term probably want reference count incremented
Vector *Translation_getAllAttributes(Translation *translation, char *attribCode) {
  if (translation->attributes == NULL) {
    TranslationAdaptor *tlna = (TranslationAdaptor *)Translation_getAdaptor(translation);
    if (tlna == NULL) { // No adaptor
// Perl comments out the warning, I'll put it back for now, just in case
      //fprintf(stderr,"Warning: Cannot get attributes without an adaptor.\n");
      return Vector_new();
    }

    AttributeAdaptor *ata = DBAdaptor_getAttributeAdaptor(tlna->dba);
    translation->attributes = AttributeAdaptor_fetchAllByTranslation(ata, translation, NULL);
  }

  if (attribCode != NULL) {
    Vector *results = Vector_new();
    int i;
    for (i=0; i<Vector_getNumElement(translation->attributes); i++) {
      Attribute *attrib = Vector_getElementAt(translation->attributes, i);
      if (!strcasecmp(attrib->code, attribCode)) {
        Vector_addElement(results, attrib);
      }
    }
    return results;
  } else {
// See NIY note above for why I'm making a copy 
    return Vector_copy(translation->attributes);
  }
}
Example #3
0
Vector *HomologyAdaptor_listStableIdsFromSpecies(HomologyAdaptor *ha, char *sp)  {
  StatementHandle *sth;
  ResultRow *row;
  Vector *genes;
  char qStr[1024];
  char *species;

  species = StrUtil_copyString(&species,sp,0);
  species = StrUtil_strReplChr(species,'_',' ');

  sprintf(qStr,
          "select  distinct grm.member_stable_id "
          " from    gene_relationship_member grm,"
          "         genome_db gd "
          " where   gd.genome_db_id = grm.genome_db_id "
          " and     gd.name = '%s'", species);


  sth = ha->prepare((BaseAdaptor *)ha, qStr, strlen(qStr));
  sth->execute(sth);

  genes = Vector_new();

  while ((row = sth->fetchRow(sth))) {
    char *tmpStr;
    Vector_addElement(genes,StrUtil_copyString(&tmpStr, row->getStringAt(row,0),0));
  }
  sth->finish(sth);

  free(species);

  return genes;
}                               
Vector *IntronSupportingEvidenceAdaptor_listLinkedTranscriptIds(IntronSupportingEvidenceAdaptor *isea, IntronSupportingEvidence *ise) {
  char qStr[1024];

  sprintf(qStr,"SELECT transcript_id from transcript_intron_supporting_evidence "
               "WHERE intron_supporting_evidence_id = "IDFMTSTR, IntronSupportingEvidence_getDbID(ise));

  StatementHandle *sth = isea->prepare((BaseAdaptor *)isea,qStr,strlen(qStr));
  sth->execute(sth);

  Vector *idVec = Vector_new();
  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType id = row->getLongLongAt(row, 0);
    IDType *idP;

    if ((idP = calloc(1,sizeof(IDType))) == NULL) {
      fprintf(stderr, "Failed allocating space for a id\n");
      exit(1);
    }

    *idP = id;
    Vector_addElement(idVec, idP);
  }
  sth->finish(sth);

  Vector_setFreeFunc(idVec, free);
  return idVec;
}
Example #5
0
Vector *PredictionTranscript_getAllTranslateableExons(PredictionTranscript *trans) {
    int i;

    if (!trans->translateableExons) {
        trans->translateableExons = Vector_new();
        for (i=0; i<PredictionTranscript_getExonCount(trans); i++) {
            PredictionExon *ex = PredictionTranscript_getExonAt(trans,i);
            if (ex) {
                Vector_addElement(trans->translateableExons, ex);
            }
        }
    }

    return trans->translateableExons;
}
Example #6
0
void PredictionTranscript_addExon(PredictionTranscript *trans, PredictionExon *exon, int *positionP) {
    if (positionP) {
        Vector_setElementAt(trans->exons, *positionP, exon);
    } else {
        Vector_addElement(trans->exons, exon);
    }

    if (exon && (!PredictionTranscript_getStartIsSet(trans) ||
                 PredictionExon_getStart(exon) < PredictionTranscript_getStart(trans))) {
        PredictionTranscript_setStart(trans, PredictionExon_getStart(exon));
    }
    if (exon && (!PredictionTranscript_getEndIsSet(trans) ||
                 PredictionExon_getEnd(exon) > PredictionTranscript_getEnd(trans))) {
        PredictionTranscript_setEnd(trans, PredictionExon_getEnd(exon));
    }
}
Example #7
0
/* 
  In perl note this is misspelled compared to normal _objs_from_sth
  Not sure if that's deliberate to avoid clash or just an error
  Obviously this isn't called through the normal path at all
  (just directly from within the fetch method).
*/
Vector *AttributeAdaptor_objectsFromStatementHandle(AttributeAdaptor *ata, StatementHandle *sth) {
  Vector *results = Vector_new();

  ResultRow *row;
// Note extra parentheses are to keep mac compiler happy
  while ((row = sth->fetchRow(sth))) {
    char *code  = row->getStringAt(row, 0);
    char *name  = row->getStringAt(row, 1);
    char *desc  = row->getStringAt(row, 2);
    char *value = row->getStringAt(row, 3);

    Attribute *attr = Attribute_new();
    Attribute_setCode(attr, code);
    Attribute_setName(attr, name);
    Attribute_setDescription(attr, desc);
    Attribute_setValue(attr, value);
    
    Vector_addElement(results, attr);
  }

  return results;
}
/*
=head2 fetch_all_by_Transcript

  Arg[1]      : Bio::EnsEMBL::Transcript Transcript to search with
  Example     : my $ises = $isea->fetch_all_by_Transcript($transcript);
  Description : Uses the given Transcript to search for all instances of
                IntronSupportingEvidence linked to the transcript in the 
                database 
  Returntype  : ArrayRef of IntronSupportingEvidence objects
  Exceptions  : Thrown if arguments are not as stated and for DB errors 

=cut
*/
Vector *IntronSupportingEvidenceAdaptor_fetchAllByTranscript(IntronSupportingEvidenceAdaptor *isea, Transcript *transcript) {
  char qStr[1024];

  sprintf(qStr,"SELECT intron_supporting_evidence_id "
                 "FROM transcript_intron_supporting_evidence "
                "WHERE transcript_id = "IDFMTSTR, Transcript_getDbID(transcript));

  StatementHandle *sth = isea->prepare((BaseAdaptor *)isea,qStr,strlen(qStr));
  sth->execute(sth);

  Vector *idVec = Vector_new();
  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType id = row->getLongLongAt(row, 0);
    IDType *idP;

    if ((idP = calloc(1,sizeof(IDType))) == NULL) {
      fprintf(stderr, "Failed allocating space for a id\n");
      exit(1);
    }

    *idP = id;
    Vector_addElement(idVec, idP);
  }
  sth->finish(sth);

  Vector *out;
  if (Vector_getNumElement(idVec) > 0) {
    out = IntronSupportingEvidenceAdaptor_fetchAllByDbIDList(isea, idVec, NULL); 
  } else {
    out = Vector_new();
  }
 
  // Free ids vector
  Vector_setFreeFunc(idVec, free);
  Vector_free(idVec);

  return out;
}
Example #9
0
Vector *HomologyAdaptor_getHomologues(HomologyAdaptor *ha, char *qStr) {
  StatementHandle *sth;
  ResultRow *row;
  Vector *genes;

  sth = ha->prepare((BaseAdaptor *)ha, qStr, strlen(qStr));
  sth->execute(sth);

  genes = Vector_new();
  while ((row = sth->fetchRow(sth))) {
    Homology *homol = Homology_new();
    Homology_setSpecies(homol, row->getStringAt(row,1));
    Homology_setStableId(homol, row->getStringAt(row,0));
    Homology_setChromosome(homol, row->getStringAt(row,2));
    Homology_setChrStart(homol, row->getIntAt(row,3));
    Homology_setChrEnd(homol, row->getIntAt(row,4));
        
    Vector_addElement(genes,homol);
  }

  sth->finish(sth);

  return genes;
}
Example #10
0
Vector *SyntenyRegionAdaptor_fetchByClusterId(SyntenyRegionAdaptor *sra, IDType clusterId) {
  char qStr[256];
  StatementHandle *sth;
  ResultRow *row;
  Vector *out = NULL;

  if (!clusterId) {
    fprintf(stderr, "Error: fetch_by_cluster_id with no cluster_id!\n");
  } else {
    sprintf(qStr,
            "select synteny_region_id,"
            "       dnafrag_id,"
            "       seq_start,"
            "       seq_end "
            " from synteny_region "
            " where synteny_cluster_id = " IDFMTSTR, clusterId);

    sth = sra->prepare((BaseAdaptor *)sra, qStr, strlen(qStr));
    sth->execute(sth);

    out = Vector_new();
    while ((row = sth->fetchRow(sth))) {
      SyntenyRegion *sr = SyntenyRegionAdaptor_newRegionFromArray(sra, 
                                                                  row->getLongLongAt(row,0),
                                                                  clusterId,
                                                                  row->getLongLongAt(row,1), 
                                                                  row->getIntAt(row,2),
                                                                  row->getIntAt(row,3));
    
      Vector_addElement(out,sr);
    }
    sth->finish(sth);
  }

  return out;
}
Example #11
0
Vector *GenomicAlignAdaptor_mergeAlignsets(GenomicAlignAdaptor *gaa, Vector *alignSet1, Vector *alignSet2) {
  int i;
  Vector *bigList = Vector_new();
  IDHash *overlappingSets[2];
  Vector *mergedAligns;


  for (i=0;i<Vector_getNumElement(alignSet1); i++) {
    GenomicAlign *align = Vector_getElementAt(alignSet1, i);
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getQueryDNAFrag(align)),
                                                        GenomicAlign_getQueryStart(align), align, 0));
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getQueryDNAFrag(align)),
                                                        GenomicAlign_getQueryEnd(align)+0.5, align, 0));
  }

  for (i=0;i<Vector_getNumElement(alignSet2); i++) {
    GenomicAlign *align = Vector_getElementAt(alignSet2, i);
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getConsensusDNAFrag(align)),
                                                        GenomicAlign_getConsensusStart(align), align, 1));
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getConsensusDNAFrag(align)),
                                                        GenomicAlign_getConsensusEnd(align)+0.5, align, 1));
  }
  
  Vector_sort(bigList, GenomicAlignListElem_compFunc);

  // walking from start to end through sortlist and keep track of the 
  // currently overlapping set of Alignments
 
  overlappingSets[0] = IDHash_new(IDHASH_SMALL);
  overlappingSets[1] = IDHash_new(IDHASH_SMALL);

  mergedAligns = Vector_new();

  for (i=0; i<Vector_getNumElement(bigList); i++) {
    GenomicAlignListElem *gale  = Vector_getElementAt(bigList,i);

    GenomicAlign *align = gale->align;
    IDType alignID      = GenomicAlign_getDbID(align);
    int setNo           = gale->setNum;

    if (IDHash_contains(overlappingSets[setNo], alignID)) {
      // remove from current overlapping set
      IDHash_remove(overlappingSets[setNo], alignID, NULL);
    } else {
      int j;
      void **values = IDHash_getValues(overlappingSets[1-setNo]);

      // insert into the set and do all the overlap business
      IDHash_add(overlappingSets[setNo], alignID, align);

      // the other set contains everything this align overlaps with
      for (j=0; j<IDHash_getNumValues(overlappingSets[1-setNo]); j++) {
        GenomicAlign *align2 = values[j];
        if (setNo == 0) {
          GenomicAlignAdaptor_addDerivedAlignments(gaa, mergedAligns, align, align2);
        } else {
          GenomicAlignAdaptor_addDerivedAlignments(gaa, mergedAligns, align2, align);
        }
      }
      free(values);
    }
  }

// NIY Free gale

  return mergedAligns;
}
Example #12
0
// Also added a flag to indicate we actually want the gaps vector returned - quite often its not used in the caller and so would leak
// memory
Vector *RangeRegistry_checkAndRegister(RangeRegistry *registry, IDType id, long start, long end, 
                                       long rStart, long rEnd, int wantGaps) {

  // The following was commented out due to Ensembl Genomes requirements
  // for bacterial genomes.
  // The following was uncommented because I'm not caring about those requirements
  if ( start > end ) {
    fprintf(stderr, "start argument [%ld] must be less than (or equal to) end argument [%ld]\n", start, end);
    exit(1);
  }
  
  if ( rStart > rEnd ) {
    fprintf(stderr, "rStart argument [%ld] must be less than (or equal to) rEnd argument [%ld]\n", rStart, rEnd);
    exit(1);
  }

  if ( rStart > start ) {
    fprintf(stderr, "rStart argument [%ld] must be less than (or equal to) start [%ld]\n", rStart, start);
    exit(1);
  }

  if ( rEnd < end ) {
    fprintf(stderr, "rEnd argument [%ld] must be greater than (or equal to) end [%ld]\n", rEnd, end);
    exit(1);
  }

  IDHash *regReg = RangeRegistry_getRegistry(registry);
  Vector *list;
  if (IDHash_contains(regReg, id)) {
    list = IDHash_getValue(regReg, id);
  } else {
    list = Vector_new();
    IDHash_add(regReg, id, list);
  }

  
  Vector *gapPairs = NULL;
  if (wantGaps) {
    gapPairs = Vector_new();
  }

  int len = Vector_getNumElement(list);

  if (len == 0) {
    //this is the first request for this id, return a gap pair for the
    // entire range and register it as seen
    CoordPair *cp = CoordPair_new(rStart, rEnd);
    Vector_addElement(list, cp);

    return Vector_copy(list);
  }

  //####
  // loop through the list of existing ranges recording any "gaps" where
  // the existing range does not cover part of the requested range
  // 

  int startIdx = 0;
  int endIdx   = Vector_getNumElement(list)-1;
  int midIdx;
  CoordPair *range;

  // binary search the relevant pairs
  // helps if the list is big
  while ( ( endIdx - startIdx ) > 1 ) {
    midIdx = ( startIdx + endIdx ) >> 1;
    range  = Vector_getElementAt(list, midIdx);

    if ( CoordPair_getEnd(range) < rStart ) {
      startIdx = midIdx;
    } else {
      endIdx = midIdx;
    }
  }

  long gapStart;
  long gapEnd;
  int rIdx = -1;
  int rStartIdx = -1;
  int rEndIdx;

  gapStart = rStart;

  int i;
  for (i=startIdx; i < len ; i++ ) {
    CoordPair *pRange = Vector_getElementAt(list, i);
    long pStart = CoordPair_getStart(pRange);
    long pEnd   = CoordPair_getEnd(pRange);
    
    // no work needs to be done at all if we find a range pair that
    // entirely overlaps the requested region
    if ( pStart <= start && pEnd >= end ) {
      return Vector_new(); // perl returns undef, but that causes me problems
    }

    // find adjacent or overlapping regions already registered
    if ( pEnd >= ( rStart - 1 ) && pStart <= ( rEnd + 1 ) ) {
      if ( rStartIdx < 0 ) { // Not yet been set
        rStartIdx = i;
      }
      rEndIdx = i;
    }

    if ( pStart > rStart ) {
      gapEnd = ( rEnd < pStart ) ? rEnd : pStart - 1;
      if (wantGaps) {
        CoordPair *cp = CoordPair_new(gapStart, gapEnd);
        Vector_addElement(gapPairs, cp);
      }
    }

    gapStart = ( rStart > pEnd ) ? rStart : pEnd + 1;

    if ( pEnd >= rEnd && rIdx < 0 ) {
      rIdx = i;
      break;
    }
  }

  // do we have to make another gap?
  if ( gapStart <= rEnd ) {
    if (wantGaps) {
      CoordPair *cp = CoordPair_new(gapStart, rEnd);
      Vector_addElement(gapPairs, cp);
    }
  }

  // 
  // Merge the new range into the registered list
  // 
  if (rStartIdx >= 0 ) { // rStartIdx has been set to something 
    long newStart;
    long newEnd;
    CoordPair *rStartIdxRange = Vector_getElementAt(list, rStartIdx); 
    CoordPair *rEndIdxRange   = Vector_getElementAt(list, rEndIdx); 

    if ( rStart < CoordPair_getStart(rStartIdxRange)) {
      newStart = rStart;
    } else {
      newStart = CoordPair_getStart(rStartIdxRange);
    }

    if ( rEnd > CoordPair_getEnd(rEndIdxRange)) {
      newEnd = rEnd;
    } else {
      newEnd = CoordPair_getEnd(rEndIdxRange);
    }

    CoordPair *cp = CoordPair_new(newStart, newEnd);

    // Think its <=
    for (i=rStartIdx; i<=rEndIdx; i++) {
      Vector_removeElementAt(list, rStartIdx); // Always remove from rStartIdx as array is shrinking by one each time called
    }
    Vector_insertElementAt(list, rStartIdx, cp);
    //splice( @$list, $rstart_idx,
    //        $rend_idx - $rstart_idx + 1,
    //        [ $new_start, $new_end ] );

  } else if (rIdx >= 0) {
    CoordPair *cp = CoordPair_new(rStart, rEnd);
    Vector_insertElementAt(list, rIdx, cp);
    //splice( @$list, $r_idx, 0, [ $rstart, $rend ] );
  } else {
    CoordPair *cp = CoordPair_new(rStart, rEnd);
    Vector_addElement(list, cp);
  }

  // Note if wantGaps is not set then gapPairs will be NULL - but you said you didn't want it so that should be OK
  return gapPairs;
}
Example #13
0
int main(int argc, char *argv[]) {
  DBAdaptor *      dba;
  StatementHandle *sth;
  ResultRow *      row;
  Vector *         slices;
  int              nSlices;
  htsFile *      out;

  int   argNum = 1;

  char *inFName  = NULL;
  char *outFName = NULL;

  char *dbUser = "******";
  char *dbPass = NULL;
  int   dbPort = 3306;

  char *dbHost = "ens-staging.internal.sanger.ac.uk";
  char *dbName = "homo_sapiens_core_71_37";

  char *assName = "GRCh37";

  char *chrName = "1";


  int flags = 0;
  int   threads  = 1;

  initEnsC(argc, argv);

  while (argNum < argc) {
    char *arg = argv[argNum];
    char *val;

// Ones without a val go here
    if (!strcmp(arg, "-U") || !strcmp(arg,"--ucsc_naming")) {
      flags |= M_UCSC_NAMING;
    } else {
// Ones with a val go in this block
      if (argNum == argc-1) {
        Bamcov_usage();
      }

      val = argv[++argNum];
  
      if (!strcmp(arg, "-i") || !strcmp(arg,"--in_file")) {
        StrUtil_copyString(&inFName,val,0);
      } else if (!strcmp(arg, "-o") || !strcmp(arg,"--out_file")) {
        StrUtil_copyString(&outFName,val,0);
      } else if (!strcmp(arg, "-h") || !strcmp(arg,"--host")) {
        StrUtil_copyString(&dbHost,val,0);
      } else if (!strcmp(arg, "-p") || !strcmp(arg,"--password")) {
        StrUtil_copyString(&dbPass,val,0);
      } else if (!strcmp(arg, "-P") || !strcmp(arg,"--port")) {
        dbPort = atoi(val);
      } else if (!strcmp(arg, "-n") || !strcmp(arg,"--name")) {
        StrUtil_copyString(&dbName,val,0);
      } else if (!strcmp(arg, "-u") || !strcmp(arg,"--user")) {
        StrUtil_copyString(&dbUser,val,0);
      } else if (!strcmp(arg, "-t") || !strcmp(arg,"--threads")) {
        threads = atoi(val);
      } else if (!strcmp(arg, "-a") || !strcmp(arg,"--assembly")) {
        StrUtil_copyString(&assName,val,0);
      } else if (!strcmp(arg, "-v") || !strcmp(arg,"--verbosity")) {
        verbosity = atoi(val);
// Temporary
      } else if (!strcmp(arg, "-c") || !strcmp(arg,"--chromosome")) {
        StrUtil_copyString(&chrName,val,0);
      } else {
        fprintf(stderr,"Error in command line at %s\n\n",arg);
        Bamcov_usage();
      }
    }
    argNum++;
  }

  if (verbosity > 0) {
    printf("Program for calculating read coverage in a BAM file \n"
           "Steve M.J. Searle.  [email protected]  Last update April 2013.\n");
  }

  if (!inFName || !outFName) {
    Bamcov_usage();
  }

  dba = DBAdaptor_new(dbHost,dbUser,dbPass,dbName,dbPort,NULL);

  //nSlices = getSlices(dba, destName);
  nSlices = 1;

  slices = Vector_new();

  SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(dba);

  Slice *slice = SliceAdaptor_fetchByRegion(sa,NULL,chrName,POS_UNDEF,POS_UNDEF,1,NULL, 0);

  Vector_addElement(slices,slice);

  if (Vector_getNumElement(slices) == 0) {
    fprintf(stderr, "Error: No slices.\n");
    exit(1);
  }

  htsFile *in = hts_open(inFName, "rb");
  if (in == 0) {
    fprintf(stderr, "Fail to open BAM file %s\n", inFName);
    return 1;
  }

  hts_set_threads(in, threads);
  hts_idx_t *idx;
  idx = bam_index_load(inFName); // load BAM index
  if (idx == 0) {
    fprintf(stderr, "BAM index file is not available.\n");
    return 1;
  }

  int i;
  for (i=0; i<Vector_getNumElement(slices); i++) {
    Slice *slice = Vector_getElementAt(slices,i);

    if (verbosity > 0) printf("Working on '%s'\n",Slice_getName(slice));

//    if (verbosity > 0) printf("Stage 1 - retrieving annotation from database\n");
//    Vector *genes = getGenes(slice, flags);

    if (verbosity > 0) printf("Stage 1 - calculating coverage\n");
    calcCoverage(inFName, slice, in, idx, flags);
  }


  hts_idx_destroy(idx);
  hts_close(in);

  if (verbosity > 0) printf("Done\n");
  return 0;
}
Example #14
0
/*
=head2 _objs_from_sth

  Arg [1]    : DBI:st $sth 
               An executed DBI statement handle
  Arg [2]    : (optional) Bio::EnsEMBL::Mapper $mapper 
               An mapper to be used to convert contig coordinates
               to assembly coordinates.
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice to map the prediction transcript to.   
  Example    : $p_transcripts = $self->_objs_from_sth($sth);
  Description: Creates a list of Prediction transcripts from an executed DBI
               statement handle.  The columns retrieved via the statement 
               handle must be in the same order as the columns defined by the
               _columns method.  If the slice argument is provided then the
               the prediction transcripts will be in returned in the coordinate
               system of the $slice argument.  Otherwise the prediction 
               transcripts will be returned in the RawContig coordinate system.
  Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
  Exceptions : none
  Caller     : superclass generic_fetch
  Status     : Stable

=cut
*/
Vector *PredictionTranscriptAdaptor_objectsFromStatementHandle(PredictionTranscriptAdaptor *pta, 
                                                               StatementHandle *sth, 
                                                               AssemblyMapper *assMapper, 
                                                               Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(pta->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(pta->dba);

  Vector *pTranscripts = Vector_new();
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);

  long         destSliceStart;
  long         destSliceEnd;
  int          destSliceStrand;
  long         destSliceLength;
  char *       destSliceSrName;
  IDType       destSliceSrId = 0;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    destSliceSrName = Slice_getSeqRegionName(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType predictionTranscriptId = row->getLongLongAt(row,0);
    IDType seqRegionId            = row->getLongLongAt(row,1);
    long seqRegionStart           = row->getLongAt(row,2);
    long seqRegionEnd             = row->getLongAt(row,3);
    int seqRegionStrand           = row->getIntAt(row,4);
    IDType analysisId             = row->getLongLongAt(row,5);
    char *displayLabel            = row->getStringAt(row,6);

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *ptSlice = slice;

    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    //
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      ptSlice = IDHash_getValue(sliceHash, seqRegionId);
    }

    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is foward strand, nothing needs doing
    //
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      ptSlice = destSlice;
    }
    
    // Finally, create the new PredictionTranscript.
    PredictionTranscript *pt = PredictionTranscript_new();

    PredictionTranscript_setStart       (pt, seqRegionStart);
    PredictionTranscript_setEnd         (pt, seqRegionEnd);
    PredictionTranscript_setStrand      (pt, seqRegionStrand);
    PredictionTranscript_setSlice       (pt, ptSlice);
    PredictionTranscript_setAnalysis    (pt, analysis);
    PredictionTranscript_setAdaptor     (pt, (BaseAdaptor *)pta);
    PredictionTranscript_setDbID        (pt, predictionTranscriptId);
    PredictionTranscript_setDisplayLabel(pt, displayLabel);

    Vector_addElement(pTranscripts, pt);
  }

  IDHash_free(sliceHash, NULL);
  return pTranscripts;
}
Example #15
0
Vector *PredictionTranscriptAdaptor_fetchAllBySlice(PredictionTranscriptAdaptor *pta, Slice *slice, char *logicName, int loadExons) {

  //my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name);
  Vector *transcripts = BaseFeatureAdaptor_fetchAllBySlice((BaseFeatureAdaptor *)pta, slice, logicName);

  // if there are 0 or 1 transcripts still do lazy-loading
  if ( ! loadExons || Vector_getNumElement(transcripts) < 2 ) {
    return transcripts;
  }

  // preload all of the exons now, instead of lazy loading later
  // faster than 1 query per transcript

  // get extent of region spanned by transcripts
  long minStart =  2000000000;
  long maxEnd   = -2000000000;

  int i;
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    if (PredictionTranscript_getSeqRegionStart((SeqFeature*)t) < minStart) {
      minStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)t);
    }
    if (PredictionTranscript_getSeqRegionEnd((SeqFeature*)t) > maxEnd) {
      maxEnd = PredictionTranscript_getSeqRegionEnd((SeqFeature*)t);
    }
  }

  Slice *extSlice;

  if (minStart >= Slice_getStart(slice) && maxEnd <= Slice_getEnd(slice)) {
    extSlice = slice;
  } else {
    SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba);
    extSlice = SliceAdaptor_fetchByRegion(sa, Slice_getCoordSystemName(slice), Slice_getSeqRegionName(slice),
                                          minStart, maxEnd, Slice_getStrand(slice), CoordSystem_getVersion(Slice_getCoordSystem(slice)), 0);
  }

  // associate exon identifiers with transcripts
  IDHash *trHash = IDHash_new(IDHASH_MEDIUM);
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    if ( ! IDHash_contains(trHash, PredictionTranscript_getDbID(t))) {
      IDHash_add(trHash, PredictionTranscript_getDbID(t), t);
    }
  }

  IDType *uniqueIds = IDHash_getKeys(trHash);

  char tmpStr[1024];
  char *qStr = NULL;
  if ((qStr = (char *)calloc(655500,sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating qStr\n");
    return transcripts;
  }

  int lenNum;
  int endPoint = sprintf(qStr, "SELECT prediction_transcript_id, prediction_exon_id, exon_rank FROM prediction_exon WHERE  prediction_transcript_id IN (");
  for (i=0; i<IDHash_getNumValues(trHash); i++) {
    if (i!=0) {
      qStr[endPoint++] = ',';
      qStr[endPoint++] = ' ';
    }
    lenNum = sprintf(tmpStr,IDFMTSTR,uniqueIds[i]);
    memcpy(&(qStr[endPoint]), tmpStr, lenNum);
    endPoint+=lenNum;
  }
  qStr[endPoint++] = ')';
  qStr[endPoint] = '\0';

  free(uniqueIds);

  StatementHandle *sth = pta->prepare((BaseAdaptor *)pta,qStr,strlen(qStr));
  sth->execute(sth);

  IDHash *exTrHash = IDHash_new(IDHASH_MEDIUM);
  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType trId = row->getLongLongAt(row,0);
    IDType exId = row->getLongLongAt(row,1);
    int    rank = row->getIntAt(row,2);

    if (! IDHash_contains(exTrHash, exId)) {
      Vector *vec = Vector_new();
      Vector_setFreeFunc(vec, PredictionTranscriptRankPair_free);
      IDHash_add(exTrHash, exId, vec);
    }
    Vector *exVec = IDHash_getValue(exTrHash, exId);
    PredictionTranscriptRankPair *trp = PredictionTranscriptRankPair_new(IDHash_getValue(trHash, trId), rank);
    Vector_addElement(exVec, trp);
  }

  IDHash_free(trHash, NULL);

  sth->finish(sth);

  PredictionExonAdaptor *pea = DBAdaptor_getPredictionExonAdaptor(pta->dba);
  Vector *exons = PredictionExonAdaptor_fetchAllBySlice(pea, extSlice);

  // move exons onto transcript slice, and add them to transcripts
  for (i=0; i<Vector_getNumElement(exons); i++) {
    PredictionExon *ex = Vector_getElementAt(exons, i);

  // Perl didn't have this line - it was in GeneAdaptor version so I think I'm going to keep it
    if (!IDHash_contains(exTrHash, PredictionExon_getDbID(ex))) continue;

    PredictionExon *newEx;
    if (slice != extSlice) {
      newEx = (PredictionExon*)PredictionExon_transfer((SeqFeature*)ex, slice);
      if (newEx == NULL) {
        fprintf(stderr, "Unexpected. Exon could not be transferred onto PredictionTranscript slice.\n");
        exit(1);
      }
    } else {
      newEx = ex;
    }

    Vector *exVec = IDHash_getValue(exTrHash, PredictionExon_getDbID(newEx));
    int j;
    for (j=0; j<Vector_getNumElement(exVec); j++) {
      PredictionTranscriptRankPair *trp = Vector_getElementAt(exVec, j);
      PredictionTranscript_addExon(trp->transcript, newEx, &trp->rank);
    }
  }

  IDHash_free(exTrHash, Vector_free);
  free(qStr);

  return transcripts;
}
Example #16
0
Vector *GenomicAlignAdaptor_fetchAllByDNAFragGenomeDB(GenomicAlignAdaptor *gaa,
               DNAFrag *dnaFrag, GenomeDB *targetGenome, int *startP, int *endP, 
               char *alignmentType) {

  Vector *result = NULL;
  GenomeDB *genomeCons;
  IDType methodLinkId;
  GenomeDB *genomeQuery;
  Vector *mergedAligns;
  int ok = 1;

  if (!dnaFrag) {
    fprintf(stderr, "Error: dnaFrag argument must be non NULL\n");
    ok = 0;
  }

  if (ok) {
    methodLinkId = GenomicAlignAdaptor_methodLinkIdByAlignmentType(gaa, alignmentType);

    genomeCons = DNAFrag_getGenomeDB(dnaFrag);
    genomeQuery = targetGenome;
  
    // direct or indirect ??
    if (GenomeDB_hasConsensus(genomeCons, genomeQuery, methodLinkId) ||
        GenomeDB_hasQuery(genomeCons, genomeQuery, methodLinkId)) {
      result = GenomicAlignAdaptor_fetchAllByDNAFragGenomeDBDirect(gaa, 
                                                                   dnaFrag, targetGenome, startP, endP, methodLinkId);
    } else {
      // indirect checks
      Vector *linkedCons  = GenomeDB_linkedGenomesByMethodLinkId(genomeCons, methodLinkId);
      Vector *linkedQuery = GenomeDB_linkedGenomesByMethodLinkId(genomeQuery, methodLinkId);
    
      // there are not many genomes, square effort is cheap
      Vector *linked = Vector_new();
      Vector *set1 = Vector_new();
      mergedAligns = Vector_new();
      int i;

      for (i=0; i<Vector_getNumElement(linkedCons); i++) {
        int j;
        GenomeDB *g1 = Vector_getElementAt(linkedCons, i);
        
        for (j=0; j<Vector_getNumElement(linkedQuery); j++) {
          GenomeDB *g2 = Vector_getElementAt(linkedQuery, i);
          if (g1 == g2) {
            Vector_addElement(linked, g1);
          }
        }
      }
      Vector_free(linkedCons);
      Vector_free(linkedQuery);

      // collect GenomicAligns from all linked genomes
      for (i=0; i<Vector_getNumElement(linked); i++) {
        GenomeDB *g = Vector_getElementAt(linked, i);

        Vector *gres = GenomicAlignAdaptor_fetchAllByDNAFragGenomeDBDirect(gaa, 
                                                                           dnaFrag, g, startP, endP, methodLinkId);
        Vector_append(set1, gres);
        
        Vector_free(gres);
      }

      // go from each dnafrag in the result set to target_genome
      // there is room for improvement here: create start end
      // my %frags = map { $_->query_dnafrag->dbID => $_->query_dnafrag } @$set1;
    

      for (i=0; i<Vector_getNumElement(set1); i++) {
        GenomicAlign *alignA = Vector_getElementAt(set1,i);
        DNAFrag *frag = GenomicAlign_getQueryDNAFrag(alignA);
        int qStart = GenomicAlign_getQueryStart(alignA);
        int qEnd   = GenomicAlign_getQueryEnd(alignA);

        Vector *dres = GenomicAlignAdaptor_fetchAllByDNAFragGenomeDBDirect(gaa,
                                                                           frag, genomeQuery, &qStart, &qEnd, methodLinkId);
        int j;

        for (j=0; j<Vector_getNumElement(dres); j++) {
          GenomicAlign *alignB = Vector_getElementAt(dres,j);
          GenomicAlignAdaptor_addDerivedAlignments(gaa,  mergedAligns, alignA, alignB);
        } 
        Vector_free(dres);
      }
      // NIY freeing
      result = mergedAligns;
    }
  }

  return result;
}
Example #17
0
void GenomicAlignAdaptor_addDerivedAlignments(GenomicAlignAdaptor *gaa, 
                     Vector *mergedAligns, GenomicAlign *alignA, GenomicAlign *alignB) {

  // variable name explanation
  // q - query c - consensus s - start e - end l - last
  // o, ov overlap j - jump_in_
  // r - result

  int  qs, qe, lqs, lqe, cs, ce, lce,
       ocs, oce, oqs, oqe, jc, jq, ovs, ove,
       rcs, rce, rqs, rqe;
  int currentMatch = 0;
  int newMatch;
  int cigAPos = 0, cigBPos = 0;
  char *resultCig;
  char tmpStr[128];

  // initialization phase
  Vector *cigA = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignA));
  Vector *cigB = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignB));

  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    Vector_reverse(cigB);
  }

  // need a 'normalized' start for qs, qe, oxs so I dont 
  // have to check strandedness all the time  

  // consensus is strand 1 and is not compared to anything,
  // can keep its original coordinate system
 
  lce = GenomicAlign_getConsensusStart(alignA) - 1;
  ce = lce;
  cs = ce + 1;
  
  // alignBs query can be + or - just keep relative coords for now
  lqe = 0; lqs = 1;
  qe = 0; qs = 1;

  // ocs will be found relative to oce and has to be comparable
  // to oqs. But it could be that we have to move downwards if we
  // are not - strand. thats why coordinates are transformed here

  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    // query_end is first basepair of alignment
    if (GenomicAlign_getQueryEnd(alignA) < GenomicAlign_getConsensusEnd(alignB)) {
      oce = 0; ocs = 1;
      oqe = GenomicAlign_getConsensusEnd(alignB) - GenomicAlign_getQueryEnd(alignA);
      oqs = oqe + 1;
    } else {
      oqe = 0; oqs = 1;
      oce = GenomicAlign_getQueryEnd(alignA) - GenomicAlign_getConsensusEnd(alignB);
      ocs = oce + 1;
    }
  } else {
    // in theory no coordinate magic necessary :-)
    oqs = GenomicAlign_getQueryStart(alignA);
    oqe = oqs - 1; 
    ocs = GenomicAlign_getConsensusStart(alignB);
    oce = ocs - 1;
  }

  // initializing result
  rcs = rce = rqs = rqe = 0;
  resultCig= StrUtil_copyString(&resultCig,"",0);

  while (1) {
    int newGa;
    // exit if you request a new piece of alignment and the cig list is 
    // empty

    if (oce < ocs || oce < oqs) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe ); 
      continue;
    }
    if (oqe < oqs || oqe < ocs) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
      continue;
    }

    // now matching region overlap in reference genome
    ovs = ocs < oqs ? oqs : ocs;
    ove = oce < oqe ? oce : oqe;
    
    if (currentMatch) {
      jc = cs + (ovs - oqs) - lce - 1;
      jq = qs + (ovs - ocs) - lqe - 1;
    } else {
      jc = jq = 0;
    }

    newMatch = ove - ovs + 1;
    newGa = 0;

    if (jc==0) {
      if (jq==0) {
	currentMatch += newMatch;
      } else {
        // store current match;
        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

	// jq deletions;
	if (jq == 1) {
          resultCig = StrUtil_appendString(resultCig,"D");
        } else {
          sprintf(tmpStr,"%dD",jq);
          resultCig = StrUtil_appendString(resultCig,tmpStr);
        }
	currentMatch = newMatch;
      }
    } else {
      if (jq==0) {
        // store current match;
        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

	// jc insertions;
	if (jc==1) {
          resultCig = StrUtil_appendString(resultCig,"I");
        } else {
          sprintf(tmpStr,"%dI",jc);
          resultCig = StrUtil_appendString(resultCig,tmpStr);
        }
	currentMatch = newMatch;
         
      } else {
        double percId;
        double score;
        GenomicAlign *ga;

        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

	// new GA
	int queryStrand = GenomicAlign_getQueryStrand(alignA) * GenomicAlign_getQueryStrand(alignB);
	int queryStart, queryEnd;
	if (queryStrand == 1) {
	  queryStart = rqs + GenomicAlign_getQueryStart(alignB) - 1;
	  queryEnd = rqe + GenomicAlign_getQueryStart(alignB) - 1;
	} else {
	  queryEnd = GenomicAlign_getQueryEnd(alignB) - rqs + 1;
	  queryStart = GenomicAlign_getQueryEnd(alignB) - rqe + 1;
	}
      
        score = (GenomicAlign_getScore(alignA) < GenomicAlign_getScore(alignB)) ? 
          GenomicAlign_getScore(alignA) : GenomicAlign_getScore(alignB);
        percId =  (int)(GenomicAlign_getPercentId(alignA)*GenomicAlign_getPercentId(alignB)/100.0);
        
        ga = GenomicAlign_new();
    
        GenomicAlign_setConsensusDNAFrag(ga, GenomicAlign_getConsensusDNAFrag(alignA));
        GenomicAlign_setQueryDNAFrag(ga, GenomicAlign_getQueryDNAFrag(alignB));
        GenomicAlign_setCigarString(ga, resultCig);
        GenomicAlign_setConsensusStart(ga, rcs);
        GenomicAlign_setConsensusEnd(ga, rce);
        GenomicAlign_setQueryStrand(ga, queryStrand);
        GenomicAlign_setQueryStart(ga, queryStart);
        GenomicAlign_setQueryEnd(ga, queryEnd);
        GenomicAlign_setAdaptor(ga, (BaseAdaptor *)gaa);
        GenomicAlign_setPercentId(ga, percId);
        GenomicAlign_setScore(ga, score);

	Vector_addElement(mergedAligns, ga);

        rcs = rce = rqs = rqe = 0;
	resultCig[0] = '\0';
	
	currentMatch = newMatch;
      }
    }


    
    if (!rcs) rcs = cs+(ovs-oqs);
    rce = cs+(ove-oqs);
    if (!rqs) rqs = qs+(ovs-ocs);
    rqe = qs+(ove-ocs);

    // update the last positions
    lce = rce; 
    lqe = rqe;

    // next piece on the one that end earlier
 
    if (oce <= oqe) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe ); 
    }
    if (oce >= oqe) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
    } 
  } // end of while loop

  // if there is a last floating current match
  if (currentMatch) {
    
    // new GA
    int queryStrand = GenomicAlign_getQueryStrand(alignA) * GenomicAlign_getQueryStrand(alignB);
    int queryStart, queryEnd;
    double percId;
    double score;
    GenomicAlign *ga;

    sprintf(tmpStr,"%dM",currentMatch);
    resultCig = StrUtil_appendString(resultCig, tmpStr);

    if (queryStrand == 1) {
      queryStart = rqs + GenomicAlign_getQueryStart(alignB) - 1;
      queryEnd = rqe + GenomicAlign_getQueryStart(alignB) - 1;
    } else {
      queryEnd = GenomicAlign_getQueryEnd(alignB) - rqs + 1;
      queryStart = GenomicAlign_getQueryEnd(alignB) - rqe + 1;
    }
  
    score = (GenomicAlign_getScore(alignA) < GenomicAlign_getScore(alignB)) ? 
      GenomicAlign_getScore(alignA) : GenomicAlign_getScore(alignB);
    percId =  (int)(GenomicAlign_getPercentId(alignA)*GenomicAlign_getPercentId(alignB)/100.0);
    
    ga = GenomicAlign_new();

    GenomicAlign_setConsensusDNAFrag(ga, GenomicAlign_getConsensusDNAFrag(alignA));
    GenomicAlign_setQueryDNAFrag(ga, GenomicAlign_getQueryDNAFrag(alignB));
    GenomicAlign_setCigarString(ga, resultCig);
    GenomicAlign_setConsensusStart(ga, rcs);
    GenomicAlign_setConsensusEnd(ga, rce);
    GenomicAlign_setQueryStrand(ga, queryStrand);
    GenomicAlign_setQueryStart(ga, queryStart);
    GenomicAlign_setQueryEnd(ga, queryEnd);
    GenomicAlign_setAdaptor(ga, (BaseAdaptor *)gaa);
    GenomicAlign_setPercentId(ga, percId);
    GenomicAlign_setScore(ga, score);

    Vector_addElement(mergedAligns, ga);
  }

  free(resultCig);

  Vector_free(cigA);
  Vector_free(cigB);

  // nothing to return all in merged_aligns
}
Example #18
0
Vector *GenomicAlignAdaptor_objectsFromStatementHandle(GenomicAlignAdaptor *gaa, StatementHandle *sth,
                                                       int reverse) {
  Vector *results = Vector_new();
  ResultRow *row;
  DNAFragAdaptor *dfa;

  IDType consensusDNAFragId;
  IDType queryDNAFragId;
  int consensusStart;
  int consensusEnd;
  int queryStart;
  int queryEnd;
  int queryStrand;
  IDType methodLinkId;
  double score;
  double percId;
  char *cigarString;

  dfa = ComparaDBAdaptor_getDNAFragAdaptor(gaa->dba);

  while ((row = sth->fetchRow(sth))) {
    GenomicAlign *genomicAlign;
    char *alignmentType;
    
    if (reverse) {
      queryDNAFragId     = row->getLongLongAt(row,0);
      queryStart         = row->getIntAt(row,1);
      queryEnd           = row->getIntAt(row,2);
      consensusDNAFragId = row->getLongLongAt(row,3);
      consensusStart     = row->getIntAt(row,4);
      consensusEnd       = row->getIntAt(row,5);
    } else {
      consensusDNAFragId = row->getLongLongAt(row,0);
      consensusStart     = row->getIntAt(row,1);
      consensusEnd       = row->getIntAt(row,2);
      queryDNAFragId     = row->getLongLongAt(row,3);
      queryStart         = row->getIntAt(row,4);
      queryEnd           = row->getIntAt(row,5);
    }
    queryStrand  = row->getIntAt(row,6);
    methodLinkId = row->getLongLongAt(row,7);
    score        = row->getDoubleAt(row,8);
    percId       = row->getDoubleAt(row,9);
    cigarString  = row->getStringAt(row,10);

    alignmentType = GenomicAlignAdaptor_alignmentTypeByMethodLinkId(gaa, methodLinkId);

    if (reverse) {
      StrUtil_strReplChrs(cigarString,"DI","ID");

      // alignment of the opposite strand
      if (queryStrand == -1) {
        cigarString = CigarStrUtil_reverse(cigarString, strlen(cigarString));
      }
    }
    
    
    genomicAlign = GenomicAlign_new();
    GenomicAlign_setAdaptor(genomicAlign, (BaseAdaptor *)gaa);
    GenomicAlign_setConsensusDNAFrag(genomicAlign, DNAFragAdaptor_fetchByDbID(dfa,consensusDNAFragId));
    GenomicAlign_setConsensusStart(genomicAlign, consensusStart);
    GenomicAlign_setConsensusEnd(genomicAlign, consensusEnd);
    GenomicAlign_setQueryDNAFrag(genomicAlign, DNAFragAdaptor_fetchByDbID(dfa,queryDNAFragId));
    GenomicAlign_setQueryStart(genomicAlign, queryStart);
    GenomicAlign_setQueryEnd(genomicAlign, queryEnd);
    GenomicAlign_setQueryStrand(genomicAlign, queryStrand);
    GenomicAlign_setAlignmentType(genomicAlign, alignmentType);
    GenomicAlign_setScore(genomicAlign, score);
    GenomicAlign_setPercentId(genomicAlign, percId);
    GenomicAlign_setCigarString(genomicAlign, cigarString);

    Vector_addElement(results, genomicAlign);
  }

  return results;
}
Example #19
0
Vector *DBEntryAdaptor_fetchByObjectType(DBEntryAdaptor *dbea, IDType ensObj, char *ensType) {
  Vector *out;
  char qStr[1024];
  StatementHandle *sth;
  ResultRow *row;
  IDHash *seen;
  
  if (!ensObj) {
    fprintf(stderr,"Error: Can't fetchByObjectType without an object\n");
    exit(1);
  }

  if (!ensType) {
    fprintf(stderr,"Error: Can't fetchByObjectType without a type\n");
    exit(1);
  }

// Not sure if idt identities are right way round
  sprintf(qStr,
    "SELECT xref.xref_id, xref.dbprimary_acc, xref.display_label, xref.version,"
    "       xref.description,"
    "       exDB.db_name, exDB.db_release, exDB.status," 
    "       oxr.object_xref_id,"
    "       es.synonym," 
    "       idt.xref_identity, idt.ensembl_identity"
    " FROM  (external_db exDB, object_xref oxr, xref xref)" 
    " LEFT JOIN external_synonym es on es.xref_id = xref.xref_id"
    " LEFT JOIN identity_xref idt on idt.object_xref_id = oxr.object_xref_id"
    " WHERE  xref.xref_id = oxr.xref_id"
    "  AND  xref.external_db_id = exDB.external_db_id"
    "  AND  oxr.ensembl_id = " IDFMTSTR
    "  AND  oxr.ensembl_object_type = '%s'",
    ensObj,
    ensType);
  
  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

  sth->execute(sth);

  seen = IDHash_new(IDHASH_SMALL);
  out = Vector_new();

  while ((row = sth->fetchRow(sth))) {
    DBEntry *exDB;
    IDType refID = row->getLongLongAt(row,0);
			    
    // using an outer join on the synonyms as well as on identity_xref, we
    // now have to filter out the duplicates (see v.1.18 for
    // original). Since there is at most one identity_xref row per xref,
    // this is easy enough; all the 'extra' bits are synonyms

    if (!IDHash_contains(seen,refID))  {
      exDB = DBEntry_new();
      DBEntry_setAdaptor(exDB,(BaseAdaptor *)dbea);
      DBEntry_setDbID(exDB, refID);
      DBEntry_setPrimaryId(exDB, row->getStringAt(row,1));
      DBEntry_setDisplayId(exDB, row->getStringAt(row,2));
      DBEntry_setVersion(exDB, row->getStringAt(row,3));
      DBEntry_setDbName(exDB, row->getStringAt(row,5));
      DBEntry_setRelease(exDB, row->getStringAt(row,6));

      if (row->col(row,10)) {
        IdentityXref *idx = IdentityXref_new();
        DBEntry_setIdentityXref(exDB,idx);
	IdentityXref_setQueryIdentity(idx, row->getDoubleAt(row,10));
	IdentityXref_setTargetIdentity(idx, row->getDoubleAt(row,11));
      }
      
      if (row->col(row,4)) DBEntry_setDescription(exDB, row->getStringAt(row,4));
      if (row->col(row,7)) DBEntry_setStatus(exDB, row->getStringAt(row,7));
      
      Vector_addElement(out, exDB);
      IDHash_add(seen, refID, exDB);
    } 

    exDB = IDHash_getValue(seen, refID);

    if (row->col(row,9)) {
      DBEntry_addSynonym(exDB,row->getStringAt(row,9));
    }
  }

  IDHash_free(seen, NULL);

  sth->finish(sth);
  
  return out;
}
Vector *IntronSupportingEvidenceAdaptor_objectsFromStatementHandle(IntronSupportingEvidenceAdaptor *isea, 
                                                                   StatementHandle *sth,
                                                                   AssemblyMapper *assMapper,
                                                                   Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(isea->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(isea->dba);

  Vector *features = Vector_new();
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);
  
/* Unneccesary
  my %analysis_hash;
  my %sr_name_hash;
  my %sr_cs_hash;
*/
  

  
/* Unused
  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
*/

  long         destSliceStart;
  long         destSliceEnd;
  int          destSliceStrand;
  long         destSliceLength;
  //CoordSystem *destSliceCs;
  char *       destSliceSrName;
  IDType       destSliceSrId = 0;
  //AssemblyMapperAdaptor *asma;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    //??destSliceCs     = Slice_getCoordSystem(destSlice);
    destSliceSrName = Slice_getSeqRegionName(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
    //??asma            = DBAdaptor_getAssemblyMapperAdaptor(ea->dba);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType id =           row->getLongLongAt(row,0);
    IDType analysisId =   row->getLongLongAt(row,1);
    IDType seqRegionId =  row->getLongLongAt(row,2);
    long seqRegionStart = row->getLongAt(row,3);
    long seqRegionEnd =   row->getLongAt(row,4);
    int seqRegionStrand = row->getIntAt(row,5);
    char *hitName =       row->getStringAt(row,6);
    double score =        row->getDoubleAt(row,7);
    char *scoreType =     row->getStringAt(row,8);
    int spliceCanonical = row->getIntAt(row,9); 

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

/*
    // need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
*/
    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *iseSlice = slice;
    
    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    // 
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      //next FEATURE if (!defined($seq_region_id));
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      
/* Was - but identical if and else so why test???
      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      } else {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      }
*/
// Instead...
      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      iseSlice = IDHash_getValue(sliceHash, seqRegionId);
    }


    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is foward strand, nothing needs doing
    // 
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
       
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      iseSlice = destSlice;
    }
    
    IntronSupportingEvidence *ise = IntronSupportingEvidence_new();

    IntronSupportingEvidence_setStart             (ise, seqRegionStart);
    IntronSupportingEvidence_setEnd               (ise, seqRegionEnd);
    IntronSupportingEvidence_setStrand            (ise, seqRegionStrand);
    IntronSupportingEvidence_setSlice             (ise, iseSlice);
    IntronSupportingEvidence_setAnalysis          (ise, analysis);
    IntronSupportingEvidence_setAdaptor           (ise, (BaseAdaptor *)isea);
    IntronSupportingEvidence_setDbID              (ise, id);
    IntronSupportingEvidence_setHitName           (ise, hitName);
    IntronSupportingEvidence_setScore             (ise, score);
    IntronSupportingEvidence_setScoreType         (ise, scoreType);
    IntronSupportingEvidence_setIsSpliceCanonical(ise, spliceCanonical);

    Vector_addElement(features, ise);
  }
  
  return features;
}
Example #21
0
// For ordered, the default should be 0 (if you just need to fill out the args)
// Note ONLY stable_id can be char, all other pk's must be IDType (see code)
Vector *BaseAdaptor_listDbIDs(BaseAdaptor *ba, char *table, char *pk, int ordered) {
  int ok = 1;
  char colName[1024];

  if (pk == NULL) {
    sprintf(colName, "%s_id", table);
  } else {
    strcpy(colName, pk);
  }

  char qStr[1024];
  sprintf(qStr,"SELECT `%s` FROM `%s`", colName, table );

  if ( BaseAdaptor_isMultiSpecies(BaseAdaptor *ba)
      // For now just the multi species because I don't have adaptors in the Class hierarchy
      // && $self->isa('Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor')
      // && !$self->isa('Bio::EnsEMBL::DBSQL::UnmappedObjectAdaptor') 
     ) {
    char tmpStr[1024];
    
    sprintf(tmpStr, "JOIN seq_region USING (seq_region_id) "
                    "JOIN coord_system cs USING (coord_system_id) "
                    "WHERE cs.species_id = "IDFMTSTR, BaseAdaptor_getSpeciesId(ba));

    sprintf(qStr, "%s %s", qStr, tmpStr);
  }

  if (ordered) {
    sprintf(qStr, "%s ORDER BY seq_region_id, seq_region_start", qStr);
  }

  StatementHandle *sth = ba->prepare(ba,qStr,strlen(qStr));

  sth->execute(sth);

  Vector *out = Vector_new();

  if (strcmp(pk, "stable_id")) {
    ResultRow *row;
    while ((row = sth->fetchRow(sth))) {
      char *stableId = row->getStringCopyAt(row, 0);
  
      Vector_addElement(out, stableId);
    }
  } else  {
    IDType *idP;
    ResultRow *row;
    while ((row = sth->fetchRow(sth))) {
      IDType id = row->getLongLongAt(row, 0);
  
      if ((idP = calloc(1,sizeof(IDType))) == NULL) {
        fprintf(stderr, "Failed allocating space for a id\n");      
        ok = 0;
      } else {
        *idP = id;
        Vector_addElement(out, idP);
      }
    }
  }

  if (!ok) {
    Vector_free(out);
    out = NULL;
  }

  return out;
}