Ejemplo n.º 1
0
Mapper *Mapper_new(char *from, char *to, CoordSystem *fromCs, CoordSystem *toCs) {
  Mapper *m;

  if ((m = (Mapper *)calloc(1,sizeof(Mapper))) == NULL) {
    fprintf(stderr,"ERROR: Failed allocating space for mapper\n");
    exit(1);
  }

  if ( to == NULL || from == NULL ) {
    fprintf(stderr, "Must supply 'to' and 'from' tags");
    exit(1);
  }

  Mapper_setFrom(m, from);
  Mapper_setTo(m, to);

  Mapper_setFromCoordSystem(m, fromCs);
  Mapper_setToCoordSystem(m, toCs);

  Mapper_setPairCount(m, 0);

  Mapper_setPairHash(m, MAPPER_FROM_IND, IDHash_new(IDHASH_MEDIUM));
  Mapper_setPairHash(m, MAPPER_TO_IND,   IDHash_new(IDHASH_MEDIUM));

  return m;
}
Ejemplo n.º 2
0
void Mapper_flush(Mapper *m)  {
// NIY: What should free func be for these
  IDHash_free(Mapper_getPairHash(m, MAPPER_FROM_IND), NULL);
  IDHash_free(Mapper_getPairHash(m, MAPPER_TO_IND), NULL);

  Mapper_setPairHash(m, MAPPER_FROM_IND, IDHash_new(IDHASH_MEDIUM));
  Mapper_setPairHash(m, MAPPER_TO_IND,   IDHash_new(IDHASH_MEDIUM));

  Mapper_setPairCount(m, 0);
}
Ejemplo n.º 3
0
void AssemblyMapper_flushImpl(AssemblyMapper *am) {
  Mapper_flush( AssemblyMapper_getMapper(am) );

// IDHash_free may not be correct function type
// 
//  IDHash_free( AssemblyMapper_getComponentRegister(am), IDHash_free);
//  IDHash_free( AssemblyMapper_getAssembledRegister(am), IDHash_free);
  IDHash_free( AssemblyMapper_getComponentRegister(am), NULL);
// Note: HACK here to handle an IDHash within an IDHash - IDHash freeFunc only passes one arg on so need to HACK - will think about how to do this properly, I promise!
  IDHash_free( AssemblyMapper_getAssembledRegister(am), freeRegisterIDHash);

// Create new hashes
  AssemblyMapper_setAssembledRegister(am, IDHash_new(IDHASH_MEDIUM));
  AssemblyMapper_setComponentRegister(am, IDHash_new(IDHASH_MEDIUM));
}
Ejemplo n.º 4
0
RangeRegistry *RangeRegistry_new() {
  RangeRegistry *registry;

  if ((registry = (RangeRegistry *)calloc(1,sizeof(RangeRegistry))) == NULL) {
    fprintf(stderr,"ERROR: Failed allocating space for registry\n");
    exit(1);
  }

  registry->registry = IDHash_new(IDHASH_MEDIUM);

  return registry;
}
Ejemplo n.º 5
0
/*
=head2 register_assembled

  Arg [1]    : integer $asm_seq_region
               The dbID of the sequence region to register.
  Arg [2]    : int $chunk_id
               The chunk number of the provided seq_region to register.
  Example    : $asm_mapper->register_assembled( 'X', 4 );
  Description: Flags a given assembled region as registered in this
               assembly mapper.  This should only be called by this
               class or the AssemblyMapperAdaptor.  Do not call this
               method unless you really know what you are doing.
  Return type: None
  Exceptions : Throws on incorrect arguments
  Caller     : Internal, AssemblyMapperAdaptor
  Status     : Stable

=cut
*/
void AssemblyMapper_registerAssembled(AssemblyMapper *am, IDType asmSeqRegionId, int chunkId) {
  IDHash *assembledRegister = AssemblyMapper_getAssembledRegister(am);

  if ( !IDHash_contains(assembledRegister, asmSeqRegionId) ) {
    IDHash_add(assembledRegister, asmSeqRegionId, IDHash_new(IDHASH_MEDIUM));
  }

  IDHash *chunkHash = IDHash_getValue(assembledRegister, asmSeqRegionId);

  if (!IDHash_contains(chunkHash, (IDType)chunkId)) {
    IDHash_add(chunkHash, (IDType)chunkId, &trueVal);
  }
}
Ejemplo n.º 6
0
AssemblyMapper *AssemblyMapper_new(AssemblyMapperAdaptor *adaptor, Vector *coordSystems) {
  AssemblyMapper *am;

  if ((am = (AssemblyMapper *)calloc(1, sizeof(AssemblyMapper))) == NULL) {
    fprintf(stderr, "ERROR: Failed allocating space for AssemblyMapper\n");
    return NULL;
  }

  am->objectType = CLASS_ASSEMBLYMAPPER;

  am->funcs = &assemblyMapperFuncs;

  Object_incRefCount(am);

  AssemblyMapper_setAdaptor(am, adaptor);

  AssemblyMapperAdaptor_cacheSeqIdsWithMultAssemblies(adaptor);

  if ( Vector_getNumElement(coordSystems) != 2 ) {
    fprintf(stderr, "Can only map between two coordinate systems %d were provided\n", Vector_getNumElement(coordSystems));
    exit(1);
  }

  // Set the component and assembled coordinate systems
  AssemblyMapper_setAssembledCoordSystem(am, Vector_getElementAt(coordSystems, 0));
  AssemblyMapper_setComponentCoordSystem(am, Vector_getElementAt(coordSystems, 1));

  AssemblyMapper_setAssembledRegister(am, IDHash_new(IDHASH_MEDIUM));
  AssemblyMapper_setComponentRegister(am, IDHash_new(IDHASH_MEDIUM));

  // We load the mapper calling the 'ASSEMBLED' the 'from' coord system
  // and the 'COMPONENT' the 'to' coord system.
  AssemblyMapper_setMapper(am, Mapper_new("assembled", "component", AssemblyMapper_getAssembledCoordSystem(am), 
                                                                    AssemblyMapper_getComponentCoordSystem(am)));

  AssemblyMapper_setMaxPairCount(am, AM_DEFAULT_MAX_PAIR_COUNT);

  return am;
}
Ejemplo n.º 7
0
ChromosomeAdaptor *ChromosomeAdaptor_new(DBAdaptor *dba) {
  ChromosomeAdaptor *ca;

  if ((ca = (ChromosomeAdaptor *)calloc(1,sizeof(ChromosomeAdaptor))) == NULL) {
    fprintf(stderr, "ERROR: Failed allocating space for ChromosomeAdaptor\n");
    return NULL;
  }
  BaseAdaptor_init((BaseAdaptor *)ca, dba, CHROMOSOME_ADAPTOR);

  ca->chrCache = IDHash_new(IDHASH_SMALL);
  ca->chrNameCache = StringHash_new(STRINGHASH_SMALL);

  return ca;
}
Vector *IntronSupportingEvidenceAdaptor_objectsFromStatementHandle(IntronSupportingEvidenceAdaptor *isea, 
                                                                   StatementHandle *sth,
                                                                   AssemblyMapper *assMapper,
                                                                   Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(isea->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(isea->dba);

  Vector *features = Vector_new();
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);
  
/* Unneccesary
  my %analysis_hash;
  my %sr_name_hash;
  my %sr_cs_hash;
*/
  

  
/* Unused
  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
*/

  long         destSliceStart;
  long         destSliceEnd;
  int          destSliceStrand;
  long         destSliceLength;
  //CoordSystem *destSliceCs;
  char *       destSliceSrName;
  IDType       destSliceSrId = 0;
  //AssemblyMapperAdaptor *asma;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    //??destSliceCs     = Slice_getCoordSystem(destSlice);
    destSliceSrName = Slice_getSeqRegionName(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
    //??asma            = DBAdaptor_getAssemblyMapperAdaptor(ea->dba);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType id =           row->getLongLongAt(row,0);
    IDType analysisId =   row->getLongLongAt(row,1);
    IDType seqRegionId =  row->getLongLongAt(row,2);
    long seqRegionStart = row->getLongAt(row,3);
    long seqRegionEnd =   row->getLongAt(row,4);
    int seqRegionStrand = row->getIntAt(row,5);
    char *hitName =       row->getStringAt(row,6);
    double score =        row->getDoubleAt(row,7);
    char *scoreType =     row->getStringAt(row,8);
    int spliceCanonical = row->getIntAt(row,9); 

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

/*
    // need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
*/
    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *iseSlice = slice;
    
    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    // 
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      //next FEATURE if (!defined($seq_region_id));
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      
/* Was - but identical if and else so why test???
      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      } else {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      }
*/
// Instead...
      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      iseSlice = IDHash_getValue(sliceHash, seqRegionId);
    }


    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is foward strand, nothing needs doing
    // 
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
       
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      iseSlice = destSlice;
    }
    
    IntronSupportingEvidence *ise = IntronSupportingEvidence_new();

    IntronSupportingEvidence_setStart             (ise, seqRegionStart);
    IntronSupportingEvidence_setEnd               (ise, seqRegionEnd);
    IntronSupportingEvidence_setStrand            (ise, seqRegionStrand);
    IntronSupportingEvidence_setSlice             (ise, iseSlice);
    IntronSupportingEvidence_setAnalysis          (ise, analysis);
    IntronSupportingEvidence_setAdaptor           (ise, (BaseAdaptor *)isea);
    IntronSupportingEvidence_setDbID              (ise, id);
    IntronSupportingEvidence_setHitName           (ise, hitName);
    IntronSupportingEvidence_setScore             (ise, score);
    IntronSupportingEvidence_setScoreType         (ise, scoreType);
    IntronSupportingEvidence_setIsSpliceCanonical(ise, spliceCanonical);

    Vector_addElement(features, ise);
  }
  
  return features;
}
Ejemplo n.º 9
0
void RangeRegistry_flush(RangeRegistry *registry) {
  IDHash_free(registry->registry, Vector_free);

  registry->registry = IDHash_new(IDHASH_MEDIUM);
}
Ejemplo n.º 10
0
/*
=head2 _objs_from_sth

  Arg [1]    : DBI:st $sth 
               An executed DBI statement handle
  Arg [2]    : (optional) Bio::EnsEMBL::Mapper $mapper 
               An mapper to be used to convert contig coordinates
               to assembly coordinates.
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice to map the prediction transcript to.   
  Example    : $p_transcripts = $self->_objs_from_sth($sth);
  Description: Creates a list of Prediction transcripts from an executed DBI
               statement handle.  The columns retrieved via the statement 
               handle must be in the same order as the columns defined by the
               _columns method.  If the slice argument is provided then the
               the prediction transcripts will be in returned in the coordinate
               system of the $slice argument.  Otherwise the prediction 
               transcripts will be returned in the RawContig coordinate system.
  Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
  Exceptions : none
  Caller     : superclass generic_fetch
  Status     : Stable

=cut
*/
Vector *PredictionTranscriptAdaptor_objectsFromStatementHandle(PredictionTranscriptAdaptor *pta, 
                                                               StatementHandle *sth, 
                                                               AssemblyMapper *assMapper, 
                                                               Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(pta->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(pta->dba);

  Vector *pTranscripts = Vector_new();
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);

  long         destSliceStart;
  long         destSliceEnd;
  int          destSliceStrand;
  long         destSliceLength;
  char *       destSliceSrName;
  IDType       destSliceSrId = 0;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    destSliceSrName = Slice_getSeqRegionName(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType predictionTranscriptId = row->getLongLongAt(row,0);
    IDType seqRegionId            = row->getLongLongAt(row,1);
    long seqRegionStart           = row->getLongAt(row,2);
    long seqRegionEnd             = row->getLongAt(row,3);
    int seqRegionStrand           = row->getIntAt(row,4);
    IDType analysisId             = row->getLongLongAt(row,5);
    char *displayLabel            = row->getStringAt(row,6);

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *ptSlice = slice;

    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    //
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      ptSlice = IDHash_getValue(sliceHash, seqRegionId);
    }

    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is foward strand, nothing needs doing
    //
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      ptSlice = destSlice;
    }
    
    // Finally, create the new PredictionTranscript.
    PredictionTranscript *pt = PredictionTranscript_new();

    PredictionTranscript_setStart       (pt, seqRegionStart);
    PredictionTranscript_setEnd         (pt, seqRegionEnd);
    PredictionTranscript_setStrand      (pt, seqRegionStrand);
    PredictionTranscript_setSlice       (pt, ptSlice);
    PredictionTranscript_setAnalysis    (pt, analysis);
    PredictionTranscript_setAdaptor     (pt, (BaseAdaptor *)pta);
    PredictionTranscript_setDbID        (pt, predictionTranscriptId);
    PredictionTranscript_setDisplayLabel(pt, displayLabel);

    Vector_addElement(pTranscripts, pt);
  }

  IDHash_free(sliceHash, NULL);
  return pTranscripts;
}
Ejemplo n.º 11
0
Vector *PredictionTranscriptAdaptor_fetchAllBySlice(PredictionTranscriptAdaptor *pta, Slice *slice, char *logicName, int loadExons) {

  //my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name);
  Vector *transcripts = BaseFeatureAdaptor_fetchAllBySlice((BaseFeatureAdaptor *)pta, slice, logicName);

  // if there are 0 or 1 transcripts still do lazy-loading
  if ( ! loadExons || Vector_getNumElement(transcripts) < 2 ) {
    return transcripts;
  }

  // preload all of the exons now, instead of lazy loading later
  // faster than 1 query per transcript

  // get extent of region spanned by transcripts
  long minStart =  2000000000;
  long maxEnd   = -2000000000;

  int i;
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    if (PredictionTranscript_getSeqRegionStart((SeqFeature*)t) < minStart) {
      minStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)t);
    }
    if (PredictionTranscript_getSeqRegionEnd((SeqFeature*)t) > maxEnd) {
      maxEnd = PredictionTranscript_getSeqRegionEnd((SeqFeature*)t);
    }
  }

  Slice *extSlice;

  if (minStart >= Slice_getStart(slice) && maxEnd <= Slice_getEnd(slice)) {
    extSlice = slice;
  } else {
    SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba);
    extSlice = SliceAdaptor_fetchByRegion(sa, Slice_getCoordSystemName(slice), Slice_getSeqRegionName(slice),
                                          minStart, maxEnd, Slice_getStrand(slice), CoordSystem_getVersion(Slice_getCoordSystem(slice)), 0);
  }

  // associate exon identifiers with transcripts
  IDHash *trHash = IDHash_new(IDHASH_MEDIUM);
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    if ( ! IDHash_contains(trHash, PredictionTranscript_getDbID(t))) {
      IDHash_add(trHash, PredictionTranscript_getDbID(t), t);
    }
  }

  IDType *uniqueIds = IDHash_getKeys(trHash);

  char tmpStr[1024];
  char *qStr = NULL;
  if ((qStr = (char *)calloc(655500,sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating qStr\n");
    return transcripts;
  }

  int lenNum;
  int endPoint = sprintf(qStr, "SELECT prediction_transcript_id, prediction_exon_id, exon_rank FROM prediction_exon WHERE  prediction_transcript_id IN (");
  for (i=0; i<IDHash_getNumValues(trHash); i++) {
    if (i!=0) {
      qStr[endPoint++] = ',';
      qStr[endPoint++] = ' ';
    }
    lenNum = sprintf(tmpStr,IDFMTSTR,uniqueIds[i]);
    memcpy(&(qStr[endPoint]), tmpStr, lenNum);
    endPoint+=lenNum;
  }
  qStr[endPoint++] = ')';
  qStr[endPoint] = '\0';

  free(uniqueIds);

  StatementHandle *sth = pta->prepare((BaseAdaptor *)pta,qStr,strlen(qStr));
  sth->execute(sth);

  IDHash *exTrHash = IDHash_new(IDHASH_MEDIUM);
  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType trId = row->getLongLongAt(row,0);
    IDType exId = row->getLongLongAt(row,1);
    int    rank = row->getIntAt(row,2);

    if (! IDHash_contains(exTrHash, exId)) {
      Vector *vec = Vector_new();
      Vector_setFreeFunc(vec, PredictionTranscriptRankPair_free);
      IDHash_add(exTrHash, exId, vec);
    }
    Vector *exVec = IDHash_getValue(exTrHash, exId);
    PredictionTranscriptRankPair *trp = PredictionTranscriptRankPair_new(IDHash_getValue(trHash, trId), rank);
    Vector_addElement(exVec, trp);
  }

  IDHash_free(trHash, NULL);

  sth->finish(sth);

  PredictionExonAdaptor *pea = DBAdaptor_getPredictionExonAdaptor(pta->dba);
  Vector *exons = PredictionExonAdaptor_fetchAllBySlice(pea, extSlice);

  // move exons onto transcript slice, and add them to transcripts
  for (i=0; i<Vector_getNumElement(exons); i++) {
    PredictionExon *ex = Vector_getElementAt(exons, i);

  // Perl didn't have this line - it was in GeneAdaptor version so I think I'm going to keep it
    if (!IDHash_contains(exTrHash, PredictionExon_getDbID(ex))) continue;

    PredictionExon *newEx;
    if (slice != extSlice) {
      newEx = (PredictionExon*)PredictionExon_transfer((SeqFeature*)ex, slice);
      if (newEx == NULL) {
        fprintf(stderr, "Unexpected. Exon could not be transferred onto PredictionTranscript slice.\n");
        exit(1);
      }
    } else {
      newEx = ex;
    }

    Vector *exVec = IDHash_getValue(exTrHash, PredictionExon_getDbID(newEx));
    int j;
    for (j=0; j<Vector_getNumElement(exVec); j++) {
      PredictionTranscriptRankPair *trp = Vector_getElementAt(exVec, j);
      PredictionTranscript_addExon(trp->transcript, newEx, &trp->rank);
    }
  }

  IDHash_free(exTrHash, Vector_free);
  free(qStr);

  return transcripts;
}
Ejemplo n.º 12
0
Vector *DBEntryAdaptor_fetchByObjectType(DBEntryAdaptor *dbea, IDType ensObj, char *ensType) {
  Vector *out;
  char qStr[1024];
  StatementHandle *sth;
  ResultRow *row;
  IDHash *seen;
  
  if (!ensObj) {
    fprintf(stderr,"Error: Can't fetchByObjectType without an object\n");
    exit(1);
  }

  if (!ensType) {
    fprintf(stderr,"Error: Can't fetchByObjectType without a type\n");
    exit(1);
  }

// Not sure if idt identities are right way round
  sprintf(qStr,
    "SELECT xref.xref_id, xref.dbprimary_acc, xref.display_label, xref.version,"
    "       xref.description,"
    "       exDB.db_name, exDB.db_release, exDB.status," 
    "       oxr.object_xref_id,"
    "       es.synonym," 
    "       idt.xref_identity, idt.ensembl_identity"
    " FROM  (external_db exDB, object_xref oxr, xref xref)" 
    " LEFT JOIN external_synonym es on es.xref_id = xref.xref_id"
    " LEFT JOIN identity_xref idt on idt.object_xref_id = oxr.object_xref_id"
    " WHERE  xref.xref_id = oxr.xref_id"
    "  AND  xref.external_db_id = exDB.external_db_id"
    "  AND  oxr.ensembl_id = " IDFMTSTR
    "  AND  oxr.ensembl_object_type = '%s'",
    ensObj,
    ensType);
  
  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

  sth->execute(sth);

  seen = IDHash_new(IDHASH_SMALL);
  out = Vector_new();

  while ((row = sth->fetchRow(sth))) {
    DBEntry *exDB;
    IDType refID = row->getLongLongAt(row,0);
			    
    // using an outer join on the synonyms as well as on identity_xref, we
    // now have to filter out the duplicates (see v.1.18 for
    // original). Since there is at most one identity_xref row per xref,
    // this is easy enough; all the 'extra' bits are synonyms

    if (!IDHash_contains(seen,refID))  {
      exDB = DBEntry_new();
      DBEntry_setAdaptor(exDB,(BaseAdaptor *)dbea);
      DBEntry_setDbID(exDB, refID);
      DBEntry_setPrimaryId(exDB, row->getStringAt(row,1));
      DBEntry_setDisplayId(exDB, row->getStringAt(row,2));
      DBEntry_setVersion(exDB, row->getStringAt(row,3));
      DBEntry_setDbName(exDB, row->getStringAt(row,5));
      DBEntry_setRelease(exDB, row->getStringAt(row,6));

      if (row->col(row,10)) {
        IdentityXref *idx = IdentityXref_new();
        DBEntry_setIdentityXref(exDB,idx);
	IdentityXref_setQueryIdentity(idx, row->getDoubleAt(row,10));
	IdentityXref_setTargetIdentity(idx, row->getDoubleAt(row,11));
      }
      
      if (row->col(row,4)) DBEntry_setDescription(exDB, row->getStringAt(row,4));
      if (row->col(row,7)) DBEntry_setStatus(exDB, row->getStringAt(row,7));
      
      Vector_addElement(out, exDB);
      IDHash_add(seen, refID, exDB);
    } 

    exDB = IDHash_getValue(seen, refID);

    if (row->col(row,9)) {
      DBEntry_addSynonym(exDB,row->getStringAt(row,9));
    }
  }

  IDHash_free(seen, NULL);

  sth->finish(sth);
  
  return out;
}
Ejemplo n.º 13
0
Vector *GenomicAlignAdaptor_mergeAlignsets(GenomicAlignAdaptor *gaa, Vector *alignSet1, Vector *alignSet2) {
  int i;
  Vector *bigList = Vector_new();
  IDHash *overlappingSets[2];
  Vector *mergedAligns;


  for (i=0;i<Vector_getNumElement(alignSet1); i++) {
    GenomicAlign *align = Vector_getElementAt(alignSet1, i);
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getQueryDNAFrag(align)),
                                                        GenomicAlign_getQueryStart(align), align, 0));
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getQueryDNAFrag(align)),
                                                        GenomicAlign_getQueryEnd(align)+0.5, align, 0));
  }

  for (i=0;i<Vector_getNumElement(alignSet2); i++) {
    GenomicAlign *align = Vector_getElementAt(alignSet2, i);
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getConsensusDNAFrag(align)),
                                                        GenomicAlign_getConsensusStart(align), align, 1));
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getConsensusDNAFrag(align)),
                                                        GenomicAlign_getConsensusEnd(align)+0.5, align, 1));
  }
  
  Vector_sort(bigList, GenomicAlignListElem_compFunc);

  // walking from start to end through sortlist and keep track of the 
  // currently overlapping set of Alignments
 
  overlappingSets[0] = IDHash_new(IDHASH_SMALL);
  overlappingSets[1] = IDHash_new(IDHASH_SMALL);

  mergedAligns = Vector_new();

  for (i=0; i<Vector_getNumElement(bigList); i++) {
    GenomicAlignListElem *gale  = Vector_getElementAt(bigList,i);

    GenomicAlign *align = gale->align;
    IDType alignID      = GenomicAlign_getDbID(align);
    int setNo           = gale->setNum;

    if (IDHash_contains(overlappingSets[setNo], alignID)) {
      // remove from current overlapping set
      IDHash_remove(overlappingSets[setNo], alignID, NULL);
    } else {
      int j;
      void **values = IDHash_getValues(overlappingSets[1-setNo]);

      // insert into the set and do all the overlap business
      IDHash_add(overlappingSets[setNo], alignID, align);

      // the other set contains everything this align overlaps with
      for (j=0; j<IDHash_getNumValues(overlappingSets[1-setNo]); j++) {
        GenomicAlign *align2 = values[j];
        if (setNo == 0) {
          GenomicAlignAdaptor_addDerivedAlignments(gaa, mergedAligns, align, align2);
        } else {
          GenomicAlignAdaptor_addDerivedAlignments(gaa, mergedAligns, align2, align);
        }
      }
      free(values);
    }
  }

// NIY Free gale

  return mergedAligns;
}
Ejemplo n.º 14
0
// Note I didn't implement the stable id fetching uggliness here. I'll probably make a separate method for that
// if necessary
Vector *BaseAdaptor_uncachedFetchAllByDbIDList(BaseAdaptor *ba, Vector *idList, Slice *slice) {
  if ( idList == NULL) {
    fprintf(stderr, "id_list list reference argument is required - bye!");
    return NULL;
  }
  char constraintPref[1024];
  

  if (!Vector_getNumElement(idList)) {
    return Vector_new();
  }

  NameTableType *tables = ba->getTables();
  char **t = (*tables)[0];

  sprintf(constraintPref, "%s.%s_id ", t[SYN], t[NAME] ); 

  // Ensure that we do not exceed MySQL's max_allowed_packet (defaults to
  // 1 MB) splitting large queries into smaller queries of at most 256 KB.
  // Assuming a (generous) average dbID string
  // length of 16, this means 16384 dbIDs in each query.
  int maxSize = 16384;

  // Uniquify the list
  IDHash *idListHash = IDHash_new(IDHASH_MEDIUM);

  int i;
  for (i=0; i<Vector_getNumElement(idList); i++) {
    IDType id = *(IDType *)(Vector_getElementAt(idList, i));
    if (!IDHash_contains(idListHash, id)) {
      IDHash_add(idListHash, id, &trueVal);
    }
  }

  IDType *uniqueIds = IDHash_getKeys(idListHash);
  int nUniqueId = IDHash_getNumValues(idListHash);

  IDHash_free(idListHash, NULL);

  Vector *out = Vector_new();

  int lenNum;
  for (i=0; i<nUniqueId; i+=maxSize) {
    char *constraint = NULL;

    if ((constraint = (char *)calloc(655500,sizeof(char))) == NULL) {
      fprintf(stderr,"Failed allocating constraint\n");
      return out;
    }

    strcpy(constraint, constraintPref);
  
    // Special case for one remaining Id
    if (i == nUniqueId-1) {
      sprintf(constraint, "%s = "IDFMTSTR, constraint, uniqueIds[i]);
    } else {
      char tmpStr[1024];
      int endPoint = sprintf(constraint, "%s IN (", constraint);
      int j;
      for (j=0; j<maxSize && j+i<nUniqueId; j++) {
        if (j!=0) {
          constraint[endPoint++] = ',';
          constraint[endPoint++] = ' ';
        }
        lenNum = sprintf(tmpStr, IDFMTSTR, uniqueIds[i+j]);
        memcpy(&(constraint[endPoint]), tmpStr, lenNum);
        endPoint+=lenNum;
      }
      constraint[endPoint++] = ')';
      constraint[endPoint] = '\0';
    }

    Vector *resChunk = BaseAdaptor_genericFetch(ba, constraint, NULL, slice);

    Vector_append(out, resChunk);

    Vector_free(resChunk);
    free(constraint);
  }
  free(uniqueIds);

  return out;
}