예제 #1
0
Vector *DBEntryAdaptor_fetchByObjectType(DBEntryAdaptor *dbea, IDType ensObj, char *ensType) {
  Vector *out;
  char qStr[1024];
  StatementHandle *sth;
  ResultRow *row;
  IDHash *seen;
  
  if (!ensObj) {
    fprintf(stderr,"Error: Can't fetchByObjectType without an object\n");
    exit(1);
  }

  if (!ensType) {
    fprintf(stderr,"Error: Can't fetchByObjectType without a type\n");
    exit(1);
  }

// Not sure if idt identities are right way round
  sprintf(qStr,
    "SELECT xref.xref_id, xref.dbprimary_acc, xref.display_label, xref.version,"
    "       xref.description,"
    "       exDB.db_name, exDB.db_release, exDB.status," 
    "       oxr.object_xref_id,"
    "       es.synonym," 
    "       idt.xref_identity, idt.ensembl_identity"
    " FROM  (external_db exDB, object_xref oxr, xref xref)" 
    " LEFT JOIN external_synonym es on es.xref_id = xref.xref_id"
    " LEFT JOIN identity_xref idt on idt.object_xref_id = oxr.object_xref_id"
    " WHERE  xref.xref_id = oxr.xref_id"
    "  AND  xref.external_db_id = exDB.external_db_id"
    "  AND  oxr.ensembl_id = " IDFMTSTR
    "  AND  oxr.ensembl_object_type = '%s'",
    ensObj,
    ensType);
  
  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

  sth->execute(sth);

  seen = IDHash_new(IDHASH_SMALL);
  out = Vector_new();

  while ((row = sth->fetchRow(sth))) {
    DBEntry *exDB;
    IDType refID = row->getLongLongAt(row,0);
			    
    // using an outer join on the synonyms as well as on identity_xref, we
    // now have to filter out the duplicates (see v.1.18 for
    // original). Since there is at most one identity_xref row per xref,
    // this is easy enough; all the 'extra' bits are synonyms

    if (!IDHash_contains(seen,refID))  {
      exDB = DBEntry_new();
      DBEntry_setAdaptor(exDB,(BaseAdaptor *)dbea);
      DBEntry_setDbID(exDB, refID);
      DBEntry_setPrimaryId(exDB, row->getStringAt(row,1));
      DBEntry_setDisplayId(exDB, row->getStringAt(row,2));
      DBEntry_setVersion(exDB, row->getStringAt(row,3));
      DBEntry_setDbName(exDB, row->getStringAt(row,5));
      DBEntry_setRelease(exDB, row->getStringAt(row,6));

      if (row->col(row,10)) {
        IdentityXref *idx = IdentityXref_new();
        DBEntry_setIdentityXref(exDB,idx);
	IdentityXref_setQueryIdentity(idx, row->getDoubleAt(row,10));
	IdentityXref_setTargetIdentity(idx, row->getDoubleAt(row,11));
      }
      
      if (row->col(row,4)) DBEntry_setDescription(exDB, row->getStringAt(row,4));
      if (row->col(row,7)) DBEntry_setStatus(exDB, row->getStringAt(row,7));
      
      Vector_addElement(out, exDB);
      IDHash_add(seen, refID, exDB);
    } 

    exDB = IDHash_getValue(seen, refID);

    if (row->col(row,9)) {
      DBEntry_addSynonym(exDB,row->getStringAt(row,9));
    }
  }

  IDHash_free(seen, NULL);

  sth->finish(sth);
  
  return out;
}
Vector *IntronSupportingEvidenceAdaptor_objectsFromStatementHandle(IntronSupportingEvidenceAdaptor *isea, 
                                                                   StatementHandle *sth,
                                                                   AssemblyMapper *assMapper,
                                                                   Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(isea->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(isea->dba);

  Vector *features = Vector_new();
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);
  
/* Unneccesary
  my %analysis_hash;
  my %sr_name_hash;
  my %sr_cs_hash;
*/
  

  
/* Unused
  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
*/

  long         destSliceStart;
  long         destSliceEnd;
  int          destSliceStrand;
  long         destSliceLength;
  //CoordSystem *destSliceCs;
  char *       destSliceSrName;
  IDType       destSliceSrId = 0;
  //AssemblyMapperAdaptor *asma;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    //??destSliceCs     = Slice_getCoordSystem(destSlice);
    destSliceSrName = Slice_getSeqRegionName(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
    //??asma            = DBAdaptor_getAssemblyMapperAdaptor(ea->dba);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType id =           row->getLongLongAt(row,0);
    IDType analysisId =   row->getLongLongAt(row,1);
    IDType seqRegionId =  row->getLongLongAt(row,2);
    long seqRegionStart = row->getLongAt(row,3);
    long seqRegionEnd =   row->getLongAt(row,4);
    int seqRegionStrand = row->getIntAt(row,5);
    char *hitName =       row->getStringAt(row,6);
    double score =        row->getDoubleAt(row,7);
    char *scoreType =     row->getStringAt(row,8);
    int spliceCanonical = row->getIntAt(row,9); 

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

/*
    // need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
*/
    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *iseSlice = slice;
    
    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    // 
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      //next FEATURE if (!defined($seq_region_id));
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      
/* Was - but identical if and else so why test???
      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      } else {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      }
*/
// Instead...
      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      iseSlice = IDHash_getValue(sliceHash, seqRegionId);
    }


    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is foward strand, nothing needs doing
    // 
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
       
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      iseSlice = destSlice;
    }
    
    IntronSupportingEvidence *ise = IntronSupportingEvidence_new();

    IntronSupportingEvidence_setStart             (ise, seqRegionStart);
    IntronSupportingEvidence_setEnd               (ise, seqRegionEnd);
    IntronSupportingEvidence_setStrand            (ise, seqRegionStrand);
    IntronSupportingEvidence_setSlice             (ise, iseSlice);
    IntronSupportingEvidence_setAnalysis          (ise, analysis);
    IntronSupportingEvidence_setAdaptor           (ise, (BaseAdaptor *)isea);
    IntronSupportingEvidence_setDbID              (ise, id);
    IntronSupportingEvidence_setHitName           (ise, hitName);
    IntronSupportingEvidence_setScore             (ise, score);
    IntronSupportingEvidence_setScoreType         (ise, scoreType);
    IntronSupportingEvidence_setIsSpliceCanonical(ise, spliceCanonical);

    Vector_addElement(features, ise);
  }
  
  return features;
}
예제 #3
0
Vector *GenomicAlignAdaptor_objectsFromStatementHandle(GenomicAlignAdaptor *gaa, StatementHandle *sth,
                                                       int reverse) {
  Vector *results = Vector_new();
  ResultRow *row;
  DNAFragAdaptor *dfa;

  IDType consensusDNAFragId;
  IDType queryDNAFragId;
  int consensusStart;
  int consensusEnd;
  int queryStart;
  int queryEnd;
  int queryStrand;
  IDType methodLinkId;
  double score;
  double percId;
  char *cigarString;

  dfa = ComparaDBAdaptor_getDNAFragAdaptor(gaa->dba);

  while ((row = sth->fetchRow(sth))) {
    GenomicAlign *genomicAlign;
    char *alignmentType;
    
    if (reverse) {
      queryDNAFragId     = row->getLongLongAt(row,0);
      queryStart         = row->getIntAt(row,1);
      queryEnd           = row->getIntAt(row,2);
      consensusDNAFragId = row->getLongLongAt(row,3);
      consensusStart     = row->getIntAt(row,4);
      consensusEnd       = row->getIntAt(row,5);
    } else {
      consensusDNAFragId = row->getLongLongAt(row,0);
      consensusStart     = row->getIntAt(row,1);
      consensusEnd       = row->getIntAt(row,2);
      queryDNAFragId     = row->getLongLongAt(row,3);
      queryStart         = row->getIntAt(row,4);
      queryEnd           = row->getIntAt(row,5);
    }
    queryStrand  = row->getIntAt(row,6);
    methodLinkId = row->getLongLongAt(row,7);
    score        = row->getDoubleAt(row,8);
    percId       = row->getDoubleAt(row,9);
    cigarString  = row->getStringAt(row,10);

    alignmentType = GenomicAlignAdaptor_alignmentTypeByMethodLinkId(gaa, methodLinkId);

    if (reverse) {
      StrUtil_strReplChrs(cigarString,"DI","ID");

      // alignment of the opposite strand
      if (queryStrand == -1) {
        cigarString = CigarStrUtil_reverse(cigarString, strlen(cigarString));
      }
    }
    
    
    genomicAlign = GenomicAlign_new();
    GenomicAlign_setAdaptor(genomicAlign, (BaseAdaptor *)gaa);
    GenomicAlign_setConsensusDNAFrag(genomicAlign, DNAFragAdaptor_fetchByDbID(dfa,consensusDNAFragId));
    GenomicAlign_setConsensusStart(genomicAlign, consensusStart);
    GenomicAlign_setConsensusEnd(genomicAlign, consensusEnd);
    GenomicAlign_setQueryDNAFrag(genomicAlign, DNAFragAdaptor_fetchByDbID(dfa,queryDNAFragId));
    GenomicAlign_setQueryStart(genomicAlign, queryStart);
    GenomicAlign_setQueryEnd(genomicAlign, queryEnd);
    GenomicAlign_setQueryStrand(genomicAlign, queryStrand);
    GenomicAlign_setAlignmentType(genomicAlign, alignmentType);
    GenomicAlign_setScore(genomicAlign, score);
    GenomicAlign_setPercentId(genomicAlign, percId);
    GenomicAlign_setCigarString(genomicAlign, cigarString);

    Vector_addElement(results, genomicAlign);
  }

  return results;
}