/*
 * Fetch a single DBEntry (xref) by its xref_id.
 *
 * The query left-joins external_synonym, so one xref can come back as
 * several rows that differ only in the synonym column.  The first row
 * populates the DBEntry; every row with a non-NULL synonym adds one synonym.
 *
 * Returns the new DBEntry, or NULL if the query produced no rows or a row
 * came back with a NULL xref_id.  The statement handle is finished on every
 * return path.
 */
DBEntry *DBEntryAdaptor_fetchByDbID(DBEntryAdaptor *dbea, IDType dbID) {
  char qStr[512];
  StatementHandle *sth;
  ResultRow *row;
  DBEntry *dbe = NULL;

  sprintf(qStr,
          "SELECT xref.xref_id, xref.dbprimary_acc, xref.display_label,"
          " xref.version, xref.description,"
          " exDB.db_name, exDB.db_release, es.synonym"
          " FROM (xref, external_db exDB)"
          " LEFT JOIN external_synonym es on es.xref_id = xref.xref_id"
          " WHERE xref.xref_id = " IDFMTSTR
          " AND xref.external_db_id = exDB.external_db_id",
          dbID);

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  // Why???? my %duplicate;

  while ((row = sth->fetchRow(sth))) {
    if (!row->col(row,0)) {
      fprintf(stderr,"WARNING: Got xref with no refID\n");
      // Release the statement handle before bailing out; the original
      // early return leaked it.
      sth->finish(sth);
      return NULL;
    }

    if (!dbe) {
      // First row: build the entry from the xref / external_db columns.
      dbe = DBEntry_new();
      DBEntry_setAdaptor(dbe,(BaseAdaptor *)dbea);
      DBEntry_setDbID(dbe, dbID);
      DBEntry_setPrimaryId(dbe, row->getStringAt(row,1));
      DBEntry_setDisplayId(dbe, row->getStringAt(row,2));
      DBEntry_setVersion(dbe, row->getStringAt(row,3));
      DBEntry_setRelease(dbe, row->getStringAt(row,6));
      DBEntry_setDbName(dbe, row->getStringAt(row,5));

      // description is optional (column may be NULL)
      if (row->col(row,4)) DBEntry_setDescription(dbe, row->getStringAt(row,4));
    }

    // synonym comes from the LEFT JOIN and is NULL when the xref has none
    if (row->col(row,7)) DBEntry_addSynonym(dbe, row->getStringAt(row,7));
  }

  sth->finish(sth);

  return dbe;
}
/*
 * List the distinct member stable ids recorded for one species.
 *
 * sp is the species name; underscores are replaced by spaces on a private
 * copy before it is matched against genome_db.name.
 *
 * Returns a new Vector of newly allocated strings (one per stable id).  The
 * Vector is empty if nothing matched or if the species name is too long to
 * fit in the query buffer (an error is printed in that case).
 *
 * NOTE(review): the species name is interpolated into the SQL unescaped;
 * callers must not pass untrusted text here.
 */
Vector *HomologyAdaptor_listStableIdsFromSpecies(HomologyAdaptor *ha, char *sp) {
  StatementHandle *sth;
  ResultRow *row;
  Vector *genes;
  char qStr[1024];
  char *species;
  int qLen;

  species = StrUtil_copyString(&species,sp,0);
  species = StrUtil_strReplChr(species,'_',' ');

  // Bounded formatting: the original sprintf could overrun qStr for a long
  // species name.
  qLen = snprintf(qStr, sizeof(qStr),
                  "select distinct grm.member_stable_id "
                  " from gene_relationship_member grm,"
                  " genome_db gd "
                  " where gd.genome_db_id = grm.genome_db_id "
                  " and gd.name = '%s'",
                  species);

  genes = Vector_new();

  if (qLen < 0 || (size_t)qLen >= sizeof(qStr)) {
    fprintf(stderr, "Error: species name too long to build query in listStableIdsFromSpecies\n");
    free(species);
    return genes;
  }

  sth = ha->prepare((BaseAdaptor *)ha, qStr, strlen(qStr));
  sth->execute(sth);

  while ((row = sth->fetchRow(sth))) {
    char *tmpStr;
    // Copy the id: the Vector gets its own string rather than the row's pointer
    Vector_addElement(genes,StrUtil_copyString(&tmpStr, row->getStringAt(row,0),0));
  }

  sth->finish(sth);
  free(species);

  return genes;
}
/*
 * Look up the alignment type string (method_link.type) for a method_link_id.
 *
 * Returns a newly allocated copy of the type string, or NULL when
 * methodLinkId is zero or no matching row exists (an error is printed to
 * stderr in both cases).
 *
 * The statement handle is finished whenever it was prepared; the original
 * skipped finish() on the "no row" path and leaked the handle.
 */
char *GenomicAlignAdaptor_alignmentTypeByMethodLinkId(GenomicAlignAdaptor *gaa, IDType methodLinkId) {
  StatementHandle *sth;
  ResultRow *row;
  char qStr[512];
  char *alignmentType = NULL;

  if (!methodLinkId) {
    fprintf(stderr, "Error: methodLinkId has to be defined");
    return NULL;
  }

  sprintf(qStr,"SELECT type FROM method_link WHERE method_link_id = " IDFMTSTR, methodLinkId);

  sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr));
  sth->execute(sth);

  if ((row = sth->fetchRow(sth))) {
    alignmentType = StrUtil_copyString(&alignmentType, row->getStringAt(row,0), 0);
  } else {
    fprintf(stderr,"Error: No alignmentType for " IDFMTSTR "\n",methodLinkId);
  }

  // Always release the handle, whether or not a row was found.
  sth->finish(sth);

  // NIY switch to using passed in string
  return alignmentType;
}
/*
 * Convert the rows of an executed statement handle into Attribute objects.
 *
 * Expected column order: 0 = code, 1 = name, 2 = description, 3 = value.
 * Returns a new Vector holding one Attribute per fetched row.
 *
 * Note: in the Perl API the equivalent method name is misspelled compared
 * to the normal _objs_from_sth.  It is not clear whether that is deliberate
 * (to avoid a clash) or just an error.  This function is not reached through
 * the normal generic path; it is called directly from within the fetch
 * method.
 */
Vector *AttributeAdaptor_objectsFromStatementHandle(AttributeAdaptor *ata, StatementHandle *sth) {
  Vector *attributes = Vector_new();
  ResultRow *row;

  for (row = sth->fetchRow(sth); row != NULL; row = sth->fetchRow(sth)) {
    Attribute *attribute = Attribute_new();

    Attribute_setCode(attribute, row->getStringAt(row, 0));
    Attribute_setName(attribute, row->getStringAt(row, 1));
    Attribute_setDescription(attribute, row->getStringAt(row, 2));
    Attribute_setValue(attribute, row->getStringAt(row, 3));

    Vector_addElement(attributes, attribute);
  }

  return attributes;
}
/*
 * Run a caller-supplied homology query and wrap each row in a Homology.
 *
 * qStr is the complete SQL text.  Expected column order:
 *   0 = stable id, 1 = species, 2 = chromosome, 3 = chr start, 4 = chr end.
 *
 * Returns a new Vector with one Homology per row; the statement handle is
 * finished before returning.
 */
Vector *HomologyAdaptor_getHomologues(HomologyAdaptor *ha, char *qStr) {
  StatementHandle *sth = ha->prepare((BaseAdaptor *)ha, qStr, strlen(qStr));
  Vector *homologues;
  ResultRow *row;

  sth->execute(sth);

  homologues = Vector_new();

  for (row = sth->fetchRow(sth); row != NULL; row = sth->fetchRow(sth)) {
    Homology *entry = Homology_new();

    Homology_setSpecies(entry, row->getStringAt(row,1));
    Homology_setStableId(entry, row->getStringAt(row,0));
    Homology_setChromosome(entry, row->getStringAt(row,2));
    Homology_setChrStart(entry, row->getIntAt(row,3));
    Homology_setChrEnd(entry, row->getIntAt(row,4));

    Vector_addElement(homologues, entry);
  }

  sth->finish(sth);

  return homologues;
}
Vector *IntronSupportingEvidenceAdaptor_objectsFromStatementHandle(IntronSupportingEvidenceAdaptor *isea, StatementHandle *sth, AssemblyMapper *assMapper, Slice *destSlice) { SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(isea->dba); AnalysisAdaptor *aa = DBAdaptor_getAnalysisAdaptor(isea->dba); Vector *features = Vector_new(); IDHash *sliceHash = IDHash_new(IDHASH_SMALL); /* Unneccesary my %analysis_hash; my %sr_name_hash; my %sr_cs_hash; */ /* Unused my $asm_cs; my $cmp_cs; my $asm_cs_vers; my $asm_cs_name; my $cmp_cs_vers; my $cmp_cs_name; if($mapper) { $asm_cs = $mapper->assembled_CoordSystem(); $cmp_cs = $mapper->component_CoordSystem(); $asm_cs_name = $asm_cs->name(); $asm_cs_vers = $asm_cs->version(); $cmp_cs_name = $cmp_cs->name(); $cmp_cs_vers = $cmp_cs->version(); } */ long destSliceStart; long destSliceEnd; int destSliceStrand; long destSliceLength; //CoordSystem *destSliceCs; char * destSliceSrName; IDType destSliceSrId = 0; //AssemblyMapperAdaptor *asma; if (destSlice) { destSliceStart = Slice_getStart(destSlice); destSliceEnd = Slice_getEnd(destSlice); destSliceStrand = Slice_getStrand(destSlice); destSliceLength = Slice_getLength(destSlice); //??destSliceCs = Slice_getCoordSystem(destSlice); destSliceSrName = Slice_getSeqRegionName(destSlice); destSliceSrId = Slice_getSeqRegionId(destSlice); //??asma = DBAdaptor_getAssemblyMapperAdaptor(ea->dba); } ResultRow *row; while ((row = sth->fetchRow(sth))) { IDType id = row->getLongLongAt(row,0); IDType analysisId = row->getLongLongAt(row,1); IDType seqRegionId = row->getLongLongAt(row,2); long seqRegionStart = row->getLongAt(row,3); long seqRegionEnd = row->getLongAt(row,4); int seqRegionStrand = row->getIntAt(row,5); char *hitName = row->getStringAt(row,6); double score = row->getDoubleAt(row,7); char *scoreType = row->getStringAt(row,8); int spliceCanonical = row->getIntAt(row,9); // get the analysis object Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId); /* // need to get the 
internal_seq_region, if present $seq_region_id = $self->get_seq_region_id_internal($seq_region_id); #get the slice object my $slice = $slice_hash{"ID:".$seq_region_id}; if(!$slice) { $slice = $sa->fetch_by_seq_region_id($seq_region_id); $slice_hash{"ID:".$seq_region_id} = $slice; $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); $sr_cs_hash{$seq_region_id} = $slice->coord_system(); } my $sr_name = $sr_name_hash{$seq_region_id}; my $sr_cs = $sr_cs_hash{$seq_region_id}; */ if (! IDHash_contains(sliceHash, seqRegionId)) { IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF)); } Slice *slice = IDHash_getValue(sliceHash, seqRegionId); Slice *iseSlice = slice; char *srName = Slice_getSeqRegionName(slice); CoordSystem *srCs = Slice_getCoordSystem(slice); // // remap the feature coordinates to another coord system // if a mapper was provided // if (assMapper != NULL) { MapperRangeSet *mrs; // Slightly suspicious about need for this if statement so left in perl statements for now if (destSlice != NULL && assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) { mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice); } else { mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL); } // skip features that map to gaps or coord system boundaries //next FEATURE if (!defined($seq_region_id)); if (MapperRangeSet_getNumRange(mrs) == 0) { continue; } MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0); if (range->rangeType == MAPPERRANGE_GAP) { fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n"); exit(1); } else { MapperCoordinate *mc = (MapperCoordinate *)range; seqRegionId = mc->id; seqRegionStart = mc->start; seqRegionEnd = mc->end; seqRegionStrand = mc->strand; } MapperRangeSet_free(mrs); /* Was - but identical if and else so why test??? 
#get a slice in the coord system we just mapped to if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id); } else { $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id); } */ // Instead... if (! IDHash_contains(sliceHash, seqRegionId)) { IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF)); } iseSlice = IDHash_getValue(sliceHash, seqRegionId); } // // If a destination slice was provided convert the coords // If the dest_slice starts at 1 and is foward strand, nothing needs doing // if (destSlice != NULL) { if (destSliceStart != 1 || destSliceStrand != 1) { if (destSliceStrand == 1) { seqRegionStart = seqRegionStart - destSliceStart + 1; seqRegionEnd = seqRegionEnd - destSliceStart + 1; } else { long tmpSeqRegionStart = seqRegionStart; seqRegionStart = destSliceEnd - seqRegionEnd + 1; seqRegionEnd = destSliceEnd - tmpSeqRegionStart + 1; seqRegionStrand = -seqRegionStrand; } } // throw away features off the end of the requested slice if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) { continue; } iseSlice = destSlice; } IntronSupportingEvidence *ise = IntronSupportingEvidence_new(); IntronSupportingEvidence_setStart (ise, seqRegionStart); IntronSupportingEvidence_setEnd (ise, seqRegionEnd); IntronSupportingEvidence_setStrand (ise, seqRegionStrand); IntronSupportingEvidence_setSlice (ise, iseSlice); IntronSupportingEvidence_setAnalysis (ise, analysis); IntronSupportingEvidence_setAdaptor (ise, (BaseAdaptor *)isea); IntronSupportingEvidence_setDbID (ise, id); IntronSupportingEvidence_setHitName (ise, hitName); IntronSupportingEvidence_setScore (ise, score); IntronSupportingEvidence_setScoreType (ise, scoreType); IntronSupportingEvidence_setIsSpliceCanonical(ise, spliceCanonical); Vector_addElement(features, ise); } 
return features; }
/* =head2 _objs_from_sth Arg [1] : DBI:st $sth An executed DBI statement handle Arg [2] : (optional) Bio::EnsEMBL::Mapper $mapper An mapper to be used to convert contig coordinates to assembly coordinates. Arg [3] : (optional) Bio::EnsEMBL::Slice $slice A slice to map the prediction transcript to. Example : $p_transcripts = $self->_objs_from_sth($sth); Description: Creates a list of Prediction transcripts from an executed DBI statement handle. The columns retrieved via the statement handle must be in the same order as the columns defined by the _columns method. If the slice argument is provided then the the prediction transcripts will be in returned in the coordinate system of the $slice argument. Otherwise the prediction transcripts will be returned in the RawContig coordinate system. Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts Exceptions : none Caller : superclass generic_fetch Status : Stable =cut */ Vector *PredictionTranscriptAdaptor_objectsFromStatementHandle(PredictionTranscriptAdaptor *pta, StatementHandle *sth, AssemblyMapper *assMapper, Slice *destSlice) { SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba); AnalysisAdaptor *aa = DBAdaptor_getAnalysisAdaptor(pta->dba); Vector *pTranscripts = Vector_new(); IDHash *sliceHash = IDHash_new(IDHASH_SMALL); long destSliceStart; long destSliceEnd; int destSliceStrand; long destSliceLength; char * destSliceSrName; IDType destSliceSrId = 0; if (destSlice) { destSliceStart = Slice_getStart(destSlice); destSliceEnd = Slice_getEnd(destSlice); destSliceStrand = Slice_getStrand(destSlice); destSliceLength = Slice_getLength(destSlice); destSliceSrName = Slice_getSeqRegionName(destSlice); destSliceSrId = Slice_getSeqRegionId(destSlice); } ResultRow *row; while ((row = sth->fetchRow(sth))) { IDType predictionTranscriptId = row->getLongLongAt(row,0); IDType seqRegionId = row->getLongLongAt(row,1); long seqRegionStart = row->getLongAt(row,2); long seqRegionEnd = row->getLongAt(row,3); int 
seqRegionStrand = row->getIntAt(row,4); IDType analysisId = row->getLongLongAt(row,5); char *displayLabel = row->getStringAt(row,6); // get the analysis object Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId); if (! IDHash_contains(sliceHash, seqRegionId)) { IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF)); } Slice *slice = IDHash_getValue(sliceHash, seqRegionId); Slice *ptSlice = slice; char *srName = Slice_getSeqRegionName(slice); CoordSystem *srCs = Slice_getCoordSystem(slice); // // remap the feature coordinates to another coord system // if a mapper was provided // if (assMapper != NULL) { MapperRangeSet *mrs; // Slightly suspicious about need for this if statement so left in perl statements for now if (destSlice != NULL && assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) { mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice); } else { mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL); } // skip features that map to gaps or coord system boundaries if (MapperRangeSet_getNumRange(mrs) == 0) { continue; } MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0); if (range->rangeType == MAPPERRANGE_GAP) { fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n"); exit(1); } else { MapperCoordinate *mc = (MapperCoordinate *)range; seqRegionId = mc->id; seqRegionStart = mc->start; seqRegionEnd = mc->end; seqRegionStrand = mc->strand; } MapperRangeSet_free(mrs); if (! 
IDHash_contains(sliceHash, seqRegionId)) { IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF)); } ptSlice = IDHash_getValue(sliceHash, seqRegionId); } // // If a destination slice was provided convert the coords // If the dest_slice starts at 1 and is foward strand, nothing needs doing // if (destSlice != NULL) { if (destSliceStart != 1 || destSliceStrand != 1) { if (destSliceStrand == 1) { seqRegionStart = seqRegionStart - destSliceStart + 1; seqRegionEnd = seqRegionEnd - destSliceStart + 1; } else { long tmpSeqRegionStart = seqRegionStart; seqRegionStart = destSliceEnd - seqRegionEnd + 1; seqRegionEnd = destSliceEnd - tmpSeqRegionStart + 1; seqRegionStrand = -seqRegionStrand; } } // throw away features off the end of the requested slice if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) { continue; } ptSlice = destSlice; } // Finally, create the new PredictionTranscript. PredictionTranscript *pt = PredictionTranscript_new(); PredictionTranscript_setStart (pt, seqRegionStart); PredictionTranscript_setEnd (pt, seqRegionEnd); PredictionTranscript_setStrand (pt, seqRegionStrand); PredictionTranscript_setSlice (pt, ptSlice); PredictionTranscript_setAnalysis (pt, analysis); PredictionTranscript_setAdaptor (pt, (BaseAdaptor *)pta); PredictionTranscript_setDbID (pt, predictionTranscriptId); PredictionTranscript_setDisplayLabel(pt, displayLabel); Vector_addElement(pTranscripts, pt); } IDHash_free(sliceHash, NULL); return pTranscripts; }
/*
 * Fetch all DBEntries (xrefs) attached to one Ensembl object.
 *
 * ensObj  : the object's ensembl_id; must be non-zero.
 * ensType : the ensembl_object_type string; must be non-NULL.
 *
 * The query outer-joins both external_synonym and identity_xref, so one
 * xref can come back as several rows.  The first row seen for an xref_id
 * creates the DBEntry (with an IdentityXref when column 10 is non-NULL);
 * every row with a non-NULL synonym adds a synonym to that entry.
 *
 * Returns a new Vector of DBEntry objects, one per distinct xref_id.
 * Exits the process on a missing argument or if the query does not fit in
 * the local buffer.
 *
 * NOTE(review): ensType is interpolated into the SQL unescaped; callers
 * must not pass untrusted text here.
 */
Vector *DBEntryAdaptor_fetchByObjectType(DBEntryAdaptor *dbea, IDType ensObj, char *ensType) {
  Vector *out;
  char qStr[1024];
  int qLen;
  StatementHandle *sth;
  ResultRow *row;
  IDHash *seen;

  if (!ensObj) {
    fprintf(stderr,"Error: Can't fetchByObjectType without an object\n");
    exit(1);
  }

  if (!ensType) {
    fprintf(stderr,"Error: Can't fetchByObjectType without a type\n");
    exit(1);
  }

  // Not sure if idt identities are right way round
  // Bounded formatting: the original sprintf could overrun qStr for a long
  // ensType string.
  qLen = snprintf(qStr, sizeof(qStr),
    "SELECT xref.xref_id, xref.dbprimary_acc, xref.display_label, xref.version,"
    " xref.description,"
    " exDB.db_name, exDB.db_release, exDB.status,"
    " oxr.object_xref_id,"
    " es.synonym,"
    " idt.xref_identity, idt.ensembl_identity"
    " FROM (external_db exDB, object_xref oxr, xref xref)"
    " LEFT JOIN external_synonym es on es.xref_id = xref.xref_id"
    " LEFT JOIN identity_xref idt on idt.object_xref_id = oxr.object_xref_id"
    " WHERE xref.xref_id = oxr.xref_id"
    " AND xref.external_db_id = exDB.external_db_id"
    " AND oxr.ensembl_id = " IDFMTSTR
    " AND oxr.ensembl_object_type = '%s'",
    ensObj,
    ensType);

  if (qLen < 0 || (size_t)qLen >= sizeof(qStr)) {
    fprintf(stderr,"Error: Query too long in fetchByObjectType\n");
    exit(1);
  }

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  // xref_id -> DBEntry already created for it
  seen = IDHash_new(IDHASH_SMALL);
  out = Vector_new();

  while ((row = sth->fetchRow(sth))) {
    DBEntry *exDB;
    IDType refID = row->getLongLongAt(row,0);

    // using an outer join on the synonyms as well as on identity_xref, we
    // now have to filter out the duplicates (see v.1.18 for
    // original). Since there is at most one identity_xref row per xref,
    // this is easy enough; all the 'extra' bits are synonyms
    if (!IDHash_contains(seen,refID)) {
      exDB = DBEntry_new();
      DBEntry_setAdaptor(exDB,(BaseAdaptor *)dbea);
      DBEntry_setDbID(exDB, refID);
      DBEntry_setPrimaryId(exDB, row->getStringAt(row,1));
      DBEntry_setDisplayId(exDB, row->getStringAt(row,2));
      DBEntry_setVersion(exDB, row->getStringAt(row,3));
      DBEntry_setDbName(exDB, row->getStringAt(row,5));
      DBEntry_setRelease(exDB, row->getStringAt(row,6));

      // identity_xref columns are NULL when the LEFT JOIN found no match
      if (row->col(row,10)) {
        IdentityXref *idx = IdentityXref_new();
        DBEntry_setIdentityXref(exDB,idx);
        IdentityXref_setQueryIdentity(idx, row->getDoubleAt(row,10));
        IdentityXref_setTargetIdentity(idx, row->getDoubleAt(row,11));
      }

      if (row->col(row,4)) DBEntry_setDescription(exDB, row->getStringAt(row,4));
      if (row->col(row,7)) DBEntry_setStatus(exDB, row->getStringAt(row,7));

      Vector_addElement(out, exDB);
      IDHash_add(seen, refID, exDB);
    }

    exDB = IDHash_getValue(seen, refID);

    if (row->col(row,9)) {
      DBEntry_addSynonym(exDB,row->getStringAt(row,9));
    }
  }

  // Free the lookup table only; the entries are handed back in `out`.
  IDHash_free(seen, NULL);

  sth->finish(sth);

  return out;
}
/*
 * Turn the rows of an executed statement handle into GenomicAlign objects.
 *
 * Column layout: columns 0-2 and 3-5 are two (dnafrag id, start, end)
 * triples, followed by 6 = query strand, 7 = method link id, 8 = score,
 * 9 = percent id, 10 = cigar string.
 *
 * reverse : when zero, columns 0-2 are the consensus and 3-5 the query;
 *           when non-zero the two triples swap roles, the 'D' and 'I'
 *           characters of the cigar string are exchanged in place, and the
 *           cigar is additionally reversed if the query strand is -1.
 *
 * Returns a new Vector with one GenomicAlign per row.  The statement handle
 * is not finished here.
 */
Vector *GenomicAlignAdaptor_objectsFromStatementHandle(GenomicAlignAdaptor *gaa, StatementHandle *sth, int reverse) {
  Vector *alignments = Vector_new();
  DNAFragAdaptor *fragAdaptor = ComparaDBAdaptor_getDNAFragAdaptor(gaa->dba);
  ResultRow *row;

  // Which column triple holds which side of the alignment.
  const int consensusCol = reverse ? 3 : 0;
  const int queryCol     = reverse ? 0 : 3;

  for (row = sth->fetchRow(sth); row != NULL; row = sth->fetchRow(sth)) {
    IDType consensusFragId = row->getLongLongAt(row, consensusCol);
    int    consensusFrom   = row->getIntAt(row, consensusCol + 1);
    int    consensusTo     = row->getIntAt(row, consensusCol + 2);

    IDType queryFragId     = row->getLongLongAt(row, queryCol);
    int    queryFrom       = row->getIntAt(row, queryCol + 1);
    int    queryTo         = row->getIntAt(row, queryCol + 2);

    int    strand          = row->getIntAt(row,6);
    IDType linkId          = row->getLongLongAt(row,7);
    double alignScore      = row->getDoubleAt(row,8);
    double percentIdentity = row->getDoubleAt(row,9);
    char  *cigar           = row->getStringAt(row,10);

    char *typeName = GenomicAlignAdaptor_alignmentTypeByMethodLinkId(gaa, linkId);

    if (reverse) {
      StrUtil_strReplChrs(cigar,"DI","ID");

      // alignment of the opposite strand
      if (strand == -1) {
        cigar = CigarStrUtil_reverse(cigar, strlen(cigar));
      }
    }

    GenomicAlign *align = GenomicAlign_new();

    GenomicAlign_setAdaptor(align, (BaseAdaptor *)gaa);
    GenomicAlign_setConsensusDNAFrag(align, DNAFragAdaptor_fetchByDbID(fragAdaptor,consensusFragId));
    GenomicAlign_setConsensusStart(align, consensusFrom);
    GenomicAlign_setConsensusEnd(align, consensusTo);
    GenomicAlign_setQueryDNAFrag(align, DNAFragAdaptor_fetchByDbID(fragAdaptor,queryFragId));
    GenomicAlign_setQueryStart(align, queryFrom);
    GenomicAlign_setQueryEnd(align, queryTo);
    GenomicAlign_setQueryStrand(align, strand);
    GenomicAlign_setAlignmentType(align, typeName);
    GenomicAlign_setScore(align, alignScore);
    GenomicAlign_setPercentId(align, percentIdentity);
    GenomicAlign_setCigarString(align, cigar);

    Vector_addElement(alignments, align);
  }

  return alignments;
}