/*
 * Consume cigar elements from cigList, starting at index *cigListPos, until
 * a match ('M') element has been processed or the end of the list is reached.
 *
 * Each element is a string of the form "<count><type>" where type is one of
 * 'M', 'I' or 'D'; an element consisting of the type character alone counts
 * as 1.
 *
 *   'D'  adds count to *qe
 *   'I'  adds count to *ce
 *   'M'  sets *cs = *ce + 1, *ce = *cs + count - 1 and likewise *qs / *qe
 *
 * *cigListPos is advanced past every element that was looked at, including
 * a malformed one.  On a malformed element an error is printed to stderr and
 * the function returns without touching the coordinates for that element.
 *
 * Side effect: the type character of each multi-character element is
 * overwritten with '\0' so that the count can be parsed in place.
 *
 * gaa is not used.
 */
void GenomicAlignAdaptor_nextCig(GenomicAlignAdaptor *gaa, Vector *cigList, int *cigListPos, int *cs, int *ce, int *qs, int *qe) {
  int   count;
  char  type;
  char *cigElem;
  int   lenElem;

  do {
    cigElem = Vector_getElementAt(cigList, *cigListPos);
    (*cigListPos)++;

    // A missing or empty element has no type character at all; indexing
    // lenElem-1 on it would read before the start of the string.
    if (cigElem == NULL || cigElem[0] == '\0') {
      fprintf(stderr,"Error: Cigar string format error for %s\n", cigElem ? cigElem : "(null)");
      break;
    }

    lenElem = strlen(cigElem);
    type    = cigElem[lenElem-1];

    if (type!='M' && type!='I' && type!='D') {
      fprintf(stderr,"Error: Cigar string format error for %s\n",cigElem);
      break;
    }

    if (lenElem > 1) {
      // Chop off the type character and parse what is left as the count
      cigElem[lenElem-1] = '\0';
      count = (int)atol(cigElem);
    } else {
      count = 1;
    }

    switch (type) {
      case 'D':
        *qe += count;
        break;
      case 'I':
        *ce += count;
        break;
      case 'M':
        *cs = *ce + 1;
        *ce = *cs + count - 1;
        *qs = *qe + 1;
        *qe = *qs + count - 1;
        break;
      default:
        // Not reachable: type was validated above
        break;
    }
  } while (type != 'M' && *cigListPos!=Vector_getNumElement(cigList));
}
/*
 * Map the genomic range start..end (on the given strand) to cDNA coordinates
 * of the prediction transcript, using the transcript's cDNA coordinate mapper.
 *
 * contig  The object whose address is used as the mapper id.  If NULL, the
 *         slice of the first translateable exon is used instead.
 *
 * Returns the MapperRangeSet produced by Mapper_mapCoordinates, or a new
 * empty MapperRangeSet when contig is NULL and the transcript has no
 * translateable exons.
 */
MapperRangeSet *PredictionTranscript_genomic2cDNA(PredictionTranscript *trans, int start, int end, int strand, BaseContig *contig) {
  Mapper *mapper;

  // "ids" in mapper are contigs of exons, so use the same contig that should
  // be attached to all of the exons...
  if (!contig) {
    Vector *translateable = PredictionTranscript_getAllTranslateableExons(trans);
    PredictionExon *firstExon;

    if (!Vector_getNumElement(translateable)) {
      // Release the temporary vector on this path too; it is freed below on
      // the normal path, so it must not be left dangling here.
      Vector_free(translateable);
      return MapperRangeSet_new();
    }

    firstExon = Vector_getElementAt(translateable, 0);
    contig = (BaseContig*)PredictionExon_getSlice(firstExon);
    Vector_free(translateable);
  }

  mapper = PredictionTranscript_getcDNACoordMapper(trans);

  return Mapper_mapCoordinates(mapper,(IDType)contig, start, end, strand, "genomic");
}
/*
 * Fetch the prediction transcript whose display_label equals stableId.
 *
 * The constraint "<synonym>.display_label = '<stableId>'" is built from the
 * synonym of the adaptor's first table and handed to genericFetch; the first
 * element of the result vector is returned and the vector itself is freed.
 *
 * Exits the program if stableId is NULL or if the constraint would not fit
 * in the local buffer.
 *
 * NOTE(review): stableId is placed into the SQL text unescaped.
 * NIY: Free pts if there are more than 1
 * NIY: Perl seemed to allow no pts so undef return, but that has not been
 *      done in other places (eg. TranscriptAdaptor) so it is not done here.
 */
PredictionTranscript *PredictionTranscriptAdaptor_fetchByStableId(PredictionTranscriptAdaptor *pta, char *stableId) {
  if (stableId == NULL) {
    fprintf(stderr,"Error: Stable_id argument expected in PredictionTranscriptAdaptor_fetchByStableId\n");
    exit(1);
  }

  NameTableType *tables = pta->getTables();
  char **primTab = (*tables)[0];
  char *tableSynonym = primTab[SYN];

  char constraint[1024];
  // Bounded formatting: an over-long stableId must not run off the end of
  // the stack buffer.
  int len = snprintf(constraint, sizeof(constraint), "%s.display_label = '%s'", tableSynonym, stableId);
  if (len < 0 || (size_t)len >= sizeof(constraint)) {
    fprintf(stderr,"Error: Stable_id too long in PredictionTranscriptAdaptor_fetchByStableId\n");
    exit(1);
  }

  Vector *pts = PredictionTranscriptAdaptor_genericFetch(pta, constraint, NULL, NULL);

  PredictionTranscript *pt = Vector_getElementAt(pts, 0);

  Vector_free(pts);

  return pt;
}
/*
 * Return how many positions of the interval start..end (inclusive) are
 * covered by the ranges registered for id.
 *
 * Returns 0 when start > end, when nothing is registered for id, or when the
 * registered list is empty (the latter also prints a diagnostic to stderr).
 */
long RangeRegistry_overlapSize(RangeRegistry *registry, IDType id, long start, long end) {
  // An inverted interval cannot overlap anything
  if (start > end) {
    return 0;
  }

  IDHash *byId = RangeRegistry_getRegistry(registry);

  // No list for this id, so there can't be any overlap
  if (!IDHash_contains(byId, id)) {
    return 0;
  }
  Vector *ranges = IDHash_getValue(byId, id);

  int nRange = Vector_getNumElement(ranges);
  if (nRange == 0) {
    fprintf(stderr, "Odd have zero length list in RangeRegistry_overlapSize\n");
    return 0;
  }

  // Binary search to narrow down where to start scanning - helps when the
  // list is big
  int lo = 0;
  int hi = Vector_getNumElement(ranges) - 1;
  while (hi - lo > 1) {
    int mid = (lo + hi) >> 1;
    CoordPair *probe = Vector_getElementAt(ranges, mid);

    if (CoordPair_getEnd(probe) < start) {
      lo = mid;
    } else {
      hi = mid;
    }
  }

  long total = 0;
  int  idx   = lo;
  while (idx < nRange) {
    CoordPair *cur      = Vector_getElementAt(ranges, idx);
    long       curStart = CoordPair_getStart(cur);
    long       curEnd   = CoordPair_getEnd(cur);

    // This range begins after the query ends: stop scanning
    if (curStart > end) {
      break;
    }

    // Query lies completely inside this range: the whole query is covered
    if (curStart <= start && curEnd >= end) {
      return end - start + 1;
    }

    // Clip the range to the query and count what remains, if anything
    long clipStart = (curStart > start ? curStart : start);
    long clipEnd   = (curEnd < end ? curEnd : end);
    if (clipEnd >= clipStart) {
      total += clipEnd - clipStart + 1;
    }

    idx++;
  }

  return total;
}
rc = Vector_getElementAt(rcVector,0); Vector_free(rcVector); return rc; }

/*
 * Fetch a single RepeatConsensus by repeat name and repeat class.
 *
 * Builds the constraint
 *   repeat_name = '<name>' AND repeat_class = '<class>' limit 1
 * passes it to RepeatConsensusAdaptor_genericFetch, returns element 0 of the
 * resulting vector and frees the vector.
 *
 * NOTE(review): name and class are formatted without any length limit into a
 * 256 byte stack buffer, and are not escaped - confirm callers only pass
 * short, trusted strings.
 */
RepeatConsensus *RepeatConsensusAdaptor_fetchByNameAndClass(RepeatConsensusAdaptor *rca, char *name, char *class) {
  char constraintStr[256];
  Vector *rcVector;
  RepeatConsensus *rc;

  sprintf(constraintStr,"repeat_name = \'%s\' AND repeat_class = \'%s\' limit 1", name,class);

  rcVector = RepeatConsensusAdaptor_genericFetch(rca, constraintStr);

  // Element 0 is taken unconditionally, before the container is released
  rc = Vector_getElementAt(rcVector,0);
  Vector_free(rcVector);

  return rc;
}

Vector *RepeatConsensusAdaptor_fetchByClassAndSeq(RepeatConsensusAdaptor *rca, char *class, char *seq) {
  Vector *result = NULL;
  char *constraintStr = NULL;

  if ((constraintStr = (char *)calloc(655500,sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating constraintStr\n");
    return result;
  }
int main(int argc, char *argv[]) { DBAdaptor * dba; StatementHandle *sth; ResultRow * row; Vector * slices; int nSlices; htsFile * out; int argNum = 1; char *inFName = NULL; char *outFName = NULL; char *dbUser = "******"; char *dbPass = NULL; int dbPort = 3306; char *dbHost = "ens-staging.internal.sanger.ac.uk"; char *dbName = "homo_sapiens_core_71_37"; char *assName = "GRCh37"; char *chrName = "1"; int flags = 0; int threads = 1; initEnsC(argc, argv); while (argNum < argc) { char *arg = argv[argNum]; char *val; // Ones without a val go here if (!strcmp(arg, "-U") || !strcmp(arg,"--ucsc_naming")) { flags |= M_UCSC_NAMING; } else { // Ones with a val go in this block if (argNum == argc-1) { Bamcov_usage(); } val = argv[++argNum]; if (!strcmp(arg, "-i") || !strcmp(arg,"--in_file")) { StrUtil_copyString(&inFName,val,0); } else if (!strcmp(arg, "-o") || !strcmp(arg,"--out_file")) { StrUtil_copyString(&outFName,val,0); } else if (!strcmp(arg, "-h") || !strcmp(arg,"--host")) { StrUtil_copyString(&dbHost,val,0); } else if (!strcmp(arg, "-p") || !strcmp(arg,"--password")) { StrUtil_copyString(&dbPass,val,0); } else if (!strcmp(arg, "-P") || !strcmp(arg,"--port")) { dbPort = atoi(val); } else if (!strcmp(arg, "-n") || !strcmp(arg,"--name")) { StrUtil_copyString(&dbName,val,0); } else if (!strcmp(arg, "-u") || !strcmp(arg,"--user")) { StrUtil_copyString(&dbUser,val,0); } else if (!strcmp(arg, "-t") || !strcmp(arg,"--threads")) { threads = atoi(val); } else if (!strcmp(arg, "-a") || !strcmp(arg,"--assembly")) { StrUtil_copyString(&assName,val,0); } else if (!strcmp(arg, "-v") || !strcmp(arg,"--verbosity")) { verbosity = atoi(val); // Temporary } else if (!strcmp(arg, "-c") || !strcmp(arg,"--chromosome")) { StrUtil_copyString(&chrName,val,0); } else { fprintf(stderr,"Error in command line at %s\n\n",arg); Bamcov_usage(); } } argNum++; } if (verbosity > 0) { printf("Program for calculating read coverage in a BAM file \n" "Steve M.J. Searle. 
[email protected] Last update April 2013.\n"); } if (!inFName || !outFName) { Bamcov_usage(); } dba = DBAdaptor_new(dbHost,dbUser,dbPass,dbName,dbPort,NULL); //nSlices = getSlices(dba, destName); nSlices = 1; slices = Vector_new(); SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(dba); Slice *slice = SliceAdaptor_fetchByRegion(sa,NULL,chrName,POS_UNDEF,POS_UNDEF,1,NULL, 0); Vector_addElement(slices,slice); if (Vector_getNumElement(slices) == 0) { fprintf(stderr, "Error: No slices.\n"); exit(1); } htsFile *in = hts_open(inFName, "rb"); if (in == 0) { fprintf(stderr, "Fail to open BAM file %s\n", inFName); return 1; } hts_set_threads(in, threads); hts_idx_t *idx; idx = bam_index_load(inFName); // load BAM index if (idx == 0) { fprintf(stderr, "BAM index file is not available.\n"); return 1; } int i; for (i=0; i<Vector_getNumElement(slices); i++) { Slice *slice = Vector_getElementAt(slices,i); if (verbosity > 0) printf("Working on '%s'\n",Slice_getName(slice)); // if (verbosity > 0) printf("Stage 1 - retrieving annotation from database\n"); // Vector *genes = getGenes(slice, flags); if (verbosity > 0) printf("Stage 1 - calculating coverage\n"); calcCoverage(inFName, slice, in, idx, flags); } hts_idx_destroy(idx); hts_close(in); if (verbosity > 0) printf("Done\n"); return 0; }
/*
 * Compute per-base read coverage over a slice from an indexed BAM file and
 * print it to stdout as "<1-based position> <coverage>" lines, preceded by a
 * summary of the number of reads used and the number rejected.
 *
 * fName  name of the BAM file (used in diagnostics only)
 * slice  region to process; its chr start must be 1
 * in     open BAM file handle; the header is read from it here
 * idx    index loaded for the BAM file
 * flags  if M_UCSC_NAMING is set, "chr" is prepended to the seq region name
 *        before it is looked up in the BAM header
 *
 * Reads flagged unmapped, secondary, QC-fail or duplicate are counted as bad
 * and skipped.  For the remaining reads, M, =, X and D cigar operations add
 * one to the coverage of every reference base they span; N advances along
 * the reference without adding coverage; other operations are ignored.
 *
 * Returns 1 (both after a successful run and when the slice does not start
 * at 1 or the coverage array cannot be allocated).  Exits the program if the
 * header cannot be read, the region is not in the header, or the region
 * string cannot be parsed.
 */
int calcCoverage(char *fName, Slice *slice, htsFile *in, hts_idx_t *idx, int flags) {
  int  ref;
  int  begRange;
  int  endRange;
  char region[1024];
  char region_name[512];

  if (Slice_getChrStart(slice) != 1) {
    fprintf(stderr, "Currently only allow a slice start position of 1\n");
    return 1;
  }

  // Build the sequence name first, on its own, so that it can be used both
  // for the header lookup and for the "name:start-end" region string.
  int nameLen;
  if (flags & M_UCSC_NAMING) {
    nameLen = snprintf(region_name, sizeof(region_name), "chr%s", Slice_getSeqRegionName(slice));
  } else {
    nameLen = snprintf(region_name, sizeof(region_name), "%s", Slice_getSeqRegionName(slice));
  }
  if (nameLen < 0 || (size_t)nameLen >= sizeof(region_name)) {
    fprintf(stderr, "Region name too long for %s\n", Slice_getSeqRegionName(slice));
    exit(1);
  }

  // bam_hdr_read returns a freshly allocated header, so there is nothing to
  // pre-allocate with bam_hdr_init.
  bam_hdr_t *header = bam_hdr_read(in->fp.bgzf);
  if (header == NULL) {
    fprintf(stderr, "Failed reading BAM header from %s\n", fName);
    exit(1);
  }

  ref = bam_name2id(header, region_name);
  if (ref < 0) {
    fprintf(stderr, "Invalid region %s\n", region_name);
    exit(1);
  }

  snprintf(region, sizeof(region), "%s:%ld-%ld", region_name,
           Slice_getSeqRegionStart(slice), Slice_getSeqRegionEnd(slice));

  if (hts_parse_reg(region, &begRange, &endRange) == NULL) {
    fprintf(stderr, "Could not parse %s\n", region);
    exit(2);
  }
  bam_hdr_destroy(header);

  hts_itr_t *iter = sam_itr_queryi(idx, ref, begRange, endRange);
  bam1_t *b = bam_init1();

  long sliceLen = Slice_getLength(slice);
  Coverage *coverage = calloc(sliceLen,sizeof(Coverage));
  if (coverage == NULL) {
    fprintf(stderr, "Failed allocating coverage array\n");
    sam_itr_destroy(iter);
    bam_destroy1(b);
    return 1;
  }

  long counter = 0;
  long overlapping = 0;   // only used by the code under #ifdef DONE
  long bad = 0;
  int startIndex = 0;     // only used by the code under #ifdef DONE

  while (bam_itr_next(in, iter, b) >= 0) {
    if (b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)) {
      bad++;
      continue;
    }

    int end;
    end = bam_endpos(b);

    // There is a special case for reads which have zero length and start at
    // begRange (so end at begRange ie. before the first base we're
    // interested in).  Those are skipped here.
    if (end == begRange) {
      continue;
    }
    counter++;

    if (!(counter%1000000)) {
      if (verbosity > 1) {
        printf(".");
      }
      fflush(stdout);
    }

    // Remember: b->core.pos is zero based!
    int cigInd;
    int refPos;
    uint32_t *cigar = bam_get_cigar(b);
    for (cigInd = 0, refPos = b->core.pos; cigInd < b->core.n_cigar; ++cigInd) {
      int k;
      int lenCigBlock = cigar[cigInd]>>4;
      int op          = cigar[cigInd]&0xf;

      if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF || op == BAM_CDEL) {
        for (k = 0; k < lenCigBlock; ++k) {
          // Reads may run past the end of the slice: never index beyond the
          // coverage array.
          if (refPos+k >= sliceLen) break;
          coverage[refPos+k].coverage++;
        }
        if (k < lenCigBlock) break;
        refPos += lenCigBlock;
      } else if (op == BAM_CREF_SKIP) {
        refPos += lenCigBlock;
      }
      // Soft clips, hard clips and insertions consume no reference bases
    }

#ifdef DONE
    int j;
    int done = 0;
    int hadOverlap = 0;

    for (j=startIndex; j < Vector_getNumElement(genes) && !done; j++) {
      Gene *gene = Vector_getElementAt(genes,j);
      if (!gene) {
        continue;
      }
      // Remember: b->core.pos is zero based!
      if (b->core.pos < Gene_getEnd(gene) && end >= Gene_getStart(gene)) {
        int k;

        int doneGene = 0;
        for (k=0; k<Gene_getTranscriptCount(gene) && !doneGene; k++) {
          Transcript *trans = Gene_getTranscriptAt(gene,k);

          if (b->core.pos < Transcript_getEnd(trans) && end >= Transcript_getStart(trans)) {
            int m;

            for (m=0; m<Transcript_getExonCount(trans) && !doneGene; m++) {
              Exon *exon = Transcript_getExonAt(trans,m);

              if (b->core.pos < Exon_getEnd(exon) && end >= Exon_getStart(exon)) {

                // Only count as overlapping once (could be that a read overlaps more than one gene)
                if (!hadOverlap) {
                  overlapping++;
                  hadOverlap = 1;
                }

                gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
                gs->score++;

                doneGene = 1;
              }
            }
          }
        }
      } else if (Gene_getStart(gene) > end) {
        done = 1;
      } else if (Gene_getEnd(gene) < b->core.pos+1) {
        gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
        printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene), Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "", gs->score);

        if (verbosity > 1) {
          printf("Removing gene %s (index %d) with extent %d to %d\n", Gene_getStableId(gene), gs->index, Gene_getStart(gene), Gene_getEnd(gene));
        }

        Vector_setElementAt(genes,j,NULL);

        // Magic (very important for speed) - move startIndex to first non null gene
        int n;
        startIndex = 0;
        for (n=0;n<Vector_getNumElement(genes);n++) {
          void *v = Vector_getElementAt(genes,n);

          if (v != NULL) {
            break;
          }
          startIndex++;
        }
        if (verbosity > 1) {
          printf("startIndex now %d\n",startIndex);
        }
      }
    }
#endif
  }
  if (verbosity > 1) {
    printf("\n");
  }

#ifdef DONE
  // Print out read counts for what ever's left in the genes array
  int n;
  for (n=0;n<Vector_getNumElement(genes);n++) {
    Gene *gene = Vector_getElementAt(genes,n);

    if (gene != NULL) {
      gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
      printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene), Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "", gs->score);
    }
  }
#endif

  printf("Read %ld reads. "
         "Number of bad reads (unmapped, qc fail, secondary, dup) %ld\n", counter, bad);

  long i;
  for (i=0; i< Slice_getLength(slice); i++) {
    printf("%ld %ld\n", i+1, coverage[i].coverage);
  }

  free(coverage);
  sam_itr_destroy(iter);
  bam_destroy1(b);

  return 1;
}
/*
 * Fetch all prediction transcripts on a slice, optionally preloading their
 * exons.
 *
 * pta        adaptor to fetch with
 * slice      slice the transcripts are fetched on
 * logicName  passed through to BaseFeatureAdaptor_fetchAllBySlice
 * loadExons  if zero, or if fewer than two transcripts are found, the
 *            transcripts are returned as fetched (exons stay lazy-loaded)
 *
 * When preloading, one query maps every prediction_exon_id to its
 * transcript(s) and rank, all exons on a slice spanning the transcripts are
 * fetched in one go, moved onto the transcript slice if necessary, and added
 * to their transcripts.  This is faster than one query per transcript.
 *
 * Returns the vector of transcripts.  If the query buffer cannot be
 * allocated an error is printed and the transcripts are returned without
 * preloaded exons.  Exits the program if an exon cannot be transferred onto
 * the transcript slice.
 *
 * NOTE(review): the `exons` vector and a newly fetched extSlice are not
 * released here; who owns them is not visible from this function.
 */
Vector *PredictionTranscriptAdaptor_fetchAllBySlice(PredictionTranscriptAdaptor *pta, Slice *slice, char *logicName, int loadExons) {
  Vector *transcripts = BaseFeatureAdaptor_fetchAllBySlice((BaseFeatureAdaptor *)pta, slice, logicName);

  // if there are 0 or 1 transcripts still do lazy-loading
  if ( ! loadExons || Vector_getNumElement(transcripts) < 2 ) {
    return transcripts;
  }

  // preload all of the exons now, instead of lazy loading later
  // faster than 1 query per transcript

  // get extent of region spanned by transcripts
  long minStart = 2000000000;
  long maxEnd   = -2000000000;

  int i;
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t = Vector_getElementAt(transcripts, i);
    if (PredictionTranscript_getSeqRegionStart((SeqFeature*)t) < minStart) {
      minStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)t);
    }
    if (PredictionTranscript_getSeqRegionEnd((SeqFeature*)t) > maxEnd) {
      maxEnd = PredictionTranscript_getSeqRegionEnd((SeqFeature*)t);
    }
  }

  Slice *extSlice;

  if (minStart >= Slice_getStart(slice) && maxEnd <= Slice_getEnd(slice)) {
    extSlice = slice;
  } else {
    SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba);
    extSlice = SliceAdaptor_fetchByRegion(sa,
                                          Slice_getCoordSystemName(slice),
                                          Slice_getSeqRegionName(slice),
                                          minStart,
                                          maxEnd,
                                          Slice_getStrand(slice),
                                          CoordSystem_getVersion(Slice_getCoordSystem(slice)),
                                          0);
  }

  // associate exon identifiers with transcripts
  IDHash *trHash = IDHash_new(IDHASH_MEDIUM);
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t = Vector_getElementAt(transcripts, i);
    if ( ! IDHash_contains(trHash, PredictionTranscript_getDbID(t))) {
      IDHash_add(trHash, PredictionTranscript_getDbID(t), t);
    }
  }

  IDType *uniqueIds = IDHash_getKeys(trHash);
  int     nUniqueId = IDHash_getNumValues(trHash);

  static const char qPrefix[] =
      "SELECT prediction_transcript_id, prediction_exon_id, exon_rank FROM prediction_exon WHERE prediction_transcript_id IN (";

  char tmpStr[1024];
  int  lenNum;

  // Size the query buffer from the ids actually present instead of relying
  // on a fixed capacity: prefix (sizeof includes the NUL), the closing ')',
  // and for every id its digits plus a ", " separator.
  size_t qCap = sizeof(qPrefix) + 1;
  for (i=0; i<nUniqueId; i++) {
    lenNum = snprintf(tmpStr, sizeof(tmpStr), IDFMTSTR, uniqueIds[i]);
    qCap += (size_t)lenNum + 2;
  }

  char *qStr = calloc(qCap, sizeof(char));
  if (qStr == NULL) {
    fprintf(stderr,"Failed allocating qStr\n");
    // Nothing below will run, so release what was built so far
    free(uniqueIds);
    IDHash_free(trHash, NULL);
    return transcripts;
  }

  int endPoint = sprintf(qStr, "%s", qPrefix);
  for (i=0; i<nUniqueId; i++) {
    if (i!=0) {
      qStr[endPoint++] = ',';
      qStr[endPoint++] = ' ';
    }
    lenNum = snprintf(tmpStr, sizeof(tmpStr), IDFMTSTR, uniqueIds[i]);
    memcpy(&(qStr[endPoint]), tmpStr, lenNum);
    endPoint+=lenNum;
  }
  qStr[endPoint++] = ')';
  qStr[endPoint] = '\0';

  free(uniqueIds);

  StatementHandle *sth = pta->prepare((BaseAdaptor *)pta,qStr,strlen(qStr));
  sth->execute(sth);

  // exon id -> vector of (transcript, rank) pairs
  IDHash *exTrHash = IDHash_new(IDHASH_MEDIUM);
  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType trId = row->getLongLongAt(row,0);
    IDType exId = row->getLongLongAt(row,1);
    int    rank = row->getIntAt(row,2);

    if (! IDHash_contains(exTrHash, exId)) {
      Vector *vec = Vector_new();
      Vector_setFreeFunc(vec, PredictionTranscriptRankPair_free);
      IDHash_add(exTrHash, exId, vec);
    }
    Vector *exVec = IDHash_getValue(exTrHash, exId);
    PredictionTranscriptRankPair *trp = PredictionTranscriptRankPair_new(IDHash_getValue(trHash, trId), rank);
    Vector_addElement(exVec, trp);
  }

  IDHash_free(trHash, NULL);

  sth->finish(sth);

  PredictionExonAdaptor *pea = DBAdaptor_getPredictionExonAdaptor(pta->dba);
  Vector *exons = PredictionExonAdaptor_fetchAllBySlice(pea, extSlice);

  // move exons onto transcript slice, and add them to transcripts
  for (i=0; i<Vector_getNumElement(exons); i++) {
    PredictionExon *ex = Vector_getElementAt(exons, i);

    // Perl didn't have this line - it was in the GeneAdaptor version so it
    // is kept: skip exons that belong to none of the fetched transcripts
    if (!IDHash_contains(exTrHash, PredictionExon_getDbID(ex))) continue;

    PredictionExon *newEx;
    if (slice != extSlice) {
      newEx = (PredictionExon*)PredictionExon_transfer((SeqFeature*)ex, slice);
      if (newEx == NULL) {
        fprintf(stderr, "Unexpected. Exon could not be transferred onto PredictionTranscript slice.\n");
        exit(1);
      }
    } else {
      newEx = ex;
    }

    Vector *exVec = IDHash_getValue(exTrHash, PredictionExon_getDbID(newEx));
    int j;
    for (j=0; j<Vector_getNumElement(exVec); j++) {
      PredictionTranscriptRankPair *trp = Vector_getElementAt(exVec, j);
      PredictionTranscript_addExon(trp->transcript, newEx, &trp->rank);
    }
  }

  IDHash_free(exTrHash, Vector_free);
  free(qStr);

  return transcripts;
}
// Note I didn't implement the stable id fetching uggliness here. I'll probably make a separate method for that // if necessary Vector *BaseAdaptor_uncachedFetchAllByDbIDList(BaseAdaptor *ba, Vector *idList, Slice *slice) { if ( idList == NULL) { fprintf(stderr, "id_list list reference argument is required - bye!"); return NULL; } char constraintPref[1024]; if (!Vector_getNumElement(idList)) { return Vector_new(); } NameTableType *tables = ba->getTables(); char **t = (*tables)[0]; sprintf(constraintPref, "%s.%s_id ", t[SYN], t[NAME] ); // Ensure that we do not exceed MySQL's max_allowed_packet (defaults to // 1 MB) splitting large queries into smaller queries of at most 256 KB. // Assuming a (generous) average dbID string // length of 16, this means 16384 dbIDs in each query. int maxSize = 16384; // Uniquify the list IDHash *idListHash = IDHash_new(IDHASH_MEDIUM); int i; for (i=0; i<Vector_getNumElement(idList); i++) { IDType id = *(IDType *)(Vector_getElementAt(idList, i)); if (!IDHash_contains(idListHash, id)) { IDHash_add(idListHash, id, &trueVal); } } IDType *uniqueIds = IDHash_getKeys(idListHash); int nUniqueId = IDHash_getNumValues(idListHash); IDHash_free(idListHash, NULL); Vector *out = Vector_new(); int lenNum; for (i=0; i<nUniqueId; i+=maxSize) { char *constraint = NULL; if ((constraint = (char *)calloc(655500,sizeof(char))) == NULL) { fprintf(stderr,"Failed allocating constraint\n"); return out; } strcpy(constraint, constraintPref); // Special case for one remaining Id if (i == nUniqueId-1) { sprintf(constraint, "%s = "IDFMTSTR, constraint, uniqueIds[i]); } else { char tmpStr[1024]; int endPoint = sprintf(constraint, "%s IN (", constraint); int j; for (j=0; j<maxSize && j+i<nUniqueId; j++) { if (j!=0) { constraint[endPoint++] = ','; constraint[endPoint++] = ' '; } lenNum = sprintf(tmpStr, IDFMTSTR, uniqueIds[i+j]); memcpy(&(constraint[endPoint]), tmpStr, lenNum); endPoint+=lenNum; } constraint[endPoint++] = ')'; constraint[endPoint] = '\0'; } Vector 
*resChunk = BaseAdaptor_genericFetch(ba, constraint, NULL, slice); Vector_append(out, resChunk); Vector_free(resChunk); free(constraint); } free(uniqueIds); return out; }
/*
 * Write a textual dump of every gene in `genes` to stderr: the gene itself,
 * each of its transcripts, each transcript's exons and, if present, its
 * translation (ids, peptide sequence and attributes).
 *
 * When withSupport is non-zero the supporting features of transcripts and
 * exons and the intron supporting evidence of transcripts are listed too.
 *
 * Returns the `failed` flag, which nothing in this function ever sets, so
 * the result is always 0.
 */
int dumpGenes(Vector *genes, int withSupport) {
  FILE *out    = stderr;
  int   failed = 0;
  int   geneIdx;

  for (geneIdx = 0; geneIdx < Vector_getNumElement(genes) && !failed; geneIdx++) {
    Gene *gene = Vector_getElementAt(genes, geneIdx);

    fprintf(out, "Gene %s (%s) coords: %ld %ld %d\n",
            Gene_getStableId(gene),
            (Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : ""),
            Gene_getStart(gene),
            Gene_getEnd(gene),
            Gene_getStrand(gene));

    int transIdx;
    for (transIdx = 0; transIdx < Gene_getTranscriptCount(gene); transIdx++) {
      Transcript *trans = Gene_getTranscriptAt(gene, transIdx);
      int featIdx;

      fprintf(out, " Trans %s coords: %ld %ld %d biotype: %s\n",
              Transcript_getStableId(trans),
              Transcript_getStart(trans),
              Transcript_getEnd(trans),
              Transcript_getStrand(trans),
              Transcript_getBiotype(trans));

      // Transcript level evidence
      if (withSupport) {
        Vector *transSupport = Transcript_getAllSupportingFeatures(trans);
        for (featIdx = 0; featIdx < Vector_getNumElement(transSupport); featIdx++) {
          BaseAlignFeature *feature = Vector_getElementAt(transSupport, featIdx);
          fprintf(out, " support %s coords: %ld %ld %d\n",
                  BaseAlignFeature_getHitSeqName(feature),
                  BaseAlignFeature_getStart(feature),
                  BaseAlignFeature_getEnd(feature),
                  BaseAlignFeature_getStrand(feature));
        }

        Vector *introns = Transcript_getAllIntronSupportingEvidence(trans);
        for (featIdx = 0; featIdx < Vector_getNumElement(introns); featIdx++) {
          IntronSupportingEvidence *evidence = Vector_getElementAt(introns, featIdx);
          fprintf(out, " intron support %s coords: %ld %ld %d\n",
                  IntronSupportingEvidence_getHitName(evidence),
                  IntronSupportingEvidence_getStart(evidence),
                  IntronSupportingEvidence_getEnd(evidence),
                  IntronSupportingEvidence_getStrand(evidence));
        }
      }

      // Exons, each followed by its own evidence if requested
      for (featIdx = 0; featIdx < Transcript_getExonCount(trans); featIdx++) {
        Exon *exon = Transcript_getExonAt(trans, featIdx);

        fprintf(out, " exon %s (%p) coords: %ld %ld %d\n",
                Exon_getStableId(exon),
                exon,
                Exon_getStart(exon),
                Exon_getEnd(exon),
                Exon_getStrand(exon));

        if (!withSupport) {
          continue;
        }

        Vector *exonSupport = Exon_getAllSupportingFeatures(exon);
        int supIdx;
        for (supIdx = 0; supIdx < Vector_getNumElement(exonSupport); supIdx++) {
          BaseAlignFeature *feature = Vector_getElementAt(exonSupport, supIdx);
          fprintf(out, " support %s coords: %ld %ld %d\n",
                  BaseAlignFeature_getHitSeqName(feature),
                  BaseAlignFeature_getStart(feature),
                  BaseAlignFeature_getEnd(feature),
                  BaseAlignFeature_getStrand(feature));
        }
      }

      // Translation, if the transcript has one
      Translation *translation = Transcript_getTranslation(trans);
      if (!translation) {
        continue;
      }

      fprintf(out, " translation id: %s %s %d %s %d\n",
              Translation_getStableId(translation),
              Exon_getStableId(Translation_getStartExon(translation)),
              Translation_getStart(translation),
              Exon_getStableId(Translation_getEndExon(translation)),
              Translation_getEnd(translation));

      // The peptide string is released here once it has been printed
      char *peptide = Transcript_translate(trans);
      fprintf(out, " translation: %s\n", peptide);
      free(peptide);

      Vector *attribs = Translation_getAllAttributes(translation, NULL);
      if (Vector_getNumElement(attribs)) {
        fprintf(out, " translation attributes:\n");

        int attrIdx;
        for (attrIdx = 0; attrIdx < Vector_getNumElement(attribs); attrIdx++) {
          Attribute *attr = Vector_getElementAt(attribs, attrIdx);
          fprintf(out, " code %s name %s desc %s value %s\n",
                  Attribute_getCode(attr),
                  Attribute_getName(attr),
                  Attribute_getDescription(attr),
                  Attribute_getValue(attr));
        }
      }
    }
  }

  return failed;
}
int main(int argc, char *argv[]) { DBAdaptor *dba; GeneAdaptor *ga; Slice *slice = NULL; Vector *genes = NULL; int i = 0; int failed = 0; initEnsC(argc, argv); // ProcUtil_showBacktrace(EnsC_progName); dba = Test_initROEnsDB(); slice = Test_getStandardSlice(dba); // DBAdaptor *seqdba = DBAdaptor_new("genebuild6.internal.sanger.ac.uk","ensadmin","ensembl","steve_chicken_rnaseq_missing_reference",3306,NULL); // dba = DBAdaptor_new("genebuild1.internal.sanger.ac.uk","ensadmin","ensembl","steve_chicken_rnaseq_missing_refined",3306,seqdba); ok(1, slice!=NULL); ga = DBAdaptor_getGeneAdaptor(dba); SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(dba); ok(2, ga!=NULL); slice = SliceAdaptor_fetchByRegion(sa,"chromosome","20",10000000,50000000,1,NULL,0); // slice = SliceAdaptor_fetchByRegion(sa,"chromosome","17",1000000,5000000,1,NULL,0); // slice = SliceAdaptor_fetchByRegion(sa,"chromosome","17",1,5000000,1,NULL,0); // Has a seleno // slice = SliceAdaptor_fetchByRegion(sa,"chromosome","1",1000000,27000000,1,NULL,0); // slice = SliceAdaptor_fetchByRegion(sa,"chromosome","MT",1,17000,1,NULL,0); genes = Slice_getAllGenes(slice, NULL, NULL, 1, NULL, NULL); fprintf(stdout, "Have %d genes\n", Vector_getNumElement(genes)); ok(3, genes!=NULL); ok(4, Vector_getNumElement(genes)!=0); failed = dumpGenes(genes, 1); ok(5, !failed); //Vector *toplevelSlices = SliceAdaptor_fetchAll(sa, "toplevel", NULL, 0); Vector *toplevelSlices = SliceAdaptor_fetchAll(sa, "chromosome", NULL, 0); for (i=0;i<Vector_getNumElement(toplevelSlices) && !failed;i++) { Slice *tlSlice = Vector_getElementAt(toplevelSlices, i); fprintf(stderr, "Slice %s\n", Slice_getName(tlSlice)); genes = Slice_getAllGenes(tlSlice, NULL, NULL, 1, NULL, NULL); fprintf(stderr, "Got %d genes on %s\n", Vector_getNumElement(genes), Slice_getName(tlSlice)); failed = dumpGenes(genes, 0); } //tc_malloc_stats(); fprintf(stderr,"\nEcostring table stats:\n"); EcoString_getInfo(ecoSTable); fprintf(stderr,"\n"); ProcUtil_timeInfo("at end of 
GeneTest"); return 0; }
/*
 * Abort if a formatted SQL statement did not fit into its buffer.
 * `written` is the value returned by snprintf, `capacity` the buffer size.
 */
static void DBEntryAdaptor_checkQueryLength(int written, size_t capacity) {
  if (written < 0 || (size_t)written >= capacity) {
    fprintf(stderr,"Error: SQL statement too long in DBEntryAdaptor_store\n");
    exit(1);
  }
}

/*
 * Store an external reference (xref) and link it to an ensembl object.
 *
 * dbea           adaptor used to prepare statements
 * exObj          the DBEntry to store
 * ensObject      id of the ensembl object the xref is attached to
 * ensType        ensembl object type string stored in object_xref
 * ignoreRelease  not implemented; a notice is printed to stderr on every call
 *
 * Steps:
 *  1. look up external_db by name and release; exit if it does not exist
 *  2. look up the xref; if absent insert it, together with its synonyms
 *  3. if no object_xref row links the xref to the object, insert one, set
 *     the dbID and adaptor on exObj, and, when exObj carries an
 *     IdentityXref, insert an identity_xref row as well
 *
 * Returns the xref id.
 *
 * NOTE(review): string values are placed into the SQL text unescaped.
 * NIY: handling of NULL values.
 */
IDType DBEntryAdaptor_store(DBEntryAdaptor *dbea, DBEntry *exObj, IDType ensObject, char *ensType, int ignoreRelease) {
  fprintf(stderr,"DBEntryAdaptor_store does not implement ignoreRelease functionality yet\n");

  // Large enough for an xref insert carrying a description; every statement
  // is formatted with snprintf and length checked.
  char qStr[4096];
  int  qLen;
  StatementHandle *sth;
  ResultRow *row;
  IDType dbRef;
  IDType dbX;

  //
  // Check for the existance of the external_db, throw if it does not exist
  //
  qLen = snprintf(qStr, sizeof(qStr),
                  "SELECT external_db_id"
                  " FROM external_db"
                  " WHERE db_name = '%s'"
                  " AND db_release = %s",
                  DBEntry_getDbName(exObj),
                  DBEntry_getRelease(exObj));
  DBEntryAdaptor_checkQueryLength(qLen, sizeof(qStr));

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  row = sth->fetchRow(sth);
  if( row == NULL ) {
    sth->finish(sth);
    fprintf(stderr,"Error: external_db [%s] release [%s] does not exist\n",
            DBEntry_getDbName(exObj), DBEntry_getRelease(exObj));
    exit(1);
  }

  dbRef = row->getLongLongAt(row,0);
  sth->finish(sth);

  //
  // Check for the existance of the external reference, add it if not present
  //
  qLen = snprintf(qStr, sizeof(qStr),
                  "SELECT xref_id"
                  " FROM xref"
                  " WHERE external_db_id = " IDFMTSTR
                  " AND dbprimary_acc = '%s'"
                  " AND version = %s",
                  dbRef,
                  DBEntry_getPrimaryId(exObj),
                  DBEntry_getVersion(exObj));
  DBEntryAdaptor_checkQueryLength(qLen, sizeof(qStr));

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  row = sth->fetchRow(sth);

  if (row != NULL) {
    dbX = row->getLongLongAt(row,0);
    sth->finish(sth);
  } else {
    //
    // store the new xref
    //

    // First finish the old sth
    sth->finish(sth);

    // NIY Handling NULL values
    qLen = snprintf(qStr, sizeof(qStr),
                    "INSERT ignore INTO xref"
                    " SET dbprimary_acc = '%s',"
                    " display_label = '%s',"
                    " version = %s,"
                    " description = '%s',"
                    " external_db_id = " IDFMTSTR,
                    DBEntry_getPrimaryId(exObj),
                    DBEntry_getDisplayId(exObj),
                    DBEntry_getVersion(exObj),
                    DBEntry_getDescription(exObj),
                    dbRef
                   );
    DBEntryAdaptor_checkQueryLength(qLen, sizeof(qStr));

    sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
    sth->execute(sth);

    dbX = sth->getInsertId(sth);
    sth->finish(sth);

    //
    // store the synonyms for the new xref
    //
    if (DBEntry_getAllSynonyms(exObj)) {
      StatementHandle *checkSth;
      StatementHandle *storeSth;
      int i;
      Vector *synonyms;

      // These two calls take no arguments: formatting turns the doubled
      // percent signs into single ones, leaving a statement template whose
      // placeholders are filled in by execute() below.
      // (presumably IDFMTSTR itself starts with '%' - confirm)
      qLen = snprintf(qStr, sizeof(qStr),
                      "SELECT xref_id, synonym"
                      " FROM external_synonym"
                      " WHERE xref_id = %" IDFMTSTR
                      " AND synonym = '%%s'");
      DBEntryAdaptor_checkQueryLength(qLen, sizeof(qStr));

      checkSth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

      qLen = snprintf(qStr, sizeof(qStr),
                      "INSERT ignore INTO external_synonym"
                      " SET xref_id = %" IDFMTSTR ", synonym = '%%s'");
      DBEntryAdaptor_checkQueryLength(qLen, sizeof(qStr));

      storeSth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

      synonyms = DBEntry_getAllSynonyms(exObj);

      for (i=0;i<Vector_getNumElement(synonyms); i++) {
        char *syn = Vector_getElementAt(synonyms,i);
        checkSth->execute(checkSth, dbX, syn);
        row = checkSth->fetchRow(checkSth);
        if (!row) {
          storeSth->execute(storeSth, dbX, syn);
        }
      }

      checkSth->finish(checkSth);
      storeSth->finish(storeSth);
    }
  }

  //
  // check if the object mapping was already stored
  //
  qLen = snprintf(qStr, sizeof(qStr),
                  "SELECT xref_id"
                  " FROM object_xref"
                  " WHERE xref_id = " IDFMTSTR
                  " AND ensembl_object_type = '%s'"
                  " AND ensembl_id = " IDFMTSTR,
                  dbX, ensType, ensObject);
  DBEntryAdaptor_checkQueryLength(qLen, sizeof(qStr));

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  row = sth->fetchRow(sth);
  // Remember whether a row was found before the handle is finished; the row
  // itself must not be used afterwards.
  int alreadyStored = (row != NULL);
  sth->finish(sth);

  if (!alreadyStored) {
    IDType Xidt;

    //
    // Store the reference to the internal ensembl object
    //
    qLen = snprintf(qStr, sizeof(qStr),
                    "INSERT ignore INTO object_xref"
                    " SET xref_id = " IDFMTSTR ","
                    " ensembl_object_type = '%s',"
                    " ensembl_id = " IDFMTSTR,
                    dbX, ensType, ensObject);
    DBEntryAdaptor_checkQueryLength(qLen, sizeof(qStr));

    sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
    sth->execute(sth);

    DBEntry_setDbID(exObj, dbX);
    DBEntry_setAdaptor(exObj, (BaseAdaptor *)dbea);

    Xidt = sth->getInsertId(sth);
    // Finish this handle before the variable is reused or goes out of use
    sth->finish(sth);

    //
    // If this is an IdentityXref need to store in that table too
    //
    if (DBEntry_getIdentityXref(exObj)) {
      IdentityXref *idx = DBEntry_getIdentityXref(exObj);
      qLen = snprintf(qStr, sizeof(qStr),
                      "INSERT ignore INTO identity_xref"
                      " SET object_xref_id = " IDFMTSTR ","
                      " query_identity = %f,"
                      " target_identity = %f",
                      Xidt,
                      IdentityXref_getQueryIdentity(idx),
                      IdentityXref_getTargetIdentity(idx));
      DBEntryAdaptor_checkQueryLength(qLen, sizeof(qStr));

      sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
      sth->execute(sth);
      sth->finish(sth);
    }
  }
  return dbX;
}
// Store every GenomicAlign of genomicAligns as one row of genomic_align_block,
// using a single multi-row INSERT statement.
//
//   gaa            adaptor used to look up method link ids and to prepare the statement
//   genomicAligns  Vector of GenomicAlign *
//
// Nothing is written to the database when
//   - the scratch buffer cannot be allocated,
//   - any align references a consensus or query DNAFrag without a dbID,
//   - any align has an alignment type with no method link id,
//   - one row does not fit into the scratch buffer, or
//   - the vector is empty.
// In each of the error cases a message is printed to stderr.
void GenomicAlignAdaptor_store(GenomicAlignAdaptor *gaa, Vector *genomicAligns) {
  const size_t tmpStrSize = 65556;
  int ok = 1;
  int nTuple = 0;
  char *qStr = NULL;
  char *tmpStr = NULL;
  StatementHandle *sth;
  char commaStr[2] = {'\0','\0'};
  int i;

  if ((tmpStr = (char *)calloc(tmpStrSize,sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating tmpStr\n");
    ok = 0;
  }

  if (ok) {
    StrUtil_copyString(&qStr, "INSERT INTO genomic_align_block"
                              " (consensus_dnafrag_id, consensus_start, consensus_end,"
                              " query_dnafrag_id, query_start, query_end, query_strand, method_link_id,"
                              " score, perc_id, cigar_line) VALUES ",0);

    // First pass: check that everything has dbIDs before building any SQL
    for (i=0; i<Vector_getNumElement(genomicAligns); i++) {
      GenomicAlign *ga = Vector_getElementAt(genomicAligns,i);
      DNAFrag *consDNAFrag = GenomicAlign_getConsensusDNAFrag(ga);
      DNAFrag *queryDNAFrag = GenomicAlign_getQueryDNAFrag(ga);

      if (!DNAFrag_getDbID(consDNAFrag) || !DNAFrag_getDbID(queryDNAFrag)) {
        fprintf(stderr, "Error: dna_fragment in GenomicAlign is not in DB\n");
        ok = 0;
        break;
      }
    }
  }

  if (ok) {
    // Second pass: all clear for storing - append one "(...)" tuple per align.
    // The tuple must be built inside the loop, otherwise only the last align
    // would end up in the statement.
    for (i=0; i<Vector_getNumElement(genomicAligns); i++) {
      GenomicAlign *ga = Vector_getElementAt(genomicAligns,i);
      DNAFrag *consDNAFrag = GenomicAlign_getConsensusDNAFrag(ga);
      DNAFrag *queryDNAFrag = GenomicAlign_getQueryDNAFrag(ga);
      IDType methodLinkId = GenomicAlignAdaptor_methodLinkIdByAlignmentType(gaa, GenomicAlign_getAlignmentType(ga));
      int nWritten;

      if (!methodLinkId) {
        fprintf(stderr, "Error: There is no method_link with this type [%s] in the DB.\n",
                GenomicAlign_getAlignmentType(ga));
        ok = 0;
        break;
      }

      // commaStr is empty for the first tuple and "," for all later ones
      nWritten = snprintf(tmpStr, tmpStrSize,
                          " %s(" IDFMTSTR ", %d, %d, " IDFMTSTR ", %d, %d, %d, " IDFMTSTR ", %f, %f, '%s')",
                          commaStr,
                          DNAFrag_getDbID(consDNAFrag),
                          GenomicAlign_getConsensusStart(ga),
                          GenomicAlign_getConsensusEnd(ga),
                          DNAFrag_getDbID(queryDNAFrag),
                          GenomicAlign_getQueryStart(ga),
                          GenomicAlign_getQueryEnd(ga),
                          GenomicAlign_getQueryStrand(ga),
                          methodLinkId,
                          GenomicAlign_getScore(ga),
                          GenomicAlign_getPercentId(ga),
                          GenomicAlign_getCigarString(ga));

      // The cigar string has no fixed length, so refuse rows that would have
      // overrun the scratch buffer instead of writing a truncated row
      if (nWritten < 0 || (size_t)nWritten >= tmpStrSize) {
        fprintf(stderr, "Error: GenomicAlign row does not fit into the statement buffer\n");
        ok = 0;
        break;
      }

      qStr = StrUtil_appendString(qStr, tmpStr);
      commaStr[0] = ',';
      nTuple++;
    }

    // Only run the INSERT when it is complete and has at least one tuple
    if (ok && nTuple > 0) {
      sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr));
      sth->execute(sth);
      sth->finish(sth);
    }
  }

  free(qStr);
  free(tmpStr);
}
// Combine two sets of alignments into derived alignments.
//
// Every align contributes two list elements: one at its start and one at its
// end (+0.5). Aligns of alignSet1 are placed by their query DNAFrag and query
// coordinates (set number 0), aligns of alignSet2 by their consensus DNAFrag
// and consensus coordinates (set number 1). The list is sorted with
// GenomicAlignListElem_compFunc and then walked once: the first element seen
// for an align adds it to the "currently open" hash of its set, the second
// one removes it again. Whenever an align is added, it is paired with every
// align currently open in the other set via
// GenomicAlignAdaptor_addDerivedAlignments (set 0 align always passed first).
//
// Returns a newly created Vector holding whatever addDerivedAlignments put
// into it.
//
// NOTE(review): the event list, its elements and the two IDHashes are not
// released here (the original carried a "NIY Free gale" marker) - confirm the
// matching free functions and release them.
Vector *GenomicAlignAdaptor_mergeAlignsets(GenomicAlignAdaptor *gaa, Vector *alignSet1, Vector *alignSet2) {
  Vector *events = Vector_new();
  Vector *mergedAligns;
  IDHash *overlappingSets[2];
  int pos;
  int nEvent;

  // Set 0: start/end events in query coordinates
  pos = 0;
  while (pos < Vector_getNumElement(alignSet1)) {
    GenomicAlign *ga = Vector_getElementAt(alignSet1, pos);

    Vector_addElement(events,
                      GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getQueryDNAFrag(ga)),
                                               GenomicAlign_getQueryStart(ga), ga, 0));
    Vector_addElement(events,
                      GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getQueryDNAFrag(ga)),
                                               GenomicAlign_getQueryEnd(ga)+0.5, ga, 0));
    pos++;
  }

  // Set 1: start/end events in consensus coordinates
  pos = 0;
  while (pos < Vector_getNumElement(alignSet2)) {
    GenomicAlign *ga = Vector_getElementAt(alignSet2, pos);

    Vector_addElement(events,
                      GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getConsensusDNAFrag(ga)),
                                               GenomicAlign_getConsensusStart(ga), ga, 1));
    Vector_addElement(events,
                      GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getConsensusDNAFrag(ga)),
                                               GenomicAlign_getConsensusEnd(ga)+0.5, ga, 1));
    pos++;
  }

  Vector_sort(events, GenomicAlignListElem_compFunc);

  // Walk the sorted events, tracking per set which aligns are currently open
  overlappingSets[0] = IDHash_new(IDHASH_SMALL);
  overlappingSets[1] = IDHash_new(IDHASH_SMALL);

  mergedAligns = Vector_new();

  nEvent = Vector_getNumElement(events);
  for (pos = 0; pos < nEvent; pos++) {
    GenomicAlignListElem *event = Vector_getElementAt(events, pos);
    GenomicAlign *current = event->align;
    IDType currentId = GenomicAlign_getDbID(current);
    int thisSet = event->setNum;
    int otherSet = 1 - thisSet;

    if (IDHash_contains(overlappingSets[thisSet], currentId)) {
      // Second event of this align: it is no longer open
      IDHash_remove(overlappingSets[thisSet], currentId, NULL);
      continue;
    }

    // First event of this align: everything open in the other set overlaps it
    void **partners = IDHash_getValues(overlappingSets[otherSet]);
    int nPartner;
    int k;

    IDHash_add(overlappingSets[thisSet], currentId, current);

    nPartner = IDHash_getNumValues(overlappingSets[otherSet]);
    for (k = 0; k < nPartner; k++) {
      GenomicAlign *partner = partners[k];
      // The set 0 align is always the first of the pair
      GenomicAlign *fromSet0 = (thisSet == 0) ? current : partner;
      GenomicAlign *fromSet1 = (thisSet == 0) ? partner : current;

      GenomicAlignAdaptor_addDerivedAlignments(gaa, mergedAligns, fromSet0, fromSet1);
    }

    free(partners);
  }

  return mergedAligns;
}
// Fetch the alignments between dnaFrag and targetGenome for one alignment type.
//
//   gaa            adaptor
//   dnaFrag        fragment to start from; must be non NULL
//   targetGenome   genome to align against
//   startP, endP   passed through unchanged to the direct fetch
//   alignmentType  used to look up the method link id
//
// If the genome of dnaFrag and targetGenome are linked directly (as consensus
// or as query) the direct fetch result is returned. Otherwise every genome
// that is linked to both is used as an intermediate: the aligns from dnaFrag
// to the intermediate are fetched, and for each of them the aligns from its
// query fragment to targetGenome; each such pair is handed to
// GenomicAlignAdaptor_addDerivedAlignments and the collected Vector is
// returned.
//
// Returns NULL (after printing an error) when dnaFrag is NULL.
//
// NOTE(review): the GenomicAligns held in the intermediate result sets are not
// released here, only the Vectors that held them (original marker: "NIY
// freeing").
Vector *GenomicAlignAdaptor_fetchAllByDNAFragGenomeDB(GenomicAlignAdaptor *gaa, DNAFrag *dnaFrag, GenomeDB *targetGenome, int *startP, int *endP, char *alignmentType) {
  Vector *result = NULL;
  GenomeDB *genomeCons;
  IDType methodLinkId;
  GenomeDB *genomeQuery;
  Vector *mergedAligns;
  int ok = 1;

  if (!dnaFrag) {
    fprintf(stderr, "Error: dnaFrag argument must be non NULL\n");
    ok = 0;
  }

  if (ok) {
    methodLinkId = GenomicAlignAdaptor_methodLinkIdByAlignmentType(gaa, alignmentType);

    genomeCons = DNAFrag_getGenomeDB(dnaFrag);
    genomeQuery = targetGenome;

    // direct or indirect ??
    if (GenomeDB_hasConsensus(genomeCons, genomeQuery, methodLinkId) ||
        GenomeDB_hasQuery(genomeCons, genomeQuery, methodLinkId)) {
      result = GenomicAlignAdaptor_fetchAllByDNAFragGenomeDBDirect(gaa, dnaFrag, targetGenome, startP, endP, methodLinkId);
    } else {
      // indirect checks
      Vector *linkedCons = GenomeDB_linkedGenomesByMethodLinkId(genomeCons, methodLinkId);
      Vector *linkedQuery = GenomeDB_linkedGenomesByMethodLinkId(genomeQuery, methodLinkId);

      // there are not many genomes, square effort is cheap
      Vector *linked = Vector_new();
      Vector *set1 = Vector_new();
      int i;

      mergedAligns = Vector_new();

      // Intersect the two lists: keep genomes linked to both ends
      for (i=0; i<Vector_getNumElement(linkedCons); i++) {
        int j;
        GenomeDB *g1 = Vector_getElementAt(linkedCons, i);

        for (j=0; j<Vector_getNumElement(linkedQuery); j++) {
          // Must be indexed with the inner counter j; indexing with i compared
          // g1 against the wrong element and could read past linkedQuery
          GenomeDB *g2 = Vector_getElementAt(linkedQuery, j);
          if (g1 == g2) {
            Vector_addElement(linked, g1);
          }
        }
      }
      Vector_free(linkedCons);
      Vector_free(linkedQuery);

      // collect GenomicAligns from all linked genomes
      for (i=0; i<Vector_getNumElement(linked); i++) {
        GenomeDB *g = Vector_getElementAt(linked, i);

        Vector *gres = GenomicAlignAdaptor_fetchAllByDNAFragGenomeDBDirect(gaa, dnaFrag, g, startP, endP, methodLinkId);
        Vector_append(set1, gres);

        Vector_free(gres);
      }

      // go from each dnafrag in the result set to target_genome
      // there is room for improvement here: create start end
      // my %frags = map { $_->query_dnafrag->dbID => $_->query_dnafrag } @$set1;
      for (i=0; i<Vector_getNumElement(set1); i++) {
        GenomicAlign *alignA = Vector_getElementAt(set1,i);
        DNAFrag *frag = GenomicAlign_getQueryDNAFrag(alignA);
        int qStart = GenomicAlign_getQueryStart(alignA);
        int qEnd = GenomicAlign_getQueryEnd(alignA);

        Vector *dres = GenomicAlignAdaptor_fetchAllByDNAFragGenomeDBDirect(gaa, frag, genomeQuery, &qStart, &qEnd, methodLinkId);
        int j;

        for (j=0; j<Vector_getNumElement(dres); j++) {
          GenomicAlign *alignB = Vector_getElementAt(dres,j);

          GenomicAlignAdaptor_addDerivedAlignments(gaa, mergedAligns, alignA, alignB);
        }
        Vector_free(dres);
      }

      // The temporary containers are no longer needed; release them the same
      // way the other temporary Vectors in this function are released
      Vector_free(linked);
      Vector_free(set1);

      result = mergedAligns;
    }
  }

  return result;
}
// Also added a flag to indicate we actually want the gaps vector returned - quite often its not used in the caller and so would leak // memory Vector *RangeRegistry_checkAndRegister(RangeRegistry *registry, IDType id, long start, long end, long rStart, long rEnd, int wantGaps) { // The following was commented out due to Ensembl Genomes requirements // for bacterial genomes. // The following was uncommented because I'm not caring about those requirements if ( start > end ) { fprintf(stderr, "start argument [%ld] must be less than (or equal to) end argument [%ld]\n", start, end); exit(1); } if ( rStart > rEnd ) { fprintf(stderr, "rStart argument [%ld] must be less than (or equal to) rEnd argument [%ld]\n", rStart, rEnd); exit(1); } if ( rStart > start ) { fprintf(stderr, "rStart argument [%ld] must be less than (or equal to) start [%ld]\n", rStart, start); exit(1); } if ( rEnd < end ) { fprintf(stderr, "rEnd argument [%ld] must be greater than (or equal to) end [%ld]\n", rEnd, end); exit(1); } IDHash *regReg = RangeRegistry_getRegistry(registry); Vector *list; if (IDHash_contains(regReg, id)) { list = IDHash_getValue(regReg, id); } else { list = Vector_new(); IDHash_add(regReg, id, list); } Vector *gapPairs = NULL; if (wantGaps) { gapPairs = Vector_new(); } int len = Vector_getNumElement(list); if (len == 0) { //this is the first request for this id, return a gap pair for the // entire range and register it as seen CoordPair *cp = CoordPair_new(rStart, rEnd); Vector_addElement(list, cp); return Vector_copy(list); } //#### // loop through the list of existing ranges recording any "gaps" where // the existing range does not cover part of the requested range // int startIdx = 0; int endIdx = Vector_getNumElement(list)-1; int midIdx; CoordPair *range; // binary search the relevant pairs // helps if the list is big while ( ( endIdx - startIdx ) > 1 ) { midIdx = ( startIdx + endIdx ) >> 1; range = Vector_getElementAt(list, midIdx); if ( CoordPair_getEnd(range) < rStart ) 
{ startIdx = midIdx; } else { endIdx = midIdx; } } long gapStart; long gapEnd; int rIdx = -1; int rStartIdx = -1; int rEndIdx; gapStart = rStart; int i; for (i=startIdx; i < len ; i++ ) { CoordPair *pRange = Vector_getElementAt(list, i); long pStart = CoordPair_getStart(pRange); long pEnd = CoordPair_getEnd(pRange); // no work needs to be done at all if we find a range pair that // entirely overlaps the requested region if ( pStart <= start && pEnd >= end ) { return Vector_new(); // perl returns undef, but that causes me problems } // find adjacent or overlapping regions already registered if ( pEnd >= ( rStart - 1 ) && pStart <= ( rEnd + 1 ) ) { if ( rStartIdx < 0 ) { // Not yet been set rStartIdx = i; } rEndIdx = i; } if ( pStart > rStart ) { gapEnd = ( rEnd < pStart ) ? rEnd : pStart - 1; if (wantGaps) { CoordPair *cp = CoordPair_new(gapStart, gapEnd); Vector_addElement(gapPairs, cp); } } gapStart = ( rStart > pEnd ) ? rStart : pEnd + 1; if ( pEnd >= rEnd && rIdx < 0 ) { rIdx = i; break; } } // do we have to make another gap? 
if ( gapStart <= rEnd ) { if (wantGaps) { CoordPair *cp = CoordPair_new(gapStart, rEnd); Vector_addElement(gapPairs, cp); } } // // Merge the new range into the registered list // if (rStartIdx >= 0 ) { // rStartIdx has been set to something long newStart; long newEnd; CoordPair *rStartIdxRange = Vector_getElementAt(list, rStartIdx); CoordPair *rEndIdxRange = Vector_getElementAt(list, rEndIdx); if ( rStart < CoordPair_getStart(rStartIdxRange)) { newStart = rStart; } else { newStart = CoordPair_getStart(rStartIdxRange); } if ( rEnd > CoordPair_getEnd(rEndIdxRange)) { newEnd = rEnd; } else { newEnd = CoordPair_getEnd(rEndIdxRange); } CoordPair *cp = CoordPair_new(newStart, newEnd); // Think its <= for (i=rStartIdx; i<=rEndIdx; i++) { Vector_removeElementAt(list, rStartIdx); // Always remove from rStartIdx as array is shrinking by one each time called } Vector_insertElementAt(list, rStartIdx, cp); //splice( @$list, $rstart_idx, // $rend_idx - $rstart_idx + 1, // [ $new_start, $new_end ] ); } else if (rIdx >= 0) { CoordPair *cp = CoordPair_new(rStart, rEnd); Vector_insertElementAt(list, rIdx, cp); //splice( @$list, $r_idx, 0, [ $rstart, $rend ] ); } else { CoordPair *cp = CoordPair_new(rStart, rEnd); Vector_addElement(list, cp); } // Note if wantGaps is not set then gapPairs will be NULL - but you said you didn't want it so that should be OK return gapPairs; }
int main(int argc, char *argv[]) { DBAdaptor *dba; AssemblyMapperAdaptor *asma; int testNum = 1; initEnsC(argc, argv); dba = Test_initROEnsDB(); // // 1 Test AssemblyMapperAdaptor constructor // asma = DBAdaptor_getAssemblyMapperAdaptor(dba); ok(testNum++, asma!=NULL); // // Test fetch_by_CoordSystems // CoordSystemAdaptor *csa = DBAdaptor_getCoordSystemAdaptor(dba); CoordSystemAdaptor_dumpCachedMappings(csa); CoordSystem *toplevelCs = CoordSystemAdaptor_fetchByName(csa, "toplevel", NULL); CoordSystem *clnCs = CoordSystemAdaptor_fetchByName(csa, "clone", NULL); CoordSystem *superctgCs = CoordSystemAdaptor_fetchByName(csa, "supercontig", NULL); TopLevelAssemblyMapper *clnToplevelMapper = (TopLevelAssemblyMapper *)AssemblyMapperAdaptor_fetchByCoordSystems(asma, toplevelCs, clnCs); TopLevelAssemblyMapper *superctgToplevelMapper = (TopLevelAssemblyMapper *)AssemblyMapperAdaptor_fetchByCoordSystems(asma, toplevelCs, superctgCs); ok(testNum++, clnToplevelMapper!=NULL); // && $cln_toplevel_mapper->isa('Bio::EnsEMBL::TopLevelAssemblyMapper')); ok(testNum++, superctgToplevelMapper!=NULL); // && $cln_toplevel_mapper->isa('Bio::EnsEMBL::TopLevelAssemblyMapper')); // // test db has chr 20 (50KB -> 62MB) // // // Test map // MapperRangeSet *coords = NULL; if (clnToplevelMapper) { fprintf(stderr, "MAP 'AL359765.6'->toplevel\n"); coords = TopLevelAssemblyMapper_map(clnToplevelMapper,"AL359765.6", 1, 13780, 1, clnCs, 0, NULL); printCoords(coords); ok(testNum++, coords!=NULL); } if (superctgToplevelMapper) { fprintf(stderr, "MAP NT_028392->toplevel\n"); coords = TopLevelAssemblyMapper_map(superctgToplevelMapper, "NT_028392", 600000, 1000000, 1, superctgCs, 0, NULL); printCoords(coords); ok(testNum++, coords!=NULL); } // // Test list_seq_regions // Vector *seqRegions; int i; if (clnToplevelMapper) { seqRegions = TopLevelAssemblyMapper_listSeqRegions(clnToplevelMapper, "AL359765.6", 1, 13780, clnCs); ok(testNum++, seqRegions!=NULL && Vector_getNumElement(seqRegions) == 1 && 
!strcmp("20", Vector_getElementAt(seqRegions,0))); for (i=0;i<Vector_getNumElement(seqRegions); i++) { char *regionName = Vector_getElementAt(seqRegions, i); fprintf(stderr, "%s\n",regionName); } } if (superctgToplevelMapper) { seqRegions = TopLevelAssemblyMapper_listSeqRegions(superctgToplevelMapper, "NT_028392", 600000, 1000000, superctgCs); ok(testNum++, seqRegions!=NULL && Vector_getNumElement(seqRegions) == 1 && !strcmp("20", Vector_getElementAt(seqRegions,0))); for (i=0;i<Vector_getNumElement(seqRegions); i++) { char *regionName = Vector_getElementAt(seqRegions, i); fprintf(stderr, "%s\n",regionName); } } // // Test list_seq_ids // Vector *ids; if (clnToplevelMapper) { ids = TopLevelAssemblyMapper_listIds(clnToplevelMapper, "AL359765.6", 1, 13780, clnCs); ok(testNum++, ids!=NULL && Vector_getNumElement(ids) == 1 && *((IDType *)Vector_getElementAt(ids,0)) == 469283 ); for (i=0;i<Vector_getNumElement(ids); i++) { IDType id = *((IDType *)Vector_getElementAt(ids, i)); fprintf(stderr, IDFMTSTR"\n",id); } } if (superctgToplevelMapper) { ids = TopLevelAssemblyMapper_listIds(superctgToplevelMapper, "NT_028392", 600000, 1000000, superctgCs); ok(testNum++, ids!=NULL && Vector_getNumElement(ids) == 1 && *((IDType *)Vector_getElementAt(ids,0)) == 469283 ); for (i=0;i<Vector_getNumElement(ids); i++) { IDType id = *((IDType *)Vector_getElementAt(ids, i)); fprintf(stderr, IDFMTSTR"\n",id); } } // Test for a not implemented method // seqRegions = TopLevelAssemblyMapper_listContigIds(clnToplevelMapper, "AL359765.6", 1, 13780, 1); return 0; }
int main(int argc, char *argv[]) { DBAdaptor *dba; AssemblyMapperAdaptor *asma; int testNum = 1; initEnsC(argc, argv); dba = Test_initROEnsDB(); // // 1 Test AssemblyMapperAdaptor constructor // asma = DBAdaptor_getAssemblyMapperAdaptor(dba); ok(testNum++, asma!=NULL); // // 2 Test fetch_by_CoordSystems // CoordSystemAdaptor *csa = DBAdaptor_getCoordSystemAdaptor(dba); CoordSystemAdaptor_dumpCachedMappings(csa); CoordSystem *chrCs = CoordSystemAdaptor_fetchByName(csa, "chromosome", NULL); CoordSystem *clnCs = CoordSystemAdaptor_fetchByName(csa, "clone", NULL); CoordSystem *sCtgCs = CoordSystemAdaptor_fetchByName(csa, "supercontig", NULL); ChainedAssemblyMapper *asmMapper = (ChainedAssemblyMapper *)AssemblyMapperAdaptor_fetchByCoordSystems(asma, clnCs, chrCs); ok(testNum++, asmMapper!=NULL); // Need to make it an object before can do this && asmMapper->objectType == ( "Bio::EnsEMBL::ChainedAssemblyMapper" )); ChainedAssemblyMapper *chrSCtgMapper = (ChainedAssemblyMapper *)AssemblyMapperAdaptor_fetchByCoordSystems(asma, chrCs, sCtgCs); ok(testNum++, chrSCtgMapper!=NULL);// && $chr_sctg_mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper')); // // test db has chr 20 (50KB -> 62MB) // MapperRangeSet *coords; if (asmMapper) { fprintf(stderr,"MAP 20->clone\n"); coords = ChainedAssemblyMapper_map(asmMapper, "20", 500001, 60000000, 1, chrCs, 0, NULL); ok(testNum++, coords!=NULL); printCoords(coords); } if (asmMapper) { fprintf(stderr,"MAP 'AL359765.6'->chromosome\n"); coords = ChainedAssemblyMapper_map(asmMapper, "AL359765.6", 1, 13780, 1, clnCs, 0, NULL); ok(testNum++, coords!=NULL); printCoords(coords); } if (chrSCtgMapper) { fprintf(stderr,"MAP 20->supercontig\n"); coords = ChainedAssemblyMapper_map(chrSCtgMapper, "20", 500001, 60000000, 1, chrCs, 0, NULL); ok(testNum++, coords!=NULL); printCoords(coords); } // // Test list_seq_regions // fprintf(stderr,"Starting list tests\n"); int i; if (asmMapper) { Vector *seqRegions = ChainedAssemblyMapper_listSeqRegions(asmMapper, 
"20", 500001, 60000000, chrCs); ok(testNum++, seqRegions != NULL); for (i=0;i<Vector_getNumElement(seqRegions); i++) { char *regionName = Vector_getElementAt(seqRegions, i); fprintf(stderr, "%s\n",regionName); } } if (asmMapper) { Vector *seqRegions = ChainedAssemblyMapper_listSeqRegions(asmMapper, "AL359765.6", 1, 13780, clnCs); ok(testNum++, seqRegions!=NULL); for (i=0;i<Vector_getNumElement(seqRegions); i++) { char *regionName = Vector_getElementAt(seqRegions, i); fprintf(stderr, "%s\n",regionName); } } if (chrSCtgMapper) { Vector *seqRegions = ChainedAssemblyMapper_listSeqRegions(chrSCtgMapper, "NT_028392", 600000, 1000000, sCtgCs); ok(testNum++, seqRegions!=NULL); for (i=0;i<Vector_getNumElement(seqRegions); i++) { char *regionName = Vector_getElementAt(seqRegions, i); fprintf(stderr, "%s\n",regionName); } } if (chrSCtgMapper) { Vector *seqRegions = ChainedAssemblyMapper_listSeqRegions(chrSCtgMapper, "20", 3000000, 31000000, chrCs); ok(testNum++, seqRegions!=NULL); for (i=0;i<Vector_getNumElement(seqRegions); i++) { char *regionName = Vector_getElementAt(seqRegions, i); fprintf(stderr, "%s\n",regionName); } } // // Test list_seq_ids // if (asmMapper) { Vector *seqIds = ChainedAssemblyMapper_listIds(asmMapper, "20", 500001, 60000000, chrCs); ok(testNum++, seqIds!=NULL); for (i=0;i<Vector_getNumElement(seqIds); i++) { IDType regionId = *((IDType *)Vector_getElementAt(seqIds, i)); fprintf(stderr, IDFMTSTR"\n",regionId); } } if (asmMapper) { Vector *seqIds = ChainedAssemblyMapper_listIds(asmMapper, "AL359765.6", 1, 13780, clnCs); ok(testNum++, seqIds!=NULL); for (i=0;i<Vector_getNumElement(seqIds); i++) { IDType regionId = *((IDType *)Vector_getElementAt(seqIds, i)); fprintf(stderr, IDFMTSTR"\n",regionId); } } if (chrSCtgMapper) { Vector *seqIds = ChainedAssemblyMapper_listIds(chrSCtgMapper, "NT_028392", 600000, 1000000, sCtgCs); ok(testNum++, seqIds!=NULL); for (i=0;i<Vector_getNumElement(seqIds); i++) { IDType regionId = *((IDType *)Vector_getElementAt(seqIds, 
i)); fprintf(stderr, IDFMTSTR"\n",regionId); } } if (chrSCtgMapper) { Vector *seqIds = ChainedAssemblyMapper_listIds(chrSCtgMapper, "20", 3000000, 31000000, chrCs); ok(testNum++, seqIds!=NULL); for (i=0;i<Vector_getNumElement(seqIds); i++) { IDType regionId = *((IDType *)Vector_getElementAt(seqIds, i)); fprintf(stderr, IDFMTSTR"\n",regionId); } } return 0; }