/*
 * Look up the xref row that matches a DBEntry's display id and database name.
 *
 * dbea - adaptor used to prepare and run the query.
 * dbe  - entry to look for; must be non-NULL and a CLASS_DBENTRY descendent,
 *        otherwise an error is printed and the process exits with status 1.
 *
 * Returns the value of the first column (x.xref_id) of the first row found,
 * or 0 when the query yields no row.
 *
 * The query is built in a fixed-size buffer; if the display id and db name do
 * not fit, an error is printed and the process exits with status 1 rather
 * than writing past the end of the buffer.
 */
IDType DBEntryAdaptor_exists(DBEntryAdaptor *dbea, DBEntry *dbe) {
  char qStr[512];
  StatementHandle *sth;
  ResultRow *row;
  IDType dbID;
  int qLen;

  if (!dbe || !Class_isDescendent(CLASS_DBENTRY, dbe->objectType)) {
    fprintf(stderr,"Error: arg must be a DBEntry\n");
    exit(1);
  }

  // NIY Was dbe->external_db instead of dbe->dbname - are they different?
  // NIY mysql_quote strings (values are still interpolated unescaped)
  qLen = snprintf(qStr, sizeof qStr,
                  "SELECT x.xref_id "
                  " FROM xref x, external_db xdb"
                  " WHERE x.external_db_id = xdb.external_db_id"
                  " AND x.display_label = '%s' "
                  " AND xdb.db_name = '%s'",
                  DBEntry_getDisplayId(dbe),
                  DBEntry_getDbName(dbe));

  // snprintf reports the length it wanted to write; anything that does not
  // fit means the statement was truncated and must not be executed.
  if (qLen < 0 || (size_t)qLen >= sizeof qStr) {
    fprintf(stderr,"Error: query too long in DBEntryAdaptor_exists\n");
    exit(1);
  }

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  row = sth->fetchRow(sth);
  if( row == NULL ) {
    sth->finish(sth);
    return 0;
  }

  // Read the id before finishing the statement handle.
  dbID = row->getLongLongAt(row,0);

  sth->finish(sth);

  return dbID;
}
/*
 * Compute per-base read coverage over a slice from an indexed BAM file and
 * print it to stdout as "<1-based position> <coverage>" lines.
 *
 * fName - not used by this function.
 * slice - region to scan; its Slice_getChrStart() must be 1.
 * in    - open alignment file; the header is read from in->fp.bgzf.
 * idx   - index for 'in', used to build the region iterator.
 * flags - if M_UCSC_NAMING is set, "chr" is prepended to the sequence region
 *         name before it is looked up in the BAM header.
 *
 * Reads flagged unmapped, secondary, QC-fail or duplicate are counted as
 * "bad" and skipped. For the remaining reads, every reference base covered
 * by an M/=/X or D CIGAR operation has its coverage counter incremented.
 *
 * Returns 1 both when the slice does not start at 1 (after printing a
 * message) and after a completed run. Calls exit(1) if the header cannot be
 * read, the region is unknown, or an allocation/iterator cannot be created,
 * and exit(2) if the region string cannot be parsed.
 */
int calcCoverage(char *fName, Slice *slice, htsFile *in, hts_idx_t *idx, int flags) {
  int  ref;
  int  begRange;
  int  endRange;
  char region[1024];
  char region_name[512];
  int  nWritten;

  if (Slice_getChrStart(slice) != 1) {
    fprintf(stderr, "Currently only allow a slice start position of 1\n");
    return 1;
  }

  // Build the bare sequence name first; it is needed both for the header
  // lookup and as the prefix of the "name:start-end" region string below.
  if (flags & M_UCSC_NAMING) {
    nWritten = snprintf(region_name, sizeof region_name, "chr%s", Slice_getSeqRegionName(slice));
  } else {
    nWritten = snprintf(region_name, sizeof region_name, "%s", Slice_getSeqRegionName(slice));
  }
  if (nWritten < 0 || (size_t)nWritten >= sizeof region_name) {
    fprintf(stderr, "Sequence region name too long\n");
    exit(1);
  }

  // bam_hdr_read allocates the header itself, so no bam_hdr_init is needed.
  bam_hdr_t *header = bam_hdr_read(in->fp.bgzf);
  if (header == NULL) {
    fprintf(stderr, "Failed to read BAM header\n");
    exit(1);
  }

  ref = bam_name2id(header, region_name);
  if (ref < 0) {
    fprintf(stderr, "Invalid region %s\n", region_name);
    exit(1);
  }

  nWritten = snprintf(region, sizeof region, "%s:%ld-%ld", region_name,
                      Slice_getSeqRegionStart(slice),
                      Slice_getSeqRegionEnd(slice));
  if (nWritten < 0 || (size_t)nWritten >= sizeof region) {
    fprintf(stderr, "Region string too long\n");
    exit(1);
  }

  if (hts_parse_reg(region, &begRange, &endRange) == NULL) {
    fprintf(stderr, "Could not parse %s\n", region);
    exit(2);
  }
  bam_hdr_destroy(header);

  hts_itr_t *iter = sam_itr_queryi(idx, ref, begRange, endRange);
  if (iter == NULL) {
    fprintf(stderr, "Failed to create iterator for %s\n", region);
    exit(1);
  }

  bam1_t *b = bam_init1();
  if (b == NULL) {
    fprintf(stderr, "Failed to allocate BAM record\n");
    exit(1);
  }

  // One zero-initialised counter per base of the slice. Because the slice
  // starts at 1, the zero-based reference position is also the array index.
  long sliceLen = Slice_getLength(slice);
  Coverage *coverage = calloc((size_t)sliceLen, sizeof *coverage);
  if (coverage == NULL) {
    fprintf(stderr, "Failed to allocate coverage array\n");
    exit(1);
  }

  long counter = 0;
  long overlapping = 0;   // only used by the DONE sections
  long bad = 0;
  int startIndex = 0;     // only used by the DONE sections

  while (bam_itr_next(in, iter, b) >= 0) {
    if (b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)) {
      bad++;
      continue;
    }

    int end;
    //end = bam_calend(&b->core, bam1_cigar(b));
    end = bam_endpos(b);

    // There is a special case for reads which have zero length and start at begRange (so end at begRange ie. before the first base we're interested in).
    // That is the reason for the || end == begRange test
    if (end == begRange) {
      continue;
    }
    counter++;

    if (!(counter%1000000)) {
      if (verbosity > 1) {
        printf(".");
      }
      fflush(stdout);
    }

    // Remember: b->core.pos is zero based!
    int cigInd;
    int refPos;
    int readPos;
    uint32_t *cigar = bam_get_cigar(b);
    for (cigInd = readPos = 0, refPos = b->core.pos; cigInd < b->core.n_cigar; ++cigInd) {
      int k;
      int lenCigBlock = cigar[cigInd]>>4;   // upper bits: operation length
      int op          = cigar[cigInd]&0xf;  // low 4 bits: operation code

      if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
        for (k = 0; k < lenCigBlock; ++k) {
          long covIdx = (long)refPos + k;
          // Stop at the slice boundary instead of writing outside the array.
          if (covIdx < 0 || covIdx >= sliceLen) break;
          coverage[covIdx].coverage++;
        }
        if (k < lenCigBlock) break;  // ran off the slice: done with this read
        refPos += lenCigBlock;
        readPos += lenCigBlock;
      } else if (op == BAM_CDEL) {
        // Deleted reference bases are counted as covered as well.
        for (k = 0; k < lenCigBlock; ++k) {
          long covIdx = (long)refPos + k;
          if (covIdx < 0 || covIdx >= sliceLen) break;
          coverage[covIdx].coverage++;
        }
        if (k < lenCigBlock) break;
        refPos += lenCigBlock;
      } else if (op == BAM_CSOFT_CLIP) {
        readPos += lenCigBlock;
      } else if (op == BAM_CHARD_CLIP) {
        // consumes neither read nor reference
      } else if (op == BAM_CINS) {
        readPos += lenCigBlock;
      } else if (op == BAM_CREF_SKIP) {
        refPos += lenCigBlock;
      }
    }

// NOTE(review): the DONE sections use genes, gs and geneCountsHash, which are
// not declared in this function; they are kept unchanged for reference.
#ifdef DONE
    int j;
    int done = 0;
    int hadOverlap = 0;

    for (j=startIndex; j < Vector_getNumElement(genes) && !done; j++) {
      Gene *gene = Vector_getElementAt(genes,j);
      if (!gene) {
        continue;
      }
      // Remember: b->core.pos is zero based!
      if (b->core.pos < Gene_getEnd(gene) && end >= Gene_getStart(gene)) {
        int k;
        int doneGene = 0;
        for (k=0; k<Gene_getTranscriptCount(gene) && !doneGene; k++) {
          Transcript *trans = Gene_getTranscriptAt(gene,k);

          if (b->core.pos < Transcript_getEnd(trans) && end >= Transcript_getStart(trans)) {
            int m;
            for (m=0; m<Transcript_getExonCount(trans) && !doneGene; m++) {
              Exon *exon = Transcript_getExonAt(trans,m);

              if (b->core.pos < Exon_getEnd(exon) && end >= Exon_getStart(exon)) {
                // Only count as overlapping once (could be that a read overlaps more than one gene)
                if (!hadOverlap) {
                  overlapping++;
                  hadOverlap = 1;
                }

                gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
                gs->score++;

                doneGene = 1;
              }
            }
          }
        }
      } else if (Gene_getStart(gene) > end) {
        done = 1;
      } else if (Gene_getEnd(gene) < b->core.pos+1) {
        gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
        printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene), Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "", gs->score);

        if (verbosity > 1) {
          printf("Removing gene %s (index %d) with extent %d to %d\n", Gene_getStableId(gene), gs->index, Gene_getStart(gene), Gene_getEnd(gene));
        }

        Vector_setElementAt(genes,j,NULL);

        // Magic (very important for speed) - move startIndex to first non null gene
        int n;
        startIndex = 0;
        for (n=0;n<Vector_getNumElement(genes);n++) {
          void *v = Vector_getElementAt(genes,n);

          if (v != NULL) {
            break;
          }
          startIndex++;
        }
        if (verbosity > 1) {
          printf("startIndex now %d\n",startIndex);
        }
      }
    }
#endif
  }
  if (verbosity > 1) {
    printf("\n");
  }

#ifdef DONE
  // Print out read counts for what ever's left in the genes array
  int n;
  for (n=0;n<Vector_getNumElement(genes);n++) {
    Gene *gene = Vector_getElementAt(genes,n);

    if (gene != NULL) {
      gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
      printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene), Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "", gs->score);
    }
  }
#endif

  printf("Read %ld reads. Number of bad reads (unmapped, qc fail, secondary, dup) %ld\n", counter, bad);

  // Emit coverage with 1-based positions.
  long i;
  for (i=0; i< sliceLen; i++) {
    printf("%ld %ld\n", i+1, coverage[i].coverage);
  }

  free(coverage);
  sam_itr_destroy(iter);
  bam_destroy1(b);

  return 1;
}
/* Write one " support ..." line to 'out' for each BaseAlignFeature in 'features'. */
static void dumpGenes_printSupportingFeatures(FILE *out, Vector *features) {
  int idx = 0;

  while (idx < Vector_getNumElement(features)) {
    BaseAlignFeature *feature = Vector_getElementAt(features, idx);

    fprintf(out," support %s coords: %ld %ld %d\n",
            BaseAlignFeature_getHitSeqName(feature),
            BaseAlignFeature_getStart(feature),
            BaseAlignFeature_getEnd(feature),
            BaseAlignFeature_getStrand(feature));
    idx++;
  }
}

/*
 * Dump a textual description of every gene in 'genes' to stderr.
 *
 * For each gene the stable id, display xref label (empty when the gene has
 * no display xref), coordinates and strand are printed, followed by each
 * transcript, its exons and, when present, its translation, the translated
 * sequence and the translation attributes.
 *
 * genes       - vector of Gene pointers.
 * withSupport - when non-zero, also print transcript supporting features,
 *               intron supporting evidence and exon supporting features.
 *
 * Returns the 'status' flag, which nothing in this function ever sets, so
 * the result is always 0.
 */
int dumpGenes(Vector *genes, int withSupport) {
  FILE *out   = stderr;
  int  status = 0;
  int  geneIdx;

  for (geneIdx = 0; geneIdx < Vector_getNumElement(genes) && !status; geneIdx++) {
    Gene *gene = Vector_getElementAt(genes, geneIdx);
    int  transIdx;

    if (Gene_getDisplayXref(gene)) {
      fprintf(out,"Gene %s (%s) coords: %ld %ld %d\n",
              Gene_getStableId(gene),
              DBEntry_getDisplayId(Gene_getDisplayXref(gene)),
              Gene_getStart(gene),
              Gene_getEnd(gene),
              Gene_getStrand(gene));
    } else {
      fprintf(out,"Gene %s (%s) coords: %ld %ld %d\n",
              Gene_getStableId(gene),
              "",
              Gene_getStart(gene),
              Gene_getEnd(gene),
              Gene_getStrand(gene));
    }

    for (transIdx = 0; transIdx < Gene_getTranscriptCount(gene); transIdx++) {
      Transcript  *trans = Gene_getTranscriptAt(gene, transIdx);
      Translation *translation;
      int          exonIdx;

      fprintf(out," Trans %s coords: %ld %ld %d biotype: %s\n",
              Transcript_getStableId(trans),
              Transcript_getStart(trans),
              Transcript_getEnd(trans),
              Transcript_getStrand(trans),
              Transcript_getBiotype(trans));

      if (withSupport) {
        // Transcript-level evidence: alignment features first, then introns.
        Vector *transSupport = Transcript_getAllSupportingFeatures(trans);
        dumpGenes_printSupportingFeatures(out, transSupport);

        Vector *intronEvidence = Transcript_getAllIntronSupportingEvidence(trans);
        int evIdx = 0;
        while (evIdx < Vector_getNumElement(intronEvidence)) {
          IntronSupportingEvidence *evidence = Vector_getElementAt(intronEvidence, evIdx);

          fprintf(out," intron support %s coords: %ld %ld %d\n",
                  IntronSupportingEvidence_getHitName(evidence),
                  IntronSupportingEvidence_getStart(evidence),
                  IntronSupportingEvidence_getEnd(evidence),
                  IntronSupportingEvidence_getStrand(evidence));
          evIdx++;
        }
      }

      for (exonIdx = 0; exonIdx < Transcript_getExonCount(trans); exonIdx++) {
        Exon *exon = Transcript_getExonAt(trans, exonIdx);

        fprintf(out," exon %s (%p) coords: %ld %ld %d\n",
                Exon_getStableId(exon),
                exon,
                Exon_getStart(exon),
                Exon_getEnd(exon),
                Exon_getStrand(exon));

        if (withSupport) {
          Vector *exonSupport = Exon_getAllSupportingFeatures(exon);
          dumpGenes_printSupportingFeatures(out, exonSupport);
        }
      }

      translation = Transcript_getTranslation(trans);
      if (!translation) {
        continue;
      }

      fprintf(out," translation id: %s %s %d %s %d\n",
              Translation_getStableId(translation),
              Exon_getStableId(Translation_getStartExon(translation)),
              Translation_getStart(translation),
              Exon_getStableId(Translation_getEndExon(translation)),
              Translation_getEnd(translation));

      // The translated sequence is released with free() once printed.
      char *peptide = Transcript_translate(trans);
      fprintf(out," translation: %s\n", peptide);
      free(peptide);

      Vector *attributes = Translation_getAllAttributes(translation, NULL);
      if (Vector_getNumElement(attributes)) {
        int attrIdx;

        fprintf(out, " translation attributes:\n");
        for (attrIdx = 0; attrIdx < Vector_getNumElement(attributes); attrIdx++) {
          Attribute *attr = Vector_getElementAt(attributes, attrIdx);

          fprintf(out, " code %s name %s desc %s value %s\n",
                  Attribute_getCode(attr),
                  Attribute_getName(attr),
                  Attribute_getDescription(attr),
                  Attribute_getValue(attr));
        }
      }
    }
  }

  return status;
}
/* Abort with an error if an snprintf result shows the query did not fit. */
static void DBEntryAdaptor_storeCheckQuery(int written, size_t bufSize) {
  if (written < 0 || (size_t)written >= bufSize) {
    fprintf(stderr,"Error: query too long for buffer in DBEntryAdaptor_store\n");
    exit(1);
  }
}

/*
 * Store an external reference and its link to an ensembl object.
 *
 * Steps, in order:
 *   1. Look up the external_db row for the entry's db name and release;
 *      print an error and exit(1) if there is none.
 *   2. Look up the xref by external_db_id, primary accession and version.
 *      If absent, insert it and then insert any synonyms that are not
 *      already present in external_synonym.
 *   3. If no object_xref row links the xref to (ensType, ensObject), insert
 *      one, set the dbID and adaptor on exObj, and, when exObj carries an
 *      IdentityXref, insert the identity_xref row as well.
 *
 * dbea          - adaptor used to prepare statements.
 * exObj         - the DBEntry to store.
 * ensObject     - id of the ensembl object the xref is attached to.
 * ensType       - ensembl object type string stored in object_xref.
 * ignoreRelease - accepted but not implemented; a notice is written to
 *                 stderr on every call.
 *
 * Returns the xref id (found or newly inserted).
 *
 * Every statement is built in a fixed-size buffer; if one does not fit, an
 * error is printed and the process exits with status 1. Values are still
 * interpolated into the SQL without quoting or NULL handling (NIY).
 */
IDType DBEntryAdaptor_store(DBEntryAdaptor *dbea, DBEntry *exObj, IDType ensObject, char *ensType, int ignoreRelease) {
  fprintf(stderr,"DBEntryAdaptor_store does not implement ignoreRelease functionality yet\n");

  char qStr[512];
  StatementHandle *sth;
  ResultRow *row;
  IDType dbRef;
  IDType dbX;
  int qLen;
  int alreadyMapped;

  //
  // Check for the existance of the external_db, throw if it does not exist
  //
  qLen = snprintf(qStr, sizeof qStr,
                  "SELECT external_db_id"
                  " FROM external_db"
                  " WHERE db_name = '%s'"
                  " AND db_release = %s",
                  DBEntry_getDbName(exObj),
                  DBEntry_getRelease(exObj));
  DBEntryAdaptor_storeCheckQuery(qLen, sizeof qStr);

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  row = sth->fetchRow(sth);
  if( row == NULL ) {
    sth->finish(sth);
    fprintf(stderr,"Error: external_db [%s] release [%s] does not exist\n",
            DBEntry_getDbName(exObj),
            DBEntry_getRelease(exObj));
    exit(1);
  }

  dbRef = row->getLongLongAt(row,0);
  sth->finish(sth);

  //
  // Check for the existance of the external reference, add it if not present
  //
  qLen = snprintf(qStr, sizeof qStr,
                  "SELECT xref_id"
                  " FROM xref"
                  " WHERE external_db_id = " IDFMTSTR
                  " AND dbprimary_acc = '%s'"
                  " AND version = %s",
                  dbRef,
                  DBEntry_getPrimaryId(exObj),
                  DBEntry_getVersion(exObj));
  DBEntryAdaptor_storeCheckQuery(qLen, sizeof qStr);

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  row = sth->fetchRow(sth);

  if (row != NULL) {
    dbX = row->getLongLongAt(row,0);
    sth->finish(sth);
  } else {
    //
    // store the new xref
    //

    // First finish the old sth
    sth->finish(sth);

    // NIY Handling NULL values
    qLen = snprintf(qStr, sizeof qStr,
                    "INSERT ignore INTO xref"
                    " SET dbprimary_acc = '%s',"
                    " display_label = '%s',"
                    " version = %s,"
                    " description = '%s',"
                    " external_db_id = " IDFMTSTR,
                    DBEntry_getPrimaryId(exObj),
                    DBEntry_getDisplayId(exObj),
                    DBEntry_getVersion(exObj),
                    DBEntry_getDescription(exObj),
                    dbRef
                   );
    DBEntryAdaptor_storeCheckQuery(qLen, sizeof qStr);

    sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
    sth->execute(sth);

    dbX = sth->getInsertId(sth);

    sth->finish(sth);

    //
    // store the synonyms for the new xref
    //
    if (DBEntry_getAllSynonyms(exObj)) {
      StatementHandle *checkSth;
      StatementHandle *storeSth;
      int i;
      Vector *synonyms;

      // No values are substituted here: the doubled '%' characters leave
      // conversion specifiers in the resulting statement text, and the
      // xref id and synonym are passed to execute() below instead.
      qLen = snprintf(qStr, sizeof qStr,
                      "SELECT xref_id, synonym"
                      " FROM external_synonym"
                      " WHERE xref_id = %" IDFMTSTR
                      " AND synonym = '%%s'");
      DBEntryAdaptor_storeCheckQuery(qLen, sizeof qStr);

      checkSth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

      qLen = snprintf(qStr, sizeof qStr,
                      "INSERT ignore INTO external_synonym"
                      " SET xref_id = %" IDFMTSTR ", synonym = '%%s'");
      DBEntryAdaptor_storeCheckQuery(qLen, sizeof qStr);

      storeSth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

      synonyms = DBEntry_getAllSynonyms(exObj);

      for (i=0;i<Vector_getNumElement(synonyms); i++) {
        char *syn = Vector_getElementAt(synonyms,i);

        // Only insert synonyms that are not already stored for this xref.
        checkSth->execute(checkSth, dbX, syn);
        row = checkSth->fetchRow(checkSth);
        if (!row) {
          storeSth->execute(storeSth, dbX, syn);
        }
      }

      checkSth->finish(checkSth);
      storeSth->finish(storeSth);
    }
  }

  //
  // check if the object mapping was already stored
  //
  qLen = snprintf(qStr, sizeof qStr,
                  "SELECT xref_id"
                  " FROM object_xref"
                  " WHERE xref_id = " IDFMTSTR
                  " AND ensembl_object_type = '%s'"
                  " AND ensembl_id = " IDFMTSTR,
                  dbX, ensType, ensObject);
  DBEntryAdaptor_storeCheckQuery(qLen, sizeof qStr);

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  row = sth->fetchRow(sth);
  // Record the outcome before finish(), so the row pointer is never
  // inspected once the statement handle has been released.
  alreadyMapped = (row != NULL);
  sth->finish(sth);

  if (!alreadyMapped) {
    IDType Xidt;

    //
    // Store the reference to the internal ensembl object
    //
    qLen = snprintf(qStr, sizeof qStr,
                    "INSERT ignore INTO object_xref"
                    " SET xref_id = " IDFMTSTR ","
                    " ensembl_object_type = '%s',"
                    " ensembl_id = " IDFMTSTR,
                    dbX, ensType, ensObject);
    DBEntryAdaptor_storeCheckQuery(qLen, sizeof qStr);

    sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
    sth->execute(sth);

    DBEntry_setDbID(exObj, dbX);
    DBEntry_setAdaptor(exObj, (BaseAdaptor *)dbea);

    Xidt = sth->getInsertId(sth);

    // Release the object_xref insert handle; it is no longer needed and
    // would otherwise be lost when sth is reused below.
    sth->finish(sth);

    //
    // If this is an IdentityXref need to store in that table too
    //
    if (DBEntry_getIdentityXref(exObj)) {
      IdentityXref *idx = DBEntry_getIdentityXref(exObj);

      qLen = snprintf(qStr, sizeof qStr,
                      "INSERT ignore INTO identity_xref"
                      " SET object_xref_id = " IDFMTSTR ","
                      " query_identity = %f,"
                      " target_identity = %f",
                      Xidt,
                      IdentityXref_getQueryIdentity(idx),
                      IdentityXref_getTargetIdentity(idx));
      DBEntryAdaptor_storeCheckQuery(qLen, sizeof qStr);

      sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
      sth->execute(sth);
      sth->finish(sth);
    }
  }

  return dbX;
}