示例#1
0
/*
 * Look up the xref whose display_label and external db_name match those of
 * the given DBEntry.
 *
 * dbea - adaptor used to prepare the statement
 * dbe  - entry to look for; must be non-NULL and a CLASS_DBENTRY descendent,
 *        otherwise an error is printed and the process exits with status 1
 *
 * Returns the xref_id from the first row of the result, or 0 when the query
 * produces no row. Exits with status 1 if the formatted query does not fit
 * in the local buffer.
 */
IDType DBEntryAdaptor_exists(DBEntryAdaptor *dbea, DBEntry *dbe) {
  char qStr[512];
  StatementHandle *sth;
  ResultRow *row;
  IDType dbID;
  int qLen;

  if (!dbe || !Class_isDescendent(CLASS_DBENTRY, dbe->objectType)) {
    fprintf(stderr,"Error: arg must be a DBEntry\n");
    exit(1);
  }

// NIY Was dbe->external_db instead of dbe->dbname - are they different?
// NIY mysql_quote strings (the values below are interpolated unescaped)
  qLen = snprintf(qStr, sizeof(qStr),
              "SELECT x.xref_id "
              " FROM   xref x, external_db xdb"
              " WHERE  x.external_db_id = xdb.external_db_id"
              " AND    x.display_label = '%s' "
              " AND    xdb.db_name = '%s'",
          DBEntry_getDisplayId(dbe),
          DBEntry_getDbName(dbe));

  // A truncated query would be malformed SQL, so refuse to run it
  if (qLen < 0 || (size_t)qLen >= sizeof(qStr)) {
    fprintf(stderr,"Error: query too long in DBEntryAdaptor_exists\n");
    exit(1);
  }

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

  sth->execute(sth);

  row = sth->fetchRow(sth);
  if( row == NULL ) {
    sth->finish(sth);
    return 0;
  }

  // Read the id before finishing the handle that owns the row
  dbID = row->getLongLongAt(row,0);

  sth->finish(sth);

  return dbID;
}
示例#2
0
/*
 * Compute per-base read coverage over a slice from an indexed BAM file and
 * print it to stdout, one "position coverage" line per base.
 *
 * fName - not used by this function
 * slice - region to analyse; its chr start must be 1
 * in    - open alignment file; the header is read from in->fp.bgzf
 * idx   - index used to build the region iterator
 * flags - if M_UCSC_NAMING is set, the sequence name is prefixed with "chr"
 *
 * Reads flagged unmapped, secondary, QC-fail or duplicate are counted as
 * "bad" and skipped. Match (M/=/X) and deletion (D) CIGAR blocks increment
 * coverage; positions outside the slice are never written.
 *
 * Returns 1 both when the slice start is unsupported and on normal
 * completion. Exits the process on header, region lookup, region parsing or
 * allocation failure.
 */
int calcCoverage(char *fName, Slice *slice, htsFile *in, hts_idx_t *idx, int flags) {
  int  ref;
  int  begRange;
  int  endRange;
  int  nameLen;
  char region[1024 + 64];   // sequence name plus room for ":start-end"
  char region_name[1024];


  if (Slice_getChrStart(slice) != 1) {
    fprintf(stderr, "Currently only allow a slice start position of 1\n");
    return 1;
  }

  // Build the sequence name once; it is used for the header lookup and
  // again as the prefix of the region string.
  if (flags & M_UCSC_NAMING) {
    nameLen = snprintf(region_name, sizeof(region_name), "chr%s", Slice_getSeqRegionName(slice));
  } else {
    nameLen = snprintf(region_name, sizeof(region_name), "%s", Slice_getSeqRegionName(slice));
  }
  if (nameLen < 0 || (size_t)nameLen >= sizeof(region_name)) {
    fprintf(stderr, "Sequence region name too long\n");
    exit(1);
  }

  // bam_hdr_read allocates the header itself, so no bam_hdr_init is needed
  bam_hdr_t *header = bam_hdr_read(in->fp.bgzf);
  if (header == NULL) {
    fprintf(stderr, "Failed to read BAM header\n");
    exit(1);
  }
  ref = bam_name2id(header, region_name);
  if (ref < 0) {
    fprintf(stderr, "Invalid region %s\n", region_name);
    exit(1);
  }
  snprintf(region, sizeof(region), "%s:%ld-%ld", region_name,
                             Slice_getSeqRegionStart(slice),
                             Slice_getSeqRegionEnd(slice));
  if (hts_parse_reg(region, &begRange, &endRange) == NULL) {
    fprintf(stderr, "Could not parse %s\n", region);
    exit(2);
  }
  bam_hdr_destroy(header);


  hts_itr_t *iter = sam_itr_queryi(idx, ref, begRange, endRange);
  bam1_t *b = bam_init1();

  // One zero-initialised Coverage entry per base of the slice
  long sliceLen = Slice_getLength(slice);
  Coverage *coverage = calloc(sliceLen,sizeof(Coverage));
  if (coverage == NULL && sliceLen > 0) {
    fprintf(stderr, "Failed to allocate coverage array\n");
    exit(1);
  }

  long counter = 0;
  long overlapping = 0;
  long bad = 0;
  int startIndex = 0;
  while (bam_itr_next(in, iter, b) >= 0) {
    if (b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)) {
      bad++;
      continue;
    }

    int end;
    //end = bam_calend(&b->core, bam1_cigar(b));
    end = bam_endpos(b);

    // Special case: reads which have zero length and start at begRange end at
    // begRange, ie. before the first base we're interested in - skip them.
    if (end == begRange) {
      continue;
    }
    counter++;

    if (!(counter%1000000)) {
      if (verbosity > 1) { printf("."); }
      fflush(stdout);
    }

// Remember: b->core.pos is zero based!
    int cigInd;
    int refPos;
    int readPos;
    uint32_t *cigar = bam_get_cigar(b);
    for (cigInd = readPos = 0, refPos = b->core.pos; cigInd < b->core.n_cigar; ++cigInd) {
      int k;
      int lenCigBlock = cigar[cigInd]>>4;
      int op          = cigar[cigInd]&0xf;

      if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
        for (k = 0; k < lenCigBlock; ++k) {
          long covInd = (long)refPos + k;
          if (covInd < 0 || covInd >= sliceLen) break; // out of boundary
          coverage[covInd].coverage++;
        }
        // Stop processing this read once it has run off the slice
        if (k < lenCigBlock) break;
        refPos += lenCigBlock; readPos += lenCigBlock;
      } else if (op == BAM_CDEL) {
        for (k = 0; k < lenCigBlock; ++k) {
          long covInd = (long)refPos + k;
          if (covInd < 0 || covInd >= sliceLen) break; // out of boundary
          coverage[covInd].coverage++;
        }
        if (k < lenCigBlock) break;
        refPos += lenCigBlock;
      } else if (op == BAM_CSOFT_CLIP) {
        readPos += lenCigBlock;
      } else if (op == BAM_CHARD_CLIP) {
      } else if (op == BAM_CINS) {
         readPos += lenCigBlock;
      } else if (op == BAM_CREF_SKIP) {
         refPos += lenCigBlock;
      }
    }

#ifdef DONE
    int j;
    int done = 0;
    int hadOverlap = 0;
    
    for (j=startIndex; j < Vector_getNumElement(genes) && !done; j++) {
      Gene *gene = Vector_getElementAt(genes,j); 
      if (!gene) {
        continue;
      }
// Remember: b->core.pos is zero based!
      if (b->core.pos < Gene_getEnd(gene) && end >= Gene_getStart(gene)) {
        int k;

        int doneGene = 0;
        for (k=0; k<Gene_getTranscriptCount(gene) && !doneGene; k++) {
          Transcript *trans = Gene_getTranscriptAt(gene,k);

          if (b->core.pos < Transcript_getEnd(trans) && end >= Transcript_getStart(trans)) {
            int m;
     
            for (m=0; m<Transcript_getExonCount(trans) && !doneGene; m++) {
              Exon *exon = Transcript_getExonAt(trans,m);

              if (b->core.pos < Exon_getEnd(exon) && end >= Exon_getStart(exon)) {

                // Only count as overlapping once (could be that a read overlaps more than one gene)
                if (!hadOverlap) {
                  overlapping++;
                  hadOverlap = 1;
                }

                gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
                gs->score++;
                
                doneGene = 1;
              }
            }
          }
        }
      } else if (Gene_getStart(gene) > end) {
        done = 1;
      } else if (Gene_getEnd(gene) < b->core.pos+1) {
        gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
        printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene), 
                                          Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "", 
                                          gs->score);

        if (verbosity > 1) { 
          printf("Removing gene %s (index %d) with extent %d to %d\n", 
                 Gene_getStableId(gene), 
                 gs->index,
                 Gene_getStart(gene),
                 Gene_getEnd(gene));
        }
        Vector_setElementAt(genes,j,NULL);

        // Magic (very important for speed) - move startIndex to first non null gene
        int n;
        startIndex = 0;
        for (n=0;n<Vector_getNumElement(genes);n++) {
          void *v = Vector_getElementAt(genes,n);

          if (v != NULL) {
            break;
          }
          startIndex++;
        }
        if (verbosity > 1) { 
          printf("startIndex now %d\n",startIndex);
        }
      }
    }
#endif
  }
  if (verbosity > 1) { printf("\n"); }

#ifdef DONE
// Print out read counts for what ever's left in the genes array
  int n;
  for (n=0;n<Vector_getNumElement(genes);n++) {
    Gene *gene = Vector_getElementAt(genes,n);

    if (gene != NULL) {
      gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
      printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene), 
                                        Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "", 
                                        gs->score);
    }

  }
#endif

  printf("Read %ld reads. Number of bad reads (unmapped, qc fail, secondary, dup) %ld\n", counter, bad);

  // Positions are reported 1-based
  long i;
  for (i=0; i< sliceLen; i++) {
    printf("%ld %ld\n", i+1, coverage[i].coverage);
  }

  sam_itr_destroy(iter);
  bam_destroy1(b);
  free(coverage);


  return 1;
}
示例#3
0
// Write one "support" line to fp for every alignment feature in support.
static void dumpGenes_printAlignSupport(FILE *fp, Vector *support) {
  int idx;
  for (idx = 0; idx < Vector_getNumElement(support); idx++) {
    BaseAlignFeature *feature = Vector_getElementAt(support, idx);
    fprintf(fp,"   support %s coords: %ld %ld %d\n", BaseAlignFeature_getHitSeqName(feature), BaseAlignFeature_getStart(feature), BaseAlignFeature_getEnd(feature), BaseAlignFeature_getStrand(feature));
  }
}

/*
 * Dump a textual description of every gene in the vector to stderr: the gene
 * itself, each of its transcripts, each transcript's exons and, when a
 * translation is present, the translation coordinates, translated sequence
 * and any translation attributes.
 *
 * genes       - vector of Gene pointers to describe
 * withSupport - when non-zero, supporting features (transcript, intron and
 *               exon level) are listed as well
 *
 * Returns the value of the local failure flag, which nothing in this
 * function ever sets, so the result is always 0.
 */
int dumpGenes(Vector *genes, int withSupport) {
  FILE *out = stderr;
  int failed = 0;
  int geneInd;

  for (geneInd = 0; geneInd < Vector_getNumElement(genes) && !failed; geneInd++) {
    Gene *gene = Vector_getElementAt(genes, geneInd);
    int transInd;

    fprintf(out,"Gene %s (%s) coords: %ld %ld %d\n",
            Gene_getStableId(gene),
            (Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : ""),
            Gene_getStart(gene),
            Gene_getEnd(gene),
            Gene_getStrand(gene));

    for (transInd = 0; transInd < Gene_getTranscriptCount(gene); transInd++) {
      Transcript *trans = Gene_getTranscriptAt(gene, transInd);
      int exonInd;

      fprintf(out," Trans %s coords: %ld %ld %d biotype: %s\n",
              Transcript_getStableId(trans),
              Transcript_getStart(trans),
              Transcript_getEnd(trans),
              Transcript_getStrand(trans),
              Transcript_getBiotype(trans));

      // Transcript level evidence: alignment features first, then introns
      if (withSupport) {
        int iseInd;
        Vector *intronSupport;

        dumpGenes_printAlignSupport(out, Transcript_getAllSupportingFeatures(trans));

        intronSupport = Transcript_getAllIntronSupportingEvidence(trans);
        for (iseInd = 0; iseInd < Vector_getNumElement(intronSupport); iseInd++) {
          IntronSupportingEvidence *evidence = Vector_getElementAt(intronSupport, iseInd);
          fprintf(out,"   intron support %s coords: %ld %ld %d\n", IntronSupportingEvidence_getHitName(evidence), IntronSupportingEvidence_getStart(evidence), IntronSupportingEvidence_getEnd(evidence), IntronSupportingEvidence_getStrand(evidence));
        }
      }

      // Exons, each optionally followed by its own supporting features
      for (exonInd = 0; exonInd < Transcript_getExonCount(trans); exonInd++) {
        Exon *exon = Transcript_getExonAt(trans, exonInd);
        fprintf(out,"  exon %s (%p) coords: %ld %ld %d\n",Exon_getStableId(exon), exon, Exon_getStart(exon), Exon_getEnd(exon), Exon_getStrand(exon));
        if (withSupport) {
          dumpGenes_printAlignSupport(out, Exon_getAllSupportingFeatures(exon));
        }
      }

      // Nothing more to report for transcripts without a translation
      Translation *translation = Transcript_getTranslation(trans);
      if (!translation) {
        continue;
      }

      fprintf(out," translation id: %s %s %d %s %d\n",Translation_getStableId(translation), 
              Exon_getStableId(Translation_getStartExon(translation)), Translation_getStart(translation),
              Exon_getStableId(Translation_getEndExon(translation)), Translation_getEnd(translation));

      // The translated sequence is released here once it has been printed
      char *peptide = Transcript_translate(trans);
      fprintf(out," translation: %s\n",peptide);
      free(peptide);

      Vector *attribs = Translation_getAllAttributes(translation, NULL);
      if (Vector_getNumElement(attribs)) {
        int attribInd;
        fprintf(out, " translation attributes:\n");
        for (attribInd = 0; attribInd < Vector_getNumElement(attribs); attribInd++) {
          Attribute *attrib = Vector_getElementAt(attribs, attribInd);
          fprintf(out, "  code %s name %s desc %s value %s\n", 
                  Attribute_getCode(attrib), 
                  Attribute_getName(attrib),
                  Attribute_getDescription(attrib),
                  Attribute_getValue(attrib));
        }
      }
    }
  }

  return failed;
}
示例#4
0
// Abort if an snprintf result shows the query did not fit in its buffer;
// a truncated statement would be malformed SQL.
static void DBEntryAdaptor_storeCheckQuery(int written, size_t bufSize) {
  if (written < 0 || (size_t)written >= bufSize) {
    fprintf(stderr,"Error: query too long in DBEntryAdaptor_store\n");
    exit(1);
  }
}

/*
 * Store an external reference and link it to an ensembl object.
 *
 * Steps, in order:
 *   1. look up the external_db row for the entry's db name and release
 *      (prints an error and exits with status 1 if there is none),
 *   2. look up the xref by external db, primary accession and version, and
 *      insert it (plus its synonyms) if it is not there yet,
 *   3. if no object_xref row links the xref to (ensType, ensObject), insert
 *      one, set the dbID and adaptor on exObj and, when exObj carries an
 *      IdentityXref, insert an identity_xref row too.
 *
 * dbea          - adaptor used to prepare every statement
 * exObj         - the DBEntry to store
 * ensObject     - id of the ensembl object the xref is attached to
 * ensType       - value stored in ensembl_object_type
 * ignoreRelease - not implemented; a notice is printed on every call
 *
 * Returns the xref_id of the existing or newly inserted xref. Exits with
 * status 1 if any formatted query does not fit in the local buffer.
 *
 * NIY: string values are interpolated into the SQL unescaped.
 */
IDType DBEntryAdaptor_store(DBEntryAdaptor *dbea, DBEntry *exObj, 
                            IDType ensObject, char *ensType, int ignoreRelease) {
  fprintf(stderr,"DBEntryAdaptor_store does not implement ignoreRelease functionality yet\n");

  char qStr[512];
  StatementHandle *sth;
  ResultRow *row;
  IDType dbRef;
  IDType dbX;
  int qLen;
  int mappingExists;

  //
  // Check for the existance of the external_db, throw if it does not exist
  //
  qLen = snprintf(qStr, sizeof(qStr),
     "SELECT external_db_id"
     "  FROM external_db"
     " WHERE db_name = '%s'"
     "   AND db_release = %s",
     DBEntry_getDbName(exObj),
     DBEntry_getRelease(exObj));
  DBEntryAdaptor_storeCheckQuery(qLen, sizeof(qStr));

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);
    
  row = sth->fetchRow(sth);
  if( row == NULL ) {
    sth->finish(sth);
    fprintf(stderr,"Error: external_db [%s] release [%s] does not exist\n", 
            DBEntry_getDbName(exObj), DBEntry_getRelease(exObj));
    exit(1);
  }

  dbRef =  row->getLongLongAt(row,0);
  sth->finish(sth);
    
  //
  // Check for the existance of the external reference, add it if not present
  //
  qLen = snprintf(qStr, sizeof(qStr),
       "SELECT xref_id"
       "  FROM xref"
       " WHERE external_db_id = " IDFMTSTR
       "   AND dbprimary_acc = '%s'"
       "   AND version = %s",
      dbRef,
      DBEntry_getPrimaryId(exObj),
      DBEntry_getVersion(exObj));
  DBEntryAdaptor_storeCheckQuery(qLen, sizeof(qStr));

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
  sth->execute(sth);

  row = sth->fetchRow(sth);
    
  if (row != NULL) {
    dbX =  row->getLongLongAt(row,0);
    sth->finish(sth);
  } else {
    //
    // store the new xref
    //

    // First finish the old sth
    sth->finish(sth);

// NIY Handling NULL values
    qLen = snprintf(qStr, sizeof(qStr),
       "INSERT ignore INTO xref"
       " SET dbprimary_acc = '%s',"
       "    display_label = '%s',"
       "    version = %s,"
       "    description = '%s',"
       "    external_db_id = " IDFMTSTR,
       DBEntry_getPrimaryId(exObj),
       DBEntry_getDisplayId(exObj),
       DBEntry_getVersion(exObj),
       DBEntry_getDescription(exObj),
       dbRef
      );
    DBEntryAdaptor_storeCheckQuery(qLen, sizeof(qStr));

    sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

    sth->execute(sth);
    dbX = sth->getInsertId(sth);

    sth->finish(sth);
	
    //
    // store the synonyms for the new xref
    // 
    if (DBEntry_getAllSynonyms(exObj)) {
      StatementHandle *checkSth;
      StatementHandle *storeSth;
      int i;
      Vector *synonyms;

      // The doubled percent signs leave format placeholders in the prepared
      // text; the values are supplied at execute time below.
      qLen = snprintf(qStr, sizeof(qStr),
              "SELECT xref_id, synonym"
              " FROM external_synonym"
              " WHERE xref_id = %" IDFMTSTR
              " AND synonym = '%%s'");
      DBEntryAdaptor_storeCheckQuery(qLen, sizeof(qStr));

      checkSth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

      qLen = snprintf(qStr, sizeof(qStr),
        "INSERT ignore INTO external_synonym"
        " SET xref_id = %" IDFMTSTR ", synonym = '%%s'");     
      DBEntryAdaptor_storeCheckQuery(qLen, sizeof(qStr));

      storeSth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

      synonyms = DBEntry_getAllSynonyms(exObj);

      for (i=0;i<Vector_getNumElement(synonyms); i++) {	    
        char *syn = Vector_getElementAt(synonyms,i);
        checkSth->execute(checkSth, dbX, syn);
        row = checkSth->fetchRow(checkSth);
        // Only insert synonyms that are not already stored for this xref
        if (!row) {
          storeSth->execute(storeSth, dbX, syn);
        }
      }
  	
      checkSth->finish(checkSth);
      storeSth->finish(storeSth);
    }
  }

  //
  // check if the object mapping was already stored
  //
  qLen = snprintf(qStr, sizeof(qStr),
           "SELECT xref_id"
           " FROM object_xref"
           " WHERE xref_id = " IDFMTSTR
           " AND   ensembl_object_type = '%s'"
           " AND   ensembl_id = " IDFMTSTR,
         dbX, ensType, ensObject);
  DBEntryAdaptor_storeCheckQuery(qLen, sizeof(qStr));

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

  sth->execute(sth);

  // Record whether a row was found before finishing the handle, so the
  // row pointer is never looked at after finish
  row = sth->fetchRow(sth);
  mappingExists = (row != NULL);
  sth->finish(sth);
    
  if (!mappingExists) {
    IDType Xidt;

    //
    // Store the reference to the internal ensembl object
    //
    qLen = snprintf(qStr, sizeof(qStr),
         "INSERT ignore INTO object_xref"
         " SET xref_id = " IDFMTSTR ","
         "     ensembl_object_type = '%s',"
         "     ensembl_id = " IDFMTSTR,
        dbX, ensType, ensObject);
    DBEntryAdaptor_storeCheckQuery(qLen, sizeof(qStr));

    sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
	
    sth->execute(sth);
    DBEntry_setDbID(exObj, dbX);
    DBEntry_setAdaptor(exObj, (BaseAdaptor *)dbea);
      
    Xidt = sth->getInsertId(sth);

    // Finish the insert handle here: sth is either reused for the
    // identity_xref statement below or goes out of scope
    sth->finish(sth);

    //
    // If this is an IdentityXref need to store in that table too
    //
    if (DBEntry_getIdentityXref(exObj)) {
      IdentityXref *idx = DBEntry_getIdentityXref(exObj);
      qLen = snprintf(qStr, sizeof(qStr),
             "INSERT ignore INTO identity_xref"
             " SET object_xref_id = " IDFMTSTR ","
             "     query_identity = %f,"
             "     target_identity = %f",
             Xidt, 
             IdentityXref_getQueryIdentity(idx),
             IdentityXref_getTargetIdentity(idx));
      DBEntryAdaptor_storeCheckQuery(qLen, sizeof(qStr));

      sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));
      sth->execute(sth);
      sth->finish(sth);
    }
  } 
  return dbX;    
}