Пример #1
0
/*
 * Test driver: connects to a human core database, fetches one slice from each
 * of four coordinate systems (chromosome, clone, supercontig, contig) and
 * passes every slice to compareComplements() together with the sequence
 * adaptor.
 *
 * Always returns 0; any reporting is left to compareComplements().
 */
int main(int argc, char *argv[]) {
  initEnsC(argc, argv);

  // Alternative connections that were used during development:
  //   DBAdaptor_new("ens-livemirror.internal.sanger.ac.uk","ensro",NULL,"homo_sapiens_core_70_37",3306,NULL);
  //   DBAdaptor_new("genebuild2.internal.sanger.ac.uk","ensadmin","ensembl","steve_hs_testdb",3306,NULL);
  DBAdaptor *db = DBAdaptor_new("ensembldb.ensembl.org","anonymous",NULL,"homo_sapiens_core_70_37",5306,NULL);

  SliceAdaptor *sliceAdaptor = DBAdaptor_getSliceAdaptor(db);
  SequenceAdaptor *sequenceAdaptor = DBAdaptor_getSequenceAdaptor(db);

  //
  // Test fetch_by_Slice_start_end_strand
  //

  // Explicit chromosome region taken from the CHR/START/END/STRAND definitions
  Slice *chromosomeSlice = SliceAdaptor_fetchByRegion(sliceAdaptor,"chromosome",CHR,START,END,STRAND,NULL,0);
  compareComplements(chromosomeSlice, sequenceAdaptor);

  // The remaining regions are requested with undefined start, end and strand
  Slice *cloneSlice = SliceAdaptor_fetchByRegion(sliceAdaptor, "clone","AL031658.11", POS_UNDEF, POS_UNDEF, STRAND_UNDEF, NULL, 0);
  compareComplements(cloneSlice, sequenceAdaptor);

  Slice *supercontigSlice = SliceAdaptor_fetchByRegion(sliceAdaptor, "supercontig","NT_028392", POS_UNDEF, POS_UNDEF, STRAND_UNDEF, NULL, 0);
  compareComplements(supercontigSlice, sequenceAdaptor);

  Slice *contigSlice = SliceAdaptor_fetchByRegion(sliceAdaptor, "contig", "AL031658.11.1.162976", POS_UNDEF, POS_UNDEF, STRAND_UNDEF, NULL, 0);
  compareComplements(contigSlice, sequenceAdaptor);

  return 0;
}
Пример #2
0
/*
 * Test driver for storing protein align features.
 *
 * Reads the protein align features on chromosome 1:1000000-4000000 from the
 * read-only test database and stores them through the
 * ProteinAlignFeatureAdaptor of the read/write test database.
 *
 * Checks 1-4 are reported through ok(). Always returns 0.
 */
int main(int argc, char *argv[]) {
  DBAdaptor *dba;
  DBAdaptor *writeDba;
  ProteinAlignFeatureAdaptor *pafa;
  Slice *slice;
  Vector *features;

  initEnsC(argc, argv);

  dba = Test_initROEnsDB();

  writeDba = Test_initRWEnsDB();

  slice = Test_getStandardSlice(dba);

  ok(1, slice!=NULL);

  // Features are read via dba but written via the adaptor of writeDba
  pafa = DBAdaptor_getProteinAlignFeatureAdaptor(writeDba);
  SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(dba);

  ok(2, pafa!=NULL);

  //features =  Slice_getAllDNAPepAlignFeatures(slice,NULL,NULL, NULL,NULL);

  //Slice *slice3 = SliceAdaptor_fetchByRegion(sa,"chromosome","1",2,260000000,1,NULL,0);
  Slice *slice2 = SliceAdaptor_fetchByRegion(sa,"chromosome","1",1000000,4000000,1,NULL,0);
  features =  Slice_getAllProteinAlignFeatures(slice2,NULL,NULL, NULL,NULL);

  ok(3, features!=NULL);
  // Short-circuit so a NULL vector is reported as a failed check rather than dereferenced
  ok(4, features!=NULL && Vector_getNumElement(features)!=0);

  // Nothing to store if the fetch itself failed
  if (features != NULL) {
    ProteinAlignFeatureAdaptor_store((BaseFeatureAdaptor*)pafa, features);
  }

  return 0;
}
Пример #3
0
/*
 * Entry point of the BAM read coverage program.
 *
 * Command line options:
 *   -U, --ucsc_naming        sets M_UCSC_NAMING in the flags passed to calcCoverage (takes no value)
 *   -i, --in_file    <val>   input BAM file (required)
 *   -o, --out_file   <val>   output file name (required by the check below,
 *                            although it is not used anywhere else in this function)
 *   -h, --host       <val>   database host
 *   -n, --name       <val>   database name
 *   -u, --user       <val>   database user
 *   -p, --password   <val>   database password
 *   -P, --port       <val>   database port
 *   -t, --threads    <val>   thread count handed to hts_set_threads
 *   -a, --assembly   <val>   assembly name (parsed but not used in this function)
 *   -v, --verbosity  <val>   sets the global verbosity level
 *   -c, --chromosome <val>   name of the single sequence region to process (temporary option)
 *
 * Returns 0 on success and 1 when the BAM file or its index cannot be opened;
 * exits with status 1 when no slice could be fetched.
 *
 * NOTE(review): the code assumes Bamcov_usage() does not return - confirm.
 */
int main(int argc, char *argv[]) {
  DBAdaptor *      dba;
  Vector *         slices;

  int   argNum = 1;

  char *inFName  = NULL;
  char *outFName = NULL;

  char *dbUser = "******";
  char *dbPass = NULL;
  int   dbPort = 3306;

  char *dbHost = "ens-staging.internal.sanger.ac.uk";
  char *dbName = "homo_sapiens_core_71_37";

  char *assName = "GRCh37";

  char *chrName = "1";


  int flags = 0;
  int   threads  = 1;

  initEnsC(argc, argv);

  while (argNum < argc) {
    char *arg = argv[argNum];
    char *val;

// Ones without a val go here
    if (!strcmp(arg, "-U") || !strcmp(arg,"--ucsc_naming")) {
      flags |= M_UCSC_NAMING;
    } else {
// Ones with a val go in this block
      // A value option as the last argument has no value to consume
      if (argNum == argc-1) {
        Bamcov_usage();
      }

      val = argv[++argNum];
  
      if (!strcmp(arg, "-i") || !strcmp(arg,"--in_file")) {
        StrUtil_copyString(&inFName,val,0);
      } else if (!strcmp(arg, "-o") || !strcmp(arg,"--out_file")) {
        StrUtil_copyString(&outFName,val,0);
      } else if (!strcmp(arg, "-h") || !strcmp(arg,"--host")) {
        StrUtil_copyString(&dbHost,val,0);
      } else if (!strcmp(arg, "-p") || !strcmp(arg,"--password")) {
        StrUtil_copyString(&dbPass,val,0);
      } else if (!strcmp(arg, "-P") || !strcmp(arg,"--port")) {
        dbPort = atoi(val);
      } else if (!strcmp(arg, "-n") || !strcmp(arg,"--name")) {
        StrUtil_copyString(&dbName,val,0);
      } else if (!strcmp(arg, "-u") || !strcmp(arg,"--user")) {
        StrUtil_copyString(&dbUser,val,0);
      } else if (!strcmp(arg, "-t") || !strcmp(arg,"--threads")) {
        threads = atoi(val);
      } else if (!strcmp(arg, "-a") || !strcmp(arg,"--assembly")) {
        StrUtil_copyString(&assName,val,0);
      } else if (!strcmp(arg, "-v") || !strcmp(arg,"--verbosity")) {
        verbosity = atoi(val);
// Temporary
      } else if (!strcmp(arg, "-c") || !strcmp(arg,"--chromosome")) {
        StrUtil_copyString(&chrName,val,0);
      } else {
        fprintf(stderr,"Error in command line at %s\n\n",arg);
        Bamcov_usage();
      }
    }
    argNum++;
  }

  if (verbosity > 0) {
    printf("Program for calculating read coverage in a BAM file \n"
           "Steve M.J. Searle.  [email protected]  Last update April 2013.\n");
  }

  if (!inFName || !outFName) {
    Bamcov_usage();
  }

  dba = DBAdaptor_new(dbHost,dbUser,dbPass,dbName,dbPort,NULL);

  //getSlices(dba, destName);

  slices = Vector_new();

  SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(dba);

  Slice *chrSlice = SliceAdaptor_fetchByRegion(sa,NULL,chrName,POS_UNDEF,POS_UNDEF,1,NULL, 0);

  // Only queue slices that were actually found, so that a failed fetch is
  // caught by the emptiness check below instead of being dereferenced later
  if (chrSlice != NULL) {
    Vector_addElement(slices,chrSlice);
  }

  if (Vector_getNumElement(slices) == 0) {
    fprintf(stderr, "Error: No slices.\n");
    exit(1);
  }

  htsFile *in = hts_open(inFName, "rb");
  if (in == NULL) {
    fprintf(stderr, "Fail to open BAM file %s\n", inFName);
    return 1;
  }

  hts_set_threads(in, threads);
  hts_idx_t *idx;
  idx = bam_index_load(inFName); // load BAM index
  if (idx == NULL) {
    fprintf(stderr, "BAM index file is not available.\n");
    // Release the already opened BAM handle before bailing out
    hts_close(in);
    return 1;
  }

  int i;
  for (i=0; i<Vector_getNumElement(slices); i++) {
    Slice *slice = Vector_getElementAt(slices,i);

    if (verbosity > 0) printf("Working on '%s'\n",Slice_getName(slice));

//    if (verbosity > 0) printf("Stage 1 - retrieving annotation from database\n");
//    Vector *genes = getGenes(slice, flags);

    if (verbosity > 0) printf("Stage 1 - calculating coverage\n");
    calcCoverage(inFName, slice, in, idx, flags);
  }


  hts_idx_destroy(idx);
  hts_close(in);

  if (verbosity > 0) printf("Done\n");
  return 0;
}
Пример #4
0
/*
 * Fetch all prediction transcripts on a slice, optionally preloading their exons.
 *
 * pta        adaptor used for the feature fetch, for preparing the exon query and
 *            for reaching the database adaptor (pta->dba)
 * slice      slice to fetch transcripts from
 * logicName  passed unchanged to BaseFeatureAdaptor_fetchAllBySlice
 * loadExons  when zero, or when fewer than two transcripts are found, the
 *            transcripts are returned straight away without preloading exons
 *
 * Returns the vector produced by BaseFeatureAdaptor_fetchAllBySlice. When
 * preloading is requested, all exons in the region spanned by the transcripts
 * are fetched with a single slice query and attached to their transcripts.
 * If the query string cannot be built, the transcripts are returned without
 * preloaded exons.
 *
 * Exits the process with status 1 if an exon cannot be transferred onto 'slice'.
 */
Vector *PredictionTranscriptAdaptor_fetchAllBySlice(PredictionTranscriptAdaptor *pta, Slice *slice, char *logicName, int loadExons) {
  // Widest decimal rendering of a 64 bit integer (sign included). Used to size the query buffer.
  // NOTE(review): assumes IDFMTSTR is a plain integer conversion for IDType - confirm.
  enum { MAX_ID_CHARS = 20 };
  static const char queryPrefix[] = "SELECT prediction_transcript_id, prediction_exon_id, exon_rank FROM prediction_exon WHERE  prediction_transcript_id IN (";

  //my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name);
  Vector *transcripts = BaseFeatureAdaptor_fetchAllBySlice((BaseFeatureAdaptor *)pta, slice, logicName);

  // if there are 0 or 1 transcripts still do lazy-loading
  if ( ! loadExons || Vector_getNumElement(transcripts) < 2 ) {
    return transcripts;
  }

  // preload all of the exons now, instead of lazy loading later
  // faster than 1 query per transcript

  // get extent of region spanned by transcripts
  long minStart =  2000000000;
  long maxEnd   = -2000000000;

  int nTranscripts = Vector_getNumElement(transcripts);
  int i;
  for (i=0; i<nTranscripts; i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    long regionStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)t);
    long regionEnd   = PredictionTranscript_getSeqRegionEnd((SeqFeature*)t);

    if (regionStart < minStart) {
      minStart = regionStart;
    }
    if (regionEnd > maxEnd) {
      maxEnd = regionEnd;
    }
  }

  // Reuse the caller's slice when it already covers every transcript,
  // otherwise fetch a slice spanning the full extent
  Slice *extSlice;

  if (minStart >= Slice_getStart(slice) && maxEnd <= Slice_getEnd(slice)) {
    extSlice = slice;
  } else {
    SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba);
    extSlice = SliceAdaptor_fetchByRegion(sa, Slice_getCoordSystemName(slice), Slice_getSeqRegionName(slice),
                                          minStart, maxEnd, Slice_getStrand(slice), CoordSystem_getVersion(Slice_getCoordSystem(slice)), 0);
  }

  // associate exon identifiers with transcripts
  IDHash *trHash = IDHash_new(IDHASH_MEDIUM);
  for (i=0; i<nTranscripts; i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    if ( ! IDHash_contains(trHash, PredictionTranscript_getDbID(t))) {
      IDHash_add(trHash, PredictionTranscript_getDbID(t), t);
    }
  }

  IDType *uniqueIds = IDHash_getKeys(trHash);
  int nIds = IDHash_getNumValues(trHash);

  // Size the query buffer from the number of ids rather than relying on a
  // fixed capacity: prefix + per id (digits + ", ") + ')' + terminator
  size_t qStrSize = sizeof(queryPrefix) + (size_t)nIds * (MAX_ID_CHARS + 2) + 1;
  char *qStr = NULL;
  if ((qStr = (char *)calloc(qStrSize,sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating qStr\n");
    free(uniqueIds);
    IDHash_free(trHash, NULL);
    return transcripts;
  }

  size_t endPoint = sizeof(queryPrefix) - 1;
  memcpy(qStr, queryPrefix, endPoint);

  char tmpStr[64];
  for (i=0; i<nIds; i++) {
    int lenNum = snprintf(tmpStr, sizeof(tmpStr), IDFMTSTR, uniqueIds[i]);

    // An id wider than budgeted for would overrun qStr, so give up on preloading
    if (lenNum < 0 || lenNum > MAX_ID_CHARS) {
      fprintf(stderr,"Unexpected id width while building prediction exon query\n");
      free(qStr);
      free(uniqueIds);
      IDHash_free(trHash, NULL);
      return transcripts;
    }

    if (i!=0) {
      qStr[endPoint++] = ',';
      qStr[endPoint++] = ' ';
    }
    memcpy(&(qStr[endPoint]), tmpStr, lenNum);
    endPoint+=lenNum;
  }
  qStr[endPoint++] = ')';
  qStr[endPoint] = '\0';

  free(uniqueIds);

  StatementHandle *sth = pta->prepare((BaseAdaptor *)pta,qStr,endPoint);
  sth->execute(sth);

  // Map each exon id to the list of (transcript, rank) pairs it belongs to
  IDHash *exTrHash = IDHash_new(IDHASH_MEDIUM);
  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType trId = row->getLongLongAt(row,0);
    IDType exId = row->getLongLongAt(row,1);
    int    rank = row->getIntAt(row,2);

    if (! IDHash_contains(exTrHash, exId)) {
      Vector *vec = Vector_new();
      Vector_setFreeFunc(vec, PredictionTranscriptRankPair_free);
      IDHash_add(exTrHash, exId, vec);
    }
    Vector *exVec = IDHash_getValue(exTrHash, exId);
    PredictionTranscriptRankPair *trp = PredictionTranscriptRankPair_new(IDHash_getValue(trHash, trId), rank);
    Vector_addElement(exVec, trp);
  }

  IDHash_free(trHash, NULL);

  sth->finish(sth);

  PredictionExonAdaptor *pea = DBAdaptor_getPredictionExonAdaptor(pta->dba);
  Vector *exons = PredictionExonAdaptor_fetchAllBySlice(pea, extSlice);

  // move exons onto transcript slice, and add them to transcripts
  int nExons = Vector_getNumElement(exons);
  for (i=0; i<nExons; i++) {
    PredictionExon *ex = Vector_getElementAt(exons, i);

  // Perl didn't have this line - it was in GeneAdaptor version so I think I'm going to keep it
    if (!IDHash_contains(exTrHash, PredictionExon_getDbID(ex))) continue;

    PredictionExon *newEx;
    if (slice != extSlice) {
      newEx = (PredictionExon*)PredictionExon_transfer((SeqFeature*)ex, slice);
      if (newEx == NULL) {
        fprintf(stderr, "Unexpected. Exon could not be transferred onto PredictionTranscript slice.\n");
        exit(1);
      }
    } else {
      newEx = ex;
    }

    Vector *exVec = IDHash_getValue(exTrHash, PredictionExon_getDbID(newEx));
    int j;
    for (j=0; j<Vector_getNumElement(exVec); j++) {
      PredictionTranscriptRankPair *trp = Vector_getElementAt(exVec, j);
      PredictionTranscript_addExon(trp->transcript, newEx, &trp->rank);
    }
  }

  // NOTE(review): the 'exons' vector itself is not released here - confirm who owns it
  IDHash_free(exTrHash, Vector_free);
  free(qStr);

  return transcripts;
}
Пример #5
0
/*
 * Gene fetching test driver.
 *
 * Runs five numbered ok() checks against the read-only test database, dumps
 * the genes found on chromosome 20:10000000-50000000, then walks every slice
 * of the "chromosome" coordinate system and dumps its genes until a dump
 * reports failure. Finishes by printing EcoString table statistics and timing
 * information. Always returns 0.
 */
int main(int argc, char *argv[]) {
  initEnsC(argc, argv);

//  ProcUtil_showBacktrace(EnsC_progName);

  DBAdaptor *db = Test_initROEnsDB();
  Slice *region = Test_getStandardSlice(db);

//  DBAdaptor *seqdba = DBAdaptor_new("genebuild6.internal.sanger.ac.uk","ensadmin","ensembl","steve_chicken_rnaseq_missing_reference",3306,NULL);
//  dba = DBAdaptor_new("genebuild1.internal.sanger.ac.uk","ensadmin","ensembl","steve_chicken_rnaseq_missing_refined",3306,seqdba);

  ok(1, region!=NULL);

  GeneAdaptor *geneAdaptor = DBAdaptor_getGeneAdaptor(db);
  SliceAdaptor *sliceAdaptor = DBAdaptor_getSliceAdaptor(db);

  ok(2, geneAdaptor!=NULL);

  // The standard slice is replaced by an explicit region for the gene checks.
  // Other regions that have been used here:
  //   "chromosome","17",1000000,5000000
  //   "chromosome","17",1,5000000
  //   "chromosome","1",1000000,27000000   (has a seleno)
  //   "chromosome","MT",1,17000
  region = SliceAdaptor_fetchByRegion(sliceAdaptor,"chromosome","20",10000000,50000000,1,NULL,0);
  Vector *geneList = Slice_getAllGenes(region, NULL, NULL, 1, NULL, NULL);

  fprintf(stdout, "Have %d genes\n", Vector_getNumElement(geneList));
  ok(3, geneList!=NULL);
  ok(4, Vector_getNumElement(geneList)!=0);

  int dumpFailed = dumpGenes(geneList, 1);
  ok(5, !dumpFailed);

  // "toplevel" can be requested here instead of "chromosome"
  Vector *chromosomes = SliceAdaptor_fetchAll(sliceAdaptor, "chromosome", NULL, 0);

  // Stop at the first slice whose dump fails (or immediately if the dump above failed)
  int sliceIdx = 0;
  while (sliceIdx < Vector_getNumElement(chromosomes) && !dumpFailed) {
    Slice *current = Vector_getElementAt(chromosomes, sliceIdx);

    fprintf(stderr, "Slice %s\n", Slice_getName(current));
    geneList = Slice_getAllGenes(current, NULL, NULL, 1, NULL, NULL);
    fprintf(stderr, "Got %d genes on %s\n", Vector_getNumElement(geneList), Slice_getName(current));
    dumpFailed = dumpGenes(geneList, 0);

    sliceIdx++;
  }

  //tc_malloc_stats();

  fprintf(stderr,"\nEcostring table stats:\n");
  EcoString_getInfo(ecoSTable);

  fprintf(stderr,"\n");
  ProcUtil_timeInfo("at end of GeneTest");

  return 0;
}