int main(int argc, char *argv[]) { DBAdaptor *dba; Slice *slice; SliceAdaptor *sliceA; SequenceAdaptor *seqA; initEnsC(argc, argv); dba = DBAdaptor_new("ensembldb.ensembl.org","anonymous",NULL,"homo_sapiens_core_70_37",5306,NULL); //dba = DBAdaptor_new("ens-livemirror.internal.sanger.ac.uk","ensro",NULL,"homo_sapiens_core_70_37",3306,NULL); //dba = DBAdaptor_new("genebuild2.internal.sanger.ac.uk","ensadmin","ensembl","steve_hs_testdb",3306,NULL); sliceA = DBAdaptor_getSliceAdaptor(dba); seqA = DBAdaptor_getSequenceAdaptor(dba); // // Test fetch_by_Slice_start_end_strand // slice = SliceAdaptor_fetchByRegion(sliceA,"chromosome",CHR,START,END,STRAND,NULL,0); compareComplements(slice, seqA); slice = SliceAdaptor_fetchByRegion(sliceA, "clone","AL031658.11", POS_UNDEF, POS_UNDEF, STRAND_UNDEF, NULL, 0); compareComplements(slice, seqA); slice = SliceAdaptor_fetchByRegion(sliceA, "supercontig","NT_028392", POS_UNDEF, POS_UNDEF, STRAND_UNDEF, NULL, 0); compareComplements(slice, seqA); slice = SliceAdaptor_fetchByRegion(sliceA, "contig", "AL031658.11.1.162976", POS_UNDEF, POS_UNDEF, STRAND_UNDEF, NULL, 0); compareComplements(slice, seqA); return 0; }
int main(int argc, char *argv[]) { DBAdaptor *dba; DBAdaptor *writeDba; ProteinAlignFeatureAdaptor *pafa; Slice *slice; Vector *features; int i; int failed; initEnsC(argc, argv); dba = Test_initROEnsDB(); writeDba = Test_initRWEnsDB(); slice = Test_getStandardSlice(dba); ok(1, slice!=NULL); pafa = DBAdaptor_getProteinAlignFeatureAdaptor(writeDba); SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(dba); ok(2, pafa!=NULL); //features = Slice_getAllDNAPepAlignFeatures(slice,NULL,NULL, NULL,NULL); //Slice *slice3 = SliceAdaptor_fetchByRegion(sa,"chromosome","1",2,260000000,1,NULL,0); Slice *slice2 = SliceAdaptor_fetchByRegion(sa,"chromosome","1",1000000,4000000,1,NULL,0); features = Slice_getAllProteinAlignFeatures(slice2,NULL,NULL, NULL,NULL); ok(3, features!=NULL); ok(4, Vector_getNumElement(features)!=0); ProteinAlignFeatureAdaptor_store((BaseFeatureAdaptor*)pafa, features); return 0; }
int main(int argc, char *argv[]) { DBAdaptor * dba; StatementHandle *sth; ResultRow * row; Vector * slices; int nSlices; htsFile * out; int argNum = 1; char *inFName = NULL; char *outFName = NULL; char *dbUser = "******"; char *dbPass = NULL; int dbPort = 3306; char *dbHost = "ens-staging.internal.sanger.ac.uk"; char *dbName = "homo_sapiens_core_71_37"; char *assName = "GRCh37"; char *chrName = "1"; int flags = 0; int threads = 1; initEnsC(argc, argv); while (argNum < argc) { char *arg = argv[argNum]; char *val; // Ones without a val go here if (!strcmp(arg, "-U") || !strcmp(arg,"--ucsc_naming")) { flags |= M_UCSC_NAMING; } else { // Ones with a val go in this block if (argNum == argc-1) { Bamcov_usage(); } val = argv[++argNum]; if (!strcmp(arg, "-i") || !strcmp(arg,"--in_file")) { StrUtil_copyString(&inFName,val,0); } else if (!strcmp(arg, "-o") || !strcmp(arg,"--out_file")) { StrUtil_copyString(&outFName,val,0); } else if (!strcmp(arg, "-h") || !strcmp(arg,"--host")) { StrUtil_copyString(&dbHost,val,0); } else if (!strcmp(arg, "-p") || !strcmp(arg,"--password")) { StrUtil_copyString(&dbPass,val,0); } else if (!strcmp(arg, "-P") || !strcmp(arg,"--port")) { dbPort = atoi(val); } else if (!strcmp(arg, "-n") || !strcmp(arg,"--name")) { StrUtil_copyString(&dbName,val,0); } else if (!strcmp(arg, "-u") || !strcmp(arg,"--user")) { StrUtil_copyString(&dbUser,val,0); } else if (!strcmp(arg, "-t") || !strcmp(arg,"--threads")) { threads = atoi(val); } else if (!strcmp(arg, "-a") || !strcmp(arg,"--assembly")) { StrUtil_copyString(&assName,val,0); } else if (!strcmp(arg, "-v") || !strcmp(arg,"--verbosity")) { verbosity = atoi(val); // Temporary } else if (!strcmp(arg, "-c") || !strcmp(arg,"--chromosome")) { StrUtil_copyString(&chrName,val,0); } else { fprintf(stderr,"Error in command line at %s\n\n",arg); Bamcov_usage(); } } argNum++; } if (verbosity > 0) { printf("Program for calculating read coverage in a BAM file \n" "Steve M.J. Searle. [email protected] Last update April 2013.\n"); } if (!inFName || !outFName) { Bamcov_usage(); } dba = DBAdaptor_new(dbHost,dbUser,dbPass,dbName,dbPort,NULL); //nSlices = getSlices(dba, destName); nSlices = 1; slices = Vector_new(); SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(dba); Slice *slice = SliceAdaptor_fetchByRegion(sa,NULL,chrName,POS_UNDEF,POS_UNDEF,1,NULL, 0); Vector_addElement(slices,slice); if (Vector_getNumElement(slices) == 0) { fprintf(stderr, "Error: No slices.\n"); exit(1); } htsFile *in = hts_open(inFName, "rb"); if (in == 0) { fprintf(stderr, "Fail to open BAM file %s\n", inFName); return 1; } hts_set_threads(in, threads); hts_idx_t *idx; idx = bam_index_load(inFName); // load BAM index if (idx == 0) { fprintf(stderr, "BAM index file is not available.\n"); return 1; } int i; for (i=0; i<Vector_getNumElement(slices); i++) { Slice *slice = Vector_getElementAt(slices,i); if (verbosity > 0) printf("Working on '%s'\n",Slice_getName(slice)); // if (verbosity > 0) printf("Stage 1 - retrieving annotation from database\n"); // Vector *genes = getGenes(slice, flags); if (verbosity > 0) printf("Stage 1 - calculating coverage\n"); calcCoverage(inFName, slice, in, idx, flags); } hts_idx_destroy(idx); hts_close(in); if (verbosity > 0) printf("Done\n"); return 0; }
Vector *PredictionTranscriptAdaptor_fetchAllBySlice(PredictionTranscriptAdaptor *pta, Slice *slice, char *logicName, int loadExons) { //my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name); Vector *transcripts = BaseFeatureAdaptor_fetchAllBySlice((BaseFeatureAdaptor *)pta, slice, logicName); // if there are 0 or 1 transcripts still do lazy-loading if ( ! loadExons || Vector_getNumElement(transcripts) < 2 ) { return transcripts; } // preload all of the exons now, instead of lazy loading later // faster than 1 query per transcript // get extent of region spanned by transcripts long minStart = 2000000000; long maxEnd = -2000000000; int i; for (i=0; i<Vector_getNumElement(transcripts); i++) { PredictionTranscript *t = Vector_getElementAt(transcripts, i); if (PredictionTranscript_getSeqRegionStart((SeqFeature*)t) < minStart) { minStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)t); } if (PredictionTranscript_getSeqRegionEnd((SeqFeature*)t) > maxEnd) { maxEnd = PredictionTranscript_getSeqRegionEnd((SeqFeature*)t); } } Slice *extSlice; if (minStart >= Slice_getStart(slice) && maxEnd <= Slice_getEnd(slice)) { extSlice = slice; } else { SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba); extSlice = SliceAdaptor_fetchByRegion(sa, Slice_getCoordSystemName(slice), Slice_getSeqRegionName(slice), minStart, maxEnd, Slice_getStrand(slice), CoordSystem_getVersion(Slice_getCoordSystem(slice)), 0); } // associate exon identifiers with transcripts IDHash *trHash = IDHash_new(IDHASH_MEDIUM); for (i=0; i<Vector_getNumElement(transcripts); i++) { PredictionTranscript *t = Vector_getElementAt(transcripts, i); if ( ! IDHash_contains(trHash, PredictionTranscript_getDbID(t))) { IDHash_add(trHash, PredictionTranscript_getDbID(t), t); } } IDType *uniqueIds = IDHash_getKeys(trHash); char tmpStr[1024]; char *qStr = NULL; if ((qStr = (char *)calloc(655500,sizeof(char))) == NULL) { fprintf(stderr,"Failed allocating qStr\n"); return transcripts; } int lenNum; int endPoint = sprintf(qStr, "SELECT prediction_transcript_id, prediction_exon_id, exon_rank FROM prediction_exon WHERE prediction_transcript_id IN ("); for (i=0; i<IDHash_getNumValues(trHash); i++) { if (i!=0) { qStr[endPoint++] = ','; qStr[endPoint++] = ' '; } lenNum = sprintf(tmpStr,IDFMTSTR,uniqueIds[i]); memcpy(&(qStr[endPoint]), tmpStr, lenNum); endPoint+=lenNum; } qStr[endPoint++] = ')'; qStr[endPoint] = '\0'; free(uniqueIds); StatementHandle *sth = pta->prepare((BaseAdaptor *)pta,qStr,strlen(qStr)); sth->execute(sth); IDHash *exTrHash = IDHash_new(IDHASH_MEDIUM); ResultRow *row; while ((row = sth->fetchRow(sth))) { IDType trId = row->getLongLongAt(row,0); IDType exId = row->getLongLongAt(row,1); int rank = row->getIntAt(row,2); if (! IDHash_contains(exTrHash, exId)) { Vector *vec = Vector_new(); Vector_setFreeFunc(vec, PredictionTranscriptRankPair_free); IDHash_add(exTrHash, exId, vec); } Vector *exVec = IDHash_getValue(exTrHash, exId); PredictionTranscriptRankPair *trp = PredictionTranscriptRankPair_new(IDHash_getValue(trHash, trId), rank); Vector_addElement(exVec, trp); } IDHash_free(trHash, NULL); sth->finish(sth); PredictionExonAdaptor *pea = DBAdaptor_getPredictionExonAdaptor(pta->dba); Vector *exons = PredictionExonAdaptor_fetchAllBySlice(pea, extSlice); // move exons onto transcript slice, and add them to transcripts for (i=0; i<Vector_getNumElement(exons); i++) { PredictionExon *ex = Vector_getElementAt(exons, i); // Perl didn't have this line - it was in GeneAdaptor version so I think I'm going to keep it if (!IDHash_contains(exTrHash, PredictionExon_getDbID(ex))) continue; PredictionExon *newEx; if (slice != extSlice) { newEx = (PredictionExon*)PredictionExon_transfer((SeqFeature*)ex, slice); if (newEx == NULL) { fprintf(stderr, "Unexpected. Exon could not be transferred onto PredictionTranscript slice.\n"); exit(1); } } else { newEx = ex; } Vector *exVec = IDHash_getValue(exTrHash, PredictionExon_getDbID(newEx)); int j; for (j=0; j<Vector_getNumElement(exVec); j++) { PredictionTranscriptRankPair *trp = Vector_getElementAt(exVec, j); PredictionTranscript_addExon(trp->transcript, newEx, &trp->rank); } } IDHash_free(exTrHash, Vector_free); free(qStr); return transcripts; }
int main(int argc, char *argv[]) { DBAdaptor *dba; GeneAdaptor *ga; Slice *slice = NULL; Vector *genes = NULL; int i = 0; int failed = 0; initEnsC(argc, argv); // ProcUtil_showBacktrace(EnsC_progName); dba = Test_initROEnsDB(); slice = Test_getStandardSlice(dba); // DBAdaptor *seqdba = DBAdaptor_new("genebuild6.internal.sanger.ac.uk","ensadmin","ensembl","steve_chicken_rnaseq_missing_reference",3306,NULL); // dba = DBAdaptor_new("genebuild1.internal.sanger.ac.uk","ensadmin","ensembl","steve_chicken_rnaseq_missing_refined",3306,seqdba); ok(1, slice!=NULL); ga = DBAdaptor_getGeneAdaptor(dba); SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(dba); ok(2, ga!=NULL); slice = SliceAdaptor_fetchByRegion(sa,"chromosome","20",10000000,50000000,1,NULL,0); // slice = SliceAdaptor_fetchByRegion(sa,"chromosome","17",1000000,5000000,1,NULL,0); // slice = SliceAdaptor_fetchByRegion(sa,"chromosome","17",1,5000000,1,NULL,0); // Has a seleno // slice = SliceAdaptor_fetchByRegion(sa,"chromosome","1",1000000,27000000,1,NULL,0); // slice = SliceAdaptor_fetchByRegion(sa,"chromosome","MT",1,17000,1,NULL,0); genes = Slice_getAllGenes(slice, NULL, NULL, 1, NULL, NULL); fprintf(stdout, "Have %d genes\n", Vector_getNumElement(genes)); ok(3, genes!=NULL); ok(4, Vector_getNumElement(genes)!=0); failed = dumpGenes(genes, 1); ok(5, !failed); //Vector *toplevelSlices = SliceAdaptor_fetchAll(sa, "toplevel", NULL, 0); Vector *toplevelSlices = SliceAdaptor_fetchAll(sa, "chromosome", NULL, 0); for (i=0;i<Vector_getNumElement(toplevelSlices) && !failed;i++) { Slice *tlSlice = Vector_getElementAt(toplevelSlices, i); fprintf(stderr, "Slice %s\n", Slice_getName(tlSlice)); genes = Slice_getAllGenes(tlSlice, NULL, NULL, 1, NULL, NULL); fprintf(stderr, "Got %d genes on %s\n", Vector_getNumElement(genes), Slice_getName(tlSlice)); failed = dumpGenes(genes, 0); } //tc_malloc_stats(); fprintf(stderr,"\nEcostring table stats:\n"); EcoString_getInfo(ecoSTable); fprintf(stderr,"\n"); ProcUtil_timeInfo("at end of GeneTest"); return 0; }