int main(int argc,char **argv) { int count; bode::Interval *i; std::string readsFN(argv[1]); std::string inFN(argv[2]); std::string outFN(argv[3]); int target[16384]; bode::IntervalReader *reads = bode::IntervalReader::open(readsFN); bode::BedReader *in = new bode::BedReader(inFN); std::ofstream *out = new std::ofstream(outFN.c_str(),std::ofstream::out); bode::IntervalSet *is = loadReads(reads); reads->close(); while (i=in->nextI()) { int left = i->left(); int right = i->right(); if (right - left > 16384) { std::cerr << "over-wide interval: "<<i->chrom()<<":"<<left<<"-"<<right<<std::endl; } else { is->density(i->chrom(),left,right,target); *out <<i->chrom()<<":"<<left<<"-"<<right; for (int j=0;j<right-left;j++) { *out << " " << target[j]; } *out << std::endl; } count++; } in->close(); out->close(); std::cerr << "processed " << count << " intervals" << std::endl; return 0; }
/*
 * Hand out the next read from the batch buffer.
 *
 * Whenever the running position is a multiple of BATCH_SIZE, loadReads() is
 * called first. The returned pointer refers to the slot
 * (current % BATCH_SIZE) inside reads->readData, and the running position is
 * advanced by one before returning.
 */
struct read* readsGetNext(Reads* reads) {
  struct read* next;

  /* At a batch boundary: ask for more reads before picking the slot. */
  if (reads->current % BATCH_SIZE == 0) {
    loadReads(reads);
  }

  /* The slot is computed after the load, from the current position. */
  next = reads->readData + (reads->current % BATCH_SIZE);
  reads->current++;

  return next;
}
int main(int argc, char **argv) { char *gkpStoreName = NULL; char *outPrefix = NULL; uint32 minReadLength = 0; uint32 firstFileArg = 0; char errorLogName[FILENAME_MAX]; char htmlLogName[FILENAME_MAX]; char nameMapName[FILENAME_MAX]; argc = AS_configure(argc, argv); int arg = 1; int err = 0; while (arg < argc) { if (strcmp(argv[arg], "-o") == 0) { gkpStoreName = argv[++arg]; } else if (strcmp(argv[arg], "-minlength") == 0) { minReadLength = atoi(argv[++arg]); } else if (strcmp(argv[arg], "--") == 0) { firstFileArg = arg++; break; } else if (argv[arg][0] == '-') { fprintf(stderr, "ERROR: unknown option '%s'\n", argv[arg]); err++; } else { firstFileArg = arg; break; } arg++; } if (gkpStoreName == NULL) err++; if (firstFileArg == 0) err++; if (err) { fprintf(stderr, "usage: %s [...] -o gkpStore\n", argv[0]); fprintf(stderr, " -o gkpStore create this gkpStore\n"); fprintf(stderr, " \n"); fprintf(stderr, " -minlength L discard reads shorter than L\n"); fprintf(stderr, " \n"); fprintf(stderr, " \n"); if (gkpStoreName == NULL) fprintf(stderr, "ERROR: no gkpStore (-g) supplied.\n"); if (firstFileArg == 0) fprintf(stderr, "ERROR: no input files supplied.\n"); exit(1); } gkStore *gkpStore = gkStore::gkStore_open(gkpStoreName, gkStore_extend); gkRead *gkpRead = NULL; gkLibrary *gkpLibrary = NULL; uint32 gkpFileID = 0; // Used for HTML output, an ID for each file loaded. 
uint32 inLineLen = 1024; char inLine[1024] = { 0 }; validSeq['a'] = validSeq['c'] = validSeq['g'] = validSeq['t'] = validSeq['n'] = 1; validSeq['A'] = validSeq['C'] = validSeq['G'] = validSeq['T'] = validSeq['N'] = 1; errno = 0; sprintf(errorLogName, "%s/errorLog", gkpStoreName); FILE *errorLog = fopen(errorLogName, "w"); if (errno) fprintf(stderr, "ERROR: cannot open error file '%s': %s\n", errorLogName, strerror(errno)), exit(1); sprintf(htmlLogName, "%s/load.dat", gkpStoreName); FILE *htmlLog = fopen(htmlLogName, "w"); if (errno) fprintf(stderr, "ERROR: cannot open uid map file '%s': %s\n", htmlLogName, strerror(errno)), exit(1); sprintf(nameMapName, "%s/readNames.txt", gkpStoreName); FILE *nameMap = fopen(nameMapName, "w"); if (errno) fprintf(stderr, "ERROR: cannot open uid map file '%s': %s\n", nameMapName, strerror(errno)), exit(1); uint32 nERROR = 0; // There aren't any errors, we just exit fatally if encountered. uint32 nWARNS = 0; uint32 nLOADED = 0; // Reads loaded uint64 bLOADED = 0; // Bases loaded uint32 nSKIPPED = 0; uint64 bSKIPPED = 0; // Bases not loaded, too short #if 0 fprintf(htmlLog, "<!DOCTYPE html>\n"); fprintf(htmlLog, "<html>\n"); fprintf(htmlLog, "<head>\n"); fprintf(htmlLog, "<title>gatekeeper load statistics</title>\n"); fprintf(htmlLog, "<style type='text/css'>\n"); fprintf(htmlLog, "body { font-family: Helvetica, Verdana, sans-serif; }\n"); fprintf(htmlLog, "h1, h2 { color: #ee3e80; }\n"); fprintf(htmlLog, "p { color: #665544; }\n"); fprintf(htmlLog, "th, td { border: 1px solid #111111; padding: 2px 2px 2px 2px; }\n"); fprintf(htmlLog, "td:hover { background-color: #e4e4e4; }\n"); fprintf(htmlLog, "th:hover { background-color: #d4d4d4; }\n"); fprintf(htmlLog, "tr.details { visibility: collapse; }\n"); fprintf(htmlLog, "</style>\n"); fprintf(htmlLog, "</head>\n"); fprintf(htmlLog, "<body>\n"); fprintf(htmlLog, "<h2>Input Files</h2>\n"); fprintf(htmlLog, "<table>\n"); #endif for (; firstFileArg < argc; firstFileArg++) { fprintf(stderr, 
"\n"); fprintf(stderr, "Starting file '%s'.\n", argv[firstFileArg]); compressedFileReader *inFile = new compressedFileReader(argv[firstFileArg]); char *line = new char [10240]; KeyAndValue keyval; while (fgets(line, 10240, inFile->file()) != NULL) { chomp(line); keyval.find(line); if (keyval.key() == NULL) { // No key, so must be a comment or blank line continue; } if (strcasecmp(keyval.key(), "name") == 0) { gkpLibrary = gkpStore->gkStore_addEmptyLibrary(keyval.value()); continue; } // We'd better have a gkpLibrary defined, if not, the .gkp input file is incorrect. if (gkpLibrary == NULL) { fprintf(stderr, "WARNING: no 'name' tag in gkp input; creating library with name 'DEFAULT'.\n"); gkpLibrary = gkpStore->gkStore_addEmptyLibrary(keyval.value()); nWARNS++; } if (strcasecmp(keyval.key(), "preset") == 0) { gkpLibrary->gkLibrary_parsePreset(keyval.value()); } else if (strcasecmp(keyval.key(), "qv") == 0) { gkpLibrary->gkLibrary_setDefaultQV(keyval.value_double()); } else if (strcasecmp(keyval.key(), "isNonRandom") == 0) { gkpLibrary->gkLibrary_setIsNonRandom(keyval.value_bool()); } else if (strcasecmp(keyval.key(), "trustHomopolymerRuns") == 0) { gkpLibrary->gkLibrary_setTrustHomopolymerRuns(keyval.value_bool()); } else if (strcasecmp(keyval.key(), "removeDuplicateReads") == 0) { gkpLibrary->gkLibrary_setRemoveDuplicateReads(keyval.value_bool()); } else if (strcasecmp(keyval.key(), "finalTrim") == 0) { gkpLibrary->gkLibrary_setFinalTrim(keyval.value()); } else if (strcasecmp(keyval.key(), "removeSpurReads") == 0) { gkpLibrary->gkLibrary_setRemoveSpurReads(keyval.value_bool()); } else if (strcasecmp(keyval.key(), "removeChimericReads") == 0) { gkpLibrary->gkLibrary_setRemoveChimericReads(keyval.value_bool()); } else if (strcasecmp(keyval.key(), "checkForSubReads") == 0) { gkpLibrary->gkLibrary_setCheckForSubReads(keyval.value_bool()); } else if (AS_UTL_fileExists(keyval.key(), false, false)) { loadReads(gkpStore, gkpLibrary, gkpFileID++, minReadLength, nameMap, 
htmlLog, errorLog, keyval.key(), nWARNS, nLOADED, bLOADED, nSKIPPED, bSKIPPED); } else { fprintf(stderr, "ERROR: option '%s' not recognized, and not a file of reads.\n", line); exit(1); } } delete inFile; delete [] line; } #if 0 fprintf(htmlLog, "</table>\n"); #endif gkpStore->gkStore_close(); fclose(nameMap); fclose(errorLog); fprintf(stderr, "\n"); fprintf(stderr, "Finished with:\n"); fprintf(stderr, " "F_U32" warnings (bad base or qv)\n", nWARNS); fprintf(stderr, "\n"); fprintf(stderr, "Read from inputs:\n"); fprintf(stderr, " "F_U64" bp.\n", bLOADED); fprintf(stderr, " "F_U32" reads.\n", nLOADED); fprintf(stderr, "\n"); fprintf(stderr, "Loaded into store:\n"); fprintf(stderr, " "F_U64" bp.\n", bLOADED); fprintf(stderr, " "F_U32" reads.\n", nLOADED); fprintf(stderr, "\n"); fprintf(stderr, "Skipped (too short):\n"); fprintf(stderr, " "F_U64" bp (%.4f%%).\n", bSKIPPED, 100.0 * bSKIPPED / (bSKIPPED + bLOADED)); fprintf(stderr, " "F_U32" reads (%.4f%%).\n", nSKIPPED, 100.0 * nSKIPPED / (nSKIPPED + nLOADED)); fprintf(stderr, "\n"); fprintf(stderr, "\n"); #if 0 fprintf(htmlLog, "\n"); fprintf(htmlLog, "<h2>Final Store</h2>\n"); fprintf(htmlLog, "<table>\n"); fprintf(htmlLog, "<tr><td colspan='2'>%s</td></tr>\n", gkpStoreName); fprintf(htmlLog, "<tr><td>readsLoaded</td><td>"F_U32" reads ("F_U64" bp)</td></tr>\n", nLOADED, bLOADED); fprintf(htmlLog, "<tr><td>readsSkipped</td><td>"F_U32" reads ("F_U64" bp) (read was too short)</td></tr>\n", nSKIPPED, bSKIPPED); fprintf(htmlLog, "<tr><td>warnings</td><td>"F_U32" warnings (invalid base or quality value)</td></tr>\n", nWARNS); fprintf(htmlLog, "</table>\n"); fprintf(htmlLog, "\n"); fprintf(htmlLog, "<script type='text/javascript'>\n"); fprintf(htmlLog, "var toggleOne = function() {\n"); fprintf(htmlLog, " var table = this.closest('table');\n"); fprintf(htmlLog, " var elts = table.querySelectorAll('.details');\n"); fprintf(htmlLog, "\n"); fprintf(htmlLog, " for (var i=0; i<elts.length; i++) {\n"); fprintf(htmlLog, " if 
(!elts[i].enabled) {\n"); fprintf(htmlLog, " elts[i].enabled = true;\n"); fprintf(htmlLog, " elts[i].style.visibility = 'visible';\n"); fprintf(htmlLog, " } else {\n"); fprintf(htmlLog, " elts[i].enabled = false;\n"); fprintf(htmlLog, " elts[i].style.visibility = 'collapse';\n"); fprintf(htmlLog, " }\n"); fprintf(htmlLog, " }\n"); fprintf(htmlLog, "}\n"); fprintf(htmlLog, "\n"); for (uint32 ii=0; ii<gkpFileID; ii++) { fprintf(htmlLog, "document.getElementById('gkpload%u').onclick = toggleOne;\n", ii); fprintf(htmlLog, "document.getElementById('gkpload%u').style = 'cursor: pointer;';\n", ii); } fprintf(htmlLog, "</script>\n"); fprintf(htmlLog, "\n"); fprintf(htmlLog, "</body>\n"); fprintf(htmlLog, "</html>\n"); #else fprintf(htmlLog, "sum "F_U32" "F_U64" "F_U32" "F_U64" "F_U32"\n", nLOADED, bLOADED, nSKIPPED, bSKIPPED, nWARNS); #endif fclose(htmlLog); if (nERROR > 0) fprintf(stderr, "gatekeeperCreate did NOT finish successfully; too many errors.\n"); if (bSKIPPED > 0.25 * (bSKIPPED + bLOADED)) fprintf(stderr, "gatekeeperCreate did NOT finish successfully; too many bases skipped. Check your reads.\n"); if (nWARNS > 0.25 * (nLOADED)) fprintf(stderr, "gatekeeperCreate did NOT finish successfully; too many warnings. Check your reads.\n"); if (nSKIPPED > 0.50 * (nLOADED)) fprintf(stderr, "gatekeeperCreate did NOT finish successfully; too many short reads. Check your reads!\n"); if ((nERROR > 0) || (bSKIPPED > 0.25 * (bSKIPPED + bLOADED)) || (nWARNS > 0.25 * (nSKIPPED + nLOADED)) || (nSKIPPED > 0.50 * (nSKIPPED + nLOADED))) exit(1); fprintf(stderr, "gatekeeperCreate finished successfully.\n"); exit(0); }
int main(int argc,char **argv) { int count,j,s; int norm; bode::Interval *i; std::string readsFN; std::string inFN; std::string outFN; std::string inputFN; int readsTotal,inputTotal; double readsScale,inputScale; int *target; int bins,binwidth; bode::Interval z; bode::Flags *fset; fset = processCmdLine(argc,argv); norm = fset->getInt("norm"); bins = fset->getInt("bins"); binwidth = fset->getInt("binwidth"); inputFN = fset->getStr("input"); readsFN = fset->positionalArgs()[1]; inFN = fset->positionalArgs()[2]; outFN = fset->positionalArgs()[3]; // width = getBedWidth(inFN); target = new int[bins]; bode::IntervalReader *reads = bode::IntervalReader::open(readsFN); bode::CffReader *in = new bode::CffReader(inFN); std::ofstream *out = new std::ofstream(outFN.c_str(),std::ofstream::out); bode::IntervalReader *input = NULL; if (inputFN != "") { input = bode::IntervalReader::open(inputFN); } *out << "GID\tscore\tcoordinates"; s = 1; while (s <= bins) { *out << "\t" << s; s++; } *out << std::endl; fprintf(stderr,"loading reads...\n"); bode::IntervalSet *is = loadReads(reads,fset); readsTotal = is->count(); fprintf(stderr,"loaded %d reads.\n",readsTotal); readsScale = ((double) norm) / (double) readsTotal; bode::IntervalSet *background = NULL; if (input != NULL) { fprintf(stderr,"loading input...\n"); background = loadReads(input,fset); inputTotal = background->count(); fprintf(stderr,"loaded %d input reads.\n",inputTotal); inputScale = ((double) norm) / (double) inputTotal; } reads->close(); count = 0; fprintf(stderr,"processing intervals...\n"); while ((i=in->nextI())) { bode::Bed *bi = static_cast<bode::Bed*>(i); count++; if (count % 1000 == 0) { fprintf(stderr,"%9d\r",count); } for (j=0;j<bins;j++) { target[j] = 0; } int left = bi->left() - (bins / 2) * binwidth; int right = bi->left() + (bins / 2) * binwidth; // is->density(i->chrom(),left,right,target); (*out) <<bi->name() << "\t"<<bi->score()<<"\t"<<bi->chrom()<<":"<<bi->left(); s = left; while (s < right) { 
z.update(i->chrom(),s,s+binwidth); int c = is->overlapping(&z); int bk; double ncbk; if (background != NULL) { bk = background->overlapping(&z); ncbk = (c * readsScale) - (bk * inputScale); } else { ncbk = c * readsScale; } *out << "\t" << ncbk; s += binwidth; } /* for (int j=0;j<right-left;j++) { *out << "\t" << target[j]; } */ *out << std::endl; } in->close(); out->close(); if (input != NULL) { input->close(); } std::cerr << "processed " << count << " intervals." << std::endl; return 0; }
int main(int argc,char **argv) { int count,j,s; int norm; bode::Interval *i; bode::Bed *b; std::string readsFN; std::string inFN; std::string outFN; int readsTotal; double readsScale; int *target; bode::Flags *fset; fset = processCmdLine(argc,argv); readsFN = fset->positionalArgs()[1]; inFN = fset->positionalArgs()[2]; outFN = fset->positionalArgs()[3]; norm = fset->getInt("norm"); target = new int[MAXINTERVAL]; bode::IntervalReader *reads = bode::IntervalReader::open(readsFN); bode::BedReader *in = new bode::BedReader(inFN); std::ofstream *out = new std::ofstream(outFN.c_str(),std::ofstream::out); fprintf(stderr,"loading reads...\n"); bode::IntervalSet *is = loadReads(reads,fset); readsTotal = is->count(); fprintf(stderr,"loaded %d reads.\n",readsTotal); readsScale = ((double) norm) / (double) readsTotal; reads->close(); count = 0; fprintf(stderr,"processing intervals...\n"); while ((i=in->nextI())) { count++; if (count % 1000 == 0) { fprintf(stderr,"%9d\r",count); } int width = i->right() - i->left(); for (j=0;j<width;j++) { target[j] = 0; } is->density(i->chrom(),i->left(),i->right(),target); if (i->strand() == '-') { for (int k=0;k<width/2;k++) { int x = target[k]; target[k] = target[width-k-1]; target[width-k-1] = x; } } double nc = target[0] * readsScale; b = static_cast<bode::Bed *>(i); *out << b->name(); for (s=0;s<width;s++) { nc = target[s] * readsScale; *out << "\t" << nc; } *out << std::endl; } in->close(); out->close(); std::cerr << "processed " << count << " intervals." << std::endl; return 0; }