Пример #1
0
// Writes the read density of every interval in a BED file.
//
// Usage: <prog> <reads> <intervals.bed> <output>
//
// For each interval read from <intervals.bed> one line is written to
// <output>: "chrom:left-right" followed by one density value per position
// in [left, right).  Intervals wider than the density buffer are reported
// on stderr and skipped.  The number of intervals seen (including skipped
// ones) is reported on stderr at the end.
//
// Returns 0 on success, 1 on a usage error or if <output> cannot be opened.
int main(int argc,char **argv) {
  // Capacity of the density buffer; wider intervals cannot be processed.
  const int kMaxWidth = 16384;

  if (argc < 4) {
    std::cerr << "usage: " << (argc > 0 ? argv[0] : "density")
              << " <reads> <intervals.bed> <output>" << std::endl;
    return 1;
  }

  std::string readsFN(argv[1]);
  std::string inFN(argv[2]);
  std::string outFN(argv[3]);
  int target[kMaxWidth];

  bode::IntervalReader *reads = bode::IntervalReader::open(readsFN);
  bode::BedReader *in = new bode::BedReader(inFN);
  // Stack-allocated so the stream is flushed and released on every exit path.
  std::ofstream out(outFN.c_str(),std::ofstream::out);
  if (!out) {
    std::cerr << "cannot open output file: " << outFN << std::endl;
    return 1;
  }

  bode::IntervalSet *is = loadReads(reads);
  reads->close();

  int count = 0;  // was uninitialised, which made the final report garbage
  bode::Interval *i;
  while ((i = in->nextI())) {
    const int left = i->left();
    const int right = i->right();
    const int width = right - left;
    if (width > kMaxWidth) {
      std::cerr << "over-wide interval: "<<i->chrom()<<":"<<left<<"-"<<right<<std::endl;
    } else {
      // The other density tools in this code base clear the buffer before
      // calling density(); do the same here in case density() accumulates
      // into the buffer rather than overwriting it.
      for (int j=0;j<width;j++) {
        target[j] = 0;
      }
      is->density(i->chrom(),left,right,target);
      out <<i->chrom()<<":"<<left<<"-"<<right;
      for (int j=0;j<width;j++) {
        out << " " << target[j];
      }
      out << '\n';
    }
    count++;
  }
  in->close();
  out.close();

  std::cerr << "processed " << count << " intervals" << std::endl;
  return 0;
}
Пример #2
0
/*
 * Hand out the next read from `reads`.
 *
 * Whenever the running position `reads->current` sits on a multiple of
 * BATCH_SIZE, loadReads() is called first.  The returned pointer refers to
 * the slot `current % BATCH_SIZE` inside `reads->readData`; `current` is then
 * advanced by one.
 */
struct read* readsGetNext(Reads* reads)
{
    /* At a batch boundary: ask loadReads() for more data. */
    if (!(reads->current % BATCH_SIZE))
        loadReads(reads);

    /* The slot index is computed after the (possible) reload, then the
       position is stepped forward for the next call. */
    return &reads->readData[reads->current++ % BATCH_SIZE];
}
Пример #3
0
int
main(int argc, char **argv) {
  char            *gkpStoreName      = NULL;
  char            *outPrefix         = NULL;

  uint32           minReadLength     = 0;

  uint32           firstFileArg      = 0;

  char             errorLogName[FILENAME_MAX];
  char             htmlLogName[FILENAME_MAX];
  char             nameMapName[FILENAME_MAX];

  argc = AS_configure(argc, argv);

  int arg = 1;
  int err = 0;
  while (arg < argc) {
    if        (strcmp(argv[arg], "-o") == 0) {
      gkpStoreName = argv[++arg];

    } else if (strcmp(argv[arg], "-minlength") == 0) {
      minReadLength = atoi(argv[++arg]);

    } else if (strcmp(argv[arg], "--") == 0) {
      firstFileArg = arg++;
      break;

    } else if (argv[arg][0] == '-') {
      fprintf(stderr, "ERROR: unknown option '%s'\n", argv[arg]);
      err++;

    } else {
      firstFileArg = arg;
      break;
    }
    arg++;
  }

  if (gkpStoreName == NULL)
    err++;
  if (firstFileArg == 0)
    err++;

  if (err) {
    fprintf(stderr, "usage: %s [...] -o gkpStore\n", argv[0]);
    fprintf(stderr, "  -o gkpStore         create this gkpStore\n");
    fprintf(stderr, "  \n");
    fprintf(stderr, "  -minlength L        discard reads shorter than L\n");
    fprintf(stderr, "  \n");
    fprintf(stderr, "  \n");

    if (gkpStoreName == NULL)
      fprintf(stderr, "ERROR: no gkpStore (-g) supplied.\n");
    if (firstFileArg == 0)
      fprintf(stderr, "ERROR: no input files supplied.\n");

    exit(1);
  }


  gkStore     *gkpStore     = gkStore::gkStore_open(gkpStoreName, gkStore_extend);
  gkRead      *gkpRead      = NULL;
  gkLibrary   *gkpLibrary   = NULL;
  uint32       gkpFileID    = 0;      //  Used for HTML output, an ID for each file loaded.

  uint32       inLineLen    = 1024;
  char         inLine[1024] = { 0 };

  validSeq['a'] = validSeq['c'] = validSeq['g'] = validSeq['t'] = validSeq['n'] = 1;
  validSeq['A'] = validSeq['C'] = validSeq['G'] = validSeq['T'] = validSeq['N'] = 1;

  errno = 0;

  sprintf(errorLogName, "%s/errorLog",    gkpStoreName);
  FILE    *errorLog = fopen(errorLogName, "w");
  if (errno)
    fprintf(stderr, "ERROR:  cannot open error file '%s': %s\n", errorLogName, strerror(errno)), exit(1);

  sprintf(htmlLogName,   "%s/load.dat", gkpStoreName);
  FILE    *htmlLog   = fopen(htmlLogName,   "w");
  if (errno)
    fprintf(stderr, "ERROR:  cannot open uid map file '%s': %s\n", htmlLogName, strerror(errno)), exit(1);

  sprintf(nameMapName,   "%s/readNames.txt", gkpStoreName);
  FILE    *nameMap   = fopen(nameMapName,   "w");
  if (errno)
    fprintf(stderr, "ERROR:  cannot open uid map file '%s': %s\n", nameMapName, strerror(errno)), exit(1);

  uint32  nERROR   = 0;  //  There aren't any errors, we just exit fatally if encountered.
  uint32  nWARNS   = 0;

  uint32  nLOADED  = 0;  //  Reads loaded
  uint64  bLOADED  = 0;  //  Bases loaded

  uint32  nSKIPPED = 0;
  uint64  bSKIPPED = 0;  //  Bases not loaded, too short

#if 0
  fprintf(htmlLog, "<!DOCTYPE html>\n");
  fprintf(htmlLog, "<html>\n");
  fprintf(htmlLog, "<head>\n");
  fprintf(htmlLog, "<title>gatekeeper load statistics</title>\n");
  fprintf(htmlLog, "<style type='text/css'>\n");
  fprintf(htmlLog, "body       { font-family: Helvetica, Verdana, sans-serif; }\n");
  fprintf(htmlLog, "h1, h2     { color: #ee3e80; }\n");
  fprintf(htmlLog, "p          { color: #665544; }\n");
  fprintf(htmlLog, "th, td     { border: 1px solid #111111; padding: 2px 2px 2px 2px; }\n");
  fprintf(htmlLog, "td:hover   { background-color: #e4e4e4; }\n");
  fprintf(htmlLog, "th:hover   { background-color: #d4d4d4; }\n");
  fprintf(htmlLog, "tr.details { visibility: collapse; }\n");
  fprintf(htmlLog, "</style>\n");
  fprintf(htmlLog, "</head>\n");
  fprintf(htmlLog, "<body>\n");
  fprintf(htmlLog, "<h2>Input Files</h2>\n");
  fprintf(htmlLog, "<table>\n");
#endif

  for (; firstFileArg < argc; firstFileArg++) {
    fprintf(stderr, "\n");
    fprintf(stderr, "Starting file '%s'.\n", argv[firstFileArg]);

    compressedFileReader *inFile = new compressedFileReader(argv[firstFileArg]);
    char                 *line   = new char [10240];
    KeyAndValue           keyval;

    while (fgets(line, 10240, inFile->file()) != NULL) {
      chomp(line);
      keyval.find(line);

      if (keyval.key() == NULL) {
        //  No key, so must be a comment or blank line
        continue;
      }

      if (strcasecmp(keyval.key(), "name") == 0) {
        gkpLibrary = gkpStore->gkStore_addEmptyLibrary(keyval.value());
        continue;
      }

      //  We'd better have a gkpLibrary defined, if not, the .gkp input file is incorrect.
      if (gkpLibrary == NULL) {
        fprintf(stderr, "WARNING: no 'name' tag in gkp input; creating library with name 'DEFAULT'.\n");
        gkpLibrary = gkpStore->gkStore_addEmptyLibrary(keyval.value());
        nWARNS++;
      }

      if        (strcasecmp(keyval.key(), "preset") == 0) {
        gkpLibrary->gkLibrary_parsePreset(keyval.value());

      } else if (strcasecmp(keyval.key(), "qv") == 0) {
        gkpLibrary->gkLibrary_setDefaultQV(keyval.value_double());

      } else if (strcasecmp(keyval.key(), "isNonRandom") == 0) {
        gkpLibrary->gkLibrary_setIsNonRandom(keyval.value_bool());

      } else if (strcasecmp(keyval.key(), "trustHomopolymerRuns") == 0) {
        gkpLibrary->gkLibrary_setTrustHomopolymerRuns(keyval.value_bool());

      } else if (strcasecmp(keyval.key(), "removeDuplicateReads") == 0) {
        gkpLibrary->gkLibrary_setRemoveDuplicateReads(keyval.value_bool());

      } else if (strcasecmp(keyval.key(), "finalTrim") == 0) {
        gkpLibrary->gkLibrary_setFinalTrim(keyval.value());

      } else if (strcasecmp(keyval.key(), "removeSpurReads") == 0) {
        gkpLibrary->gkLibrary_setRemoveSpurReads(keyval.value_bool());

      } else if (strcasecmp(keyval.key(), "removeChimericReads") == 0) {
        gkpLibrary->gkLibrary_setRemoveChimericReads(keyval.value_bool());

      } else if (strcasecmp(keyval.key(), "checkForSubReads") == 0) {
        gkpLibrary->gkLibrary_setCheckForSubReads(keyval.value_bool());

      } else if (AS_UTL_fileExists(keyval.key(), false, false)) {
        loadReads(gkpStore,
                  gkpLibrary,
                  gkpFileID++,
                  minReadLength,
                  nameMap,
                  htmlLog,
                  errorLog,
                  keyval.key(),
                  nWARNS, nLOADED, bLOADED, nSKIPPED, bSKIPPED);

      } else {
        fprintf(stderr, "ERROR:  option '%s' not recognized, and not a file of reads.\n", line);
        exit(1);
      }
    }

    delete    inFile;
    delete [] line;
  }

#if 0
  fprintf(htmlLog, "</table>\n");
#endif

  gkpStore->gkStore_close();

  fclose(nameMap);
  fclose(errorLog);

  fprintf(stderr, "\n");
  fprintf(stderr, "Finished with:\n");
  fprintf(stderr, "  "F_U32" warnings (bad base or qv)\n", nWARNS);
  fprintf(stderr, "\n");
  fprintf(stderr, "Read from inputs:\n");
  fprintf(stderr, "  "F_U64" bp.\n",    bLOADED);
  fprintf(stderr, "  "F_U32" reads.\n", nLOADED);
  fprintf(stderr, "\n");
  fprintf(stderr, "Loaded into store:\n");
  fprintf(stderr, "  "F_U64" bp.\n",    bLOADED);
  fprintf(stderr, "  "F_U32" reads.\n", nLOADED);
  fprintf(stderr, "\n");
  fprintf(stderr, "Skipped (too short):\n");
  fprintf(stderr, "  "F_U64" bp (%.4f%%).\n",    bSKIPPED, 100.0 * bSKIPPED / (bSKIPPED + bLOADED));
  fprintf(stderr, "  "F_U32" reads (%.4f%%).\n", nSKIPPED, 100.0 * nSKIPPED / (nSKIPPED + nLOADED));
  fprintf(stderr, "\n");
  fprintf(stderr, "\n");

#if 0
  fprintf(htmlLog, "\n");
  fprintf(htmlLog, "<h2>Final Store</h2>\n");
  fprintf(htmlLog, "<table>\n");
  fprintf(htmlLog, "<tr><td colspan='2'>%s</td></tr>\n", gkpStoreName);
  fprintf(htmlLog, "<tr><td>readsLoaded</td><td>"F_U32" reads ("F_U64" bp)</td></tr>\n", nLOADED, bLOADED);
  fprintf(htmlLog, "<tr><td>readsSkipped</td><td>"F_U32" reads ("F_U64" bp) (read was too short)</td></tr>\n", nSKIPPED, bSKIPPED);
  fprintf(htmlLog, "<tr><td>warnings</td><td>"F_U32" warnings (invalid base or quality value)</td></tr>\n", nWARNS);
  fprintf(htmlLog, "</table>\n");
  fprintf(htmlLog, "\n");

  fprintf(htmlLog, "<script type='text/javascript'>\n");
  fprintf(htmlLog, "var toggleOne = function() {\n");
  fprintf(htmlLog, "  var table = this.closest('table');\n");
  fprintf(htmlLog, "  var elts  = table.querySelectorAll('.details');\n");
  fprintf(htmlLog, "\n");
  fprintf(htmlLog, "  for (var i=0; i<elts.length; i++) {\n");
  fprintf(htmlLog, "    if (!elts[i].enabled) {\n");
  fprintf(htmlLog, "      elts[i].enabled = true;\n");
  fprintf(htmlLog, "      elts[i].style.visibility = 'visible';\n");
  fprintf(htmlLog, "    } else {\n");
  fprintf(htmlLog, "      elts[i].enabled = false;\n");
  fprintf(htmlLog, "      elts[i].style.visibility = 'collapse';\n");
  fprintf(htmlLog, "    }\n");
  fprintf(htmlLog, "  }\n");
  fprintf(htmlLog, "}\n");
  fprintf(htmlLog, "\n");
  for (uint32 ii=0; ii<gkpFileID; ii++) {
    fprintf(htmlLog, "document.getElementById('gkpload%u').onclick = toggleOne;\n", ii);
    fprintf(htmlLog, "document.getElementById('gkpload%u').style   = 'cursor: pointer;';\n", ii);
  }
  fprintf(htmlLog, "</script>\n");
  fprintf(htmlLog, "\n");
  fprintf(htmlLog, "</body>\n");
  fprintf(htmlLog, "</html>\n");
#else
  fprintf(htmlLog, "sum "F_U32" "F_U64" "F_U32" "F_U64" "F_U32"\n", nLOADED, bLOADED, nSKIPPED, bSKIPPED, nWARNS);
#endif

  fclose(htmlLog);



  if (nERROR > 0)
    fprintf(stderr, "gatekeeperCreate did NOT finish successfully; too many errors.\n");

  if (bSKIPPED > 0.25 * (bSKIPPED + bLOADED))
    fprintf(stderr, "gatekeeperCreate did NOT finish successfully; too many bases skipped.  Check your reads.\n");

  if (nWARNS > 0.25 * (nLOADED))
    fprintf(stderr, "gatekeeperCreate did NOT finish successfully; too many warnings.  Check your reads.\n");

  if (nSKIPPED > 0.50 * (nLOADED))
    fprintf(stderr, "gatekeeperCreate did NOT finish successfully; too many short reads.  Check your reads!\n");

  if ((nERROR > 0) ||
      (bSKIPPED > 0.25 * (bSKIPPED + bLOADED)) ||
      (nWARNS   > 0.25 * (nSKIPPED + nLOADED)) ||
      (nSKIPPED > 0.50 * (nSKIPPED + nLOADED)))
    exit(1);

  fprintf(stderr, "gatekeeperCreate finished successfully.\n");

  exit(0);
}
Пример #4
0
// Writes a binned, normalised read-count profile around each input interval.
//
// Flags "norm", "bins", "binwidth" and "input" plus three positional
// arguments (reads file, interval file, output file) are taken from
// processCmdLine().
//
// Output is tab separated.  The header is "GID score coordinates" followed by
// the bin numbers 1..bins.  Each data row holds the interval name, score and
// "chrom:left", then one value per bin of width `binwidth` covering
// (bins / 2) bins on either side of the interval's left coordinate.  A bin
// value is the number of overlapping reads scaled by norm / total-reads; when
// an "input" (background) file is given, its scaled count is subtracted.
//
// NOTE(review): the window spans 2 * (bins / 2) bins, so an odd `bins` yields
// one data column fewer than the header announces - confirm the intent.
//
// Returns 0 on success, 1 if bins/binwidth are not positive or the output
// file cannot be opened.
int main(int argc,char **argv) {
  int count,s;
  int norm;
  bode::Interval *i;
  std::string readsFN;
  std::string inFN;
  std::string outFN;
  std::string inputFN;
  int readsTotal,inputTotal;
  double readsScale;
  double inputScale = 0.0;  // only meaningful when a background set is loaded
  int bins,binwidth;
  bode::Interval z;
  bode::Flags *fset;

  fset = processCmdLine(argc,argv);
  norm = fset->getInt("norm");
  bins = fset->getInt("bins");
  binwidth = fset->getInt("binwidth");
  inputFN = fset->getStr("input");
  readsFN = fset->positionalArgs()[1];
  inFN = fset->positionalArgs()[2];
  outFN = fset->positionalArgs()[3];

  // Non-positive values would silently produce rows without any bins.
  if (bins <= 0 || binwidth <= 0) {
    fprintf(stderr,"bins and binwidth must both be positive.\n");
    return 1;
  }

  bode::IntervalReader *reads = bode::IntervalReader::open(readsFN);
  bode::CffReader *in = new bode::CffReader(inFN);
  // Stack-allocated so the stream is flushed and released on every exit path.
  std::ofstream out(outFN.c_str(),std::ofstream::out);
  if (!out) {
    std::cerr << "cannot open output file: " << outFN << std::endl;
    return 1;
  }
  bode::IntervalReader *input = NULL;
  if (inputFN != "") {
    input = bode::IntervalReader::open(inputFN);
  }

  out << "GID\tscore\tcoordinates";
  for (s = 1; s <= bins; s++) {
    out << "\t" << s;
  }
  out << std::endl;

  fprintf(stderr,"loading reads...\n");
  bode::IntervalSet *is = loadReads(reads,fset);
  readsTotal = is->count();
  fprintf(stderr,"loaded %d reads.\n",readsTotal);
  // With no reads every count is zero; use a zero scale instead of dividing
  // by zero, which would turn every output value into NaN.
  readsScale = (readsTotal > 0) ? ((double) norm) / (double) readsTotal : 0.0;
  bode::IntervalSet *background = NULL;
  if (input != NULL) {
    fprintf(stderr,"loading input...\n");
    background = loadReads(input,fset);
    inputTotal = background->count();
    fprintf(stderr,"loaded %d input reads.\n",inputTotal);
    inputScale = (inputTotal > 0) ? ((double) norm) / (double) inputTotal : 0.0;
  }

  reads->close();
  count = 0;
  fprintf(stderr,"processing intervals...\n");
  while ((i=in->nextI())) {
    bode::Bed *bi = static_cast<bode::Bed*>(i);
    count++;
    if (count % 1000 == 0) {
      fprintf(stderr,"%9d\r",count);
    }
    // Window of (bins / 2) bins on each side of the interval's left edge.
    const int halfSpan = (bins / 2) * binwidth;
    const int left = bi->left() - halfSpan;
    const int right = bi->left() + halfSpan;
    out <<bi->name() << "\t"<<bi->score()<<"\t"<<bi->chrom()<<":"<<bi->left();
    for (s = left; s < right; s += binwidth) {
      // z is reused as the query interval for the current bin.
      z.update(i->chrom(),s,s+binwidth);
      const int c = is->overlapping(&z);
      double ncbk = c * readsScale;
      if (background != NULL) {
        const int bk = background->overlapping(&z);
        ncbk = (c * readsScale) - (bk * inputScale);
      }
      out << "\t" << ncbk;
    }
    out << std::endl;
  }
  in->close();
  out.close();
  if (input != NULL) {
    input->close();
  }

  std::cerr << "processed " << count << " intervals." << std::endl;
  return 0;
}
Пример #5
0
// Writes a per-position, normalised read-density profile for each interval
// of a BED file.
//
// The "norm" flag and three positional arguments (reads file, BED file,
// output file) are taken from processCmdLine().
//
// For every interval one tab-separated row is written: the interval name
// followed by one value per position, each being the density at that position
// scaled by norm / total-reads.  For intervals on the '-' strand the profile
// is reversed before it is written.  Intervals wider than MAXINTERVAL do not
// fit the density buffer; they are reported on stderr and skipped.
//
// Returns 0 on success, 1 if the output file cannot be opened.
int main(int argc,char **argv) {
  int count,j,s;
  int norm;
  bode::Interval *i;
  bode::Bed *b;
  std::string readsFN;
  std::string inFN;
  std::string outFN;
  int readsTotal;
  double readsScale;
  int *target;
  bode::Flags *fset;

  fset = processCmdLine(argc,argv);
  readsFN = fset->positionalArgs()[1];
  inFN = fset->positionalArgs()[2];
  outFN = fset->positionalArgs()[3];
  norm = fset->getInt("norm");

  bode::IntervalReader *reads = bode::IntervalReader::open(readsFN);
  bode::BedReader *in = new bode::BedReader(inFN);
  // Stack-allocated so the stream is flushed and released on every exit path.
  std::ofstream out(outFN.c_str(),std::ofstream::out);
  if (!out) {
    std::cerr << "cannot open output file: " << outFN << std::endl;
    return 1;
  }
  target = new int[MAXINTERVAL];

  fprintf(stderr,"loading reads...\n");
  bode::IntervalSet *is = loadReads(reads,fset);
  readsTotal = is->count();
  fprintf(stderr,"loaded %d reads.\n",readsTotal);
  // With no reads every density is zero; use a zero scale instead of
  // dividing by zero.
  readsScale = (readsTotal > 0) ? ((double) norm) / (double) readsTotal : 0.0;
  reads->close();
  count = 0;
  fprintf(stderr,"processing intervals...\n");
  while ((i=in->nextI())) {
    count++;
    if (count % 1000 == 0) {
      fprintf(stderr,"%9d\r",count);
    }
    int width = i->right() - i->left();
    // The buffer holds MAXINTERVAL entries; anything wider would write past
    // its end, so such intervals are skipped.
    if (width > MAXINTERVAL) {
      std::cerr << "over-wide interval: "<<i->chrom()<<":"<<i->left()<<"-"<<i->right()<<std::endl;
      continue;
    }
    for (j=0;j<width;j++) {
      target[j] = 0;
    }
    is->density(i->chrom(),i->left(),i->right(),target);
    if (i->strand() == '-') {
      // Reverse in place so the profile reads in the interval's own direction.
      for (int k=0;k<width/2;k++) {
        int x = target[k];
        target[k] = target[width-k-1];
        target[width-k-1] = x;
      }
    }
    b = static_cast<bode::Bed *>(i);
    out << b->name();
    for (s=0;s<width;s++) {
      const double nc = target[s] * readsScale;
      out << "\t" << nc;
    }
    out << std::endl;
  }
  in->close();
  out.close();
  delete [] target;

  std::cerr << "processed " << count << " intervals." << std::endl;
  return 0;
}