Example #1
0
/**
 * Pre-clusters every group whose index lies in [start, end) of `groups`.
 *
 * For each group the sequences (and, when a name file is in use, the name map)
 * are fetched, handed to loadSeqs(), clustered with process(), and written out
 * with printData(). The per-group map file is named newMFile + group + ".map"
 * and is registered in outputNames / outputTypes["map"].
 *
 * @param newFFile  passed through to printData()
 * @param newNFile  passed through to printData()
 * @param newMFile  prefix used to build each group's ".map" file name
 * @param start     index of the first group to process
 * @param end       one past the index of the last group to process
 * @param groups    group names; indexed by the loop counter
 * @return the value loadSeqs() returned for the last group processed; 0 if the
 *         range is empty or the run was interrupted via m->control_pressed
 *         (including the "diffs is greater than your sequence length" error).
 *
 * Any std::exception is reported through m->errorOut() and the process exits
 * with status 1.
 */
int PreClusterCommand::driverGroups(string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
	try {
		
		int numSeqs = 0;
		
		//precluster each group
		for (int i = start; i < end; i++) {
			
			// Keep the timestamp in its own time_t instead of overwriting the
			// `start` parameter: storing time(NULL) in an int narrows the value
			// and conflates the loop bound with the timer.
			time_t groupStart = time(NULL);
			
			if (m->control_pressed) {  return 0; }
			
			m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine();
			
			map<string, string> thisNameMap;
			vector<Sequence> thisSeqs;
			if (groupfile != "") { 
				thisSeqs = parser->getSeqs(groups[i]);
			}else if (countfile != "") {
				thisSeqs = cparser->getSeqs(groups[i]);
			}
			if (namefile != "") {  thisNameMap = parser->getNameMap(groups[i]); }
			
			//fill alignSeqs with this groups info.
			numSeqs = loadSeqs(thisNameMap, thisSeqs, groups[i]);
			
			if (m->control_pressed) {   return 0; }
			
			if (method == "aligned") { if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); m->control_pressed = true; return 0;  } }
			
			// Build the map file name once; it is used for processing and for
			// both output registries.
			string mapFileName = newMFile+groups[i]+".map";
			int count= process(mapFileName);
			outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName);
			
			if (m->control_pressed) {  return 0; }
			
			m->mothurOut("Total number of sequences before pre.cluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine();
			m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); 
			printData(newFFile, newNFile, groups[i]);
			
			m->mothurOut("It took " + toString(time(NULL) - groupStart) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); 
			
		}
		
		return numSeqs;
	}
	catch(exception& e) {
		m->errorOut(e, "PreClusterCommand", "driverGroups");
		exit(1);
	}
}
Example #2
0
/* void getParams()
 * Parses the command line.
 */
void getParams(int argc, char** argv) {

  char* outFile = NULL, *inFile = NULL, *primFile = NULL,
    *bedFile = NULL, *fwdPos = NULL, *revPos = NULL,
    *bedPos = NULL, *logFile = NULL, *wasteFile = NULL,
    *corrFile = NULL;
  int misAllow = 0, revLen = 0, revMis = 0, revLMis = 0,
    revOpt = 0;

  // parse argv
  for (int i = 1; i < argc; i++) {
    if (!strcmp(argv[i], HELP))
      usage();
    else if (!strcmp(argv[i], REVOPT))
      revOpt = 1;
    else if (i < argc - 1) {
      if (!strcmp(argv[i], OUTFILE))
        outFile = argv[++i];
      else if (!strcmp(argv[i], INFILE))
        inFile = argv[++i];
      else if (!strcmp(argv[i], PRIMFILE))
        primFile = argv[++i];
      else if (!strcmp(argv[i], BEDFILE))
        bedFile = argv[++i];
      else if (!strcmp(argv[i], FWDPOS))
        fwdPos = argv[++i];
      else if (!strcmp(argv[i], REVPOS))
        revPos = argv[++i];
      else if (!strcmp(argv[i], BEDPOS))
        bedPos = argv[++i];
      else if (!strcmp(argv[i], LOGFILE))
        logFile = argv[++i];
      else if (!strcmp(argv[i], WASTEFILE))
        wasteFile = argv[++i];
      else if (!strcmp(argv[i], MISALLOW))
        misAllow = getInt(argv[++i]);
      else if (!strcmp(argv[i], REVMIS))
        revMis = getInt(argv[++i]);
      else if (!strcmp(argv[i], REVLENGTH))
        revLen = getInt(argv[++i]);
      else if (!strcmp(argv[i], REVLMIS))
        revLMis = getInt(argv[++i]);
      else if (!strcmp(argv[i], CORRFILE))
        corrFile = argv[++i];
      else
        exit(error(argv[i], ERRINVAL));
    } else
      exit(error(argv[i], ERRINVAL));
  }

  if (outFile == NULL || inFile == NULL || primFile == NULL)
    usage();
  int gz = 0;
  if (!strcmp(inFile + strlen(inFile) - strlen(GZEXT), GZEXT))
    gz = 1;

  // open files, load primer sequences
  File out, in, waste, corr;
  FILE* prim = NULL, *log = NULL, *bed = NULL;
  openFiles(outFile, &out, primFile, &prim, inFile, &in,
    logFile, &log, bedFile, &bed, wasteFile, &waste,
    corrFile, &corr, gz);
  int pr = loadSeqs(prim);

  // get start and end locations
  int fwdSt = 0, fwdEnd = 1, revSt = 0, revEnd = 1,
    bedSt = 0, bedEnd = 1;
  getPos(fwdPos, &fwdSt, &fwdEnd);
  getPos(revPos, &revSt, &revEnd);
  if (bed != NULL) {
    getLengths(bed);
    getPos(bedPos, &bedSt, &bedEnd);
  }

  // read file
  int match = 0, rcmatch = 0;  // counting variables
  int count = readFile(in, out, misAllow, &match, &rcmatch,
    fwdSt, fwdEnd, revSt, revEnd, bedSt, bedEnd,
    waste, wasteFile != NULL, revMis, revLen, revLMis,
    revOpt, corr, corrFile != NULL, gz);

  // print log output
  if (log != NULL) {
    fprintf(log, "Primer pairs: %d\nRead count: %d\n", pr, count);
    fprintf(log, "Primer matches: %d\nBoth primer matches: %d\n\n", match, rcmatch);
    fprintf(log, "Matches:\nPrimer\tFwd\tFwd-Both\tRev\tRev-Both\n");
    for (Primer* p = primo; p != NULL; p = p->next)
      fprintf(log, "%s\t%d\t%d\t%d\t%d\n", p->name, p->fcount, p->fcountr,
        p->rcount, p->rcountr);
  }

  // close files
  if ( (gz && (gzclose(in.gzf) != Z_OK || gzclose(out.gzf) != Z_OK ||
      (wasteFile != NULL && gzclose(waste.gzf) != Z_OK) ||
      (corrFile != NULL && gzclose(corr.gzf) != Z_OK) ) ) ||
      ( ! gz && (fclose(in.f) || fclose(out.f) ||
      (wasteFile != NULL && fclose(waste.f)) ||
      (corrFile != NULL && fclose(corr.f)) ) ) ||
      fclose(prim) || (log != NULL && fclose(log)) ||
      (bed != NULL && fclose(bed)) )
    exit(error("", ERRCLOSE));
}