Beispiel #1
0
void compute_confusion_matrix(int32_t max_read_length, std::string bam_file, std::string fasta_file, std::string fasta_dir, bool skip_soft_clipped, std::ostream& out){
  BamTools::BamReader bam_reader;
  if (!bam_reader.Open(bam_file)) printErrorAndDie("Failed to open BAM file");

  std::string ref_seq;
  int32_t ref_id;
  if (fasta_file.compare("N/A") == 0)
    ref_id = -2;
  else {
    readFasta(fasta_file, fasta_dir, ref_seq);
    ref_id = 0;
  }

  int32_t* matrix_counts = new int32_t [25*max_read_length]();
  int32_t* total_counts  = new int32_t [5*max_read_length]();
  int32_t forward = 0, backward = 0;
  process_reads(bam_reader, max_read_length, ref_id, ref_seq, fasta_dir, skip_soft_clipped, matrix_counts, total_counts, forward, backward);

  out << forward  << "\n"
      << backward << std::endl;
  print_confusion_matrix(matrix_counts, total_counts, max_read_length, out);

  delete [] matrix_counts;
  delete [] total_counts;
}
Beispiel #2
0
int ListSeqsCommand::execute(){
	try {
		
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
		
		//read functions fill names vector
		if (fastafile != "")		{	inputFileName = fastafile;	readFasta();	}
        else if (fastqfile != "")	{	inputFileName = fastqfile;	readFastq();	}
		else if (namefile != "")	{	inputFileName = namefile;	readName();		}
		else if (groupfile != "")	{	inputFileName = groupfile;	readGroup();	}
		else if (alignfile != "")	{	inputFileName = alignfile;	readAlign();	}
		else if (listfile != "")	{	inputFileName = listfile;	readList();		}
		else if (taxfile != "")		{	inputFileName = taxfile;	readTax();		}
        else if (countfile != "")	{	inputFileName = countfile;	readCount();	}
		
		if (m->getControl_pressed()) { outputTypes.clear();  return 0; }
		
		//sort in alphabetical order
		sort(names.begin(), names.end());
		
		if (outputDir == "") {  outputDir += util.hasPath(inputFileName);  }
		
        map<string, string> variables; 
        variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputFileName));
		string outputFileName = getOutputFileName("accnos", variables);

        util.printAccnos(outputFileName, names);
        
		outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
		
		if (m->getControl_pressed()) { outputTypes.clear();  util.mothurRemove(outputFileName); return 0; }
		
		current->setAccnosFile(outputFileName);
		
		m->mothurOut("\nOutput File Names: \n"); 
		m->mothurOut(outputFileName); m->mothurOutEndLine();	
		m->mothurOutEndLine();
		
		//set accnos file as new current accnosfile
		string currentName = "";
		itTypes = outputTypes.find("accnos");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); }
		}
		
		return 0;		
	}

	catch(exception& e) {
		m->errorOut(e, "ListSeqsCommand", "execute");
		exit(1);
	}
}
int main(int argc, char *argv[])
/* Takes the database name as the only argument.  Pass 1: for every chrom that
 * has a chrN_snpTmp table, load that chrom's fasta hash and run processSnps.
 * Pass 2: for the same chroms, recreate the table and load it. */
{
struct slName *chromList = NULL;
struct slName *chrom = NULL;
char tableName[64];

if (argc != 2)
    usage();

snpDb = argv[1];
hSetDb(snpDb);
chromList = hAllChromNamesDb(snpDb);

errorFileHandle = mustOpen("snpMoltype.errors", "w");

multiFastaHash = readFasta("chrMulti");

/* pass 1: process the snps of every chrom that has a temp table */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    safef(tableName, ArraySize(tableName), "%s_snpTmp", chrom->name);
    if (hTableExists(tableName))
        {
        verbose(1, "chrom = %s\n", chrom->name);
        chromFastaHash = readFasta(chrom->name);
        processSnps(chrom->name);
        }
    }

carefulClose(&errorFileHandle);

/* pass 2: rebuild and reload the per-chrom tables */
for (chrom = chromList; chrom != NULL; chrom = chrom->next)
    {
    safef(tableName, ArraySize(tableName), "%s_snpTmp", chrom->name);
    if (hTableExists(tableName))
        {
        recreateDatabaseTable(chrom->name);
        verbose(1, "loading chrom = %s\n", chrom->name);
        loadDatabase(chrom->name);
        }
    }

return 0;
}
Beispiel #4
0
/*
 * Reads the run parameters from the command line, loads the FASTA file they
 * name, runs multialign on it with the configured k-mer size and minimum
 * diagonal size, prints the result and releases it.
 */
int main(int argc, char **argv){
  Parameters  options   = readParams(argc, argv);
  MultiFasta *sequences = readFasta(options.fastaname);

  multialign(sequences, options.kmersize, options.mindiagsize);
  printFasta(sequences);
  releaseFasta(sequences);

  return EXIT_SUCCESS;
}
Beispiel #5
0
/*
 * Reads the sequence stored in file `fname` and inserts its windows into the
 * trie rooted at `root`.
 *
 * fname : path of the input file; its first line is skipped as a header.
 * root  : trie that receives the windows.
 * which : passed through unchanged to insert().
 *
 * Windows start every SLIDE_SIZE positions while i < glen - WINDOW_SIZE.
 * A forward window is inserted with the 1-based position (i+1); its reverse
 * complement is inserted with -(i+1).  When NOREV is set only the
 * lexicographically smaller of the two is inserted, and a window equal to its
 * own reverse complement is not inserted at all.
 *
 * Always returns 1.
 */
int do_file(char *fname, struct trie *root, int which){

  FILE *fasta;
  char *genome;
  int glen;
  int i;
  int order;
  char *window;
  char *rcwindow;
  int willInsert;
  char dummy[SEQ_LENGTH];

  willInsert = 1;

  fprintf(stderr, "\n\nInserting sequence from file %s\n", fname);
  fasta = myfopen(fname, "r");

  /* Skip the complete header line.  A single fgets only consumes up to
   * SEQ_LENGTH-1 characters, so a longer header would leave its tail in the
   * stream to be read as sequence data; keep reading until the newline (or
   * EOF / an empty read) is reached. */
  do {
    if (fgets(dummy, SEQ_LENGTH, fasta) == NULL)
      break;
  } while (dummy[0] != '\0' && strchr(dummy, '\n') == NULL);

  genome = readFasta(fasta, &glen);

  window = (char *) mymalloc(sizeof(char) * WINDOW_SIZE+1);
  rcwindow = (char *) mymalloc(sizeof(char) * WINDOW_SIZE+1);

  i = 0;

  while (i < glen - WINDOW_SIZE){

    /* getWindow returning 0 means this position yields no usable window */
    if  (getWindow(genome, glen, &window, i) == 0 ){
      i+=SLIDE_SIZE;
      continue;
    }

    //while (getIndexWindow(fasta, &window, &willInsert) > 0){
    //fprintf(stderr, "\rGetting windows from %s\t: %d%%", fname, ((int)(100.0*(float)(i+strlen(window))/(float)strlen(genome))));
    if (VERBOSE)
      printf("%s\n", window);
    /* insert the window to the trie here */
    if (willInsert){
      // start location is i+1
      // because i starts from 0 here; but
      // we want the sequence index starting from 1
      // STARTING POSITION FOR FORWARD

      rcomp(window, rcwindow);
      if (NOREV){
        /* compare once; equal strings (order == 0) are skipped */
        order = strcmp(window, rcwindow);
        if (order < 0)
          insert(root, window, (i+1), fname, which);
        else if (order > 0)
          insert(root, rcwindow, (-1*(i+1)), fname, which);
      }
      else{
        insert(root, window, (i+1), fname, which);
        insert(root, rcwindow, (-1*(i+1)), fname, which);
      }
      /*
      insert(root, window, (i+1), fname, which);
      if (!NOREV){
      // reverse complement of this window is indexed by its negative
      // value ENDING POSITION FOR REVERSE COMPLEMENT
        rcomp(window, rcwindow);
        if (strcmp(window, rcwindow))
          insert(root, rcwindow, (-1*(i+1)), fname, which);
          }*/
    }
    i+=SLIDE_SIZE;
    if (i % 10000 == 0)
      fprintf(stderr,"\r [%i] of [%d]", i, glen);
  }
  fprintf(stderr, "\n");

  /* NOTE(review): genome is never released; the free was already disabled in
   * the original code and it is unclear whether anything keeps pointers into
   * it -- confirm before re-enabling. */
  //free(genome);
  fclose(fasta);
  free(window);
  free(rcwindow);

  return 1;
}
// Runs the four protein-inference steps in a fixed order. Every step takes no
// arguments and its result is not used here, so the steps presumably exchange
// data through state defined elsewhere -- TODO confirm against their definitions.
void proteinInfer(){
	// NOTE(review): presumably loads the protein sequences -- confirm
	readFasta();
	proteinMap();
	calcuPro();
	proteinRela();
}
int GetSeqsCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//get names you want to keep
		names = m->readAccnos(accnosfile);
		
		if (m->control_pressed) { return 0; }
        
        if (countfile != "") {
            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
            }
        }
		
		//read through the correct file and output lines you want to keep
		if (namefile != "")			{		readName();			}
		if (fastafile != "")		{		readFasta();		}
        if (fastqfile != "")		{		readFastq();		}
		if (groupfile != "")		{		readGroup();		}
        if (countfile != "")		{		readCount();		}
		if (alignfile != "")		{		readAlign();		}
		if (listfile != "")			{		readList();			}
		if (taxfile != "")			{		readTax();			}
		if (qualfile != "")			{		readQual();			}
		if (accnosfile2 != "")		{		compareAccnos();	}
        
        if (m->debug) { runSanityCheck(); }
		
		if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  } return 0; }
		
		
		if (outputNames.size() != 0) {
			m->mothurOutEndLine();
			m->mothurOut("Output File Names: "); m->mothurOutEndLine();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
			m->mothurOutEndLine();
			
			//set fasta file as new current fastafile
			string current = "";
			itTypes = outputTypes.find("fasta");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
			}
			
			itTypes = outputTypes.find("name");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
			}
			
			itTypes = outputTypes.find("group");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
			}
			
			itTypes = outputTypes.find("list");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
			}
			
			itTypes = outputTypes.find("taxonomy");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
			}
			
			itTypes = outputTypes.find("qfile");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
			}
			
            itTypes = outputTypes.find("count");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
			}
		}
		
		return 0;		
	}

	catch(exception& e) {
		m->errorOut(e, "GetSeqsCommand", "execute");
		exit(1);
	}
}
int RemoveGroupsCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//get groups you want to remove
		if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups);  }
		
		if (groupfile != "") {
			groupMap = new GroupMap(groupfile);
			groupMap->readMap();
			
			//make sure groups are valid
			//takes care of user setting groupNames that are invalid or setting groups=all
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			vector<string> checkedGroups;
            for (int i = 0; i < Groups.size(); i++) {
                if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); }
                else {  m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); }
            }
            
            if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; }
			else {
                Groups = checkedGroups;
                m->setGroups(Groups);
            }
            
			//fill names with names of sequences that are from the groups we want to remove 
			fillNames();
			
			delete groupMap;
		}else if (countfile != ""){
            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
            }
            CountTable ct;
            ct.readTable(countfile, true, false);
            if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
            
            vector<string> gNamesOfGroups = ct.getNamesOfGroups();
            SharedUtil util;
            util.setGroups(Groups, gNamesOfGroups);
            vector<string> namesOfSeqs = ct.getNamesOfSeqs();
            sort(Groups.begin(), Groups.end());
            
            for (int i = 0; i < namesOfSeqs.size(); i++) {
                vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]);
                if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you
                    names.insert(namesOfSeqs[i]);
                }
            }
        }

				
		if (m->control_pressed) { return 0; }
		
		//read through the correct file and output lines you want to keep
		if (namefile != "")			{		readName();		}
		if (fastafile != "")		{		readFasta();	}
		if (groupfile != "")		{		readGroup();	}
        if (countfile != "")		{		readCount();	}
		if (listfile != "")			{		readList();		}
		if (taxfile != "")			{		readTax();		}
		if (sharedfile != "")		{		readShared();	}
        if (designfile != "")		{		readDesign();	}
		
		if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } return 0; }
				
		if (outputNames.size() != 0) {
			m->mothurOutEndLine();
			m->mothurOut("Output File names: "); m->mothurOutEndLine();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
			m->mothurOutEndLine();
			
			//set fasta file as new current fastafile
			string current = "";
			itTypes = outputTypes.find("fasta");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
			}
			
			itTypes = outputTypes.find("name");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
			}
			
			itTypes = outputTypes.find("group");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
			}
			
			itTypes = outputTypes.find("list");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
			}
			
			itTypes = outputTypes.find("taxonomy");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
			}
			
			itTypes = outputTypes.find("shared");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
			}
            
            itTypes = outputTypes.find("design");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
			}
            
            itTypes = outputTypes.find("count");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
			}
		}
		
		return 0;		
	}
	
	catch(exception& e) {
		m->errorOut(e, "RemoveGroupsCommand", "execute");
		exit(1);
	}
}
Beispiel #9
0
int main(int argc, char **argv){
  char str[1000];
  FILE *fasta;
  FILE *hor;
  FILE *log;
  FILE *aligned;
  int i,j;
  int count;
  int nseq, nhor;
  int *freqs;
  int swapfreq;
  int mismatch;
  int trim;
  /* pattern match stuff */

  FILE *clusterFile;
  char cfbase[100];
  struct cluster *allclusters;
  struct cluster *prevcluster;
  struct cluster *current;
  struct cmember *cm;
  struct wgsread *allreads;
  struct wgsread *wgs1, *wgs2;
  struct asatseq *asat;
  struct asatseq *patternasat;
  char *wgsname;
  int wgsstart, wgsend;
  int iflag;
  int isInserted;
  int cluster_id;
  int pattern_id;
  int occurance;
  int isFINALHOR;

  /* pattern match stuff ends here */

  if (argc != 3 && argc!=4){
    fprintf(stderr, "Aligns sequences to the given center star given a two fasta-centroid files.\nThen dumps the alignments\n");
    fprintf(stderr, "%s [fasta file][hor fasta] <-trim>\n", argv[0]);
    exit(0);
  }
  
  trim = 0;
  fasta = fopen(argv[1], "r");
  hor = fopen(argv[2], "r");
  if (argc == 4 && !strcmp(argv[3], "-trim"))
    trim = 1;
  if (fasta == NULL || hor == NULL)
    return 0;


  strcpy(str, argv[1]);
  for (i=strlen(str)-1;i>=0;i--)
    if (str[i]=='.'){ 
      str[i] = 0;
      break;
    }

  if (trim)
    fprintf(stdout, "Will trim.\n");
  
  makevectors();  

  strcat(str, ".log");
  log = fopen(str, "w");
  sprintf(str, "center-%s", argv[1]);
  aligned = fopen(str, "w");

  readFasta(fasta, hor, &nseq, &nhor);
  fprintf(stderr, "%d monomers, and the center are read into memory.\n", nseq);

  for (i=0;i<nseq;i++){
    align(seqs[i], horseqs[0]);
    /* aligned seq is in Sp, aligned center is in Tp */
    fprintf(aligned, ">%s\n", names[i]);
    fprintf(log, "%s\t%s\n%s\t%s\n", names[i], Sp, hornames[0], Tp);
    for (j=strlen(Sp)-1;j>=0;j--){
      if (!trim)
	fprintf(aligned, "%c", Sp[j]);
      else if (Tp[j]!='-')
 	fprintf(aligned, "%c", Sp[j]);
     if ((strlen(Sp)-j)%60 == 0 && j!=strlen(Sp)-1)
	fprintf(aligned, "\n");
    }
    fprintf(aligned, "\n");    
    if (strchr(Tp, '-')!=NULL)
      fprintf(log, "%s\n", names[i]);
    fprintf(stderr, "\r%d\tof\t%d", (i+1), nseq);
  }
  fprintf(stderr, "\n");    
  fclose(log);
  fclose(aligned);
  return 1;
}