Пример #1
0
/*
 * Entry point of the "subopt" tool.
 *
 * Calculates all structures within a fixed range of the "algorithmic" mfe.
 * The algorithmic mfe is the minimum free energy of a structure, ignoring
 * symmetry corrections.
 *
 * The output is a list of the structures, including the difference from the
 * algorithmic mfe, followed by the symmetry factor and the corrected
 * energies, after adjusting for symmetries.  The list is sorted by the
 * corrected energies.
 *
 * The output file name is the input file name with its last three
 * characters (the ".in" suffix) replaced by ".subopt".
 *
 * Returns 0 on success.  Exits with status 1 after printing the help text,
 * or when the prefix cannot be read, the output file cannot be opened, too
 * many strand breaks are found, or memory cannot be allocated.  Aborts when
 * an input file was specified but could not be read.
 */
int main( int argc, char *argv[] ) {

  char seq[ MAXSEQLENGTH];
  int seqNum[ MAXSEQLENGTH+1];
  int isNicked[ MAXSEQLENGTH];
  int nNicks = 0;

  int nicks[MAXSTRANDS];
  int nickIndex;
  int nickedBase;
  int **etaN;
  int complexity;
  int length, tmpLength;
  float gap = -1;
  int i;
  int vs;
  char outFile[MAXLINE];
  char inputFile[ MAXLINE];
  char scanFormat[32];
  size_t inputLength, prefixLength;
  int inputFileSpecified;
  FILE *fp;

  //this struct will store all the structures within the given range
  dnaStructures mfeStructs = {NULL, 0, 0, 0, NAD_INFINITY};

  strcpy( inputFile, "");

  inputFileSpecified = ReadCommandLineNPK( argc, argv, inputFile);

  if(NupackShowHelp) {
    printf("Usage: subopt [OPTIONS] PREFIX\n");
    printf("Calculate and store all structures within the specified energy gap\n");
    printf("of the MFE structure.\n");
    printf("Example: subopt -multi -T 25 -material dna example\n");
    PrintNupackThermoHelp();
    PrintNupackUtilitiesHelp();
    exit(1);
  }

  if( !inputFileSpecified ) {
    printf("Enter output file prefix: ");
    // Limit the token length so that the prefix plus the ".in" suffix and
    // the terminating NUL always fit in inputFile.
    snprintf( scanFormat, sizeof scanFormat, "%%%ds", (int) MAXLINE - 4);
    if( scanf( scanFormat, inputFile) != 1) {
      fprintf( stderr, "Error reading output file prefix!\n");
      exit(1);
    }
    strcat(inputFile,".in"); // Here, .in is just a placeholder
  }

  // Without an input file the sequence is requested from the user; a
  // specified but unreadable input file is fatal.
  if( !inputFileSpecified ) {
    getUserInput( seq, &vs, &gap, NULL);
  }
  else if( !ReadInputFile( inputFile, seq, &vs, &gap, NULL, NULL) ) {
    abort();
  }

  // Replace the trailing three characters (".in") by ".subopt".  The length
  // is clamped so a name shorter than three characters cannot underflow, and
  // snprintf keeps the result inside outFile.
  inputLength = strlen( inputFile);
  prefixLength = inputLength >= 3 ? inputLength - 3 : 0;
  snprintf( outFile, sizeof outFile, "%.*s.subopt", (int) prefixLength,
            inputFile);

  header( argc, argv, "subopt", outFile);
  printInputs( argc, argv, seq, vs, &gap, NULL, outFile);

  // Add newline for stylistic reasons
  fp = fopen(outFile,"a");
  if( fp == NULL) {
    fprintf( stderr, "Error opening file %s!\n", outFile);
    exit(1);
  }
  fprintf(fp,"\n");
  fclose(fp);

  complexity = DO_PSEUDOKNOTS ? 5 : 3;

  tmpLength = length = (int) strlen( seq);
  convertSeq(seq, seqNum, tmpLength);

  mfeFullWithSym_SubOpt( seqNum, tmpLength, &mfeStructs, complexity,
                         DNARNACOUNT, DANGLETYPE,
                         TEMP_K - ZERO_C_IN_KELVIN,
                         vs, (DBL_TYPE) gap, 0, SODIUM_CONC, MAGNESIUM_CONC,
                         USE_LONG_HELIX_FOR_SALT_CORRECTION);

  //the rest is for printing purposes

  // Each '+' in seq marks a strand break.  It is not counted in length, and
  // the base preceding it (indexed with the '+' characters removed) is
  // flagged in isNicked.
  for( i = 0; i < tmpLength; i++) {
    isNicked[i] = 0;
    if( seq[i] == '+') {
      nickedBase = i - nNicks - 1;
      length--;
      nNicks++;
      // A '+' with no base before it would otherwise write to isNicked[-1].
      if( nickedBase >= 0) {
        isNicked[ nickedBase] = 1;
      }
    }
  }

  //initialize nicks
  for( i = 0; i < MAXSTRANDS; i++) {
    nicks[i] = -1;
  }

  // Collect the positions of the flagged bases, in increasing order.
  nickIndex = 0;
  for( i = 0; i < length; i++) {
    if( isNicked[i]) {
      if( nickIndex >= MAXSTRANDS) {
        fprintf( stderr, "Error: too many strand breaks (limit %d)!\n",
                 (int) MAXSTRANDS);
        exit(1);
      }
      nicks[ nickIndex++] = i;
    }
  }

  //overkill, but convenient
  etaN = (int**) malloc( (length*(length+1)/2 + (length+1))*sizeof( int*));
  if( etaN == NULL) {
    fprintf( stderr, "Error: unable to allocate memory for etaN!\n");
    exit(1);
  }
  InitEtaN( etaN, nicks, length);

  PrintDnaStructures( &mfeStructs, etaN, nicks, vs, outFile);

  clearDnaStructures( &mfeStructs);

  return 0;
}
Пример #2
0
/*
 * Entry point of the "sample" tool.
 *
 * Reads a sequence, runs the partition function calculation with sampling
 * enabled (nupack_sample = 1) and appends the resulting free energy, the
 * number of samples and the sampled structures (one per line) to the
 * ".sample" output file.
 *
 * The output file name is the input file name with its last three
 * characters (the ".in" suffix) replaced by ".sample".
 *
 * Returns 0 on success.  Exits with status 1 after printing the help text,
 * or when the prefix cannot be read, memory cannot be allocated, or the
 * output file cannot be opened.  Aborts when an input file was specified but
 * could not be read.
 */
int main( int argc, char *argv[] ) {

  char seqChar[MAXSEQLENGTH];
  int seqNum[MAXSEQLENGTH+1];

  DBL_TYPE pf;
  int complexity = 3;
  int tmpLength;
  char inputFile[ MAXLINE];
  int vs;
  char sampleFile[MAXLINE];
  char scanFormat[32];
  size_t inputLength, prefixLength;

  int inputFileSpecified;
  FILE *F_sample = NULL; // sample output file
  int index;
  int ns1,ns2;
  struct timeval rand_time;

  strcpy( inputFile, "");

  // Defaults, set before the command line is parsed.
  // NOTE(review): presumably ReadCommandLineNPK may override these - confirm.
  nupack_sample = 1;
  nupack_num_samples = 10;

  // Default random seed derived from the current time in microseconds.
  gettimeofday(&rand_time,0);
  nupack_random_seed = (rand_time.tv_sec)*1000000 + rand_time.tv_usec;

  inputFileSpecified = ReadCommandLineNPK( argc, argv, inputFile);

  if(NupackShowHelp) {
    printf("Usage: sample [OPTIONS] PREFIX\n");
    printf("Randomly sample unpseudoknotted structures from the equilibrium distribution\n");
    printf("Example: sample -multi -T 25 -material dna -samples 100 example\n");
    PrintNupackThermoHelp();
    PrintNupackUtilitiesHelp();
    exit(1);
  }

  if( !inputFileSpecified ) {
    printf("Enter output file prefix: ");
    // Limit the token length so that the prefix plus the ".in" suffix and
    // the terminating NUL always fit in inputFile.
    snprintf( scanFormat, sizeof scanFormat, "%%%ds", (int) MAXLINE - 4);
    if( scanf( scanFormat, inputFile) != 1) {
      fprintf( stderr, "Error reading output file prefix!\n");
      exit(1);
    }
    strcat(inputFile,".in"); // Here, .in is just a placeholder
  }

  // Without an input file the sequence is requested from the user; a
  // specified but unreadable input file is fatal.
  if( !inputFileSpecified ) {
    getUserInput( seqChar, &vs, NULL, NULL);
  }
  else if( !ReadInputFile( inputFile, seqChar, &vs, NULL, NULL, NULL) ) {
    abort();
  }

  // Replace the trailing three characters (".in") by ".sample".  The length
  // is clamped so a name shorter than three characters cannot underflow, and
  // snprintf keeps the result inside sampleFile.
  inputLength = strlen( inputFile);
  prefixLength = inputLength >= 3 ? inputLength - 3 : 0;
  snprintf( sampleFile, sizeof sampleFile, "%.*s.sample", (int) prefixLength,
            inputFile);

  header( argc, argv, "sample", sampleFile);
  printInputs( argc, argv, seqChar, vs, NULL, NULL,sampleFile);

  tmpLength = (int) strlen( seqChar);
  convertSeq(seqChar, seqNum, tmpLength);

  // ns1 and ns2 are not used afterwards; the calls are kept in case they
  // have side effects that are not visible from this function.
  getSequenceLength(seqChar, &ns1);
  getSequenceLengthInt(seqNum, &ns2);

  init_genrand(nupack_random_seed);

  pairPr = NULL;
  if (complexity != 3) {
    printf("Sampling supported only for complexity = 3. Exiting\n");
    exit(1);
  }

  // One zero-initialised buffer of tmpLength+1 characters per sample.
  nupack_sample_list = (char **)calloc(nupack_num_samples, sizeof(char *));
  if (nupack_sample_list == NULL) {
    fprintf( stderr, "Error: unable to allocate memory for the sample list!\n");
    exit(1);
  }
  printf("Number of Samples = %i\n",nupack_num_samples);
  for(index = 0 ; index < nupack_num_samples ; index++) {
    nupack_sample_list[index] = (char *) calloc(tmpLength+1,sizeof(char));
    if (nupack_sample_list[index] == NULL) {
      fprintf( stderr, "Error: unable to allocate memory for sample %i!\n",
               index);
      exit(1);
    }
  }

  printf("Started Calculation\n");
  pf = pfuncFull(seqNum, complexity, DNARNACOUNT, DANGLETYPE, TEMP_K - ZERO_C_IN_KELVIN, 0,
      SODIUM_CONC, MAGNESIUM_CONC, USE_LONG_HELIX_FOR_SALT_CORRECTION);
  printf("Finished Calculation\n");

  if ((F_sample = fopen(sampleFile,"a")) == NULL) {
    printf("Error opening file %s!\n",sampleFile);
    exit(1);
  }

  // Print the free energy to the output file
  fprintf(F_sample,"%s Free energy: %.8Le kcal/mol\n",
          COMMENT_STRING,-kB*TEMP_K*logl(pf));
  fprintf(F_sample,"%s Number of Samples: %i\n",COMMENT_STRING,nupack_num_samples);

  // Put newline for stylistic reasons
  fprintf(F_sample,"\n");

  // Write every sample on its own line and release it immediately.
  for(index = 0 ; index < nupack_num_samples ; index++) {
    fprintf(F_sample, "%s\n",nupack_sample_list[index]);
    free(nupack_sample_list[index]);
    nupack_sample_list[index] = NULL;
  }
  free(nupack_sample_list);
  nupack_sample_list = NULL;

  // Close the output file explicitly instead of relying on process exit.
  fclose(F_sample);
  F_sample = NULL;

#ifdef GC_DEBUG
  CHECK_LEAKS();
#endif
  return 0;
}
Пример #3
0
// Writes the results for every OTU whose finalTau entry is positive to three
// files:
//   - fastaFileName: '>' + name and the sequence of the OTU's representative
//   - namesFileName: representative name, a tab, then the comma-separated
//                    redundantNames entries of the representative followed by
//                    those of the other OTU members
//   - uMapFileName:  an "ideal_seq_<n>" header line with finalTau, one line
//                    for the representative, one line per other member
//                    (name, frequency, difference count, degapped sequence)
//                    and a blank line
//
// The representative is centroids[i], unless the most abundant member of the
// OTU is a different sequence at distance 0 from it, in which case that most
// abundant member is used.  Other members are listed in descending order as
// defined by freqData's comparison.
//
// distances is indexed as a flattened numSeqs x numSeqs matrix
// (row * numSeqs + column).
//
// Returns 0, both on completion and when m->control_pressed is set part-way
// through (output files are then left incomplete).  Exceptions are reported
// through m->errorOut and terminate the process with exit(1).
int seqNoise::writeOutput(string fastaFileName, string namesFileName, string uMapFileName, vector<int> finalTau, vector<int> centroids, vector<int> otuData, vector<string> sequences, vector<string> uniqueNames, vector<string> redundantNames, vector<int> seqFreq, vector<double>& distances){
	try {
		int numOTUs = finalTau.size();
		int numSeqs = uniqueNames.size();
		
		ofstream fastaFile(fastaFileName.c_str());
		ofstream namesFile(namesFileName.c_str());
		ofstream uMapFile(uMapFileName.c_str());
		
		// For each OTU, the highest frequency seen and the sequence that has it
		// (the first such sequence wins ties).
		vector<int> maxSequenceAbund(numOTUs, 0);
		vector<int> maxSequenceIndex(numOTUs, 0);
		
		for(int i=0;i<numSeqs;i++){
			if (m->control_pressed) { return 0; }
			
			int otu = otuData[i];
			if(maxSequenceAbund[otu] < seqFreq[i]){
				maxSequenceAbund[otu] = seqFreq[i];
				maxSequenceIndex[otu] = i;
			}
		}
		
		int count = 1;	// running number used in the "ideal_seq_" labels
		
		for(int i=0;i<numOTUs;i++){
			if (m->control_pressed) { return 0; }
			
			if(finalTau[i] <= 0){ continue; }
			
			// Prefer the most abundant member when it is at distance 0 from the
			// centroid. The flattened index is computed in the vector's size
			// type so that row * numSeqs cannot overflow int.
			if(maxSequenceIndex[i] != centroids[i]){
				vector<double>::size_type row = maxSequenceIndex[i];
				vector<double>::size_type column = centroids[i];
				vector<double>::size_type width = numSeqs;
				
				if(distances[row * width + column] == 0){
					centroids[i] = maxSequenceIndex[i];
				}
			}
			
			int index = centroids[i];
			
			fastaFile << '>' << uniqueNames[index] << endl << sequences[index] << endl;
			namesFile << uniqueNames[index] << '\t';
			
			string refSeq = sequences[index];
			string redundantSeqs = redundantNames[index];
			
			// Every other member of this OTU, paired with its frequency.
			vector<freqData> frequencyData;
			
			for(int j=0;j<numSeqs;j++){
				if(otuData[j] == i && j != index){
					frequencyData.push_back(freqData(j, seqFreq[j]));
				}
			}
			// Sorting through reverse iterators yields descending order.
			sort(frequencyData.rbegin(), frequencyData.rend());
			
			string refDegap = degapSeq(refSeq);
			vector<int> rUnalign = convertSeq(refDegap);
			
			uMapFile << "ideal_seq_" << count << '\t' << finalTau[i] << endl;
			uMapFile << uniqueNames[index] << '\t' << seqFreq[index] << "\t0\t" << refDegap << endl;
			
			for(vector<freqData>::size_type j=0;j<frequencyData.size();j++){
				if (m->control_pressed) { return 0; }
				
				int member = frequencyData[j].index;
				
				redundantSeqs += ',' + redundantNames[member];
				
				uMapFile << uniqueNames[member] << '\t' << seqFreq[member] << '\t';
				
				string querySeq = sequences[member];
				
				string queryDegap = degapSeq(querySeq);
				vector<int> qUnalign = convertSeq(queryDegap);
				
				// Difference count between this member and the representative,
				// both converted from their degapped sequences.
				int udiffs = countDiffs(qUnalign, rUnalign);
				uMapFile << udiffs << '\t' << queryDegap << endl;
			}
			
			uMapFile << endl;
			namesFile << redundantSeqs << endl;
			count++;
		}
		fastaFile.close();
		namesFile.close();
		uMapFile.close();
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "seqNoise", "writeOutput");
		exit(1);
	}
}