// Appends the currently sampled structure of each of the two sequences to
// that sequence's ct file. The files are opened in append mode, so calling
// this once per sample enumerates all sampled cts in one file per sequence.
//
// Output path for sequence X: ppf_cli->seqX_sample_ct_op when it is set,
// otherwise "<seqX_op_file_prefix>_sampled_cts.ct".
//
// Each record is a header line "<N>\t <prefix>_<_sample_id>\t Energy=0.0"
// followed by one line per nucleotide (1-based):
//     index, nucleotide, index-1, index+1, seqX_sampled_ct_bps[index], index
//
// _sample_id: identifier written into the record header.
void t_stoch_sampled_structures::dump_sampled_cts(int _sample_id)
{
	if(_DUMP_STOCH_SAMPLED_STRUCTURES_MESSAGES_)
		printf("Dumping first structure.\n");

	// Resolve the output path for the first sequence. snprintf bounds the
	// write so that an over-long prefix/path cannot overflow the buffer.
	char ct1_fp[4096];
	if(this->ppf_cli->seq1_sample_ct_op == NULL)
	{
		snprintf(ct1_fp, sizeof(ct1_fp), "%s_sampled_cts.ct", this->ppf_cli->seq1_op_file_prefix);
	}
	else
	{
		snprintf(ct1_fp, sizeof(ct1_fp), "%s", this->ppf_cli->seq1_sample_ct_op);
	}

	// NOTE(review): other callers of open_f test its result for NULL, so the
	// same is done here instead of writing through a NULL stream.
	FILE* ct1_file = open_f(ct1_fp, "a");
	if(ct1_file == NULL)
	{
		printf("Could not open %s for dumping sampled cts.\n", ct1_fp);
		return;
	}

	fprintf(ct1_file, "%d\t %s_%d\t Energy=0.0\n", this->N1, this->ppf_cli->seq1_op_file_prefix, _sample_id);

	// Dump all base pairing info for ct1.
	for(int cnt = 1; cnt <= N1; cnt++)
	{
		// Sth. like following:
		//     1 G       0    2   73    1
		fprintf(ct1_file, "%d %c\t%d\t%d\t%d\t%d\n", cnt, this->seq_man->seq1->nucs[cnt], cnt-1, cnt+1, this->seq1_sampled_ct_bps[cnt], cnt);
	}

	fclose(ct1_file);

	if(_DUMP_STOCH_SAMPLED_STRUCTURES_MESSAGES_)
		printf("Dumping second sampled structure.\n");

	// Same procedure for the second sequence.
	char ct2_fp[4096];
	if(this->ppf_cli->seq2_sample_ct_op == NULL)
	{
		snprintf(ct2_fp, sizeof(ct2_fp), "%s_sampled_cts.ct", this->ppf_cli->seq2_op_file_prefix);
	}
	else
	{
		snprintf(ct2_fp, sizeof(ct2_fp), "%s", this->ppf_cli->seq2_sample_ct_op);
	}

	FILE* ct2_file = open_f(ct2_fp, "a");
	if(ct2_file == NULL)
	{
		printf("Could not open %s for dumping sampled cts.\n", ct2_fp);
		return;
	}

	fprintf(ct2_file, "%d\t %s_%d\t Energy=0.0\n", this->N2, this->ppf_cli->seq2_op_file_prefix, _sample_id);

	// Dump all base pairing info for ct2.
	for(int cnt = 1; cnt <= N2; cnt++)
	{
		// Sth. like following:
		//     1 G       0    2   73    1
		fprintf(ct2_file, "%d %c\t%d\t%d\t%d\t%d\n", cnt, this->seq_man->seq2->nucs[cnt], cnt-1, cnt+1, this->seq2_sampled_ct_bps[cnt], cnt);
	}

	fclose(ct2_file);
}
Exemplo n.º 2
0
/*
 * Write the nonzeros held in 'tt' to the text file '<rank>.part', one nonzero
 * per line: the index of every mode followed by the value.
 *
 * Each stored index is passed through tt->indmap[m] and then perm->iperms[m]
 * whenever those tables are non-NULL, and is written 1-based.
 */
void mpi_write_part(
  sptensor_t const * const tt,
  permutation_t const * const perm,
  rank_info const * const rinfo)
{
  /* the output file is named after this rank */
  char fname[256];
  sprintf(fname, "%d.part", rinfo->rank);
  FILE * const fpart = open_f(fname, "w");

  for(idx_t nz=0; nz < tt->nnz; ++nz) {
    for(idx_t mode=0; mode < tt->nmodes; ++mode) {
      idx_t coord = tt->ind[mode][nz];

      /* undo the index map first, then the permutation */
      coord = (tt->indmap[mode] == NULL) ? coord : tt->indmap[mode][coord];
      coord = (perm->iperms[mode] == NULL) ? coord : perm->iperms[mode][coord];

      /* files use 1-based indices */
      fprintf(fpart, "%"SPLATT_PF_IDX" ", coord + 1);
    }

    /* the value terminates the line */
    fprintf(fpart, "%"SPLATT_PF_VAL"\n", tt->vals[nz]);
  }

  fclose(fpart);
}
Exemplo n.º 3
0
// Writes the matrix to the binary file fp as a flat list of
// (int row, int column, double value) records, with 1-based indices.
//
// Every (row, column) position is written regardless of symmetry. For a
// symmetric matrix, positions below the diagonal take their value from the
// mirrored position x(column, row).
void t_matrix::dump_sparse_matrix(char* fp)
{
	FILE* f_out = open_f(fp, "wb");

	for(int row = 1; row <= this->height; row++)
	{
		for(int col = 1; col <= this->width; col++)
		{
			// Below-diagonal entries of a symmetric matrix are read mirrored.
			double value = (row > col && this->symmetric) ? this->x(col, row) : this->x(row, col);

			fwrite((void*)&row, sizeof(int), 1, f_out);
			fwrite((void*)&col, sizeof(int), 1, f_out);
			fwrite((void*)&value, sizeof(double), 1, f_out);
		} // col loop
	} // row loop

	fclose(f_out);
}
Exemplo n.º 4
0
CTEST2(graph, graph_convert)
{
  for(idx_t i=0; i < data->ntensors; ++i) {
    sptensor_t * const tt = data->tensors[i];

    splatt_graph * graph = graph_convert(tt);

    /* count vtxs */
    vtx_t nv = 0;
    for(idx_t m=0; m < tt->nmodes; ++m) {
      nv += (vtx_t) tt->dims[m];
    }
    ASSERT_EQUAL(nv, graph->nvtxs);

    /* now write graph to tmp.txt and compare against good graph */
    FILE * fout = open_f(TMP_FILE, "w");
    graph_write_file(graph, fout);
    fclose(fout);

    FILE * fin = open_f(TMP_FILE, "r");
    FILE * gold = open_f(graphs[i], "r");

    /* check file lengths lengths */
    fseek(fin , 0 , SEEK_END);
    fseek(gold , 0 , SEEK_END);
    long length_fin  = ftell(fin);
    long length_gold = ftell(gold);
    ASSERT_EQUAL(length_gold, length_fin);
    rewind(fin);
    rewind(gold);

    /* compare each byte */
    char fbyte;
    char gbyte;
    for(long byte=0; byte < length_fin; ++byte) {
      fread(&fbyte, 1, 1, fin);
      fread(&gbyte, 1, 1, gold);
      ASSERT_EQUAL(gbyte, fbyte);
    }

    /* clean up */
    fclose(gold);
    fclose(fin);
    remove(TMP_FILE);
    graph_free(graph);
  }
}
Exemplo n.º 5
0
// Writes the full matrix to the text file fp: one line per row, every entry
// printed with "%lf " (so each line ends with a trailing space). Rows and
// columns are visited with 1-based indices.
void t_matrix::dump_matrix(char* fp)
{
	FILE* f_out = open_f(fp, "w");

	printf("Dumping to %s\n", fp);

	int row = 1;
	while(row <= this->height)
	{
		int col = 1;
		while(col <= this->width)
		{
			fprintf(f_out, "%lf ", this->x(row, col));
			col++;
		} // column loop

		// End of the current row.
		fprintf(f_out, "\n");
		row++;
	} // row loop

	fclose(f_out);
}
Exemplo n.º 6
0
/**
* @brief Read partition ids from a file on rank 0 and hand each rank its share.
*
*        Rank 0 reads 'target_nnz' ids for every other rank (in rank order) and
*        sends them, then reads 'ttbuf->nnz' ids for itself. All other ranks
*        receive 'ttbuf->nnz' ids from rank 0.
*
*        NOTE(review): rank 0 sends target_nnz ids but receivers post a receive
*        of ttbuf->nnz ids, so this presumably relies on every non-root rank
*        holding exactly target_nnz nonzeros -- confirm against the caller.
*
* @param ttbuf The local tensor; only its nnz is used.
* @param pfname The file of whitespace-separated integer partition ids.
* @param rinfo MPI rank information (rank, npes, global_nnz, comm_3d, status).
*
* @return A splatt_malloc'ed array of partition ids owned by the caller. The
*         program exits with status 1 if the file runs out of ids.
*/
static int * p_distribute_parts(
  sptensor_t * const ttbuf,
  char const * const pfname,
  rank_info * const rinfo)
{
  /* root may have more than target_nnz */
  idx_t const target_nnz = rinfo->global_nnz / rinfo->npes;
  int * parts = (int *) splatt_malloc(SS_MAX(ttbuf->nnz, target_nnz) * sizeof(int));

  if(rinfo->rank == 0) {
    FILE * fin = open_f(pfname, "r");

    /* send to all other ranks */
    for(int p=1; p < rinfo->npes; ++p) {
      /* read into buffer */
      for(idx_t n=0; n < target_nnz; ++n) {
        /* fscanf returns EOF (not 0) when the input is exhausted, so anything
         * other than exactly one converted item is a failure */
        if(fscanf(fin, "%d", &(parts[n])) != 1) {
          fprintf(stderr, "SPLATT ERROR: not enough elements in '%s'\n",
              pfname);
          exit(1);
        }
      }
      MPI_Send(parts, target_nnz, MPI_INT, p, 0, rinfo->comm_3d);
    }

    /* now read my own part info */
    for(idx_t n=0; n < ttbuf->nnz; ++n) {
      if(fscanf(fin, "%d", &(parts[n])) != 1) {
        fprintf(stderr, "SPLATT ERROR: not enough elements in '%s'\n",
            pfname);
        exit(1);
      }
    }
    fclose(fin);
  } else {
    /* receive part info */
    MPI_Recv(parts, ttbuf->nnz, MPI_INT, 0, 0, rinfo->comm_3d,
        &(rinfo->status));
  }
  return parts;
}
Exemplo n.º 7
0
/**
* @brief Read a tensor from 'ifname' and distribute it over 'comm'.
*
*        Only rank 0 opens the file; the reader selected by the file type
*        (text or binary coordinate format) is called on every rank. After
*        reading, each rank sets tt->dims[m] to one more than the largest
*        index it holds in mode m (0 if it holds no nonzeros).
*
* @param ifname The tensor file.
* @param comm The communicator to distribute over.
*
* @return This rank's tensor, or NULL if the file type is not one of the
*         handled formats or the reader returned NULL.
*/
sptensor_t * mpi_simple_distribute(
  char const * const ifname,
  MPI_Comm comm)
{
  int rank, npes;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &npes);

  sptensor_t * tt = NULL;

  FILE * fin = NULL;
  if(rank == 0) {
    fin = open_f(ifname, "r");
  }

  switch(get_file_type(ifname)) {
  case SPLATT_FILE_TEXT_COORD:
    tt = p_tt_mpi_read_file(fin, comm);
    break;
  case SPLATT_FILE_BIN_COORD:
    tt = p_tt_mpi_read_binary_file(fin, comm);
    break;
  default:
    /* unsupported file type: tt stays NULL and is reported below */
    break;
  }

  if(rank == 0) {
    fclose(fin);
  }

  /* nothing was read -- report instead of dereferencing NULL below */
  if(tt == NULL) {
    if(rank == 0) {
      fprintf(stderr, "SPLATT ERROR: could not read tensor from '%s'\n",
          ifname);
    }
    return NULL;
  }

  /* set dims info */
  #pragma omp parallel for schedule(static, 1)
  for(idx_t m=0; m < tt->nmodes; ++m) {
    idx_t const * const inds = tt->ind[m];
    /* start from 0 so that an empty local tensor never reads inds[0] */
    idx_t dim = 0;
    for(idx_t n=0; n < tt->nnz; ++n) {
      dim = SS_MAX(dim, 1 + inds[n]);
    }
    tt->dims[m] = dim;
  }

  return tt;
}
Exemplo n.º 8
0
/**
* @brief Count the nonzeros of X that belong to this partition.
*
*        A nonzero is counted when, for at least one mode m, its (0-based)
*        index lies in [sstarts[m], sends[m]). Empty lines and lines starting
*        with '#' are ignored.
*
* @param fname The name of the file containing X.
* @param nmodes The number of modes of X.
* @param sstarts First owned slice of each mode (inclusive).
* @param sends End of the owned slices of each mode (exclusive).
*
* @return The number of nonzeros with an index inside the owned range of at
*         least one mode.
*/
static idx_t p_count_my_nnz_1d(
  char const * const fname,
  idx_t const nmodes,
  idx_t const * const sstarts,
  idx_t const * const sends)
{
  FILE * fin = open_f(fname, "r");

  char * cursor = NULL;
  char * line = NULL;
  size_t len = 0;
  ssize_t nchars;

  idx_t owned = 0;
  while((nchars = getline(&line, &len, fin)) != -1) {
    /* ignore blank lines and comments */
    if(nchars <= 1 || line[0] == '#') {
      continue;
    }

    cursor = line;
    idx_t m = 0;
    while(m < nmodes) {
      /* indices in the file are 1-based */
      idx_t const ind = strtoull(cursor, &cursor, 10) - 1;
      if(ind >= sstarts[m] && ind < sends[m]) {
        /* one matching mode is enough to own the nonzero */
        ++owned;
        break;
      }
      ++m;
    }

    /* step over the value field */
    strtod(cursor, &cursor);
  }
  fclose(fin);

  free(line);

  return owned;
}
Exemplo n.º 9
0
// Loads matrix entries from the binary file fp, which holds a sequence of
// (int i, int j, double value) records.
//
// Reading stops at the first record whose i cannot be read. A record that is
// cut off after i or after j prints a message and terminates the process.
//
// For a symmetric matrix only records with j > i are stored; all other
// records are read and dropped. For a non-symmetric matrix every record is
// stored at x(i, j).
// NOTE(review): with symmetric set, diagonal records (i == j) are dropped as
// well -- confirm this is intended.
void t_matrix::load_sparse_matrix(char* fp)
{
	FILE* f_in = open_f(fp, "rb");

	int row;
	int col;
	double value;

	for(;;)
	{
		// A missing row index marks the regular end of the file.
		if(fread(&row, sizeof(int), 1, f_in) != 1)
		{
			break;
		}

		if(fread(&col, sizeof(int), 1, f_in) != 1)
		{
			printf("Could not read current j in %s @ %s(%d)\n", fp, __FILE__, __LINE__);
			exit(0);
		}

		if(fread(&value, sizeof(double), 1, f_in) != 1)
		{
			printf("Could not read current value in %s @ %s(%d)\n", fp, __FILE__, __LINE__);
			exit(0);
		}

		// Symmetric matrices accept only entries above the diagonal.
		if(!this->symmetric || col > row)
		{
			this->x(row, col) = value;
		}
	} // record reading loop.

	fclose(f_in);
}
Exemplo n.º 10
0
/**
* @brief Read this partition's nonzeros of X into tt.
*
*        Lines are parsed into slot 'nnzread' of tt; the slot is kept (and
*        nnzread advanced) only when some mode index falls inside the owned
*        range, otherwise the next parsed line overwrites it. Reading stops
*        once tt->nnz nonzeros are kept or the file ends. Empty lines and
*        lines starting with '#' are ignored.
*
* @param fname The file containing X.
* @param tt The tensor structure (must be pre-allocated).
* @param sstarts Array of starting slices, inclusive (one for each mode).
* @param sends Array of ending slices, exclusive (one for each mode).
*/
static void p_read_tt_part_1d(
  char const * const fname,
  sptensor_t * const tt,
  idx_t const * const sstarts,
  idx_t const * const sends)
{
  idx_t const nnz = tt->nnz;
  idx_t const nmodes = tt->nmodes;

  char * cursor = NULL;
  char * line = NULL;
  size_t len = 0;

  FILE * fin = open_f(fname, "r");
  idx_t nnzread = 0;
  while(nnzread < nnz) {
    ssize_t const nchars = getline(&line, &len, fin);
    if(nchars == -1) {
      break;
    }

    /* ignore blank lines and comments */
    if(nchars <= 1 || line[0] == '#') {
      continue;
    }

    int owned = 0;
    cursor = line;
    for(idx_t m=0; m < nmodes; ++m) {
      /* indices in the file are 1-based */
      idx_t const ind = strtoull(cursor, &cursor, 10) - 1;
      tt->ind[m][nnzread] = ind;
      if(ind >= sstarts[m] && ind < sends[m]) {
        owned = 1;
      }
    }
    tt->vals[nnzread] = strtod(cursor, &cursor);

    /* only advance when the nonzero is ours; otherwise the slot is reused */
    if(owned) {
      ++nnzread;
    }
  }
  fclose(fin);
  free(line);
}
Exemplo n.º 11
0
// Reads lines from seq_file into line_buffer (capacity MAX_HEADER_LENGTH)
// until a line that does not start with ';' has been read.
// Returns false if the file ends (or a read error occurs) before that.
static bool read_past_seq_comments(FILE* seq_file, char* line_buffer)
{
	do
	{
		if(fgets(line_buffer, MAX_HEADER_LENGTH, seq_file) == NULL)
		{
			return(false);
		}
	} while(line_buffer[0] == ';');

	return(true);
}

// Loads the .seq file seq_fp into this structure.
//
// It is very important to make sure that a seq file is in following format:
// ; ...
// [ THIS LINE SHOULD NOT CONTAIN SEQUENCE DATA, IT SHOULD BE A LABEL OR EMPTY LINE]
// [ EMPTY LINE OR SEQUENCE DATA]
//
// Sequence data is read character by character: '\n' and ' ' are skipped and
// a '1' (or end of file) terminates the sequence.
// NOTE(review): any other character, including '\r' and '\t', is stored as a
// nucleotide -- confirm whether Windows line endings can reach this code.
//
// On return: ctlabel holds the label line without its newline, numofbases the
// sequence length, nucs the 1-based, zero-terminated nucleotides (nucs[0] is
// '#'), numseq the numeric codes (A=1, C=2, G=3, U/T=4, I=5, anything else 0),
// basepr is all 0 and unpaired_forced is true exactly for lowercase a/c/g/u/t.
//
// Exits the process (status 1) if the file cannot be verified or opened, or
// ends before a label line is found.
void t_structure::openseq(char* seq_fp)
{
	// Very strict measure: Exit if sequence file is not verifiable.
	if(!this->verify_seq(seq_fp))
	{
		printf("Could not verify sequence file %s @ %s(%d)\n", seq_fp, __FILE__, __LINE__);
		exit(1);
	}

	FILE* seq_file = open_f(seq_fp, "r");
	if(seq_file == NULL)
	{
		printf("seq file %s does not exist @ %s(%d).\n", seq_fp, __FILE__, __LINE__);
		exit(1);
	}

	this->numseq = NULL;
	this->nucs = NULL;
	this->basepr = NULL;
	this->danglings_on_branch = NULL;
	this->danglings_on_mb_closure = NULL;
	this->stackings_on_branch = NULL;
	this->stackings_on_mb_closure = NULL;
	this->unpaired_forced = NULL;

	// Skip the ';' comment lines; the first other line is the label.
	// A file that ends here would otherwise loop forever on the stale buffer.
	char line_buffer[MAX_HEADER_LENGTH];
	if(!read_past_seq_comments(seq_file, line_buffer))
	{
		printf("seq file %s ends before the label line @ %s(%d).\n", seq_fp, __FILE__, __LINE__);
		exit(1);
	}

	// Store the label without its trailing new line character, if any.
	this->ctlabel = (char*)malloc(sizeof(char) * MAX_HEADER_LENGTH);
	strcpy(this->ctlabel, line_buffer);
	int l_label = (int)strlen(this->ctlabel);
	if(l_label > 0 && this->ctlabel[l_label - 1] == '\n')
	{
		this->ctlabel[l_label - 1] = 0;
	}
	this->check_set_label();

	// First pass: determine the length of the sequence.
	char cur_char = 0;
	this->numofbases = 0;
	while(fscanf(seq_file, "%c", &cur_char) != EOF)
	{
		// '1' is the end of sequence marker.
		if(cur_char == '1')
		{
			break;
		}

		if(cur_char != '\n' && cur_char != ' ')
		{
			this->numofbases++;
		}
	}

	// All arrays are 1-based; nucs additionally holds a terminating 0.
	this->numseq = (int*)malloc(sizeof(int) * (this->numofbases + 1));
	this->nucs = (char*)malloc(sizeof(char) * (this->numofbases + 2));
	this->basepr = (int*)malloc(sizeof(int) * (this->numofbases + 1));
	this->unpaired_forced = (bool*)malloc(sizeof(bool) * (this->numofbases + 2));

	// Set file position to data position.
	// Cannot use fsetpos and fgetpos because for some reason they are messing up indices
	// when a linux text file is taken to a windows machine.
	fseek(seq_file, 0, SEEK_SET);

	// Read all information again before sequence data.
	if(!read_past_seq_comments(seq_file, line_buffer))
	{
		printf("seq file %s ends before the label line @ %s(%d).\n", seq_fp, __FILE__, __LINE__);
		exit(1);
	}

	this->nucs[0] = '#';
	int i = 1; // Sequence index, starts from 1.

	// Second pass: store the sequence data. The index bound keeps the writes
	// inside the arrays sized by the first pass.
	while(i <= this->numofbases && fscanf(seq_file, "%c", &cur_char) != EOF)
	{
		// Check end of sequence marker.
		if(cur_char == '1')
		{
			break;
		}

		// Skip the separators.
		if(cur_char == '\n' || cur_char == ' ')
		{
			continue;
		}

		this->nucs[i] = cur_char;

		// Lowercase nucleotides are forced to be unpaired.
		this->unpaired_forced[i] = (cur_char == 'a' ||
									cur_char == 'c' ||
									cur_char == 'g' ||
									cur_char == 'u' ||
									cur_char == 't');

		// Convert current base character into number value, from Dave's structure code.
		// The cast keeps toupper's argument in the range it is defined for.
		switch(toupper((unsigned char)cur_char))
		{
			case 'A':
				this->numseq[i] = 1;
				break;
			case 'C':
				this->numseq[i] = 2;
				break;
			case 'G':
				this->numseq[i] = 3;
				break;
			case 'U':
			case 'T':
				this->numseq[i] = 4;
				break;
			case 'I':
				this->numseq[i] = 5;
				break;
			default:
				this->numseq[i] = 0; // Unknown nucleotides get code 0.
				break;
		}

		this->basepr[i] = 0; // No base pairing information.

		i++;
	}

	// This is for ending sequences.
	this->nucs[i] = 0;

	fclose(seq_file);
}
Exemplo n.º 12
0
/**
  * File navigation menu.
  *
  * Repeatedly lists the entries of `dir` on the LCD with the entry at index
  * `selected` highlighted, waits for a button, and then either moves the
  * selection (UP/DOWN), or on ENTER descends into the chosen directory or
  * opens the chosen file and draws it as a bitmap until CLOSE is pressed.
  *
  * The outer loop never terminates, so this function does not return.
  *
  * NOTE(review): `buffer` is not declared in this function; presumably a
  * file-scope scratch buffer used by readdir -- confirm.
  */
void printdirs(struct DIR * dir){
	char namebuf [12];
	uint32_t size = 0;
	uint8_t bitmap = 0, selected = 0, cnt = 0, ret_code = 0, is=0;
	struct dirent * crt_dir;

	do{
		/* Display the contents of the current directory */
		rewinddir(dir);
		LCD_clear();
		cnt = 0;
		while(1){
			crt_dir = readdir( dir, buffer);
			ret_code = check (crt_dir);
			/* Invalid entries are skipped and not counted */
			if(ret_code == DIR_INVALID)
				continue;
			if(ret_code == DIR_END)
				break;

			/* cnt counts the displayed entries; the one matching
			 * `selected` is drawn highlighted */
			get_dirent_name(crt_dir, namebuf);
			if(cnt ++ == selected){
				LCD_str( namebuf,SELECTED );
			}else{
				LCD_str( namebuf, NOT_SELECTED);
			}
		}

		/* Wait for the user to select a file or directory */
		bitmap = BTN_wait();

		/* `continue` below applies to the enclosing do/while: the listing is
		 * redrawn with the new selection.
		 * NOTE(review): if the directory had no displayable entry, cnt is 0
		 * and the modulo operations divide by zero. */
		switch(bitmap){
			case UP:
				selected = (selected +1)%cnt;
				continue;
			case DOWN:
				selected = (selected + cnt-1) % cnt;
				continue;
			case ENTER:
				/* Walk the directory again until the selected valid entry is
				 * reached; crt_dir then refers to it and selected is 0.
				 * NOTE(review): the loop has no DIR_END exit. */
				rewinddir(dir);
				selected ++;
				while(1){
					crt_dir = readdir( dir, buffer);
					ret_code = check (crt_dir);
					if( ret_code == DIR_VALID) selected--;
					if( !selected ) break;
				}
				break;
			default:
				continue;

		}

		/* Descend into directories, display files */
		is_dir(crt_dir, &is);
		if( is){
			dir = opendir(crt_dir);
		}else{
			get_dirent_size(crt_dir, &size);
			open_f(crt_dir);

			/* Clear the screen and display the picture */
			LCD_clear();
			draw_bmp();
			close_f();

			/* Wait for the exit button to be pressed */
			while( BTN_wait() != CLOSE);
		}
	}while(1);
	/* Not reached: the loop above has no exit. */
	exit(EXIT_SUCCESS);
}
Exemplo n.º 13
0
// Reads a file that contains several sequences and returns all of them in a
// newly allocated vector (owned by the caller, as are the structures in it).
//
// Recognized records:
//   ">label"        the label is the remainder of the line;
//   ";..."          the label is the whole next line;
//   anything else   sequence data of the current record; '1', ' ', '\n' and
//                   '\t' are skipped, no validation is done.
// Empty lines are ignored. A record is stored only if it has at least one
// nucleotide.
//
// Exits the process if the file cannot be opened.
vector<t_structure*>* t_structure::read_multi_seq(char* multi_seq_fp)
{
	vector<t_structure*>* seqs = new vector<t_structure*>();

	FILE* f_multi_seq = open_f(multi_seq_fp, "r");
	if(f_multi_seq == NULL)
	{
		printf("Could not find the input file @ %s.\n", multi_seq_fp);
		exit(0);
	}

	// Nucleotides and label of the record that is currently being read.
	vector<char>* cur_nucs = new vector<char>();
	char cur_label[MAX_HEADER_LENGTH];
	char cur_line[MAX_HEADER_LENGTH];

	// Sequence data may precede any label line; start with an empty label
	// instead of an uninitialized buffer.
	cur_label[0] = 0;

	while(fgets(cur_line, MAX_HEADER_LENGTH, f_multi_seq) != NULL)
	{
		// Get rid of the new line, if there is one.
		int l_line = (int)strlen(cur_line);
		if(l_line > 0 && cur_line[l_line - 1] == '\n')
		{
			cur_line[--l_line] = 0;
		}

		// Skip empty lines.
		if(l_line == 0)
		{
			continue;
		}

		if(cur_line[0] == '>' || cur_line[0] == ';')
		{
			// A new record starts: save the previous one, if it has data.
			if(cur_nucs->size() > 0)
			{
				t_structure* new_seq = new t_structure(cur_label, cur_nucs);
				seqs->push_back(new_seq);
			}

			if(cur_line[0] == '>')
			{
				// Read the label from the remaining of the line.
				strcpy(cur_label, &cur_line[1]);
			}
			else if(fgets(cur_label, MAX_HEADER_LENGTH, f_multi_seq) != NULL)
			{
				// The label is the line following the ';' line.
				int l_label = (int)strlen(cur_label);
				if(l_label > 0 && cur_label[l_label - 1] == '\n')
				{
					cur_label[l_label - 1] = 0;
				}
			}
			else
			{
				// The file ended right after the ';' line.
				cur_label[0] = 0;
			}

			// Empty current nucleotides for loading next sequence, if there is any.
			cur_nucs->clear();
		}
		else
		{
			// This is sequence data, copy the sequence data and continue, no input validation here.
			for(int i_cpy = 0; i_cpy < l_line; i_cpy++)
			{
				// This is a necessity coming from .seq file specifications. All .seq files end with a '1' character.
				if(cur_line[i_cpy] != '1' &&
					cur_line[i_cpy] != ' ' &&
					cur_line[i_cpy] != '\n' &&
					cur_line[i_cpy] != '\t')
				{
					cur_nucs->push_back(cur_line[i_cpy]);
				}
			} // Copy the nucleotides.
		} // label/nuc data check.
	} // File reading loop.

	// Save the last sequence in the sequence list.
	if(cur_nucs->size() > 0)
	{
		t_structure* new_seq = new t_structure(cur_label, cur_nucs);
		seqs->push_back(new_seq);
	}

	delete(cur_nucs);

	// The input file was previously left open.
	fclose(f_multi_seq);

	return(seqs);
}
Exemplo n.º 14
0
/*
Verifies the format of a sequence file.

Seq file should be like this:
;
[Empty line or comment or id ...]
[Empty line or sequence data]

NOTE: verification is currently DISABLED. The first statement returns true
unconditionally, so every file is reported as valid and none of the code
below it is executed.

NOTE(review), should the checks ever be re-enabled:
 - the early "return(false)" paths do not close f_seq;
 - seq_data[i_seq - 1] is read while i_seq is still 0;
 - seq_data holds MAX_HEADER_LENGTH characters but is filled across all lines
   without a bound check;
 - the line's '\n' is not in the accepted character list.
*/
bool t_structure::verify_seq(char* seq_fp)
{
	// Verification is switched off: accept every file.
	return(true);

	// ---- Unreachable from here on. ----
	FILE* f_seq = open_f(seq_fp, "r");

	char line_buffer[MAX_HEADER_LENGTH];

	_fgets(line_buffer, MAX_HEADER_LENGTH, f_seq);

	// If the first character of first line is not semicolon,
	// this is not a valid sequence file.
	if(line_buffer[0] != ';')
	{
		printf("Verification failed for sequence file %s @ %s(%d)\n", seq_fp, __FILE__, __LINE__);
		return(false);
	}

	// Number of the line that is read next (the first line is consumed above).
	int current_line_cnt = 2;
	// Number of characters collected in seq_data so far.
	int i_seq = 0;
	char seq_data[MAX_HEADER_LENGTH];

	// Read file and fill lines.
	while(1)
	{
		// Read next line starting with 2nd line.
		if(_fgets(line_buffer, MAX_HEADER_LENGTH, f_seq))
		{
			//printf("Current line_buffer: %s\n", line_buffer);

			// If currently read line is after 2nd line
			// the sequence data is being retrieved.
			if(current_line_cnt > 2)
			{
				for(int i = 0; i < (int)strlen(line_buffer); i++)
				{
					// Nothing may follow the terminating '1'.
					if(seq_data[i_seq - 1] == '1') // Is sequence data already finished?
					{
						printf("Sequence data is ending before file ends, exiting at %s(%d)\n", __FILE__, __LINE__);
						return(false);
					}

					// Only the terminator and upper/lower case A, C, G, U, T are accepted.
					if(line_buffer[i] != '1' &&
						line_buffer[i] != 'A' &&
						line_buffer[i] != 'C' &&
						line_buffer[i] != 'G' &&
						line_buffer[i] != 'U' &&
						line_buffer[i] != 'T' &&
						line_buffer[i] != 'a' &&
						line_buffer[i] != 'c' &&
						line_buffer[i] != 'g' &&
						line_buffer[i] != 'u' &&
						line_buffer[i] != 't')
					{
						printf("Unknown nucleotide in sequence: %c, exiting at %s(%d)\n", line_buffer[i], __FILE__, __LINE__);
						return(false);
					}
					seq_data[i_seq++] = line_buffer[i];
				}
			}

			current_line_cnt++;
		}
		else
		{
			// No more lines.
			break;
		}
	}

	// If 2nd line is not read OR no sequence data is read,
	// return false.
	/*
	if(current_line_cnt < 3 ||		// Check if at least 3 lines are read.
		i_seq == 0 ||				// Check if sequence data is read.
		seq_data[i_seq - 1] != '1') // Check correct ending of seq_data
	{
		printf("Verification failed for sequence file %s @ %s(%d)\n", seq_fp, __FILE__, __LINE__);
		return(false);
	}
	*/

	if(current_line_cnt < 3)	// Check if at least 3 lines are read.
	{
		printf("Verification failed for sequence file %s @ %s(%d)\n", seq_fp, __FILE__, __LINE__);
		return(false);
	}

	if(i_seq == 0)				// Check if sequence data is read.
	{
		printf("Verification failed for sequence file %s @ %s(%d): No sequence data\n", seq_fp, __FILE__, __LINE__);
		return(false);
	}

	if(seq_data[i_seq - 1] != '1') // Check correct ending of seq_data
	{
		printf("Verification failed for sequence file %s @ %s(%d): %c\n", seq_fp, __FILE__, __LINE__, seq_data[i_seq - 1]);
		return(false);
	}

	fclose(f_seq);

	return(true);
}
Exemplo n.º 15
0
// Loads the configuration file config_fp.
//
// Every line that does not start with '#' is split on spaces and tabs. A line
// with at least two tokens becomes one entry: the first token is copied into
// ids and the remaining tokens are copied, in order, into a new list appended
// to vals. Lines with fewer than two tokens are ignored.
//
// Exits the process if the file cannot be opened.
t_config::t_config(const char* config_fp)
{
	FILE* f_conf = open_f(config_fp, "r");
	if(f_conf == NULL)
	{
		printf("Could not open configuration file %s\n", config_fp);
		exit(0);
	}

	this->ids = new vector<char*>();
	this->vals = new vector<vector<char*>*>();

	char line_buf[5000];
	while(fgets(line_buf, 5000, f_conf) != NULL)
	{
		// Drop the trailing new line.
		int n_chars = strlen(line_buf);
		if(line_buf[n_chars-1] == '\n')
		{
			line_buf[n_chars-1] = 0;
		}

		// Comment lines carry no entries.
		if(line_buf[0] == '#')
		{
			continue;
		}

		t_string* line_str = new t_string(line_buf);
		t_string_tokens* tokens = line_str->tokenize_by_chars(" \t");

		// An entry needs an id and at least one value.
		int n_tokens = (int)tokens->size();
		if(n_tokens < 2)
		{
			continue;
		}

		// The first token is the id.
		char* id_copy = new char[strlen(tokens->at(0)->str()) + 2];
		strcpy(id_copy, tokens->at(0)->str());

		// All remaining tokens are the values of this id.
		vector<char*>* val_copies = new vector<char*>();
		int i_tok = 1;
		while(i_tok < n_tokens)
		{
			char* val_copy = new char[strlen(tokens->at(i_tok)->str()) + 2];
			strcpy(val_copy, tokens->at(i_tok)->str());
			val_copies->push_back(val_copy);
			i_tok++;
		}

		// Register the new entry.
		this->ids->push_back(id_copy);
		this->vals->push_back(val_copies);
	} // File reading loop.

	fclose(f_conf);
}
Exemplo n.º 16
0
// Backend function for computing alignment envelope.
//
// The envelope is described by two arrays indexed by the position i in the
// first sequence (0..l1()): low_limits[i] and high_limits[i] bound the
// positions of the second sequence that are allowed for i.
//
// aln_env_type  Selects how the limits are set:
//                 PROB_ALN_ENV    from the posterior probability planes;
//                 BANDED_ALN_ENV  a band around the scaled diagonal;
//                 FULL_ALN_ENV    every position (0..l2());
//                 MANUAL_ALN_ENV  loaded from "aln_map.txt".
//               Any other value prints a message and terminates the process.
// _pp_result    Posterior probabilities to use. When NULL they are computed
//               here and released again before returning.
// log_threshold PROB_ALN_ENV only: a cell is inside the envelope unless the
//               log sum of its three plane probabilities is below this value.
// par           BANDED_ALN_ENV only: the band size.
//
// Returns a malloc'ed t_aln_env_result holding the malloc'ed limit arrays, or
// NULL if the PROB_ALN_ENV envelope is not connected.
//
// NOTE(review): the pruned envelope is released for rows 1..l1() only;
// whether prune_aln_env also allocates row 0 is not visible here.
t_aln_env_result* t_phmm_aln::compute_alignment_envelope(int aln_env_type, 
														 t_pp_result* _pp_result, 
														 double log_threshold, 
														 int par)
{
	if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
		printf("Computing alignment envelope...\n");

	// if pp_result is not supplied, recompute it.
	t_pp_result* pp_result = NULL;
	if(_pp_result == NULL)
	{
		pp_result = this->compute_posterior_probs();
	}
	else
	{
		pp_result = _pp_result;
	}

	// alignment envelope type affects how the limits are set.
	// Limit indices are 1 based.
	int* low_limits = (int*)malloc(sizeof(int) * (this->l1() + 2));
	int* high_limits = (int*)malloc(sizeof(int) * (this->l1() + 2));

	// Initialize loop limits.
	for(int i = 0; i <= this->l1(); i++)
	{
		low_limits[i] = 0;
		high_limits[i] = 0;
	}

	if(aln_env_type == PROB_ALN_ENV)
	{
		if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
			printf("Allocating alignment envelope...\n");

		// Each row covers [low_k, high_k] only; the row pointer is shifted by
		// low_k so that it can be indexed with k directly.
		bool** aln_env = (bool**)malloc((this->l1() + 1) * sizeof(bool*));
		double n_aln_env_bytes = 0.0f;

		for(int i = 0; i <= this->l1(); i++)
		{
			int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size);
			int high_k = t_phmm_array::high_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size);
			aln_env[i] = (bool*)malloc((high_k - low_k + 1) * sizeof(bool));
			n_aln_env_bytes += ((high_k - low_k + 1) * sizeof(bool));
			aln_env[i] -= low_k;
		}

		if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
			printf("Allocated %lf bytes for alignment envelope.\n", n_aln_env_bytes);

		if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
			printf("Computing alignment envelope from probability planes.\n");

		for(int i = 0; i <= this->l1(); i++)
		{
			int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size);
			int high_k = t_phmm_array::high_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size);

			for(int k = low_k; k <= high_k; k++)
			{
				double ins1_prob = pp_result->ins1_probs[i][k];
				double ins2_prob = pp_result->ins2_probs[i][k];
				double aln_prob = pp_result->aln_probs[i][k];
				double three_plane_sum = xlog_sum(ins1_prob, xlog_sum(ins2_prob, aln_prob));

				// A cell is excluded only when its sum is below the threshold.
				aln_env[i][k] = !(three_plane_sum < log_threshold);
			}
		}

		if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
			printf("Validating alignment envelope connectivity...\n");

		// If alignment envelope is not connected, return NULL.
		if(!this->check_connection(aln_env))
		{
			printf("Alignment envelope not connected.\n");

			// If pp_result is allocated, free it.
			if(_pp_result == NULL)
			{
				this->free_pp_result(pp_result);
			}

			// Free the limits.
			free(low_limits);
			free(high_limits);

			// Free aln. env. since it is of no use any more.
			for(int i = 0; i <= this->l1(); i++)
			{
				int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size);
				aln_env[i] += low_k; // Undo the shift before freeing.
				free( aln_env[i] );
			}

			free(aln_env);

			return(NULL);
		}

		if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
			printf("Pruning alignment envelope...\n");

		// Calculate pruned alignment envelope; the limits are derived from it.
		bool** pruned_aln_env = this->prune_aln_env(aln_env);

		if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
			printf("Releasing alignment envelope memory.\n");

		// Free aln. env. since it is of no use any more.
		for(int i = 0; i <= this->l1(); i++)
		{
			int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size);
			aln_env[i] += low_k; // Undo the shift before freeing.
			free( aln_env[i] );
		}

		free(aln_env);

		if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
			printf("Computing loop limits.\n");

		// Compute the loop limits: the first and the last position that is
		// inside the pruned envelope on each row.
		for(int i = 1; i <= this->l1(); i++)
		{
			int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size);
			int high_k = t_phmm_array::high_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size);

			for(int k = low_k; k <= high_k; k++)
			{
				if(pruned_aln_env[i][k])
				{
					low_limits[i] = k;
					break;
				}
			}

			for(int k = high_k; k >= low_k; k--)
			{
				if(pruned_aln_env[i][k])
				{
					high_limits[i] = k;
					break;
				}
			}
		} // loop limit computation loop.

		// Free pruned aln. env. since it is of no use any more.
		if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
			printf("Releasing pruned alignment envelope memory.\n");

		for(int i = 1; i <= this->l1(); i++)
		{
			int low_k = t_phmm_array::low_phmm_limit(i, l1(), l2(), this->phmm_band_constraint_size);
			pruned_aln_env[i] += low_k;
			free(pruned_aln_env[i]);
		}

		free(pruned_aln_env);
	} // PROB_ALN_ENV
	else if(aln_env_type == BANDED_ALN_ENV)
	{
		// par argument contains band size.
		double band_size = (double)par;
		double floating_N1 = (double)this->l1();
		double floating_N2 = (double)this->l2();

		// The band is centered on position i scaled to the second sequence.
		for(int i = 1; i <= this->l1(); i++)
		{
			double floating_i = (double)i;
			low_limits[i] = (int) MAX(0, ((floating_i * floating_N2 / floating_N1) - band_size));
			high_limits[i] = (int) MIN(floating_N2, ((floating_i * floating_N2 / floating_N1) + band_size));

			if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
				printf("%d -> (%d, %d)\n", i, low_limits[i], high_limits[i]);
		}
	} // BANDED_ALN_ENV
	else if(aln_env_type == FULL_ALN_ENV)
	{
		// Every position of the second sequence is allowed.
		for(int i = 0; i <= this->l1(); i++)
		{
			low_limits[i] = 0;
			high_limits[i] = this->l2();
		}
	} // FULL_ALN_ENV
	else if(aln_env_type == MANUAL_ALN_ENV)
	{
		this->load_map_limits_from_map("aln_map.txt", low_limits, high_limits);
	}
	else
	{
		printf("Invalid alignment envelope type: %d\n", aln_env_type);
		exit(0);
	} // switch according to selected alignment envelope type.

	// Row 0 takes the limits of row 1.
	low_limits[0] = low_limits[1];
	high_limits[0] = high_limits[1];

	// Set low limits with values 1 to 0, so that the initialized values can be recursed correctly.
	for(int i = 0; i <= this->l1(); i++)
	{
		if(low_limits[i] == 1)
		{
			low_limits[i] = 0;
		}
	}

	// Allocate and set aln_env_result.
	t_aln_env_result* aln_env_result = (t_aln_env_result*)malloc(sizeof(t_aln_env_result));
	aln_env_result->high_limits = high_limits;
	aln_env_result->low_limits = low_limits;

	// Check for alignment constraints in the alignment envelope.
	this->check_ins1_ins2(aln_env_result);

	// Dump the probability planes (all of it) and the loop limits.
	if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
	{
		FILE* f_aln_probs = open_f("aln_plane_probs", "wb");
		FILE* f_ins1_probs = open_f("ins1_plane_probs", "wb");
		FILE* f_ins2_probs = open_f("ins2_plane_probs", "wb");
		for(int i1 = 1; i1 <= this->l1(); i1++)
		{
			int low_i2 = t_phmm_array::low_phmm_limit(i1, l1(), l2(), this->phmm_band_constraint_size);
			int high_i2 = t_phmm_array::high_phmm_limit(i1, l1(), l2(), this->phmm_band_constraint_size);

			for(int i2 = low_i2; i2 <= high_i2; i2++)
			{
				// Only entries that differ from xlog(0.0) are written, as
				// (int i1, int i2, double probability) records.
				if(pp_result->aln_probs[i1][i2] != xlog(0.0))
				{
					double cur_aln_prob = pp_result->aln_probs[i1][i2];
					fwrite(&i1, sizeof(int), 1, f_aln_probs);
					fwrite(&i2, sizeof(int), 1, f_aln_probs);
					fwrite(&cur_aln_prob, sizeof(double), 1, f_aln_probs);
				}

				if(pp_result->ins1_probs[i1][i2] != xlog(0.0))
				{
					double cur_ins1_prob = pp_result->ins1_probs[i1][i2];
					fwrite(&i1, sizeof(int), 1, f_ins1_probs);
					fwrite(&i2, sizeof(int), 1, f_ins1_probs);
					fwrite(&cur_ins1_prob, sizeof(double), 1, f_ins1_probs);
				}

				if(pp_result->ins2_probs[i1][i2] != xlog(0.0))
				{
					double cur_ins2_prob = pp_result->ins2_probs[i1][i2];
					fwrite(&i1, sizeof(int), 1, f_ins2_probs);
					fwrite(&i2, sizeof(int), 1, f_ins2_probs);
					fwrite(&cur_ins2_prob, sizeof(double), 1, f_ins2_probs);
				}
			} // i2 loop.
		} // i1 loop.

		fclose(f_aln_probs);
		fclose(f_ins1_probs);
		fclose(f_ins2_probs);

		FILE* f_lls = open_f("loop_limits.txt", "w");

		// Dump the loop limits.
		for(int i = 0; i <= this->l1(); i++)
		{
			fprintf(f_lls, "%d %d %d\n", i, low_limits[i], high_limits[i]);
		}

		fclose(f_lls);
	} // message dump check.

	if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
		printf("Computed alignment envelope.\n");

	// Make the low limits non-decreasing in i.
	for(int i = 2; i <= this->l1(); i++)
	{
		if(aln_env_result->low_limits[i] < aln_env_result->low_limits[i-1])
		{
			aln_env_result->low_limits[i] = aln_env_result->low_limits[i-1];
		}
	}

	// Make the high limits non-decreasing in i as well, by lowering any limit
	// that exceeds its successor.
	for(int i = this->l1()-1; i >= 1; i--)
	{
		if(aln_env_result->high_limits[i] > aln_env_result->high_limits[i+1])
		{
			aln_env_result->high_limits[i] = aln_env_result->high_limits[i+1];
		}
	}

	// The posterior probabilities are not handed to the result, so release
	// them when they were computed here. This was previously done only on the
	// "not connected" path, leaking them on every successful return.
	if(_pp_result == NULL)
	{
		this->free_pp_result(pp_result);
	}

	return(aln_env_result);
}
Exemplo n.º 17
0
// Loads per-position alignment loop limits from an alignment map file.
//
// The file named by aln_map_fn is read as l1() rows of l2() integer flags
// (row index i1, column index i2, both starting at 1). For every i1 in
// [1, l1()] and l2() >= 1:
//   - low_limits[i1]  becomes the first i2 whose flag is 1, or stays -1 when
//     the row holds no 1.
//   - high_limits[i1] becomes the column just before the first 0 that follows
//     that 1, or l2() when no such 0 is seen before the end of the row.
// Entry 0 of both arrays is not written here; both arrays must hold at least
// l1() + 1 entries.
//
// The process exits with a non-zero status when the file cannot be opened or
// when a flag cannot be read (short or malformed file).
//
// NOTE(review): "%d" needs the flags to be separated by whitespace; a map
// written as contiguous digits would be consumed as one number -- confirm the
// on-disk format against whatever produces these files.
void t_phmm_aln::load_map_limits_from_map(char* aln_map_fn, int* low_limits, int* high_limits)
{
if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
	printf("Setting alignment loop limits from map.\n");

	int N1 = this->l1();
	int N2 = this->l2();

	// Open alignment map file.
	FILE* aln_map_file = open_f(aln_map_fn, "r");

	if(aln_map_file == NULL)
	{
		printf("Could not find alignment map file %s @ %s(%d), exiting.\n", aln_map_fn, __FILE__, __LINE__);
		exit(1);
	}

	for(int i1 = 1; i1 <= N1; i1++)
	{
		// Reset limits; -1 marks "not determined yet" for this row.
		low_limits[i1] = -1;
		high_limits[i1] = -1;

		for(int i2 = 1; i2 <= N2; i2++)
		{
			// Flag at (i1, i2) in the alignment map file.
			int cur_flag = 0;

			// A failed read would otherwise leave the flag undefined and silently
			// corrupt every following limit, so treat it as fatal.
			if(fscanf(aln_map_file, "%d", &cur_flag) != 1)
			{
				printf("Could not read alignment map entry (%d, %d) from %s @ %s(%d), exiting.\n", i1, i2, aln_map_fn, __FILE__, __LINE__);
				fclose(aln_map_file);
				exit(1);
			}

if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
			printf("%d ", cur_flag);

			// Set low limit at the point where 1's start.
			if(low_limits[i1] == -1 && cur_flag == 1)
			{
				low_limits[i1] = i2;
			}

			// Set high limit at the first 0 after the low limit has been set.
			if(high_limits[i1] == -1 && low_limits[i1] != -1 && cur_flag == 0)
			{
				high_limits[i1] = i2 - 1;
			}

			// If high limit is still unset at the end of the row, it extends to the end of the 2nd sequence.
			if(high_limits[i1] == -1 && i2 == N2)
			{
				high_limits[i1] = N2;
			}
		}

		// Row terminator for the flag dump above; only meaningful when that dump is enabled.
if(_DUMP_ALN_ENV_UTILS_MESSAGES_)
		printf("\n");
	}

	fclose(aln_map_file);
}
Exemplo n.º 18
0
// Loads a structure from a ct file.
//
// Layout, as consumed below:
//   first line : <number of bases> followed by the label (rest of the line)
//   then one row per base, six fields each:
//                1  G 0  2  120 1
//     <index> <nucleotide symbol> <int> <int> <pairing partner> <int>
//
// On return numofbases, ctlabel, numseq, nucs, basepr, unpaired_forced and the
// four dangling/stacking arrays are (re)allocated with malloc and filled for
// indices 1..numofbases. The third and fourth fields of each row are kept in
// scratch arrays that are only consumed by the _USE_STACKING_INFO_ section,
// which the #undef below currently compiles out.
//
// The process exits with status 1 when the file cannot be opened, the base
// count is missing or negative, or a base row cannot be parsed.
void t_structure::openct(char* ct_fp)
{
	FILE* ct_file = open_f(ct_fp, "r");
	if(ct_file == NULL)
	{
		printf("ct file %s does not exist @ %s(%d).\n", ct_fp, __FILE__, __LINE__);
		exit(1);
	}

	// Allocate header buffer.
	this->ctlabel = (char*)malloc(sizeof(char) * MAX_HEADER_LENGTH);

	// Read the base count. Every allocation below is sized from it, so a value
	// that could not be parsed (or is negative) must not be used.
	if(fscanf(ct_file, "%d", &this->numofbases) != 1 || this->numofbases < 0)
	{
		printf("Could not read the number of bases from ct file %s @ %s(%d).\n", ct_fp, __FILE__, __LINE__);
		fclose(ct_file);
		exit(1);
	}

	// Read remaining of the line, contains new line character at the end of label.
	// If nothing follows the count, fall back to an empty label instead of
	// running strlen() over an unwritten buffer.
	if(fgets(this->ctlabel, MAX_HEADER_LENGTH, ct_file) == NULL)
	{
		this->ctlabel[0] = 0;
	}

	// Guard the length so an empty label cannot index position -1.
	size_t label_length = strlen(this->ctlabel);
	if(label_length > 0 && this->ctlabel[label_length - 1] == '\n')
	{
		this->ctlabel[label_length - 1] = 0;
	}
	this->check_set_label();

	this->numseq = (int*)malloc(sizeof(int) * (this->numofbases + 3));
	this->nucs = (char*)malloc(sizeof(char) * (this->numofbases + 3));
	this->basepr = (int*)malloc(sizeof(int) * (this->numofbases + 3));
	this->danglings_on_branch = (int*)malloc(sizeof(int) * (this->numofbases + 3));
	this->danglings_on_mb_closure = (int*)malloc(sizeof(int) * (this->numofbases + 3));
	this->stackings_on_branch = (int*)malloc(sizeof(int) * (this->numofbases + 3));
	this->stackings_on_mb_closure = (int*)malloc(sizeof(int) * (this->numofbases + 3));
	this->unpaired_forced = (bool*)malloc(sizeof(bool) * (this->numofbases + 2));

	for(int i = 0; i <= this->numofbases; i++)
	{
		this->basepr[i] = 0;
		this->danglings_on_branch[i] = 0;
		this->danglings_on_mb_closure[i] = 0;
		this->stackings_on_branch[i] = 0;
		this->stackings_on_mb_closure[i] = 0;
	}

	// Scratch storage for the third and fourth field of every row.
	int* dangles = (int*)malloc(sizeof(int) * (this->numofbases + 3));
	int* stacks = (int*)malloc(sizeof(int) * (this->numofbases + 3));

	// Read sequence data.
	// Must read base pairing before dangles/stacks can be resolved from file.
	for(int i = 1; i <= this->numofbases; i++)
	{
		int index;
		int some_val1;
		char raw_nuc;

		//                1  G 0  2  120 1
		int n_fields = fscanf(ct_file, "%d %c %d %d %d %d", &index, &raw_nuc, &dangles[i], &stacks[i], &this->basepr[i], &some_val1);

		// A partially parsed row would hand an undefined symbol to the mapping
		// below; stop instead of building a structure from garbage.
		if(n_fields != 6)
		{
			printf("Could not read row %d of ct file %s @ %s(%d).\n", i, ct_fp, __FILE__, __LINE__);
			free(dangles);
			free(stacks);
			fclose(ct_file);
			exit(1);
		}

		// Translate the raw symbol; presumably fills nucs[i], numseq[i] and
		// unpaired_forced[i] -- see map_nuc_IUPAC_code for the exact mapping.
		this->map_nuc_IUPAC_code(raw_nuc, this->nucs[i], this->numseq[i], this->unpaired_forced[i]);
	}

	// Terminate the nucleotide array after the last base, as openfasta does.
	this->nucs[this->numofbases + 1] = 0;

	/*
	The danglings on external loop branches are buffered as 
	danglings on branch. Note that there cannot be a stacking on external loop closure
	because by definition external loop is not closed.
	*/

	// The #undef forces the section below to be compiled out.
#undef _USE_STACKING_INFO_
#ifdef _USE_STACKING_INFO_
	// Resolve stacks and dangles.
	for(int i = 1; i <= this->numofbases; i++)
	{
		// Dangling?
		if(dangles[i] != 0)
		{
			// Dangle on branch?
			if(dangles[i] == i+1)
			{
				if(this->basepr[i+1] == 0)
				{
					printf("Dangling of %d on unpaired nucleotide %d.\n", i, i+1);
					exit(0);
				}

				if(this->basepr[i+1] > i+1)
				{
					this->danglings_on_branch[i] = i+1;
				}
				else
				{
					this->danglings_on_mb_closure[i] = i+1;
				}
			}
			// Dangle on mbl closure?
			if(dangles[i] == i-1)
			{
				if(this->basepr[i-1] == 0)
				{
					printf("Dangling of %d on unpaired nucleotide %d.\n", i, i-1);
					exit(0);
				}

				if(this->basepr[i-1] > i-1)
				{
					this->danglings_on_mb_closure[i] = i-1;
				}
				else
				{
					this->danglings_on_branch[i] = i-1;
				}
			}
		}

		// Stacking?
		if(stacks[i] != 0)
		{
			// stack on branch?
			if(stacks[i] == i+1)
			{
				if(this->basepr[i+1] == 0)
				{
					printf("Stacking of %d on unpaired nucleotide %d.\n", i, i+1);
					exit(0);
				}

				if(this->basepr[i+1] > i+1)
				{
					this->stackings_on_branch[i] = i+1;
				}
				else
				{
					this->stackings_on_mb_closure[i] = i+1;
				}
			}
			// stack on mbl closure?
			if(stacks[i] == i-1)
			{
				if(this->basepr[i-1] == 0)
				{
					printf("Stacking of %d on unpaired nucleotide %d.\n", i, i-1);
					exit(0);
				}

				if(this->basepr[i-1] > i-1)
				{
					this->stackings_on_mb_closure[i] = i-1;
				}
				else
				{
					this->stackings_on_branch[i] = i-1;
				}
			}
		}
	}

	// Do a sanity check on dangles and stacks.
	for(int i = 1; i < this->numofbases; i++)
	{
		if(this->stackings_on_branch[i] == i+1)
		{
			int current_j = this->basepr[i+1];
			if(current_j == 0 || this->stackings_on_branch[current_j+1] != current_j)
			{
				printf("Stacking check failed for stacking of %d on %d\n", i, i+1);
			}
		}

		if(this->stackings_on_mb_closure[i] == i+1)
		{
			int current_j = this->basepr[i+1];
			if(current_j == 0 || this->stackings_on_mb_closure[current_j+1] != current_j)
			{
				printf("Stacking check failed for stacking of %d on %d\n", i, i+1);
			}
		}
	}
#endif // _USE_STACKING_INFO_

	free(dangles);
	free(stacks);

	fclose(ct_file);
}
Exemplo n.º 19
0
// Dump map alignment.
void t_MAP_alignment::dump_map_alignment()
{
if(_DUMP_MAP_ALIGNMENT_MESSAGES_)
{
	FILE* map_aln_file = open_f("ppf_map_alignment.txt", "w");
	for(int cnt1 = 1; cnt1 <= this->seq_man->get_l_seq1(); cnt1++)
	{
		fprintf(map_aln_file, "%d %d %s\n", cnt1, this->seq1_alns[cnt1][0], state_names[this->seq1_alns[cnt1][1]]);
	}

	fprintf(map_aln_file, "\n\n");

	for(int cnt2 = 1; cnt2 <= this->seq_man->get_l_seq2(); cnt2++)
	{
		fprintf(map_aln_file, "%d %d %s\n", cnt2, this->seq2_alns[cnt2][0], state_names[this->seq2_alns[cnt2][1]]);
	}

	fclose(map_aln_file);
}

	// Both alignment arrays correspond to same coincidence path.
	// In order to represent those alignment arrays, have to trace them correctly
	// into alignment strings.
	//int aln_str_length = seq_man->get_l_seq1() + seq_man->get_l_seq2();
	int l_aln = this->get_l_aln();
	this->aln_str1 = (char*)malloc(sizeof(char) * (l_aln + 2));
	this->aln_str2 = (char*)malloc(sizeof(char) * (l_aln + 2));
	
	this->aln_index_line1 = (int*)malloc(sizeof(int) * (l_aln + 3));
	this->aln_index_line2 = (int*)malloc(sizeof(int) * (l_aln + 3));

	// Following points to last alignment position in coincidence map.
	int last_i1 = 0;
	int last_i2 = 0;

	char nucs[] = "NACGUI";

	// Problem is determining if next state is an event in first seq (ins1) or an event in second sequence (ins2)
	// or if it is an event in both sequences (aln). So check seq1_alns[last_i1 + 1] and seq1_alns[last_i2 + 1]
	// indices and states; see if the state and indices are corectly adding up on last_i1 and last_i2.
	// e.g. if seq1_alns[1][0] = 0 and seq1_alns[1][1] = STATE_INS1, then this means that there is an insertion
	// in first sequence which will be over 0, 0. 
	int aln_str_index = 0;
	while(last_i1 != seq_man->get_l_seq1() || 
		last_i2 != seq_man->get_l_seq2())
	{
if(_DUMP_MAP_ALIGNMENT_MESSAGES_)
		printf("%d(%d), %d(%d)\n", last_i1, seq_man->get_l_seq1(), last_i2, seq_man->get_l_seq2());

		// Check for alignment case.
		if((last_i1+1) <= seq_man->get_l_seq1() && 
			(last_i2+1) <= seq_man->get_l_seq2() &&
			this->seq1_alns[last_i1 + 1][1] == STATE_ALN && 
			this->seq1_alns[last_i1 + 1][0] == last_i2 + 1)
		{
			// If next nuc. in sequence 1 is aligned, is it aligned to
			// next nuc. in sequence 2?
			last_i1++;
			last_i2++;

			aln_str1[aln_str_index] = nucs[this->seq_man->get_nuc_seq1(last_i1)];
			aln_str2[aln_str_index] = nucs[this->seq_man->get_nuc_seq2(last_i2)];

			this->aln_index_line1[aln_str_index+1] = last_i1;
			this->aln_index_line2[aln_str_index+1] = last_i2;

			aln_str_index++;

if(_DUMP_MAP_ALIGNMENT_MESSAGES_)
			printf("Align %d, %d\n", last_i1, last_i2);
		}
		// Check for alignment case.
		else if((last_i1+1) <= seq_man->get_l_seq1() && 
				this->seq1_alns[last_i1 + 1][1] == STATE_INS1 && 
				this->seq1_alns[last_i1 + 1][0] == last_i2)
		{
			// If next nuc. in sequence 1 is inserted, is it inserted on top of current nuc. in sequence 2?
			last_i1++;

			aln_str1[aln_str_index] = nucs[this->seq_man->get_nuc_seq1(last_i1)];
			aln_str2[aln_str_index] = '.';

			this->aln_index_line1[aln_str_index+1] = last_i1;
			this->aln_index_line2[aln_str_index+1] = 0;

			aln_str_index++;

if(_DUMP_MAP_ALIGNMENT_MESSAGES_)
			printf("Insert1 %d, %d\n", last_i1, last_i2);

		}
		// Check for alignment case.
		else if((last_i2+1) <= seq_man->get_l_seq2() && 
				this->seq2_alns[last_i2 + 1][1] == STATE_INS2 && 
				this->seq2_alns[last_i2 + 1][0] == last_i1)
		{
			// If next nuc. in sequence 2 is inserted, is it inserted on top of current nuc. in sequence 1?
			last_i2++;

			aln_str1[aln_str_index] = '.';
			aln_str2[aln_str_index] = nucs[this->seq_man->get_nuc_seq2(last_i2)];

			this->aln_index_line1[aln_str_index+1] = 0;
			this->aln_index_line2[aln_str_index+1] = last_i2;

			aln_str_index++;

if(_DUMP_MAP_ALIGNMENT_MESSAGES_)
			printf("Insert2 %d, %d\n", last_i1, last_i2);
		}
		else
		{
			printf("Could not decode next coincidence position in alignment at (%d, %d) @ %s(%d).\n", last_i1, last_i2, __FILE__, __LINE__);
			exit(0);
		}
	} // map alignment string formation loop.

	// Finish alignment strings.
	aln_str1[aln_str_index] = 0;
	aln_str2[aln_str_index] = 0;

if(_DUMP_MAP_ALIGNMENT_MESSAGES_)
	printf("MAP Alignment:\n%s\n%s\n", aln_str1, aln_str2);

	char aln_fp[4096];
	if(this->ppf_cli->map_aln_op == NULL)
	{
		sprintf(aln_fp, "%s_%s_map_aln.aln", this->ppf_cli->seq1_op_file_prefix, this->ppf_cli->seq2_op_file_prefix);
	}
	else
	{
		strcpy(aln_fp, this->ppf_cli->map_aln_op);
	}
	FILE* aln_file = open_f(aln_fp, "w");
	fprintf(aln_file, "%s-%s MAP Alignment:\n%s\n%s \n", this->ppf_cli->seq1_op_file_prefix, this->ppf_cli->seq2_op_file_prefix, aln_str1, aln_str2);
	fclose(aln_file);
}
Exemplo n.º 20
0
// Returns true for characters that only lay out fasta sequence data (line
// breaks, blanks, tabs) and therefore must not be counted or stored as bases.
// '\r' is included so that files with CRLF line ends read the same everywhere.
static bool is_fasta_layout_char(char c)
{
	return(c == '\n' || c == '\r' || c == ' ' || c == '\t');
}

// Loads a single sequence from a fasta file.
//
// The first line is treated as the caption: when it starts with '>' the rest
// of it (without the line end) is copied into a freshly malloc'ed ctlabel.
// Everything after that line, up to end of file or the next '>', is sequence
// data. The file is scanned twice: once to count the bases so the arrays can
// be sized, once to fill them.
//
// On return numofbases holds the base count and numseq, nucs, basepr and
// unpaired_forced are malloc'ed and filled for indices 1..numofbases, with
// basepr set to 0 (no pairing) and nucs terminated with 0 after the last base.
// The dangling/stacking arrays are reset to NULL.
//
// The process exits with status 1 when verify_seq rejects the file or the
// file cannot be opened.
//
// NOTE(review): when the first line does not start with '>', ctlabel is left
// untouched by this function -- confirm callers do not rely on it in that case.
void t_structure::openfasta(char* fasta_fp)
{
	// Very strict measure: Exit if sequence file is not verifiable.
	if(!this->verify_seq(fasta_fp))
	{
		printf("Could not verify sequence file %s @ %s(%d)\n", fasta_fp, __FILE__, __LINE__);
		exit(1);
	}

	FILE* fasta_file = open_f(fasta_fp, "r");
	if(fasta_file == NULL)
	{
		printf("fasta file %s does not exist @ %s(%d).\n", fasta_fp, __FILE__, __LINE__);
		exit(1);
	}

	this->numseq = NULL;
	this->nucs = NULL;
	this->basepr = NULL;
	this->danglings_on_branch = NULL;
	this->danglings_on_mb_closure = NULL;
	this->stackings_on_branch = NULL;
	this->stackings_on_mb_closure = NULL;

	// Read the caption line. An empty file leaves the buffer unwritten, so
	// make it an empty string before it is inspected.
	char line_buffer[MAX_HEADER_LENGTH];
	if(fgets(line_buffer, MAX_HEADER_LENGTH, fasta_file) == NULL)
	{
		line_buffer[0] = 0;
	}

	if(line_buffer[0] == '>')
	{
		// Copy label.
		this->ctlabel = (char*)malloc(sizeof(char) * MAX_HEADER_LENGTH);
		strcpy(this->ctlabel, &line_buffer[1]);

		// Strip the line end; the length guard keeps an empty label from
		// indexing position -1.
		size_t label_length = strlen(this->ctlabel);
		while(label_length > 0 &&
			(this->ctlabel[label_length - 1] == '\n' || this->ctlabel[label_length - 1] == '\r'))
		{
			label_length--;
			this->ctlabel[label_length] = 0;
		}
	}

	// Read and determine length of sequence.
	char cur_char = 0;
	this->numofbases = 0;

	// First pass: count the bases.
	while(1)
	{
		int ret = fscanf(fasta_file, "%c", &cur_char);
		if(ret == EOF)
		{
			break;
		}

		// Found a new fasta sequence?
		if(cur_char == '>')
		{
			break;
		}

		if(!is_fasta_layout_char(cur_char))
		{
			this->numofbases++;
		}
	}

	// Indices start at 1; nucs gets one extra slot for its terminator.
	this->numseq = (int*)malloc(sizeof(int) * (this->numofbases + 1));
	this->nucs = (char*)malloc(sizeof(char) * (this->numofbases + 2));
	this->basepr = (int*)malloc(sizeof(int) * (this->numofbases + 1));
	this->unpaired_forced = (bool*)malloc(sizeof(bool) * (this->numofbases + 2));

	// Set file position to data position.
	// Cannot use fsetpos and fgetpos because for some reason they are messing up indices
	// when a linux text file is taken to a windows machine.
	fseek(fasta_file, 0, SEEK_SET);

	// Skip the caption line again; on an empty file the loop below ends at once.
	if(fgets(line_buffer, MAX_HEADER_LENGTH, fasta_file) == NULL)
	{
		line_buffer[0] = 0;
	}

	int i = 1; // Sequence index, starts from 1.

	// Second pass: store the bases. Must skip exactly the same characters as
	// the counting pass, otherwise the arrays above would be too small.
	while(1)
	{
		// Read and validate input.
		int ret = fscanf(fasta_file, "%c", &cur_char);
		if(ret == EOF)
		{
			break;
		}

		// Check end of sequence marker.
		if(cur_char == '>')
		{
			break;
		}

		// Process this nuc.
		if(!is_fasta_layout_char(cur_char))
		{
			this->basepr[i] = 0; // No base pairing information.

			this->map_nuc_IUPAC_code(cur_char, this->nucs[i], this->numseq[i], this->unpaired_forced[i]);

			i++;
		}
	}

	// This is for ending sequences.
	this->nucs[i] = 0; 

	fclose(fasta_file);
}