Beispiel #1
0
/*
Calculates a best-fit image transform from image feature correspondences
using RANSAC.

For more information refer to:

Fischler, M. A. and Bolles, R. C.  Random sample consensus: a paradigm for
model fitting with applications to image analysis and automated cartography.
<EM>Communications of the ACM, 24</EM>, 6 (1981), pp. 381--395.

@param features an array of features; only features with a non-NULL match
	of type mtype are used in homography computation
@param n number of features in \a features
@param mtype determines which of each feature's match fields to use
	for model computation; should be one of FEATURE_FWD_MATCH,
	FEATURE_BCK_MATCH, or FEATURE_MDL_MATCH; if this is FEATURE_MDL_MATCH,
	correspondences are assumed to be between a feature's img_pt field
	and its match's mdl_pt field, otherwise correspondences are assumed to
	be between the feature's img_pt field and its match's img_pt field
@param xform_fn pointer to the function used to compute the desired
	transformation from feature correspondences
@param m minimum number of correspondences necessary to instantiate the
	model computed by xform_fn
@param p_badxform desired probability that the final transformation
	returned by RANSAC is corrupted by outliers (i.e. the probability that
	no samples of all inliers were drawn)
@param err_fn pointer to the function used to compute a measure of error
	between putative correspondences and a computed model
@param err_tol correspondences within this distance of a computed model are
	considered as inliers
@param inliers if not NULL, output as an array of pointers to the final
	set of inliers; the array is handed over to the caller (this function
	does not free it).  It is set to NULL when there are too few matches,
	when the best consensus set is too small, or when no model can be
	fitted to the best consensus set.
@param n_in if not NULL, output as the final number of inliers; set to 0
	in the same cases in which \a inliers is set to NULL

@return Returns a transformation matrix computed using RANSAC or NULL
	on error or if an acceptable transform could not be computed.
*/
CvMat* ransac_xform( struct feature* features, int n, int mtype,
					ransac_xform_fn xform_fn, int m, double p_badxform,
					ransac_err_fn err_fn, double err_tol,
					struct feature*** inliers, int* n_in )
{
	/*
	matched:       the features that have a match of type mtype (sample pool)
	sample:        one random sample of m features
	consensus:     consensus set of the model currently being evaluated
	consensus_max: largest consensus set found so far
	*/
	struct feature** matched, ** sample, ** consensus, ** consensus_max = NULL;
	struct ransac_data* rdata;	/* per-feature data restored at end: */
	CvPoint2D64f* pts, * mpts;	/* feature points and their matched points */
	CvMat* M = NULL;			/* transform currently being evaluated */
	/*
	p:       probability that none of the k samples drawn so far was
	         outlier-free; iteration stops once it drops to p_badxform
	in_frac: current estimate of the fraction of inliers among the matches
	*/
	double p, in_frac = RANSAC_INLIER_FRAC_EST;
	/*
	nm:     number of entries in matched
	in:     size of the current consensus set
	in_min: smallest consensus set accepted (from calc_min_inliers)
	in_max: size of consensus_max
	k:      number of samples drawn so far
	*/
	int i, nm, in, in_min, in_max = 0, k = 0;

	/* give the outputs a defined value on every exit path */
	if( inliers )
		*inliers = NULL;
	if( n_in )
		*n_in = 0;

	nm = get_matched_features( features, n, mtype, &matched );
	if( nm < m )
	{
		/* fewer matches than the model needs: nothing can be computed */
		fprintf( stderr, "Warning: not enough matches to compute xform, %s" \
			" line %d\n", __FILE__, __LINE__ );
		goto end;
	}

	/* initialize random number generator */
	srand( time(NULL) );

	in_min = calc_min_inliers( nm, m, RANSAC_PROB_BAD_SUPP, p_badxform );
	p = pow( 1.0 - pow( in_frac, m ), k );

	/* keep sampling until the failure probability is acceptably small */
	while( p > p_badxform )
	{
		sample = draw_ransac_sample( matched, nm, m );
		extract_corresp_pts( sample, m, mtype, &pts, &mpts );
		M = xform_fn( pts, mpts, m );
		if( ! M )
			goto iteration_end;
		in = find_consensus( matched, nm, mtype, M, err_fn, err_tol, &consensus);

		if( in > in_max )
		{
			/* new best consensus set: drop the previous one, if any */
			free( consensus_max );
			consensus_max = consensus;
			in_max = in;
			in_frac = (double)in_max / nm;
		}
		else
			free( consensus );
		cvReleaseMat( &M );

iteration_end:
		release_mem( pts, mpts, sample );
		p = pow( 1.0 - pow( in_frac, m ), ++k );
	}

	/* calculate final transform based on best consensus set */
	if( in_max >= in_min )
	{
		extract_corresp_pts( consensus_max, in_max, mtype, &pts, &mpts );
		M = xform_fn( pts, mpts, in_max );
		if( ! M )
		{
			/* no model for the best consensus set: fail cleanly instead of
			   handing a NULL matrix to find_consensus */
			release_mem( pts, mpts, consensus_max );
			goto end;
		}

		/* one refinement pass: recompute the consensus set of the model
		   fitted to all of consensus_max, then refit to that set */
		in = find_consensus( matched, nm, mtype, M, err_fn, err_tol, &consensus);
		cvReleaseMat( &M );
		release_mem( pts, mpts, consensus_max );
		extract_corresp_pts( consensus, in, mtype, &pts, &mpts );
		M = xform_fn( pts, mpts, in );
		if( inliers )
		{
			/* ownership of the array passes to the caller */
			*inliers = consensus;
			consensus = NULL;
		}
		if( n_in )
			*n_in = in;
		release_mem( pts, mpts, consensus );
	}
	else
		/* best consensus set too small; outputs already hold NULL / 0 */
		free( consensus_max );

	/* restore every matched feature's original feature_data and release
	   the temporary ransac_data that was stored there */
end:
	for( i = 0; i < nm; i++ )
	{
		rdata = feat_ransac_data( matched[i] );
		matched[i]->feature_data = rdata->orig_feat_data;
		free( rdata );
	}
	free( matched );

	return M;
}
Beispiel #2
0
/*
  Calculates a best-fit image transform from image feature correspondences
  using RANSAC.
  
  For more information refer to:
  
  Fischler, M. A. and Bolles, R. C.  Random sample consensus: a paradigm for
  model fitting with applications to image analysis and automated cartography.
  <EM>Communications of the ACM, 24</EM>, 6 (1981), pp. 381--395.
  
  @param features an array of features; only features with a non-NULL match
    of type mtype are used in homography computation
  @param n number of features in \a features
  @param mtype determines which of each feature's match fields to use
    for model computation; should be one of FEATURE_FWD_MATCH,
    FEATURE_BCK_MATCH, or FEATURE_MDL_MATCH; if this is FEATURE_MDL_MATCH,
    correspondences are assumed to be between a feature's img_pt field
    and its match's mdl_pt field, otherwise correspondences are assumed to
    be between the feature's img_pt field and its match's img_pt field
  @param xform_fn pointer to the function used to compute the desired
    transformation from feature correspondences
  @param m minimum number of correspondences necessary to instantiate the
    model computed by xform_fn
  @param p_badxform desired probability that the final transformation
    returned by RANSAC is corrupted by outliers (i.e. the probability that
    no samples of all inliers were drawn)
  @param err_fn pointer to the function used to compute a measure of error
    between putative correspondences and a computed model
  @param err_tol correspondences within this distance of a computed model are
    considered as inliers
  @param inliers if not NULL, output as an array of pointers to the final
    set of inliers; the array is handed over to the caller (this function
    does not free it).  It is set to NULL when there are too few matches,
    when the best consensus set is too small, or when no model can be
    fitted to the best consensus set.
  @param n_in if not NULL, output as the final number of inliers; set to 0
    in the same cases in which \a inliers is set to NULL
  
  @return Returns a transformation matrix computed using RANSAC or NULL
    on error or if an acceptable transform could not be computed.
*/
CvMat* ransac_xform( struct feature* features, int n, int mtype,
		     ransac_xform_fn xform_fn, int m, double p_badxform,
		     ransac_err_fn err_fn, double err_tol,
		     struct feature*** inliers, int* n_in )
{
  struct feature** matched, ** sample, ** consensus, ** consensus_max = NULL;
  struct ransac_data* rdata;
  CvPoint2D64f* pts, * mpts;
  CvMat* M = NULL;
  double p, in_frac = RANSAC_INLIER_FRAC_EST;
  int i, nm, in, in_min, in_max = 0, k = 0;

  /* give the outputs a defined value on every exit path */
  if( inliers )
    *inliers = NULL;
  if( n_in )
    *n_in = 0;

  /* collect the usable features; nm is the number stored in matched */
  nm = get_matched_features( features, n, mtype, &matched );
  if( nm < m )
    {
      fprintf( stderr, "Warning: not enough matches to compute xform, %s" \
               " line %d\n", __FILE__, __LINE__ );
      goto end;
    }

  srandom( time(NULL) );

  /* p is the probability that none of the k samples drawn so far was
     outlier-free, given the current inlier-fraction estimate in_frac */
  in_min = calc_min_inliers( nm, m, RANSAC_PROB_BAD_SUPP, p_badxform );
  p = pow( 1.0 - pow( in_frac, m ), k );
  while( p > p_badxform )
    {
      sample = draw_ransac_sample( matched, nm, m );
      extract_corresp_pts( sample, m, mtype, &pts, &mpts );
      M = xform_fn( pts, mpts, m );
      if( ! M )
        goto iteration_end;
      in = find_consensus( matched, nm, mtype, M, err_fn, err_tol, &consensus);
      if( in > in_max )
        {
          /* new best consensus set: drop the previous one, if any */
          free( consensus_max );
          consensus_max = consensus;
          in_max = in;
          in_frac = (double)in_max / nm;
        }
      else
        free( consensus );
      cvReleaseMat( &M );

    iteration_end:
      release_mem( pts, mpts, sample );
      p = pow( 1.0 - pow( in_frac, m ), ++k );
    }

  /* calculate final transform based on best consensus set */
  if( in_max >= in_min )
    {
      extract_corresp_pts( consensus_max, in_max, mtype, &pts, &mpts );
      M = xform_fn( pts, mpts, in_max );
      if( ! M )
        {
          /* no model for the best consensus set: fail cleanly instead of
             handing a NULL matrix to find_consensus */
          release_mem( pts, mpts, consensus_max );
          goto end;
        }

      /* one refinement pass: recompute the consensus set of the model
         fitted to all of consensus_max, then refit to that set */
      in = find_consensus( matched, nm, mtype, M, err_fn, err_tol, &consensus);
      cvReleaseMat( &M );
      release_mem( pts, mpts, consensus_max );
      extract_corresp_pts( consensus, in, mtype, &pts, &mpts );
      M = xform_fn( pts, mpts, in );
      if( inliers )
        {
          /* ownership of the array passes to the caller */
          *inliers = consensus;
          consensus = NULL;
        }
      if( n_in )
        *n_in = in;
      release_mem( pts, mpts, consensus );
    }
  else
    /* best consensus set too small; outputs already hold NULL / 0 */
    free( consensus_max );

  /* restore every matched feature's original feature_data and release
     the temporary ransac_data that was stored there */
 end:
  for( i = 0; i < nm; i++ )
    {
      rdata = feat_ransac_data( matched[i] );
      matched[i]->feature_data = rdata->orig_feat_data;
      free( rdata );
    }
  free( matched );
  return M;
}
Beispiel #3
0
/* For a given region, defined by reg_start and reg_end (1-based,
 inclusive), show the reference sequence, the consensus sequence,
 and the sequence of all the fragments that overlap this
 region at all.

 maln       the alignment to print from
 reg_start  first reference position; values below 1 are raised to 1
 reg_end    last reference position; values beyond the reference
            length are lowered to maln->ref->seq_len
 out_format 61 prints every line through fasta_aln_print(); 6
            additionally prefixes each read with its id; any other
            value prints plain aligned lines
 in_color   when non-zero (and out_format is not 61) lines are
            printed through color_print()
 */
void print_region( MapAlignmentP maln, int reg_start, int reg_end,
		   int out_format, int in_color ) {
  int i, ref_pos, ref_gaps, j, cons_pos, ins_len;
  int num_gaps = 0;
  int read_out_pos;
  int offset;
  char* consensus;
  char* aln_ref;
  char* ins_cons;
  char* read_str;
  char* read_id;
  char* ins_seq;
  int* ins_cov;
  BaseCountsP bcs;
  AlnSeqP aln_seq;
  PSSMP psm;

  /* Make sure region doesn't go off edge */
  if (reg_start < 1) {
    reg_start = 1;
  }
  if (reg_end > maln->ref->seq_len) {
    reg_end = maln->ref->seq_len;
  }

  bcs = (BaseCountsP)save_malloc(sizeof(BaseCounts));
  reset_base_counts(bcs);

  /* Find how many gaps are in this region. Only the positions that the
     loops below actually walk (reg_start-1 .. reg_end-1) are counted, so
     gaps[reg_end] is never read. */
  for (i = reg_start - 1; i < reg_end; i++) {
    num_gaps += maln->ref->gaps[i];
  }

  /* Make char arrays long enough for the sequence plus
     gaps for the reference and the consensus. These will be
     populated and output by the rest of this function.
  */
  consensus = (char*)save_malloc((num_gaps + (reg_end-reg_start+1) + 10)
                                 * sizeof(char));
  aln_ref = (char*)save_malloc((num_gaps + (reg_end-reg_start+1) + 10)
                               * sizeof(char));

  /* Make char and int array for insert consensus and
     insert coverage to be used whenever needed */
  ins_cons = (char*)save_malloc(MAX_INS_LEN * sizeof(char));
  ins_cov = (int* )save_malloc(MAX_INS_LEN * sizeof(int));

  cons_pos = 0;
  for (ref_pos = reg_start - 1; ref_pos < reg_end; ref_pos++) {
    ref_gaps = maln->ref->gaps[ref_pos];
    /* Add these gaps to the reference aligned string and the inserted
       sequence to the consensus[] */
    if (ref_gaps > 0) {
      find_ins_cons(maln, ref_pos, ins_cons, ins_cov, out_format);
      for (j = 0; j < ref_gaps; j++) {
        aln_ref[cons_pos] = '-';
        consensus[cons_pos] = ins_cons[j];
        cons_pos++;
      }
    }
    /* Re-zero all the base counts */
    reset_base_counts(bcs);

    /* Find all the aligned fragments that include this
       position and make a consensus from it */
    for (j = 0; j < maln->num_aln_seqs; j++) {
      aln_seq = maln->AlnSeqArray[j];
      /* Does this aligned fragment cover this position? */
      if ( (aln_seq->start <= ref_pos) &&
           (aln_seq->end >= ref_pos)) {
        psm = aln_seq->revcom ? maln->rpsm : maln->fpsm;
        offset = ref_pos - aln_seq->start;
        add_base(aln_seq->seq[offset], bcs, psm, aln_seq->smp[offset]);
      }
    }

    consensus[cons_pos] = find_consensus(bcs, maln->cons_code);
    aln_ref[cons_pos] = maln->ref->seq[ref_pos];
    cons_pos++;
  }

  consensus[cons_pos] = '\0';
  aln_ref[cons_pos] = '\0';

  /* Now print the reference and the consensus */
  if (out_format == 61) {
    fasta_aln_print(aln_ref, maln->ref->id);
    fasta_aln_print(consensus, "Consensus");
  } else {
    if (in_color) {
      printf("%-20.20s ", maln->ref->id);
      color_print(aln_ref);
      printf("%-20.20s ", "Consensus");
      color_print(consensus);
    } else
      printf("%-20.20s %s\n%-20s %s\n", maln->ref->id, aln_ref,
             "Consensus", consensus);
  }

  /*
     Alloc memory for the string to hold each read (plus .'s outside)
     and for the special id, which is the regular ID prefixed by four
     characters: 't' if trimmed, 'r' if reverse complemented, and the
     number of input sequences as two digits
  */
  read_str = (char*)save_malloc(strlen(aln_ref) * sizeof(char) + 1);
  read_id  = (char*)save_malloc((MAX_ID_LEN + 4) * sizeof(char) + 1);
  /* Find every sequence that overlaps this region and print
     the overlapping segment */
  for (j = 0; j < maln->num_aln_seqs; j++) {
    aln_seq = maln->AlnSeqArray[j];
    if (!alnseq_ol_reg(aln_seq, (reg_start-1), (reg_end-1))) {
      continue;
    }
    read_out_pos = 0;

    read_id[0] = aln_seq->trimmed ? 't' : '_';
    read_id[1] = aln_seq->revcom ? 'r' : '_';
    /* Exactly two characters of the count are kept, as before; the
       bounded calls cannot write past the (MAX_ID_LEN + 4) + 1 bytes
       allocated for read_id even for an over-long id. */
    snprintf( &read_id[2], 3, "%.2d", aln_seq->num_inputs );
    snprintf( &read_id[4], MAX_ID_LEN + 1, "%s", aln_seq->id );

    if (out_format == 6) {
      printf("%-20.20s ", read_id);
    }
    for (ref_pos = reg_start - 1; ref_pos < reg_end; ref_pos++) {
      ref_gaps = maln->ref->gaps[ref_pos];
      /* Check to make sure that this fragment has started and
         not ended by this ref_pos */
      if ( (aln_seq->start <= ref_pos) &&
           (aln_seq->end >= ref_pos)) {
        offset = ref_pos - aln_seq->start;
        if (ref_gaps > 0) {
          if (aln_seq->start == ref_pos) {
            /* Exactly at the beginning of this frag: the gap columns
               lie before the fragment, so they are shown as '.' */
            for (i = 0; i < ref_gaps; i++) {
              read_str[read_out_pos++] = '.';
            }
          } else {
            /* Just a normal, interior gapped position: emit this
               fragment's inserted bases (if any), then pad with '-'
               up to the width of the gap */
            ins_seq = aln_seq->ins[offset];
            ins_len = (ins_seq == NULL) ? 0 : (int)strlen(ins_seq);
            for (i = 0; i < ins_len; i++) {
              read_str[read_out_pos++] = ins_seq[i];
            }
            for (i = 0; i < (ref_gaps - ins_len); i++) {
              read_str[read_out_pos++] = '-';
            }
          }
        }
        read_str[read_out_pos++] = aln_seq->seq[offset];
      } else {
        /* This fragment doesn't actually cover this base:
           one '.' for every ref gap and one for the base itself */
        for (i = 0; i < ref_gaps; i++) {
          read_str[read_out_pos++] = '.';
        }
        read_str[read_out_pos++] = '.';
      }
    }
    read_str[read_out_pos] = '\0';
    if (out_format == 61) {
      fasta_aln_print(read_str, read_id);
    } else {
      if (in_color) {
        color_print(read_str);
      } else
        printf("%s\n", read_str);
    }
  }
  free(bcs);
  free(consensus);
  free(aln_ref);
  free(ins_cons);
  free(ins_cov);
  free(read_str);
  free(read_id);
}
Beispiel #4
0
//input first the fasta file, then the sample_1000.out file run on the fasta, then options
int main(int argc, char *argv[]) {
  int i,total,notcommon,most;
  char **mostfreq;
  FILE *fp;

  OUTPUT = DEF_OUTPUT;
  INPUT = NULL;
  NATIVE = NULL;
  FILTER = 0;
  NOISE = DEF_NOISE;
  MIN_HEL_LEN = DEF_MIN_HEL_LEN;
  //  THRESH_FREQ = DEF_THRESH_FREQ;
  THRESH_FREQ = 0;
  THRESH_COMMON = DEF_THRESH_COMMON;
  VERBOSE = 0;
  NUMFREQ = 0;
  NUMPROF = 0;
  PROF_FREQ = 0;
  //  PROF_FREQ = DEF_PROF_FREQ;
  NUMSTRUCTS = 0;
  THRESH_STRUCT = DEF_THRESH_STRUCT;
  LENGTH = 0;
  STATS = 0;
  GRAPH = 1;
  REP_STRUCT = 0;

  if (argc < 3) {
    fprintf(stderr,"Not enough arguments\n");
    exit(EXIT_FAILURE);
  }

  //OUTPUT = malloc(strlen(argv[1])+5);
  //sprintf(OUTPUT,"%s.dot",argv[1]);

  for (i = 3; i < argc; i++) {
    //printf("argv[%d] is %s\n",i,argv[i]);
    if (!strcmp(argv[i],"-f")) {
      FILTER = 1;
      if ((i + 1 <= argc - 1) && sscanf(argv[i+1],"%d",&NUMFREQ)) {
	//	sscanf(argv[i+1],"%s",val);
	//printf("val is %s and argv %s\n",val,argv[i+1]);
	NUMFREQ = atoi(argv[i+1]);
	i++;
      }
      else
	NUMFREQ = DEF_NUMFREQ;
    }
    else if (!strcmp(argv[i],"-z")) {
      if ((i + 1 <= argc - 1) && sscanf(argv[i+1],"%d",&NOISE)) {
	NOISE = atoi(argv[i+1]);
	i++;
      }
    }
    else if (!strcmp(argv[i],"-h")) {
      if ((i + 1 <= argc - 1) && sscanf(argv[i+1],"%f",&THRESH_FREQ)) {
	THRESH_FREQ = atof(argv[i+1]);
	if (THRESH_FREQ < 0 || THRESH_FREQ > 100) {
	  fprintf(stderr,"Error: invalid input %f for frequency threshold\n",THRESH_FREQ);
	  THRESH_FREQ = DEF_THRESH_FREQ;
	}
	i++;
      }
    }
    else if (!strcmp(argv[i],"-c")) {
      if ((i + 1 <= argc - 1) && sscanf(argv[i+1],"%f",&THRESH_COMMON)) {
	THRESH_COMMON = atof(argv[i+1]);
	if (THRESH_COMMON < 0.0 || THRESH_COMMON > 100.0) {
	  fprintf(stderr,"Error: invalid input %f for common threshold\n",THRESH_COMMON);
	  THRESH_COMMON = DEF_THRESH_COMMON;
	}
	i++;
      }
    }
    else if (!strcmp(argv[i],"-q")) {
      //PRUNE = 1;
      if ((i + 1 <= argc - 1) && sscanf(argv[i+1],"%d",&NUMPROF)) {
	NUMPROF = atoi(argv[i+1]);
	i++;
      }
    }
    else if (!strcmp(argv[i],"-p")) {
      if ((i + 1 <= argc - 1) && sscanf(argv[i+1],"%f",&PROF_FREQ)) {
	PROF_FREQ = atof(argv[i+1]);
	i++;
      }
    }
    else if (!strcmp(argv[i],"-l")) {
      if ((i + 1 <= argc - 1) && sscanf(argv[i+1],"%d",&MIN_HEL_LEN)) {
	MIN_HEL_LEN = atoi(argv[i+1]);
	i++;
      }
    }
    else if (!strcmp(argv[i],"-s")) {
      if ((i + 1 <= argc - 1) && sscanf(argv[i+1],"%d",&NUMSTRUCTS)) {
	NUMSTRUCTS = atoi(argv[i+1]);
	i++;
      }
    }
    else if (!strcmp(argv[i],"-t")) {
      if ((i + 1 <= argc - 1) && sscanf(argv[i+1],"%f",&THRESH_STRUCT)) {
	THRESH_STRUCT = atof(argv[i+1]);
	i++;
      }
    }
    else if (!strcmp(argv[i],"-o")) {
      if (i + 1 <= argc - 1) {
	OUTPUT = argv[i+1];
	i++;
      }
    }
    else if (!strcmp(argv[i],"-i")) {
      if (i + 1 <= argc - 1) {
	INPUT = argv[i+1];
	i++;
      }
    }
    else if (!strcmp(argv[i],"-n")) {
      if (i + 1 <= argc - 1) {
	NATIVE = argv[i+1];
	i++;
      }
    }
    else if (!strcmp(argv[i],"-v"))
      VERBOSE = 1;
    else if (!strcmp(argv[i],"-a"))
      STATS = 1;
    else if (!strcmp(argv[i],"-g"))
      GRAPH = 0;
    else if (!strcmp(argv[i],"-r"))
      REP_STRUCT = 1;
  }

  if (!(bp = hashtbl_create(HASHSIZE,NULL))) {
    fprintf(stderr, "ERROR: hashtbl_create() for bp failed");
    exit(EXIT_FAILURE);
  }
  
  if (!(marginals = hashtbl_create(HASHSIZE,NULL))) {
    fprintf(stderr, "ERROR: hashtbl_create() for marginals failed");
    exit(EXIT_FAILURE);
  }

  if (!(idhash = hashtbl_create(HASHSIZE,NULL))) {
    fprintf(stderr, "ERROR: hashtbl_create() for idhash failed");
    exit(EXIT_FAILURE);
  }

  if (!(binary = hashtbl_create(HASHSIZE,NULL))) {
    fprintf(stderr, "ERROR: hashtbl_create() for binary failed");
    exit(EXIT_FAILURE);
  }

  total = process_structs(argv[1],argv[2]);
  if (THRESH_FREQ==0) 
    THRESH_FREQ = set_h_dropoff(marginals, H_START);
  if (VERBOSE) {
    printf("Threshold to find frequent helices: %.1f\%\n",THRESH_FREQ);
    printf("Maximum number of frequent helices: ");
    if (NUMFREQ == 0)
      puts("no limit");
    else
      printf("%d\n",NUMFREQ);
    /*
      printf("Threshold to select frequent profiles: %.1f\%\n",PROF_FREQ);    
      printf("Maximum number of profiles: ");
      if (NUMPROF == 0)
      puts("no limit");
      else
      printf("%d\n",NUMPROF);
    */
    printf("Number of structures processed: %d\n",NUMSTRUCTS);
  }
  printf("Total number of equivalence helix classes: %d\n",total-1);
  if (VERBOSE)
    print_all_helices(total);
  
  //  make_graph(marginals,max,id,total,argv[1],fp);
  
  mostfreq = find_freq(total);
  printf("Total number of selected helices: %d\n",hashtbl_numkeys(freq));
  notcommon = make_profiles(argv[2]);
  printf("Total number of profiles: %d\n",hashtbl_numkeys(cluster));
  print_profiles();

  if (PROF_FREQ == 0) {
    PROF_FREQ = set_h_dropoff(cluster,P_START);
    if (VERBOSE)
      printf("setting p to %.1f\n",PROF_FREQ);
  }

  most = select_profiles(mostfreq,notcommon)-1;
  printf("Total number of selected profiles: %d\n",hashtbl_numkeys(cluster));
  if (hashtbl_numkeys(cluster) == 0)
    GRAPH = 0;
  if (hashtbl_numkeys(cluster) > 23 && GRAPH) {
    GRAPH = 0;
    printf("Total number of profiles above threshold %.1f is %d: disabling graph\n",PROF_FREQ,hashtbl_numkeys(cluster));
  }
  if (REP_STRUCT) {
    //fp = fopen("structures.out","w");
    //fprintf(fp,"Processing %s\n",argv[2]);
    find_consensus();
    print_consensus(argv[1]);
    //print_cluster(argv[1]);
    //fclose(fp);
  }
  if (GRAPH) {
    fp = fopen(OUTPUT,"w");
    insert_graph(fp,argv[1],most);  
    fputs("}",fp);
    fclose(fp);
  }
  hashtbl_destroy(bp);
  hashtbl_destroy(marginals);
  hashtbl_destroy(idhash);
  hashtbl_destroy(binary);
  hashtbl_destroy(freq);
  hashtbl_destroy(cluster);
  hashtbl_destroy(bracket);
  hashtbl_destroy(infreq);
  return 0;
}
Beispiel #5
0
/* Builds the consensus of the sequence inserted relative to the
 reference at one position, i.e. where maln->ref->gaps[pos] > 0.

 For each of the gaps[pos] insert columns, ins_cons[] receives the
 consensus character and cons_cov[] the coverage of that column.
 Both arrays must be sized appropriately by the caller.

 Only fragments with start < pos <= end contribute: the gap lies,
 by convention, just upstream of pos, so a fragment that starts
 exactly at pos does not span it. A fragment whose insert is
 missing or shorter than the gap contributes '-' for the
 remaining columns.

 out_format 4 additionally reports, through show_single_pos(),
 every column whose consensus is not '-'; out_format 41 reports
 every column.
 */
void find_ins_cons(MapAlignmentP maln, int pos, char* ins_cons, int* cons_cov,
		int out_format) {
	const int gap_len = maln->ref->gaps[pos];
	BaseCountsP* column;	/* column[c] points at the counts of insert column c */
	BaseCountsP storage;	/* one contiguous array backing all columns */
	int frag_idx, col;

	column = (BaseCountsP*)save_malloc(gap_len * sizeof(BaseCountsP));
	storage = (BaseCountsP)save_malloc(gap_len * sizeof(BaseCounts));

	for (col = 0; col < gap_len; col++) {
		column[col] = storage + col;
		reset_base_counts(column[col]);
	}

	/* Tally what every spanning fragment contributes to each column */
	for (frag_idx = 0; frag_idx < maln->num_aln_seqs; frag_idx++) {
		AlnSeqP frag = maln->AlnSeqArray[frag_idx];
		PSSMP matrix;
		char* inserted;
		int inserted_len, off;

		if (frag->start >= pos || frag->end < pos) {
			continue;	/* does not span the gap */
		}

		matrix = frag->revcom ? maln->rpsm : maln->fpsm;
		off = pos - frag->start;
		inserted = frag->ins[off];
		inserted_len = (inserted == NULL) ? 0 : (int)strlen(inserted);

		for (col = 0; col < gap_len; col++) {
			if (col < inserted_len) {
				add_base(inserted[col], column[col], matrix, frag->smp[off]);
			} else {
				add_base('-', column[col], matrix, frag->smp[off]);
			}
		}
	}

	/* Call the consensus per column and optionally report it */
	for (col = 0; col < gap_len; col++) {
		ins_cons[col] = find_consensus(column[col], maln->cons_code);
		cons_cov[col] = column[col]->cov;
		if (out_format == 41 ||
				(out_format == 4 && ins_cons[col] != '-')) {
			show_single_pos(pos, '-', ins_cons[col], column[col]);
		}
	}

	free(storage);
	free(column);
}