void reverse_target(void)
{
  Genomic * gen_temp;

  gen_temp = reverse_complement_Genomic(gen);

  free_temporary_objects();

  free_Genomic(gen);

  gen = gen_temp;
  
}
Exemple #2
0
GenomicDB * new_GenomicDB_from_single_seq(Genomic * gen,ComplexSequenceEvalSet * cses,int score_in_repeat_coding)
{
  ComplexSequence * cs,*cs_rev;
  GenomicDB * out;
  Genomic * temp;
  
  
  cs = evaluate_ComplexSequence_Genomic(gen,cses,0,score_in_repeat_coding);
  temp = reverse_complement_Genomic(gen);
  cs_rev = evaluate_ComplexSequence_Genomic(temp,cses,0,score_in_repeat_coding);

  out = new_GenomicDB_from_forrev_cseq(cs,cs_rev);
  out->single = hard_link_Genomic(gen);
  out->revsingle = temp;
  return out;
}
Exemple #3
0
main(int argc,char ** argv)
{
    Genomic * gen;
    Genomic * gen2;
    Genomic * gen3;

    gen = read_fasta_file_Genomic(argv[1],10);

    show_Genomic(gen,stdout);

    gen2 = reverse_complement_Genomic(gen);

    show_Genomic(gen2,stdout);

    gen3 = truncate_Genomic(gen,1,50);

    show_Genomic(gen3,stdout);


}
boolean build_objects(void)
{
  boolean ret = TRUE;
  Protein * pro_temp;
  Genomic * gen_temp;
  FILE * ifp;





  startend = threestatemodel_mode_from_string(startend_string);
  if( startend == TSM_unknown ) {
    warn("String %s was unable to converted into a start/end policy\n",startend_string);
    ret = FALSE;
  }


  if( tstart_str != NULL ) {
    if( is_integer_string(tstart_str,&tstart) == FALSE || tstart < 0) {
      warn("Could not make %s out as target start",tstart);
      ret = FALSE;
    }
  }

  if( tend_str != NULL ) {
    if( is_integer_string(tend_str,&tend) == FALSE || tend < 0) {
      warn("Could not make %s out as target end",tend);
      ret = FALSE;
    }
  }

  if( is_integer_string(gap_str,&gap) == FALSE ) {
      warn("Could not make %s out as gap penalty (must be integer at the moment)",gap_str);
      ret = FALSE;
  }
  

  if( is_integer_string(ext_str,&ext) == FALSE ) {
    warn("Could not make %s out as gap penalty (must be integer at the moment)",ext_str);
    ret = FALSE;
  }

  if( is_embl == FALSE ) {
    if( (gen = read_fasta_file_Genomic(dna_seq_file,length_of_N)) == NULL ) {
      ret = FALSE;
      warn("Could not read genomic sequence in %s",dna_seq_file);
      gen = NULL;
    } 
  } else {
    embl = read_EMBL_GenomicRegion_file(dna_seq_file);
    if( embl == NULL ) {
      warn("Could not read genomic EMBL file in %s",dna_seq_file);
      gen = NULL;
      ret = FALSE;
    } else {
      gen = hard_link_Genomic(embl->genomic);
      
    }
  }

  if( gen != NULL ) {

    if( tstart != -1 || tend != -1 ) {
      if( tstart == -1 )
	tstart = 0;
      if( tend == -1 ) 
	tend = gen->baseseq->len;
      gen_temp = truncate_Genomic(gen,tstart-1,tend);
      if( gen_temp == NULL ){
	ret = FALSE;
      } else {
	free_Genomic(gen);
	gen = gen_temp;
      }
    } else {
      /* no truncation required */
    }
  

    if( reverse == TRUE ) {
      if( tstart > tend ) {
	warn("You have already reversed the DNA by using %d - %d truncation. Re-reversing",tstart,tend);
    }
      
      gen_temp = reverse_complement_Genomic(gen); 
      free_Genomic(gen);
      gen = gen_temp;
    }
  }

  /*
   * Can't truncate on GenomicRegion (for good reasons!).
   * but we want only a section of the EMBL file to be used
   * 
   * So... swap genomic now. Positions in EMBL are still valid,
   * however - some genes will loose their sequence, which will be damaging. ;)
   */

  
  if( is_embl ) {
    free_Genomic(embl->genomic);
    embl->genomic = hard_link_Genomic(gen); /* pointer could be dead anyway ;) */
  }


  if( target_abs == TRUE ) {
    if( is_embl == TRUE ) {
      warn("Sorry you can't both use absolute positioning and EMBL files as I can't cope with all the coordinate remapping. You'll have to convert to fasta.");
      ret =  FALSE;
    }

    gen->baseseq->offset = 1;
    gen->baseseq->end  = strlen(gen->baseseq->seq);
  }

  if( alg_str != NULL ) {
    alg = gwrap_alg_type_from_string(alg_str);
  } else {
    if( use_tsm == TRUE ) {
      alg_str = "623L";
    } else {
      alg_str = "623";
    }
    alg = gwrap_alg_type_from_string(alg_str);
  }
      

  if( qstart_str != NULL ) {
    if( is_integer_string(qstart_str,&qstart) == FALSE || qstart < 0) {
      warn("Could not make %s out as query start",qstart);
      ret = FALSE;
    }
  }

  if( qend_str != NULL ) {
    if( is_integer_string(qend_str,&qend) == FALSE || qend < 0) {
      warn("Could not make %s out as query end",qend);
      ret = FALSE;
    }
  }


  if( use_tsm == FALSE ) {
    if( startend != TSM_default && startend != TSM_global && startend != TSM_local && startend != TSM_endbiased) {
      warn("Proteins can only have local/global/endbias startend policies set, not %s",startend_string);
      ret = FALSE;
    }
    if( (pro = read_fasta_file_Protein(protein_file)) == NULL ) {
      ret = FALSE;
      warn("Could not read Protein sequence in %s",protein_file);
    } else {
      
      if( qstart != -1 || qend != -1 ) {
	if( qstart == -1 )
	  qstart = 0;
	if( qend == -1 ) 
	  qend = pro->baseseq->len;
	
	pro_temp = truncate_Protein(pro,qstart-1,qend);
	if( pro_temp == NULL ){
	  ret = FALSE;
	} else {
	  free_Protein(pro);
	  pro = pro_temp;
	}
      }
    }
  } else {
    /** using a HMM **/
    
    /*tsm = read_HMMer_1_7_ascii_file(hmm_file);*/
    /*tsm = Wise2_read_ThreeStateModel_from_hmmer1_file(hmm_file);*/
    tsm = HMMer2_read_ThreeStateModel(hmm_file);
    
    
      if( tsm == NULL ) {
	warn("Could not read hmm from %s\n",hmm_file);
	ret = FALSE;
      }  else {
	
	display_char_in_ThreeStateModel(tsm);
	if( hmm_name != NULL ) {
	  if( tsm->name != NULL ) 
	    ckfree(tsm->name);
	  tsm->name = stringalloc(hmm_name);
	}
	
	if( tsm == NULL ) {
	  warn("Could not read %s as a hmm",hmm_file);
	}
	
	/** have to set start/end **/
	set_startend_policy_ThreeStateModel(tsm,startend,30,0.1);
	
      }
  } /* end of else tsm != NULL */
  

  
  if( main_block_str != NULL ) {
    if( is_integer_string(main_block_str,&main_block) == FALSE ) {
      warn("Could not get maximum main_block number %s",main_block_str);
      ret = FALSE;
    }
  }
   


  if( is_double_string(subs_string,&subs_error) == FALSE ) {
    warn("Could not convert %s to a double",subs_error);
    ret = FALSE;
  }

  if( is_double_string(indel_string,&indel_error) == FALSE ) {
    warn("Could not convert %s to a double",indel_error);
    ret = FALSE;
  }

  if( is_double_string(allN_string,&allN) == FALSE ) {
    warn("Could not convert %s to a double",allN_string);
    ret = FALSE;
  }

  
  if( strcmp(cfreq_string,"model") == 0 ) {
    model_codon = TRUE;
  } else if ( strcmp(cfreq_string,"flat") == 0 ) {
    model_codon = FALSE;
  } else {
    warn("Cannot interpret [%s] as a codon modelling parameter\n",cfreq_string);
    ret = FALSE;
  }
  

  if( strcmp(splice_string,"model") == 0 ) {
    model_splice = TRUE;
  } else if ( strcmp(splice_string,"flat") == 0 ) {
    model_splice = FALSE;
    gmp->use_gtag_splice = TRUE;
  } else {
    warn("Cannot interpret [%s] as a splice modelling parameter\n",splice_string);
    ret = FALSE;
  }

  if( strcmp(null_string,"syn") == 0 ) {
    use_syn = TRUE;
  } else if ( strcmp(null_string,"flat") == 0 ) {
    use_syn = FALSE;
  } else {
    warn("Cannot interpret [%s] as a null model string\n",null_string);
    ret = FALSE;
  }

  if( strcmp(intron_string,"model") == 0 ) {
    use_tied_model = FALSE;
  } else if ( strcmp(intron_string,"tied") == 0 ) {
    use_tied_model = TRUE;
  } else {
    warn("Cannot interpret [%s] as a intron tieing switch\n",intron_string);
    ret = FALSE;
  }



  if( (rm = default_RandomModel()) == NULL) {
    warn("Could not make default random model\n");
    ret = FALSE;
  }

  if( use_new_stats == 0 ) {
    if( (gf = read_GeneFrequency21_file(gene_file)) == NULL) {
      ret = FALSE;
      warn("Could not read a GeneFrequency file in %s",gene_file);
    }
  } else {
    if( (gs = GeneStats_from_GeneModelParam(gmp)) == NULL ){
      ret=FALSE;
      warn("Could not read gene statistics in %s",new_gene_file);
    }
  } /* end of else using new gene stats */


  if( (mat = read_Blast_file_CompMat(matrix_file)) == NULL) {
    if( use_tsm == TRUE ) {
      info("I could not read the Comparison matrix file in %s; however, you are using a HMM so it is not needed. Please set the WISECONFIGDIR or WISEPERSONALDIR variable correctly to prevent this message.",matrix_file);
    } else {
      warn("Could not read Comparison matrix file in %s",matrix_file);
      ret = FALSE;
    }
  }

  if( (ct = read_CodonTable_file(codon_file)) == NULL) {
    ret = FALSE;
    warn("Could not read codon table file in %s",codon_file);
  }

  if( (ofp = openfile(output_file,"W")) ==  NULL) {
    warn("Could not open %s as an output file",output_file);
    ret = FALSE;
  }

  rmd = RandomModelDNA_std();
  return ret;

}
Exemple #5
0
ComplexSequence * reload_GenomicDB(ComplexSequence * last,GenomicDB * gendb,int * return_status)
{
  ComplexSequence * cs;
  Sequence * seq;
  Genomic *temp;
  Genomic * gen;
  

  
  /** NB - notice that we don't do silly things with free's. Maybe we should **/

  if( gendb->is_single_seq == TRUE ) {
    if( gendb->done_forward == TRUE ) {
      *return_status = DB_RETURN_OK;
      gendb->done_forward = FALSE;
      return hard_link_ComplexSequence(gendb->rev);
    } else {
      *return_status = DB_RETURN_END;
      return NULL;
    }
  }

  /** standard database **/

  /** free Complex Sequence **/

  if ( last != NULL ) {
    free_ComplexSequence(last);
  }

  if( gendb->done_forward == TRUE ) {
    if( gendb->current == NULL ) {
      warn("A bad internal genomic db error - unable to find current sequence in db reload");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    temp = reverse_complement_Genomic(gendb->current);


    if( temp == NULL ) {
      warn("A bad internal genomic db error - unable to reverse complements current");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    cs = evaluate_ComplexSequence_Genomic(temp,gendb->cses,0,gendb->repeat_in_cds_score);

    if( cs == NULL ) {
      warn("A bad internal genomic db error - unable to make complex sequence in db reload");
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }

    free_Genomic(temp);
    gendb->done_forward = FALSE;
    return cs;
  }


  /* otherwise we have to get a new sequence */

  seq = reload_SequenceDB(NULL,gendb->sdb,return_status);

  if( seq == NULL || *return_status == DB_RETURN_ERROR || *return_status == DB_RETURN_END ) {
    return NULL; /** error already reported **/
  }

  uppercase_Sequence(seq);

  /* check dna status. We assumme someone knows what he is doing when he makes a genomic db!*/
  if( seq->type != SEQUENCE_DNA) {
    warn("Sequence from %s data entry doesn't look like DNA. Forcing it to",seq->name);
  }

  force_to_dna_Sequence(seq,1.0,NULL);


  if( force_to_dna_Sequence(seq,0.1,NULL) == FALSE ) {
    if( gendb->error_handling == GENDB_READ_THROUGH ) {
      warn("Unable to map %s sequence to a genomic sequence, but ignoring that for the moment...",seq->name);
      free_Sequence(seq);
      return reload_GenomicDB(NULL,gendb,return_status);
    } else {
      warn("Unable to map %s sequence to a genomic sequence. Failing",seq->name);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }
  }

  gen = Genomic_from_Sequence_Nheuristic(seq,gendb->length_of_N);
  cs = evaluate_ComplexSequence_Genomic(gen,gendb->cses,0,gendb->repeat_in_cds_score);
  if( cs == NULL ) {
    if( gendb->error_handling == GENDB_READ_THROUGH ) {
      warn("Unable to map %s sequence to a genomic sequence, but ignoring that for the moment...",seq->name);
      free_Sequence(seq);
      return reload_GenomicDB(NULL,gendb,return_status);
    } else {
      warn("Unable to map %s sequence to a genomic sequence. Failing",seq->name);
      *return_status = DB_RETURN_ERROR;
      return NULL;
    }
  }

  gendb->current = free_Genomic(gendb->current);
  gendb->current = gen;
  gendb->done_forward= TRUE;

  return cs;
}