Beispiel #1
0
// *** MAIN START***
int main (int argc, const char * argv[]){
  cmdl_opts opts;
  get_opts( argc, argv, opts);
  int nTimes=0, nT=0;
  //*** EMITTED DATA OBJECTS ***
  Emission cnaEmit, bafEmit, snvEmit;
  if (opts.cna_fn != NULL) get_cna_data( &cnaEmit, opts, nTimes);
  if (opts.baf_fn != NULL) get_baf_data( &bafEmit, opts, nTimes, nT);
  if (opts.snv_fn != NULL) get_snv_data( &snvEmit, opts, nTimes, nT);
  //*** ANNOUNCE ***
  printf("\ncloneHD: probabilistic inference of sub-clonality using...\n\n");
  if (cnaEmit.is_set){
    printf("CNA data in %s: %i sites in %i chr across %i samples\n", 
	   opts.cna_fn, cnaEmit.total_loci, cnaEmit.nSamples, nTimes);
  }
  if (bafEmit.is_set){
    printf("BAF data in %s: %i sites in %i chr across %i samples\n", 
	   opts.baf_fn, bafEmit.total_loci, bafEmit.nSamples, nTimes);
  }
  if (snvEmit.is_set){
    printf("SNV data in %s: %i sites in %i chr across %i samples\n", 
	   opts.snv_fn, snvEmit.total_loci, snvEmit.nSamples, nTimes);
  }
  cout<<endl;
  // *** ALLOCATE CLONE ***
  Clone myClone;
  myClone.allocate( &cnaEmit, &bafEmit, &snvEmit, opts.chr_fn);
  myClone.cna_pen_zero = opts.cna_pen_zero;//CNA penalty for zero total copies
  myClone.cna_pen_diff = opts.cna_pen_diff;//CNA penalty for different c.n.
  myClone.cna_pen_norm = opts.cna_pen_norm;//CNA penalty for non-normal c.n.
  myClone.baf_pen_comp = opts.baf_pen_comp;//BAF penalty for complex chr status
  myClone.snv_pen_high = opts.snv_pen_high;//SNV penalty for high SNV genotypes
  myClone.snv_pen_mult = opts.snv_pen_mult;//SNV penalty for multiple hit SNVs
  myClone.snv_fpr  = opts.snv_fpr;//SNV false-positive rate
  myClone.snv_fpf  = opts.snv_fpf;//SNV frequency of false positives
  myClone.bulk_fix = opts.bulk_fix;
  myClone.cnaGrid  = opts.cnaGrid;
  myClone.bafGrid  = opts.bafGrid;
  myClone.snvGrid  = opts.snvGrid;
  myClone.bulkGrid = opts.bulkGrid;
  myClone.learn_priors = (cnaEmit.is_set || snvEmit.connect || opts.avcn_fn != NULL) ? 0 : opts.learn_priors;
  // *** GET MAX-TCN INFO ***
  get_maxtcn_input( opts.maxtcn_fn, opts.maxtcn, &myClone);
  // *** GET SNV BULK PRIOR ***
  if ( snvEmit.is_set && opts.bulk_fn != NULL ){
    printf("Using data in %s as SNV bulk prior...\n", opts.bulk_fn);
    get_snv_bulk_prior( &myClone, opts);
  }
  //*** GET JUMP PROBABILITY TRACKS and COLLAPSE TO EVENTS***
  get_jump_probability( &myClone, opts);
  //...now all segments are fixed and mean_tcn/av_cn allocated.
  if ( snvEmit.is_set && !cnaEmit.is_set ){//for SNV only
    // *** GET TOTAL MEAN COPYNUMBER TRACK ***  
    if( opts.mntcn_fn != NULL ){
      get_mean_tcn( opts.mntcn_fn, &myClone, &snvEmit);
    } 
    // *** GET AVAILABLE COPYNUMBER TRACK ***  
    if ( opts.avcn_fn != NULL ){
      get_avail_cn( opts.avcn_fn, &myClone, &snvEmit);
    }
  }
  //*** GET READ DEPTH BIAS FIELD ***
  if (cnaEmit.is_set && opts.bias_fn != NULL){
    get_bias_field( &myClone, opts);
  }
  //*** PREPARE COARSE-GRAINED DATA ***
  if (cnaEmit.is_set && (opts.cna_jumps_fn != NULL || opts.cna_jump == 0.0)){
    cnaEmit.log_space      = 1;
    cnaEmit.coarse_grained = 1;
    printf( "Collapsed CNA data to %5i segments based on potential jump events.\n", 
	    cnaEmit.total_events);
    cout<<"Precomputing for CNA..."<<flush;
    myClone.get_cnaEmitLog();
    cout<<"done."<<endl;
  }
  if (bafEmit.is_set && ( opts.cna_jumps_fn != NULL || opts.baf_jumps_fn != NULL || opts.baf_jump == 0.0)){
    bafEmit.log_space      = 1;
    bafEmit.coarse_grained = 1;
    printf("Collapsed BAF data to %5i segments based on potential jump events.\n", bafEmit.total_events);
    cout<<"Precomputing for BAF..."<<flush;
    myClone.get_bafEmitLog();
    cout<<"done."<<endl;
  }
  if (snvEmit.is_set && opts.snv_jumps_fn != NULL){
    snvEmit.log_space      = 1;
    snvEmit.coarse_grained = 1;
    printf("Collapsed SNV data to %5i segments based on potential jump events.\n", snvEmit.total_events);
    cout<<"Precomputing for SNV..."<<flush;
    myClone.get_snvEmitLog();
    cout<<"done."<<endl;
  }
  cout<<endl;
  //exit(0);
  // get purities...
  if (opts.purity_fn != NULL){
    get_purity( opts.purity_fn, myClone.min_purity);
  }
  // get user pre-defined clones
  gsl_matrix * clones = NULL;
  gsl_vector * mass   = NULL;
  if (opts.clones_fn != NULL) get_fixed_clones( clones, mass, opts.clones_fn, nTimes);
  int bestn=0, rows=0;
  if (mass != NULL   && (int) mass->size > nTimes)    rows = (int) mass->size;
  if (clones != NULL && (int) clones->size1 > nTimes) rows = (int) clones->size1;
  if (rows > nTimes){//print LLH's for predefined parameter values...
    print_llh_for_set( clones, mass, &myClone, opts);
    return(0);
  }
  else{
    // ****** INFERENCE STARTS HERE ******
    bestn = infer_clones( clones, mass, &myClone, opts);
    printf("cloneHD in ");
    if (cnaEmit.is_set && bafEmit.is_set && snvEmit.is_set) cout<<"cna-baf-snv ";
    if (cnaEmit.is_set && bafEmit.is_set && !snvEmit.is_set) cout<<"cna-baf ";
    if (cnaEmit.is_set && !bafEmit.is_set && snvEmit.is_set) cout<<"cna-snv ";
    if (cnaEmit.is_set && !bafEmit.is_set && !snvEmit.is_set) cout<<"cna ";
    if (!cnaEmit.is_set && !bafEmit.is_set && snvEmit.is_set) cout<<"snv ";
    printf("mode found support for %i sub-clone(s) in the data.\n", bestn);
    // ****** INFERENCE COMPLETED ********
  }
  print_all_results( &myClone, opts);
  // all done...
  return (0);
}