Exemple #1
0
/*
 * Copy the value of a command-line option into a fixed-size buffer.
 *
 * dst / dstsize : destination buffer and its capacity in bytes
 * option        : option name, used only in the error message
 * value         : NUL-terminated string to copy
 *
 * Returns 0 on success. If value (plus its terminator) does not fit, nothing is
 * copied, a message is printed to stderr and -1 is returned.
 */
static int copy_option_value(char *dst, size_t dstsize, const char *option, const char *value)
{
	size_t len = strlen(value);
	if (len >= dstsize)
	{
		fprintf(stderr,"value supplied for option %s is too long (maximum %lu characters)\n\n",option,(unsigned long)(dstsize-1));
		return -1;
	}
	memcpy(dst,value,len+1); // len+1 also copies the terminating NUL
	return 0;
}

/*
 * Program entry point: parses "--option value" pairs from the command line,
 * stores them in local buffers / global settings and calls maxcut_haplotyping().
 *
 * Three path options must be supplied (fragment file, VCF file, output file);
 * 'flag' counts how many of them were seen.
 *
 * Returns  0 after maxcut_haplotyping() has been called (its result is not inspected),
 *          1 when --variants names a file that does not end in vcf/VCF or a path is too long,
 *         -1 (after printing the usage text) when fewer/more than three path options were counted.
 */
int main(int argc, char** argv)
{
	// seed the drand48 family of generators from the wall clock
	time_t ts; time(&ts); srand48((long int)ts);
	if (MINCUTALGO ==2) RANDOM_START=0;
	int i=0; int flag =0;
	char fragfile[10000]; char varfile[10000]; char VCFfile[10000]; char hapfile[10000]; int maxiter =100; 
	// VCFfile is initialised as well: 'flag' counts occurrences, not distinct options, so it can
	// reach 3 without --VCF ever being given and the buffer would otherwise be read uninitialised
	strcpy(fragfile,"None"); strcpy(varfile,"None"); strcpy(VCFfile,"None"); strcpy(hapfile,"None"); 

	// with fewer than 6 argv entries the three mandatory option/value pairs cannot be present,
	// so nothing is parsed and the usage text is printed below
	for (i=1;argc >= 6 && i<argc;i+=2)
	{
		const char *option = argv[i];
		const char *value = argv[i+1]; // argv[argc] is guaranteed to be a null pointer
		if (value == NULL)
		{
			// a trailing token has no value to go with it; handing NULL to strcpy/strlen/atoi is undefined
			fprintf(stderr,"ignoring trailing argument %s: no value was supplied for it\n",option);
			break;
		}

		if (strcmp(option,"--fragments") ==0 || strcmp(option,"--frags") ==0)
		{
			if (copy_option_value(fragfile,sizeof fragfile,option,value) != 0) return 1;
			flag++;
		}
		else if (strcmp(option,"--variants") ==0)    
		{    
			// accepted only if the name ends with the three characters "vcf" or "VCF"
			size_t len = strlen(value);
			if (len > 3 && (strcmp(value+len-3,"vcf") ==0 || strcmp(value+len-3,"VCF") ==0))
			{
				if (copy_option_value(VCFfile,sizeof VCFfile,option,value) != 0) return 1;
				VCFformat = 1; flag++;
			}
			else
			{
				fprintf(stderr,"please provide variant file in VCF format using --VCF option, old variant format is no longer supported\n\n"); 
				return 1;
			}
		}
		else if (strcmp(option,"--VCF") ==0 || strcmp(option,"--vcf")==0)
		{
			if (copy_option_value(VCFfile,sizeof VCFfile,option,value) != 0) return 1;
			VCFformat = 1; flag++;
		}
		else if (strcmp(option,"--output") ==0 || strcmp(option,"--out") ==0)
		{
			if (copy_option_value(hapfile,sizeof hapfile,option,value) != 0) return 1;
			flag++;
		}
		else if (strcmp(option,"--maxiter") ==0)        maxiter = atoi(value);
		else if (strcmp(option,"--longreads") ==0 || strcmp(option,"--lr") ==0)        // long reads pacbio 
		{
			FOSMIDS = atoi(value); 
		}
		else if (strcmp(option,"--fosmid") ==0 || strcmp(option,"--fosmids") ==0)        
		{
			FOSMIDS = atoi(value);
			if (FOSMIDS ==1) SCORING_FUNCTION = 1;  // unless explicitly specified, for fosmids use switch error based function...
		}
		else if (strcmp(option,"--sf") ==0 || strcmp(option,"--switches") ==0)        SCORING_FUNCTION = atoi(value);
		else if (strcmp(option,"--printscores") ==0 || strcmp(option,"--scores") ==0)       PRINT_FRAGMENT_SCORES = atoi(value);
		else if (strcmp(option,"--maxcutiter") ==0)  
		{
			MAXCUT_ITER = atoi(value);
			fprintf(stderr,"max iterations for max-cut calculations is %d \n",MAXCUT_ITER);
		}
		else if (strcmp(option,"--QVoffset") ==0 || strcmp(option,"--qvoffset") ==0)        QVoffset = atoi(value);
		else if (strcmp(option,"--maxmem") ==0 )        MAX_MEMORY = atoi(value);
		else if (strcmp(option,"--mbq") ==0 )        MINQ = atoi(value);
		// any other option is silently skipped together with its value
	}

	if (flag !=3) // three essential arguments are not supplied 
	{ 
		print_hapcut_options(); return -1;
	}

	if (VCFformat ==1) 
	{
		fprintf(stderr,"\n\nfragment file: %s\nvariantfile (VCF format):%s\nhaplotypes will be output to file: %s\niterations of maxcut algorithm: %d\nQVoffset: %d\n\n",fragfile,VCFfile,hapfile,maxiter,QVoffset); 	
		maxcut_haplotyping(fragfile,VCFfile,0,hapfile,maxiter); 
	}
	else 
	{
		fprintf(stderr,"\n\nfragment file: %s\nvariantfile (variant format):%s\nhaplotypes will be output to file: %s\niterations of maxcut algorithm: %d\nQVoffset: %d\n\n",fragfile,varfile,hapfile,maxiter,QVoffset); 	
		maxcut_haplotyping(fragfile,varfile,0,hapfile,maxiter); 
	}
	return 0;
}
Exemple #2
0
/*
 * Copy the value of a command-line option into a fixed-size buffer.
 *
 * dst / dstsize : destination buffer and its capacity in bytes
 * option        : option name, used only in the error message
 * value         : NUL-terminated string to copy
 *
 * Terminates the program with exit(1) after printing an error to stderr when
 * value (plus its terminator) does not fit into dst.
 */
static void store_path_argument(char *dst, size_t dstsize, const char *option, const char *value) {
    size_t len = strlen(value);
    if (len >= dstsize) {
        fprintf(stderr, "\nERROR: Value for option \"%s\" is too long (maximum %lu characters).\n", option, (unsigned long) (dstsize - 1));
        exit(1);
    }
    memcpy(dst, value, len + 1); // len + 1 also copies the terminating NUL
}

/*
 * Program entry point: parses "--option value" pairs from the command line,
 * stores them in local buffers / global settings and calls maxcut_haplotyping().
 *
 * Three path options must be supplied (fragment file, VCF file, output file);
 * 'flag' counts how many of them were seen.
 *
 * Exits with status 1 on an even argc (an option without a value), on an
 * unrecognised option, or on an over-long path. Returns 0 otherwise, both after
 * printing the usage text (flag != 3) and after calling maxcut_haplotyping(),
 * whose result is not inspected.
 */
int main(int argc, char** argv) {

    int i = 0;
    int flag = 0;
    char fragfile[10000];
    char VCFfile[10000];
    char hapfile[10000];
    strcpy(fragfile, "None");
    // 'flag' counts occurrences, not distinct options, so it can reach 3 without
    // --VCF being given; initialise VCFfile so it is never read uninitialised.
    strcpy(VCFfile, "None");
    strcpy(hapfile, "None");
    strcpy(HTRANS_DATA_INFILE, "None");
    strcpy(HTRANS_DATA_OUTFILE, "None");

    // Options come in "--name value" pairs, so together with argv[0] the count must be odd.
    // This also guarantees argv[i + 1] is a real argument inside the loop below.
    if (argc % 2 != 1){
        fprintf(stderr, "\nERROR: Invalid number of arguments specified.\n");
        exit(1);
    }

    // With fewer than 6 argv entries the three mandatory pairs cannot be present,
    // so nothing is parsed and the usage text is printed further down.
    for (i = 1; argc >= 6 && i < argc; i += 2) {
        const char *option = argv[i];
        const char *value = argv[i + 1];

        // BASIC OPTIONS
        if (strcmp(option, "--fragments") == 0 || strcmp(option, "--f") == 0) {
            store_path_argument(fragfile, sizeof fragfile, option, value);
            flag++;
        } else if (strcmp(option, "--VCF") == 0 || strcmp(option, "--vcf") == 0) {
            store_path_argument(VCFfile, sizeof VCFfile, option, value);
            flag++;
        } else if (strcmp(option, "--output") == 0 || strcmp(option, "--out") == 0 || strcmp(option, "--o") == 0) {
            store_path_argument(hapfile, sizeof hapfile, option, value);
            flag++;
        } else if (strcmp(option, "--converge") == 0 || strcmp(option, "--c") == 0) {
            CONVERGE = atoi(value);
        } else if (strcmp(option, "--rh") == 0 || strcmp(option, "--tags") == 0) { // read-haplotype assignments
            OUTPUT_RH_ASSIGNMENTS = atoi(value);
        } else if (strcmp(option, "--outvcf") == 0) { // output VCF or not, default is 1
            OUTPUT_VCF = atoi(value);
        } else if (strcmp(option, "--verbose") == 0 || strcmp(option, "--v") == 0) {
            check_input_0_or_1(argv[i + 1]);
            VERBOSE = atoi(value);
        }
        // READ-TECHNOLOGY OPTIONS
        else if (strcmp(option, "--HiC") == 0 || strcmp(option, "--hic") == 0) {
            check_input_0_or_1(argv[i + 1]);
            if (atoi(value)) {
                MAX_HIC_EM_ITER = 100; // fixed value; the option's argument only switches Hi-C mode on
                NEW_FRAGFILE_FORMAT = 1;
                HIC = 1;
            }
        } else if (strcmp(option, "--long_reads") == 0 || strcmp(option, "--lr") == 0) {
            check_input_0_or_1(argv[i + 1]);
            LONG_READS = atoi(value);
            AUTODETECT_LONGREADS = 0;
        } else if (strcmp(option, "--QV_offset") == 0 || strcmp(option, "--qv_offset") == 0 || strcmp(option, "--qo") == 0) {
            QVoffset = atoi(value);
        } else if (strcmp(option, "--hic_htrans_file") == 0 || strcmp(option, "--hf") == 0) {
            NEW_FRAGFILE_FORMAT = 1;
            // NOTE(review): the capacity of HTRANS_DATA_INFILE is declared outside this
            // block, so this copy is still unbounded - confirm the size and bound it.
            strcpy(HTRANS_DATA_INFILE, argv[i + 1]);
            HIC = 1;
        }
        // HAPLOTYPE POST-PROCESSING OPTIONS
        // (--split_blocks / --sb is not accepted; its handler is disabled)
        else if (strcmp(option, "--threshold") == 0 || strcmp(option, "--t") == 0) {
            THRESHOLD = 1.0 - unphred(atof(value));
        } else if (strcmp(option, "--split_threshold") == 0 || strcmp(option, "--st") == 0) {
            SPLIT_THRESHOLD = 1.0 - unphred(atof(value));
        } else if (strcmp(option, "--call_homozygous") == 0 || strcmp(option, "--ch") == 0) {
            check_input_0_or_1(argv[i + 1]);
            CALL_HOMOZYGOUS = atoi(value);
        } else if (strcmp(option, "--discrete_pruning") == 0 || strcmp(option, "--dp") == 0) {
            check_input_0_or_1(argv[i + 1]);
            DISCRETE_PRUNING = atoi(value);
        } else if (strcmp(option, "--error_analysis_mode") == 0 || strcmp(option, "--ea") == 0) {
            check_input_0_or_1(argv[i + 1]);
            ERROR_ANALYSIS_MODE = atoi(value);
        } else if (strcmp(option, "--SNVs_before_indels") == 0 || strcmp(option, "--si") == 0) {
            check_input_0_or_1(argv[i + 1]);
            SNVS_BEFORE_INDELS = atoi(value);
        }
        // ADVANCED OPTIONS
        else if (strcmp(option, "--nf") == 0 || strcmp(option, "--new_format") == 0) {
            check_input_0_or_1(argv[i + 1]);
            NEW_FRAGFILE_FORMAT = atoi(value);
        } else if (strcmp(option, "--max_iter") == 0 || strcmp(option, "--mi") == 0) {
            MAXITER = atoi(value);
        } else if (strcmp(option, "--maxcut_iter") == 0 || strcmp(option, "--mc") == 0) {
            MAXCUT_ITER = atoi(value);
        } else if (strcmp(option, "--htrans_read_lowbound") == 0 || strcmp(option, "--hrl") == 0) {
            HTRANS_READ_LOWBOUND = atoi(value);
        } else if (strcmp(option, "--htrans_max_window") == 0 || strcmp(option, "--hmw") == 0) {
            HTRANS_MAX_WINDOW = atoi(value);
        }
        // HIDDEN OPTIONS
        else if (strcmp(option, "--htrans_data_outfile") == 0 || strcmp(option, "--ohf") == 0) {
            // NOTE(review): capacity of HTRANS_DATA_OUTFILE is not visible here - confirm and bound this copy.
            strcpy(HTRANS_DATA_OUTFILE, argv[i + 1]);
        } else if (strcmp(option, "--printscores") == 0 || strcmp(option, "--scores") == 0) {
            PRINT_FRAGMENT_SCORES = atoi(value);
        } else if (strcmp(option, "--mbq") == 0) {
            MINQ = atoi(value);
        } else if (strcmp(option, "--skip_prune") == 0 || strcmp(option, "--sp") == 0) {
            check_input_0_or_1(argv[i + 1]);
            SKIP_PRUNE = atoi(value);
        } else if (strcmp(option, "--max_IS") == 0) {
            // The short alias "--mi" is already taken by --max_iter above, so a second
            // test for it here could never match; only the long name selects MAX_IS.
            MAX_IS = atoi(value);
        } else {
            fprintf(stderr, "\nERROR: Invalid Option \"%s\" specified.\n", argv[i]);
            exit(1);
        }
    }

    if (ERROR_ANALYSIS_MODE && HIC){
        fprintf_time(stderr,"WARNING: Switch error quality scores are not intended for use with Hi-C data. Scores will be left blank.\n");
    }

    if (flag != 3) // three essential arguments are not supplied
    {
        print_hapcut_options();
        return 0;
    }

    fprintf(stderr, "\n\n");
    fprintf_time(stderr, "fragment file: %s\n", fragfile);
    fprintf_time(stderr, "variantfile (VCF format):%s\n", VCFfile);
    fprintf_time(stderr, "haplotypes will be output to file: %s\n", hapfile);
    fprintf_time(stderr, "solution convergence cutoff: %d\n", CONVERGE);
    fprintf_time(stderr, "QVoffset: %d\n\n", QVoffset);
    maxcut_haplotyping(fragfile, VCFfile, hapfile);
    return 0;
}