예제 #1
0
파일: rm.c 프로젝트: tibo-xx/NvPcomp
/*
 * Entry point of the argtable "rm" example program.
 *
 * Builds the argtable describing the accepted options, parses the command
 * line against it and then either prints help/version text, reports the
 * parse errors, or hands the parsed values to mymain().
 *
 * Returns 0 on success (including --help and --version) and 1 when the
 * argtable could not be allocated or the command line failed to parse.
 */
int main(int argc, char **argv)
{
    const char *progname = "rm";

    /* Option descriptors; they appear in the glossary in argtable[] order. */
    struct arg_lit  *opt_dir       = arg_lit0("d", "directory",   "unlink file(s), even if it is a non-empty directory");
    struct arg_rem  *opt_dir_note  = arg_rem( NULL,               "(super-user only)");
    struct arg_lit  *opt_force     = arg_lit0("f", "force",       "ignore nonexistant files, never prompt");
    struct arg_lit  *opt_prompt    = arg_lit0("i", "interactive", "prompt before any removal");
    struct arg_lit  *opt_recursive = arg_lit0("rR","recursive",   "remove the contents of directories recursively");
    struct arg_lit  *opt_verbose   = arg_lit0("v", "verbose",     "explain what is being done");
    struct arg_lit  *opt_help      = arg_lit0(NULL,"help",        "print this help and exit");
    struct arg_lit  *opt_version   = arg_lit0(NULL,"version",     "print version information and exit");
    struct arg_file *opt_files     = arg_filen(NULL,NULL,NULL,1,argc+2,NULL);
    struct arg_end  *opt_end       = arg_end(20);

    void *table[] = {
        opt_dir, opt_dir_note, opt_force, opt_prompt, opt_recursive,
        opt_verbose, opt_help, opt_version, opt_files, opt_end
    };
    int status = 0;

    if (arg_nullcheck(table) != 0) {
        /* At least one descriptor is NULL, so an allocation must have failed. */
        printf("%s: insufficient memory\n", progname);
        status = 1;
    } else {
        /* Match the command line against the table. */
        int parse_errors = arg_parse(argc, argv, table);

        if (opt_help->count > 0) {
            /* --help wins over any parse errors. */
            printf("Usage: %s", progname);
            arg_print_syntax(stdout, table, "\n");
            printf("Remove (unlink) the specified file(s).\n\n");
            arg_print_glossary(stdout, table, "  %-20s %s\n");
            printf("\nReport bugs to <no-one> as this is just an example program.\n");
            status = 0;
        } else if (opt_version->count > 0) {
            /* --version also wins over any parse errors. */
            printf("'%s' example program for the \"argtable\" command line argument parser.\n", progname);
            printf("September 2003, Stewart Heitmann\n");
            status = 0;
        } else if (parse_errors > 0) {
            /* Show the error details collected in the arg_end entry. */
            arg_print_errors(stdout, opt_end, progname);
            printf("Try '%s --help' for more information.\n", progname);
            status = 1;
        } else {
            /* Command line is valid: perform the actual "rm" work. */
            mymain(opt_dir->count, opt_force->count, opt_prompt->count,
                   opt_recursive->count, opt_verbose->count,
                   opt_files->filename, opt_files->count);
        }
    }

    /* Release every non-NULL entry of the table on all paths. */
    arg_freetable(table, sizeof(table) / sizeof(table[0]));

    return status;
}
예제 #2
0
/*
 * Entry point of the trim_fastq utility (per its help text: quality and
 * information-based trimming of paired or unpaired, nucleotide or SOLiD
 * colorspace reads).
 *
 * Steps performed:
 *   1. Parse and validate the command line with argtable2, storing the
 *      validated settings in variables declared outside this function
 *      (entropy_cutoff, strict_ent, err_prob, min_len, min_mate_len, fix_len,
 *      colorspace_flag, read_count_divisor).
 *   2. Build input and output file names from the <in_prefix>/<out_prefix>
 *      arguments.
 *   3. Run a set of OpenMP sections connected by pipes: one trimmer per input
 *      file, one writer per output file, and a dispatcher that routes each
 *      read to the appropriate output.
 *   4. Print summary counts.
 *
 * argc/argv: the usual command line arguments.
 *
 * This function does not return; every path terminates through exit(),
 * with EXIT_SUCCESS after a completed run (or --version) and EXIT_FAILURE
 * on any error.
 * NOTE(review): --help also exits with EXIT_FAILURE; left unchanged in case
 * callers depend on it.
 */
int main(int argc, char *argv[]) {
    
#ifndef _OPENMP
    fprintf(stderr, "\nERROR: Program built with compiler lacking OpenMP support.\n");
    fprintf(stderr, "See SEAStAR README file for information about suitable compilers.\n");
    exit(EXIT_FAILURE);
#endif
    
    ///////////////////////////
    // Variable declarations
    ///////////////////////////
    
    // Input filenames
    UT_string *in_read1_fq_fn, *in_read2_fq_fn, *in_single1_fq_fn, *in_single2_fq_fn;
    utstring_new(in_read1_fq_fn);
    utstring_new(in_read2_fq_fn);
    utstring_new(in_single1_fq_fn);
    utstring_new(in_single2_fq_fn);
    
    // Output filenames
    UT_string *out_read1_fn, *out_read2_fn, *out_single1_fn, *out_single2_fn, *out_mates_fn, *out_filetype;
    utstring_new(out_filetype);
    utstring_new(out_read1_fn);
    utstring_new(out_read2_fn);
    utstring_new(out_single1_fn);
    utstring_new(out_single2_fn);
    utstring_new(out_mates_fn);
    
    // Read name prefix
    UT_string *out_read_prefix;
    utstring_new(out_read_prefix);
    
    // Flags
    int singles_flag = 0;   // 1 when two output singles files being written
    int num_input_singles_files = 0;
    
    // Read counters
    unsigned long int mp_org = 0, R1_org = 0, R2_org = 0, singlet1_org = 0, singlet2_org = 0;
    unsigned long int mp_cnt = 0, R1_cnt = 0, R2_cnt = 0, singlet1_cnt = 0, singlet2_cnt = 0, s1_cnt = 0, s2_cnt = 0;
    unsigned long int comp_r1 = 0, comp_r2 = 0, comp_s1 = 0, comp_s2 = 0;
    unsigned long int read1_singlet_cnt = 0, read2_singlet_cnt = 0;

    ////////////////////////////////////////////////////////////////////////
    // All done with variable declarations!!
    
    ///////////////////////////////////
    // Command line argtable settings
    ///////////////////////////////////
    
    // Help strings describe file names with the same '.' separators that the
    // filename construction code below actually uses.
    struct arg_lit *gzip = arg_lit0("z", "gzip", "Output converted files in gzip compressed format. [NULL]");
    struct arg_lit *inv_singles = arg_lit0("v", "invert_singles", "Causes singles output to be the inverse of the input. 2->1 or 1->2 [NULL]");
    struct arg_lit *num_singles = arg_lit0("s", "singles", "Write two singlet files, one for each mate-paired input file. [NULL]");
    struct arg_rem *sing_rem = arg_rem(NULL, "Note! -v is only valid when there are input singlet reads. -s is only valid when there are NO input singlet reads.");    
    struct arg_str *pre_read_id = arg_str0(NULL, "prefix", "<string>", "Prefix to add to read identifiers. [out_prefix]");
    struct arg_lit *no_pre = arg_lit0(NULL, "no_prefix", "Do not change the read names in any way. [NULL]");
    struct arg_lit *pre_read_len = arg_lit0(NULL, "add_len", "Add the final trimmed length value to the read id prefix. [length not added]");
    struct arg_dbl *prob = arg_dbl0("p","correct_prob","<d>","Probability that output reads are correct. 0.0 disables quality trimming. [0.5]");
    struct arg_int *fixed_len = arg_int0("f","fixed_len","<u>","Trim all reads to a fixed length, still filtering on quality [no fixed length]");
    struct arg_int *len = arg_int0("l","min_read_len","<u>","Minimum length of a singlet or longest-mate in nucleotides [24]");
    struct arg_int *mate_len = arg_int0("m","min_mate_len","<u>","Minimum length of the shortest mate in nucleotides [min_read_len]");
    struct arg_dbl *entropy = arg_dbl0("e","entropy_filter","<d>","Remove reads with per position information below given value (in bits per dinucleotide) [No filter]");
    struct arg_lit *entropy_strict = arg_lit0(NULL, "entropy_strict", "Reject reads for low entropy overall, not just the retained part after trimming [NULL]");
    struct arg_lit *mates = arg_lit0(NULL, "mates_file", "Produce a Velvet compatible interleaved paired read output file (e.g. <out_prefix>.mates.fastq). [NULL]");
    struct arg_lit *no_rev = arg_lit0(NULL, "no_rev", "By default, the second read in each pair is reversed for colorspace --mates_file output. --no_rev disables reversing. [rev]");
    struct arg_lit *only_mates = arg_lit0(NULL, "only_mates", "Supress writing .read1 and .read2 outputs. Requires --mates_file. [NULL]");
    struct arg_lit *fasta = arg_lit0(NULL, "fasta", "Write FASTA format files instead of FASTQ for all outputs (e.g. <out_prefix>.<read_type>.fasta). [FASTQ]");
    struct arg_file *input = arg_file1(NULL, NULL, "<in_prefix>", "Input file prefix: (e.g. <in_prefix>.single.fastq [<in_prefix>.read1.fastq <in_prefix>.read2.fastq]) ");
    struct arg_file *output = arg_file1(NULL, NULL, "<out_prefix>", "Output file prefix: (e.g. <out_prefix>.single.fastq [<out_prefix>.read1.fastq <out_prefix>.read2.fastq]) ");
    struct arg_lit *version = arg_lit0(NULL,"version","Print the build version and exit."); 
    struct arg_lit *h = arg_lit0("h", "help", "Request help.");
    struct arg_end *end = arg_end(20);
    
    void *argtable[] = {h,version,gzip,inv_singles,num_singles,sing_rem,prob,len,mate_len,fixed_len,pre_read_id,pre_read_len,no_pre,entropy,entropy_strict,mates,no_rev,only_mates,fasta,input,output,end};
    int arg_errors = 0;
        
    ////////////////////////////////////////////////////////////////////////
    // Handle command line processing (via argtable2 library) 
    ////////////////////////////////////////////////////////////////////////
    
    arg_errors = arg_parse(argc, argv, argtable);

    // --version and --help are handled before parse errors are reported
    if (version->count) {
        fprintf(stderr, "%s version: %s\n", argv[0], SS_BUILD_VERSION);
        exit(EXIT_SUCCESS);
    }    
    
    if (h->count) {
        fprintf(stderr,"\ntrim_fastq is a utility for performing quality and information-based\n");
        fprintf(stderr,"trimming on paired or unpaired, nucleotide or SOLiD colorspace reads. \n\n");
        arg_print_syntaxv(stderr, argtable, "\n\n");
        arg_print_glossary(stderr, argtable, "%-25s %s\n");
        fprintf(stderr, "\nInput and output \"prefixes\" are the part of the filename before:\n");
        fprintf(stderr, ".single.fastq [.read1.fastq .read2.fastq]  A singlets (single) file\n");
        fprintf(stderr, "is required.  Mate-paired read files are automatically used if present.\n");
        fprintf(stderr, "Multiple output files only produced for mate-paired inputs.\n");
        fprintf(stderr, "\nNote! Input and output files may be gzipped, and outputs can be written\n");
        fprintf(stderr, "as either FASTQ or FASTA format files.\n");
        
        exit(EXIT_FAILURE);
    }    
    
    if (arg_errors) { 
        arg_print_errors(stderr, end, "trimfastq");
        arg_print_syntaxv(stderr, argtable, "\n");
        exit(EXIT_FAILURE);
    }
    
    // Validate entropy
    if (entropy->count) {
        entropy_cutoff = entropy->dval[0];
        if ((entropy_cutoff < 0.0) || (entropy_cutoff > 4.0)) {
            fprintf(stderr, "entropy_filter must be [0.0 - 4.0] \n");
            exit(EXIT_FAILURE);
        }
        strict_ent = entropy_strict->count;
    } else {
        if (entropy_strict->count) {
            fprintf(stderr, "Error: --entropy_strict requires --entropy_filter.\n");
            exit(EXIT_FAILURE);
        } 
        // A negative cutoff is stored when no entropy filter was requested
        entropy_cutoff = -1.0;
    }    
    
    // Validate error_prob
    if (prob->count) {
        err_prob = prob->dval[0];
        if ((err_prob < 0.0) || (err_prob > 1.0)) {
            fprintf(stderr, "--correct_prob (-p) must be 0.0 - 1.0 inclusive\n");
            exit(EXIT_FAILURE);
        }
    } else {
        err_prob = 0.5;
    }    

    // Validate min read len
    if (len->count) {
        min_len = len->ival[0];
        if (min_len <= 0) {
            fprintf(stderr, "min_read_len must be > 0\n");
            exit(EXIT_FAILURE);
        }        
    } else {
        min_len = 24;
    }

    // Validate min mate len
    if (mate_len->count) {
        min_mate_len = mate_len->ival[0];
        if (min_mate_len <= 0) {
            fprintf(stderr, "min_mate_len must be > 0\n");
            exit(EXIT_FAILURE);
        }        
        if (min_mate_len > min_len) {
            fprintf(stderr, "min_mate_len must be <= min_read_len\n");
            exit(EXIT_FAILURE);
        }        
    } else {
        min_mate_len = min_len;
    }
    
    // A fixed length overrides both minimum lengths and excludes their options
    if (fixed_len->count) {
        fix_len = min_mate_len = min_len = fixed_len->ival[0];
        if ((mate_len->count) || (len->count)) {
            fprintf(stderr, "fixed_len cannot be used with min_read_len or min_mate_len\n");
            exit(EXIT_FAILURE);
        }
        if (fix_len <= 0) {
            fprintf(stderr, "fixed_len must be > 0\n");
            exit(EXIT_FAILURE);
        }
    } else {
        fix_len = 0;
    }
    
    if (pre_read_id->count) {
        
        if (no_pre->count) {
            fprintf(stderr, "Error: Both --prefix and --no_prefix were specified.\n");
            exit(EXIT_FAILURE);
        }
        
        if (! strlen(pre_read_id->sval[0])) {
            fprintf(stderr, "Read ID prefix may not be zero length.\n");
            exit(EXIT_FAILURE);
        } 
        
        if (strchr(pre_read_id->sval[0], ':') || strchr(pre_read_id->sval[0], '|') || strchr(pre_read_id->sval[0], '+') || strchr(pre_read_id->sval[0], '/')) {
            fprintf(stderr, "Read ID prefix '%s' may not contain the characters ':', '|', '+' or '/'.\n", pre_read_id->sval[0]);
            exit(EXIT_FAILURE);
        }
        
        // Use the explicitly supplied read ID prefix
        ss_strcat_utstring(out_read_prefix, pre_read_id->sval[0]);
        
    } else {

        if (!no_pre->count) {
            
            if (strchr(output->filename[0], ':') || strchr(output->filename[0], '|') || strchr(output->filename[0], '+') || strchr(output->filename[0], '/')) {
                fprintf(stderr, "Read ID prefix '%s' (from output prefix) may not contain the characters ':', '|', '+' or '/'.\n", output->filename[0]);
                fprintf(stderr, "Hint: Use the --prefix parameter if the output file prefix contains path information.\n");
                exit(EXIT_FAILURE);
            }  
            
            // Build default read ID prefix from the output file prefix
            ss_strcat_utstring(out_read_prefix, output->filename[0]);
        }
    }
    
    if ((only_mates->count) && (!mates->count)) {
        fprintf(stderr, "--only_mates requires --mates_file.\n");
        exit(EXIT_FAILURE);
    }

    if ((no_rev->count) && (!mates->count)) {
        fprintf(stderr, "--no_rev requires --mates_file.\n");
        exit(EXIT_FAILURE);
    }
    
    // Check for null string prefixes
    if (!(strlen(input->filename[0]) && strlen(output->filename[0]))) {
        fprintf(stderr, "Error: NULL prefix strings are not permitted.\n");
        exit(EXIT_FAILURE);
    }
    
    // Construct input filenames
    utstring_printf(in_read1_fq_fn, "%s.read1.fastq", input->filename[0]);
    utstring_printf(in_read2_fq_fn, "%s.read2.fastq", input->filename[0]);
    
    utstring_printf(in_single1_fq_fn, "%s.single.fastq", input->filename[0]);
    
    FILE *in_read_file = NULL;
    
    num_input_singles_files = 1;
    
    // Try to open a singlet fastq file
    // Check singlet output options -s and -v
    // Set input singlet names to
    //   - *.single.fastq or
    //   - *.single1.fastq and *.single2.fastq
    if (!(in_read_file = ss_get_gzFile(utstring_body(in_single1_fq_fn), "r"))) {
        utstring_clear(in_single1_fq_fn);
        utstring_printf(in_single1_fq_fn, "%s.single1.fastq", input->filename[0]);
        utstring_printf(in_single2_fq_fn, "%s.single2.fastq", input->filename[0]);
        num_input_singles_files = 2;
        
        if ((in_read_file = ss_get_gzFile(utstring_body(in_single1_fq_fn), "r")) || (in_read_file = ss_get_gzFile(utstring_body(in_single2_fq_fn), "r"))) {
            singles_flag = 1;   // Two singlet outputs
        } else {
            singles_flag = num_singles->count;  // Number of singlet outputs set by -s parm
            if (inv_singles->count) {
                fprintf(stderr, "Error: Invalid option -v, No input singlet file(s) found. Use -s to select multiple output singlet files.\n");
                exit(EXIT_FAILURE);
            }
        }
    }
    
    // The handle was only needed to probe for existence of a singlet input
    if (in_read_file) {
        gzclose(in_read_file);
        if (num_singles->count) {
            fprintf(stderr, "Error: Invalid option -s, Input singlet file(s) found, use -v to change the number of output singlet files.\n");
            exit(EXIT_FAILURE);
        }
    }

    // inv_singles->count inverts the current singles file input scheme
    singles_flag = (singles_flag ^ inv_singles->count);
    
    // Check if input fastq is colorspace
    // If some files are colorspace and some are basespace, throw an error
    int fcount = 0;
    int cscount = 0;
    fcount += ss_is_fastq(utstring_body(in_read1_fq_fn));
    fcount += ss_is_fastq(utstring_body(in_read2_fq_fn));
    fcount += ss_is_fastq(utstring_body(in_single1_fq_fn));
    fcount += ss_is_fastq(utstring_body(in_single2_fq_fn));
    cscount += (ss_is_fastq(utstring_body(in_read1_fq_fn)) && ss_is_colorspace_fastq(utstring_body(in_read1_fq_fn)));
    cscount += (ss_is_fastq(utstring_body(in_read2_fq_fn)) && ss_is_colorspace_fastq(utstring_body(in_read2_fq_fn)));
    cscount += (ss_is_fastq(utstring_body(in_single1_fq_fn)) && ss_is_colorspace_fastq(utstring_body(in_single1_fq_fn)));
    cscount += (ss_is_fastq(utstring_body(in_single2_fq_fn)) && ss_is_colorspace_fastq(utstring_body(in_single2_fq_fn)));
    
    if (cscount && (cscount != fcount)) {        
        // Reported on stderr like every other error in this function
        fprintf(stderr, "Error: Mixed colorspace and basespace FASTQ files detected\n");
        exit(EXIT_FAILURE);
    }
    colorspace_flag = cscount ? 1 : 0;
    
    // Output filenames
    
    // read_count_divisor is what the writers' return values are divided by to
    // obtain read counts: 2 for FASTA output, 4 for FASTQ output
    if (fasta->count) {
        ss_strcat_utstring(out_filetype, "fasta");
        read_count_divisor = 2;
    } else {
        ss_strcat_utstring(out_filetype, "fastq");
        read_count_divisor = 4;
    }
    
    if (!only_mates->count) {
        utstring_printf(out_read1_fn, "%s.read1.%s", output->filename[0], utstring_body(out_filetype));
        utstring_printf(out_read2_fn, "%s.read2.%s", output->filename[0], utstring_body(out_filetype));
    }
    
    if (singles_flag == 1) {
        utstring_printf(out_single1_fn, "%s.single1.%s", output->filename[0], utstring_body(out_filetype));
        utstring_printf(out_single2_fn, "%s.single2.%s", output->filename[0], utstring_body(out_filetype));
    } else {
        utstring_printf(out_single1_fn, "%s.single.%s", output->filename[0], utstring_body(out_filetype));
    }

    if (mates->count) {
        utstring_printf(out_mates_fn, "%s.mates.%s", output->filename[0], utstring_body(out_filetype));
    }
    ////////////////////////////////////////////////////////////////////////////////////////////////
    // Begin processing!
    
#ifdef _OPENMP    
    omp_set_num_threads(10);
#endif    
    
    // This is the value of a non-valid pipe descriptor    
#define NO_PIPE 0
    
    // Pipes carrying trimmed reads from the readers to the dispatcher
    int r1_pipe[2];
    int r2_pipe[2];
    int s1_pipe[2];
    int s2_pipe[2];
    
    // Pipes carrying selected reads from the dispatcher to the writers
    int r1_out_pipe[2];
    int r2_out_pipe[2];
    int mates_out_pipe[2];
    int s1_out_pipe[2];
    int s2_out_pipe[2];
    
    // Create all pipes (same order as before) and stop if any creation fails,
    // rather than continuing with unset descriptors
    if (pipe(r1_pipe) || pipe(r2_pipe) || pipe(s1_pipe) || pipe(s2_pipe) ||
        pipe(r1_out_pipe) || pipe(r2_out_pipe) || pipe(mates_out_pipe) ||
        pipe(s1_out_pipe) || pipe(s2_out_pipe)) {
        perror("Error: pipe()");
        exit(EXIT_FAILURE);
    }
    
    
#pragma omp parallel sections default(shared)       
    {
        
#pragma omp section 
        {   // Read1 reader
            fq_stream_trimmer(in_read1_fq_fn, r1_pipe[1], out_read_prefix, no_pre->count, pre_read_len->count, &comp_r1, &R1_org, '\0', fasta->count);
        }
        
#pragma omp section 
        {   // Read1 writer
            R1_cnt = ss_stream_writer(out_read1_fn, r1_out_pipe[0], gzip->count) / read_count_divisor;
        }
        
#pragma omp section 
        {   // Read2 reader
            fq_stream_trimmer(in_read2_fq_fn, r2_pipe[1], out_read_prefix, no_pre->count, pre_read_len->count, &comp_r2, &R2_org, '\0', fasta->count);
        }
        
#pragma omp section 
        {   // Read2 writer
            R2_cnt = ss_stream_writer(out_read2_fn, r2_out_pipe[0], gzip->count) / read_count_divisor;
        }
        
#pragma omp section 
        {   // Single1 reader
            
            // When there is only one input singles file, but two output singles files, then supply which mate to use for this stream in the split parameter
            if ((singles_flag) && (num_input_singles_files == 1)) {
                singlet1_cnt = fq_stream_trimmer(in_single1_fq_fn, s1_pipe[1], out_read_prefix, no_pre->count, pre_read_len->count, &comp_s1, &singlet1_org, '1', fasta->count);
            } else {
                singlet1_cnt = fq_stream_trimmer(in_single1_fq_fn, s1_pipe[1], out_read_prefix, no_pre->count, pre_read_len->count, &comp_s1, &singlet1_org, '\0', fasta->count);
            }
        }
        
#pragma omp section 
        {   // Single1 writer
            s1_cnt = ss_stream_writer(out_single1_fn, s1_out_pipe[0], gzip->count) / read_count_divisor;
        }
        
#pragma omp section 
        {   // Single2 reader
            
            // When there is only one input singles file, but two output singles files, then supply which mate to use for this stream in the split parameter
            if ((singles_flag) && (num_input_singles_files == 1)) {
                singlet2_cnt = fq_stream_trimmer(in_single1_fq_fn, s2_pipe[1], out_read_prefix, no_pre->count, pre_read_len->count, &comp_s2, &singlet2_org, '2', fasta->count);
            } else {
                singlet2_cnt = fq_stream_trimmer(in_single2_fq_fn, s2_pipe[1], out_read_prefix, no_pre->count, pre_read_len->count, &comp_s2, &singlet2_org, '\0', fasta->count);
            }
        }
        
#pragma omp section 
        {   // Single2 writer
            s2_cnt = ss_stream_writer(out_single2_fn, s2_out_pipe[0], gzip->count) / read_count_divisor;
        }

#pragma omp section 
        {   // Velvet mates writer
            // Divide count by 2 because both R1 and R2 reads go through this writer
            mp_cnt = ss_stream_writer(out_mates_fn, mates_out_pipe[0], gzip->count)  / 2 / read_count_divisor;
        }
        
#pragma omp section 
        {   // Dispatcher

            
            // Allocate data buffer strings
            
            UT_string *r1_data;
            utstring_new(r1_data);
            UT_string *r2_data;
            utstring_new(r2_data);
            UT_string *s1_data;
            utstring_new(s1_data);
            UT_string *s2_data;
            utstring_new(s2_data);
            
            UT_string *rev_tmp;
            utstring_new(rev_tmp);
            
            UT_string *rev_data;
            utstring_new(rev_data);
            
            // Pipes
            FILE *r1_in = fdopen(r1_pipe[0],"r"); 
            FILE *r2_in = fdopen(r2_pipe[0],"r");                 
            
            FILE *s1_in = fdopen(s1_pipe[0],"r");
            FILE *s2_in = fdopen(s2_pipe[0],"r");             

            FILE *mates_out = fdopen(mates_out_pipe[1],"w"); 
            
            FILE *r1_out = fdopen(r1_out_pipe[1],"w"); 
            FILE *r2_out = fdopen(r2_out_pipe[1],"w");                 
            
            FILE *s1_out = fdopen(s1_out_pipe[1],"w");
            FILE *s2_out = fdopen(s2_out_pipe[1],"w");             
            
            // Every stream below is used unconditionally, so a failed fdopen
            // must stop the run here instead of passing NULL along
            if (!(r1_in && r2_in && s1_in && s2_in && mates_out && r1_out && r2_out && s1_out && s2_out)) {
                perror("Error: fdopen()");
                exit(EXIT_FAILURE);
            }
            
            // With a single singles output, close the second singles pipe and
            // route everything written to s2_out into s1_out instead
            if (!singles_flag) {
                fclose(s2_out);
                s2_out = s1_out;
            }
            
            // Flags for data left in single files
            int single1_hungry = 1;
            int single2_hungry = 1;
            
            // Handle read1 and read2 files
            while (ss_get_utstring(r1_in, r1_data)) {
                if (!ss_get_utstring(r2_in, r2_data)) {
                    fprintf(stderr, "Error: Input read1 and read2 files are not synced\n");
                    exit(EXIT_FAILURE);
                }
                if (keep_read(r1_data)) {
                    if (keep_read(r2_data)) {
                        // Output both read1 and read2
                        if (mates->count) {
                            if (only_mates->count) {
                                // Interleaved velvet output only
                                output_read(r1_data, NULL, NULL, r1_in, NULL, mates_out, fasta->count);
                                if (no_rev->count || !colorspace_flag) {
                                    output_read(r2_data, NULL, NULL, r2_in, NULL, mates_out, fasta->count);
                                } else {
                                    output_read(r2_data, rev_data, rev_tmp, r2_in, NULL, mates_out, fasta->count);
                                }
                            } else {
                                // Interleaved velvet output and normal read file output
                                output_read(r1_data, NULL, NULL, r1_in, r1_out, mates_out, fasta->count);
                                if (no_rev->count || !colorspace_flag) {
                                    output_read(r2_data, NULL, NULL, r2_in, r2_out, mates_out, fasta->count);
                                } else {
                                    output_read(r2_data, rev_data, rev_tmp, r2_in, r2_out, mates_out, fasta->count);
                                }
                            }
                        } else {
                            // No interleaved velvet output
                            output_read(r1_data, NULL, NULL, r1_in, r1_out, NULL, fasta->count);
                            output_read(r2_data, NULL, NULL, r2_in, r2_out, NULL, fasta->count);
                        }
                    } else {
                        // Discard read2, output read1 as singlet
                        output_read(r1_data, NULL, NULL, r1_in, s1_out, NULL, fasta->count);
                        read1_singlet_cnt++;
                    }
                } else {
                    if (keep_read(r2_data)) {
                        // Discard read1, output read2 as singlet
                        output_read(r2_data, NULL, NULL, r2_in, s2_out, NULL, fasta->count);
                        read2_singlet_cnt++;
                    }
                }
                
                // Process reads from singles here to take advantage of
                // parallelism
                if (single1_hungry || single2_hungry) {
                    if (single1_hungry) {
                        if (ss_get_utstring(s1_in, s1_data)) {
                            if (keep_read(s1_data)) {
                                output_read(s1_data, NULL, NULL, s1_in, s1_out, NULL, fasta->count);
                            }
                        } else {
                            single1_hungry = 0;
                        }
                    }
                    if (single2_hungry) {
                        if (ss_get_utstring(s2_in, s2_data)) {
                            if (keep_read(s2_data)) {
                                output_read(s2_data, NULL, NULL, s2_in, s2_out, NULL, fasta->count);
                            }
                        } else {
                            single2_hungry = 0;
                        }
                    }
                }
            }
            
            // Drain whatever is left on the singles streams once the paired
            // inputs are exhausted
            while (single1_hungry || single2_hungry) {
                if (single1_hungry) {
                    if (ss_get_utstring(s1_in, s1_data)) {
                        if (keep_read(s1_data)) {
                            output_read(s1_data, NULL, NULL, s1_in, s1_out, NULL, fasta->count);
                        }
                    } else {
                        single1_hungry = 0;
                    }
                }
                if (single2_hungry) {
                    if (ss_get_utstring(s2_in, s2_data)) {
                        if (keep_read(s2_data)) {
                            output_read(s2_data, NULL, NULL, s2_in, s2_out, NULL, fasta->count);
                        }
                    } else {
                        single2_hungry = 0;
                    }
                }
            }
            
            fclose(r1_in);
            fclose(r2_in);
            
            fclose(s1_in);
            fclose(s2_in);
            
            fclose(mates_out);
            
            fclose(r1_out);
            fclose(r2_out);
            
            fclose(s1_out);
            
            // s2_out is a distinct stream only when two singles outputs exist;
            // otherwise it aliases s1_out, which was closed just above
            if (singles_flag) {
                fclose(s2_out);
            }
            
            // Free buffers
            utstring_free(r1_data);
            utstring_free(r2_data);
            utstring_free(s1_data);
            utstring_free(s2_data);
            utstring_free(rev_tmp);
            utstring_free(rev_data);
        }
    }

    if (!(R1_org+singlet1_org+singlet2_org)) {
    
        fprintf(stderr, "ERROR! No reads found in input files, or input(s) not found.\n");
        exit(EXIT_FAILURE);
    
    }
    
    if (R1_org != R2_org) {
        fprintf(stderr, "\nWarning! read1 and read2 fastq files did not contain an equal number of reads. %lu %lu\n", R1_org, R2_org);
    }
    
    if ((R1_org + R2_org) && !(singlet1_cnt + singlet2_cnt)) {
        fprintf(stderr, "\nWarning! read1/read2 files were processed, but no corresponding input singlets were found.\n");
    } 
    
    if (entropy->count) {
        printf("\nLow complexity reads discarded: Read1: %lu, Read2: %lu, Singlets: %lu %lu\n", comp_r1, comp_r2, comp_s1, comp_s2);
    }

    // Unless only the mates file was written, report the read1 writer's count
    // as the matepair count instead of the mates writer's count
    mp_org = R1_org;
    if (!only_mates->count) {
        mp_cnt = R1_cnt;
    }
    
    printf("\nMatepairs: Before: %lu, After: %lu\n", mp_org, mp_cnt);
    printf("Singlets: Before: %lu %lu After: %lu %lu\n", singlet1_org, singlet2_org, s1_cnt, s2_cnt);
    printf("Read1 singlets: %lu, Read2 singlets: %lu, Original singlets: %lu %lu\n", read1_singlet_cnt, read2_singlet_cnt, singlet1_cnt, singlet2_cnt);
    printf("Total Reads Processed: %lu, Reads retained: %lu\n", 2*mp_org+singlet1_org+singlet2_org, 2*mp_cnt+s1_cnt+s2_cnt);
    
    utstring_free(in_read1_fq_fn);
    utstring_free(in_read2_fq_fn);
    utstring_free(in_single1_fq_fn);
    utstring_free(in_single2_fq_fn);
    utstring_free(out_read1_fn);
    utstring_free(out_read2_fn);
    utstring_free(out_single1_fn);
    utstring_free(out_single2_fn);
    utstring_free(out_mates_fn);
    utstring_free(out_filetype);
    
    utstring_free(out_read_prefix);
    
    exit(EXIT_SUCCESS);
}
예제 #3
0
파일: mv.c 프로젝트: SebastianSchlag/KaHIP
/*
 * Entry point of the "mv" argtable example.
 *
 * Builds the option table, installs default values for the string options,
 * parses the command line and then either prints help/version text, reports
 * parse errors, or hands the parsed values to mymain().
 *
 * Returns 0 after --help or --version, 1 if the table could not be allocated
 * or the parser reported errors, and otherwise whatever mymain() returns.
 * Every non-NULL table entry is released through arg_freetable() on all paths.
 */
int main(int argc, char **argv)
    {
    const char *progname = "mv";
    struct arg_str  *backupc  = arg_str0(NULL, "backup", "[CONTROL]",    "make a backup of each existing destination file");
    struct arg_lit  *backup   = arg_lit0("b", NULL,                      "like --backup but does not accept an argument");
    struct arg_lit  *force    = arg_lit0("f", "force",                   "do not prompt before overwriting");
    struct arg_rem  *force1   = arg_rem (NULL,                           "  equivalent to --reply=yes");
    struct arg_lit  *interact = arg_lit0("i", "interactive",             "Prompt before overwriting");
    /* Prompting corresponds to --reply=query (also the default set below);
     * the remark previously repeated the --force text "--reply=yes". */
    struct arg_rem  *interact1= arg_rem (NULL,                           "  equivalent to --reply=query");
    struct arg_str  *reply    = arg_str0(NULL,"reply", "{yes,no,query}", "specify how to handle the prompt about an");
    struct arg_rem  *reply1   = arg_rem (NULL,                           "  existing destination file");
    struct arg_lit  *strpslsh = arg_lit0(NULL,"strip-trailing-slashes",  "remove any trailing slashes from each SOURCE argument");
    struct arg_str  *suffix   = arg_str0("S", "suffix", "SUFFIX",        "override the usual backup suffix");
    struct arg_str  *targetd  = arg_str0(NULL,"target-directory", "DIRECTORY",  "move all SOURCE arguments into DIRECTORY");
    struct arg_lit  *update   = arg_lit0("u", "update",                  "copy only when the SOURCE file is newer");
    struct arg_rem  *update1  = arg_rem (NULL,                           "  than the destination file or when the");
    struct arg_rem  *update2  = arg_rem (NULL,                           "  destination file is missing");
    struct arg_lit  *verbose  = arg_lit0("v", "verbose",                 "explain what is being done");
    struct arg_lit  *help     = arg_lit0(NULL,"help",                    "display this help and exit");
    struct arg_lit  *version  = arg_lit0(NULL,"version",                 "display version information and exit");
    /* SOURCE must appear at least once; the upper bound is argc+2 */
    struct arg_file *files    = arg_filen(NULL, NULL, "SOURCE", 1, argc+2, NULL);
    struct arg_rem  *dest     = arg_rem ("DEST|DIRECTORY", NULL);
    struct arg_end  *end      = arg_end(20);
    /* The order of the entries is the order used for the syntax and glossary output */
    void* argtable[] = {backupc,backup,force,force1,interact,interact1,reply,reply1,strpslsh,suffix,targetd,update,update1,update2,verbose,help,version,files,dest,end};
    int exitcode=0;
    int nerrors;

    /* verify the argtable[] entries were allocated successfully */
    if (arg_nullcheck(argtable) != 0)
        {
        /* NULL entries were detected, some allocations must have failed */
        printf("%s: insufficient memory\n",progname);
        exitcode=1;
        goto exit;
        }

    /* Set default argument values prior to parsing */
    backupc->sval[0] = "existing";  /* --backup={none,off,numbered,t,existing,nil,simple,never} */
    suffix->sval[0]  = "~";         /* --suffix=~ */
    reply->sval[0]   = "query";     /* --reply={yes,no,query} */
    targetd->sval[0] = NULL;

    /* Parse the command line as defined by argtable[] */
    nerrors = arg_parse(argc,argv,argtable);

    /* special case: '--help' takes precedence over error reporting */
    if (help->count > 0)
        {
        printf("Usage: %s", progname);
        arg_print_syntax(stdout,argtable,"\n");
        printf("Rename SOURCE to DEST, or move SOURCE(s) to DIRECTORY.\n\n");
        arg_print_glossary(stdout,argtable,"  %-30s %s\n");
        printf("\nThe backup suffix is \"~\", unless set with --suffix or SIMPLE_BACKUP_SUFFIX.\n"
               "The version control method may be selected via the --backup option or through\n"
               "the VERSION_CONTROL environment variable.  Here are the values:\n\n"
               "  none, off       never make backups (even if --backup is given)\n"
               "  numbered, t     make numbered backups\n"
               "  existing, nil   numbered if numbered backups exist, simple otherwise\n"
               "  simple, never   always make simple backups\n\n"
               "Report bugs to <foo@bar>.\n");
        exitcode=0;
        goto exit;
        }

    /* special case: '--version' takes precedence over error reporting */
    if (version->count > 0)
        {
        printf("'%s' example program for the \"argtable\" command line argument parser.\n",progname);
        printf("September 2003, Stewart Heitmann\n");
        exitcode=0;
        goto exit;
        }

    /* If the parser returned any errors then display them and exit */
    if (nerrors > 0)
        {
        /* Display the error details contained in the arg_end struct.*/
        arg_print_errors(stdout,end,progname);
        printf("Try '%s --help' for more information.\n",progname);
        exitcode=1;
        goto exit;
        }

    /* Command line parsing is complete, do the main processing */
    exitcode = mymain(backupc->sval[0],
                      backup->count,
                      force->count,
                      interact->count,
                      reply->sval[0],
                      strpslsh->count,
                      suffix->sval[0],
                      targetd->sval[0],
                      update->count,
                      verbose->count,
                      files->filename,
                      files->count);

exit:
    /* single cleanup point: deallocate each non-null entry in argtable[] */
    arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));

    return exitcode;
    }
예제 #4
0
파일: ls.c 프로젝트: SebastianSchlag/KaHIP
int main(int argc, char **argv)
    {
    /* The argtable[] entries define the command line options */
    void *argtable[] = {
                a = arg_lit0("a", "all",                 "do not hide entries starting with ."),
                A = arg_lit0("A", "almost-all",          "do not list implied . and .."),
           author = arg_lit0(NULL,"author",              "print the author of each file"),
                b = arg_lit0("b", "escape",              "print octal escapes for nongraphic characters"),
        blocksize = arg_int0(NULL,"block-size","SIZE",   "use SIZE-byte blocks"),
                B = arg_lit0("B", "ignore-backups",      "do not list implied entries ending with ~"),
                c = arg_lit0("c", NULL,                  "with -lt: sort by, and show, ctime (time of last"),
                    arg_rem(NULL,                        "  modification of file status information)"),
                    arg_rem(NULL,                        "  with -l: show ctime and sort by name"),
                    arg_rem(NULL,                        "  otherwise: sort by ctime"),
                C = arg_lit0("C", NULL,                  "list entries by columns"),
            color = arg_str0(NULL,"color","WHEN",        "control whether color is used to distinguish file"),
                    arg_rem(NULL,                        "  types.  WHEN may be `never', `always', or `auto'"),
                d = arg_lit0("d", "directory",           "list directory entries instead of contents,"),
                    arg_rem(NULL,                        "  and do not dereference symbolic links"),
                D = arg_lit0("D", "dired",               "generate output designed for Emacs' dired mode"),
                f = arg_lit0("f", NULL,                  "do not sort, enable -aU, disable -lst"),
                F = arg_lit0("F", "classify",            "append indicator (one of */=@|) to entries"),
           format = arg_str0(NULL,"format","WORD",       "across -x, commas -m, horizontal -x, long -l,"),
                    arg_rem (NULL,                       "  single-column -1, verbose -l, vertical -C"),
         fulltime = arg_lit0(NULL,"full-time",           "like -l --time-style=full-iso"),
                g = arg_lit0("g", NULL,                  "like -l, but do not list owner"),
                G = arg_lit0("G", "no-group",            "inhibit display of group information"),
                h = arg_lit0("h", "human-readable",      "print sizes in human readable format (e.g., 1K 234M 2G)"),
               si = arg_lit0(NULL,"si",                  "likewise, but use powers of 1000 not 1024"),
                H = arg_lit0("H", "dereference-command-line","follow symbolic links listed on the command line"),
            deref = arg_lit0(NULL,"dereference-command-line-symlink-to-dir","follow each command line symbolic link"),
                    arg_rem(NULL,                       "  that points to a directory"),
            indic = arg_str0(NULL,"indicator-style","WORD","append indicator with style WORD to entry names:"),
                    arg_rem (NULL,                       "  none (default), classify (-F), file-type (-p)"),
                i = arg_lit0("i", "inode",               "print index number of each file"),
                I = arg_str0("I", "ignore","PATTERN",    "do not list implied entries matching shell PATTERN"),
                k = arg_lit0("k", NULL,                  "like --block-size=1K"),
                l = arg_lit0("l", NULL,                  "use a long listing format"),
                L = arg_lit0("L", "dereference",         "when showing file information for a symbolic"),
                    arg_rem (NULL,                       "  link, show information for the file the link"),
                    arg_rem (NULL,                       "  references rather than for the link itself"),
                m = arg_lit0("m", NULL,                  "fill width with a comma separated list of entries"),
                n = arg_lit0("n", "numeric-uid-gid",     "like -l, but list numeric UIDs and GIDs"),
                N = arg_lit0("N", "literal",             "print raw entry names (don't treat e.g. control"),
                    arg_rem (NULL,                       "  characters specially)"),
                o = arg_lit0("o", NULL,                  "like -l, but do not list group information"),
                p = arg_lit0("p", "file-type",           "append indicator (one of /=@|) to entries"),
                q = arg_lit0("q", "hide-control-chars",  "print ? instead of non graphic characters"),
           shcont = arg_lit0(NULL,"show-control-chars",  "show non graphic characters as-is (default"),
                    arg_rem (NULL,                       "unless program is `ls' and output is a terminal)"),
                Q = arg_lit0("Q", "quote-name",          "enclose entry names in double quotes"),
           Qstyle = arg_str0(NULL,"quoting-style","WORD","use quoting style WORD for entry names:"),
                    arg_rem (NULL,                       "  literal, locale, shell, shell-always, c, escape"),
                r = arg_lit0("r", "reverse",             "reverse order while sorting"),
                R = arg_lit0("R", "recursive",           "list subdirectories recursively"),
                s = arg_lit0("s", "size",                "print size of each file, in blocks"),
                S = arg_lit0("S", NULL,                  "sort by file size"),
             sort = arg_str0(NULL,"sort","WORD",         "extension -X, none -U, size -S, time -t, version -v,"),
                    arg_rem (NULL,                       "status -c, time -t, atime -u, access -u, use -u"),
             Time = arg_str0(NULL,"time","WORD",         "show time as WORD instead of modification time:"),
                    arg_rem (NULL,                       "  atime, access, use, ctime or status; use"),
                    arg_rem (NULL,                       "  specified time as sort key if --sort=time"),
          timesty = arg_str0(NULL, "time-style","STYLE", "show times using style STYLE:"),
                    arg_rem (NULL,                       "  full-iso, long-iso, iso, locale, +FORMAT"),
                    arg_rem (NULL,                       "FORMAT is interpreted like `date'; if FORMAT is"),
                    arg_rem (NULL,                       "FORMAT1<newline>FORMAT2, FORMAT1 applies to"),
                    arg_rem (NULL,                       "non-recent files and FORMAT2 to recent files;"),
                    arg_rem (NULL,                       "if STYLE is prefixed with `posix-', STYLE"),
                    arg_rem (NULL,                       "takes effect only outside the POSIX locale"),
                t = arg_lit0("t", NULL,                  "sort by modification time"),
                T = arg_int0("T", "tabsize", "COLS",     "assume tab stops at each COLS instead of 8"),
                u = arg_lit0("u", NULL,                  "with -lt: sort by, and show, access time"),
                    arg_rem (NULL,                       "  with -l: show access time and sort by name"),
                    arg_rem (NULL,                       "  otherwise: sort by access time"),
                U = arg_lit0("U", NULL,                  "do not sort; list entries in directory order"),
                v = arg_lit0("v", NULL,                  "sort by version"),
                w = arg_int0("w", "width", "COLS",       "assume screen width instead of current value"),
                x = arg_lit0("x", NULL,                  "list entries by lines instead of by columns"),
                X = arg_lit0("X", NULL,                  "sort alphabetically by entry extension"),
              one = arg_lit0("1", NULL,                  "list one file per line"),
             help = arg_lit0(NULL,"help",                "display this help and exit"),
          version = arg_lit0(NULL,"version",             "display version information and exit"),
            files = arg_filen(NULL, NULL, "FILE", 0, argc+2, NULL),
              end = arg_end(20),
        };
    const char *progname = "ls";
    int exitcode=0;
    int nerrors;

    /* verify the argtable[] entries were allocated sucessfully */
    if (arg_nullcheck(argtable) != 0)
        {
        /* NULL entries were detected, some allocations must have failed */
        printf("%s: insufficient memory\n",progname);
        exitcode=1;
        goto exit;
        }

    /* allow optional argument values for --color */
    /* and set the default value to "always" */
    color->hdr.flag |= ARG_HASOPTVALUE;
    color->sval[0] = "always";

    /* Parse the command line as defined by argtable[] */
    nerrors = arg_parse(argc,argv,argtable);

    /* special case: '--help' takes precedence over error reporting */
    if (help->count > 0)
        {
        printf("Usage: %s", progname);
        arg_print_syntax(stdout,argtable,"\n");
        printf("List information about the FILE(s) (the current directory by default).\n");
        printf("Sort entries alphabetically if none of -cftuSUX nor --sort.\n\n");
        arg_print_glossary(stdout,argtable,"  %-25s %s\n");
        printf("\nSIZE may be (or may be an integer optionally followed by) one of following:\n"
               "kB 1000, K 1024, MB 1,000,000, M 1,048,576, and so on for G, T, P, E, Z, Y.\n\n"
               "By default, color is not used to distinguish types of files.  That is\n"
               "equivalent to using --color=none.  Using the --color option without the\n"
               "optional WHEN argument is equivalent to using --color=always.  With\n"
               "--color=auto, color codes are output only if standard output is connected\n"
               "to a terminal (tty).\n\n"
               "Report bugs to <foo@bar>.\n");
        exitcode=0;
        goto exit;
        }

    /* special case: '--version' takes precedence error reporting */
    if (version->count > 0)
        {
        printf("'%s' example program for the \"argtable\" command line argument parser.\n",progname);
        printf("September 2003, Stewart Heitmann\n");
        exitcode=0;
        goto exit;
        }

    /* If the parser returned any errors then display them and exit */
    if (nerrors > 0)
        {
        /* Display the error details contained in the arg_end struct.*/
        arg_print_errors(stdout,end,progname);
        printf("Try '%s --help' for more information.\n",progname);
        exitcode=1;
        goto exit;
        }

    /* Command line parsing is complete, do the main processing */
    exitcode = mymain();

exit:
    /* deallocate each non-null entry in argtable[] */
    arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));

    return exitcode;
    }
예제 #5
0
파일: myargs.c 프로젝트: amolai/hello-world
/*
 * Fill argtable[] with the program's options, parse the command line and
 * handle --author, --help and --version.
 *
 * argtable[] and the option pointers (i, s, author, keyword, size, help,
 * version, files, end) are not declared in this function; presumably they
 * are file-scope variables declared elsewhere in myargs.c. The table is not
 * freed here.
 *
 * argc, argv: the arguments received by main(); argv[0] is used as the
 *             program name in messages.
 *
 * Returns 0 when the parser reported no errors and neither --help nor
 * --version was given; returns 1 on allocation failure, after printing the
 * help or version text, or when the parser reported errors.
 */
int ParseMyArgs(int argc, char *argv[])
{
    int exitcode = 0;
    int nerrors;

    /*
     * Two arg_rem() remarks used to be created between the entries below
     * ("new files with number suffix" after --separate and " a text book"
     * after --keyword) but their results were never stored in argtable[],
     * so they were never printed and their allocations leaked. They are
     * omitted here; NOTE(review): to show them in the glossary they must
     * be given their own argtable[] slots (the table's declared size is
     * not visible from this function).
     */
    argtable[0] = i       = arg_lit0("i", "insert",           "insert a tag into the text file");
    argtable[1] = s       = arg_lit0("s", "separate",          "separate a text file into smaller parts");
    argtable[2] = author  = arg_lit0(NULL,"author",              "print the author of each file");
    argtable[3] = keyword = arg_str0(NULL, "keyword", "WORD",          "keyword to seperate the parts in ");
    argtable[4] = size    = arg_int0(NULL,"size", "SIZE",              "size of each of parts");
    argtable[5] = help    = arg_lit0(NULL,"help",                "display this help and exit");
    argtable[6] = version = arg_lit0(NULL,"version",             "display version information and exit");
    /* This statement used to end in ',' and only worked via the comma operator. */
    argtable[7] = files   = arg_filen(NULL, NULL, "FILE", 0, argc+2, NULL);
    argtable[8] = end     = arg_end(20);

    /* verify the argtable[] entries were allocated successfully */
    if (arg_nullcheck(argtable) != 0)
        {
        /* NULL entries were detected, some allocations must have failed */
        printf("%s: insufficient memory\n",argv[0]);
        exitcode=1;
        goto exit;
        }

    /* Parse the command line as defined by argtable[] */
    nerrors = arg_parse(argc,argv,argtable);

    /* --author only prints a notice; processing continues afterwards.
     * NOTE(review): the second message has no trailing newline. */
    if (author->count > 0)
        {
        printf("This software is developed by a teacher and some students\n");
        printf("Detail can be found in README file");
        }

    /* special case: '--help' takes precedence over error reporting */
    if (help->count > 0)
        {
        printf("Usage: %s", argv[0]);
        arg_print_syntax(stdout,argtable,"\n");
        arg_print_glossary(stdout,argtable,"  %-25s %s\n");
        exitcode=1;
        goto exit;
        }

    /* special case: '--version' takes precedence over error reporting */
    if (version->count > 0)
        {
        printf("Dec 2010, \n");
        exitcode=1;
        goto exit;
        }

    /* If the parser returned any errors then display them and exit */
    if (nerrors > 0)
        {
        /* Display the error details contained in the arg_end struct.*/
        arg_print_errors(stdout,end,argv[0]);
        printf("Try '%s --help' for more information.\n",argv[0]);
        exitcode=1;
        goto exit;
        }

    /* Command line parsing is complete; the caller does the main processing */

exit:
    return exitcode;
}
예제 #6
0
파일: mymain.c 프로젝트: rforge/phyexe
/**
 * @brief Parse command line parameters. Will exit if help/usage etc
 * are called or or call Log(&rLog, LOG_FATAL, ) if an error was detected.
 *
 * @param[out] user_opts
 * User parameter struct, with defaults already set.
 * @param[in] argc
 * mains argc
 * @param[in] argv
 * mains argv
 * 
 */    
void
ParseCommandLine(cmdline_opts_t *user_opts, int argc, char **argv)
{

     /* argtable command line parsing:
     * see
     * http://argtable.sourceforge.net/doc/argtable2-intro.html
     *
     * basic structure is: arg_xxxN:
     * xxx can be int, lit, db1, str, rex, file or date
     * If N = 0, arguments may appear zero-or-once; N = 1 means
     * exactly once, N = n means up to maxcount times
     *
     *
     * @note: changes here, might also affect main.cpp:ConvertOldCmdLine()
     *
     */  
   
    struct arg_rem  *rem_seq_input  = arg_rem(NULL, "\nSequence Input:");
    struct arg_file *opt_seqin = arg_file0("i", "in,infile",
                                            "{<file>,-}",
                                            "Multiple sequence input file (- for stdin)");
    struct arg_file *opt_hmm_in = arg_filen(NULL, "hmm-in", "<file>",
                                            /*min*/ 0, /*max*/ 128,
                                            "HMM input files");
    struct arg_lit *opt_dealign = arg_lit0(NULL, "dealign",
                                           "Dealign input sequences");
    struct arg_file *opt_profile1 = arg_file0(NULL, "profile1,p1",
                                              "<file>",
                                              "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
    struct arg_file *opt_profile2 = arg_file0(NULL, "profile2,p2",
                                              "<file>",
                                              "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
    struct arg_str *opt_seqtype = arg_str0("t", "seqtype",
                                           "{Protein, RNA, DNA}",
                                           "Force a sequence type (default: auto)");
/*    struct arg_lit *opt_force_protein = arg_lit0(NULL, "protein",
                                         "Set sequence type to protein even if Clustal guessed nucleic acid"); */
    struct arg_str *opt_infmt = arg_str0(NULL, "infmt",
                                            "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
                                            "Forced sequence input file format (default: auto)");

    
    struct arg_rem  *rem_guidetree  = arg_rem(NULL, "\nClustering:");
    struct arg_str *opt_pairdist = arg_str0("p", "pairdist",
                                             "{ktuple}",
                                             "Pairwise alignment distance measure");
    struct arg_file *opt_distmat_in = arg_file0(NULL, "distmat-in",
                                                "<file>",
                                                "Pairwise distance matrix input file (skips distance computation)");
    struct arg_file *opt_distmat_out = arg_file0(NULL, "distmat-out",
                                                 "<file>",
                                                 "Pairwise distance matrix output file");
    struct arg_file *opt_guidetree_in = arg_file0(NULL, "guidetree-in",
                                                  "<file>",
                                                  "Guide tree input file (skips distance computation and guide-tree clustering step)");
    struct arg_file *opt_guidetree_out = arg_file0(NULL, "guidetree-out",
                                                   "<file>",
                                                   "Guide tree output file");
    /* AW: mbed is default since at least R253
       struct arg_lit *opt_mbed = arg_lit0(NULL, "mbed",
       "Fast, Mbed-like clustering for guide-tree calculation");
       struct arg_lit *opt_mbed_iter = arg_lit0(NULL, "mbed-iter",
       "Use Mbed-like clustering also during iteration");
    */
    /* Note: might be better to use arg_str (mbed=YES/NO) but I don't want to introduce an '=' into pipeline, FS, r250 -> */
    struct arg_lit *opt_full = arg_lit0(NULL, "full",
                                        "Use full distance matrix for guide-tree calculation (might be slow; mBed is default)");
    struct arg_lit *opt_full_iter = arg_lit0(NULL, "full-iter",
                                        "Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)");

    struct arg_str *opt_clustering = arg_str0("c", "clustering",
                                              "{UPGMA}",
                                              "Clustering method for guide tree");

    
    struct arg_rem *rem_aln_output  = arg_rem(NULL, "\nAlignment Output:");
    struct arg_file *opt_outfile = arg_file0("o", "out,outfile",
                                             "{file,-}",
                                             "Multiple sequence alignment output file (default: stdout)");
    struct arg_str *opt_outfmt = arg_str0(NULL, "outfmt",
                                            "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
                                            "MSA output file format (default: fasta)");

    
    struct arg_rem *rem_iteration  = arg_rem(NULL, "\nIteration:");
    struct arg_str *opt_num_iterations = arg_str0(NULL, "iterations,iter",
                                                  /* FIXME "{<n>,auto}", "Number of combined guide-tree/HMM iterations"); */
                                                  "<n>", "Number of (combined guide-tree/HMM) iterations");
    struct arg_int *opt_max_guidetree_iterations = arg_int0(NULL, "max-guidetree-iterations",
                                                            "<n>", "Maximum number guidetree iterations");
    struct arg_int *opt_max_hmm_iterations = arg_int0(NULL, "max-hmm-iterations",
                                                      "<n>", "Maximum number of HMM iterations");

   
    struct arg_rem *rem_limits  = arg_rem(NULL, "\nLimits (will exit early, if exceeded):");
    struct arg_int *opt_max_seq = arg_int0(NULL, "maxnumseq", "<n>",
                                           "Maximum allowed number of sequences");
    struct arg_int *opt_max_seqlen = arg_int0(NULL, "maxseqlen", "<l>", 
                                              "Maximum allowed sequence length");


    struct arg_rem *rem_misc  = arg_rem(NULL, "\nMiscellaneous:");

    struct arg_lit *opt_autooptions = arg_lit0(NULL, "auto",
                                         "Set options automatically (might overwrite some of your options)");
    struct arg_int *opt_threads = arg_int0(NULL, "threads", "<n>", 
                                              "Number of processors to use");
    struct arg_file *opt_logfile = arg_file0("l", "log",
                                             "<file>",
                                             "Log all non-essential output to this file");
    struct arg_lit *opt_help = arg_lit0("h", "help",
                                         "Print this help and exit");
    struct arg_lit *opt_version = arg_lit0(NULL, "version",
                                           "Print version information and exit");
    struct arg_lit *opt_long_version = arg_lit0(NULL, "long-version",
                                           "Print long version information and exit");
    struct arg_lit *opt_verbose = arg_litn("v", "verbose",
                                           0, 3,
                                           "Verbose output (increases if given multiple times)");
    struct arg_lit *opt_force = arg_lit0(NULL, "force",
                                         "Force file overwriting");
    struct arg_int *opt_macram = arg_int0(NULL, "MAC-RAM", "<n>", /* keep this quiet for the moment, FS r240 -> */
                                          NULL/*"maximum amount of RAM to use for MAC algorithm (in MB)"*/);


    struct arg_end *opt_end = arg_end(10); /* maximum number of errors
                                            * to store */

    void *argtable[] = {rem_seq_input,
                        opt_seqin,
                        opt_hmm_in,
                        opt_dealign,
                        opt_profile1,
                        opt_profile2,
                        opt_seqtype,
                        /* opt_force_protein, */
                        opt_infmt,
                        rem_guidetree,
#if 0
                        /* no other options then default available or not implemented */
                        opt_pairdist,
#endif
                        opt_distmat_in,
                        opt_distmat_out,
                        opt_guidetree_in,
                        opt_guidetree_out,
                        opt_full, /* FS, r250 -> */
                        opt_full_iter, /* FS, r250 -> */
#if 0
                        /* no other options then default available */
                        opt_clustering,
#endif
                        rem_aln_output,
                        opt_outfile,
                        opt_outfmt,

                        rem_iteration,
                        opt_num_iterations,
                        opt_max_guidetree_iterations,
                        opt_max_hmm_iterations,

                        rem_limits,
                        opt_max_seq,
                        opt_max_seqlen,

                        rem_misc,
                        opt_autooptions,
                        opt_threads,
                        opt_logfile,
                        opt_help,
                        opt_verbose,
                        opt_version,
                        opt_long_version,
                        opt_force,
                        opt_macram, /* FS, r240 -> r241 */

                        opt_end};
    int nerrors;


    /* Verify the argtable[] entries were allocated successfully
     */
    if (arg_nullcheck(argtable)) {
        Log(&rLog, LOG_FATAL, "insufficient memory (for argtable allocation)");
    }

    /* Parse the command line as defined by argtable[]
     */
    nerrors = arg_parse(argc, argv, argtable);

    /* Special case: '--help' takes precedence over error reporting
     */
    if (opt_help->count > 0) {
        printf("%s - %s (%s)\n", PACKAGE_NAME, PACKAGE_VERSION, PACKAGE_CODENAME);

        printf("\n");
        printf("If you like Clustal-Omega please cite:\n%s\n", CITATION);
        printf("If you don't like Clustal-Omega, please let us know why (and cite us anyway).\n");
        /* printf("You can contact reach us under %s\n", PACKAGE_BUGREPORT); */
        printf("\n");
        printf("Check http://www.clustal.org for more information and updates.\n");
            
        printf("\n");
        printf("Usage: %s", basename(argv[0]));
        arg_print_syntax(stdout,argtable, "\n");

        printf("\n");
        printf("A typical invocation would be: %s -i my-in-seqs.fa -o my-out-seqs.fa -v\n",
               basename(argv[0]));
        printf("See below for a list of all options.\n");

        arg_print_glossary(stdout, argtable, "  %-25s %s\n");
        arg_freetable(argtable, sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_SUCCESS);
    }

    /* Special case: '--version' takes precedence over error reporting
     */
    if (opt_version->count > 0) {
        printf("%s\n", PACKAGE_VERSION);
        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_SUCCESS);
    }

    /* Special case: '--long-version' takes precedence over error reporting
     */
    if (opt_long_version->count > 0) {
        char zcLongVersion[1024];
        PrintLongVersion(zcLongVersion, sizeof(zcLongVersion));
        printf("%s\n", zcLongVersion);
        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_SUCCESS);
    }

    /* If the parser returned any errors then display them and exit
     */
    if (nerrors > 0) {
        /* Display the error details contained in the arg_end struct.*/
        arg_print_errors(stdout, opt_end, PACKAGE);
        fprintf(stderr, "For more information try: %s --help\n", argv[0]);
        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_FAILURE);
    }

    
    /* ------------------------------------------------------------
     *
     * Command line successfully parsed. Now transfer values to
     * user_opts. While doing so, make sure that given input files
     * exist and that given output files are writable and either do
     * not exist or, if they do, may be overwritten.
     *
     * No logic checks here! They are done in a different function
     *
     * ------------------------------------------------------------*/
        
    
    /* not part of user_opts because it is declared in src/util.h */
    if (0 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_WARN;
    } else if (1 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_INFO;
    } else if (2 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_VERBOSE;
    } else if (3 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_DEBUG;
    }

    user_opts->aln_opts.bAutoOptions = opt_autooptions->count;

    user_opts->bDealignInputSeqs = opt_dealign->count;

    /* NOTE: full distance matrix used to be default - there was
       --mbed flag but no --full flag after r250 decided that mBed
       should be default - now need --full flag to turn off mBed.
       wanted to retain mBed Boolean, so simply added --full flag. if
       both flags set (erroneously) want --mbed to overwrite --full,
       therefore do --full 1st, then --mbed, FS, r250 */
    if (opt_full->count){
        user_opts->aln_opts.bUseMbed = FALSE;
    }

    if (opt_full_iter->count){
        user_opts->aln_opts.bUseMbedForIteration = FALSE;
    }

    user_opts->bForceFileOverwrite = opt_force->count;



    /* log-file
     */
    if (opt_logfile->count > 0) {
        user_opts->pcLogFile = CkStrdup(opt_logfile->filename[0]);
        
        /* warn if already exists or not writable */
        if (FileExists(user_opts->pcLogFile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->pcLogFile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->pcLogFile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->pcLogFile);
        }
    }


    /* normal sequence input (no profile)
     */
    if (opt_seqin->count > 0) {
        user_opts->pcSeqInfile = CkStrdup(opt_seqin->filename[0]);
    }

    /* Input limitations
     */
    /* maximum number of sequences */
    if (opt_max_seq->count > 0) {
        user_opts->iMaxNumSeq = opt_max_seq->ival[0];
    }
    
    /* maximum sequence length */
    if (opt_max_seqlen->count > 0) {
        user_opts->iMaxSeqLen = opt_max_seqlen->ival[0];
    }
    
    /* Input format
     */
    if (opt_infmt->count > 0) {
        /* avoid gcc warning about discarded qualifier */
        char *tmp = (char *)opt_infmt->sval[0];
        user_opts->iSeqInFormat = String2SeqfileFormat(tmp);
    } else {
        user_opts->iSeqInFormat = SQFILE_UNKNOWN;
    }


    /* Sequence type
     */
    if (opt_seqtype->count > 0) {
        if (STR_NC_EQ(opt_seqtype->sval[0], "protein")) {
            user_opts->iSeqType = SEQTYPE_PROTEIN;
        } else if (STR_NC_EQ(opt_seqtype->sval[0], "rna")) {
            user_opts->iSeqType = SEQTYPE_RNA;
        } else if  (STR_NC_EQ(opt_seqtype->sval[0], "dna")) {
            user_opts->iSeqType = SEQTYPE_DNA;
        } else {
            Log(&rLog, LOG_FATAL, "Unknown sequence type '%s'", opt_seqtype->sval[0]);
        }
    }
/*    if (opt_force_protein->count > 0) {
        user_opts->iSeqType = SEQTYPE_PROTEIN;
    } */

    /* Profile input
     */
    if (opt_profile1->count > 0) {
        user_opts->pcProfile1Infile = CkStrdup(opt_profile1->filename[0]);
        if (! FileExists(user_opts->pcProfile1Infile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile1Infile);
        }
    }
    
    if (opt_profile2->count > 0) {
        user_opts->pcProfile2Infile = CkStrdup(opt_profile2->filename[0]);
        if (! FileExists(user_opts->pcProfile2Infile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile2Infile);
        }
    }
    
    
    /* HMM input
     */
    user_opts->aln_opts.iHMMInputFiles = 0;
    user_opts->aln_opts.ppcHMMInput = NULL;
    if (opt_hmm_in->count>0) {
        int iAux;
        user_opts->aln_opts.iHMMInputFiles = opt_hmm_in->count;
        user_opts->aln_opts.ppcHMMInput = (char **) CKMALLOC(
            user_opts->aln_opts.iHMMInputFiles * sizeof(char*));
        for (iAux=0; iAux<opt_hmm_in->count; iAux++) {
            user_opts->aln_opts.ppcHMMInput[iAux] = CkStrdup(opt_hmm_in->filename[iAux]);
            if (! FileExists(user_opts->aln_opts.ppcHMMInput[iAux])) {
                Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.ppcHMMInput[iAux]);
            }
        }
    }


    /* Pair distance method
     */
    if (opt_pairdist->count > 0) {
        if (STR_NC_EQ(opt_pairdist->sval[0], "ktuple")) {
            user_opts->aln_opts.iPairDistType = PAIRDIST_KTUPLE;
        } else {
            Log(&rLog, LOG_FATAL, "Unknown pairdist method '%s'", opt_pairdist->sval[0]);
        }
    }


    /* Distance matrix input
     */
    if (opt_distmat_in->count > 0) {
        user_opts->aln_opts.pcDistmatInfile = CkStrdup(opt_distmat_in->filename[0]);
        if (! FileExists(user_opts->aln_opts.pcDistmatInfile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcDistmatInfile);
        }
    }


    /* Distance matrix output
     */
    if (opt_distmat_out->count > 0) {
        user_opts->aln_opts.pcDistmatOutfile = CkStrdup(opt_distmat_out->filename[0]);
        
        /* warn if already exists or not writable */
        if (FileExists(user_opts->aln_opts.pcDistmatOutfile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->aln_opts.pcDistmatOutfile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->aln_opts.pcDistmatOutfile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                user_opts->aln_opts.pcDistmatOutfile);
        }
    }

    /* Clustering
     *
     */
    if (opt_clustering->count > 0) {
        if (STR_NC_EQ(opt_clustering->sval[0], "upgma")) {
            user_opts->aln_opts.iClusteringType = CLUSTERING_UPGMA;
        } else {
            Log(&rLog, LOG_FATAL, "Unknown guide-tree clustering method '%s'", opt_clustering->sval[0]);
        }
    }

    
    /* Guidetree input
     */
    if (opt_guidetree_in->count > 0) {
        user_opts->aln_opts.pcGuidetreeInfile = CkStrdup(opt_guidetree_in->filename[0]);
        if (! FileExists(user_opts->aln_opts.pcGuidetreeInfile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcGuidetreeInfile);
        }
    }
    
    
    /* Guidetree output
     */
    if (opt_guidetree_out->count > 0) {
        user_opts->aln_opts.pcGuidetreeOutfile = CkStrdup(opt_guidetree_out->filename[0]);
        
        /* warn if already exists or not writable */
        if (FileExists(user_opts->aln_opts.pcGuidetreeOutfile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->aln_opts.pcGuidetreeOutfile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->aln_opts.pcGuidetreeOutfile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->aln_opts.pcGuidetreeOutfile);
        }
    }
    

    /* max guidetree iterations
     */
    if (opt_max_guidetree_iterations->count > 0) {
        user_opts->aln_opts.iMaxGuidetreeIterations = opt_max_guidetree_iterations->ival[0];
    }


    /* max HMM iterations
     */
    if (opt_max_hmm_iterations->count > 0) {
        user_opts->aln_opts.iMaxHMMIterations = opt_max_hmm_iterations->ival[0];
    }

     /* number of iterations
      */
     if (opt_num_iterations->count > 0) {
        if (STR_NC_EQ(opt_num_iterations->sval[0], "auto")) {
            Log(&rLog, LOG_FATAL, "Automatic iteration not supported at the moment.");
            user_opts->aln_opts.bIterationsAuto = TRUE;

        } else {
            int iAux;
            user_opts->aln_opts.bIterationsAuto = FALSE;
            for (iAux=0; iAux<(int)strlen(opt_num_iterations->sval[0]); iAux++) {
                if (! isdigit(opt_num_iterations->sval[0][iAux])) {
                    Log(&rLog, LOG_FATAL, "Couldn't iteration parameter: %s",
                          opt_num_iterations->sval[0]);
                }
            }
            user_opts->aln_opts.iNumIterations = atoi(opt_num_iterations->sval[0]);
        }
    }

    
    /* Alignment output
     */
    if (opt_outfile->count > 0) {
        user_opts->pcAlnOutfile = CkStrdup(opt_outfile->filename[0]);

        /* warn if already exists or not writable */
        if (FileExists(user_opts->pcAlnOutfile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->pcAlnOutfile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->pcAlnOutfile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->pcAlnOutfile);
        }
    }
    

    /* Output format
     */
    if (opt_outfmt->count > 0) {
        /* avoid gcc warning about discarded qualifier */
        char *tmp = (char *)opt_outfmt->sval[0];
        user_opts->iAlnOutFormat = String2SeqfileFormat(tmp);
        if (SQFILE_UNKNOWN == user_opts->iAlnOutFormat) {
            Log(&rLog, LOG_FATAL, "Unknown output format '%s'", opt_outfmt->sval[0]);
        }
    }

    /* Number of threads
     */
#ifdef HAVE_OPENMP
    if (opt_threads->count > 0) {
        if (opt_threads->ival[0] <= 0) {
            Log(&rLog, LOG_FATAL, "Changing number of threads to %d doesn't make sense.", 
                  opt_threads->ival[0]);    
        }
        user_opts->iThreads = opt_threads->ival[0];
    }

#else
    if (opt_threads->count > 0) {
        if (opt_threads->ival[0] > 1) {
            Log(&rLog, LOG_FATAL, "Cannot change number of threads to %d. %s was build without OpenMP support.", 
                  opt_threads->ival[0], PACKAGE_NAME);    
        }
    }
#endif


    /* max MAC RAM (maximum amount of RAM set aside for MAC algorithm)
     */
    if (opt_macram->count > 0) { /* FS, r240 -> r241 */
        user_opts->aln_opts.rHhalignPara.iMacRamMB = opt_macram->ival[0];
    }

    arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));

    UserOptsLogicCheck(user_opts);

    return; 
}