main (int argc, char ** argv ) { char *seqfile; /* name of sequence file */ SQINFO sqinfo; /* extra info about sequence */ SQFILE *dbfp; /* open sequence file */ int fmt,ofmt=106; /* format of seqfile */ /* 106 is PHYLIP format in SQUID */ char *seq; /* sequence */ int type; /* kAmino, kDNA, kRNA, or kOtherSeq */ sequence * seqs, * cds_seqs; sequence tmp_seqs[2], tmp_cds_seqs[2]; char *optname; char *optarg, *t; int optind; int be_quiet; int seqct = 0,cdsct = 0; int min_aln_len = 0; int do_oneline = 0; char * output_filename = 0, *submat_file = 0; int showaln = 1; int showheader=1; FILE *ofd, *fd; alignment *cds_aln; alignment * opt_alignment = NULL; /* place for pairwise alignment */ int len,i,j, k, jk,ik,aln_count, rc; pairwise_distances pwMLdist, pwNGdist; int firsttime = 1; struct timeval tp; pwMLdist.N = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwMLdist.dN = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwMLdist.S = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwMLdist.dS = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwMLdist.dNdS = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwMLdist.SEdS = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwMLdist.SEdN = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwMLdist.t = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwMLdist.kappa= make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwNGdist.dN = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwNGdist.dS = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); pwNGdist.dNdS = make_double_matrix(NUM_PW_SEQS,NUM_PW_SEQS); /* pwMLdist.N = pwMLdist.dN = pwMLdist.S = 0; pwMLdist.dS = pwMLdist.dNdS = pwMLdist.SEdS = 0; pwMLdist.SEdN = pwMLdist.t = pwMLdist.kappa= 0; pwNGdist.dN = pwNGdist.dS = pwNGdist.dNdS = 0; */ Alntype = default_aln_type; /* Command line Parse */ fmt = SQFILE_UNKNOWN; /* default: autodetect format */ be_quiet = FALSE; type = kOtherSeq; /* for our purposes this is only pairwise alignments, but * would rather do it correctly in case we move to MSA case */ while (Getopt(argc, 
argv, OPTIONS, NOPTIONS, usage, &optind, &optname, &optarg)) { if (strcmp(optname, "--matrix") == 0) submat_file = optarg; else if (strcmp(optname, "--quiet") == 0) be_quiet = TRUE; else if (strcmp(optname, "--gapopen") == 0) { Gapopen = atoi(optarg); if( Gapopen < 0 ) Gapopen *= -1; } else if (strcmp(optname, "--gapext") == 0) { Gapext = atoi(optarg); if( Gapext < 0 ) Gapext *= -1; } else if (strcmp(optname, "--informat") == 0) { fmt = String2SeqfileFormat(optarg); if (fmt == SQFILE_UNKNOWN) Die("unrecognized sequence file format \"%s\"", optarg); } else if (strcmp(optname, "--outformat") == 0) { ofmt = String2SeqfileFormat(optarg); if (ofmt == SQFILE_UNKNOWN) Die("unrecognized sequence file format \"%s\"", optarg); } else if( strcmp(optname, "--global") == 0 ) { Alntype = global; } else if (strcmp(optname, "-h") == 0) { puts(usage); puts(experts); exit(EXIT_SUCCESS); } else if ( strcmp(optname, "-v") == 0 ) { Verbose = 1; } else if ( strcmp(optname, "--gapchar") == 0 ) { GapChar = optarg[0]; } else if( strcmp(optname, "--output") == 0 ) { output_filename = optarg; } else if( strcmp(optname, "--showtable" ) == 0 ) { showaln = 0; } else if( strcmp(optname, "--noheader" ) == 0 ) { showheader = 0; } } if (argc - optind < 1) Die("%s\n", usage); if( ! submat_file ) { if( (t = getenv("SUBOPTDIR")) != 0 || (t = getenv("SUBOPT_DIR")) != 0 ) { submat_file = calloc(strlen(t) + 24, sizeof(char)); sprintf(submat_file, "%s/%s",t,Default_submat); } else { submat_file = calloc(strlen((void *)Default_submat) + 24, sizeof(char)); sprintf(submat_file, "../%s",Default_submat); } } /* open matrix */ fd = fopen(submat_file, "r"); if( ! ParsePAMFile(fd,&ScoringMatrix, &MatrixScale) ) { fprintf(stderr, "Cannot parse or open matrix file %s\n",submat_file); free(submat_file); exit(EXIT_SUCCESS); } if( output_filename && strlen(output_filename) != 1 && output_filename[0] != '-') { ofd = fopen(output_filename,"w"); if( ! 
ofd ) { fprintf(stderr, "could not open file %s",output_filename); goto end; } } else ofd = stdout; while( optind < argc ) { seqfile = argv[optind++]; /* Try to work around inability to autodetect from a pipe or .gz: * assume FASTA format */ if (fmt == SQFILE_UNKNOWN && (Strparse("^.*\\.gz$", seqfile, 0) || strcmp(seqfile, "-") == 0)) fmt = SQFILE_FASTA; if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL) Die("Failed to open sequence file %s for reading", seqfile); while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo)) { FreeSequence(NULL, &sqinfo); seqct++; } cds_seqs = (sequence *)calloc(seqct, sizeof(sequence)); seqs = (sequence *)calloc(seqct, sizeof(sequence)); SeqfileRewind(dbfp); seqct=0; while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo)) { sqinfo.type = Seqtype(seq); if( sqinfo.type == kDNA || sqinfo.type == kRNA ) { seqs[seqct].seqstr = Translate(seq,stdcode1); /* Let's remove the last codon if it is a stop codon */ len = strlen(seqs[seqct].seqstr); if( Verbose ) fprintf(stderr,"seqct is %d length is %d\n",seqct, len); if( seqs[seqct].seqstr[len-1] == '*' ) { seqs[seqct].seqstr[len-1] = '\0'; seq[strlen(seq) - 3] = '\0'; } cds_seqs[cdsct].seqstr = seq; seqs[seqct].seqname = calloc(strlen(sqinfo.name)+1,sizeof(char)); cds_seqs[cdsct].seqname = calloc(strlen(sqinfo.name)+1,sizeof(char)); strcpy(seqs[seqct].seqname,sqinfo.name ); strcpy(cds_seqs[cdsct].seqname,sqinfo.name); cds_seqs[cdsct].length = sqinfo.len; cds_seqs[cdsct].alphabet = ( sqinfo.type == kDNA ) ? 
dna : rna; seqs[seqct].length = strlen(seqs[seqct].seqstr); seqs[seqct].alphabet = protein; cdsct++; seqct++; } else { fprintf(stderr,"Expect CDS sequences (DNA or RNA) not Protein\n"); goto end; } FreeSequence(NULL, &sqinfo); if( Verbose && seqct > 3 ) break; } if( seqct < 2 ) { fprintf(stderr,"Must have provided a valid file with at least 2 sequences in it"); goto end; } for( i=0; i < seqct; i++ ) { for(k=i+1; k < seqct; k++ ) { if( (opt_alignment = (alignment *)calloc(1,sizeof(alignment *))) == NULL) { fprintf(stderr,"Could not allocate memory\n"); goto end; } opt_alignment->msa = NULL; rc = optimal_align(&seqs[i],&seqs[k],opt_alignment); if( rc != 1 ) { fprintf(stderr,"Could not make an optimal alignment\n"); goto end; } else { tmp_cds_seqs[0] = cds_seqs[i]; tmp_cds_seqs[1] = cds_seqs[k]; rc = mrtrans(opt_alignment, tmp_cds_seqs, &cds_aln,0); if( rc != 0 ) { fprintf(stderr, "Could not map the coding sequence to the protein alignemnt for aln %d: %d\n",i,rc); goto end; } if( showaln ) { if( ofmt >= 100 ) { MSAFileWrite(ofd,cds_aln->msa, ofmt,do_oneline); } else { for(j=0; j < cds_aln->msa->nseq; j++ ) { WriteSeq(ofd, ofmt, cds_aln->msa->aseq[j], &(cds_aln->sqinfo[j]) ); } } } else { if( showheader && firsttime ) { fprintf(ofd,"SEQ1\tSEQ2\tSCORE\tdN\tdS\tOMEGA\tN\tS\tkappa\tt\tLENGTH\n"); firsttime = 0; } if( do_kaks_yn00(cds_aln->msa, &pwMLdist,&pwNGdist) < 0 ) { fprintf(stderr, "warning: problem with align for %s %s\n", cds_aln->msa->sqname[0], cds_aln->msa->sqname[1]); continue; } for(ik = 0; ik < NUM_PW_SEQS; ik++ ) { for( jk = ik+1; jk < NUM_PW_SEQS; jk++ ) { fprintf(ofd,"%s\t%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%d\n", cds_aln->sqinfo[ik].name, cds_aln->sqinfo[jk].name, opt_alignment->score, pwMLdist.dN[ik][jk],pwMLdist.dS[ik][jk], pwMLdist.dNdS[ik][jk], pwMLdist.N[ik][jk], pwMLdist.S[ik][jk], pwMLdist.kappa[ik][jk], pwMLdist.t[ik][jk], opt_alignment->msa->alen); } } } } cleanup_alignment(cds_aln); cleanup_alignment(opt_alignment); } } } if( ofd && ofd != 
stdout ) fclose(ofd); end: free(submat_file); Free2DArray((void **)ScoringMatrix,27); for(i =0; i< seqct; i++ ) { free(seqs[i].seqstr); free(seqs[i].seqname); seqs[i].seqstr = seqs[i].seqname = 0; } for(i = 0; i < cdsct; i++) { free(cds_seqs[i].seqstr); free(cds_seqs[i].seqname); cds_seqs[i].seqstr = cds_seqs[i].seqname = 0; } cleanup_matrix((void **)pwMLdist.N,NUM_PW_SEQS); cleanup_matrix((void **)pwMLdist.dN,NUM_PW_SEQS); cleanup_matrix((void **)pwMLdist.S,NUM_PW_SEQS); cleanup_matrix((void **)pwMLdist.dS,NUM_PW_SEQS); cleanup_matrix((void **)pwMLdist.SEdS,NUM_PW_SEQS); cleanup_matrix((void **)pwMLdist.SEdN,NUM_PW_SEQS); cleanup_matrix((void **)pwMLdist.t,NUM_PW_SEQS); cleanup_matrix((void **)pwMLdist.dNdS,NUM_PW_SEQS); cleanup_matrix((void **)pwMLdist.kappa,NUM_PW_SEQS); cleanup_matrix((void **)pwNGdist.dN,NUM_PW_SEQS); cleanup_matrix((void **)pwNGdist.dS,NUM_PW_SEQS); cleanup_matrix((void **)pwNGdist.dNdS,NUM_PW_SEQS); free(pwNGdist.dNdS); free(pwNGdist.dN); free(pwNGdist.dS); free(pwMLdist.dNdS); free(pwMLdist.dN); free(pwMLdist.dS); free(pwMLdist.N); free(pwMLdist.S); free(pwMLdist.SEdS); free(pwMLdist.SEdN); free(pwMLdist.t); free(pwMLdist.kappa); return 0; }
/**
 * @brief Parse command line parameters. Will exit if help/usage etc
 * are called or call Log(&rLog, LOG_FATAL, ) if an error was detected.
 *
 * Besides filling in user_opts, this function sets rLog.iLogLevelEnabled
 * from the number of -v flags, releases the argtable entries again and
 * finally hands user_opts to UserOptsLogicCheck().
 *
 * @param[out] user_opts
 * User parameter struct, with defaults already set.
 * @param[in] argc
 * mains argc
 * @param[in] argv
 * mains argv
 *
 */
void
ParseCommandLine(cmdline_opts_t *user_opts, int argc, char **argv)
{
    /* argtable command line parsing:
     * see
     * http://argtable.sourceforge.net/doc/argtable2-intro.html
     *
     * basic structure is: arg_xxxN:
     * xxx can be int, lit, db1, str, rex, file or date
     * If N = 0, arguments may appear zero-or-once; N = 1 means
     * exactly once, N = n means up to maxcount times
     *
     *
     * @note: changes here, might also affect main.cpp:ConvertOldCmdLine()
     *
     */

    struct arg_rem  *rem_seq_input  = arg_rem(NULL, "\nSequence Input:");
    struct arg_file *opt_seqin = arg_file0("i", "in,infile",
                                            "{<file>,-}",
                                            "Multiple sequence input file (- for stdin)");
    struct arg_file *opt_hmm_in = arg_filen(NULL, "hmm-in", "<file>",
                                            /*min*/ 0, /*max*/ 128,
                                            "HMM input files");
    struct arg_lit *opt_dealign = arg_lit0(NULL, "dealign",
                                           "Dealign input sequences");
    struct arg_file *opt_profile1 = arg_file0(NULL, "profile1,p1",
                                              "<file>",
                                              "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
    struct arg_file *opt_profile2 = arg_file0(NULL, "profile2,p2",
                                              "<file>",
                                              "Pre-aligned multiple sequence file (aligned columns will be kept fix)");
    struct arg_str *opt_seqtype = arg_str0("t", "seqtype",
                                           "{Protein, RNA, DNA}",
                                           "Force a sequence type (default: auto)");
    /* struct arg_lit *opt_force_protein = arg_lit0(NULL, "protein",
       "Set sequence type to protein even if Clustal guessed nucleic acid"); */
    struct arg_str *opt_infmt = arg_str0(NULL, "infmt",
                                            "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
                                            "Forced sequence input file format (default: auto)");

    struct arg_rem  *rem_guidetree  = arg_rem(NULL, "\nClustering:");
    struct arg_str *opt_pairdist = arg_str0("p", "pairdist",
                                             "{ktuple}",
                                             "Pairwise alignment distance measure");
    struct arg_file *opt_distmat_in = arg_file0(NULL, "distmat-in",
                                                "<file>",
                                                "Pairwise distance matrix input file (skips distance computation)");
    struct arg_file *opt_distmat_out = arg_file0(NULL, "distmat-out",
                                                 "<file>",
                                                 "Pairwise distance matrix output file");
    struct arg_file *opt_guidetree_in = arg_file0(NULL, "guidetree-in",
                                                  "<file>",
                                                  "Guide tree input file (skips distance computation and guide-tree clustering step)");
    struct arg_file *opt_guidetree_out = arg_file0(NULL, "guidetree-out",
                                                   "<file>",
                                                   "Guide tree output file");
    /* AW: mbed is default since at least R253
       struct arg_lit *opt_mbed = arg_lit0(NULL, "mbed",
       "Fast, Mbed-like clustering for guide-tree calculation");
       struct arg_lit *opt_mbed_iter = arg_lit0(NULL, "mbed-iter",
       "Use Mbed-like clustering also during iteration");
    */
    /* Note: might be better to use arg_str (mbed=YES/NO) but I don't want to introduce an '=' into pipeline, FS, r250 -> */
    struct arg_lit *opt_full = arg_lit0(NULL, "full",
                                        "Use full distance matrix for guide-tree calculation (might be slow; mBed is default)");
    struct arg_lit *opt_full_iter = arg_lit0(NULL, "full-iter",
                                        "Use full distance matrix for guide-tree calculation during iteration (might be slowish; mBed is default)");

    struct arg_str *opt_clustering = arg_str0("c", "clustering",
                                              "{UPGMA}",
                                              "Clustering method for guide tree");

    struct arg_rem *rem_aln_output  = arg_rem(NULL, "\nAlignment Output:");
    struct arg_file *opt_outfile = arg_file0("o", "out,outfile",
                                             "{file,-}",
                                             "Multiple sequence alignment output file (default: stdout)");
    struct arg_str *opt_outfmt = arg_str0(NULL, "outfmt",
                                            "{a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]}",
                                            "MSA output file format (default: fasta)");

    struct arg_rem *rem_iteration  = arg_rem(NULL, "\nIteration:");
    struct arg_str *opt_num_iterations = arg_str0(NULL, "iterations,iter",
                                                  /* FIXME "{<n>,auto}", "Number of combined guide-tree/HMM iterations"); */
                                                  "<n>", "Number of (combined guide-tree/HMM) iterations");
    struct arg_int *opt_max_guidetree_iterations = arg_int0(NULL, "max-guidetree-iterations",
                                                            "<n>", "Maximum number guidetree iterations");
    struct arg_int *opt_max_hmm_iterations = arg_int0(NULL, "max-hmm-iterations",
                                                      "<n>", "Maximum number of HMM iterations");

    struct arg_rem *rem_limits  = arg_rem(NULL, "\nLimits (will exit early, if exceeded):");
    struct arg_int *opt_max_seq = arg_int0(NULL, "maxnumseq", "<n>",
                                           "Maximum allowed number of sequences");
    struct arg_int *opt_max_seqlen = arg_int0(NULL, "maxseqlen", "<l>",
                                              "Maximum allowed sequence length");

    struct arg_rem *rem_misc  = arg_rem(NULL, "\nMiscellaneous:");

    struct arg_lit *opt_autooptions = arg_lit0(NULL, "auto",
                                               "Set options automatically (might overwrite some of your options)");
    struct arg_int *opt_threads = arg_int0(NULL, "threads", "<n>",
                                           "Number of processors to use");
    struct arg_file *opt_logfile = arg_file0("l", "log",
                                             "<file>",
                                             "Log all non-essential output to this file");
    struct arg_lit *opt_help = arg_lit0("h", "help",
                                        "Print this help and exit");
    struct arg_lit *opt_version = arg_lit0(NULL, "version",
                                           "Print version information and exit");
    struct arg_lit *opt_long_version = arg_lit0(NULL, "long-version",
                                           "Print long version information and exit");
    struct arg_lit *opt_verbose = arg_litn("v", "verbose",
                                           0, 3,
                                           "Verbose output (increases if given multiple times)");
    struct arg_lit *opt_force = arg_lit0(NULL, "force",
                                         "Force file overwriting");
    struct arg_int *opt_macram = arg_int0(NULL, "MAC-RAM", "<n>", /* keep this quiet for the moment, FS r240 -> */
                                          NULL/*"maximum amount of RAM to use for MAC algorithm (in MB)"*/);

    struct arg_end *opt_end = arg_end(10); /* maximum number of errors
                                            * to store */

    void *argtable[] = {rem_seq_input,
                        opt_seqin,
                        opt_hmm_in,
                        opt_dealign,
                        opt_profile1,
                        opt_profile2,
                        opt_seqtype,
                        /* opt_force_protein, */
                        opt_infmt,
                        rem_guidetree,
#if 0
                        /* no other options then default available or not implemented */
                        opt_pairdist,
#endif
                        opt_distmat_in,
                        opt_distmat_out,
                        opt_guidetree_in,
                        opt_guidetree_out,
                        opt_full, /* FS, r250 -> */
                        opt_full_iter, /* FS, r250 -> */
#if 0
                        /* no other options then default available */
                        opt_clustering,
#endif
                        rem_aln_output,
                        opt_outfile,
                        opt_outfmt,

                        rem_iteration,
                        opt_num_iterations,
                        opt_max_guidetree_iterations,
                        opt_max_hmm_iterations,

                        rem_limits,
                        opt_max_seq,
                        opt_max_seqlen,

                        rem_misc,
                        opt_autooptions,
                        opt_threads,
                        opt_logfile,
                        opt_help,
                        opt_verbose,
                        opt_version,
                        opt_long_version,
                        opt_force,
                        opt_macram, /* FS, r240 -> r241 */

                        opt_end};
    /* Entries that are created above and read below, but compiled out of
     * argtable[]: arg_nullcheck() and arg_freetable() on argtable[] do not
     * cover them, so they are checked and released separately. */
    void *apvUnlisted[] = {opt_pairdist, opt_clustering};
    int nerrors;


    /* Verify the argtable[] entries were allocated sucessfully
     */
    if (arg_nullcheck(argtable)
        || NULL == opt_pairdist || NULL == opt_clustering) {
        Log(&rLog, LOG_FATAL, "insufficient memory (for argtable allocation)");
    }

    /* Parse the command line as defined by argtable[]
     */
    nerrors = arg_parse(argc, argv, argtable);

    /* Special case: '--help' takes precedence over error reporting
     */
    if (opt_help->count > 0) {
        printf("%s - %s (%s)\n", PACKAGE_NAME, PACKAGE_VERSION, PACKAGE_CODENAME);

        printf("\n");
        printf("If you like Clustal-Omega please cite:\n%s\n", CITATION);
        printf("If you don't like Clustal-Omega, please let us know why (and cite us anyway).\n");
        /* printf("You can contact reach us under %s\n", PACKAGE_BUGREPORT); */
        printf("\n");
        printf("Check http://www.clustal.org for more information and updates.\n");

        printf("\n");
        printf("Usage: %s", basename(argv[0]));
        arg_print_syntax(stdout,argtable, "\n");

        printf("\n");
        printf("A typical invocation would be: %s -i my-in-seqs.fa -o my-out-seqs.fa -v\n",
               basename(argv[0]));
        printf("See below for a list of all options.\n");

        arg_print_glossary(stdout, argtable, "  %-25s %s\n");
        arg_freetable(argtable, sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_SUCCESS);
    }

    /* Special case: '--version' takes precedence over error reporting
     */
    if (opt_version->count > 0) {
        printf("%s\n", PACKAGE_VERSION);
        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_SUCCESS);
    }

    /* Special case: '--long-version' takes precedence over error reporting
     */
    if (opt_long_version->count > 0) {
        char zcLongVersion[1024];
        PrintLongVersion(zcLongVersion, sizeof(zcLongVersion));
        printf("%s\n", zcLongVersion);
        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_SUCCESS);
    }

    /* If the parser returned any errors then display them and exit
     */
    if (nerrors > 0) {
        /* Display the error details contained in the arg_end struct.*/
        arg_print_errors(stdout, opt_end, PACKAGE);
        fprintf(stderr, "For more information try: %s --help\n", argv[0]);
        arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
        exit(EXIT_FAILURE);
    }


    /* ------------------------------------------------------------
     *
     * Command line successfully parsed. Now transfer values to
     * user_opts. While doing so, make sure that given input files
     * exist and given output files are writable do not exist, or if
     * they do, should be overwritten.
     *
     * No logic checks here! They are done in a different function
     *
     * ------------------------------------------------------------*/


    /* not part of user_opts because it declared in src/util.h */
    if (0 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_WARN;
    } else if (1 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_INFO;
    } else if (2 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_VERBOSE;
    } else if (3 == opt_verbose->count) {
        rLog.iLogLevelEnabled = LOG_DEBUG;
    }

    user_opts->aln_opts.bAutoOptions = opt_autooptions->count;

    user_opts->bDealignInputSeqs = opt_dealign->count;

    /* NOTE: full distance matrix used to be default - there was
       --mbed flag but no --full flag after r250 decided that mBed
       should be default - now need --full flag to turn off mBed.
       wanted to retain mBed Boolean, so simply added --full flag. if
       both flags set (erroneously) want --mbed to overwrite --full,
       therefore do --full 1st, the --mbed, FS, r250 */
    if (opt_full->count) {
        user_opts->aln_opts.bUseMbed = FALSE;
    }

    if (opt_full_iter->count) {
        user_opts->aln_opts.bUseMbedForIteration = FALSE;
    }

    user_opts->bForceFileOverwrite = opt_force->count;


    /* log-file
     */
    if (opt_logfile->count > 0) {
        user_opts->pcLogFile = CkStrdup(opt_logfile->filename[0]);

        /* warn if already exists or not writable */
        if (FileExists(user_opts->pcLogFile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->pcLogFile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->pcLogFile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->pcLogFile);
        }
    }


    /* normal sequence input (no profile)
     */
    if (opt_seqin->count > 0) {
        user_opts->pcSeqInfile = CkStrdup(opt_seqin->filename[0]);
    }

    /* Input limitations
     */
    /* maximum number of sequences */
    if (opt_max_seq->count > 0) {
        user_opts->iMaxNumSeq = opt_max_seq->ival[0];
    }

    /* maximum sequence length */
    if (opt_max_seqlen->count > 0) {
        user_opts->iMaxSeqLen = opt_max_seqlen->ival[0];
    }

    /* Input format
     */
    if (opt_infmt->count > 0) {
        /* avoid gcc warning about discarded qualifier */
        char *tmp = (char *)opt_infmt->sval[0];
        user_opts->iSeqInFormat = String2SeqfileFormat(tmp);
    } else {
        user_opts->iSeqInFormat = SQFILE_UNKNOWN;
    }


    /* Sequence type
     */
    if (opt_seqtype->count > 0) {
        if (STR_NC_EQ(opt_seqtype->sval[0], "protein")) {
            user_opts->iSeqType = SEQTYPE_PROTEIN;
        } else if (STR_NC_EQ(opt_seqtype->sval[0], "rna")) {
            user_opts->iSeqType = SEQTYPE_RNA;
        } else if  (STR_NC_EQ(opt_seqtype->sval[0], "dna")) {
            user_opts->iSeqType = SEQTYPE_DNA;
        } else {
            Log(&rLog, LOG_FATAL, "Unknown sequence type '%s'", opt_seqtype->sval[0]);
        }
    }
    /* if (opt_force_protein->count > 0) {
        user_opts->iSeqType = SEQTYPE_PROTEIN;
    } */

    /* Profile input
     */
    if (opt_profile1->count > 0) {
        user_opts->pcProfile1Infile = CkStrdup(opt_profile1->filename[0]);
        if (! FileExists(user_opts->pcProfile1Infile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile1Infile);
        }
    }

    if (opt_profile2->count > 0) {
        user_opts->pcProfile2Infile = CkStrdup(opt_profile2->filename[0]);
        if (! FileExists(user_opts->pcProfile2Infile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->pcProfile2Infile);
        }
    }


    /* HMM input
     */
    user_opts->aln_opts.iHMMInputFiles = 0;
    user_opts->aln_opts.ppcHMMInput = NULL;
    if (opt_hmm_in->count>0) {
        int iAux;
        user_opts->aln_opts.iHMMInputFiles = opt_hmm_in->count;
        user_opts->aln_opts.ppcHMMInput = (char **) CKMALLOC(
            user_opts->aln_opts.iHMMInputFiles * sizeof(char*));
        for (iAux=0; iAux<opt_hmm_in->count; iAux++) {
            user_opts->aln_opts.ppcHMMInput[iAux] = CkStrdup(opt_hmm_in->filename[iAux]);
            if (! FileExists(user_opts->aln_opts.ppcHMMInput[iAux])) {
                Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.ppcHMMInput[iAux]);
            }
        }
    }


    /* Pair distance method
     */
    if (opt_pairdist->count > 0) {
        if (STR_NC_EQ(opt_pairdist->sval[0], "ktuple")) {
            user_opts->aln_opts.iPairDistType = PAIRDIST_KTUPLE;
        } else {
            Log(&rLog, LOG_FATAL, "Unknown pairdist method '%s'", opt_pairdist->sval[0]);
        }
    }


    /* Distance matrix input
     */
    if (opt_distmat_in->count > 0) {
        user_opts->aln_opts.pcDistmatInfile = CkStrdup(opt_distmat_in->filename[0]);
        if (! FileExists(user_opts->aln_opts.pcDistmatInfile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcDistmatInfile);
        }
    }


    /* Distance matrix output
     */
    if (opt_distmat_out->count > 0) {
        user_opts->aln_opts.pcDistmatOutfile = CkStrdup(opt_distmat_out->filename[0]);

        /* warn if already exists or not writable */
        if (FileExists(user_opts->aln_opts.pcDistmatOutfile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->aln_opts.pcDistmatOutfile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->aln_opts.pcDistmatOutfile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->aln_opts.pcDistmatOutfile);
        }
    }

    /* Clustering
     *
     */
    if (opt_clustering->count > 0) {
        if (STR_NC_EQ(opt_clustering->sval[0], "upgma")) {
            user_opts->aln_opts.iClusteringType = CLUSTERING_UPGMA;
        } else {
            Log(&rLog, LOG_FATAL, "Unknown guide-tree clustering method '%s'", opt_clustering->sval[0]);
        }
    }


    /* Guidetree input
     */
    if (opt_guidetree_in->count > 0) {
        user_opts->aln_opts.pcGuidetreeInfile = CkStrdup(opt_guidetree_in->filename[0]);
        if (! FileExists(user_opts->aln_opts.pcGuidetreeInfile)) {
            Log(&rLog, LOG_FATAL, "File '%s' does not exist.", user_opts->aln_opts.pcGuidetreeInfile);
        }
    }


    /* Guidetree output
     */
    if (opt_guidetree_out->count > 0) {
        user_opts->aln_opts.pcGuidetreeOutfile = CkStrdup(opt_guidetree_out->filename[0]);

        /* warn if already exists or not writable */
        if (FileExists(user_opts->aln_opts.pcGuidetreeOutfile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->aln_opts.pcGuidetreeOutfile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->aln_opts.pcGuidetreeOutfile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->aln_opts.pcGuidetreeOutfile);
        }
    }


    /* max guidetree iterations
     */
    if (opt_max_guidetree_iterations->count > 0) {
        user_opts->aln_opts.iMaxGuidetreeIterations = opt_max_guidetree_iterations->ival[0];
    }


    /* max HMM iterations
     */
    if (opt_max_hmm_iterations->count > 0) {
        user_opts->aln_opts.iMaxHMMIterations = opt_max_hmm_iterations->ival[0];
    }

    /* number of iterations
     */
    if (opt_num_iterations->count > 0) {
        if (STR_NC_EQ(opt_num_iterations->sval[0], "auto")) {
            Log(&rLog, LOG_FATAL, "Automatic iteration not supported at the moment.");
            user_opts->aln_opts.bIterationsAuto = TRUE;

        } else {
            const char *pcIter = opt_num_iterations->sval[0];
            size_t uLen = strlen(pcIter);
            size_t uAux;

            user_opts->aln_opts.bIterationsAuto = FALSE;
            for (uAux = 0; uAux < uLen; uAux++) {
                /* <ctype.h> functions need a value representable as
                 * unsigned char (or EOF); a plain char may be negative */
                if (! isdigit((unsigned char)pcIter[uAux])) {
                    Log(&rLog, LOG_FATAL, "Couldn't parse iteration parameter: %s",
                          pcIter);
                }
            }
            user_opts->aln_opts.iNumIterations = atoi(pcIter);
        }
    }


    /* Alignment output
     */
    if (opt_outfile->count > 0) {
        user_opts->pcAlnOutfile = CkStrdup(opt_outfile->filename[0]);

        /* warn if already exists or not writable */
        if (FileExists(user_opts->pcAlnOutfile) && ! user_opts->bForceFileOverwrite) {
            Log(&rLog, LOG_FATAL, "%s '%s'. %s",
                  "Cowardly refusing to overwrite already existing file",
                  user_opts->pcAlnOutfile,
                  "Use --force to force overwriting.");
        }
        if (! FileIsWritable(user_opts->pcAlnOutfile)) {
            Log(&rLog, LOG_FATAL, "Sorry, I do not have permission to write to file '%s'.",
                  user_opts->pcAlnOutfile);
        }
    }


    /* Output format
     */
    if (opt_outfmt->count > 0) {
        /* avoid gcc warning about discarded qualifier */
        char *tmp = (char *)opt_outfmt->sval[0];
        user_opts->iAlnOutFormat = String2SeqfileFormat(tmp);
        if (SQFILE_UNKNOWN == user_opts->iAlnOutFormat) {
            Log(&rLog, LOG_FATAL, "Unknown output format '%s'", opt_outfmt->sval[0]);
        }
    }


    /* Number of threads
     */
#ifdef HAVE_OPENMP
    if (opt_threads->count > 0) {
        if (opt_threads->ival[0] <= 0) {
            Log(&rLog, LOG_FATAL, "Changing number of threads to %d doesn't make sense.",
                  opt_threads->ival[0]);
        }
        user_opts->iThreads = opt_threads->ival[0];
    }

#else
    if (opt_threads->count > 0) {
        if (opt_threads->ival[0] > 1) {
            Log(&rLog, LOG_FATAL, "Cannot change number of threads to %d. %s was build without OpenMP support.",
                  opt_threads->ival[0], PACKAGE_NAME);
        }
    }
#endif

    /* max MAC RAM (maximum amount of RAM set aside for MAC algorithm)
     */
    if (opt_macram->count > 0) { /* FS, r240 -> r241 */
        user_opts->aln_opts.rHhalignPara.iMacRamMB = opt_macram->ival[0];
    }

    arg_freetable(argtable,sizeof(argtable)/sizeof(argtable[0]));
    arg_freetable(apvUnlisted, sizeof(apvUnlisted)/sizeof(apvUnlisted[0]));

    UserOptsLogicCheck(user_opts);

    return;
}