int main(int argc, const char **argv) { Parameters par(argc, argv); strcpy(par.infile, ""); strcpy(par.outfile, ""); // maximum number of sequences to be written par.nseqdis = par.maxseq - 1; // no filtering for maximum diversity par.Ndiff = 0; ProcessArguments(par); // Check command line input and default values if (!*par.infile) { help(par); HH_LOG(ERROR) << "Input file is missing!" << std::endl; exit(4); } if (!*par.outfile) { help(par); HH_LOG(ERROR) << "Output file is missing!" << std::endl; exit(4); } HH_LOG(INFO) << "Input file = " << par.infile << "\n"; HH_LOG(INFO) << "Output file = " << par.outfile << "\n"; // Reads in an alignment from par.infile into matrix X[k][l] as ASCII FILE* inf = NULL; if (strcmp(par.infile, "stdin")) { inf = fopen(par.infile, "r"); if (!inf) { OpenFileError(par.infile, __FILE__, __LINE__, __func__); } } else { inf = stdin; } Alignment qali(par.maxseq, par.maxres); qali.Read(inf, par.infile, par.mark, par.maxcol, par.nseqdis); fclose(inf); // Convert ASCII to int (0-20),throw out all insert states, record their number in I[k][i] // and store marked sequences in name[k] and seq[k] qali.Compress(par.infile, par.cons, par.maxcol, par.M, par.Mgaps); // substitution matrix flavours float __attribute__((aligned(16))) P[20][20]; float __attribute__((aligned(16))) R[20][20]; float __attribute__((aligned(16))) Sim[20][20]; float __attribute__((aligned(16))) S[20][20]; float __attribute__((aligned(16))) pb[21]; SetSubstitutionMatrix(par.matrix, pb, P, R, S, Sim); // Remove sequences with seq. identity larger than seqid percent (remove the shorter of two) qali.N_filtered = qali.Filter(par.max_seqid, S, par.coverage, par.qid, par.qsc,par.Ndiff); // Atune alignment diversity q.Neff with qsc to value Neff_goal if (par.Neff >= 1.0) { qali.FilterNeff(par.wg, par.mark, par.cons, par.showcons, par.max_seqid, par.coverage, par.Neff, pb, S, Sim); } // Write filtered alignment WITH insert states (lower case) to alignment file qali.WriteToFile(par.outfile, par.append); }
///////////////////////////////////////////////////////////////////////////////////// //// MAIN PROGRAM ///////////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { char* argv_conf[MAXOPT]; // Input arguments from .hhdefaults file (first=1: argv_conf[0] is not used) int argc_conf; // Number of arguments in argv_conf strcpy(par.infile, ""); strcpy(par.outfile, ""); strcpy(par.alnfile, ""); //Default parameter settings par.nseqdis = MAXSEQ - 1; // maximum number of sequences to be written par.showcons = 0; par.cons = 1; par.Ndiff = 0; par.max_seqid = 100; par.coverage = 0; par.pc_hhm_context_engine.pca = 0.0; // no amino acid pseudocounts par.pc_hhm_nocontext_a = 0.0; // no amino acid pseudocounts par.gapb = 0.0; // no transition pseudocounts // Make command line input globally available par.argv = argv; par.argc = argc; RemovePathAndExtension(program_name, argv[0]); // Enable changing verbose mode before defaults file and command line are processed int v = 2; for (int i = 1; i < argc; i++) { if (!strcmp(argv[i], "-def")) par.readdefaultsfile = 1; else if (strcmp(argv[i], "-v") == 0) { v = atoi(argv[i + 1]); } } par.v = Log::from_int(v); Log::reporting_level() = par.v; par.SetDefaultPaths(); // Read .hhdefaults file? if (par.readdefaultsfile) { // Process default otpions from .hhconfig file ReadDefaultsFile(argc_conf, argv_conf); ProcessArguments(argc_conf, argv_conf); } // Process command line options (they override defaults from .hhdefaults file) ProcessArguments(argc, argv); Alignment* qali = new Alignment(MAXSEQ, par.maxres); HMM* q = new HMM(MAXSEQDIS, par.maxres); //Create a HMM with maximum of par.maxres match states // q is only available after maxres is known, so we had to move this here for (int i = 1; i <= argc - 1; i++) { if (!strcmp(argv[i], "-name") && (i < argc - 1)) { strmcpy(q->name, argv[++i], NAMELEN - 1); //copy longname to name... strmcpy(q->longname, argv[i], DESCLEN - 1); //copy full name to longname } } // Check command line input and default values if (!*par.infile) { help(); HH_LOG(ERROR) << "Input file is missing!" << std::endl; exit(4); } // Get basename RemoveExtension(q->file, par.infile); //Get basename of infile (w/o extension): // Outfile not given? Name it basename.hhm if (!*par.outfile && !*par.alnfile) { RemoveExtension(par.outfile, par.infile); strcat(par.outfile, ".seq"); } // Prepare CS pseudocounts lib if (!par.nocontxt && *par.clusterfile) { InitializePseudocountsEngine(par, context_lib, crf, pc_hhm_context_engine, pc_hhm_context_mode, pc_prefilter_context_engine, pc_prefilter_context_mode); } // Set substitution matrix; adjust to query aa distribution if par.pcm==3 SetSubstitutionMatrix(par.matrix, pb, P, R, S, Sim); // Read input file (HMM, HHM, or alignment format), and add pseudocounts etc. char input_format = 0; ReadQueryFile(par, par.infile, input_format, par.wg, q, qali, pb, S, Sim); // Same code as in PrepareQueryHMM(par.infile,input_format,q,qali), except that we add SS prediction // Add Pseudocounts, if no HMMER input if (input_format == 0) { // Transform transition freqs to lin space if not already done q->AddTransitionPseudocounts(par.gapd, par.gape, par.gapf, par.gapg, par.gaph, par.gapi, par.gapb, par.gapb); // Comput substitution matrix pseudocounts if (par.nocontxt) { // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a] q->PreparePseudocounts(R); // Add amino acid pseudocounts to query: p[i][a] = (1-tau)*f[i][a] + tau*g[i][a] q->AddAminoAcidPseudocounts(par.pc_hhm_nocontext_mode, par.pc_hhm_nocontext_a, par.pc_hhm_nocontext_b, par.pc_hhm_nocontext_c); } else { // Add full context specific pseudocounts to query q->AddContextSpecificPseudocounts(pc_hhm_context_engine, pc_hhm_context_mode); } } else { q->AddAminoAcidPseudocounts(0, par.pc_hhm_nocontext_a, par.pc_hhm_nocontext_b, par.pc_hhm_nocontext_c); } q->CalculateAminoAcidBackground(pb); if (par.columnscore == 5 && !q->divided_by_local_bg_freqs) q->DivideBySqrtOfLocalBackgroundFreqs( par.half_window_size_local_aa_bg_freqs, pb); // Write consensus sequence to sequence file // Consensus sequence is calculated in hhalignment.C, Alignment::FrequenciesAndTransitions() if (*par.outfile) { FILE* outf = NULL; if (strcmp(par.outfile, "stdout")) { outf = fopen(par.outfile, "a"); if (!outf) OpenFileError(par.outfile, __FILE__, __LINE__, __func__); } else outf = stdout; // OLD //// ">name_consensus" -> ">name consensus" //strsubst(q->sname[q->nfirst],"_consensus"," consensus"); //fprintf(outf,">%s\n%s\n",q->sname[q->nfirst],q->seq[q->nfirst]+1); // NEW (long header needed for NR30cons database) fprintf(outf, ">%s\n%s\n", q->longname, q->seq[q->nfirst] + 1); fclose(outf); } // Print A3M/A2M/FASTA output alignment if (*par.alnfile) { HalfAlignment qa; int n = imin(q->n_display, par.nseqdis + (q->nss_dssp >= 0) + (q->nss_pred >= 0) + (q->nss_conf >= 0) + (q->ncons >= 0)); qa.Set(q->name, q->seq, q->sname, n, q->L, q->nss_dssp, q->nss_pred, q->nss_conf, q->nsa_dssp, q->ncons); if (par.outformat == 1) qa.BuildFASTA(); else if (par.outformat == 2) qa.BuildA2M(); else if (par.outformat == 3) qa.BuildA3M(); if (qali->readCommentLine) qa.Print(par.alnfile, par.append, qali->longname); // print alignment to outfile else qa.Print(par.alnfile, par.append); // print alignment to outfile } delete qali; delete q; DeletePseudocountsEngine(context_lib, crf, pc_hhm_context_engine, pc_hhm_context_mode, pc_prefilter_context_engine, pc_prefilter_context_mode); }