Exemple #1
0
int main(int argc, const char **argv) {
  Parameters par(argc, argv);

  strcpy(par.infile, "");
  strcpy(par.outfile, "");

  // maximum number of sequences to be written
  par.nseqdis = par.maxseq - 1;
  // no filtering for maximum diversity
  par.Ndiff = 0;

  ProcessArguments(par);

  // Check command line input and default values
  if (!*par.infile) {
    help(par);
    HH_LOG(ERROR) << "Input file is missing!" << std::endl;
    exit(4);
  }
  if (!*par.outfile) {
    help(par);
    HH_LOG(ERROR) << "Output file is missing!" << std::endl;
    exit(4);
  }

  HH_LOG(INFO) << "Input file = " << par.infile << "\n";
  HH_LOG(INFO) << "Output file = " << par.outfile << "\n";

  // Reads in an alignment from par.infile into matrix X[k][l] as ASCII
  FILE* inf = NULL;
  if (strcmp(par.infile, "stdin")) {
    inf = fopen(par.infile, "r");
    if (!inf) {
      OpenFileError(par.infile, __FILE__, __LINE__, __func__);
    }
  }
  else {
    inf = stdin;
  }

  Alignment qali(par.maxseq, par.maxres);
  qali.Read(inf, par.infile, par.mark, par.maxcol, par.nseqdis);
  fclose(inf);

  // Convert ASCII to int (0-20),throw out all insert states, record their number in I[k][i]
  // and store marked sequences in name[k] and seq[k]
  qali.Compress(par.infile, par.cons, par.maxcol, par.M, par.Mgaps);

  // substitution matrix flavours
  float __attribute__((aligned(16))) P[20][20];
  float __attribute__((aligned(16))) R[20][20];
  float __attribute__((aligned(16))) Sim[20][20];
  float __attribute__((aligned(16))) S[20][20];
  float __attribute__((aligned(16))) pb[21];
  SetSubstitutionMatrix(par.matrix, pb, P, R, S, Sim);

  // Remove sequences with seq. identity larger than seqid percent (remove the shorter of two)
  qali.N_filtered = qali.Filter(par.max_seqid, S, par.coverage, par.qid, par.qsc,par.Ndiff);

  // Atune alignment diversity q.Neff with qsc to value Neff_goal
  if (par.Neff >= 1.0) {
    qali.FilterNeff(par.wg, par.mark, par.cons, par.showcons, par.max_seqid, par.coverage, par.Neff, pb, S, Sim);
  }

  // Write filtered alignment WITH insert states (lower case) to alignment file
  qali.WriteToFile(par.outfile, par.append);
}
Exemple #2
0
/////////////////////////////////////////////////////////////////////////////////////
//// MAIN PROGRAM
/////////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
    char* argv_conf[MAXOPT]; // Input arguments from .hhdefaults file (first=1: argv_conf[0] is not used)
    int argc_conf;               // Number of arguments in argv_conf

    strcpy(par.infile, "");
    strcpy(par.outfile, "");
    strcpy(par.alnfile, "");

    //Default parameter settings
    par.nseqdis = MAXSEQ - 1;        // maximum number of sequences to be written
    par.showcons = 0;
    par.cons = 1;
    par.Ndiff = 0;
    par.max_seqid = 100;
    par.coverage = 0;
    par.pc_hhm_context_engine.pca = 0.0;  // no amino acid pseudocounts
    par.pc_hhm_nocontext_a = 0.0;  // no amino acid pseudocounts
    par.gapb = 0.0; // no transition pseudocounts

    // Make command line input globally available
    par.argv = argv;
    par.argc = argc;
    RemovePathAndExtension(program_name, argv[0]);

    // Enable changing verbose mode before defaults file and command line are processed
    int v = 2;
    for (int i = 1; i < argc; i++) {
        if (!strcmp(argv[i], "-def"))
            par.readdefaultsfile = 1;
        else if (strcmp(argv[i], "-v") == 0) {
            v = atoi(argv[i + 1]);
        }
    }
    par.v = Log::from_int(v);
    Log::reporting_level() = par.v;

    par.SetDefaultPaths();

    // Read .hhdefaults file?
    if (par.readdefaultsfile) {
        // Process default otpions from .hhconfig file
        ReadDefaultsFile(argc_conf, argv_conf);
        ProcessArguments(argc_conf, argv_conf);
    }

    // Process command line options (they override defaults from .hhdefaults file)
    ProcessArguments(argc, argv);

    Alignment* qali = new Alignment(MAXSEQ, par.maxres);
    HMM* q = new HMM(MAXSEQDIS, par.maxres);        //Create a HMM with maximum of par.maxres match states

    // q is only available after maxres is known, so we had to move this here
    for (int i = 1; i <= argc - 1; i++) {
        if (!strcmp(argv[i], "-name") && (i < argc - 1)) {
            strmcpy(q->name, argv[++i], NAMELEN - 1); //copy longname to name...
            strmcpy(q->longname, argv[i], DESCLEN - 1);   //copy full name to longname
        }
    }

    // Check command line input and default values
    if (!*par.infile) {
        help();
        HH_LOG(ERROR) << "Input file is missing!" << std::endl;
        exit(4);
    }

    // Get basename
    RemoveExtension(q->file, par.infile); //Get basename of infile (w/o extension):

    // Outfile not given? Name it basename.hhm
    if (!*par.outfile && !*par.alnfile) {
        RemoveExtension(par.outfile, par.infile);
        strcat(par.outfile, ".seq");
    }

    // Prepare CS pseudocounts lib
    if (!par.nocontxt && *par.clusterfile) {
        InitializePseudocountsEngine(par, context_lib, crf, pc_hhm_context_engine,
                                     pc_hhm_context_mode, pc_prefilter_context_engine,
                                     pc_prefilter_context_mode);
    }

    // Set substitution matrix; adjust to query aa distribution if par.pcm==3
    SetSubstitutionMatrix(par.matrix, pb, P, R, S, Sim);

    // Read input file (HMM, HHM, or alignment format), and add pseudocounts etc.
    char input_format = 0;
    ReadQueryFile(par, par.infile, input_format, par.wg, q, qali, pb, S, Sim);

    // Same code as in PrepareQueryHMM(par.infile,input_format,q,qali), except that we add SS prediction
    // Add Pseudocounts, if no HMMER input
    if (input_format == 0) {
        // Transform transition freqs to lin space if not already done
        q->AddTransitionPseudocounts(par.gapd, par.gape, par.gapf, par.gapg,
                                     par.gaph, par.gapi, par.gapb, par.gapb);

        // Comput substitution matrix pseudocounts
        if (par.nocontxt) {
            // Generate an amino acid frequency matrix from f[i][a] with full pseudocount admixture (tau=1) -> g[i][a]
            q->PreparePseudocounts(R);
            // Add amino acid pseudocounts to query: p[i][a] = (1-tau)*f[i][a] + tau*g[i][a]
            q->AddAminoAcidPseudocounts(par.pc_hhm_nocontext_mode,
                                        par.pc_hhm_nocontext_a, par.pc_hhm_nocontext_b,
                                        par.pc_hhm_nocontext_c);
        }
        else {
            // Add full context specific pseudocounts to query
            q->AddContextSpecificPseudocounts(pc_hhm_context_engine,
                                              pc_hhm_context_mode);
        }
    }
    else {
        q->AddAminoAcidPseudocounts(0, par.pc_hhm_nocontext_a,
                                    par.pc_hhm_nocontext_b, par.pc_hhm_nocontext_c);
    }

    q->CalculateAminoAcidBackground(pb);

    if (par.columnscore == 5 && !q->divided_by_local_bg_freqs)
        q->DivideBySqrtOfLocalBackgroundFreqs(
            par.half_window_size_local_aa_bg_freqs, pb);

    // Write consensus sequence to sequence file
    // Consensus sequence is calculated in hhalignment.C, Alignment::FrequenciesAndTransitions()
    if (*par.outfile) {
        FILE* outf = NULL;
        if (strcmp(par.outfile, "stdout")) {
            outf = fopen(par.outfile, "a");
            if (!outf)
                OpenFileError(par.outfile, __FILE__, __LINE__, __func__);
        }
        else
            outf = stdout;
        // OLD
        //// ">name_consensus" -> ">name consensus"
        //strsubst(q->sname[q->nfirst],"_consensus"," consensus");
        //fprintf(outf,">%s\n%s\n",q->sname[q->nfirst],q->seq[q->nfirst]+1);
        // NEW (long header needed for NR30cons database)
        fprintf(outf, ">%s\n%s\n", q->longname, q->seq[q->nfirst] + 1);
        fclose(outf);
    }

    // Print A3M/A2M/FASTA output alignment
    if (*par.alnfile) {
        HalfAlignment qa;
        int n = imin(q->n_display,
                     par.nseqdis + (q->nss_dssp >= 0) + (q->nss_pred >= 0)
                     + (q->nss_conf >= 0) + (q->ncons >= 0));
        qa.Set(q->name, q->seq, q->sname, n, q->L, q->nss_dssp, q->nss_pred,
               q->nss_conf, q->nsa_dssp, q->ncons);

        if (par.outformat == 1)
            qa.BuildFASTA();
        else if (par.outformat == 2)
            qa.BuildA2M();
        else if (par.outformat == 3)
            qa.BuildA3M();
        if (qali->readCommentLine)
            qa.Print(par.alnfile, par.append, qali->longname); // print alignment to outfile
        else
            qa.Print(par.alnfile, par.append);   // print alignment to outfile
    }

    delete qali;
    delete q;

    DeletePseudocountsEngine(context_lib, crf, pc_hhm_context_engine,
                             pc_hhm_context_mode, pc_prefilter_context_engine,
                             pc_prefilter_context_mode);
}