Esempio n. 1
0
/**
 * Command-line entry point: index a FASTA file and/or extract sequence from it.
 *
 * Options (parsed with getopt_long):
 *   -h, --help        print the usage summary and exit with status 0
 *   -i, --index       (re)build the index and write it to
 *                     <fasta file name> + FastaIndex::indexFileExtension()
 *   -e, --entropy     print the Shannon entropy of the selected region
 *                     instead of the sequence itself
 *   -r, --region ARG  region to extract
 *   -c, --stdin       read one region per line from standard input and print
 *                     the sequence of each
 *   -d                dump every indexed sequence as "name<TAB>sequence"
 *
 * The first non-option argument is taken as the FASTA file name.
 *
 * @param argc number of command-line arguments
 * @param argv command-line arguments
 * @return 0 on success; 1 when entropy was requested but no sequence was
 *         selected.  Unknown options and a missing FASTA file name terminate
 *         the process with status 1 through exit().
 */
int main (int argc, char** argv)
{
  std::string fastaFileName;
  std::string region;
  bool dump = false;
  bool buildIndex = false;  // flag to force index building
  bool printEntropy = false;  // entropy printing
  bool readRegionsFromStdin = false;

  // The table never changes, so it is built once, outside the parsing loop.
  static struct option long_options[] =
  {
      {"help", no_argument, 0, 'h'},
      {"index",  no_argument, 0, 'i'},
      {"entropy", no_argument, 0, 'e'},
      {"region", required_argument, 0, 'r'},
      {"stdin", no_argument, 0, 'c'},
      {0, 0, 0, 0}
  };

  while (true)
  {
    /* getopt_long stores the option index here. */
    int option_index = 0;
    const int c = getopt_long (argc, argv, "hciedr:", long_options, &option_index);

    /* Detect the end of the options. */
    if (c == -1)
      break;

    switch (c)
    {
      case 0:
        /* Only returned for long options that set a flag or have val == 0;
           no entry in the table above does, so this is a defensive fallback. */
        if (long_options[option_index].flag != 0)
          break;
        printf ("option %s", long_options[option_index].name);
        if (optarg)
          printf (" with arg %s", optarg);
        printf ("\n");
        break;

      case 'e':
        printEntropy = true;
        break;

      case 'c':
        readRegionsFromStdin = true;
        break;

      case 'i':
        buildIndex = true;
        break;

      case 'r':
        region = optarg;
        break;

      case 'd':
        dump = true;
        break;

      case 'h':
        printSummary();
        exit(0);

      case '?':
        /* getopt_long already printed an error message. */
        printSummary();
        exit(1);

      default:
        abort ();
    }
  }

  /* The first remaining (non-option) argument is the FASTA file. */
  if (optind >= argc)
  {
    std::cerr << "Please specify a FASTA file." << std::endl;
    printSummary();
    exit(1);
  }
  fastaFileName = argv[optind];

  if (buildIndex)
  {
    // Automatic storage: the index object is released when this scope ends
    // (the previous heap allocation was never deleted).
    FastaIndex fai;
    fai.indexReference(fastaFileName);
    fai.writeIndexFile(fastaFileName + fai.indexFileExtension());
  }

  FastaReference fr;
  fr.open(fastaFileName);

  if (dump)
  {
    for (std::vector<std::string>::iterator s = fr.index->sequenceNames.begin();
         s != fr.index->sequenceNames.end(); ++s)
    {
      std::cout << *s << "\t" << fr.getSequence(*s) << std::endl;
    }

    return 0;
  }

  std::string sequence;  // holds sequence so we can optionally process it

  if (!region.empty())
  {
    FastaRegion target(region);
    sequence = fr.getTargetSubSequence(target);
  }

  if (readRegionsFromStdin)
  {
    std::string regionstr;

    while (std::getline(std::cin, regionstr))
    {
      FastaRegion target(regionstr);

      if (target.startPos == -1)
      {
        // startPos of -1 is treated as "no coordinates": print the whole sequence.
        std::cout << fr.getSequence(target.startSeq) << std::endl;
      }
      else
      {
        // NOTE(review): the "- 1" presumably converts a 1-based start to a
        // 0-based offset -- confirm against FastaReference::getSubSequence.
        std::cout << fr.getSubSequence(target.startSeq, target.startPos - 1, target.length()) << std::endl;
      }
    }
  }
  else if (printEntropy)
  {
    // Previously this check was nested under a non-empty test and could never
    // fire, so "-e" without a usable region silently produced no output.
    if (sequence.empty())
    {
      std::cerr << "please specify a region or sequence for which to calculate the shannon entropy" << std::endl;
      return 1;
    }

    std::cout << shannon_H(const_cast<char*>(sequence.c_str()), sequence.size()) << std::endl;
  }
  else if (!sequence.empty())
  {
    // if no statistical processing is requested, just print the sequence
    std::cout << sequence << std::endl;
  }

  return 0;
}
Esempio n. 2
0
File: main.cpp Progetto: egafni/glia
// One-off mode: build a DAG for a single target region and align one read to it.
//
// Reads everything it needs from `params`: the FASTA reference path, the
// target region, an optional VCF file, scoring values and the read itself.
// The resulting graph mapping is written to stderr; nothing is returned.
void construct_dag_and_align_single_sequence(Parameters& params) {

    if (params.debug) {
        cout << "read: " << params.read_input << endl
             << "fasta reference:" << params.fasta_reference << endl
             << "vcf file " << params.vcf_file << endl
             << "target " << params.target << endl
             << endl;
    }

    // Fetch the reference sequence covering the target region.
    FastaReference fasta;
    fasta.open(params.fasta_reference);
    FastaRegion target(params.target);
    string target_seq = fasta.getTargetSubSequence(target);

    // Collect the variants of the target region (only when a VCF was given).
    vector<vcf::Variant> variants;
    vcf::VariantCallFile vcf_reader;

    if (!params.vcf_file.empty()) {
        vcf_reader.open(params.vcf_file);
        vcf::Variant record(vcf_reader);

        vcf_reader.setRegion(params.target);
        while (vcf_reader.getNextVariant(record)) {
            // Skip variants whose reference allele runs past the target end.
            if (record.position + record.ref.length() > target.stopPos) {
                continue;
            }
            variants.push_back(record);
        }
    }

    // start is -1 when coordinates are not specified, so clamp it to 1
    long offset = (target.startPos < 1) ? 1 : target.startPos;

    // Build the target DAG to align against.
    ReferenceMappings ref_map;
    gssw_graph* graph = gssw_graph_create(0);
    int8_t* nt_table = gssw_create_nt_table();
    int8_t* mat = gssw_create_score_matrix(params.match, params.mism);
    constructDAGProgressive(graph, ref_map, target_seq, target.startSeq,
                            variants, offset, nt_table, mat,
                            params.flat_input_vcf);

    if (params.display_dag) {
        cout << "DAG generated from input variants:" << endl;
    }

    // Align the read; every base gets the same quality value (30).
    string read = params.read_input;
    string qualities(read.size(), shortInt2QualityChar(30));

    // Passed by name to gswalign below -- presumably filled in as outputs.
    long int position;
    int score;
    Cigar flat_cigar;
    string strand;

    gssw_graph_mapping* mapping = gswalign(graph, ref_map, read, qualities,
                                           params, position, score,
                                           flat_cigar, strand, nt_table, mat);
    cerr << graph_mapping_to_string(mapping) << endl;
    gssw_graph_mapping_destroy(mapping);

    // Disabled legacy reporting code, kept for reference:
    /*
    cout << score << " " << strand << " "
         << (trace_report.node->position - 1) + trace_report.x << " "
         << trace_report.fcigar
         << " seq:" << trace_report.x << " read:" << trace_report.y
         << " " << trace_report.gcigar << " " << trace_report.fcigar << endl;

    if (params.display_alignment) {
        string refseq;
        for (vector<sn*>::iterator n = trace_report.node_list.begin();
             n != trace_report.node_list.end(); ++n) {
            refseq.append((*n)->sequence);
        }
        refseq = refseq.substr(trace_report.x, read.size());
        cout << refseq << endl;
        if (strand == "+") {
            cout << read << endl;
        } else {
            cout << reverseComplement(read) << endl;
        }
    }
    */
}