int main (int argc, char** argv) { std::string command; std::string fastaFileName; std::string seqname; std::string longseqname; bool dump = false; bool buildIndex = false; // flag to force index building bool printEntropy = false; // entropy printing bool readRegionsFromStdin = false; std::string region; int c; while (true) { static struct option long_options[] = { /* These options set a flag. */ {"help", no_argument, 0, 'h'}, {"index", no_argument, 0, 'i'}, {"entropy", no_argument, 0, 'e'}, {"region", required_argument, 0, 'r'}, {"stdin", no_argument, 0, 'c'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; c = getopt_long (argc, argv, "hciedr:", long_options, &option_index); /* Detect the end of the options. */ if (c == -1) break; switch (c) { case 0: /* If this option set a flag, do nothing else now. */ if (long_options[option_index].flag != 0) break; printf ("option %s", long_options[option_index].name); if (optarg) printf (" with arg %s", optarg); printf ("\n"); break; case 'e': printEntropy = true; break; case 'c': readRegionsFromStdin = true; break; case 'i': buildIndex = true; break; case 'r': region = optarg; break; case 'd': dump = true; break; case 'h': printSummary(); exit(0); break; case '?': /* getopt_long already printed an error message. */ printSummary(); exit(1); break; default: abort (); } } /* Print any remaining command line arguments (not options). */ if (optind < argc) { //cerr << "fasta file: " << argv[optind] << std::endl; fastaFileName = argv[optind]; } else { std::cerr << "Please specify a FASTA file." << std::endl; printSummary(); exit(1); } if (buildIndex) { FastaIndex* fai = new FastaIndex(); //cerr << "generating fasta index file for " << fastaFileName << std::endl; fai->indexReference(fastaFileName); fai->writeIndexFile((std::string) fastaFileName + fai->indexFileExtension()); } std::string sequence; // holds sequence so we can optionally process it FastaReference fr; fr.open(fastaFileName); if (dump) { for (vector<std::string>::iterator s = fr.index->sequenceNames.begin(); s != fr.index->sequenceNames.end(); ++s) { std::cout << *s << "\t" << fr.getSequence(*s) << std::endl; } return 0; } if (region != "") { FastaRegion target(region); sequence = fr.getTargetSubSequence(target); } if (readRegionsFromStdin) { std::string regionstr; while (getline(cin, regionstr)) { FastaRegion target(regionstr); if (target.startPos == -1) { std::cout << fr.getSequence(target.startSeq) << std::endl; } else { std::cout << fr.getSubSequence(target.startSeq, target.startPos - 1, target.length()) << std::endl; } } } else { if (sequence != "") { if (printEntropy) { if (sequence.size() > 0) { std::cout << shannon_H((char*) sequence.c_str(), sequence.size()) << std::endl; } else { std::cerr << "please specify a region or sequence for which to calculate the shannon entropy" << std::endl; } } else { // if no statistical processing is requested, just print the sequence std::cout << sequence << std::endl; } } } return 0; }
// one-off void construct_dag_and_align_single_sequence(Parameters& params) { if (params.debug) { cout << "read: " << params.read_input << endl; //cout << "fastq file:" << params.fastq_file << endl; cout << "fasta reference:" << params.fasta_reference << endl; cout << "vcf file " << params.vcf_file << endl; cout << "target " << params.target << endl; cout << endl; } // get sequence of target FastaReference reference; reference.open(params.fasta_reference); FastaRegion target(params.target); string targetSequence = reference.getTargetSubSequence(target); // get variants in target vector<vcf::Variant> variants; vcf::VariantCallFile vcffile; if (!params.vcf_file.empty()) { vcffile.open(params.vcf_file); vcf::Variant var(vcffile); vcffile.setRegion(params.target); while (vcffile.getNextVariant(var)) { if (var.position + var.ref.length() <= target.stopPos) { variants.push_back(var); } } } long offset = max(target.startPos, 1); // start is -1 when coordinates are not specified // Declare the target DAG to align against. //vector<Cigar> cigars; //vector<long int> refpositions; ReferenceMappings ref_map; gssw_graph* graph = gssw_graph_create(0); int8_t* nt_table = gssw_create_nt_table(); int8_t* mat = gssw_create_score_matrix(params.match, params.mism); constructDAGProgressive(graph, ref_map, targetSequence, target.startSeq, variants, offset, nt_table, mat, params.flat_input_vcf); if (params.display_dag) { cout << "DAG generated from input variants:" << endl; } // run the alignment string read = params.read_input; string qualities(read.size(), shortInt2QualityChar(30)); int score; long int position; string strand; Cigar flat_cigar; gssw_graph_mapping* gm = gswalign(graph, ref_map, read, qualities, params, position, score, flat_cigar, strand, nt_table, mat); cerr << graph_mapping_to_string(gm) << endl; gssw_graph_mapping_destroy(gm); /* cout << score << " " << strand << " " << (trace_report.node->position - 1) + trace_report.x << " " << trace_report.fcigar << " seq:" << trace_report.x << " read:" << trace_report.y << " " << trace_report.gcigar << " " << trace_report.fcigar << endl; if (params.display_alignment) { string refseq; for (vector<sn*>::iterator n = trace_report.node_list.begin(); n != trace_report.node_list.end(); ++n) { refseq.append((*n)->sequence); } refseq = refseq.substr(trace_report.x, read.size()); cout << refseq << endl; if (strand == "+") { cout << read << endl; } else { cout << reverseComplement(read) << endl; } } */ }