void init_library_table() { ReadGroupProperties fr_unstranded; fr_unstranded.platform(UNKNOWN_PLATFORM); fr_unstranded.mate_strand_mapping(FR); fr_unstranded.std_mate_orientation(MATES_POINT_TOWARD); fr_unstranded.strandedness(UNSTRANDED_PROTOCOL); library_type_table["fr-unstranded"] = fr_unstranded; ReadGroupProperties fr_firststrand; fr_firststrand.platform(UNKNOWN_PLATFORM); fr_firststrand.mate_strand_mapping(RF); fr_firststrand.std_mate_orientation(MATES_POINT_TOWARD); fr_firststrand.strandedness(STRANDED_PROTOCOL); library_type_table["fr-firststrand"] = fr_firststrand; ReadGroupProperties fr_secondstrand; fr_secondstrand.platform(UNKNOWN_PLATFORM); fr_secondstrand.mate_strand_mapping(FR); fr_secondstrand.std_mate_orientation(MATES_POINT_TOWARD); fr_secondstrand.strandedness(STRANDED_PROTOCOL); library_type_table["fr-secondstrand"] = fr_secondstrand; ReadGroupProperties ff_unstranded; ff_unstranded.platform(UNKNOWN_PLATFORM); ff_unstranded.mate_strand_mapping(FF); ff_unstranded.std_mate_orientation(MATES_POINT_TOWARD); ff_unstranded.strandedness(UNSTRANDED_PROTOCOL); library_type_table["ff-unstranded"] = ff_unstranded; ReadGroupProperties ff_firststrand; ff_firststrand.platform(UNKNOWN_PLATFORM); ff_firststrand.mate_strand_mapping(FF); ff_firststrand.std_mate_orientation(MATES_POINT_TOWARD); ff_firststrand.strandedness(STRANDED_PROTOCOL); library_type_table["ff-firststrand"] = ff_firststrand; ReadGroupProperties ff_secondstrand; ff_secondstrand.platform(UNKNOWN_PLATFORM); ff_secondstrand.mate_strand_mapping(RR); ff_secondstrand.std_mate_orientation(MATES_POINT_TOWARD); ff_secondstrand.strandedness(STRANDED_PROTOCOL); library_type_table["ff-secondstrand"] = ff_secondstrand; ReadGroupProperties transfrags; transfrags.platform(UNKNOWN_PLATFORM); transfrags.mate_strand_mapping(FR); transfrags.std_mate_orientation(MATES_POINT_TOWARD); transfrags.strandedness(UNSTRANDED_PROTOCOL); transfrags.complete_fragments(true); library_type_table["transfrags"] = transfrags; //global_read_properties = &(library_type_table.find(default_library_type)->second); }
// Parses the header to determine platform and other properties bool HitFactory::parse_header_string(const string& header_rec, ReadGroupProperties& rg_props) { vector<string> columns; tokenize(header_rec, "\t", columns); if (columns[0] == "@RG") { for (size_t i = 1; i < columns.size(); ++i) { vector<string> fields; tokenize(columns[i], ":", fields); if (fields[0] == "PL") { if (rg_props.platform() == UNKNOWN_PLATFORM) { Platform p = str_to_platform(fields[1]); rg_props.platform(p); } else { Platform p = str_to_platform(fields[1]); if (p != rg_props.platform()) { fprintf(stderr, "Error: Processing reads from different platforms is not currently supported\n"); return false; } } } } } else if (columns[0] == "@SQ") { _num_seq_header_recs++; for (size_t i = 1; i < columns.size(); ++i) { vector<string> fields; tokenize(columns[i], ":", fields); if (fields[0] == "SN") { // Populate the RefSequenceTable with the sequence dictionary, // to ensure that (for example) downstream GTF files are sorted // in an order consistent with the header, and to enforce that // BAM records appear in the order implied by the header RefID _id = _ref_table.get_id(fields[1], NULL); const RefSequenceTable::SequenceInfo* info = _ref_table.get_info(_id); if (info->observation_order != _num_seq_header_recs) { if (info->name != fields[1]) { fprintf(stderr, "Error: Hash collision between references '%s' and '%s'.\n", info->name, fields[1].c_str()); } else { fprintf(stderr, "Error: sort order of reads in BAMs must be the same\n"); } exit(1); } } } } return true; }