void apath_limit_ref_length( const unsigned target_ref_length, path_t& apath) { unsigned ref_length(0); const unsigned as(apath.size()); for (unsigned i(0); i<as; ++i) { path_segment& ps(apath[i]); if (! is_segment_type_ref_length(ps.type)) continue; ref_length += ps.length; if (ref_length < target_ref_length) continue; if (ref_length > target_ref_length) { const unsigned extra(ref_length - target_ref_length); assert(ps.length > extra); ps.length -= extra; } apath.resize(i+1); break; } }
/// Add \p length bases of type \p seg_type to the end of \p apath.
///
/// When the last segment already has the same type its length is extended;
/// otherwise a new segment is appended.
void
apath_append(
    path_t& apath,
    const align_t seg_type,
    const unsigned length)
{
    const bool is_extension((apath.size() != 0) && (apath.back().type == seg_type));
    if (! is_extension)
    {
        apath.emplace_back(seg_type,length);
        return;
    }
    apath.back().length += length;
}
/// Prepare the directory part of `path` by collecting the levels that do not
/// yet exist and passing them to make_directories().
///
/// Starting from the full directory string, each level is tested with
/// entry.exists(). Missing levels are pushed to the front of a list, so the
/// list ends up ordered from outermost to innermost, and the walk moves to
/// the level returned by parent_directory(). The walk stops at the first
/// level that exists or when parent_directory() returns null.
///
/// @param entry  entry used for the existence checks and forwarded to
///               make_directories()
/// @param path   path whose directory() string is examined
/// @return the value returned by make_directories(), or 1 when the directory
///         string is null or has zero length
virtual int make_directory(entry_t& entry, const path_t& path) {
    const char_t* chars = 0;
    size_t length = 0;
    int err = 1;
    // chars(length) is expected to report the string length through `length`
    if ((chars = path.directory().chars(length)) && (0 < length)) {
        const char_t* dchars = chars;
        size_t dlength = length;
        sub_directories_t sub_directories;
        EV_LOG_MESSAGE_INFO("make directory \"" << chars << "\"...");
        for (bool exists = false; !exists; ) {
            string_t directory(dchars, dlength);
            EV_LOG_MESSAGE_INFO("directory \"" << directory.chars() << "\" exists?...");
            if (!(exists = entry.exists(directory.chars()))) {
                sub_directory_t sub_directory(dchars, dlength);
                EV_LOG_MESSAGE_INFO("...directory \"" << directory.chars() << "\" does not exist");
                // push_front keeps parents ahead of their children in the list
                sub_directories.push_front(sub_directory);
                // NOTE(review): parent_directory presumably updates dlength to
                // the parent's length - confirm against its definition
                if ((dchars = parent_directory(dchars, dlength, path))) {
                    continue;
                }
            } else {
                EV_LOG_MESSAGE_INFO("...directory \"" << directory.chars() << "\" exists");
            }
            // reached when the level exists or there is no further parent
            break;
        }
        err = make_directories(sub_directories, entry, path);
    }
    return err;
}
/// Test whether the run of consecutive INSERT/DELETE segments beginning at
/// index \p i of \p apath contains at least one segment of each type.
///
/// The scan stops at the first segment that is neither INSERT nor DELETE,
/// or at the end of the path.
bool
is_segment_swap_start(
    const path_t& apath,
    unsigned i)
{
    using namespace ALIGNPATH;

    const unsigned segment_count(apath.size());
    bool saw_insert(false);
    bool saw_delete(false);

    for (; i < segment_count; ++i)
    {
        const bool here_insert(apath[i].type == INSERT);
        const bool here_delete(apath[i].type == DELETE);
        if (! (here_insert || here_delete)) break;

        saw_insert = (saw_insert || here_insert);
        saw_delete = (saw_delete || here_delete);
    }

    return (saw_insert && saw_delete);
}
/// Compute the hex digest of the file at `fp`.
///
/// The file is opened for binary reading and handed to the stream overload
/// of hex_digest(); whatever that overload returns is passed back unchanged.
hasher::digest_rc md5_hasher::hex_digest(path_t const& fp) const
{
  const std::ios_base::openmode open_flags = std::ifstream::in | std::ifstream::binary;
  std::ifstream input(fp.c_str(), open_flags);

  digest_rc result(hex_digest(input));
  input.close();

  return result;
}
/// Return true when the first segment of \p apath is a soft- or hard-clip.
/// An empty path is never clipped.
bool
is_clipped_front(const path_t& apath)
{
    const unsigned segment_count(apath.size());
    return (segment_count != 0) &&
           ((apath[0].type == SOFT_CLIP) || (apath[0].type == HARD_CLIP));
}
virtual int copy_file_to_hash (const entry_t& source, const path_t& target, hash_t& hash) { entry_t& entry = target_entry_; string_t target_path(target.chars()); const char_t* chars = 0; int err = 1; if ((append_hash_name_to_target_path_) && (chars = hash.name())) { target_path.append(&target.extension_separator(), 1); target_path.append(hash_name_prefix_); target_path.append(chars); target_path.append(hash_name_suffix_); } if ((entry.exists(chars = target_path.chars()))) { if ((write_overwrite != write_) && (write_append != write_)) { errf("target file \"%s\" already exists\n", chars); } else { fs::entry_type type = fs::entry_type_none; switch (type = entry.type()) { case fs::entry_type_file: err = copy_file_to_file_hash(source, entry, hash); break; default: break; } } } else { if (!(err = make_directory(entry, target))) { entry.set_path(chars); err = copy_file_to_file_hash(source, entry, hash); } else { errf("failed to make directory \"%s\"\n", target.directory().chars()); } } if (!(err) && (!(to_same != to_) || !(target_modified_))) { if ((entry.set_times_to_set(source))) { if ((entry.set_times_set())) { } else { } } } return err; }
/// Restrict \p apath to the read interval
/// [\p target_read_start, \p target_read_end).
///
/// Segments are scanned in order while summing the lengths of those whose
/// type counts toward read length. The segment in which the running total
/// first exceeds target_read_start becomes the new first segment and is
/// shortened at its front; the segment in which the total reaches
/// target_read_end becomes the new last segment and is shortened at its
/// back if it overshoots. Both trims may apply to the same segment.
///
/// If the start is never reached the retained range begins at segment 0; if
/// the end is never reached it extends to the last segment.
void
apath_limit_read_length(
    const unsigned target_read_start,
    const unsigned target_read_end,
    path_t& apath)
{
    bool isStartSet(false);

    unsigned read_length(0);
    const unsigned as(apath.size());
    // [startSegment,endSegment) is the segment range kept at the end
    unsigned startSegment(0);
    unsigned endSegment(as);
    for (unsigned i(0); i<as; ++i)
    {
        path_segment& ps(apath[i]);
        if (! is_segment_type_read_length(ps.type)) continue;
        read_length += ps.length;

        if ((! isStartSet) && (read_length > target_read_start))
        {
            {
                // 'extra' is the part of this segment that lies before
                // target_read_start; remove it from the segment
                const unsigned extra(ps.length - (read_length - target_read_start));
                assert(ps.length > extra);
                ps.length -= extra;
            }
            startSegment=i;
            isStartSet=true;
        }

        if (read_length >= target_read_end)
        {
            if (read_length > target_read_end)
            {
                // 'extra' is the part of this segment that lies at or beyond
                // target_read_end; remove it from the segment
                const unsigned extra(read_length - target_read_end);
                assert(ps.length > extra);
                ps.length -= extra;
            }
            endSegment=i+1;
            break;
        }
    }
    // the temporary is built from the old contents before being assigned back
    apath = path_t(apath.begin()+startSegment,apath.begin()+endSegment);
}
/// Flush the pending segment \p ps into \p apath unless it is empty or its
/// type equals \p t; \p ps is cleared after being flushed.
static
void
apath_push(
    path_t& apath,
    path_segment& ps,
    const align_t t)
{
    const bool is_pending(0 != ps.length);
    if (is_pending && (ps.type != t))
    {
        apath.push_back(ps);
        ps.clear();
    }
}
void read(std::basic_string<CharT>& buf, path_t const& path) { std::basic_string<char> buf_; std::basic_ifstream<char> ifs(path.string()); ifs.imbue(std::locale("")); if (!ifs) { throw file_error("failed to open file " + path.string()); } // get length ifs.seekg(0, std::ios::end); auto const len = ifs.tellg(); ifs.seekg(0, std::ios::beg); buf_.resize(len); ifs.read(&buf_[0], len); buf = to<std::basic_string<CharT>>(buf_); }
void apath_to_bam_cigar(const path_t& apath, uint32_t* bam_cigar) { const unsigned as(apath.size()); for (unsigned i(0); i<as; ++i) { const path_segment& ps(apath[i]); assert(ps.type != NONE); bam_cigar[i] = (ps.length<<BAM_CIGAR_SHIFT | (static_cast<uint32_t>(ps.type)-1)); } }
/// Return the size of the file at `p` as reported by the stream overload of
/// stat_filesize(), or 0 when the file cannot be opened in a good state.
uint64_t file_manager::stat_filesize(path_t const& p) const
{
  std::ifstream stream(p.string().c_str(), std::ios_base::binary);

  const bool usable = stream.is_open() && stream.good();
  if (!usable) {
    return 0;
  }

  const uint64_t byte_count = stat_filesize(stream);
  stream.close();

  return byte_count;
}
void export_md_to_apath(const char* md, const bool is_fwd_strand, path_t& apath, const bool is_edge_deletion_error) { // to make best use of previous code, we parse the MD in the // alignment direction and then orient apath to the forward strand // as a second step if required // assert(NULL != md); apath.clear(); export_md_to_apath_impl(md,apath); unsigned as(apath.size()); if ( ((as>0) and (apath.front().type == DELETE)) or ((as>1) and (apath.back().type == DELETE)) ) { std::ostringstream oss; if (is_edge_deletion_error) { oss << "ERROR: "; } else { oss << "WARNING: "; } oss << "alignment path: " << apath_to_cigar(apath) << " contains meaningless edge deletion.\n"; if (is_edge_deletion_error) { throw blt_exception(oss.str().c_str()); } else { log_os << oss.str(); path_t apath2; for (unsigned i(0); i<as; ++i) { if (((i==0) or ((i+1)==as)) and apath[i].type == DELETE) continue; apath2.push_back(apath[i]); } apath=apath2; as=apath.size(); } } if ( (not is_fwd_strand) and (as>1) ) { std::reverse(apath.begin(),apath.end()); } }
/// Equality for path_t values.
///
/// If either operand evaluates false under operator!, the two are equal
/// only when their c_str() pointers are identical. Otherwise they are equal
/// when both length and character content match.
inline bool operator==(path_t const &a, path_t const &b)
{
  const bool either_unset = (!a || !b);
  if (either_unset) {
    return a.c_str() == b.c_str();
  }

  if (a.length() != b.length()) {
    return false;
  }
  return ::strcmp(a.c_str(), b.c_str()) == 0;
}
void bam_cigar_to_apath(const uint32_t* bam_cigar, const unsigned n_cigar, path_t& apath) { // this assertion isn't really required... // assert(n_cigar>0); apath.resize(n_cigar); for (unsigned i(0); i<n_cigar; ++i) { apath[i].length=(bam_cigar[i]>>BAM_CIGAR_SHIFT); apath[i].type = static_cast<align_t>(1+(bam_cigar[i]&BAM_CIGAR_MASK)); } }
/// Follow `path` from this accessor, one element at a time.
///
/// Each element is applied with operator[] using either its string key or
/// its integer index. Traversal stops early once the current accessor
/// reports `is_valid` as false. Elements holding neither a string nor an
/// int are skipped.
///
/// @return the accessor reached at the end of the traversal
accessor get_path(path_t const& path) const
{
    accessor current = *this;

    size_t position = 0;
    while (position < path.size() && current.is_valid) {
        if (const std::string* key = boost::get<std::string>(&path[position])) {
            current = current[*key];
        }
        else if (const int* index = boost::get<int>(&path[position])) {
            current = current[*index];
        }
        ++position;
    }

    return current;
}
/// Return true when \p apath contains two adjacent segments that are both
/// indel types according to is_segment_type_indel().
bool
is_seq_swap(const path_t& apath)
{
    const unsigned segment_count(apath.size());

    bool is_prev_indel(false);
    for (unsigned seg_index(0); seg_index<segment_count; ++seg_index)
    {
        const bool is_indel(is_segment_type_indel(apath[seg_index].type));
        if (is_prev_indel && is_indel) return true;
        is_prev_indel = is_indel;
    }
    return false;
}
void cigar_to_apath(const char* cigar, path_t& apath) { using illumina::blt_util::parse_unsigned; assert(NULL != cigar); apath.clear(); path_segment lps; const char* cptr(cigar); while (*cptr) { path_segment ps; // expect sequences of digits and cigar codes: if (! isdigit(*cptr)) unknown_cigar_error(cigar,cptr); ps.length = parse_unsigned(cptr); ps.type = cigar_code_to_segment_type(*cptr); if (ps.type == NONE) unknown_cigar_error(cigar,cptr); cptr++; if ((ps.type == PAD) || (ps.length == 0)) continue; if (ps.type != lps.type) { if (lps.type != NONE) apath.push_back(lps); lps = ps; } else { lps.length += ps.length; } } if (lps.type != NONE) apath.push_back(lps); }
/// Wrap \p apath in clipping segments.
///
/// The resulting order is: leading hard-clip, leading soft-clip, the
/// original path, trailing soft-clip, trailing hard-clip. A clip segment is
/// only added when its length is non-zero.
void
apath_clip_adder(
    path_t& apath,
    const unsigned hc_lead,
    const unsigned hc_trail,
    const unsigned sc_lead,
    const unsigned sc_trail)
{
    path_t clipped;
    path_segment clip;

    // leading clips: hard-clip is outermost
    if (0 != hc_lead)
    {
        clip.type = HARD_CLIP;
        clip.length = hc_lead;
        clipped.push_back(clip);
    }
    if (0 != sc_lead)
    {
        clip.type = SOFT_CLIP;
        clip.length = sc_lead;
        clipped.push_back(clip);
    }

    clipped.insert(clipped.end(),apath.begin(),apath.end());

    // trailing clips: hard-clip is outermost
    if (0 != sc_trail)
    {
        clip.type = SOFT_CLIP;
        clip.length = sc_trail;
        clipped.push_back(clip);
    }
    if (0 != hc_trail)
    {
        clip.type = HARD_CLIP;
        clip.length = hc_trail;
        clipped.push_back(clip);
    }

    apath=clipped;
}
virtual int copy_file(const entry_t& source, const path_t& target) { entry_t& entry = target_entry_; const char_t* chars = 0; int err = 1; if ((entry.exists(chars = target.chars()))) { if ((write_overwrite != write_) && (write_append != write_)) { errf("target file \"%s\" already exists\n", chars); } else { fs::entry_type type = fs::entry_type_none; switch (type = entry.type()) { case fs::entry_type_file: err = copy_file_to_file(source, entry); break; default: break; } } } else { if (!(err = make_directory(entry, target))) { entry.set_path(chars); err = copy_file_to_file(source, entry); } else { errf("failed to make directory \"%s\"\n", target.directory().chars()); } } if (!(err) && (!(to_same != to_) || !(target_modified_))) { if ((entry.set_times_to_set(source))) { if ((entry.set_times_set())) { } else { } } } return err; }
/// Download `url` into the file at `path` and verify it against `checksum`,
/// making up to `retry_count_ + 1` attempts.
///
/// @param url          resource to download
/// @param path         destination file; truncated at the start of every attempt
/// @param checksum     expected hex digest of the downloaded file
/// @param retry_tally  optional; receives the zero-based index of the most
///                     recently started attempt
/// @return true when an attempt produced a file whose digest matches
///         `checksum`; false when the destination is not writable, the
///         downloaded file is not readable, or all attempts fail
bool downloader::fetch(string_t const& url,
                       path_t const& path,
                       string_t const& checksum,
                       int* const retry_tally) const
{
  // TODO: rethink about this, this really sounds like an external concern
  for (int i = 0; i < retry_count_ + 1; ++i) {
    if (!file_manager_.is_writable(path)) {
      error() << "Download destination is un-writable: " << path;
      return false;
    }

    std::ofstream fp(path.string().c_str(), std::ios_base::trunc | std::ios_base::binary);

    if (retry_tally != nullptr) {
      (*retry_tally) = i;
    }

    const bool fetch_successful = fetch(url, fp);
    fp.close();

    if (fetch_successful) {
      if (!file_manager_.is_readable(path)) {
        return false; // this really shouldn't happen, but oh well
      }

      // validate integrity
      hasher::digest_rc rc = config_.hasher->hex_digest(path);

      if (rc == checksum) {
        return true;
      }
      else {
        warn() << "Downloaded file integrity mismatch: " << rc.digest << " vs " << checksum;
      }
    }

    // Announce a retry only when another attempt will actually be made;
    // after the final attempt there is nothing left to retry.
    if (i < retry_count_) {
      notice() << "Retry #" << i+1;
    }
  }

  return false;
}
std::pair<unsigned,unsigned> get_match_edge_segments(const path_t& apath) { const unsigned as(apath.size()); std::pair<unsigned,unsigned> res(as,as); bool is_first_match(false); for (unsigned i(0); i<as; ++i) { const path_segment& ps(apath[i]); if (is_segment_align_match(ps.type)) { if (! is_first_match) res.first=i; is_first_match=true; res.second=i; } } return res; }
unsigned get_clip_len(const path_t& apath) { const unsigned as(apath.size()); if (as==0) return 0; if ((apath[0].type == SOFT_CLIP) || (apath[0].type == HARD_CLIP)) { return apath[0].length; } if (as>1) { if ((apath[as-1].type == SOFT_CLIP) || (apath[as-1].type == HARD_CLIP)) { return apath[as-1].length; } } return 0; }
void edit_bam_cigar(const path_t& apath, bam1_t& br) { bam1_core_t& bc(br.core); const int old_n_cigar(bc.n_cigar); const int new_n_cigar(apath.size()); const int delta(4*(new_n_cigar-old_n_cigar)); if (0 != delta) { const int end(bc.l_qname+(4*old_n_cigar)); change_bam_data_segment_len(end,delta,br); bc.n_cigar=new_n_cigar; } //update content of cigar array: apath_to_bam_cigar(apath,bam1_cigar(&br)); }
/// Map `path` through the regex rules in `mapper`.
///
/// Every rule whose regex matches the path is considered; the rule with the
/// longest first capture group wins, and the result is that rule's
/// replacement followed by the second capture group. When several rules tie
/// on length, the earliest one is kept.
///
/// @return the mapped path, or `path` unchanged when no rule matches with a
///         non-empty first capture group
path_t BackupSystem::map_path(const path_t &path, const path_mapper_t &mapper){
	path_t ret;
	stream_index_t longest = 0;
	// The wide-string form of the path is the same for every rule, so it is
	// converted once instead of once per mapper entry.
	auto temp = path.wstring();
	for (auto &tuple : mapper){
		auto start = temp.begin(),
			end = temp.end();
		boost::match_results<decltype(start)> match;
		auto &re = tuple.first;
		if (!boost::regex_search(start, end, match, re, default_regex_flags))
			continue;
		std::wstring g1(match[1].first, match[1].second);
		// only a strictly longer prefix replaces the current best
		if (g1.size() <= longest)
			continue;
		longest = g1.size();
		std::wstring g2(match[2].first, match[2].second);
		ret = tuple.second + g2;
	}
	return !longest ? path : ret;
}
virtual int make_directories (const sub_directories_t& sub_directories, const entry_t& entry, const path_t& path) { int err = 0; sub_directories_t::const_iterator end = sub_directories.end(); sub_directories_t::const_iterator i; for (i = sub_directories.begin(); i != end; ++i) { const sub_directory_t& d = *i; string_t path(d.chars, d.length); EV_LOG_MESSAGE_INFO("make directory \"" << path << "\"..."); if ((entry.make_directory(path.chars()))) { EV_LOG_MESSAGE_INFO("...made directory \"" << path << "\""); } else { EV_LOG_MESSAGE_INFO("...failed to make directory \"" << path << "\""); return 1; } } return err; }
virtual int copy(const path_t& source, const path_t& target) { entry_t& entry = source_entry_; const char_t* chars = 0; int err = 0; if ((entry.exists(chars = source.chars()))) { fs::entry_type type = fs::entry_type_none; switch (type = entry.type()) { case fs::entry_type_file: err = copy_file(entry, target); break; default: break; } } else { errf("source file \"%s\" does not exist\n", chars); err = 1; } return err; }
bool is_edge_readref_len_segment(const path_t& apath) { const unsigned as(apath.size()); if (as==0) return false; const std::pair<unsigned,unsigned> ends(get_match_edge_segments(apath)); // at this point we assume the alignment has been sanity checked for legal clipping, // where hard-clip is only on the outside, next soft-clipping, then anything else... // for (unsigned i(0); i<as; ++i) { const path_segment& ps(apath[i]); const bool is_edge_segment((i<ends.first) || (i>ends.second)); const bool is_clip_type(ps.type==INSERT || ps.type==DELETE || ps.type==SKIP || ps.type==SOFT_CLIP); if (is_edge_segment && is_clip_type) return true; } return false; }
std::pair<unsigned,unsigned> get_nonclip_end_segments(const path_t& apath) { const unsigned as(apath.size()); std::pair<unsigned,unsigned> res(as,as); bool is_first_nonclip(false); for (unsigned i(0); i<as; ++i) { const path_segment& ps(apath[i]); if (! (ps.type == SOFT_CLIP || ps.type == HARD_CLIP)) { if (! is_first_nonclip) { res.first=i; is_first_nonclip=true; } res.second=i; } } return res; }
/// Establish the root and cache paths.
///
/// An explicit `root` is used as given; an empty one is replaced by the
/// root directory located from the binary's directory. The cache path is
/// `<root>/.kzh/cache` in preferred form on every platform.
///
/// @param root     root directory to use, or empty to locate it
/// @param verbose  when true, the platform and resulting paths are written
///                 to the debug log
void path_resolver::resolve(path_t root, bool verbose)
{
  // locate the binary and build its path when no root was supplied
  if (!root.empty()) {
    root_path_ = root;
  }
  else {
    root_path_ = locate_root_directory(locate_bin_directory(this, verbose));
  }

  // only the reported platform name differs between builds
#if KZH_PLATFORM == KZH_PLATFORM_LINUX
  if (verbose) {
    debug() << "Platform: Linux";
  }
#elif KZH_PLATFORM == KZH_PLATFORM_APPLE
  if (verbose) {
    debug() << "Platform: OS X";
  }
#else
  if (verbose) {
    debug() << "Platform: Windows";
  }
#endif

  cache_path_ = path_t(root_path_ / ".kzh/cache").make_preferred();

  if (verbose) {
    debug() << "Root path: " << root_path_;
    debug() << "Cache path: " << cache_path_;
  }
}