/**
 * Encode an alignment as an integer vector over the graph's entities.
 *
 * The result has node_count + edge_count slots, all initialised to 0. For
 * every mapping in the alignment's path that carries a position, the slot of
 * the visited node is set to 2 when paths_of_node() reports at least one
 * path for it and to 1 otherwise. For every pair of consecutive mappings
 * joined by a forward-to-forward edge in the index, the slot of that edge is
 * set to 1 when paths_of_entity() reports at least one path and to 2
 * otherwise.
 *
 * NOTE(review): the node and edge branches assign 1/2 in opposite senses
 * (a node on a path gets 2, an edge on a path gets 1). That is preserved
 * from the original code; confirm which encoding is actually intended.
 *
 * @param a  The alignment whose path is encoded.
 * @return   A vector with one slot per node/edge entity holding 0, 1 or 2.
 */
vector<int> Vectorizer::alignment_to_a_hot(Alignment a){
    int64_t entity_size = my_xg->node_count + my_xg->edge_count;
    vector<int> ret(entity_size, 0);

    // Bind the protobuf sub-messages by const reference; they are only read,
    // so there is no reason to deep-copy them on every iteration.
    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }
        int64_t node_id = mapping.position().node_id();
        // Entity ranks are used as 1-based here (per the original author's
        // note that nodes are indexed from 1), hence the "- 1" when indexing.
        //TODO: this means we may one day have to do the same bump up
        // by one for edges, as I assume they are also indexed starting at 1.
        int64_t key = my_xg->node_rank_as_entity(node_id);

        // Find the edge by previous / current node ID. Only the
        // forward-to-forward orientation is looked up; it shouldn't
        // **really** matter whether we catch them in the forward or
        // reverse direction.
        if (i > 0){
            const Mapping& prev_mapping = path.mapping(i - 1);
            // A previous mapping without a position has no node to form an
            // edge with; reading it anyway would query the index with the
            // default node id 0.
            if (prev_mapping.has_position()){
                int64_t prev_node_id = prev_mapping.position().node_id();
                if (my_xg->has_edge(prev_node_id, false, node_id, false)){
                    int64_t edge_key = my_xg->edge_rank_as_entity(prev_node_id, false, node_id, false);
                    ret[edge_key - 1] = my_xg->paths_of_entity(edge_key).empty() ? 2 : 1;
                }
            }
        }

        // Check if the node of interest is on a path.
        ret[key - 1] = my_xg->paths_of_node(node_id).empty() ? 1 : 2;
    }
    return ret;
}
/**
 * Encode an alignment as a vector of per-node identity fractions.
 *
 * The result has node_count + edge_count slots, all initialised to 0.0. For
 * every mapping in the alignment's path that carries a position, the slot of
 * the visited node receives the fraction
 *     (bases in exact-match edits) / (sum of from_length over all edits),
 * where an exact-match edit is one with from_length == to_length and an
 * empty sequence. A mapping whose edits have a total from_length of zero
 * gets 0.0. For every pair of consecutive mappings joined by a
 * forward-to-forward edge in the index, the slot of that edge is set to 1.0.
 *
 * @param a  The alignment whose path is encoded.
 * @return   A vector with one slot per node/edge entity.
 */
vector<double> Vectorizer::alignment_to_identity_hot(Alignment a){
    int64_t entity_size = my_xg->node_count + my_xg->edge_count;
    vector<double> ret(entity_size, 0.0);

    // Bind the protobuf sub-messages by const reference; they are only read,
    // so there is no reason to deep-copy them on every iteration.
    const Path& path = a.path();
    for (int i = 0; i < path.mapping_size(); i++){
        const Mapping& mapping = path.mapping(i);
        if (!mapping.has_position()){
            continue;
        }
        int64_t node_id = mapping.position().node_id();
        int64_t key = my_xg->node_rank_as_entity(node_id);

        // Calculate % identity by walking the edits and counting matches.
        double match_len = 0.0;
        double total_len = 0.0;
        for (int j = 0; j < mapping.edit_size(); j++){
            const Edit& e = mapping.edit(j);
            total_len += e.from_length();
            if (e.from_length() == e.to_length() && e.sequence().empty()){
                match_len += (double) e.to_length();
            }
            // TODO if we map but don't match exactly (equal lengths but a
            // non-empty sequence), add half the average length to match_len:
            //match_len += (double) (0.5 * ((double) e.to_length()));
        }
        // match_len only grows together with total_len, so a zero total
        // implies zero matches; guard the division on the total alone.
        double pct_id = (total_len == 0.0) ? 0.0 : (match_len / total_len);
        // Ranks are used as 1-based here, hence the "- 1" when indexing.
        ret[key - 1] = pct_id;

        if (i > 0){
            const Mapping& prev_mapping = path.mapping(i - 1);
            // A previous mapping without a position has no node to form an
            // edge with; reading it anyway would query the index with the
            // default node id 0.
            if (prev_mapping.has_position()){
                int64_t prev_node_id = prev_mapping.position().node_id();
                if (my_xg->has_edge(prev_node_id, false, node_id, false)){
                    int64_t edge_key = my_xg->edge_rank_as_entity(prev_node_id, false, node_id, false);
                    ret[edge_key - 1] = 1.0;
                }
            }
        }
    }
    return ret;
}