/** Insert a value in to an array of line frequencies * * @param nu array of line frequencies * @param nu_insert value of nu key * @param number_of_lines number of lines in the line list * * @return index of the next line ot the red. If the key value is redder than the reddest line returns number_of_lines. */ tardis_error_t line_search (const double *nu, double nu_insert, int64_t number_of_lines, int64_t * result) { tardis_error_t ret_val = TARDIS_ERROR_OK; int64_t imin = 0; int64_t imax = number_of_lines - 1; if (nu_insert > nu[imin]) { *result = imin; } else if (nu_insert < nu[imax]) { *result = imax + 1; } else { ret_val = reverse_binary_search (nu, nu_insert, imin, imax, result); *result = *result + 1; } return ret_val; }
void find_duplication_distances(const bpp::Tree &tree, const vector<int> &breadth_first_visit_order, const map<int, AttributeT> attribute_of_node, const map<int, map<AttributeT, TreeDistanceInfo<ObjectT, nullObjectValue> *> *> distance_from_object_with_attribute_to_node, const LeftRightIdsOfNodeIdMap &idMap, const map<int, int> &nodeIdOfLeftIdMap, const map<ObjectT, int> &leaf_of_object, const map<int, ObjectT> &object_of_leaf, const map<ObjectT, map<ObjectT, set<int> *> *> &alternative_nearest_objects, map<ObjectT, map<ObjectT, DupInfo *> *> &duplication_node_of_objects, map<int, ObjObjInfo *> &pair_of_objects_yielding_duplication_node_distance, map<int, ObjObjInfo *> &pair_of_nearest_objects_to_maximal_node, map<int, double> &greatest_distance_of_maximal_descendant) { if (alternative_nearest_objects.size() == 0) { // There is nothing to do here but set the root int rootId = tree.getRootId(); greatest_distance_of_maximal_descendant[rootId] = -1; return; } // A maximal node is one for which, for some attribute, it is not the case // that the node, its parent, and all its siblings have the same nearest // object in the tree with that attribute. There must be at least two // distinct objects with the same attribute such that they are the nearest // object with that attribute to some of the node, its parent, and its // siblings. All such pairs of objects, and the maximal nodes to which they // pertain, have previously been recorded in alternative_nearest_objects. // For each pair of objects in alternative_nearest_objects, we will find // the most recent common ancestor (MRCA) of that pair of objects. That is // the duplication node for the pair of objects. We also will find the tree // distance between the two objects. If the tree obeyed a molecular clock, // then half this tree distance would tell us how long ago the duplication // happened, and each of the two objects would be exactly this distance from // the duplication node. Because the tree does not obey a molecular clock, // for any particular duplication node, the distances we get this way from // different pairs of duplicated objects that descend from it may be // different. For each duplication node, we will record the maximum of all // such distances for any pair of duplicated objects descending from that // node, and which pair of objects yielded this maximum. Call this maximum // duplication_node_distance. // For each maximal node, it is maximal due to several possible pairs of // alternative nearest objects. It may not always be the case that for each // pair of alternative nearest objects causing a particular node to be // maximal, one or both members of the pair will be descendants of the maximal // node. The duplication node between them also may not be a descendant of // the maximal node. The duplication distance of a maximal node will be the // maximum duplication_node_distance for all duplication nodes of pairs of // alternative nearest objects to the maximal node. We will find the // duplication distance for each maximal node, and record which pair of // objects has the duplication_node_distance yielding that duplication // distance. Note that this pair of objects may not be the same as the one // yielding the duplication_node_distance of the duplication node. // We find the distances from each of the objects that cause nodes to be // maximal to each of their ancestors. map<int, map<ObjectT, double> *> distances_to_maximizing_objects; find_distances_to_maximizing_objects(tree, breadth_first_visit_order, alternative_nearest_objects, leaf_of_object, distances_to_maximizing_objects); // The left_ids and right_ids of the nodes in this tree have previously been // computed and passed in idMap (which has idMap[nodeId] == pair<left_id, // right_id>). These numbers, which were computed by a modified preorder tree // traversal, have the useful properties that: // node N1 descends from ndoe N2 if and only if (iff): // left_id(N1) >= left_id(N2) and right_id(N1) <= right_id(N2) // node N is an ancestor of nodes N1, ..., Nk iff: // left_id(Ni) >= left_id(N) for all i = 1,...,k and // right_id(Ni) <= right_id(N) for all i = 1,...,k // node N is the most recent common ancestor of nodes N1, ..., Nk iff // node N is an ancestor of nodes N1, ..., Nk and // any of the following equivalent conditions holds: // (i) node N has the greatest left_id among ancestors of nodes N1,...,Nk // (ii) node N has the least right_id among ancestors of nodes N1,...,Nk // (iii) node N appears last in the breadth_first_search_order among // ancestors of nodes N1,...,Nk // We'll be using (iii). // We'll be looking up nodes by either their left_id or their right_id. The // nodeIdOfLeftIdMap was passed in, but we now create a nodeIdOfRighgtIdMap as // well, using the idMap (which has idMap[nodeId] == pair<left_id, right_id>). map<int, int> nodeIdOfRightIdMap; map<int, pair<int, int> >::const_iterator node_id_iter; for (node_id_iter = idMap.begin(); node_id_iter != idMap.end(); ++node_id_iter) { nodeIdOfRightIdMap[node_id_iter->second.second] = node_id_iter->first; } // We will be finding the tree distances between pairs of // alternative_nearest_objects. To make it easy to test the condition that // N is an ancestor of N1 and N2, i.e., that // left_id(N1) >= left_id(N) and left_id(N2) >= left_id(N) and // right_id(N1) <= right_id(N) and right_id(N2) <= right_id(N), // we will keep track of each pair of leaves N1 and N2 as // left_id = min(left_id(N1), left_id(N2)) and // right_id = max(right_id(N1), right_id(N2)). map<int, vector<int> *> other_leaf_right_ids_of_leaf_left_ids; map<int, vector<int> *>::const_iterator right_left_iter; int leftId0, leftId1, rightId0, rightId1, leftId, rightId; map<ObjectT, set<int> *>::const_iterator obj_nodes_iter; map<ObjectT, map<ObjectT, set<int> *> *>::const_iterator obj_objs_nodes_iter; int obj0, obj1, lesserObj, greaterObj; map<ObjectT, int>::const_iterator obj_leaf_iter; map<int, pair<int,int> >::const_iterator node_left_right_iter; map<ObjectT, map<ObjectT, DupInfo *> *>::iterator obj_obj_dup_iter; for (obj_objs_nodes_iter = alternative_nearest_objects.begin(); obj_objs_nodes_iter != alternative_nearest_objects.end(); ++obj_objs_nodes_iter) { obj0 = obj_objs_nodes_iter->first; obj_leaf_iter = leaf_of_object.find(obj0); node_left_right_iter = idMap.find(obj_leaf_iter->second); leftId0 = node_left_right_iter->second.first; rightId0 = node_left_right_iter->second.second; for (obj_nodes_iter = obj_objs_nodes_iter->second->begin(); obj_nodes_iter != obj_objs_nodes_iter->second->end(); ++obj_nodes_iter) { obj1 = obj_nodes_iter->first; lesserObj = (obj0 < obj1) ? obj0 : obj1; greaterObj = (obj0 < obj1) ? obj1 : obj0; obj_obj_dup_iter = duplication_node_of_objects.find(lesserObj); if (obj_obj_dup_iter == duplication_node_of_objects.end()) { duplication_node_of_objects[lesserObj] = new map<ObjectT, DupInfo *>; } obj_leaf_iter = leaf_of_object.find(obj1); node_left_right_iter = idMap.find(obj_leaf_iter->second); leftId1 = node_left_right_iter->second.first; rightId1 = node_left_right_iter->second.second; leftId = (leftId0 < leftId1) ? leftId0 : leftId1; rightId = (rightId0 > rightId1) ? rightId0 : rightId1; right_left_iter = other_leaf_right_ids_of_leaf_left_ids.find(leftId); if (right_left_iter == other_leaf_right_ids_of_leaf_left_ids.end()) { other_leaf_right_ids_of_leaf_left_ids[leftId] = new vector<int>; } other_leaf_right_ids_of_leaf_left_ids[leftId]->push_back(rightId); } } // We sort the left_ids and right_ids. This way when we are checking whether // a node is an ancestor of some pair, it will be easy for us to only check // those pairs of which it could possibly be an ancestor. vector<int> sorted_left_ids_of_leaves; for (right_left_iter = other_leaf_right_ids_of_leaf_left_ids.begin(); right_left_iter != other_leaf_right_ids_of_leaf_left_ids.end(); ++right_left_iter) { sorted_left_ids_of_leaves.push_back(right_left_iter->first); std::sort(right_left_iter->second->begin(), right_left_iter->second->end()); std::reverse(right_left_iter->second->begin(), right_left_iter->second->end()); } std::sort(sorted_left_ids_of_leaves.begin(), sorted_left_ids_of_leaves.end()); int least_left_id_greater_than_node_left_id, greatest_right_id_less_than_node_right_id; vector<int>::iterator j, k; int r; AttributeT attr; int nodeId, childId; double distanceToLesserObj, distanceToGreaterObj, duplication_node_distance, new_duplication_node_distance; map<int, ObjObjInfo *>::iterator node_obj_pair_iter; map<int, int>::const_iterator int_node_iter; map<int, AttributeT>::const_iterator node_attr_iter; map<int, ObjectT>::const_iterator leaf_obj_iter; map<ObjectT, int>::const_iterator ancestral_iter; map<int, map<AttributeT, TreeDistanceInfo<ObjectT, nullObjectValue> *> *>::const_iterator dist_attr_node_iter; // Now we go up the tree computing the distances between each pair of // alternative nearest objects, and all the duplication_node_distances. for (int i = breadth_first_visit_order.size() - 1; i >= 0; --i) { nodeId = breadth_first_visit_order[i]; const vector<int> &children = tree.getSonsId(nodeId); duplication_node_distance = -1.0; for (int c = 0; c < children.size(); ++c) { childId = children[c]; node_obj_pair_iter = pair_of_objects_yielding_duplication_node_distance.find(childId); if (node_obj_pair_iter != pair_of_objects_yielding_duplication_node_distance.end()) { new_duplication_node_distance = getDuplicationNodeDistance(childId, duplication_node_of_objects, pair_of_objects_yielding_duplication_node_distance); if (duplication_node_distance < 0.0) { pair_of_objects_yielding_duplication_node_distance[nodeId] = new ObjObjInfo(); pair_of_objects_yielding_duplication_node_distance[ nodeId]->setValues( pair_of_objects_yielding_duplication_node_distance[ childId]->getLesserObj(), pair_of_objects_yielding_duplication_node_distance[ childId]->getGreaterObj()); duplication_node_distance = new_duplication_node_distance; } else { if (new_duplication_node_distance > duplication_node_distance) { pair_of_objects_yielding_duplication_node_distance[ nodeId]->setValues( pair_of_objects_yielding_duplication_node_distance[ childId]->getLesserObj(), pair_of_objects_yielding_duplication_node_distance[ childId]->getGreaterObj()); duplication_node_distance = new_duplication_node_distance; } } } } node_left_right_iter = idMap.find(nodeId); leftId = node_left_right_iter->second.first; rightId = node_left_right_iter->second.second; // If the binary_search returns -1, this means leftId is before the // beginning of sorted_left_ids_of_leaves. So it will be correct that j = // sorted_left_ids_of_leaves.begin() j = sorted_left_ids_of_leaves.begin() + 1 + binary_search(leftId, sorted_left_ids_of_leaves); least_left_id_greater_than_node_left_id = *j; // least_left_id_greater_than_node_left_id will be increasing as we go // through this loop; we stop when it goes past rightId. while (j != sorted_left_ids_of_leaves.end() && least_left_id_greater_than_node_left_id < rightId) { int_node_iter = nodeIdOfLeftIdMap.find(least_left_id_greater_than_node_left_id); node_attr_iter = attribute_of_node.find(int_node_iter->second); leaf_obj_iter = object_of_leaf.find(int_node_iter->second); obj0 = leaf_obj_iter->second; // If the binary_search returns -1, this means rightId is after the end of // the right_ids that are paired with // least_left_id_greater_than_node_left_id (since they are sorted in // reverse order). So it will be correct that k = // other_leaf_right_ids_of_leaf_left_ids[least_left_id_greater_than_node_left_id]->begin(). // beginning of the right_ids that are paired with // least_left_id_greater_than_node_left_id. Hence none of them can be // descendant from this node, and we should continue. k = other_leaf_right_ids_of_leaf_left_ids[ least_left_id_greater_than_node_left_id]->begin() + 1 + reverse_binary_search(rightId, *(other_leaf_right_ids_of_leaf_left_ids[ least_left_id_greater_than_node_left_id])); greatest_right_id_less_than_node_right_id = *k; // greatest_right_id_less_than_node_right_id will be decreasing as we go // through this loop; we stop when it goes past leftId. while (k != other_leaf_right_ids_of_leaf_left_ids[ least_left_id_greater_than_node_left_id]->end() && greatest_right_id_less_than_node_right_id > leftId) { int_node_iter = nodeIdOfRightIdMap.find( greatest_right_id_less_than_node_right_id); leaf_obj_iter = object_of_leaf.find(int_node_iter->second); obj1 = leaf_obj_iter->second; // This node is the duplication node of obj0, obj1 lesserObj = (obj0 < obj1) ? obj0 : obj1; greaterObj = (obj0 < obj1) ? obj1 : obj0; distanceToLesserObj = (*distances_to_maximizing_objects[nodeId])[lesserObj]; distanceToGreaterObj = (*distances_to_maximizing_objects[nodeId])[greaterObj]; (*duplication_node_of_objects[lesserObj])[greaterObj] = new DupInfo(nodeId, distanceToLesserObj, distanceToGreaterObj); new_duplication_node_distance = (distanceToLesserObj + distanceToGreaterObj) / 2; if (duplication_node_distance < 0.0) { pair_of_objects_yielding_duplication_node_distance[nodeId] = new ObjObjInfo(); pair_of_objects_yielding_duplication_node_distance[ nodeId]->setValues(lesserObj, greaterObj); duplication_node_distance = new_duplication_node_distance; } else { if (new_duplication_node_distance > duplication_node_distance) { pair_of_objects_yielding_duplication_node_distance[ nodeId]->setValues(lesserObj, greaterObj); duplication_node_distance = new_duplication_node_distance; } } // Since we've found the MRCA of obj0, obj1, we don't need to check // for this pair any more. In fact, we had better not, otherwise we // would erroneously keep setting the duplication node to other // ancestors closer to the root of the tree. So we must erase // right_id here. k = other_leaf_right_ids_of_leaf_left_ids[ least_left_id_greater_than_node_left_id]->erase(k); if (k != other_leaf_right_ids_of_leaf_left_ids[ least_left_id_greater_than_node_left_id]->end()) { // Now k points to the item that originally came *after* // greatest_right_id_less_than_node_right_id in the vector, which // means that it is *less* than // greatest_right_id_less_than_node_right_id (since the vector is in // reverse order), and hence still less than rightId. greatest_right_id_less_than_node_right_id = *k; } } if (other_leaf_right_ids_of_leaf_left_ids[ least_left_id_greater_than_node_left_id]->size() == 0) { // Since we've found the duplication nodes of all pairs with obj0, we // don't need to check it any more. So we can erase the left_id. delete other_leaf_right_ids_of_leaf_left_ids[ least_left_id_greater_than_node_left_id]; other_leaf_right_ids_of_leaf_left_ids.erase( least_left_id_greater_than_node_left_id); j = sorted_left_ids_of_leaves.erase(j); } else { ++j; } // Now j points to the item that originall came after // least_left_id_greater_than_node_left_id in sorted_left_ids_of_leaves, // which means that it greater than // least_left_id_greater_than_node_left_id, and hence also than leftId. if (j != sorted_left_ids_of_leaves.end()) { least_left_id_greater_than_node_left_id = *j; } } } if (other_leaf_right_ids_of_leaf_left_ids.size() > 0) { cerr << "Warning: other_leaf_right_ids_of_leaf_left_ids ended nonempty" << endl; } // Now we go through alternative_nearest_objects computing the duplication // distances of the maximal nodes. set<int>::iterator maximal_node_iter; int duplicationNodeId; map<int, ObjObjInfo *>::iterator obj_obj_max_node_iter; for (obj_objs_nodes_iter = alternative_nearest_objects.begin(); obj_objs_nodes_iter != alternative_nearest_objects.end(); ++obj_objs_nodes_iter) { obj0 = obj_objs_nodes_iter->first; for (obj_nodes_iter = obj_objs_nodes_iter->second->begin(); obj_nodes_iter != obj_objs_nodes_iter->second->end(); ++obj_nodes_iter) { obj1 = obj_nodes_iter->first; lesserObj = (obj0 < obj1) ? obj0 : obj1; greaterObj = (obj0 < obj1) ? obj1 : obj0; duplicationNodeId = (*duplication_node_of_objects[lesserObj]) [greaterObj]->getDuplicationNodeId(); for (maximal_node_iter = obj_nodes_iter->second->begin(); maximal_node_iter != obj_nodes_iter->second->end(); ++maximal_node_iter) { obj_obj_max_node_iter = pair_of_nearest_objects_to_maximal_node.find( *maximal_node_iter); if (obj_obj_max_node_iter == pair_of_nearest_objects_to_maximal_node.end()) { pair_of_nearest_objects_to_maximal_node[*maximal_node_iter] = new ObjObjInfo(); pair_of_nearest_objects_to_maximal_node[ *maximal_node_iter]->setValues(lesserObj, greaterObj); } else { if (getDuplicationNodeDistance(duplicationNodeId, duplication_node_of_objects, pair_of_objects_yielding_duplication_node_distance) > getMaximalNodeDistance(*maximal_node_iter, duplication_node_of_objects, pair_of_objects_yielding_duplication_node_distance, pair_of_nearest_objects_to_maximal_node)) { pair_of_nearest_objects_to_maximal_node[ *maximal_node_iter]->setValues(lesserObj, greaterObj); } } } } } // Now we go up the tree finding, for each maximal node, the greatest // duplication distance of any of its strict descendants (i.e., not including // itself) which is also a maximal node. We store the pair of the maximal // node's own duplication distance and this for each of the maximal nodes, and // for the root. map<int, double> all_node_greatest_distance_of_maximal_descendant; map<int, double>::const_iterator node_iter, parent_iter; int parentId; double currentDistance; for (int i = breadth_first_visit_order.size() - 1; i >= 1; --i) { nodeId = breadth_first_visit_order[i]; node_iter = all_node_greatest_distance_of_maximal_descendant.find(nodeId); parentId = tree.getFatherId(nodeId); parent_iter = all_node_greatest_distance_of_maximal_descendant.find(parentId); if (node_iter != all_node_greatest_distance_of_maximal_descendant.end()) { if (parent_iter == all_node_greatest_distance_of_maximal_descendant.end()) { all_node_greatest_distance_of_maximal_descendant[parentId] = node_iter->second; } else { if (node_iter->second > parent_iter->second) { all_node_greatest_distance_of_maximal_descendant[parentId] = node_iter->second; } } } obj_obj_max_node_iter = pair_of_nearest_objects_to_maximal_node.find(nodeId); if (obj_obj_max_node_iter != pair_of_nearest_objects_to_maximal_node.end()) { if (node_iter == all_node_greatest_distance_of_maximal_descendant.end()) { greatest_distance_of_maximal_descendant[nodeId] = -1.0; } else { greatest_distance_of_maximal_descendant[nodeId] = node_iter->second; } currentDistance = getMaximalNodeDistance(nodeId, duplication_node_of_objects, pair_of_objects_yielding_duplication_node_distance, pair_of_nearest_objects_to_maximal_node); if (parent_iter == all_node_greatest_distance_of_maximal_descendant.end()) { all_node_greatest_distance_of_maximal_descendant[parentId] = currentDistance; } else { if (currentDistance > parent_iter->second) { all_node_greatest_distance_of_maximal_descendant[parentId] = currentDistance; } } } } int rootId = tree.getRootId(); obj_obj_max_node_iter = pair_of_nearest_objects_to_maximal_node.find(rootId); node_iter = all_node_greatest_distance_of_maximal_descendant.find(rootId); if (node_iter == all_node_greatest_distance_of_maximal_descendant.end()) { greatest_distance_of_maximal_descendant[rootId] = -1.0; } else { greatest_distance_of_maximal_descendant[rootId] = node_iter->second; } map<int, map<ObjectT, double> *>::iterator node_obj_dist_iter; for (node_obj_dist_iter = distances_to_maximizing_objects.begin(); node_obj_dist_iter != distances_to_maximizing_objects.end(); ++node_obj_dist_iter) { delete node_obj_dist_iter->second; } }