/** * find all edges originating from one node and create edge structures in the * graph so that the DFS can find a legal ordering */ static void probe_edges(int i, struct copy_graph_node ** search_array, int count) { /* binary search */ int position = count/2; int next_jump = count/4; OFF_T source = search_array[i]->u.copy->block*BLOCK_SIZE; if(search_array[i]->u.copy->target > source) { broken_copies++; } while(0<= position && position < count && next_jump!=0) { if(verbose>4) { rprintf(FINFO, "Binary Search pos=%d jump=%d\n",position,next_jump); } if(search_array[position]->u.copy->target< source) { /* go right */ position += next_jump; next_jump /= 2; } else if(search_array[position]->u.copy->target > source) { /* go left*/ position -= next_jump; next_jump /= 2; } else { break; } } if(verbose>4) { rprintf(FINFO, "binary search done pos=%d\n",position); } /* either moved to right part of the array, or found an exact match for * source/target. Move iterator k to the left until we find no conflict */ while(position>=0 && (edge_weight(search_array[i],search_array[position]) || position==i)) { position--; } position++; while(position<count && (edge_weight(search_array[i],search_array[position]) || position==i)) { if(position!=i) { struct copy_graph_edge * edge; edge = (struct copy_graph_edge *)malloc(sizeof(struct copy_graph_edge)); update_extra_memory(sizeof(struct copy_graph_edge)); edge->dest = search_array[position]; edge->dest->references++; edge->next = search_array[i]->edge; search_array[i]->edge = edge; } position++; } }
void dijkstra(struct Graph *G, int s) { // Set configurations of all the nodes for (int i = 1; i <= G->V; ++i) { G->arr[i].color = WHITE; G->arr[i].weight = INFINITE; } // For the root G->arr[s].color = GRAY; G->arr[s].weight = 0; // Do the iteration int u, v; struct listNode* iter; for (u=1; u<=G->V; ++u) { iter = G->arr[u].next; while(iter!=NULL) { v = iter->node; if (G->arr[v].color!=BLACK) { // Dijkstra hackery if (G->arr[v].weight > G->arr[u].weight) { G->arr[v].weight = G->arr[u].weight + edge_weight(G, u, v); } } iter = iter->next; } G->arr[u].color = BLACK; } }
/// Deactivate node `u` and take its edges out of its neighbours' weights.
///
/// Each still-active node adjacent to `u` has edge_weight(neighbour, u)
/// subtracted from its m_weight; inactive neighbours are left untouched.
/// Finally the ACTIVE property of `u` is cleared.  Lookups use
/// m_nodes.at(), so an unknown register makes at() fail.
void Graph::remove_node(reg_t u) {
  auto& removed = m_nodes.at(u);

  for (auto neighbour_id : removed.adjacent()) {
    auto& neighbour = m_nodes.at(neighbour_id);
    if (neighbour.is_active()) {
      neighbour.m_weight -= edge_weight(neighbour, removed);
    }
  }

  removed.m_props.reset(Node::ACTIVE);
}
/*
 * Sum the weights of the primary edges of `mst`.
 *
 * The edge array obtained from graph_edges() is consumed here: every edge in
 * it is passed to edge_destroy() and the array itself is released with
 * free().
 */
unsigned int mst_total_weight(graph_t mst) {
    unsigned int edge_total = graph_edges_count(mst);
    edge_t *edge_list = graph_edges(mst);
    unsigned int weight_sum = 0;
    unsigned int idx = 0;

    while (idx < edge_total) {
        edge_t current = edge_list[idx];

        if (edge_is_primary(current)) {
            weight_sum += edge_weight(current);
        }
        /* store back whatever edge_destroy() hands out */
        edge_list[idx] = edge_destroy(current);
        idx++;
    }

    free(edge_list);
    return weight_sum;
}
unsigned int mst_total_weight(graph_t mst) {
    /* Adds up the weight of every primary edge of the given graph and
     * returns that total.  The edge array fetched from the graph is
     * destroyed edge by edge and then freed before returning. */
    edge_t *edge_array = graph_edges(mst);
    const unsigned int edge_count = graph_edges_count(mst);
    unsigned int total = 0;
    unsigned int k;

    for (k = 0; k != edge_count; ++k) {
        /* non-primary edges contribute nothing to the total */
        total += edge_is_primary(edge_array[k]) ? edge_weight(edge_array[k]) : 0;
        edge_array[k] = edge_destroy(edge_array[k]);
    }

    free(edge_array);
    return total;
}
/*
 * Search for a two-way partition ("cut") of the N = component->phased nodes
 * listed in slist and return the smallest cut_score() seen over all attempts
 * (10000 if no attempt produced a score).
 *
 * Parameters
 *   snpfrag    per-node records, indexed by the values stored in slist; this
 *              function rewrites their tedges, telist, parent, score and
 *              heaploc fields
 *   hap        current haplotype string, handed to the scoring helpers
 *   slist      array of N node indices; on return every entry that ended up
 *              on the '1' side of the best cut is replaced by -slist[i]-1
 *   component  block being cut (phased, frags and flist are read)
 *   Flist      fragment array; scores[0..3] and htscores[0..3] of the
 *              fragments in component->flist are zeroed on every attempt
 *
 * Uses drand48(), so results vary between runs.  Calls exit(0) if an attempt
 * puts every node on the same side.
 */
float compute_goodcut(struct SNPfrags* snpfrag, char* hap, int* slist, struct BLOCK* component, struct fragment* Flist) {
    // given a haplotype 'hap' and a fragment matrix, find a cut with positive score
    int totaledges = 0, i = 0, j = 0, k = 0, l = 0, f = 0;
    int wf = 0; //if (drand48() < 0.5) wf=1;
    float W = 0;
    int N = component->phased;
    int iters_since_improved_cut = 0; // non-improving attempts so far (only ever incremented)

    /* CODE TO set up the read-haplotype consistency graph */
    // First pass: count the distinct neighbours of every node.
    // NOTE(review): the second pass below resets tedges to 0 and recounts, so
    // this pass looks redundant.
    for (i = 0; i < N; i++) {
        snpfrag[slist[i]].tedges = 0;
        k = -1;
        // edges contain duplicates in sorted order, but tedges is unique count of edges
        for (j = 0; j < snpfrag[slist[i]].edges; j++) {
            if (k != snpfrag[slist[i]].elist[j].snp) {
                snpfrag[slist[i]].tedges++;
                k = snpfrag[slist[i]].elist[j].snp;
            }
        }
    }
    // Second pass: fill telist with one entry per distinct neighbour, whose
    // weight w is the sum of the weights of all duplicate elist entries.
    for (i = 0; i < N; i++) {
        snpfrag[slist[i]].tedges = 0;
        k = -1;
        for (j = 0; j < snpfrag[slist[i]].edges; j++) {
            if (k != snpfrag[slist[i]].elist[j].snp) {
                // new neighbour: open a telist slot for it
                snpfrag[slist[i]].telist[snpfrag[slist[i]].tedges].snp = snpfrag[slist[i]].elist[j].snp;
                k = snpfrag[slist[i]].elist[j].snp;
                W = (float) edge_weight(hap, slist[i], k, snpfrag[slist[i]].elist[j].p, Flist, snpfrag[slist[i]].elist[j].frag);
                // wf is fixed at 0 above, so the weight is always divided by (calls - 1) of the fragment
                if (wf == 0) W /= Flist[snpfrag[slist[i]].elist[j].frag].calls - 1;
                snpfrag[slist[i]].telist[snpfrag[slist[i]].tedges].w = W;
                snpfrag[slist[i]].tedges++;
                totaledges++;
            } else if (k == snpfrag[slist[i]].elist[j].snp) {
                // same neighbour again: accumulate into the slot opened last
                W = (float) edge_weight(hap, slist[i], k, snpfrag[slist[i]].elist[j].p, Flist, snpfrag[slist[i]].elist[j].frag);
                if (wf == 0) W /= Flist[snpfrag[slist[i]].elist[j].frag].calls - 1;
                snpfrag[slist[i]].telist[snpfrag[slist[i]].tedges - 1].w += W;
            }
        }
    }

    /* CODE TO find 'K' biggest edges in MEC graph, negative weight edges in graph */
    // edgelist keeps the K entries with the largest w seen so far; 'smallest'
    // and 'smallw' track the slot that would be evicted next.
    int K = 5;
    int smallest = 0;
    float smallw = 1000;
    if (totaledges / 2 < K) K = totaledges / 2;
    EDGE* edgelist = (EDGE*) malloc(sizeof (EDGE) * K);
    j = 0;
    i = 0;
    k = 0;
    for (i = 0; i < N; i++) {
        for (j = 0; j < snpfrag[slist[i]].tedges; j++) {
            if (k < K) {
                // still filling the first K slots
                edgelist[k].s = slist[i];
                edgelist[k].t = snpfrag[slist[i]].telist[j].snp;
                edgelist[k].w = snpfrag[slist[i]].telist[j].w;
                if (edgelist[k].w < smallw) {
                    smallest = k;
                    smallw = edgelist[k].w;
                }
                k++;
            } else {
                if (snpfrag[slist[i]].telist[j].w > smallw) {
                    // replace the current minimum, then rescan for the new minimum
                    edgelist[smallest].s = slist[i];
                    edgelist[smallest].t = snpfrag[slist[i]].telist[j].snp;
                    edgelist[smallest].w = snpfrag[slist[i]].telist[j].w;
                    smallw = 1000;
                    for (l = 0; l < K; l++) {
                        if (edgelist[l].w < smallw) {
                            smallest = l;
                            smallw = edgelist[l].w;
                        }
                    }
                }
            }
        }
    }

    /* CODE TO set up the read-haplotype consistency graph */
    // edge contraction algorithm: merge vertices until only two nodes left or total edge weight of graph is negative
    int startnode = (int) (drand48() * N);
    if (startnode == N) startnode--;
    int secondnode = -1; // root of 2nd cluster initially not there
    // chose a positive edge to initialize the two clusters and run this algorithm $O(m)$ times for each block
    // a negative weight cut should have at least one negative edge or if there is no negative weight edge, the edge with lowest weight
    int V = N;
    float curr_cut = 0, best_cut = 10000;
    int snp_add;
    int c1 = 0, c2 = 0;
    char* bestmincut; // per-position side ('0'/'1') of the best cut found so far
    // int size_small,best_small=0,secondlast=0,last=0;
    int iter = 0, maxiter = N / 10;
    if (N / 10 < 1) maxiter = 1;
    if (maxiter >= MAXCUT_ITER && MAXCUT_ITER >= 1) maxiter = MAXCUT_ITER; // added march 13 2013
    int fixheap = 0;
    PHEAP pheap;
    pinitheap(&pheap, N); // heap for maxcut

    /*****************************Maintain two clusters and add each vertex to one of these two ******************/
    bestmincut = (char*) malloc(N);
    for (i = 0; i < N; i++) bestmincut[i] = '0';
    //for (iter=0;iter<totaledges*(int)(log2(totaledges));iter++)
    // The first K attempts are seeded from edgelist; the remaining ones are seeded randomly.
    for (iter = 0; iter < maxiter + K; iter++) {
        pheap.length = N - 2;
        V = N - 2;
        if (iter < K) {
            startnode = edgelist[iter].s;
            secondnode = edgelist[iter].t;
            if (DEBUG) fprintf(stdout, " edge sel %d %d %f \n", startnode, secondnode, edgelist[iter].w);
        } else {
            if (drand48() < 0.5) {
                // pick a random telist entry: draw an index over all edges,
                // then walk the nodes until it falls inside one node's telist
                i = (int) (drand48() * totaledges - 0.0001);
                j = 0;
                while (i >= snpfrag[slist[j]].tedges) {
                    i -= snpfrag[slist[j]].tedges;
                    j++;
                }
                startnode = slist[j];
                secondnode = snpfrag[slist[j]].telist[i].snp;
                // skip this attempt when the chosen edge has weight >= 1
                if (snpfrag[slist[j]].telist[i].w >= 1) continue;
            } else {
                // find node with high MEC score, initialize as startnode
                j = (int) (drand48() * N);
                if (j >= N) j = N - 1;
                startnode = slist[j];
                secondnode = -1;
                // only startnode is kept out of the heap in this case
                pheap.length = N - 1;
                V = N - 1;
            }
        }

        // every node starts as its own parent
        for (i = 0; i < N; i++) snpfrag[slist[i]].parent = slist[i];
        // new code added for heap based calculation
        for (i = 0; i < N; i++) snpfrag[slist[i]].score = 0;
        j = 0; // heap only has N-2 elements (startnode and secondnode are not there)
        for (i = 0; i < N; i++) {
            if (slist[i] != startnode && slist[i] != secondnode) {
                // the heap stores positions in slist, not node ids
                pheap.elements[j] = i;
                snpfrag[slist[i]].heaploc = j;
                j++;
            }
        }
        // for (i=0;i<N;i++) fprintf(stdout,"heaploc %d %d %d-%d\n",slist[i],snpfrag[slist[i]].heaploc,startnode,secondnode);

        // reset the per-fragment score accumulators for this attempt
        for (i = 0; i < component->frags; i++) {
            f = component->flist[i];
            Flist[f].scores[0] = 0.0;
            Flist[f].scores[1] = 0.0;
            Flist[f].scores[2] = 0.0;
            Flist[f].scores[3] = 0.0;
            Flist[f].htscores[0] = 0.0;
            Flist[f].htscores[1] = 0.0;
            Flist[f].htscores[2] = 0.0;
            Flist[f].htscores[3] = 0.0;
        }

        init_fragment_scores(snpfrag, Flist, hap, startnode, secondnode);

        pbuildmaxheap(&pheap, snpfrag, slist);
        //V = N-2;
        while (V > 0) // more than two clusters, this loop is O(N^2)
        {
            // take the heap's top element and assign it to a side by the sign of its score
            snp_add = pheap.elements[0];
            premovemax(&pheap, snpfrag, slist);
            fixheap = 0;
            //if (N < 30) fprintf(stdout,"standard best score %f snp %d %d V %d\n",snpfrag[slist[snp_add]].score,snp_add,slist[snp_add],V);
            if (snpfrag[slist[snp_add]].score > 0) snpfrag[slist[snp_add]].parent = startnode;
            else if (snpfrag[slist[snp_add]].score < 0) {
                if (secondnode < 0) {
                    // first node with a negative score becomes the root of the second cluster
                    secondnode = slist[snp_add];
                    //fprintf_time(stderr,"secondnode found %d %f V %d N %d\n",secondnode,snpfrag[slist[snp_add]].score,V,N);
                }
                snpfrag[slist[snp_add]].parent = secondnode;
            } else if (secondnode < 0) secondnode = slist[snp_add]; // score is 0 and there is no second cluster yet
            else // score is 0
            {
                // tie: choose a side at random
                if (drand48() < 0.5) snpfrag[slist[snp_add]].parent = startnode;
                else snpfrag[slist[snp_add]].parent = secondnode;
            }

            V--;
            update_fragment_scores(snpfrag, Flist, hap, startnode, secondnode, slist[snp_add], &pheap, slist);
            for (i = 0; i < N; i++) {
                if (DEBUG) fprintf(stdout, "score %d %f hap %c \n", slist[i], snpfrag[slist[i]].score, hap[slist[i]]);
            }
            if (DEBUG) fprintf(stdout, "init frag-scores %d...%d new node added %d parent %d\n\n", startnode, secondnode, slist[snp_add], snpfrag[slist[snp_add]].parent);
            // NOTE(review): fixheap is a local that is only ever set to 0 in
            // this function, so this rebuild looks unreachable.
            if (fixheap == 1) pbuildmaxheap(&pheap, snpfrag, slist);
        }
        if (secondnode == -1) continue; // cut is empty, so we should ignore this cut

        // compute score of the cut computed above
        // relabel parents as 0 (startnode's side) or 1 (everything else)
        for (i = 0; i < N; i++) {
            if (snpfrag[slist[i]].parent == startnode) snpfrag[slist[i]].parent = 0;
            else snpfrag[slist[i]].parent = 1;
        }
        c1 = 0;
        c2 = 0;
        for (i = 0; i < N; i++) {
            if (snpfrag[slist[i]].parent == 0) c1++;
            else c2++;
        }

        if (c1 == 0 || c2 == 0) {
            // one side is empty: terminates the whole program (exit status 0);
            // the message is only printed when DEBUG is set
            if (DEBUG) fprintf(stdout, " cut size is 0 red \n");
            exit(0);
        }
        curr_cut = cut_score(Flist, snpfrag, component, hap);
        // cut score returns difference between likelihood of current haplotype and new haplotype => smaller it is, better the cut
        if (DEBUG) fprintf(stdout, "cut size %d %d %f best %f\n", c1, c2, curr_cut, best_cut);
        // for new likelihood based cut score, the score of the cut should always be less than 0 since it is difference of the log-likelihoods of old and new haplotypes
        if (curr_cut < best_cut) // negative weight cut is better...
        {
            // remember this cut as the best so far
            best_cut = curr_cut;
            for (i = 0; i < N; i++) {
                if (snpfrag[slist[i]].parent == 1) bestmincut[i] = '1';
                else bestmincut[i] = '0';
            }
        }else{
            iters_since_improved_cut++;
        }

        // stop once more than CONVERGE attempts in total failed to improve the best cut
        if (iters_since_improved_cut > CONVERGE){
            break;
        }
    }
    // mark the '1' side of the best cut in slist by mapping x -> -x-1 (always negative)
    for (i = 0; i < N; i++) {
        if (bestmincut[i] == '1') slist[i] = -1 * slist[i] - 1;
    }
    free(bestmincut);
    free(pheap.elements);
    free(edgelist);
    return best_cut;
}
/**
 * Perform the topological sorting on the graph using an explicit-stack
 * depth-first search. Returned is the head of a topologically sorted list of
 * nodes, linked through their `next` pointers.
 *
 * @param node_array  array of graph nodes; each one is used as a DFS root
 *                    (from the last entry to the first) unless it is already
 *                    FINISHED or DELETED
 * @param count       number of entries in node_array
 * @param add_list    receives, via buffer_add(), the target/length of copies
 *                    that are deleted to break a cycle when inplace==1
 *
 * Edges are consumed (unlinked and freed) as they are traversed. When an
 * edge leads to a node that is still on the DFS stack a cycle has been
 * found; it is counted in stats.broken_cycles and cycles_broken and resolved
 * according to the global `inplace` mode (1: delete the destination copy,
 * 2: trim a copy with shrink_node()).
 */
static struct copy_graph_node * topo_sort( struct copy_graph_node ** node_array, int count, struct add_list * add_list)
{
    int j;
    struct copy_graph_node * topo_head = NULL;
    struct copy_stack * copy_top = NULL;

    for(j=count-1; j>=0; j--) {
        struct copy_graph_node * bottom_node;
        bottom_node = node_array[j];
        /* already handled by an earlier DFS, or removed: not a root */
        if(bottom_node->visited == FINISHED || bottom_node->visited == DELETED) {
            continue;
        }
        bottom_node->next = NULL;
        /* start a new DFS stack holding only this root */
        copy_top = (struct copy_stack * ) malloc(sizeof(struct copy_stack));
        update_extra_memory(sizeof(struct copy_stack));
        copy_top->below = NULL;
        copy_top->data = bottom_node;
        copy_top->data->visited = ON_STACK;

        while(copy_top != NULL) {
            /* always work on the node at the top of the stack and the first
             * edge left on its list */
            struct copy_graph_node * current_node = copy_top->data;
            struct copy_graph_edge * current_edge = copy_top->data->edge;
            if(verbose >3) {
                rprintf(FINFO, "looking at node src=%d target=%.0f\n", copy_top->data->u.copy->block, (double)copy_top->data->u.copy->target);
            }
            if(!current_edge) {
                struct copy_stack * temp_copy_top;
                /* no edges left: pop node off and put it on the front of the topo list */
                current_node->visited = FINISHED;
                /* put on topo list as finished (if not deleted)*/
                /* NOTE(review): no DELETED check is made here; a node marked
                 * DELETED while on the stack is overwritten with FINISHED and
                 * still linked into the list -- confirm this is intended. */
                // rprintf(FINFO,"Putting on to topo list: %08x\n",current_node);
                current_node->next = topo_head;
                topo_head = current_node;
                /* pop off stack */
                temp_copy_top = copy_top->below;
                update_extra_memory(-sizeof(struct copy_stack));
                free(copy_top);
                copy_top = temp_copy_top;
                if(verbose >4) {
                    rprintf(FINFO, "finished node\n");
                }
            } else {
                /* remove edge from list */
                copy_top->data->edge = current_edge->next;
                current_edge->dest->references--;
                /* is its target already on stack? */
                if(current_edge->dest->visited == ON_STACK) {
                    /* resolve cycle */
                    stats.broken_cycles++;
                    cycles_broken++;
                    if(inplace==1) {
                        /* drop the destination copy and record its range in add_list */
                        current_edge->dest->visited = DELETED;
                        buffer_add(add_list,current_edge->dest->u.copy->target, current_edge->dest->u.copy->length);
                        if(verbose >3 ) {
                            rprintf(FINFO,"Deleting copy src=%.0f len=%d\n", (double)current_edge->dest->u.copy->target, current_edge->dest->u.copy->length);
                        }
                    } else if(inplace==2) {
                        /* trim a copy; the starting candidate is the edge that closed the cycle */
                        struct copy_graph_node * best_src_node = current_node;
                        struct copy_graph_node * best_dest_node = current_edge->dest;
                        int min_edge_weight = edge_weight(best_src_node,best_dest_node);
                        struct copy_stack * current_stack_node = copy_top;
                        current_stack_node = copy_top;
                        /* iterate down through stack and find best edge to trim*/
                        /* NOTE(review): current_node was read from
                         * copy_top->data, so this condition is false on entry
                         * and the body never runs; the body also never moves
                         * current_stack_node down the stack. Presumably the
                         * scan was meant to stop at current_edge->dest --
                         * confirm. As written, the closing edge is always the
                         * one chosen. */
                        while(current_stack_node->data != current_node) {
                            int temp_weight = edge_weight(current_stack_node->below->data, current_stack_node->data);
                            if(temp_weight < min_edge_weight) {
                                best_src_node = current_stack_node->below->data;
                                best_dest_node = current_stack_node->data;
                                min_edge_weight = temp_weight;
                            }
                        }
                        /* restore stack to appropriate position */
                        /* Pops stack entries, marking their nodes UNVISITED and
                         * re-creating the edge from the entry below to the
                         * popped node. Being a do/while, the top entry is
                         * always popped before the stop condition is tested. */
                        current_stack_node = copy_top;
                        do {
                            struct copy_stack * temp_stack_node;
                            /*rprintf(FINFO,"Current_stack_node: %.8x data:%.8x\n", current_stack_node, current_stack_node->data);*/
                            current_stack_node->data->visited = UNVISITED;
                            /* NOTE(review): `below` is a struct copy_stack *
                             * while best_src_node is a struct copy_graph_node *;
                             * `below->data` may have been intended. */
                            if (current_stack_node->below && current_stack_node->below != best_src_node) {
                                /* NOTE(review): unlike the other edge
                                 * allocations, this one has no
                                 * update_extra_memory() call and does not
                                 * increment dest->references. */
                                current_edge = (struct copy_graph_edge *) malloc(sizeof(struct copy_graph_edge));
                                current_edge->dest = current_stack_node->data;
                                current_edge->next = current_stack_node->below->data->edge;
                                current_stack_node->below->data->edge = current_edge;
                            }
                            temp_stack_node = current_stack_node;
                            current_stack_node = current_stack_node->below;
                            update_extra_memory(-sizeof(struct copy_stack));
                            free(temp_stack_node);
                        } while(current_stack_node && current_stack_node->data != best_src_node);
                        copy_top = current_stack_node;
                        current_node = NULL;
                        current_edge = NULL;
                        /* NOTE(review): if any stack entry remains,
                         * current_edge now points at the new top's first edge,
                         * which is still linked in its list; the "free edge"
                         * step below would then free it while it is still
                         * reachable. The edge unlinked at the start of this
                         * branch is no longer referenced by current_edge and
                         * is not freed on this path. */
                        if(copy_top) {
                            current_node = copy_top->data;
                            current_edge = copy_top->data->edge;
                        }
                        if(verbose > 3) {
                            rprintf(FINFO,"Trimming copy " "weight=%d\n",edge_weight(best_src_node,best_dest_node));
                        }
                        shrink_node(best_src_node,best_dest_node);
                        /* after trimming, the two nodes should no longer conflict */
                        if(edge_weight(best_src_node,best_dest_node)>0) {
                            rprintf(FINFO, "dependency not fixed\n");
                        }
                    }
                } else if(current_edge->dest->visited == DELETED) {
                    /* destination was removed earlier: nothing to follow */
                } else if(current_edge->dest->visited != FINISHED && 0!=edge_weight(current_node, current_edge->dest)) {
                    struct copy_stack * new_copy_top;
                    /* push new node: unfinished destination that still conflicts with current_node */
                    new_copy_top = (struct copy_stack *) malloc(sizeof(struct copy_stack));
                    update_extra_memory(sizeof(struct copy_stack));
                    new_copy_top->below = copy_top;
                    new_copy_top->data = current_edge->dest;
                    new_copy_top->data->visited = ON_STACK;
                    copy_top = new_copy_top;
                }
                /* free edge */
                if(current_edge) {
                    update_extra_memory(-sizeof(struct copy_graph_edge));
                    free(current_edge);
                }
            }
        }
    }
    return topo_head;
}