void sortedge() { unsigned int index ; EDGE * sort_edge , * backup_edge ; sort_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) * ( num_ed + 1 ) ); backup_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) * ( num_ed + 1 ) ); unsigned int i = 1; for ( index = 1 ; index <= num_ed ; index ++ ) { sort_edge[i].from_vt = edge_array[index].from_vt; sort_edge[i].seq = edge_array[index].seq; sort_edge[i].to_vt = index; // record old id sort_edge[i].length = edge_array[index].length; i++; copyOneEdge ( & ( backup_edge[index] ) , & ( edge_array[index] ) ); if ( !EdSameAsTwin ( index ) ) { index++; copyOneEdge ( & ( backup_edge[index] ) , & ( edge_array[index] ) ); } } qsort ( & ( sort_edge[1] ), i - 1, sizeof ( sort_edge[1] ), cmp_seq ); index_array = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * ( num_ed + 1 ) ); // used to record new id unsigned int new_index = 1, old_index; for ( index = 1; index <= i - 1; index++ ) { old_index = sort_edge[index].to_vt; // old id sort_edge[index].seq = NULL; index_array[old_index] = new_index++;// old id -> new id if ( !EdSameAsTwin ( old_index ) ) { index_array[old_index + 1] = new_index++; // old id -> new id } } for ( index = 1; index <= num_ed; index++ ) { new_index = index_array[index]; copyOneEdge ( & ( edge_array[new_index] ), & ( backup_edge[index] ) ); updateArcToEd ( new_index ); } free ( index_array ); free ( sort_edge ); free ( backup_edge ); };
/*
 * Mark edge `edgeid` as deleted: coverage and length are set to 0, the
 * deleted flag is raised, and the multiplicity of every outgoing arc and of
 * that arc's bal_arc is set to 0.  Unless EdSameAsTwin(edgeid) holds, the
 * same is then done for getTwinEdge(edgeid).
 */
static void delete1contig(unsigned int edgeid)
{
    unsigned int target = edgeid;
    int pass;
    ARC *link;

    /* pass 0 handles the edge itself, pass 1 its twin (if it has one) */
    for (pass = 0; pass < 2; pass++) {
        edge_array[target].cvg = 0;
        edge_array[target].deleted = 1;
        edge_array[target].length = 0;

        for (link = edge_array[target].arcs; link; link = link->next) {
            link->multiplicity = 0;
            link->bal_arc->multiplicity = 0;
        }

        if (pass == 0) {
            if (EdSameAsTwin(edgeid)) {
                return;
            }
            target = getTwinEdge(edgeid);
        }
    }
}
void deleteShortContig(int cutLength) { unsigned int index; if(pool== NULL) pool= (int*)ckalloc (sizeof(int)*(num_ed+1)); int * poolid_length=(int*)ckalloc(sizeof(int)*(num_ed+1)); for(index=0; index<=num_ed; index++) { pool[index]=0; poolid_length[index]=0; } int poolid_index=1; COV_LIST * cov = (COV_LIST * ) ckalloc (sizeof(COV_LIST)*(num_ed+1)); for(index=1; index<=num_ed; index++) { cov[index].contig=index; cov[index].cov=edge_array[index].cvg; } qsort(&cov[1], num_ed, sizeof(COV_LIST), cmp_cov); for(index=1; index<=num_ed; index++) { poolid_length[poolid_index]=extern_contig(cov[index].contig,poolid_index); if(poolid_length[poolid_index]!=0) poolid_index++; } int num_delelte=0; for(index=1; index<=num_ed; index++) { if(poolid_length[pool[index]]<cutLength) { delete1contig(index); num_delelte++; } if(!EdSameAsTwin(index)) index++; } free(poolid_length); free(pool); free(cov); printf("%d short contig(<%d) removed \n",num_delelte,cutLength); removeArc(); }
/*
 * Delete every edge whose cvg is below `cutoff`, then call removeArc().
 *
 * cutoff: coverage threshold, capped at 30.  The progress message prints
 *         cutoff / 10 (presumably cvg is stored scaled by 10 -- confirm).
 */
void deleteWeakEdge(unsigned short cutoff)
{
    const unsigned short limit = cutoff > 30 ? 30 : cutoff;
    unsigned int id = 1;
    int removed = 0;

    printf("Start to remove the low coverage edge < %d\n", limit / 10);

    while (id <= num_ed) {
        if (edge_array[id].cvg < limit) {
            delete1contig(id);
            removed++;
        }

        /* an edge with a distinct twin occupies two slots */
        id += EdSameAsTwin(id) ? 1 : 2;
    }

    printf("%d edges removed\n\n", removed);
    removeArc();
}
//concatenate two edges if they are linearly linked void linearConcatenate () { unsigned int i; int conc_c = 1; int counter; unsigned int from_ed, to_ed, bal_ed; ARC *parc, *parc2; unsigned int bal_fe; //debugging(30514); while (conc_c) { conc_c = 0; counter = 0; for (i = 1; i <= num_ed; i++) //num_ed { if (edge_array[i].deleted || EdSameAsTwin (i)) { continue; } if (edge_array[i].length > 0) { counter++; } parc = edge_array[i].arcs; if (!parc || parc->next) { continue; } to_ed = parc->to_ed; bal_ed = getTwinEdge (to_ed); parc2 = edge_array[bal_ed].arcs; if (bal_ed == to_ed || !parc2 || parc2->next) { continue; } from_ed = i; if (from_ed == to_ed || from_ed == bal_ed) { continue; } //linear connection found conc_c++; linearUpdateConnection (from_ed, to_ed, 0); allpathUpdateEdge (from_ed, to_ed, 0); bal_fe = getTwinEdge (from_ed); linearUpdateConnection (bal_ed, bal_fe, 1); allpathUpdateEdge (bal_ed, bal_fe, 1); /* if(from_ed==6589||to_ed==6589) printf("%d <- %d (%d)\n",from_ed,to_ed,i); if(bal_fe==6589||bal_ed==6589) printf("%d <- %d (%d)\n",bal_fe,bal_ed,i); */ } printf ("a linear concatenation lap, %d concatenated\n", conc_c); } printf ("%d edges in graph\n", counter); }
/* - - > - < - - */ unsigned int solvable ( unsigned int edgeno ) { if ( EdSameAsTwin ( edgeno ) || edge_array[edgeno].multi == 255 ) { return 0; } unsigned int bal_ed = getTwinEdge ( edgeno ); unsigned int arcRight_n, arcLeft_n; unsigned int counter; unsigned int i, j; unsigned int branch, bal_branch; ARC * parcL, *parcR; parcL = arcCounts ( bal_ed, &arcLeft_n ); if ( arcLeft_n < 2 ) { return 0; } parcR = arcCounts ( edgeno, &arcRight_n ); if ( arcLeft_n != arcRight_n ) { return 0; } // check each right branch only has one upsteam connection /* if(edgeno==2551){ for(i=0;i<arcLeft_n;i++) printf("%d,",lefts[i]); printf("__left to %d\n",edgeno); for(j=0;j<arcRight_n;j++) printf("%d,",rights[j]); printf("__right to %d\n",edgeno); } */ arcRight_n = 0; while ( parcR ) { if ( parcR->to_ed == 0 ) { parcR = parcR->next; continue; } branch = parcR->to_ed; if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 ) { return 0; } rights[arcRight_n++] = branch; bal_branch = getTwinEdge ( branch ); arcCounts ( bal_branch, &counter ); if ( counter != 1 ) { return 0; } parcR = parcR->next; } // check if each left branch only has one downsteam connection arcLeft_n = 0; while ( parcL ) { if ( parcL->to_ed == 0 ) { parcL = parcL->next; continue; } branch = parcL->to_ed; if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 ) { return 0; } bal_branch = getTwinEdge ( branch ); lefts[arcLeft_n++] = bal_branch; arcCounts ( bal_branch, &counter ); if ( counter != 1 ) { return 0; } parcL = parcL->next; } //check if reads indicate one to one connection between upsteam and downstream edges for ( i = 0; i < arcLeft_n; i++ ) { counter = 0; for ( j = 0; j < arcRight_n; j++ ) { gothrough[i][j] = cntByReads ( lefts[i], edgeno, rights[j] ) == 0 ? 
0 : 1; counter += gothrough[i][j]; if ( counter > 1 ) { return 0; } } if ( counter != 1 ) { return 0; } } for ( j = 0; j < arcRight_n; j++ ) { counter = 0; for ( i = 0; i < arcLeft_n; i++ ) { counter += gothrough[i][j]; } if ( counter != 1 ) { return 0; } } return arcLeft_n; }
void swapedge() { unsigned int i; ARC * arc, *bal_arc, *temp_arc; int count_swap = 0, count_equal = 0; for ( i = 1; i <= num_ed; ++i ) { if ( edge_array[i].deleted || EdSameAsTwin ( i ) ) { continue; } if ( EdSmallerThanTwin ( i ) ) { if ( KmerLarger ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) ) { count_swap++; copyEdge ( i, num_ed + 1 + 1 ); copyEdge ( i + 1, num_ed + 1 ); copyEdge ( num_ed + 1, i ); copyEdge ( num_ed + 1 + 1, i + 1 ); edge_array[i].bal_edge = 2; edge_array[i + 1].bal_edge = 0; //take care of the arcs arc = edge_array[i].arcs; while ( arc ) { arc->bal_arc->to_ed = i + 1; arc = arc->next; } arc = edge_array[i + 1].arcs; while ( arc ) { arc->bal_arc->to_ed = i; arc = arc->next; } } else if ( KmerEqual ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) ) { int temp = EdgeEqual ( i, i + 1 ); if ( temp == 0 ) { count_equal++; edge_array[i].bal_edge = 1; delete1Edge ( i + 1 ); //take care of the arcs arc = edge_array[i].arcs; while ( arc ) { arc->bal_arc->to_ed = i; arc = arc->next; } bal_arc = edge_array[i + 1].arcs; edge_array[i + 1].arcs = NULL; while ( bal_arc ) { temp_arc = bal_arc; bal_arc = bal_arc->next; if ( edge_array[i].arcs ) { edge_array[i].arcs->prev = temp_arc; } temp_arc->next = edge_array[i].arcs; edge_array[i].arcs = temp_arc; } } else if ( temp > 0 ) { count_swap++; copyEdge ( i, num_ed + 1 + 1 ); copyEdge ( i + 1, num_ed + 1 ); copyEdge ( num_ed + 1, i ); copyEdge ( num_ed + 1 + 1, i + 1 ); edge_array[i].bal_edge = 2; edge_array[i + 1].bal_edge = 0; //take care of the arcs arc = edge_array[i].arcs; while ( arc ) { arc->bal_arc->to_ed = i + 1; arc = arc->next; } arc = edge_array[i + 1].arcs; while ( arc ) { arc->bal_arc->to_ed = i; arc = arc->next; } } } ++i; } else { delete1Edge ( i ); printf( "Warning : Front edge %d is larger than %d.\n", i, i + 1 ); } } printf( "%d none-palindrome edge(s) swapped, %d palindrome edge(s) processed.\n", count_swap, count_equal ); 
};
/*
 * Delete edges whose coverage is low compared with the coverage that
 * computeNextCov() reports on both sides, then call removeArc().
 *
 * For each edge that is not its own twin, computeNextCov() is evaluated for
 * the edge and for its twin; edges for which either value is 0 are left
 * alone.  With hi/lo the larger/smaller of the two values, the edge is
 * deleted when
 *     lo / hi <  0.1  and  cvg / lo < 0.5,   or
 *     lo / hi >= 0.1  and  cvg / hi < 0.05.
 * A "contig_cov" line is printed for every edge that is evaluated.
 *
 * Returns 1 when removeArc() returns a value greater than 0, else 0.
 */
int deleteLightContig()
{
    double upstream, downstream;
    double hi, lo, own;
    unsigned int id;
    int doomed;
    int removed;

    for (id = 1; id <= num_ed; id++) {
        if (EdSameAsTwin(id)) {
            continue;
        }

        computeNextCov(id, &downstream);
        computeNextCov(getTwinEdge(id), &upstream);

        /* note: the twin slot is not skipped on this path */
        if (downstream == 0 || upstream == 0) {
            continue;
        }

        hi = (downstream > upstream) ? downstream : upstream;
        lo = (downstream > upstream) ? upstream : downstream;
        own = (double) edge_array[id].cvg;
        printf("contig_cov:\t%0.1f\t%0.1f\t%0.1f\n", own, hi, lo);

        if (lo / hi < 0.1) {
            doomed = own / lo < 0.5;
        } else {
            doomed = own / hi < 0.05;
        }

        if (doomed) {
            delete1contig(id);
        }

        id++;   /* step over the twin */
    }

    removed = removeArc();
    return removed > 0 ? 1 : 0;
}
/*
 * Destroy short, low-coverage inner edges, then run removeDeadArcs(),
 * linearConcatenate() and compactEdgeArray().
 *
 * An edge is destroyed when it is not deleted, is not its own twin, has
 * 0 < cvg <= covCutoff * 10 and 0 < length < lenCutoff, and arcCount()
 * reports at least one arc both for the edge and for its twin.
 *
 * lenCutoff: edges of this length or longer are kept.
 * covCutoff: coverage threshold; compared against cvg after multiplying
 *            by 10.
 */
void removeLowCovEdges (int lenCutoff, unsigned short covCutoff)
{
    unsigned int id;
    unsigned int twin;
    unsigned int n_out, n_in;
    int destroyed = 0;

    for (id = 1; id <= num_ed; id++) {
        if (edge_array[id].deleted) {
            continue;
        }
        if (edge_array[id].cvg == 0 || edge_array[id].cvg > covCutoff * 10) {
            continue;
        }
        if (edge_array[id].length >= lenCutoff) {
            continue;
        }
        if (EdSameAsTwin(id) || edge_array[id].length == 0) {
            continue;
        }

        twin = getTwinEdge(id);
        arcCount(id, &n_out);
        arcCount(twin, &n_in);

        /* only inner edges, i.e. connected on both sides */
        if (n_in < 1 || n_out < 1) {
            continue;
        }

        destroyEdge(id);
        destroyed++;
    }

    printf ("Remove low coverage(%d): %d inner edges destroyed\n", covCutoff, destroyed);
    removeDeadArcs();
    linearConcatenate();
    compactEdgeArray();
}
void removeWeakEdges (int lenCutoff, unsigned int multiCutoff) { unsigned int bal_ed; unsigned int arcRight_n, arcLeft_n; ARC *arcLeft, *arcRight; unsigned int i; int counter = 0; for (i = 1; i <= num_ed; i++) { if (edge_array[i].deleted || edge_array[i].length == 0 || edge_array[i].length > lenCutoff || EdSameAsTwin (i)) { continue; } bal_ed = getTwinEdge (i); arcRight = arcCount (i, &arcRight_n); if (arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff) { continue; } arcLeft = arcCount (bal_ed, &arcLeft_n); if (arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff) { continue; } destroyEdge (i); counter++; } printf ("%d weak inner edges destroyed\n", counter); removeDeadArcs (); /* linearConcatenate(); compactEdgeArray(); */ }
void resetCov() { unsigned int index; if(pool== NULL) pool= (int*)ckalloc (sizeof(int)*(num_ed+1)); int * poolid_length=(int*)ckalloc(sizeof(int)*(num_ed+1)); for(index=0; index<=num_ed; index++) { pool[index]=0; poolid_length[index]=0; } int poolid_index=1; COV_LIST * cov = (COV_LIST * ) ckalloc (sizeof(COV_LIST)*(num_ed+1)); for(index=1; index<=num_ed; index++) { cov[index].contig=index; cov[index].cov=edge_array[index].cvg; } qsort(&cov[1], num_ed, sizeof(COV_LIST), cmp_cov); /* for(index=1;index<=num_ed;index++) { printf("cov:\t%d\n",cov[index].cov); } */ for(index=1; index<=num_ed; index++) { poolid_length[poolid_index]=extern_contig(cov[index].contig,poolid_index); if(poolid_length[poolid_index]!=0) poolid_index++; } int i; unsigned int *contig_cov = (unsigned int *) ckalloc ( sizeof(unsigned int ) * (poolid_index + 1)); unsigned int *contig_length = (unsigned int *) ckalloc ( sizeof(unsigned int ) * (poolid_index + 1)); for(i=1; i<poolid_index; i++) { contig_cov[i]=0; contig_length[i]=0; } for(index=1; index<=num_ed; index++) { contig_cov[pool[index]]+=edge_array[index].cvg *edge_array[index].length; contig_length[pool[index]]+=edge_array[index].length; if(!(EdSameAsTwin(index))) index++; } for(i=1; i<poolid_index; i++) { if(contig_length[i]>0) contig_cov[i] /= contig_length[i]; else printf("pool length == 0\n"); } for(index=1; index<=num_ed; index++) { edge_array[index].cvg = contig_cov[pool[index]]; } free(cov); free(pool); pool=NULL; free(poolid_length); free(contig_cov); free(contig_length); }