/*
 * Mark edge `edgeid` -- and its twin, when the edge is not its own twin --
 * as removed: coverage and length are zeroed, the deleted flag is set, and
 * every arc leaving the edge gets multiplicity 0 together with its
 * balancing arc.
 */
static void delete1contig(unsigned int edgeid)
{
	unsigned int victim = edgeid;
	int pass;

	for (pass = 0; pass < 2; pass++) {
		ARC *cursor;

		edge_array[victim].cvg = 0;
		edge_array[victim].deleted = 1;
		edge_array[victim].length = 0;

		for (cursor = edge_array[victim].arcs; cursor; cursor = cursor->next) {
			cursor->multiplicity = 0;
			cursor->bal_arc->multiplicity = 0;
		}

		/* Twin already handled, or the edge is its own twin: done. */
		if (pass == 1 || EdSameAsTwin(edgeid)) {
			return;
		}

		victim = getTwinEdge(edgeid);
	}
}
/*
 * Re-route the path leftEd -> source -> rightEd so that it runs through
 * `target` instead of `source`, and do the same for the twin path
 * bal_right -> bal_source -> bal_left through the twin of `target`.
 *
 * The four old arcs are not unlinked; they only get to_ed = 0.
 * Four new arcs are allocated and inherit the old multiplicities.
 *
 * NOTE(review): the results of getArcBetween() are dereferenced without a
 * NULL check, so all four arcs presumably must exist -- confirm at callers.
 * NOTE(review): the arc lists of `target` and of its twin are overwritten,
 * not prepended to, so both are presumably empty on entry -- confirm.
 */
static void moveArc2cp ( unsigned int leftEd, unsigned int rightEd, unsigned int source, unsigned int target )
{
	unsigned int bal_left = getTwinEdge ( leftEd );
	unsigned int bal_right = getTwinEdge ( rightEd );
	unsigned int bal_source = getTwinEdge ( source );
	unsigned int bal_target = getTwinEdge ( target );
	ARC * arc;
	ARC * newArc, *twinArc;
	//between left and source
	arc = getArcBetween ( leftEd, source );
	arc->to_ed = 0;                       // retire the old leftEd -> source arc
	newArc = allocateArc ( target );      // replacement: leftEd -> target
	newArc->multiplicity = arc->multiplicity;
	// push the new arc onto the front of leftEd's arc list
	newArc->prev = NULL;
	newArc->next = edge_array[leftEd].arcs;

	if ( edge_array[leftEd].arcs )
	{
		edge_array[leftEd].arcs->prev = newArc;
	}

	edge_array[leftEd].arcs = newArc;
	arc = getArcBetween ( bal_source, bal_left );
	arc->to_ed = 0;                       // retire the old bal_source -> bal_left arc
	twinArc = allocateArc ( bal_left );   // replacement: bal_target -> bal_left
	twinArc->multiplicity = arc->multiplicity;
	twinArc->prev = NULL;
	twinArc->next = NULL;
	edge_array[bal_target].arcs = twinArc; // becomes the only arc of bal_target
	// cross-link the new arc with its balancing arc
	newArc->bal_arc = twinArc;
	twinArc->bal_arc = newArc;
	//between source and right
	arc = getArcBetween ( source, rightEd );
	arc->to_ed = 0;                       // retire the old source -> rightEd arc
	newArc = allocateArc ( rightEd );     // replacement: target -> rightEd
	newArc->multiplicity = arc->multiplicity;
	newArc->prev = NULL;
	newArc->next = NULL;
	edge_array[target].arcs = newArc;     // becomes the only arc of target
	arc = getArcBetween ( bal_right, bal_source );
	arc->to_ed = 0;                       // retire the old bal_right -> bal_source arc
	twinArc = allocateArc ( bal_target ); // replacement: bal_right -> bal_target
	twinArc->multiplicity = arc->multiplicity;
	// push the new arc onto the front of bal_right's arc list
	twinArc->prev = NULL;
	twinArc->next = edge_array[bal_right].arcs;

	if ( edge_array[bal_right].arcs )
	{
		edge_array[bal_right].arcs->prev = twinArc;
	}

	edge_array[bal_right].arcs = twinArc;
	// cross-link the new arc with its balancing arc
	newArc->bal_arc = twinArc;
	twinArc->bal_arc = newArc;
}
Idx VoronoiDiagram<CoordT>::addVertex( const PointT& pos, Idx edge1, Idx edge2, Idx edge3 )
{
  // Store the position and make the new vertex the end point of all
  // three given edges.
  const Idx created = vertex_pos_.add(pos);
  const Idx ring[3] = { edge1, edge2, edge3 };

  for (int k = 0; k < 3; ++k)
  {
    edges_[ring[k]].end_vertex = created;
  }

  // Each edge is chained to the twin of its predecessor in the cycle
  // edge1 -> edge2 -> edge3 -> edge1.
  for (int k = 0; k < 3; ++k)
  {
    consecutiveEdges(ring[k], getTwinEdge(ring[(k + 2) % 3]));
  }

  return created;
}
/*
 * Remove edge `edgeid` and its twin from the graph.
 *
 * For every arc leaving either edge, the balancing arc gets to_ed = 0.
 * Both edges then lose their arc lists, get length 0 and are flagged
 * deleted. An edge that is its own twin only has its length set to 0.
 */
void destroyEdge (unsigned int edgeid)
{
	unsigned int twin = getTwinEdge (edgeid);
	ARC *walker;

	if (twin == edgeid)
	{
		edge_array[edgeid].length = 0;
		return;
	}

	for (walker = edge_array[edgeid].arcs; walker; walker = walker->next)
	{
		walker->bal_arc->to_ed = 0;
	}

	for (walker = edge_array[twin].arcs; walker; walker = walker->next)
	{
		walker->bal_arc->to_ed = 0;
	}

	edge_array[edgeid].arcs = NULL;
	edge_array[twin].arcs = NULL;
	edge_array[edgeid].length = 0;
	edge_array[twin].length = 0;
	edge_array[edgeid].deleted = 1;
	edge_array[twin].deleted = 1;
}
static unsigned int deleteLightFlowArc(double min_arc_rate) { unsigned int index,twin,count=0; unsigned int total_in_weight,total_out_weight,coverage; ARC * arc,*next_arc,*twin_arc; unsigned int to_ed,twin_te; for(index=1; index<=num_ed; index++) { total_in_weight=0; total_out_weight=0; twin = getTwinEdge(index); coverage = (double)edge_array[index].cvg/10; arc = edge_array[index].arcs; while(arc) { total_out_weight += arc->multiplicity; arc=arc->next; } arc = edge_array[twin].arcs; while(arc) { total_in_weight += arc->multiplicity; arc=arc->next; } arc = edge_array[index].arcs; while(arc) { next_arc = arc->next; to_ed = arc ->to_ed; if(arc->multiplicity != 0 && arc->multiplicity <= (double)total_in_weight*min_arc_rate || arc->multiplicity <= (double)coverage*min_arc_rate) { twin_arc=arc->bal_arc; arc->multiplicity=0; twin_arc->multiplicity=0; count++; } arc=next_arc; } arc = edge_array[twin].arcs; while(arc) { next_arc = arc->next; to_ed = arc ->to_ed; if(arc->multiplicity != 0 && arc->multiplicity <= (double)total_out_weight*min_arc_rate || arc->multiplicity <= (double)coverage*min_arc_rate) { twin_arc=arc->bal_arc; arc->multiplicity=0; twin_arc->multiplicity=0; count++; } arc=next_arc; } if(twin != index) index++; } return count; }
/*************************************************
Function:
    compactEdgeArray
Description:
    Slides every non-deleted edge down to the lowest free slot of
    edge_array and shrinks num_ed to the number of edges kept.
Input:
    None.
Output:
    None.
Return:
    None.
*************************************************/
void compactEdgeArray ()
{
	unsigned int src = 1;
	unsigned int kept = 0;

	fprintf ( stderr, "Before compacting, %d edge(s) existed.\n", num_ed );

	while ( src <= num_ed )
	{
		if ( !edge_array[src].deleted )
		{
			kept++;

			/* An edge already sitting in its final slot needs no move. */
			if ( src != kept )
			{
				/* Look the twin up before edgeMove() touches the edge. */
				unsigned int twin = getTwinEdge ( src );

				edgeMove ( src, kept );

				/* A distinct twin was moved along with the edge; step over it. */
				if ( twin != src )
				{
					src++;
					kept++;
				}
			}
		}

		src++;
	}

	num_ed = kept;
	fprintf ( stderr, "After compacting, %d edge(s) left.\n", num_ed );
}
/*
 * Destroy short, low-coverage inner edges, then tidy the graph.
 *
 * An edge is destroyed when it is live, has non-zero coverage of at most
 * covCutoff * 10, has a non-zero length below lenCutoff, is not its own
 * twin, and arcCount() reports at least one arc for both the edge and its
 * twin. Afterwards removeDeadArcs(), linearConcatenate() and
 * compactEdgeArray() are run.
 */
void removeLowCovEdges (int lenCutoff, unsigned short covCutoff)
{
	unsigned int ed;
	int destroyed = 0;

	for (ed = 1; ed <= num_ed; ed++)
	{
		unsigned int twin;
		unsigned int nOut, nIn;

		if (edge_array[ed].deleted || edge_array[ed].cvg == 0)
		{
			continue;
		}

		if (edge_array[ed].cvg > covCutoff * 10)
		{
			continue;
		}

		if (edge_array[ed].length >= lenCutoff)
		{
			continue;
		}

		if (EdSameAsTwin (ed) || edge_array[ed].length == 0)
		{
			continue;
		}

		twin = getTwinEdge (ed);
		arcCount (ed, &nOut);
		arcCount (twin, &nIn);

		/* Only inner edges, with arcs on both sides, are removed. */
		if (nIn >= 1 && nOut >= 1)
		{
			destroyEdge (ed);
			destroyed++;
		}
	}

	printf ("Remove low coverage(%d): %d inner edges destroyed\n", covCutoff, destroyed);
	removeDeadArcs ();
	linearConcatenate ();
	compactEdgeArray ();
}
/*
 * Pack all non-deleted edges into the low end of edge_array and set num_ed
 * to the number of edges kept. Edge counts before and after go to stdout.
 */
void compactEdgeArray()
{
	unsigned int scan;
	unsigned int packed = 0;

	printf ( "there're %d edges\n", num_ed );

	for ( scan = 1; scan <= num_ed; scan++ )
	{
		unsigned int twin;

		if ( edge_array[scan].deleted )
		{
			continue;
		}

		if ( ++packed == scan )
		{
			/* Already in its final slot. */
			continue;
		}

		/* Fetch the twin before the edge is relocated. */
		twin = getTwinEdge ( scan );
		edgeMove ( scan, packed );

		if ( twin == scan )
		{
			continue;
		}

		/* edgeMove() relocated the twin as well: consume its slots too. */
		scan++;
		packed++;
	}

	num_ed = packed;
	printf ( "after compacting %d edges left\n", num_ed );
}
/*
 * Collect `edgeno` plus the first `repTimes` entries of lefts[] and
 * rights[] into involved[], then report whether the set interferes with
 * itself.
 *
 * Returns 1 if any edge occurs twice in the set, or if the twin of any
 * member is also a member; returns 0 otherwise.
 */
static boolean interferingCheck ( unsigned int edgeno, int repTimes )
{
	int total = 0;
	int a, b;
	unsigned int twin;

	involved[total++] = edgeno;

	for ( a = 0; a < repTimes; a++ )
	{
		involved[total++] = lefts[a];
	}

	for ( a = 0; a < repTimes; a++ )
	{
		involved[total++] = rights[a];
	}

	/* Duplicates: compare every unordered pair once. */
	for ( a = 0; a + 1 < total; a++ )
	{
		for ( b = a + 1; b < total; b++ )
		{
			if ( involved[a] == involved[b] )
			{
				return 1;
			}
		}
	}

	/* Twins: no member's twin may itself be a member. */
	for ( a = 0; a < total; a++ )
	{
		twin = getTwinEdge ( involved[a] );

		for ( b = 0; b < total; b++ )
		{
			if ( involved[b] == twin )
			{
				return 1;
			}
		}
	}

	return 0;
}
/*
 * Write the live edges of the graph to "<outfile>.edge.gvz" as a Graphviz
 * digraph.
 *
 * Each non-deleted edge becomes one line "V<from_vt> -> V<to_vt>" labelled
 * "<edge index>(<length>)". Edges are written from index num_ed down to 1.
 *
 * outfile: prefix of the output file name. If the full name does not fit
 *          the internal buffer, an error is printed to stderr and nothing
 *          is written.
 */
void output_graph ( char * outfile )
{
	char name[256];
	FILE * fp;
	unsigned int i;
	int written;

	/* Bounded formatting: the original sprintf could overrun name[]. */
	written = snprintf ( name, sizeof ( name ), "%s.edge.gvz", outfile );

	if ( written < 0 || ( size_t ) written >= sizeof ( name ) )
	{
		fprintf ( stderr, "output_graph: output prefix too long: %s\n", outfile );
		return;
	}

	fp = ckopen ( name, "w" );
	fprintf ( fp, "digraph G{\n" );
	fprintf ( fp, "\tsize=\"512,512\";\n" );

	for ( i = num_ed; i > 0; i-- )
	{
		if ( edge_array[i].deleted )
		{
			continue;
		}

		fprintf ( fp, "\tV%d -> V%d[label =\"%d(%d)\"];\n", edge_array[i].from_vt, edge_array[i].to_vt, i, edge_array[i].length );
	}

	fprintf ( fp, "}\n" );
	fclose ( fp );
}
//a path from e1 to e2 is merged int to e1(indicate=0) or e2(indicate=1), update graph topology void linearUpdateConnection (unsigned int e1, unsigned int e2, int indicate) { unsigned int bal_ed; ARC *parc; if (!indicate) { edge_array[e1].to_vt = edge_array[e2].to_vt; bal_ed = getTwinEdge (e1); parc = edge_array[e2].arcs; while (parc) { parc->bal_arc->to_ed = bal_ed; parc = parc->next; } edge_array[e1].arcs = edge_array[e2].arcs; edge_array[e2].arcs = NULL; if (edge_array[e1].length || edge_array[e2].length) edge_array[e1].cvg = (edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length) / (edge_array[e1].length + edge_array[e2].length); edge_array[e2].deleted = 1; } else { //all the arcs pointing to e1 switch to e2 parc = edge_array[getTwinEdge (e1)].arcs; while (parc) { parc->bal_arc->to_ed = e2; parc = parc->next; } edge_array[e1].arcs = NULL; edge_array[e2].from_vt = edge_array[e1].from_vt; if (edge_array[e1].length || edge_array[e2].length) edge_array[e2].cvg = (edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length) / (edge_array[e1].length + edge_array[e2].length); edge_array[e1].deleted = 1; } }
/*************************************************
Function:
    add1marker2edge
Description:
    Appends the read id to the marker list of the edge and the negated
    read id to the marker list of its twin. Nothing is recorded when the
    edge's marker count is the sentinel value 255.
Input:
    1. edgeno:      the edge index
    2. readid:      the read id
Output:
    None.
Return:
    None.
*************************************************/
static void add1marker2edge ( unsigned int edgeno, long long readid )
{
	unsigned int twin;
	unsigned char slot;

	if ( edge_array[edgeno].multi == 255 )
	{
		return;
	}

	twin = getTwinEdge ( edgeno );

	/* Forward strand: take the next free slot, then bump the count. */
	slot = edge_array[edgeno].multi;
	edge_array[edgeno].multi++;
	edge_array[edgeno].markers[slot] = readid;

	/* Twin strand: same read, stored with the opposite sign. */
	slot = edge_array[twin].multi;
	edge_array[twin].multi++;
	edge_array[twin].markers[slot] = -readid;
}
static unsigned int cp1edge ( unsigned int source, unsigned int target ) { int length = edge_array[source].length; char * tightSeq; int index; unsigned int bal_source = getTwinEdge ( source ); unsigned int bal_target; if ( bal_source > source ) { bal_target = target + 1; } else { bal_target = target; target = target + 1; } tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) ); for ( index = 0; index < length / 4 + 1; index++ ) { tightSeq[index] = edge_array[source].seq[index]; } edge_array[target].length = length; edge_array[target].cvg = edge_array[source].cvg; edge_array[target].to_vt = edge_array[source].to_vt; edge_array[target].from_vt = edge_array[source].from_vt; edge_array[target].seq = tightSeq; edge_array[target].bal_edge = edge_array[source].bal_edge; edge_array[target].rv = NULL; edge_array[target].arcs = NULL; edge_array[target].markers = NULL; edge_array[target].flag = 0; edge_array[target].deleted = 0; tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) ); for ( index = 0; index < length / 4 + 1; index++ ) { tightSeq[index] = edge_array[bal_source].seq[index]; } edge_array[bal_target].length = length; edge_array[bal_target].cvg = edge_array[bal_source].cvg; edge_array[bal_target].to_vt = edge_array[bal_source].to_vt; edge_array[bal_target].from_vt = edge_array[bal_source].from_vt; edge_array[bal_target].seq = tightSeq; edge_array[bal_target].bal_edge = edge_array[bal_source].bal_edge; edge_array[bal_target].rv = NULL; edge_array[bal_target].arcs = NULL; edge_array[bal_target].markers = NULL; edge_array[bal_target].flag = 0; edge_array[bal_target].deleted = 0; return target; }
//move edge from source to target void edgeMove (unsigned int source, unsigned int target) { unsigned int bal_source, bal_target; ARC *arc; copyEdge (source, target); bal_source = getTwinEdge (source); //bal_edge if (bal_source != source) { bal_target = target + 1; copyEdge (bal_source, bal_target); edge_array[target].bal_edge = 2; edge_array[bal_target].bal_edge = 0; } else { edge_array[target].bal_edge = 1; bal_target = target; } //take care of the arcs arc = edge_array[target].arcs; while (arc) { arc->bal_arc->to_ed = bal_target; arc = arc->next; } if (bal_target == target) { return; } arc = edge_array[bal_target].arcs; while (arc) { arc->bal_arc->to_ed = target; arc = arc->next; } }
void delowHighArc(int multi) { unsigned int i, twin,to_edge,count = 0; ARC *arc, *arc_temp; unsigned int in_weight,out_weight,curr_weight; for (i = 1; i <= num_ed; i++) { in_weight=0; curr_weight=0; //获取i的in_flow权重 twin=getTwinEdge(i); arc=edge_array[twin].arcs; while(arc) { in_weight += arc->multiplicity; arc=arc->next; } arc = edge_array[i].arcs; while (arc) { curr_weight = arc->multiplicity; to_edge = arc->to_ed; arc_temp = edge_array[to_edge].arcs; out_weight=0; while(arc_temp) { out_weight += arc_temp->multiplicity; arc_temp=arc_temp->next; } if( in_weight != 0 && curr_weight !=0 && curr_weight > in_weight*multi && curr_weight > out_weight*multi) { count++; arc->multiplicity= in_weight > out_weight ? in_weight : out_weight; } arc=arc->next; } } // printf("delow arc : %d\n",count); }
void removeWeakEdges (int lenCutoff, unsigned int multiCutoff) { unsigned int bal_ed; unsigned int arcRight_n, arcLeft_n; ARC *arcLeft, *arcRight; unsigned int i; int counter = 0; for (i = 1; i <= num_ed; i++) { if (edge_array[i].deleted || edge_array[i].length == 0 || edge_array[i].length > lenCutoff || EdSameAsTwin (i)) { continue; } bal_ed = getTwinEdge (i); arcRight = arcCount (i, &arcRight_n); if (arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff) { continue; } arcLeft = arcCount (bal_ed, &arcLeft_n); if (arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff) { continue; } destroyEdge (i); counter++; } printf ("%d weak inner edges destroyed\n", counter); removeDeadArcs (); /* linearConcatenate(); compactEdgeArray(); */ }
static int extern_contig(unsigned int edgeid,int pool_index) { if(pool[edgeid]!=0) return 0; pool[edgeid]=pool_index; pool[ getTwinEdge(edgeid)]=pool_index; int length=0; length += edge_array[edgeid].length; ARC *arc; unsigned int best_id; int max_arc; unsigned int curr_edge= edgeid; while(curr_edge) { max_arc=0; arc = edge_array[edgeid].arcs; while(arc) { if(pool[arc->to_ed] ==0) { if(arc->multiplicity > max_arc) { max_arc=arc->multiplicity; best_id=arc->to_ed; } } arc=arc->next; } if(max_arc>0) { pool[best_id]=pool_index; pool[getTwinEdge(best_id)]=pool_index; length += edge_array[best_id].length; curr_edge=best_id; } else curr_edge=0; } curr_edge= getTwinEdge(edgeid); while(curr_edge) { max_arc=0; arc = edge_array[edgeid].arcs; while(arc) { if(pool[arc->to_ed] ==0) { if(arc->multiplicity > max_arc) { max_arc=arc->multiplicity; best_id=arc->to_ed; } } arc=arc->next; } if(max_arc>0) { pool[best_id]=pool_index; pool[getTwinEdge(best_id)]=pool_index; length += edge_array[best_id].length; curr_edge=best_id; } else curr_edge=0; } return length; }
/*
 * Sort the edges, print length statistics, and write every live edge to
 * "<outfile>.contig" plus an index table to "<outfile>.ContigIndex".
 *
 * ed_array: edge array, entries 1..ed_num; sorted in place with cmp_edge.
 * ed_num:   number of edges.
 * outfile:  prefix of both output files. If a file name would not fit the
 *           internal buffer, an error is printed to stderr and nothing is
 *           written.
 * cut_len:  only echoed in the summary message.
 *
 * NOTE(review): the ContigIndex loop reads the globals num_ed / edge_array
 * rather than the ed_num / ed_array parameters; left unchanged because
 * callers may rely on it -- confirm the two always coincide.
 */
void output_contig ( EDGE * ed_array, unsigned int ed_num, char * outfile, int cut_len )
{
	char temp[256];
	FILE * fp, *fp_contig;
	int flag, count, len_c;
	int signI;
	int name_len;
	unsigned int i;
	long long sum = 0, N90, N50;
	unsigned int * length_array;
	boolean tip;

	/* ".ContigIndex" is the longer of the two suffixes: if it fits,
	   ".contig" fits as well. The original sprintf could overrun temp[]. */
	name_len = snprintf ( temp, sizeof ( temp ), "%s.ContigIndex", outfile );

	if ( name_len < 0 || ( size_t ) name_len >= sizeof ( temp ) )
	{
		fprintf ( stderr, "output_contig: output prefix too long: %s\n", outfile );
		return;
	}

	snprintf ( temp, sizeof ( temp ), "%s.contig", outfile );
	fp = ckopen ( temp, "w" );
	qsort ( &ed_array[1], ed_num, sizeof ( EDGE ), cmp_edge );
	length_array = ( unsigned int * ) ckalloc ( ed_num * sizeof ( unsigned int ) );
	kmerSeq = ( char * ) ckalloc ( overlaplen * sizeof ( char ) );
	//first scan for number counting
	count = len_c = 0;

	for ( i = 1; i <= ed_num; i++ )
	{
		if ( ( ed_array[i].length + overlaplen ) >= len_bar )
		{
			length_array[len_c++] = ed_array[i].length + overlaplen;
		}

		if ( ed_array[i].length < 1 || ed_array[i].deleted )
		{
			continue;
		}

		count++;

		if ( EdSmallerThanTwin ( i ) )
		{
			i++;
		}
	}

	sum = 0;

	for ( signI = len_c - 1; signI >= 0; signI-- )
	{
		sum += length_array[signI];
	}

	if ( len_c > 0 )
	{
		printf ( "%d ctgs longer than %d, sum up %lldbp, with average length %lld\n", len_c, len_bar, sum, sum / len_c );
	}

	qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );

	/* With no qualifying contig, length_array[len_c - 1] would be read
	   out of bounds, so the line is printed only when len_c > 0. */
	if ( len_c > 0 )
	{
		printf ( "the longest is %dbp, ", length_array[len_c - 1] );
	}

	N50 = sum * 0.5;
	N90 = sum * 0.9;
	sum = flag = 0;

	for ( signI = len_c - 1; signI >= 0; signI-- )
	{
		sum += length_array[signI];

		if ( !flag && sum >= N50 )
		{
			printf ( "contig N50 is %d bp,", length_array[signI] );
			flag = 1;
		}

		if ( sum >= N90 )
		{
			printf ( "contig N90 is %d bp\n", length_array[signI] );
			break;
		}
	}

	for ( i = 1; i <= ed_num; i++ )
	{
		if ( ed_array[i].deleted || ed_array[i].length < 1 )
		{
			continue;
		}

		/* An edge with no arcs on at least one side is flagged as a tip. */
		if ( ed_array[i].arcs && ed_array[getTwinEdge ( i )].arcs )
		{
			tip = 0;
		}
		else
		{
			tip = 1;
		}

		output_1contig ( i, & ( ed_array[i] ), fp, tip );

		if ( EdSmallerThanTwin ( i ) )
		{
			i++;
		}
	}

	fclose ( fp );
	free ( ( void * ) kmerSeq );
	free ( ( void * ) length_array );
	printf ( "%d contigs longer than %d output\n", count, cut_len );
	/* Fits: the same name was checked at the top of the function. */
	snprintf ( temp, sizeof ( temp ), "%s.ContigIndex", outfile );
	fp_contig = ckopen ( temp, "w" );
	fprintf ( fp_contig, "Edge_num %d %d\n", ed_num, count );
	fprintf ( fp_contig, "index\tlength\treverseComplement\n" );

	for ( i = 1; i <= num_ed; i++ )
	{
		fprintf ( fp_contig, "%d\t%d\t", i, edge_array[i].length + overlaplen );

		if ( EdSmallerThanTwin ( i ) )
		{
			fprintf ( fp_contig, "1\n" );
			i++;
		}
		else if ( EdLargerThanTwin ( i ) )
		{
			fprintf ( fp_contig, "-1\n" );
		}
		else
		{
			fprintf ( fp_contig, "0\n" );
		}
	}

	fclose ( fp_contig );
}
/************************************************* Function: loadPath Description: 1. Loads the path info. 2. Records the ids of reads crossing edges. Input: 1. graphfile: the input prefix Output: None. Return: None. *************************************************/ boolean loadPath ( char *graphfile ) { FILE *fp; char name[256], line[1024]; unsigned int i, bal_ed, num1, edgeno, num2; long long markCounter = 0, readid = 0; char *seg; sprintf ( name, "%s.markOnEdge", graphfile ); fp = fopen ( name, "r" ); if ( !fp ) { return 0; } for ( i = 1; i <= num_ed; i++ ) { edge_array[i].multi = 0; } for ( i = 1; i <= num_ed; i++ ) { fscanf ( fp, "%d", &num1 ); if ( EdSmallerThanTwin ( i ) ) { fscanf ( fp, "%d", &num2 ); bal_ed = getTwinEdge ( i ); if ( num1 + num2 >= 255 ) { edge_array[i].multi = 255; edge_array[bal_ed].multi = 255; } else { edge_array[i].multi = num1 + num2; edge_array[bal_ed].multi = num1 + num2; markCounter += 2 * ( num1 + num2 ); } i++; } else { if ( 2 * num1 >= 255 ) { edge_array[i].multi = 255; } else { edge_array[i].multi = 2 * num1; markCounter += 2 * num1; } } } fclose ( fp ); fprintf ( stderr, "%lld markers overall.\n", markCounter ); markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) ); markCounter = 0; for ( i = 1; i <= num_ed; i++ ) { if ( edge_array[i].multi == 255 ) { continue; } edge_array[i].markers = markersArray + markCounter; markCounter += edge_array[i].multi; edge_array[i].multi = 0; } sprintf ( name, "%s.path", graphfile ); fp = fopen ( name, "r" ); if ( !fp ) { return 0; } while ( fgets ( line, sizeof ( line ), fp ) != NULL ) { //printf("%s",line); readid++; seg = strtok ( line, " " ); while ( seg ) { edgeno = atoi ( seg ); //printf("%s, %d\n",seg,edgeno); add1marker2edge ( edgeno, readid ); seg = strtok ( NULL, " " ); } } fclose ( fp ); markCounter = 0; for ( i = 1; i <= num_ed; i++ ) { if ( edge_array[i].multi == 255 ) { continue; } markCounter += edge_array[i].multi; } fprintf ( stderr, "%lld marks 
loaded.\n", markCounter ); return 1; }
/* - - > - < - - */ unsigned int solvable ( unsigned int edgeno ) { if ( EdSameAsTwin ( edgeno ) || edge_array[edgeno].multi == 255 ) { return 0; } unsigned int bal_ed = getTwinEdge ( edgeno ); unsigned int arcRight_n, arcLeft_n; unsigned int counter; unsigned int i, j; unsigned int branch, bal_branch; ARC * parcL, *parcR; parcL = arcCounts ( bal_ed, &arcLeft_n ); if ( arcLeft_n < 2 ) { return 0; } parcR = arcCounts ( edgeno, &arcRight_n ); if ( arcLeft_n != arcRight_n ) { return 0; } // check each right branch only has one upsteam connection /* if(edgeno==2551){ for(i=0;i<arcLeft_n;i++) printf("%d,",lefts[i]); printf("__left to %d\n",edgeno); for(j=0;j<arcRight_n;j++) printf("%d,",rights[j]); printf("__right to %d\n",edgeno); } */ arcRight_n = 0; while ( parcR ) { if ( parcR->to_ed == 0 ) { parcR = parcR->next; continue; } branch = parcR->to_ed; if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 ) { return 0; } rights[arcRight_n++] = branch; bal_branch = getTwinEdge ( branch ); arcCounts ( bal_branch, &counter ); if ( counter != 1 ) { return 0; } parcR = parcR->next; } // check if each left branch only has one downsteam connection arcLeft_n = 0; while ( parcL ) { if ( parcL->to_ed == 0 ) { parcL = parcL->next; continue; } branch = parcL->to_ed; if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 ) { return 0; } bal_branch = getTwinEdge ( branch ); lefts[arcLeft_n++] = bal_branch; arcCounts ( bal_branch, &counter ); if ( counter != 1 ) { return 0; } parcL = parcL->next; } //check if reads indicate one to one connection between upsteam and downstream edges for ( i = 0; i < arcLeft_n; i++ ) { counter = 0; for ( j = 0; j < arcRight_n; j++ ) { gothrough[i][j] = cntByReads ( lefts[i], edgeno, rights[j] ) == 0 ? 
0 : 1; counter += gothrough[i][j]; if ( counter > 1 ) { return 0; } } if ( counter != 1 ) { return 0; } } for ( j = 0; j < arcRight_n; j++ ) { counter = 0; for ( i = 0; i < arcLeft_n; i++ ) { counter += gothrough[i][j]; } if ( counter != 1 ) { return 0; } } return arcLeft_n; }
boolean isUnreliableTip_strict (unsigned int edgeid, int cutLen) { unsigned int arcRight_n, arcLeft_n; unsigned int bal_ed; unsigned int currentEd = edgeid; int length = 0; unsigned int mult = 0; ARC *arc, *activeArc = NULL, *tempArc; if (edgeid == 0) { return 0; } bal_ed = getTwinEdge (edgeid); if (bal_ed == edgeid) { return 0; } arcCount (bal_ed, &arcLeft_n); if (arcLeft_n > 0) { return 0; } while (currentEd) { arcCount (bal_ed, &arcLeft_n); tempArc = arcCount (currentEd, &arcRight_n); if (arcLeft_n > 1 || arcRight_n > 1) { if (arcLeft_n == 0 || length == 0) { return 0; } else { break; } } length += edge_array[currentEd].length; if (length >= cutLen) { return 0; } if (tempArc) { activeArc = tempArc; currentEd = activeArc->to_ed; bal_ed = getTwinEdge (currentEd); } else { currentEd = 0; } } if (currentEd == 0) { caseA++; return 1; } if (!activeArc) { printf ("no activeArc while checking edge %d\n", edgeid); } if (activeArc->multiplicity == 1) { caseB++; return 1; } for (arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next) if (arc->multiplicity > mult) { mult = arc->multiplicity; } if (mult > activeArc->multiplicity) { caseC++; } return mult > activeArc->multiplicity; }
boolean loadPathBin ( char *graphfile ) { FILE *fp; char name[256]; unsigned int i, bal_ed, num1, num2; long long markCounter = 0, readid = 0; unsigned char seg, ch; unsigned int *freadBuf; sprintf ( name, "%s.markOnEdge", graphfile ); fp = fopen ( name, "r" ); if ( !fp ) { return 0; } for ( i = 1; i <= num_ed; i++ ) { edge_array[i].multi = 0; edge_array[i].markers = NULL; } for ( i = 1; i <= num_ed; i++ ) { fscanf ( fp, "%d", &num1 ); if ( EdSmallerThanTwin ( i ) ) { fscanf ( fp, "%d", &num2 ); bal_ed = getTwinEdge ( i ); if ( num1 + num2 >= 255 ) { edge_array[i].multi = 255; edge_array[bal_ed].multi = 255; } else { edge_array[i].multi = num1 + num2; edge_array[bal_ed].multi = num1 + num2; markCounter += 2 * ( num1 + num2 ); } i++; } else { if ( 2 * num1 >= 255 ) { edge_array[i].multi = 255; } else { edge_array[i].multi = 2 * num1; markCounter += 2 * num1; } } } fclose ( fp ); fprintf ( stderr, "%lld markers overall.\n", markCounter ); markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) ); markCounter = 0; for ( i = 1; i <= num_ed; i++ ) { if ( edge_array[i].multi == 255 ) { continue; } edge_array[i].markers = markersArray + markCounter; markCounter += edge_array[i].multi; edge_array[i].multi = 0; } sprintf ( name, "%s.path", graphfile ); fp = fopen ( name, "rb" ); if ( !fp ) { return 0; } freadBuf = ( unsigned int * ) ckalloc ( ( maxReadLen - overlaplen + 1 ) * sizeof ( unsigned int ) ); while ( fread ( &ch, sizeof ( char ), 1, fp ) == 1 ) { //printf("%s",line); if ( fread ( freadBuf, sizeof ( unsigned int ), ch, fp ) != ch ) { break; } readid++; for ( seg = 0; seg < ch; seg++ ) { add1marker2edge ( freadBuf[seg], readid ); } } fclose ( fp ); markCounter = 0; for ( i = 1; i <= num_ed; i++ ) { if ( edge_array[i].multi == 255 ) { continue; } markCounter += edge_array[i].multi; } for ( i = 0; i <= num_ed; i++ ) { if ( edge_array[i].multi >= 2 && edge_array[i].multi != 255 ) { qsort ( edge_array[i].markers, ( int ) edge_array[i].multi, sizeof ( 
long long ), comp ); } } fprintf ( stderr, "%lld markers loaded.\n", markCounter ); free ( ( void * ) freadBuf ); return 1; }
//concatenate two edges if they are linearly linked void linearConcatenate () { unsigned int i; int conc_c = 1; int counter; unsigned int from_ed, to_ed, bal_ed; ARC *parc, *parc2; unsigned int bal_fe; //debugging(30514); while (conc_c) { conc_c = 0; counter = 0; for (i = 1; i <= num_ed; i++) //num_ed { if (edge_array[i].deleted || EdSameAsTwin (i)) { continue; } if (edge_array[i].length > 0) { counter++; } parc = edge_array[i].arcs; if (!parc || parc->next) { continue; } to_ed = parc->to_ed; bal_ed = getTwinEdge (to_ed); parc2 = edge_array[bal_ed].arcs; if (bal_ed == to_ed || !parc2 || parc2->next) { continue; } from_ed = i; if (from_ed == to_ed || from_ed == bal_ed) { continue; } //linear connection found conc_c++; linearUpdateConnection (from_ed, to_ed, 0); allpathUpdateEdge (from_ed, to_ed, 0); bal_fe = getTwinEdge (from_ed); linearUpdateConnection (bal_ed, bal_fe, 1); allpathUpdateEdge (bal_ed, bal_fe, 1); /* if(from_ed==6589||to_ed==6589) printf("%d <- %d (%d)\n",from_ed,to_ed,i); if(bal_fe==6589||bal_ed==6589) printf("%d <- %d (%d)\n",bal_fe,bal_ed,i); */ } printf ("a linear concatenation lap, %d concatenated\n", conc_c); } printf ("%d edges in graph\n", counter); }
/*
 * Delete contigs whose coverage is far below that of their neighbours.
 *
 * For every edge that is not its own twin, computeNextCov() supplies a
 * coverage value for the edge and for its twin; edges where either value
 * is 0 are skipped. With max/min the larger/smaller of the two:
 *   - if min / max < 0.1 the contig is deleted when cvg / min < 0.5;
 *   - otherwise it is deleted when cvg / max < 0.05.
 * After an edge has been evaluated the next index is skipped as well.
 *
 * Returns 1 if the final removeArc() call reports a positive count,
 * 0 otherwise.
 */
int deleteLightContig()
{
	double prev_cov, next_cov, hi, lo, self_cov;
	unsigned int index;
	int removed;
	int doomed;

	for (index = 1; index <= num_ed; index++) {
		if (EdSameAsTwin(index)) {
			continue;
		}

		computeNextCov(index, &next_cov);
		computeNextCov(getTwinEdge(index), &prev_cov);

		if (next_cov == 0 || prev_cov == 0) {
			continue;
		}

		hi = (next_cov > prev_cov) ? next_cov : prev_cov;
		lo = (next_cov > prev_cov) ? prev_cov : next_cov;
		self_cov = (double)edge_array[index].cvg;
		printf("contig_cov:\t%0.1f\t%0.1f\t%0.1f\n", self_cov, hi, lo);

		if (lo / hi < 0.1) {
			/* Very uneven neighbours: judge against the weaker side. */
			doomed = (self_cov / lo < 0.5);
		} else {
			doomed = (self_cov / hi < 0.05);
		}

		if (doomed) {
			delete1contig(index);
		}

		/* Skip the following index too. */
		index++;
	}

	removed = removeArc();
	return removed > 0 ? 1 : 0;
}
Idx VoronoiDiagram<CoordT>::getStartVertex(Idx edge) const
{
  // Only end vertices are stored: the start of an edge is read from the
  // end vertex of its twin.
  const Idx twin = getTwinEdge(edge);
  return edges_[twin].end_vertex;
}
/*
 * Print length statistics and write every live edge to "<outfile>.contig"
 * plus an index table to "<outfile>.ContigIndex", numbering the contigs by
 * the order stored in flag_array.
 *
 * ed_array: edge array, entries 1..ed_num (not reordered here).
 * ed_num:   number of edges.
 * outfile:  prefix of both output files. If a file name would not fit the
 *           internal buffer, an error is printed to stderr and nothing is
 *           written.
 * cut_len:  only echoed in the summary message.
 *
 * Allocates the globals index_array and flag_array; neither is freed here.
 *
 * NOTE(review): the ContigIndex loop reads the globals num_ed / edge_array
 * rather than the ed_num / ed_array parameters; left unchanged because
 * callers may rely on it -- confirm the two always coincide.
 */
void output_contig (EDGE * ed_array, unsigned int ed_num, char *outfile, int cut_len)
{
	char temp[256];
	FILE * fp, *fp_contig;
	int flag, count, len_c;
	int signI;
	int name_len;
	unsigned int i, j, diff_len=0;
	long long sum = 0, N90, N50;
	unsigned int *length_array;
	unsigned int *all_length_arr;
	boolean tip;

	/* ".ContigIndex" is the longer of the two suffixes: if it fits,
	   ".contig" fits as well. The original sprintf could overrun temp[]. */
	name_len = snprintf (temp, sizeof (temp), "%s.ContigIndex", outfile);

	if (name_len < 0 || (size_t) name_len >= sizeof (temp))
	{
		fprintf (stderr, "output_contig: output prefix too long: %s\n", outfile);
		return;
	}

	snprintf (temp, sizeof (temp), "%s.contig", outfile);
	fp = ckopen (temp, "w");
	index_array = (unsigned int *)ckalloc((ed_num+1)*sizeof(unsigned int));
	all_length_arr = (unsigned int*) ckalloc((ed_num+1)*sizeof(unsigned int));
	flag_array = (unsigned int*)ckalloc((ed_num+1)*sizeof(unsigned int));

	for (i=1; i<=ed_num; ++i)
	{
		index_array[i] = ed_array[i].length;
		all_length_arr[i] = ed_array[i].length;
	}

	qsort(&all_length_arr[1], ed_num, sizeof(all_length_arr[0]), cmp_int);

	/* Collapse the sorted lengths to distinct values; flag_array[k] keeps
	   the sorted position where the k-th distinct length first occurs. */
	for (i=1; i<=ed_num; ++i)
	{
		for (j=i+1; j<=ed_num; ++j)
		{
			if (all_length_arr[i] != all_length_arr[j])
				break;
		}

		all_length_arr[++diff_len] = all_length_arr[i];
		flag_array[diff_len] = i;
		i = j-1;
	}

	/* Replace each edge's length by the position uniqueLenSearch() returns
	   for it.
	   NOTE(review): the inversion below presumably relies on these
	   positions being distinct per edge -- confirm in uniqueLenSearch. */
	for (i=1; i<=ed_num; ++i)
	{
		index_array[i] = uniqueLenSearch(all_length_arr, flag_array, diff_len, index_array[i]);
	}

	/* Invert the mapping: flag_array[position] = edge index. */
	for (i=1; i<=ed_num; ++i)
	{
		flag_array[index_array[i]] = i;
	}

	free((void*)all_length_arr);
	length_array = (unsigned int *) ckalloc (ed_num * sizeof (unsigned int));
	kmerSeq = (char *) ckalloc (overlaplen * sizeof (char));
	count = len_c = 0;

	for (i = 1; i <= ed_num; i++)
	{
		if ((ed_array[i].length + overlaplen) >= len_bar)
		{
			length_array[len_c++] = ed_array[i].length + overlaplen;
		}

		if (ed_array[i].length < 1 || ed_array[i].deleted)
		{
			continue;
		}

		count++;

		if (EdSmallerThanTwin (i))
		{
			i++;
		}
	}

	sum = 0;

	for (signI = len_c - 1; signI >= 0; signI--)
	{
		sum += length_array[signI];
	}

	qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );

	if ( len_c > 0 )
	{
		printf ( "%d ctgs longer than %d, sum up %lldbp, with average length %lld\n", len_c, len_bar, sum, sum / len_c );
		printf ( "the longest is %dbp, ", length_array[len_c - 1] );
	}

	N50 = sum * 0.5;
	N90 = sum * 0.9;
	sum = flag = 0;

	for (signI = len_c - 1; signI >= 0; signI--)
	{
		sum += length_array[signI];

		if (!flag && sum >= N50)
		{
			printf ("contig N50 is %d bp,", length_array[signI]);
			flag = 1;
		}

		if (sum >= N90)
		{
			printf ("contig N90 is %d bp\n", length_array[signI]);
			break;
		}
	}

	/* i is the output index, j the edge stored at that position. */
	for (i = 1; i <= ed_num; i++)
	{
		j = flag_array[i];

		if (ed_array[j].deleted || ed_array[j].length < 1)
		{
			continue;
		}

		/* An edge with no arcs on at least one side is flagged as a tip. */
		if (ed_array[j].arcs && ed_array[getTwinEdge (j)].arcs)
		{
			tip = 0;
		}
		else
		{
			tip = 1;
		}

		output_1contig (i, &(ed_array[j]), fp, tip);

		if (EdSmallerThanTwin (j))
		{
			i++;
		}
	}

	fclose (fp);
	free ((void *) kmerSeq);
	free ((void *) length_array);
	printf ("%d contigs longer than %d output\n", count, cut_len);
	/* Fits: the same name was checked at the top of the function. */
	snprintf (temp, sizeof (temp), "%s.ContigIndex", outfile);
	fp_contig = ckopen (temp, "w");
	fprintf (fp_contig, "Edge_num %d %d\n", ed_num, count);
	fprintf (fp_contig, "index\tlength\treverseComplement\n");

	for (i = 1; i <= num_ed; i++)
	{
		j = flag_array[i];
		fprintf (fp_contig, "%d\t%d\t", i, edge_array[j].length + overlaplen);

		if (EdSmallerThanTwin (j))
		{
			fprintf (fp_contig, "1\n");
			i++;
		}
		else if (EdLargerThanTwin (j))
		{
			fprintf (fp_contig, "-1\n");
		}
		else
		{
			fprintf (fp_contig, "0\n");
		}
	}

	fclose (fp_contig);
}