void output_updated_edges ( char * outfile ) { FILE * fp; char name[256]; unsigned int i, validCounter = 0; EDGE * edge; sprintf ( name, "%s.updated.edge", outfile ); fp = ckopen ( name, "w" ); for ( i = 1; i <= num_ed; i++ ) { validCounter++; } fprintf ( fp, "EDGEs %d\n", validCounter ); validCounter = 0; for ( i = 1; i <= num_ed; i++ ) { edge = &edge_array[i]; fprintf ( fp, ">length %d,", edge->length ); print_kmer ( fp, vt_array[edge->from_vt].kmer, ',' ); print_kmer ( fp, vt_array[edge->to_vt].kmer, ',' ); if ( EdSmallerThanTwin ( i ) ) { fprintf ( fp, "1," ); } else if ( EdLargerThanTwin ( i ) ) { fprintf ( fp, "-1," ); } else { fprintf ( fp, "0," ); } fprintf ( fp, "%d\n", edge->cvg ); } fclose ( fp ); }
void solveReps() { unsigned int i; unsigned int repTime; int counter = 0; boolean flag; //debugging(30514); extraEdgeNum = num_ed + 1; for ( i = 1; i <= num_ed; i++ ) { repTime = solvable ( i ); if ( repTime == 0 ) { continue; } flag = interferingCheck ( i, repTime ); if ( flag ) { continue; } split1edge ( i, repTime ); counter ++; //+= 2*(repTime-1); if ( EdSmallerThanTwin ( i ) ) { i++; } } printf ( "%d repeats solvable, %d more edges\n", counter, extraEdgeNum - 1 - num_ed ); num_ed = extraEdgeNum - 1; removeDeadArcs(); if ( markersArray ) { free ( ( void * ) markersArray ); markersArray = NULL; } }
/*************************************************
Function:
    output_contig
Description:
    1. Sorts ed_array[1..ed_num] in place with cmp_edge.
    2. Prints length statistics (count, total, average, longest, N50
       and N90) for the edges whose length plus overlaplen reaches
       len_bar.
    3. Writes every non-deleted edge of positive length to
       "<outfile>.contig" through output_1contig.
    4. Writes the index table "<outfile>.ContigIndex".
Input:
    1. ed_array:    the edge array (1-based)
    2. ed_num:      the number of edges
    3. outfile:     the output prefix
    4. cut_len:     only echoed in the summary message
Output:
    None.
Return:
    None.
*************************************************/
void output_contig ( EDGE * ed_array, unsigned int ed_num, char * outfile, int cut_len )
{
    char contigName[256], indexName[256];
    FILE * fp, *fp_contig;
    int flag, count, len_c;
    int signI;
    int contigNameLen, indexNameLen;
    unsigned int i;
    long long sum = 0, N90, N50;
    unsigned int * length_array;
    boolean tip;

    /* Both file names are built (and validated) before anything is allocated or created. */
    contigNameLen = snprintf ( contigName, sizeof ( contigName ), "%s.contig", outfile );
    indexNameLen = snprintf ( indexName, sizeof ( indexName ), "%s.ContigIndex", outfile );

    if ( contigNameLen < 0 || ( size_t ) contigNameLen >= sizeof ( contigName )
            || indexNameLen < 0 || ( size_t ) indexNameLen >= sizeof ( indexName ) )
    {
        fprintf ( stderr, "output_contig: output prefix is too long.\n" );
        return;
    }

    fp = ckopen ( contigName, "w" );
    qsort ( &ed_array[1], ed_num, sizeof ( EDGE ), cmp_edge );
    length_array = ( unsigned int * ) ckalloc ( ed_num * sizeof ( unsigned int ) );
    kmerSeq = ( char * ) ckalloc ( overlaplen * sizeof ( char ) );
    /* first scan: count the contigs and collect the lengths used for the statistics */
    count = len_c = 0;

    for ( i = 1; i <= ed_num; i++ )
    {
        if ( ( ed_array[i].length + overlaplen ) >= len_bar )
        {
            length_array[len_c++] = ed_array[i].length + overlaplen;
        }

        if ( ed_array[i].length < 1 || ed_array[i].deleted )
        {
            continue;
        }

        count++;

        if ( EdSmallerThanTwin ( i ) )
        {
            i++;
        }
    }

    sum = 0;

    for ( signI = len_c - 1; signI >= 0; signI-- )
    {
        sum += length_array[signI];
    }

    qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );

    if ( len_c > 0 )
    {
        printf ( "%d ctgs longer than %d, sum up %lldbp, with average length %lld\n", len_c, len_bar, sum, sum / len_c );
        /* length_array[len_c - 1] exists only when at least one length was collected */
        printf ( "the longest is %ubp, ", length_array[len_c - 1] );
    }

    N50 = sum * 0.5;
    N90 = sum * 0.9;
    sum = flag = 0;

    /* accumulate from the end of the sorted array until 50% / 90% of the total is reached */
    for ( signI = len_c - 1; signI >= 0; signI-- )
    {
        sum += length_array[signI];

        if ( !flag && sum >= N50 )
        {
            printf ( "contig N50 is %u bp,", length_array[signI] );
            flag = 1;
        }

        if ( sum >= N90 )
        {
            printf ( "contig N90 is %u bp\n", length_array[signI] );
            break;
        }
    }

    for ( i = 1; i <= ed_num; i++ )
    {
        if ( ed_array[i].deleted || ed_array[i].length < 1 )
        {
            continue;
        }

        /* an edge lacking arcs on itself or on its twin is reported as a tip */
        if ( ed_array[i].arcs && ed_array[getTwinEdge ( i )].arcs )
        {
            tip = 0;
        }
        else
        {
            tip = 1;
        }

        output_1contig ( i, & ( ed_array[i] ), fp, tip );

        if ( EdSmallerThanTwin ( i ) )
        {
            i++;
        }
    }

    fclose ( fp );
    free ( ( void * ) kmerSeq );
    free ( ( void * ) length_array );
    printf ( "%d contigs longer than %d output\n", count, cut_len );
    fp_contig = ckopen ( indexName, "w" );
    fprintf ( fp_contig, "Edge_num %u %d\n", ed_num, count );
    fprintf ( fp_contig, "index\tlength\treverseComplement\n" );

    /* NOTE(review): this loop reads the globals num_ed / edge_array rather than the
       ed_num / ed_array parameters; presumably callers pass the globals - confirm. */
    for ( i = 1; i <= num_ed; i++ )
    {
        fprintf ( fp_contig, "%u\t%d\t", i, edge_array[i].length + overlaplen );

        if ( EdSmallerThanTwin ( i ) )
        {
            fprintf ( fp_contig, "1\n" );
            i++;
        }
        else if ( EdLargerThanTwin ( i ) )
        {
            fprintf ( fp_contig, "-1\n" );
        }
        else
        {
            fprintf ( fp_contig, "0\n" );
        }
    }

    fclose ( fp_contig );
}
void swapedge() { unsigned int i; ARC * arc, *bal_arc, *temp_arc; int count_swap = 0, count_equal = 0; for ( i = 1; i <= num_ed; ++i ) { if ( edge_array[i].deleted || EdSameAsTwin ( i ) ) { continue; } if ( EdSmallerThanTwin ( i ) ) { if ( KmerLarger ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) ) { count_swap++; copyEdge ( i, num_ed + 1 + 1 ); copyEdge ( i + 1, num_ed + 1 ); copyEdge ( num_ed + 1, i ); copyEdge ( num_ed + 1 + 1, i + 1 ); edge_array[i].bal_edge = 2; edge_array[i + 1].bal_edge = 0; //take care of the arcs arc = edge_array[i].arcs; while ( arc ) { arc->bal_arc->to_ed = i + 1; arc = arc->next; } arc = edge_array[i + 1].arcs; while ( arc ) { arc->bal_arc->to_ed = i; arc = arc->next; } } else if ( KmerEqual ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) ) { int temp = EdgeEqual ( i, i + 1 ); if ( temp == 0 ) { count_equal++; edge_array[i].bal_edge = 1; delete1Edge ( i + 1 ); //take care of the arcs arc = edge_array[i].arcs; while ( arc ) { arc->bal_arc->to_ed = i; arc = arc->next; } bal_arc = edge_array[i + 1].arcs; edge_array[i + 1].arcs = NULL; while ( bal_arc ) { temp_arc = bal_arc; bal_arc = bal_arc->next; if ( edge_array[i].arcs ) { edge_array[i].arcs->prev = temp_arc; } temp_arc->next = edge_array[i].arcs; edge_array[i].arcs = temp_arc; } } else if ( temp > 0 ) { count_swap++; copyEdge ( i, num_ed + 1 + 1 ); copyEdge ( i + 1, num_ed + 1 ); copyEdge ( num_ed + 1, i ); copyEdge ( num_ed + 1 + 1, i + 1 ); edge_array[i].bal_edge = 2; edge_array[i + 1].bal_edge = 0; //take care of the arcs arc = edge_array[i].arcs; while ( arc ) { arc->bal_arc->to_ed = i + 1; arc = arc->next; } arc = edge_array[i + 1].arcs; while ( arc ) { arc->bal_arc->to_ed = i; arc = arc->next; } } } ++i; } else { delete1Edge ( i ); printf( "Warning : Front edge %d is larger than %d.\n", i, i + 1 ); } } printf( "%d none-palindrome edge(s) swapped, %d palindrome edge(s) processed.\n", count_swap, count_equal ); 
};
/************************************************* Function: loadPath Description: 1. Loads the path info. 2. Records the ids of reads crossing edges. Input: 1. graphfile: the input prefix Output: None. Return: None. *************************************************/ boolean loadPath ( char *graphfile ) { FILE *fp; char name[256], line[1024]; unsigned int i, bal_ed, num1, edgeno, num2; long long markCounter = 0, readid = 0; char *seg; sprintf ( name, "%s.markOnEdge", graphfile ); fp = fopen ( name, "r" ); if ( !fp ) { return 0; } for ( i = 1; i <= num_ed; i++ ) { edge_array[i].multi = 0; } for ( i = 1; i <= num_ed; i++ ) { fscanf ( fp, "%d", &num1 ); if ( EdSmallerThanTwin ( i ) ) { fscanf ( fp, "%d", &num2 ); bal_ed = getTwinEdge ( i ); if ( num1 + num2 >= 255 ) { edge_array[i].multi = 255; edge_array[bal_ed].multi = 255; } else { edge_array[i].multi = num1 + num2; edge_array[bal_ed].multi = num1 + num2; markCounter += 2 * ( num1 + num2 ); } i++; } else { if ( 2 * num1 >= 255 ) { edge_array[i].multi = 255; } else { edge_array[i].multi = 2 * num1; markCounter += 2 * num1; } } } fclose ( fp ); fprintf ( stderr, "%lld markers overall.\n", markCounter ); markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) ); markCounter = 0; for ( i = 1; i <= num_ed; i++ ) { if ( edge_array[i].multi == 255 ) { continue; } edge_array[i].markers = markersArray + markCounter; markCounter += edge_array[i].multi; edge_array[i].multi = 0; } sprintf ( name, "%s.path", graphfile ); fp = fopen ( name, "r" ); if ( !fp ) { return 0; } while ( fgets ( line, sizeof ( line ), fp ) != NULL ) { //printf("%s",line); readid++; seg = strtok ( line, " " ); while ( seg ) { edgeno = atoi ( seg ); //printf("%s, %d\n",seg,edgeno); add1marker2edge ( edgeno, readid ); seg = strtok ( NULL, " " ); } } fclose ( fp ); markCounter = 0; for ( i = 1; i <= num_ed; i++ ) { if ( edge_array[i].multi == 255 ) { continue; } markCounter += edge_array[i].multi; } fprintf ( stderr, "%lld marks 
loaded.\n", markCounter ); return 1; }
boolean loadPathBin ( char *graphfile ) { FILE *fp; char name[256]; unsigned int i, bal_ed, num1, num2; long long markCounter = 0, readid = 0; unsigned char seg, ch; unsigned int *freadBuf; sprintf ( name, "%s.markOnEdge", graphfile ); fp = fopen ( name, "r" ); if ( !fp ) { return 0; } for ( i = 1; i <= num_ed; i++ ) { edge_array[i].multi = 0; edge_array[i].markers = NULL; } for ( i = 1; i <= num_ed; i++ ) { fscanf ( fp, "%d", &num1 ); if ( EdSmallerThanTwin ( i ) ) { fscanf ( fp, "%d", &num2 ); bal_ed = getTwinEdge ( i ); if ( num1 + num2 >= 255 ) { edge_array[i].multi = 255; edge_array[bal_ed].multi = 255; } else { edge_array[i].multi = num1 + num2; edge_array[bal_ed].multi = num1 + num2; markCounter += 2 * ( num1 + num2 ); } i++; } else { if ( 2 * num1 >= 255 ) { edge_array[i].multi = 255; } else { edge_array[i].multi = 2 * num1; markCounter += 2 * num1; } } } fclose ( fp ); fprintf ( stderr, "%lld markers overall.\n", markCounter ); markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) ); markCounter = 0; for ( i = 1; i <= num_ed; i++ ) { if ( edge_array[i].multi == 255 ) { continue; } edge_array[i].markers = markersArray + markCounter; markCounter += edge_array[i].multi; edge_array[i].multi = 0; } sprintf ( name, "%s.path", graphfile ); fp = fopen ( name, "rb" ); if ( !fp ) { return 0; } freadBuf = ( unsigned int * ) ckalloc ( ( maxReadLen - overlaplen + 1 ) * sizeof ( unsigned int ) ); while ( fread ( &ch, sizeof ( char ), 1, fp ) == 1 ) { //printf("%s",line); if ( fread ( freadBuf, sizeof ( unsigned int ), ch, fp ) != ch ) { break; } readid++; for ( seg = 0; seg < ch; seg++ ) { add1marker2edge ( freadBuf[seg], readid ); } } fclose ( fp ); markCounter = 0; for ( i = 1; i <= num_ed; i++ ) { if ( edge_array[i].multi == 255 ) { continue; } markCounter += edge_array[i].multi; } for ( i = 0; i <= num_ed; i++ ) { if ( edge_array[i].multi >= 2 && edge_array[i].multi != 255 ) { qsort ( edge_array[i].markers, ( int ) edge_array[i].multi, sizeof ( 
long long ), comp ); } } fprintf ( stderr, "%lld markers loaded.\n", markCounter ); free ( ( void * ) freadBuf ); return 1; }
/*************************************************
Function:
    output_contig
Description:
    1. Builds index_array / flag_array from the edge lengths with the
       help of uniqueLenSearch; flag_array[k] then names the edge that
       is written as contig k.
    2. Prints length statistics (count, total, average, longest, N50
       and N90) for the edges whose length plus overlaplen reaches
       len_bar.
    3. Writes every non-deleted edge of positive length to
       "<outfile>.contig" through output_1contig.
    4. Writes the index table "<outfile>.ContigIndex".
Input:
    1. ed_array:    the edge array (1-based)
    2. ed_num:      the number of edges
    3. outfile:     the output prefix
    4. cut_len:     only echoed in the summary message
Output:
    None.
Return:
    None.
*************************************************/
void output_contig (EDGE * ed_array, unsigned int ed_num, char *outfile, int cut_len)
{
    char contigName[256], indexName[256];
    FILE * fp, *fp_contig;
    int flag, count, len_c;
    int signI;
    int contigNameLen, indexNameLen;
    unsigned int i, j, diff_len = 0;
    long long sum = 0, N90, N50;
    unsigned int *length_array;
    unsigned int *all_length_arr;
    boolean tip;

    /* Both file names are built (and validated) before anything is allocated or created. */
    contigNameLen = snprintf (contigName, sizeof (contigName), "%s.contig", outfile);
    indexNameLen = snprintf (indexName, sizeof (indexName), "%s.ContigIndex", outfile);

    if (contigNameLen < 0 || (size_t) contigNameLen >= sizeof (contigName)
            || indexNameLen < 0 || (size_t) indexNameLen >= sizeof (indexName))
    {
        fprintf (stderr, "output_contig: output prefix is too long.\n");
        return;
    }

    fp = ckopen (contigName, "w");
    /* NOTE(review): index_array and flag_array are not released in this function;
       presumably they are used elsewhere afterwards - confirm. */
    index_array = (unsigned int *) ckalloc ((ed_num + 1) * sizeof (unsigned int));
    all_length_arr = (unsigned int *) ckalloc ((ed_num + 1) * sizeof (unsigned int));
    flag_array = (unsigned int *) ckalloc ((ed_num + 1) * sizeof (unsigned int));

    for (i = 1; i <= ed_num; ++i)
    {
        index_array[i] = ed_array[i].length;
        all_length_arr[i] = ed_array[i].length;
    }

    qsort (&all_length_arr[1], ed_num, sizeof (all_length_arr[0]), cmp_int);

    /* Compact the sorted lengths to the distinct values; flag_array[k] keeps the
       position in the sorted array at which the k-th distinct value first occurs. */
    for (i = 1; i <= ed_num; ++i)
    {
        for (j = i + 1; j <= ed_num; ++j)
        {
            if (all_length_arr[i] != all_length_arr[j])
            {
                break;
            }
        }

        all_length_arr[++diff_len] = all_length_arr[i];
        flag_array[diff_len] = i;
        i = j - 1;
    }

    /* replace each edge length by the value uniqueLenSearch returns for it
       (presumably a unique output position per edge - confirm) */
    for (i = 1; i <= ed_num; ++i)
    {
        index_array[i] = uniqueLenSearch (all_length_arr, flag_array, diff_len, index_array[i]);
    }

    /* invert the mapping: flag_array[position] = edge index */
    for (i = 1; i <= ed_num; ++i)
    {
        flag_array[index_array[i]] = i;
    }

    free ((void *) all_length_arr);
    length_array = (unsigned int *) ckalloc (ed_num * sizeof (unsigned int));
    kmerSeq = (char *) ckalloc (overlaplen * sizeof (char));
    /* first scan: count the contigs and collect the lengths used for the statistics */
    count = len_c = 0;

    for (i = 1; i <= ed_num; i++)
    {
        if ((ed_array[i].length + overlaplen) >= len_bar)
        {
            length_array[len_c++] = ed_array[i].length + overlaplen;
        }

        if (ed_array[i].length < 1 || ed_array[i].deleted)
        {
            continue;
        }

        count++;

        if (EdSmallerThanTwin (i))
        {
            i++;
        }
    }

    sum = 0;

    for (signI = len_c - 1; signI >= 0; signI--)
    {
        sum += length_array[signI];
    }

    qsort (length_array, len_c, sizeof (length_array[0]), cmp_int);

    if (len_c > 0)
    {
        printf ("%d ctgs longer than %d, sum up %lldbp, with average length %lld\n", len_c, len_bar, sum, sum / len_c);
        printf ("the longest is %ubp, ", length_array[len_c - 1]);
    }

    N50 = sum * 0.5;
    N90 = sum * 0.9;
    sum = flag = 0;

    /* accumulate from the end of the sorted array until 50% / 90% of the total is reached */
    for (signI = len_c - 1; signI >= 0; signI--)
    {
        sum += length_array[signI];

        if (!flag && sum >= N50)
        {
            printf ("contig N50 is %u bp,", length_array[signI]);
            flag = 1;
        }

        if (sum >= N90)
        {
            printf ("contig N90 is %u bp\n", length_array[signI]);
            break;
        }
    }

    for (i = 1; i <= ed_num; i++)
    {
        j = flag_array[i];

        if (ed_array[j].deleted || ed_array[j].length < 1)
        {
            continue;
        }

        /* an edge lacking arcs on itself or on its twin is reported as a tip */
        if (ed_array[j].arcs && ed_array[getTwinEdge (j)].arcs)
        {
            tip = 0;
        }
        else
        {
            tip = 1;
        }

        output_1contig (i, &(ed_array[j]), fp, tip);

        if (EdSmallerThanTwin (j))
        {
            i++;
        }
    }

    fclose (fp);
    free ((void *) kmerSeq);
    free ((void *) length_array);
    printf ("%d contigs longer than %d output\n", count, cut_len);
    fp_contig = ckopen (indexName, "w");
    fprintf (fp_contig, "Edge_num %u %d\n", ed_num, count);
    fprintf (fp_contig, "index\tlength\treverseComplement\n");

    /* NOTE(review): this loop reads the globals num_ed / edge_array rather than the
       ed_num / ed_array parameters, while flag_array holds ed_num + 1 entries;
       presumably callers pass the globals - confirm. */
    for (i = 1; i <= num_ed; i++)
    {
        j = flag_array[i];
        fprintf (fp_contig, "%u\t%d\t", i, edge_array[j].length + overlaplen);

        if (EdSmallerThanTwin (j))
        {
            fprintf (fp_contig, "1\n");
            i++;
        }
        else if (EdLargerThanTwin (j))
        {
            fprintf (fp_contig, "-1\n");
        }
        else
        {
            fprintf (fp_contig, "0\n");
        }
    }

    fclose (fp_contig);
}