/************************************************* Function: prlRead2HashTable Description: 1. Imports the reads from the lib file one by one. 2. Chops the reads into kmers and store them in KmerSets. 3. Removes the kmers with low coverage. 4. Marks the linear kmers. 5. Counts the kmer frequences. Input: 1. libfile : the reads config file 2. outfile : the output file prefix Output: None. Return: 1 if exits normally. *************************************************/ boolean prlRead2HashTable ( char * libfile, char * outfile ) { char * cach1; char * cach2; unsigned char asm_ctg = 1; long long i; char * next_name, name[256]; FILE * fo; time_t start_t, stop_t; int maxReadNum; int libNo; pthread_t threads[thrd_num]; unsigned char thrdSignal[thrd_num + 1]; PARAMETER paras[thrd_num]; boolean flag, pairs = 0; WORDFILTER = createFilter ( overlaplen ); maxReadLen = 0; maxNameLen = 256; scan_libInfo ( libfile ); alloc_pe_mem ( num_libs ); if ( !maxReadLen ) { maxReadLen = 100; } if ( gLineLen < maxReadLen ) { gStr = ( char * ) ckalloc ( ( maxReadLen + 1 ) * sizeof ( char ) ); } //init maxReadLen4all = maxReadLen; fprintf ( stderr, "In %s, %d lib(s), maximum read length %d, maximum name length %d.\n\n", libfile, num_libs, maxReadLen, maxNameLen ); next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) ); kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) ); hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) ); prevcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) ); nextcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) ); maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 ); //printf("buffer size %d, max read len %d, max read num %d\n",buffer_size,maxReadLen,maxReadNum); int maxAIOSize = 32768; aioBuffer1 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) ); aioBuffer2 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) ); readBuffer1 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //(char *)ckalloc(maxAIOSize*sizeof(char)); //1024 readBuffer2 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //1024 cach1 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024 cach2 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024 memset ( cach1, '\0', ( maxReadLen * 4 + 1024 ) ); //1024 memset ( cach2, '\0', ( maxReadLen * 4 + 1024 ) ); //1024 seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) ); lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) ); indexArray = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) ); for ( i = 0; i < maxReadNum; i++ ) { seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) ); } rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) ); if ( 1 ) { kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) ); KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) ); ubyte8 init_size = 1024; ubyte8 k = 0; if ( initKmerSetSize ) { #ifdef MER127 init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 40 ); #else init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 24 ); //is it true? #endif do { ++k; } while ( k * 0xFFFFFFLLU < init_size ); } for ( i = 0; i < thrd_num; i++ ) { //KmerSets[i] = init_kmerset(1024,0.77f); KmerSets[i] = init_kmerset ( ( ( initKmerSetSize ) ? ( k * 0xFFFFFFLLU ) : ( init_size ) ), 0.77f ); thrdSignal[i + 1] = 0; paras[i].threadID = i; paras[i].mainSignal = &thrdSignal[0]; paras[i].selfSignal = &thrdSignal[i + 1]; kmerCounter[i + 1] = 0; rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) ); } creatThrds ( threads, paras ); } thrdSignal[0] = kmerCounter[0] = 0; time ( &start_t ); kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0; while ( openNextFile ( &libNo, pairs, asm_ctg ) ) { //read bam file if ( lib_array[libNo].curr_type == 4 ) { int type = 0; //deside the PE reads is good or bad while ( ( flag = read1seqInLibBam ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), &libNo, pairs, 1, &type ) ) != 0 ) { if ( type == -1 ) //if the reads is bad, go back. { i--; if ( lenBuffer[read_c - 1] >= overlaplen + 1 ) { kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1; read_c--; } n_solexa -= 2; continue; } if ( ( ++i ) % 100000000 == 0 ) { fprintf ( stderr, "--- %lldth reads.\n", i ); } if ( lenBuffer[read_c] < 0 ) { fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); } if ( lenBuffer[read_c] < overlaplen + 1 ) { continue; } /* if(lenBuffer[read_c]>70) lenBuffer[read_c] = 50; else if(lenBuffer[read_c]>40) lenBuffer[read_c] = 40; */ indexArray[read_c] = kmer_c; kmer_c += lenBuffer[read_c] - overlaplen + 1; read_c++; if ( read_c == maxReadNum ) { kmerCounter[0] += kmer_c; sendWorkSignal ( 2, thrdSignal ); //chopKmer4read sendWorkSignal ( 1, thrdSignal ); //singleKmer kmer_c = read_c = 0; } } } //read PE fasta or fastq else if ( lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2 ) { initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize ); initAIO ( &aio2, aioBuffer2, fileno ( lib_array[libNo].fp2 ), maxAIOSize ); int offset1, offset2, flag1, flag2, rt1, rt2; offset1 = offset2 = 0; rt1 = aio_read ( &aio1 ); rt2 = aio_read ( &aio2 ); flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type ); flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type ); if ( flag1 && flag2 ) { int start1, start2, turn; start1 = start2 = 0; turn = 1; while ( start1 < offset1 || start2 < offset2 ) { if ( turn == 1 ) { turn = 2; readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo ); if ( ( ++i ) % 100000000 == 0 ) { fprintf ( stderr, "--- %lldth reads.\n", i ); } if ( lenBuffer[read_c] < 0 ) { fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); } if ( lenBuffer[read_c] < overlaplen + 1 ) { if ( start1 >= offset1 ) { start1 = 0; offset1 = 0; flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type ); } continue; } indexArray[read_c] = kmer_c; kmer_c += lenBuffer[read_c] - overlaplen + 1; read_c++; if ( start1 >= offset1 ) { start1 = 0; offset1 = 0; flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type ); } if ( read_c == maxReadNum ) { kmerCounter[0] += kmer_c; sendWorkSignal ( 2, thrdSignal ); //chopKmer4read sendWorkSignal ( 1, thrdSignal ); //singleKmer kmer_c = read_c = 0; } continue; } if ( turn == 2 ) { turn = 1; readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo ); if ( ( ++i ) % 100000000 == 0 ) { fprintf ( stderr, "--- %lldth reads.\n", i ); } if ( lenBuffer[read_c] < 0 ) { fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); } if ( lenBuffer[read_c] < overlaplen + 1 ) { if ( ( flag2 == 2 ) && ( start2 >= offset2 ) ) { break; } if ( start2 >= offset2 ) { start2 = 0; offset2 = 0; flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type ); } continue; } indexArray[read_c] = kmer_c; kmer_c += lenBuffer[read_c] - overlaplen + 1; read_c++; if ( ( flag2 == 2 ) && ( start2 >= offset2 ) ) { break; } if ( start2 >= offset2 ) { start2 = 0; offset2 = 0; flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type ); } if ( read_c == maxReadNum ) { kmerCounter[0] += kmer_c; sendWorkSignal ( 2, thrdSignal ); //chopKmer4read sendWorkSignal ( 1, thrdSignal ); //singleKmer kmer_c = read_c = 0; } continue; } } } else { fprintf(stderr, "Error: aio_read error.\n"); } } //read single fasta, single fastq and PE fasta in one file else { initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize ); int offset, flag1, rt; offset = 0; rt = aio_read ( &aio1 ); while ( ( flag1 = AIORead ( &aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type ) ) ) { int start = 0; while ( start < offset ) { readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo ); if ( ( ++i ) % 100000000 == 0 ) { fprintf ( stderr, "--- %lldth reads.\n", i ); } if ( lenBuffer[read_c] < 0 ) { fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); } if ( lenBuffer[read_c] < overlaplen + 1 ) { continue; } indexArray[read_c] = kmer_c; kmer_c += lenBuffer[read_c] - overlaplen + 1; read_c++; } if ( read_c > maxReadNum - 1024 ) { kmerCounter[0] += kmer_c; sendWorkSignal ( 2, thrdSignal ); //chopKmer4read sendWorkSignal ( 1, thrdSignal ); //singleKmer kmer_c = read_c = 0; } if ( flag1 == 2 ) { break; } } } } if ( read_c ) { kmerCounter[0] += kmer_c; sendWorkSignal ( 2, thrdSignal ); //chopKmer4read sendWorkSignal ( 1, thrdSignal ); //singleKmer } time ( &stop_t ); fprintf ( stderr, "Time spent on hashing reads: %ds, %lld read(s) processed.\n", ( int ) ( stop_t - start_t ), i ); //record insert size info if ( pairs ) { if ( gradsCounter ) { fprintf ( stderr, "%d pe insert size, the largest boundary is %lld.\n\n", gradsCounter, pes[gradsCounter - 1].PE_bound ); } else { fprintf ( stderr, "No paired reads found.\n" ); } sprintf ( name, "%s.peGrads", outfile ); fo = ckopen ( name, "w" ); fprintf ( fo, "grads&num: %d\t%lld\n", gradsCounter, n_solexa ); for ( i = 0; i < gradsCounter; i++ ) { fprintf ( fo, "%d\t%lld\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank ); } fclose ( fo ); } free_pe_mem (); free_libs (); if ( 1 ) { unsigned long long alloCounter = 0; unsigned long long allKmerCounter = 0; for ( i = 0; i < thrd_num; i++ ) { alloCounter += count_kmerset ( ( KmerSets[i] ) ); allKmerCounter += kmerCounter[i + 1]; free ( ( void * ) rcSeq[i + 1] ); } fprintf ( stderr, "%lli node(s) allocated, %lli kmer(s) in reads, %lli kmer(s) processed.\n", alloCounter, kmerCounter[0], allKmerCounter ); } free ( ( void * ) rcSeq ); free ( ( void * ) kmerCounter ); for ( i = 0; i < maxReadNum; i++ ) { free ( ( void * ) seqBuffer[i] ); } free ( ( void * ) seqBuffer ); free ( ( void * ) lenBuffer ); free ( ( void * ) indexArray ); free ( ( void * ) kmerBuffer ); free ( ( void * ) hashBanBuffer ); free ( ( void * ) nextcBuffer ); free ( ( void * ) prevcBuffer ); free ( ( void * ) next_name ); free ( ( void * ) aioBuffer1 ); free ( ( void * ) aioBuffer2 ); free ( ( void * ) readBuffer1 ); free ( ( void * ) readBuffer2 ); free ( ( void * ) cach1 ); free ( ( void * ) cach2 ); fprintf ( stderr, "done hashing nodes\n" ); if ( deLowKmer ) { time ( &start_t ); deLowCov ( thrdSignal ); time ( &stop_t ); fprintf ( stderr, "Time spent on delowcvgNode: %ds.\n", ( int ) ( stop_t - start_t ) ); } time ( &start_t ); Mark1in1outNode ( thrdSignal ); freqStat ( outfile ); time ( &stop_t ); fprintf ( stderr, "Time spent on marking linear nodes: %ds.\n", ( int ) ( stop_t - start_t ) ); sendWorkSignal ( 3, thrdSignal ); //exit thread_wait ( threads ); return 1; }
void prlRead2edge (char *libfile, char *outfile) { char *cach1; char *cach2; unsigned char asm_ctg = 1; long long i; char name[256], *src_name, *next_name; FILE *outfp = NULL; int maxReadNum, libNo; boolean flag, pairs = 0; pthread_t threads[thrd_num]; unsigned char thrdSignal[thrd_num + 1]; PARAMETER paras[thrd_num]; maxReadLen = 0; maxNameLen = 256; scan_libInfo (libfile); alloc_pe_mem (num_libs); if (!maxReadLen) { maxReadLen = 100; } maxReadLen4all = maxReadLen; printf ("In file: %s, max seq len %d, max name len %d\n\n", libfile, maxReadLen, maxNameLen); if (repsTie) { sprintf (name, "%s.path", outfile); outfp = ckopen (name, "wb"); } src_name = (char *) ckalloc ((maxNameLen + 1) * sizeof (char)); next_name = (char *) ckalloc ((10*maxNameLen + 1) * sizeof (char)); kmerBuffer = (Kmer *) ckalloc (buffer_size * sizeof (Kmer)); mixBuffer = (Kmer *) ckalloc (buffer_size * sizeof (Kmer)); hashBanBuffer = (ubyte8 *) ckalloc (buffer_size * sizeof (ubyte8)); nodeBuffer = (kmer_t **) ckalloc (buffer_size * sizeof (kmer_t *)); smallerBuffer = (boolean *) ckalloc (buffer_size * sizeof (boolean)); flagArray = (boolean *) ckalloc (buffer_size * sizeof (boolean)); maxReadNum = buffer_size / (maxReadLen - overlaplen + 1); //printf("buffer for at most %d reads\n",maxReadNum); int maxAIOSize = 32768;/* aioBuffer1 = (char *) ckalloc ((maxAIOSize) * sizeof (char)); aioBuffer2 = (char *) ckalloc ((maxAIOSize) * sizeof (char)); readBuffer1 = (char *) ckalloc ((maxAIOSize + 1024) * sizeof (char)); //(char *)ckalloc(maxAIOSize*sizeof(char)); readBuffer2 = (char *) ckalloc ((maxAIOSize + 1024) * sizeof (char)); cach1 = (char *) ckalloc (1024 * sizeof (char)); cach2 = (char *) ckalloc (1024 * sizeof (char)); memset(cach1,'\0',1024); memset(cach2,'\0',1024);*/ aioBuffer1 = (char *) ckalloc ((maxAIOSize) * sizeof (char)); aioBuffer2 = (char *) ckalloc ((maxAIOSize) * sizeof (char)); readBuffer1 = (char *) ckalloc ((maxAIOSize + (maxReadLen+1024)) * sizeof (char)); //(char *)ckalloc(maxAIOSize*sizeof(char)); //1024 readBuffer2 = (char *) ckalloc ((maxAIOSize + (maxReadLen+1024)) * sizeof (char)); //1024 cach1 = (char *) ckalloc ((maxReadLen+1024) * sizeof (char)); //1024 cach2 = (char *) ckalloc ((maxReadLen+1024) * sizeof (char)); //1024 memset(cach1,'\0',(maxReadLen+1024)); //1024 memset(cach2,'\0',(maxReadLen+1024)); //1024 seqBuffer = (char **) ckalloc (maxReadNum * sizeof (char *)); lenBuffer = (int *) ckalloc (maxReadNum * sizeof (int)); indexArray = (int *) ckalloc ((maxReadNum + 1) * sizeof (int)); for (i = 0; i < maxReadNum; i++) { seqBuffer[i] = (char *) ckalloc (maxReadLen * sizeof (char)); } memoAlloc4preArc (); flags = (char **) ckalloc ((thrd_num + 1) * sizeof (char *)); deletion = (int *) ckalloc ((thrd_num + 1) * sizeof (int)); rcSeq = (char **) ckalloc ((thrd_num + 1) * sizeof (char *)); if (repsTie) { markerOnEdge = (unsigned char *) ckalloc ((num_ed + 1) * sizeof (unsigned char)); for (i = 1; i <= num_ed; i++) { markerOnEdge[i] = 0; } fwriteBuf = (unsigned int *) ckalloc ((maxReadLen - overlaplen + 1) * sizeof (unsigned int)); } thrdSignal[0] = 0; if (1) { preArc_mem_managers = (MEM_MANAGER **) ckalloc (thrd_num * sizeof (MEM_MANAGER *)); arcCounters = (unsigned int *) ckalloc (thrd_num * sizeof (unsigned int)); for (i = 0; i < thrd_num; i++) { arcCounters[i] = 0; preArc_mem_managers[i] = createMem_manager (preARCBLOCKSIZE, sizeof (preARC)); deletion[i + 1] = 0; flags[i + 1] = (char *) ckalloc (2 * maxReadLen * sizeof (char)); rcSeq[i + 1] = (char *) ckalloc (maxReadLen * sizeof (char)); thrdSignal[i + 1] = 0; paras[i].threadID = i; paras[i].mainSignal = &thrdSignal[0]; paras[i].selfSignal = &thrdSignal[i + 1]; } creatThrds (threads, paras); } if (1) { deletion[0] = 0; flags[0] = (char *) ckalloc (2 * maxReadLen * sizeof (char)); rcSeq[0] = (char *) ckalloc (maxReadLen * sizeof (char)); } kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0; int t0, t1, t2, t3, t4, t5, t6; t0 = t1 = t2 = t3 = t4 = t5 = t6 = 0; time_t read_start, read_end, time_bef, time_aft; time (&read_start); while (openNextFile (&libNo, pairs, asm_ctg)) { if (lib_array[libNo].curr_type == 4) { int type = 0; //deside the PE reads is good or bad while ((flag = read1seqInLibBam (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), &libNo, pairs, 1, &type)) != 0) { if (type == -1) //if the reads is bad, go back. { i--; if (lenBuffer[read_c - 1] >= overlaplen + 1) { kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1; read_c--; } n_solexa -= 2; continue; } if ((++i) % 1000000 == 0) { printf ("--- %lldth reads\n", i); } if (lenBuffer[read_c] < overlaplen + 1) { continue; } //if(lenBuffer[read_c]>70) // lenBuffer[read_c] = 70; //else if(lenBuffer[read_c]>40) // lenBuffer[read_c] = 40; indexArray[read_c] = kmer_c; kmer_c += lenBuffer[read_c] - overlaplen + 1; read_c++; if (read_c == maxReadNum) { indexArray[read_c] = kmer_c; time (&read_end); t0 += read_end - read_start; time (&time_bef); sendWorkSignal (2, thrdSignal); time (&time_aft); t1 += time_aft - time_bef; time (&time_bef); sendWorkSignal (1, thrdSignal); time (&time_aft); t2 += time_aft - time_bef; time (&time_bef); sendWorkSignal (3, thrdSignal); time (&time_aft); t3 += time_aft - time_bef; time (&time_bef); sendWorkSignal (4, thrdSignal); time (&time_aft); t4 += time_aft - time_bef; time (&time_bef); sendWorkSignal (6, thrdSignal); time (&time_aft); t5 += time_aft - time_bef; time (&time_bef); //recordPreArc(); if (repsTie) { recordPathBin (outfp); } time (&time_aft); t6 += time_aft - time_bef; //output_path(read_c,edge_no,flags,outfp); kmer_c = 0; read_c = 0; time (&read_start); } } } else if (lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2) { initAIO (&aio1, aioBuffer1, fileno (lib_array[libNo].fp1), maxAIOSize); initAIO (&aio2, aioBuffer2, fileno (lib_array[libNo].fp2), maxAIOSize); int offset1, offset2, flag1, flag2, rt1, rt2; offset1 = offset2 = 0; rt1 = aio_read (&aio1); rt2 = aio_read (&aio2); flag1 = AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type); flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type); if(flag1 && flag2) { int start1, start2, turn; start1 = start2 = 0; turn = 1; while (start1 < offset1 || start2 < offset2) { if (turn == 1) { turn = 2; readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer1, &start1, offset1, libNo); if ((++i) % 1000000 == 0) printf ("--- %lldth reads\n", i); /* if (lenBuffer[read_c] < overlaplen + 1) continue;*/ if (lenBuffer[read_c] < overlaplen + 1) { if(start1>=offset1) { start1=0; flag1=AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type); } continue; } indexArray[read_c] = kmer_c; kmer_c += lenBuffer[read_c] - overlaplen + 1; read_c++; if(start1>=offset1){ start1=0; flag1=AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type); } if (read_c == maxReadNum) { indexArray[read_c] = kmer_c; time (&read_end); t0 += read_end - read_start; time (&time_bef); sendWorkSignal (2, thrdSignal); time (&time_aft); t1 += time_aft - time_bef; time (&time_bef); sendWorkSignal (1, thrdSignal); time (&time_aft); t2 += time_aft - time_bef; time (&time_bef); sendWorkSignal (3, thrdSignal); time (&time_aft); t3 += time_aft - time_bef; time (&time_bef); sendWorkSignal (4, thrdSignal); time (&time_aft); t4 += time_aft - time_bef; time (&time_bef); sendWorkSignal (6, thrdSignal); time (&time_aft); t5 += time_aft - time_bef; time (&time_bef); //recordPreArc(); if (repsTie) recordPathBin (outfp); time (&time_aft); t6 += time_aft - time_bef; //output_path(read_c,edge_no,flags,outfp); kmer_c = 0; read_c = 0; time (&read_start); } continue; } if (turn == 2) { turn = 1; readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer2, &start2, offset2, libNo); if ((++i) % 1000000 == 0) printf ("--- %lldth reads\n", i); /* if (lenBuffer[read_c] < overlaplen + 1) continue;*/ if (lenBuffer[read_c] < overlaplen + 1) { if((flag2 == 2) && (start2 >= offset2)) break; if(start2 >= offset2) { start2=0; flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type); } continue; } indexArray[read_c] = kmer_c; kmer_c += lenBuffer[read_c] - overlaplen + 1; read_c++; if((flag2 == 2) && (start2 >= offset2)) break; if(start2 >= offset2){ start2=0; flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type); } if (read_c == maxReadNum){ indexArray[read_c] = kmer_c; time (&read_end); t0 += read_end - read_start; time (&time_bef); sendWorkSignal (2, thrdSignal); time (&time_aft); t1 += time_aft - time_bef; time (&time_bef); sendWorkSignal (1, thrdSignal); time (&time_aft); t2 += time_aft - time_bef; time (&time_bef); sendWorkSignal (3, thrdSignal); time (&time_aft); t3 += time_aft - time_bef; time (&time_bef); sendWorkSignal (4, thrdSignal); time (&time_aft); t4 += time_aft - time_bef; time (&time_bef); sendWorkSignal (6, thrdSignal); time (&time_aft); t5 += time_aft - time_bef; time (&time_bef); //recordPreArc(); if (repsTie) recordPathBin (outfp); time (&time_aft); t6 += time_aft - time_bef; //output_path(read_c,edge_no,flags,outfp); kmer_c = 0; read_c = 0; time (&read_start); } continue; } } } } else { initAIO (&aio1, aioBuffer1, fileno (lib_array[libNo].fp1), maxAIOSize); int offset, flag1, rt; offset = 0; rt = aio_read (&aio1); while ((flag1 = AIORead (&aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type))) { int start = 0; while (start < offset) { readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer1, &start, offset, libNo); if ((++i) % 1000000 == 0) printf ("--- %lld reads\n", i); if (lenBuffer[read_c] < overlaplen + 1) continue; indexArray[read_c] = kmer_c; kmer_c += lenBuffer[read_c] - overlaplen + 1; read_c++; if (read_c > maxReadNum - 1024) { indexArray[read_c] = kmer_c; time (&read_end); t0 += read_end - read_start; time (&time_bef); sendWorkSignal (2, thrdSignal); time (&time_aft); t1 += time_aft - time_bef; time (&time_bef); sendWorkSignal (1, thrdSignal); time (&time_aft); t2 += time_aft - time_bef; time (&time_bef); sendWorkSignal (3, thrdSignal); time (&time_aft); t3 += time_aft - time_bef; time (&time_bef); sendWorkSignal (4, thrdSignal); time (&time_aft); t4 += time_aft - time_bef; time (&time_bef); sendWorkSignal (6, thrdSignal); time (&time_aft); t5 += time_aft - time_bef; time (&time_bef); //recordPreArc(); if (repsTie) recordPathBin (outfp); time (&time_aft); t6 += time_aft - time_bef; //output_path(read_c,edge_no,flags,outfp); kmer_c = 0; read_c = 0; time (&read_start); } } if (flag1 == 2) break; } } } printf ("%lld reads processed\n", i); printf ("time %d,%d,%d,%d,%d,%d,%d\n", t0, t1, t2, t3, t4, t5, t6); if (read_c) { indexArray[read_c] = kmer_c; sendWorkSignal (2, thrdSignal); sendWorkSignal (1, thrdSignal); sendWorkSignal (3, thrdSignal); sendWorkSignal (4, thrdSignal); sendWorkSignal (6, thrdSignal); //recordPreArc(); if (repsTie) { recordPathBin (outfp); } } printf ("%lld markers outputed\n", markCounter); sendWorkSignal (5, thrdSignal); thread_wait (threads); output_arcs (outfile); memoFree4preArc (); if (1) // multi-threads { arcCounter = 0; for (i = 0; i < thrd_num; i++) { arcCounter += arcCounters[i]; free ((void *) flags[i + 1]); deletion[0] += deletion[i + 1]; free ((void *) rcSeq[i + 1]); } } if (1) { free ((void *) flags[0]); free ((void *) rcSeq[0]); } printf ("done mapping reads, %d reads deleted, %lld arcs created\n", deletion[0], arcCounter); if (repsTie) { free ((void *) markerOnEdge); free ((void *) fwriteBuf); } free ((void *) arcCounters); free ((void *) rcSeq); for (i = 0; i < maxReadNum; i++) { free ((void *) seqBuffer[i]); } free ((void *) seqBuffer); free ((void *) lenBuffer); free ((void *) indexArray); free ((void *) flags); free ((void *) deletion); free ((void *) kmerBuffer); free ((void *) mixBuffer); free ((void *) smallerBuffer); free ((void *) flagArray); free ((void *) hashBanBuffer); free ((void *) nodeBuffer); free ((void *) src_name); free ((void *) next_name); free ((void *) aioBuffer1); free ((void *) aioBuffer2); free ((void *) readBuffer1); free ((void *) readBuffer2); free ((void *) cach1); free ((void *) cach2); if (repsTie) { fclose (outfp); } free_pe_mem (); free_libs (); }