Esempio n. 1
0
/*************************************************
Function:
    prlRead2HashTable
Description:
    1. Imports the reads from the lib file one by one.
    2. Chops the reads into kmers and stores them in KmerSets.
    3. Removes the kmers with low coverage.
    4. Marks the linear kmers.
    5. Counts the kmer frequencies.
Input:
    1. libfile :        the reads config file
    2. outfile :        the output file prefix
Output:
    None.
Return:
    1 if it exits normally.
*************************************************/
boolean prlRead2HashTable ( char * libfile, char * outfile )
{
	char * cach1;
	char * cach2;
	unsigned char asm_ctg = 1;
	long long i;
	char * next_name, name[256];
	FILE * fo;
	time_t start_t, stop_t;
	int maxReadNum;
	int libNo;
	pthread_t threads[thrd_num];
	// Slot 0 is handed to every worker as mainSignal, slot i + 1 as worker i's selfSignal.
	unsigned char thrdSignal[thrd_num + 1];
	PARAMETER paras[thrd_num];
	boolean flag, pairs = 0;
	WORDFILTER = createFilter ( overlaplen );
	maxReadLen = 0;
	maxNameLen = 256;
	scan_libInfo ( libfile );
	alloc_pe_mem ( num_libs );

	// Fall back to 100 when maxReadLen is still 0 here
	// (presumably scan_libInfo updates it from the lib file -- confirm).
	if ( !maxReadLen )
	{
		maxReadLen = 100;
	}

	// NOTE(review): gStr is replaced without freeing the old buffer or updating
	// gLineLen; ownership is not visible from this function.
	if ( gLineLen < maxReadLen )
	{
		gStr = ( char * ) ckalloc ( ( maxReadLen + 1 ) * sizeof ( char ) );
	}

	//init
	maxReadLen4all = maxReadLen;
	fprintf ( stderr, "In %s, %d lib(s), maximum read length %d, maximum name length %d.\n\n", libfile, num_libs, maxReadLen, maxNameLen );
	next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
	kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
	hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
	prevcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
	nextcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
	// A read of maxReadLen yields maxReadLen - overlaplen + 1 kmers, so this many
	// reads per batch can never produce more than buffer_size kmers.
	maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
	int maxAIOSize = 32768;
	aioBuffer1 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
	aioBuffer2 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
	// The read buffers hold one AIO chunk plus whatever the cache buffers can carry.
	readBuffer1 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) );
	readBuffer2 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) );
	cach1 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) );
	cach2 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) );
	memset ( cach1, '\0', ( maxReadLen * 4 + 1024 ) );
	memset ( cach2, '\0', ( maxReadLen * 4 + 1024 ) );
	seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
	lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
	indexArray = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );

	for ( i = 0; i < maxReadNum; i++ )
	{
		seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
	}

	rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );

	if ( 1 )
	{
		kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
		KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
		ubyte8 init_size = 1024;
		ubyte8 k = 0;

		if ( initKmerSetSize )
		{
			// initKmerSetSize is scaled by 2^30 and split across the threads; the
			// divisor (40 or 24) is presumably the per-entry byte cost -- the
			// original author was unsure too ("is it true?").
#ifdef MER127
			init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 40 );
#else
			init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 24 ); //is it true?
#endif

			// Smallest k >= 1 such that k * 0xFFFFFF covers init_size.
			do
			{
				++k;
			}
			while ( k * 0xFFFFFFLLU < init_size );
		}

		for ( i = 0; i < thrd_num; i++ )
		{
			KmerSets[i] = init_kmerset ( ( ( initKmerSetSize ) ? ( k * 0xFFFFFFLLU ) : ( init_size ) ), 0.77f );
			thrdSignal[i + 1] = 0;
			paras[i].threadID = i;
			paras[i].mainSignal = &thrdSignal[0];
			paras[i].selfSignal = &thrdSignal[i + 1];
			kmerCounter[i + 1] = 0;
			rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
		}

		creatThrds ( threads, paras );
	}

	thrdSignal[0] = kmerCounter[0] = 0;
	time ( &start_t );
	kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;

	while ( openNextFile ( &libNo, pairs, asm_ctg ) )
	{
		//read bam file
		if ( lib_array[libNo].curr_type == 4 )
		{
			int type = 0;   //decides whether the PE reads are good or bad

			while ( ( flag = read1seqInLibBam ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), &libNo, pairs, 1, &type ) ) != 0 )
			{
				if ( type == -1 ) //the read is bad: undo the previously buffered read
				{
					i--;

					// read_c is 0 right after a batch flush (or before anything was
					// buffered); there is no previous slot to roll back then, and
					// lenBuffer[-1] must not be touched.
					if ( read_c > 0 && lenBuffer[read_c - 1] >= overlaplen + 1 )
					{
						kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
						read_c--;
					}

					n_solexa -= 2;
					continue;
				}

				if ( ( ++i ) % 100000000 == 0 )
					{ fprintf ( stderr, "--- %lldth reads.\n", i ); }

				if ( lenBuffer[read_c] < 0 )
					{ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }

				// Reads shorter than overlaplen + 1 are dropped: the slot is reused.
				if ( lenBuffer[read_c] < overlaplen + 1 )
					{ continue; }

				indexArray[read_c] = kmer_c;
				kmer_c += lenBuffer[read_c] - overlaplen + 1;
				read_c++;

				if ( read_c == maxReadNum )
				{
					kmerCounter[0] += kmer_c;
					sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
					sendWorkSignal ( 1, thrdSignal ); //singleKmer
					kmer_c = read_c = 0;
				}
			}
		}
		//read PE fasta or fastq
		else if ( lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2 )
		{
			initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
			initAIO ( &aio2, aioBuffer2, fileno ( lib_array[libNo].fp2 ), maxAIOSize );
			int offset1, offset2, flag1, flag2, rt1, rt2;
			offset1 = offset2 = 0;
			rt1 = aio_read ( &aio1 );
			rt2 = aio_read ( &aio2 );
			flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
			flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );

			if ( flag1 && flag2 )
			{
				int start1, start2, turn;
				start1 = start2 = 0;
				turn = 1;

				// Alternate between the two files: one read from file 1, then one
				// from file 2, refilling a buffer as soon as it is consumed.
				while ( start1 < offset1 || start2 < offset2 )
				{
					if ( turn == 1 )
					{
						turn = 2;
						readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );

						if ( ( ++i ) % 100000000 == 0 )
							{ fprintf ( stderr, "--- %lldth reads.\n", i ); }

						if ( lenBuffer[read_c] < 0 )
							{ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }

						if ( lenBuffer[read_c] < overlaplen + 1 )
						{
							if ( start1 >= offset1 )
							{
								start1 = 0;
								offset1 = 0;
								flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
							}

							continue;
						}

						indexArray[read_c] = kmer_c;
						kmer_c += lenBuffer[read_c] - overlaplen + 1;
						read_c++;

						if ( start1 >= offset1 )
						{
							start1 = 0;
							offset1 = 0;
							flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
						}

						if ( read_c == maxReadNum )
						{
							kmerCounter[0] += kmer_c;
							sendWorkSignal ( 2, thrdSignal );   //chopKmer4read
							sendWorkSignal ( 1, thrdSignal );   //singleKmer
							kmer_c = read_c = 0;
						}

						continue;
					}

					if ( turn == 2 )
					{
						turn = 1;
						readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );

						if ( ( ++i ) % 100000000 == 0 )
							{ fprintf ( stderr, "--- %lldth reads.\n", i ); }

						if ( lenBuffer[read_c] < 0 )
							{ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }

						if ( lenBuffer[read_c] < overlaplen + 1 )
						{
							// flag2 == 2 with an exhausted buffer ends this library
							// (presumably AIORead returns 2 for the final chunk -- confirm).
							if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
								{ break; }

							if ( start2 >= offset2 )
							{
								start2 = 0;
								offset2 = 0;
								flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
							}

							continue;
						}

						indexArray[read_c] = kmer_c;
						kmer_c += lenBuffer[read_c] - overlaplen + 1;
						read_c++;

						if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
							{ break; }

						if ( start2 >= offset2 )
						{
							start2 = 0;
							offset2 = 0;
							flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
						}

						if ( read_c == maxReadNum )
						{
							kmerCounter[0] += kmer_c;
							sendWorkSignal ( 2, thrdSignal );   //chopKmer4read
							sendWorkSignal ( 1, thrdSignal );   //singleKmer
							kmer_c = read_c = 0;
						}

						continue;
					}
				}
			}
			else
			{
				fprintf(stderr, "Error: aio_read error.\n");
			}
		}
		//read single fasta, single fastq and PE fasta in one file
		else
		{
			initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
			int offset, flag1, rt;
			offset = 0;
			rt = aio_read ( &aio1 );

			while ( ( flag1 = AIORead ( &aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type ) ) )
			{
				int start = 0;

				while ( start < offset )
				{
					readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );

					if ( ( ++i ) % 100000000 == 0 )
						{ fprintf ( stderr, "--- %lldth reads.\n", i ); }

					if ( lenBuffer[read_c] < 0 )
						{ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }

					if ( lenBuffer[read_c] < overlaplen + 1 )
						{ continue; }

					indexArray[read_c] = kmer_c;
					kmer_c += lenBuffer[read_c] - overlaplen + 1;
					read_c++;

					// Flush per read rather than per AIO chunk: a chunk holding more
					// than 1024 short reads would otherwise push read_c past
					// maxReadNum and overrun seqBuffer/lenBuffer/indexArray.
					if ( read_c > maxReadNum - 1024 )
					{
						kmerCounter[0] += kmer_c;
						sendWorkSignal ( 2, thrdSignal );   //chopKmer4read
						sendWorkSignal ( 1, thrdSignal );   //singleKmer
						kmer_c = read_c = 0;
					}
				}

				if ( flag1 == 2 )
					{ break; }
			}
		}
	}

	// Process whatever is left in the last, partially filled batch.
	if ( read_c )
	{
		kmerCounter[0] += kmer_c;
		sendWorkSignal ( 2, thrdSignal );   //chopKmer4read
		sendWorkSignal ( 1, thrdSignal );   //singleKmer
	}

	time ( &stop_t );
	fprintf ( stderr, "Time spent on hashing reads: %ds, %lld read(s) processed.\n", ( int ) ( stop_t - start_t ), i );

	//record insert size info (pairs is fixed to 0 in this function, so this is currently skipped)
	if ( pairs )
	{
		if ( gradsCounter )
			{ fprintf ( stderr, "%d pe insert size, the largest boundary is %lld.\n\n", gradsCounter, pes[gradsCounter - 1].PE_bound ); }
		else
		{
			fprintf ( stderr, "No paired reads found.\n" );
		}

		// Bounded: outfile is caller-supplied and name is only 256 bytes.
		snprintf ( name, sizeof ( name ), "%s.peGrads", outfile );
		fo = ckopen ( name, "w" );
		fprintf ( fo, "grads&num: %d\t%lld\n", gradsCounter, n_solexa );

		for ( i = 0; i < gradsCounter; i++ )
		{
			fprintf ( fo, "%d\t%lld\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank );
		}

		fclose ( fo );
	}

	free_pe_mem ();
	free_libs ();

	if ( 1 )
	{
		unsigned long long alloCounter = 0;
		unsigned long long allKmerCounter = 0;

		for ( i = 0; i < thrd_num; i++ )
		{
			alloCounter += count_kmerset ( ( KmerSets[i] ) );
			allKmerCounter += kmerCounter[i + 1];
			free ( ( void * ) rcSeq[i + 1] );
		}

		// %llu for the unsigned totals, %lld for the signed kmerCounter[0].
		fprintf ( stderr, "%llu node(s) allocated, %lld kmer(s) in reads, %llu kmer(s) processed.\n", alloCounter, kmerCounter[0], allKmerCounter );
	}

	free ( ( void * ) rcSeq );
	free ( ( void * ) kmerCounter );

	for ( i = 0; i < maxReadNum; i++ )
	{
		free ( ( void * ) seqBuffer[i] );
	}

	free ( ( void * ) seqBuffer );
	free ( ( void * ) lenBuffer );
	free ( ( void * ) indexArray );
	free ( ( void * ) kmerBuffer );
	free ( ( void * ) hashBanBuffer );
	free ( ( void * ) nextcBuffer );
	free ( ( void * ) prevcBuffer );
	free ( ( void * ) next_name );
	free ( ( void * ) aioBuffer1 );
	free ( ( void * ) aioBuffer2 );
	free ( ( void * ) readBuffer1 );
	free ( ( void * ) readBuffer2 );
	free ( ( void * ) cach1 );
	free ( ( void * ) cach2 );
	fprintf ( stderr, "done hashing nodes\n" );

	// The worker threads are still alive here; they are only told to exit below.
	if ( deLowKmer )
	{
		time ( &start_t );
		deLowCov ( thrdSignal );
		time ( &stop_t );
		fprintf ( stderr, "Time spent on delowcvgNode: %ds.\n", ( int ) ( stop_t - start_t ) );
	}

	time ( &start_t );
	Mark1in1outNode ( thrdSignal );
	freqStat ( outfile );
	time ( &stop_t );
	fprintf ( stderr, "Time spent on marking linear nodes: %ds.\n", ( int ) ( stop_t - start_t ) );
	sendWorkSignal ( 3, thrdSignal );   //exit
	thread_wait ( threads );
	return 1;
}
Esempio n. 2
0
void prlRead2edge (char *libfile, char *outfile)
{
	char *cach1;
	char *cach2;
	unsigned char asm_ctg = 1;

	long long i;
	char name[256], *src_name, *next_name;
	FILE *outfp = NULL;
	int maxReadNum, libNo;
	boolean flag, pairs = 0;
	pthread_t threads[thrd_num];
	unsigned char thrdSignal[thrd_num + 1];
	PARAMETER paras[thrd_num];

	maxReadLen = 0;
	maxNameLen = 256;
	scan_libInfo (libfile);
	alloc_pe_mem (num_libs);

	if (!maxReadLen)
	{
		maxReadLen = 100;
	}

	maxReadLen4all = maxReadLen;
	printf ("In file: %s, max seq len %d, max name len %d\n\n", libfile, maxReadLen, maxNameLen);

	if (repsTie)
	{
		sprintf (name, "%s.path", outfile);
		outfp = ckopen (name, "wb");
	}

	src_name = (char *) ckalloc ((maxNameLen + 1) * sizeof (char));
	next_name = (char *) ckalloc ((10*maxNameLen + 1) * sizeof (char));
	kmerBuffer = (Kmer *) ckalloc (buffer_size * sizeof (Kmer));
	mixBuffer = (Kmer *) ckalloc (buffer_size * sizeof (Kmer));
	hashBanBuffer = (ubyte8 *) ckalloc (buffer_size * sizeof (ubyte8));
	nodeBuffer = (kmer_t **) ckalloc (buffer_size * sizeof (kmer_t *));
	smallerBuffer = (boolean *) ckalloc (buffer_size * sizeof (boolean));
	flagArray = (boolean *) ckalloc (buffer_size * sizeof (boolean));
	maxReadNum = buffer_size / (maxReadLen - overlaplen + 1);
	//printf("buffer for at most %d reads\n",maxReadNum);
	
	int maxAIOSize = 32768;/*
	aioBuffer1 = (char *) ckalloc ((maxAIOSize) * sizeof (char));
	aioBuffer2 = (char *) ckalloc ((maxAIOSize) * sizeof (char));
	readBuffer1 = (char *) ckalloc ((maxAIOSize + 1024) * sizeof (char));	//(char *)ckalloc(maxAIOSize*sizeof(char));
	readBuffer2 = (char *) ckalloc ((maxAIOSize + 1024) * sizeof (char));
	cach1 = (char *) ckalloc (1024 * sizeof (char));
	cach2 = (char *) ckalloc (1024 * sizeof (char));
	memset(cach1,'\0',1024);
	memset(cach2,'\0',1024);*/
        aioBuffer1 = (char *) ckalloc ((maxAIOSize) * sizeof (char));
        aioBuffer2 = (char *) ckalloc ((maxAIOSize) * sizeof (char));
        readBuffer1 = (char *) ckalloc ((maxAIOSize + (maxReadLen+1024)) * sizeof (char));      //(char *)ckalloc(maxAIOSize*sizeof(char));     //1024
        readBuffer2 = (char *) ckalloc ((maxAIOSize + (maxReadLen+1024)) * sizeof (char));      //1024
        cach1 = (char *) ckalloc ((maxReadLen+1024) * sizeof (char));   //1024
        cach2 = (char *) ckalloc ((maxReadLen+1024) * sizeof (char));   //1024
        memset(cach1,'\0',(maxReadLen+1024));   //1024
        memset(cach2,'\0',(maxReadLen+1024));   //1024


	seqBuffer = (char **) ckalloc (maxReadNum * sizeof (char *));
	lenBuffer = (int *) ckalloc (maxReadNum * sizeof (int));
	indexArray = (int *) ckalloc ((maxReadNum + 1) * sizeof (int));

	for (i = 0; i < maxReadNum; i++)
	{
		seqBuffer[i] = (char *) ckalloc (maxReadLen * sizeof (char));
	}

	memoAlloc4preArc ();
	flags = (char **) ckalloc ((thrd_num + 1) * sizeof (char *));
	deletion = (int *) ckalloc ((thrd_num + 1) * sizeof (int));
	rcSeq = (char **) ckalloc ((thrd_num + 1) * sizeof (char *));

	if (repsTie)
	{
		markerOnEdge = (unsigned char *) ckalloc ((num_ed + 1) * sizeof (unsigned char));

		for (i = 1; i <= num_ed; i++)
		{
			markerOnEdge[i] = 0;
		}

		fwriteBuf = (unsigned int *) ckalloc ((maxReadLen - overlaplen + 1) * sizeof (unsigned int));
	}

	thrdSignal[0] = 0;

	if (1)
	{
		preArc_mem_managers = (MEM_MANAGER **) ckalloc (thrd_num * sizeof (MEM_MANAGER *));
		arcCounters = (unsigned int *) ckalloc (thrd_num * sizeof (unsigned int));

		for (i = 0; i < thrd_num; i++)
		{
			arcCounters[i] = 0;
			preArc_mem_managers[i] = createMem_manager (preARCBLOCKSIZE, sizeof (preARC));
			deletion[i + 1] = 0;
			flags[i + 1] = (char *) ckalloc (2 * maxReadLen * sizeof (char));
			rcSeq[i + 1] = (char *) ckalloc (maxReadLen * sizeof (char));
			thrdSignal[i + 1] = 0;
			paras[i].threadID = i;
			paras[i].mainSignal = &thrdSignal[0];
			paras[i].selfSignal = &thrdSignal[i + 1];
		}

		creatThrds (threads, paras);
	}

	if (1)
	{
		deletion[0] = 0;
		flags[0] = (char *) ckalloc (2 * maxReadLen * sizeof (char));
		rcSeq[0] = (char *) ckalloc (maxReadLen * sizeof (char));
	}

	kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;
	int t0, t1, t2, t3, t4, t5, t6;

	t0 = t1 = t2 = t3 = t4 = t5 = t6 = 0;
	time_t read_start, read_end, time_bef, time_aft;

	time (&read_start);
	
		while (openNextFile (&libNo, pairs, asm_ctg))
	{
		if (lib_array[libNo].curr_type == 4)
		{
			int type = 0;	//deside the PE reads is good or bad

			while ((flag = read1seqInLibBam (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), &libNo, pairs, 1, &type)) != 0)
			{
				if (type == -1)	//if the reads is bad, go back.
				{
					i--;
					if (lenBuffer[read_c - 1] >= overlaplen + 1)
					{
						kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
						read_c--;
					}
					n_solexa -= 2;
					continue;
				}
				if ((++i) % 1000000 == 0)
				{
					printf ("--- %lldth reads\n", i);
				}

				if (lenBuffer[read_c] < overlaplen + 1)
				{
					continue;
				}

				//if(lenBuffer[read_c]>70)
				//    lenBuffer[read_c] = 70;
				//else if(lenBuffer[read_c]>40)
				//    lenBuffer[read_c] = 40;

				indexArray[read_c] = kmer_c;
				kmer_c += lenBuffer[read_c] - overlaplen + 1;
				read_c++;

				if (read_c == maxReadNum)
				{
					indexArray[read_c] = kmer_c;
					time (&read_end);
					t0 += read_end - read_start;
					time (&time_bef);
					sendWorkSignal (2, thrdSignal);
					time (&time_aft);
					t1 += time_aft - time_bef;
					time (&time_bef);
					sendWorkSignal (1, thrdSignal);
					time (&time_aft);
					t2 += time_aft - time_bef;
					time (&time_bef);
					sendWorkSignal (3, thrdSignal);
					time (&time_aft);
					t3 += time_aft - time_bef;
					time (&time_bef);
					sendWorkSignal (4, thrdSignal);
					time (&time_aft);
					t4 += time_aft - time_bef;
					time (&time_bef);
					sendWorkSignal (6, thrdSignal);
					time (&time_aft);
					t5 += time_aft - time_bef;
					time (&time_bef);

					//recordPreArc();
					if (repsTie)
					{
						recordPathBin (outfp);
					}

					time (&time_aft);
					t6 += time_aft - time_bef;
					//output_path(read_c,edge_no,flags,outfp);
					kmer_c = 0;
					read_c = 0;
					time (&read_start);
				}
			}
		}
		else if (lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2)
		{
			initAIO (&aio1, aioBuffer1, fileno (lib_array[libNo].fp1), maxAIOSize);
			initAIO (&aio2, aioBuffer2, fileno (lib_array[libNo].fp2), maxAIOSize);
			int offset1, offset2, flag1, flag2, rt1, rt2;

			offset1 = offset2 = 0;
			rt1 = aio_read (&aio1);
			rt2 = aio_read (&aio2);
			flag1 = AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type);
			flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type);
			if(flag1 && flag2)
			{
				int start1, start2, turn;

				start1 = start2 = 0;
				turn = 1;
				while (start1 < offset1 || start2 < offset2)
				{
					if (turn == 1)
					{
						turn = 2;
						readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer1, &start1, offset1, libNo);
						if ((++i) % 1000000 == 0)
							printf ("--- %lldth reads\n", i);
/*						if (lenBuffer[read_c] < overlaplen + 1)
							continue;*/
						if (lenBuffer[read_c] < overlaplen + 1)
						{
							if(start1>=offset1)
							{
								start1=0;
								flag1=AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type);
							}
							continue;
						}

						indexArray[read_c] = kmer_c;
						kmer_c += lenBuffer[read_c] - overlaplen + 1;
						read_c++;
						if(start1>=offset1){
							start1=0;
							flag1=AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type);
						}
						if (read_c == maxReadNum) {
							indexArray[read_c] = kmer_c;

							time (&read_end);
							t0 += read_end - read_start;
							time (&time_bef);
							sendWorkSignal (2, thrdSignal);
							time (&time_aft);
							t1 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (1, thrdSignal);
							time (&time_aft);
							t2 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (3, thrdSignal);
							time (&time_aft);
							t3 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (4, thrdSignal);
							time (&time_aft);
							t4 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (6, thrdSignal);
							time (&time_aft);
							t5 += time_aft - time_bef;
							time (&time_bef);

							//recordPreArc();
							if (repsTie)
								recordPathBin (outfp);
							time (&time_aft);
							t6 += time_aft - time_bef;
							//output_path(read_c,edge_no,flags,outfp);
							kmer_c = 0;
							read_c = 0;
							time (&read_start);
						}
						continue;
					}
					if (turn == 2)
					{
						turn = 1;
						readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer2, &start2, offset2, libNo);
						if ((++i) % 1000000 == 0)
							printf ("--- %lldth reads\n", i);
/*						if (lenBuffer[read_c] < overlaplen + 1)
							continue;*/
						if (lenBuffer[read_c] < overlaplen + 1)
						{
							if((flag2 == 2) && (start2 >= offset2))
								break;

							if(start2 >= offset2)
							{
								start2=0;
								flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type);
							}
							continue;
						}

						indexArray[read_c] = kmer_c;
						kmer_c += lenBuffer[read_c] - overlaplen + 1;
						read_c++;
						if((flag2 == 2) && (start2 >= offset2))
							break;
						if(start2 >= offset2){
			                        	start2=0;
							flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type);
						}
						if (read_c == maxReadNum){
							indexArray[read_c] = kmer_c;

							time (&read_end);
							t0 += read_end - read_start;
							time (&time_bef);
							sendWorkSignal (2, thrdSignal);
							time (&time_aft);
							t1 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (1, thrdSignal);
							time (&time_aft);
							t2 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (3, thrdSignal);
							time (&time_aft);
							t3 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (4, thrdSignal);
							time (&time_aft);
							t4 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (6, thrdSignal);
							time (&time_aft);
							t5 += time_aft - time_bef;
							time (&time_bef);

							//recordPreArc();
							if (repsTie)
								recordPathBin (outfp);
							time (&time_aft);
							t6 += time_aft - time_bef;
							//output_path(read_c,edge_no,flags,outfp);
							kmer_c = 0;
							read_c = 0;
							time (&read_start);
						}
						continue;
					}
				}
			}
		}
		else
		{
			initAIO (&aio1, aioBuffer1, fileno (lib_array[libNo].fp1), maxAIOSize);
			int offset, flag1, rt;

			offset = 0;
			rt = aio_read (&aio1);
			while ((flag1 = AIORead (&aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type)))
			{
				int start = 0;

				while (start < offset)
				{
					readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer1, &start, offset, libNo);
					if ((++i) % 1000000 == 0)
						printf ("--- %lld reads\n", i);
					if (lenBuffer[read_c] < overlaplen + 1)
						continue;
					indexArray[read_c] = kmer_c;
					kmer_c += lenBuffer[read_c] - overlaplen + 1;
					read_c++;

					if (read_c > maxReadNum - 1024)
					{
						indexArray[read_c] = kmer_c;

						time (&read_end);
						t0 += read_end - read_start;
						time (&time_bef);
						sendWorkSignal (2, thrdSignal);
						time (&time_aft);
						t1 += time_aft - time_bef;
						time (&time_bef);
						sendWorkSignal (1, thrdSignal);
						time (&time_aft);
						t2 += time_aft - time_bef;
						time (&time_bef);
						sendWorkSignal (3, thrdSignal);
						time (&time_aft);
						t3 += time_aft - time_bef;
						time (&time_bef);
						sendWorkSignal (4, thrdSignal);
						time (&time_aft);
						t4 += time_aft - time_bef;
						time (&time_bef);
						sendWorkSignal (6, thrdSignal);
						time (&time_aft);
						t5 += time_aft - time_bef;
						time (&time_bef);

						//recordPreArc();
						if (repsTie)
							recordPathBin (outfp);
						time (&time_aft);
						t6 += time_aft - time_bef;
						//output_path(read_c,edge_no,flags,outfp);
						kmer_c = 0;
						read_c = 0;
						time (&read_start);
					}
				}
				if (flag1 == 2)
					break;

			}
		}
	}

	printf ("%lld reads processed\n", i);
	printf ("time %d,%d,%d,%d,%d,%d,%d\n", t0, t1, t2, t3, t4, t5, t6);

	if (read_c)
	{
		indexArray[read_c] = kmer_c;
		sendWorkSignal (2, thrdSignal);
		sendWorkSignal (1, thrdSignal);
		sendWorkSignal (3, thrdSignal);
		sendWorkSignal (4, thrdSignal);
		sendWorkSignal (6, thrdSignal);

		//recordPreArc();
		if (repsTie)
		{
			recordPathBin (outfp);
		}
	}

	printf ("%lld markers outputed\n", markCounter);
	sendWorkSignal (5, thrdSignal);
	thread_wait (threads);
	output_arcs (outfile);
	memoFree4preArc ();

	if (1)			// multi-threads
	{
		arcCounter = 0;

		for (i = 0; i < thrd_num; i++)
		{
			arcCounter += arcCounters[i];
			free ((void *) flags[i + 1]);
			deletion[0] += deletion[i + 1];
			free ((void *) rcSeq[i + 1]);
		}
	}

	if (1)
	{
		free ((void *) flags[0]);
		free ((void *) rcSeq[0]);
	}

	printf ("done mapping reads, %d reads deleted, %lld arcs created\n", deletion[0], arcCounter);

	if (repsTie)
	{
		free ((void *) markerOnEdge);
		free ((void *) fwriteBuf);
	}

	free ((void *) arcCounters);
	free ((void *) rcSeq);

	for (i = 0; i < maxReadNum; i++)
	{
		free ((void *) seqBuffer[i]);
	}

	free ((void *) seqBuffer);
	free ((void *) lenBuffer);
	free ((void *) indexArray);
	free ((void *) flags);
	free ((void *) deletion);
	free ((void *) kmerBuffer);
	free ((void *) mixBuffer);
	free ((void *) smallerBuffer);
	free ((void *) flagArray);
	free ((void *) hashBanBuffer);
	free ((void *) nodeBuffer);
	free ((void *) src_name);
	free ((void *) next_name);
	free ((void *) aioBuffer1);
    free ((void *) aioBuffer2);
    free ((void *) readBuffer1);
    free ((void *) readBuffer2);
    free ((void *) cach1);
    free ((void *) cach2);

	if (repsTie)
	{
		fclose (outfp);
	}

	free_pe_mem ();
	free_libs ();
}
Esempio n. 3
0
/*
 * Reads the contigs from "<grapfile>.contig", batches them and hands each
 * batch to the worker threads (signals 2 then 1) so their kmers end up in
 * KmerSets. Contigs shorter than overlaplen + 1 or shorter than len_cut are
 * skipped.
 *
 * grapfile: prefix of the contig file
 * len_cut:  minimum contig length to keep
 *
 * Returns 1.
 */
boolean prlContig2nodes (char *grapfile, int len_cut)
{
	long long i, num_seq;
	char name[256], *next_name;
	FILE *fp;
	pthread_t threads[thrd_num];
	time_t start_t, stop_t;
	// Slot 0 is handed to every worker as mainSignal, slot i + 1 as worker i's selfSignal.
	unsigned char thrdSignal[thrd_num + 1];
	PARAMETER paras[thrd_num];
	int maxCtgLen, minCtgLen, nameLen;
	int nameBufLen;
	unsigned int lenSum, contigId;

	WORDFILTER = createFilter (overlaplen);
	time (&start_t);
	// Bounded: grapfile is caller-supplied and name is only 256 bytes.
	snprintf (name, sizeof (name), "%s.contig", grapfile);
	fp = ckopen (name, "r");
	maxCtgLen = nameLen = 10;
	minCtgLen = 1000;
	// First pass over the file: only collects the length statistics.
	num_seq = readseqpar (&maxCtgLen, &minCtgLen, &nameLen, fp);
	printf ("\nthere're %lld contigs in file: %s, max seq len %d, min seq len %d, max name len %d\n", num_seq, grapfile, maxCtgLen, minCtgLen, nameLen);
	maxReadLen = maxCtgLen;
	fclose (fp);
	time (&stop_t);
	printf ("time spent on parse contigs file %ds\n", (int) (stop_t - start_t));
	// Size the name buffer for the longest name reported for THIS file as well;
	// the global maxNameLen is not set in this function and may be smaller.
	nameBufLen = (nameLen > maxNameLen ? nameLen : maxNameLen) + 1;
	next_name = (char *) ckalloc (nameBufLen * sizeof (char));
	// extract all the EDONs
	seq_buffer_size = buffer_size * 2;
	max_read_c = seq_buffer_size / 20;
	kmerBuffer = (Kmer *) ckalloc (buffer_size * sizeof (Kmer));
	hashBanBuffer = (ubyte8 *) ckalloc (buffer_size * sizeof (ubyte8));
	smallerBuffer = (boolean *) ckalloc (buffer_size * sizeof (boolean));
	// All contigs of a batch are stored back to back in one flat buffer;
	// seqBreakers[k] is the start offset of the k-th contig.
	seqBuffer = (char *) ckalloc (seq_buffer_size * sizeof (char));
	lenBuffer = (int *) ckalloc (max_read_c * sizeof (int));
	indexArray = (unsigned int *) ckalloc ((max_read_c + 1) * sizeof (unsigned int));
	seqBreakers = (unsigned int *) ckalloc ((max_read_c + 1) * sizeof (unsigned int));
	ctgIdArray = (int *) ckalloc (max_read_c * sizeof (int));
	fp = ckopen (name, "r");
	rcSeq = (char **) ckalloc ((thrd_num + 1) * sizeof (char *));

	if (1)
	{
		kmerCounter = (long long *) ckalloc ((thrd_num + 1) * sizeof (long long));
		KmerSets = (KmerSet **) ckalloc (thrd_num * sizeof (KmerSet *));

		for (i = 0; i < thrd_num; i++)
		{
			KmerSets[i] = init_kmerset (1024, 0.77f);
			thrdSignal[i + 1] = 0;
			paras[i].threadID = i;
			paras[i].mainSignal = &thrdSignal[0];
			paras[i].selfSignal = &thrdSignal[i + 1];
			kmerCounter[i + 1] = 0;
			rcSeq[i + 1] = (char *) ckalloc (maxCtgLen * sizeof (char));
		}

		creatThrds (threads, paras);
	}

	kmer_c = thrdSignal[0] = kmerCounter[0] = 0;
	time (&start_t);
	read_c = lenSum = i = seqBreakers[0] = indexArray[0] = 0;
	// Priming call (last argument -1); the loop below takes the ID from
	// next_name before each further call, so next_name is presumably filled here.
	readseq1by1 (seqBuffer + seqBreakers[read_c], next_name, &(lenBuffer[read_c]), fp, -1);

	while (!feof (fp))
	{
		// ID is taken from next_name before the next call overwrites it.
		contigId = getID (next_name);
		readseq1by1 (seqBuffer + seqBreakers[read_c], next_name, &(lenBuffer[read_c]), fp, 1);

		if ((++i) % 10000000 == 0)
		{
			printf ("--- %lldth contigs\n", i);
		}

		if (lenBuffer[read_c] < overlaplen + 1 || lenBuffer[read_c] < len_cut)
		{
			// Skipped contig: its slot is reused by the next one.
			contigId = getID (next_name);
			continue;
		}

		// Fall back to the running contig number when no positive ID was parsed.
		ctgIdArray[read_c] = contigId > 0 ? contigId : i;
		lenSum += lenBuffer[read_c];
		kmer_c += lenBuffer[read_c] - overlaplen + 1;
		read_c++;
		seqBreakers[read_c] = lenSum;
		indexArray[read_c] = kmer_c;

		// Flush when another maximum-length contig might not fit in any of the buffers.
		if (read_c == max_read_c || (lenSum + maxCtgLen) > seq_buffer_size || (kmer_c + maxCtgLen - overlaplen + 1) > buffer_size)
		{
			kmerCounter[0] += kmer_c;
			sendWorkSignal (2, thrdSignal);
			sendWorkSignal (1, thrdSignal);
			kmer_c = read_c = lenSum = 0;
		}
	}

	// Process whatever is left in the last, partially filled batch.
	if (read_c)
	{
		kmerCounter[0] += kmer_c;
		sendWorkSignal (2, thrdSignal);
		sendWorkSignal (1, thrdSignal);
	}

	sendWorkSignal (3, thrdSignal);
	thread_wait (threads);
	time (&stop_t);
	printf ("time spent on hash reads: %ds\n", (int) (stop_t - start_t));

	if (1)
	{
		unsigned long long alloCounter = 0;
		unsigned long long allKmerCounter = 0;

		for (i = 0; i < thrd_num; i++)
		{
			alloCounter += count_kmerset ((KmerSets[i]));
			allKmerCounter += kmerCounter[i + 1];
			free ((void *) rcSeq[i + 1]);
		}

		// %llu for the unsigned totals, %lld for the signed kmerCounter[0].
		printf ("%llu nodes allocated, %lld kmer in reads, %llu kmer processed\n", alloCounter, kmerCounter[0], allKmerCounter);
	}

	free ((void *) rcSeq);
	free ((void *) kmerCounter);
	free ((void *) seqBuffer);
	free ((void *) lenBuffer);
	free ((void *) indexArray);
	free ((void *) seqBreakers);
	free ((void *) ctgIdArray);
	free ((void *) kmerBuffer);
	free ((void *) hashBanBuffer);
	free ((void *) smallerBuffer);
	free ((void *) next_name);
	fclose (fp);
	return 1;
}
/*************************************************
Function:
    build_preArc_threaded
Description:
    This is the main entry for building preArcs.
    1. Allocates the two read buffers and starts the main io thread
       (run_io_thread_main).
    2. Allocates and initializes one spin lock per preArc array slot.
    3. Starts the work threads (creatThrds).
    4. Repeatedly calls sendIOWorkSignal, waits until io_ready becomes
       non-zero and sends work signal 12 to the work threads; stops after
       the round in which io_ready == 2.
    5. Sends signal 3, waits for the work threads and releases the read
       buffers, the locks and the vertex hash.
Input:
    1. arc_arr:     preArc array
    2. v_ht:        vertex hash; released here with free_vertex_hash, so the
                    caller must not use it afterwards
    3. K_size:      kmer size
    4. cut_off_len:     cut off length
    5. in_filenames_vt:     input reads file names
    6. thread_num:      thread number (not read in this function; every
                        per-thread array is sized by the global thrd_num_s)
Output:
    None.
Return:
    None. The process exits with -1 if the io thread or the locks cannot
    be set up.
*************************************************/
void build_preArc_threaded ( preArc_array * arc_arr, vertex_hash2 * v_ht, int K_size, int cut_off_len, vector<string> *in_filenames_vt, int thread_num )
{
	//create main io thread
	int read_buf_sz = 102400 * thrd_num_s;
	read_buf0 = new string[read_buf_sz];
	read_buf1 = new string[read_buf_sz];
	io_stat0 = 1; //must be one, if io_stat0 =0 ,the io thread will work immediately
	io_stat1 = 1;
	io_ready = 0;
	io_para_main io_para_mains;
	io_para_mains.read_buf_sz = read_buf_sz;
	io_para_mains.in_filenames_vt = in_filenames_vt;
	pthread_t io_thread;

	// NOTE(review): io_thread is never joined or detached and it receives a
	// pointer to the stack object io_para_mains -- confirm that
	// run_io_thread_main has finished before this function returns.
	if ( pthread_create ( &io_thread, NULL, run_io_thread_main, &io_para_mains ) != 0 )
	{
		fprintf ( stderr, "ERROR: failed creating main io thread.\n" );
		exit ( -1 );
	}

	fprintf ( stderr, "1 io thread initialized.\n" );
	//create work threads ..
	pthread_t threads[thrd_num_s];
	unsigned char thrdSignal[thrd_num_s + 1];
	PARAMETER paras[thrd_num_s];
	locks = ( pthread_spinlock_t * ) calloc ( arc_arr->array_sz, sizeof ( pthread_spinlock_t ) );

	// calloc may legitimately return NULL for a zero-sized request
	if ( locks == NULL && arc_arr->array_sz != 0 )
	{
		fprintf ( stderr, "ERROR: failed allocating spin locks.\n" );
		exit ( -1 );
	}

	//init as unlock stat ..
	// The bit pattern of an unlocked pthread_spinlock_t is implementation
	// defined, so it must be set by pthread_spin_init and not by storing an
	// integer into the lock.
	for ( size_t i = 0; i < arc_arr->array_sz; ++i )
	{
		if ( pthread_spin_init ( &locks[i], PTHREAD_PROCESS_PRIVATE ) != 0 )
		{
			fprintf ( stderr, "ERROR: failed initializing spin locks.\n" );
			exit ( -1 );
		}
	}

	// slot 0 of thrdSignal is the main signal, slot k + 1 belongs to thread k
	for ( int k = 0; k < thrd_num_s; k++ )
	{
		thrdSignal[k + 1] = 0;
		paras[k].threadID = k;
		paras[k].mainSignal = &thrdSignal[0];
		paras[k].selfSignal = &thrdSignal[k + 1];
		paras[k].ht = NULL;
		paras[k].preArcs = arc_arr;
		paras[k].v_ht = v_ht;
		paras[k].cut_off_len = cut_off_len;
		paras[k].K_size = K_size;
		paras[k].gap = gap;
	}

	creatThrds ( threads, paras );
	thrdSignal[0] = 0;

	//run it
	while ( 1 )
	{
		sendIOWorkSignal();

		// poll until the io thread reports a batch
		while ( io_ready == 0 ) {usleep ( 1 );}

		if ( io_ready )
		{
			sendWorkSignal ( 12, thrdSignal );
		}

		// io_ready == 2 ends the loop after the batch above was dispatched
		if ( io_ready == 2 )
		{
			break;
		}
	}

	sendWorkSignal ( 3, thrdSignal );
	thread_wait ( threads );
	delete [] read_buf0;
	delete [] read_buf1;

	// the work threads have been waited for, so no lock is in use any more
	for ( size_t i = 0; i < arc_arr->array_sz; ++i )
	{
		pthread_spin_destroy ( &locks[i] );
	}

	free ( ( void * ) locks );
	free_vertex_hash ( v_ht );
}
Esempio n. 5
0
boolean prlRead2HashTable ( char * libfile, char * outfile )
{
	long long i;
	char * next_name, name[256];
	FILE * fo;
	time_t start_t, stop_t;
	int maxReadNum;
	int libNo;
	pthread_t threads[thrd_num];
	unsigned char thrdSignal[thrd_num + 1];
	PARAMETER paras[thrd_num];
	boolean flag, pairs = 0;
	WORDFILTER = createFilter ( overlaplen );
	maxReadLen = 0;
	maxNameLen = 256;
	scan_libInfo ( libfile );
	alloc_pe_mem ( num_libs );

	if ( !maxReadLen )
		{ maxReadLen = 100; }

	maxReadLen4all = maxReadLen;
	printf ( "In %s, %d libs, max seq len %d, max name len %d\n\n",
	         libfile, num_libs, maxReadLen, maxNameLen );
	next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
	kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
	hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
	prevcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
	nextcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
	maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
	//printf("buffer size %d, max read len %d, max read num %d\n",buffer_size,maxReadLen,maxReadNum);
	seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
	lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
	indexArray = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );

	for ( i = 0; i < maxReadNum; i++ )
		{ seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) ); }

	rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );

	if ( 1 )
	{
		kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
		KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
		ubyte8  init_size = 1024;
		ubyte8 k = 0;

		if ( initKmerSetSize )
		{
			init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 32 );

			do
			{
				++k;
			}
			while ( k * 0xFFFFFFLLU < init_size );
		}

		for ( i = 0; i < thrd_num; i++ )
		{
			//KmerSets[i] = init_kmerset(1024,0.77f);
			KmerSets[i] = init_kmerset ( k * 0xFFFFFFLLU, 0.77f );
			thrdSignal[i + 1] = 0;
			paras[i].threadID = i;
			paras[i].mainSignal = &thrdSignal[0];
			paras[i].selfSignal = &thrdSignal[i + 1];
			kmerCounter[i + 1] = 0;
			rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
		}

		creatThrds ( threads, paras );
	}

	thrdSignal[0] = kmerCounter[0] = 0;
	time ( &start_t );
	kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;

	while ( ( flag = read1seqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), &libNo, pairs, 1 ) ) != 0 )
	{
		if ( ( ++i ) % 100000000 == 0 )
			{ printf ( "--- %lldth reads\n", i ); }

		if ( lenBuffer[read_c] < 0 )
			{ printf ( "read len %d\n", lenBuffer[read_c] ); }

		if ( lenBuffer[read_c] < overlaplen + 1 )
			{ continue; }

		/*
		if(lenBuffer[read_c]>70)
		    lenBuffer[read_c] = 50;
		else if(lenBuffer[read_c]>40)
		    lenBuffer[read_c] = 40;
		*/
		indexArray[read_c] = kmer_c;
		kmer_c += lenBuffer[read_c] - overlaplen + 1;
		read_c++;

		if ( read_c == maxReadNum )
		{
			kmerCounter[0] += kmer_c;
			sendWorkSignal ( 2, thrdSignal );
			sendWorkSignal ( 1, thrdSignal );
			kmer_c = read_c = 0;
		}
	}

	if ( read_c )
	{
		kmerCounter[0] += kmer_c;
		sendWorkSignal ( 2, thrdSignal );
		sendWorkSignal ( 1, thrdSignal );
	}

	time ( &stop_t );
	printf ( "time spent on hash reads: %ds, %lld reads processed\n", ( int ) ( stop_t - start_t ), i );

	//record insert size info
	if ( pairs )
	{
		if ( gradsCounter )
			printf ( "%d pe insert size, the largest boundary is %lld\n\n",
			         gradsCounter, pes[gradsCounter - 1].PE_bound );
		else
			{ printf ( "no paired reads found\n" ); }

		sprintf ( name, "%s.peGrads", outfile );
		fo = ckopen ( name, "w" );
		fprintf ( fo, "grads&num: %d\t%lld\n", gradsCounter, n_solexa );

		for ( i = 0; i < gradsCounter; i++ )
			{ fprintf ( fo, "%d\t%lld\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank ); }

		fclose ( fo );
	}

	free_pe_mem();
	free_libs();

	if ( 1 )
	{
		unsigned long long alloCounter = 0;
		unsigned long long allKmerCounter = 0;

		for ( i = 0; i < thrd_num; i++ )
		{
			alloCounter += count_kmerset ( ( KmerSets[i] ) );
			allKmerCounter += kmerCounter[i + 1];
			free ( ( void * ) rcSeq[i + 1] );
		}

		printf ( "%lli nodes allocated, %lli kmer in reads, %lli kmer processed\n"
		         , alloCounter, kmerCounter[0], allKmerCounter );
	}

	free ( ( void * ) rcSeq );
	free ( ( void * ) kmerCounter );

	for ( i = 0; i < maxReadNum; i++ )
		{ free ( ( void * ) seqBuffer[i] ); }

	free ( ( void * ) seqBuffer );
	free ( ( void * ) lenBuffer );
	free ( ( void * ) indexArray );
	free ( ( void * ) kmerBuffer );
	free ( ( void * ) hashBanBuffer );
	free ( ( void * ) nextcBuffer );
	free ( ( void * ) prevcBuffer );
	free ( ( void * ) next_name );

	//printf("done hashing nodes\n");
	if ( deLowKmer )
	{
		time ( &start_t );
		deLowCov ( thrdSignal );
		time ( &stop_t );
		printf ( "time spent on delowcvgNode %ds\n", ( int ) ( stop_t - start_t ) );
	}

	time ( &start_t );
	Mark1in1outNode ( thrdSignal );
	freqStat ( outfile );
	time ( &stop_t );
	printf ( "time spent on marking linear nodes %ds\n", ( int ) ( stop_t - start_t ) );
	fflush ( stdout );
	sendWorkSignal ( 3, thrdSignal );
	thread_wait ( threads );
	/*
	    Kmer word = 0x21c3ca82c734c8d0;
	    Kmer hash_ban = hash_kmer(word);
	    int setPicker = hash_ban%thrd_num;
	    kmer_t *node;
	    boolean found = search_kmerset(KmerSets[setPicker], word, &node);
	    if(!found)
	        printf("kmer %llx not found,\n",word);
	    else{
	        printf("kmer %llx, linear %d\n",word,node->linear);
	        for(i=0;i<4;i++){
	            if(get_kmer_right_cov(*node,i)>0)
	                printf("right %d, kmer %llx\n",i,nextKmer(node->seq,i));
	            if(get_kmer_left_cov(*node,i)>0)
	                printf("left %d, kmer %llx\n",i,prevKmer(node->seq,i));
	        }

	    }
	*/
	return 1;
}