コード例 #1
0
ファイル: prlHashReads.c プロジェクト: Buttonwood/SOAPdenovo2
/*************************************************
Function:
    prlRead2HashTable
Description:
    1. Imports the reads from the lib file one by one.
    2. Chops the reads into kmers and store them in KmerSets.
    3. Removes the kmers with low coverage.
    4. Marks the linear kmers.
    5. Counts the kmer frequences.
Input:
    1. libfile :            the reads config file
    2. outfile :        the output file prefix
Output:
    None.
Return:
    1 if exits normally.
*************************************************/
boolean prlRead2HashTable ( char * libfile, char * outfile )
{
	char * cach1;
	char * cach2;
	unsigned char asm_ctg = 1;
	long long i;
	char * next_name, name[256];
	FILE * fo;
	time_t start_t, stop_t;
	int maxReadNum;
	int libNo;
	pthread_t threads[thrd_num];
	unsigned char thrdSignal[thrd_num + 1];
	PARAMETER paras[thrd_num];
	boolean flag, pairs = 0;
	WORDFILTER = createFilter ( overlaplen );
	maxReadLen = 0;
	maxNameLen = 256;
	scan_libInfo ( libfile );
	alloc_pe_mem ( num_libs );

	if ( !maxReadLen )
	{
		maxReadLen = 100;
	}

	if ( gLineLen < maxReadLen )
	{
		gStr = ( char * ) ckalloc ( ( maxReadLen + 1 ) * sizeof ( char ) );
	}

	//init
	maxReadLen4all = maxReadLen;
	fprintf ( stderr, "In %s, %d lib(s), maximum read length %d, maximum name length %d.\n\n", libfile, num_libs, maxReadLen, maxNameLen );
	next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
	kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
	hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
	prevcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
	nextcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
	maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
	//printf("buffer size %d, max read len %d, max read num %d\n",buffer_size,maxReadLen,maxReadNum);
	int maxAIOSize = 32768;
	aioBuffer1 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
	aioBuffer2 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
	readBuffer1 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //(char *)ckalloc(maxAIOSize*sizeof(char)); //1024
	readBuffer2 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //1024
	cach1 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
	cach2 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
	memset ( cach1, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
	memset ( cach2, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
	seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
	lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
	indexArray = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );

	for ( i = 0; i < maxReadNum; i++ )
	{
		seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
	}

	rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );

	if ( 1 )
	{
		kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
		KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
		ubyte8 init_size = 1024;
		ubyte8 k = 0;

		if ( initKmerSetSize )
		{
#ifdef MER127
			init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 40 );
#else
			init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 24 ); //is it true?
#endif

			do
			{
				++k;
			}
			while ( k * 0xFFFFFFLLU < init_size );
		}

		for ( i = 0; i < thrd_num; i++ )
		{
			//KmerSets[i] = init_kmerset(1024,0.77f);
			KmerSets[i] = init_kmerset ( ( ( initKmerSetSize ) ? ( k * 0xFFFFFFLLU ) : ( init_size ) ), 0.77f );
			thrdSignal[i + 1] = 0;
			paras[i].threadID = i;
			paras[i].mainSignal = &thrdSignal[0];
			paras[i].selfSignal = &thrdSignal[i + 1];
			kmerCounter[i + 1] = 0;
			rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
		}

		creatThrds ( threads, paras );
	}

	thrdSignal[0] = kmerCounter[0] = 0;
	time ( &start_t );
	kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;

	while ( openNextFile ( &libNo, pairs, asm_ctg ) )
	{
		//read bam file
		if ( lib_array[libNo].curr_type == 4 )
		{
			int type = 0;   //deside the PE reads is good or bad

			while ( ( flag = read1seqInLibBam ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), &libNo, pairs, 1, &type ) ) != 0 )
			{
				if ( type == -1 ) //if the reads is bad, go back.
				{
					i--;

					if ( lenBuffer[read_c - 1] >= overlaplen + 1 )
					{
						kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
						read_c--;
					}

					n_solexa -= 2;
					continue;
				}

				if ( ( ++i ) % 100000000 == 0 )
					{ fprintf ( stderr, "--- %lldth reads.\n", i ); }

				if ( lenBuffer[read_c] < 0 )
					{ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }

				if ( lenBuffer[read_c] < overlaplen + 1 )
					{ continue; }

				/*
				   if(lenBuffer[read_c]>70)
				   lenBuffer[read_c] = 50;
				   else if(lenBuffer[read_c]>40)
				   lenBuffer[read_c] = 40;
				 */
				indexArray[read_c] = kmer_c;
				kmer_c += lenBuffer[read_c] - overlaplen + 1;
				read_c++;

				if ( read_c == maxReadNum )
				{
					kmerCounter[0] += kmer_c;
					sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
					sendWorkSignal ( 1, thrdSignal ); //singleKmer
					kmer_c = read_c = 0;
				}
			}
		}
		//read PE fasta or fastq
		else if ( lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2 )
		{
			initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
			initAIO ( &aio2, aioBuffer2, fileno ( lib_array[libNo].fp2 ), maxAIOSize );
			int offset1, offset2, flag1, flag2, rt1, rt2;
			offset1 = offset2 = 0;
			rt1 = aio_read ( &aio1 );
			rt2 = aio_read ( &aio2 );
			flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
			flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );

			if ( flag1 && flag2 )
			{
				int start1, start2, turn;
				start1 = start2 = 0;
				turn = 1;

				while ( start1 < offset1 || start2 < offset2 )
				{
					if ( turn == 1 )
					{
						turn = 2;
						readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );

						if ( ( ++i ) % 100000000 == 0 )
							{ fprintf ( stderr, "--- %lldth reads.\n", i ); }

						if ( lenBuffer[read_c] < 0 )
							{ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }

						if ( lenBuffer[read_c] < overlaplen + 1 )
						{
							if ( start1 >= offset1 )
							{
								start1 = 0;
								offset1 = 0;
								flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
							}

							continue;
						}

						indexArray[read_c] = kmer_c;
						kmer_c += lenBuffer[read_c] - overlaplen + 1;
						read_c++;

						if ( start1 >= offset1 )
						{
							start1 = 0;
							offset1 = 0;
							flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
						}

						if ( read_c == maxReadNum )
						{
							kmerCounter[0] += kmer_c;
							sendWorkSignal ( 2, thrdSignal );   //chopKmer4read
							sendWorkSignal ( 1, thrdSignal );   //singleKmer
							kmer_c = read_c = 0;
						}

						continue;
					}

					if ( turn == 2 )
					{
						turn = 1;
						readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );

						if ( ( ++i ) % 100000000 == 0 )
							{ fprintf ( stderr, "--- %lldth reads.\n", i ); }

						if ( lenBuffer[read_c] < 0 )
							{ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }

						if ( lenBuffer[read_c] < overlaplen + 1 )
						{
							if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
								{ break; }

							if ( start2 >= offset2 )
							{
								start2 = 0;
								offset2 = 0;
								flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
							}

							continue;
						}

						indexArray[read_c] = kmer_c;
						kmer_c += lenBuffer[read_c] - overlaplen + 1;
						read_c++;

						if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
							{ break; }

						if ( start2 >= offset2 )
						{
							start2 = 0;
							offset2 = 0;
							flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
						}

						if ( read_c == maxReadNum )
						{
							kmerCounter[0] += kmer_c;
							sendWorkSignal ( 2, thrdSignal );   //chopKmer4read
							sendWorkSignal ( 1, thrdSignal );   //singleKmer
							kmer_c = read_c = 0;
						}

						continue;
					}
				}
			}
			else
			{
				fprintf(stderr, "Error: aio_read error.\n");
			}
		}
		//read single fasta, single fastq and PE fasta in one file
		else
		{
			initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
			int offset, flag1, rt;
			offset = 0;
			rt = aio_read ( &aio1 );

			while ( ( flag1 = AIORead ( &aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type ) ) )
			{
				int start = 0;

				while ( start < offset )
				{
					readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );

					if ( ( ++i ) % 100000000 == 0 )
						{ fprintf ( stderr, "--- %lldth reads.\n", i ); }

					if ( lenBuffer[read_c] < 0 )
						{ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }

					if ( lenBuffer[read_c] < overlaplen + 1 )
						{ continue; }

					indexArray[read_c] = kmer_c;
					kmer_c += lenBuffer[read_c] - overlaplen + 1;
					read_c++;
				}

				if ( read_c > maxReadNum - 1024 )
				{
					kmerCounter[0] += kmer_c;
					sendWorkSignal ( 2, thrdSignal );   //chopKmer4read
					sendWorkSignal ( 1, thrdSignal );   //singleKmer
					kmer_c = read_c = 0;
				}

				if ( flag1 == 2 )
					{ break; }
			}
		}
	}

	if ( read_c )
	{
		kmerCounter[0] += kmer_c;
		sendWorkSignal ( 2, thrdSignal );   //chopKmer4read
		sendWorkSignal ( 1, thrdSignal );   //singleKmer
	}

	time ( &stop_t );
	fprintf ( stderr, "Time spent on hashing reads: %ds, %lld read(s) processed.\n", ( int ) ( stop_t - start_t ), i );

	//record insert size info
	if ( pairs )
	{
		if ( gradsCounter )
			{ fprintf ( stderr, "%d pe insert size, the largest boundary is %lld.\n\n", gradsCounter, pes[gradsCounter - 1].PE_bound ); }
		else
		{
			fprintf ( stderr, "No paired reads found.\n" );
		}

		sprintf ( name, "%s.peGrads", outfile );
		fo = ckopen ( name, "w" );
		fprintf ( fo, "grads&num: %d\t%lld\n", gradsCounter, n_solexa );

		for ( i = 0; i < gradsCounter; i++ )
		{
			fprintf ( fo, "%d\t%lld\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank );
		}

		fclose ( fo );
	}

	free_pe_mem ();
	free_libs ();

	if ( 1 )
	{
		unsigned long long alloCounter = 0;
		unsigned long long allKmerCounter = 0;

		for ( i = 0; i < thrd_num; i++ )
		{
			alloCounter += count_kmerset ( ( KmerSets[i] ) );
			allKmerCounter += kmerCounter[i + 1];
			free ( ( void * ) rcSeq[i + 1] );
		}

		fprintf ( stderr, "%lli node(s) allocated, %lli kmer(s) in reads, %lli kmer(s) processed.\n", alloCounter, kmerCounter[0], allKmerCounter );
	}

	free ( ( void * ) rcSeq );
	free ( ( void * ) kmerCounter );

	for ( i = 0; i < maxReadNum; i++ )
	{
		free ( ( void * ) seqBuffer[i] );
	}

	free ( ( void * ) seqBuffer );
	free ( ( void * ) lenBuffer );
	free ( ( void * ) indexArray );
	free ( ( void * ) kmerBuffer );
	free ( ( void * ) hashBanBuffer );
	free ( ( void * ) nextcBuffer );
	free ( ( void * ) prevcBuffer );
	free ( ( void * ) next_name );
	free ( ( void * ) aioBuffer1 );
	free ( ( void * ) aioBuffer2 );
	free ( ( void * ) readBuffer1 );
	free ( ( void * ) readBuffer2 );
	free ( ( void * ) cach1 );
	free ( ( void * ) cach2 );
	fprintf ( stderr, "done hashing nodes\n" );

	if ( deLowKmer )
	{
		time ( &start_t );
		deLowCov ( thrdSignal );
		time ( &stop_t );
		fprintf ( stderr, "Time spent on delowcvgNode: %ds.\n", ( int ) ( stop_t - start_t ) );
	}

	time ( &start_t );
	Mark1in1outNode ( thrdSignal );
	freqStat ( outfile );
	time ( &stop_t );
	fprintf ( stderr, "Time spent on marking linear nodes: %ds.\n", ( int ) ( stop_t - start_t ) );
	sendWorkSignal ( 3, thrdSignal );   //exit
	thread_wait ( threads );
	return 1;
}
コード例 #2
0
ファイル: preloader.c プロジェクト: depletionmode/preloader
int main(int ac, char *av[])
{
  /* create .preloader directory */
  char path[500];
  sprintf( path, "%s/.preloader", getenv("HOME") );
  if( mkdir( path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH ) < 0 )  /* mkdir failed */
    if( errno !=  EEXIST ) {
      fprintf( stderr, "error: failed to create %s!\n", path );
      exit( 0 );
    }


  CTX ctx;
  memset( &ctx, 0, sizeof( ctx ) );

  snprintf( ctx.filename,
            sizeof( ctx.filename ),
            "%s%s", *av[1] == '/' || *av[1] == '.' ? "" : "./",
            av[1] );

  _init_display();
  ctx.state = STATE_PROCESSING_SYMS;

  ctx.db = database_init();
  ctx.hash = database_add_target( ctx.db, ctx.filename ); /* add target to db */

  /* get symbols from target target */
  int fd = open( av[1], O_RDONLY );
  DYNSYM *ds = get_dynsyms( fd, DYNSYM_UNDEFINED_ONLY );
  close( fd );

  /* add symbols to db */
  DYNSYM *p_ds = ds;
  while( p_ds ) {
    ctx.extra = p_ds->name;
    _draw_display( &ctx );
    database_add_symbol( ctx.db, p_ds->name );
    p_ds = p_ds->nxt;
  }

  ctx.state = STATE_PROCESSING_LIBS;

  /* add libs to db */
  LL *lib_sym_info = ll_calloc();

  LIBS *libs = get_libs(ctx.filename);
  LIBS *p_libs = libs;
  while( p_libs ) {
    ctx.extra = p_libs->path;
    _draw_display( &ctx );
    database_add_lib( ctx.db, p_libs->name, p_libs->path );

    p_libs = p_libs->nxt;
  }

  ctx.state = STATE_RESOLVING_SYMBOLS;
  /* match symbols to libs */
  /* immensely inefficient, should call get_dynsyms() ONCE for each lib!! (TODO) */
  p_ds = ds;
  while( p_ds ) {   /* for each symbol in target */
    int found = 0;

    ctx.extra = p_ds->name;
    _draw_display( &ctx );

    LIBS *p_lib = libs;
    while( p_lib && !found ) {  /* search each lib for match */

      int fd_lib = open( p_lib->path, O_RDONLY );
      DYNSYM *ds_lib = get_dynsyms( fd_lib, DYNSYM_DEFINED_ONLY );    /* get symbols from lib */

      DYNSYM *p_ds_lib = ds_lib;
      while( p_ds_lib ) { /* for each symbol in library */
        if( strcmp( p_ds_lib->name, p_ds->name ) == 0 ) {
          database_link_sym_lib( ctx.db, p_ds->name, p_lib->path );
          found = 1;
          break;
        }
        p_ds_lib = p_ds_lib->nxt;   /* move to next symbol in library */
      }
      free_dynsyms( ds_lib );

      close( fd_lib );

      p_lib = p_lib->nxt;   /* move to next library */
    }

    p_ds = p_ds->nxt; /* move to next symbol */
  }

  ll_free( lib_sym_info );

  free_libs( libs );

  free_dynsyms( ds );

  // TODO iter through and free free_dynsyms( ds_lib );

  _populate_symbol_list( ctx.db, &ctx.symbols );

  ctx.state = STATE_NORMAL;
  ctx.running = 1;

  while( ctx.running ) {
    usleep(1000);

    _draw_display( &ctx );
    _parse_input( &ctx );
  }

  _disable_display();

  database_kill( ctx.db );

  /* free symbol list */
  ll_free( ctx.symbols.func );
  ll_free( ctx.symbols.sig );

  return 0;
}
コード例 #3
0
void prlRead2edge (char *libfile, char *outfile)
{
	char *cach1;
	char *cach2;
	unsigned char asm_ctg = 1;

	long long i;
	char name[256], *src_name, *next_name;
	FILE *outfp = NULL;
	int maxReadNum, libNo;
	boolean flag, pairs = 0;
	pthread_t threads[thrd_num];
	unsigned char thrdSignal[thrd_num + 1];
	PARAMETER paras[thrd_num];

	maxReadLen = 0;
	maxNameLen = 256;
	scan_libInfo (libfile);
	alloc_pe_mem (num_libs);

	if (!maxReadLen)
	{
		maxReadLen = 100;
	}

	maxReadLen4all = maxReadLen;
	printf ("In file: %s, max seq len %d, max name len %d\n\n", libfile, maxReadLen, maxNameLen);

	if (repsTie)
	{
		sprintf (name, "%s.path", outfile);
		outfp = ckopen (name, "wb");
	}

	src_name = (char *) ckalloc ((maxNameLen + 1) * sizeof (char));
	next_name = (char *) ckalloc ((10*maxNameLen + 1) * sizeof (char));
	kmerBuffer = (Kmer *) ckalloc (buffer_size * sizeof (Kmer));
	mixBuffer = (Kmer *) ckalloc (buffer_size * sizeof (Kmer));
	hashBanBuffer = (ubyte8 *) ckalloc (buffer_size * sizeof (ubyte8));
	nodeBuffer = (kmer_t **) ckalloc (buffer_size * sizeof (kmer_t *));
	smallerBuffer = (boolean *) ckalloc (buffer_size * sizeof (boolean));
	flagArray = (boolean *) ckalloc (buffer_size * sizeof (boolean));
	maxReadNum = buffer_size / (maxReadLen - overlaplen + 1);
	//printf("buffer for at most %d reads\n",maxReadNum);
	
	int maxAIOSize = 32768;/*
	aioBuffer1 = (char *) ckalloc ((maxAIOSize) * sizeof (char));
	aioBuffer2 = (char *) ckalloc ((maxAIOSize) * sizeof (char));
	readBuffer1 = (char *) ckalloc ((maxAIOSize + 1024) * sizeof (char));	//(char *)ckalloc(maxAIOSize*sizeof(char));
	readBuffer2 = (char *) ckalloc ((maxAIOSize + 1024) * sizeof (char));
	cach1 = (char *) ckalloc (1024 * sizeof (char));
	cach2 = (char *) ckalloc (1024 * sizeof (char));
	memset(cach1,'\0',1024);
	memset(cach2,'\0',1024);*/
        aioBuffer1 = (char *) ckalloc ((maxAIOSize) * sizeof (char));
        aioBuffer2 = (char *) ckalloc ((maxAIOSize) * sizeof (char));
        readBuffer1 = (char *) ckalloc ((maxAIOSize + (maxReadLen+1024)) * sizeof (char));      //(char *)ckalloc(maxAIOSize*sizeof(char));     //1024
        readBuffer2 = (char *) ckalloc ((maxAIOSize + (maxReadLen+1024)) * sizeof (char));      //1024
        cach1 = (char *) ckalloc ((maxReadLen+1024) * sizeof (char));   //1024
        cach2 = (char *) ckalloc ((maxReadLen+1024) * sizeof (char));   //1024
        memset(cach1,'\0',(maxReadLen+1024));   //1024
        memset(cach2,'\0',(maxReadLen+1024));   //1024


	seqBuffer = (char **) ckalloc (maxReadNum * sizeof (char *));
	lenBuffer = (int *) ckalloc (maxReadNum * sizeof (int));
	indexArray = (int *) ckalloc ((maxReadNum + 1) * sizeof (int));

	for (i = 0; i < maxReadNum; i++)
	{
		seqBuffer[i] = (char *) ckalloc (maxReadLen * sizeof (char));
	}

	memoAlloc4preArc ();
	flags = (char **) ckalloc ((thrd_num + 1) * sizeof (char *));
	deletion = (int *) ckalloc ((thrd_num + 1) * sizeof (int));
	rcSeq = (char **) ckalloc ((thrd_num + 1) * sizeof (char *));

	if (repsTie)
	{
		markerOnEdge = (unsigned char *) ckalloc ((num_ed + 1) * sizeof (unsigned char));

		for (i = 1; i <= num_ed; i++)
		{
			markerOnEdge[i] = 0;
		}

		fwriteBuf = (unsigned int *) ckalloc ((maxReadLen - overlaplen + 1) * sizeof (unsigned int));
	}

	thrdSignal[0] = 0;

	if (1)
	{
		preArc_mem_managers = (MEM_MANAGER **) ckalloc (thrd_num * sizeof (MEM_MANAGER *));
		arcCounters = (unsigned int *) ckalloc (thrd_num * sizeof (unsigned int));

		for (i = 0; i < thrd_num; i++)
		{
			arcCounters[i] = 0;
			preArc_mem_managers[i] = createMem_manager (preARCBLOCKSIZE, sizeof (preARC));
			deletion[i + 1] = 0;
			flags[i + 1] = (char *) ckalloc (2 * maxReadLen * sizeof (char));
			rcSeq[i + 1] = (char *) ckalloc (maxReadLen * sizeof (char));
			thrdSignal[i + 1] = 0;
			paras[i].threadID = i;
			paras[i].mainSignal = &thrdSignal[0];
			paras[i].selfSignal = &thrdSignal[i + 1];
		}

		creatThrds (threads, paras);
	}

	if (1)
	{
		deletion[0] = 0;
		flags[0] = (char *) ckalloc (2 * maxReadLen * sizeof (char));
		rcSeq[0] = (char *) ckalloc (maxReadLen * sizeof (char));
	}

	kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;
	int t0, t1, t2, t3, t4, t5, t6;

	t0 = t1 = t2 = t3 = t4 = t5 = t6 = 0;
	time_t read_start, read_end, time_bef, time_aft;

	time (&read_start);
	
		while (openNextFile (&libNo, pairs, asm_ctg))
	{
		if (lib_array[libNo].curr_type == 4)
		{
			int type = 0;	//deside the PE reads is good or bad

			while ((flag = read1seqInLibBam (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), &libNo, pairs, 1, &type)) != 0)
			{
				if (type == -1)	//if the reads is bad, go back.
				{
					i--;
					if (lenBuffer[read_c - 1] >= overlaplen + 1)
					{
						kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
						read_c--;
					}
					n_solexa -= 2;
					continue;
				}
				if ((++i) % 1000000 == 0)
				{
					printf ("--- %lldth reads\n", i);
				}

				if (lenBuffer[read_c] < overlaplen + 1)
				{
					continue;
				}

				//if(lenBuffer[read_c]>70)
				//    lenBuffer[read_c] = 70;
				//else if(lenBuffer[read_c]>40)
				//    lenBuffer[read_c] = 40;

				indexArray[read_c] = kmer_c;
				kmer_c += lenBuffer[read_c] - overlaplen + 1;
				read_c++;

				if (read_c == maxReadNum)
				{
					indexArray[read_c] = kmer_c;
					time (&read_end);
					t0 += read_end - read_start;
					time (&time_bef);
					sendWorkSignal (2, thrdSignal);
					time (&time_aft);
					t1 += time_aft - time_bef;
					time (&time_bef);
					sendWorkSignal (1, thrdSignal);
					time (&time_aft);
					t2 += time_aft - time_bef;
					time (&time_bef);
					sendWorkSignal (3, thrdSignal);
					time (&time_aft);
					t3 += time_aft - time_bef;
					time (&time_bef);
					sendWorkSignal (4, thrdSignal);
					time (&time_aft);
					t4 += time_aft - time_bef;
					time (&time_bef);
					sendWorkSignal (6, thrdSignal);
					time (&time_aft);
					t5 += time_aft - time_bef;
					time (&time_bef);

					//recordPreArc();
					if (repsTie)
					{
						recordPathBin (outfp);
					}

					time (&time_aft);
					t6 += time_aft - time_bef;
					//output_path(read_c,edge_no,flags,outfp);
					kmer_c = 0;
					read_c = 0;
					time (&read_start);
				}
			}
		}
		else if (lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2)
		{
			initAIO (&aio1, aioBuffer1, fileno (lib_array[libNo].fp1), maxAIOSize);
			initAIO (&aio2, aioBuffer2, fileno (lib_array[libNo].fp2), maxAIOSize);
			int offset1, offset2, flag1, flag2, rt1, rt2;

			offset1 = offset2 = 0;
			rt1 = aio_read (&aio1);
			rt2 = aio_read (&aio2);
			flag1 = AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type);
			flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type);
			if(flag1 && flag2)
			{
				int start1, start2, turn;

				start1 = start2 = 0;
				turn = 1;
				while (start1 < offset1 || start2 < offset2)
				{
					if (turn == 1)
					{
						turn = 2;
						readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer1, &start1, offset1, libNo);
						if ((++i) % 1000000 == 0)
							printf ("--- %lldth reads\n", i);
/*						if (lenBuffer[read_c] < overlaplen + 1)
							continue;*/
						if (lenBuffer[read_c] < overlaplen + 1)
						{
							if(start1>=offset1)
							{
								start1=0;
								flag1=AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type);
							}
							continue;
						}

						indexArray[read_c] = kmer_c;
						kmer_c += lenBuffer[read_c] - overlaplen + 1;
						read_c++;
						if(start1>=offset1){
							start1=0;
							flag1=AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type);
						}
						if (read_c == maxReadNum) {
							indexArray[read_c] = kmer_c;

							time (&read_end);
							t0 += read_end - read_start;
							time (&time_bef);
							sendWorkSignal (2, thrdSignal);
							time (&time_aft);
							t1 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (1, thrdSignal);
							time (&time_aft);
							t2 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (3, thrdSignal);
							time (&time_aft);
							t3 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (4, thrdSignal);
							time (&time_aft);
							t4 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (6, thrdSignal);
							time (&time_aft);
							t5 += time_aft - time_bef;
							time (&time_bef);

							//recordPreArc();
							if (repsTie)
								recordPathBin (outfp);
							time (&time_aft);
							t6 += time_aft - time_bef;
							//output_path(read_c,edge_no,flags,outfp);
							kmer_c = 0;
							read_c = 0;
							time (&read_start);
						}
						continue;
					}
					if (turn == 2)
					{
						turn = 1;
						readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer2, &start2, offset2, libNo);
						if ((++i) % 1000000 == 0)
							printf ("--- %lldth reads\n", i);
/*						if (lenBuffer[read_c] < overlaplen + 1)
							continue;*/
						if (lenBuffer[read_c] < overlaplen + 1)
						{
							if((flag2 == 2) && (start2 >= offset2))
								break;

							if(start2 >= offset2)
							{
								start2=0;
								flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type);
							}
							continue;
						}

						indexArray[read_c] = kmer_c;
						kmer_c += lenBuffer[read_c] - overlaplen + 1;
						read_c++;
						if((flag2 == 2) && (start2 >= offset2))
							break;
						if(start2 >= offset2){
			                        	start2=0;
							flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type);
						}
						if (read_c == maxReadNum){
							indexArray[read_c] = kmer_c;

							time (&read_end);
							t0 += read_end - read_start;
							time (&time_bef);
							sendWorkSignal (2, thrdSignal);
							time (&time_aft);
							t1 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (1, thrdSignal);
							time (&time_aft);
							t2 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (3, thrdSignal);
							time (&time_aft);
							t3 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (4, thrdSignal);
							time (&time_aft);
							t4 += time_aft - time_bef;
							time (&time_bef);
							sendWorkSignal (6, thrdSignal);
							time (&time_aft);
							t5 += time_aft - time_bef;
							time (&time_bef);

							//recordPreArc();
							if (repsTie)
								recordPathBin (outfp);
							time (&time_aft);
							t6 += time_aft - time_bef;
							//output_path(read_c,edge_no,flags,outfp);
							kmer_c = 0;
							read_c = 0;
							time (&read_start);
						}
						continue;
					}
				}
			}
		}
		else
		{
			initAIO (&aio1, aioBuffer1, fileno (lib_array[libNo].fp1), maxAIOSize);
			int offset, flag1, rt;

			offset = 0;
			rt = aio_read (&aio1);
			while ((flag1 = AIORead (&aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type)))
			{
				int start = 0;

				while (start < offset)
				{
					readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer1, &start, offset, libNo);
					if ((++i) % 1000000 == 0)
						printf ("--- %lld reads\n", i);
					if (lenBuffer[read_c] < overlaplen + 1)
						continue;
					indexArray[read_c] = kmer_c;
					kmer_c += lenBuffer[read_c] - overlaplen + 1;
					read_c++;

					if (read_c > maxReadNum - 1024)
					{
						indexArray[read_c] = kmer_c;

						time (&read_end);
						t0 += read_end - read_start;
						time (&time_bef);
						sendWorkSignal (2, thrdSignal);
						time (&time_aft);
						t1 += time_aft - time_bef;
						time (&time_bef);
						sendWorkSignal (1, thrdSignal);
						time (&time_aft);
						t2 += time_aft - time_bef;
						time (&time_bef);
						sendWorkSignal (3, thrdSignal);
						time (&time_aft);
						t3 += time_aft - time_bef;
						time (&time_bef);
						sendWorkSignal (4, thrdSignal);
						time (&time_aft);
						t4 += time_aft - time_bef;
						time (&time_bef);
						sendWorkSignal (6, thrdSignal);
						time (&time_aft);
						t5 += time_aft - time_bef;
						time (&time_bef);

						//recordPreArc();
						if (repsTie)
							recordPathBin (outfp);
						time (&time_aft);
						t6 += time_aft - time_bef;
						//output_path(read_c,edge_no,flags,outfp);
						kmer_c = 0;
						read_c = 0;
						time (&read_start);
					}
				}
				if (flag1 == 2)
					break;

			}
		}
	}

	printf ("%lld reads processed\n", i);
	printf ("time %d,%d,%d,%d,%d,%d,%d\n", t0, t1, t2, t3, t4, t5, t6);

	if (read_c)
	{
		indexArray[read_c] = kmer_c;
		sendWorkSignal (2, thrdSignal);
		sendWorkSignal (1, thrdSignal);
		sendWorkSignal (3, thrdSignal);
		sendWorkSignal (4, thrdSignal);
		sendWorkSignal (6, thrdSignal);

		//recordPreArc();
		if (repsTie)
		{
			recordPathBin (outfp);
		}
	}

	printf ("%lld markers outputed\n", markCounter);
	sendWorkSignal (5, thrdSignal);
	thread_wait (threads);
	output_arcs (outfile);
	memoFree4preArc ();

	if (1)			// multi-threads
	{
		arcCounter = 0;

		for (i = 0; i < thrd_num; i++)
		{
			arcCounter += arcCounters[i];
			free ((void *) flags[i + 1]);
			deletion[0] += deletion[i + 1];
			free ((void *) rcSeq[i + 1]);
		}
	}

	if (1)
	{
		free ((void *) flags[0]);
		free ((void *) rcSeq[0]);
	}

	printf ("done mapping reads, %d reads deleted, %lld arcs created\n", deletion[0], arcCounter);

	if (repsTie)
	{
		free ((void *) markerOnEdge);
		free ((void *) fwriteBuf);
	}

	free ((void *) arcCounters);
	free ((void *) rcSeq);

	for (i = 0; i < maxReadNum; i++)
	{
		free ((void *) seqBuffer[i]);
	}

	free ((void *) seqBuffer);
	free ((void *) lenBuffer);
	free ((void *) indexArray);
	free ((void *) flags);
	free ((void *) deletion);
	free ((void *) kmerBuffer);
	free ((void *) mixBuffer);
	free ((void *) smallerBuffer);
	free ((void *) flagArray);
	free ((void *) hashBanBuffer);
	free ((void *) nodeBuffer);
	free ((void *) src_name);
	free ((void *) next_name);
	free ((void *) aioBuffer1);
    free ((void *) aioBuffer2);
    free ((void *) readBuffer1);
    free ((void *) readBuffer2);
    free ((void *) cach1);
    free ((void *) cach2);

	if (repsTie)
	{
		fclose (outfp);
	}

	free_pe_mem ();
	free_libs ();
}
コード例 #4
0
boolean prlRead2HashTable ( char * libfile, char * outfile )
{
	long long i;
	char * next_name, name[256];
	FILE * fo;
	time_t start_t, stop_t;
	int maxReadNum;
	int libNo;
	pthread_t threads[thrd_num];
	unsigned char thrdSignal[thrd_num + 1];
	PARAMETER paras[thrd_num];
	boolean flag, pairs = 0;
	WORDFILTER = createFilter ( overlaplen );
	maxReadLen = 0;
	maxNameLen = 256;
	scan_libInfo ( libfile );
	alloc_pe_mem ( num_libs );

	if ( !maxReadLen )
		{ maxReadLen = 100; }

	maxReadLen4all = maxReadLen;
	printf ( "In %s, %d libs, max seq len %d, max name len %d\n\n",
	         libfile, num_libs, maxReadLen, maxNameLen );
	next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
	kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
	hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
	prevcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
	nextcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
	maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
	//printf("buffer size %d, max read len %d, max read num %d\n",buffer_size,maxReadLen,maxReadNum);
	seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
	lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
	indexArray = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );

	for ( i = 0; i < maxReadNum; i++ )
		{ seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) ); }

	rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );

	if ( 1 )
	{
		kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
		KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
		ubyte8  init_size = 1024;
		ubyte8 k = 0;

		if ( initKmerSetSize )
		{
			init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 32 );

			do
			{
				++k;
			}
			while ( k * 0xFFFFFFLLU < init_size );
		}

		for ( i = 0; i < thrd_num; i++ )
		{
			//KmerSets[i] = init_kmerset(1024,0.77f);
			KmerSets[i] = init_kmerset ( k * 0xFFFFFFLLU, 0.77f );
			thrdSignal[i + 1] = 0;
			paras[i].threadID = i;
			paras[i].mainSignal = &thrdSignal[0];
			paras[i].selfSignal = &thrdSignal[i + 1];
			kmerCounter[i + 1] = 0;
			rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
		}

		creatThrds ( threads, paras );
	}

	thrdSignal[0] = kmerCounter[0] = 0;
	time ( &start_t );
	kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;

	while ( ( flag = read1seqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), &libNo, pairs, 1 ) ) != 0 )
	{
		if ( ( ++i ) % 100000000 == 0 )
			{ printf ( "--- %lldth reads\n", i ); }

		if ( lenBuffer[read_c] < 0 )
			{ printf ( "read len %d\n", lenBuffer[read_c] ); }

		if ( lenBuffer[read_c] < overlaplen + 1 )
			{ continue; }

		/*
		if(lenBuffer[read_c]>70)
		    lenBuffer[read_c] = 50;
		else if(lenBuffer[read_c]>40)
		    lenBuffer[read_c] = 40;
		*/
		indexArray[read_c] = kmer_c;
		kmer_c += lenBuffer[read_c] - overlaplen + 1;
		read_c++;

		if ( read_c == maxReadNum )
		{
			kmerCounter[0] += kmer_c;
			sendWorkSignal ( 2, thrdSignal );
			sendWorkSignal ( 1, thrdSignal );
			kmer_c = read_c = 0;
		}
	}

	if ( read_c )
	{
		kmerCounter[0] += kmer_c;
		sendWorkSignal ( 2, thrdSignal );
		sendWorkSignal ( 1, thrdSignal );
	}

	time ( &stop_t );
	printf ( "time spent on hash reads: %ds, %lld reads processed\n", ( int ) ( stop_t - start_t ), i );

	//record insert size info
	if ( pairs )
	{
		if ( gradsCounter )
			printf ( "%d pe insert size, the largest boundary is %lld\n\n",
			         gradsCounter, pes[gradsCounter - 1].PE_bound );
		else
			{ printf ( "no paired reads found\n" ); }

		sprintf ( name, "%s.peGrads", outfile );
		fo = ckopen ( name, "w" );
		fprintf ( fo, "grads&num: %d\t%lld\n", gradsCounter, n_solexa );

		for ( i = 0; i < gradsCounter; i++ )
			{ fprintf ( fo, "%d\t%lld\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank ); }

		fclose ( fo );
	}

	free_pe_mem();
	free_libs();

	if ( 1 )
	{
		unsigned long long alloCounter = 0;
		unsigned long long allKmerCounter = 0;

		for ( i = 0; i < thrd_num; i++ )
		{
			alloCounter += count_kmerset ( ( KmerSets[i] ) );
			allKmerCounter += kmerCounter[i + 1];
			free ( ( void * ) rcSeq[i + 1] );
		}

		printf ( "%lli nodes allocated, %lli kmer in reads, %lli kmer processed\n"
		         , alloCounter, kmerCounter[0], allKmerCounter );
	}

	free ( ( void * ) rcSeq );
	free ( ( void * ) kmerCounter );

	for ( i = 0; i < maxReadNum; i++ )
		{ free ( ( void * ) seqBuffer[i] ); }

	free ( ( void * ) seqBuffer );
	free ( ( void * ) lenBuffer );
	free ( ( void * ) indexArray );
	free ( ( void * ) kmerBuffer );
	free ( ( void * ) hashBanBuffer );
	free ( ( void * ) nextcBuffer );
	free ( ( void * ) prevcBuffer );
	free ( ( void * ) next_name );

	//printf("done hashing nodes\n");
	if ( deLowKmer )
	{
		time ( &start_t );
		deLowCov ( thrdSignal );
		time ( &stop_t );
		printf ( "time spent on delowcvgNode %ds\n", ( int ) ( stop_t - start_t ) );
	}

	time ( &start_t );
	Mark1in1outNode ( thrdSignal );
	freqStat ( outfile );
	time ( &stop_t );
	printf ( "time spent on marking linear nodes %ds\n", ( int ) ( stop_t - start_t ) );
	fflush ( stdout );
	sendWorkSignal ( 3, thrdSignal );
	thread_wait ( threads );
	/*
	    Kmer word = 0x21c3ca82c734c8d0;
	    Kmer hash_ban = hash_kmer(word);
	    int setPicker = hash_ban%thrd_num;
	    kmer_t *node;
	    boolean found = search_kmerset(KmerSets[setPicker], word, &node);
	    if(!found)
	        printf("kmer %llx not found,\n",word);
	    else{
	        printf("kmer %llx, linear %d\n",word,node->linear);
	        for(i=0;i<4;i++){
	            if(get_kmer_right_cov(*node,i)>0)
	                printf("right %d, kmer %llx\n",i,nextKmer(node->seq,i));
	            if(get_kmer_left_cov(*node,i)>0)
	                printf("left %d, kmer %llx\n",i,prevKmer(node->seq,i));
	        }

	    }
	*/
	return 1;
}