예제 #1
0
파일: Utils.cpp 프로젝트: goshng/cocoa
/*
 * Merge the counts of the current partition (held in hasht1) into the on-disk
 * k-mer count file.
 *
 * Each (k-mer, count) record is read from F_kmercpt_read; the count that
 * hasht1 holds for that k-mer is added to it (the k-mer is removed from
 * hasht1 at the same time) and the updated record is written to
 * F_kmercpt_write. Whatever is still in hasht1 afterwards is appended with
 * hasht1->dump().
 *
 * When last_partition is true, every k-mer whose merged count reaches nks is
 * also written to the solid k-mers file, the number of solid k-mers is
 * printed on stderr, and the read-side count file is emptied.
 *
 * last_partition: true only for the final partition.
 * hasht1:         hash table holding the counts of the current partition.
 */
void end_kmer_count_partition(bool last_partition, Hash16 *hasht1)
{
    int value = 0;
    int cptk = 0;
    int64_t nso = 0;

    // Both files are processed from their beginning.
    rewind (F_kmercpt_read);
    rewind (F_kmercpt_write);

#ifndef MINGW
    ftruncate(fileno(F_kmercpt_write), 0); // erase previous file
#else // tempfix? fileno is not accepted by mingw
    fclose(F_kmercpt_write);
    F_kmercpt_write = fopen("kmer_count2","wb+");
#endif
    BinaryBank * SolidKmers = NULL;
    kmer_type graine;

    if (last_partition)
        SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),1);

    while (fread(&graine, sizeof(graine), 1, F_kmercpt_read))
    {
        // A k-mer without its count means the file is truncated: stop here
        // instead of merging (and writing back) a stale count.
        if (fread(&cptk, sizeof(cptk), 1, F_kmercpt_read) != 1)
        {
            fprintf(stderr,"warning: truncated record in k-mer count file, ignoring it\n");
            break;
        }

        hasht1->remove(graine,&value); // if graine is present, get value of graine and remove graine, else value=0
        cptk += value;

        fwrite(&graine, sizeof(graine), 1, F_kmercpt_write);
        fwrite(&cptk, sizeof(cptk), 1, F_kmercpt_write);

        // Only covers k-mers already on disk; solid k-mers that exist only in
        // hasht1 are collected by getsolids() below.
        if (last_partition && cptk >= nks)
        {
            SolidKmers->write_element(&graine);
            nso++;
        }
    }
    hasht1->dump(F_kmercpt_write); // dump remaining of hasht1

    if (last_partition)
    {
        nso += hasht1->getsolids(NULL,SolidKmers,nks); // get remaining solids of hasht1
        fprintf(stderr,"nsolid kmers =  %lli  \n",(long long)nso);

        SolidKmers->close();

#ifndef MINGW
        ftruncate(fileno(F_kmercpt_read), 0); // erase previous file
#else // tempfix? fileno is not accepted by mingw
        // NOTE(review): this reopens the same "kmer_count2" path as the write
        // side above - confirm that this is the intended file.
        fclose(F_kmercpt_read);
        F_kmercpt_read = fopen("kmer_count2","wb+");
#endif
    }
}
예제 #2
0
파일: Utils.cpp 프로젝트: goshng/cocoa
/*
 * Allocate and fill the main Bloom filter ("bloo1").
 *
 * The filter is sized from the number of solid k-mers: taken from nsolids
 * when from_dump is set and nsolids is non-zero, otherwise counted from the
 * solid k-mers file. The content is then either loaded from the Bloom dump
 * file (from_dump) or built by inserting every solid k-mer.
 *
 * bloom_counter: forwarded to bloom_pass_reads_binary() when the filter is
 *                built from the solid k-mers file.
 * from_dump:     load the filter content from the dump file instead of
 *                re-inserting the solid k-mers.
 *
 * Returns the newly allocated filter.
 *
 * NOTE(review): T is presumably a template parameter declared outside this
 * excerpt.
 */
Bloom *bloom_create_bloo1(T *bloom_counter, bool from_dump)
{
    // Only opened when the number of solid k-mers has to be read from disk.
    BinaryBank * SolidKmers = NULL;

    if(from_dump && nsolids) // from dump and known number of solid kmers
    {
        // nsolids is stored in a config file
        // number of solid kmers cannot be computed precisely from bloom file, imprecision of 0-7
        estimated_BL1 = max( (int)ceilf(log2f(nsolids*NBITS_PER_KMER)), 1);
        estimated_BL1_freesize =  (uint64_t)(nsolids*NBITS_PER_KMER);
    }
    else
    {
        // get true number of solid kmers, in order to precisely size the bloom filter
        SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),0);
        estimated_BL1 = max( (int)ceilf(log2f(SolidKmers->nb_elements()*NBITS_PER_KMER)), 1);
        estimated_BL1_freesize =  (uint64_t)(SolidKmers->nb_elements()*NBITS_PER_KMER);
        printf("nelem %lli nbits %g \n",(long long)SolidKmers->nb_elements(),NBITS_PER_KMER);
    }

    printf("freesize %lli estimated_BL1_freesize  %0.1f MB of memory for the main Bloom structure (%g bits/kmer)\n",(long long)estimated_BL1_freesize,(estimated_BL1_freesize)/1024.0/1024.0/8.0,NBITS_PER_KMER);

    Bloom *bloo1;
#if CUSTOMSIZE
    bloo1 = new Bloom((uint64_t)estimated_BL1_freesize);
#else
    bloo1 = new Bloom(estimated_BL1);
#endif

    bloo1->set_number_of_hash_func((int)floorf(0.7*NBITS_PER_KMER));

    if (from_dump)
        bloo1->load(return_file_name(bloom_file)); // just load the dump
    else
        bloom_pass_reads_binary(bloo1, bloom_counter, (char*)"%cInsert solid Kmers in Bloom %lld"); // use the method reading SolidKmers binary file, was useful when varying Bloom size (!= dumped size)

    // Close the bank on every path that opened it, including
    // "from_dump with unknown nsolids", which used to leave it open.
    if (SolidKmers != NULL)
        SolidKmers->close();

    return bloo1;
}
예제 #3
0
파일: Utils.cpp 프로젝트: goshng/cocoa
/*
 * Read every k-mer of the solid k-mers file and add it to bloom_to_insert.
 *
 * bloom_to_insert: structure receiving the k-mers through add().
 * bloom_counter:   not used by this function.
 * stderr_message:  printf-style progress format, given a character (13,
 *                  carriage return) and a long long count; it is printed on
 *                  stderr every 10000 k-mers.
 */
void bloom_pass_reads_binary(T *bloom_to_insert, BloomCpt *bloom_counter, char *stderr_message)
{
  fprintf(stderr,"binary pass \n");

  kmer_type current;
  int64_t n_inserted = 0; // k-mers read so far; each one read is inserted

  // solid k-mers are streamed from disk
  BinaryBank *solid_bank = new BinaryBank(return_file_name(solid_kmers_file),sizeof(current),0);

  for (;;)
    {
      if (!solid_bank->read_element(&current))
        break;

      bloom_to_insert->add(current);
      ++n_inserted;

      if (n_inserted % 10000 == 0)
        fprintf (stderr,stderr_message,13,(long long)n_inserted);
    }

  fprintf (stderr,"\nInserted %lld %s kmers in the bloom structure.\n",(long long)n_inserted,"solid");
  solid_bank->close();
}
예제 #4
0
int debloom(int order, int max_memory)
{
    // read bloo1 from disk dump
    Bloom *bloo1 = bloom_create_bloo1((BloomCpt *)NULL);

    STARTWALL(pos);

    FILE * debloom_file = fopen(return_file_name("debloom"),"wb+");
    FILE * debloom_file_2 = fopen(return_file_name("debloom2"),"wb+");
    FILE * F_tmp;
    
    F_debloom_read = debloom_file;
    F_debloom_write = debloom_file_2;
	
    BinaryBank *SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),0);
    
    uint64_t cc=0;
    kmer_type new_graine, kmer;
    int nt;
   
    uint64_t NbSolidKmer =0;
    // write all positive extensions in disk file
    while (SolidKmers->read_element(&kmer))
    {

        //8 right extensions   (4F and 4R); left extensions are redundant by revcomplementation
        for(nt=0; nt<4; nt++) 
        {
            int strand;
            for (strand = 0; strand < 2 ; strand++)
            {
                int current_strand = strand;
                new_graine = next_kmer(kmer,nt, &current_strand);

                if(bloo1->contains(new_graine)){   // extension is positive

                    // maybe do more lax deblooming; if it's a dead-end, it's no big deal, don't pass it to the false positive test
                    // what would have been needed if i decided to enable order>0 (but actually this won't happen): 
                    //  - better estimate of structure size in the presence of order>0 deblooming  
                    if (order == 1)  // this case just detects tips
                    {printf("ORDER==1");
                        bool is_linked = false;
                        for(int tip_nt=0; tip_nt<4; tip_nt++) 
                        {
                            int new_strand = current_strand;
                            kmer_type kmer_after_possible_tip = next_kmer(new_graine,tip_nt, &new_strand);
                            if(bloo1->contains(kmer_after_possible_tip))
                            {
                                is_linked = true;
                                break;
                            }
                        }
                        if (!is_linked)
                            continue; // it's a tip, because it's linked to nothing
                    }
    
                    if (order > 1) // general case. should work for order = 1, but i coded an optimized version above
                    { printf("ORDER>1");
                        Frontline frontline( new_graine, current_strand, bloo1, NULL, NULL, NULL);
                        while (frontline.depth < order)
                        {
                            frontline.go_next_depth();
                            if (frontline.size() == 0)
                                break;
                            // don't allow a breadth too large anywqy
                            if (frontline.size()> 10)
                                break;
                        }
                        if (frontline.size() == 0)
                            continue; // it's a deadend
                    }

                    if (!fwrite(&new_graine, sizeof(new_graine), 1, debloom_file))
                    {
                        printf("error: can't fwrite (disk full?)\n");
                        exit(1);
                    }
                    cc++;
                }

            }
        }
        NbSolidKmer++;
        if ((NbSolidKmer%table_print_frequency)==0) fprintf (stderr,"%c Writing positive Bloom Kmers %lld",13,NbSolidKmer);
    }
    nbkmers_solid =  NbSolidKmer; // GUS: it's global now

    fprintf(stderr,"\n%lli kmers written\n",cc);

    STOPWALL(pos,"Write all positive kmers");

    STARTWALL(deb);

    double bl1tai =  (double)bloo1->tai ;
    delete bloo1;

    // now that bloo1 is deleted, initialize hasht1
    int NBITS_HT = max( (int)ceilf(log2f((0.1*max_memory*1024L*1024L)/sizeof(cell_ptr_t))), 1); // set hasht1 cells to occupy 0.1 * [as much mem as poss]
    hasht1 =new Hash16(NBITS_HT); 
    
    ////////////////////////////////////////////////////////////////   --find false positive, with hash table partitioning
    uint64_t max_kmer_per_part = (uint64_t) (0.8*max_memory*1024LL*1024LL /sizeof(cell<kmer_type>));
    //adapter taille ht en fonction
    

    printf("%d partitions will be needed\n",(int)(nbkmers_solid/max_kmer_per_part));

    NbSolidKmer =0;
    int numpart = 0;
    SolidKmers->rewind_all();

    // deblooming:
    // read the list of (non-redundant) solid kmers and load it, in chunks, into a hash table
    // at each pass, check all the positive extensions and keep those which are not indicated, by the current chunk, as solid kmers
    // at the end, only the positive extensions which are not solid are kept
    while (SolidKmers->read_element(&kmer))
    {
        hasht1->add(kmer);

        NbSolidKmer++;
        if ((NbSolidKmer%table_print_frequency)==0) fprintf (stderr,"%cBuild Hash table %lld",13,NbSolidKmer);

        if(hasht1->nb_elem >max_kmer_per_part) //end partition,  find false positives
        {
            fprintf(stderr,"End of debloom partition  %lli / %lld \n",hasht1->nb_elem,max_kmer_per_part);

            end_debloom_partition(false);

            //swap file pointers
            F_tmp = F_debloom_read;
            F_debloom_read = F_debloom_write;
            F_debloom_write = F_tmp;
            /////////end write files

            //reset hash table
            hasht1->empty_all();

            fprintf(stderr,"\n%lli false positives written , partition %i \n",n_false_positives,numpart);

            numpart++;
        } ///end partition


    }
    fprintf(stderr,"Nb kmers stored in the bloom table %lld\n",nbkmers_solid);


    ///////////////////////// last partition, will write all the FP's to the good file

    end_debloom_partition(true); 

    /////////end write files


    fprintf(stderr,"Total nb false positives stored in the Debloom hashtable %lli \n",n_false_positives);

    delete hasht1;


    STOPWALL(deb,"Debloom");
 
    // GUS: will use to output summary later
    b1_size = (uint64_t) bl1tai;
  
    fclose(debloom_file);
    fclose(debloom_file_2);
    SolidKmers->close();


    return 1;

}
예제 #5
0
/*
 * Build the 4-level cascading false-positive structure (B2, B3, B4, T4).
 *
 *  - B2 receives every k-mer of the false-positive file.
 *  - Solid k-mers that B2 reports as present are written to the "t2_kmers"
 *    file and added to B3.
 *  - False-positive k-mers that B3 reports as present are added to B4.
 *  - k-mers of the "t2_kmers" file that B4 reports as present are stored in
 *    the exact set T4 (fp->false_positives).
 *
 * Returns the newly allocated FPSetCascading4. Exits the process if the
 * "t2_kmers" file cannot be opened or written.
 */
Set *load_false_positives_cascading4()
{
  int64_t NbInsertedKmers;
  char * rseq;
  int readlen;
  kmer_type kmer, graine, graine_revcomp;

  // **** Initialize B2, B3, B4 and T4 ****
  Bank *FalsePositives = new Bank(return_file_name(false_positive_kmers_file));
  uint64_t nbFP = countFP(FalsePositives);

  FPSetCascading4 *fp = new FPSetCascading4;

  fp->bloom2 = new Bloom((uint64_t)(nbFP * NBITS_PER_KMER));
  fp->bloom2->set_number_of_hash_func((int)floorf(0.7*NBITS_PER_KMER));

  uint64_t estimated_T2_size = max((int)ceilf(nbkmers_solid * (double)powf((double)0.62, (double)NBITS_PER_KMER)), 1);
  uint64_t estimated_T3_size = max((int)ceilf(nbFP          * (double)powf((double)0.62, (double)NBITS_PER_KMER)) ,1);

  fp->bloom3 = new Bloom((uint64_t)(estimated_T2_size * NBITS_PER_KMER));
  fp->bloom3->set_number_of_hash_func((int)floorf(0.7*NBITS_PER_KMER));

  fp->bloom4 = new Bloom((uint64_t)(estimated_T3_size * NBITS_PER_KMER));
  fp->bloom4->set_number_of_hash_func((int)floorf(0.7*NBITS_PER_KMER));

  // **** Insert the false positives in B2 ****
  NbInsertedKmers = 0;
  while (FalsePositives->get_next_seq(&rseq,&readlen))
  {
    kmer = extractKmerFromRead(rseq,0,&graine,&graine_revcomp);

    fp->bloom2->add(kmer);

    NbInsertedKmers++;
    if ((NbInsertedKmers%table_print_frequency)==0)
      fprintf (stderr,"%cInsert false positive B2 %lld",13,(long long)NbInsertedKmers);
  }
  fprintf (stderr,"%cInsert false positive B2 %lld", 13,(long long)NbInsertedKmers);
  FalsePositives->close();

  printf("\nInserted %lld (estimated, %lld) kmers in B2.\n", (long long)NbInsertedKmers, (long long)nbFP);

  //  **** Insert false positives in B3 and write T2
  int addKmers = 0;
  NbInsertedKmers = 0;
  // Raw k-mers are stored in this file, so it must be opened in binary mode.
  // It is read back later, when filling T4.
  FILE *T2_file = fopen(return_file_name("t2_kmers"), "wb+");
  if (T2_file == NULL)
  {
    printf("error: can't open the t2_kmers temporary file\n");
    exit(1);
  }
  BinaryBank *SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer),0);
  while(SolidKmers->read_element(&kmer))
  {
    if (fp->bloom2->contains(kmer))
    {
      if (!fwrite(&kmer, sizeof(kmer), 1, T2_file))
      {
        printf("error: can't fwrite (disk full?)\n");
        exit(1);
      }

      fp->bloom3->add(kmer);
      addKmers++;
    }

    NbInsertedKmers++;
    if ((NbInsertedKmers% table_print_frequency)==0)
      fprintf (stderr,(char*)"%cInsert false positive B3 %lld",13,(long long)NbInsertedKmers);
  }
  fprintf (stderr,(char*)"%cInsert false positive B3 %lld",13,(long long)NbInsertedKmers);
  SolidKmers->close();

  // addKmers is an int: it must be widened to match the %lld conversion.
  printf("\nInserted %lld (estimated, %llu) kmers in B3.\n", (long long)addKmers, (unsigned long long)estimated_T2_size);

  // **** Insert false positives in B4 (we could write T3, but it's not necessary)
  // NOTE(review): the first Bank object is closed above but never deleted.
  FalsePositives = new Bank(return_file_name(false_positive_kmers_file));
  NbInsertedKmers = 0;
  addKmers = 0;
  while (FalsePositives->get_next_seq(&rseq,&readlen))
  {
    kmer = extractKmerFromRead(rseq,0,&graine,&graine_revcomp);

    if (fp->bloom3->contains(kmer))
    {
      fp->bloom4->add(kmer);
      addKmers++;
    }

    NbInsertedKmers++;
    if ((NbInsertedKmers%table_print_frequency)==0)
      fprintf (stderr,"%cInsert false positive B4 %lld",13,(long long)NbInsertedKmers);
  }
  fprintf (stderr,"%cInsert false positive B4 %lld", 13,(long long)NbInsertedKmers);
  FalsePositives->close();

  printf("\nInserted %lld (estimated, %lld) kmers in B4.\n", (long long)addKmers, (long long)estimated_T3_size);

  // **** Count and insert false positives in T4
  // First pass over T2: count the entries so that the set can be sized.
  rewind(T2_file);
  addKmers = 0;
  while (fread(&kmer, sizeof(kmer), 1, T2_file))
    if (fp->bloom4->contains(kmer))
      addKmers++;

  // Second pass over T2: actually insert them.
  fp->false_positives = new FPSet(addKmers);
  rewind(T2_file);
  addKmers = 0;
  NbInsertedKmers = 0;
  while (fread(&kmer, sizeof(kmer), 1, T2_file))
  {
    if (fp->bloom4->contains(kmer))
    {
      fp->false_positives->insert(kmer);
      addKmers++;
    }

    NbInsertedKmers++;
    if ((NbInsertedKmers%table_print_frequency)==0)
      fprintf (stderr,"%cInsert false positive T4 %lld",13,(long long)NbInsertedKmers);
  }
  fp->false_positives->finalize();
  fprintf (stderr,"%cInsert false positive T4 %lld", 13,(long long)NbInsertedKmers);
  fclose(T2_file);

  printf("\nInserted %lld (estimated, %lld) kmers in T4.\n\n", (long long)addKmers, (long long)fp->false_positives->capacity());

  print_size_summary(fp);

  return fp;
}
예제 #6
0
파일: Minia.cpp 프로젝트: nickingle/clipper
inline void assemble()
{

    //////-------------------------------------------------------------------------------------------
    fprintf (stderr,"______________________________________________________ \n");
    fprintf (stderr,"___________ Assemble from bloom filter _______________ \n");
    fprintf (stderr,"______________________________________________________ \n\n");

    //////-------------------------------------------------------------------------------------------


    long long len_left = 0;
    long long len_right = 0;
    long long contig_len =0;
    long long maxlen=10000000;

    char *left_traversal  = (char *) malloc(maxlen*sizeof(char));
    char *right_traversal = (char *) malloc(maxlen*sizeof(char));
    KmerColour *left_colour_traversal  = (KmerColour *) malloc(maxlen*sizeof(KmerColour));
    KmerColour *right_colour_traversal = (KmerColour *) malloc(maxlen*sizeof(KmerColour));

    char *contig          = (char *) malloc(2*(maxlen+sizeKmer)*sizeof(char));
    KmerColour *contig_colour   = (KmerColour *) malloc(2*(maxlen+sizeKmer)*sizeof(KmerColour));

    kmer_type kmer;

    long long nbContig =0;
    long long totalnt=0;
    long long max_contig_len=0;
    long long mlenleft=0,mlenright=0;
    int64_t NbBranchingKmer=0;
    char kmer_seq[sizeKmer+1];
    FILE * file_assembly = fopen(return_file_name(assembly_file),"w+");
    FILE * file_colour_assembly = fopen(return_file_name(assembly_colour_file),"w+");
    BinaryBank *SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),0);
    BinaryBank *solid_kmers_colour = new BinaryBank(return_file_name(solid_kmers_colour_file), kSizeOfKmerType+kSizeOfKmerColour, 0);

    char colour_seq[1000];

    STARTWALL(assembly);

    char *assemble_only_one_region = NULL; // debugging, set to a ASCII kmer to activate, NULL to desactivate
    bool LOAD_BRANCHING_KMERS=false; // debugging
    bool DUMP_BRANCHING_KMERS=false;
   
    BranchingTerminator *terminator;

    if (LOAD_BRANCHING_KMERS)
    {printf("LOA:%d\n",LOAD_BRANCHING_KMERS);
        BinaryBank *BranchingKmers = new BinaryBank(return_file_name(branching_kmers_file),sizeof(kmer_type),false);
        terminator = new BranchingTerminator(BranchingKmers,SolidKmers, bloo1,false_positives);
        BranchingKmers->close();
    }
    else
        terminator = new BranchingTerminator(SolidKmers,genome_size, bloo1,false_positives);

    if (DUMP_BRANCHING_KMERS)
    {printf("DUMP:%d\n",DUMP_BRANCHING_KMERS);
        BinaryBank *BranchingKmers = new BinaryBank(return_file_name(branching_kmers_file),sizeof(kmer_type),true);
        terminator->dump_branching_kmers(BranchingKmers);
        BranchingKmers->close();
    }
    printf("Check boolean:%i\t%i\n", LOAD_BRANCHING_KMERS, DUMP_BRANCHING_KMERS);

#ifdef UNITIG
    SimplePathsTraversal *traversal = new SimplePathsTraversal(bloo1,false_positives,terminator);
    fprintf (stderr,"_________________Assembling in Unitig mode ..._____________________ \n\n");
#else
    MonumentTraversal *traversal = new MonumentTraversal(bloo1,false_positives,terminator);
#endif
    //RandomBranchingTraversal *traversal = new RandomBranchingTraversal(bloo1,false_positives,terminator);
    traversal->set_maxlen(maxlen);
    traversal->set_max_depth(500);
    traversal->set_max_breadth(20);
    traversal->SetSolidKmersColour(solid_kmers_colour, max_memory);

    while (terminator->next(&kmer))
    {
        // keep looping while a starting kmer is available from this kmer
		// everything will be marked during the traversal()'s
		kmer_type starting_kmer;
		code2seq(kmer,kmer_seq); // convert
//		printf("StartWhile, init Kmer:%li\t%s\n",kmer, kmer_seq);// Varified! kmer's matched seq from the original creation
		while (traversal->find_starting_kmer(kmer,starting_kmer))
//		while (traversal->find_starting_kmer_inside_simple_path(kmer,starting_kmer))
		{
		    code2seq(starting_kmer,kmer_seq); // convert starting kmer to nucleotide seq
		    KmerColour kmer_colour = traversal->GetColour(starting_kmer);

//		    printf("Starting_kmer:%lu %s",starting_kmer, kmer_seq);
            if (assemble_only_one_region != NULL)
            {
                kmer_type dummy;
                starting_kmer = extractKmerFromRead(assemble_only_one_region,0,&kmer,&dummy,false);
            }

            // right extension
//            len_right = traversal->traverse(starting_kmer, right_traversal, 0);
			len_right = traversal->traverse_colour(starting_kmer, right_traversal, right_colour_traversal, 0);
            mlenright= max(len_right,mlenright);
            int debug=1;
            if(debug>1){
            	printf("RightSeq:%lld\t%s\n", len_right, right_traversal);
//            	printf("RightColour:");
//            	for (int i = 0; i < len_right; ++i) {
//            		printf("%u ",right_colour_traversal[i]);
//				}
            	kmer_colour_pattern_string(right_colour_traversal, len_right, colour_seq);
				printf("RightColour:%s\n",colour_seq);
            }

            // left extension, is equivalent to right extension of the revcomp
//            len_left = traversal->traverse(starting_kmer, left_traversal, 1);
            len_left = traversal->traverse_colour(starting_kmer, left_traversal,
            										left_colour_traversal, 1);
            mlenleft= max(len_left,mlenleft);

            // form the contig

//            printf("before Rev:%s\n",left_traversal);
            revcomp_sequence(left_traversal,len_left);
            KmerColourUtil::rev_colour(left_colour_traversal, len_left);

//            printf("after Rev:%s\n",left_traversal);
            strcpy(contig,left_traversal); // contig = revcomp(left_traversal)
	        strcat(contig,kmer_seq);//               + starting_kmer
            strcat(contig,right_traversal);//           + right_traversal
			contig_len=len_left+len_right+sizeKmer;


            int colour_len = 0;
            KmerColour sep_colour = kErrorCode+1;// output with %x, so anything greater than 100;
			colour_len = KmerColourUtil::append_colour(left_colour_traversal, len_left,
					contig_colour, colour_len);
			if(debug){
				KmerColourUtil::append_colour(&sep_colour, 1, contig_colour,
						colour_len);
			}
//            memset(contig_colour+pt_len, (int) kmer_colour, kSizeOfKmerColour*sizeKmer);
//            pt_len += sizeKmer;

			KmerColourUtil::append_colour(&kmer_colour, 1, contig_colour,
					colour_len);

			if(debug){
				KmerColourUtil::append_colour(&sep_colour, 1, contig_colour,
										colour_len);
			}

//            memcpy(contig_colour+colour_len, right_colour_traversal, len_right);
//			colour_len += len_right;
			KmerColourUtil::append_colour(right_colour_traversal, len_right,
					contig_colour, colour_len);


            if(debug>1){
            	printf("LeftSeq:%lld\t%s\n", len_left, left_traversal);
//            	printf("LeftColour:");
//				for (int i = 0; i < len_left; ++i) {
//					printf("%u ",left_colour_traversal[i]);
//				}
//				printf("\n");
            	kmer_colour_pattern_string(left_colour_traversal, len_left, colour_seq);
				printf("LeftColour:%s\n",colour_seq);
				printf("Kmer:%s\n",kmer_seq);
				printf("KmerColour:%u\n",kmer_colour);
				printf("Contig:%lld\t%s\n",contig_len ,contig);
//				printf("Colour:");
//				for (int i = 0; i < pt_len; ++i) {
//					printf("%x", contig_colour[i]);
//				}

//				printf("Colour:%d\t%s\n\n",pt_len+len_right ,contig_colour);


            }

            std::string report("==========Summary==========\n");
//			KmerColourUtil::summary(report, contig_colour, colour_len);
//			KmerColourUtil::colour_table(report, contig_colour, colour_len, max_colour_count);
//			printf("%s", report.data());

			KmerColourSummary kcs(contig_colour, colour_len, max_colour_count);
			kcs.summary_colour_code(report);
			kcs.summary_colour_count(report);
			kcs.summary_stat(report);
			kcs.colour_table(report);
//			printf("%s", report.data());
//			delete &kcs;
//			delete &kcs;
//			printf("================END======================\n\n\n");
			// save the contig
            if(contig_len >= MIN_CONTIG_SIZE)//TODO: add colour info here
            {
                max_contig_len = max(max_contig_len,contig_len);
                fprintf(file_assembly,">%lli__len__%lli \n",nbContig,contig_len);
                fprintf(file_assembly,"%s\n",contig);

                fprintf(file_colour_assembly,">%lli__len__%lli \n",nbContig,contig_len);
				fprintf(file_colour_assembly,"%s\n",contig);
////				fprintf(file_colour_assembly,"%s\n",contig_colour);
//				for (int i = 0; i < colour_len; ++i) {
//					fprintf(file_colour_assembly, "%d", all_colour[i]);
//				}
				fprintf(file_colour_assembly,"%s\n",report.data());
                nbContig++;
                totalnt+=contig_len;
            }
            if (assemble_only_one_region != NULL)
                break;
//            printf("Memory: %zu %zu\n", getPeakRSS(), getCurrentRSS() );
//exit(-1);
        }
//		printf("Done while look is assemble()\n");
//fclose(file_assembly);
//fclose(file_colour_assembly);
//exit(-2);
        NbBranchingKmer++;
        if ((NbBranchingKmer%300)==0) fprintf (stderr,"%cLooping through branching kmer n° %" PRId64 "/ %" PRId64 " total nt   %lli" ,13,NbBranchingKmer,terminator->nb_branching_kmers,totalnt );

        if (nbContig > 0 && assemble_only_one_region != NULL)
            break;

    }
    fclose(file_assembly);

    fprintf (stderr,"\n Total nt assembled  %lli  nbContig %lli\n",totalnt,nbContig);
    fprintf (stderr,"\n Max contig len  %lli (debug: max len left %lli, max len right %lli)\n",max_contig_len,mlenleft,mlenright);
    
    STOPWALL(assembly,"Assembly");

    free(left_traversal);
    free(right_traversal);
    free(contig);
    SolidKmers->close();
    solid_kmers_colour->close();
//    delete SolidKmers;
//    delete solid_kmers_colour;
//    delete terminator;
    delete traversal;
//	printf("Memory: %zu %zu\n", getPeakRSS(), getCurrentRSS() );
    printf("===========DONE=========EXIT========\n");
//exit(-9);
}
/*
 * Assemble contigs from the Bloom filter.
 *
 * For every k-mer returned by the branching terminator, starting k-mers are
 * searched; each one is extended to the right and to the left, and the
 * contig is formed as revcomp(left) + starting k-mer + right. Contigs of at
 * least MIN_CONTIG_SIZE nucleotides are written in FASTA form to the
 * assembly file and their traversal statistics are committed; shorter ones
 * are only counted and their statistics reverted.
 *
 * Reads the globals bloo1, false_positives, genome_size and sizeKmer.
 * Returns early, after printing an error on stderr, if a work buffer or the
 * output file cannot be obtained.
 */
inline void assemble()
{
    fprintf (stderr,"______________________________________________________ \n");
    fprintf (stderr,"___________ Assemble from bloom filter _______________ \n");
    fprintf (stderr,"______________________________________________________ \n\n");

    long long len_left = 0;
    long long len_right = 0;
    long long contig_len =0;
    long long maxlen=10000000;

    char *left_traversal  = (char *) malloc(maxlen*sizeof(char));
    char *right_traversal = (char *) malloc(maxlen*sizeof(char));
    char *contig          = (char *) malloc(2*(maxlen+sizeKmer)*sizeof(char));
    kmer_type kmer;

    long long nbContig =0;
    long long nbSmallContig =0;
    long long totalnt=0;
    long long max_contig_len=0;
    long long mlenleft=0,mlenright=0;
    int64_t NbBranchingKmer=0;
    char kmer_seq[sizeKmer+1];
    FILE * file_assembly = fopen(return_file_name(assembly_file),"w+");

    // Nothing below can work without the buffers and the output file.
    if (left_traversal == NULL || right_traversal == NULL || contig == NULL
        || file_assembly == NULL)
    {
        fprintf (stderr,"error: can't allocate assembly buffers or open assembly file\n");
        if (file_assembly != NULL)
            fclose(file_assembly);
        free(left_traversal);
        free(right_traversal);
        free(contig);
        return;
    }

    BinaryBank *SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),0);

    STARTWALL(assembly);

    char *assemble_only_one_region = NULL; // debugging, set to a ASCII kmer to activate, NULL to deactivate
    bool LOAD_BRANCHING_KMERS=false; // debugging
    bool DUMP_BRANCHING_KMERS=false;

    BranchingTerminator *terminator;

    if (LOAD_BRANCHING_KMERS)
    {
        BinaryBank *BranchingKmers = new BinaryBank(return_file_name(branching_kmers_file),sizeof(kmer_type),false);
        terminator = new BranchingTerminator(BranchingKmers,SolidKmers, bloo1,false_positives);
        BranchingKmers->close();
    }
    else
        terminator = new BranchingTerminator(SolidKmers,genome_size, bloo1,false_positives);

    if (DUMP_BRANCHING_KMERS)
    {
        BinaryBank *BranchingKmers = new BinaryBank(return_file_name(branching_kmers_file),sizeof(kmer_type),true);
        terminator->dump_branching_kmers(BranchingKmers);
        BranchingKmers->close();
    }

#ifdef UNITIG
    SimplePathsTraversal *traversal = new SimplePathsTraversal(bloo1,false_positives,terminator);
    fprintf (stderr,"_________________Assembling in Unitig mode ..._____________________ \n\n");
#else
    MonumentTraversal *traversal = new MonumentTraversal(bloo1,false_positives,terminator);
#endif
    traversal->set_maxlen(maxlen);
    traversal->set_max_depth(500);
    traversal->set_max_breadth(20);

    while (terminator->next(&kmer))
    {
        // keep looping while a starting kmer is available from this kmer
        // everything will be marked during the traversal()'s
        kmer_type starting_kmer;
#ifdef UNITIG
        while (traversal->get_new_starting_node_improved(kmer,starting_kmer))
#else
        while (traversal->find_starting_kmer(kmer,starting_kmer))
#endif
        {
            code2seq(starting_kmer,kmer_seq); // convert starting kmer to nucleotide seq
            traversal->revert_stats(); // set stats from the last commit (discard stats from find_starting_kmer / small contigs)

            if (assemble_only_one_region != NULL)
            {
                kmer_type dummy;
                starting_kmer = extractKmerFromRead(assemble_only_one_region,0,&kmer,&dummy,false);
            }

            // right extension
            len_right = traversal->traverse(starting_kmer,right_traversal,0);
            mlenright= max(len_right,mlenright);

            // left extension, is equivalent to right extension of the revcomp
            len_left = traversal->traverse(starting_kmer,left_traversal,1);
            mlenleft= max(len_left,mlenleft);

            // form the contig
            revcomp_sequence(left_traversal,len_left);
            strcpy(contig,left_traversal); // contig = revcomp(left_traversal)
            strcat(contig,kmer_seq);       //        + starting_kmer
            strcat(contig,right_traversal);//        + right_traversal

            contig_len=len_left+len_right+sizeKmer;

            // save the contig
            if(contig_len >= MIN_CONTIG_SIZE)
            {
                max_contig_len = max(max_contig_len,contig_len);
                fprintf(file_assembly,">%lli__len__%lli \n",nbContig,contig_len);
                fprintf(file_assembly,"%s\n",contig);
                nbContig++;
                totalnt+=contig_len;
                traversal->commit_stats();
            }
            else
            {
                // too short to be saved: forget its statistics
                traversal->revert_stats();
                nbSmallContig++;
            }
            if (assemble_only_one_region != NULL)
                break;
        }

        NbBranchingKmer++;
        if ((NbBranchingKmer%300)==0) fprintf (stderr,"%cLooping through branching kmer n° %lld / %lld  total nt   %lld   ",13,(long long int) NbBranchingKmer,(long long int) terminator->nb_branching_kmers, (long long int)totalnt );

        if (nbContig > 0 && assemble_only_one_region != NULL)
            break;

    }
    fclose(file_assembly);

    fprintf (stderr,"\n Total nt assembled  %lli  nbContig %lli\n",totalnt,nbContig);
    fprintf (stderr," Max contig len  %lli (debug: max len left %lli, max len right %lli)\n",max_contig_len,mlenleft,mlenright);
    fprintf (stderr,"\n Debug traversal stats: %ld ends of contigs (%lld unsaved small contigs), among them:\n",traversal->final_stats.ended_traversals,nbSmallContig);
    fprintf (stderr," %ld couldn't validate consensuses\n",traversal->final_stats.couldnt_validate_consensuses);
    fprintf (stderr," %ld large bubble breadth, %ld large bubble depth, %ld marked kmer, %ld no extension\n",traversal->final_stats.couldnt_traverse_bubble_breadth,traversal->final_stats.couldnt_traverse_bubble_depth,traversal->final_stats.couldnt_because_marked_kmer,traversal->final_stats.couldnt_find_extension);
    fprintf (stderr," %ld in-branchin large depth, %ld in-branching large breadth, %ld in-branching other\n",traversal->final_stats.couldnt_inbranching_depth,traversal->final_stats.couldnt_inbranching_breadth,traversal->final_stats.couldnt_inbranching_other);

    STOPWALL(assembly,"Assembly");

    free(left_traversal);
    free(right_traversal);
    free(contig);
    SolidKmers->close();
}