Example #1
0
/*
 * Count kmers in two Bloom-counter passes, then run the exact kmer count.
 *
 * Reads       bank of reads handed to every pass
 * max_memory  forwarded unchanged to exact_kmer_count()
 *
 * The first counter is filled alone; the second pass fills a second counter
 * while being given the first one. The first counter is then freed and the
 * second one is passed to exact_kmer_count().
 *
 * NOTE(review): the second counter is never deleted in this function (the
 * delete was already commented out) - confirm whether exact_kmer_count()
 * keeps it or whether this is a leak.
 */
void bloom_count(Bank *Reads, unsigned long max_memory)
{

#define NBITS_BLOOMCPT 23 // 33 :4GB (4 bits/elem)   // size of the bloom counter table to count kmers 

    // distinct seeds, one per counter
    const long long first_seed  = 0x4909FEA3A68CC6A7LL;
    const long long second_seed = 0x0CD5DA28467C5492LL;

    fprintf(stderr, "nbits bloom counter: %i \n", NBITS_BLOOMCPT);

    BloomCpt3 *first_counter  = new BloomCpt3(NBITS_BLOOMCPT);
    BloomCpt3 *second_counter = new BloomCpt3(NBITS_BLOOMCPT);

    first_counter->setSeed(first_seed);
    second_counter->setSeed(second_seed);

    // ---- first pass: fill the first counter, no previous counter available
    bloom_pass_reads(Reads, first_counter, (BloomCpt *) NULL, (char*)"%cFirst pass %lld");

    // ---- second pass: fill the second counter, handing over the first one
    fprintf(stderr, "\n ____________   Second bloom counter   _________\n");
    bloom_pass_reads(Reads, second_counter, first_counter, (char*)"%cSecond pass %lld");

    STARTWALL(count);

    fprintf(stderr, "\n------------------ second pass  bloom counter   \n\n");

    // the first counter is not needed past this point
    delete first_counter;

    // exact kmer count with hash table partitioning
    // (per the original comment: also creates the solid kmers file and fills bloo1)
    exact_kmer_count(Reads, second_counter, max_memory);

    STOPWALL(count, "Counted kmers");

    fprintf(stderr, "\n------------------ Counted kmers and kept those with abundance >=%i \n\n", nks);
}
Example #2
0
int debloom(int order, int max_memory)
{
    // read bloo1 from disk dump
    Bloom *bloo1 = bloom_create_bloo1((BloomCpt *)NULL);

    STARTWALL(pos);

    FILE * debloom_file = fopen(return_file_name("debloom"),"wb+");
    FILE * debloom_file_2 = fopen(return_file_name("debloom2"),"wb+");
    FILE * F_tmp;
    
    F_debloom_read = debloom_file;
    F_debloom_write = debloom_file_2;
	
    BinaryBank *SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),0);
    
    uint64_t cc=0;
    kmer_type new_graine, kmer;
    int nt;
   
    uint64_t NbSolidKmer =0;
    // write all positive extensions in disk file
    while (SolidKmers->read_element(&kmer))
    {

        //8 right extensions   (4F and 4R); left extensions are redundant by revcomplementation
        for(nt=0; nt<4; nt++) 
        {
            int strand;
            for (strand = 0; strand < 2 ; strand++)
            {
                int current_strand = strand;
                new_graine = next_kmer(kmer,nt, &current_strand);

                if(bloo1->contains(new_graine)){   // extension is positive

                    // maybe do more lax deblooming; if it's a dead-end, it's no big deal, don't pass it to the false positive test
                    // what would have been needed if i decided to enable order>0 (but actually this won't happen): 
                    //  - better estimate of structure size in the presence of order>0 deblooming  
                    if (order == 1)  // this case just detects tips
                    {printf("ORDER==1");
                        bool is_linked = false;
                        for(int tip_nt=0; tip_nt<4; tip_nt++) 
                        {
                            int new_strand = current_strand;
                            kmer_type kmer_after_possible_tip = next_kmer(new_graine,tip_nt, &new_strand);
                            if(bloo1->contains(kmer_after_possible_tip))
                            {
                                is_linked = true;
                                break;
                            }
                        }
                        if (!is_linked)
                            continue; // it's a tip, because it's linked to nothing
                    }
    
                    if (order > 1) // general case. should work for order = 1, but i coded an optimized version above
                    { printf("ORDER>1");
                        Frontline frontline( new_graine, current_strand, bloo1, NULL, NULL, NULL);
                        while (frontline.depth < order)
                        {
                            frontline.go_next_depth();
                            if (frontline.size() == 0)
                                break;
                            // don't allow a breadth too large anywqy
                            if (frontline.size()> 10)
                                break;
                        }
                        if (frontline.size() == 0)
                            continue; // it's a deadend
                    }

                    if (!fwrite(&new_graine, sizeof(new_graine), 1, debloom_file))
                    {
                        printf("error: can't fwrite (disk full?)\n");
                        exit(1);
                    }
                    cc++;
                }

            }
        }
        NbSolidKmer++;
        if ((NbSolidKmer%table_print_frequency)==0) fprintf (stderr,"%c Writing positive Bloom Kmers %lld",13,NbSolidKmer);
    }
    nbkmers_solid =  NbSolidKmer; // GUS: it's global now

    fprintf(stderr,"\n%lli kmers written\n",cc);

    STOPWALL(pos,"Write all positive kmers");

    STARTWALL(deb);

    double bl1tai =  (double)bloo1->tai ;
    delete bloo1;

    // now that bloo1 is deleted, initialize hasht1
    int NBITS_HT = max( (int)ceilf(log2f((0.1*max_memory*1024L*1024L)/sizeof(cell_ptr_t))), 1); // set hasht1 cells to occupy 0.1 * [as much mem as poss]
    hasht1 =new Hash16(NBITS_HT); 
    
    ////////////////////////////////////////////////////////////////   --find false positive, with hash table partitioning
    uint64_t max_kmer_per_part = (uint64_t) (0.8*max_memory*1024LL*1024LL /sizeof(cell<kmer_type>));
    //adapter taille ht en fonction
    

    printf("%d partitions will be needed\n",(int)(nbkmers_solid/max_kmer_per_part));

    NbSolidKmer =0;
    int numpart = 0;
    SolidKmers->rewind_all();

    // deblooming:
    // read the list of (non-redundant) solid kmers and load it, in chunks, into a hash table
    // at each pass, check all the positive extensions and keep those which are not indicated, by the current chunk, as solid kmers
    // at the end, only the positive extensions which are not solid are kept
    while (SolidKmers->read_element(&kmer))
    {
        hasht1->add(kmer);

        NbSolidKmer++;
        if ((NbSolidKmer%table_print_frequency)==0) fprintf (stderr,"%cBuild Hash table %lld",13,NbSolidKmer);

        if(hasht1->nb_elem >max_kmer_per_part) //end partition,  find false positives
        {
            fprintf(stderr,"End of debloom partition  %lli / %lld \n",hasht1->nb_elem,max_kmer_per_part);

            end_debloom_partition(false);

            //swap file pointers
            F_tmp = F_debloom_read;
            F_debloom_read = F_debloom_write;
            F_debloom_write = F_tmp;
            /////////end write files

            //reset hash table
            hasht1->empty_all();

            fprintf(stderr,"\n%lli false positives written , partition %i \n",n_false_positives,numpart);

            numpart++;
        } ///end partition


    }
    fprintf(stderr,"Nb kmers stored in the bloom table %lld\n",nbkmers_solid);


    ///////////////////////// last partition, will write all the FP's to the good file

    end_debloom_partition(true); 

    /////////end write files


    fprintf(stderr,"Total nb false positives stored in the Debloom hashtable %lli \n",n_false_positives);

    delete hasht1;


    STOPWALL(deb,"Debloom");
 
    // GUS: will use to output summary later
    b1_size = (uint64_t) bl1tai;
  
    fclose(debloom_file);
    fclose(debloom_file_2);
    SolidKmers->close();


    return 1;

}
Example #3
0
/*
 * Return the value following the option at argv[n_a].
 * Exits with an error message when the option is the last argument, instead
 * of letting the caller read argv[argc] (which is NULL).
 */
static const char *dsk_option_value(int argc, char *argv[], int n_a)
{
    if (n_a + 1 >= argc)
    {
        printf("missing value after option %s\n", argv[n_a]);
        exit(1);
    }
    return argv[n_a + 1];
}

/*
 * Entry point: parse the command line, set the global counting parameters
 * (sizeKmer, kmerMask, nks, max_memory, max_disk_space, prefix, output_histo)
 * and run sorting_count() on the input reads.
 *
 * argv[1] is the reads file, argv[2] is passed to loadKmers(); the remaining
 * arguments are options (-t, -o, -m, -d, -v, -vv, -histo, -rev).
 * Returns 0; exits with status 1 on invalid arguments.
 */
int main(int argc, char *argv[])
{
    if(argc <  3)
    {
        fprintf (stderr,"%s: [d]isk [s]treaming of [k]-mers (constant-memory k-mer counting)\n",argv[0]);
        fprintf (stderr,"usage:\n");
        fprintf (stderr," %s input_file kmer_size [-t min_abundance] [-m max_memory] [-d max_disk_space] [-o out_prefix] [-histo]\n",argv[0]);
        fprintf (stderr,"details:\n [-t min_abundance] filters out k-mers seen ( < min_abundance ) times, default: 1 (all kmers are returned)\n [-m max_memory] is in MB, default: min(total system memory / 2, 5 GB) \n [-d max_disk_space] is in MB, default: min(available disk space / 2, reads file size)\n [-o out_prefix] saves results in [out_prefix].solid_kmers. default out_prefix = basename(input_file)\n [-histo] outputs histogram of kmers abundance\n [-rev] outputs only one of forward or reverse complement k-mers\n Input file can be fasta, fastq, gzipped or not, or a file containing a list of file names.\n");
#ifdef SVN_REV
fprintf(stderr,"Running dsk version %s\n",STR(SVN_REV));
#endif
        return 0;
    }

    // reads file
    Bank *Reads = new Bank(argv[1]);

    if (argv[2][0] == '-')
    {
        printf("please specify a k value\n");
        exit(1);
    }
	/* Changes by Raunaq
	 * Code addition for taking in multiple values of k. The file containing should have values of k sorted in decreasing order  
	 *
	*/
	// NOTE(review): this local is named like the Kmerlist used by sorting_count();
	// confirm that loadKmers() also fills the one sorting_count() reads.
	int *Kmerlist =  loadKmers(argv[2]);
    if (Kmerlist == NULL)
    {
        printf("could not load the k values from %s\n",argv[2]);
        exit(1);
    }
	
    // kmer size
	//fprintf(stderr,"Smallest kmer is %d \n",smallestKmer);
    sizeKmer = Kmerlist[0];
    if (sizeKmer < 1)
    {
        // a non-positive k would make the mask shift below undefined
        printf("kmer size must be at least 1\n");
        exit(1);
    }
    if (sizeKmer>(int)(sizeof(kmer_type)*4))
    {
        printf("Max kmer size on this compiled version is %lu\n",(unsigned long)(sizeof(kmer_type)*4));
        exit(1);
    }
    // at the maximum k the mask covers every bit of kmer_type; shifting by the
    // full width of the type is undefined, so build the all-ones mask directly
    if (sizeKmer == (int)(sizeof(kmer_type)*4))
        kmerMask = ~((kmer_type)0);
    else
        kmerMask=(((kmer_type)1)<<(sizeKmer*2))-1;

    // default solidity 
    nks = 1;

    // default max memory
    max_memory = 5*1024;
    #ifndef OSX
    struct sysinfo info;
    sysinfo(&info);
    int total_ram = (int)(((double)info.totalram*(double)info.mem_unit)/1024/1024);
    printf("Total RAM: %d MB\n",total_ram);
#else
    int total_ram = 128*1024;
#endif


    // default prefix is the reads file basename
    char *reads_path=strdup(argv[1]);
    string reads_name(basename(reads_path)); // posix basename() may alter reads_path
    free(reads_path);
    int lastindex = reads_name.find_last_of("."); 
    // when there is no '.', lastindex is -1 and substr() keeps the whole name
    strcpy(prefix,reads_name.substr(0, lastindex).c_str()); 

    for (int n_a = 3; n_a < argc ; n_a++)
    {
        if (strcmp(argv[n_a],"-t")==0)
            nks = atoi(dsk_option_value(argc,argv,n_a));

        // NOTE(review): unbounded copy into prefix - its capacity is not visible here
        if (strcmp(argv[n_a],"-o")==0)
            strcpy(prefix,dsk_option_value(argc,argv,n_a));
    }

    int verbose = 0;
    bool reverse = false;
    max_disk_space = 0;

    output_histo =false;
    // parse the remaining arguments: these will override the default max memory / max disk
    for (int n_a = 3; n_a < argc ; n_a++)
    {
        if (strcmp(argv[n_a],"-m")==0)
            max_memory = atoi(dsk_option_value(argc,argv,n_a));

        if (strcmp(argv[n_a],"-d")==0)
            max_disk_space = atoi(dsk_option_value(argc,argv,n_a));

        if (strcmp(argv[n_a],"-v")==0)
            verbose = 1;

        if (strcmp(argv[n_a],"-vv")==0)
            verbose = 2;
        
        if (strcmp(argv[n_a],"-histo")==0)
            output_histo =true;
	
	if (strcmp(argv[n_a],"-rev")==0)
	    reverse = true;
    }

    if (max_memory > total_ram)
    {
        printf("Maximum memory (%d MB), exceeds total RAM (%d MB). Setting maximum memory to %d MB.\n",max_memory,total_ram,total_ram/2);
        max_memory = total_ram/2;
    }

    STARTWALL(0);

    sorting_count(Reads,prefix,max_memory,max_disk_space,true,verbose,reverse);

    STOPWALL(0,"Total");

    delete Reads;

    return 0;
}
// main k-mer counting function, shared between minia and dsk
// verbose == 0 : stderr progress bar
// verbose >= 1 : print basic status
// verbose >= 2 : print extra partition information
// write_count == True: include kmer count in results file, in that form:
//           - save kmer count for each kmer in the resulting binary file
//           - the very first four bytes of the result file are the kmer length
void sorting_count(Bank *Sequences, char *prefix, int max_memory, int max_disk_space, bool write_count, int verbose)
{

    // create a temp dir from the prefix
    char temp_dir[1024];
    sprintf(temp_dir,"%s_temp",prefix);

    // clear the temp folder (needs to be done before estimating disk space)
    DIR*            dp;
    struct dirent*  ep;
    char            p_buf[512] = {0};
    dp = opendir(temp_dir);
    while ( (dp != NULL) && ((ep = readdir(dp)) != NULL)) {
        sprintf(p_buf, "%s/%s", temp_dir, ep->d_name);
        remove(p_buf);
    }
    if(dp != NULL)
        closedir(dp);

    if (max_disk_space == 0)
    {
        // default max disk space
        struct statvfs buffer ;
        char current_path[1000];
        getcwd(current_path,sizeof(current_path));
        // int ret =
        statvfs(current_path, &buffer);
        int available = (int)(((double)buffer.f_bavail * (double)buffer.f_bsize) / 1024 / 1024);
	uint32_t tt_new_temp = (uint32_t) (((double)Sequences->filesizes)/(1024*1024));
        printf("Available disk space in %s: %d  %u %llu MB\n",current_path,available,tt_new_temp,Sequences->filesizes); // not working in osx (is that a TODO then?)
        max_disk_space = min((uint32_t)available/2, tt_new_temp);
    } 
    if (max_disk_space <= 0) // still 0?
        max_disk_space = 10000; // = default for osx

    // estimate number of iterations TODO Check if multiplication with totalKmers is actually required or not. It may be just increasing number of partitions for no reason
    //uint64_t volume = totalKmers*Sequences->estimate_kmers_volume(smallestKmer);  //Since there are totalKmers no of kmers and an upper bound can be estimated by using the smallest size of kmer. Added by Raunaq
    uint64_t volume = Sequences->estimate_kmers_volume(smallestKmer);  //Since there are totalKmers no of kmers and an upper bound can be estimated by using the smallest size of kmer. Added by Raunaq
    uint32_t nb_passes = ( volume / max_disk_space ) + 1;
    int passes_hash ;
    
    int nb_threads=1;
    
#if OMP
    use_compressed_reads =true;
    nb_threads = 8;
    max_memory /= nb_threads;
    max_memory = max (max_memory,1);
#endif
    
    // temp bugfix: don't use compressed reads for long reads
    if (Sequences->estimate_max_readlen() > 1000000)
        use_compressed_reads = false;
    
    
    uint64_t volume_per_pass,volume_per_partition;
    uint32_t nb_partitions;
    int partitions_hash;

    // loop to lower the number of partitions below the maximum number of simulatenously open files
    do
    {
        volume_per_pass = volume / nb_passes;
        nb_partitions = ( volume_per_pass * totalKmers / max_memory ) + 1; 
	//printf("volume per pass and total volume %llu %llu \n",volume_per_pass,(unsigned long long)volume);
        // if partitions are hashed instead of sorted, adjust for load factor
        // (as in the worst case, all kmers in the partition are distinct and partition may be slightly bigger due to hash-repartition)
        if (use_hashing)
        {
            nb_partitions = (uint32_t) ceil((float) nb_partitions / load_factor);
            nb_partitions = ((nb_partitions * OAHash::size_entry() ) + sizeof(key_type)-1) / sizeof(key_type); // also adjust for hash overhead
        }

        struct rlimit lim;
        int max_open_files = 1000;
        int err = getrlimit(RLIMIT_NOFILE, &lim);
        if (err == 0)
            max_open_files = lim.rlim_cur / 2;
        if (nb_partitions >= max_open_files)
            nb_passes++;
        else
            break;
    }
    while (1);
    volume_per_partition= volume_per_pass/nb_partitions;
    passes_hash = ceil(log(nb_passes)/log(4));
    partitions_hash = ceil(log(nb_partitions)/log(4));
    int size_for_reestimation = ceil((passes_hash + partitions_hash)*1.8);
    double * lmer_counts = (double * ) malloc(sizeof(long)*pow(4,size_for_reestimation));
    long * lmers_for_hash = (long * ) malloc(sizeof(long)*pow(4,size_for_reestimation));
    int * partitions_for_lmers =(int * ) malloc(sizeof(int)*pow(4,size_for_reestimation));
    Sequences->count_kmers_for_small_value(size_for_reestimation,lmer_counts);
    int temp_partition=reestimate_partitions(size_for_reestimation,volume_per_partition,lmer_counts,lmers_for_hash,partitions_for_lmers);
    unordered_map<long,int> part_hash;
    int total_lmers=pow(4,size_for_reestimation);
    for(int it=0;it<total_lmers;it++)
    {
	pair<long,int> temp_pair(lmers_for_hash[it],partitions_for_lmers[it]);
        part_hash.insert (temp_pair); // Add element to the hash 
    }
    //uint64_t up_passes_size = volume_per_pass;
      	do
	{
		//recompute the number of partitions based on updated partitions estimate
		nb_partitions = ceil(temp_partition*1.0/nb_passes);
		struct rlimit lim;
	        int max_open_files = 1000;
	        int err = getrlimit(RLIMIT_NOFILE, &lim);
	        if (err == 0)
        	    max_open_files = lim.rlim_cur / 2;
	        if (nb_partitions >= max_open_files)
        	    nb_passes++;
	        else
        	    break;
	}while(1);
    	printf("no of partitions before %lu and after %d passes %lu \n",nb_partitions*nb_passes,temp_partition,nb_passes);
    uint64_t total_IO =   volume * 2LL * 1024LL*1024LL   ;// in bytes  +   nb_passes * ( volume / (sizeof(kmer_type)*4) )    ; // in bytes
    uint64_t temp_IO = 0;
    BinaryBankConcurrent * redundant_partitions_file[nb_partitions]; 
    char redundant_filename[nb_partitions][256];
    kmer_type kmer;
    int max_read_length = KMERSBUFFER_MAX_READLEN;
    kmer_type * kmer_table_seq = (kmer_type * ) malloc(sizeof(kmer_type)*max_read_length); ;
    kmer_type * kmer_length_table_seq = (kmer_type * ) malloc(sizeof(kmer_type)*max_read_length);

    BinaryReads *  binread = NULL;
    if(use_compressed_reads)
        binread = new BinaryReads(return_file_name(binary_read_file),true);

    fprintf(stderr,"Sequentially counting ~%llu MB of kmers with %d partition(s) and %d passes using %d thread(s), ~%d MB of memory and ~%d MB of disk space\n", (unsigned long long)volume, nb_partitions,nb_passes, nb_threads, max_memory * nb_threads, max_disk_space);

    STARTWALL(count);

    mkdir(temp_dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
    
    // Open totalKmers files to store counts of totalKmers different k's
    BinaryBankConcurrent * SolidKmers[totalKmers];
    for (int s=0;s<totalKmers;s++) 
    {	
		char temp[1024];
		sprintf(temp,"%s.%d",return_file_name(solid_kmers_file),Kmerlist[s]);
		uint64_t exp = (((uint64_t)1)<<(Kmerlist[s]*2))-1;
		SolidKmers[s] = new BinaryBankConcurrent(temp,sizeof(kmer),true,nb_threads);
		//printf("kmer is %d exp is %llu \n",Kmerlist[s],exp);
		//BinaryBankConcurrent * SolidKmers = new BinaryBankConcurrent(return_file_name(solid_kmers_file),sizeof(kmer),true,nb_threads);

	    if (write_count)
	    {
        	// write k-mer nbits as the first 4 bytes; and actual k-mer size as the next 4 bits
      		  uint32_t kmer_nbits = sizeof(kmer) * 8;
   	     	SolidKmers[s]->write_buffered(&kmer_nbits, 4,0);
        	SolidKmers[s]->write_buffered(&Kmerlist[s], 4,0);
        	SolidKmers[s]->flush(0);
        }
   }

    int64_t estimated_NbReads = Sequences->estimate_nb_reads();
    char * rseq;
    int readlen;
    int64_t NbSolid = 0;
    int64_t * NbSolid_omp = (int64_t  *) calloc(nb_threads,sizeof(int64_t));
    //long total_kmers_per_partition[nb_partitions]; //guillaume probably commented it because updating this variable would require synchronization
    long distinct_kmers_per_partition[nb_partitions];
    uint64_t  * histo_count = (uint64_t  *) calloc(10001,sizeof(uint64_t));


#if OMP
    uint64_t  **  histo_count_omp = (uint64_t  **) calloc(nb_threads,sizeof(uint64_t *));
    for(int ii=0;ii<nb_threads;ii++)
    {
        histo_count_omp[ii]= (uint64_t  *) calloc(10001,sizeof(uint64_t));
    }
#endif
    

    
   
    //start by the conversion of the file to binary format

    if(use_compressed_reads)
    {
        char * pt_begin;
        int idx =0 ;
        int64_t NbRead = 0;
        Progress progress_conversion;
       // progress_conversion.timer_mode=1; // to switch to timer mode (show elapsed and estimated remaining time)
        progress_conversion.init(estimated_NbReads,"First step: Converting input file into Binary format");
        
        Sequences->rewind_all();
        while(1)
        {
            if(! Sequences->get_next_seq(&rseq,&readlen)) break; // read  original fasta file
            if(readlen > max_read_length) // realloc kmer_table_seq if needed
            {
                max_read_length = 2*readlen;
                kmer_table_seq = (kmer_type * ) realloc(kmer_table_seq,sizeof(kmer_type)*max_read_length);
            	kmer_length_table_seq = (kmer_type * ) realloc(kmer_length_table_seq,sizeof(kmer_type)*max_read_length);
	    }
            
            pt_begin = rseq;
            //should be ok
            while (pt_begin < (rseq+ readlen))
            {
                idx=0; // start a new read

                //skips NN
                while (*pt_begin =='N' && pt_begin < (rseq+ readlen))
                {
                    pt_begin ++;
                }
                // goes to next N or end of seq
                while ( (pt_begin[idx] !='N') &&  ((pt_begin +idx) < (rseq+ readlen))  )
                {
                    idx++;
                }
                
                //we have a seq beginning at  pt_begin of size idx  ,without any N, will be treated as a read:
                binread->write_read(pt_begin,idx);
		revcomp_sequence(pt_begin,idx); // reverse complement the string 
		binread->write_read(pt_begin,idx); // write reverse complement string 
		revcomp_sequence(pt_begin,idx); // restore the string 

		pt_begin += idx;
            }
            
            // binread->write_read(rseq,readlen);
            
            
            NbRead++;
            if ((NbRead%10000)==0)
            {
                progress_conversion.inc(10000);
            }
        }
	//printf("Number of reads converted to binary %d \n",NbRead);
        progress_conversion.finish();
        binread->close();

    }
    ///fin conversion
    if (clear_cache)
    {
#ifdef OSX
        system("purge");
#else
        system("echo 3 > /proc/sys/vm/drop_caches");
#endif
    }
    
    
    
#if SINGLE_BAR
    Progress progress;
    char message[1000];
    sprintf(message,"Counting kmers");
    progress.timer_mode=1;
    if (verbose == 0 )
        progress.init(total_IO,message);
#endif
    
    //use_compressed_reads=false; // for testing compute_kmer_from_one_seq 
    // how many times we will traverse the whole reads file (has an influence on temp disk space)

   uint64_t iter_partition=0;
    for (uint32_t current_pass = 0; current_pass < nb_passes; current_pass ++)
    {
	// stop computing if all partitions are done Added by Raunaq
        if (iter_partition==temp_partition)
		break;
	if(use_compressed_reads ) //open binary reads for reading
            binread->open(false);
        
        STARTWALL(debpass);
        STARTWALL(debw);
	int initial_value = current_pass*nb_partitions;
        for (uint32_t p=0;p<nb_partitions;p++)
        {
            sprintf(redundant_filename[p],"%s/partition%d.redundant_kmers",temp_dir,p);
            redundant_partitions_file[p] =  new BinaryBankConcurrent (redundant_filename[p],sizeof(kmer_type),true, nb_threads);
            distinct_kmers_per_partition[p]=0;
       	}
	int final_value = ((current_pass+1)*nb_partitions)-1;
	printf("Storing k-mers in partition files between %d and %d \n",initial_value,final_value);
        Sequences->rewind_all();
#if !SINGLE_BAR
        Progress progress;
        progress.timer_mode=1; // to switch to timer mode (show elapsed and estimated remaining time)
        char message[1000];
        sprintf(message,"Pass %d/%d, Step 1: partitioning",current_pass+1,nb_passes);
        if (verbose == 0 )
            progress.init(estimated_NbReads,message);
#endif
     

        
        //current_pass> 0 &&
#if OMP
#pragma omp parallel if(use_compressed_reads)  num_threads(nb_threads)
#endif
        {
            int64_t  nbkmers_written =0;
            int tid =0;
            int64_t NbRead = 0;
            int64_t nread =0;
            int64_t tempread =0;
	    long it_zero_wrt =0;
#if OMP

            tid = omp_get_thread_num();
#endif
            int nreads_in_buffer= 1000;
            KmersBuffer * kbuff =NULL;
            if(use_compressed_reads)
            {
                kbuff = new KmersBuffer (binread, 1000000,  nreads_in_buffer); //buffer size (in nb of kmers), seq per task // the buffer is per thread
                kbuff->binary_read_file = binread->binary_read_file;
            }

            kmer_type * kmer_table ;
            kmer_type * kmer_length_info ; // Added by Raunaq, to store the length of read into the partitions file
	    while(1)
            {

                //read the fasta file
                if(use_compressed_reads) // && current_pass>0
                {
                    nread = kbuff->readkmers();
                    if( nread < 0) break;
                    NbRead+= nread;
                    tempread+= nread;
                }
                else
                {
                    if(! Sequences->get_next_seq(&rseq,&readlen)) break; // read  original fasta file
                    if(readlen > max_read_length) // realloc kmer_table_seq if needed
                    {
                        max_read_length = 2*readlen;
                        kmer_table_seq = (kmer_type * ) realloc(kmer_table_seq,sizeof(kmer_type)*max_read_length);
            		kmer_length_table_seq = (kmer_type * ) realloc(kmer_length_table_seq,sizeof(kmer_type)*max_read_length);
                    }

                }

//                if(use_compressed_reads ) //write compressed read file at first pass //&& current_pass==0
//                    binread->write_read(rseq,readlen);

                int i;
                int nbkmers =readlen-sizeKmer+1;  

                if( use_compressed_reads) //current_pass >0 &&
                {
                    nbkmers = kbuff->nkmers;
                    kmer_table = kbuff->kmers_buffer;
		    kmer_length_info = kbuff->kmer_length;
                } 
                else //old fashion   
                {
                    compute_kmer_table_from_one_seq(readlen,rseq,kmer_table_seq,kmer_length_table_seq,Kmerlist[totalKmers-1]); // Added by Raunaq for computing kmers for all values of k 
                    nbkmers =readlen-Kmerlist[totalKmers-1]+1;  
                    kmer_table = kmer_table_seq;
		    kmer_length_info = kmer_length_table_seq;
                    NbRead++;
                    //printf("Number of kmers read from seq %d \n",nbkmers);
		}
		
                nbkmers_written= 0;
		char  temp_kmer[256];
		int zero;
                //compute the kmers stored in the buffer kmer_table
                for (i=0; i<nbkmers; i++)
                {
                    kmer_type lkmer;
					kmer_type lkmer_length;
                    // kmer = extractKmerFromRead(rseq,i,&graine,&graine_revcomp);

                    lkmer = kmer_table[i];
					lkmer_length = kmer_length_info[i];
		   // zero = code2seq(lkmer,temp_kmer);
					long pass_lkmer = code2first_n_nucleotide(lkmer,size_for_reestimation);
					unordered_map<long,int>::const_iterator got = part_hash.find(pass_lkmer);
					int p;// compute in which partition this kmer falls into
					if(got==part_hash.end())
						continue;
					else
						p = got->second; 
                    // check if this kmer should be included in the current pass
                    if(!(p >= initial_value && p<= final_value))
						continue;


/*		
#ifdef _ttmath
                    (reduced_kmer % nb_partitions).ToInt(p);
#else
                    p = reduced_kmer % nb_partitions;
#endif
*/
					p = p - current_pass*nb_partitions;  
                    nbkmers_written++;

                    redundant_partitions_file[p]->write_element_buffered(&lkmer,tid); // save this kmer to the right partition file
					redundant_partitions_file[p]->write_buffered(&lkmer_length,sizeof(lkmer_length),tid,false); // save the kmer length next to the kmer in the same partition file
		    // total_kmers_per_partition[p]++; // guillaume probably commented it because updating this variable would require synchronization

                }
                //NbRead++;
#if SINGLE_BAR
                if(verbose==0)
                {
                if (nb_threads == 1)
                    progress.inc(nbkmers_written * sizeof(kmer_type));
                else
                    progress.inc(nbkmers_written * sizeof(kmer_type),tid);
                }
#endif
             //   if ((NbRead%10000)==0)
                if(tempread> 10000)
                {
                    tempread -= 10000;
                    if (verbose)
                        fprintf (stderr,"%cPass %d/%d, loop through reads to separate (redundant) kmers into partitions, processed %lluM reads out of %lluM",13,current_pass+1,nb_passes,(unsigned long long)(NbRead/1000/1000),(unsigned long long)(estimated_NbReads/1000/1000));
#if !SINGLE_BAR
                    else
                        if (nb_threads == 1)
                            progress.set(NbRead);
                        else
                            progress.inc(10000,tid);
#endif
                }
            } //end while
           // printf("Count of zero in write is %lu \n",it_zero_wrt);
            if(use_compressed_reads)
                delete kbuff;
        } // end OMP 


        
#if !SINGLE_BAR
        if (verbose == 0)
        {
            if (nb_threads == 1)
             progress.finish();
            else
              progress.finish_threaded();  // here only one thread
            
            sprintf(message,"Pass %d/%d, Step 2: computing kmer count per partition",current_pass+1,nb_passes);
            progress.init(nb_partitions+1,message);
        }
#endif
        
        if (verbose)fprintf(stderr,"\n");

        if (verbose >= 2)
        {
            STOPWALL(debw,"Writing redundant kmers");
        }
        STARTWALL(debtri);
	


            for (uint32_t p=0;p<nb_partitions;p++)
            {	
				redundant_partitions_file[p]->close();
                redundant_partitions_file[p]->open(false);
            }



        // for better timing: clear the file cache, since the partitions may still be in memory, that's unfair to low mem machines
        if (clear_cache)
        {
#ifdef OSX
            system("purge");
#else
            system("echo 3 > /proc/sys/vm/drop_caches");
#endif
        }

        //quick and dirty parall with omp, testing
        //todo if we want omp and histo : separate histo_count tab per thread that needs to be merged at the end
        // TODO to guillaume: remove that todo above, because it is done, right?
        kmer_type lkmer,lkmer_length,lkmer_temp,exp;
	long it_zero=0;
	OAHash * hash;
	int p,s;
#if OMP 
        //omp_set_numthreads(2);  //num_threads(2) //if(!output_histo) num_threads(nb_threads)
#pragma omp parallel for private (p,s,lkmer,lkmer_length,hash,lkmer_temp,exp)  num_threads(nb_threads)
#endif        
        // load, sort each partition to output solid kmers
        for ( p=0;p<nb_partitions;p++)
        {
			char temp_kmer[256];  // bug check code 
			int zero;
			kmer_type lkmer_revcomp; // to store revcomps
				
           	bool use_hashing_for_this_partition = use_hashing;
			if(hybrid_mode)
			{
				if(   (redundant_partitions_file[p]->nb_elements()*sizeof(kmer_type)) <  (max_memory*1024LL*1024LL) )  // Maintain totalKmers hash for each partition file
				{	
					use_hashing_for_this_partition = false;
				}
				else
				{
					use_hashing_for_this_partition = true;
				}
			}
            int tid =0;
			//int s;
			//Computing if hashing should be used or not for this partition
#if OMP
            tid = omp_get_thread_num();
#endif
            //use_hashing_for_this_partition = false;  //to check the vector part of the code
           	if (use_hashing_for_this_partition)
            {
                // hash partition and save to solid file
				
				hash = new OAHash(max_memory*1024LL*1024LL/2); // One hash to store all types of k-mer lengths

				uint64_t nkmers_read=0;
				redundant_partitions_file[p]->read_element_buffered(&lkmer_length);

				while (redundant_partitions_file[p]->read_element_buffered(&lkmer))
				{
			
					if(lkmer_length == Kmerlist[0])  //only add the largest k-mer 
						hash->increment(lkmer,convert_to_int(lkmer_length));
					else
					{
						unordered_map<int,int>::const_iterator got = kmerlength_map.find(convert_to_int(lkmer_length));
						exp = (((kmer_type)1)<<(got->second*2))-1;
						lkmer_temp = lkmer & exp;
						hash->increment(lkmer_temp,got->second);

					}	
					if(!redundant_partitions_file[p]->read_element_buffered(&lkmer_length)) 
					{
						break;
					}
					nkmers_read++;
#if SINGLE_BAR
					if(verbose==0 && nkmers_read==10000)
					{
						if (nb_threads == 1)
							progress.inc(nkmers_read*sizeof(kmer_type));
						else
							progress.inc(nkmers_read*sizeof(kmer_type),tid);
						nkmers_read=0;
					}
#endif
                }
                
                
				if (verbose >= 2)
					 printf("Pass %d/%d partition %d/%d hash load factor: %0.3f\n",current_pass+1,nb_passes,p+1,nb_partitions,hash->load_factor());
                	for( s=0;s<totalKmers;s++) 
					{
						OAHash * temp_ = new OAHash(max_memory*1024LL*1024LL/2);
						hash->start_iterator();
						while (hash->next_iterator())
                				{
							uint_abundance_t abundance = hash->iterator->value;
        	       		 			uint_abundance_t abund_tid = (current_pass+1)*100+p;
							if(output_histo)
							{
							 uint_abundance_t saturated_abundance;
							 saturated_abundance = (abundance >= 10000) ? 10000 : abundance;
#if OMP
							 histo_count_omp[tid][saturated_abundance]++;
#else
					
							 histo_count[saturated_abundance]++;
#endif
							}
							int length_kmer = hash->iterator->length;
							lkmer = hash->iterator->key;
	                    				if (abundance >= nks && abundance <= max_couv && length_kmer == Kmerlist[s])
							{
								//write if lkmer is the smaller of it and its reverse complement
								lkmer_revcomp = revcomp(lkmer,length_kmer);
								if(lkmer < lkmer_revcomp)
								{
								SolidKmers[s]->write_element_buffered(&(hash->iterator->key),tid);
							
								 NbSolid_omp[tid]++;
								if (write_count)
										SolidKmers[s]->write_buffered(&abundance, sizeof(abundance),tid, false);
								}
							}
		                    			distinct_kmers_per_partition[p]++;
							if(s!=totalKmers-1)
							{
								if(length_kmer == Kmerlist[s])
								{
									exp = (((kmer_type)1)<<(Kmerlist[s+1]*2))-1;
									lkmer_temp = lkmer & exp;
									temp_->increment_by_value(lkmer_temp,abundance,Kmerlist[s+1]);
								}else {
									temp_->increment_by_value(lkmer,abundance,length_kmer);
								}
							}
						}
						hash->~OAHash();
						hash = temp_;
					}
				hash->~OAHash();
			//printf("All hashes closed and destroyed \n");
			}
            
			else
			{
				// This part does it in slower fashion
				// sort partition and save to solid file 
        	    //vector < kmer_type > kmers;
				vector < kmer_type > kmers[totalKmers];
                uint64_t nkmers_read=0;
               	//int s=0; 
				
				redundant_partitions_file[p]->read_element_buffered(&lkmer_length);
				while (redundant_partitions_file[p]->read_element_buffered (&lkmer))
				{
    		        for(s=0;s<totalKmers;s++)
					{
						//kmer_type lkmer_temp;
						//kmer_type exp;
						if(lkmer_length<Kmerlist[s])
							continue;
						if(s==0)
							kmers[s].push_back (lkmer);
						else
						{
							exp = (((kmer_type)1)<<(Kmerlist[s]*2))-1;
							lkmer_temp = lkmer & exp; // Converting the kmer to its smaller equivalent in binary 
							kmers[s].push_back (lkmer_temp);
						}
                    }
					nkmers_read++;
					if(!redundant_partitions_file[p]->read_element_buffered(&lkmer_length)) break;  //Added to get the next length of kmer
#if SINGLE_BAR
					if(verbose==0 && nkmers_read==10000)
					{
						if (nb_threads == 1)
							progress.inc(nkmers_read*sizeof(kmer_type));
						else
							progress.inc(nkmers_read*sizeof(kmer_type),tid);
						nkmers_read=0;
					}
#endif
                }
                
                for(s=0;s<totalKmers;s++)
               	{
					sort (kmers[s].begin (), kmers[s].end ());
                
					kmer_type previous_kmer = *(kmers[s].begin ());
					uint_abundance_t abundance = 0;
					for (vector < kmer_type >::iterator it = kmers[s].begin (); it != kmers[s].end ();it++)
					{
						kmer_type current_kmer = *it;
					
						if (current_kmer == previous_kmer)
							abundance++;
						else
						{
							if(output_histo)
							{
									uint_abundance_t saturated_abundance;
									saturated_abundance = (abundance >= 10000) ? 10000 : abundance;
#if OMP
									histo_count_omp[tid][saturated_abundance]++;
#else
									histo_count[saturated_abundance]++;
#endif
					
							}
							if (abundance >= nks  && abundance <= max_couv)
							{
								 NbSolid_omp[tid]++;
								 SolidKmers[s]->write_element_buffered(&previous_kmer,tid);
						
								 if (write_count)
									SolidKmers[s]->write_buffered(&abundance, sizeof(abundance),tid, false);
							}		
								abundance = 1;
							distinct_kmers_per_partition[p]++;
						}
						previous_kmer = current_kmer;
					}
                
                //last kmer
					distinct_kmers_per_partition[p]++;
					if(output_histo)
					{
							uint_abundance_t saturated_abundance;
							saturated_abundance = (abundance >= 10000) ? 10000 : abundance;
#if OMP
							histo_count_omp[tid][saturated_abundance]++;
#else
							histo_count[saturated_abundance]++;
#endif
				
					}
					if (abundance >= nks && abundance <= max_couv)
					{
							NbSolid_omp[tid]++;
							SolidKmers[s]->write_element_buffered(&previous_kmer,tid);
				
							if (write_count)
							   SolidKmers[s]->write_buffered(&abundance, sizeof(abundance),tid, false);
				
					}	
				}
			}
            
            
		//printf("Done writing kmers for all K \n");
            
            	if (verbose >= 1)
                	fprintf(stderr,"%cPass %d/%d, loaded and sorted partition %d/%d, found %lld solid kmers so far",13,current_pass+1,nb_passes,p+1,nb_partitions,(long long)(NbSolid_omp[tid]));
            
		//printf("Done writing kmers for all K %d check 1 \n",p);
            	if (verbose >= 2)
                	printf("\nPass %d/%d partition %d/%d %ld distinct kmers\n",current_pass+1,nb_passes,p+1,nb_partitions,/*total_kmers_per_partition[p],*/distinct_kmers_per_partition[p]);
            
#if !SINGLE_BAR
            	if (verbose == 0 && nb_threads==1)
                	progress.inc(1);
            	else if (verbose == 0 && nb_threads>1)
                	progress.inc(1,tid);
#endif
            
            	//if(redundant_partitions_file[p]->find_error()) {
		//	printf("Error in the binary file \n");
		//}
            	redundant_partitions_file[p]->close();
		
            	remove(redundant_filename[p]);
 
        } // end for partitions

#if OMP
        //merge histo
        if(output_histo)

        {
            for (int cc=1; cc<10001; cc++) {
                uint64_t sum_omp = 0;
                for(int ii=0;ii<nb_threads;ii++)
                {
                    sum_omp += histo_count_omp[ii][cc];
                }
                histo_count[cc] = sum_omp;
            }
        }
#endif
        
#if !SINGLE_BAR
        if (verbose == 0 && nb_threads == 1)
            progress.finish();
        else if (verbose == 0 && nb_threads > 1 )
            progress.finish_threaded();
#endif

        if (verbose) fprintf(stderr,"\n");

        if (verbose >= 2)
        {
            STOPWALL(debtri,"Reading and sorting partitions");
            STOPWALL(debpass,"Pass total");

        }
       
	//printf("Done writing kmers for all K check 4 \n");
        if(use_compressed_reads)
            binread->close();
        
        //delete
            for (uint32_t p=0;p<nb_partitions;p++)
            {
                delete redundant_partitions_file[p] ;
            }
        
    }
	//printf("Done writing kmers for all K check 5 \n");

    //single bar
#if SINGLE_BAR
    if (verbose == 0 && nb_threads == 1)
        progress.finish();
    else if (verbose == 0 && nb_threads > 1 )
        progress.finish_threaded();
#endif
    
    if(output_histo)
    {
        FILE * histo_file = fopen(return_file_name(histo_file_name),"w");
        for (int cc=1; cc<10001; cc++) {
            fprintf(histo_file,"%i\t%llu\n",cc,(unsigned long long)(histo_count[cc]));
        }
        fclose(histo_file);
    }
    free(histo_count);

    NbSolid = NbSolid_omp[0];
#if OMP
    NbSolid=0;
    for(int ii=0;ii<nb_threads;ii++)
    {
        NbSolid += NbSolid_omp[ii];
    }
#endif
   for ( int s=0;s<totalKmers;s++) 
    	SolidKmers[s]->close();
    printf("\nSaved %lld solid kmers\n",(long long)NbSolid);
    rmdir(temp_dir);

    STOPWALL(count,"Counted kmers");
    fprintf(stderr,"\n------------------ Counted kmers and kept those with abundance >=%i,     \n",nks);
} 
/*
 * Assemble contigs by traversing the graph represented by the global Bloom
 * filter `bloo1` and the global `false_positives` set.
 *
 * For every kmer returned by the branching terminator, starting kmers are
 * looked up and extended to the right and to the left; the contig is
 * revcomp(left extension) + starting kmer + right extension.
 * Contigs of at least MIN_CONTIG_SIZE nucleotides are written to the file
 * named by return_file_name(assembly_file) as a ">index__len__length" header
 * line followed by the sequence; shorter ones are only counted.
 * Summary and traversal statistics are printed to stderr.
 *
 * Takes no parameters and returns nothing; reads the globals sizeKmer,
 * genome_size, bloo1 and false_positives. Exits with status 1 if the work
 * buffers cannot be allocated or the output file cannot be opened.
 */
inline void assemble()
{

    //////-------------------------------------------------------------------------------------------
    fprintf (stderr,"______________________________________________________ \n");
    fprintf (stderr,"___________ Assemble from bloom filter _______________ \n");
    fprintf (stderr,"______________________________________________________ \n\n");

    //////-------------------------------------------------------------------------------------------


    long long len_left = 0;
    long long len_right = 0;
    long long contig_len =0;
    long long maxlen=10000000; // upper bound handed to the traversal via set_maxlen()

    char *left_traversal  = (char *) malloc(maxlen*sizeof(char));
    char *right_traversal = (char *) malloc(maxlen*sizeof(char));
    // room for both extensions plus the starting kmer
    char *contig          = (char *) malloc(2*(maxlen+sizeKmer)*sizeof(char));
    if (left_traversal == NULL || right_traversal == NULL || contig == NULL)
    {
        // without these buffers the traversal would write through NULL
        fprintf (stderr,"Cannot allocate the traversal buffers for assembly\n");
        free(left_traversal);
        free(right_traversal);
        free(contig);
        exit(1);
    }
    kmer_type kmer;

    long long nbContig =0;
    long long nbSmallContig =0;
    long long totalnt=0;
    long long max_contig_len=0;
    long long mlenleft=0,mlenright=0;
    int64_t NbBranchingKmer=0;
    char kmer_seq[sizeKmer+1];
    FILE * file_assembly = fopen(return_file_name(assembly_file),"w+");
    if (file_assembly == NULL)
    {
        // every contig is written with fprintf(file_assembly,...): fail early instead of crashing later
        fprintf (stderr,"Cannot open the assembly output file for writing\n");
        free(left_traversal);
        free(right_traversal);
        free(contig);
        exit(1);
    }

    BinaryBank *SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),0);

    STARTWALL(assembly);

    char *assemble_only_one_region = NULL; // debugging, set to an ASCII kmer to activate, NULL to deactivate
    bool LOAD_BRANCHING_KMERS=false; // debugging
    bool DUMP_BRANCHING_KMERS=false;
   
    BranchingTerminator *terminator;

    if (LOAD_BRANCHING_KMERS)
    {
        // debugging path: build the terminator from a previously dumped branching kmers file
        BinaryBank *BranchingKmers = new BinaryBank(return_file_name(branching_kmers_file),sizeof(kmer_type),false);
        terminator = new BranchingTerminator(BranchingKmers,SolidKmers, bloo1,false_positives);
        BranchingKmers->close();
    }
    else
        terminator = new BranchingTerminator(SolidKmers,genome_size, bloo1,false_positives);

    if (DUMP_BRANCHING_KMERS)
    {
        BinaryBank *BranchingKmers = new BinaryBank(return_file_name(branching_kmers_file),sizeof(kmer_type),true);
        terminator->dump_branching_kmers(BranchingKmers);
        BranchingKmers->close();
    }

#ifdef UNITIG
    SimplePathsTraversal *traversal = new SimplePathsTraversal(bloo1,false_positives,terminator);
    fprintf (stderr,"_________________Assembling in Unitig mode ..._____________________ \n\n");
#else
    MonumentTraversal *traversal = new MonumentTraversal(bloo1,false_positives,terminator);
#endif
    //RandomBranchingTraversal *traversal = new RandomBranchingTraversal(bloo1,false_positives,terminator);
    traversal->set_maxlen(maxlen);
    traversal->set_max_depth(500);
    traversal->set_max_breadth(20);
    
    while (terminator->next(&kmer))
    {
        // keep looping while a starting kmer is available from this kmer
		// everything will be marked during the traversal()'s
		kmer_type starting_kmer;
#ifdef UNITIG
        while (traversal->get_new_starting_node_improved(kmer,starting_kmer))
#else
        while (traversal->find_starting_kmer(kmer,starting_kmer))
#endif
		{
            traversal->revert_stats(); // set stats from the last commit (discard stats from find_starting_kmer / small contigs)

            if (assemble_only_one_region != NULL)
            {
                kmer_type dummy;
                starting_kmer = extractKmerFromRead(assemble_only_one_region,0,&kmer,&dummy,false);
            }

            // convert the kmer that is actually traversed (after the optional debug
            // override above) so that the seed text in the contig matches its extensions
		    code2seq(starting_kmer,kmer_seq);

            // right extension
            len_right = traversal->traverse(starting_kmer,right_traversal,0);
            mlenright= max(len_right,mlenright);

            // left extension, is equivalent to right extension of the revcomp
            len_left = traversal->traverse(starting_kmer,left_traversal,1);
            mlenleft= max(len_left,mlenleft);

            // form the contig
            // NOTE(review): strcpy/strcat assume the traversal buffers and kmer_seq are
            // NUL-terminated by traverse()/code2seq() - confirm against those helpers
            revcomp_sequence(left_traversal,len_left);
            strcpy(contig,left_traversal); // contig = revcomp(left_traversal)
	        strcat(contig,kmer_seq);//               + starting_kmer
            strcat(contig,right_traversal);//           + right_traversal

            contig_len=len_left+len_right+sizeKmer;

            // save the contig
            if(contig_len >= MIN_CONTIG_SIZE)
            {
                max_contig_len = max(max_contig_len,contig_len);
                fprintf(file_assembly,">%lli__len__%lli \n",nbContig,contig_len);
                fprintf(file_assembly,"%s\n",contig);
                nbContig++;
                totalnt+=contig_len;
                traversal->commit_stats();
            }
            else
            {
                // too short: drop the stats gathered for this contig and only count it
                traversal->revert_stats();
                nbSmallContig++;
            }
            if (assemble_only_one_region != NULL)
                break;
        }
    
        NbBranchingKmer++;
        // progress line, refreshed in place (character 13 is a carriage return) every 300 kmers
        if ((NbBranchingKmer%300)==0) fprintf (stderr,"%cLooping through branching kmer n° %lld / %lld  total nt   %lld   ",13,(long long int) NbBranchingKmer,(long long int) terminator->nb_branching_kmers, (long long int)totalnt );

        if (nbContig > 0 && assemble_only_one_region != NULL)
            break;

    }
    fclose(file_assembly);

    fprintf (stderr,"\n Total nt assembled  %lli  nbContig %lli\n",totalnt,nbContig);
    fprintf (stderr," Max contig len  %lli (debug: max len left %lli, max len right %lli)\n",max_contig_len,mlenleft,mlenright);
    fprintf (stderr,"\n Debug traversal stats: %ld ends of contigs (%lld unsaved small contigs), among them:\n",traversal->final_stats.ended_traversals,nbSmallContig);
    fprintf (stderr," %ld couldn't validate consensuses\n",traversal->final_stats.couldnt_validate_consensuses);
    fprintf (stderr," %ld large bubble breadth, %ld large bubble depth, %ld marked kmer, %ld no extension\n",traversal->final_stats.couldnt_traverse_bubble_breadth,traversal->final_stats.couldnt_traverse_bubble_depth,traversal->final_stats.couldnt_because_marked_kmer,traversal->final_stats.couldnt_find_extension);
    fprintf (stderr," %ld in-branchin large depth, %ld in-branching large breadth, %ld in-branching other\n",traversal->final_stats.couldnt_inbranching_depth,traversal->final_stats.couldnt_inbranching_breadth,traversal->final_stats.couldnt_inbranching_other);
    
    STOPWALL(assembly,"Assembly");

    free(left_traversal);
    free(right_traversal);
    free(contig);
    SolidKmers->close();
}
/*
 * Program entry point: count kmers, debloom, then assemble.
 *
 * Positional arguments (all five are required):
 *   argv[1] input_file, argv[2] kmer_size, argv[3] min_abundance,
 *   argv[4] estimated_genome_size, argv[5] prefix
 * Optional arguments, recognised from argv[6] onwards:
 *   --original, --dont-count, --dont-debloom, --just-assemble, --titus-mode,
 *   -d <value>, -maxc <value>, --le-changement
 *
 * Returns 0 on success and 1 on a usage error; exits with status 1 when the
 * kmer size is not positive or exceeds what this build supports.
 */
int main(int argc, char *argv[])
{
    
    if(argc <  6)
    {
        fprintf (stderr,"usage:\n");
        fprintf (stderr," %s input_file kmer_size min_abundance estimated_genome_size prefix\n",argv[0]);
        fprintf (stderr,"hints:\n min_abundance ~ 3\n estimated_genome_size is in bp, does not need to be accurate, only controls memory usage\n prefix is any name you want the results to start with\n");

        return 1;
    }

    bool FOUR_BLOOM_VERSION = true;

     // shortcuts to go directly to assembly using serialized bloom and serialized hash
    int START_FROM_SOLID_KMERS=0; // if = 0, construct the fasta file of solid kmers, if = 1, start directly from that file 
    int LOAD_FALSE_POSITIVE_KMERS=0; // if = 0, construct the fasta file of false positive kmers (debloom), if = 1, load that file into the hashtable
    int NO_FALSE_POSITIVES_AT_ALL=0; // if = 0, normal behavior, if = 1, don't load false positives (will be a probabilistic de bruijn graph)
    int max_disk_space = 0;// let dsk decide
    for (int n_a = 6; n_a < argc ; n_a++)
    {
        if (strcmp(argv[n_a],"--original") == 0)
    	    FOUR_BLOOM_VERSION = false;

        if (strcmp(argv[n_a],"--dont-count")==0)
            START_FROM_SOLID_KMERS = 1;

        if (strcmp(argv[n_a],"--dont-debloom")==0)
            LOAD_FALSE_POSITIVE_KMERS = 1;

        if (strcmp(argv[n_a],"--just-assemble")==0)
        {
            START_FROM_SOLID_KMERS = 1;
            LOAD_FALSE_POSITIVE_KMERS = 1;
        }

        if (strcmp(argv[n_a],"--titus-mode")==0)
            NO_FALSE_POSITIVES_AT_ALL = 1;
        
        
        // -d and -maxc take a value in the next argument; argv[argc] is NULL,
        // so the value must be checked for presence before calling atoi on it
        if (strcmp(argv[n_a],"-d")==0)
        {
            if (n_a+1 >= argc)
            {
                fprintf (stderr,"option -d requires a value\n");
                return 1;
            }
            max_disk_space = atoi(argv[n_a+1]);
        }
        
        
        if (strcmp(argv[n_a],"-maxc")==0)
        {
            if (n_a+1 >= argc)
            {
                fprintf (stderr,"option -maxc requires a value\n");
                return 1;
            }
	    max_couv = atoi(argv[n_a+1]);
        }
        
        if (strcmp(argv[n_a],"--le-changement")==0)
            {printf("c'est maintenant!\n");exit(0);}
    }


    // kmer size
    sizeKmer=27; // default; kept odd because palindromes are not handled (dont want to stop on them)
    if(argc >=  3)
    {
        sizeKmer = atoi(argv[2]);
        if (sizeKmer <= 0)
        {
            // atoi yields 0 on non-numeric input; a non-positive size would make the
            // mask shift below use a negative count
            fprintf (stderr,"kmer size must be a positive integer\n");
            exit(1);
        }
        if (sizeKmer%2==0)
        {
            sizeKmer-=1;
            printf("Need odd kmer size to avoid palindromes. I've set kmer size to %d.\n",sizeKmer);
        }
        if (sizeKmer>((int)sizeof(kmer_type)*4))
        {
            printf("Max kmer size on this compiled version is %lu\n",sizeof(kmer_type)*4);
            exit(1);
        }
    }

    // mask keeping the low 2*sizeKmer bits; when the kmer fills the whole type,
    // shifting by the full width is not allowed, so use all ones instead
    if (sizeKmer == (int)(sizeof(kmer_type)*4))
        kmerMask = -1;
    else
        kmerMask=(((kmer_type)1)<<(sizeKmer*2))-1;

    double lg2 = log(2);
   
    if (sizeKmer > 128)
    {
        FOUR_BLOOM_VERSION = false;
        printf("Reverted to single Bloom filter implementation for k>128\n");
    }

    if (!FOUR_BLOOM_VERSION) 
      NBITS_PER_KMER = log(16*sizeKmer*(lg2*lg2))/(lg2*lg2); // needed to process argv[5]
    else 
      NBITS_PER_KMER = rvalues[sizeKmer][1];

    // solidity 
    nks =NNKS;
    if(argc >=  4)
    {
        nks = atoi(argv[3]);
        if (nks==0) nks=1; // min abundance can't be 0
    }


   if(argc >=  5)
    {
       genome_size  = atoll(argv[4]);
      // int estimated_bloom_size = max( (int)ceilf(log2f(genome_size * NBITS_PER_KMER )), 1);
        uint64_t estimated_bloom_size = (uint64_t) (genome_size * NBITS_PER_KMER);

       uint64_t estimated_nb_FP =  (uint64_t)(genome_size * 4 * powf(0.6,11)); // just indicative
    
       //max_memory = max( (1LL << estimated_bloom_size)/8LL /1024LL/1024LL, 1LL );
        // bits -> bytes -> MB, with a floor of 1 MB
        max_memory =  max((int64_t) estimated_bloom_size/8LL /1024LL/1024LL,1LL);

      // casts make the arguments match the %lli / %lld conversions
      printf("estimated values: nbits Bloom %lli, nb FP %lld, max memory %i MB\n",(long long)estimated_bloom_size,(long long)estimated_nb_FP,max_memory);

    }

    // output prefix
    if(argc >=  6)
    {
        // NOTE(review): unbounded copy; the capacity of `prefix` is not visible here - confirm it can hold argv[5]
        strcpy(prefix,argv[5]);
    }

   


    fprintf (stderr,"taille cell %lu \n", sizeof(cell<kmer_type>));

    STARTWALL(0);

    Bank *Reads = new Bank(argv[1]);
    
    // counter kmers, write solid kmers to disk
    if (!START_FROM_SOLID_KMERS)
    {
        int verbose = 0;
        bool write_count = false;
        bool skip_binary_conversion = false;

        sorting_count(Reads,prefix,max_memory,max_disk_space,write_count,verbose, skip_binary_conversion);
    }

    // debloom, write false positives to disk, insert them into false_positives
    if (! LOAD_FALSE_POSITIVE_KMERS)
    {
        debloom(order, max_memory);
    }
    
    bloo1 = bloom_create_bloo1((BloomCpt *)NULL, false);

    if (! NO_FALSE_POSITIVES_AT_ALL)
    {
        // load false positives from disk into false_positives
        if (!FOUR_BLOOM_VERSION) 
            false_positives = load_false_positives();
	else
	    false_positives = load_false_positives_cascading4();
    }
    else
    {
        // titus mode: no FP's
        false_positives = dummy_false_positives();
    }

    //  return 1;
    assemble(); 

    STOPWALL(0,"Total");

    delete Reads;
    return 0;
}