/**
 * Debug dump of the set to stdout.
 *
 * Prints the number of stored elements, then one line per element of the
 * form `kmer=(prev_kmer1,nt1) (prev_kmer2,nt2)`, where each k-mer is
 * rendered through code2seq().
 */
void AssocPairedSet::print()
{
    // NOTE(review): assumes code2seq() never writes more than 100 bytes
    // (k-mer length + terminator) -- confirm against the maximum sizeKmer.
    char seq[100];

    // Cast explicitly: size() yields a size_t, which is not `unsigned long`
    // on every platform, while the format string expects %lu.
    printf("print %lu elems \n", static_cast<unsigned long>(liste.size()));

    // Unsigned index to match the container's size type.
    for (size_t i = 0; i < liste.size(); i++)
    {
        code2seq(liste[i], seq);
        printf("%s=(", seq);

        code2seq(liste_value[i].nk1.prev_kmer, seq);
        printf("%s,%c) (", seq, liste_value[i].nk1.nt);

        code2seq(liste_value[i].nk2.prev_kmer, seq);
        printf("%s,%c)\n", seq, liste_value[i].nk2.nt);
    }
}
void end_debloom_partition(bool last_partition) { int value; char false_positive_kmer_char[sizeKmer+1]; FILE *file_false_positive_kmers =NULL; kmer_type graine; /////////////////////////begin write files rewind (F_debloom_read); rewind (F_debloom_write); #ifndef MINGW ftruncate(fileno(F_debloom_write), 0); //erase previous file #else // tempfix? fileno is not accepted by mingw fclose(F_debloom_write); F_debloom_write = fopen(return_file_name("debloom2"),"wb+"); #endif BinaryReads* file_false_positive_kmers_binary = NULL; if (last_partition) { // write false positive kmers to fasta file file_false_positive_kmers = fopen(return_file_name(false_positive_kmers_file),"wb"); char *false_positive_kmers_binary_file = (char *)"false_positive_kmers_binary"; file_false_positive_kmers_binary = new BinaryReads( return_file_name(false_positive_kmers_binary_file), true); //TODO: change store FP into binary? reduce space? } n_false_positives = 0; while(fread(&graine, sizeof(graine),1, F_debloom_read)){ if(hasht1->get(graine,&value)==0) //kmer not present == kmer not solid { n_false_positives ++; if (!fwrite(&graine, sizeof(graine), 1, F_debloom_write)) { printf("error: can't fwrite (disk full?)\n"); exit(1); } if (last_partition) { code2seq(graine,false_positive_kmer_char); fprintf(file_false_positive_kmers,">fp\n"); fputs(false_positive_kmer_char,file_false_positive_kmers); fprintf(file_false_positive_kmers,"\n"); file_false_positive_kmers_binary->write_read(false_positive_kmer_char, sizeKmer); } } //else kmer is a true positive, do nothing } if (last_partition){ fclose(file_false_positive_kmers); file_false_positive_kmers_binary->close(); } }
inline void assemble() { //////------------------------------------------------------------------------------------------- fprintf (stderr,"______________________________________________________ \n"); fprintf (stderr,"___________ Assemble from bloom filter _______________ \n"); fprintf (stderr,"______________________________________________________ \n\n"); //////------------------------------------------------------------------------------------------- long long len_left = 0; long long len_right = 0; long long contig_len =0; long long maxlen=10000000; char *left_traversal = (char *) malloc(maxlen*sizeof(char)); char *right_traversal = (char *) malloc(maxlen*sizeof(char)); char *contig = (char *) malloc(2*(maxlen+sizeKmer)*sizeof(char)); kmer_type kmer; long long nbContig =0; long long nbSmallContig =0; long long totalnt=0; long long max_contig_len=0; long long mlenleft=0,mlenright=0; int64_t NbBranchingKmer=0; char kmer_seq[sizeKmer+1]; FILE * file_assembly = fopen(return_file_name(assembly_file),"w+"); BinaryBank *SolidKmers = new BinaryBank(return_file_name(solid_kmers_file),sizeof(kmer_type),0); STARTWALL(assembly); char *assemble_only_one_region = NULL; // debugging, set to a ASCII kmer to activate, NULL to desactivate bool LOAD_BRANCHING_KMERS=false; // debugging bool DUMP_BRANCHING_KMERS=false; BranchingTerminator *terminator; if (LOAD_BRANCHING_KMERS) { BinaryBank *BranchingKmers = new BinaryBank(return_file_name(branching_kmers_file),sizeof(kmer_type),false); terminator = new BranchingTerminator(BranchingKmers,SolidKmers, bloo1,false_positives); BranchingKmers->close(); } else terminator = new BranchingTerminator(SolidKmers,genome_size, bloo1,false_positives); if (DUMP_BRANCHING_KMERS) { BinaryBank *BranchingKmers = new BinaryBank(return_file_name(branching_kmers_file),sizeof(kmer_type),true); terminator->dump_branching_kmers(BranchingKmers); BranchingKmers->close(); } #ifdef UNITIG SimplePathsTraversal *traversal = new 
SimplePathsTraversal(bloo1,false_positives,terminator); fprintf (stderr,"_________________Assembling in Unitig mode ..._____________________ \n\n"); #else MonumentTraversal *traversal = new MonumentTraversal(bloo1,false_positives,terminator); #endif //RandomBranchingTraversal *traversal = new RandomBranchingTraversal(bloo1,false_positives,terminator); traversal->set_maxlen(maxlen); traversal->set_max_depth(500); traversal->set_max_breadth(20); while (terminator->next(&kmer)) { // keep looping while a starting kmer is available from this kmer // everything will be marked during the traversal()'s kmer_type starting_kmer; #ifdef UNITIG while (traversal->get_new_starting_node_improved(kmer,starting_kmer)) #else while (traversal->find_starting_kmer(kmer,starting_kmer)) #endif { code2seq(starting_kmer,kmer_seq); // convert starting kmer to nucleotide seq traversal->revert_stats(); // set stats from the last commit (discard stats from find_starting_kmer / small contigs) if (assemble_only_one_region != NULL) { kmer_type dummy; starting_kmer = extractKmerFromRead(assemble_only_one_region,0,&kmer,&dummy,false); } // right extension len_right = traversal->traverse(starting_kmer,right_traversal,0); mlenright= max(len_right,mlenright); // left extension, is equivalent to right extension of the revcomp len_left = traversal->traverse(starting_kmer,left_traversal,1); mlenleft= max(len_left,mlenleft); // form the contig revcomp_sequence(left_traversal,len_left); strcpy(contig,left_traversal); // contig = revcomp(left_traversal) strcat(contig,kmer_seq);// + starting_kmer strcat(contig,right_traversal);// + right_traversal contig_len=len_left+len_right+sizeKmer; // save the contig if(contig_len >= MIN_CONTIG_SIZE) { max_contig_len = max(max_contig_len,contig_len); fprintf(file_assembly,">%lli__len__%lli \n",nbContig,contig_len); fprintf(file_assembly,"%s\n",contig); nbContig++; totalnt+=contig_len; traversal->commit_stats(); } else { traversal->revert_stats(); nbSmallContig++; } if 
(assemble_only_one_region != NULL) break; } NbBranchingKmer++; if ((NbBranchingKmer%300)==0) fprintf (stderr,"%cLooping through branching kmer n° %lld / %lld total nt %lld ",13,(long long int) NbBranchingKmer,(long long int) terminator->nb_branching_kmers, (long long int)totalnt ); if (nbContig > 0 && assemble_only_one_region != NULL) break; } fclose(file_assembly); fprintf (stderr,"\n Total nt assembled %lli nbContig %lli\n",totalnt,nbContig); fprintf (stderr," Max contig len %lli (debug: max len left %lli, max len right %lli)\n",max_contig_len,mlenleft,mlenright); fprintf (stderr,"\n Debug traversal stats: %ld ends of contigs (%lld unsaved small contigs), among them:\n",traversal->final_stats.ended_traversals,nbSmallContig); fprintf (stderr," %ld couldn't validate consensuses\n",traversal->final_stats.couldnt_validate_consensuses); fprintf (stderr," %ld large bubble breadth, %ld large bubble depth, %ld marked kmer, %ld no extension\n",traversal->final_stats.couldnt_traverse_bubble_breadth,traversal->final_stats.couldnt_traverse_bubble_depth,traversal->final_stats.couldnt_because_marked_kmer,traversal->final_stats.couldnt_find_extension); fprintf (stderr," %ld in-branchin large depth, %ld in-branching large breadth, %ld in-branching other\n",traversal->final_stats.couldnt_inbranching_depth,traversal->final_stats.couldnt_inbranching_breadth,traversal->final_stats.couldnt_inbranching_other); STOPWALL(assembly,"Assembly"); free(left_traversal); free(right_traversal); free(contig); SolidKmers->close(); }