void cb_seeds_add(struct cb_seeds *seeds, struct cb_coarse_seq *seq) { char *kmer; int32_t hash, i; struct cb_seed_loc *sl1, *sl2; pthread_rwlock_wrlock(&seeds->lock); for (i = 0; i < seq->seq->length - seeds->seed_size+1; i++) { kmer = seq->seq->residues + i; sl1 = cb_seed_loc_init(seq->id, i); hash = hash_kmer(seeds, kmer); if (seeds->locs[hash] == NULL) seeds->locs[hash] = sl1; else { for (sl2 = seeds->locs[hash]; sl2->next != NULL; sl2 = sl2->next); sl2->next = sl1; } } pthread_rwlock_unlock(&seeds->lock); }
/*
 * Return a freshly built copy of the location chain stored for `kmer`.
 *
 * The chain in seeds->locs[hash_kmer(seeds, kmer)] is duplicated node by
 * node (same order, same coarse_seq_id / residue_index) while the read side
 * of seeds->lock is held.  Returns NULL when the slot is empty.  The copy is
 * built from cb_seed_loc_init() results, so the caller receives nodes that
 * are distinct from the ones stored in the table.
 */
struct cb_seed_loc *
cb_seeds_lookup(struct cb_seeds *seeds, char *kmer)
{
    struct cb_seed_loc *src;
    struct cb_seed_loc *head = NULL;
    struct cb_seed_loc *tail;

    pthread_rwlock_rdlock(&seeds->lock);

    src = seeds->locs[hash_kmer(seeds, kmer)];
    if (src != NULL) {
        head = cb_seed_loc_init(src->coarse_seq_id, src->residue_index);
        tail = head;

        while ((src = src->next) != NULL) {
            tail->next = cb_seed_loc_init(src->coarse_seq_id,
                                          src->residue_index);
            tail = tail->next;
        }
    }

    pthread_rwlock_unlock(&seeds->lock);
    return head;
}
static void chopKmer4read ( int t, int threadID ) { char * src_seq = seqBuffer[t]; char * bal_seq = rcSeq[threadID]; int len_seq = lenBuffer[t]; int j, bal_j; ubyte8 hash_ban, bal_hash_ban; Kmer word, bal_word; int index; char InvalidCh = 4; #ifdef MER127 word.high1 = word.low1 = word.high2 = word.low2 = 0; for ( index = 0; index < overlaplen; index++ ) { word = KmerLeftBitMoveBy2 ( word ); word.low2 |= src_seq[index]; } #else word.high = word.low = 0; for ( index = 0; index < overlaplen; index++ ) { word = KmerLeftBitMoveBy2 ( word ); word.low |= src_seq[index]; } #endif reverseComplementSeq ( src_seq, len_seq, bal_seq ); // complementary node bal_word = reverseComplement ( word, overlaplen ); bal_j = len_seq - overlaplen; index = indexArray[t]; if ( KmerSmaller ( word, bal_word ) ) { hash_ban = hash_kmer ( word ); hashBanBuffer[index] = hash_ban; kmerBuffer[index] = word; prevcBuffer[index] = InvalidCh; nextcBuffer[index++] = src_seq[0 + overlaplen]; } else { bal_hash_ban = hash_kmer ( bal_word ); hashBanBuffer[index] = bal_hash_ban; kmerBuffer[index] = bal_word; prevcBuffer[index] = bal_seq[bal_j - 1]; nextcBuffer[index++] = InvalidCh; } for ( j = 1; j <= len_seq - overlaplen; j++ ) { word = nextKmer ( word, src_seq[j - 1 + overlaplen] ); bal_j = len_seq - j - overlaplen; bal_word = prevKmer ( bal_word, bal_seq[bal_j] ); if ( KmerSmaller ( word, bal_word ) ) { hash_ban = hash_kmer ( word ); hashBanBuffer[index] = hash_ban; kmerBuffer[index] = word; prevcBuffer[index] = src_seq[j - 1]; if ( j < len_seq - overlaplen ) { nextcBuffer[index++] = src_seq[j + overlaplen]; } else { nextcBuffer[index++] = InvalidCh; } //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]); } else { // complementary node bal_hash_ban = hash_kmer ( bal_word ); hashBanBuffer[index] = bal_hash_ban; kmerBuffer[index] = bal_word; if ( bal_j > 0 ) { prevcBuffer[index] = bal_seq[bal_j - 1]; } else { prevcBuffer[index] = InvalidCh; } nextcBuffer[index++] = bal_seq[bal_j + 
overlaplen]; //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]); } } }
static void parse1read (int t, int threadID) { unsigned int j, retain = 0; unsigned int edge_index = 0; kmer_t *node; boolean isSmaller; Kmer wordplus, bal_wordplus; unsigned int start, finish, pos; Kmer prevKmer, currentKmer; boolean IsPrevKmer = 0; start = indexArray[t]; finish = indexArray[t + 1]; pos = start; for (j = start; j < finish; j++) { #ifdef MER127 if( kmerBuffer[j].low2== 0 && N_kmer) #endif #ifdef MER63 if( kmerBuffer[j].low== 0 && N_kmer) #endif #ifdef MER31 if( kmerBuffer[j]== 0 && N_kmer) #endif { IsPrevKmer=0; continue; } node = nodeBuffer[j]; //extract edges or keep kmers if ((node->deleted) || (node->linear && !node->inEdge)) // deleted or in a floating loop { if (retain < 2) { retain = 0; pos = start; } else { break; } continue; } isSmaller = smallerBuffer[j]; if (node->linear) { if (isSmaller) { edge_index = node->l_links; } else { edge_index = node->l_links + node->twin - 1; } #ifdef MER127 if (retain == 0 || IsPrevKmer) { retain++; mixBuffer[pos].low2 = edge_index; flagArray[pos++] = 0; IsPrevKmer = 0; } else if (edge_index != mixBuffer[pos - 1].low2) { retain++; mixBuffer[pos].low2 = edge_index; flagArray[pos++] = 0; } #endif #ifdef MER63 if (retain == 0 || IsPrevKmer) { retain++; mixBuffer[pos].low= edge_index; flagArray[pos++] = 0; IsPrevKmer = 0; } else if (edge_index != mixBuffer[pos - 1].low) { retain++; mixBuffer[pos].low= edge_index; flagArray[pos++] = 0; } #endif #ifdef MER31 if (retain == 0 || IsPrevKmer) { retain++; mixBuffer[pos] = edge_index; flagArray[pos++] = 0; IsPrevKmer = 0; } else if (edge_index != mixBuffer[pos - 1]) { retain++; mixBuffer[pos] = edge_index; flagArray[pos++] = 0; } #endif } else { if (isSmaller) { currentKmer = node->seq; } else { currentKmer = reverseComplement (node->seq, overlaplen); } if (IsPrevKmer) { retain++; wordplus = KmerPlus (prevKmer, lastCharInKmer (currentKmer)); bal_wordplus = reverseComplement (wordplus, overlaplen + 1); if (KmerSmaller (wordplus, bal_wordplus)) { smallerBuffer[pos] = 1; 
hashBanBuffer[pos] = hash_kmer (wordplus); mixBuffer[pos] = wordplus; } else { smallerBuffer[pos] = 0; hashBanBuffer[pos] = hash_kmer (bal_wordplus); mixBuffer[pos] = bal_wordplus; } // fprintf(stderr,"%lld\n",hashBanBuffer[pos]); flagArray[pos++] = 1; } IsPrevKmer = 1; prevKmer = currentKmer; } } /* for(j=start;j<pos;j++) fprintf(stderr,"%d ",flagArray[j]); fprintf(stderr,"\n"); */ if (retain < 1) { deletion[threadID]++; } if (retain < 2) { flagArray[start] = 0; mixBuffer[start] = kmerZero; return; } if ((pos - start) != retain) { printf ("read %d, %d vs %d\n", t, retain, edge_index - start); } if (pos < finish) { flagArray[pos] = 0; mixBuffer[pos] = kmerZero; } }
static void chopKmer4read (int t, int threadID) { char *src_seq = seqBuffer[t]; char *bal_seq = rcSeq[threadID]; int len_seq = lenBuffer[t]; int j, bal_j; ubyte8 hash_ban, bal_hash_ban; Kmer word, bal_word; int index; //mao 2011 10 8 Kmer InvalidKmer ; InvalidKmer=kmerZero; int n_num; word = kmerZero; for (index = 0; index < overlaplen; index++) { #ifdef MER127 word = KmerLeftBitMoveBy2(word); word.low2 |= src_seq[index]; #endif #ifdef MER63 word = KmerLeftBitMoveBy2(word); word.low |= src_seq[index]; #endif #ifdef MER31 word = KmerLeftBitMoveBy2(word); word += src_seq[index]; #endif //mao 2011 10 8 if(src_seq[index] == 4) n_num = overlaplen; else if(n_num >0) n_num--; } reverseComplementSeq (src_seq, len_seq, bal_seq); // complementary node bal_word = reverseComplement (word, overlaplen); bal_j = len_seq - 0 - overlaplen; // 0; index = indexArray[t]; //mao 2011 10 8 if(n_num > 0 && N_kmer) { hash_ban = hash_kmer (InvalidKmer); hashBanBuffer[index] = hash_ban; kmerBuffer[index] = InvalidKmer; smallerBuffer[index] = 1; } else if (KmerSmaller (word, bal_word)) { hash_ban = hash_kmer (word); kmerBuffer[index] = word; smallerBuffer[index] = 1; hashBanBuffer[index++] = hash_ban; } else { bal_hash_ban = hash_kmer (bal_word); kmerBuffer[index] = bal_word; smallerBuffer[index] = 0; hashBanBuffer[index++] = bal_hash_ban; } //printf("%dth: %p with %p\n",kmer_c-1,bal_word,bal_hash_ban); for (j = 1; j <= len_seq - overlaplen; j++) { word = nextKmer (word, src_seq[j - 1 + overlaplen]); bal_j = len_seq - j - overlaplen; // j; bal_word = reverseComplement (word, overlaplen); //mao 2011 10 8 if(src_seq[j - 1 + overlaplen] == 4) n_num = overlaplen; else if(n_num >0) n_num--; //mao 2011 10 8 if(n_num > 0 && N_kmer) { hash_ban = hash_kmer (InvalidKmer); hashBanBuffer[index] = hash_ban; kmerBuffer[index] = InvalidKmer; smallerBuffer[index] = 1; } else if (KmerSmaller (word, bal_word)) { hash_ban = hash_kmer (word); kmerBuffer[index] = word; smallerBuffer[index] = 1; 
hashBanBuffer[index++] = hash_ban; //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]); } else { // complementary node bal_hash_ban = hash_kmer (bal_word); kmerBuffer[index] = bal_word; smallerBuffer[index] = 0; hashBanBuffer[index++] = bal_hash_ban; //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]); } } }
/*
 * Starting from firstBead and the base `nextch` that follows it, walk along
 * the chain of linear nodes and push one KMER_PT per visited node onto
 * nodeStack.
 *
 * Each step extends the current k-mer by one base, looks the result up (as
 * the smaller of itself and its reverse complement) in the k-mer set
 * selected by hash % thrd_num, and continues while the lookup succeeds and
 * the node found is linear.  The next base is the first of the four bases
 * for which the node reports coverage: right coverage when the node is
 * stored in walking orientation, left coverage (complemented) otherwise.
 *
 * After the walk, the node returned by the last lookup is pushed as well if
 * the pointer is non-NULL.  *node_c receives the number of beads counted,
 * including firstBead itself.
 */
static void stringBeads ( KMER_PT * firstBead, char nextch, int * node_c )
{
	boolean isSmaller, hit;
	Kmer key, twinKey, swapTmp;
	ubyte8 hashVal;
	kmer_t * hitNode;
	KMER_PT * bead;
	int beads = 1, bucket;
	char base;
	unsigned char cov;

	key = nextKmer ( firstBead->kmer, nextch );

	for ( ;; )
	{
		/* Put the smaller orientation in `key`, the other in `twinKey`. */
		twinKey = reverseComplement ( key, overlaplen );

		if ( KmerLarger ( key, twinKey ) )
		{
			swapTmp = twinKey;
			twinKey = key;
			key = swapTmp;
			isSmaller = 0;
		}
		else
		{
			isSmaller = 1;
		}

		hashVal = hash_kmer ( key );
		bucket = hashVal % thrd_num;
		hit = search_kmerset ( KmerSets[bucket], key, &hitNode );

		if ( ! ( hit && ( hitNode->linear ) ) )
		{
			break;
		}

		// for every node in this line
		beads++;
		bead = ( KMER_PT * ) stackPush ( nodeStack );
		bead->node = hitNode;
		bead->isSmaller = isSmaller;

		/* bead->kmer always holds the k-mer in walking orientation. */
		if ( isSmaller )
		{
			bead->kmer = key;

			for ( base = 0; base < 4; base++ )
			{
				cov = get_kmer_right_cov ( *hitNode, base );

				if ( cov )
				{
					break;
				}
			}

			key = nextKmer ( bead->kmer, base );
		}
		else
		{
			bead->kmer = twinKey;

			for ( base = 0; base < 4; base++ )
			{
				cov = get_kmer_left_cov ( *hitNode, base );

				if ( cov )
				{
					break;
				}
			}

			key = nextKmer ( bead->kmer, int_comp ( base ) );
		}
	}

	if ( hitNode ) //this is always true
	{
		beads++;
		bead = ( KMER_PT * ) stackPush ( nodeStack );
		bead->node = hitNode;
		bead->isSmaller = isSmaller;

		if ( isSmaller )
		{
			bead->kmer = key;
		}
		else
		{
			bead->kmer = twinKey;
		}
	}

	*node_c = beads;
}
/*
 * Turn the chain of `count` beads held in nStack into one edge and write it
 * to `fp` through output_1edge().
 *
 * Steps, in the order the stack handling requires:
 *  1. Pop the last and second-to-last beads; unlink the last node from its
 *     predecessor.
 *  2. Recover the stack and pop down to the first bead (remembering the
 *     second); unlink the first node from its successor.
 *  3. Advance edge_c / edgeCounter.  For an edge of length 1, additionally
 *     store the (overlaplen + 1)-mer joining both ends, in its smaller
 *     orientation, in KmerSetsPatch and tag it with the edge id.
 *  4. Recover the stack again and tag every interior node with the edge id,
 *     summing their left coverage and filling the edge sequence from the
 *     back.
 *  5. Emit the edge, release a heap sequence buffer if one was used, and
 *     advance edge_c by bal_edge.
 *
 * The sequence buffer is edge_seq unless the edge length reaches
 * edge_length_limit, in which case it is allocated with ckalloc().
 */
static void merge_linearV2 ( char bal_edge, STACK * nStack, int count, FILE * fp )
{
	/* Pop order matters: the top of the stack is the last bead. */
	KMER_PT * tailBead = ( KMER_PT * ) stackPop ( nStack );
	KMER_PT * beforeTail = ( KMER_PT * ) stackPop ( nStack );
	KMER_PT * headBead, *afterHead = NULL;
	KMER_PT * cur;
	preEDGE * edge;
	kmer_t * inner, *patchNode;
	Kmer joined, joinedRc, patchKey;
	ubyte8 hashVal;
	long long covSum = 0;
	long long tenfold;
	int edgeLen, seqLen, writeAt, bucket;
	int storedForward;
	boolean existed;
	char * bases, leadCh;

	edgeLen = count - 1;
	seqLen = edgeLen;

	if ( seqLen >= edge_length_limit )
	{
		bases = ( char * ) ckalloc ( seqLen * sizeof ( char ) );
	}
	else
	{
		bases = edge_seq;
	}

	writeAt = edgeLen - 1;
	edge = &temp_edge;
	edge->to_node = tailBead->kmer;
	edge->length = edgeLen;
	edge->bal_edge = bal_edge;
	bases[writeAt--] = lastCharInKmer ( tailBead->kmer );
	leadCh = firstCharInKmer ( beforeTail->kmer );
	dislink2prevUncertain ( tailBead->node, leadCh, tailBead->isSmaller );

	/* Rewind and pop down to the first bead, keeping the second one. */
	stackRecover ( nStack );

	while ( nStack->item_c > 1 )
	{
		afterHead = ( KMER_PT * ) stackPop ( nStack );
	}

	headBead = ( KMER_PT * ) stackPop ( nStack );
	//unlink first node to the second one
	dislink2nextUncertain ( headBead->node, lastCharInKmer ( afterHead->kmer ), headBead->isSmaller );
	edge->from_node = headBead->kmer;

	//create a long kmer for edge with length 1
	if ( edgeLen == 1 )
	{
		nodeCounter++;
		joined = KmerPlus ( edge->from_node, lastCharInKmer ( edge->to_node ) );
		joinedRc = reverseComplement ( joined, overlaplen + 1 );
		edge_c++;
		edgeCounter++;

		/* Store whichever orientation compares smaller. */
		if ( KmerSmaller ( joined, joinedRc ) )
		{
			patchKey = joined;
			storedForward = 1;
		}
		else
		{
			patchKey = joinedRc;
			storedForward = 0;
		}

		hashVal = hash_kmer ( patchKey );
		bucket = hashVal % thrd_num;
		existed = put_kmerset ( KmerSetsPatch[bucket], patchKey, 4, 4, &patchNode );

		if ( existed )
		{
			printf ( "longNode %llx %llx already exist\n", patchKey.high, patchKey.low );
		}

		patchNode->l_links = storedForward ? edge_c : edge_c + bal_edge;
		patchNode->twin = ( unsigned char ) ( storedForward ? bal_edge + 1 : -bal_edge + 1 );
	}
	else
	{
		edge_c++;
		edgeCounter++;
	}

	stackRecover ( nStack );
	//mark all the internal nodes
	cur = ( KMER_PT * ) stackPop ( nStack );	/* skip the last bead */

	while ( nStack->item_c > 1 )
	{
		cur = ( KMER_PT * ) stackPop ( nStack );
		inner = cur->node;
		inner->inEdge = 1;
		covSum += get_kmer_left_covs ( *inner );

		if ( cur->isSmaller )
		{
			inner->l_links = edge_c;
			inner->twin = ( unsigned char ) ( bal_edge + 1 );
		}
		else
		{
			inner->l_links = edge_c + bal_edge;
			inner->twin = ( unsigned char ) ( -bal_edge + 1 );
		}

		bases[writeAt--] = lastCharInKmer ( cur->kmer );
	}

	edge->seq = bases;

	if ( edgeLen > 1 )
	{
		/* Average interior coverage times ten, capped at MaxEdgeCov. */
		tenfold = covSum / ( edgeLen - 1 ) * 10;
		edge->cvg = tenfold > MaxEdgeCov ? MaxEdgeCov : tenfold;
	}
	else
	{
		edge->cvg = 0;
	}

	output_1edge ( edge, fp );

	if ( seqLen >= edge_length_limit )
	{
		free ( ( void * ) bases );
	}

	edge_c += bal_edge;

	if ( edge_c % 10000000 == 0 )
	{
		printf ( "--- %d edges built\n", edge_c );
	}

	return;
}
static void chopKmer4read (int t, int threadID) { char *src_seq = seqBuffer + seqBreakers[t]; char *bal_seq = rcSeq[threadID]; int len_seq = lenBuffer[t]; int j, bal_j; ubyte8 hash_ban, bal_hash_ban; Kmer word, bal_word; int index; word=kmerZero; for (index = 0; index < overlaplen; index++) { word = KmerLeftBitMoveBy2 (word); #ifdef MER127 word.low2 |= src_seq[index]; #endif #ifdef MER63 word.low|= src_seq[index]; #endif #ifdef MER31 word |= src_seq[index]; #endif } reverseComplementSeq (src_seq, len_seq, bal_seq); // complementary node bal_word = reverseComplement (word, overlaplen); bal_j = len_seq - 0 - overlaplen; // 0; index = indexArray[t]; if (KmerSmaller (word, bal_word)) { hash_ban = hash_kmer (word); kmerBuffer[index] = word; hashBanBuffer[index] = hash_ban; smallerBuffer[index++] = 1; } else { bal_hash_ban = hash_kmer (bal_word); kmerBuffer[index] = bal_word; hashBanBuffer[index] = bal_hash_ban; smallerBuffer[index++] = 0; } //printf("%dth: %p with %p\n",kmer_c-1,bal_word,bal_hash_ban); for (j = 1; j <= len_seq - overlaplen; j++) { word = nextKmer (word, src_seq[j - 1 + overlaplen]); bal_j = len_seq - j - overlaplen; // j; bal_word = prevKmer (bal_word, bal_seq[bal_j]); if (KmerSmaller (word, bal_word)) { hash_ban = hash_kmer (word); kmerBuffer[index] = word; hashBanBuffer[index] = hash_ban; smallerBuffer[index++] = 1; //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]); } else { // complementary node bal_hash_ban = hash_kmer (bal_word); kmerBuffer[index] = bal_word; hashBanBuffer[index] = bal_hash_ban; smallerBuffer[index++] = 0; //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]); } } }
static void chopKmer4read(int t, int threadID) { char *src_seq = seqBuffer + seqBreakers[t]; char *bal_seq = rcSeq[threadID]; int len_seq = lenBuffer[t]; int j, bal_j; Kmer hash_ban, bal_hash_ban; Kmer word, bal_word; int index; word = 0; for (index = 0; index < overlaplen; index++) { word <<= 2; word += src_seq[index]; } reverseComplementSeq(src_seq, len_seq, bal_seq); // complementary node bal_word = reverseComplement(word, overlaplen); bal_j = len_seq - 0 - overlaplen; // 0; index = indexArray[t]; if(word < bal_word) { hash_ban = hash_kmer(word); kmerBuffer[index] = word; hashBanBuffer[index] = hash_ban; smallerBuffer[index++] = 1; } else { bal_hash_ban = hash_kmer(bal_word); kmerBuffer[index] = bal_word; hashBanBuffer[index] = bal_hash_ban; smallerBuffer[index++] = 0; } //printf("%dth: %p with %p\n",kmer_c-1,bal_word,bal_hash_ban); for(j = 1; j <= len_seq - overlaplen; j ++) { word = nextKmer(word, src_seq[j - 1 + overlaplen]); bal_j = len_seq - j - overlaplen; // j; bal_word = prevKmer(bal_word, bal_seq[bal_j]); if(word < bal_word) { hash_ban = hash_kmer(word); kmerBuffer[index] = word; hashBanBuffer[index] = hash_ban; smallerBuffer[index++] = 1; //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]); } else { // complementary node bal_hash_ban = hash_kmer(bal_word); kmerBuffer[index] = bal_word; hashBanBuffer[index] = bal_hash_ban; smallerBuffer[index++] = 0; //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]); } } }