/*
 * Insert the k-mer staged in kmerBuffer[t] into kset and tag the resulting
 * entry with where it came from.
 *
 * t          index into the per-slot buffers kmerBuffer[] and smallerBuffer[]
 * kset       k-mer set receiving the k-mer
 * seq_index  index into ctgIdArray[]; the value found there goes to l_links
 * pos        value recorded in r_links
 *
 * A nonzero return from put_kmerset() is treated as "this k-mer was already
 * in the set": the existing entry is flagged as deleted and nothing else is
 * written.  Otherwise the entry receives its twin / l_links / r_links tags.
 *
 * NOTE(review): the meaning of the literal 4, 4 arguments is defined by
 * put_kmerset() -- confirm against its declaration.
 */
static void singleKmer (int t, KmerSet * kset, unsigned int seq_index, unsigned int pos)
{
	kmer_t *entry;
	boolean existed = put_kmerset (kset, kmerBuffer[t], 4, 4, &entry);

	if (existed) {
		/* Seen before: only mark the existing entry. */
		entry->deleted = 1;
		return;
	}

	/* twin is 0 when smallerBuffer[t] is set, 1 otherwise. */
	entry->twin = smallerBuffer[t] ? 0 : 1;
	entry->l_links = ctgIdArray[seq_index];
	entry->r_links = pos;
}
/*
 * Insert the k-mer staged in kmerBuffer[t] into kset, passing along the
 * values staged in prevcBuffer[t] and nextcBuffer[t].
 *
 * t     index into kmerBuffer[], prevcBuffer[] and nextcBuffer[]
 * kset  k-mer set receiving the k-mer
 *
 * Both the return value of put_kmerset() and the node pointer it hands back
 * are intentionally unused here.
 */
static void singleKmer ( int t, KmerSet * kset )
{
	kmer_t * slot;	/* required out-parameter; never read afterwards */

	( void ) put_kmerset ( kset, kmerBuffer[t], prevcBuffer[t], nextcBuffer[t], &slot );
}
/*
 * Collapse the chain of k-mer records held in nStack into one edge record
 * and hand that record to output_1edge().
 *
 * bal_edge  stored in the edge record, folded into the l_links / twin tags
 *           written to the nodes, and added to edge_c after the edge has
 *           been written.
 *           NOTE(review): presumably 1 when the edge has a distinct
 *           reverse-complement partner and 0 otherwise -- confirm with caller.
 * nStack    stack of KMER_PT entries; it is popped and recovered repeatedly.
 * count     the edge length is count - 1.
 *           NOTE(review): assumed to be the number of entries on nStack and
 *           at least 2 -- nothing here checks it; confirm with caller.
 * fp        forwarded unchanged to output_1edge().
 *
 * The file-level temp_edge is used as the edge record.  The sequence is
 * written into edge_seq unless the length reaches edge_length_limit, in
 * which case a temporary buffer is obtained from ckalloc() and freed before
 * returning.  edge_c and edgeCounter are advanced; nodeCounter is advanced
 * for edges of length 1.
 */
static void merge_linearV2 ( char bal_edge, STACK * nStack, int count, FILE * fp )
{
	/* The first two pops yield the top entry and the one right below it. */
	KMER_PT * tail = ( KMER_PT * ) stackPop ( nStack );
	KMER_PT * before_tail = ( KMER_PT * ) stackPop ( nStack );
	KMER_PT * head;
	KMER_PT * after_head = NULL;
	KMER_PT * cur;
	preEDGE * edge = &temp_edge;
	kmer_t * inner;
	kmer_t * longNode;
	char * seq;
	char prev_base;
	long long cov_sum = 0;
	int length = count - 1;
	int write_at = length - 1;	/* seq[] is filled from its last slot backwards */

	if ( length >= edge_length_limit )
	{
		seq = ( char * ) ckalloc ( length * sizeof ( char ) );
	}
	else
	{
		seq = edge_seq;
	}

	edge->to_node = tail->kmer;
	edge->length = length;
	edge->bal_edge = bal_edge;
	seq[write_at--] = lastCharInKmer ( tail->kmer );

	/* Detach the top entry's node from the entry below it. */
	prev_base = firstCharInKmer ( before_tail->kmer );
	dislink2prevUncertain ( tail->node, prev_base, tail->isSmaller );

	/*
	 * First pass over the stack: pop until a single entry is left, so that
	 * after_head ends up as the second-to-bottom entry and head as the bottom.
	 * NOTE(review): stackRecover() presumably makes the popped entries
	 * poppable again -- confirm against its definition.
	 */
	stackRecover ( nStack );

	while ( nStack->item_c > 1 )
	{
		after_head = ( KMER_PT * ) stackPop ( nStack );
	}

	head = ( KMER_PT * ) stackPop ( nStack );

	/* Detach the bottom entry's node from the entry above it. */
	dislink2nextUncertain ( head->node, lastCharInKmer ( after_head->kmer ), head->isSmaller );
	edge->from_node = head->kmer;

	if ( length == 1 )
	{
		/*
		 * A length-1 edge has no interior node to carry the edge id, so a
		 * longer k-mer is registered in KmerSetsPatch to carry it instead.
		 */
		Kmer wordplus, bal_wordplus, key;
		ubyte8 hash_ban;
		int setPicker;
		int forward;
		boolean found;

		nodeCounter++;
		wordplus = KmerPlus ( edge->from_node, lastCharInKmer ( edge->to_node ) );
		bal_wordplus = reverseComplement ( wordplus, overlaplen + 1 );
		edge_c++;
		edgeCounter++;

		/* Insert whichever of the two KmerSmaller() selects. */
		forward = KmerSmaller ( wordplus, bal_wordplus ) ? 1 : 0;
		key = forward ? wordplus : bal_wordplus;
		hash_ban = hash_kmer ( key );
		setPicker = hash_ban % thrd_num;
		found = put_kmerset ( KmerSetsPatch[setPicker], key, 4, 4, &longNode );

		if ( found )
		{
			printf ( "longNode %llx %llx already exist\n", key.high, key.low );
		}

		if ( forward )
		{
			longNode->l_links = edge_c;
			longNode->twin = ( unsigned char ) ( bal_edge + 1 );
		}
		else
		{
			longNode->l_links = edge_c + bal_edge;
			longNode->twin = ( unsigned char ) ( -bal_edge + 1 );
		}
	}
	else
	{
		edge_c++;
		edgeCounter++;
	}

	/*
	 * Second pass: skip the top entry (handled above), then tag every entry
	 * except the bottom one with the edge id and collect its base and coverage.
	 */
	stackRecover ( nStack );
	( void ) stackPop ( nStack );

	while ( nStack->item_c > 1 )
	{
		cur = ( KMER_PT * ) stackPop ( nStack );
		inner = cur->node;
		inner->inEdge = 1;
		cov_sum += get_kmer_left_covs ( *inner );

		if ( cur->isSmaller )
		{
			inner->l_links = edge_c;
			inner->twin = ( unsigned char ) ( bal_edge + 1 );
		}
		else
		{
			inner->l_links = edge_c + bal_edge;
			inner->twin = ( unsigned char ) ( -bal_edge + 1 );
		}

		seq[write_at--] = lastCharInKmer ( cur->kmer );
	}

	edge->seq = seq;

	if ( length > 1 )
	{
		/* Integer division first, then scale by 10; capped at MaxEdgeCov. */
		const long long scaled = cov_sum / ( length - 1 ) * 10;

		edge->cvg = scaled > MaxEdgeCov ? MaxEdgeCov : scaled;
	}
	else
	{
		edge->cvg = 0;
	}

	output_1edge ( edge, fp );

	/* Release the buffer only when it came from ckalloc() above. */
	if ( length >= edge_length_limit )
	{
		free ( seq );
	}

	edge_c += bal_edge;

	if ( edge_c % 10000000 == 0 )
	{
		printf ( "--- %d edges built\n", edge_c );
	}
}