/*
 * Demo driver: builds an alignment of two fixed sequences from a fixed edit
 * script, shows it together with its cost, then appends one deletion, one
 * insertion and one replacement, showing the alignment after every step.
 */
int main()
{
    /* banner printed before each extra operation, and the operation itself */
    static const struct {
        const char *banner;
        char op;
    } steps[] = {
        { "\tadding one deletion...\n", 'D' },
        { "\tadding one insertion...\n", 'I' },
        { "\tadding one replacement...\n", 'R' },
    };
    char *first = "acgtagatatatagat";
    char *second = "agaaagaggtaagaggga";
    char *script = "R7I2R2D1R3I1R3";
    alignment alg = alignment_new(first, second, script);
    unsigned int k;
    int total;

    alignment_show(alg);
    total = alignment_evalcost(alg);
    printf("\tCosts: %d\n\n", total);

    for (k = 0; k < sizeof steps / sizeof steps[0]; k++) {
        fputs(steps[k].banner, stdout);
        alignment_add_operation(&alg, 1, steps[k].op);
        alignment_show(alg);
    }

    return EXIT_SUCCESS;
}
/*
 * Writes one unmapped read to a BAM file.
 *
 * A temporary single-end alignment is initialised from a copy of the read id
 * and from the read's own sequence and quality buffers, with placeholder
 * values (0 / -1 / -1, an empty CIGAR string and no optional fields). It is
 * converted to a BAM record, written to bam_file, and both temporaries are
 * released.
 *
 * fq_read:  read to emit; its sequence and quality buffers are only lent to
 *           the temporary alignment.
 * bam_file: destination BAM file.
 */
void write_unmapped_read(fastq_read_t *fq_read, bam_file_t *bam_file)
{
    alignment_t *alig = alignment_new();
    bam1_t *bam1;

    alignment_init_single_end(strdup(fq_read->id),
                              fq_read->sequence, fq_read->quality,
                              0, -1, -1,
                              "", 0, 0, 0, 0,
                              0, NULL, alig);

    /* NOTE(review): 33 is presumably the base-quality ASCII offset - confirm */
    bam1 = convert_to_bam(alig, 33);
    bam_fwrite(bam1, bam_file);
    bam_destroy1(bam1);

    /*
     * sequence and quality still belong to fq_read and the CIGAR is a string
     * literal, so they are detached before the alignment is released
     * (presumably alignment_free would otherwise free them - confirm).
     */
    alig->sequence = NULL;
    alig->quality = NULL;
    alig->cigar = NULL;
    alignment_free(alig);
}
int main(int argc, char* argv[]) { // check argument count if (argc < 5) { printf("usage: %s seq1 seq2 scorematrix indelcosts\n!", argv[0]); return 1; } // read sequences and m (cost function) from command line char* s1 = argv[1]; int len1 = strlen(s1); char* s2 = argv[2]; int len2 = strlen(s2); char* filename = argv[3]; int indel; sscanf(argv[4], "%d", &indel); printf("Sequenz 1: %s\n", s1); printf("Sequenz 2: %s\n", s2); scorematrix* sm = read_scorematrix(filename,indel); if (sm == NULL) return 1; // reserve memory for dynamic programing table alignentry*** table = initializeDP (len1+1, len2+1); // calculate costs for optimal alignment of s1 and s2 int imax,jmax; int score = align (table, s1, len1, s2, len2, sm, &imax, &jmax); // print dynamic programing table show_DPtable(s1,len1,s2,len2,table); printf("Alignment: %d\n", score); alignment* align = alignment_new(s1,len1,s2,len2); traceback(table,align,imax,jmax); if (alignment_show(align)) { puts("FEHLER: inkonsistentes Alignment!"); } alignment_delete(align); deleteDP (table, len1+1, len2+1); return 0; }
int main(int argc, char * ARGV[]) { if (argc < 5 || argc%2 == 0) { printf("FEHLER: falsche Anzahl Argumente!\n"); return 1; } char * s1 = ARGV[1]; int len1 = strlen(s1); char * s2 = ARGV[2]; int len2 = strlen(s2); int ops = 0; int i; alignment *al = alignment_new (s1, len1, s2, len2); for (i = 3; i < argc; i+=2) { ops |= alignment_add_operations (al, ARGV[i][0], atoi(ARGV[i+1])); } if (ops) { printf ("Fehler beim Einfuegen einer Operation (Ueberlauf)!\n"); } int show = alignment_show (al); if (show) { printf ("Fehler beim Ausgeben des Alignments (Alignment inkonsistent)!\n"); } printf ("Länge des Alignments (Länge der Multiedit-Liste): %d (%d)\n", \ al->length, al->editlength); int costs = alignment_evalcost (al, unit_cost); printf ("Kosten des Alignments: %d\n", costs); alignment_delete (al); al = NULL; return 0; }
alignment* optimal_alignment(char* s1, char* s2, int (*costFunc) (char, char), int* cost) { int len1 = strlen(s1); int len2 = strlen(s2); // reserve memory for dynamic programing table alignentry*** table = initializeDP (len1+1, len2+1); // calculate costs for optimal alignment of s1 and s2 *cost = align (table, s1, len1, s2, len2, costFunc); alignment* a = alignment_new(s1,len1,s2,len2); traceback(table,a,len1,len2); // if test-mode => show table #ifdef UNIT_TEST show_dp(table, s1, s2); #endif deleteDP (table, len1+1, len2+1); return a; }
int main(int argc, char* argv[]) { // check argument count if (argc < 4) { printf("FEHLER: zu wenig Argumente angegeben\n!"); return 1; } // read sequences and m (cost function) from command line char* s1 = argv[1]; int len1 = strlen(s1); char* s2 = argv[2]; int len2 = strlen(s2); int m = atoi(argv[3]); // determine function-pointer for cost function int (*costFunc)(char,char); if (m == 0) costFunc = unity; else if (m == 1) costFunc = hamming; else costFunc = otherCosts; printf("Sequenz 1: %s\n", s1); printf("Sequenz 2: %s\n", s2); // reserve memory for dynamic programing table alignentry*** table = initializeDP (len1+1, len2+1); // calculate costs for optimal alignment of s1 and s2 int costs = align (table, s1, len1, s2, len2, costFunc); // print dynamic programing table printf("------------------------------------------------------\n"); int i,j; int c; for (i = -1; i <= len1; i++) { for (j = -1; j <= len2; j++) { if (i == -1 && j != -1) { if (j == 0) printf("%3c ", '-'); else printf("%3c ", s2[j-1]); } else if (j == -1 && i != -1) { if (i == 0) printf("%3c ", '-'); else printf("%3c ", s1[i-1]); } else if (i == -1 && j == -1) { printf("%3c ", ' '); } else { c = table[i][j]->value; if (c == INF) printf("%3c ", 'I'); else printf("%3d ", c); } } printf("\n"); } printf("------------------------------------------------------\n"); printf("Alignment: %d\n", costs); alignment* align = alignment_new(s1,len1,s2,len2); traceback(table,align,len1,len2); if (alignment_show(align)) { puts("FEHLER: inkonsistentes Alignment!"); } alignment_delete(align); deleteDP (table, len1+1, len2+1); return 0; }
/*
 * Turns the CALs of a bisulfite mapping batch into alignments.
 *
 * The gaps between and around the seeds are filled first (once per value of
 * the last argument, 1 and 0, with the two genomes swapped between the
 * calls). Then, for both mapping lists of the batch and for every target
 * read, each CAL whose normalised score exceeds input->min_score is
 * converted into an alignment_t. The read's CAL list is freed and replaced
 * in the mapping list by the list of alignments.
 *
 * input: server input (genomes, SW options, minimum score)
 * batch: batch whose mapping_batch is processed in place
 *
 * Returns BS_POST_PAIR_STAGE.
 */
int apply_sw_bs_4nt(sw_server_input_t* input, batch_t *batch)
{
  mapping_batch_t *mapping_batch = batch->mapping_batch;
  genome_t *genome1 = input->genome1_p;
  genome_t *genome2 = input->genome2_p;
  sw_optarg_t *sw_optarg = &input->sw_optarg;

  // NOTE(review): this looks like leftover debugging output (hard-coded
  // coordinates, printed for every batch) - confirm whether it can be removed
  {
    char r[1024];
    size_t start = 169312417;
    size_t end = start + 99;
    genome_read_sequence_by_chr_index(r, 0, 0, &start, &end, genome2);
    printf("+++++++++++++ genome2 = %s \n", r);
    genome_read_sequence_by_chr_index(r, 0, 0, &start, &end, genome1);
    printf("+++++++++++++ genome1 = %s \n", r);
  }

  // fill gaps between seeds
  fill_gaps_bs(mapping_batch, sw_optarg, genome2, genome1, 20, 5, 1);
  merge_seed_regions_bs(mapping_batch, 1);
  fill_end_gaps_bs(mapping_batch, sw_optarg, genome1, genome2, 20, 400, 1);

  fill_gaps_bs(mapping_batch, sw_optarg, genome1, genome2, 20, 5, 0);
  merge_seed_regions_bs(mapping_batch, 0);
  fill_end_gaps_bs(mapping_batch, sw_optarg, genome2, genome1, 20, 400, 0);

  // now we can create the alignments
  fastq_read_t *read;
  array_list_t *fq_batch = mapping_batch->fq_batch;

  char *match_seq, *match_qual;
  size_t read_index, read_len, match_len, match_start;

  cal_t *cal;
  array_list_t *cal_list = NULL;
  size_t num_cals;

  seed_region_t *s;
  cigar_code_t *cigar_code;
  cigar_op_t *first_op;

  float norm_score, min_score = input->min_score;

  alignment_t *alignment;
  array_list_t *alignment_list;

  char *p, *optional_fields;
  int optional_fields_length, AS, NH;

  array_list_t **mapping_lists;
  size_t num_targets;
  size_t *targets;

  for (int bs_id = 0; bs_id < 2; bs_id++) {

    // select the first or the second set of mapping lists / targets
    if (bs_id == 0) {
      mapping_lists = mapping_batch->mapping_lists;
      num_targets = mapping_batch->num_targets;
      targets = mapping_batch->targets;
    } else {
      mapping_lists = mapping_batch->mapping_lists2;
      num_targets = mapping_batch->num_targets2;
      targets = mapping_batch->targets2;
    }

    for (size_t i = 0; i < num_targets; i++) {
      read_index = targets[i];
      read = (fastq_read_t *) array_list_get(read_index, fq_batch);

      cal_list = mapping_lists[read_index];
      num_cals = array_list_size(cal_list);

      // nothing to convert; the (empty) CAL list stays in place
      if (num_cals <= 0) continue;

      read_len = read->length;

      alignment_list = array_list_new(num_cals, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

      // processing each CAL from this read
      for (size_t j = 0; j < num_cals; j++) {

        // get cal and read index
        cal = array_list_get(j, cal_list);
        if (cal->sr_list->size == 0) continue;

        // the cigar code is carried by the first seed region of the CAL
        s = (seed_region_t *) linked_list_get_first(cal->sr_list);
        cigar_code = (cigar_code_t *) s->info;

        norm_score = cigar_code_get_score(read_len, cigar_code);
        LOG_DEBUG_F("score = %0.2f\n", norm_score);

        // filter by SW score
        if (norm_score > min_score) {

          // update cigar and sequence and quality strings
          cigar_code_update(cigar_code);
          LOG_DEBUG_F("\tcigar code = %s\n", new_cigar_code_string(cigar_code));

          // a leading 'H' operation shifts the start of the matched part
          match_len = cigar_code_nt_length(cigar_code);
          first_op = cigar_code_get_first_op(cigar_code);
          match_start = (first_op && first_op->name == 'H' ? first_op->number : 0);

          match_seq = (char *) malloc((match_len + 1) * sizeof(char));
          memcpy(match_seq, &read->sequence[match_start], match_len);
          match_seq[match_len] = 0;

          match_qual = (char *) malloc((match_len + 1) * sizeof(char));
          memcpy(match_qual, &read->quality[match_start], match_len);
          match_qual[match_len] = 0;

          // set optional fields: three records, each a 3-byte tag followed
          // by the raw bytes of an int
          optional_fields_length = 100;
          optional_fields = (char *) calloc(optional_fields_length, sizeof(char));

          p = optional_fields;

          // scale before truncating: "(int) norm_score * 100" casts first
          // and would only ever yield multiples of 100
          AS = (int) (norm_score * 100);
          sprintf(p, "ASi");
          p += 3;
          memcpy(p, &AS, sizeof(int));
          p += sizeof(int);

          // go through an int so the bytes copied do not depend on how a
          // size_t is laid out in memory
          NH = (int) num_cals;
          sprintf(p, "NHi");
          p += 3;
          memcpy(p, &NH, sizeof(int));
          p += sizeof(int);

          sprintf(p, "NMi");
          p += 3;
          memcpy(p, &cigar_code->distance, sizeof(int));
          p += sizeof(int);

          assert(read->length == cigar_code_nt_length(cigar_code));

          // create an alignment and insert it into the list
          alignment = alignment_new();

          // the alignment gets its own copy of the read id, as produced by
          // get_to_first_blank()
          size_t header_len = strlen(read->id);
          char *head_id = (char *) malloc(header_len + 1);
          get_to_first_blank(read->id, header_len, head_id);

          alignment_init_single_end(head_id, match_seq, match_qual,
                                    cal->strand, cal->chromosome_id - 1, cal->start - 1,
                                    new_cigar_code_string(cigar_code),
                                    cigar_code_get_num_ops(cigar_code),
                                    norm_score * 254, 1, (num_cals > 1),
                                    optional_fields_length, optional_fields, alignment);

          array_list_insert(alignment, alignment_list);

          LOG_DEBUG_F("creating alignment (bs_id = %i)...\n", bs_id);
        }
      }

      // free the cal list, and update the mapping list with the alignment list
      array_list_free(cal_list, (void *) cal_free);
      mapping_lists[read_index] = alignment_list;
    }
  }

  // go to the next stage
  return BS_POST_PAIR_STAGE;
}
int sa_bam_writer(void *data) { sa_wf_batch_t *wf_batch = (sa_wf_batch_t *) data; sa_mapping_batch_t *mapping_batch = (sa_mapping_batch_t *) wf_batch->mapping_batch; if (mapping_batch == NULL) { printf("bam_writer1: error, NULL mapping batch\n"); return 0; } // for (int i = 0; i < NUM_COUNTERS; i++) { // counters[i] += mapping_batch->counters[i]; // } #ifdef _TIMING for (int i = 0; i < NUM_TIMING; i++) { func_times[i] += mapping_batch->func_times[i]; } #endif int flag, len; char *sequence, *quality; fastq_read_t *read; array_list_t *read_list = mapping_batch->fq_reads; bam1_t *bam1; alignment_t *alig; array_list_t *mapping_list; bam_file_t *out_file = wf_batch->writer_input->bam_file; sa_genome3_t *genome = wf_batch->sa_index->genome; size_t num_reads, num_mappings, num_mate_mappings; num_reads = mapping_batch->num_reads; for (size_t i = 0; i < num_reads; i++) { read = (fastq_read_t *) array_list_get(i, read_list); mapping_list = mapping_batch->mapping_lists[i]; num_mappings = array_list_size(mapping_list); num_total_mappings += num_mappings; #ifdef _VERBOSE if (num_mappings > 1) { num_dup_reads++; num_total_dup_reads += num_mappings; } #endif if (num_mappings > 0) { num_mapped_reads++; if (num_mappings > 1) { num_multihit_reads++; } for (size_t j = 0; j < num_mappings; j++) { alig = (alignment_t *) array_list_get(j, mapping_list); // update alignment if (num_mappings > 1) { alig->map_quality = 0; } else { alig->map_quality = alig->mapq; } bam1 = convert_to_bam(alig, 33); bam_fwrite(bam1, out_file); bam_destroy1(bam1); alignment_free(alig); } } else { num_unmapped_reads++; if (read->adapter) { // sequences and cigar len = read->length + abs(read->adapter_length); sequence = (char *) malloc(len + 1); quality = (char *) malloc(len + 1); if (read->adapter_length < 0) { strcpy(quality, read->adapter_quality); strcat(quality, read->quality); } else { strcpy(quality, read->quality); strcat(quality, read->adapter_quality); } if ((read->adapter_strand == 0 && 
read->adapter_length < 0) || (read->adapter_strand == 1 && read->adapter_length > 0)) { strcpy(sequence, read->adapter); strcat(sequence, read->sequence); } else { strcpy(sequence, read->sequence); strcat(sequence, read->adapter); } sequence[len] = 0; quality[len] = 0; } else { // sequences sequence = read->sequence; quality = read->quality; } alig = alignment_new(); alignment_init_single_end(strdup(read->id), sequence, quality, 0, -1, -1, /*strdup(aux)*/"", 0, 0, 0, 0, 0, NULL, alig); bam1 = convert_to_bam(alig, 33); bam_fwrite(bam1, out_file); // free memory bam_destroy1(bam1); alig->sequence = NULL; alig->quality = NULL; alig->cigar = NULL; alignment_free(alig); if (read->adapter) { free(sequence); free(quality); } } array_list_free(mapping_list, (void *) NULL); } // free memory sa_mapping_batch_free(mapping_batch); if (wf_batch) sa_wf_batch_free(wf_batch); return 0; }
int main (int argc, char **argv) { Table *tab; int ii, jj; Alignment *sol; int nsol = 1; int done; if (argc < 3) errx (EXIT_FAILURE, "please provide two strings on the command line, for example 'vbr yog'"); tab = table_new (argv[1], argv[2]); printf ("input source: %s\n" "input destination: %s\n", argv[1], argv[2]); /*************************************************** * propagate */ for (ii = 1; ii <= tab->srclen; ++ii) { for (jj = 1; jj <= tab->dstlen; ++jj) { int best; char * act; tab->state[ii][jj].v_ins = tab->state[ii][jj-1].v_best + GAP_COST; best = tab->state[ii][jj].v_ins; tab->state[ii][jj].v_del = tab->state[ii-1][jj].v_best + GAP_COST; if (best < tab->state[ii][jj].v_del) best = tab->state[ii][jj].v_del; tab->state[ii][jj].v_match = tab->state[ii-1][jj-1].v_best + match_cost (tab->src[ii-1], /* use ii-1 because of the extra "gap" at the beginning */ tab->dst[jj-1]); if (best < tab->state[ii][jj].v_match) best = tab->state[ii][jj].v_match; act = tab->state[ii][jj].act; if (best == tab->state[ii][jj].v_match) *(act++) = 'm'; if (best == tab->state[ii][jj].v_del) *(act++) = 'd'; if (best == tab->state[ii][jj].v_ins) *(act++) = 'i'; *(act++) = '\0'; tab->state[ii][jj].v_best = best; } } /*************************************************** * print */ printf ("\n _"); for (ii = 1; ii <= tab->dstlen; ++ii) printf (" %c", tab->dst[ii-1]); printf ("\n\n"); for (ii = 0; ii <= tab->srclen; ++ii) { printf ("%c", ii == 0 ? '_' : tab->src[ii-1]); for (jj = 0; jj <= tab->dstlen; ++jj) printf (" % 4d", tab->state[ii][jj].v_best); printf ("\n "); for (jj = 0; jj <= tab->dstlen; ++jj) printf (" %4s", tab->state[ii][jj].act); printf ("\n\n"); } /*************************************************** * trace back */ sol = alignment_new (tab); for (done = 0; done != 1; /**/) { int ncheck; done = 1; /* check for branches, but may end up appending duplicates which should not be checked this time around... 
so remember the old size in ncheck */ ncheck = nsol; for (ii = 0; ii < ncheck; ++ii) { /* skip finished alignments */ if (sol[ii].isrc == 0 && sol[ii].idst == 0) continue; /* check whether we have a 2nd and a 3rd action */ for (jj = 1; jj <= 2; ++jj) { /* no more actions when we hit the '\0' of the action string */ if (tab->state[sol[ii].isrc][sol[ii].idst].act[jj] == '\0') break; /* append a duplicate of sol[ii] that uses jj as iact */ if (NULL == (sol = realloc (sol, (nsol + 1) * sizeof *sol))) err (EXIT_FAILURE, "realloc sol"); alignment_dup (sol + nsol, sol + ii, jj); ++nsol; } } /* update all alignment branches */ for (ii = 0; ii < nsol; ++ii) { char act; /* skip finished alignments */ if (sol[ii].isrc == 0 && sol[ii].idst == 0) continue; /* perform next action in the backtrace */ done = 0; act = tab->state[sol[ii].isrc][sol[ii].idst].act[sol[ii].iact]; if ('m' == act) { *(--sol[ii].src) = tab->src[--sol[ii].isrc]; *(--sol[ii].dst) = tab->dst[--sol[ii].idst]; } else if ('i' == act) { *(--sol[ii].src) = '_'; *(--sol[ii].dst) = tab->dst[--sol[ii].idst]; } else if ('d' == act) { *(--sol[ii].src) = tab->src[--sol[ii].isrc]; *(--sol[ii].dst) = '_'; } else errx (EXIT_FAILURE, "BUG: action %d solution %d isrc %d idst %d iact %d", (int) act, ii, sol[ii].isrc, sol[ii].idst, sol[ii].iact); /* reset iact to zero (only becomes non-zero in the branch detection) */ sol[ii].iact = 0; } } /* print all optimal alignments */ printf ("backtrace:\n\n"); for (ii = 0; ii < nsol; ++ii) printf ("output source: %s\n" "output destination: %s\n\n", sol[ii].src, sol[ii].dst); /*************************************************** * clean up after ourselves */ for (ii = 0; ii < nsol; ++ii) { free (sol[ii].srcbuf); free (sol[ii].dstbuf); } free (sol); table_delete (tab); return 0; }
/*
 * Writer loop: drains batches from input->list_p and writes every read to
 * the BAM file named by input->match_filename.
 *
 * The BAM file is opened for writing and its header emitted first. Batches
 * are then removed from the list until list_remove_item() returns NULL.
 * For each read of a batch:
 *   - no mappings: a placeholder alignment with the CIGAR "<read_len>X" is
 *     built on top of the batch's own header/sequence/quality storage,
 *     written and released;
 *   - otherwise: every non-NULL alignment in the read's list is written and
 *     freed.
 * Each batch and its list item are freed after processing. The number of
 * mapped alignments written is reported at the end.
 *
 * NOTE(review): bam_header is not released here - confirm whether
 * bam_fclose() takes care of it.
 */
void batch_writer2(batch_writer_input_t* input)
{
  printf("START: batch_writer (%i): START, for file %s\n",
         omp_get_thread_num(), input->match_filename);

  bam1_t *bam1;
  bam_header_t *bam_header;
  bam_file_t *bam_file;
  alignment_t *alig;

  char* match_filename = input->match_filename;

  list_t *write_list = input->list_p;
  array_list_t *array_list;

  list_item_t *item = NULL;
  aligner_batch_t *batch = NULL;
  fastq_batch_t *fq_batch = NULL;

  // large enough for any unsigned long in decimal plus 'X' and the NUL;
  // automatic storage so that concurrent callers do not share it
  char aux[32];
  size_t read_len;

  bam_header = bam_header_new(HUMAN, NCBI37, input->header_filename);
  bam_file = bam_fopen_mode(match_filename, bam_header, "w");
  bam_fwrite_header(bam_header, bam_file);

  size_t num_reads = 0, num_items = 0, total_mappings = 0;

  // main loop
  while ( (item = list_remove_item(write_list)) != NULL ) {

    batch = (aligner_batch_t *) item->data_p;
    fq_batch = batch->fq_batch;
    num_reads = batch->num_mapping_lists;

    for (size_t i = 0; i < num_reads; i++) {

      array_list = batch->mapping_lists[i];
      num_items = (array_list == NULL ? 0 : array_list_size(array_list));

      // NOTE(review): the -1 presumably drops a terminator stored between
      // consecutive reads - confirm
      read_len = fq_batch->data_indices[i + 1] - fq_batch->data_indices[i] - 1;

      // mapped or not mapped ?
      if (num_items == 0) {

        // calculating cigar (bounded, with the argument matching %lu)
        snprintf(aux, sizeof aux, "%luX", (unsigned long) read_len);

        // the +1 skips the first character of the stored header
        alig = alignment_new();
        alignment_init_single_end(&(fq_batch->header[fq_batch->header_indices[i]])+1,
                                  &(fq_batch->seq[fq_batch->data_indices[i]]),
                                  &(fq_batch->quality[fq_batch->data_indices[i]]),
                                  0,
                                  0,
                                  0,
                                  aux, 1, 255, 0, 0, alig);

        bam1 = convert_to_bam(alig, 33);
        bam_fwrite(bam1, bam_file);
        bam_destroy1(bam1);

        // some cosmetic stuff before freeing the alignment,
        // (in order to not free twice some fields)
        alig->query_name = NULL;
        alig->sequence = NULL;
        alig->quality = NULL;
        alig->cigar = NULL;

        alignment_free(alig);
      } else {
        for (size_t j = 0; j < num_items; j++) {
          alig = (alignment_t *) array_list_get(j, array_list);
          if (alig != NULL) {
            bam1 = convert_to_bam(alig, 33);
            bam_fwrite(bam1, bam_file);
            bam_destroy1(bam1);
            alignment_free(alig);

            // count what was actually written, for the final report
            total_mappings++;
          }
        }
      }

      // the alignments themselves were already freed above
      if (array_list != NULL) array_list_free(array_list, NULL);
    }

    // both are known to be non-NULL here: item by the loop condition,
    // batch because it has already been dereferenced
    aligner_batch_free(batch);
    list_item_free(item);

    if (time_on) { timing_stop(BATCH_WRITER, 0, timing_p); }
  } // end of batch loop

  bam_fclose(bam_file);
  printf("END: batch_writer (total mappings %lu)\n", (unsigned long) total_mappings);
}