/*
 * Smith-Waterman comparison of two in-memory inputs.
 *
 * Aligns the first (size1 - 1) bytes of data1 against the first (size2 - 1)
 * bytes of data2 and prints "> name1 name2" followed by the alignment to
 * stdout.  Output is serialized with the file-level `mutex`.
 *
 * The last byte of each input is dropped, exactly as the original code did.
 * NOTE(review): presumably that byte is a trailing newline/terminator added
 * by whoever loaded the data -- confirm against the caller.
 *
 * The inputs are copied with an explicit length instead of strdup(), so the
 * function no longer depends on data1/data2 being NUL-terminated and never
 * writes past the end of the copy when the data is shorter than the size.
 *
 * On invalid sizes or allocation failure a message is written to stderr and
 * nothing is printed to stdout.
 */
static void allpairs_compare_SWALIGN( const char *name1, const char *data1, int size1, const char *name2, const char *data2, int size2 )
{
	char *stra = NULL;
	char *strb = NULL;
	struct matrix *m = NULL;
	struct alignment *aln = NULL;

	/* size - 1 is used as both a length and an index below; it must not be negative. */
	if(size1 < 1 || size2 < 1) {
		fprintf(stderr, "allpairs_compare_SWALIGN: cannot compare %s and %s: empty input.\n", name1, name2);
		return;
	}

	stra = malloc((size_t) size1);
	strb = malloc((size_t) size2);
	if(!stra || !strb) {
		fprintf(stderr, "allpairs_compare_SWALIGN: out of memory when copying %s and %s.\n", name1, name2);
		goto out;
	}

	/* Copy everything but the final byte and terminate the copy ourselves. */
	memcpy(stra, data1, (size_t) size1 - 1);
	stra[size1 - 1] = 0;
	memcpy(strb, data2, (size_t) size2 - 1);
	strb[size2 - 1] = 0;

	m = matrix_create(size1 - 1, size2 - 1);
	if(!m) {
		fprintf(stderr, "allpairs_compare_SWALIGN: out of memory when creating alignment matrix.\n");
		goto out;
	}

	aln = align_smith_waterman(m, stra, strb);

	/* Keep the header line and the alignment body together in the output. */
	pthread_mutex_lock(&mutex);
	printf("> %s %s\n", name1, name2);
	alignment_print(stdout, stra, strb, aln);
	pthread_mutex_unlock(&mutex);

	matrix_delete(m);
	alignment_delete(aln);

out:
	/* free(NULL) is a no-op, so this is safe on every path. */
	free(stra);
	free(strb);
}
int main(int argc, char *argv[]) { if (argc != 4) { printf("Error.\n"); printf("Usage: %s index-dirname seq num-errors\n", argv[0]); exit(-1); } char *index_dirname = argv[1]; char *seq = argv[2]; int num_errors = atoi(argv[3]); // initializations initReplaceTable(); bwt_optarg_t *bwt_optarg = bwt_optarg_new(num_errors, 1, 10000, 1, 0, 0); bwt_index_t *bwt_index = bwt_index_new(index_dirname); // seq { array_list_t *mapping_list = array_list_new(100000, 1.25f, COLLECTION_MODE_SYNCHRONIZED); size_t num_mappings; num_mappings = bwt_map_seq(seq, bwt_optarg, bwt_index, mapping_list); printf("seq: %s\n", seq); printf("num_mappings = %lu\n", num_mappings); for (size_t i = 0; i < num_mappings; i++) { printf("%lu\t---------------------\n", i); alignment_print(array_list_get(i, mapping_list)); } } // seed { array_list_t *mapping_list = array_list_new(100000, 1.25f, COLLECTION_MODE_SYNCHRONIZED); size_t num_mappings; size_t len = strlen(seq); char *code_seq = (char *) calloc(len + 10, sizeof(char)); replaceBases(seq, code_seq, len); num_mappings = bwt_map_exact_seeds_seq(code_seq, 18, 16, bwt_optarg, bwt_index, mapping_list); region_t *region; for (size_t i = 0; i < num_mappings; i++) { region = array_list_get(i, mapping_list); printf("Region: chr = %lu, strand = %d, start = %lu, end = %lu\n", region->chromosome_id, region->strand, region->start, region->end); } } printf("Done.\n"); }
ERROR_CODE compare_bams_qual(const char* bamPath0, const char* bamPath1, const int cycles) { bam_file_t* bamFile0; bam_file_t* bamFile1; bam_batch_t* bamBatch0; bam_batch_t* bamBatch1; bam1_t* bamAlig; alignment_t* aligAlig0; alignment_t* aligAlig1; int diff, i; printf("Opening BAM 1 form \"%s\" ...\n", bamPath0); printf("Opening BAM 2 form \"%s\" ...\n", bamPath1); bamFile0 = bam_fopen((char *)bamPath0); bamFile1 = bam_fopen((char *)bamPath1); printf("BAM opened!...\n"); printf("\n\n---------------------------------------------------------\n"); bamBatch0 = bam_batch_new(1, MULTIPLE_CHROM_BATCH); bamBatch1 = bam_batch_new(1, MULTIPLE_CHROM_BATCH); bam_fread_max_size(bamBatch0, 1, 1, bamFile0); bam_fread_max_size(bamBatch1, 1, 1, bamFile1); //Obtain first alignment from first bam bamAlig = bamBatch0->alignments_p[0]; aligAlig0 = alignment_new_by_bam(bamAlig, 1); alignment_print(aligAlig0); //Obtain first alignment from second bam bamAlig = bamBatch1->alignments_p[0]; aligAlig1 = alignment_new_by_bam(bamAlig, 1); alignment_print(aligAlig1); //Obtain quality diffs printf("Diffs: \nNuc\tQ1\tQ2\n"); diff=0; for(i=0; i < 76; i++) { printf("%c \t%d ", aligAlig0->sequence[i], aligAlig0->quality[i]); if(aligAlig0->quality[i] == aligAlig1->quality[i]) { printf("====\t%d\n", aligAlig1->quality[i]); } else { printf("\t%d\n", aligAlig1->quality[i]); } diff += abs(aligAlig1->quality[i] - aligAlig0->quality[i]); } printf("Total diff: %d\n", diff); printf("\n---------------------------------------------------------\n"); printf("Closing BAMs...\n"); bam_fclose(bamFile0); bam_fclose(bamFile1); bam_batch_free(bamBatch0, 1); bam_batch_free(bamBatch1, 1); printf("BAM closed.\n"); return NO_ERROR; }
int main(int argc, char ** argv) { FILE * input; struct seq *s1=0, *s2=0; char ori; char c; int fileindex; int del_input=0; while((c = getopt(argc, argv, "a:o:k:m:q:xd:vh")) != (char) -1) { switch (c) { case 'a': align_type = optarg; break; case 'o': output_format = optarg; break; case 'm': min_align = atoi(optarg); break; case 'q': min_qual = atof(optarg); break; case 'x': del_input = 1; break; case 'd': debug_flags_set(optarg); break; case 'v': cctools_version_print(stdout, argv[0]); exit(0); break; default: case 'h': show_help(argv[0]); exit(0); break; } } cctools_version_debug(D_DEBUG, argv[0]); fileindex = optind; if ((argc - optind) == 1) { input = fopen(argv[fileindex], "r"); if (!input) { fprintf(stderr, "sand_align_kernel: couldn't open %s: %s\n",argv[fileindex],strerror(errno)); exit(1); } } else { input = stdin; } struct cseq *c1, *c2; if(!strcmp(output_format,"ovl") || !strcmp(output_format, "ovl_new")) { overlap_write_begin(stdout); } // outer loop: read first sequence in comparison list while((c1=cseq_read(input))) { s1 = cseq_uncompress(c1); cseq_free(c1); // inner loop: read sequences until null (indicating end of list) // then continue again with outer loop. (two nulls to halt.) 
while((c2=cseq_read(input))) { s2 = cseq_uncompress(c2); cseq_free(c2); int dir = 0; int start1 = 0; int start2 = 0; char* tmp = strdup(s2->metadata); int metadata_valid = 0; char* token = strtok(tmp, " "); start2 = atoi(token); metadata_valid++; while((token = strtok(NULL, " "))) { dir = start1; start1 = start2; start2 = atoi(token); metadata_valid++; } if(metadata_valid>=1 && dir==-1) { seq_reverse_complement(s2); ori = 'I'; } else { ori = 'N'; } struct matrix *m = matrix_create(s1->num_bases,s2->num_bases); if(!m) { fprintf(stderr,"sand_align_kernel: out of memory when creating alignment matrix.\n"); exit(1); } struct alignment *aln; if(!strcmp(align_type,"sw")) { aln = align_smith_waterman(m,s1->data,s2->data); } else if(!strcmp(align_type,"ps")) { aln = align_prefix_suffix(m,s1->data,s2->data, min_align); } else if(!strcmp(align_type,"banded")) { if(metadata_valid<3) { fprintf(stderr,"sand_align_kernel: sequence %s did not indicate start positions for the banded alignment.\n",s2->name); exit(1); } /* The width of the band is proportional to the desired quality of the match. 
*/ int k = 2 + min_qual * MIN(s1->num_bases,s2->num_bases) / 2.0; if(k<5) k = 5; aln = align_banded(m,s1->data, s2->data, start1, start2, k); } else { fprintf(stderr,"unknown alignment type: %s\n",align_type); exit(1); } aln->ori = ori; if(aln->quality <= min_qual) { if(!strcmp(output_format,"ovl")) { overlap_write_v5(stdout, aln, s1->name, s2->name); } else if(!strcmp(output_format, "ovl_new")) { overlap_write_v7(stdout, aln, s1->name, s2->name); } else if(!strcmp(output_format,"matrix")) { printf("*** %s alignment of sequences %s and %s (quality %lf):\n\n",align_type,s1->name,s2->name,aln->quality); matrix_print(m,s1->data,s2->data); } else if(!strcmp(output_format,"align")) { printf("*** %s alignment of sequences %s and %s (quality %lf):\n\n",align_type,s1->name,s2->name,aln->quality); alignment_print(stdout,s1->data,s2->data,aln); } else { printf("unknown output format '%s'\n",output_format); exit(1); } } matrix_delete(m); seq_free(s2); alignment_delete(aln); } seq_free(s1); } fclose(input); if(!strcmp(output_format,"ovl") || !strcmp(output_format, "ovl_new")) { overlap_write_end(stdout); } if ((argc - optind) == 1 && del_input == 1) { remove(argv[fileindex]); } return 0; }