Ejemplo n.º 1
0
/*
 * Align two sequences with Smith-Waterman and print the result to stdout.
 *
 * name1/name2 are printed in the "> name1 name2" header line.
 * data1/data2 are the sequence buffers and size1/size2 their sizes in bytes.
 * Only the first size-1 bytes of each buffer are aligned; the final byte is
 * replaced by a terminator in a private copy (presumably a trailing newline
 * -- confirm against the caller).  The inputs themselves are never modified.
 *
 * The header and the alignment are printed while holding `mutex`, so both
 * are emitted as one unit with respect to other code taking the same lock.
 *
 * On empty input or allocation failure a message is written to stderr and
 * nothing is printed to stdout.
 */
static void allpairs_compare_SWALIGN( const char *name1, const char *data1, int size1, const char *name2, const char *data2, int size2 )
{
	char *stra = NULL;
	char *strb = NULL;
	struct matrix *m = NULL;
	struct alignment *aln = NULL;

	/* The terminator is written at index size-1, so at least one byte is required. */
	if(size1 < 1 || size2 < 1) {
		fprintf(stderr,"allpairs_compare_SWALIGN: cannot align %s and %s: empty input\n",name1,name2);
		return;
	}

	/*
	 * Copy by explicit size rather than strdup(): a strdup() copy ends at the
	 * first NUL byte and may be shorter than size-1, which would make the
	 * terminator write below land outside the allocation.
	 */
	stra = malloc((size_t)size1);
	strb = malloc((size_t)size2);
	if(!stra || !strb) {
		fprintf(stderr,"allpairs_compare_SWALIGN: out of memory when copying %s and %s\n",name1,name2);
		goto cleanup;
	}

	memcpy(stra,data1,(size_t)size1-1);
	stra[size1-1] = 0;
	memcpy(strb,data2,(size_t)size2-1);
	strb[size2-1] = 0;

	m = matrix_create(size1-1,size2-1);
	if(!m) {
		fprintf(stderr,"allpairs_compare_SWALIGN: out of memory when creating alignment matrix for %s and %s\n",name1,name2);
		goto cleanup;
	}

	aln = align_smith_waterman(m,stra,strb);

	pthread_mutex_lock(&mutex);
	printf("> %s %s\n",name1,name2);
	alignment_print(stdout,stra,strb,aln);
	pthread_mutex_unlock(&mutex);

cleanup:
	free(stra);
	free(strb);
	if(m) matrix_delete(m);
	if(aln) alignment_delete(aln);
}
Ejemplo n.º 2
0
/*
 * Command-line driver: map one sequence against a BWT index and print the
 * results.
 *
 * Usage: <prog> index-dirname seq num-errors
 *
 * Two passes are run over the same sequence:
 *   1. bwt_map_seq() on the raw sequence; every mapping is printed with
 *      alignment_print().
 *   2. bwt_map_exact_seeds_seq() on a copy produced by replaceBases(); the
 *      chromosome, strand, start and end of every region are printed.
 *
 * Exits with status -1 when the argument count is wrong or the working
 * buffer cannot be allocated; returns 0 otherwise.
 */
int main(int argc, char *argv[]) {

  if (argc != 4) {
    printf("Error.\n");
    printf("Usage: %s index-dirname seq num-errors\n", argv[0]);
    exit(-1);
  }

  char *index_dirname = argv[1];
  char *seq = argv[2];
  int num_errors = atoi(argv[3]);

  // initializations
  initReplaceTable();

  bwt_optarg_t *bwt_optarg = bwt_optarg_new(num_errors, 1, 10000, 1, 0, 0);
  bwt_index_t *bwt_index = bwt_index_new(index_dirname);

  // seq: map the whole sequence and print every mapping
  {
    array_list_t *mapping_list = array_list_new(100000, 1.25f,
						COLLECTION_MODE_SYNCHRONIZED);

    size_t num_mappings;

    num_mappings = bwt_map_seq(seq, bwt_optarg,
			       bwt_index, mapping_list);
    printf("seq: %s\n", seq);
    // size_t values are printed with %zu; %lu only matches where size_t
    // happens to be unsigned long.
    printf("num_mappings = %zu\n", num_mappings);
    for (size_t i = 0; i < num_mappings; i++) {
      printf("%zu\t---------------------\n", i);
      alignment_print(array_list_get(i, mapping_list));
    }
  }


  // seed: map exact seeds of the converted sequence and print the regions
  {
    array_list_t *mapping_list = array_list_new(100000, 1.25f,
						COLLECTION_MODE_SYNCHRONIZED);

    size_t num_mappings;

    size_t len = strlen(seq);
    // Zero-filled buffer with 10 spare bytes beyond the sequence length.
    char *code_seq = calloc(len + 10, sizeof(char));
    if (code_seq == NULL) {
      fprintf(stderr, "Error: out of memory.\n");
      exit(-1);
    }
    replaceBases(seq, code_seq, len);

    // NOTE(review): 18 and 16 are presumably the seed size and the minimum
    // seed size -- confirm against bwt_map_exact_seeds_seq().
    num_mappings = bwt_map_exact_seeds_seq(code_seq, 18, 16,
					   bwt_optarg, bwt_index, mapping_list);

    region_t *region;
    for (size_t i = 0; i < num_mappings; i++) {
      region = array_list_get(i, mapping_list);
      printf("Region: chr = %lu, strand = %d, start = %lu, end = %lu\n",
	     region->chromosome_id, region->strand, region->start, region->end);
    }

    // The converted copy is not used after the regions have been printed.
    free(code_seq);
  }

  printf("Done.\n");

  return 0;
}
Ejemplo n.º 3
0
/**
 * Print the first alignment of two BAM files and compare their quality
 * values position by position.
 *
 * For each of the first 76 positions one row is printed with the nucleotide
 * and quality of the first alignment followed by the quality of the second
 * alignment; rows whose qualities are equal are marked with "====".  The sum
 * of the absolute quality differences is printed at the end.
 *
 * NOTE(review): `cycles` is never read; the number of compared positions is
 * fixed at 76.  Presumably `cycles` was meant to be the bound -- confirm.
 * NOTE(review): the results of bam_fopen() and the batch reads are not
 * checked, and the two alignment_t objects are not released here.
 *
 * @param bamPath0 path of the first BAM file
 * @param bamPath1 path of the second BAM file
 * @param cycles   unused
 * @return always NO_ERROR
 */
ERROR_CODE
compare_bams_qual(const char* bamPath0, const char* bamPath1, const int cycles)
{
	enum { COMPARED_POSITIONS = 76 };

	bam_file_t *first_file;
	bam_file_t *second_file;
	bam_batch_t *first_batch;
	bam_batch_t *second_batch;
	alignment_t *first_alig;
	alignment_t *second_alig;
	int total_diff = 0;
	int pos;

	(void)cycles;

	printf("Opening BAM 1 form \"%s\" ...\n", bamPath0);
	printf("Opening BAM 2 form \"%s\" ...\n", bamPath1);

	first_file = bam_fopen((char *)bamPath0);
	second_file = bam_fopen((char *)bamPath1);

	printf("BAM opened!...\n");
	printf("\n\n---------------------------------------------------------\n");

	/* Read a single alignment from each file. */
	first_batch = bam_batch_new(1, MULTIPLE_CHROM_BATCH);
	second_batch = bam_batch_new(1, MULTIPLE_CHROM_BATCH);
	bam_fread_max_size(first_batch, 1, 1, first_file);
	bam_fread_max_size(second_batch, 1, 1, second_file);

	/* Convert and show the first record of each batch. */
	first_alig = alignment_new_by_bam(first_batch->alignments_p[0], 1);
	alignment_print(first_alig);

	second_alig = alignment_new_by_bam(second_batch->alignments_p[0], 1);
	alignment_print(second_alig);

	/* Per-position quality table and accumulated absolute difference. */
	printf("Diffs: \nNuc\tQ1\tQ2\n");
	for (pos = 0; pos < COMPARED_POSITIONS; pos++) {
		printf("%c \t%d ", first_alig->sequence[pos], first_alig->quality[pos]);

		/* Equal qualities get a "====" marker before the second value. */
		printf("%s\t%d\n",
		       (first_alig->quality[pos] == second_alig->quality[pos]) ? "====" : "",
		       second_alig->quality[pos]);

		total_diff += abs(second_alig->quality[pos] - first_alig->quality[pos]);
	}
	printf("Total diff: %d\n", total_diff);

	printf("\n---------------------------------------------------------\n");
	printf("Closing BAMs...\n");
	bam_fclose(first_file);
	bam_fclose(second_file);
	bam_batch_free(first_batch, 1);
	bam_batch_free(second_batch, 1);

	printf("BAM closed.\n");

	return NO_ERROR;
}
Ejemplo n.º 4
0
/*
 * sand_align_kernel entry point.
 *
 * Reads compressed sequences from the file named on the command line (or
 * from stdin when no file is given), aligns each "first" sequence against
 * the sequences that follow it, and writes every alignment whose quality is
 * at most min_qual to stdout in the selected output format.
 *
 * Options:
 *   -a <type>    alignment type: "sw", "ps" or "banded"
 *   -o <format>  output format: "ovl", "ovl_new", "matrix" or "align"
 *   -m <n>       minimum alignment length (used by "ps")
 *   -q <x>       quality threshold
 *   -x           delete the input file after processing
 *   -d <flag>    enable debug flags
 *   -v           print version and exit
 *   -h           print help and exit
 * Any other option, including -k which the option string accepts but no
 * case handles, prints the help text and exits.
 *
 * Exits with status 1 on an unreadable input file, allocation failure,
 * unknown alignment type or output format, or missing banded start
 * positions; returns 0 otherwise.
 */
int main(int argc, char ** argv)
{
    FILE * input;
    struct seq *s1=0, *s2=0;
    char ori;
    int c;              /* getopt() returns int; -1 marks the end of the options */
    int fileindex;
    int del_input=0;

    while((c = getopt(argc, argv, "a:o:k:m:q:xd:vh")) != -1) {
        switch (c) {
        case 'a':
            align_type = optarg;
            break;
        case 'o':
            output_format = optarg;
            break;
        case 'm':
            min_align = atoi(optarg);
            break;
        case 'q':
            min_qual = atof(optarg);
            break;
        case 'x':
            del_input = 1;
            break;
        case 'd':
            debug_flags_set(optarg);
            break;
        case 'v':
            cctools_version_print(stdout, argv[0]);
            exit(0);
            break;
        default:
        case 'h':
            show_help(argv[0]);
            exit(0);
            break;
        }
    }

    cctools_version_debug(D_DEBUG, argv[0]);

    fileindex = optind;
    if ((argc - optind) == 1) {
        input = fopen(argv[fileindex], "r");
        if (!input) {
            fprintf(stderr, "sand_align_kernel: couldn't open %s: %s\n",argv[fileindex],strerror(errno));
            exit(1);
        }
    } else {
        input = stdin;
    }

    struct cseq *c1, *c2;

    if(!strcmp(output_format,"ovl") || !strcmp(output_format, "ovl_new")) {
        overlap_write_begin(stdout);
    }

    // outer loop: read first sequence in comparison list

    while((c1=cseq_read(input))) {
        s1 = cseq_uncompress(c1);
        cseq_free(c1);

        // inner loop: read sequences until null (indicating end of list)
        // then continue again with outer loop.  (two nulls to halt.)

        while((c2=cseq_read(input))) {
            s2 = cseq_uncompress(c2);
            cseq_free(c2);

            int dir = 0;
            int start1 = 0;
            int start2 = 0;
            int metadata_valid = 0;

            // strtok() modifies its argument, so tokenize a private copy.
            char* tmp = strdup(s2->metadata);
            if(!tmp) {
                fprintf(stderr,"sand_align_kernel: out of memory when copying sequence metadata.\n");
                exit(1);
            }

            // Keep the last three whitespace-separated integers as
            // (dir, start1, start2).  With fewer than three fields the
            // leading values stay 0; with no fields nothing is parsed and
            // metadata_valid stays 0.
            char* token = strtok(tmp, "	 ");
            while(token) {
                dir = start1;
                start1 = start2;
                start2 = atoi(token);
                metadata_valid++;
                token = strtok(NULL, "	 ");
            }
            free(tmp);

            if(metadata_valid>=1 && dir==-1) {
                seq_reverse_complement(s2);
                ori = 'I';
            } else {
                ori = 'N';
            }

            struct matrix *m = matrix_create(s1->num_bases,s2->num_bases);
            if(!m) {
                fprintf(stderr,"sand_align_kernel: out of memory when creating alignment matrix.\n");
                exit(1);
            }

            struct alignment *aln;

            if(!strcmp(align_type,"sw")) {

                aln = align_smith_waterman(m,s1->data,s2->data);

            } else if(!strcmp(align_type,"ps")) {

                aln = align_prefix_suffix(m,s1->data,s2->data, min_align);

            } else if(!strcmp(align_type,"banded")) {
                if(metadata_valid<3) {
                    fprintf(stderr,"sand_align_kernel: sequence %s did not indicate start positions for the banded alignment.\n",s2->name);
                    exit(1);
                }

                /* The width of the band is proportional to the desired quality of the match. */

                int k = 2 + min_qual * MIN(s1->num_bases,s2->num_bases) / 2.0;
                if(k<5) k = 5;

                aln = align_banded(m,s1->data, s2->data, start1, start2, k);
            } else {
                fprintf(stderr,"unknown alignment type: %s\n",align_type);
                exit(1);
            }

            aln->ori = ori;

            // Only alignments at or below the quality threshold are reported.
            if(aln->quality <= min_qual) {
                if(!strcmp(output_format,"ovl")) {
                    overlap_write_v5(stdout, aln, s1->name, s2->name);
                } else if(!strcmp(output_format, "ovl_new")) {
                    overlap_write_v7(stdout, aln, s1->name, s2->name);
                } else if(!strcmp(output_format,"matrix")) {
                    printf("*** %s alignment of sequences %s and %s (quality %lf):\n\n",align_type,s1->name,s2->name,aln->quality);
                    matrix_print(m,s1->data,s2->data);
                } else if(!strcmp(output_format,"align")) {
                    printf("*** %s alignment of sequences %s and %s (quality %lf):\n\n",align_type,s1->name,s2->name,aln->quality);
                    alignment_print(stdout,s1->data,s2->data,aln);
                } else {
                    printf("unknown output format '%s'\n",output_format);
                    exit(1);
                }
            }

            matrix_delete(m);
            seq_free(s2);
            alignment_delete(aln);
        }
        seq_free(s1);
    }

    fclose(input);

    if(!strcmp(output_format,"ovl") || !strcmp(output_format, "ovl_new")) {
        overlap_write_end(stdout);
    }

    // -x: remove the input file once it has been fully processed.
    if ((argc - optind) == 1 && del_input == 1)
    {
        remove(argv[fileindex]);
    }
    return 0;
}