Esempio n. 1
/*
 * junos_strbuf_vsprintf:
 * Append printf-style formatted output to the string buffer, starting at
 * strbuf->pos, growing the buffer and retrying whenever the output does
 * not fit.
 *
 * strbuf - buffer to append to; NULL is rejected.
 * fmt    - printf-style format string.
 * ap     - arguments for fmt. The list is only ever read through a copy,
 *          so the caller's va_list is not consumed by this function.
 *
 * Returns the new value of strbuf->pos on success, -1 if strbuf is NULL or
 * the buffer could not be resized, or the negative vsnprintf() status if
 * formatting failed.
 */
ssize_t
junos_strbuf_vsprintf(junos_strbuf_t *strbuf, const char *fmt, va_list ap)
{
	if (! strbuf)
		return -1;

	assert(strbuf->string[strbuf->pos] == '\0');

	for (;;) {
		va_list ap_copy;
		size_t avail;
		int status;

		if (strbuf->pos >= strbuf->size) {
			if (strbuf_resize(strbuf))
				return -1;
			/* NOTE(review): assumes a successful strbuf_resize() grows
			 * strbuf->size; bail out rather than let the subtraction
			 * below wrap around if it did not. */
			if (strbuf->pos >= strbuf->size)
				return -1;
		}

		avail = strbuf->size - strbuf->pos;

		/* vsnprintf() leaves its va_list indeterminate, so every attempt
		 * has to work on a fresh copy. */
		va_copy(ap_copy, ap);
		status = vsnprintf(strbuf->string + strbuf->pos, avail, fmt, ap_copy);
		va_end(ap_copy);

		if (status < 0) {
			/* keep the buffer terminated at the old end position */
			strbuf->string[strbuf->pos] = '\0';
			return status;
		}

		if ((size_t)status < avail) {
			strbuf->pos += (size_t)status;
			return (ssize_t)strbuf->pos;
		}

		/* Output was truncated: discard the partial write so the string
		 * still ends at pos, enlarge the buffer and try again. */
		strbuf->string[strbuf->pos] = '\0';
		if (strbuf_resize(strbuf))
			return -1;
	}
} /* junos_strbuf_vsprintf */
Esempio n. 2
/* strbuf_append_fmt_retry() can be used when there is no known
 * upper bound for the output string.
 *
 * The formatted text is written after the current contents of s. If it
 * does not fit in the free space, the buffer is resized to hold
 * s->length + <required length> and the formatting is attempted once
 * more; a second failure is reported through die() as a bug.
 *
 * s   - buffer to append to
 * fmt - printf-style format string, followed by its arguments */
void strbuf_append_fmt_retry(strbuf_t *s, const char *fmt, ...)
{
    va_list arg;
    int fmt_len, try1;
    int empty_len;

    /* If the first attempt to append fails, resize the buffer appropriately
     * and try again */
    for (try1 = 0; ; try1++) {
        va_start(arg, fmt);
        /* Append the new formatted string */
        /* fmt_len is the length of the string required, excluding the
         * trailing NUL */
        empty_len = strbuf_empty_length(s);
        /* Add 1 since there is also space to store the terminating NUL. */
        fmt_len = vsnprintf(s->buf + s->length, empty_len + 1, fmt, arg);
        /* Each va_start() must be paired with a va_end() before the next
         * iteration restarts the argument list. */
        va_end(arg);

        /* A negative result is an output error, not a short string; it
         * must never be added to the length. */
        if (fmt_len < 0)
            die("BUG: unable to format string");

        if (fmt_len <= empty_len)
            break;  /* SUCCESS */
        if (try1 > 0)
            die("BUG: length of formatted string changed");

        strbuf_resize(s, s->length + fmt_len);
    }

    s->length += fmt_len;
}
Esempio n. 3
/*
 * Append one wide character to the buffer and keep it L'\0'-terminated.
 *
 * When the buffer is full (length == capacity) it is resized to twice its
 * capacity first; an empty-capacity buffer is resized to 1 so that the
 * doubling actually makes room.
 *
 * NOTE(review): the terminator is written at chars[length], which can be
 * chars[capacity]; this assumes strbuf_resize() reserves one element
 * beyond the requested capacity - confirm against its definition.
 */
static void strbuf_append(StringBuffer *strbuf, wchar_t c) {
  if (strbuf->length == strbuf->capacity) {
    strbuf_resize(strbuf, strbuf->capacity > 0 ? strbuf->capacity * 2 : 1);
  }
  /* store the character, then advance so the terminator lands after it */
  strbuf->chars[strbuf->length++] = c;
  strbuf->chars[strbuf->length] = L'\0';
}
Esempio n. 4
File: strbuf.c Progetto: whatot/ma_c
/*
 * Append the NUL-terminated string str to the buffer s, one character at
 * a time. The terminating NUL of str is not copied.
 *
 * Whenever no free space is left, the buffer is resized to hold at least
 * one more character and the free-space count is refreshed.
 *
 * NOTE(review): assumes strbuf_resize(s, n) leaves room for at least n
 * characters, so strbuf_empty_length(s) is >= 1 afterwards - confirm.
 */
void strbuf_append_string(strbuf_t *s, const char *str) {
	int space, i;

	space = strbuf_empty_length(s);

	for (i = 0; str[i]; i++) {
		if (space < 1) {
			strbuf_resize(s, s->length + 1);
			space = strbuf_empty_length(s);
		}

		s->buf[s->length] = str[i];
		/* advance the write position and consume one free slot */
		s->length++;
		space--;
	}
}
Esempio n. 5
//outputs fastQ unless add_greedy_bases_for_better_bwt_compression==true, in which case is for 1000genomes, and they want fastA
inline void error_correct_file_against_graph(char* fastq_file, char quality_cutoff, char ascii_qual_offset,
        dBGraphEc *db_graph, char* outfile,
        uint64_t *bases_modified_count_array,//distribution across reads; how many of the read_length bases are fixed
        uint64_t *posn_modified_count_array,//where in the read are we making corrections?
        int bases_modified_count_array_size,
        int min_read_len,
        HandleLowQualUncorrectable policy,
        boolean add_greedy_bases_for_better_bwt_compression,
        int num_greedy_bases,
        boolean rev_comp_read_if_on_reverse_strand)
    int max_read_len = bases_modified_count_array_size - 2;
    int read_len_upper_bound = max_read_len + num_greedy_bases;
    int read_len_lower_bound = min_read_len + num_greedy_bases;
    int read_len_final = 0;

    //reset the stats arrays, we get stats per input file
    set_uint64_t_array(bases_modified_count_array,bases_modified_count_array_size, (uint64_t) 0);
    set_uint64_t_array(posn_modified_count_array, bases_modified_count_array_size, (uint64_t) 0);

    //set some variables, quality etc
    short kmer_size = db_graph->kmer_size;

    StrBuf* uncorrectedGoodQual_file = strbuf_create(outfile);
    StrBuf* uncorrectedLowQual_file = strbuf_create(outfile);
    StrBuf* corrected_file = strbuf_create(outfile);
    StrBuf* discarded_undefined_file = strbuf_create(outfile);
    StrBuf* discarded_uncorrectable_file = strbuf_create(outfile);
    StrBuf* discarded_shortread_file = strbuf_create(outfile);

    strbuf_append_str(uncorrectedGoodQual_file, ".printuncorrectedgoodqual");
    strbuf_append_str(uncorrectedLowQual_file, ".printuncorrectedlowqual");
    strbuf_append_str(corrected_file, ".printcorrected");
    strbuf_append_str(discarded_undefined_file, ".discardundefinedbase");
    strbuf_append_str(discarded_uncorrectable_file, ".discarduncorrectable");
    strbuf_append_str(discarded_shortread_file, ".discardshortread");

    FILE* uncorrectedGoodQual_fp = fopen(uncorrectedGoodQual_file->buff, "w");
    FILE* uncorrectedLowQual_fp = fopen(uncorrectedLowQual_file->buff, "w");
    FILE* corrected_fp = fopen(corrected_file->buff, "w");
    FILE* discarded_undefined_fp = fopen(discarded_undefined_file->buff, "w");
    FILE* discarded_uncorrectable_fp = fopen(discarded_uncorrectable_file->buff, "w");
    FILE* discarded_shortread_fp = fopen(discarded_shortread_file->buff, "w");

    char* suff1 = ".distrib_num_modified_bases";
    char* suff2 = ".distrib_posn_modified_bases";
    char* suff3 =".read_stats";
    char* stat1 = (char*) malloc(sizeof(char)*(strlen(outfile)+strlen(suff1)+1));
    char* stat2 = (char*) malloc(sizeof(char)*(strlen(outfile)+strlen(suff2)+1));
    char* stat3 = (char*) malloc(sizeof(char)*(strlen(outfile)+strlen(suff3)+1));

    if ( (stat1==NULL) || (stat2==NULL) || (stat3==NULL) )
        die("Unable to malloc FILENAME strings. Something badly wrong with your server\n");
    set_string_to_null(stat1, strlen(outfile)+strlen(suff1)+1);
    set_string_to_null(stat2, strlen(outfile)+strlen(suff2)+1);
    set_string_to_null(stat3, strlen(outfile)+strlen(suff3)+1);
    strcpy(stat1, outfile);
    strcat(stat1, suff1);
    strcat(stat2, outfile);
    strcat(stat2, suff2);
    strcat(stat3, outfile);
    strcat(stat3, suff3);

    FILE* out_stat1 = fopen(stat1, "w");
    FILE* out_stat2 = fopen(stat2, "w");
    FILE* out_stat3 = fopen(stat3, "w");
    if ( (out_stat1==NULL)|| (out_stat2==NULL) || (out_stat3==NULL) )
        die("Unable to open %s or %s or %s to write to - permissions issue?\n", stat1, stat2, stat3);

    SeqFile *sf = seq_file_open(fastq_file);
    if(sf == NULL)
        // Error opening file
        fprintf(stderr, "Error: cannot read seq file '%s'\n", fastq_file);
    char is_fastq = seq_has_quality_scores(sf);
    if (is_fastq==0)
        die("Error correction is only meant to work on FASTQ and this file: %s is not\n", fastq_file);

    StrBuf* buf_seq  = strbuf_new();
    StrBuf* buf_qual = strbuf_new();
    StrBuf* working_buf=strbuf_new();
    dBNodeEc* last_node_in_read;
    Orientation last_or_in_read;
    int num_original_reads=0, num_final_reads=0, num_corrected_reads=0, num_discarded_reads=0;
    int num_discarded_undefined = 0;
    int num_discarded_uncorrectable = 0;
    int num_discarded_short_read = 0;
    int num_print_uncorrected_lowqual = 0;
    int num_print_uncorrected_goodqual = 0;
    int num_print_corrected = 0;

    StrBuf* buf_dashes = strbuf_create("---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------");

        int count_corrected_bases=0;
        //NOTE - uses modified version fo Isaacs code - new func
        seq_read_all_bases_and_quals(sf, buf_seq, buf_qual);
        StrBuf* buf_seq_debug  = strbuf_clone(buf_seq);
        StrBuf* buf_seq_origin  = strbuf_clone(buf_seq);
        StrBuf* buf_seq_fixed = strbuf_clone(buf_dashes);
        strbuf_resize(buf_seq_fixed, strbuf_len(buf_seq)+1);

        int read_len = seq_get_length(sf);
        int num_kmers = read_len-kmer_size+1;
        int quality_good[read_len];
        set_int_array(quality_good, read_len, 1);

        int first_good=0;//index of first kmer in graph

        //populate the qual array showing which bases have qual >threshold
        //if all quals are high, will Print uncorrected
        //else, if all kmers NOT in graph, will discard or print uncorrected depending on policy
        //else print corrected.
        Orientation strand_first_good_kmer;
        ReadCorrectionDecison dec =
            get_first_good_kmer_and_populate_qual_array(seq_get_read_name(sf), buf_seq, buf_qual, num_kmers, read_len,
                    quality_good, quality_cutoff,
                    &first_good, &strand_first_good_kmer,
                    db_graph, policy, rev_comp_read_if_on_reverse_strand);

        //*** start of local functions

        //if going right, keep going to right hand end. if going left, keep going to left hand end
        boolean condition(WhichEndOfKmer direction, int pos)
            if ((direction==Right) && (pos<num_kmers))
                return true;
            if ((direction==Left) && (pos>=0))
                return true;
            return false;
        boolean kmer_is_in_graph(char* kmer, dBGraphEc* db_g)
            BinaryKmer curr_kmer;
            if (seq_to_binary_kmer(kmer, kmer_size, &curr_kmer)==NULL)
                //is an N
                return false;

            BinaryKmer temp_key;
            element_ec_get_key(&curr_kmer, kmer_size, &temp_key);
            dBNodeEc* node = hash_table_ec_find(&temp_key, db_g);
            if (node==NULL)
                return false;
                return true;
        int increment(int i, WhichEndOfKmer direction)
            if (direction==Right)
                return i+1;
                return i-1;
        char working_str[kmer_size+1];

        // start_pos is in kmer units
        boolean check_bases_to_end_of_read(int start_pos, ReadCorrectionDecison* decision,
                                           WhichEndOfKmer direction,
                                           int* num_corrected_bases_in_this_read_debug)

            boolean any_correction_done=false;
            if ((start_pos<0) || (start_pos>=num_kmers))
                return any_correction_done;
            int pos=start_pos;
            int offset=0;
            if (direction==Right)
                offset= kmer_size-1;
            char local_kmer[kmer_size+1];

            while ( (*decision==PrintCorrected) && (condition(direction,pos)==true) )
                strncpy(local_kmer, buf_seq->buff+pos, kmer_size);

                if (quality_good[pos+offset]==1)
                    //nothing to do
                else if (kmer_is_in_graph(local_kmer, db_graph)==true)
                    //nothing to do - don't correct if kmer is in graph
                else//kmer not in graph and quality bad
                    boolean fixed = fix_end_if_unambiguous(direction, buf_seq, buf_seq_fixed, buf_qual, quality_cutoff, pos,
                                                           working_buf, working_str, db_graph);
                    if ( (policy==DiscardReadIfLowQualBaseUnCorrectable)
                            (fixed==false) )
                    else if (fixed==true)
                        if (offset+pos<bases_modified_count_array_size)
                pos = increment(pos, direction);
            return any_correction_done;