Beispiel #1
0
/**
 * Separates the input records depending on whether they are flagged as indels.
 *
 * A record passes when the truth value of the is_indel flag of its statistics
 * equals the truth value of the include_indels option: with the option set only
 * indels pass, with the option unset only non-indels pass. Every other record is
 * annotated through annotate_failed_record and inserted into the failed list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Statistics array; input_stats[i] is read for input_records->items[i]
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param f_args        Pointer to an indel_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t *indel_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    indel_filter_args *options = (indel_filter_args*) f_args;
    int include_indels = options->include_indels;

    LOG_DEBUG_F("indel_filter (preserve indels = %d) over %zu records\n", include_indels, input_records->size);

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];
        variant_stats_t *current_stats = input_stats[idx];

        // Both sides are normalized with '!' so only their truth values are compared
        if (!current_stats->is_indel == !include_indels) {
            array_list_insert(current, passed);
            continue;
        }

        annotate_failed_record(filter_name, filter_name_len, current);
        array_list_insert(current, failed);
    }

    return passed;
}
Beispiel #2
0
/**
 * Separates the input records depending on whether they carry an identifier.
 *
 * The decision is based on the record id only: an id made of the single
 * character "." places the record in the group that passes when include_snps
 * is unset; any other id places it in the group that passes when include_snps
 * is set. Rejected records are annotated through annotate_failed_record and
 * inserted into the failed list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Not read by this filter
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param f_args        Pointer to a snp_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t *snp_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    snp_filter_args *options = (snp_filter_args*) f_args;
    int include_snps = options->include_snps;

    LOG_DEBUG_F("snp_filter (preserve SNPs = %d) over %zu records\n", include_snps, input_records->size);

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];

        // The id is only compared when it is exactly one character long
        int id_is_dot = current->id_len == 1 && strncmp(".", current->id, 1) == 0;
        int keep = id_is_dot ? !include_snps : include_snps;

        if (keep) {
            array_list_insert(current, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, current);
            array_list_insert(current, failed);
        }
    }

    return passed;
}
Beispiel #3
0
/**
 * Keeps the records whose number of mendelian errors does not exceed a maximum.
 *
 * The statistics in input_stats[i] are read for the record stored in
 * input_records->items[i]. A record passes when the mendelian_errors field of
 * its statistics is less than or equal to max_mendelian_errors; otherwise it is
 * annotated through annotate_failed_record and inserted into the failed list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Statistics array, indexed in parallel with input_records
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param args          Pointer to a mendelian_errors_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t* mendelian_errors_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);
    
    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int max_errors = ((mendelian_errors_filter_args*) args)->max_mendelian_errors;

    // The index has the same unsigned type as the list size it is compared against
    for (size_t i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *variant_stats = input_stats[i];
        
        if (variant_stats->mendelian_errors <= max_errors) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }
    
    return passed;
}
Beispiel #4
0
/**
 * Keeps the records whose minor allele frequency reaches a minimum.
 *
 * For every record the smallest value among the alleles_freq entries of its
 * statistics is computed, starting from 1.0. The record passes when that value
 * is greater than or equal to min_maf; otherwise it is annotated through
 * annotate_failed_record and inserted into the failed list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Statistics array; input_stats[i] is read for input_records->items[i]
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param args          Pointer to a maf_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t* maf_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    maf_filter_args *options = (maf_filter_args*) args;
    float min_maf = options->min_maf;

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];
        variant_stats_t *current_stats = input_stats[idx];

        // Lowest frequency among all the alleles of this record, capped at 1.0
        float lowest_freq = 1.0;
        for (int allele = 0; allele < current_stats->num_alleles; allele++) {
            lowest_freq = fmin(lowest_freq, current_stats->alleles_freq[allele]);
        }

        if (lowest_freq >= min_maf) {
            array_list_insert(current, passed);
            continue;
        }

        annotate_failed_record(filter_name, filter_name_len, current);
        array_list_insert(current, failed);
    }

    return passed;
}
Beispiel #5
0
/**
 * Keeps the records that follow an inheritance pattern in enough samples.
 *
 * For the DOMINANT pattern a record passes when both cases_percent_dominant and
 * controls_percent_dominant of its statistics are greater than or equal to
 * min_following_pattern; for the RECESSIVE pattern the *_percent_recessive
 * fields are used instead. Records that do not reach the threshold are
 * annotated through annotate_failed_record and inserted into the failed list.
 *
 * When the pattern is neither DOMINANT nor RECESSIVE no record is inserted in
 * either list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Statistics array; input_stats[i] is read for input_records->items[i]
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param f_args        Pointer to an inheritance_pattern_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t *inheritance_pattern_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);
    
    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    enum inheritance_pattern pattern = ((inheritance_pattern_filter_args*)f_args)->pattern;
    float min_following_pattern = ((inheritance_pattern_filter_args*)f_args)->min_following_pattern;
    
    // A literal percent sign must be written as "%%": a bare '%' followed by " o"
    // is parsed as a conversion specification and consumes the next argument
    if (pattern == DOMINANT) {
        LOG_DEBUG_F("inheritance_pattern_filter (dominant in %.2f%% of samples) over %zu records\n", 
                    min_following_pattern * 100, input_records->size);
    } else {
        LOG_DEBUG_F("inheritance_pattern_filter (recessive in %.2f%% of samples) over %zu records\n", 
                    min_following_pattern * 100, input_records->size);
    }
    
    vcf_record_t *record;
    variant_stats_t *stats;
    for (size_t i = 0; i < input_records->size; i++) {
        record = input_records->items[i];
        stats = input_stats[i];
        
        if (pattern == DOMINANT) {
            if (stats->cases_percent_dominant >= min_following_pattern &&
                stats->controls_percent_dominant >= min_following_pattern) {
                array_list_insert(record, passed);
            } else {
                annotate_failed_record(filter_name, filter_name_len, record);
                array_list_insert(record, failed);
            }
        } else if (pattern == RECESSIVE) {
            if (stats->cases_percent_recessive >= min_following_pattern &&
                stats->controls_percent_recessive >= min_following_pattern) {
                array_list_insert(record, passed);
            } else {
                annotate_failed_record(filter_name, filter_name_len, record);
                array_list_insert(record, failed);
            }
        }
        // NOTE(review): any other pattern value leaves the record out of both lists;
        // confirm with the callers whether that is intended
    }

    return passed;
}
Beispiel #6
0
/**
 * Keeps the records whose "DP" value in the INFO column reaches a minimum.
 *
 * The INFO text of each record is copied into a scratch buffer and the value of
 * the "DP" field is looked up in that copy with get_field_value_in_info. A
 * record passes when that value exists, is accepted by is_numeric and, once
 * converted with atoi, is greater than or equal to min_coverage. Every other
 * record, including one whose INFO text could not be copied because memory
 * allocation failed, is annotated through annotate_failed_record and inserted
 * into the failed list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Not read by this filter
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param f_args        Pointer to a coverage_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t* coverage_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);
    
    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int min_coverage = ((coverage_filter_args*)f_args)->min_coverage;

    LOG_DEBUG_F("coverage_filter (min coverage = %d) over %zu records\n", min_coverage, input_records->size);

    // Scratch copy of the INFO text, reused across records and grown on demand.
    // Its capacity is tracked explicitly: the length of its current contents says
    // nothing about how many bytes were allocated.
    char *aux_buffer = NULL;
    size_t aux_capacity = 0;

    for (size_t i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        size_t info_len = record->info_len;
        size_t needed = info_len + 1;   // room for the terminator
        int enough_coverage = 0;

        if (needed > aux_capacity) {
            // Keep the old pointer until the reallocation is known to have succeeded
            char *grown = realloc(aux_buffer, needed);
            if (grown != NULL) {
                aux_buffer = grown;
                aux_capacity = needed;
            }
        }

        if (needed <= aux_capacity) {
            strncpy(aux_buffer, record->info, info_len);
            // strncpy does not terminate the copy when the source has no NUL within
            // info_len characters; without this, the tail of a previous, longer
            // INFO text would be parsed as part of the current one
            aux_buffer[info_len] = '\0';

            // NOTE(review): the lookup receives the copy, not record->info, presumably
            // because it may modify its argument; ownership of the returned string
            // is not visible here, so it is not freed (same as before)
            char *record_coverage = get_field_value_in_info("DP", aux_buffer);
            enough_coverage = record_coverage != NULL && is_numeric(record_coverage) &&
                              atoi(record_coverage) >= min_coverage;
        }

        if (enough_coverage) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    free(aux_buffer);
    return passed;
}
Beispiel #7
0
/**
 * Keeps the records located inside one of the regions of a region table.
 *
 * For each record a temporary region spanning only the record position on the
 * record chromosome is built and looked up in args->regions. When args->type is
 * set the region type is filled in and find_region_by_type is used, otherwise
 * find_region is used. Records for which the lookup reports a match pass; the
 * rest are annotated through annotate_failed_record and inserted into the
 * failed list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Not read by this filter
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param f_args        Pointer to a region_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t *region_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    region_filter_args *args = (region_filter_args*) f_args;
    region_table_t *regions = args->regions;

    LOG_DEBUG_F("region_filter over %zu records\n", input_records->size);

    // A single probe region is allocated once and refilled for every record
    region_t *probe = (region_t*) malloc (sizeof(region_t));

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];

        // The chromosome is duplicated using its stored length
        probe->chromosome = strndup(current->chromosome, current->chromosome_len);
        probe->start_position = current->position;
        probe->end_position = current->position;

        int matches;
        if (args->type) {
            probe->type = args->type;
            matches = find_region_by_type(probe, regions);
        } else {
            matches = find_region(probe, regions);
        }

        // The duplicated chromosome is only needed for the lookup
        free(probe->chromosome);

        if (!matches) {
            // The record is not contained in any of the regions
            annotate_failed_record(filter_name, filter_name_len, current);
            array_list_insert(current, failed);
        } else {
            // The record is contained in at least one region
            array_list_insert(current, passed);
        }
    }

    free(probe);

    return passed;
}
Beispiel #8
0
/**
 * Keeps the records whose quality reaches a minimum.
 *
 * A record passes when its quality field is greater than or equal to
 * min_quality; otherwise it is annotated through annotate_failed_record and
 * inserted into the failed list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Not read by this filter
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param f_args        Pointer to a quality_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t* quality_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    quality_filter_args *options = (quality_filter_args*) f_args;
    int min_quality = options->min_quality;

    LOG_DEBUG_F("quality_filter (min quality = %d) over %zu records\n", min_quality, input_records->size);

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];

        if (current->quality >= min_quality) {
            array_list_insert(current, passed);
            continue;
        }

        annotate_failed_record(filter_name, filter_name_len, current);
        array_list_insert(current, failed);
    }

    return passed;
}
Beispiel #9
0
/**
 * Keeps the records whose proportion of missing alleles does not exceed a maximum.
 *
 * The statistics in input_stats[i] are read for the record stored in
 * input_records->items[i]. The proportion is computed as
 * missing_alleles / (sum of alleles_count + missing_alleles). A record passes
 * when the proportion is less than or equal to max_missing; otherwise it is
 * annotated through annotate_failed_record and inserted into the failed list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Statistics array, indexed in parallel with input_records
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param args          Pointer to a missing_values_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t* missing_values_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);
    
    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    float max_missing = ((missing_values_filter_args*) args)->max_missing;

    // The index has the same unsigned type as the list size it is compared against
    for (size_t i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *variant_stats = input_stats[i];

        // Number of alleles that were actually observed for this record
        float allele_count = 0;
        for (int j = 0; j < variant_stats->num_alleles; j++) {
            allele_count += variant_stats->alleles_count[j];
        }

        // NOTE(review): when there are neither observed nor missing alleles this is
        // 0/0; with IEEE floats the comparison below is then false and the record
        // is rejected. Confirm that this is the intended outcome.
        float record_missing = variant_stats->missing_alleles / (allele_count + variant_stats->missing_alleles);
        
        if (record_missing <= max_missing) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }
    
    return passed;
}
Beispiel #10
0
/**
 * Keeps the records of a given variant type.
 *
 * A record passes when its type field equals the type stored in the filter
 * arguments; otherwise it is annotated through annotate_failed_record and
 * inserted into the failed list.
 *
 * @param input_records Records to filter; must not be NULL
 * @param failed        List that receives the rejected records; must not be NULL
 * @param input_stats   Not read by this filter
 * @param filter_name   Name handed to annotate_failed_record for rejected records
 * @param f_args        Pointer to a variant_type_filter_args structure
 * @return The list created with array_list_new holding the records that pass
 */
array_list_t *variant_type_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    variant_type_filter_args *options = (variant_type_filter_args*) f_args;
    enum variant_type type = options->type;

    LOG_DEBUG_F("variant_type_filter (variant_type  %d) over %zu records\n", type, input_records->size);

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];

        if (current->type != type) {
            annotate_failed_record(filter_name, filter_name_len, current);
            array_list_insert(current, failed);
            continue;
        }

        array_list_insert(current, passed);
    }

    return passed;
}