/**
 * Allocate an empty VCF batch.
 *
 * @param size Initial capacity of the record list. When 0, a default
 *             capacity of 100 is used.
 * @return A zero-initialized batch whose `records` list has been created,
 *         or NULL if the batch structure itself could not be allocated.
 */
vcf_batch_t* vcf_batch_new(size_t size) {
    vcf_batch_t *vcf_batch = calloc(1, sizeof(vcf_batch_t));
    if (vcf_batch == NULL) {
        /* Previously dereferenced unconditionally; report OOM to the caller. */
        return NULL;
    }

    vcf_batch->text = NULL;
    if (size > 0) {
        vcf_batch->records = array_list_new(size, 1.2, COLLECTION_MODE_ASYNCHRONIZED);
    } else {
        vcf_batch->records = array_list_new(100, 1.2, COLLECTION_MODE_ASYNCHRONIZED);
    }

    return vcf_batch;
}
/* old key path: key1_path, new key path: key2_path if key1_path == key2_path, if either type or cb is different , add a new item. or do noting if key1_path is the parent of key2_path, if either type or cb is different , add a new item. or do nothing. if key1_path is the child of key2 path, a. either type or cb is different, add a new item or replace the item with key1_path using item with key2_path ==> 1. either type or cb is different, add a new item 2. if both type anc cb are the same one, check key1_path and key2_path, if key1_path is the child of key2_path, replace key1_path using key2_path */ INT32 tg_shared_preferences_register(const CHAR* path,const CHAR* keys,SharedPreferences_Notification_Callback cb,SharedPreferences_WRITE_TYPE type) { INT32* idx_list = NULL; INT32 idx=0; CHAR* normalize_key = NULL; Shared_Preferences_Register_Item* item = NULL; INT32 ret = SharedPreferences_SUCC;//SharedPreferences_ERROR; //sem_wait (&s_shared_preferences_sem); tg_os_WaitSemaphore(s_shared_preferences_sem); if (s_shared_preferences_register_list.list == NULL) { s_shared_preferences_register_list.list = array_list_new(tg_shared_preferences_register_free); } //sem_post (&s_shared_preferences_sem); tg_os_SignalSemaphore(s_shared_preferences_sem); return_val_if_fail(path,SharedPreferences_PATH_ERROR); normalize_key = tg_shared_preferences_normalize_keys(keys); return_val_if_fail(normalize_key,SharedPreferences_ERROR); item = tg_shared_preferences_find_register_item(path,normalize_key,cb,type,&idx); if (!item) //found at least one item { tg_shared_preferences_add_new_register_item(path,normalize_key,cb,type); } TG_FREE(normalize_key); #ifdef SharedPreferences_DEBUG tg_shared_preferences_travel_register_list(); #endif return ret; }
/**
 * Allocate a VCF header entry with no name and an empty list of values.
 *
 * @return The new entry, or NULL if it could not be allocated.
 */
vcf_header_entry_t* vcf_header_entry_new() {
    vcf_header_entry_t *entry = (vcf_header_entry_t*) malloc (sizeof(vcf_header_entry_t));
    if (entry == NULL) {
        /* Previously dereferenced unconditionally; report OOM to the caller. */
        return NULL;
    }

    entry->name = NULL;
    entry->name_len = 0;
    entry->values = array_list_new(4, 1.5, COLLECTION_MODE_ASYNCHRONIZED);
    return entry;
}
struct jstruct_result _jstruct_import(struct json_object *obj, const void *data, const struct jstruct_object_property *properties, struct json_object *errors) { _init_importers(); if (errors != NULL && json_object_get_type(errors) != json_type_array) { return jstruct_error_new(jstruct_error_errors_not_array_or_null, NULL, json_object_get_type(errors)); } const struct jstruct_object_property *property; struct json_object *prop; struct jstruct_result result = JSTRUCT_OK; result.allocated = array_list_new(jstruct_allocated_free); for (property = properties; property->name; ++property) { void *ptr = jstruct_prop_ptr(data, property, JSTRUCT_PROP_PTR_GET_NO_DEREF); struct jstruct_result err; if (json_object_object_get_ex(obj, property->name, &prop)) { if (json_object_get_type(prop) != property->type.json) { err = jstruct_error_new(jstruct_error_incorrect_type, property->name, json_object_get_type(prop)); } else { jstruct_import_importer import = importers[json_type_index(property->type.json)]; err = import(prop, data, ptr, property); } } else { if (!set_null(ptr, property)) { err = jstruct_error_array_add(errors, jstruct_error_not_nullable, property->name, 0); } } jstruct_error_consume(&result, &err, errors, property->name, -1); } if (result.allocated->length == 0) { array_list_free(result.allocated); result.allocated = NULL; } return result; }
/*
 * Walk key_path inside thiz->obj and return the JSON object that contains
 * the last path component.
 *
 * thiz     - preferences instance whose obj is the root of the lookup.
 * key_path - key path, split by tg_shared_preferences_parse_keypath().
 * leaf_key - out: on success receives a newly allocated copy (TG_CALLOC)
 *            of the last path component. Left untouched when NULL is
 *            returned.
 *
 * Returns the parent object, or NULL when an argument is missing, the path
 * cannot be parsed or has no components, an intermediate component does not
 * exist, or the leaf key copy cannot be allocated.
 */
json_object * tg_shared_preferences_find_parent_of_leaf(SharedPreferences* thiz,const CHAR* key_path,CHAR** leaf_key)
{
    struct json_object *jso=NULL;
    struct array_list* key_list = NULL;
    INT32 key_list_len = 0;
    INT32 idx = 0;

    return_val_if_fail((thiz&&key_path),NULL);
    return_val_if_fail(leaf_key,NULL);

    key_list = array_list_new(tg_shared_preferences_key_free);
    return_val_if_fail((key_list),NULL);

    /*
     * The key list used to leak when parsing failed. A path with no
     * components is rejected too: it previously led to a lookup at index -1.
     */
    if (!tg_shared_preferences_parse_keypath(key_path,key_list,&key_list_len) || key_list_len < 1)
    {
        array_list_free(key_list);
        return NULL;
    }

    /* Descend through every component except the last one. */
    for (jso=thiz->obj; idx<key_list_len-1; idx++)
    {
        jso = json_object_object_get(jso,(CHAR*)array_list_get_idx(key_list,idx));
        if (jso==NULL)
            break;
    }

    if (jso!=NULL)
    {
        CHAR* key = (CHAR*)array_list_get_idx(key_list,key_list_len-1);
        CHAR* copy = NULL;

        if (key != NULL)
        {
            copy = TG_CALLOC((strlen(key)+1),1);
        }
        if (copy != NULL)
        {
            strcpy(copy,key);
            *leaf_key = copy;
        }
        else
        {
            /* No usable leaf key: report failure instead of a half result. */
            jso = NULL;
        }
    }

    array_list_free(key_list);
    return jso;
}
/**
 * Configure the stages of a workflow.
 *
 * Does nothing when either `functions` or `wf` is NULL. Otherwise, while
 * holding wf->main_mutex, it stores the stage count and function table and
 * allocates the per-stage timing slots, timing mutexes and pending-item
 * lists. When `labels` is given, a label array is allocated and every
 * non-NULL label is duplicated with strdup().
 *
 * @param num_stages Number of stages.
 * @param functions  Stage function table (stored, not copied).
 * @param labels     Optional array of stage labels, may be NULL.
 * @param wf         Workflow to configure.
 */
void workflow_set_stages(int num_stages, workflow_stage_function_t *functions,
                         char **labels, workflow_t *wf) {
    if (!functions || !wf) {
        return;
    }

    pthread_mutex_lock(&wf->main_mutex);

    wf->num_stages = num_stages;
    wf->stage_functions = functions;

    /* Per-stage timing accumulators and the mutexes protecting them. */
    wf->stage_times = (double *) calloc(num_stages, sizeof(double));
    wf->stage_times_mutex = (pthread_mutex_t *) calloc(num_stages, sizeof(pthread_mutex_t));
    for (int stage = 0; stage < num_stages; stage++) {
        pthread_mutex_init(&wf->stage_times_mutex[stage], NULL);
    }

    wf->pending_items = (array_list_t **) calloc(num_stages, sizeof(array_list_t *));
    if (labels) {
        wf->stage_labels = (char **) calloc(num_stages, sizeof(char *));
    }

    for (int stage = 0; stage < num_stages; stage++) {
        wf->pending_items[stage] = array_list_new(100, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
        if (labels && labels[stage]) {
            wf->stage_labels[stage] = strdup(labels[stage]);
        }
    }

    pthread_mutex_unlock(&wf->main_mutex);
}
/**
 * Split records according to whether they carry an identifier.
 *
 * A record whose ID is exactly "." is treated as not being a (known) SNP.
 * With include_snps set, records with a real ID pass and "." records fail;
 * with include_snps unset, the opposite holds. Failed records are annotated
 * with the filter name and appended to `failed`.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Unused by this filter.
 * @param filter_name   Name used to annotate failed records.
 * @param f_args        Pointer to a snp_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t *snp_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats,
                         char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int include_snps = ((snp_filter_args*)f_args)->include_snps;

    LOG_DEBUG_F("snp_filter (preserve SNPs = %d) over %zu records\n", include_snps, input_records->size);

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        int has_id = !(record->id_len == 1 && strncmp(".", record->id, 1) == 0);
        int want_id = include_snps ? 1 : 0;

        if (has_id == want_id) {
            array_list_insert(record, passed);
            continue;
        }

        annotate_failed_record(filter_name, filter_name_len, record);
        array_list_insert(record, failed);
    }

    return passed;
}
/**
 * Split records according to the is_indel flag of their statistics.
 *
 * input_stats[i] describes input_records->items[i]. With include_indels
 * set, indels pass and everything else fails; with it unset, indels fail
 * and everything else passes. Failed records are annotated with the filter
 * name and appended to `failed`.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Per-record statistics, parallel to input_records.
 * @param filter_name   Name used to annotate failed records.
 * @param f_args        Pointer to an indel_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t *indel_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats,
                           char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int include_indels = ((indel_filter_args*)f_args)->include_indels;

    LOG_DEBUG_F("indel_filter (preserve indels = %d) over %zu records\n", include_indels, input_records->size);

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *stats = input_stats[i];

        int is_indel = stats->is_indel ? 1 : 0;
        int want_indel = include_indels ? 1 : 0;

        if (is_indel == want_indel) {
            array_list_insert(record, passed);
            continue;
        }

        annotate_failed_record(filter_name, filter_name_len, record);
        array_list_insert(record, failed);
    }

    return passed;
}
/**
 * Keep the records whose number of mendelian errors does not exceed a
 * maximum.
 *
 * input_stats[i] describes input_records->items[i], so a statistics entry
 * that fulfils the condition means the record at the same position passes.
 * Failed records are annotated with the filter name and appended to
 * `failed`.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Per-record statistics, parallel to input_records.
 * @param filter_name   Name used to annotate failed records.
 * @param args          Pointer to a mendelian_errors_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t* mendelian_errors_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats,
                                      char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int max_errors = ((mendelian_errors_filter_args*) args)->max_mendelian_errors;

    /* The unused allele_count local (reset per record, never read) was removed. */
    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *variant_stats = input_stats[i];

        if (variant_stats->mendelian_errors <= max_errors) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    return passed;
}
/**
 * Keep the records whose minor allele frequency reaches a minimum.
 *
 * The frequency of a record is the smallest entry of its alleles_freq
 * array, capped at 1.0. input_stats[i] describes input_records->items[i].
 * Failed records are annotated with the filter name and appended to
 * `failed`.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Per-record statistics, parallel to input_records.
 * @param filter_name   Name used to annotate failed records.
 * @param args          Pointer to a maf_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t* maf_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats,
                         char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    float min_maf = ((maf_filter_args*) args)->min_maf;

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *stats = input_stats[i];

        /* Smallest allele frequency of this record. */
        float lowest_freq = 1.0;
        for (int allele = 0; allele < stats->num_alleles; allele++) {
            lowest_freq = fmin(lowest_freq, stats->alleles_freq[allele]);
        }

        if (lowest_freq >= min_maf) {
            array_list_insert(record, passed);
            continue;
        }

        annotate_failed_record(filter_name, filter_name_len, record);
        array_list_insert(record, failed);
    }

    return passed;
}
/**
 * Allocate an empty GFF batch.
 *
 * @param size Initial capacity of the record list. Values below 1 are
 *             replaced by a default capacity of 100.
 * @return The new batch with `text` unset and `records` created, or NULL
 *         if the batch structure itself could not be allocated.
 */
gff_batch_t* gff_batch_new(size_t size) {
    gff_batch_t *gff_batch = malloc(sizeof(gff_batch_t));
    if (gff_batch == NULL) {
        /* Previously dereferenced unconditionally; report OOM to the caller. */
        return NULL;
    }

    gff_batch->text = NULL;

    if (size < 1) {
        size = 100;
    }
    gff_batch->records = array_list_new(size, 1.4, COLLECTION_MODE_ASYNCHRONIZED);

    return gff_batch;
}
void *fastq_reader(void *input) { struct timeval start, end; double time; extern size_t fd_read_bytes; size_t read_bytes; //if (time_on) { start_timer(start); } wf_input_t *wf_input = (wf_input_t *) input; batch_t *new_batch = NULL; batch_t *batch = wf_input->batch; fastq_batch_reader_input_t *fq_reader_input = wf_input->fq_reader_input; array_list_t *reads = array_list_new(10000, 1.25f, COLLECTION_MODE_ASYNCHRONIZED); if (fq_reader_input->gzip) { //Gzip fastq file if (fq_reader_input->flags == SINGLE_END_MODE) { fastq_gzread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1); } else { //printf("Gzip Reader for pair-end not implemented\n");; fastq_gzread_bytes_pe(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1, fq_reader_input->fq_gzip_file2); //fastq_fread_bytes_aligner_pe(reads, fq_reader_input->batch_size, // fq_reader_input->fq_gzip_file1, fq_reader_input->fq_gzip_file2); } } else { //Fastq file if (fq_reader_input->flags == SINGLE_END_MODE) { read_bytes = fastq_fread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_file1); } else { read_bytes = fastq_fread_bytes_aligner_pe(reads, fq_reader_input->batch_size, fq_reader_input->fq_file1, fq_reader_input->fq_file2); } fd_read_bytes += read_bytes; } size_t num_reads = array_list_size(reads); if (num_reads == 0) { array_list_free(reads, (void *)fastq_read_free); } else { mapping_batch_t *mapping_batch = mapping_batch_new(reads, batch->pair_input->pair_mng); new_batch = batch_new(batch->bwt_input, batch->region_input, batch->cal_input, batch->pair_input, batch->preprocess_rna, batch->sw_input, batch->writer_input, batch->mapping_mode, mapping_batch); } //if (time_on) { stop_timer(start, end, time); timing_add(time, FASTQ_READER, timing); } //printf("Read batch %i\n", num_reads); return new_batch; }
/*
 * Lock or unlock a preferences path.
 *
 * path - path to lock or unlock.
 * lock - TRUE to add the path to the lock list, FALSE to remove it.
 *
 * The global lock list is created on first use. Locking a path that is
 * already listed, or unlocking one that is not, is a no-op that still
 * returns TRUE.
 *
 * Returns FALSE when path is NULL, the lock list is unavailable, or the
 * copy of the path cannot be allocated; TRUE otherwise.
 */
BOOL tg_shared_preferences_lock(const CHAR* path,BOOL lock)
{
    struct array_list* list = NULL;
    INT32 idx = -1;

    /* Create the global lock list on first use, under the module semaphore. */
    tg_os_WaitSemaphore(s_shared_preferences_sem);
    if (s_shared_preferences_lock_list.lock_list == NULL)
    {
        s_shared_preferences_lock_list.lock_list = array_list_new(tg_shared_preferences_lock_free);
    }
    tg_os_SignalSemaphore(s_shared_preferences_sem);

    list = s_shared_preferences_lock_list.lock_list;
    return_val_if_fail(path,FALSE);
    return_val_if_fail(list,FALSE);

    idx = tg_shared_preferences_find_lock_path(path);
    if (idx>=0)
    {
        /* Path already listed: only unlocking changes anything. */
        if (!lock)
        {
            tg_os_WaitSemaphore(s_shared_preferences_sem);
            array_list_put_idx(list, idx, NULL);
            tg_os_SignalSemaphore(s_shared_preferences_sem);
        }
        return TRUE;
    }

    if (lock)
    {
        CHAR* lock_path = TG_CALLOC((strlen(path)+1),1);
        /* The allocation result used to be passed to strcpy() unchecked. */
        return_val_if_fail(lock_path,FALSE);
        strcpy(lock_path,path);

        tg_os_WaitSemaphore(s_shared_preferences_sem);
        array_list_put_idx(list, tg_shared_preferences_get_first_free_slot(s_shared_preferences_lock_list.lock_list), (void*)lock_path);
        tg_os_SignalSemaphore(s_shared_preferences_sem);
    }
    return TRUE;
}
/**
 * Keep the records whose position falls inside one of the configured
 * regions.
 *
 * For every record a one-position query region (start == end == position)
 * is looked up in args->regions, using find_region_by_type() when
 * args->type is set and find_region() otherwise. Failed records are
 * annotated with the filter name and appended to `failed`.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Unused by this filter.
 * @param filter_name   Name used to annotate failed records.
 * @param f_args        Pointer to a region_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t *region_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats,
                            char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    region_filter_args *args = (region_filter_args*) f_args;
    region_table_t *regions = args->regions;

    LOG_DEBUG_F("region_filter over %zu records\n", input_records->size);

    /*
     * The query region lives on the stack and starts zeroed. It used to come
     * from an unchecked malloc(), which left unset fields (e.g. `type` when
     * args->type is not given) holding indeterminate values.
     */
    region_t region;
    memset(&region, 0, sizeof region);

    vcf_record_t *record;
    for (int i = 0; i < input_records->size; i++) {
        record = input_records->items[i];

        region.chromosome = strndup(record->chromosome, record->chromosome_len);
        region.start_position = record->position;
        region.end_position = record->position;

        int found = 0;
        if (args->type) {
            region.type = args->type;
            found = find_region_by_type(&region, regions);
        } else {
            found = find_region(&region, regions);
        }

        if (found) {
            // Add to the list of records that pass all checks for at least one region
            array_list_insert(record, passed);
        } else {
            // Add to the list of records that fail all checks for all regions
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }

        free(region.chromosome);
        region.chromosome = NULL;
    }

    return passed;
}
/**
 * Keep the records that follow an inheritance pattern in enough samples.
 *
 * For DOMINANT (resp. RECESSIVE) a record passes when both the cases and
 * the controls dominant (resp. recessive) percentages reach
 * min_following_pattern. input_stats[i] describes input_records->items[i].
 * Failed records are annotated with the filter name and appended to
 * `failed`.
 *
 * NOTE(review): when the pattern is neither DOMINANT nor RECESSIVE, records
 * are added to neither list. This matches the previous behaviour; confirm
 * it is intended.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Per-record statistics, parallel to input_records.
 * @param filter_name   Name used to annotate failed records.
 * @param f_args        Pointer to an inheritance_pattern_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t *inheritance_pattern_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats,
                                         char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    enum inheritance_pattern pattern = ((inheritance_pattern_filter_args*)f_args)->pattern;
    float min_following_pattern = ((inheritance_pattern_filter_args*)f_args)->min_following_pattern;

    /*
     * A literal percent sign must be written "%%". The previous "%.2f% of"
     * made "% o" a conversion specification, which consumed the size
     * argument and left "%zu" without one.
     */
    if (pattern == DOMINANT) {
        LOG_DEBUG_F("inheritance_pattern_filter (dominant in %.2f%% of samples) over %zu records\n",
                    min_following_pattern * 100, input_records->size);
    } else {
        LOG_DEBUG_F("inheritance_pattern_filter (recessive in %.2f%% of samples) over %zu records\n",
                    min_following_pattern * 100, input_records->size);
    }

    vcf_record_t *record;
    variant_stats_t *stats;
    for (int i = 0; i < input_records->size; i++) {
        record = input_records->items[i];
        stats = input_stats[i];

        if (pattern == DOMINANT) {
            if (stats->cases_percent_dominant >= min_following_pattern &&
                stats->controls_percent_dominant >= min_following_pattern) {
                array_list_insert(record, passed);
            } else {
                annotate_failed_record(filter_name, filter_name_len, record);
                array_list_insert(record, failed);
            }
        } else if (pattern == RECESSIVE) {
            if (stats->cases_percent_recessive >= min_following_pattern &&
                stats->controls_percent_recessive >= min_following_pattern) {
                array_list_insert(record, passed);
            } else {
                annotate_failed_record(filter_name, filter_name_len, record);
                array_list_insert(record, failed);
            }
        }
    }

    return passed;
}
/**
 * Append `link` to the list of records stored under `key` in the
 * positions_read hash table, creating the list when the key is new.
 *
 * @param key            Position key; duplicated with strdup() when a new
 *                       entry is created.
 * @param link           Record/file link to store.
 * @param positions_read Hash table mapping keys to lists of links.
 * @return For an existing key, the result of array_list_insert(). For a new
 *         key, the status written by kh_put(), which replaces the insertion
 *         result.
 */
int insert_position_read(char key[64], vcf_record_file_link* link, kh_pos_t* positions_read) {
    int ret;
    khiter_t iter = kh_get(pos, positions_read, key);

    if (iter != kh_end(positions_read)) {
        /* Key already present: extend its list. */
        array_list_t *existing = kh_value(positions_read, iter);
        return array_list_insert(link, existing);
    }

    /* New key: build the list first, then publish it in the table. */
    array_list_t *fresh = array_list_new(8, 1.5, COLLECTION_MODE_SYNCHRONIZED);
    ret = array_list_insert(link, fresh);
    iter = kh_put(pos, positions_read, strdup(key), &ret);
    if (ret) {
        kh_value(positions_read, iter) = fresh;
    }

    return ret;
}
void *sa_fq_reader(void *input) { sa_wf_input_t *wf_input = (sa_wf_input_t *) input; sa_wf_batch_t *new_wf_batch = NULL; sa_wf_batch_t *curr_wf_batch = wf_input->wf_batch; fastq_batch_reader_input_t *fq_reader_input = wf_input->fq_reader_input; array_list_t *reads = array_list_new(fq_reader_input->batch_size, 1.25f, COLLECTION_MODE_ASYNCHRONIZED); if (fq_reader_input->gzip) { // Gzip fastq file if (fq_reader_input->flags == SINGLE_END_MODE) { fastq_gzread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1); } else { fastq_gzread_bytes_pe(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1, fq_reader_input->fq_gzip_file2); } } else { // Fastq file if (fq_reader_input->flags == SINGLE_END_MODE) { fastq_fread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_file1); } else { fastq_fread_bytes_aligner_pe(reads, fq_reader_input->batch_size, fq_reader_input->fq_file1, fq_reader_input->fq_file2); } } size_t num_reads = array_list_size(reads); if (num_reads == 0) { array_list_free(reads, (void *)fastq_read_free); } else { sa_mapping_batch_t *sa_mapping_batch = sa_mapping_batch_new(reads); sa_mapping_batch->bam_format = wf_input->bam_format; new_wf_batch = sa_wf_batch_new(curr_wf_batch->options, curr_wf_batch->sa_index, curr_wf_batch->writer_input, sa_mapping_batch, NULL); } return new_wf_batch; }
/**
 * Store a batch of variant statistics in a SQLite database.
 *
 * Each statistics entry is converted into a variant_stats_db_fields_t, the
 * whole list is handed to insert_variant_stats_db_fields_list(), and the
 * list and its elements are freed afterwards.
 *
 * @param db           Database handle.
 * @param num_variants Number of entries in stats_batch.
 * @param stats_batch  Statistics to store.
 */
static void report_vcf_variant_stats_sqlite3(sqlite3 *db, int num_variants, variant_stats_t **stats_batch) {
    array_list_t *fields = array_list_new(num_variants + 1, 1.1, COLLECTION_MODE_ASYNCHRONIZED);

    for (int idx = 0; idx < num_variants; idx++) {
        variant_stats_t *vs = stats_batch[idx];
        variant_stats_db_fields_t *row =
            variant_stats_db_fields_new(vs->chromosome, vs->position, vs->ref_allele, vs->alt_alleles,
                                        vs->maf_allele, vs->maf, vs->mgf_genotype, vs->mgf,
                                        vs->missing_alleles, vs->missing_genotypes, vs->mendelian_errors,
                                        vs->is_indel,
                                        vs->cases_percent_dominant, vs->controls_percent_dominant,
                                        vs->cases_percent_recessive, vs->controls_percent_recessive);
        array_list_insert(row, fields);
    }

    insert_variant_stats_db_fields_list(fields, db);
    array_list_free(fields, (void *)variant_stats_db_fields_free);
}
workflow_t *workflow_new() { workflow_t *wf = calloc(1, sizeof(workflow_t)); wf->num_threads = 0; wf->max_num_work_items = 0; wf->num_stages = 0; wf->completed_producer = 0; wf->num_pending_items = 0; wf->running_producer = 0; wf->running_consumer = 0; pthread_mutex_init(&wf->producer_mutex, NULL); pthread_mutex_init(&wf->consumer_mutex, NULL); pthread_mutex_init(&wf->main_mutex, NULL); wf->workflow_time = 0; wf->producer_time = 0; wf->consumer_time = 0; wf->stage_times = NULL; wf->pending_items = NULL; wf->completed_items = array_list_new(100, 1.25f, COLLECTION_MODE_ASYNCHRONIZED); wf->stage_functions = NULL; wf->stage_labels = NULL; wf->producer_function = NULL; wf->producer_label = NULL; wf->consumer_function = NULL; wf->consumer_label = NULL; wf->complete_extra_stage = 1; //wf->status_function = workflow_get_status_; return wf; }
/**
 * Keep the records whose "DP" value in the INFO field reaches a minimum
 * coverage.
 *
 * The INFO text of each record is copied into a scratch buffer, the "DP"
 * field is extracted with get_field_value_in_info() and compared against
 * min_coverage. Records with a missing or non-numeric DP fail, as do
 * records for which the scratch buffer cannot be grown. Failed records are
 * annotated with the filter name and appended to `failed`.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Unused by this filter.
 * @param filter_name   Name used to annotate failed records.
 * @param f_args        Pointer to a coverage_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t* coverage_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats,
                              char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int min_coverage = ((coverage_filter_args*)f_args)->min_coverage;

    LOG_DEBUG_F("coverage_filter (min coverage = %d) over %zu records\n", min_coverage, input_records->size);

    /*
     * Scratch copy of the INFO field. Its capacity is tracked explicitly:
     * it used to be inferred from strlen(aux_buffer), and together with an
     * unterminated strncpy() a shorter INFO following a longer one could
     * keep stale trailing bytes (e.g. "DP=5" parsed as "DP=50").
     */
    char *aux_buffer = NULL;
    size_t aux_capacity = 0;

    vcf_record_t *record;
    for (int i = 0; i < input_records->size; i++) {
        record = input_records->items[i];
        int enough_coverage = 0;

        size_t needed = (size_t) record->info_len + 1;
        if (needed > aux_capacity) {
            size_t new_capacity = needed < 128 ? 128 : needed;
            char *grown = realloc(aux_buffer, new_capacity);
            if (grown != NULL) {
                aux_buffer = grown;
                aux_capacity = new_capacity;
            }
        }

        if (needed <= aux_capacity) {
            strncpy(aux_buffer, record->info, record->info_len);
            aux_buffer[record->info_len] = '\0';

            char *record_coverage = get_field_value_in_info("DP", aux_buffer);
            if (record_coverage != NULL && is_numeric(record_coverage)) {
                enough_coverage = (atoi(record_coverage) >= min_coverage);
            }
        }

        if (enough_coverage) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    free(aux_buffer);

    return passed;
}
/* require interface */
/*
 * Resolve key_path inside thiz->obj and return the JSON object it names.
 *
 * thiz     - preferences instance whose obj is the root of the lookup.
 * key_path - key path; "/" names the root object itself.
 *
 * Returns the object found, or NULL when an argument is missing, the path
 * cannot be parsed, or a component does not exist.
 */
json_object * tg_shared_preferences_find_leaf_obj(SharedPreferences* thiz,const CHAR* key_path)
{
    struct json_object *jso=NULL;
    struct array_list* key_list = NULL;
    INT32 key_list_len = 0;
    INT32 idx = 0;

    return_val_if_fail((thiz&&key_path),NULL);
    if (strcmp(key_path,"/")==0)
        return thiz->obj;

    key_list = array_list_new(tg_shared_preferences_key_free);
    return_val_if_fail((key_list),NULL);

    /* The key list used to leak when the path could not be parsed. */
    if (!tg_shared_preferences_parse_keypath(key_path,key_list,&key_list_len))
    {
        array_list_free(key_list);
        return NULL;
    }

    /* Descend one component at a time; stop as soon as one is missing. */
    for (jso=thiz->obj; idx<key_list_len &&jso; idx++)
    {
        jso = json_object_object_get(jso,(CHAR*)array_list_get_idx(key_list,idx));
    }

    array_list_free(key_list);
    return jso;
}
/**
 * Keep the records whose quality reaches a minimum.
 *
 * Failed records are annotated with the filter name and appended to
 * `failed`.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Unused by this filter.
 * @param filter_name   Name used to annotate failed records.
 * @param f_args        Pointer to a quality_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t* quality_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats,
                             char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int min_quality = ((quality_filter_args*)f_args)->min_quality;

    LOG_DEBUG_F("quality_filter (min quality = %d) over %zu records\n", min_quality, input_records->size);

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        int good_enough = (record->quality >= min_quality);

        if (good_enough) {
            array_list_insert(record, passed);
            continue;
        }

        annotate_failed_record(filter_name, filter_name_len, record);
        array_list_insert(record, failed);
    }

    return passed;
}
/**
 * Create a deep copy of a VCF record.
 *
 * The chromosome, id, reference, alternate, filter, info and format
 * strings are duplicated with strndup() using their stored lengths; every
 * sample is duplicated with strdup() into a new samples list.
 *
 * NOTE(review): the copy starts zeroed and only the fields listed above
 * plus `position` are filled in; any other field (e.g. quality) stays 0.
 * Confirm this is intended.
 *
 * @param orig Record to copy.
 * @return The new record, or NULL if the record structure could not be
 *         allocated.
 */
vcf_record_t *vcf_record_copy(vcf_record_t *orig) {
    vcf_record_t *record = (vcf_record_t*) calloc (1, sizeof(vcf_record_t));
    if (record == NULL) {
        /* Previously dereferenced unconditionally; report OOM to the caller. */
        return NULL;
    }

    record->chromosome = strndup(orig->chromosome, orig->chromosome_len);
    record->chromosome_len = orig->chromosome_len;
    record->position = orig->position;
    record->id = strndup(orig->id, orig->id_len);
    record->id_len = orig->id_len;
    record->reference = strndup(orig->reference, orig->reference_len);
    record->reference_len = orig->reference_len;
    record->alternate = strndup(orig->alternate, orig->alternate_len);
    record->alternate_len = orig->alternate_len;
    record->filter = strndup(orig->filter, orig->filter_len);
    record->filter_len = orig->filter_len;
    record->info = strndup(orig->info, orig->info_len);
    record->info_len = orig->info_len;
    record->format = strndup(orig->format, orig->format_len);
    record->format_len = orig->format_len;

    record->samples = array_list_new(orig->samples->size + 1, 1.5, COLLECTION_MODE_ASYNCHRONIZED);
    for (int i = 0; i < orig->samples->size; i++) {
        array_list_insert(strdup(array_list_get(i, orig->samples)), record->samples);
    }

    return record;
}
/**
 * Keep the records whose proportion of missing alleles does not exceed a
 * maximum.
 *
 * The proportion is missing_alleles / (sum of alleles_count +
 * missing_alleles). input_stats[i] describes input_records->items[i], so a
 * statistics entry that fulfils the condition means the record at the same
 * position passes. Failed records are annotated with the filter name and
 * appended to `failed`.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Per-record statistics, parallel to input_records.
 * @param filter_name   Name used to annotate failed records.
 * @param args          Pointer to a missing_values_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t* missing_values_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats,
                                    char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    float max_missing = ((missing_values_filter_args*) args)->max_missing;
    float record_missing;
    float allele_count;

    /* The unused stats_item local was removed. */
    variant_stats_t *variant_stats;
    vcf_record_t *record;
    for (int i = 0; i < input_records->size; i++) {
        record = input_records->items[i];
        variant_stats = input_stats[i];

        /* Total number of observed (non-missing) alleles. */
        allele_count = 0;
        for (int j = 0; j < variant_stats->num_alleles; j++) {
            allele_count += variant_stats->alleles_count[j];
        }
        record_missing = variant_stats->missing_alleles / (allele_count + variant_stats->missing_alleles);

        if (record_missing <= max_missing) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    return passed;
}
/**
 * Keep the records whose type equals the requested variant type.
 *
 * Failed records are annotated with the filter name and appended to
 * `failed`.
 *
 * @param input_records Records to filter.
 * @param failed        List receiving the records that fail.
 * @param input_stats   Unused by this filter.
 * @param filter_name   Name used to annotate failed records.
 * @param f_args        Pointer to a variant_type_filter_args.
 * @return Newly created list with the records that pass.
 */
array_list_t *variant_type_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats,
                                  char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    enum variant_type type = ((variant_type_filter_args*)f_args)->type;

    LOG_DEBUG_F("variant_type_filter (variant_type %d) over %zu records\n", type, input_records->size);

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];

        if (record->type == type) {
            array_list_insert(record, passed);
            continue;
        }

        annotate_failed_record(filter_name, filter_name_len, record);
        array_list_insert(record, failed);
    }

    return passed;
}
/*
 * Replace the BWT anchors held in `list` by CALs derived from them.
 *
 * On entry `list` holds bwt_anchor_t items. They are sorted into four
 * buckets (strand x forward/backward) and `list` is cleared. `list` is then
 * refilled, in order of preference, with:
 *   1. one CAL per "big" anchor (an anchor leaving fewer than 16 read
 *      positions uncovered), flagged SINGLE_ANCHORS;
 *   2. otherwise, two CALs per compatible forward/backward anchor pair,
 *      flagged DOUBLE_ANCHORS;
 *   3. otherwise, if some anchor was longer than MIN_SINGLE_ANCHOR, one CAL
 *      per anchor of the bucket the longest anchor belongs to, flagged
 *      SINGLE_ANCHORS.
 * The four buckets and the big-anchor list are freed with bwt_anchor_free
 * before returning.
 *
 * list        - in/out: anchors on entry, CALs on return.
 * read_length - length of the read the anchors belong to.
 *
 * Returns the number of items left in `list`.
 */
size_t bwt_search_pair_anchors(array_list_t *list, unsigned int read_length) {
  bwt_anchor_t *bwt_anchor;
  int max_anchor_length = 0;

  bwt_anchor_t *bwt_anchor_back, *bwt_anchor_forw;
  int anchor_length_tmp, anchor_back, anchor_forw;

  /* Strand and type of the longest single anchor seen so far. */
  int strand = 0, type = 0;
  int found_anchor = 0, found_double_anchor = 0;

  const int MIN_ANCHOR = 25;          /* minimum length of each anchor of a pair */
  const int MIN_SINGLE_ANCHOR = 40;   /* a single anchor must be longer than this */
  const int MAX_BWT_REGIONS = 50;     /* initial capacity of the working lists */
  const int MAX_BWT_ANCHOR_DISTANCE = 500000;  /* max start/end distance within a pair */

  array_list_t *anchor_list_tmp, *forward_anchor_list, *backward_anchor_list;
  cal_t *cal;
  int seed_size, gap_read, gap_genome;

  /* Buckets: suffix _1 holds anchors with strand == 1, _0 all the others. */
  array_list_t *backward_anchor_list_0 = array_list_new(MAX_BWT_REGIONS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
  array_list_t *forward_anchor_list_0 = array_list_new(MAX_BWT_REGIONS, 1.25f , COLLECTION_MODE_ASYNCHRONIZED);
  array_list_t *backward_anchor_list_1 = array_list_new(MAX_BWT_REGIONS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
  array_list_t *forward_anchor_list_1 = array_list_new(MAX_BWT_REGIONS, 1.25f , COLLECTION_MODE_ASYNCHRONIZED);

  /* Anchors that cover all but fewer than 16 positions of the read. */
  array_list_t *big_anchor_list = array_list_new(MAX_BWT_REGIONS, 1.25f , COLLECTION_MODE_ASYNCHRONIZED);

  /* Pass 1: bucket every anchor and remember the longest single anchor. */
  for (int i = 0; i < array_list_size(list); i++) {
    bwt_anchor = array_list_get(i, list);
    if (bwt_anchor->strand == 1) {
      if (bwt_anchor->type == FORWARD_ANCHOR) {
        array_list_insert(bwt_anchor, forward_anchor_list_1);
      } else {
        array_list_insert(bwt_anchor, backward_anchor_list_1);
      }
    } else {
      if (bwt_anchor->type == FORWARD_ANCHOR) {
        array_list_insert(bwt_anchor, forward_anchor_list_0);
      } else {
        array_list_insert(bwt_anchor, backward_anchor_list_0);
      }
    }

    anchor_length_tmp = bwt_anchor->end - bwt_anchor->start + 1;
    if (anchor_length_tmp > MIN_SINGLE_ANCHOR && anchor_length_tmp > max_anchor_length) {
      max_anchor_length = anchor_length_tmp;
      found_anchor = 1;
      strand = bwt_anchor->strand;
      type = bwt_anchor->type;
    }

    /* A big anchor is also kept in its strand/type bucket above. */
    if (read_length - anchor_length_tmp < 16) {
      array_list_insert(bwt_anchor, big_anchor_list);
    }
  }

  /*
   * Empty the caller's list. The anchors are now referenced by the buckets;
   * presumably the NULL callback means they are not freed here - confirm
   * against array_list_clear().
   */
  array_list_clear(list, NULL);

  /* Case 1: big anchors take precedence; each one becomes a CAL. */
  if (array_list_size(big_anchor_list) > 0) {
    for (int i = array_list_size(big_anchor_list) - 1; i >= 0; i--) {
      bwt_anchor = array_list_remove_at(i, big_anchor_list);
      /* This size_t seed_size shadows the int declared at function scope. */
      size_t seed_size = bwt_anchor->end - bwt_anchor->start;
      if (bwt_anchor->type == FORWARD_ANCHOR) {
        cal = convert_bwt_anchor_to_CAL(bwt_anchor, 0, seed_size);
      } else {
        cal = convert_bwt_anchor_to_CAL(bwt_anchor, read_length - seed_size - 1, read_length - 1);
      }
      array_list_insert(cal, list);
    }
    array_list_set_flag(SINGLE_ANCHORS, list);

    goto exit;
  }

  /*
   * Case 2: pair forward and backward anchors within each strand bucket.
   * The loop variable `type` shadows the function-scope `type`; here it only
   * selects the bucket pair (1 -> the _0 lists, 0 -> the _1 lists).
   */
  for (int type = 1; type >= 0; type--) {
    if (!type) {
      forward_anchor_list = forward_anchor_list_1;
      backward_anchor_list = backward_anchor_list_1;
    } else {
      forward_anchor_list = forward_anchor_list_0;
      backward_anchor_list = backward_anchor_list_0;
    }

    /* Per-anchor "already paired" markers. */
    int *set_forward = (int *)calloc(array_list_size(forward_anchor_list), sizeof(int));
    int *set_backward = (int *)calloc(array_list_size(backward_anchor_list), sizeof(int));

    // Associate forward anchors with backward anchors
    for (int i = 0; i < array_list_size(forward_anchor_list); i++) {
      if (set_forward[i]) { continue; }
      bwt_anchor_forw = array_list_get(i, forward_anchor_list);
      for (int j = 0; j < array_list_size(backward_anchor_list); j++) {
        if (set_backward[j]) { continue; }
        bwt_anchor_back = array_list_get(j, backward_anchor_list);
        anchor_forw = (bwt_anchor_forw->end - bwt_anchor_forw->start + 1);
        anchor_back = (bwt_anchor_back->end - bwt_anchor_back->start + 1);

        /* Assigned but not read again in this function. */
        anchor_length_tmp = anchor_forw + anchor_back;

        /*
         * Compatible pair: same chromosome, close enough, both anchors long
         * enough.
         * NOTE(review): abs() takes an int; if start/end are wider or
         * unsigned the difference may not behave as intended - confirm the
         * field types.
         */
        if (bwt_anchor_forw->chromosome == bwt_anchor_back->chromosome &&
            abs(bwt_anchor_back->start - bwt_anchor_forw->end) <= MAX_BWT_ANCHOR_DISTANCE &&
            anchor_forw >= MIN_ANCHOR && anchor_back >= MIN_ANCHOR) {

          /* The backward anchor must not start before the forward one ends. */
          if (bwt_anchor_back->start < bwt_anchor_forw->end) { continue; }

          gap_read = read_length - (anchor_forw + anchor_back);
          gap_genome = bwt_anchor_back->start - bwt_anchor_forw->end;

          /* apply_flank is set below but never read in this function. */
          int apply_flank = 0;
          if (gap_read < 2 || gap_genome < 2) {
            /* Anchors touch or overlap: trim a flank from both of them. */
            int gap;
            if (gap_read < 0 && gap_genome < 0) {
              gap = abs(gap_read) > abs(gap_genome) ? abs(gap_read) : abs(gap_genome);
            } else if (gap_read < 0) {
              gap = abs(gap_read);
            } else if (gap_genome < 0) {
              gap = abs(gap_genome);
            } else {
              gap = 2;
            }

            int flank = 5;
            apply_flank = 1;

            if (abs(gap) >= flank*2) {
              // Solve read overlap
              flank = abs(gap)/2 + flank/2;
            }

            /* Never trim more than half of an anchor. The anchors are
               modified in place. */
            if (flank >= anchor_forw) {
              bwt_anchor_forw->end -= anchor_forw/2;
            } else {
              bwt_anchor_forw->end -= flank;
            }

            if (flank >= anchor_back) {
              bwt_anchor_back->start += anchor_back/2;
            } else {
              bwt_anchor_back->start += flank;
            }
          }

          /* First CAL: forward anchor, placed at the start of the read. */
          cal = convert_bwt_anchor_to_CAL(bwt_anchor_forw, 0, bwt_anchor_forw->end - bwt_anchor_forw->start);
          array_list_insert(cal, list);

          /* Second CAL: backward anchor, placed at the end of the read. */
          seed_size = bwt_anchor_back->end - bwt_anchor_back->start + 1;
          cal = convert_bwt_anchor_to_CAL(bwt_anchor_back, read_length - seed_size, read_length - 1);
          array_list_insert(cal, list);

          /* Stop pairing once the list holds more than 5 CALs. */
          if (array_list_size(list) > 5) {
            free(set_backward);
            free(set_forward);
            goto exit;
          }

          array_list_set_flag(DOUBLE_ANCHORS, list);
          found_double_anchor = 1;
          set_forward[i] = 1;
          set_backward[j] = 1;
          break;
        }
      }
    }
    free(set_backward);
    free(set_forward);
  }

  /*
   * Case 3: no pair was found but a long single anchor was. Use every
   * anchor of the bucket the longest anchor belongs to. `strand` and `type`
   * are the function-scope values recorded in pass 1.
   */
  if (!found_double_anchor && found_anchor) {
    if (strand == 1) {
      if (type == FORWARD_ANCHOR) {
        anchor_list_tmp = forward_anchor_list_1;
      } else {
        anchor_list_tmp = backward_anchor_list_1;
      }
    } else {
      if (type == FORWARD_ANCHOR) {
        anchor_list_tmp = forward_anchor_list_0;
      } else {
        anchor_list_tmp = backward_anchor_list_0;
      }
    }

    for (int i = 0; i < array_list_size(anchor_list_tmp); i++) {
      bwt_anchor = array_list_get(i, anchor_list_tmp);
      /* This size_t seed_size shadows the int declared at function scope. */
      size_t seed_size = bwt_anchor->end - bwt_anchor->start;
      if (bwt_anchor->type == FORWARD_ANCHOR) {
        cal = convert_bwt_anchor_to_CAL(bwt_anchor, 0, seed_size);
      } else {
        cal = convert_bwt_anchor_to_CAL(bwt_anchor, read_length - seed_size - 1, read_length - 1);
      }
      array_list_insert(cal, list);
    }
    array_list_set_flag(SINGLE_ANCHORS, list);
  }

 exit:
  /* Release the buckets together with the anchors they hold. */
  array_list_free(forward_anchor_list_1, (void *)bwt_anchor_free);
  array_list_free(backward_anchor_list_1, (void *)bwt_anchor_free);
  array_list_free(forward_anchor_list_0, (void *)bwt_anchor_free);
  array_list_free(backward_anchor_list_0, (void *)bwt_anchor_free);
  array_list_free(big_anchor_list, (void *)bwt_anchor_free);

  return array_list_size(list);
}
/**
 * Allocate a zero-initialized VCF record with an empty samples list.
 *
 * @return The new record, or NULL if it could not be allocated.
 */
vcf_record_t* vcf_record_new() {
    vcf_record_t *record = (vcf_record_t*) calloc (1, sizeof(vcf_record_t));
    if (record == NULL) {
        /* Previously dereferenced unconditionally; report OOM to the caller. */
        return NULL;
    }

    record->samples = array_list_new(16, 1.5, COLLECTION_MODE_ASYNCHRONIZED);
    return record;
}
/**
 * Gap-filling / alignment-building stage for the two bisulfite mapping sets
 * held in batch->mapping_batch.
 *
 * Steps, in order:
 *   1. Gaps between seeds and at read ends are filled, and seed regions are
 *      merged, once with flag 1 and once with flag 0 (fill_gaps_bs,
 *      merge_seed_regions_bs, fill_end_gaps_bs).
 *   2. For each of the two target sets (mapping_lists/targets and
 *      mapping_lists2/targets2), every CAL of every target read whose first
 *      seed region carries a cigar with a normalized score above
 *      input->min_score is converted into an alignment_t.
 *   3. The read's CAL list is freed and replaced, in the same mapping_lists
 *      slot, by the list of alignments.
 *
 * Reads whose CAL list is empty are left untouched.
 *
 * @param input  Server input: genomes, Smith-Waterman options and min_score.
 * @param batch  Batch whose mapping_batch is processed in place.
 * @return       BS_POST_PAIR_STAGE (the next pipeline stage).
 */
int apply_sw_bs_4nt(sw_server_input_t* input, batch_t *batch) {
    mapping_batch_t *mapping_batch = batch->mapping_batch;
    genome_t *genome1 = input->genome1_p;
    genome_t *genome2 = input->genome2_p;
    sw_optarg_t *sw_optarg = &input->sw_optarg;

    // NOTE(review): this looks like a debugging leftover -- it reads a fixed
    // 100-base window from both genomes and prints it on every call. Kept
    // as-is so the function's output does not change; confirm and remove.
    {
        char r[1024];
        size_t start = 169312417;
        size_t end = start + 99;
        genome_read_sequence_by_chr_index(r, 0, 0, &start, &end, genome2);
        printf("+++++++++++++ genome2 = %s \n", r);
        genome_read_sequence_by_chr_index(r, 0, 0, &start, &end, genome1);
        printf("+++++++++++++ genome1 = %s \n", r);
    }

    // fill gaps between seeds
    // NOTE(review): the genome argument order differs between fill_gaps_bs and
    // fill_end_gaps_bs for the same flag value; preserved from the original.
    fill_gaps_bs(mapping_batch, sw_optarg, genome2, genome1, 20, 5, 1);
    merge_seed_regions_bs(mapping_batch, 1);
    fill_end_gaps_bs(mapping_batch, sw_optarg, genome1, genome2, 20, 400, 1);

    fill_gaps_bs(mapping_batch, sw_optarg, genome1, genome2, 20, 5, 0);
    merge_seed_regions_bs(mapping_batch, 0);
    fill_end_gaps_bs(mapping_batch, sw_optarg, genome2, genome1, 20, 400, 0);

    // now we can create the alignments
    array_list_t *fq_batch = mapping_batch->fq_batch;
    float min_score = input->min_score;

    for (int bs_id = 0; bs_id < 2; bs_id++) {
        array_list_t **mapping_lists;
        size_t num_targets;
        size_t *targets;

        if (bs_id == 0) {
            mapping_lists = mapping_batch->mapping_lists;
            num_targets = mapping_batch->num_targets;
            targets = mapping_batch->targets;
        } else {
            mapping_lists = mapping_batch->mapping_lists2;
            num_targets = mapping_batch->num_targets2;
            targets = mapping_batch->targets2;
        }

        for (size_t i = 0; i < num_targets; i++) {
            size_t read_index = targets[i];
            fastq_read_t *read = (fastq_read_t *) array_list_get(read_index, fq_batch);
            array_list_t *cal_list = mapping_lists[read_index];
            size_t num_cals = array_list_size(cal_list);

            if (num_cals <= 0) continue;

            size_t read_len = read->length;
            array_list_t *alignment_list =
                array_list_new(num_cals, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

            // processing each CAL from this read
            for (size_t j = 0; j < num_cals; j++) {
                cal_t *cal = array_list_get(j, cal_list);
                if (cal->sr_list->size == 0) continue;

                // The cigar of the CAL hangs from its first seed region.
                seed_region_t *s = (seed_region_t *) linked_list_get_first(cal->sr_list);
                cigar_code_t *cigar_code = (cigar_code_t *) s->info;

                float norm_score = cigar_code_get_score(read_len, cigar_code);
                LOG_DEBUG_F("score = %0.2f\n", norm_score);

                // filter by SW score
                if (norm_score > min_score) {
                    // update cigar and sequence and quality strings
                    cigar_code_update(cigar_code);
                    // NOTE(review): if new_cigar_code_string allocates, the
                    // string built for this log line is never freed -- confirm.
                    LOG_DEBUG_F("\tcigar code = %s\n", new_cigar_code_string(cigar_code));

                    size_t match_len = cigar_code_nt_length(cigar_code);
                    cigar_op_t *first_op = cigar_code_get_first_op(cigar_code);
                    // A leading 'H' operation shifts where the matched part starts.
                    size_t match_start =
                        (first_op && first_op->name == 'H' ? first_op->number : 0);

                    char *match_seq = (char *) malloc((match_len + 1) * sizeof(char));
                    memcpy(match_seq, &read->sequence[match_start], match_len);
                    match_seq[match_len] = 0;

                    char *match_qual = (char *) malloc((match_len + 1) * sizeof(char));
                    memcpy(match_qual, &read->quality[match_start], match_len);
                    match_qual[match_len] = 0;

                    // set optional fields: each entry is a two-letter tag plus
                    // the type character 'i', followed by the raw bytes of an int
                    int optional_fields_length = 100;
                    char *optional_fields = (char *) calloc(optional_fields_length, sizeof(char));
                    char *p = optional_fields;

                    // Scale before truncating: casting norm_score alone to int
                    // would collapse every score below 1.0 to zero.
                    int AS = (int) (norm_score * 100);
                    memcpy(p, "ASi", 3);
                    p += 3;
                    memcpy(p, &AS, sizeof(int));
                    p += sizeof(int);

                    // Go through an int so the bytes copied are the value itself
                    // regardless of the width/endianness of size_t.
                    int NH = (int) num_cals;
                    memcpy(p, "NHi", 3);
                    p += 3;
                    memcpy(p, &NH, sizeof(int));
                    p += sizeof(int);

                    memcpy(p, "NMi", 3);
                    p += 3;
                    memcpy(p, &cigar_code->distance, sizeof(int));
                    p += sizeof(int);

                    assert(read->length == cigar_code_nt_length(cigar_code));

                    // create an alignment and insert it into the list
                    alignment_t *alignment = alignment_new();

                    size_t header_len = strlen(read->id);
                    char *head_id = (char *) malloc(header_len + 1);
                    get_to_first_blank(read->id, header_len, head_id);

                    alignment_init_single_end(head_id, match_seq, match_qual,
                                              cal->strand, cal->chromosome_id - 1, cal->start - 1,
                                              new_cigar_code_string(cigar_code),
                                              cigar_code_get_num_ops(cigar_code),
                                              norm_score * 254, 1, (num_cals > 1),
                                              optional_fields_length, optional_fields, alignment);

                    array_list_insert(alignment, alignment_list);

                    LOG_DEBUG_F("creating alignment (bs_id = %i)...\n", bs_id);
                }
            }

            // free the cal list, and update the mapping list with the alignment list
            array_list_free(cal_list, (void *) cal_free);
            mapping_lists[read_index] = alignment_list;
        }
    }

    // go to the next stage
    return BS_POST_PAIR_STAGE;
}
int main(int argc, char *argv[]) { if (argc != 4) { printf("Error.\n"); printf("Usage: %s index-dirname seq num-errors\n", argv[0]); exit(-1); } char *index_dirname = argv[1]; char *seq = argv[2]; int num_errors = atoi(argv[3]); // initializations initReplaceTable(); bwt_optarg_t *bwt_optarg = bwt_optarg_new(num_errors, 1, 10000, 1, 0, 0); bwt_index_t *bwt_index = bwt_index_new(index_dirname); // seq { array_list_t *mapping_list = array_list_new(100000, 1.25f, COLLECTION_MODE_SYNCHRONIZED); size_t num_mappings; num_mappings = bwt_map_seq(seq, bwt_optarg, bwt_index, mapping_list); printf("seq: %s\n", seq); printf("num_mappings = %lu\n", num_mappings); for (size_t i = 0; i < num_mappings; i++) { printf("%lu\t---------------------\n", i); alignment_print(array_list_get(i, mapping_list)); } } // seed { array_list_t *mapping_list = array_list_new(100000, 1.25f, COLLECTION_MODE_SYNCHRONIZED); size_t num_mappings; size_t len = strlen(seq); char *code_seq = (char *) calloc(len + 10, sizeof(char)); replaceBases(seq, code_seq, len); num_mappings = bwt_map_exact_seeds_seq(code_seq, 18, 16, bwt_optarg, bwt_index, mapping_list); region_t *region; for (size_t i = 0; i < num_mappings; i++) { region = array_list_get(i, mapping_list); printf("Region: chr = %lu, strand = %d, start = %lu, end = %lu\n", region->chromosome_id, region->strand, region->start, region->end); } } printf("Done.\n"); }
/**
 * Runs Smith-Waterman for every CAL of every target read in the batch and
 * replaces the CAL lists with lists of the alignments that pass the score
 * filter.
 *
 * For each CAL a private copy of the read (reverse-complemented when the CAL
 * strand is 1) and the reference window [cal->start - flank_length,
 * cal->end + flank_length] are built. The CAL lists are freed and their
 * mapping_lists slots reset to NULL. After smith_waterman_mqmr, every result
 * whose score is >= input->min_score becomes an sw_output_t appended to the
 * read's (newly created) mapping list; batch->targets / batch->num_targets
 * are rewritten to contain only reads with at least one valid result, and
 * batch->num_to_do counts the valid results.
 *
 * NOTE(review): the number of alignments is taken from batch->num_to_do on
 * entry, while the per-CAL arrays are filled by walking the target lists.
 * The code assumes both counts agree; nothing here enforces it.
 *
 * @param input  Server input: genome, flank length, min_score and SW options.
 * @param batch  Batch to process; modified in place.
 */
void apply_sw(sw_server_input_t* input, aligner_batch_t *batch) {
    int tid = omp_get_thread_num();

    fastq_batch_t *fq_batch = batch->fq_batch;
    genome_t *genome = input->genome_p;
    size_t flank_length = input->flank_length;
    float min_score = input->min_score;

    // The normalized-score computation is disabled in this function, so a
    // defined placeholder is handed to sw_output_new instead of an
    // indeterminate value.
    float score = 0.0f;

    size_t num_seqs = batch->num_targets;

    // what was pending becomes done; pending work is recounted below
    batch->num_done = batch->num_to_do;
    batch->num_to_do = 0;

    size_t sw_total = batch->num_done;
    sw_optarg_t *sw_optarg = &input->sw_optarg;

    sw_multi_output_t *output = sw_multi_output_new(sw_total);

    // Variable-length arrays must have a non-zero length, so reserve at least
    // one slot even when there is nothing to align.
    size_t capacity = (sw_total > 0 ? sw_total : 1);
    char *q[capacity], *r[capacity];
    uint8_t strands[capacity], chromosomes[capacity];
    size_t starts[capacity];
    size_t read_indices[capacity];
    size_t sw_count = 0;

    // initialize query and reference sequences to Smith-Waterman
    for (size_t i = 0; i < num_seqs; i++) {
        size_t index = batch->targets[i];
        array_list_t *cal_list = batch->mapping_lists[index];
        size_t num_cals = array_list_size(cal_list);

        // processing each CAL from this read
        for (size_t j = 0; j < num_cals; j++) {
            cal_t *cal = array_list_get(j, cal_list);
            read_indices[sw_count] = index;

            // query sequence, revcomp if necessary
            int read_len = fq_batch->data_indices[index + 1] - fq_batch->data_indices[index];
            q[sw_count] = (char *) calloc((read_len + 1), sizeof(char));
            memcpy(q[sw_count], &(fq_batch->seq[fq_batch->data_indices[index]]), read_len);
            if (cal->strand == 1) {
                seq_reverse_complementary(q[sw_count], read_len);
            }

            // reference sequence: the CAL extended by the flank on both sides
            size_t start = cal->start - flank_length;
            size_t end = cal->end + flank_length;
            r[sw_count] = calloc(1, end - start + 2);
            genome_read_sequence_by_chr_index(r[sw_count], cal->strand,
                                              cal->chromosome_id - 1, &start, &end, genome);

            // save some stuff, we'll use them after...
            // (start is stored after the genome read, which receives it by address)
            strands[sw_count] = cal->strand;
            chromosomes[sw_count] = cal->chromosome_id;
            starts[sw_count] = start;

            sw_count++;
        }

        // free cal_list; the slot is refilled below if any alignment is valid
        array_list_free(cal_list, (void *)cal_free);
        batch->mapping_lists[index] = NULL;
    }

    // run Smith-Waterman
    smith_waterman_mqmr(q, r, sw_total, sw_optarg, 1, output);

    size_t num_targets = 0;

    // filter alignments by min_score
    for (size_t i = 0; i < sw_total; i++) {
        if (output->score_p[i] >= min_score) {
            // valid mapping: insert it in the read's list for further processing
            size_t index = read_indices[i];

            // Always look the list up by read index rather than relying on
            // whichever list happened to be created last.
            array_list_t *mapping_list = batch->mapping_lists[index];
            if (mapping_list == NULL) {
                mapping_list = array_list_new(1000, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
                array_list_set_flag(0, mapping_list);

                batch->mapping_lists[index] = mapping_list;
                batch->targets[num_targets++] = index;
            }

            sw_output_t *sw_output = sw_output_new(strands[i], chromosomes[i], starts[i],
                                                   strlen(r[i]), strlen(output->query_map_p[i]),
                                                   output->query_start_p[i], output->ref_start_p[i],
                                                   output->score_p[i], score,
                                                   output->query_map_p[i], output->ref_map_p[i]);
            array_list_insert(sw_output, mapping_list);

            batch->num_to_do++;
        }

        // free query and reference
        free(q[i]);
        free(r[i]);
    }
    batch->num_targets = num_targets;

    // update the counter slot of the calling thread
    thr_sw_items[tid] += sw_count;

    // free
    sw_multi_output_free(output);
}