예제 #1
0
/**
 * Allocate an empty VCF batch.
 *
 * @param size Initial capacity of the record list; 0 selects a default of 100.
 * @return A new batch whose `text` is NULL and whose `records` list is empty,
 *         or NULL when the batch structure itself cannot be allocated.
 */
vcf_batch_t* vcf_batch_new(size_t size) {
    vcf_batch_t *vcf_batch = calloc(1, sizeof *vcf_batch);
    if (vcf_batch == NULL) {
        return NULL;
    }

    vcf_batch->text = NULL;
    // A zero-sized request falls back to the default initial capacity
    vcf_batch->records = array_list_new(size > 0 ? size : 100, 1.2, COLLECTION_MODE_ASYNCHRONIZED);

    return vcf_batch;
}
/*
old key path: key1_path, new key path: key2_path

if key1_path == key2_path: if either type or cb is different, add a new item; otherwise do nothing
if key1_path is the parent of key2_path: if either type or cb is different, add a new item; otherwise do nothing
if key1_path is the child of key2_path: if either type or cb is different, add a new item; otherwise replace the item for key1_path with the item for key2_path

==>

1. if either type or cb is different, add a new item
2. if both type and cb are the same, compare key1_path and key2_path: if key1_path is the child of key2_path, replace key1_path with key2_path

*/
/*
 * Register a notification callback for a set of keys under a path.
 *
 * The global register list is created on first use while holding
 * s_shared_preferences_sem. A new register item is added only when
 * tg_shared_preferences_find_register_item() finds no existing item for the
 * given path / normalized keys / cb / type.
 *
 * path: preferences path; rejected with SharedPreferences_PATH_ERROR when the
 *       return_val_if_fail check on it fails.
 * keys: key specification, normalized by tg_shared_preferences_normalize_keys();
 *       SharedPreferences_ERROR is returned when normalization yields nothing.
 * cb:   callback stored with the new item.
 * type: write type stored with the new item.
 *
 * Returns SharedPreferences_SUCC in every other case, whether or not a new
 * item had to be added.
 */
INT32 tg_shared_preferences_register(const CHAR* path,const CHAR* keys,SharedPreferences_Notification_Callback cb,SharedPreferences_WRITE_TYPE type)
{
    INT32 idx=0;
    CHAR* normalize_key = NULL;
    Shared_Preferences_Register_Item* item = NULL;
    INT32 ret = SharedPreferences_SUCC;

    /* Lazily create the shared register list under the semaphore */
    tg_os_WaitSemaphore(s_shared_preferences_sem);
    if (s_shared_preferences_register_list.list == NULL)
    {
        s_shared_preferences_register_list.list = array_list_new(tg_shared_preferences_register_free);
    }
    tg_os_SignalSemaphore(s_shared_preferences_sem);

    return_val_if_fail(path,SharedPreferences_PATH_ERROR);

    normalize_key = tg_shared_preferences_normalize_keys(keys);
    return_val_if_fail(normalize_key,SharedPreferences_ERROR);
    item = tg_shared_preferences_find_register_item(path,normalize_key,cb,type,&idx);

    if (!item)   /* no matching registration exists yet, so add one */
    {
        tg_shared_preferences_add_new_register_item(path,normalize_key,cb,type);
    }
    TG_FREE(normalize_key);
#ifdef SharedPreferences_DEBUG
     tg_shared_preferences_travel_register_list();
#endif
    return ret;
}
예제 #3
0
/**
 * Allocate an empty VCF header entry.
 *
 * @return A new entry with no name (NULL, length 0) and an empty `values`
 *         list of initial capacity 4, or NULL when the entry cannot be allocated.
 */
vcf_header_entry_t* vcf_header_entry_new() {
    vcf_header_entry_t *entry = malloc(sizeof *entry);
    if (entry == NULL) {
        return NULL;
    }

    entry->name = NULL;
    entry->name_len = 0;
    entry->values = array_list_new(4, 1.5, COLLECTION_MODE_ASYNCHRONIZED);
    return entry;
}
예제 #4
0
파일: import.c 프로젝트: jamie-pate/jstruct
struct jstruct_result
_jstruct_import(struct json_object *obj, const void *data,
        const struct jstruct_object_property *properties, struct json_object *errors) {
    _init_importers();
    if (errors != NULL && json_object_get_type(errors) != json_type_array) {
        return jstruct_error_new(jstruct_error_errors_not_array_or_null, NULL, json_object_get_type(errors));
    }
    const struct jstruct_object_property *property;
    struct json_object *prop;
    struct jstruct_result result = JSTRUCT_OK;
    result.allocated = array_list_new(jstruct_allocated_free);
    for (property = properties; property->name; ++property) {
        void *ptr = jstruct_prop_ptr(data, property, JSTRUCT_PROP_PTR_GET_NO_DEREF);
        struct jstruct_result err;
        if (json_object_object_get_ex(obj, property->name, &prop)) {
            if (json_object_get_type(prop) != property->type.json) {
                err = jstruct_error_new(jstruct_error_incorrect_type, property->name, json_object_get_type(prop));
            } else {
                jstruct_import_importer import = importers[json_type_index(property->type.json)];
                err = import(prop, data, ptr, property);
            }
        } else {
            if (!set_null(ptr, property)) {
                err = jstruct_error_array_add(errors, jstruct_error_not_nullable, property->name, 0);
            }
        }
        jstruct_error_consume(&result, &err, errors, property->name, -1);
    }
    if (result.allocated->length == 0) {
        array_list_free(result.allocated);
        result.allocated = NULL;
    }
    return result;
}
예제 #5
0
/*
 * Walk key_path from thiz->obj down to the object that holds the last path
 * component, and hand that last component back through leaf_key.
 *
 * thiz:     preferences instance whose JSON tree is searched.
 * key_path: path to split with tg_shared_preferences_parse_keypath().
 * leaf_key: when non-NULL and a parent is found, receives a copy of the last
 *           path component allocated with TG_CALLOC.
 *
 * Returns the parent JSON object, or NULL when an argument check fails, the
 * path cannot be parsed or has no components, an intermediate key is missing,
 * or the leaf key cannot be copied.
 */
json_object * tg_shared_preferences_find_parent_of_leaf(SharedPreferences* thiz,const CHAR* key_path,CHAR** leaf_key)
{
    struct json_object *jso=NULL;
    struct array_list* key_list = NULL;
    INT32 key_list_len = 0;
    INT32 idx = 0;
    CHAR* key = NULL;

    return_val_if_fail((thiz&&key_path),NULL);
    key_list = array_list_new(tg_shared_preferences_key_free);
    return_val_if_fail((key_list),NULL);

    /* Release the key list on failure instead of returning straight away;
     * an empty path has no leaf, so it is rejected here as well. */
    if (!tg_shared_preferences_parse_keypath(key_path,key_list,&key_list_len) || key_list_len < 1)
    {
        array_list_free(key_list);
        return NULL;
    }

    /* Descend through every component except the last one */
    for (jso=thiz->obj; jso!=NULL && idx<key_list_len-1; idx++)
    {
        jso = json_object_object_get(jso,(CHAR*)array_list_get_idx(key_list,idx));
    }

    if (jso!=NULL)
    {
        key = (CHAR*)array_list_get_idx(key_list,key_list_len-1);
        if (key==NULL)
        {
            jso = NULL;
        }
        else if (leaf_key!=NULL)
        {
            *leaf_key = TG_CALLOC((strlen(key)+1),1);
            if (*leaf_key!=NULL)
            {
                strcpy(*leaf_key,key);
            }
            else
            {
                /* Without the leaf key the parent is of no use to the caller */
                jso = NULL;
            }
        }
    }
    array_list_free(key_list);
    return jso;

}
예제 #6
0
/**
 * Install the stage functions of a workflow and allocate the per-stage state.
 *
 * Does nothing when either `functions` or `wf` is NULL. Otherwise, while
 * holding wf->main_mutex, it stores the stage count and function table and
 * allocates, per stage, a time accumulator, a mutex, a pending-items list
 * and, when `labels` is given, a duplicated label.
 *
 * @param num_stages Number of stages.
 * @param functions  Stage function table; stored as-is, not copied.
 * @param labels     Optional stage labels; NULL entries are left unset.
 * @param wf         Workflow to configure.
 */
void workflow_set_stages(int num_stages, workflow_stage_function_t *functions,
                         char **labels, workflow_t *wf) {
    if (!functions || !wf) {
        return;
    }

    pthread_mutex_lock(&wf->main_mutex);

    wf->num_stages = num_stages;
    wf->stage_functions = functions;

    // Per-stage timing accumulators and the mutexes protecting them
    wf->stage_times = (double *) calloc(num_stages, sizeof(double));
    wf->stage_times_mutex = (pthread_mutex_t *) calloc(num_stages, sizeof(pthread_mutex_t));

    int stage = 0;
    while (stage < num_stages) {
        pthread_mutex_init(&wf->stage_times_mutex[stage], NULL);
        stage++;
    }

    wf->pending_items = (array_list_t **) calloc(num_stages, sizeof(array_list_t *));

    if (labels) {
        wf->stage_labels = (char **) calloc(num_stages, sizeof(char *));
    }

    for (stage = 0; stage < num_stages; stage++) {
        wf->pending_items[stage] = array_list_new(100, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
        if (labels) {
            char *label = labels[stage];
            if (label) {
                wf->stage_labels[stage] = strdup(label);
            }
        }
    }

    pthread_mutex_unlock(&wf->main_mutex);
}
예제 #7
0
파일: vcf_filters.c 프로젝트: mrG7/hpg-libs
/**
 * Split records according to whether they carry an ID.
 *
 * A record whose ID is exactly "." counts as having no ID. When
 * include_snps is non-zero, records with an ID pass and records without one
 * fail; when it is zero the roles are swapped. Failed records are annotated
 * with filter_name and appended to `failed`. input_stats is not used.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t *snp_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int include_snps = ((snp_filter_args*)f_args)->include_snps;

    LOG_DEBUG_F("snp_filter (preserve SNPs = %d) over %zu records\n", include_snps, input_records->size);

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];
        int has_no_id = (current->id_len == 1 && strncmp(".", current->id, 1) == 0);
        // Keeping identified records is the inverse of keeping unidentified ones
        int keep = include_snps ? !has_no_id : has_no_id;

        if (keep) {
            array_list_insert(current, passed);
            continue;
        }
        annotate_failed_record(filter_name, filter_name_len, current);
        array_list_insert(current, failed);
    }

    return passed;
}
예제 #8
0
파일: vcf_filters.c 프로젝트: mrG7/hpg-libs
/**
 * Split records according to the is_indel flag of their statistics.
 *
 * input_stats[i] describes input_records->items[i]. When include_indels is
 * non-zero, indels pass and everything else fails; when it is zero the roles
 * are swapped. Failed records are annotated with filter_name and appended
 * to `failed`.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t *indel_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int include_indels = ((indel_filter_args*)f_args)->include_indels;

    LOG_DEBUG_F("indel_filter (preserve indels = %d) over %zu records\n", include_indels, input_records->size);

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];
        variant_stats_t *current_stats = input_stats[idx];

        int keep;
        if (current_stats->is_indel) {
            keep = include_indels ? 1 : 0;
        } else {
            keep = include_indels ? 0 : 1;
        }

        if (!keep) {
            annotate_failed_record(filter_name, filter_name_len, current);
            array_list_insert(current, failed);
        } else {
            array_list_insert(current, passed);
        }
    }

    return passed;
}
예제 #9
0
파일: vcf_filters.c 프로젝트: mrG7/hpg-libs
/**
 * Keep the records whose number of mendelian errors does not exceed a maximum.
 *
 * input_stats[i] describes input_records->items[i]. A record passes when its
 * mendelian_errors is <= max_mendelian_errors; otherwise it is annotated with
 * filter_name and appended to `failed`.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t* mendelian_errors_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);
    
    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int max_errors = ((mendelian_errors_filter_args*) args)->max_mendelian_errors;

    // The stats are stored at the same position as the record they describe,
    // so a variant_stats_t fulfilling the condition means the vcf_record_t at
    // that index passes the filter
    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *variant_stats = input_stats[i];
        
        if (variant_stats->mendelian_errors <= max_errors) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }
    
    return passed;
}
예제 #10
0
파일: vcf_filters.c 프로젝트: mrG7/hpg-libs
/**
 * Keep the records whose minor allele frequency reaches a minimum.
 *
 * input_stats[i] describes input_records->items[i]. The frequency compared
 * against min_maf is the smallest entry of alleles_freq, capped at 1.0.
 * Records below the threshold are annotated with filter_name and appended
 * to `failed`.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t* maf_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    float min_maf = ((maf_filter_args*) args)->min_maf;

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];
        variant_stats_t *current_stats = input_stats[idx];

        // Smallest allele frequency of this record, starting from the 1.0 upper bound
        float lowest_freq = 1.0;
        int allele = 0;
        while (allele < current_stats->num_alleles) {
            lowest_freq = fmin(lowest_freq, current_stats->alleles_freq[allele]);
            allele++;
        }

        if (!(lowest_freq >= min_maf)) {
            annotate_failed_record(filter_name, filter_name_len, current);
            array_list_insert(current, failed);
        } else {
            array_list_insert(current, passed);
        }
    }

    return passed;
}
예제 #11
0
파일: gff_batch.c 프로젝트: mrG7/hpg-libs
/**
 * Allocate an empty GFF batch.
 *
 * @param size Initial capacity of the record list; 0 selects a default of 100.
 * @return A new batch whose `text` is NULL and whose `records` list is empty,
 *         or NULL when the batch structure itself cannot be allocated.
 */
gff_batch_t* gff_batch_new(size_t size) {
    gff_batch_t *gff_batch = malloc(sizeof *gff_batch);
    if (gff_batch == NULL) {
        return NULL;
    }
    gff_batch->text = NULL;
    
    if (size < 1) {
        size = 100;
    }
    gff_batch->records = array_list_new(size, 1.4, COLLECTION_MODE_ASYNCHRONIZED);
    
    return gff_batch;
}
void *fastq_reader(void *input) {
     struct timeval start, end;
     double time;
     extern size_t fd_read_bytes;
     size_t read_bytes;
     //if (time_on) { start_timer(start); }

     wf_input_t *wf_input = (wf_input_t *) input;
     batch_t *new_batch = NULL;
     batch_t *batch = wf_input->batch;
     fastq_batch_reader_input_t *fq_reader_input = wf_input->fq_reader_input;
     array_list_t *reads = array_list_new(10000, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

     if (fq_reader_input->gzip) {
       //Gzip fastq file
       if (fq_reader_input->flags == SINGLE_END_MODE) {
	 fastq_gzread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1);
       } else {
	 //printf("Gzip Reader for pair-end not implemented\n");;
	 fastq_gzread_bytes_pe(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1, fq_reader_input->fq_gzip_file2);
	 //fastq_fread_bytes_aligner_pe(reads, fq_reader_input->batch_size, 
	 //		      fq_reader_input->fq_gzip_file1, fq_reader_input->fq_gzip_file2);
       }
     } else {
       //Fastq file
       if (fq_reader_input->flags == SINGLE_END_MODE) {
	 read_bytes = fastq_fread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_file1);
       } else {
	 read_bytes = fastq_fread_bytes_aligner_pe(reads, fq_reader_input->batch_size, 
				      fq_reader_input->fq_file1, fq_reader_input->fq_file2);
       }
       fd_read_bytes += read_bytes;
     }

     size_t num_reads = array_list_size(reads);

     if (num_reads == 0) {
	  array_list_free(reads, (void *)fastq_read_free);
     } else {
	  mapping_batch_t *mapping_batch = mapping_batch_new(reads, 
							     batch->pair_input->pair_mng);

	  new_batch = batch_new(batch->bwt_input, batch->region_input, batch->cal_input, 
				batch->pair_input, batch->preprocess_rna, batch->sw_input, batch->writer_input, 
				batch->mapping_mode, mapping_batch);
     }

     //if (time_on) { stop_timer(start, end, time); timing_add(time, FASTQ_READER, timing); }
     //printf("Read batch %i\n", num_reads);
     
     return new_batch;
}
/*
 * Lock or unlock a preferences path.
 *
 * The global lock list is created on first use while holding
 * s_shared_preferences_sem. Locking a path that is not in the list stores a
 * copy of it in the first free slot; unlocking a path that is in the list
 * clears its slot. Locking an already locked path and unlocking an unknown
 * path change nothing.
 *
 * path: path to lock or unlock.
 * lock: TRUE to lock, FALSE to unlock.
 *
 * Returns FALSE when the path or list check fails or the path copy cannot be
 * allocated, TRUE otherwise.
 */
BOOL tg_shared_preferences_lock(const CHAR* path,BOOL lock)
{
    struct array_list* list = NULL;
    INT32 idx = -1;
    CHAR* lock_path = NULL;

    tg_os_WaitSemaphore(s_shared_preferences_sem);
    if (s_shared_preferences_lock_list.lock_list == NULL)
    {
        s_shared_preferences_lock_list.lock_list = array_list_new(tg_shared_preferences_lock_free);
    }
    tg_os_SignalSemaphore(s_shared_preferences_sem);
    list = s_shared_preferences_lock_list.lock_list;
    return_val_if_fail(path,FALSE);
    return_val_if_fail(list,FALSE);

    idx = tg_shared_preferences_find_lock_path(path);
    if (idx>=0)
    {
        /* Path is currently locked: only an unlock request changes anything */
        if (!lock)
        {
            tg_os_WaitSemaphore(s_shared_preferences_sem);
            array_list_put_idx(list, idx, NULL);
            tg_os_SignalSemaphore(s_shared_preferences_sem);
        }
        return TRUE;
    }

    /* Path is not locked: an unlock request is a no-op */
    if (!lock)
    {
        return TRUE;
    }

    lock_path = TG_CALLOC((strlen(path)+1),1);
    if (lock_path == NULL)
    {
        /* Do not strcpy into a failed allocation */
        return FALSE;
    }
    strcpy(lock_path,path);

    tg_os_WaitSemaphore(s_shared_preferences_sem);
    array_list_put_idx(list, tg_shared_preferences_get_first_free_slot(s_shared_preferences_lock_list.lock_list), (void*)lock_path);
    tg_os_SignalSemaphore(s_shared_preferences_sem);

    return TRUE;
}
예제 #14
0
/**
 * Keep the records whose position falls in one of the configured regions.
 *
 * For every record a single-position probe region (start == end == the
 * record position, on the record's chromosome) is looked up in the region
 * table: with find_region_by_type() when a type was configured, with
 * find_region() otherwise. Records not found are annotated with filter_name
 * and appended to `failed`. input_stats is not used.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t *region_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    region_filter_args *args = (region_filter_args*) f_args;
    region_table_t *regions = args->regions;

    LOG_DEBUG_F("region_filter over %zu records\n", input_records->size);

    // One probe region is reused for all records; only its chromosome copy
    // is allocated and released per iteration
    region_t *probe = (region_t*) malloc (sizeof(region_t));
    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];

        probe->chromosome = strndup(current->chromosome, current->chromosome_len);
        probe->start_position = current->position;
        probe->end_position = current->position;

        int matched;
        if (args->type) {
            probe->type = args->type;
            matched = find_region_by_type(probe, regions);
        } else {
            matched = find_region(probe, regions);
        }

        if (!matched) {
            // The record lies outside every region
            annotate_failed_record(filter_name, filter_name_len, current);
            array_list_insert(current, failed);
        } else {
            // The record lies inside at least one region
            array_list_insert(current, passed);
        }

        free(probe->chromosome);
    }

    free(probe);

    return passed;
}
예제 #15
0
파일: vcf_filters.c 프로젝트: mrG7/hpg-libs
/**
 * Keep the records that follow an inheritance pattern in enough samples.
 *
 * input_stats[i] describes input_records->items[i]. For DOMINANT, both
 * cases_percent_dominant and controls_percent_dominant must reach
 * min_following_pattern; for RECESSIVE the recessive counterparts are used.
 * Records that do not qualify are annotated with filter_name and appended
 * to `failed`.
 *
 * NOTE(review): when the pattern is neither DOMINANT nor RECESSIVE a record
 * is added to neither list; confirm that callers never pass another value.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t *inheritance_pattern_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);
    
    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    enum inheritance_pattern pattern = ((inheritance_pattern_filter_args*)f_args)->pattern;
    float min_following_pattern = ((inheritance_pattern_filter_args*)f_args)->min_following_pattern;
    
    // The literal percent sign must be written as "%%": a bare "% o" is parsed
    // as a conversion and would consume the record count argument
    if (pattern == DOMINANT) {
        LOG_DEBUG_F("inheritance_pattern_filter (dominant in %.2f%% of samples) over %zu records\n", 
                    min_following_pattern * 100, input_records->size);
    } else {
        LOG_DEBUG_F("inheritance_pattern_filter (recessive in %.2f%% of samples) over %zu records\n", 
                    min_following_pattern * 100, input_records->size);
    }
    
    vcf_record_t *record;
    variant_stats_t *stats;
    for (int i = 0; i < input_records->size; i++) {
        record = input_records->items[i];
        stats = input_stats[i];
        
        if (pattern == DOMINANT) {
            if (stats->cases_percent_dominant >= min_following_pattern &&
                stats->controls_percent_dominant >= min_following_pattern) {
                array_list_insert(record, passed);
            } else {
                annotate_failed_record(filter_name, filter_name_len, record);
                array_list_insert(record, failed);
            }
        } else if (pattern == RECESSIVE) {
            if (stats->cases_percent_recessive >= min_following_pattern &&
                stats->controls_percent_recessive >= min_following_pattern) {
                array_list_insert(record, passed);
            } else {
                annotate_failed_record(filter_name, filter_name_len, record);
                array_list_insert(record, failed);
            }
        }
    }

    return passed;
}
예제 #16
0
/**
 * Append a record link to the list stored under `key` in the positions table,
 * creating the list when the key is not present yet.
 *
 * @param key            Position key to look up.
 * @param link           Record/file link to store.
 * @param positions_read Hash table mapping keys to lists of links.
 * @return When the key already exists, the result of array_list_insert().
 *         Otherwise the status reported by kh_put() for a duplicated copy of
 *         the key; the new list is attached only when that status is non-zero.
 */
int insert_position_read(char key[64], vcf_record_file_link* link, kh_pos_t* positions_read) {
    khiter_t slot = kh_get(pos, positions_read, key);

    // Known position: just extend its list
    if (slot != kh_end(positions_read)) {
        array_list_t *existing = kh_value(positions_read, slot);
        return array_list_insert(link, existing);
    }

    // Unknown position: build a list holding the link, then register it
    int status;
    array_list_t *fresh = array_list_new(8, 1.5, COLLECTION_MODE_SYNCHRONIZED);
    status = array_list_insert(link, fresh);
    slot = kh_put(pos, positions_read, strdup(key), &status);
    if (status) {
        kh_value(positions_read, slot) = fresh;
    }

    return status;
}
예제 #17
0
void *sa_fq_reader(void *input) {
  sa_wf_input_t *wf_input = (sa_wf_input_t *) input;
  
  sa_wf_batch_t *new_wf_batch = NULL;
  sa_wf_batch_t *curr_wf_batch = wf_input->wf_batch;
  
  fastq_batch_reader_input_t *fq_reader_input = wf_input->fq_reader_input;
  array_list_t *reads = array_list_new(fq_reader_input->batch_size, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

  if (fq_reader_input->gzip) {
    // Gzip fastq file
    if (fq_reader_input->flags == SINGLE_END_MODE) {
      fastq_gzread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1);
    } else {
      fastq_gzread_bytes_pe(reads, fq_reader_input->batch_size, fq_reader_input->fq_gzip_file1, fq_reader_input->fq_gzip_file2);
    }
  } else {
    // Fastq file
    if (fq_reader_input->flags == SINGLE_END_MODE) {
      fastq_fread_bytes_se(reads, fq_reader_input->batch_size, fq_reader_input->fq_file1);
    } else {
      fastq_fread_bytes_aligner_pe(reads, fq_reader_input->batch_size, 
				   fq_reader_input->fq_file1, fq_reader_input->fq_file2);
    }
  }
  
  size_t num_reads = array_list_size(reads);
  
  if (num_reads == 0) {
    array_list_free(reads, (void *)fastq_read_free);
  } else {
    sa_mapping_batch_t *sa_mapping_batch = sa_mapping_batch_new(reads);
    sa_mapping_batch->bam_format = wf_input->bam_format;

    new_wf_batch = sa_wf_batch_new(curr_wf_batch->options,
				   curr_wf_batch->sa_index,
				   curr_wf_batch->writer_input, 
				   sa_mapping_batch,
				   NULL);
  }

  return new_wf_batch;

}
예제 #18
0
/**
 * Store a batch of variant statistics in a SQLite database.
 *
 * Each variant_stats_t is converted to a variant_stats_db_fields_t, the
 * converted rows are inserted with insert_variant_stats_db_fields_list(),
 * and the temporary rows are released afterwards.
 *
 * @param db           Open database handle.
 * @param num_variants Number of entries in stats_batch.
 * @param stats_batch  Statistics to store.
 */
static void report_vcf_variant_stats_sqlite3(sqlite3 *db, int num_variants, variant_stats_t **stats_batch) {
    array_list_t *rows = array_list_new(num_variants + 1, 1.1, COLLECTION_MODE_ASYNCHRONIZED);

    int idx = 0;
    while (idx < num_variants) {
        variant_stats_t *vs = stats_batch[idx];
        variant_stats_db_fields_t *row = variant_stats_db_fields_new(
                vs->chromosome, vs->position, vs->ref_allele, vs->alt_alleles,
                vs->maf_allele, vs->maf, vs->mgf_genotype, vs->mgf,
                vs->missing_alleles, vs->missing_genotypes,
                vs->mendelian_errors, vs->is_indel,
                vs->cases_percent_dominant, vs->controls_percent_dominant,
                vs->cases_percent_recessive, vs->controls_percent_recessive);
        array_list_insert(row, rows);
        idx++;
    }

    insert_variant_stats_db_fields_list(rows, db);

    // The rows were only needed for the insertion
    array_list_free(rows, (void *)variant_stats_db_fields_free);
}
예제 #19
0
/**
 * Allocate a workflow with no stages, producer or consumer configured.
 *
 * Counters and timers start at zero, the producer, consumer and main mutexes
 * are initialized, the completed-items list is created empty and
 * complete_extra_stage is set to 1.
 *
 * @return The new workflow, or NULL when it cannot be allocated.
 */
workflow_t *workflow_new() {
    workflow_t *wf = calloc(1, sizeof *wf);
    if (wf == NULL) {
        return NULL;
    }

    wf->num_threads = 0;
    wf->max_num_work_items = 0;

    wf->num_stages = 0;
    wf->completed_producer = 0;

    wf->num_pending_items = 0;

    wf->running_producer = 0;
    wf->running_consumer = 0;

    pthread_mutex_init(&wf->producer_mutex, NULL);
    pthread_mutex_init(&wf->consumer_mutex, NULL);

    pthread_mutex_init(&wf->main_mutex, NULL);

    wf->workflow_time = 0;
    wf->producer_time = 0;
    wf->consumer_time = 0;
    wf->stage_times = NULL;

    // Per-stage pending lists are created later, once the stages are known
    wf->pending_items = NULL;
    wf->completed_items = array_list_new(100, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

    wf->stage_functions = NULL;
    wf->stage_labels = NULL;

    wf->producer_function = NULL;
    wf->producer_label = NULL;

    wf->consumer_function = NULL;
    wf->consumer_label = NULL;

    wf->complete_extra_stage = 1;

    return wf;
}
예제 #20
0
파일: vcf_filters.c 프로젝트: mrG7/hpg-libs
/**
 * Keep the records whose DP value in the INFO column reaches a minimum.
 *
 * The INFO text of each record is copied into a scratch buffer before the
 * "DP" field is looked up in the copy. A record passes when DP is present,
 * numeric and >= min_coverage. Every other record, including one whose INFO
 * cannot be copied because memory is exhausted, is annotated with
 * filter_name and appended to `failed`. input_stats is not used.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t* coverage_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);
    
    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int min_coverage = ((coverage_filter_args*)f_args)->min_coverage;

    LOG_DEBUG_F("coverage_filter (min coverage = %d) over %zu records\n", min_coverage, input_records->size);

    // Track the capacity explicitly: strlen() of the buffer says nothing
    // about how many bytes were allocated for it
    size_t aux_capacity = 128;
    char *aux_buffer = calloc(aux_capacity, sizeof(char));
    if (aux_buffer == NULL) {
        aux_capacity = 0;
    }

    vcf_record_t *record;
    for (int i = 0; i < input_records->size; i++) {
        record = input_records->items[i];
        char *record_coverage = NULL;

        size_t needed = (size_t) record->info_len + 1;
        if (needed > aux_capacity) {
            // Grow through a temporary so the old buffer survives a failure
            char *grown = realloc(aux_buffer, needed);
            if (grown != NULL) {
                aux_buffer = grown;
                aux_capacity = needed;
            }
        }

        if (needed <= aux_capacity) {
            strncpy(aux_buffer, record->info, record->info_len);
            // Terminate explicitly so nothing from an earlier, longer INFO
            // remains visible after the copy
            aux_buffer[record->info_len] = '\0';
            record_coverage = get_field_value_in_info("DP", aux_buffer);
        }

        if (record_coverage != NULL && is_numeric(record_coverage)) {
            if (atoi(record_coverage) >= min_coverage) {
                array_list_insert(record, passed);
            } else {
                annotate_failed_record(filter_name, filter_name_len, record);
                array_list_insert(record, failed);
            }
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
        
    }

    free(aux_buffer);
    return passed;
}
예제 #21
0
/*
 * Required interface.
 *
 * Resolve key_path to the JSON object it names inside thiz->obj.
 *
 * thiz:     preferences instance whose JSON tree is searched.
 * key_path: path to resolve; "/" names the root object itself.
 *
 * Returns the object found by following every path component in turn, or
 * NULL when an argument check fails, the path cannot be parsed, or a
 * component is missing.
 */
json_object * tg_shared_preferences_find_leaf_obj(SharedPreferences* thiz,const CHAR* key_path)
{
    struct json_object *jso=NULL;
    struct array_list* key_list = NULL;
    INT32 key_list_len = 0;
    INT32 idx = 0;

    return_val_if_fail((thiz&&key_path),NULL);
    if (strcmp(key_path,"/")==0)
        return thiz->obj;
    key_list = array_list_new(tg_shared_preferences_key_free);
    return_val_if_fail((key_list),NULL);

    /* Release the key list on failure instead of returning straight away */
    if (!tg_shared_preferences_parse_keypath(key_path,key_list,&key_list_len))
    {
        array_list_free(key_list);
        return NULL;
    }

    /* Stop descending as soon as a component cannot be found */
    for (jso=thiz->obj; idx<key_list_len &&jso; idx++)
    {
        jso = json_object_object_get(jso,(CHAR*)array_list_get_idx(key_list,idx));

    }
    array_list_free(key_list);
    return jso;

}
예제 #22
0
파일: vcf_filters.c 프로젝트: mrG7/hpg-libs
/**
 * Keep the records whose quality reaches a minimum.
 *
 * A record passes when its quality is >= min_quality; otherwise it is
 * annotated with filter_name and appended to `failed`. input_stats is not used.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t* quality_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int min_quality = ((quality_filter_args*)f_args)->min_quality;

    LOG_DEBUG_F("quality_filter (min quality = %d) over %zu records\n", min_quality, input_records->size);

    int idx = 0;
    while (idx < input_records->size) {
        vcf_record_t *current = input_records->items[idx];
        idx++;

        if (current->quality >= min_quality) {
            array_list_insert(current, passed);
            continue;
        }
        annotate_failed_record(filter_name, filter_name_len, current);
        array_list_insert(current, failed);
    }

    return passed;
}
예제 #23
0
/**
 * Create a deep copy of a VCF record.
 *
 * Every text column is duplicated together with its length, the position and
 * quality are copied by value, and each sample string is duplicated into a
 * new samples list.
 *
 * NOTE(review): other vcf_record_t fields that are not visible here (for
 * example a variant type) stay zeroed in the copy; confirm whether any of
 * them should be copied as well.
 *
 * @param orig Record to copy.
 * @return The new record, or NULL when the record structure cannot be allocated.
 */
vcf_record_t *vcf_record_copy(vcf_record_t *orig) {
    vcf_record_t *record = calloc(1, sizeof *record);
    if (record == NULL) {
        return NULL;
    }

    record->chromosome = strndup(orig->chromosome, orig->chromosome_len);
    record->chromosome_len = orig->chromosome_len;
    record->position = orig->position;
    record->id = strndup(orig->id, orig->id_len);
    record->id_len = orig->id_len;
    record->reference = strndup(orig->reference, orig->reference_len);
    record->reference_len = orig->reference_len;
    record->alternate = strndup(orig->alternate, orig->alternate_len);
    record->alternate_len = orig->alternate_len;
    // Without this the copy would carry the zero left by calloc
    record->quality = orig->quality;
    record->filter = strndup(orig->filter, orig->filter_len);
    record->filter_len = orig->filter_len;
    record->info = strndup(orig->info, orig->info_len);
    record->info_len = orig->info_len;
    record->format = strndup(orig->format, orig->format_len);
    record->format_len = orig->format_len;
    record->samples = array_list_new(orig->samples->size + 1, 1.5, COLLECTION_MODE_ASYNCHRONIZED);
    for (int i = 0; i < orig->samples->size; i++) {
        array_list_insert(strdup(array_list_get(i, orig->samples)), record->samples);
    }
    return record;
}
예제 #24
0
/**
 * Keep the records whose fraction of missing alleles does not exceed a maximum.
 *
 * input_stats[i] describes input_records->items[i]. The fraction is
 * missing_alleles / (sum of alleles_count + missing_alleles). Records above
 * max_missing are annotated with filter_name and appended to `failed`.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t* missing_values_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);
    
    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    float max_missing = ((missing_values_filter_args*) args)->max_missing;
    float record_missing;
    float allele_count;

    variant_stats_t *variant_stats;
    // The stats are stored at the same position as the record they describe,
    // so a variant_stats_t fulfilling the condition means the vcf_record_t at
    // that index passes the filter
    vcf_record_t *record;
    for (int i = 0; i < input_records->size; i++) {
        record = input_records->items[i];
        variant_stats = input_stats[i];
        allele_count = 0;
        
        for (int j = 0; j < variant_stats->num_alleles; j++) {
            allele_count += variant_stats->alleles_count[j];
        }
        // allele_count is a float, so this is a floating-point division
        record_missing = variant_stats->missing_alleles / (allele_count + variant_stats->missing_alleles);
        
        if (record_missing <= max_missing) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }
    
    return passed;
}
예제 #25
0
파일: vcf_filters.c 프로젝트: mrG7/hpg-libs
/**
 * Keep the records of one variant type.
 *
 * A record passes when its type equals the configured type; otherwise it is
 * annotated with filter_name and appended to `failed`. input_stats is not used.
 *
 * @return Newly created list with the records that passed.
 */
array_list_t *variant_type_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    enum variant_type type = ((variant_type_filter_args*)f_args)->type;

    LOG_DEBUG_F("variant_type_filter (variant_type  %d) over %zu records\n", type, input_records->size);

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *current = input_records->items[idx];

        if (current->type != type) {
            annotate_failed_record(filter_name, filter_name_len, current);
            array_list_insert(current, failed);
            continue;
        }
        array_list_insert(current, passed);
    }

    return passed;
}
예제 #26
0
/**
 * Replaces the BWT anchors held in 'list' by CALs built from them.
 *
 * Steps, in order:
 *  1. Every anchor is sorted into one of four lists by strand (1 / other) and
 *     type (FORWARD_ANCHOR / other). The longest anchor longer than
 *     MIN_SINGLE_ANCHOR is remembered, and anchors for which
 *     read_length - length < 16 are additionally put in a "big anchor" list.
 *  2. 'list' is cleared (the anchors now live in the lists above).
 *  3. If there are big anchors, each is converted into a CAL, 'list' is
 *     flagged SINGLE_ANCHORS and the function finishes.
 *  4. Otherwise forward and backward anchors of the same strand are paired;
 *     each pair produces two CALs and 'list' is flagged DOUBLE_ANCHORS.
 *  5. If no pair was found but a long single anchor was, every anchor sharing
 *     its strand and type is converted into a CAL and 'list' is flagged
 *     SINGLE_ANCHORS.
 *
 * @param list         in: anchors (bwt_anchor_t *); out: CALs (cal_t *)
 * @param read_length  length of the read the anchors belong to
 * @return number of elements in 'list' on exit
 */
size_t bwt_search_pair_anchors(array_list_t *list, unsigned int read_length) {
  bwt_anchor_t *bwt_anchor;
  int max_anchor_length = 0;


  bwt_anchor_t *bwt_anchor_back, *bwt_anchor_forw;
  int anchor_length_tmp, anchor_back, anchor_forw;
  // strand/type of the longest single anchor seen so far (only meaningful
  // when found_anchor is set)
  int strand = 0, type = 0;
  int found_anchor = 0, found_double_anchor = 0;

  // minimum length each anchor of a forward/backward pair must have
  const int MIN_ANCHOR = 25;
  // an anchor must be longer than this to be remembered as best single anchor
  const int MIN_SINGLE_ANCHOR = 40;

  // first argument given to array_list_new() for the helper lists
  // (presumably the initial capacity -- confirm against array_list_new)
  const int MAX_BWT_REGIONS = 50;
  // maximum accepted distance between the two anchors of a pair
  const int MAX_BWT_ANCHOR_DISTANCE = 500000;

  array_list_t *anchor_list_tmp, *forward_anchor_list, *backward_anchor_list;
  cal_t *cal;
  int seed_size, gap_read, gap_genome;

  // suffix _1: anchors with strand == 1, suffix _0: every other strand value
  array_list_t *backward_anchor_list_0 = array_list_new(MAX_BWT_REGIONS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
  array_list_t *forward_anchor_list_0 = array_list_new(MAX_BWT_REGIONS, 1.25f , COLLECTION_MODE_ASYNCHRONIZED);
  array_list_t *backward_anchor_list_1 = array_list_new(MAX_BWT_REGIONS, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);
  array_list_t *forward_anchor_list_1 = array_list_new(MAX_BWT_REGIONS, 1.25f , COLLECTION_MODE_ASYNCHRONIZED);

  // anchors that cover the read except for fewer than 16 positions
  array_list_t *big_anchor_list = array_list_new(MAX_BWT_REGIONS, 1.25f , COLLECTION_MODE_ASYNCHRONIZED);

  // Step 1: classify every anchor by strand and type
  for (int i = 0; i < array_list_size(list); i++) {
    bwt_anchor = array_list_get(i, list);
    if (bwt_anchor->strand == 1) {
      if (bwt_anchor->type == FORWARD_ANCHOR) {
	array_list_insert(bwt_anchor, forward_anchor_list_1);
      } else {
	array_list_insert(bwt_anchor, backward_anchor_list_1);
      }
    } else {
      if (bwt_anchor->type == FORWARD_ANCHOR) {
	array_list_insert(bwt_anchor, forward_anchor_list_0);
      } else {
	array_list_insert(bwt_anchor, backward_anchor_list_0);
      }
    }

    // keep track of the longest anchor that is longer than MIN_SINGLE_ANCHOR
    anchor_length_tmp = bwt_anchor->end - bwt_anchor->start + 1;
    if (anchor_length_tmp > MIN_SINGLE_ANCHOR && anchor_length_tmp > max_anchor_length) {
      max_anchor_length = anchor_length_tmp;
      found_anchor = 1;
      strand = bwt_anchor->strand;
      type = bwt_anchor->type;
    }

    // NOTE(review): read_length is unsigned, so this subtraction is evaluated
    // in unsigned arithmetic; an anchor longer than the read wraps around to
    // a huge value and is therefore NOT treated as a big anchor.
    if (read_length - anchor_length_tmp < 16) {
      array_list_insert(bwt_anchor, big_anchor_list);
    }

  }

  // Step 2: empty the caller's list; a NULL callback is passed, the anchors
  // themselves stay referenced by the strand/type lists above
  array_list_clear(list, NULL);

  // Step 3: big anchors win -- each one becomes a CAL on its own
  if (array_list_size(big_anchor_list) > 0) {
    for (int i = array_list_size(big_anchor_list) - 1; i >= 0; i--) {
      bwt_anchor = array_list_remove_at(i, big_anchor_list);
      // this size_t seed_size shadows the int seed_size declared above
      size_t seed_size = bwt_anchor->end - bwt_anchor->start;

      // forward anchors are placed at the beginning of the read, the other
      // ones at its end
      if (bwt_anchor->type == FORWARD_ANCHOR) {
	cal = convert_bwt_anchor_to_CAL(bwt_anchor, 0, seed_size);
      } else {
	cal = convert_bwt_anchor_to_CAL(bwt_anchor, read_length - seed_size - 1, read_length - 1);
      }

      array_list_insert(cal, list);
    }
    array_list_set_flag(SINGLE_ANCHORS, list);

    goto exit;
  }

  // Step 4: pair forward with backward anchors, one strand group per pass.
  // The loop variable shadows the outer 'type' (which keeps the type of the
  // longest single anchor); value 1 selects the *_0 lists, value 0 the *_1 lists.
  for (int type = 1; type >= 0; type--) {
    if (!type) {
      forward_anchor_list = forward_anchor_list_1;
      backward_anchor_list = backward_anchor_list_1;
    } else {
      forward_anchor_list = forward_anchor_list_0;
      backward_anchor_list = backward_anchor_list_0;
    }

    // set_forward[i] / set_backward[j] are set to 1 once the anchor at that
    // position has been used in a pair
    int *set_forward  = (int *)calloc(array_list_size(forward_anchor_list),  sizeof(int));
    int *set_backward = (int *)calloc(array_list_size(backward_anchor_list), sizeof(int));

    // associate every forward anchor with the first compatible backward anchor
    for (int i = 0; i < array_list_size(forward_anchor_list); i++) {
      if (set_forward[i]) { continue; }
      bwt_anchor_forw = array_list_get(i, forward_anchor_list);
      for (int j = 0; j < array_list_size(backward_anchor_list); j++) {
	if (set_backward[j]) { continue; }
	bwt_anchor_back = array_list_get(j, backward_anchor_list);
	anchor_forw = (bwt_anchor_forw->end - bwt_anchor_forw->start + 1);
	anchor_back = (bwt_anchor_back->end - bwt_anchor_back->start + 1);

	anchor_length_tmp = anchor_forw + anchor_back;

	// a pair needs: same chromosome, anchors no further apart than
	// MAX_BWT_ANCHOR_DISTANCE and both at least MIN_ANCHOR long
	if (bwt_anchor_forw->chromosome == bwt_anchor_back->chromosome &&
	    abs(bwt_anchor_back->start - bwt_anchor_forw->end) <= MAX_BWT_ANCHOR_DISTANCE &&
	    anchor_forw >= MIN_ANCHOR && anchor_back >= MIN_ANCHOR) {

	  // the backward anchor must not start before the forward one ends
	  if (bwt_anchor_back->start < bwt_anchor_forw->end) { continue; }

	  // read positions / genome positions left between both anchors
	  gap_read = read_length - (anchor_forw + anchor_back);
	  gap_genome = bwt_anchor_back->start - bwt_anchor_forw->end;

	  // apply_flank is set below but not read anywhere in this function
	  int apply_flank = 0;
	  if (gap_read < 2 || gap_genome < 2) {
	    // the anchors touch or overlap: shorten both of them in place
	    int gap;
	    if (gap_read < 0 && gap_genome < 0) {
	      gap = abs(gap_read) > abs(gap_genome) ? abs(gap_read) : abs(gap_genome);
	    } else if (gap_read < 0) {
	      gap = abs(gap_read);
	    } else if (gap_genome < 0) {
	      gap = abs(gap_genome);
	    } else {
	      gap = 2;
	    }

	    int flank  = 5;
	    apply_flank = 1;

	    if (abs(gap) >= flank*2) {
	      // large overlap: grow the flank to half the overlap plus flank/2
	      flank = abs(gap)/2 + flank/2;
	    }
	    // never trim more than half of an anchor that is not longer than
	    // the flank
	    if (flank >= anchor_forw) {
	      bwt_anchor_forw->end -= anchor_forw/2;
	    } else {
	      bwt_anchor_forw->end -= flank;
	    }

	    if (flank >= anchor_back) {
	      bwt_anchor_back->start += anchor_back/2;
	    } else {
	      bwt_anchor_back->start += flank;
	    }
	  }

	  // first CAL: forward anchor, placed at the beginning of the read
	  cal = convert_bwt_anchor_to_CAL(bwt_anchor_forw, 0, bwt_anchor_forw->end - bwt_anchor_forw->start);
	  array_list_insert(cal, list);
	  seed_size = bwt_anchor_back->end - bwt_anchor_back->start + 1;
	  // second CAL: backward anchor, placed at the end of the read
	  cal = convert_bwt_anchor_to_CAL(bwt_anchor_back, read_length - seed_size, read_length - 1);
	  array_list_insert(cal, list);
	  // stop the whole search once more than 5 CALs have been produced;
	  // note that this path leaves before the DOUBLE_ANCHORS flag is set
	  // for the pair just inserted
	  if (array_list_size(list) > 5) {
	    free(set_backward);
	    free(set_forward);
	    goto exit;
	  }

	  array_list_set_flag(DOUBLE_ANCHORS, list);
	  found_double_anchor = 1;
	  set_forward[i]  = 1;
	  set_backward[j] = 1;
	  break;
	}
      }
    }
    free(set_backward);
    free(set_forward);
  }

  // Step 5: no pair found, fall back to the group of the longest single anchor
  if (!found_double_anchor && found_anchor) {
    // here 'strand' and 'type' are the values recorded in step 1
    if (strand == 1) {
      if (type == FORWARD_ANCHOR) {
	anchor_list_tmp = forward_anchor_list_1;
      } else {
	anchor_list_tmp =  backward_anchor_list_1;
      }
    } else {
      if (type == FORWARD_ANCHOR) {
	anchor_list_tmp =  forward_anchor_list_0;
      } else {
	anchor_list_tmp =  backward_anchor_list_0;
      }
    }

    // every anchor of that group becomes a CAL, not only the longest one
    for (int i = 0; i < array_list_size(anchor_list_tmp); i++) {
      bwt_anchor = array_list_get(i, anchor_list_tmp);
      // this size_t seed_size shadows the int seed_size declared above
      size_t seed_size = bwt_anchor->end - bwt_anchor->start;
      if (bwt_anchor->type == FORWARD_ANCHOR) {
	cal = convert_bwt_anchor_to_CAL(bwt_anchor, 0, seed_size);
      } else {
	cal = convert_bwt_anchor_to_CAL(bwt_anchor, read_length - seed_size - 1, read_length - 1);
      }
      array_list_insert(cal, list);
    }
    array_list_set_flag(SINGLE_ANCHORS, list);
  }

 exit:
  // Release the helper lists together with the anchors they hold.
  // NOTE(review): big anchors are also stored in one of the four strand/type
  // lists; this is only safe because the big-anchor branch takes every
  // element out of big_anchor_list with array_list_remove_at() (presumably
  // removing it from the list -- confirm) before jumping here.
  array_list_free(forward_anchor_list_1, (void *)bwt_anchor_free);
  array_list_free(backward_anchor_list_1,  (void *)bwt_anchor_free);
  array_list_free(forward_anchor_list_0,  (void *)bwt_anchor_free);
  array_list_free(backward_anchor_list_0,  (void *)bwt_anchor_free);
  array_list_free(big_anchor_list,  (void *)bwt_anchor_free);

  return array_list_size(list);

}
예제 #27
0
/**
 * Allocates a zero-initialised VCF record whose sample list is empty.
 *
 * @return the new record, or NULL when the record or its sample list could
 *         not be allocated
 */
vcf_record_t* vcf_record_new() {
    vcf_record_t *record = calloc(1, sizeof *record);
    if (record == NULL) {
        return NULL;
    }

    record->samples = array_list_new(16, 1.5, COLLECTION_MODE_ASYNCHRONIZED);
    if (record->samples == NULL) {
        // presumably array_list_new() reports failure with NULL; a record
        // without a sample list is unusable, so do not hand it out
        free(record);
        return NULL;
    }

    return record;
}
예제 #28
0
/**
 * Smith-Waterman stage of the bisulfite pipeline.
 *
 * First the gaps between seeds and at the read ends are filled and the seed
 * regions merged, once for each of the two mapping-list sets of the batch.
 * Then, for every target read of both sets, each CAL whose normalised score is
 * greater than input->min_score is turned into an alignment. The CAL list of
 * the read is freed and replaced, in the same mapping-list slot, by the list
 * of alignments.
 *
 * @param input  stage input: both genomes, the SW options and the minimum score
 * @param batch  batch whose mapping_batch is processed in place
 * @return BS_POST_PAIR_STAGE, the identifier of the next stage
 */
int apply_sw_bs_4nt(sw_server_input_t* input, batch_t *batch) {

  mapping_batch_t *mapping_batch = batch->mapping_batch;
  genome_t *genome1 = input->genome1_p;
  genome_t *genome2 = input->genome2_p;
  sw_optarg_t *sw_optarg = &input->sw_optarg;
  const float min_score = input->min_score;

  // fill gaps between seeds (note that the genome order differs between the
  // gap filling and the end-gap filling, and is swapped for the second set)
  fill_gaps_bs(mapping_batch, sw_optarg, genome2, genome1, 20, 5, 1);
  merge_seed_regions_bs(mapping_batch, 1);
  fill_end_gaps_bs(mapping_batch, sw_optarg, genome1, genome2, 20, 400, 1);

  fill_gaps_bs(mapping_batch, sw_optarg, genome1, genome2, 20, 5, 0);
  merge_seed_regions_bs(mapping_batch, 0);
  fill_end_gaps_bs(mapping_batch, sw_optarg, genome2, genome1, 20, 400, 0);

  // now we can create the alignments
  array_list_t *fq_batch = mapping_batch->fq_batch;

  for (int bs_id = 0; bs_id < 2; bs_id++) {

    array_list_t **mapping_lists;
    size_t num_targets;
    size_t *targets;

    if (bs_id == 0) {
      mapping_lists = mapping_batch->mapping_lists;
      num_targets = mapping_batch->num_targets;
      targets = mapping_batch->targets;
    } else {
      mapping_lists = mapping_batch->mapping_lists2;
      num_targets = mapping_batch->num_targets2;
      targets = mapping_batch->targets2;
    }

    for (size_t i = 0; i < num_targets; i++) {
      size_t read_index = targets[i];
      fastq_read_t *read = (fastq_read_t *) array_list_get(read_index, fq_batch);

      array_list_t *cal_list = mapping_lists[read_index];
      size_t num_cals = array_list_size(cal_list);

      // reads without CALs keep their (empty) list untouched
      if (num_cals == 0) continue;

      size_t read_len = read->length;

      array_list_t *alignment_list = array_list_new(num_cals, 1.25f, COLLECTION_MODE_ASYNCHRONIZED);

      // processing each CAL from this read
      for (size_t j = 0; j < num_cals; j++) {

        cal_t *cal = array_list_get(j, cal_list);
        if (cal->sr_list->size == 0) continue;

        // the cigar of the CAL hangs from its first seed region
        seed_region_t *s = (seed_region_t *) linked_list_get_first(cal->sr_list);
        cigar_code_t *cigar_code = (cigar_code_t *) s->info;

        float norm_score = cigar_code_get_score(read_len, cigar_code);
        LOG_DEBUG_F("score = %0.2f\n", norm_score);

        // filter by SW score
        if (norm_score > min_score) {

          // update the cigar and build its string once; the same string is
          // logged and handed to the alignment (building a second one only
          // for the log would presumably leak it)
          cigar_code_update(cigar_code);
          char *cigar_string = new_cigar_code_string(cigar_code);
          LOG_DEBUG_F("\tcigar code = %s\n", cigar_string);

          // sequence and quality of the matched part of the read; a leading
          // hard clip ('H') shifts the start
          size_t match_len = cigar_code_nt_length(cigar_code);
          cigar_op_t *first_op = cigar_code_get_first_op(cigar_code);
          size_t match_start = (first_op && first_op->name == 'H' ? first_op->number : 0);

          char *match_seq = (char *) malloc((match_len + 1) * sizeof(char));
          memcpy(match_seq, &read->sequence[match_start], match_len);
          match_seq[match_len] = 0;

          char *match_qual = (char *) malloc((match_len + 1) * sizeof(char));
          memcpy(match_qual, &read->quality[match_start], match_len);
          match_qual[match_len] = 0;

          // set optional fields: three tags, each a 3-byte name/type prefix
          // followed by the raw bytes of an int
          int optional_fields_length = 100;
          char *optional_fields = (char *) calloc(optional_fields_length, sizeof(char));
          char *p = optional_fields;

          // scale first, truncate afterwards: a cast binds tighter than '*',
          // so "(int) norm_score * 100" would give 0 for any score below 1
          int AS = (int) (norm_score * 100);
          // copy from a real int instead of the first bytes of a size_t
          int NH = (int) num_cals;

          memcpy(p, "ASi", 3);
          p += 3;
          memcpy(p, &AS, sizeof(int));
          p += sizeof(int);

          memcpy(p, "NHi", 3);
          p += 3;
          memcpy(p, &NH, sizeof(int));
          p += sizeof(int);

          memcpy(p, "NMi", 3);
          p += 3;
          memcpy(p, &cigar_code->distance, sizeof(int));

          assert(read->length == cigar_code_nt_length(cigar_code));

          // create an alignment and insert it into the list
          alignment_t *alignment = alignment_new();

          // the alignment name is the read id up to its first blank
          size_t header_len = strlen(read->id);
          char *head_id = (char *) malloc(header_len + 1);

          get_to_first_blank(read->id, header_len, head_id);

          alignment_init_single_end(head_id, match_seq, match_qual,
                                    cal->strand, cal->chromosome_id - 1, cal->start - 1,
                                    cigar_string,
                                    cigar_code_get_num_ops(cigar_code),
                                    norm_score * 254, 1, (num_cals > 1),
                                    optional_fields_length, optional_fields, alignment);

          array_list_insert(alignment, alignment_list);

          LOG_DEBUG_F("creating alignment (bs_id = %i)...\n", bs_id);
        }
      }

      // free the cal list, and update the mapping list with the alignment list
      array_list_free(cal_list, (void *) cal_free);
      mapping_lists[read_index] = alignment_list;
    }
  }

  // go to the next stage
  return BS_POST_PAIR_STAGE;
}
예제 #29
0
/**
 * Small command-line driver for the BWT mapper.
 *
 * Usage: <program> index-dirname seq num-errors
 *
 * Maps the given sequence against the index allowing 'num-errors' errors and
 * prints the resulting alignments; then maps exact seeds of the encoded
 * sequence and prints the regions found.
 *
 * @return 0 on success; the process exits with -1 on a usage error or when
 *         memory cannot be obtained
 */
int main(int argc, char *argv[]) {

  if (argc != 4) {
    printf("Error.\n");
    printf("Usage: %s index-dirname seq num-errors\n", argv[0]);
    exit(-1);
  }

  char *index_dirname = argv[1];
  char *seq = argv[2];
  int num_errors = atoi(argv[3]);

  // initializations
  initReplaceTable();

  bwt_optarg_t *bwt_optarg = bwt_optarg_new(num_errors, 1, 10000, 1, 0, 0);
  bwt_index_t *bwt_index = bwt_index_new(index_dirname);

  // seq: map the whole sequence and print every alignment
  {
    array_list_t *mapping_list = array_list_new(100000, 1.25f,
                                                COLLECTION_MODE_SYNCHRONIZED);

    size_t num_mappings = bwt_map_seq(seq, bwt_optarg,
                                      bwt_index, mapping_list);
    printf("seq: %s\n", seq);
    // size_t values are printed with %zu
    printf("num_mappings = %zu\n", num_mappings);
    for (size_t i = 0; i < num_mappings; i++) {
      printf("%zu\t---------------------\n", i);
      alignment_print(array_list_get(i, mapping_list));
    }
  }


  // seed: map exact seeds of the encoded sequence and print every region
  {
    array_list_t *mapping_list = array_list_new(100000, 1.25f,
                                                COLLECTION_MODE_SYNCHRONIZED);

    size_t len = strlen(seq);
    char *code_seq = (char *) calloc(len + 10, sizeof(char));
    if (code_seq == NULL) {
      fprintf(stderr, "Error: out of memory.\n");
      exit(-1);
    }
    replaceBases(seq, code_seq, len);

    size_t num_mappings = bwt_map_exact_seeds_seq(code_seq, 18, 16,
                                                  bwt_optarg, bwt_index, mapping_list);

    for (size_t i = 0; i < num_mappings; i++) {
      region_t *region = array_list_get(i, mapping_list);
      printf("Region: chr = %lu, strand = %d, start = %lu, end = %lu\n",
             region->chromosome_id, region->strand, region->start, region->end);
    }

    // the encoded copy is no longer needed once the regions are printed
    free(code_seq);
  }

  printf("Done.\n");

  return 0;
}
예제 #30
0
/**
 * Runs Smith-Waterman for every CAL of every target read of the batch.
 *
 * For each CAL the query (the read, reverse-complemented when the CAL strand
 * is 1) and the reference (the CAL region extended by input->flank_length on
 * both sides) are prepared; the CAL lists are freed. After the SW run, every
 * result whose score is at least input->min_score is stored as an sw_output
 * in a new mapping list for its read, and batch->targets / batch->num_targets
 * are rewritten to contain only the reads that got at least one valid result.
 *
 * Counters: batch->num_done takes the previous value of batch->num_to_do,
 * batch->num_to_do ends as the number of valid results, and the per-thread
 * counter thr_sw_items is increased by the number of SW problems prepared.
 *
 * @param input  stage input (genome, flank length, minimum score, SW options)
 * @param batch  batch processed in place
 */
void apply_sw(sw_server_input_t* input, aligner_batch_t *batch) {

  int tid = omp_get_thread_num();

  fastq_batch_t *fq_batch = batch->fq_batch;
  genome_t *genome = input->genome_p;
  size_t flank_length = input->flank_length;
  float min_score = input->min_score;
  sw_optarg_t *sw_optarg = &input->sw_optarg;

  size_t num_seqs = batch->num_targets;

  // set to zero
  batch->num_done = batch->num_to_do;
  batch->num_to_do = 0;

  // Number of SW problems = number of CALs actually pending. Counting them
  // here (instead of trusting the counter above) guarantees that the arrays
  // below are neither overrun nor left partly uninitialised.
  size_t sw_total = 0;
  for (size_t i = 0; i < num_seqs; i++) {
    sw_total += array_list_size(batch->mapping_lists[batch->targets[i]]);
  }

  if (sw_total == 0) {
    // nothing to align: only release the (empty) CAL lists; this also avoids
    // declaring zero-length arrays below
    for (size_t i = 0; i < num_seqs; i++) {
      size_t index = batch->targets[i];
      array_list_free(batch->mapping_lists[index], (void *)cal_free);
      batch->mapping_lists[index] = NULL;
    }
    batch->num_targets = 0;
    return;
  }

  sw_multi_output_t *output = sw_multi_output_new(sw_total);
  char *q[sw_total], *r[sw_total];
  uint8_t strands[sw_total], chromosomes[sw_total];
  size_t starts[sw_total], read_indices[sw_total];
  size_t sw_count = 0;

  // initialize query and reference sequences to Smith-Waterman
  for (size_t i = 0; i < num_seqs; i++) {
    size_t index = batch->targets[i];

    array_list_t *cal_list = batch->mapping_lists[index];
    size_t num_cals = array_list_size(cal_list);

    // processing each CAL from this read
    for (size_t j = 0; j < num_cals; j++) {

      // get cal and read index
      cal_t *cal = array_list_get(j, cal_list);
      read_indices[sw_count] = index;

      // query sequence, revcomp if necessary
      int read_len = fq_batch->data_indices[index + 1] - fq_batch->data_indices[index];
      q[sw_count] = (char *) calloc((read_len + 1), sizeof(char));
      memcpy(q[sw_count], &(fq_batch->seq[fq_batch->data_indices[index]]), read_len);
      if (cal->strand == 1) {
        seq_reverse_complementary(q[sw_count], read_len);
      }

      // reference sequence: the CAL region plus a flank on both sides
      // NOTE(review): if cal->start is smaller than flank_length this
      // subtraction wraps around (size_t) -- confirm that callers or
      // genome_read_sequence_by_chr_index() guard against it
      size_t start = cal->start - flank_length;
      size_t end = cal->end + flank_length;
      r[sw_count] = calloc(1, end - start + 2);
      genome_read_sequence_by_chr_index(r[sw_count], cal->strand,
                                        cal->chromosome_id - 1, &start, &end, genome);

      // save some stuff, we'll use them after...
      strands[sw_count] = cal->strand;
      chromosomes[sw_count] = cal->chromosome_id;
      starts[sw_count] = start;

      // increase counter
      sw_count++;
    }

    // free cal_list
    array_list_free(cal_list, (void *)cal_free);
    batch->mapping_lists[index] = NULL;
  }

  // run Smith-Waterman
  smith_waterman_mqmr(q, r, sw_total, sw_optarg, 1, output);

  size_t num_targets = 0;
  // filter alignments by min_score
  for (size_t i = 0; i < sw_total; i++) {

    if (output->score_p[i] >= min_score) {
      // valid mapping, insert in the list for further processing
      size_t index = read_indices[i];

      // always fetch the list of the current read instead of relying on the
      // one created for a previous result
      array_list_t *mapping_list = batch->mapping_lists[index];
      if (mapping_list == NULL) {
        mapping_list = array_list_new(1000,
                                      1.25f,
                                      COLLECTION_MODE_ASYNCHRONIZED);
        array_list_set_flag(0, mapping_list);

        batch->mapping_lists[index] = mapping_list;
        batch->targets[num_targets++] = index;
      }

      // NOTE(review): no normalised score is computed in this function; a
      // defined 0 is passed instead of an uninitialised value. Compute the
      // real normalised score here if consumers of sw_output need it.
      float norm_score = 0.0f;

      sw_output_t *sw_output = sw_output_new(strands[i],
                                             chromosomes[i],
                                             starts[i],
                                             strlen(r[i]),
                                             strlen(output->query_map_p[i]),
                                             output->query_start_p[i],
                                             output->ref_start_p[i],
                                             output->score_p[i],
                                             norm_score,
                                             output->query_map_p[i],
                                             output->ref_map_p[i]);
      array_list_insert(sw_output, mapping_list);

      batch->num_to_do++;
    }

    // free query and reference
    free(q[i]);
    free(r[i]);
  }
  batch->num_targets = num_targets;

  // update counter
  thr_sw_items[tid] += sw_count;

  // free
  sw_multi_output_free(output);
}