/**
 * Run one scheduling step of the workflow on the calling thread.
 *
 * Waits on workers_cond until there is at least one pending item or the
 * producer has been flagged as completed, then takes the first item found
 * when scanning the stage queues from stage 0 upwards. The stage function of
 * that item is executed outside the main lock, its elapsed time is added to
 * the per-stage accumulator, and the item is routed according to the value
 * the stage function returned:
 *   - a valid stage index: the item is queued on that stage;
 *   - -1: the item is appended to completed_items and consumer_cond is broadcast;
 *   - anything else: the item is only discounted from num_pending_items.
 *
 * @param wf workflow whose queues are inspected and updated
 */
void workflow_schedule(workflow_t *wf) {
    work_item_t *item = NULL;

    pthread_mutex_lock(&wf->main_mutex);
    while (wf->num_pending_items <= 0 && !wf->completed_producer) {
        pthread_cond_wait(&wf->workers_cond, &wf->main_mutex);
    }
    // Scan the queues in ascending stage order and stop at the first item found
    for (int stage = 0; item == NULL && stage <= wf->num_stages - 1; stage++) {
        item = array_list_remove_at(0, wf->pending_items[stage]);
    }
    pthread_mutex_unlock(&wf->main_mutex);

    if (item == NULL) {
        return;
    }

    workflow_stage_function_t run_stage = wf->stage_functions[item->stage_id];

    // Time the stage function; the elapsed value is divided by 1e6 before accumulating
    struct timeval begin, end;
    double elapsed = 0.0;
    start_timer(begin);
    int next_stage = run_stage(item->data);
    stop_timer(begin, end, elapsed);

    pthread_mutex_lock(&wf->stage_times_mutex[item->stage_id]);
    wf->stage_times[item->stage_id] += (elapsed / 1000000.0f);
    pthread_mutex_unlock(&wf->stage_times_mutex[item->stage_id]);

    item->stage_id = next_stage;

    pthread_mutex_lock(&wf->main_mutex);
    if (next_stage >= 0 && next_stage < wf->num_stages) {
        // Still in flight: hand the item over to its next stage
        array_list_insert(item, wf->pending_items[item->stage_id]);
    } else if (next_stage == -1) {
        // Fully processed: publish it to the consumers
        wf->num_pending_items--;
        array_list_insert(item, wf->completed_items);
        pthread_cond_broadcast(&wf->consumer_cond);
    } else {
        // Any other value is treated as an error: the item is no longer pending
        wf->num_pending_items--;
    }
    pthread_mutex_unlock(&wf->main_mutex);
}
/**
 * Split records into passed/failed depending on whether they are indels.
 *
 * A record passes when its "is indel" statistic agrees with the
 * include_indels argument (indels when they are requested, non-indels when
 * they are not). Every other record is annotated with the filter name and
 * appended to the failed list.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   statistics, one entry per record, in the same order
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to an indel_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t *indel_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int include_indels = ((indel_filter_args*)f_args)->include_indels;
    LOG_DEBUG_F("indel_filter (preserve indels = %d) over %zu records\n", include_indels, input_records->size);

    const int wanted = include_indels ? 1 : 0;
    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *rec = input_records->items[idx];
        const int is_indel = input_stats[idx]->is_indel ? 1 : 0;

        if (is_indel == wanted) {
            array_list_insert(rec, passed);
            continue;
        }
        annotate_failed_record(filter_name, filter_name_len, rec);
        array_list_insert(rec, failed);
    }

    return passed;
}
/**
 * Split records into passed/failed according to their minimum allele frequency.
 *
 * For each record the smallest value in the alleles_freq array of its
 * statistics is computed (starting from 1.0); the record passes when that
 * value is greater than or equal to min_maf. Statistics and records are
 * matched by position in their respective arrays.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   statistics, one entry per record, in the same order
 * @param filter_name   name used to annotate failed records
 * @param args          pointer to a maf_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t* maf_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);
    float min_maf = ((maf_filter_args*) args)->min_maf;

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *rec = input_records->items[idx];
        variant_stats_t *stats = input_stats[idx];

        // Lowest frequency among all the alleles of this variant
        float lowest_freq = 1.0;
        int allele = 0;
        while (allele < stats->num_alleles) {
            lowest_freq = fmin(lowest_freq, stats->alleles_freq[allele]);
            allele++;
        }

        if (lowest_freq >= min_maf) {
            array_list_insert(rec, passed);
            continue;
        }
        annotate_failed_record(filter_name, filter_name_len, rec);
        array_list_insert(rec, failed);
    }

    return passed;
}
/**
 * Split records into passed/failed according to their number of mendelian errors.
 *
 * A record passes when the mendelian_errors value of its statistics does not
 * exceed max_mendelian_errors; otherwise it is annotated with the filter name
 * and appended to the failed list. Statistics and records are matched by
 * position in their respective arrays.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   statistics, one entry per record, in the same order
 * @param filter_name   name used to annotate failed records
 * @param args          pointer to a mendelian_errors_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t* mendelian_errors_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);
    int max_errors = ((mendelian_errors_filter_args*) args)->max_mendelian_errors;

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *variant_stats = input_stats[i];

        if (variant_stats->mendelian_errors <= max_errors) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    return passed;
}
/**
 * Split records into passed/failed depending on whether they carry an ID.
 *
 * A record whose ID is exactly "." is treated as the opposite case of a
 * record with any other ID. When include_snps is set, records with an ID
 * other than "." pass; when it is not set, only records whose ID is "." pass.
 * Records that do not pass are annotated with the filter name and appended to
 * the failed list.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   not read by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a snp_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t *snp_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int include_snps = ((snp_filter_args*)f_args)->include_snps;
    LOG_DEBUG_F("snp_filter (preserve SNPs = %d) over %zu records\n", include_snps, input_records->size);

    const int want_with_id = include_snps ? 1 : 0;
    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *rec = input_records->items[idx];
        // The length is checked first so the ID is only read when it is one character long
        const int has_id = !(rec->id_len == 1 && rec->id[0] == '.');

        if (has_id == want_with_id) {
            array_list_insert(rec, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, rec);
            array_list_insert(rec, failed);
        }
    }

    return passed;
}
/**
 * Distribute FASTQ reads between the passed and failed lists.
 *
 * A read passes when its length lies within [min_length, max_length], its
 * average quality lies within [min_quality, max_quality] and its number of Ns
 * is strictly lower than max_Ns. Read statistics are only computed for reads
 * whose length is in range.
 *
 * @param reads   reads to classify
 * @param passed  list that receives the reads that pass
 * @param failed  list that receives the reads that do not pass
 * @param options thresholds of the filter
 * @return the passed list received as argument
 */
array_list_t *fastq_filter(array_list_t *reads, array_list_t *passed, array_list_t *failed, fastq_filter_options_t *options) {
    fastq_read_stats_t *stats = fastq_read_stats_new();
    fastq_read_stats_options_t *stats_options = fastq_read_stats_options_new(options->min_length, options->max_length, 4);

    for (size_t idx = 0; idx < reads->size; idx++) {
        fastq_read_t *current = array_list_get(idx, reads);
        int accepted = 0;

        if (current->length >= options->min_length && current->length <= options->max_length) {
            fastq_read_stats_se(current, stats_options, stats);
            accepted = stats->quality_average >= options->min_quality
                    && stats->quality_average <= options->max_quality
                    && stats->Ns < options->max_Ns;
        }

        array_list_insert(current, accepted ? passed : failed);
    }

    fastq_read_stats_free(stats);
    fastq_read_stats_options_free(stats_options);

    return passed;
}
/**
 * Read paired-end FASTQ records from two files until a byte budget is reached.
 *
 * Each iteration reads one four-line record from fq_file1 and one from
 * fq_file2, creates a fastq_read_t for each and appends them to reads (first
 * the read of file 1, then the read of file 2). The size accounted for each
 * pair is the sum of the lengths of its header, sequence and quality lines as
 * returned by fgets (i.e. before the last character is chomped).
 *
 * Reading stops when the accumulated size reaches bytes, when fq_file1 has no
 * more headers, or when either file ends in the middle of a record; an
 * incomplete pair is discarded instead of being built from stale buffers.
 *
 * @param reads    list that receives the reads
 * @param bytes    byte budget; no new pair is started once it is reached
 * @param fq_file1 first FASTQ file of the pair
 * @param fq_file2 second FASTQ file of the pair
 * @return accumulated size of the pairs appended to reads
 */
size_t fastq_fread_bytes_aligner_pe(array_list_t *reads, size_t bytes, fastq_file_t *fq_file1, fastq_file_t *fq_file2) {
    size_t accumulated_size = 0;

    char header1[MAX_READ_ID_LENGTH];
    char header2[MAX_READ_ID_LENGTH];
    char read_separator[MAX_READ_ID_LENGTH];
    char sequence1[MAX_READ_SEQUENCE_LENGTH];
    char sequence2[MAX_READ_SEQUENCE_LENGTH];
    char qualities1[MAX_READ_SEQUENCE_LENGTH];
    char qualities2[MAX_READ_SEQUENCE_LENGTH];
    int header_length1, sequence_length1, quality_length1;
    int header_length2, sequence_length2, quality_length2;
    fastq_read_t *read1, *read2;

    while (accumulated_size < bytes && fgets(header1, MAX_READ_ID_LENGTH, fq_file1->fd) != NULL) {
        // Remaining three lines of the record in the first file
        if (fgets(sequence1, MAX_READ_SEQUENCE_LENGTH, fq_file1->fd) == NULL ||
            fgets(read_separator, MAX_READ_ID_LENGTH, fq_file1->fd) == NULL ||
            fgets(qualities1, MAX_READ_SEQUENCE_LENGTH, fq_file1->fd) == NULL) {
            break;
        }

        // Whole record in the second file
        if (fgets(header2, MAX_READ_ID_LENGTH, fq_file2->fd) == NULL ||
            fgets(sequence2, MAX_READ_SEQUENCE_LENGTH, fq_file2->fd) == NULL ||
            fgets(read_separator, MAX_READ_ID_LENGTH, fq_file2->fd) == NULL ||
            fgets(qualities2, MAX_READ_SEQUENCE_LENGTH, fq_file2->fd) == NULL) {
            break;
        }

        header_length1 = strlen(header1);
        sequence_length1 = strlen(sequence1);
        quality_length1 = strlen(qualities1);

        // '\n' char is removed, but '\0' is left
        chomp_at(header1, header_length1 - 1);
        chomp_at(sequence1, sequence_length1 - 1);
        chomp_at(qualities1, quality_length1 - 1);

        header_length2 = strlen(header2);
        sequence_length2 = strlen(sequence2);
        quality_length2 = strlen(qualities2);

        // '\n' char is removed, but '\0' is left
        chomp_at(header2, header_length2 - 1);
        chomp_at(sequence2, sequence_length2 - 1);
        chomp_at(qualities2, quality_length2 - 1);

        read1 = fastq_read_new(header1, sequence1, qualities1);
        read2 = fastq_read_new(header2, sequence2, qualities2);

        array_list_insert(read1, reads);
        array_list_insert(read2, reads);

        accumulated_size += header_length1 + sequence_length1 + quality_length1
                          + header_length2 + sequence_length2 + quality_length2;
    }

    return accumulated_size;
}
/**
 * Split records into passed/failed depending on whether they fall in a region table.
 *
 * For each record a single-position region (start == end == record position)
 * on the record's chromosome is looked up in the region table of the filter
 * arguments, using find_region_by_type when the arguments carry a type and
 * find_region otherwise. Records found pass; the rest are annotated with the
 * filter name and appended to the failed list.
 *
 * The probe region lives on the stack and is zero-initialised, so no field is
 * left indeterminate when the arguments carry no type. If the chromosome name
 * cannot be duplicated the lookup is skipped and the record is treated as not
 * found.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   not read by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a region_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t *region_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    region_filter_args *args = (region_filter_args*) f_args;
    region_table_t *regions = args->regions;

    LOG_DEBUG_F("region_filter over %zu records\n", input_records->size);

    region_t region = {0};
    if (args->type) {
        region.type = args->type;
    }

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        int found = 0;

        region.chromosome = strndup(record->chromosome, record->chromosome_len);
        region.start_position = record->position;
        region.end_position = record->position;

        if (region.chromosome != NULL) {
            found = args->type ? find_region_by_type(&region, regions)
                               : find_region(&region, regions);
        }

        if (found) {
            // The record lies inside at least one region
            array_list_insert(record, passed);
        } else {
            // The record lies outside every region
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }

        free(region.chromosome);
        region.chromosome = NULL;
    }

    return passed;
}
/**
 * Read up to num_reads single-end FASTQ records from a file.
 *
 * Each record is four lines long. The header is cut at its first space and
 * its first character is skipped before the read is created; the sequence and
 * quality lines lose their last character. Every read is appended to reads.
 *
 * Reading stops after num_reads records, when no more headers are available,
 * or when the file ends in the middle of a record; an incomplete record is
 * discarded instead of being built from stale buffers.
 *
 * @param reads     list that receives the reads
 * @param num_reads maximum number of records to read
 * @param fq_file   FASTQ file to read from
 * @return number of reads appended to the list
 */
size_t fastq_fread_se_ex(array_list_t *reads, size_t num_reads, fastq_file_t *fq_file) {
    size_t count = 0;
    char *p;

    char header1[MAX_READ_ID_LENGTH];
    char sequence[MAX_READ_SEQUENCE_LENGTH];
    char header2[MAX_READ_ID_LENGTH];
    char qualities[MAX_READ_SEQUENCE_LENGTH];
    int header_length, sequence_length, quality_length;
    fastq_read_t *read;

    while (count < num_reads && fgets(header1, MAX_READ_ID_LENGTH, fq_file->fd) != NULL) {
        if (fgets(sequence, MAX_READ_SEQUENCE_LENGTH, fq_file->fd) == NULL ||
            fgets(header2, MAX_READ_ID_LENGTH, fq_file->fd) == NULL ||
            fgets(qualities, MAX_READ_SEQUENCE_LENGTH, fq_file->fd) == NULL) {
            break;
        }

        header_length = strlen(header1);
        sequence_length = strlen(sequence);
        quality_length = strlen(qualities);

        // '\n' char is removed, but '\0' is left
        chomp_at(header1, header_length - 1);
        // Keep only the part of the header that precedes the first space
        if ((p = strstr(header1, " ")) != NULL) {
            *p = 0;
        }
        chomp_at(sequence, sequence_length - 1);
        chomp_at(qualities, quality_length - 1);

        // The first character of the header line is not part of the read name
        read = fastq_read_new(&header1[1], sequence, qualities);
        array_list_insert(read, reads);

        count++;
    }

    return count;
}
//---------------------------------------------------------------------------------------- void workflow_insert_stage_item_at(void *data, int new_stage, workflow_t *wf) { work_item_t *item = work_item_new(new_stage, data); pthread_mutex_lock(&wf->main_mutex); array_list_insert(item, wf->pending_items[item->stage_id]); pthread_mutex_unlock(&wf->main_mutex); }
/**
 * Read single-end FASTQ records from a file until a byte budget is reached.
 *
 * Each iteration reads one four-line record, creates a fastq_read_t from its
 * header, sequence and quality lines and appends it to reads. The size
 * accounted for each record is the sum of the lengths of those three lines as
 * returned by fgets (i.e. before the last character is chomped).
 *
 * Reading stops when the accumulated size reaches bytes, when no more headers
 * are available, or when the file ends in the middle of a record; an
 * incomplete record is discarded instead of being built from stale buffers.
 *
 * @param reads   list that receives the reads
 * @param bytes   byte budget; no new record is started once it is reached
 * @param fq_file FASTQ file to read from
 * @return accumulated size of the records appended to reads
 */
size_t fastq_fread_bytes_se(array_list_t *reads, size_t bytes, fastq_file_t *fq_file) {
    size_t accumulated_size = 0;

    char header1[MAX_READ_ID_LENGTH];
    char sequence[MAX_READ_SEQUENCE_LENGTH];
    char header2[MAX_READ_ID_LENGTH];
    char qualities[MAX_READ_SEQUENCE_LENGTH];
    int header_length, sequence_length, quality_length;
    fastq_read_t *read;

    while (accumulated_size < bytes && fgets(header1, MAX_READ_ID_LENGTH, fq_file->fd) != NULL) {
        if (fgets(sequence, MAX_READ_SEQUENCE_LENGTH, fq_file->fd) == NULL ||
            fgets(header2, MAX_READ_ID_LENGTH, fq_file->fd) == NULL ||
            fgets(qualities, MAX_READ_SEQUENCE_LENGTH, fq_file->fd) == NULL) {
            break;
        }

        header_length = strlen(header1);
        sequence_length = strlen(sequence);
        quality_length = strlen(qualities);

        // '\n' char is removed, but '\0' is left
        chomp_at(header1, header_length - 1);
        chomp_at(sequence, sequence_length - 1);
        chomp_at(qualities, quality_length - 1);

        read = fastq_read_new(header1, sequence, qualities);
        array_list_insert(read, reads);

        accumulated_size += header_length + sequence_length + quality_length;
    }

    return accumulated_size;
}
/**
 * Split records into passed/failed according to an inheritance pattern.
 *
 * For the DOMINANT pattern a record passes when both cases_percent_dominant
 * and controls_percent_dominant reach min_following_pattern; for the
 * RECESSIVE pattern the same test is applied to the *_percent_recessive
 * values. Records that do not pass are annotated with the filter name and
 * appended to the failed list.
 *
 * NOTE: when the pattern is neither DOMINANT nor RECESSIVE the records are
 * added to neither list.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   statistics, one entry per record, in the same order
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to an inheritance_pattern_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t *inheritance_pattern_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    enum inheritance_pattern pattern = ((inheritance_pattern_filter_args*)f_args)->pattern;
    float min_following_pattern = ((inheritance_pattern_filter_args*)f_args)->min_following_pattern;

    // The literal percent sign must be written as "%%": a bare "% o" is parsed
    // as a conversion specification and would consume the next argument
    if (pattern == DOMINANT) {
        LOG_DEBUG_F("inheritance_pattern_filter (dominant in %.2f%% of samples) over %zu records\n",
                    min_following_pattern * 100, input_records->size);
    } else {
        LOG_DEBUG_F("inheritance_pattern_filter (recessive in %.2f%% of samples) over %zu records\n",
                    min_following_pattern * 100, input_records->size);
    }

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *stats = input_stats[i];
        int follows_pattern;

        if (pattern == DOMINANT) {
            follows_pattern = stats->cases_percent_dominant >= min_following_pattern &&
                              stats->controls_percent_dominant >= min_following_pattern;
        } else if (pattern == RECESSIVE) {
            follows_pattern = stats->cases_percent_recessive >= min_following_pattern &&
                              stats->controls_percent_recessive >= min_following_pattern;
        } else {
            // Unknown pattern: the record is left out of both lists
            continue;
        }

        if (follows_pattern) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    return passed;
}
/**
 * Register a record/file link under a position key of the hash table.
 *
 * When the key is already present the link is appended to the list stored for
 * it. Otherwise a new list containing the link is created and stored under a
 * private copy of the key.
 *
 * If the key cannot be duplicated, the list cannot be created or filled, or
 * kh_put does not create a new entry (its status is <= 0), nothing is stored,
 * the temporary key and list are released and 0 is returned. In particular
 * the iterator returned by a failed kh_put is never written through.
 *
 * @param key            position key
 * @param link           link to store; it is not copied
 * @param positions_read hash table from position keys to lists of links
 * @return for an existing key, the result of array_list_insert; for a new
 *         key, the status reported by kh_put; 0 when nothing was stored
 */
int insert_position_read(char key[64], vcf_record_file_link* link, kh_pos_t* positions_read) {
    khiter_t iter = kh_get(pos, positions_read, key);
    if (iter != kh_end(positions_read)) {
        return array_list_insert(link, kh_value(positions_read, iter));
    }

    char *key_copy = strdup(key);
    if (key_copy == NULL) {
        return 0;
    }

    array_list_t *records_in_position = array_list_new(8, 1.5, COLLECTION_MODE_SYNCHRONIZED);
    if (records_in_position == NULL) {
        free(key_copy);
        return 0;
    }

    int ret = array_list_insert(link, records_in_position);
    if (ret) {
        iter = kh_put(pos, positions_read, key_copy, &ret);
    }

    if (ret <= 0) {
        // Nothing was added to the table, so the table owns neither the key
        // copy nor the list. The link itself stays owned by the caller.
        // NOTE(review): assumes array_list_free accepts a NULL item callback - confirm
        array_list_free(records_in_position, NULL);
        free(key_copy);
        return 0;
    }

    kh_value(positions_read, iter) = records_in_position;
    return ret;
}
/**
 * Append a header entry to the list of header entries of a VCF file.
 *
 * @param header_entry entry to append (must not be NULL)
 * @param file         file whose header_entries list receives the entry (must not be NULL)
 * @return the value returned by array_list_insert
 */
int add_vcf_header_entry(vcf_header_entry_t *header_entry, vcf_file_t *file) {
    assert(header_entry);
    assert(file);

    return array_list_insert(header_entry, file->header_entries);
}
/**
 * Append a copy of a sample to the samples of a VCF record.
 *
 * At most length characters of sample are duplicated and the copy is appended
 * to record->samples. If the copy cannot be allocated nothing is appended; if
 * the list reports that the copy was not inserted, the copy is released
 * instead of being leaked.
 *
 * @param sample text of the sample (must not be NULL)
 * @param length maximum number of characters to copy
 * @param record record that receives the sample (must not be NULL)
 */
void add_vcf_record_sample(char* sample, int length, vcf_record_t* record) {
    assert(sample);
    assert(record);

    char *sample_copy = strndup(sample, length);
    if (sample_copy == NULL) {
        return;
    }

    // array_list_insert reports a non-zero value when the item was stored
    if (!array_list_insert(sample_copy, record->samples)) {
        free(sample_copy);
    }
}
/**
 * Append a copy of a sample name to the sample names of a VCF file.
 *
 * At most length characters of name are duplicated and the copy is appended
 * to file->samples_names. If the copy cannot be allocated, 0 is returned; if
 * the list reports that the copy was not inserted, the copy is released
 * instead of being leaked.
 *
 * @param name   sample name (must not be NULL)
 * @param length maximum number of characters to copy
 * @param file   file that receives the name (must not be NULL)
 * @return the value returned by array_list_insert, or 0 when the name could
 *         not be duplicated
 */
int add_vcf_sample_name(char *name, int length, vcf_file_t *file) {
    assert(name);
    assert(file);

    char *name_copy = strndup(name, length);
    if (name_copy == NULL) {
        return 0;
    }

    int result = array_list_insert(name_copy, file->samples_names);
    if (!result) {
        // The list did not take the copy, so it must be released here
        free(name_copy);
    }

    return result;
}
/**
 * Split records into passed/failed according to the coverage in their INFO field.
 *
 * The INFO text of each record is copied into a scratch buffer and the value
 * of its "DP" field is extracted. A record passes when that value exists, is
 * numeric and is greater than or equal to min_coverage; otherwise it is
 * annotated with the filter name and appended to the failed list.
 *
 * The scratch buffer is grown by tracking its real capacity and is explicitly
 * terminated after every copy, so a short INFO field never inherits the tail
 * of a longer one processed before it. If the buffer cannot be grown the
 * record is treated as having no coverage value.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   not read by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a coverage_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t* coverage_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int min_coverage = ((coverage_filter_args*)f_args)->min_coverage;

    LOG_DEBUG_F("coverage_filter (min coverage = %d) over %zu records\n", min_coverage, input_records->size);

    char *aux_buffer = NULL;
    size_t aux_capacity = 0;

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        size_t needed = (size_t) record->info_len + 1;
        char *record_coverage = NULL;

        if (needed > aux_capacity) {
            char *grown = realloc(aux_buffer, needed);
            if (grown != NULL) {
                aux_buffer = grown;
                aux_capacity = needed;
            }
        }

        if (needed <= aux_capacity) {
            // strncpy does not terminate when the source fills the whole
            // length, so the terminator is always written by hand
            strncpy(aux_buffer, record->info, record->info_len);
            aux_buffer[record->info_len] = '\0';
            // NOTE(review): ownership of the returned value is not visible here - confirm
            record_coverage = get_field_value_in_info("DP", aux_buffer);
        }

        if (record_coverage != NULL && is_numeric(record_coverage) && atoi(record_coverage) >= min_coverage) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    free(aux_buffer);

    return passed;
}
/**
 * Split records into passed/failed according to their quality.
 *
 * A record passes when its quality is greater than or equal to min_quality;
 * otherwise it is annotated with the filter name and appended to the failed
 * list.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   not read by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a quality_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t* quality_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int min_quality = ((quality_filter_args*)f_args)->min_quality;
    LOG_DEBUG_F("quality_filter (min quality = %d) over %zu records\n", min_quality, input_records->size);

    int idx = 0;
    while (idx < input_records->size) {
        vcf_record_t *rec = input_records->items[idx];
        idx++;

        if (rec->quality >= min_quality) {
            array_list_insert(rec, passed);
            continue;
        }
        annotate_failed_record(filter_name, filter_name_len, rec);
        array_list_insert(rec, failed);
    }

    return passed;
}
/**
 * Read up to num_reads paired-end FASTQ records from two files.
 *
 * Each iteration reads one four-line record from fq_file1 and one from
 * fq_file2, builds a single fastq_read_pe_t from both and appends it to
 * reads.
 *
 * Reading stops after num_reads pairs, when fq_file1 has no more headers, or
 * when either file ends in the middle of a record; an incomplete pair is
 * discarded instead of being built from stale buffers.
 *
 * @param reads     list that receives the paired reads
 * @param num_reads maximum number of pairs to read
 * @param fq_file1  first FASTQ file of the pair
 * @param fq_file2  second FASTQ file of the pair
 * @param mode      value forwarded to fastq_read_pe_new
 * @return number of pairs appended to the list
 */
size_t fastq_fread_pe(array_list_t *reads, size_t num_reads, fastq_file_t *fq_file1, fastq_file_t *fq_file2, int mode) {
    size_t count = 0;

    char header1[MAX_READ_ID_LENGTH];
    char header2[MAX_READ_ID_LENGTH];
    char read_separator[MAX_READ_ID_LENGTH];
    char sequence1[MAX_READ_SEQUENCE_LENGTH];
    char sequence2[MAX_READ_SEQUENCE_LENGTH];
    char qualities1[MAX_READ_SEQUENCE_LENGTH];
    char qualities2[MAX_READ_SEQUENCE_LENGTH];
    int header_length1, sequence_length1, quality_length1;
    int header_length2, sequence_length2, quality_length2;
    fastq_read_pe_t *read_pe;

    while (count < num_reads && fgets(header1, MAX_READ_ID_LENGTH, fq_file1->fd) != NULL) {
        // Remaining three lines of the record in the first file
        if (fgets(sequence1, MAX_READ_SEQUENCE_LENGTH, fq_file1->fd) == NULL ||
            fgets(read_separator, MAX_READ_ID_LENGTH, fq_file1->fd) == NULL ||
            fgets(qualities1, MAX_READ_SEQUENCE_LENGTH, fq_file1->fd) == NULL) {
            break;
        }

        // Whole record in the second file
        if (fgets(header2, MAX_READ_ID_LENGTH, fq_file2->fd) == NULL ||
            fgets(sequence2, MAX_READ_SEQUENCE_LENGTH, fq_file2->fd) == NULL ||
            fgets(read_separator, MAX_READ_ID_LENGTH, fq_file2->fd) == NULL ||
            fgets(qualities2, MAX_READ_SEQUENCE_LENGTH, fq_file2->fd) == NULL) {
            break;
        }

        header_length1 = strlen(header1);
        sequence_length1 = strlen(sequence1);
        quality_length1 = strlen(qualities1);

        // '\n' char is removed, but '\0' is left
        chomp_at(header1, header_length1 - 1);
        chomp_at(sequence1, sequence_length1 - 1);
        chomp_at(qualities1, quality_length1 - 1);

        header_length2 = strlen(header2);
        sequence_length2 = strlen(sequence2);
        quality_length2 = strlen(qualities2);

        // '\n' char is removed, but '\0' is left
        chomp_at(header2, header_length2 - 1);
        chomp_at(sequence2, sequence_length2 - 1);
        chomp_at(qualities2, quality_length2 - 1);

        read_pe = fastq_read_pe_new(header1, header2, sequence1, qualities1, sequence2, qualities2, mode);
        array_list_insert(read_pe, reads);

        count++;
    }

    return count;
}
/**
 * Split records into passed/failed according to their ratio of missing alleles.
 *
 * For each record the ratio missing_alleles / (sum of alleles_count +
 * missing_alleles) is computed in floating point; the record passes when the
 * ratio does not exceed max_missing. Statistics and records are matched by
 * position in their respective arrays.
 *
 * When a record has neither counted nor missing alleles the ratio is 0/0,
 * the comparison is false and the record is sent to the failed list.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   statistics, one entry per record, in the same order
 * @param filter_name   name used to annotate failed records
 * @param args          pointer to a missing_values_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t* missing_values_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    float max_missing = ((missing_values_filter_args*) args)->max_missing;

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *variant_stats = input_stats[i];

        // Total number of alleles that were actually observed
        float allele_count = 0;
        for (int j = 0; j < variant_stats->num_alleles; j++) {
            allele_count += variant_stats->alleles_count[j];
        }

        float record_missing = variant_stats->missing_alleles / (allele_count + variant_stats->missing_alleles);

        if (record_missing <= max_missing) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    return passed;
}
/**
 * Compute the statistics of every alignment in a list.
 *
 * bam1_stats is called once per element of bam1s, in order, and each result
 * is appended to the output list.
 *
 * @param bam1s       list of bam1_t alignments
 * @param opts        options forwarded to bam1_stats
 * @param bam1s_stats list that receives one bam_stats_t per alignment
 * @return size of the output list after the insertions
 */
int bam1s_stats(array_list_t *bam1s, bam_stats_options_t *opts, array_list_t *bam1s_stats) {
    const size_t total = array_list_size(bam1s);

    int idx = 0;
    while (idx < total) {
        bam1_t *alignment = array_list_get(idx, bam1s);
        array_list_insert(bam1_stats(alignment, opts), bam1s_stats);
        idx++;
    }

    return array_list_size(bam1s_stats);
}
/**
 * Split records into passed/failed according to their variant type.
 *
 * A record passes when its type equals the type stored in the filter
 * arguments; otherwise it is annotated with the filter name and appended to
 * the failed list.
 *
 * @param input_records records to classify
 * @param failed        list that receives the records that do not pass
 * @param input_stats   not read by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a variant_type_filter_args structure
 * @return newly created list with the records that passed
 */
array_list_t *variant_type_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    enum variant_type type = ((variant_type_filter_args*)f_args)->type;
    LOG_DEBUG_F("variant_type_filter (variant_type %d) over %zu records\n", type, input_records->size);

    for (int idx = 0; idx < input_records->size; idx++) {
        vcf_record_t *rec = input_records->items[idx];

        if (rec->type != type) {
            annotate_failed_record(filter_name, filter_name_len, rec);
            array_list_insert(rec, failed);
            continue;
        }
        array_list_insert(rec, passed);
    }

    return passed;
}
/**
 * Producer entry point: queue a new work item on a stage of the workflow.
 *
 * The caller waits on producer_cond while the number of items reported by
 * workflow_get_num_items_ is at or above max_num_work_items. Once there is
 * room the item is inserted in the stage queue; if the insertion succeeds the
 * pending-item counter is incremented and the workflow is stored as the
 * item's context. workers_cond is always broadcast before the lock is
 * released.
 *
 * @param stage_id stage whose queue receives the item
 * @param data     payload of the new work item
 * @param wf       workflow that receives the item
 */
void workflow_insert_item_at(int stage_id, void *data, workflow_t *wf) {
    work_item_t *new_item = work_item_new(stage_id, data);

    pthread_mutex_lock(&wf->main_mutex);

    // Block until the workflow has room for one more item
    for (;;) {
        if (workflow_get_num_items_(wf) < wf->max_num_work_items) {
            break;
        }
        pthread_cond_wait(&wf->producer_cond, &wf->main_mutex);
    }

    const int inserted = array_list_insert(new_item, wf->pending_items[stage_id]);
    if (inserted) {
        wf->num_pending_items++;
        new_item->context = (void *) wf;
    }

    pthread_cond_broadcast(&wf->workers_cond);
    pthread_mutex_unlock(&wf->main_mutex);
}
/**
 * Store a batch of variant statistics in a SQLite database.
 *
 * One variant_stats_db_fields_t is built from each entry of the batch; the
 * resulting list is handed to insert_variant_stats_db_fields_list and then
 * released together with its elements.
 *
 * @param db           database that receives the statistics
 * @param num_variants number of entries in stats_batch
 * @param stats_batch  statistics to store
 */
static void report_vcf_variant_stats_sqlite3(sqlite3 *db, int num_variants, variant_stats_t **stats_batch) {
    array_list_t *rows = array_list_new(num_variants + 1, 1.1, COLLECTION_MODE_ASYNCHRONIZED);

    int idx = 0;
    while (idx < num_variants) {
        variant_stats_t *vs = stats_batch[idx++];
        variant_stats_db_fields_t *row = variant_stats_db_fields_new(
                vs->chromosome, vs->position, vs->ref_allele, vs->alt_alleles,
                vs->maf_allele, vs->maf, vs->mgf_genotype, vs->mgf,
                vs->missing_alleles, vs->missing_genotypes, vs->mendelian_errors, vs->is_indel,
                vs->cases_percent_dominant, vs->controls_percent_dominant,
                vs->cases_percent_recessive, vs->controls_percent_recessive);
        array_list_insert(row, rows);
    }

    insert_variant_stats_db_fields_list(rows, db);

    // The list owns the rows, so they are released along with it
    array_list_free(rows, (void *)variant_stats_db_fields_free);
}
int main( int argc, char **args ) { //create a new array list. friso_array_t array = new_array_list(); fstring keys[] = { "chenmanwen", "yangqinghua", "chenxin", "luojiangyan", "xiaoyanzi", "bibi", "zhangrenfang", "yangjian", "liuxiao", "pankai", "chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo", "caizaili", "panpan", "xiaolude", "yintanwen" }; int j, idx = 2, len = sizeof( keys ) / sizeof( fstring ); for ( j = 0; j < len; j++ ) { array_list_add( array, keys[j] ); } printf("length=%d, allocations=%d\n", array->length, array->allocs ); array_list_trim( array ); printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("\nAfter set %dth item.\n", idx ); array_list_set( array, idx, "chenxin__" ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("\nAfter remove %dth item.\n", idx ); array_list_remove( array, idx ); printf("length=%d, allocations=%d\n", array->length, array->allocs ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); printf("\nInsert a item at %dth\n", idx ); array_list_insert( array, idx, "*chenxin*" ); printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) ); free_array_list( array ); return 0; }
/**
 * Create a deep copy of a VCF record.
 *
 * Every text field is duplicated using its stored length, the position and
 * the quality are copied by value, and each sample is duplicated into a new
 * samples list, so the copy shares no memory with the original.
 *
 * NOTE(review): other code in this file reads record->type; it is not copied
 * here because its presence in this version of the structure could not be
 * confirmed - verify and copy it as well if it exists.
 *
 * @param orig record to copy
 * @return the new record, or NULL if it could not be allocated
 */
vcf_record_t *vcf_record_copy(vcf_record_t *orig) {
    vcf_record_t *record = (vcf_record_t*) calloc (1, sizeof(vcf_record_t));
    if (record == NULL) {
        return NULL;
    }

    record->chromosome = strndup(orig->chromosome, orig->chromosome_len);
    record->chromosome_len = orig->chromosome_len;
    record->position = orig->position;
    record->id = strndup(orig->id, orig->id_len);
    record->id_len = orig->id_len;
    record->reference = strndup(orig->reference, orig->reference_len);
    record->reference_len = orig->reference_len;
    record->alternate = strndup(orig->alternate, orig->alternate_len);
    record->alternate_len = orig->alternate_len;
    // Without this the copy would always carry the zero left by calloc
    record->quality = orig->quality;
    record->filter = strndup(orig->filter, orig->filter_len);
    record->filter_len = orig->filter_len;
    record->info = strndup(orig->info, orig->info_len);
    record->info_len = orig->info_len;
    record->format = strndup(orig->format, orig->format_len);
    record->format_len = orig->format_len;

    record->samples = array_list_new(orig->samples->size + 1, 1.5, COLLECTION_MODE_ASYNCHRONIZED);
    for (int i = 0; i < orig->samples->size; i++) {
        array_list_insert(strdup(array_list_get(i, orig->samples)), record->samples);
    }

    return record;
}
/**
 * Append a copy of a value to the values of a VCF header entry.
 *
 * At most length characters of value are duplicated and the copy is appended
 * to entry->values. If the copy cannot be allocated nothing is appended; if
 * the list reports that the copy was not inserted, the copy is released
 * instead of being leaked.
 *
 * @param value  text of the value (must not be NULL)
 * @param length maximum number of characters to copy
 * @param entry  header entry that receives the value (must not be NULL)
 */
void add_vcf_header_entry_value(char *value, int length, vcf_header_entry_t *entry) {
    assert(value);
    assert(entry);

    char *value_copy = strndup(value, length);
    if (value_copy == NULL) {
        return;
    }

    // array_list_insert reports a non-zero value when the item was stored
    if (!array_list_insert(value_copy, entry->values)) {
        free(value_copy);
    }
}
/* Copy the NUL-terminated line at src into dst, truncating it to fit in dst_size bytes. */
static void fastq_gzread_copy_line(char *dst, size_t dst_size, const char *src) {
    size_t len = strlen(src);
    if (len >= dst_size) {
        len = dst_size - 1;
    }
    memcpy(dst, src, len);
    dst[len] = '\0';
}

/**
 * Read single-end FASTQ records from a gzip-compressed file.
 *
 * The function works in three steps:
 *   1. The text left over from the previous call (fq_gzfile->data) is placed
 *      at the beginning of a working buffer.
 *   2. Compressed chunks are read and inflated into the working buffer until
 *      at least 4 * num_reads new lines have been decompressed, the input is
 *      exhausted or inflate reports the end of the stream.
 *   3. Up to 4 * num_reads complete lines of the buffer are parsed: lines 0,
 *      1 and 3 of each group of four are the id, sequence and qualities of a
 *      read, which is created and appended to reads. The text from the first
 *      unparsed character onwards is saved in fq_gzfile->data for the next
 *      call.
 *
 * Lines longer than MAX_READ_SEQUENCE_LENGTH - 1 characters are truncated.
 * All the temporary buffers are released on every exit path.
 *
 * @param reads     list that receives the reads
 * @param num_reads number of records to read
 * @param fq_gzfile compressed FASTQ file and its inflate state
 * @return reads->size on success. On failure a zlib code converted to size_t:
 *         Z_ERRNO for a read error, the inflate error code for a corrupt
 *         stream, Z_MEM_ERROR when a buffer cannot be allocated.
 */
size_t fastq_gzread_se(array_list_t *reads, size_t num_reads, fastq_gzfile_t *fq_gzfile) {
    size_t result;
    size_t count = 0;
    size_t lines = 0;
    size_t parsed_chars = 0;
    size_t parsed_lines = 0;
    size_t data_size;
    size_t num_lines_to_read = 4 * num_reads;   /* Each read consists of 4 lines */
    int max_data_len = CHUNK;
    int max_read_len = MAX_READ_SEQUENCE_LENGTH;    /* Each read is supposed to be shorter than MAX_READ_SEQUENCE_LENGTH */
    int i = 0;                                  /* Number of characters stored in data */
    char *aux;
    fastq_read_t *read;

    // ZLIB variables
    unsigned have;
    unsigned char in[CHUNK];
    unsigned char out[CHUNK];

    char *data = NULL;
    char *id = calloc(max_read_len, sizeof(char));
    char *seq = calloc(max_read_len, sizeof(char));
    char *qual = calloc(max_read_len, sizeof(char));

    // Make room for the text saved by the previous call when it is larger than a chunk
    if (fq_gzfile->data != NULL && fq_gzfile->data_size > max_data_len) {
        max_data_len = fq_gzfile->data_size + max_data_len;
    }
    data = calloc(max_data_len, sizeof(char));

    if (data == NULL || id == NULL || seq == NULL || qual == NULL) {
        result = (size_t) Z_MEM_ERROR;
        goto cleanup;
    }

    if (fq_gzfile->data != NULL) {
        memcpy(data, fq_gzfile->data, fq_gzfile->data_size);
        i = fq_gzfile->data_size;
    }

    do {
        fq_gzfile->strm.avail_in = fread(in, 1, CHUNK, fq_gzfile->fd);
        if (ferror(fq_gzfile->fd)) {
            (void) inflateEnd(&fq_gzfile->strm);
            result = (size_t) Z_ERRNO;
            goto cleanup;
        }
        if (fq_gzfile->strm.avail_in == 0) {
            break;
        }
        fq_gzfile->strm.next_in = in;

        /* run inflate() on input until output buffer not full */
        do {
            fq_gzfile->strm.avail_out = CHUNK;
            fq_gzfile->strm.next_out = out;
            fq_gzfile->ret = inflate(&fq_gzfile->strm, Z_NO_FLUSH);
            assert(fq_gzfile->ret != Z_STREAM_ERROR);  /* state not clobbered */

            switch (fq_gzfile->ret) {
                case Z_NEED_DICT:
                    fq_gzfile->ret = Z_DATA_ERROR;
                    /* fall through */
                case Z_DATA_ERROR:
                case Z_MEM_ERROR:
                    (void) inflateEnd(&fq_gzfile->strm);
                    result = (size_t) fq_gzfile->ret;
                    goto cleanup;
                default:
                    break;
            }

            have = CHUNK - fq_gzfile->strm.avail_out;

            // Every inflated byte is appended to data; an unsigned char can
            // never compare equal to EOF, so no end-of-file test is needed
            for (unsigned j = 0; j < have; j++) {
                int c = out[j];
                max_data_len = consume_input(c, &data, max_data_len, i);
                if (c == '\n') {
                    lines++;
                }
                i++;
            }
        } while (fq_gzfile->strm.avail_out == 0);

        /* done when inflate() says it's done */
    } while (lines < num_lines_to_read && fq_gzfile->ret != Z_STREAM_END);

    // Split the buffer in lines and build one read out of every four of them
    aux = data;
    for (parsed_chars = 0; parsed_chars < (size_t) i && parsed_lines < num_lines_to_read; parsed_chars++) {
        if (data[parsed_chars] != '\n') {
            continue;
        }
        data[parsed_chars] = '\0';

        switch (count % 4) {
            case 0:
                fastq_gzread_copy_line(id, max_read_len, aux);
                break;
            case 1:
                fastq_gzread_copy_line(seq, max_read_len, aux);
                break;
            case 3:
                fastq_gzread_copy_line(qual, max_read_len, aux);
                read = fastq_read_new(id, seq, qual);
                array_list_insert(read, reads);
                break;
            default:
                // Line 2 is the separator and carries no data
                break;
        }

        count++;
        aux = data + parsed_chars + 1;
        parsed_lines++;
    }

    result = reads->size;

    // Save the unparsed tail for the next call. The saved buffer is grown
    // whenever it is missing or was last sized for fewer bytes, including
    // the case in which the previous tail was empty.
    data_size = (size_t) i - parsed_chars;
    if (data_size > 0) {
        if (fq_gzfile->data == NULL || fq_gzfile->data_size < data_size) {
            char *grown = realloc(fq_gzfile->data, data_size);
            if (grown == NULL) {
                // The tail cannot be kept: report the failure to the caller
                data_size = 0;
                result = (size_t) Z_MEM_ERROR;
            } else {
                fq_gzfile->data = grown;
            }
        }
        if (data_size > 0) {
            memcpy(fq_gzfile->data, data + parsed_chars, data_size);
        }
    }
    fq_gzfile->data_size = data_size;

cleanup:
    free(data);
    free(id);
    free(seq);
    free(qual);

    return result;
}
/**
 * Append a record to the list of records of a VCF batch.
 *
 * @param record record to append (must not be NULL)
 * @param batch  batch whose records list receives the record (must not be NULL)
 */
void add_record_to_vcf_batch(vcf_record_t *record, vcf_batch_t *batch) {
    assert(record);
    assert(batch);

    // The result of the insertion is not reported to the caller
    (void) array_list_insert(record, batch->records);
}
/**
 * Insert an object at the given index of an array.
 *
 * The array is handed to array_list_insert after casting it to an ArrayList.
 *
 * @param list  array that receives the object
 * @param index position at which the object is inserted
 * @param obj   object to insert
 */
void array_insert(Array * const list, int index, Object obj)
{
    ArrayList * const as_array_list = (ArrayList * const ) list;

    array_list_insert(as_array_list, index, obj);
}