Beispiel #1
0
/**
 * Performs one scheduling step on behalf of a worker thread.
 *
 * Waits on wf->workers_cond while nothing is pending and the producer has not
 * completed. It then takes the head item of the first stage queue that yields
 * one, runs that stage's function on the item's data (outside the main lock),
 * adds the measured time to the stage's accumulator, and routes the item
 * according to the value returned by the stage function:
 *   - a valid stage index: the item is queued for that stage,
 *   - -1: the item goes to the completed list and consumers are woken up,
 *   - anything else: the item is only discounted from the pending counter.
 *
 * @param wf workflow whose queues are served
 */
void workflow_schedule(workflow_t *wf) {
    work_item_t *item = NULL;

    pthread_mutex_lock(&wf->main_mutex);

    // Sleep until there is pending work or the producer has finished
    while (wf->num_pending_items <= 0 && !wf->completed_producer) {
        pthread_cond_wait(&wf->workers_cond, &wf->main_mutex);
    }

    // Walk the stage queues from the first stage onwards and stop at the
    // first one that hands over an item
    int stage = 0;
    while (stage <= wf->num_stages - 1) {
        item = array_list_remove_at(0, wf->pending_items[stage]);
        if (item != NULL) {
            break;
        }
        stage++;
    }

    pthread_mutex_unlock(&wf->main_mutex);

    // Nothing to process in this step
    if (item == NULL) {
        return;
    }

    workflow_stage_function_t run_stage = wf->stage_functions[item->stage_id];

    // Time the stage function; it runs without holding the main mutex
    struct timeval t_begin, t_end;
    double elapsed = 0.0;
    start_timer(t_begin);
    int next_stage = run_stage(item->data);
    stop_timer(t_begin, t_end, elapsed);

    // Per-stage accumulators have their own mutex
    // (presumably microseconds converted to seconds -- confirm against stop_timer)
    pthread_mutex_lock(&wf->stage_times_mutex[item->stage_id]);
    wf->stage_times[item->stage_id] += (elapsed / 1000000.0f);
    pthread_mutex_unlock(&wf->stage_times_mutex[item->stage_id]);

    item->stage_id = next_stage;

    pthread_mutex_lock(&wf->main_mutex);
    if (next_stage == -1) {
        // Item fully processed: hand it over to the consumers
        wf->num_pending_items--;
        array_list_insert(item, wf->completed_items);
        pthread_cond_broadcast(&wf->consumer_cond);
    } else if (next_stage >= 0 && next_stage < wf->num_stages) {
        // Item moves on to the stage chosen by the stage function
        array_list_insert(item, wf->pending_items[item->stage_id]);
    } else {
        // Unknown stage: the item is no longer counted as pending
        wf->num_pending_items--;
    }
    pthread_mutex_unlock(&wf->main_mutex);
}
Beispiel #2
0
/**
 * Splits records depending on whether they are indels.
 *
 * When include_indels is set, indels pass and everything else fails; when it
 * is not set, indels fail and everything else passes. Failed records are
 * annotated with the filter name and appended to the failed list.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   statistics, one entry per record at the same index
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to an indel_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t *indel_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);
    int include_indels = ((indel_filter_args*)f_args)->include_indels;

    LOG_DEBUG_F("indel_filter (preserve indels = %d) over %zu records\n", include_indels, input_records->size);

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        int is_indel = input_stats[i]->is_indel != 0;

        // A record is kept when its indel status matches what was requested
        int keep = include_indels ? is_indel : !is_indel;

        if (keep) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    return passed;
}
Beispiel #3
0
/**
 * Keeps the records whose minimum allele frequency reaches a threshold.
 *
 * For each record the smallest value among its alleles_freq entries (capped
 * at 1.0) is compared against min_maf. Records below the threshold are
 * annotated with the filter name and appended to the failed list.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   statistics, one entry per record at the same index
 * @param filter_name   name used to annotate failed records
 * @param args          pointer to a maf_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t* maf_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    float min_maf = ((maf_filter_args*) args)->min_maf;

    // Statistics and records are paired by index
    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *stats = input_stats[i];

        // Lowest allele frequency of this record, never above 1.0
        float lowest_freq = 1.0;
        int allele = 0;
        while (allele < stats->num_alleles) {
            lowest_freq = fmin(lowest_freq, stats->alleles_freq[allele]);
            allele++;
        }

        if (lowest_freq >= min_maf) {
            array_list_insert(record, passed);
            continue;
        }

        annotate_failed_record(filter_name, filter_name_len, record);
        array_list_insert(record, failed);
    }

    return passed;
}
Beispiel #4
0
/**
 * Keeps the records whose number of mendelian errors does not exceed a limit.
 *
 * Records with more than max_mendelian_errors errors are annotated with the
 * filter name and appended to the failed list.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   statistics, one entry per record at the same index
 * @param filter_name   name used to annotate failed records
 * @param args          pointer to a mendelian_errors_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t* mendelian_errors_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int max_errors = ((mendelian_errors_filter_args*) args)->max_mendelian_errors;

    // Statistics and records are paired by index, so the stats at position i
    // decide the fate of the record at position i
    for (size_t i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *variant_stats = input_stats[i];

        if (variant_stats->mendelian_errors <= max_errors) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    return passed;
}
Beispiel #5
0
/**
 * Splits records depending on whether they carry an identifier.
 *
 * A record whose ID field is exactly "." is treated as not being a known SNP.
 * When include_snps is set, records with an identifier pass and those without
 * fail; when it is not set, the outcome is reversed. Failed records are
 * annotated with the filter name and appended to the failed list.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   not used by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a snp_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t *snp_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);
    int include_snps = ((snp_filter_args*)f_args)->include_snps;

    LOG_DEBUG_F("snp_filter (preserve SNPs = %d) over %zu records\n", include_snps, input_records->size);

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];

        // "." in the ID column means the record has no identifier
        int lacks_id = record->id_len == 1 && strncmp(".", record->id, 1) == 0;
        int keep = include_snps ? !lacks_id : lacks_id;

        if (keep) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    return passed;
}
Beispiel #6
0
/**
 * Distributes reads between the passed and failed lists.
 *
 * A read passes when its length lies within [min_length, max_length], its
 * average quality lies within [min_quality, max_quality] and its number of Ns
 * is below max_Ns. Statistics are only computed for reads whose length is in
 * range; every other read goes straight to the failed list.
 *
 * @param reads   reads to classify
 * @param passed  list receiving the reads that pass
 * @param failed  list receiving the reads that do not pass
 * @param options thresholds applied to each read
 * @return the passed list received as argument
 */
array_list_t *fastq_filter(array_list_t *reads, array_list_t *passed, array_list_t *failed, fastq_filter_options_t *options) {
    fastq_read_stats_t *stats = fastq_read_stats_new();
    fastq_read_stats_options_t *stats_options = fastq_read_stats_options_new(options->min_length, options->max_length, 4);

    for (size_t i = 0; i < reads->size; i++) {
        fastq_read_t *read = array_list_get(i, reads);
        int keep = 0;

        if (read->length >= options->min_length && read->length <= options->max_length) {
            // The statistics object is reused for every read
            fastq_read_stats_se(read, stats_options, stats);
            keep = stats->quality_average >= options->min_quality
                && stats->quality_average <= options->max_quality
                && stats->Ns < options->max_Ns;
        }

        array_list_insert(read, keep ? passed : failed);
    }

    fastq_read_stats_free(stats);
    fastq_read_stats_options_free(stats_options);

    return passed;
}
Beispiel #7
0
/**
 * Reads paired-end records from two FASTQ files until a byte budget is used.
 *
 * Each iteration reads one four-line record from each file and appends the
 * two resulting reads (first file, then second file) to the list. Reading
 * stops when the accumulated size reaches the budget, when the first file has
 * no more headers, or when any record turns out to be incomplete; an
 * incomplete pair is discarded instead of being built from stale buffers.
 *
 * @param reads    list receiving the reads, two per pair
 * @param bytes    budget; no new pair is started once it has been reached
 * @param fq_file1 file with the first read of each pair
 * @param fq_file2 file with the second read of each pair
 * @return sum of the header, sequence and quality line lengths of all the
 *         reads inserted, measured before the last character is removed
 */
size_t fastq_fread_bytes_aligner_pe(array_list_t *reads, size_t bytes, fastq_file_t *fq_file1, fastq_file_t *fq_file2) {
    size_t accumulated_size = 0;
    char header1[MAX_READ_ID_LENGTH];
    char header2[MAX_READ_ID_LENGTH];
    char read_separator[MAX_READ_ID_LENGTH];
    char sequence1[MAX_READ_SEQUENCE_LENGTH];
    char sequence2[MAX_READ_SEQUENCE_LENGTH];
    char qualities1[MAX_READ_SEQUENCE_LENGTH];
    char qualities2[MAX_READ_SEQUENCE_LENGTH];
    int header_length1, sequence_length1, quality_length1;
    int header_length2, sequence_length2, quality_length2;
    fastq_read_t *read1, *read2;

    while (accumulated_size < bytes && fgets(header1, MAX_READ_ID_LENGTH, fq_file1->fd) != NULL) {
        // Remaining three lines of the record in the first file
        if (fgets(sequence1, MAX_READ_SEQUENCE_LENGTH, fq_file1->fd) == NULL ||
            fgets(read_separator, MAX_READ_ID_LENGTH, fq_file1->fd) == NULL ||
            fgets(qualities1, MAX_READ_SEQUENCE_LENGTH, fq_file1->fd) == NULL) {
            break;
        }

        // Matching record in the second file
        if (fgets(header2, MAX_READ_ID_LENGTH, fq_file2->fd) == NULL ||
            fgets(sequence2, MAX_READ_SEQUENCE_LENGTH, fq_file2->fd) == NULL ||
            fgets(read_separator, MAX_READ_ID_LENGTH, fq_file2->fd) == NULL ||
            fgets(qualities2, MAX_READ_SEQUENCE_LENGTH, fq_file2->fd) == NULL) {
            break;
        }

        header_length1 = strlen(header1);
        sequence_length1 = strlen(sequence1);
        quality_length1 = strlen(qualities1);

        header_length2 = strlen(header2);
        sequence_length2 = strlen(sequence2);
        quality_length2 = strlen(qualities2);

        // '\n' char is removed, but '\0' is left
        chomp_at(header1, header_length1 - 1);
        chomp_at(sequence1, sequence_length1 - 1);
        chomp_at(qualities1, quality_length1 - 1);

        chomp_at(header2, header_length2 - 1);
        chomp_at(sequence2, sequence_length2 - 1);
        chomp_at(qualities2, quality_length2 - 1);

        read1 = fastq_read_new(header1, sequence1, qualities1);
        read2 = fastq_read_new(header2, sequence2, qualities2);

        array_list_insert(read1, reads);
        array_list_insert(read2, reads);

        accumulated_size += header_length1 + sequence_length1 + quality_length1 + header_length2 + sequence_length2 + quality_length2;
    }

    return accumulated_size;
}
Beispiel #8
0
/**
 * Keeps the records located inside at least one of the configured regions.
 *
 * For each record a single-position region (start == end == record position)
 * is looked up in the region table, by type when args->type is set and
 * without type otherwise. Records that are not found are annotated with the
 * filter name and appended to the failed list.
 *
 * The lookup region lives on the stack and is zero-initialized, so no field
 * reaches the lookup functions with an indeterminate value and no allocation
 * result has to be checked for it.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   not used by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a region_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t *region_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    region_filter_args *args = (region_filter_args*) f_args;
    region_table_t *regions = args->regions;

    LOG_DEBUG_F("region_filter over %zu records\n", input_records->size);

    region_t region = {0};
    for (size_t i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];

        // The record's chromosome is not necessarily NUL-terminated, so the
        // lookup works on a terminated copy that is released every iteration
        region.chromosome = strndup(record->chromosome, record->chromosome_len);
        region.start_position = record->position;
        region.end_position = record->position;

        int found = 0;
        if (args->type) {
            region.type = args->type;
            found = find_region_by_type(&region, regions);
        } else {
            found = find_region(&region, regions);
        }

        if (found) {
            // Add to the list of records that pass all checks for at least one region
            array_list_insert(record, passed);
        } else {
            // Add to the list of records that fail all checks for all regions
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }

        free(region.chromosome);
        region.chromosome = NULL;
    }

    return passed;
}
Beispiel #9
0
/**
 * Reads up to num_reads single-end records from a FASTQ file.
 *
 * The identifier stored in each read is the header line without its first
 * character and cut at the first space. Reading stops when num_reads records
 * have been read, when there are no more headers, or when a record is
 * incomplete; an incomplete record is discarded instead of being built from
 * stale buffers.
 *
 * @param reads     list receiving the reads
 * @param num_reads maximum number of records to read
 * @param fq_file   file to read from
 * @return number of reads inserted in the list
 */
size_t fastq_fread_se_ex(array_list_t *reads, size_t num_reads, fastq_file_t *fq_file) {
    size_t count = 0;
    char *p;
    char header1[MAX_READ_ID_LENGTH];
    char sequence[MAX_READ_SEQUENCE_LENGTH];
    char header2[MAX_READ_ID_LENGTH];
    char qualities[MAX_READ_SEQUENCE_LENGTH];
    int header_length, sequence_length, quality_length;
    fastq_read_t *read;

    while (count < num_reads && fgets(header1, MAX_READ_ID_LENGTH, fq_file->fd) != NULL) {
        // Remaining three lines of the record; stop on a truncated record
        if (fgets(sequence, MAX_READ_SEQUENCE_LENGTH, fq_file->fd) == NULL ||
            fgets(header2, MAX_READ_ID_LENGTH, fq_file->fd) == NULL ||
            fgets(qualities, MAX_READ_SEQUENCE_LENGTH, fq_file->fd) == NULL) {
            break;
        }

        header_length = strlen(header1);
        sequence_length = strlen(sequence);
        quality_length = strlen(qualities);

        // '\n' char is removed, but '\0' is left
        chomp_at(header1, header_length - 1);
        // Only the text before the first space is kept as identifier
        if ((p = strstr(header1, " ")) != NULL) {
            *p = 0;
        }
        chomp_at(sequence, sequence_length - 1);
        chomp_at(qualities, quality_length - 1);

        // The first character of the header line is skipped
        read = fastq_read_new(&header1[1], sequence, qualities);
        array_list_insert(read, reads);

        count++;
    }

    return count;
}
Beispiel #10
0
//----------------------------------------------------------------------------------------
/**
 * Wraps data in a new work item and queues it for the given stage.
 *
 * The insertion is done under wf->main_mutex. Neither the pending counter nor
 * any condition variable is touched here.
 *
 * @param data      payload of the new work item
 * @param new_stage stage the item is created for
 * @param wf        workflow receiving the item
 */
void workflow_insert_stage_item_at(void *data, int new_stage, workflow_t *wf) {
    work_item_t *new_item = work_item_new(new_stage, data);

    pthread_mutex_lock(&wf->main_mutex);
    {
        // The queue is selected by the stage recorded in the item itself
        array_list_insert(new_item, wf->pending_items[new_item->stage_id]);
    }
    pthread_mutex_unlock(&wf->main_mutex);
}
Beispiel #11
0
/**
 * Reads single-end records from a FASTQ file until a byte budget is used.
 *
 * Reading stops when the accumulated size reaches the budget, when there are
 * no more headers, or when a record is incomplete; an incomplete record is
 * discarded instead of being built from stale buffers.
 *
 * @param reads   list receiving the reads
 * @param bytes   budget; no new record is started once it has been reached
 * @param fq_file file to read from
 * @return sum of the header, sequence and quality line lengths of all the
 *         reads inserted, measured before the last character is removed
 */
size_t fastq_fread_bytes_se(array_list_t *reads, size_t bytes, fastq_file_t *fq_file) {
    size_t accumulated_size = 0;
    char header1[MAX_READ_ID_LENGTH];
    char sequence[MAX_READ_SEQUENCE_LENGTH];
    char header2[MAX_READ_ID_LENGTH];
    char qualities[MAX_READ_SEQUENCE_LENGTH];
    int header_length, sequence_length, quality_length;
    fastq_read_t *read;

    while (accumulated_size < bytes && fgets(header1, MAX_READ_ID_LENGTH, fq_file->fd) != NULL) {
        // Remaining three lines of the record; stop on a truncated record
        if (fgets(sequence, MAX_READ_SEQUENCE_LENGTH, fq_file->fd) == NULL ||
            fgets(header2, MAX_READ_ID_LENGTH, fq_file->fd) == NULL ||
            fgets(qualities, MAX_READ_SEQUENCE_LENGTH, fq_file->fd) == NULL) {
            break;
        }

        header_length = strlen(header1);
        sequence_length = strlen(sequence);
        quality_length = strlen(qualities);

        // '\n' char is removed, but '\0' is left
        chomp_at(header1, header_length - 1);
        chomp_at(sequence, sequence_length - 1);
        chomp_at(qualities, quality_length - 1);

        read = fastq_read_new(header1, sequence, qualities);
        array_list_insert(read, reads);

        accumulated_size += header_length + sequence_length + quality_length;
    }

    return accumulated_size;
}
Beispiel #12
0
/**
 * Keeps the records that follow an inheritance pattern in enough samples.
 *
 * For the DOMINANT pattern both cases_percent_dominant and
 * controls_percent_dominant must reach min_following_pattern; for the
 * RECESSIVE pattern the recessive percentages are used instead. Records that
 * do not reach the threshold are annotated with the filter name and appended
 * to the failed list.
 *
 * NOTE(review): when the pattern is neither DOMINANT nor RECESSIVE a record
 * is added to neither list; confirm whether that is intended.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   statistics, one entry per record at the same index
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to an inheritance_pattern_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t *inheritance_pattern_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    enum inheritance_pattern pattern = ((inheritance_pattern_filter_args*)f_args)->pattern;
    float min_following_pattern = ((inheritance_pattern_filter_args*)f_args)->min_following_pattern;

    // The literal percent sign must be written as "%%": a lone '%' followed by
    // " o" is read as a conversion and shifts the remaining arguments
    // (assumes LOG_DEBUG_F takes a printf-style format)
    if (pattern == DOMINANT) {
        LOG_DEBUG_F("inheritance_pattern_filter (dominant in %.2f%% of samples) over %zu records\n",
                    min_following_pattern * 100, input_records->size);
    } else {
        LOG_DEBUG_F("inheritance_pattern_filter (recessive in %.2f%% of samples) over %zu records\n",
                    min_following_pattern * 100, input_records->size);
    }

    vcf_record_t *record;
    variant_stats_t *stats;
    for (size_t i = 0; i < input_records->size; i++) {
        record = input_records->items[i];
        stats = input_stats[i];

        if (pattern == DOMINANT) {
            if (stats->cases_percent_dominant >= min_following_pattern &&
                stats->controls_percent_dominant >= min_following_pattern) {
                array_list_insert(record, passed);
            } else {
                annotate_failed_record(filter_name, filter_name_len, record);
                array_list_insert(record, failed);
            }
        } else if (pattern == RECESSIVE) {
            if (stats->cases_percent_recessive >= min_following_pattern &&
                stats->controls_percent_recessive >= min_following_pattern) {
                array_list_insert(record, passed);
            } else {
                annotate_failed_record(filter_name, filter_name_len, record);
                array_list_insert(record, failed);
            }
        }
    }

    return passed;
}
Beispiel #13
0
/**
 * Registers a record link under the given key of the positions table.
 *
 * If the key already exists, the link is appended to its list and the result
 * of that insertion is returned. Otherwise a new list holding the link is
 * created, a copy of the key is put in the table, and the status reported by
 * kh_put is returned; the new list is stored only when that status is
 * non-zero.
 *
 * @param key            key identifying the position
 * @param link           link to store
 * @param positions_read hash table from keys to lists of links
 * @return result of array_list_insert for an existing key, status of kh_put
 *         for a new one
 */
int insert_position_read(char key[64], vcf_record_file_link* link, kh_pos_t* positions_read) {
    khiter_t slot = kh_get(pos, positions_read, key);

    // Known position: just append to the list already stored there
    if (slot != kh_end(positions_read)) {
        array_list_t *existing = kh_value(positions_read, slot);
        return array_list_insert(link, existing);
    }

    // New position: build its list first, then publish it in the table
    array_list_t *fresh = array_list_new(8, 1.5, COLLECTION_MODE_SYNCHRONIZED);
    int status = array_list_insert(link, fresh);

    // kh_put overwrites the status with its own result
    slot = kh_put(pos, positions_read, strdup(key), &status);
    if (status) {
        kh_value(positions_read, slot) = fresh;
    }

    return status;
}
/**
 * Appends a header entry to the list of header entries of a VCF file.
 *
 * @param header_entry entry to append, must not be NULL
 * @param file         file receiving the entry, must not be NULL
 * @return value returned by array_list_insert
 */
int add_vcf_header_entry(vcf_header_entry_t *header_entry, vcf_file_t *file) {
    assert(header_entry);
    assert(file);

    return array_list_insert(header_entry, file->header_entries);
}
/**
 * Appends a copy of a sample to the list of samples of a record.
 *
 * The first length characters of the sample are duplicated, so the caller
 * keeps ownership of its buffer. If the copy cannot be allocated nothing is
 * inserted, and if the list refuses the copy it is released instead of being
 * leaked.
 *
 * @param sample sample text, must not be NULL
 * @param length number of characters of the sample to copy
 * @param record record receiving the sample, must not be NULL
 */
void add_vcf_record_sample(char* sample, int length, vcf_record_t* record) {
    assert(sample);
    assert(record);

    char *copy = strndup(sample, length);
    if (copy == NULL) {
        return;
    }

    // A zero result means the copy was not inserted
    if (!array_list_insert(copy, record->samples)) {
        free(copy);
    }
}
/**
 * Appends a copy of a sample name to the list of sample names of a VCF file.
 *
 * The first length characters of the name are duplicated, so the caller keeps
 * ownership of its buffer. If the copy cannot be allocated nothing is
 * inserted, and if the list refuses the copy it is released instead of being
 * leaked.
 *
 * @param name   sample name, must not be NULL
 * @param length number of characters of the name to copy
 * @param file   file receiving the name, must not be NULL
 * @return value returned by array_list_insert, or 0 if the copy could not be
 *         allocated
 */
int add_vcf_sample_name(char *name, int length, vcf_file_t *file) {
    assert(name);
    assert(file);

    char *copy = strndup(name, length);
    if (copy == NULL) {
        return 0;
    }

    int result = array_list_insert(copy, file->samples_names);
    if (!result) {
        // A zero result means the copy was not inserted
        free(copy);
    }
    return result;
}
Beispiel #17
0
/**
 * Keeps the records whose DP value in the INFO column reaches a minimum.
 *
 * The INFO text of each record is copied into a scratch buffer that is always
 * NUL-terminated at exactly info_len characters, so leftovers from a longer
 * INFO seen earlier can never be read as part of the current one. The buffer
 * grows according to its real capacity and is reused across records.
 *
 * A record fails when it has no DP field, when the field is not numeric, when
 * its value is below min_coverage, or when the scratch buffer cannot be
 * grown to hold its INFO text. Failed records are annotated with the filter
 * name and appended to the failed list.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   not used by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a coverage_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t* coverage_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    int min_coverage = ((coverage_filter_args*)f_args)->min_coverage;

    LOG_DEBUG_F("coverage_filter (min coverage = %d) over %zu records\n", min_coverage, input_records->size);

    size_t buffer_capacity = 128;
    char *aux_buffer = calloc(buffer_capacity, sizeof(char));
    if (aux_buffer == NULL) {
        // The first record will retry the allocation with the size it needs
        buffer_capacity = 0;
    }

    for (size_t i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        size_t info_len = (size_t) record->info_len;
        char *record_coverage = NULL;

        // Grow on capacity, not on the length of the previous contents
        if (info_len + 1 > buffer_capacity) {
            char *grown = realloc(aux_buffer, info_len + 1);
            if (grown != NULL) {
                aux_buffer = grown;
                buffer_capacity = info_len + 1;
            }
        }

        if (info_len + 1 <= buffer_capacity) {
            strncpy(aux_buffer, record->info, info_len);
            // strncpy does not terminate when it copies info_len characters
            aux_buffer[info_len] = '\0';
            record_coverage = get_field_value_in_info("DP", aux_buffer);
        }

        if (record_coverage != NULL && is_numeric(record_coverage) &&
            atoi(record_coverage) >= min_coverage) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    free(aux_buffer);
    return passed;
}
Beispiel #18
0
/**
 * Keeps the records whose quality reaches a minimum.
 *
 * Records with a quality below min_quality are annotated with the filter name
 * and appended to the failed list.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   not used by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a quality_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t* quality_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);
    int min_quality = ((quality_filter_args*)f_args)->min_quality;

    LOG_DEBUG_F("quality_filter (min quality = %d) over %zu records\n", min_quality, input_records->size);

    int i = 0;
    while (i < input_records->size) {
        vcf_record_t *current = input_records->items[i];

        if (!(current->quality >= min_quality)) {
            // Below the threshold: mark the record and set it aside
            annotate_failed_record(filter_name, filter_name_len, current);
            array_list_insert(current, failed);
        } else {
            array_list_insert(current, passed);
        }

        i++;
    }

    return passed;
}
Beispiel #19
0
/**
 * Reads up to num_reads paired-end records from two FASTQ files.
 *
 * Each iteration reads one four-line record from each file and appends a
 * single paired-end read built from both to the list. Reading stops when
 * num_reads pairs have been read, when the first file has no more headers, or
 * when any record is incomplete; an incomplete pair is discarded instead of
 * being built from stale buffers.
 *
 * @param reads     list receiving the paired-end reads
 * @param num_reads maximum number of pairs to read
 * @param fq_file1  file with the first read of each pair
 * @param fq_file2  file with the second read of each pair
 * @param mode      value forwarded to fastq_read_pe_new
 * @return number of paired-end reads inserted in the list
 */
size_t fastq_fread_pe(array_list_t *reads, size_t num_reads, fastq_file_t *fq_file1, fastq_file_t *fq_file2, int mode) {
    size_t count = 0;
    char header1[MAX_READ_ID_LENGTH];
    char header2[MAX_READ_ID_LENGTH];
    char read_separator[MAX_READ_ID_LENGTH];
    char sequence1[MAX_READ_SEQUENCE_LENGTH];
    char sequence2[MAX_READ_SEQUENCE_LENGTH];
    char qualities1[MAX_READ_SEQUENCE_LENGTH];
    char qualities2[MAX_READ_SEQUENCE_LENGTH];
    int header_length1, sequence_length1, quality_length1;
    int header_length2, sequence_length2, quality_length2;
    fastq_read_pe_t *read_pe;

    while (count < num_reads && fgets(header1, MAX_READ_ID_LENGTH, fq_file1->fd) != NULL) {
        // Remaining three lines of the record in the first file
        if (fgets(sequence1, MAX_READ_SEQUENCE_LENGTH, fq_file1->fd) == NULL ||
            fgets(read_separator, MAX_READ_ID_LENGTH, fq_file1->fd) == NULL ||
            fgets(qualities1, MAX_READ_SEQUENCE_LENGTH, fq_file1->fd) == NULL) {
            break;
        }

        // Matching record in the second file
        if (fgets(header2, MAX_READ_ID_LENGTH, fq_file2->fd) == NULL ||
            fgets(sequence2, MAX_READ_SEQUENCE_LENGTH, fq_file2->fd) == NULL ||
            fgets(read_separator, MAX_READ_ID_LENGTH, fq_file2->fd) == NULL ||
            fgets(qualities2, MAX_READ_SEQUENCE_LENGTH, fq_file2->fd) == NULL) {
            break;
        }

        header_length1 = strlen(header1);
        sequence_length1 = strlen(sequence1);
        quality_length1 = strlen(qualities1);

        header_length2 = strlen(header2);
        sequence_length2 = strlen(sequence2);
        quality_length2 = strlen(qualities2);

        // '\n' char is removed, but '\0' is left
        chomp_at(header1, header_length1 - 1);
        chomp_at(sequence1, sequence_length1 - 1);
        chomp_at(qualities1, quality_length1 - 1);

        chomp_at(header2, header_length2 - 1);
        chomp_at(sequence2, sequence_length2 - 1);
        chomp_at(qualities2, quality_length2 - 1);

        read_pe = fastq_read_pe_new(header1, header2, sequence1, qualities1, sequence2, qualities2, mode);
        array_list_insert(read_pe, reads);

        count++;
    }

    return count;
}
Beispiel #20
0
/**
 * Keeps the records whose proportion of missing alleles is within a limit.
 *
 * The proportion is missing_alleles divided by the sum of all allele counts
 * plus missing_alleles. Records above max_missing are annotated with the
 * filter name and appended to the failed list.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   statistics, one entry per record at the same index
 * @param filter_name   name used to annotate failed records
 * @param args          pointer to a missing_values_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t* missing_values_filter(array_list_t* input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void* args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);

    float max_missing = ((missing_values_filter_args*) args)->max_missing;
    float record_missing;
    float allele_count;

    // Statistics and records are paired by index, so the stats at position i
    // decide the fate of the record at position i
    for (size_t i = 0; i < input_records->size; i++) {
        vcf_record_t *record = input_records->items[i];
        variant_stats_t *variant_stats = input_stats[i];
        allele_count = 0;

        for (int j = 0; j < variant_stats->num_alleles; j++) {
            allele_count += variant_stats->alleles_count[j];
        }

        // NOTE(review): with no counted and no missing alleles the divisor is
        // zero; under IEEE-754 the quotient is then NaN and the comparison
        // below is false, so such a record fails -- confirm this is intended
        record_missing = variant_stats->missing_alleles / (allele_count + variant_stats->missing_alleles);

        if (record_missing <= max_missing) {
            array_list_insert(record, passed);
        } else {
            annotate_failed_record(filter_name, filter_name_len, record);
            array_list_insert(record, failed);
        }
    }

    return passed;
}
Beispiel #21
0
/**
 * Computes the statistics of every alignment in a list.
 *
 * For each element of bam1s, in order, the result of bam1_stats is appended
 * to the output list.
 *
 * @param bam1s       list of alignments to process
 * @param opts        options forwarded to bam1_stats
 * @param bam1s_stats list receiving one statistics object per alignment
 * @return size of the output list after the insertions
 */
int bam1s_stats(array_list_t *bam1s,
                bam_stats_options_t *opts,
                array_list_t *bam1s_stats) {
    size_t total = array_list_size(bam1s);

    int idx = 0;
    while (idx < total) {
        bam1_t *alignment = array_list_get(idx, bam1s);
        bam_stats_t *alignment_stats = bam1_stats(alignment, opts);
        array_list_insert(alignment_stats, bam1s_stats);
        idx++;
    }

    return array_list_size(bam1s_stats);
}
Beispiel #22
0
/**
 * Keeps the records whose type equals the requested variant type.
 *
 * Records of any other type are annotated with the filter name and appended
 * to the failed list.
 *
 * @param input_records records to filter
 * @param failed        list receiving the records that do not pass
 * @param input_stats   not used by this filter
 * @param filter_name   name used to annotate failed records
 * @param f_args        pointer to a variant_type_filter_args structure
 * @return newly created list with the records that pass
 */
array_list_t *variant_type_filter(array_list_t *input_records, array_list_t *failed, variant_stats_t **input_stats, char *filter_name, void *f_args) {
    assert(input_records);
    assert(failed);

    array_list_t *passed = array_list_new(input_records->size + 1, 1, COLLECTION_MODE_ASYNCHRONIZED);
    size_t filter_name_len = strlen(filter_name);
    enum variant_type type = ((variant_type_filter_args*)f_args)->type;

    LOG_DEBUG_F("variant_type_filter (variant_type  %d) over %zu records\n", type, input_records->size);

    for (int i = 0; i < input_records->size; i++) {
        vcf_record_t *current = input_records->items[i];

        if (current->type == type) {
            array_list_insert(current, passed);
            continue;
        }

        // Different type: mark the record and set it aside
        annotate_failed_record(filter_name, filter_name_len, current);
        array_list_insert(current, failed);
    }

    return passed;
}
Beispiel #23
0
/**
 * Producer entry point: wraps data in a work item and queues it for a stage.
 *
 * Waits on wf->producer_cond while the workflow already holds
 * max_num_work_items items or more. When the insertion succeeds the pending
 * counter is incremented and the workflow is recorded as the item's context.
 * Workers are woken up in any case before the lock is released.
 *
 * @param stage_id stage the item is queued for
 * @param data     payload of the new work item
 * @param wf       workflow receiving the item
 */
void workflow_insert_item_at(int stage_id, void *data, workflow_t *wf) {
    work_item_t *new_item = work_item_new(stage_id, data);

    pthread_mutex_lock(&wf->main_mutex);

    // Back-pressure: wait until there is room for one more item
    while (workflow_get_num_items_(wf) >= wf->max_num_work_items) {
        pthread_cond_wait(&wf->producer_cond, &wf->main_mutex);
    }

    int inserted = array_list_insert(new_item, wf->pending_items[stage_id]);
    if (inserted) {
        new_item->context = (void *) wf;
        wf->num_pending_items++;
    }

    pthread_cond_broadcast(&wf->workers_cond);
    pthread_mutex_unlock(&wf->main_mutex);
}
Beispiel #24
0
/**
 * Stores a batch of variant statistics in a database.
 *
 * Every statistics entry of the batch is converted to a database-fields
 * object; the whole list is inserted with a single call and then released
 * together with its elements.
 *
 * @param db           database receiving the statistics
 * @param num_variants number of entries in the batch
 * @param stats_batch  statistics to store
 */
static void report_vcf_variant_stats_sqlite3(sqlite3 *db, int num_variants, variant_stats_t **stats_batch) {
    array_list_t *rows = array_list_new(num_variants + 1, 1.1, COLLECTION_MODE_ASYNCHRONIZED);

    int idx = 0;
    while (idx < num_variants) {
        variant_stats_t *vs = stats_batch[idx];

        variant_stats_db_fields_t *row = variant_stats_db_fields_new(
            vs->chromosome, vs->position, vs->ref_allele, vs->alt_alleles,
            vs->maf_allele, vs->maf, vs->mgf_genotype, vs->mgf,
            vs->missing_alleles, vs->missing_genotypes,
            vs->mendelian_errors, vs->is_indel,
            vs->cases_percent_dominant, vs->controls_percent_dominant,
            vs->cases_percent_recessive, vs->controls_percent_recessive);

        array_list_insert(row, rows);
        idx++;
    }

    insert_variant_stats_db_fields_list(rows, db);

    // The list owns the rows, so they are released along with it
    array_list_free(rows, (void *)variant_stats_db_fields_free);
}
Beispiel #25
0
/*
 * Small demonstration of the array list: fills a list with a fixed set of
 * keys and prints its state after trimming, setting, removing and inserting
 * the item at a fixed index.
 */
int main( int argc, char **args ) {

    fstring keys[] = {
        "chenmanwen", "yangqinghua",
        "chenxin", "luojiangyan", "xiaoyanzi", "bibi",
        "zhangrenfang", "yangjian",
        "liuxiao", "pankai",
        "chenpei", "liheng", "zhangzhigang", "zhgangyishao", "yangjiangbo",
        "caizaili", "panpan", "xiaolude", "yintanwen"
    };
    int len = sizeof( keys ) / sizeof( fstring );
    int idx = 2;
    int j = 0;

    /* create the list and load every key into it */
    friso_array_t array = new_array_list();
    while ( j < len ) {
        array_list_add( array, keys[j] );
        j++;
    }

    /* state before and after trimming */
    printf("length=%d, allocations=%d\n", array->length, array->allocs );
    array_list_trim( array );
    printf("after tirm length=%d, allocations=%d\n", array->length, array->allocs );
    printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );

    /* replace the item at idx */
    printf("\nAfter set %dth item.\n", idx );
    array_list_set( array, idx, "chenxin__" );
    printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );

    /* remove the item at idx */
    printf("\nAfter remove %dth item.\n", idx );
    array_list_remove( array, idx );
    printf("length=%d, allocations=%d\n", array->length, array->allocs );
    printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );

    /* insert a new item at idx */
    printf("\nInsert a item at %dth\n", idx );
    array_list_insert( array, idx, "*chenxin*" );
    printf("idx=%d, value=%s\n", idx, ( fstring ) array_list_get( array, idx ) );

    free_array_list( array );

    return 0;
}
/**
 * Creates a copy of a record that shares no text buffers with the original.
 *
 * The chromosome, id, reference, alternate, filter, info and format texts are
 * duplicated using their stored lengths, the position is copied, and every
 * sample is duplicated into a new list. The remaining fields keep the zero
 * value given by calloc.
 *
 * NOTE(review): other code reads record->quality, which is not copied here;
 * confirm whether the copy is expected to carry it.
 *
 * @param orig record to copy
 * @return newly allocated record
 */
vcf_record_t *vcf_record_copy(vcf_record_t *orig) {
    vcf_record_t *copy = (vcf_record_t*) calloc (1, sizeof(vcf_record_t));

    // Plain values
    copy->position = orig->position;
    copy->chromosome_len = orig->chromosome_len;
    copy->id_len = orig->id_len;
    copy->reference_len = orig->reference_len;
    copy->alternate_len = orig->alternate_len;
    copy->filter_len = orig->filter_len;
    copy->info_len = orig->info_len;
    copy->format_len = orig->format_len;

    // Text fields, duplicated up to their stored length
    copy->chromosome = strndup(orig->chromosome, orig->chromosome_len);
    copy->id = strndup(orig->id, orig->id_len);
    copy->reference = strndup(orig->reference, orig->reference_len);
    copy->alternate = strndup(orig->alternate, orig->alternate_len);
    copy->filter = strndup(orig->filter, orig->filter_len);
    copy->info = strndup(orig->info, orig->info_len);
    copy->format = strndup(orig->format, orig->format_len);

    // Samples, duplicated one by one in their original order
    copy->samples = array_list_new(orig->samples->size + 1, 1.5, COLLECTION_MODE_ASYNCHRONIZED);
    int s = 0;
    while (s < orig->samples->size) {
        array_list_insert(strdup(array_list_get(s, orig->samples)), copy->samples);
        s++;
    }

    return copy;
}
/**
 * Appends a copy of a value to a VCF header entry.
 *
 * At most `length` characters of `value` are duplicated with strndup() and
 * the duplicate is inserted into entry->values.
 *
 * @param value  Text to copy; must not be NULL (checked with assert).
 * @param length Maximum number of characters of `value` to copy.
 * @param entry  Header entry whose values list receives the copy; must not
 *               be NULL (checked with assert).
 */
void add_vcf_header_entry_value(char *value, int length, vcf_header_entry_t *entry) {
    assert(value);
    assert(entry);

    char *value_copy = strndup(value, length);
    // The insertion result was never used by this function; discard it explicitly
    (void) array_list_insert(value_copy, entry->values);
}
Beispiel #28
0
/**
 * Reads single-end reads from a gzip-compressed FASTQ file.
 *
 * The function inflates the file CHUNK bytes at a time, appending the
 * decompressed bytes to a working buffer, until at least 4 * num_reads new
 * lines have been seen, the compressed stream ends, or no more input is
 * available. The buffer is then split into lines; every group of four lines
 * (id, sequence, separator, qualities) becomes one fastq_read_t appended to
 * `reads`. Bytes that were decompressed but not parsed are stored in
 * fq_gzfile->data / fq_gzfile->data_size and are prepended on the next call.
 *
 * @param reads      List that receives the newly created reads.
 * @param num_reads  Number of reads requested (4 lines each).
 * @param fq_gzfile  Open gzip FASTQ handle; its stream state, `ret`, `data`
 *                   and `data_size` fields are updated.
 * @return reads->size after the new reads have been appended. On failure the
 *         zlib code (Z_ERRNO, Z_DATA_ERROR or Z_MEM_ERROR) is returned
 *         converted to size_t, as the original implementation did.
 *
 * NOTE(review): consume_input() is assumed to store the byte at the given
 * position, growing *data when needed, and to return the (possibly new)
 * capacity - confirm against its definition.
 * NOTE(review): fastq_read_new() is assumed to copy its three string
 * arguments, since the buffers passed to it are released before returning.
 */
size_t fastq_gzread_se(array_list_t *reads, size_t num_reads, fastq_gzfile_t *fq_gzfile) {
	size_t num_lines_to_read = 4 * num_reads;	/* Each read consists of 4 lines */
	size_t lines = 0;				/* newlines inflated during this call */
	int max_data_len = CHUNK;			/* capacity of the working buffer */
	int i = 0;					/* bytes currently stored in the working buffer */
	char *data;

	// ZLIB variables
	unsigned have;
	unsigned char in[CHUNK];
	unsigned char out[CHUNK];

	// Make room for the bytes left over from a previous call, if they exceed one chunk
	if (fq_gzfile->data != NULL && fq_gzfile->data_size > max_data_len) {
		max_data_len = fq_gzfile->data_size + max_data_len;
	}
	data = (char*) calloc (max_data_len, sizeof(char));
	if (data == NULL) {
		return Z_MEM_ERROR;
	}
	if (fq_gzfile->data != NULL) {
		// The leftover is length-delimited, not NUL-terminated, so copy it as raw bytes
		memcpy(data, fq_gzfile->data, fq_gzfile->data_size);
		i = fq_gzfile->data_size;
	}

	do {
		fq_gzfile->strm.avail_in = fread(in, 1, CHUNK, fq_gzfile->fd);
		if (ferror(fq_gzfile->fd)) {
			(void)inflateEnd(&fq_gzfile->strm);
			free(data);
			return Z_ERRNO;
		}
		if (fq_gzfile->strm.avail_in == 0) {
			break;
		}
		fq_gzfile->strm.next_in = in;

		/* run inflate() on input until output buffer not full */
		do {
			fq_gzfile->strm.avail_out = CHUNK;
			fq_gzfile->strm.next_out = out;
			fq_gzfile->ret = inflate(&fq_gzfile->strm, Z_NO_FLUSH);
			assert(fq_gzfile->ret != Z_STREAM_ERROR);  /* state not clobbered */
			switch (fq_gzfile->ret) {
			case Z_NEED_DICT:
				fq_gzfile->ret = Z_DATA_ERROR;
				/* fall through */
			case Z_DATA_ERROR:
			case Z_MEM_ERROR:
				(void)inflateEnd(&fq_gzfile->strm);
				free(data);
				return fq_gzfile->ret;
			default:
				break;
			}

			have = CHUNK - fq_gzfile->strm.avail_out;
			// Bytes come from an unsigned char buffer, so they can never equal EOF;
			// every inflated byte is appended to the working buffer.
			for (unsigned j = 0; j < have; j++) {
				int c = out[j];
				max_data_len = consume_input(c, &data, max_data_len, i);
				if (c == '\n') {
					lines++;
				}
				i++;
			}
		} while (fq_gzfile->strm.avail_out == 0);

		/* done when inflate() says it's done */
	} while (lines < num_lines_to_read && fq_gzfile->ret != Z_STREAM_END);

	// Split the buffer into lines in place. The id and sequence lines are
	// remembered as pointers into the buffer instead of being copied into
	// fixed-size scratch buffers, so arbitrarily long lines cannot overflow.
	size_t data_len = (size_t) i;
	size_t parsed_chars;
	size_t parsed_lines = 0;
	char *line = data;
	char *id = NULL;
	char *seq = NULL;
	for (parsed_chars = 0; parsed_chars < data_len && parsed_lines < num_lines_to_read; parsed_chars++) {
		if (data[parsed_chars] != '\n') {
			continue;
		}
		data[parsed_chars] = '\0';
		switch (parsed_lines % 4) {
		case 0:
			id = line;
			break;
		case 1:
			seq = line;
			break;
		case 3:
			// Fourth line holds the qualities: the record is complete
			array_list_insert(fastq_read_new(id, seq, line), reads);
			break;
		default:
			// Third line is the separator and is ignored
			break;
		}
		line = data + parsed_chars + 1;
		parsed_lines++;
	}

	// Keep the unparsed tail for the next call. The stored buffer is always
	// resized to the tail length: its previous capacity is not tracked, so it
	// cannot be trusted to be large enough.
	size_t data_size = data_len - parsed_chars;
	if (data_size > 0) {
		char *saved = realloc(fq_gzfile->data, data_size);
		if (saved == NULL) {
			// The old buffer is still owned by fq_gzfile; mark it as empty
			fq_gzfile->data_size = 0;
			free(data);
			return Z_MEM_ERROR;
		}
		memcpy(saved, data + parsed_chars, data_size);
		fq_gzfile->data = saved;
	}
	fq_gzfile->data_size = data_size;

	free(data);

	return reads->size;
}
/**
 * Appends a record to the records list of a VCF batch.
 *
 * @param record Record to append; must not be NULL (checked with assert).
 * @param batch  Batch whose records list receives it; must not be NULL
 *               (checked with assert).
 */
void add_record_to_vcf_batch(vcf_record_t *record, vcf_batch_t *batch) {
    assert(record);
    assert(batch);

    (void) array_list_insert(record, batch->records);
}
Beispiel #30
0
/*
 * Inserts obj into list at the given index by forwarding to
 * array_list_insert(), treating the Array as an ArrayList.
 */
void array_insert(Array * const list, int index, Object obj) {
  ArrayList * const backing = (ArrayList * const ) list;

  array_list_insert(backing, index, obj);
}