/**
 * Creates the table of output file handles and opens the four fixed report
 * files ("all_variants.txt", "summary.txt", "snp_phenotypes.txt" and
 * "mutation_phenotypes.txt") inside the output directory, in append mode.
 * Each handle is registered in the table under a heap-allocated string key
 * ("all_variants", "summary", "snp_phenotypes", "mutation_phenotypes").
 *
 * @param output_directory      Directory where the files are created
 * @param output_directory_len  Length of output_directory; used to size the path buffers
 * @param output_files          Out parameter receiving the newly created table. It is
 *                              assigned even when a later step fails, so files opened
 *                              before the failure remain registered in it.
 * @return 0 on success, 1 if all_variants.txt can't be opened, 2 if summary.txt
 *         can't be opened, 3 if either phenotype file can't be opened
 */
static int initialize_output_files(char *output_directory, size_t output_directory_len, cp_hashtable **output_files) {
    // Initialize collections of file descriptors
    *output_files = cp_hashtable_create_by_option(COLLECTION_MODE_DEEP,
                                                  50,
                                                  cp_hash_int,
                                                  (cp_compare_fn) int_cmp,
                                                  NULL,
                                                  (cp_destructor_fn) free_file_key1,
                                                  NULL,
                                                  (cp_destructor_fn) free_file_descriptor
                                                 );

    // Each buffer holds the directory, the fixed suffix and the terminator;
    // snprintf keeps the write bounded even if output_directory_len is stale.
    char all_variants_filename[output_directory_len + 18];
    snprintf(all_variants_filename, sizeof all_variants_filename, "%s/all_variants.txt", output_directory);
    FILE *all_variants_file = fopen(all_variants_filename, "a");
    if (!all_variants_file) {   // Can't store results
        return 1;
    }
    char *key = (char*) calloc (13, sizeof(char));
    strncat(key, "all_variants", 12);
    cp_hashtable_put(*output_files, key, all_variants_file);

    char summary_filename[output_directory_len + 13];
    snprintf(summary_filename, sizeof summary_filename, "%s/summary.txt", output_directory);
    FILE *summary_file = fopen(summary_filename, "a");
    if (!summary_file) {   // Can't store results
        return 2;
    }
    key = (char*) calloc (8, sizeof(char));
    strncat(key, "summary", 7);
    cp_hashtable_put(*output_files, key, summary_file);

    char snp_phenotype_filename[output_directory_len + 20];
    snprintf(snp_phenotype_filename, sizeof snp_phenotype_filename, "%s/snp_phenotypes.txt", output_directory);
    FILE *snp_phenotype_file = fopen(snp_phenotype_filename, "a");
    // Check the stream, not the filename array (an array is never NULL)
    if (!snp_phenotype_file) {
        return 3;
    }
    key = (char*) calloc (15, sizeof(char));
    strncat(key, "snp_phenotypes", 14);
    cp_hashtable_put(*output_files, key, snp_phenotype_file);

    char mutation_phenotype_filename[output_directory_len + 25];
    snprintf(mutation_phenotype_filename, sizeof mutation_phenotype_filename, "%s/mutation_phenotypes.txt", output_directory);
    FILE *mutation_phenotype_file = fopen(mutation_phenotype_filename, "a");
    // Check the stream, not the filename array (an array is never NULL)
    if (!mutation_phenotype_file) {
        return 3;
    }
    key = (char*) calloc (20, sizeof(char));
    strncat(key, "mutation_phenotypes", 19);
    cp_hashtable_put(*output_files, key, mutation_phenotype_file);

    return 0;
}
cp_hashtable *load_hasthable_codes() { cp_hashtable *t = cp_hashtable_create(400, cp_hash_istring, (cp_compare_fn)strcasecmp); /*const unsigned char COMBINATORIAL = (NUCLEOTIDES_NUM * NUCLEOTIDES_NUM * NUCLEOTIDES_NUM) + (NUCLEOTIDES_NUM * NUCLEOTIDES_NUM) + NUCLEOTIDES_NUM; unsigned char *id_array = (unsigned char *)malloc(sizeof(unsigned char)*COMBINATORIAL); */ size_t id = 0; char combination[4]; combination[3] = '\0'; for(unsigned int nt_1 = 0; nt_1 < NUCLEOTIDES_NUM; nt_1++){ combination[0] = NUCLEOTIDES[nt_1]; for(unsigned int nt_2 = 0; nt_2 < NUCLEOTIDES_NUM; nt_2++){ combination[1] = NUCLEOTIDES[nt_2]; for(unsigned int nt_3 = 0; nt_3 < NUCLEOTIDES_NUM; nt_3++){ combination[2] = NUCLEOTIDES[nt_3]; cp_hashtable_put(t, strdup(combination), (void *)id); id++; } } } // printf("Table size %d\n", t->table_size); combination[2] = '\0'; for(unsigned int nt_1 = 0; nt_1 < NUCLEOTIDES_NUM; nt_1++){ combination[0] = NUCLEOTIDES[nt_1]; for(unsigned int nt_2 = 0; nt_2 < NUCLEOTIDES_NUM; nt_2++){ combination[1] = NUCLEOTIDES[nt_2]; cp_hashtable_put(t, strdup(combination), (void *)id); id++; } } //printf("Table size %d\n", t->table_size); combination[1] = '\0'; for(unsigned int nt = 0; nt < NUCLEOTIDES_NUM; nt++){ combination[0] = NUCLEOTIDES[nt]; cp_hashtable_put(t, strdup(combination), (void *)id); id++; } return t; }
/**
 * Builds a table mapping each sample name of a VCF file to its position in
 * the file's list of sample names. Keys are the name pointers held by the
 * file (not copies), compared with strcasecmp; values are heap-allocated
 * ints. A name that is already present in the table is reported through
 * LOG_FATAL_F.
 *
 * @param file  VCF file whose samples_names list is indexed
 * @return The newly created table
 */
cp_hashtable* associate_samples_and_positions(vcf_file_t* file) {
    LOG_DEBUG_F("** %zu sample names read\n", file->samples_names->size);
    array_list_t *names = file->samples_names;
    cp_hashtable *positions = cp_hashtable_create(names->size * 2,
                                                  cp_hash_string,
                                                  (cp_compare_fn) strcasecmp
                                                 );

    int pos = 0;
    while (pos < names->size) {
        char *sample = names->items[pos];
        int *stored_pos = malloc(sizeof *stored_pos);
        *stored_pos = pos;

        if (cp_hashtable_get(positions, sample)) {
            LOG_FATAL_F("Sample %s appears more than once. File can not be analyzed.\n", sample);
        }

        cp_hashtable_put(positions, sample, stored_pos);
        pos++;
    }

    return positions;
}
/**
 * Returns the serial number associated with a thread identifier, assigning
 * the next value of thread_count the first time an identifier is seen.
 *
 * @param tno  Thread identifier to look up in the thread_id table
 * @return The serial number for tno; 0 if the thread_id table has not been
 *         created; -1 if memory for a new mapping can't be allocated
 */
static long get_thread_serial(long tno)
{
    long *num;

    if (thread_id == NULL) return 0;

    num = cp_hashtable_get(thread_id, &tno);
    if (num == NULL)
    {
        long *key;

        num = malloc(sizeof *num);
        if (num == NULL)
        {
            cp_error(CP_MEMORY_ALLOCATION_FAILURE, "can\'t allocate thread mapping number");
            return -1L;
        }
        key = malloc(sizeof *key);
        if (key == NULL)
        {
            cp_error(CP_MEMORY_ALLOCATION_FAILURE, "can\'t allocate thread mapping key");
            free(num);   /* not stored in the table yet, so release it here */
            return -1L;
        }

        *num = ++thread_count;
        *key = tno;
        cp_hashtable_put(thread_id, key, num);
    }

    return *num;
}
void *writer(void *prm) { int i, num; long count = (long) prm; char kbuf[30]; char *entry; cp_mutex_lock(&start_mutex); while (!running) cp_cond_wait(&start_cond, &start_mutex); cp_mutex_unlock(&start_mutex); for (i = 0; i < count; i++) { sprintf(kbuf, "ENTRY %d", i); entry = strdup(kbuf); num = i % COUNT; if (!silent) printf("writing (%d): %s\n", num, entry); cp_mutex_lock(&lock[i % COUNT]); cp_hashtable_put(t[num], entry, entry); cp_list_append(tl[num], entry); cp_cond_broadcast(&cond[i % COUNT]); cp_mutex_unlock(&lock[i % COUNT]); } return NULL; }
int load_mime_types(char *filename) { FILE *fp; char mimebuf[LINELEN]; int rc = 0; char *name; char *ext; char *curr; mimemap = cp_hashtable_create_by_option(COLLECTION_MODE_NOSYNC | COLLECTION_MODE_COPY | COLLECTION_MODE_DEEP, 500, cp_hash_string, cp_hash_compare_string, (cp_copy_fn) strdup, (cp_destructor_fn) free, (cp_copy_fn) strdup, (cp_destructor_fn) free); fp = fopen(filename, "r"); if (fp == NULL) { cp_error(CP_INVALID_VALUE, "can\'t open %s", filename); cp_hashtable_destroy(mimemap); return -1; } while (fgets(mimebuf, LINELEN, fp)) { if (mimebuf[0] == '#') continue; name = curr = mimebuf; while (*curr && !isspace(*curr)) curr++; if (*curr == '\0') continue; /* no extension for this type */ *curr++ = '\0'; while (1) { while (*curr && isspace(*curr)) curr++; ext = curr; while (*curr && !isspace(*curr)) curr++; if (strlen(ext)) { *curr++ = '\0'; cp_hashtable_put(mimemap, ext, name); } else break; } } fclose(fp); return rc; }
int cp_log_init(char *filename, int verbosity) { int i, err_code_count; unsigned long thread_no; #ifdef SIGHUP struct sigaction act; #endif log_filename = strdup(filename); logfile = fopen(filename, "a+"); if (logfile == NULL) return CP_LOG_FILE_OPEN_FAILURE; loglevel = verbosity; err_code_count = sizeof(error_messages) / sizeof(error_code_legend); error_message_lookup = cp_hashtable_create_by_mode(COLLECTION_MODE_NOSYNC, err_code_count * 2, cp_hash_int, cp_hash_compare_int); for (i = 0; i < err_code_count; i++) { error_code_legend *entry = &error_messages[i]; cp_hashtable_put(error_message_lookup, &entry->code, entry->msg); } thread_id = cp_hashtable_create_by_option(COLLECTION_MODE_DEEP, 10, cp_hash_long, cp_hash_compare_long, NULL, free, NULL, free); thread_count = 0; // thread_no = (unsigned long) pthread_self(); thread_no = (unsigned long) cp_thread_self(); get_thread_serial(thread_no); /* establish this as thread number one */ log_closing = 0; #ifdef SIGHUP act.sa_handler = cp_log_default_signal_handler; sigemptyset(&act.sa_mask); act.sa_flags = 0; sigaction(SIGHUP, &act, NULL); #endif // signal(SIGHUP, cp_log_default_signal_handler); return 0; }
/**
 * Parses the batch of response lines stored in effect_line[tid]. Every line
 * with exactly 25 tab-separated fields contributes:
 *  - its gene (field 17) to gene_list, if not already there,
 *  - a file handle in output_files, keyed by the numeric SO code taken from
 *    field 18 (after its 3-character prefix) and named after the
 *    consequence type in field 19; the file is opened on first use,
 *  - an increment of the consequence type counter in summary_count,
 *  - a copy of the whole line, queued in output_list together with the
 *    thread id and the SO code.
 * Lines with another number of fields, or whose SO code is 0, are skipped
 * (and logged, unless the line is empty).
 *
 * @param tid                   Index of the calling thread; selects the batch to parse
 * @param output_directory      Directory where consequence type files are created
 * @param output_directory_len  Length of output_directory
 * @param output_files          Table of SO code -> FILE* handles
 * @param output_list           List receiving the lines to be written
 * @param summary_count         Table of consequence type name -> occurrence counter
 * @param gene_list             Table used as a set of gene names
 */
static void parse_effect_response(int tid, char *output_directory, size_t output_directory_len, cp_hashtable *output_files,
                                  list_t *output_list, cp_hashtable *summary_count, cp_hashtable *gene_list) {
    // SO code of the current line (0 = not found). Kept on the stack: the
    // hashtable lookups only need its address for the duration of the call.
    int SO_found = 0;
    int *count;
    char tmp_consequence_type[128];

    int num_lines;
    char **split_batch = split(effect_line[tid], "\n", &num_lines);

    for (int i = 0; i < num_lines; i++) {
        int num_columns;
        char *copy_buf = strdup(split_batch[i]);
        char **split_result = split(copy_buf, "\t", &num_columns);
        free(copy_buf);

        // Find consequence type name (always after SO field)
        SO_found = 0;
        int valid_line = (num_columns == 25);
        if (valid_line) {
            if (!cp_hashtable_contains(gene_list, split_result[17])) {
                cp_hashtable_put(gene_list, strdup(split_result[17]), NULL);
            }
            // Skip the 3-character prefix only when the field is long enough to have one
            if (strlen(split_result[18]) > 3) {
                SO_found = atoi(split_result[18] + 3);
            }
            // Bounded copy: over-long names are truncated instead of overflowing the buffer
            snprintf(tmp_consequence_type, sizeof tmp_consequence_type, "%s", split_result[19]);
        }

        // The fields are no longer needed, whatever the outcome
        for (int s = 0; s < num_columns; s++) {
            free(split_result[s]);
        }
        free(split_result);

        if (!valid_line) {
            // Last line in batch could be only a newline: not worth reporting
            if (strlen(split_batch[i]) > 0) {
                LOG_INFO_F("[%d] Non-valid line found (%d fields): '%s'\n", tid, num_columns, split_batch[i]);
            }
            continue;
        }

        if (!SO_found) { // SO:000000 is not valid
            LOG_INFO_F("[%d] Non-valid SO found (0)\n", tid);
            continue;
        }

        size_t consequence_type_len = strlen(tmp_consequence_type);

        // If file does not exist, create its descriptor and summary counter
        FILE *aux_file = cp_hashtable_get(output_files, &SO_found);
        if (!aux_file) {
#pragma omp critical
            {
                // Looking up again inside the critical section avoids 2 threads
                // trying to insert the same CT
                aux_file = cp_hashtable_get(output_files, &SO_found);
                if (!aux_file) {
                    // directory + '/' + consequence type + ".txt" + terminator
                    char filename[output_directory_len + consequence_type_len + 6];
                    snprintf(filename, sizeof filename, "%.*s/%s.txt",
                             (int) output_directory_len, output_directory, tmp_consequence_type);
                    aux_file = fopen(filename, "a");

                    if (aux_file) {
                        // Add to hashtable of file descriptors
                        int *SO_stored = (int*) malloc (sizeof(int));
                        if (SO_stored) {
                            *SO_stored = SO_found;
                            cp_hashtable_put(output_files, SO_stored, aux_file);
                            LOG_INFO_F("[%d] New consequence type found = %s\n", tid, tmp_consequence_type);
                        } else {
                            fclose(aux_file);
                            aux_file = NULL;
                        }
                    } else {
                        // Never register a NULL handle: the line is skipped and
                        // the open is retried the next time this SO code shows up
                        LOG_INFO_F("[%d] Can't open output file '%s'\n", tid, filename);
                    }
                }
            }
        }

        // Write line[tid] to file corresponding to its consequence type
        if (aux_file) {
#pragma omp critical
            {
                // TODO move critical one level below?
                count = (int*) cp_hashtable_get(summary_count, tmp_consequence_type);
                if (count == NULL) {
                    char *consequence_type = strdup(tmp_consequence_type);
                    count = (int*) malloc (sizeof(int));
                    if (consequence_type && count) {
                        *count = 0;
                        cp_hashtable_put(summary_count, consequence_type, count);
                    } else {
                        // Out of memory: drop the counter rather than dereference NULL
                        free(consequence_type);
                        free(count);
                        count = NULL;
                    }
                }
                // Increment counter for summary
                if (count) {
                    (*count)++;
                }
            }

            list_item_t *output_item = list_item_new(tid, SO_found, strdup(split_batch[i]));
            list_insert_item(output_item, output_list);
        }
    }

    for (int i = 0; i < num_lines; i++) {
        free(split_batch[i]);
    }
    free(split_batch);
}
/**
 * Prepares all the file-scope state used to store web service results:
 *  - the output text list (output_list),
 *  - the table of output file handles (output_files), where the four fixed
 *    report files are opened in append mode inside the output directory and
 *    registered under the string keys "all_variants", "summary",
 *    "snp_phenotypes" and "mutation_phenotypes",
 *  - the summary counters and gene list tables,
 *  - per-thread line buffers, each starting at 512 bytes.
 *
 * @param shared_options  Supplies the number of threads, the output directory
 *                        and the batch limits used to size the output list
 * @param options_data    Not used by this function
 * @return 0 on success, 1 if all_variants.txt can't be opened, 2 if summary.txt
 *         can't be opened, 3 if either phenotype file can't be opened. On
 *         failure the state initialized so far is left in place.
 */
int initialize_ws_output(shared_options_data_t *shared_options, effect_options_data_t *options_data){
    int num_threads = shared_options->num_threads;
    char *outdir = shared_options->output_directory;
    size_t outdir_len = strlen(outdir);

    // Initialize output text list
    output_list = (list_t*) malloc (sizeof(list_t));
    list_init("output", num_threads, shared_options->max_batches * shared_options->batch_lines, output_list);

    // Initialize collections of file descriptors
    output_files = cp_hashtable_create_by_option(COLLECTION_MODE_DEEP,
                                                 50,
                                                 cp_hash_int,
                                                 (cp_compare_fn) int_cmp,
                                                 NULL,
                                                 (cp_destructor_fn) free_file_key1,
                                                 NULL,
                                                 (cp_destructor_fn) free_file_descriptor
                                                );

    // Each buffer holds the directory, the fixed suffix and the terminator
    char all_variants_filename[outdir_len + 18];
    snprintf(all_variants_filename, sizeof all_variants_filename, "%s/all_variants.txt", outdir);
    all_variants_file = fopen(all_variants_filename, "a");
    if (!all_variants_file) {   // Can't store results
        return 1;
    }
    char *key = (char*) calloc (13, sizeof(char));
    strncat(key, "all_variants", 12);
    cp_hashtable_put(output_files, key, all_variants_file);

    char summary_filename[outdir_len + 13];
    snprintf(summary_filename, sizeof summary_filename, "%s/summary.txt", outdir);
    summary_file = fopen(summary_filename, "a");
    if (!summary_file) {   // Can't store results
        return 2;
    }
    key = (char*) calloc (8, sizeof(char));
    strncat(key, "summary", 7);
    cp_hashtable_put(output_files, key, summary_file);

    char snp_phenotype_filename[outdir_len + 20];
    snprintf(snp_phenotype_filename, sizeof snp_phenotype_filename, "%s/snp_phenotypes.txt", outdir);
    snp_phenotype_file = fopen(snp_phenotype_filename, "a");
    // Check the stream, not the filename array (an array is never NULL)
    if (!snp_phenotype_file) {
        return 3;
    }
    key = (char*) calloc (15, sizeof(char));
    strncat(key, "snp_phenotypes", 14);
    cp_hashtable_put(output_files, key, snp_phenotype_file);

    char mutation_phenotype_filename[outdir_len + 25];
    snprintf(mutation_phenotype_filename, sizeof mutation_phenotype_filename, "%s/mutation_phenotypes.txt", outdir);
    mutation_phenotype_file = fopen(mutation_phenotype_filename, "a");
    // Check the stream, not the filename array (an array is never NULL)
    if (!mutation_phenotype_file) {
        return 3;
    }
    key = (char*) calloc (20, sizeof(char));
    strncat(key, "mutation_phenotypes", 19);
    cp_hashtable_put(output_files, key, mutation_phenotype_file);

    // Initialize summary counters and genes list
    summary_count = cp_hashtable_create_by_option(COLLECTION_MODE_DEEP,
                                                  64,
                                                  cp_hash_istring,
                                                  (cp_compare_fn) strcasecmp,
                                                  NULL,
                                                  (cp_destructor_fn) free_file_key2,
                                                  NULL,
                                                  (cp_destructor_fn) free_summary_counter
                                                 );

    gene_list = cp_hashtable_create_by_option(COLLECTION_MODE_DEEP,
                                              64,
                                              cp_hash_istring,
                                              (cp_compare_fn) strcasecmp,
                                              NULL,
                                              (cp_destructor_fn) free_file_key2,
                                              NULL,
                                              NULL
                                             );

    // Create a buffer for each thread
    line = (char**) calloc (num_threads, sizeof(char*));
    output_line = (char**) calloc (num_threads, sizeof(char*));
    max_line_size = (int*) calloc (num_threads, sizeof(int));

    snp_line = (char**) calloc (num_threads, sizeof(char*));
    snp_output_line = (char**) calloc (num_threads, sizeof(char*));
    snp_max_line_size = (int*) calloc (num_threads, sizeof(int));

    mutation_line = (char**) calloc (num_threads, sizeof(char*));
    mutation_output_line = (char**) calloc (num_threads, sizeof(char*));
    mutation_max_line_size = (int*) calloc (num_threads, sizeof(int));

    for (int i = 0; i < num_threads; i++) {
        max_line_size[i] = snp_max_line_size[i] = mutation_max_line_size[i] = 512;

        line[i] = (char*) calloc (max_line_size[i], sizeof(char));
        output_line[i] = (char*) calloc (max_line_size[i], sizeof(char));
        snp_line[i] = (char*) calloc (snp_max_line_size[i], sizeof(char));
        snp_output_line[i] = (char*) calloc (snp_max_line_size[i], sizeof(char));
        mutation_line[i] = (char*) calloc (mutation_max_line_size[i], sizeof(char));
        mutation_output_line[i] = (char*) calloc (mutation_max_line_size[i], sizeof(char));
    }

    return 0;
}
static size_t write_effect_ws_results(char *contents, size_t size, size_t nmemb, void *userdata) { int tid = omp_get_thread_num(); int i = 0; int data_read_len = 0, next_line_len = 0; // Whether the SO code field (previous to the consequence type name) has been found int *SO_found = (int*) malloc (sizeof(int)); // Whether the buffer was consumed with a line read just partially int premature_end = 0; size_t realsize = size * nmemb; int *count; char *data = contents; char tmp_consequence_type[128]; char *aux_buffer; char *output_text; LOG_DEBUG_F("Effect WS invoked, response size = %zu bytes\n", realsize); while (data_read_len < realsize) { assert((line + tid) != NULL); assert((max_line_size + tid) != NULL); LOG_DEBUG_F("[%d] loop iteration #%d\n", tid, i); // Get length of data to copy next_line_len = strcspn(data, "\n"); // If the line[tid] is too long for the current buffers, reallocate a little more than the needed memory if (strlen(line[tid]) + next_line_len + 1 > max_line_size[tid]) { // LOG_DEBUG_F("Line too long (%d elements, but %zu needed) in batch #%d\n", // max_line_size[tid], strlen(line[tid]) + next_line_len, batch_num); // char *out_buf = (char*) calloc (next_line_len+1, sizeof(char)); // snprintf(out_buf, next_line_len, "%s", data); // LOG_INFO_F("[%d] too big data is: '%s'\n", tid, out_buf); char *aux_1 = (char*) realloc (line[tid], (max_line_size[tid] + next_line_len + 1) * sizeof(char)); char *aux_2 = (char*) realloc (output_line[tid], (max_line_size[tid] + next_line_len + 1) * sizeof(char)); if (!aux_1 || !aux_2) { LOG_ERROR("Can't resize buffers\n"); // Can't resize buffers -> can't keep reading the file if (!aux_1) { free(line[tid]); } if (!aux_2) { free(output_line[tid]); } return data_read_len; } line[tid] = aux_1; output_line[tid] = aux_2; max_line_size[tid] += next_line_len + 1; // LOG_DEBUG_F("[%d] buffers realloc'd (%d)\n", tid, max_line_size[tid]); } // LOG_DEBUG_F("[%d] position = %d, read = %d, max_size = %zu\n", i, next_line_len, 
data_read_len, realsize); if (data_read_len + next_line_len >= realsize) { // Save current state (line[tid] partially read) strncat(line[tid], data, next_line_len); chomp(line[tid]); line[tid][strlen(line[tid])] = '\0'; premature_end = 1; // LOG_DEBUG_F("widow line[tid] = '%s'\n", line[tid]); data_read_len = realsize; break; } strncat(line[tid], data, next_line_len); strncat(output_line[tid], line[tid], strlen(line[tid])); // LOG_DEBUG_F("[%d] copy to buffer (%zu)\n", tid, strlen(line[tid])); int num_substrings; char *copy_buf = strdup(line[tid]); // char *copy_buf = strdup(trim(line[tid])); char **split_result = split(copy_buf, "\t", &num_substrings); free(copy_buf); // Find consequence type name (always after SO field) *SO_found = 0; if (num_substrings == 25) { // LOG_DEBUG_F("gene = %s\tSO = %d\tCT = %s\n", split_result[17], atoi(split_result[18] + 3), split_result[19]); if (!cp_hashtable_contains(gene_list, split_result[17])) { cp_hashtable_put(gene_list, strdup(split_result[17]), NULL); } *SO_found = atoi(split_result[18] + 3); memset(tmp_consequence_type, 0, 128 * sizeof(char)); strncat(tmp_consequence_type, split_result[19], strlen(split_result[19])); } else { LOG_INFO_F("[%d] Non-valid line found (%d fields): '%s'\n", tid, num_substrings, line[tid]); memset(line[tid], 0, strlen(line[tid])); memset(output_line[tid], 0, strlen(output_line[tid])); // #pragma omp critical // { // printf("********\n"); // LOG_INFO_F("[%d] Non-valid line found (%d fields): '%s'\n", tid, num_substrings, line[tid]); for (int s = 0; s < num_substrings; s++) { // printf("%s^", split_result[s]); free(split_result[s]); } // printf("********\n\n"); free(split_result); // } continue; } for (int s = 0; s < num_substrings; s++) { free(split_result[s]); } free(split_result); if (!*SO_found) { // SO:000000 is not valid LOG_INFO_F("[%d] Non-valid SO found (0)\n", tid); memset(line[tid], 0, strlen(line[tid])); memset(output_line[tid], 0, strlen(output_line[tid])); continue; } // 
LOG_DEBUG_F("[%d] SO found = %d\n", tid, *SO_found); size_t consequence_type_len = strlen(tmp_consequence_type); // If file does not exist, create its descriptor and summary counter FILE *aux_file = cp_hashtable_get(output_files, SO_found); if (!aux_file) { #pragma omp critical { // This construction avoids 2 threads trying to insert the same CT aux_file = cp_hashtable_get(output_files, SO_found); if (!aux_file) { char filename[output_directory_len + consequence_type_len + 6]; memset(filename, 0, (output_directory_len + consequence_type_len + 6) * sizeof(char)); strncat(filename, output_directory, output_directory_len); strncat(filename, "/", 1); strncat(filename, tmp_consequence_type, consequence_type_len); strncat(filename, ".txt", 4); aux_file = fopen(filename, "a"); // Add to hashtables (file descriptors and summary counters) int *SO_stored = (int*) malloc (sizeof(int)); *SO_stored = *SO_found; cp_hashtable_put(output_files, SO_stored, aux_file); LOG_INFO_F("[%d] new CT = %s\n", tid, tmp_consequence_type); } } } // Write line[tid] to file corresponding to its consequence type if (aux_file) { #pragma omp critical { // TODO move critical one level below? 
count = (int*) cp_hashtable_get(summary_count, tmp_consequence_type); if (count == NULL) { char *consequence_type = (char*) calloc (consequence_type_len+1, sizeof(char)); strncat(consequence_type, tmp_consequence_type, consequence_type_len); assert(!strcmp(consequence_type, tmp_consequence_type)); count = (int*) malloc (sizeof(int)); *count = 0; cp_hashtable_put(summary_count, consequence_type, count); // LOG_DEBUG_F("[%d] Initialized summary count for %s\n", tmp_consequence_type); } // Increment counter for summary (*count)++; } // LOG_DEBUG_F("[%d] before writing %s\n", tid, tmp_consequence_type); output_text = strdup(output_line[tid]); list_item_t *output_item = list_item_new(tid, *SO_found, output_text); list_insert_item(output_item, output_list); // LOG_DEBUG_F("[%d] after writing %s\n", tid, tmp_consequence_type); } data += next_line_len+1; data_read_len += next_line_len+1; memset(line[tid], 0, strlen(line[tid])); memset(output_line[tid], 0, strlen(output_line[tid])); i++; } // Empty buffer for next callback invocation if (!premature_end) { memset(line[tid], 0, strlen(line[tid])); memset(output_line[tid], 0, strlen(line[tid])); } free(SO_found); return data_read_len; }