/**
 * Parse the effect response stored in effect_line[tid] and queue its valid lines for output.
 *
 * The text is split into lines by "\n" and every line into fields by "\t". A line is processed
 * only when it has exactly 25 fields and a non-zero SO code. Using 0-based field indexes:
 *  - field 17 is registered as a key of gene_list (if not already present),
 *  - field 18 provides the SO code: the number that follows a 3-character prefix,
 *  - field 19 provides the consequence type name (truncated to 127 characters).
 *
 * For every processed line:
 *  - the file "<output_directory>/<consequence type>.txt" is opened in append mode and registered
 *    in output_files the first time its SO code is looked up without success,
 *  - the counter associated to its consequence type in summary_count is incremented,
 *  - a copy of the line, tagged with tid and the SO code, is inserted into output_list.
 *
 * Empty lines are skipped silently; other non-valid lines are reported through the log.
 *
 * @param tid                   Index into effect_line selecting the text to parse; also used in log messages
 * @param output_directory      Directory where the per-consequence-type files are created
 * @param output_directory_len  Number of characters of output_directory to use
 * @param output_files          Hashtable mapping SO code (int*) to the FILE* of its output file
 * @param output_list           List that receives one item per valid line
 * @param summary_count         Hashtable mapping consequence type name to its counter (int*)
 * @param gene_list             Hashtable whose keys are the gene fields found so far
 */
static void parse_effect_response(int tid, char *output_directory, size_t output_directory_len, cp_hashtable *output_files,
                                  list_t *output_list, cp_hashtable *summary_count, cp_hashtable *gene_list) {
    // SO code of the line being processed (0 = not found / not valid).
    // Kept on the stack: it is only needed as a lookup key, so no allocation has to be released.
    int SO_found = 0;
    char tmp_consequence_type[128];

    int num_lines;
    char **split_batch = split(effect_line[tid], "\n", &num_lines);

    for (int i = 0; i < num_lines; i++) {
        int num_columns;
        char *copy_buf = strdup(split_batch[i]);
        char **split_result = split(copy_buf, "\t", &num_columns);
        free(copy_buf);

        // Find consequence type name (always after SO field)
        int valid_line = (num_columns == 25);
        SO_found = 0;
        if (valid_line) {
            if (!cp_hashtable_contains(gene_list, split_result[17])) {
                cp_hashtable_put(gene_list, strdup(split_result[17]), NULL);
            }
            // The number starts after a 3-character prefix; a shorter field can't hold a code
            if (strlen(split_result[18]) > 3) {
                SO_found = atoi(split_result[18] + 3);
            }
            // Bounded copy: the field comes from external data and may exceed the local buffer
            snprintf(tmp_consequence_type, sizeof tmp_consequence_type, "%s", split_result[19]);
        } else if (strlen(split_batch[i]) > 0) {
            // An empty line is not reported: last line in batch could be only a newline
            LOG_INFO_F("[%d] Non-valid line found (%d fields): '%s'\n", tid, num_columns, split_batch[i]);
        }

        for (int s = 0; s < num_columns; s++) {
            free(split_result[s]);
        }
        free(split_result);

        if (!valid_line) {
            continue;
        }
        if (!SO_found) { // SO:000000 is not valid
            LOG_INFO_F("[%d] Non-valid SO found (0)\n", tid);
            continue;
        }

        size_t consequence_type_len = strlen(tmp_consequence_type);

        // If file does not exist, create its descriptor
        FILE *aux_file = cp_hashtable_get(output_files, &SO_found);
        if (!aux_file) {
            #pragma omp critical
            {
                // Looking up again inside the critical section avoids 2 threads trying to insert the same CT
                aux_file = cp_hashtable_get(output_files, &SO_found);
                if (!aux_file) {
                    // directory + '/' + consequence type + ".txt" + terminator
                    size_t filename_size = output_directory_len + consequence_type_len + 6;
                    char filename[filename_size];
                    snprintf(filename, filename_size, "%.*s/%s.txt",
                             (int) output_directory_len, output_directory, tmp_consequence_type);

                    int *SO_stored = malloc(sizeof *SO_stored);
                    aux_file = SO_stored ? fopen(filename, "a") : NULL;
                    if (aux_file) {
                        // Add to hashtable of file descriptors
                        *SO_stored = SO_found;
                        cp_hashtable_put(output_files, SO_stored, aux_file);
                        LOG_INFO_F("[%d] New consequence type found = %s\n", tid, tmp_consequence_type);
                    } else {
                        // Never register a NULL descriptor; the line is skipped below
                        free(SO_stored);
                        LOG_INFO_F("[%d] Can't open output file '%s'\n", tid, filename);
                    }
                }
            }
        }

        // Queue the line to be written to the file corresponding to its consequence type
        if (aux_file) {
            #pragma omp critical
            {
                // TODO move critical one level below?
                int *count = cp_hashtable_get(summary_count, tmp_consequence_type);
                if (count == NULL) {
                    // First line of this consequence type: create its summary counter
                    char *consequence_type = strdup(tmp_consequence_type);
                    count = calloc(1, sizeof *count);
                    if (consequence_type && count) {
                        cp_hashtable_put(summary_count, consequence_type, count);
                    } else {
                        free(consequence_type);
                        free(count);
                        count = NULL;
                    }
                }
                // Increment counter for summary
                if (count) {
                    (*count)++;
                }
            }

            list_item_t *output_item = list_item_new(tid, SO_found, strdup(split_batch[i]));
            list_insert_item(output_item, output_list);
        }
    }

    for (int i = 0; i < num_lines; i++) {
        free(split_batch[i]);
    }
    free(split_batch);
}
static size_t write_effect_ws_results(char *contents, size_t size, size_t nmemb, void *userdata) { int tid = omp_get_thread_num(); int i = 0; int data_read_len = 0, next_line_len = 0; // Whether the SO code field (previous to the consequence type name) has been found int *SO_found = (int*) malloc (sizeof(int)); // Whether the buffer was consumed with a line read just partially int premature_end = 0; size_t realsize = size * nmemb; int *count; char *data = contents; char tmp_consequence_type[128]; char *aux_buffer; char *output_text; LOG_DEBUG_F("Effect WS invoked, response size = %zu bytes\n", realsize); while (data_read_len < realsize) { assert((line + tid) != NULL); assert((max_line_size + tid) != NULL); LOG_DEBUG_F("[%d] loop iteration #%d\n", tid, i); // Get length of data to copy next_line_len = strcspn(data, "\n"); // If the line[tid] is too long for the current buffers, reallocate a little more than the needed memory if (strlen(line[tid]) + next_line_len + 1 > max_line_size[tid]) { // LOG_DEBUG_F("Line too long (%d elements, but %zu needed) in batch #%d\n", // max_line_size[tid], strlen(line[tid]) + next_line_len, batch_num); // char *out_buf = (char*) calloc (next_line_len+1, sizeof(char)); // snprintf(out_buf, next_line_len, "%s", data); // LOG_INFO_F("[%d] too big data is: '%s'\n", tid, out_buf); char *aux_1 = (char*) realloc (line[tid], (max_line_size[tid] + next_line_len + 1) * sizeof(char)); char *aux_2 = (char*) realloc (output_line[tid], (max_line_size[tid] + next_line_len + 1) * sizeof(char)); if (!aux_1 || !aux_2) { LOG_ERROR("Can't resize buffers\n"); // Can't resize buffers -> can't keep reading the file if (!aux_1) { free(line[tid]); } if (!aux_2) { free(output_line[tid]); } return data_read_len; } line[tid] = aux_1; output_line[tid] = aux_2; max_line_size[tid] += next_line_len + 1; // LOG_DEBUG_F("[%d] buffers realloc'd (%d)\n", tid, max_line_size[tid]); } // LOG_DEBUG_F("[%d] position = %d, read = %d, max_size = %zu\n", i, next_line_len, 
data_read_len, realsize); if (data_read_len + next_line_len >= realsize) { // Save current state (line[tid] partially read) strncat(line[tid], data, next_line_len); chomp(line[tid]); line[tid][strlen(line[tid])] = '\0'; premature_end = 1; // LOG_DEBUG_F("widow line[tid] = '%s'\n", line[tid]); data_read_len = realsize; break; } strncat(line[tid], data, next_line_len); strncat(output_line[tid], line[tid], strlen(line[tid])); // LOG_DEBUG_F("[%d] copy to buffer (%zu)\n", tid, strlen(line[tid])); int num_substrings; char *copy_buf = strdup(line[tid]); // char *copy_buf = strdup(trim(line[tid])); char **split_result = split(copy_buf, "\t", &num_substrings); free(copy_buf); // Find consequence type name (always after SO field) *SO_found = 0; if (num_substrings == 25) { // LOG_DEBUG_F("gene = %s\tSO = %d\tCT = %s\n", split_result[17], atoi(split_result[18] + 3), split_result[19]); if (!cp_hashtable_contains(gene_list, split_result[17])) { cp_hashtable_put(gene_list, strdup(split_result[17]), NULL); } *SO_found = atoi(split_result[18] + 3); memset(tmp_consequence_type, 0, 128 * sizeof(char)); strncat(tmp_consequence_type, split_result[19], strlen(split_result[19])); } else { LOG_INFO_F("[%d] Non-valid line found (%d fields): '%s'\n", tid, num_substrings, line[tid]); memset(line[tid], 0, strlen(line[tid])); memset(output_line[tid], 0, strlen(output_line[tid])); // #pragma omp critical // { // printf("********\n"); // LOG_INFO_F("[%d] Non-valid line found (%d fields): '%s'\n", tid, num_substrings, line[tid]); for (int s = 0; s < num_substrings; s++) { // printf("%s^", split_result[s]); free(split_result[s]); } // printf("********\n\n"); free(split_result); // } continue; } for (int s = 0; s < num_substrings; s++) { free(split_result[s]); } free(split_result); if (!*SO_found) { // SO:000000 is not valid LOG_INFO_F("[%d] Non-valid SO found (0)\n", tid); memset(line[tid], 0, strlen(line[tid])); memset(output_line[tid], 0, strlen(output_line[tid])); continue; } // 
LOG_DEBUG_F("[%d] SO found = %d\n", tid, *SO_found); size_t consequence_type_len = strlen(tmp_consequence_type); // If file does not exist, create its descriptor and summary counter FILE *aux_file = cp_hashtable_get(output_files, SO_found); if (!aux_file) { #pragma omp critical { // This construction avoids 2 threads trying to insert the same CT aux_file = cp_hashtable_get(output_files, SO_found); if (!aux_file) { char filename[output_directory_len + consequence_type_len + 6]; memset(filename, 0, (output_directory_len + consequence_type_len + 6) * sizeof(char)); strncat(filename, output_directory, output_directory_len); strncat(filename, "/", 1); strncat(filename, tmp_consequence_type, consequence_type_len); strncat(filename, ".txt", 4); aux_file = fopen(filename, "a"); // Add to hashtables (file descriptors and summary counters) int *SO_stored = (int*) malloc (sizeof(int)); *SO_stored = *SO_found; cp_hashtable_put(output_files, SO_stored, aux_file); LOG_INFO_F("[%d] new CT = %s\n", tid, tmp_consequence_type); } } } // Write line[tid] to file corresponding to its consequence type if (aux_file) { #pragma omp critical { // TODO move critical one level below? 
count = (int*) cp_hashtable_get(summary_count, tmp_consequence_type); if (count == NULL) { char *consequence_type = (char*) calloc (consequence_type_len+1, sizeof(char)); strncat(consequence_type, tmp_consequence_type, consequence_type_len); assert(!strcmp(consequence_type, tmp_consequence_type)); count = (int*) malloc (sizeof(int)); *count = 0; cp_hashtable_put(summary_count, consequence_type, count); // LOG_DEBUG_F("[%d] Initialized summary count for %s\n", tmp_consequence_type); } // Increment counter for summary (*count)++; } // LOG_DEBUG_F("[%d] before writing %s\n", tid, tmp_consequence_type); output_text = strdup(output_line[tid]); list_item_t *output_item = list_item_new(tid, *SO_found, output_text); list_insert_item(output_item, output_list); // LOG_DEBUG_F("[%d] after writing %s\n", tid, tmp_consequence_type); } data += next_line_len+1; data_read_len += next_line_len+1; memset(line[tid], 0, strlen(line[tid])); memset(output_line[tid], 0, strlen(output_line[tid])); i++; } // Empty buffer for next callback invocation if (!premature_end) { memset(line[tid], 0, strlen(line[tid])); memset(output_line[tid], 0, strlen(line[tid])); } free(SO_found); return data_read_len; }