Example #1
0
/**
 * Parses the batch of effect response lines buffered in effect_line[tid] and
 * queues every valid line for output.
 *
 * The batch is split on '\n' and each line on '\t'. A line is considered valid
 * when it has exactly 25 columns and a non-zero SO code. For a valid line:
 *   - column 17 (the gene, per the original debug trace) is added to gene_list
 *     if it is not already present;
 *   - column 18 carries the SO code: the integer that follows a 3-character
 *     prefix (0 is rejected as not valid);
 *   - column 19 is the consequence type name, truncated to 127 characters.
 *
 * The first time a SO code is seen, "<output_directory>/<consequence type>.txt"
 * is opened in append mode and registered in output_files under a heap copy of
 * the SO code. The per-consequence-type counter in summary_count is incremented
 * and a list item (tid, SO code, copy of the line) is inserted in output_list.
 * The file handle is not written to here; it only gates whether the line is
 * counted and queued.
 *
 * @param tid                  Index of the calling thread; selects effect_line[tid]
 * @param output_directory     Directory where per-consequence-type files are created
 * @param output_directory_len Number of characters of output_directory to use
 * @param output_files         Hashtable SO code (int*) -> FILE*
 * @param output_list          List receiving one item per valid line
 * @param summary_count        Hashtable consequence type name (char*) -> counter (int*)
 * @param gene_list            Hashtable used as a set of gene names
 */
static void parse_effect_response(int tid, char *output_directory, size_t output_directory_len, cp_hashtable *output_files, 
                                  list_t *output_list, cp_hashtable *summary_count, cp_hashtable *gene_list) {
    char tmp_consequence_type[128];
    
    int num_lines;
    char **split_batch = split(effect_line[tid], "\n", &num_lines);
    
    for (int i = 0; i < num_lines; i++) {
        int num_columns;
        char *copy_buf = strdup(split_batch[i]);
        char **split_result = split(copy_buf, "\t", &num_columns);
        free(copy_buf);
        
        // Extract everything needed from the columns first, so that they can be
        // released in a single place regardless of the line being valid or not
        int SO_found = 0;
        int valid_line = (num_columns == 25);
        if (valid_line) {
            if (!cp_hashtable_contains(gene_list, split_result[17])) {
                cp_hashtable_put(gene_list, strdup(split_result[17]), NULL);
            }
            // Skip the 3-character prefix only when the field is long enough to have one
            if (strlen(split_result[18]) > 3) {
                SO_found = atoi(split_result[18] + 3);
            }
            // Bounded copy: an over-long field is truncated instead of overflowing the buffer
            snprintf(tmp_consequence_type, sizeof tmp_consequence_type, "%s", split_result[19]);
        }
        
        for (int s = 0; s < num_columns; s++) {
            free(split_result[s]);
        }
        free(split_result);
        
        if (!valid_line) {
            // Last line in batch could be only a newline: skip it silently
            if (strlen(split_batch[i]) > 0) {
                LOG_INFO_F("[%d] Non-valid line found (%d fields): '%s'\n", tid, num_columns, split_batch[i]);
            }
            continue;
        }
        
        if (!SO_found) { // SO:000000 is not valid
            LOG_INFO_F("[%d] Non-valid SO found (0)\n", tid);
            continue;
        }

        // If file does not exist, create its descriptor
        FILE *aux_file = cp_hashtable_get(output_files, &SO_found);
        if (!aux_file) {
#pragma omp critical
            {
                // Looking up again inside the critical section avoids 2 threads
                // trying to insert the same consequence type
                aux_file = cp_hashtable_get(output_files, &SO_found);
                if (!aux_file) {
                    // directory + '/' + consequence type + ".txt" + '\0'
                    size_t filename_len = output_directory_len + strlen(tmp_consequence_type) + 6;
                    char filename[filename_len];
                    snprintf(filename, filename_len, "%.*s/%s.txt",
                             (int) output_directory_len, output_directory, tmp_consequence_type);
                    aux_file = fopen(filename, "a");
                    
                    if (aux_file) {
                        // The hashtable keeps the key pointer, so it needs its own heap copy
                        int *SO_stored = malloc(sizeof *SO_stored);
                        if (SO_stored) {
                            *SO_stored = SO_found;
                            cp_hashtable_put(output_files, SO_stored, aux_file);
                            LOG_INFO_F("[%d] New consequence type found = %s\n", tid, tmp_consequence_type);
                        } else {
                            // Can't register the file: close it so it is not leaked
                            fclose(aux_file);
                            aux_file = NULL;
                        }
                    } else {
                        // Do not register a NULL descriptor; the line is simply not queued
                        LOG_INFO_F("[%d] Could not open output file '%s'\n", tid, filename);
                    }
                }
            }
        }
        
        // Count the line and queue it for the file of its consequence type
        if (aux_file) { 
#pragma omp critical
            {
                int *count = (int*) cp_hashtable_get(summary_count, tmp_consequence_type);
                if (count == NULL) {
                    // First occurrence: the hashtable gets its own copy of the name and a fresh counter
                    char *consequence_type = strdup(tmp_consequence_type);
                    count = malloc(sizeof *count);
                    if (consequence_type && count) {
                        *count = 0;
                        cp_hashtable_put(summary_count, consequence_type, count);
                    } else {
                        free(consequence_type);
                        free(count);
                        count = NULL;
                    }
                }
                // Increment counter for summary
                if (count) {
                    (*count)++;
                }
            }
            
            list_item_t *output_item = list_item_new(tid, SO_found, strdup(split_batch[i]));
            list_insert_item(output_item, output_list);
        }
    }
    
    for (int i = 0; i < num_lines; i++) {
        free(split_batch[i]);
    }
    free(split_batch);
}
Example #2
0
/**
 * Write callback that consumes a chunk of the effect web service response,
 * line by line, on behalf of the calling OpenMP thread.
 *
 * The chunk is scanned for '\n'-terminated lines. Each complete line is
 * appended to line[tid] (which may already hold the beginning of the line,
 * left there by a previous invocation), copied to output_line[tid] and split
 * on '\t'. A line is considered valid when it has exactly 25 columns and a
 * non-zero SO code:
 *   - column 17 (the gene, per the original debug trace) is added to gene_list
 *     if it is not already present;
 *   - column 18 carries the SO code: the integer that follows a 3-character
 *     prefix (0 is rejected as not valid);
 *   - column 19 is the consequence type name, truncated to 127 characters.
 *
 * The first time a SO code is seen, "<output_directory>/<consequence type>.txt"
 * is opened in append mode and registered in output_files. The counter of the
 * consequence type in summary_count is incremented and a list item
 * (tid, SO code, copy of output_line[tid]) is inserted in output_list.
 *
 * If the chunk ends in the middle of a line, the partial text is kept in
 * line[tid] so that the next invocation can complete it.
 *
 * The scan never reads beyond size * nmemb bytes, so contents does not need to
 * be NUL-terminated.
 *
 * @param contents Chunk of data received
 * @param size     Size of each data element
 * @param nmemb    Number of data elements
 * @param userdata Unused
 * @return Number of bytes consumed: size * nmemb on success, fewer if the line
 *         buffers could not be enlarged
 */
static size_t write_effect_ws_results(char *contents, size_t size, size_t nmemb, void *userdata) {
    int tid = omp_get_thread_num();
    
    int i = 0;
    size_t data_read_len = 0;
    // SO code of the current line; 0 means not found / not valid
    int SO_found;
    // Whether the buffer was consumed with a line read just partially
    int premature_end = 0;
    
    size_t realsize = size * nmemb;
    
    char *data = contents;
    char tmp_consequence_type[128];
    
    LOG_DEBUG_F("Effect WS invoked, response size = %zu bytes\n", realsize);
    
    assert((line + tid) != NULL);
    assert((max_line_size + tid) != NULL);
    
    while (data_read_len < realsize) {
        LOG_DEBUG_F("[%d] loop iteration #%d\n", tid, i);
        
        // Get length of data to copy: up to the next newline (or NUL), without
        // ever looking beyond the bytes that were actually received
        size_t remaining = realsize - data_read_len;
        size_t next_line_len = 0;
        while (next_line_len < remaining && data[next_line_len] != '\n' && data[next_line_len] != '\0') {
            next_line_len++;
        }
        
        // If the line is too long for the current buffers, reallocate a little more than the needed memory
        if (strlen(line[tid]) + next_line_len + 1 > max_line_size[tid]) {
            size_t new_size = max_line_size[tid] + next_line_len + 1;
            
            // Store each buffer as soon as it is successfully moved; when realloc
            // fails the old buffer is still valid and stays owned by its global
            char *aux_1 = (char*) realloc (line[tid], new_size * sizeof(char));
            if (aux_1) { line[tid] = aux_1; }
            char *aux_2 = (char*) realloc (output_line[tid], new_size * sizeof(char));
            if (aux_2) { output_line[tid] = aux_2; }
            
            if (!aux_1 || !aux_2) {
                LOG_ERROR("Can't resize buffers\n");
                // Can't resize buffers -> can't keep reading the file
                return data_read_len;
            }
            
            max_line_size[tid] += next_line_len + 1;
        }
        
        if (next_line_len >= remaining) {
            // No line terminator in the rest of the chunk:
            // save current state (line[tid] partially read)
            strncat(line[tid], data, next_line_len);
            chomp(line[tid]);
            premature_end = 1;
            data_read_len = realsize;
            break;
        }
        
        strncat(line[tid], data, next_line_len);
        strncat(output_line[tid], line[tid], strlen(line[tid]));
    
        int num_substrings;
        char *copy_buf = strdup(line[tid]);
        char **split_result = split(copy_buf, "\t", &num_substrings);
        free(copy_buf);
        
        // Extract everything needed from the columns first, so that they can be
        // released in a single place regardless of the line being valid or not
        SO_found = 0;
        int valid_line = (num_substrings == 25);
        if (valid_line) {
            if (!cp_hashtable_contains(gene_list, split_result[17])) {
                cp_hashtable_put(gene_list, strdup(split_result[17]), NULL);
            }
            // Skip the 3-character prefix only when the field is long enough to have one
            if (strlen(split_result[18]) > 3) {
                SO_found = atoi(split_result[18] + 3);
            }
            // Bounded copy: an over-long field is truncated instead of overflowing the buffer
            snprintf(tmp_consequence_type, sizeof tmp_consequence_type, "%s", split_result[19]);
        } else {
            LOG_INFO_F("[%d] Non-valid line found (%d fields): '%s'\n", tid, num_substrings, line[tid]);
        }
        
        for (int s = 0; s < num_substrings; s++) {
            free(split_result[s]);
        }
        free(split_result);
        
        if (valid_line && !SO_found) { // SO:000000 is not valid
            LOG_INFO_F("[%d] Non-valid SO found (0)\n", tid);
        }
        
        if (valid_line && SO_found) {
            // If file does not exist, create its descriptor
            FILE *aux_file = cp_hashtable_get(output_files, &SO_found);
            if (!aux_file) {
#pragma omp critical
                {
                    // Looking up again inside the critical section avoids 2 threads
                    // trying to insert the same consequence type
                    aux_file = cp_hashtable_get(output_files, &SO_found);
                    if (!aux_file) {
                        // directory + '/' + consequence type + ".txt" + '\0'
                        size_t filename_len = output_directory_len + strlen(tmp_consequence_type) + 6;
                        char filename[filename_len];
                        snprintf(filename, filename_len, "%.*s/%s.txt",
                                 (int) output_directory_len, output_directory, tmp_consequence_type);
                        aux_file = fopen(filename, "a");
                        
                        if (aux_file) {
                            // The hashtable keeps the key pointer, so it needs its own heap copy
                            int *SO_stored = malloc(sizeof *SO_stored);
                            if (SO_stored) {
                                *SO_stored = SO_found;
                                cp_hashtable_put(output_files, SO_stored, aux_file);
                                LOG_INFO_F("[%d] new CT = %s\n", tid, tmp_consequence_type);
                            } else {
                                // Can't register the file: close it so it is not leaked
                                fclose(aux_file);
                                aux_file = NULL;
                            }
                        } else {
                            // Do not register a NULL descriptor; the line is simply not queued
                            LOG_ERROR("Can't open output file for consequence type\n");
                        }
                    }
                }
            }
            
            // Count the line and queue it for the file of its consequence type
            if (aux_file) { 
#pragma omp critical
                {
                    int *count = (int*) cp_hashtable_get(summary_count, tmp_consequence_type);
                    if (count == NULL) {
                        // First occurrence: the hashtable gets its own copy of the name and a fresh counter
                        char *consequence_type = strdup(tmp_consequence_type);
                        count = malloc(sizeof *count);
                        if (consequence_type && count) {
                            *count = 0;
                            cp_hashtable_put(summary_count, consequence_type, count);
                        } else {
                            free(consequence_type);
                            free(count);
                            count = NULL;
                        }
                    }
                    // Increment counter for summary
                    if (count) {
                        (*count)++;
                    }
                }
                
                char *output_text = strdup(output_line[tid]);
                list_item_t *output_item = list_item_new(tid, SO_found, output_text);
                list_insert_item(output_item, output_list);
            }
        }
        
        // Move past the line and its terminator. This must also happen for
        // non-valid lines, otherwise the same line would be parsed forever.
        data += next_line_len + 1;
        data_read_len += next_line_len + 1;
        
        memset(line[tid], 0, strlen(line[tid]));
        memset(output_line[tid], 0, strlen(output_line[tid]));
        
        i++;
    }
 
    // Empty buffer for next callback invocation
    if (!premature_end) {
        memset(line[tid], 0, strlen(line[tid]));
        memset(output_line[tid], 0, strlen(output_line[tid]));
    }

    return data_read_len;
}