예제 #1
0
파일: bmemory.cpp 프로젝트: ncbi/dcode-cape
/**
 * Resize a memory block with realloc() and pass the result through
 * checkPointerError together with the caller's source location.
 *
 * @param self the pointer to be reallocated
 * @param size requested size in bytes
 * @param file the source code file (__FILE__) or NULL to not print this info
 * @param line the source code line (__LINE__) or 0
 * @return the pointer handed back by checkPointerError for the realloc() result
 */
void *reallocate(void *self, size_t size, const char *file, int line){
    void *resized = realloc(self, size);

    return checkPointerError(resized, "Can't reallocate memory", file, line, -1);
}
예제 #2
0
파일: main.c 프로젝트: r78v10a07/gTools
int main(int argc, char** argv) {
    int next_option, verbose;
    const char* const short_options = "vhd:o:s:g:r:";
    FILE *s;
    char *line = NULL;
    size_t len = 0;
    ssize_t read = 0;
    char **fields = NULL;
    size_t fieldsSize = 0;
    char **samples = NULL;
    int samplesLen = 0;
    BtreeNode_t *genes = NULL;
    void **genesArray = NULL;
    int genesLen = 0;
    struct dirent **namelist;
    int i, n, l, k;
    BtreeRecord_t *rec = NULL;
    MGene_l gene;
    double cllSum, nbcSum, tmpValue, tmp1Value;
    int cllLen, nbcLen;
    int cllLenTMP, nbcLenTMP;
    FILE *outFile = NULL;
    FILE *geneFile = NULL;
    FILE *sumFile = NULL;
    FILE *repFile = NULL;
    char *dir = NULL;
    int s1Len;
    int s2Len;

    program_name = argv[0];

    const struct option long_options[] = {
        { "help", 0, NULL, 'h'},
        { "verbose", 0, NULL, 'v'},
        { "output", 1, NULL, 'o'},
        { "dir", 1, NULL, 'd'},
        { "gene", 1, NULL, 'g'},
        { "sum", 1, NULL, 's'},
        { "rep", 1, NULL, 'r'},
        { NULL, 0, NULL, 0} /* Required at end of array.  */
    };

    verbose = 0;
    do {
        next_option = getopt_long(argc, argv, short_options, long_options, NULL);

        switch (next_option) {
            case 'h':
                print_usage(stdout, 0);

            case 'v':
                verbose = 1;
                break;

            case 'd':
                dir = strdup(optarg);
                break;

            case 'o':
                outFile = checkPointerError(fopen(optarg, "w"), "Can't open OUTPUT file", __FILE__, __LINE__, -1);
                break;

            case 'g':
                geneFile = checkPointerError(fopen(optarg, "w"), "Can't open GENE file", __FILE__, __LINE__, -1);
                break;

            case 's':
                sumFile = checkPointerError(fopen(optarg, "w"), "Can't open SUM file", __FILE__, __LINE__, -1);
                break;

            case 'r':
                repFile = checkPointerError(fopen(optarg, "w"), "Can't open replicate file", __FILE__, __LINE__, -1);
                break;
        }
    } while (next_option != -1);
    if (!dir || !outFile || !sumFile) {
        print_usage(stderr, -1);
    }

    fprintf(geneFile, "GeneId\tTranscriptId\tNBC_Gene_TPM\tNBC_Exon_TPM\tNBC_Intron_TPM\tNBC_Intron_Exon_Ratio\tNBC_LOG2_Intron_Exon_Ratio\tCLL_Gene_TPM\tCLL_Exon_TPM\tCLL_Intron_TPM\tCLL_Intron_Exon_Ratio\tCLL_LOG2_Intron_Exon_Ratio\tHausdorff_distance\tCLL_NBC_Intron_Ratio\tLOG2_CLL_NBC_Intron_Ratio\tCLL_NBC_Exon_Ratio\tLOG2_CLL_NBC_Exon_Ratio\n");
    fprintf(outFile, "GeneId\tTranscriptId\tIntron_Exon_Number\tType");
    fprintf(repFile, "GeneId\tTranscriptId");

    cllLen = nbcLen = samplesLen = 0;
    n = scandir(dir, &namelist, 0, alphasort);
    if (n < 0)
        perror("scandir");
    else {
        for (i = 0; i < n; i++) {
            if (strbcmp(namelist[i]->d_name, ".ent") == 0) {
                s = fopen(namelist[i]->d_name, "r");
                fseeko(s, 0, SEEK_END);
                if (ftello(s) != 0) {
                    samples = reallocate(samples, sizeof (samples) * (samplesLen + 1), __FILE__, __LINE__);
                    samples[samplesLen] = strdup(namelist[i]->d_name);
                    if (strncmp(samples[samplesLen], "CLL", 3) == 0) cllLen++;
                    if (strncmp(samples[samplesLen], "NBC", 3) == 0) nbcLen++;
                    samplesLen++;
                }
                fclose(s);
            }
            free(namelist[i]);
        }
    }
    free(namelist);

    for (i = 0; i < samplesLen; i++) {
        s = checkPointerError(fopen(samples[i], "r"), "Can't open sample ENT file", __FILE__, __LINE__, -1);
        *(strstr(samples[i], ".ent")) = '\0';
        fprintf(outFile, "\t%s\t%s_Reads", samples[i], samples[i]);
        fprintf(repFile, "\t%s_Exon\t%s_Exon_Reads\t%s_Intron\t%s_Intron_Reads", samples[i], samples[i], samples[i], samples[i]);
        if (verbose) printf("Parsing ENT file: %s\n", samples[i]);

        while ((read = getline(&line, &len, s)) != -1) {
            if (strncmp(line, "Gene_Id", 7) != 0) {
                fieldsSize = splitString(&fields, line, "\t");
                if (fieldsSize == 8) {
                    gene = NewMGene(fields);
                    rec = BTreeFind(genes, gene, GeneKeyCMP);
                    if (rec == NULL) {
                        gene->entities = reallocate(gene->entities, sizeof (MEntity_t) * (gene->entitiesLen + 1), __FILE__, __LINE__);
                        gene->entities[gene->entitiesLen].length = atoi(fields[5]);
                        gene->entities[gene->entitiesLen].type = strdup(fields[3]);
                        gene->NBCIntronExonRatio = allocate(sizeof (double) * nbcLen, __FILE__, __LINE__);
                        for (k = 0; k < nbcLen; k++) {
                            gene->NBCIntronExonRatio[k] = NAN;
                        }
                        gene->CLLIntronExonRatio = allocate(sizeof (double) * cllLen, __FILE__, __LINE__);
                        for (k = 0; k < cllLen; k++) {
                            gene->CLLIntronExonRatio[k] = NAN;
                        }

                        gene->repExonTPM = allocate(sizeof (double) * samplesLen, __FILE__, __LINE__);
                        for (k = 0; k < samplesLen; k++) {
                            gene->repExonTPM[k] = NAN;
                        }
                        gene->repExonCount = allocate(sizeof (int) * samplesLen, __FILE__, __LINE__);
                        for (k = 0; k < samplesLen; k++) {
                            gene->repExonCount[k] = NAN;
                        }

                        gene->repIntronTPM = allocate(sizeof (double) * samplesLen, __FILE__, __LINE__);
                        for (k = 0; k < samplesLen; k++) {
                            gene->repIntronTPM[k] = NAN;
                        }

                        gene->repIntronCount = allocate(sizeof (int) * samplesLen, __FILE__, __LINE__);
                        for (k = 0; k < samplesLen; k++) {
                            gene->repIntronCount[k] = NAN;
                        }

                        gene->entities[gene->entitiesLen].samples = allocate(sizeof (MEntitySample_t) * (samplesLen), __FILE__, __LINE__);
                        for (k = 0; k < samplesLen; k++) {
                            gene->entities[gene->entitiesLen].samples[k].sample = samples[k];
                            gene->entities[gene->entitiesLen].samples[k].count = -1;
                            gene->entities[gene->entitiesLen].samples[k].TPM = INFINITY;
                        }
                        gene->entities[gene->entitiesLen].samples[i].count = atoi(fields[6]);
                        gene->entities[gene->entitiesLen].samples[i].TPM = strtod(fields[7], NULL);
                        gene->entitiesLen++;

                        genes = BtreeInsert(genes, gene, gene, GeneKeyCMP);
                    } else {
                        FreeMGene(gene);
                        gene = rec->value;
                        l = atoi(fields[4]) - 1;
                        if (l >= gene->entitiesLen) {
                            gene->entities = reallocate(gene->entities, sizeof (MEntity_t) * (gene->entitiesLen + 1), __FILE__, __LINE__);
                            gene->entities[l].length = atoi(fields[5]);
                            gene->entities[l].type = strdup(fields[3]);
                            gene->entities[l].samples = allocate(sizeof (MEntitySample_t) * (samplesLen), __FILE__, __LINE__);
                            for (k = 0; k < samplesLen; k++) {
                                gene->entities[gene->entitiesLen].samples[k].sample = samples[k];
                                gene->entities[gene->entitiesLen].samples[k].count = -1;
                                gene->entities[gene->entitiesLen].samples[k].TPM = INFINITY;
                            }
                            gene->entities[l].samples[i].count = atoi(fields[6]);
                            gene->entities[l].samples[i].TPM = strtod(fields[7], NULL);
                            gene->entitiesLen++;
                        } else {
                            gene->entities[l].samples[i].sample = samples[i];
                            gene->entities[l].samples[i].count = atoi(fields[6]);
                            gene->entities[l].samples[i].TPM = strtod(fields[7], NULL);
                        }
                    }
                } else {
                    fprintf(stderr, "\n\n%s\n\n", line);
                    printLog(stderr, "Bad ENT format", __FILE__, __LINE__, -1);
                }
                freeArrayofPointers((void **) fields, fieldsSize);
            }
        }
        fclose(s);
    }

    fprintf(outFile, "\tCLL_Mean\tNBC_Mean\tCLL_NBC_Mean_Diff\n");
    fprintf(repFile, "\n");

    s1Len = s2Len = 0;
    for (i = 0; i < samplesLen; i++) {
        strcat(samples[i], ".out");
        s = checkPointerError(fopen(samples[i], "r"), "Can't open sample OUT file", __FILE__, __LINE__, -1);
        *(strstr(samples[i], ".out")) = '\0';
        if (verbose) printf("Parsing OUT file: %s\n", samples[i]);

        while ((read = getline(&line, &len, s)) != -1) {
            if (strncmp(line, "Gene_Id", 7) != 0) {
                fieldsSize = splitString(&fields, line, "\t");
                if (fieldsSize == 12) {
                    gene = NewMGene(fields);
                    rec = BTreeFind(genes, gene, GeneKeyCMP);
                    if (rec == NULL) {
                        fprintf(stderr, "\n\n%s\n\n", line);
                        printLog(stderr, "Wrong GENE name", __FILE__, __LINE__, -1);
                    }
                    FreeMGene(gene);
                    gene = rec->value;
                    if (gene->length == -1) {
                        gene->length = atoi(fields[3]);
                    } else if (gene->length != atoi(fields[3])) {
                        fprintf(stderr, "\n\n%s\n\n", line);
                        printLog(stderr, "Wrong GENE length", __FILE__, __LINE__, -1);
                    }
                    tmpValue = strtod(fields[5], NULL);
                    if (!isnan(tmpValue) && !isinf(tmpValue)) {
                        if (strncmp(samples[i], "CLL", 3) == 0) {
                            gene->CLLTPM += tmpValue;
                            gene->CLLTMPCount++;
                        } else {
                            gene->NBCTPM += tmpValue;
                            gene->NBCTMPCount++;
                        }
                    }

                    tmpValue = strtod(fields[7], NULL);
                    if (!isnan(tmpValue) && !isinf(tmpValue)) {
                        gene->repExonCount[i] = tmpValue;
                    }

                    tmpValue = strtod(fields[8], NULL);
                    if (!isnan(tmpValue) && !isinf(tmpValue)) {
                        gene->repExonTPM[i] = tmpValue;
                        if (strncmp(samples[i], "CLL", 3) == 0) {
                            gene->CLLTPMExon += tmpValue;
                            gene->CLLexonCount++;
                        } else {
                            gene->NBCTPMExon += tmpValue;
                            gene->NBCexonCount++;
                        }
                    }

                    tmpValue = strtod(fields[10], NULL);
                    if (!isnan(tmpValue) && !isinf(tmpValue)) {
                        gene->repIntronCount[i] = tmpValue;
                    }

                    tmp1Value = strtod(fields[11], NULL);
                    if (!isnan(tmp1Value) && !isinf(tmp1Value)) {
                        gene->repIntronTPM[i] = tmp1Value;
                        if (strncmp(samples[i], "CLL", 3) == 0) {
                            gene->CLLTPMIntron += tmp1Value;
                            gene->CLLintronCount++;

                            gene->CLLIntronExonRatio[s1Len] = tmp1Value;
                        } else {
                            gene->NBCTPMIntron += tmp1Value;
                            gene->NBCintronCount++;

                            gene->NBCIntronExonRatio[s2Len] = tmp1Value;
                        }
                    }
                } else {
                    fprintf(stderr, "\n\n%s\n\n", line);
                    printLog(stderr, "Bad OUT format", __FILE__, __LINE__, -1);
                }
                freeArrayofPointers((void **) fields, fieldsSize);
            }
        }
        fclose(s);
        if (strncmp(samples[i], "CLL", 3) == 0) s1Len++;
        else s2Len++;
    }

    BtreeRecordsToArray(&genesArray, &genesLen, genes);
    for (i = 0; i < genesLen; i++) {
        nbcSum = ((MGene_l) genesArray[i])->NBCTPMExon / ((MGene_l) genesArray[i])->NBCexonCount;
        tmpValue = ((MGene_l) genesArray[i])->NBCTPMIntron / ((MGene_l) genesArray[i])->NBCintronCount;
        fprintf(geneFile, "%s\t%s\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f",
                ((MGene_l) genesArray[i])->geneId,
                ((MGene_l) genesArray[i])->transcriptId,
                ((MGene_l) genesArray[i])->NBCTPM / ((MGene_l) genesArray[i])->NBCTMPCount,
                nbcSum,
                tmpValue,
                tmpValue / nbcSum,
                log2(tmpValue / nbcSum));
        cllSum = ((MGene_l) genesArray[i])->CLLTPMExon / ((MGene_l) genesArray[i])->CLLexonCount;
        tmp1Value = ((MGene_l) genesArray[i])->CLLTPMIntron / ((MGene_l) genesArray[i])->CLLintronCount;
        fprintf(geneFile, "\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\t%.4f\n",
                ((MGene_l) genesArray[i])->CLLTPM / ((MGene_l) genesArray[i])->CLLTMPCount,
                cllSum,
                tmpValue,
                tmpValue / cllSum,
                log2(tmpValue / cllSum),
                HausdorffDistance(((MGene_l) genesArray[i])->CLLIntronExonRatio, cllLen, ((MGene_l) genesArray[i])->NBCIntronExonRatio, nbcLen),
                tmp1Value / tmpValue,
                log2(tmp1Value / tmpValue),
                cllSum / nbcSum,
                log2(cllSum / nbcSum));

        for (l = 0; l < ((MGene_l) genesArray[i])->entitiesLen; l++) {
            fprintf(outFile, "%s\t%s\t%d", ((MGene_l) genesArray[i])->geneId, ((MGene_l) genesArray[i])->transcriptId, l + 1);
            cllSum = nbcSum = 0.0;
            cllLenTMP = nbcLenTMP = 0;
            for (k = 0; k < samplesLen; k++) {
                if (strncmp(samples[k], "CLL", 3) == 0 && ((MGene_l) genesArray[i])->entities[l].samples[k].TPM != INFINITY) {
                    cllLenTMP++;
                    cllSum += ((MGene_l) genesArray[i])->entities[l].samples[k].TPM;
                }
                if (strncmp(samples[k], "NBC", 3) == 0 && ((MGene_l) genesArray[i])->entities[l].samples[k].TPM != INFINITY) {
                    nbcLenTMP++;
                    nbcSum += ((MGene_l) genesArray[i])->entities[l].samples[k].TPM;
                }
                if (k == 0) {
                    fprintf(outFile, "\t%s", ((MGene_l) genesArray[i])->entities[l].type);
                }
                fprintf(outFile, "\t%.4f\t%d", ((MGene_l) genesArray[i])->entities[l].samples[k].TPM, ((MGene_l) genesArray[i])->entities[l].samples[k].count);

            }
            cllSum = cllSum / cllLenTMP;
            nbcSum = nbcSum / nbcLenTMP;
            fprintf(outFile, "\t%.4f\t%.4f\t", cllSum, nbcSum);
            if (fabs(cllSum) <= 0.000001 && fabs(nbcSum) <= 0.000001) {
                fprintf(outFile, "nan\n");
            } else {
                fprintf(outFile, "%.4f\n", cllSum - nbcSum);
            }
        }

        fprintf(repFile, "%s\t%s", ((MGene_l) genesArray[i])->geneId, ((MGene_l) genesArray[i])->transcriptId);
        for (k = 0; k < samplesLen; k++) {
            fprintf(repFile, "\t%.4f\t%d\t%.4f\t%d", ((MGene_l) genesArray[i])->repExonTPM[k], ((MGene_l) genesArray[i])->repExonCount[k],
                    ((MGene_l) genesArray[i])->repIntronTPM[k], ((MGene_l) genesArray[i])->repIntronCount[k]);
        }
        fprintf(repFile, "\n");
    }

    for (k = 0; k < samplesLen; k++) {
        cllSum = 0.0;
        for (i = 0; i < genesLen; i++) {
            for (l = 0; l < ((MGene_l) genesArray[i])->entitiesLen; l++) {
                if (((MGene_l) genesArray[i])->entities[l].samples[k].TPM != INFINITY) {
                    cllSum += ((MGene_l) genesArray[i])->entities[l].samples[k].TPM;
                }
            }
        }
        fprintf(sumFile, "%s\t%.4f\n", samples[k], cllSum);
    }

    if (dir) free(dir);
    if (genesArray) free(genesArray);
    BTreeFree(genes, FreeMGene, NULL);
    freeArrayofPointers((void **) samples, samplesLen);
    if (line) free(line);
    fclose(outFile);
    fclose(geneFile);
    fclose(sumFile);
    fclose(repFile);
    return (EXIT_SUCCESS);
}
예제 #3
0
파일: bmemory.cpp 프로젝트: ncbi/dcode-cape
/**
 * Obtain a block of size bytes with malloc() and pass the result through
 * checkPointerError together with the caller's source location.
 *
 * @param size size in bytes
 * @param file the source code file (__FILE__) or NULL to not print this info
 * @param line the source code line (__LINE__) or 0
 * @return the pointer handed back by checkPointerError for the malloc() result
 */
void *allocate(size_t size, const char *file, int line) {
    void *block = malloc(size);

    return checkPointerError(block, "Can't allocate memory", file, line, -1);
}
예제 #4
0
파일: main.c 프로젝트: r78v10a07/gTools
/**
 * Read a GTF annotation and a SAM stream, then write the results to
 * <output>.out, <output>.ent and <output>.err.
 *
 * Options: -h/--help, -v/--verbose, -g/--gtf FILE, -i/--input FILE
 * ("-" selects stdin), -o/--output PREFIX. -g, -i and -o are required.
 *
 * NOTE(review): inFile and readsFactory are not released here; no matching
 * release routine is visible in this chunk — confirm who owns them.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments
 * @return EXIT_SUCCESS after the results have been printed
 */
int main(int argc, char** argv) {
    int next_option, verbose;
    const char* const short_options = "vhg:i:o:";
    /* Longest suffix appended to the output prefix, including the NUL. */
    const size_t suffixRoom = sizeof (".out");
    char *outputName, *output = NULL;
    FILE *gtfFile = NULL;
    FILE *inFile = NULL;
    FILE *outFile = NULL;
    FILE *errFile = NULL;
    FILE *entFile = NULL;
    time_t now, later;

    program_name = argv[0];

    const struct option long_options[] = {
        { "help", 0, NULL, 'h'},
        { "verbose", 0, NULL, 'v'},
        { "gtf", 1, NULL, 'g'},
        { "output", 1, NULL, 'o'},
        { "input", 1, NULL, 'i'},
        { NULL, 0, NULL, 0} /* Required at end of array.  */
    };

    verbose = 0;
    do {
        next_option = getopt_long(argc, argv, short_options, long_options, NULL);

        switch (next_option) {
            case 'h':
                print_usage(stdout, 0);
                /* Explicit break: never fall through into 'v' should
                 * print_usage return. */
                break;

            case 'v':
                verbose = 1;
                break;

            case 'o':
                /* Drop the copy made by an earlier -o so it is not leaked. */
                free(output);
                output = strdup(optarg);
                break;

            case 'i':
                if (strlen(optarg) == 1 && optarg[0] == '-'){
                    inFile = stdin;
                }else{
                    inFile = checkPointerError(fopen(optarg, "r"), "Can't open INPUT file", __FILE__, __LINE__, -1);
                }
                break;

            case 'g':
                gtfFile = checkPointerError(fopen(optarg, "r"), "Can't open GTF file", __FILE__, __LINE__, -1);
                break;
        }
    } while (next_option != -1);

    if (!gtfFile || !output || ! inFile) {
        print_usage(stderr, -1);
    }

    /* One scratch buffer is reused for the three output file names; all
     * suffixes have the same length. */
    outputName = allocate(sizeof(char) * (strlen(output) + suffixRoom), __FILE__, __LINE__);
    sprintf(outputName,"%s.out", output);
    outFile = checkPointerError(fopen(outputName, "w"), "Can't open OUT file", __FILE__, __LINE__, -1);
    sprintf(outputName,"%s.err", output);
    errFile = checkPointerError(fopen(outputName, "w"), "Can't open ERR file", __FILE__, __LINE__, -1);
    sprintf(outputName,"%s.ent", output);
    entFile = checkPointerError(fopen(outputName, "w"), "Can't open ENT file", __FILE__, __LINE__, -1);
    free(outputName);
    free(output);

    time(&now);
    if (verbose) printf("Reading chromosomes from GTF file\n");
    Chromosome_f *chrFactory = NewChromosomeFactory(gtfFile);
    time(&later);
    if (verbose) printf("Chromosomes loaded in %.0f s\n", difftime(later, now));

    if (verbose) printf("Parsing SAM file\n");
    SAM_f *samFactory = NewSAMFactory();
    Reads_f *readsFactory = NewReadsFactory(samFactory, chrFactory, errFile);
    readsFactory->processReadFromSAM(readsFactory, inFile, verbose);

    if (verbose) printf("Processing GTF structure\n");
    chrFactory->calculus(chrFactory);

    if (verbose) printf("Printing results\n");
    chrFactory->print(chrFactory, outFile, entFile);

    FreeSAMFactory(&samFactory);
    FreeChromosomeFactory(&chrFactory);
    fclose(gtfFile);
    fclose(outFile);
    fclose(errFile);
    fclose(entFile);
    return (EXIT_SUCCESS);
}