void txGeneAltProt(char *pepFile, char *isoformsFile, char *outFile) /* txGeneAltProt - Figure out statistics on number of alternative proteins produced by alt-splicing.. */ { struct hash *pepHash = faReadAllIntoHash(pepFile, dnaUpper); struct hash *totalUniqHash = hashNew(18); uglyf("Read %d from %s\n", pepHash->elCount, pepFile); int lastClusterId = -1; struct hash *uniqHash = NULL; struct slName *clusterList = NULL; FILE *f = mustOpen(outFile, "w"); struct lineFile *lf = lineFileOpen(isoformsFile, TRUE); char *row[2]; while (lineFileRow(lf, row)) { int clusterId = lineFileNeedNum(lf, row, 0); char *tx = row[1]; if (clusterId != lastClusterId) { if (uniqHash != NULL) { outputCluster(lastClusterId, clusterList, f); hashFree(&uniqHash); slFreeList(&clusterList); } uniqHash = hashNew(0); } lastClusterId = clusterId; struct dnaSeq *pep = hashFindVal(pepHash, tx); if (pep != NULL) { if (!hashLookup(uniqHash, pep->dna)) { hashAdd(uniqHash, pep->dna, NULL); slNameAddTail(&clusterList, tx); } if (!hashLookup(totalUniqHash, pep->dna)) hashAdd(totalUniqHash, pep->dna, NULL); } } outputCluster(lastClusterId, clusterList, f); verbose(1, "%d total unique proteins\n", totalUniqHash->elCount); carefulClose(&f); }
void setCover() { int bigCluster; bigCluster = findBigCluster(); //printf("L396 %i %i\n", bigCluster, listClusters[bigCluster].cloneSup); while((bigCluster>-1) && (listClusters[bigCluster].clustProb >4)) { // printf("%i %i\n", bigCluster, listClusters[bigCluster].cloneSup); //printf("1 %i %i %f\n", bigCluster, listClusters[bigCluster].cloneSup, listClusters[bigCluster].clustProb); outputCluster(bigCluster); // printf("2\n"); removeClones(bigCluster); //printf("3\n"); bigCluster=findBigCluster(); //printf("4\n"); //printf("%i\n", bigCluster); } }