void makePai(boolean isEst, char *inName, char *outName)
/* Read pair information from inName and save the result to outName.
 * The reader is chosen from the suffix of inName:
 *    ".ra"   - makeRaPai
 *    ".info" - makeInfoPai
 * Any other suffix calls usage().  cloneHash (declared outside this
 * function) is replaced with a fresh hash before reading. */
{
boolean isRaFile;

cloneHash = newHash(20);

/* The ".ra" suffix is always tested first; ".info" is only tested
 * when the name is not a ".ra" file. */
isRaFile = endsWith(inName, ".ra");
if (!isRaFile && !endsWith(inName, ".info"))
    {
    usage();
    }
else if (isRaFile)
    {
    makeRaPai(isEst, inName, outName);
    }
else
    {
    makeInfoPai(isEst, inName, outName);
    }

printf("Saving %s\n", outName);
savePairs(outName);
}
Exemple #2
0
static void mapUniprot(struct spMapper *sm, char *qSpId, char *tSpId, float score)
/* Map a query/target pair of uniprot/swissprot ids directly, without going
 * through uniref.  When both ids map, the pair is saved and sm->qtMapCnt is
 * incremented.  Otherwise each id that failed to map is reported through
 * handleNoMapping() and sm->qtNoSpIdMapCnt is incremented once. */
{
struct slName *queryIds = mapSpId(sm, qSpId);
struct slName *targetIds = mapSpId(sm, tSpId);

/* Success path first: both sides have a mapping. */
if ((queryIds != NULL) && (targetIds != NULL))
    {
    savePairs(sm, qSpId, tSpId, queryIds, targetIds, score);
    sm->qtMapCnt++;
    return;
    }

/* At least one side is unmapped; report each missing side, query first. */
if (queryIds == NULL)
    handleNoMapping(sm, qSpId, noKGMapping);
if (targetIds == NULL)
    handleNoMapping(sm, tSpId, noKGMapping);
sm->qtNoSpIdMapCnt++;
}
Exemple #3
0
static void mapUnirefEntries(struct spMapper *sm,
                             struct uniref *qUniref, struct uniref *tUniref,
                             char *qSpId, char *tSpId, float score)
/* Walk every combination of entries from the qUniref and tUniref lists.
 * For each combination where both accessions map, save the pairs under
 * qSpId/tSpId.  Afterwards, if a mapping was seen on both the query and the
 * target side, sm->qtMapCnt is incremented; otherwise each side that never
 * mapped is reported through handleNoMapping() and sm->qtNoSpIdMapCnt is
 * incremented once. */
{
boolean sawQueryMapping = FALSE, sawTargetMapping = FALSE;
struct uniref *qEntry = qUniref;

/* NOTE(review): the query accession is looked up again for every target
 * entry, and nothing in this function filters duplicate pairs; any
 * de-duplication would have to happen inside savePairs() - confirm. */
while (qEntry != NULL)
    {
    struct uniref *tEntry = tUniref;
    while (tEntry != NULL)
        {
        struct slName *queryIds = mapSpId(sm, qEntry->upAcc);
        struct slName *targetIds = mapSpId(sm, tEntry->upAcc);

        if (targetIds != NULL)
            sawTargetMapping = TRUE;
        if (queryIds != NULL)
            {
            sawQueryMapping = TRUE;
            if (targetIds != NULL)
                savePairs(sm, qSpId, tSpId, queryIds, targetIds, score);
            }
        tEntry = tEntry->next;
        }
    qEntry = qEntry->next;
    }

if (sawQueryMapping && sawTargetMapping)
    {
    sm->qtMapCnt++;
    return;
    }

/* At least one side never mapped; report the query side first. */
if (!sawQueryMapping)
    handleNoMapping(sm, qSpId, noKGMapping);
if (!sawTargetMapping)
    handleNoMapping(sm, tSpId, noKGMapping);
sm->qtNoSpIdMapCnt++;
}
Exemple #4
0
void ppAnalyse(char *headersName, char *sizesName, char *pairsPsl, 
    char *finDir, char *pairOut, char *mispairOut)
/* ppAnalyse - Analyse paired plasmid reads. 
 *
 * Parameters:
 *    headersName - passed to readHeaders() to build the read and pair lists
 *    sizesName   - passed to readSizes() to build the group size list
 *    pairsPsl    - alignment file passed to readAlignments()
 *    finDir      - directory scanned for "*.fa" files; aborts if none found
 *    pairOut     - output name passed to savePairs()
 *    mispairOut  - output name passed to doPairStats()
 *
 * Progress and statistics are written to stdout with printf(). */
{
struct hash *readHash = newHash(21);
struct hash *pairHash = newHash(20);
struct hash *fragHash = newHash(17);
struct hash *gsiHash = newHash(8);
struct hash *finHash = newHash(14);
struct pairInfo *pairList = NULL, *measuredList = NULL;
struct readInfo *readList = NULL;
struct groupSizeInfo *gsiList = NULL, *gsi;
struct slName *finList, *name;
int aliCount;
int finCount;
int i;

/* Collect the names of the .fa files in finDir, suffix removed, in finHash. */
finList = listDir(finDir, "*.fa");
finCount = slCount(finList);
if (finCount == 0)
    errAbort("No fa files in %s\n", finDir);
printf("Got %d (finished) .fa files in %s\n", finCount, finDir);
for (name = finList; name != NULL; name = name->next)
    {
    chopSuffix(name->name);
    hashStore(finHash, name->name);
    }

gsiList = readSizes(sizesName, gsiHash);
printf("Got %d groups\n", slCount(gsiList));

/* Both lists are reversed after reading.
 * NOTE(review): presumably readHeaders() builds them by prepending - confirm. */
readHeaders(headersName, readHash, pairHash, &readList, &pairList);
slReverse(&readList);
slReverse(&pairList);
printf("Got %d reads in %d pairs\n", slCount(readList), slCount(pairList));

savePairs(pairOut, pairList, gsiHash);
printf("Saved pairs to %s\n", pairOut);

aliCount = readAlignments(pairsPsl, readHash, fragHash);
printf("Got %d alignments in %s\n", aliCount, pairsPsl);

doReadStats(readList, aliCount);
doPairStats(pairList, finHash, gsiHash, mispairOut, &measuredList, &pairList);
gsiMeanAndVariance(measuredList, gsiList, gsiHash, finHash);

/* Ten buckets of aliSizes (declared outside this function), labelled in
 * steps of 100. */
printf("Alignment length stats:\n");
for (i=0; i<10; ++i)
    {
    /* Avoid dividing by zero when no alignments were read. */
    double aliPercent = 0.0;
    if (aliCount > 0)
        aliPercent = 100.0 * (double)aliSizes[i]/(double)aliCount;
    printf("%3d - %4d :  %6d  %4.2f%%\n",
        i*100, (i+1)*100, aliSizes[i], aliPercent);
    }
doMeasuredStats(measuredList);

/* Per-group summary; groups without measurements are skipped, which also
 * keeps the division by measuredCount safe. */
for (gsi = gsiList; gsi != NULL; gsi = gsi->next)
    {
    if (gsi->measuredCount > 0)
        {
        printf("%s: mean %f, std %f, min %d, max %d, samples %d\n",
            gsi->name, gsi->measuredMean, sqrt(gsi->variance),
            gsi->measuredMin, gsi->measuredMax,
            gsi->measuredCount);
        printf("   %4.2f%% within guessed range (%d-%d)\n", 
            100.0 * (double)gsi->guessedCount/(double)gsi->measuredCount,
            gsi->guessedMin, gsi->guessedMax);
        /* NOTE(review): the 1600 column uses %4.3f while the others use
         * %4.2f; left unchanged so the output stays identical - confirm
         * whether that is intentional. */
        printf("   w/in 200 %4.2f%%, w/in 400 %4.2f%%,  w/in 800 %4.2f%%,  w/in 1600 %4.3f%%, w/in 3200 %4.2f%%\n\n",
            gsiTight(gsi, 200), gsiTight(gsi, 400), gsiTight(gsi, 800), gsiTight(gsi, 1600), gsiTight(gsi, 3200) );
        showHistogram(gsi);
        }
    }
}