Esempio n. 1
0
int main(int argc, char *argv[])
/* Entry point: read the seqIdent/basePct thresholds and the in/out psl
   names from the cgi-style options into the file-level settings, then
   run filterPsls(). */
{
cgiSpoof(&argc, argv);

/* Both thresholds fall back to .9 when not supplied. */
seqIdent = cgiOptionalDouble("seqIdent", .9);
basePct = cgiOptionalDouble("basePct", .9);

pslIn = cgiOptionalString("in");
pslOut = cgiOptionalString("out");

/* Both an input and an output name are required. */
if(!(pslIn != NULL && pslOut != NULL))
    usage();

filterPsls();
return 0;
}
int countCassetteExons(struct altGraphX *agList, float minConfidence, FILE *outfile, FILE *bedOutFile)
/* Walk every graph in agList and, for each edge typed ggCassette, score it
   with altGraphCassetteConfForEdge() and examine the beds returned by
   altGraphGetExonCassette(). A bed is written via writeCassetteExon() when
   its confidence is >= minConfidence, the edge size is >= the "minSize"
   cgi option and bedPassFilters() accepts it.
   Returns the number of cassette beds written that way.
   outfile and bedOutFile are passed straight through to
   writeCassetteExon(); per the original note, if outfile != NULL fasta
   sequences are output to it.
   Side effects: writes confidences.log, confidences.html and sizes.log in
   the current directory, and replaces a NULL ag->name with an empty string. */
{
struct altGraphX *ag = NULL;
int cassetteCount = 0;
int i = 0;
int mod3 = 0;
int counter = 0;   /* Running bed index across all graphs, used to build bed names. */
boolean outputted = FALSE;
float estPrior = cgiOptionalDouble("estPrior", 10);
FILE *log = mustOpen("confidences.log", "w");
FILE *html = mustOpen("confidences.html", "w");
FILE *sizes = mustOpen("sizes.log", "w");
int minSize = cgiOptionalInt("minSize", 0);
startHtml(html);
for(ag = agList; ag != NULL; ag = ag->next)
    {
    /* Reset once per graph; writeCassetteExon() receives a pointer to it. */
    outputted = FALSE;
    for(i = 0; i < ag->edgeCount; i++)
        {
        if(ag->edgeTypes[i] == ggCassette)
            {
            float conf = altGraphCassetteConfForEdge(ag, i, estPrior);
            struct bed *bed, *bedList = altGraphGetExonCassette(ag, i);
            char buff[256];
            /* Distance between the positions of the edge's end and start vertices. */
            int size = ag->vPositions[ag->edgeEnds[i]] - ag->vPositions[ag->edgeStarts[i]];
            boolean filtersOk = FALSE;
            if(ag->name == NULL)
                ag->name = cloneString("");

            slSort(&bedList, bedCmpMaxScore);
            for(bed = bedList; bed != NULL; bed = bed->next)
                {
                snprintf(buff, sizeof(buff), "%s.%d", ag->name, counter);
                /* NOTE(review): any name the bed already carried is overwritten
                   without being freed here - confirm whether
                   altGraphGetExonCassette() sets one. */
                bed->name = cloneString(buff);
                fprintf(log, "%f\n", conf);
                /* NOTE(review): assumes every cassette bed has at least three blocks. */
                fprintf(sizes, "%d\n%d\n%d\n", bed->blockSizes[0], bed->blockSizes[1], bed->blockSizes[2]);
                filtersOk = bedPassFilters(bed, ag, i);
                if(conf >= minConfidence && size >= minSize && filtersOk)
                    {
                    writeCassetteExon(bed, ag, i, &outputted, bedOutFile, outfile, html, conf);
                    cassetteCount++;
                    if((size % 3) == 0)
                        mod3++;
                    }
                counter++;
                }
            bedFreeList(&bedList);
            }
        }
    }
endHtml(html);
carefulClose(&html);
carefulClose(&log);
/* sizes.log was previously left open; close it like the other two logs. */
carefulClose(&sizes);
warn("%d cassettes are mod 3", mod3);
return cassetteCount;
}
int main(int argc, char *argv[])
/* Entry point: load the altGraphX clusters from the file named by argv[1],
   read the run settings from the cgi-style options, count cassette exons
   with countCassetteExons() and report the total through warn().
   Bed and fasta output files are opened only when the bedFile / faFile
   options are given. */
{
struct altGraphX *graphs = NULL;
char *bedPath = NULL, *faPath = NULL;
FILE *bedFh = NULL, *faFh = NULL;
float confCutoff = 0;
float prior = 0.0;
int sizeCutoff = 0;
int found = 0;

/* The argument count is checked before cgiSpoof() gets to touch argc. */
if(argc < 4)
    usage();
cgiSpoof(&argc, argv);

warn("Loading graphs.");
graphs = altGraphXLoadAll(argv[1]);

/* Option lookups stay in this order; minConf and db are fetched with the
   non-optional accessors. */
bedPath = cgiOptionalString("bedFile");
confCutoff = cgiDouble("minConf");
db = cgiString("db");
faPath = cgiOptionalString("faFile");
prior = cgiOptionalDouble("estPrior", 10);
sizeCutoff = cgiOptionalInt("minSize", 0);
if(cgiBoolean("mrnaFilter"))
    loadMrnaHash();

warn("Counting cassette exons from %d clusters above confidence: %f", slCount(graphs), confCutoff);

if(bedPath != NULL)
    {
    /* The bed file starts with the command state and a track line
       recording the settings in effect. */
    bedFh = mustOpen(bedPath, "w");
    printCommandState(argc, argv, bedFh);
    fprintf(bedFh, "track name=cass_conf-%4.2f_est-%3.2f description=\"spliceStats minConf=%4.2f estPrior=%3.2f minSize=%d\"\n", 
            confCutoff, prior, confCutoff, prior, sizeCutoff);
    }
if(faPath != NULL)
    faFh = mustOpen(faPath, "w");

found = countCassetteExons(graphs, confCutoff, faFh, bedFh);

carefulClose(&faFh);
carefulClose(&bedFh);
warn("%d cassette exons out of %d clusters in %s", found, slCount(graphs), argv[1]);
altGraphXFreeList(&graphs);
return 0;
}