static struct bed *randomTrial(struct chrGapList *bounding, struct bed *placed)
/*	For each item on the placed list, ask gapsOfSize() for the gaps
 *	in the bounding gap list that qualify for an item of that length,
 *	pick one of them uniformly at random and let randomInsert() put
 *	a relocated copy of the item there.
 *
 *	placed bed list has already been sorted by size descending,
 *	return is the newly placed bed list.  Each new item is pushed
 *	onto the head of the result with slAddHead().
 *
 *	Aborts via errAbort() when an item is shorter than 1 bp or when
 *	no usable gap index can be drawn (including when gapsOfSize()
 *	reports zero candidate gaps).
 */
{
struct bed *bedList = NULL;
struct bed *bedEl;
int placedCount = slCount(placed);
int gapCount = countGaps(bounding);
struct gap **sizedGaps = NULL;	/*	an array of pointers	*/
int maxGapCount = 0;

/*	We should never have more gaps than the initial set of gaps plus
 *	the placed item count since each placed item only creates one
 *	new gap.  This array will be used repeatedly as lists of gaps of
 *	specific sizes are created.  The array will be an array of
 *	pointers to the gaps greater than the specified size.
 *	The + 1 on the maxGapCount is to keep the array one larger than
 *	expected maximum so that a safety check can be performed that it
 *	never reaches past the expected maximum.
 */
maxGapCount = placedCount + gapCount + 1;
sizedGaps = needHugeMem(sizeof(*sizedGaps) * (size_t)maxGapCount);

for (bedEl = placed; bedEl != NULL; bedEl = bedEl->next)
    {
    struct bed *newBed;
    int N;
    int R;
    int itemSize = bedEl->chromEnd - bedEl->chromStart;
    if (itemSize < 1)
	/*	report the range as start-end so the message matches
	 *	the chrom:start-end layout of its format string */
	errAbort("ERROR: placing items less than 1 bp in length ? %s:%d-%d",
	bedEl->chrom, bedEl->chromStart, bedEl->chromEnd);
    /*	fill sizedGaps with the candidate gaps for this item size,
     *	N is the number of candidates found */
    N = gapsOfSize(bounding,itemSize, sizedGaps, maxGapCount);
    /*	From those N gaps, randomly select one of them	(drand48 = [0.0,1.0)*/
    R = floor(N * drand48());	/*	interval: [0,N) == [0,N-1]	*/
    /*	N == 0 gives R == 0, which is caught here as R >= N, so
     *	sizedGaps[R] below is only read for a valid candidate index */
    if ((R >= N) || (R >= maxGapCount))
	errAbort("ERROR: did not expect random "
	    "number %d to be >= %d (or %d)\n", R, N, maxGapCount);
    /*	The newBed is the bedEl translated to a new random location */
    newBed = randomInsert(bedEl,sizedGaps[R]);
    slAddHead(&bedList,newBed);
    }
/*	sizedGaps are just a bunch of pointers, the bed element inserts
 *	actually went into the bounding gap list which is going to be
 *	freed up, along with the specially added bed elements back in
 *	the loop that is managing the copying of the bounding list.
 */
freeMem(sizedGaps);
return(bedList);
}
Example #2
0
static void tabBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, char *databaseName, int databaseSeqCount, 
	double databaseLetterCount, char *ourId, boolean withComment)
/* Write the alignments of abList to f in NCBI tabular blast format,
 * one tab-separated line per alignment.  When withComment is set the
 * lines are preceded by the '#' header block (program version, query
 * name, database name and the field list). */
{
struct targetHits *hitList = NULL, *hit;
char *qName = abList->axtList->qName;
int qSize = abList->qSize;

if (withComment)
    {
    /* The date comes from the CVS keyword below; if the keyword was not
     * expanded (checked out with -kk) the string is too short and the
     * date stamp is left empty. */
    char *rcsDate = "$Date: 2009/02/26 00:05:49 $";
    char dateStamp[11];
    if (strlen(rcsDate) <= 17)
        safecpy(dateStamp, sizeof(dateStamp), "");
    else
        safencpy(dateStamp, sizeof(dateStamp), rcsDate+7, 10);
    dateStamp[10] = 0;
    fprintf(f, "# BLAT %s [%s]\n# Query: %s\n# Database: %s\n",
	gfVersion, dateStamp, qName, databaseName);
    fputs("# Fields: Query id, Subject id, % identity, alignment length, "
	"mismatches, gap openings, q. start, q. end, s. start, s. end, "
	"e-value, bit score\n", f);
    }

/* One output line per alignment, grouped by target. */
hitList = bundleIntoTargets(abList);
for (hit = hitList; hit != NULL; hit = hit->next)
    {
    struct axtRef *ref;
    for (ref = hit->axtList; ref != NULL; ref = ref->next)
        {
	struct axt *axt = ref->axt;
	int symCount = axt->symCount;
	int matches = countMatches(axt->qSym, axt->tSym, symCount);
	int gaps = countGaps(axt->qSym, axt->tSym, symCount);
	int gapOpens = countGapOpens(axt->qSym, axt->tSym, symCount);
	int qStart = axt->qStart, qEnd = axt->qEnd;
	double expectation;
	int bits;

	/* Minus strand query coordinates are flipped on local copies,
	 * the axt itself is not modified. */
	if (axt->qStrand == '-')
	    reverseIntRange(&qStart, &qEnd, qSize);

	/* names, % identity, length, mismatches, gap opens, then the
	 * query range with a one-based start */
	fprintf(f, "%s\t%s\t%.2f\t%d\t%d\t%d\t%d\t%d\t",
	    axt->qName, axt->tName, 100.0 * matches/symCount, symCount,
	    symCount - matches - gaps, gapOpens, qStart + 1, qEnd);
	printAxtTargetBlastTab(f, axt, hit->size);

	expectation = blastzScoreToNcbiExpectation(axt->score);
	bits = blastzScoreToNcbiBits(axt->score);
	fprintf(f, "%3.1e\t%d.0\n", expectation, bits);
	}
    }

/* Cleanup time. */
targetHitsFreeList(&hitList);
}
Example #3
0
static void ncbiBlastOut(struct axtBundle *abList, int queryIx, boolean isProt, 
	FILE *f, char *databaseName, int databaseSeqCount, 
	double databaseLetterCount, char *ourId, double minIdentity)
/* Do ncbiblast-like output at end of processing query.
 * Writes to f: a program/reference banner, the query and database
 * description, a one-line-per-alignment score summary, and then the
 * detailed alignments grouped under a header for each target.
 * Alignments whose rounded percent identity is below minIdentity are
 * left out of both the summary and the details.
 * ourId may be NULL, in which case "axtBlastOut" is printed instead.
 * Side effects: for untranslated DNA alignments with strands -/+ the
 * axt is flipped in place to +/-; for protein output the global
 * answer_for_kg is set. */
{
char asciiNum[32];
struct targetHits *targetList = NULL, *target;
char *queryName;
int querySize = abList->qSize;
boolean isTranslated = (abList->axtList->frame != 0);

/* Print out stuff that doesn't depend on query or database. */
if (ourId == NULL)
    ourId = "axtBlastOut";
fprintf(f, "%s 2.2.11 [%s]\n", progType(isProt, abList, TRUE), ourId);
fprintf(f, "\n");
fprintf(f, "Reference:  Kent, WJ. (2002) BLAT - The BLAST-like alignment tool\n");
fprintf(f, "\n");

/* Print query and database info. */
queryName = abList->axtList->qName;
fprintf(f, "Query= %s\n", queryName);
fprintf(f, "         (%d letters)\n", abList->qSize);
fprintf(f, "\n");
fprintf(f, "Database: %s \n",  databaseName);
sprintLongWithCommas(asciiNum, databaseLetterCount);
fprintf(f, "           %d sequences; %s total letters\n",  databaseSeqCount, asciiNum);
fprintf(f, "\n");
fprintf(f, "Searching.done\n");

targetList = bundleIntoTargets(abList);

/* Print out summary of hits. */
fprintf(f, "                                                                 Score    E\n");
fprintf(f, "Sequences producing significant alignments:                      (bits) Value\n");
fprintf(f, "\n");
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    struct axt *axt;
    int matches;
    double identity, expectation;
    int bit;
    
    for (ref = target->axtList; ref != NULL; ref = ref->next)
	{
	axt = ref->axt;
	
	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
	identity = round(100.0 * matches / axt->symCount);
	/* skip output if minIdentity not reached */
	if (identity < minIdentity) continue;
    
    	bit = blastzScoreToNcbiBits(axt->score);
        expectation = blastzScoreToNcbiExpectation(axt->score);
    	fprintf(f, "%-67s  %4d   ", target->name, bit);
    	ncbiPrintE(f, expectation);
    	fprintf(f, "\n");
    	}
    }
fprintf(f, "\n");

/* Print out details on each target. */
for (target = targetList; target != NULL; target = target->next)
    {
    struct axtRef *ref;
    struct axt *axt;
    int matches, gaps;
    /* Name of the target whose header has already been printed.  It only
     * ever points at a string literal or at target->name, so nothing is
     * allocated here and nothing needs to be freed. */
    char *shownName = "";
    double identity;

    for (ref = target->axtList; ref != NULL; ref = ref->next)
	{
	axt = ref->axt;
	
	matches = countMatches(axt->qSym, axt->tSym, axt->symCount);
	identity = round(100.0 * matches / axt->symCount);
	
	/* skip output if minIdentity not reached */
	if (identity < minIdentity) continue;
        
	/* print target sequence name and length only once */ 
	if (!sameWord(shownName, target->name))
	    {
	    fprintf(f, "\n\n>%s \n", target->name);
	    fprintf(f, "          Length = %d\n", target->size);
	    shownName = target->name;
	    }

	fprintf(f, "\n");
	fprintf(f, " Score = %d bits (%d), Expect = ",
	     blastzScoreToNcbiBits(axt->score),
	     blastzScoreToNcbiScore(axt->score));
	ncbiPrintE(f, blastzScoreToNcbiExpectation(axt->score));
	fprintf(f, "\n");
	
	/* The (int) casts below keep the %d arguments of type int whether
	 * round is an integer-valued macro or the double-returning
	 * function from math.h. */
	if (isProt)
	    {
	    int positives = countPositives(axt->qSym, axt->tSym, axt->symCount);
	    gaps = countGaps(axt->qSym, axt->tSym, axt->symCount);
	    fprintf(f, " Identities = %d/%d (%d%%),",
		 matches, axt->symCount,
		 (int)round(100.0 * matches / axt->symCount));
	    fprintf(f, " Positives = %d/%d (%d%%),",
		 positives, axt->symCount,
		 (int)round(100.0 * positives / axt->symCount));
	    fprintf(f, " Gaps = %d/%d (%d%%)\n",
		 gaps, axt->symCount,
		 (int)round(100.0 * gaps / axt->symCount));
	    if (axt->frame != 0) 
		fprintf(f, " Frame = %c%d\n", axt->tStrand, axt->frame);
	    /* set the special global variable, answer_for_kg.  
   	       This is needed for Known Genes track building.  Fan 1/21/03 */
            answer_for_kg=axt->symCount - matches;
	    }
	else
	    {
	    fprintf(f, " Identities = %d/%d (%d%%)\n",
		 matches, axt->symCount,
		 (int)round(100.0 * matches / axt->symCount));
	    /* blast displays dna searches as +- instead of blat's default -+ */
	    if (!isTranslated)
		if ((axt->qStrand == '-') && (axt->tStrand == '+'))
		    {
		    /* flips the alignment in place: coordinates, symbols
		     * and strand characters of the axt are all rewritten */
		    reverseIntRange(&axt->qStart, &axt->qEnd, querySize);
		    reverseIntRange(&axt->tStart, &axt->tEnd, target->size);
		    reverseComplement(axt->qSym, axt->symCount);
		    reverseComplement(axt->tSym, axt->symCount);
		    axt->qStrand = '+';
		    axt->tStrand = '-';
		    }
	    fprintf(f, " Strand = %s / %s\n", nameForStrand(axt->qStrand),
		nameForStrand(axt->tStrand));
	    }
	fprintf(f, "\n");
	blastiodAxtOutput(f, axt, target->size, querySize, 60, isProt, isTranslated);
	}
    }

fprintf(f, "  Database: %s\n", databaseName);

/* Cleanup time. */
targetHitsFreeList(&targetList);
}