boolean sameStickyEnd(struct cutter *enz1, struct cutter *enz2)
/* Return TRUE when the two enzymes produce the same sticky end, comparing
 * the second enzyme's end both as given and reverse-complemented.  Returns
 * FALSE if either end is missing, the sizes differ, or either end is not
 * made up entirely of ACGT. */
{
struct dnaSeq *endA = stickyEnd(enz1);
struct dnaSeq *endB = stickyEnd(enz2);
boolean isSame = FALSE;
boolean comparable = (endA != NULL && endB != NULL);

/* Narrow down step by step, in the same order the checks short-circuit. */
if (comparable)
    comparable = (endA->size == endB->size);
if (comparable)
    comparable = (acgtCount(endA->dna) == endA->size);
if (comparable)
    comparable = (acgtCount(endB->dna) == endB->size);

if (comparable)
    {
    if (sameString(endA->dna, endB->dna))
        isSame = TRUE;
    else
        {
        /* Try the other orientation; endB is freed below, so it can be
         * modified in place. */
        reverseComplement(endB->dna, endB->size);
        if (sameString(endA->dna, endB->dna))
            isSame = TRUE;
        }
    }
freeDnaSeq(&endA);
freeDnaSeq(&endB);
return isSame;
}
void trans3Free(struct trans3 **pT3)
/* Free the sequences held in trans[0], trans[1] and trans[2] of *pT3 and
 * then the structure itself (via freez).  Does nothing when *pT3 is NULL. */
{
struct trans3 *doomed = *pT3;
int ix;

if (doomed == NULL)
    return;
for (ix = 0; ix < 3; ++ix)
    freeDnaSeq(&doomed->trans[ix]);
freez(pT3);
}
Exemple #3
0
void readDbstsFa(FILE *dff)
/* Read sequences from dbSTS.fa and attach each to its marker when possible.
 * The accession (sequence name) is looked up in gbAccHash.  A sequence is
 * kept - ownership passes to the sts record - when the marker has no
 * sequence yet, or when it replaces the sequence already recorded under the
 * same accession.  In every other case the sequence is freed here. */
{
  struct dnaSeq *ds;
  struct sts *s;
  struct gb *gb;
  char name[256], *line;

  /* NOTE(review): 'line' receives the comment line for each record; whether
   * the caller is expected to free it is not visible from here - confirm. */
  while (faReadMixedNext(dff, 0, "default", TRUE, &line, &ds))
    {
      /* Accession unknown to gbAccHash: there is nothing to attach the
       * sequence to, so release it rather than leaking it. */
      if (!hashLookup(gbAccHash, ds->name))
        {
          freeDnaSeq(&ds);
          continue;
        }

      /* Accession known but not linked to a marker. */
      gb = hashMustFindVal(gbAccHash, ds->name);
      if (gb->s == NULL)
        {
          freeDnaSeq(&ds);
          continue;
        }

      s = gb->s;
      if (s->fa == NULL)
        {
          /* No recorded sequence: adopt this one, renamed to the marker's
           * numeric id. */
          s->faAcc = cloneString(ds->name);
          safef(name, ArraySize(name), "%d", s->si->identNo);
          ds->name = cloneString(name);
          s->fa = ds;
          s->si->sequence = 1;
        }
      else if (s->faAcc == NULL)
        {
          /* Sequence recorded without an accession: record this accession
           * only if the bases are identical. */
          if (sameString(s->fa->dna, ds->dna))
            {
              s->faAcc = cloneString(ds->name);
              s->si->sequence = 1;
            }
          freeDnaSeq(&ds);
        }
      else if (sameString(s->faAcc, ds->name))
        {
          /* Same accession as recorded: replace the stored sequence,
           * keeping the name the stored one had. */
          ds->name = cloneString(s->fa->name);
          freeDnaSeq(&s->fa);
          s->fa = ds;
          s->si->sequence = 1;
        }
      else
        freeDnaSeq(&ds);
    }
}
Exemple #4
0
void gfAlignStrand(int *pConn, char *tSeqDir, struct dnaSeq *seq,
    boolean isRc, int minMatch, struct hash *tFileCache, struct gfOutput *out)
/* Query the server over *pConn with one strand of seq, then for each
 * bundled hit range load the corresponding target sequence locally, align
 * in detail, and hand the result to saveAlignments with 'out'.  The
 * connection is closed and *pConn set to -1 once the query has returned. */
{
char targetName[PATH_LEN];
struct gfRange *hit;
struct gfRange *ranges = gfQuerySeq(*pConn, seq);

/* Nothing further is read from the server after the query. */
close(*pConn);
*pConn = -1;

slSort(&ranges, gfRangeCmpTarget);
ranges = gfRangesBundle(ranges, ffIntronMax);

hit = ranges;
while (hit != NULL)
    {
    struct dnaSeq *target;
    struct ssBundle *bundle;

    getTargetName(hit->tName, out->includeTargetFile, targetName);
    target = gfiExpandAndLoadCached(hit, tFileCache, tSeqDir,
        seq->size, &hit->tTotalSize, FALSE, FALSE, usualExpansion);

    AllocVar(bundle);
    bundle->qSeq = seq;
    bundle->genoSeq = target;
    alignComponents(hit, bundle, ffCdna);
    ssStitch(bundle, ffCdna, minMatch, ssAliCount);
    saveAlignments(targetName, hit->tTotalSize, hit->tStart,
        bundle, NULL, isRc, FALSE, ffCdna, minMatch, out);

    ssBundleFree(&bundle);
    freeDnaSeq(&target);
    hit = hit->next;
    }
gfRangeFreeList(&ranges);
}
Exemple #5
0
static void makeOligoHistogram(char *fileName, struct seqList *seqList, 
    int oligoSize, int **retTable, int *retTotal)
/* Make up table of oligo occurences. Either pass in an FA file or a seqList.
 * (the other should be NULL). */
{
FILE *f = NULL;
int tableSize = (1<<(oligoSize+oligoSize));
int tableByteSize = tableSize * sizeof(int);
int *table = needLargeMem(tableByteSize);
struct dnaSeq *seq;
struct seqList *seqEl = seqList;
int *softMask = NULL;
int total = 0;

if (seqList == NULL)
    f = mustOpen(fileName, "rb");

memset(table, 0, tableByteSize);
for (;;)
    {
    DNA *dna;
    int size;
    int endIx;
    int i;
    int oliVal;
    if (seqList != NULL)
        {
        if (seqEl == NULL)
            break;
        seq = seqEl->seq;
        softMask = seqEl->softMask;
        seqEl = seqEl->next;
        }
    else
        {
        seq = faReadOneDnaSeq(f, "", TRUE);
        if (seq == NULL)
            break;
        }
    dna = seq->dna;
    size = seq->size;
    endIx = size-oligoSize;
    for (i=0; i<=endIx; ++i)
        {
        if (softMask == NULL || !masked(softMask+i, oligoSize) )
            {
            if ((oliVal = oligoVal(dna+i, oligoSize)) >= 0)
                {
                table[oliVal] += 1;
                ++total;
                }
            }
        }
    if (seqList == NULL)
        freeDnaSeq(&seq);
    }
carefulClose(&f);
*retTable = table;
*retTotal = total;
}
void foldPslIntoStats(struct psl *psl, struct dnaSeq *tSeq,
                      struct hash *otherHash, struct stats *stats)
/* Load the query range of psl from otherHash and add the alignment's
 * aligned-base and matching-base counts to stats.  Nothing is added when
 * either sequence is unavailable.  Aborts on a reverse-strand target. */
{
    struct dnaSeq *qSeq = loadSomeSeq(otherHash,
                                      psl->qName, psl->qStart, psl->qEnd);
    int blockCount = psl->blockCount;

    if (qSeq != NULL && tSeq != NULL)
    {
        /* qSeq holds only the aligned query range, so block starts must be
         * shifted by where that range begins in strand coordinates. */
        int qOffset;
        int blockIx;

        if (psl->strand[0] != '-')
            qOffset = psl->qStart;
        else
        {
            reverseComplement(qSeq->dna, qSeq->size);
            qOffset = psl->qSize - psl->qEnd;
        }

        if (psl->strand[1] == '-')
            errAbort("Can't yet handle reverse complemented targets");

        for (blockIx = 0; blockIx < blockCount; ++blockIx)
        {
            int blockSize = psl->blockSizes[blockIx];
            stats->bedBaseAli += blockSize;
            stats->bedBaseMatch += baseMatch(qSeq->dna + psl->qStarts[blockIx] - qOffset,
                                             tSeq->dna + psl->tStarts[blockIx],  blockSize);
        }
    }
    freeDnaSeq(&qSeq);
}
Exemple #7
0
static void simpleFillInSequence(char *seqDir, struct agpFrag *agpList,
    DNA *dna, int dnaSize)
/* Fill in DNA array with sequences from simple clones.  For each fragment
 * the clone name is the fragment name up to the first '_'; the clone is
 * read from seqDir/<clone>.fa, which must hold exactly one sequence.  The
 * fragment range is copied to dna at chromStart, reverse-complemented
 * first for '-' strand fragments.  Aborts if a name or path does not fit
 * its buffer.
 * NOTE(review): dnaSize is not used; the copy is not checked against the
 * size of dna or of the clone sequence - confirm callers guarantee this. */
{
struct agpFrag *agp;
char underline = '_';

for (agp = agpList; agp != NULL; agp = agp->next)
    {
    char clone[128];
    char path[512];
    struct dnaSeq *seq;
    int size;
    int written;

    /* Bounded copies: an over-long name aborts instead of overrunning
     * the stack buffers. */
    written = snprintf(clone, sizeof(clone), "%s", agp->frag);
    if (written < 0 || written >= (int)sizeof(clone))
        errAbort("Fragment name too long: %s", agp->frag);
    chopSuffixAt(clone, underline);
    written = snprintf(path, sizeof(path), "%s/%s.fa", seqDir, clone);
    if (written < 0 || written >= (int)sizeof(path))
        errAbort("Path too long for clone %s in %s", clone, seqDir);

    seq = faReadAllDna(path);
    if (slCount(seq) != 1)
        errAbort("Can only handle exactly one clone in %s.", path);
    size = agp->fragEnd - agp->fragStart;
    if (agp->strand[0] == '-')
        reverseComplement(seq->dna + agp->fragStart, size);
    memcpy(dna + agp->chromStart, seq->dna + agp->fragStart, size);
    freeDnaSeq(&seq);
    }
}
Exemple #8
0
void musAliAt(char *database, char *chrom, char *humanFa, char *mouseFa)
/* musAliAt - Produce .fa files where mouse alignments hit on chrom.
 * Reads all blatMouse alignments whose target is chrom and writes each
 * distinct query sequence once to mouseFa.
 * NOTE(review): humanFa is accepted but not used by this function. */
{
char query[256], **row;
struct sqlResult *sr;
struct sqlConnection *conn;
struct dnaSeq *musSeq;
struct psl *psl;
struct hash *musHash = newHash(10);   /* query names already written */
FILE *musOut = mustOpen(mouseFa, "w");

hSetDb(database);
conn = hAllocConn();
sqlSafef(query, sizeof query, "select * from blatMouse where tName = '%s'", chrom);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    psl = pslLoad(row);
    /* Names are stored with a NULL value, so presence must be tested with
     * hashLookup; testing the stored value would never see a name as
     * present and each sequence would be written once per alignment. */
    if (hashLookup(musHash, psl->qName) == NULL)
        {
        musSeq = hExtSeq(psl->qName);
        hashAdd(musHash, psl->qName, NULL);
        faWriteNext(musOut, musSeq->name, musSeq->dna, musSeq->size);
        freeDnaSeq(&musSeq);
        }
    pslFree(&psl);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
carefulClose(&musOut);
}
void loadIfNewSeq(char *nibDir, char *newName, char strand, 
	char **pName, struct dnaSeq **pSeq, char *pStrand)
/* Make *pSeq hold sequence newName on the requested strand.  When newName
 * equals *pName the current sequence is kept and only reverse-complemented
 * if the strand differs.  Otherwise the old sequence is freed and
 * nibDir/newName.nib is loaded.  *pName is set to newName itself (not a
 * copy) and *pStrand to strand. */
{
if (!sameString(newName, *pName))
    {
    char nibPath[512];
    struct dnaSeq *loaded;

    freeDnaSeq(pSeq);
    snprintf(nibPath, sizeof(nibPath), "%s/%s.nib", nibDir, newName);
    *pName = newName;
    loaded = nibLoadAllMasked(NIB_MASK_MIXED, nibPath);
    *pSeq = loaded;
    *pStrand = strand;
    if (strand == '-')
        reverseComplement(loaded->dna, loaded->size);
    uglyf("Loaded %d bases in %s\n", loaded->size, nibPath);
    }
else if (strand != *pStrand)
    {
    /* Same sequence, opposite strand: flip in place. */
    struct dnaSeq *current = *pSeq;
    reverseComplement(current->dna, current->size);
    *pStrand = strand;
    }
}
Exemple #10
0
void twoBitDup(char *filename)
/* twoBitDup - check to see if a twobit file has any identical sequences in it.
 * Each sequence's bases are used as a hash key with the sequence name as
 * the value; a sequence whose bases are already present is reported on
 * stdout together with the name stored under that key. */
{
struct twoBitFile *tbf;

tbf = twoBitOpen(filename);
struct twoBitIndex *index;
int seqCount = slCount(tbf->indexList);
/* log2(0) is -infinity and converting that to int is undefined, so size
 * the hash as for a single sequence when the index is empty. */
int hashSize = (int)log2(seqCount > 0 ? seqCount : 1) + 2;	 // +2 for luck
struct hash *seqHash = newHash(hashSize);

verbose(2, "hash size is %d\n", hashSize);

for (index = tbf->indexList; index != NULL; index = index->next)
    {
    verbose(2,"grabbing seq %s\n", index->name);
    int size;
    struct dnaSeq *seq = twoBitReadSeqFragExt(tbf, index->name,
	0, 0, FALSE, &size);
    struct hashEl *hel;
    if ((hel = hashLookup(seqHash, seq->dna)) != NULL)
	printf("%s and %s are identical\n", index->name, (char *)hel->val);
    else
	hashAdd(seqHash, seq->dna, index->name);
    /* NOTE(review): relies on the hash keeping its own copy of the key,
     * since the bases are freed here - confirm against hashAdd. */
    freeDnaSeq(&seq);
    }
}
void writeChainPart(struct dnaSeq *tChrom,
	struct nibTwoCache *qNtc, char *nibDir,
	struct chain *chain, int tStart, int tEnd, FILE *f, FILE *gapFile)
/* Write out axt's from the subset of chain lying between tStart and tEnd
 * on the target.  The matching query fragment is fetched from qNtc
 * (reverse-complemented for '-' strand chains) and passed with the
 * subchain to writeAxtFromChain.  Aborts if the subset is empty.
 * nibDir is not used here. */
{
struct dnaSeq *qSeq;
struct dnaSeq *sizeProbe;
boolean isRev = (chain->qStrand == '-');
struct chain *subChain, *chainToFree;
int fullSeqSize;
int qStart;

chainSubsetOnT(chain, tStart, tEnd, &subChain, &chainToFree);
if (subChain == NULL)
    errAbort("null subchain in chain ID %d\n", chain->id);

/* Fetch a one-base fragment purely to learn the full query size.  The
 * fragment is a sequence like any other returned by this call, so free it
 * instead of dropping the pointer. */
sizeProbe = nibTwoCacheSeqPart(qNtc, chain->qName, 1, 1, &fullSeqSize);
freeDnaSeq(&sizeProbe);

/* Get query sequence fragment; reverse-strand chain coordinates are
 * converted to forward-strand coordinates for the fetch. */
qStart = (isRev ? fullSeqSize - subChain->qEnd : subChain->qStart);
qSeq = nibTwoCacheSeqPart(qNtc, subChain->qName, qStart, 
                                subChain->qEnd - subChain->qStart, NULL);
if (isRev)
    reverseComplement(qSeq->dna, qSeq->size);

verbose(9, "fill chain id, subchain %d %s %d %d %c qOffset=%d\n", 
                subChain->id, subChain->qName,
                tStart, tEnd, subChain->qStrand, qStart);
writeAxtFromChain(subChain, qSeq, subChain->qStart, tChrom, 0, f, gapFile);
chainFree(&chainToFree);
freeDnaSeq(&qSeq);
}
void netToAxt(char *netName, char *chainName, char *tNibDir, char *qNibDir, char *axtName)
/* netToAxt - Convert net (and chain) to axt.
 * Reads the chains whose ids are used by the net, then for each net in
 * netName loads the target sequence from tNibDir and writes axt output to
 * axtName through rConvert.  When the "gapOut" option is set, a gap file
 * of that name is also opened and passed along.  All files opened here
 * are closed before returning. */
{
Bits *usedBits = findUsedIds(netName);
struct hash *chainHash;
struct chainNet *net;
struct lineFile *lf = lineFileOpen(netName, TRUE);
FILE *f = mustOpen(axtName, "w");
struct dnaSeq *tChrom = NULL;
struct nibTwoCache *qNtc = nibTwoCacheNew(qNibDir);
char *gapFileName = optionVal("gapOut", NULL);
FILE *gapFile = NULL;

if (gapFileName)
    gapFile = mustOpen(gapFileName, "w");
lineFileSetMetaDataOutput(lf, f);
chainHash = chainReadUsedSwap(chainName, qChain, usedBits);
bitFree(&usedBits);
while ((net = chainNetRead(lf)) != NULL)
    {
    verbose(1, "Processing %s\n", net->name);
    tChrom = nibTwoLoadOne(tNibDir, net->name);
    /* A size mismatch means the sequence and net do not describe the
     * same assembly. */
    if (tChrom->size != net->size)
	errAbort("Size mismatch on %s.  Net/nib out of sync or possibly nib dirs swapped?", 
		tChrom->name);
    rConvert(net->fillList, tChrom, qNtc, qNibDir, chainHash, f, gapFile);
    freeDnaSeq(&tChrom);
    chainNetFree(&net);
    }
nibTwoCacheFree(&qNtc);
/* Close the reader first since it was told to copy metadata into f. */
lineFileClose(&lf);
carefulClose(&f);
carefulClose(&gapFile);
}
Exemple #13
0
void correctOne(struct dnaSeq *est, struct psl *psl, char *nibDir, 
   struct hash *nibHash, FILE *f)
/* Write one corrected EST to file.  Every aligned block of the EST is
 * replaced by the genomic bases it aligns to; EST bases between blocks
 * and after the last block are copied through unchanged.  The result is
 * written to f as a fasta record under the EST's name. */
{
struct dnaSeq *geno = readCachedNib(nibHash, nibDir, psl->tName, 
	psl->tStart, psl->tEnd - psl->tStart);
struct dyString *fixed = newDyString(est->size+20);
struct mrnaBlock *blocks, *blk;
int qSize = psl->qSize;
int tSize = psl->tSize;
int genoOffset = psl->tStart;   /* where geno begins in block coordinates */
int qCovered = 0;               /* end of the EST range emitted so far */

/* Load alignment blocks, flipping everything to the query's strand when
 * the alignment is on the reverse strand. */
toUpperN(geno->dna, geno->size);	/* This helps debug... */
blocks = mrnaBlockFromPsl(psl);
if (psl->strand[0] == '-')
    {
    reverseComplement(geno->dna, geno->size);
    genoOffset = tSize - psl->tEnd;
    for (blk = blocks; blk != NULL; blk = blk->next)
        {
        reverseIntRange(&blk->tStart, &blk->tEnd, tSize);
        reverseIntRange(&blk->qStart, &blk->qEnd, qSize);
        }
    slReverse(&blocks);
    }

/* Build the corrected sequence block by block. */
for (blk = blocks; blk != NULL; blk = blk->next)
    {
    int gapSize = blk->qStart - qCovered;
    if (gapSize > 0)
        dyStringAppendN(fixed, est->dna + qCovered, gapSize);
    dyStringAppendN(fixed, geno->dna + blk->tStart - genoOffset, 
        blk->tEnd - blk->tStart);
    qCovered = blk->qEnd;
    }
if (qCovered != qSize)
    dyStringAppendN(fixed, est->dna + qCovered, qSize - qCovered);

/* Output */
faWriteNext(f, est->name, fixed->string, fixed->stringSize);

/* Clean up time. */
slFreeList(&blocks);
freeDyString(&fixed);
freeDnaSeq(&geno);
}
Exemple #14
0
void freeSeqList(struct dnaSeq **pSeqList)
/* Free every sequence on the list and set *pSeqList to NULL. */
{
    struct dnaSeq *current = *pSeqList;

    while (current != NULL)
    {
        /* Grab the successor before the node is freed. */
        struct dnaSeq *rest = current->next;
        freeDnaSeq(&current);
        current = rest;
    }
    *pSeqList = NULL;
}
Exemple #15
0
void freeCdnaAliList(struct cdnaAli **pList)
/* Free a list of alignments: first the alignment and cDNA sequence held
 * by each element, then the list elements themselves. */
{
struct cdnaAli *item = *pList;

while (item != NULL)
    {
    ffFreeAli(&item->ali);
    freeDnaSeq(&item->cdna);
    item = item->next;
    }
slFreeList(pList);
}
Exemple #16
0
void freeDnaSeqList(struct dnaSeq **pSeqList)
/* Free up list of DNA sequences, leaving *pSeqList NULL. */
{
struct dnaSeq *head;

/* Detach and free the head until nothing is left. */
while ((head = *pSeqList) != NULL)
    {
    *pSeqList = head->next;
    freeDnaSeq(&head);
    }
}
Exemple #17
0
void hgNibSeq(char *database, char *destDir, int faCount, char *faNames[])
/* hgNibSeq - convert DNA to nibble-a-base and store location in database.
 * For each fa file a nib file named after it is written into destDir (or,
 * with preMadeNib set, the existing nib is verified), and a row holding
 * chromPrefix+name, size and nib path is inserted into tableName.  Aborts
 * if destDir contains no '/', if a constructed name does not fit its
 * buffer, or if a fa file yields no sequence. */
{
char dir[256], name[128], chromName[128], ext[64];
char nibName[512];
struct sqlConnection *conn = sqlConnect(database);
char query[512];
int i;
unsigned long total = 0;

if (!strchr(destDir, '/'))
   errAbort("Use full path name for nib file dir\n");

makeDir(destDir);
if ((!appendTbl) || !sqlTableExists(conn, tableName))
    createTable(conn);
for (i=0; i<faCount; ++i)
    {
    char *faName = faNames[i];
    int size = 0;
    int written;

    splitPath(faName, dir, name, ext);
    written = snprintf(nibName, sizeof(nibName), "%s/%s.nib", destDir, name);
    if (written < 0 || written >= (int)sizeof(nibName))
        errAbort("Nib path too long for %s in %s", name, destDir);
    printf("Processing %s to %s\n", faName, nibName);
    if (preMadeNib)
        {
        /* Only the size is wanted from the existing nib. */
        FILE *nibFile;
        nibOpenVerify(nibName, &nibFile, &size);
        carefulClose(&nibFile);
        }
    else
        {
        struct dnaSeq *seq = faReadDna(faName);
        /* Without a sequence there is no size to record; stop rather
         * than insert a row with an undefined size. */
        if (seq == NULL)
            errAbort("Couldn't read DNA from %s", faName);
        size = seq->size;
        uglyf("Read DNA\n");
        nibWrite(seq, nibName);
        uglyf("Wrote nib\n");
        freeDnaSeq(&seq);
        }
    written = snprintf(chromName, sizeof(chromName), "%s%s", chromPrefix, name);
    if (written < 0 || written >= (int)sizeof(chromName))
        errAbort("Chromosome name too long: %s%s", chromPrefix, name);
    sqlSafef(query, sizeof query, "INSERT into %s VALUES('%s', %d, '%s')",
        tableName, chromName, size, nibName);
    sqlUpdate(conn,query);
    total += size;
    }
sqlDisconnect(&conn);
printf("%lu total bases\n", total);
}
Exemple #18
0
void freeAllSeq(struct dnaSeq **pList)
/* Free all sequences on list and reset *pList to NULL.  An empty list is
 * left untouched. */
{
struct dnaSeq *seq = *pList;

if (seq == NULL)
    return;
do
    {
    struct dnaSeq *following = seq->next;
    freeDnaSeq(&seq);
    seq = following;
    }
while (seq != NULL);
*pList = NULL;
}
static void doAChain(struct chain *chain, struct nibTwoCache *tSeqCache, struct nibTwoCache *qSeqCache,
                     FILE *f)
/* Convert one chain to axt records and write those whose identity ratio
 * is at least minIdRatio to f, as bed lines when bedOut is set and as axt
 * otherwise. */
{
struct dnaSeq *qSeq, *tSeq;
struct axt *axtList, *el;

qSeq = loadSeqStrand(qSeqCache, chain->qName, chain->qStart, chain->qEnd, chain->qStrand);
tSeq = loadSeqStrand(tSeqCache, chain->tName, chain->tStart, chain->tEnd, '+');
axtList = chainToAxt(chain, qSeq, chain->qStart, tSeq, chain->tStart, maxGap, BIGNUM);

for (el = axtList; el != NULL; el = el->next)
    {
    double idRatio = axtIdRatio(el);
    if (!(minIdRatio <= idRatio))
        continue;
    if (bedOut)
        bedWriteAxt(el, chain->qSize, chain->tSize, idRatio, f);
    else
        axtWrite(el, f);
    }

axtFreeList(&axtList);
freeDnaSeq(&qSeq);
freeDnaSeq(&tSeq);
}
Exemple #20
0
struct axt *netFillToAxt(struct cnFill *fill, struct dnaSeq *tChrom , int tSize,
	struct hash *qChromHash, char *nibDir,
	struct chain *chain, boolean swap)
/* Convert the part of chain covered by fill into a list of axt, which is
 * returned (NULL when the chain has no part in that target range).  Query
 * nib files are opened on first use and remembered in qChromHash.  When
 * swap is true, query and target are swapped in every returned axt. */
{
boolean isRev = (chain->qStrand == '-');
struct nibInfo *nib = hashFindVal(qChromHash, fill->qName);
struct chain *subChain, *chainToFree;
struct axt *axtList = NULL;
struct dnaSeq *qSeq;
int qOffset;

/* First time this query sequence is seen: open its nib and cache it. */
if (nib == NULL)
    {
    char path[512];
    AllocVar(nib);
    safef(path, sizeof(path), "%s/%s.nib", nibDir, fill->qName);
    nib->fileName = cloneString(path);
    nibOpenVerify(path, &nib->f, &nib->size);
    hashAdd(qChromHash, fill->qName, nib);
    }

/* Get query sequence fragment, and its offset in strand coordinates. */
qSeq = nibLoadPartMasked(NIB_MASK_MIXED, nib->fileName, 
    fill->qStart, fill->qSize);
if (!isRev)
    qOffset = fill->qStart;
else
    {
    reverseComplement(qSeq->dna, qSeq->size);
    qOffset = nib->size - (fill->qStart + fill->qSize);
    }

chainSubsetOnT(chain, fill->tStart, fill->tStart + fill->tSize, 
    &subChain, &chainToFree);
if (subChain != NULL)
    {
    axtList = chainToAxt(subChain, qSeq, qOffset, tChrom, fill->tStart, 100, BIGNUM);
    if (swap)
        {
        struct axt *el;
        for (el = axtList; el != NULL; el = el->next)
            axtSwap(el, tSize, nib->size);
        }
    }
chainFree(&chainToFree);
freeDnaSeq(&qSeq);
return axtList;
}
void randomEst(char *database, int count, char *output)
/* randomEst - Select random ESTs from database.
 * Collects the accessions of all mrna rows of type "EST" and direction
 * "3", then makes count random draws from them and writes each distinct
 * accession drawn to output as fasta.  Repeated draws of the same
 * accession are skipped, so fewer than count records may be written.  If
 * no accession qualifies, output is created empty. */
{
struct sqlConnection *conn = sqlConnect(database);
struct sqlResult *sr;
char **row;
int i, elIx, okCount = 0;
struct slName *list = NULL, *el;
FILE *f = NULL;
char **array = NULL;
struct dnaSeq *seq;
struct hash *uniqHash = newHash(0);

hSetDb(database);
printf("Scanning database\n");
sr = sqlGetResult(conn, "select acc,type,direction from mrna");
while ((row = sqlNextRow(sr)) != NULL)
    {
    if (sameString(row[1], "EST") && sameString(row[2], "3"))
        {
        el = newSlName(row[0]);
        slAddHead(&list, el);
        ++okCount;
        }
    }
sqlFreeResult(&sr);
printf("Got %d 3' ESTs\n", okCount);
AllocArray(array, okCount);
for (i=0, el = list; el != NULL; el = el->next, ++i)
    array[i] = el->name;

printf("Selecting %d to put into %s\n", count, output);
f = mustOpen(output, "w");
/* With nothing to choose from, rand()%okCount would divide by zero. */
for (i=0; okCount > 0 && i<count; ++i)
    {
    char *name;
    elIx = rand()%okCount;
    name = array[elIx];
    if (!hashLookup(uniqHash, name))
        {
        hashAdd(uniqHash, name, NULL);
        seq = hRnaSeq(name);
        faWriteNext(f, seq->name, seq->dna, seq->size);
        freeDnaSeq(&seq);
        }
    }
carefulClose(&f);
sqlDisconnect(&conn);
}
Exemple #22
0
void chromFeatureSeq(struct sqlConnection *conn, 
	char *database, char *chrom, char *trackSpec,
	FILE *bedFile, FILE *faFile,
	int *retItemCount, int *retBaseCount)
/* Write the features of one chromosome, one record per feature, so that
 * feature names are preserved (the non-merged case).  Each feature goes
 * to bedFile as a bed line and/or to faFile as a fasta record, whichever
 * is non-NULL.  Aborts for '!' specs and for file-based tracks.
 * NOTE(review): retItemCount and retBaseCount are not written here. */
{
char trackName[512];
char table[HDB_MAX_TABLE_STRING];
boolean hasBin;
struct featureBits *fbList, *fb;

if (trackSpec[0] == '!')
   errAbort("Sorry, '!' not available with fa output unless you use faMerge");
isolateTrackPartOfSpec(trackSpec, trackName);
if (strchr(trackName, '.') != NULL)
    errAbort("Sorry, only database (not file) tracks allowed with "
             "fa output unless you use faMerge");
// ignore isSplit return from hFindSplitTable()
(void) hFindSplitTable(database, chrom, trackName, table, &hasBin);
fbList = fbGetRangeQuery(database, trackSpec, chrom, 0, hChromSize(database, chrom),
			 where, TRUE, TRUE);
for (fb = fbList; fb != NULL; fb = fb->next)
    {
    int start = fb->start, end = fb->end;

    if (bedFile != NULL)
        {
        fprintf(bedFile, "%s\t%d\t%d\t%s", 
            fb->chrom, fb->start, fb->end, fb->name);
        /* Score and strand columns only when the strand is known. */
        if (fb->strand != '?')
            fprintf(bedFile, "\t0\t%c", fb->strand);
        fprintf(bedFile, "\n");
        }
    if (faFile != NULL)
        {
        struct dnaSeq *seq = hDnaFromSeq(database, chrom, start, end, dnaLower);
        if (fb->strand == '-')
            reverseComplement(seq->dna, seq->size);
        faWriteNext(faFile, fb->name, seq->dna, seq->size);
        freeDnaSeq(&seq);
        }
    }
featureBitsFreeList(&fbList);
}
Exemple #23
0
void printExons(struct genePred *gene, struct dnaSeq *seq, FILE *f)
/* Write the concatenated exon sequence of gene to f as one fasta record
 * named after the gene.  Exon coordinates are taken relative to
 * gene->txStart when indexing into seq->dna. */
{
struct dnaSeq *exonOnlySeq;
int exonIx;
int totalSize = 0;
int filled = 0;

verbose(3, "exonCount = %d\n", gene->exonCount);

// first pass: total exon length, to size the buffer
for (exonIx = 0; exonIx < gene->exonCount; exonIx++)
    {
    int relStart = gene->exonStarts[exonIx] - gene->txStart;
    int relEnd = gene->exonEnds[exonIx] - gene->txStart;
    int exonSize = relEnd - relStart;
    assert (exonSize > 0);
    totalSize += exonSize;
    }

// modeled after hgSeq.c
AllocVar(exonOnlySeq);
exonOnlySeq->dna = needLargeMem(totalSize+1);
exonOnlySeq->size = totalSize;

// second pass: copy each exon's bases end to end
for (exonIx = 0; exonIx < gene->exonCount; exonIx++)
    {
    int relStart = gene->exonStarts[exonIx] - gene->txStart;
    int relEnd = gene->exonEnds[exonIx] - gene->txStart;
    int exonSize = relEnd - relStart;
    verbose(4, "size = %d\n", exonSize);
    memcpy(exonOnlySeq->dna + filled, seq->dna + relStart, exonSize);
    filled += exonSize;
    }

assert(filled == exonOnlySeq->size);
exonOnlySeq->dna[filled] = 0;
faWriteNext(f, gene->name, exonOnlySeq->dna, exonOnlySeq->size);
freeDnaSeq(&exonOnlySeq);

}
void runSamples(char *goodFile, char *badFile, char *newDb, char *oldDb, int numToRun)
/* Run numToRun conversion tests on random coordinates from oldDb.
 * Coordinates whose sequence contains a long run of n's are only counted
 * as "too many N's"; the rest are handed to convertCoordinates through the
 * chrom/chromStart/chromEnd variables, with results going to goodFile and
 * badFile.  A summary is printed at the end. */
{
FILE *good = mustOpen(goodFile, "w");
FILE *bad = mustOpen(badFile, "w");
int goodCount = 0, badCount = 0, nRichCount = 0;
int testIx;

printf("Running Tests\t");
for (testIx = 0; testIx < numToRun; testIx++)
    {
    struct coordConv *cc = NULL;
    struct dnaSeq *seq = NULL;

    if(!(testIx%10)) putTic();
    cc = getRandomCoord(oldDb);
    seq = hDnaFromSeq(cc->chrom, cc->chromStart, cc->chromEnd, dnaLower);
    if (strstr(seq->dna, "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn"))
        {
        nRichCount++;
        }
    else
        {
        boolean converted;
        chrom = cc->chrom;
        chromStart = cc->chromStart;
        chromEnd = cc->chromEnd;
        converted = convertCoordinates(good, bad, printReport, printReport);
        if (converted)
            goodCount++;
        else
            badCount++;
        }
    freeDnaSeq(&seq);
    coordConvFree(&cc);
    }
carefulClose(&good);
carefulClose(&bad);
printf("\tDone.\n");
printf("Out of %d attempts got %d 'succesfully converted' and %d 'had problems', %d had too many N's\n", 
       (goodCount + badCount), goodCount, badCount, nRichCount);
printf("After checking got %d of %d correctly called and %d incorrectly called.\n", 
       hgTestCorrect, hgTestCorrect+hgTestWrong, hgTestWrong);
}
Exemple #25
0
static struct traceInfo* parseFastaRecord(FILE* fh, char* fastaName)
/* Read the next fasta record and build a traceInfo from it: the trace id
 * is parsed from the sequence name, the template id from the comment
 * line, and the size is the sequence length.  Returns NULL at end of
 * file.  The sequence and comment are freed before returning. */
{
struct traceInfo* info;
struct dnaSeq* record;
char* comment;

if (!faReadNext(fh, NULL, 0, &comment, &record))
    return NULL; /* EOF */

AllocVar(info);
info->ti = parseTraceId(record->name, fastaName);
info->size = record->size;
info->templateId = parseTemplateId(comment, fastaName);

freeMem(comment);
freeDnaSeq(&record);
return info;
}
Exemple #26
0
static struct mafAli *getRefAli(char *database, char *chrom, int start, int end)
/* Build a mafAli with a single '+' strand component covering start..end
 * of chrom.  The component source is "database.chrom" and its text is a
 * copy of the mixed-case sequence for that range. */
{
char srcName[1024];
struct mafAli *ali;
struct mafComp *refComp;
struct dnaSeq *seq;
int span = end - start;

AllocVar(ali);
AllocVar(refComp);
ali->components = refComp;
ali->textSize = span;

safef(srcName, sizeof srcName, "%s.%s", database, chrom);
refComp->src = cloneString(srcName);
refComp->start = start;
refComp->strand = '+';
refComp->size = span;

/* Copy the bases so the loaded sequence can be released right away. */
seq = hChromSeqMixed(database, chrom, start , end);
refComp->text = cloneString(seq->dna);
freeDnaSeq(&seq);

return ali;
}
static void checkExtRecord(struct seqFields *seq,
                           char *extPath)
/* Check the external file record for a sequence (slow). Assumes
 * that bounds have been sanity checked for a file.  Reads file_size bytes
 * at file_offset from extPath and reports through gbError if the seek
 * fails, if the bytes do not start a fasta record, or if the record's
 * name or size differs from what seq expects. */
{
/* read range into buffer */
FILE *fh = mustOpen(extPath, "r");
char *faBuf;
char accVer[GB_ACC_BUFSZ];
struct dnaSeq *dnaSeq;
if (fseeko(fh, seq->file_offset, SEEK_SET) < 0)
    {
    gbError("%s: can't seek %s", seq->acc, extPath);
    carefulClose(&fh);
    /* The handle is closed; stop here instead of reading from it. */
    return;
    }
faBuf = needMem(seq->file_size+1);
mustRead(fh, faBuf, seq->file_size);
faBuf[seq->file_size] = '\0';
carefulClose(&fh);

/* verify contents */
if (faBuf[0] != '>')
    {
    gbError("%s: gbExtFile offset %lld doesn't start a fasta record: %s",
            seq->acc, (long long)seq->file_offset, extPath);
    free(faBuf);
    return;
    }
/* NOTE(review): faBuf is not freed after this point; presumably
 * faFromMemText takes it over - confirm. */
dnaSeq = faFromMemText(faBuf);
safef(accVer, sizeof(accVer), "%s.%d", seq->acc, seq->version);

if (!sameString(dnaSeq->name, accVer))
    gbError("%s: name in fasta header \"%s\" doesn't match expected \"%s\": %s",
            seq->acc, dnaSeq->name, accVer, extPath);
if (dnaSeq->size != seq->size)
    gbError("%s: size of fasta sequence (%d) doesn't match expected (%d): %s",
            seq->acc, dnaSeq->size, seq->size, extPath);
freeDnaSeq(&dnaSeq);
}
Exemple #28
0
void countCosmids(char *listFileName, FILE *out)
/* Read each cosmid in list file and find out how big it is.  Every
 * non-blank line names a file under the fixed cosmid directory; its
 * sequence size is added to cosmidTotalSize and cosmidCount is bumped.
 * The count and the rounded average size are then written to out.  Blank
 * lines and cosmids that yield no sequence are skipped, and the average is
 * reported as 0 when nothing was counted. */
{
FILE *listFile = mustOpen(listFileName, "r");
char line[512], *s;
struct dnaSeq *seq;
/* Room for the directory prefix plus the longest possible line, so the
 * path can never be truncated. */
char path[sizeof(line) + 64];

while (fgets(line, sizeof(line), listFile))
    {
    s = trimSpaces(line);
    if (s[0] == 0)
        continue;
    snprintf(path, sizeof(path), "%s/%s", "C:/biodata/cbriggsae/finish", s);
    seq = faReadDna(path);
    if (seq == NULL)
        continue;
    ++cosmidCount;
    cosmidTotalSize += seq->size;
    freeDnaSeq(&seq);
    }
fclose(listFile);
/* Avoid dividing by zero when no cosmid was counted. */
if (cosmidCount > 0)
    cosmidAverageSize = round((double)cosmidTotalSize/cosmidCount);
else
    cosmidAverageSize = 0;
fprintf(out, "%d cosmids, average length %d\n", cosmidCount, cosmidAverageSize);
}
void loadIfNewSeq(char *seqPath, boolean isTwoBit, char *newName, char strand, 
	char **pName, struct dnaSeq **pSeq, char *pStrand)
/* Make *pSeq hold sequence newName on the requested strand.  When newName
 * equals *pName the current sequence is kept and only reverse-complemented
 * if the strand differs.  Otherwise the old sequence is freed and the new
 * one is read from the two-bit file seqPath, or from seqPath/newName.nib
 * when isTwoBit is false.  *pName is set to newName itself (not a copy)
 * and *pStrand to strand. */
{
struct dnaSeq *seq;

if (!sameString(newName, *pName))
    {
    freeDnaSeq(pSeq);
    if (!isTwoBit)
        {
        char nibPath[512];
        snprintf(nibPath, sizeof(nibPath), "%s/%s.nib", seqPath, newName);
        seq = nibLoadAllMasked(NIB_MASK_MIXED, nibPath);
        *pSeq = seq;
        verbose(1, "Loaded %d bases in %s\n", seq->size, nibPath);
        }
    else
        {
        struct twoBitFile *tbf = twoBitOpenCached(seqPath);
        seq = twoBitReadSeqFrag(tbf, newName, 0, 0);
        *pSeq = seq;
        verbose(1, "Loaded %d bases of %s from %s\n", seq->size, newName, seqPath);
        }
    *pName = newName;
    *pStrand = strand;
    if (strand == '-')
        reverseComplement(seq->dna, seq->size);
    }
else if (strand != *pStrand)
    {
    /* Same sequence, opposite strand: flip in place. */
    seq = *pSeq;
    reverseComplement(seq->dna, seq->size);
    *pStrand = strand;
    }
}
void writeCassetteExon(struct bed *bedList, struct altGraphX *ag, int eIx, boolean *outputted, 
		       FILE *bedOutFile, FILE *outfile, FILE *html, float conf )
/* Write out the information for a cassette exon: the bed list to
 * bedOutFile (if given), a browser link to html, the graph itself to
 * stdout the first time it is seen (tracked through *outputted), and, if
 * outfile is given and the exon is under 200 bases, the exon sequence as
 * fasta. */
{
int i = eIx;
if(bedOutFile != NULL)
    bedTabOutN(bedList,12, bedOutFile);
writeBrowserLink(html, ag, conf, i);
/* Test the flag itself, not the pointer to it; a non-NULL pointer used to
 * make this branch unreachable and a NULL one crashed on the store. */
if(outputted != NULL && !*outputted)
    {
    altGraphXTabOut(ag, stdout);
    *outputted = TRUE;
    }
if(outfile != NULL)
    {
    struct dnaSeq *seq = hChromSeq(ag->tName, ag->vPositions[ag->edgeStarts[i]], ag->vPositions[ag->edgeEnds[i]]);
    /* NOTE(review): the sequence is reverse-complemented for "+" strand
     * graphs, which looks inverted - confirm this is intended. */
    if(sameString(ag->strand , "+")) 
	reverseComplement(seq->dna, seq->size);
    if(seq->size < 200)
	faWriteNext(outfile, seq->name, seq->dna, seq->size);
    freeDnaSeq(&seq);
    }
}