Example #1
0
void readAllSts(FILE *asf) 
/* Read in current sequences for sts markers */
{
  struct dnaSeq *ds;
  struct sts *s;
  char *words[8], *acc=NULL, *line;
  int wordCount;

  while (faReadMixedNext(asf, 0, "default", TRUE, &line, &ds))
    {
      /* Determine the UCSC id */
      wordCount = chopByWhite(line, words, ArraySize(words));
      stripString(words[0], ">");
      if (wordCount == 3)
	acc = cloneString(words[2]);
      else
	acc = NULL;
      /* Find the record and attach */
      if (hashLookup(stsHash, ds->name))
	{
	  s = hashMustFindVal(stsHash, ds->name);
	  s->fa = ds;
	  s->faAcc = acc;
	  s->si->sequence = 1;
	}
      else 
	{
	  dnaSeqFree(&ds);
	  freez(&line);
	  if (acc != NULL)
	    freez(&acc);
	}
    }
}
Example #2
0
struct dnaSeq *readSeqFromFaPos(struct seqFilePos *sfp,  FILE *f)
/* Read part of FA file. */
{
struct dnaSeq *seq;
fseek(f, sfp->pos, SEEK_SET);
if (!faReadMixedNext(f, TRUE, "", TRUE, NULL, &seq))
    errAbort("Couldn't faReadNext on %s in %s\n", sfp->name, sfp->file);
return seq;
}
Example #3
0
boolean faReadNext(FILE *f, char *defaultName, boolean mustStartWithComment,
                         char **retCommentLine, struct dnaSeq **retSeq) 
/* Read next sequence from .fa file. Return sequence in retSeq.  
 * If retCommentLine is non-null
 * return the '>' line in retCommentLine.   
 * The whole thing returns FALSE at end of file.  
 * DNA chars are mapped to lower case.*/
{
    return faReadMixedNext(f, 0, defaultName, mustStartWithComment,
                                        retCommentLine, retSeq);
}
Example #4
0
void readDbstsFa(FILE *dff)
/* Read in sequences from dbSTS.fa and add, if possible */
{
  struct dnaSeq *ds;
  struct sts *s;
  struct gb *gb;
  char name[256], *line;

  while (faReadMixedNext(dff, 0, "default", TRUE, &line, &ds))
    {
      /* Determine the UCSC id */
      if (hashLookup(gbAccHash, ds->name)) 
	{
	  /* Determine if this is linked to a marker */
	  gb = hashMustFindVal(gbAccHash, ds->name);
	  if (gb->s != NULL) 
	    {
	      /* If no recorded sequence, then add */ 
	      s = gb->s;
	      if (s->fa == NULL) 
		{
		  s->faAcc = cloneString(ds->name);
		  safef(name, ArraySize(name), "%d", s->si->identNo);
		  ds->name = cloneString(name);
		  s->fa = ds;
		  s->si->sequence = 1;
		} 
	      /* If no accession recorded, see if sequences are the same */
	      else if (s->faAcc == NULL) 
		{
		  if (sameString(s->fa->dna, ds->dna))
		    {
		      s->faAcc = cloneString(ds->name);
		      s->si->sequence = 1;		  
		    }
		  freeDnaSeq(&ds);
		}	  
	      /* If same accession as recorded, the update sequence */
	      else if (sameString(s->faAcc, ds->name))
		{
		  ds->name = cloneString(s->fa->name);
		  freeDnaSeq(&s->fa);
		  s->fa = ds;
		  s->si->sequence = 1;
		}
	      else
		freeDnaSeq(&ds);	    
	    }
	  else
	    freeDnaSeq(&ds);	    
	}
    }
}
void axtChain(char *axtIn, char *tNibDir, char *qNibDir, char *chainOut)
/* axtChain - Chain together axt alignments.. */
{
struct hash *pairHash = newHash(0);  /* Hash keyed by qSeq<strand>tSeq */
struct seqPair *spList = NULL, *sp;
FILE *f = mustOpen(chainOut, "w");
char *qName = "",  *tName = "";
struct dnaSeq *qSeq = NULL, *tSeq = NULL;
char qStrand = 0, tStrand = 0;
struct chain *chainList = NULL, *chain;
FILE *details = NULL;
struct dnaSeq *seq, *seqList = NULL;
struct hash *faHash = newHash(0);
struct hash *tFaHash = newHash(0);
FILE *faF;
boolean qIsTwoBit = twoBitIsFile(qNibDir);
boolean tIsTwoBit = twoBitIsFile(tNibDir);

axtScoreSchemeDnaWrite(scoreScheme, f, "axtChain");

if (detailsName != NULL)
    details = mustOpen(detailsName, "w");
/* Read input file and divide alignments into various parts. */
if (optionExists("psl"))
    spList = readPslBlocks(axtIn, pairHash, f);
else
    spList = readAxtBlocks(axtIn, pairHash, f);

if (optionExists("faQ"))
    {
    faF = mustOpen(qNibDir, "r");
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        {
        hashAdd(faHash, seq->name, seq);
        slAddHead(&seqList, seq);
        }
    fclose(faF);
    }
if (optionExists("faT"))
    {
    faF = mustOpen(tNibDir, "r");
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        {
        hashAdd(tFaHash, seq->name, seq);
        slAddHead(&seqList, seq);
        }
    fclose(faF);
    }
for (sp = spList; sp != NULL; sp = sp->next)
    {
    slReverse(&sp->blockList);
    removeExactOverlaps(&sp->blockList);
    verbose(1, "%d blocks after duplicate removal\n", slCount(sp->blockList));
    if (optionExists("faQ"))
        {
        assert (faHash != NULL);
        loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand);
        }
    else
	{
        loadIfNewSeq(qNibDir, qIsTwoBit, sp->qName, sp->qStrand, 
		&qName, &qSeq, &qStrand);
        }
    if (optionExists("faT"))
        {
        assert (tFaHash != NULL);
        loadFaSeq(tFaHash, sp->tName, '+', &tName, &tSeq, &tStrand);
        }
    else 
	{
        loadIfNewSeq(tNibDir, tIsTwoBit, sp->tName, '+', 
		&tName, &tSeq, &tStrand);
	}
    chainPair(sp, qSeq, tSeq, &chainList, details);
    }
slSort(&chainList, chainCmpScore);
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    assert(chain->qStart == chain->blockList->qStart 
	&& chain->tStart == chain->blockList->tStart);
    chainWrite(chain, f);
    }

carefulClose(&f);
}
void doChainScore(char *chainIn, char *tNibDir, char *qNibDir, char *chainOut)
{
char qStrand = 0, tStrand = 0;
struct dnaSeq *qSeq = NULL, *tSeq = NULL;
char *qName = "",  *tName = "";
FILE *f = mustOpen(chainOut, "w");
struct chain *chainList = NULL, *chain;
struct chain *inputChains, *next;
FILE *details = NULL;
struct lineFile *lf = NULL;
struct dnaSeq *seq, *seqList = NULL;
struct hash *faHash = newHash(0);
struct hash *chainHash = newHash(0);
char comment[1024];
FILE *faF;
struct seqPair *spList = NULL, *sp;
struct dyString *dy = newDyString(512);
struct lineFile *chainsLf = lineFileOpen(chainIn, TRUE);

while ((chain = chainRead(chainsLf)) != NULL)
    {
    dyStringClear(dy);
    dyStringPrintf(dy, "%s%c%s", chain->qName, chain->qStrand, chain->tName);
    sp = hashFindVal(chainHash, dy->string);
    if (sp == NULL)
        {
	AllocVar(sp);
	slAddHead(&spList, sp);
	hashAddSaveName(chainHash, dy->string, sp, &sp->name);
	sp->qName = cloneString(chain->qName);
	sp->tName = cloneString(chain->tName);
	sp->qStrand = chain->qStrand;
	}
    slAddHead(&sp->chain, chain);
    }
slSort(&spList, seqPairCmp);
lineFileClose(&chainsLf);

if (optionExists("faQ"))
    {
    faF = mustOpen(qNibDir, "r");
    while ( faReadMixedNext(faF, TRUE, NULL, TRUE, NULL, &seq))
        {
        hashAdd(faHash, seq->name, seq);
        slAddHead(&seqList, seq);
        }
    fclose(faF);
    }
for (sp = spList; sp != NULL; sp = sp->next)
    {
    if (optionExists("faQ"))
        {
        assert (faHash != NULL);
        loadFaSeq(faHash, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand);
        }
    else
        loadIfNewSeq(qNibDir, sp->qName, sp->qStrand, &qName, &qSeq, &qStrand);
    loadIfNewSeq(tNibDir, sp->tName, '+', &tName, &tSeq, &tStrand);
    scorePair(sp, qSeq, tSeq, &chainList, sp->chain);
    }


slSort(&chainList, chainCmpScore);
for (chain = chainList; chain != NULL; chain = chain->next)
    {
    assert(chain->qStart == chain->blockList->qStart 
	&& chain->tStart == chain->blockList->tStart);
    chainWrite(chain, f);
    }

carefulClose(&f);
}