Esempio n. 1
0
static boolean checkSnapOk(struct vertex *vOld, struct vertex *vNew, boolean isRev, 
	int bleedSize, int maxUncheckedSize, struct nibTwoCache *seqCache, char *chromName)
/* Decide whether vOld may be snapped onto vNew.
 *   Returns FALSE when vOld is part of an uncuttable edge, and TRUE without
 * looking at any sequence when bleedSize is no bigger than maxUncheckedSize.
 * Otherwise loads the bleedSize bases of bleed-over next to vOld and compares
 * them with the bleedSize bases bordering the far end of each edge attached
 * to vNew (waysIn when isRev, waysOut otherwise).  Returns TRUE as soon as
 * checkSeqSimilar accepts one of them. */
{
if (vertexPartOfUncuttableEdge(vOld, isRev))
    return FALSE;
if (bleedSize <= maxUncheckedSize)
    return TRUE;

int minScore = bleedSize-2;
boolean similar = FALSE;

/* The two strands differ only in which side of each vertex the window
 * sits on and in which edge list is walked, so pick those up front. */
int oldStart = (isRev ? vOld->position : vOld->position - bleedSize);
struct slRef *eRef = (isRev ? vNew->waysIn : vNew->waysOut);
struct dnaSeq *oldSeq = nibTwoCacheSeqPartExt(seqCache, chromName, oldStart, bleedSize, FALSE, NULL);

for ( ; eRef != NULL; eRef = eRef->next)
    {
    struct edge *edge = eRef->val;
    int newStart = (isRev ? edge->start->position - bleedSize : edge->end->position);
    struct dnaSeq *newSeq = nibTwoCacheSeqPartExt(seqCache, chromName, newStart, bleedSize, FALSE, NULL);
    similar = checkSeqSimilar(oldSeq, newSeq, minScore);
    dnaSeqFree(&newSeq);
    if (similar)
	break;
    }

/* Released on both strands; the forward-strand path used to leak it. */
dnaSeqFree(&oldSeq);
return similar;
}
Esempio n. 2
0
void snpMaskChrom(char *tableName, char *nibFile, char *outFile)
/* snpMaskChrom - Load the masked sequence in nibFile, replace the base at
 * every snp read from tableName with the code returned by iupac(), and write
 * the result to outFile with faWrite.  A base that was lower case before the
 * substitution is lower case afterwards as well. */
{
struct dnaSeq *chromSeq = nibLoadAllMasked(NIB_MASK_MIXED, nibFile);
char *bases = chromSeq->dna;
struct snpSimple *snpList = readSnpsFromChrom(tableName, chromName);
struct snpSimple *cur;

for (cur = snpList; cur != NULL; cur = cur->next)
    {
    char *base = bases + cur->chromStart;
    boolean inRep = (islower(*base) ? TRUE : FALSE);

    *base = iupac(cur->name, cur->observed, *base);
    if (inRep)
        *base = tolower(*base);	/* keep the lower-case masking of the original base */
    }

faWrite(outFile, chromName, chromSeq->dna, chromSeq->size);
snpSimpleFreeList(&snpList);
dnaSeqFree(&chromSeq);
}
Esempio n. 3
0
void txgAnalyze(char *inTxg, char *dnaPath, char *outFile)
/* txgAnalyze - Analyse transcription graph for alt exons, alt 3', alt 5',
 * retained introns, alternative promoters, etc..
 *   Reads one txGraph per row of inTxg, runs every analysis pass on it and
 * writes the findings to outFile.  The chromosome sequence is taken from
 * dnaPath and is only reloaded when a graph names a different target than
 * the sequence currently held. */
{
    struct lineFile *lf = lineFileOpen(inTxg, TRUE);
    FILE *f = mustOpen(outFile, "w");
    char *row[TXGRAPH_NUM_COLS];
    struct nibTwoCache *ntc = nibTwoCacheNew(dnaPath);
    struct dnaSeq *chrom = NULL;
    while (lineFileRow(lf, row))
    {
        struct txGraph *txg = txGraphLoad(row);
        if (chrom == NULL || !sameString(chrom->name, txg->tName))
        {
            /* Different target from the one in memory: swap sequences. */
            dnaSeqFree(&chrom);
            chrom = nibTwoCacheSeq(ntc, txg->tName);
            verbose(2, "Loaded %s into %s\n", txg->tName, chrom->name);
        }
        /* The retained-intron pass yields the ranges that the alt 3'/5'
         * passes take as input, so it has to run first. */
        struct range *exonsWithIntrons = retainedIntrons(txg, f);
        cassetteExons(txg, f);
        altThreePrime(txg, exonsWithIntrons, f);
        altFivePrime(txg, exonsWithIntrons, f);
        altPromoter(txg, f);
        strangeSplices(txg, chrom, f);
        bleedsIntoIntrons(txg, f);
        refSeparateButJoined(txg, f);
        if (fConst != NULL)
            constExons(txg, fConst);
        slFreeList(&exonsWithIntrons);
        txGraphFree(&txg);
    }
    /* Release the last chromosome loaded; the loop only frees a sequence
     * when it is about to replace it. */
    dnaSeqFree(&chrom);
    /* NOTE(review): lf and ntc are still not released here - confirm whether
     * that is intentional. */
    carefulClose(&f);
}
Esempio n. 4
0
void readAllSts(FILE *asf) 
/* Read in current sequences for sts markers.
 *   Every record read from asf whose name is in stsHash gets its sequence
 * attached to that sts entry, together with a copy of the third word of
 * the header line when the header has exactly three words.  Records with
 * unknown names are discarded. */
{
  struct dnaSeq *ds;
  struct sts *s;
  char *words[8], *line;
  int wordCount;

  while (faReadMixedNext(asf, 0, "default", TRUE, &line, &ds))
    {
      /* Determine the UCSC id */
      wordCount = chopByWhite(line, words, ArraySize(words));
      if (wordCount > 0)	/* words[0] is not set for a blank header */
	stripString(words[0], ">");
      /* Find the record and attach */
      if (hashLookup(stsHash, ds->name))
	{
	  s = hashMustFindVal(stsHash, ds->name);
	  s->fa = ds;
	  /* Copy the accession: words[] point into line, which is freed below. */
	  s->faAcc = (wordCount == 3) ? cloneString(words[2]) : NULL;
	  s->si->sequence = 1;
	}
      else 
	{
	  dnaSeqFree(&ds);
	}
      /* Nothing keeps a pointer into the header line, so free it on both
       * paths; it used to leak whenever the marker was found. */
      freez(&line);
    }
}
Esempio n. 5
0
void pslRecalcMatch(char *inName, char *targetName, char *queryName, 
	char *outName)
/* pslRecalcMatch - Recalculate match,mismatch,repMatch columns in psl file.  
 * Each alignment in inName is recounted against the target sequence from
 * targetName and the query sequence from queryName, then written to outName.
 * This can be useful if the psl went through pslMap, or if you've added 
 * lower-case repeat masking after the fact. */
{
struct nibTwoCache *targetCache = nibTwoCacheNew(targetName);
struct dnaSeq *queryList = dnaLoadAll(queryName);
struct hash *queryHash = dnaSeqHash(queryList);
struct lineFile *in = pslFileOpen(inName);
FILE *out = mustOpen(outName, "w");
struct psl *aln;

/* NOTE(review): the psl records read here are never freed in this loop. */
for (aln = pslNext(in); aln != NULL; aln = pslNext(in))
    {
    int targetSize;
    int span = aln->tEnd - aln->tStart;
    struct dnaSeq *targetPart = nibTwoCacheSeqPart(targetCache,
    	aln->tName, aln->tStart, span, &targetSize);
    struct dnaSeq *querySeq = hashMustFindVal(queryHash, getQName(aln->qName));

    recalcMatches(aln, targetPart, aln->tStart, querySeq);
    pslTabOut(aln, out);
    dnaSeqFree(&targetPart);
    }
carefulClose(&out);
lineFileClose(&in);
}
static void showMrnaFromGenePred(struct sqlConnection *conn, 
	char *geneId, char *geneName)
/* Get mRNA sequence for gene from gene prediction.
 *   Looks up geneId at the current gene position in the table named by the
 * "knownGene" genome setting and prints the predicted mRNA, 50 bases per
 * line, inside a <TT><PRE> section.  Aborts if there is no such row. */
{
char *table = genomeSetting("knownGene");
struct sqlResult *sr;
char **row;
char query[256];
boolean hasBin = hIsBinned(sqlGetDatabase(conn), table);

hPrintf("<TT><PRE>");
/* NOTE(review): geneId and curGeneChrom are pasted into the SQL text as-is;
 * confirm that callers never pass untrusted values here. */
safef(query, sizeof(query), 
    "select * from %s where name='%s'"
    " and chrom='%s' and txStart=%d and txEnd=%d", 
    table, geneId, curGeneChrom, curGeneStart, curGeneEnd);
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    {
    /* row+hasBin skips the first column when the table is binned. */
    struct genePred *gene = genePredLoad(row+hasBin);
    struct bed *bed = bedFromGenePred(gene);
    struct dnaSeq *seq = hSeqForBed(sqlGetDatabase(conn), bed);
    hPrintf(">%s (%s predicted mRNA)\n", geneId, geneName);
    writeSeqWithBreaks(stdout, seq->dna, seq->size, 50);
    dnaSeqFree(&seq);
    bedFree(&bed);
    genePredFree(&gene);
    }
else
    errAbort("Couldn't find %s at %s:%d-%d", geneId, 
    	curGeneChrom, curGeneStart, curGeneEnd);
sqlFreeResult(&sr);
/* Close the tags in the reverse of the order they were opened. */
hPrintf("</PRE></TT>");
}
Esempio n. 7
0
struct dnaSeq *twoBitAndBedToSeq(struct twoBitFile *tbf, struct bed *bed)
/* Build the sequence described by bed from tbf, leaving out introns.
 * The result is named after the bed and is reverse-complemented when the
 * bed's strand starts with '-'. */
{
struct dnaSeq *result;
if (bed->blockCount > 1)
    {
    /* Several blocks: read them one at a time and pack them end to end. */
    int total = bedTotalBlockSize(bed);
    int blk;
    int filled = 0;
    AllocVar(result);
    result->name = cloneString(bed->name);
    result->dna = needMem(total+1);
    result->size = total;
    for (blk = 0; blk < bed->blockCount; ++blk)
        {
        int len = bed->blockSizes[blk];
        int from = bed->chromStart + bed->chromStarts[blk];
        struct dnaSeq *piece = twoBitReadSeqFrag(tbf, bed->chrom, from, from+len);
        memcpy(result->dna + filled, piece->dna, len);
        filled += len;
        dnaSeqFree(&piece);
        }
    }
else
    {
    /* At most one block: the whole chromStart..chromEnd span is the sequence;
     * only its name needs replacing. */
    result = twoBitReadSeqFrag(tbf, bed->chrom, bed->chromStart, bed->chromEnd);
    freeMem(result->name);
    result->name = cloneString(bed->name);
    }
if (bed->strand[0] == '-')
    reverseComplement(result->dna, result->size);
return result;
}
Esempio n. 8
0
static void processRnaSeq(FILE *fh, struct sqlConnection *conn, struct refSeqVerInfo *rsvi)
/* Fetch the RNA sequence for rsvi->acc (the name already includes the
 * version) and write it to fh.  Aborts if the sequence is not available. */
{
struct dnaSeq *rna = hGenBankGetMrnaC(conn, rsvi->acc, NULL);
if (rna != NULL)
    {
    faWriteNext(fh, rna->name, rna->dna, rna->size);
    dnaSeqFree(&rna);
    }
else
    errAbort("failed to get %s from database", rsvi->acc);
}
Esempio n. 9
0
void outputOne(struct twoBitFile *tbf, char *seqSpec, FILE *f, 
	int start, int end)
/* Read the start..end fragment of seqSpec from tbf and write it to f with
 * faWriteNext, upper-casing it first when noMask is set. */
{
struct dnaSeq *frag = twoBitReadSeqFrag(tbf, seqSpec, start, end);
char *bases = frag->dna;
int baseCount = frag->size;

if (noMask)
    toUpperN(bases, baseCount);
faWriteNext(f, frag->name, bases, baseCount);
dnaSeqFree(&frag);
}
void cassetteSeqFree(struct cassetteSeq **pCseq)
/* Free a cassetteSeq and everything it holds, then set *pCseq to NULL.
 * Does nothing when *pCseq is already NULL. */
{
struct cassetteSeq *cseq = *pCseq;
if (cseq == NULL)
    return;
dnaSeqFree(&cseq->seq);
bedFree(&cseq->bed);
freez(&cseq->name);
freez(&cseq->rightPrimer);
freez(&cseq->leftPrimer);
/* Free through the caller's pointer so that it is the one that ends up
 * NULL; assigning NULL to the parameter pCseq itself had no effect. */
freez(pCseq);
}
Esempio n. 11
0
void retroShowCdnaAli(char *mappedId)
/* Show the alignment page for mappedId; mostly ripped off from htcCdnaAli.
 * The alignment table, track and window start come from the cart, and the
 * sequence tables come from the track's BASE_COLOR_USE_SEQUENCE setting. */
{
char *aliTrack = cartString(cart, "aliTable");
struct trackDb *tdb = hashMustFindVal(trackHash, aliTrack);
char *table = cartString(cart, "table");
int winStart = cartInt(cart, "o");
struct sqlConnection *conn = hAllocConn(database);
struct sqlConnection *defDbConn = NULL;
struct mappingInfo *mi = mappingInfoNew(conn, table, mappedId);
struct genbankCds cds = getCds(conn, mi);
struct psl *psl;
struct dnaSeq *rnaSeq = NULL;
char *spec = trackDbRequiredSetting(tdb, BASE_COLOR_USE_SEQUENCE);
char *specCopy = cloneString(spec);
char *words[3];
int nwords = chopByWhite(specCopy, words, ArraySize(words));
char acc[512];
char *seqTable, *extTable;

/* Print start of HTML. */
writeFramesetType();
puts("<HTML>");
printf("<HEAD>\n<TITLE>%s vs Genomic [%s]</TITLE>\n</HEAD>\n\n", mi->seqId, aliTrack);

/* Look up alignment and sequence in database.  Always get sequence
 * from defaultDb */
psl = loadAlign(conn, mi, winStart);

/* Gene sets whose name starts with "August" store the sequence under a
 * decorated accession. */
if (!startsWith("August",mi->geneSet))
    safef(acc, sizeof(acc), "%s",mi->seqId);
else
    safef(acc, sizeof(acc), "aug-%s.T1",mi->seqId);

/* The setting must read: extFile seqTbl extFileTbl */
if (!((nwords == (int)ArraySize(words)) && sameString(words[0], "extFile")))
    errAbort("invalid %s track setting: %s ", BASE_COLOR_USE_SEQUENCE, spec);
seqTable = words[1];
extTable = words[2];

rnaSeq = hDnaSeqGet(database, acc, seqTable, extTable);
if (rnaSeq == NULL)
    {
    /* Second attempt: the plain sequence id through hExtSeq. */
    char *plainAcc = mi->seqId;
    rnaSeq = hExtSeq(database, plainAcc);
    if (rnaSeq == NULL)
        errAbort("can't get mRNA sequence from %s prefix %s for %s from %s mappedId %s", 
            database, mi->geneSet, plainAcc, aliTrack, mappedId);
    }
sqlDisconnect(&defDbConn);
showSomeAlignment(psl, rnaSeq, gftDna, 0, rnaSeq->size, NULL, cds.start, cds.end);

pslFree(&psl);
dnaSeqFree(&rnaSeq);
mappingInfoFree(&mi);
hFreeConn(&conn);
}
Esempio n. 12
0
void snpMaskGenes(char *nibFile, char *outFile)
/* snpMaskGenes - Print gene sequence, exons only, 
   using IUPAC codes for single base substitutions.
   For each gene on chromName the transcribed region is loaded from nibFile,
   every snp read for the gene is substituted in, and the exons are written
   to outFile by printExons. */
{
struct genePred *genes = NULL;
struct genePred *gene = NULL;
struct dnaSeq *seq;
char *ptr;
struct snpSimple *snps = NULL;
struct snpSimple *snp = NULL;
int snpPos = 0;
int size = 0;
FILE *fileHandle = mustOpen(outFile, "w");

genes = readGenes(chromName);

for (gene = genes; gene != NULL; gene = gene->next)
    {
    verbose(4, "gene = %s\n", gene->name);

    snps = readSnpsFromGene(gene, chromName);

    size = gene->txEnd - gene->txStart;
    assert(size > 0);
    /* nibLoadPartMasked returns the sequence itself.  The struct and buffer
     * that used to be allocated here first were overwritten by this
     * assignment and leaked once per gene. */
    seq = nibLoadPartMasked(NIB_MASK_MIXED, nibFile, gene->txStart, size);

    ptr = seq->dna;

    /* do substitutions */
    /* including introns; doesn't take much time, keeps code clean */
    for (snp = snps; snp != NULL; snp = snp->next)
        {
	snpPos = snp->chromStart - gene->txStart;
	assert(snpPos >= 0);
	assert(snpPos < size);	/* stay inside the loaded region */
	verbose(5, "before substitution %c\n", ptr[snpPos]);
        ptr[snpPos] = iupac(snp->name, snp->observed, ptr[snpPos]);
	verbose(5, "after substitution %c\n", ptr[snpPos]);
        }

    printExons(gene, seq, fileHandle);
    snpSimpleFreeList(&snps);
    dnaSeqFree(&seq);  
    }

geneFreeList(&genes);
if (fclose(fileHandle) != 0)
    errnoAbort("fclose failed");
}
static void blatzClient(char *input, char *output)
/* For every sequence in input: connect to the server, send a "query"
 * message with the client options and the sequence, and append whatever
 * the server sends back to output. */
{
static struct optionSpec options[] = {
   BZP_CLIENT_OPTIONS
};
struct dnaLoad *loader = dnaLoadOpen(input);
FILE *out = mustOpen(output, "w");
struct dnaSeq *query;
int optIx;

for (query = dnaLoadNext(loader); query != NULL; query = dnaLoadNext(loader))
    {
    int sd = netMustConnect(host, port);
    FILE *reply = NULL;

    /* Command first, then one message per option. */
    netSendString(sd, "query");
    for (optIx=0; optIx<ArraySize(options); ++optIx)
        sendOption(sd, options[optIx].name);

    /* -rna and -unmask force upper case; otherwise only warn about
     * input that is entirely lower case. */
    if (optionExists("rna") || optionExists("unmask"))
        toUpperN(query->dna, query->size);
    else if (seqIsLower(query))
        warn("Sequence %s is all lower case, and thus ignored. Use -unmask "
             "flag to unmask lower case sequence.", query->name);

    /* The sequence goes out as a "seq" tag, its name, then the bases. */
    netSendString(sd, "seq");
    netSendString(sd, query->name);
    netSendHugeString(sd, query->dna);
    verbose(1, "%s\n", query->name);
    dnaSeqFree(&query);

    /* Copy the response into the output file. */
    reply = netFileFromSocket(sd);
    copyOpenFile(reply, out);
    carefulClose(&reply);

    close(sd);
    }
dnaLoadClose(&loader);
carefulClose(&out);
}
Esempio n. 14
0
void doFetch(char *inputFileName, char *sequenceFileName, char *outputFileName)
/* Look up the sequence for each line of inputFileName.
 *   Lines must have exactly six tab-separated fields (chrom, start, end,
 * name, score, strand); other lines and lines with start == end are
 * skipped.  For each remaining line the upper-cased sequence from
 * sequenceFileName (reverse-complemented for strand "-") is appended as a
 * seventh column and the line is written to outputFileName. */
{
struct lineFile *lf = NULL;
char *line;
char *row[6];
struct twoBitFile *tbf;
struct dnaSeq *chunk = NULL;
FILE *outputFileHandle = mustOpen(outputFileName, "w");

tbf = twoBitOpen(sequenceFileName);

lf = lineFileOpen(inputFileName, TRUE);
while (lineFileNext(lf, &line, NULL))
    {
    int elementCount = chopString(line, "\t", row, ArraySize(row));
    if (elementCount != 6) continue;

    /* The fields are only needed while this line is being handled, so use
     * them where they are.  Cloning them leaked three strings per line. */
    char *fileChrom = row[0];
    int start = sqlUnsigned(row[1]);
    int end = sqlUnsigned(row[2]);
    char *name = row[3];
    int score = sqlUnsigned(row[4]);
    char *strand = row[5];

    if (start == end) continue;
    assert (end > start);

    chunk = twoBitReadSeqFrag(tbf, fileChrom, start, end);
    touppers(chunk->dna);
    if (sameString(strand, "-"))
        reverseComplement(chunk->dna, chunk->size);
    fprintf(outputFileHandle, "%s\t%d\t%d\t%s\t%d\t%s\t%s\n", fileChrom, start, end, name, score, strand, chunk->dna);
    dnaSeqFree(&chunk);
    }

/* NOTE(review): tbf is never closed here - confirm whether that is intended. */
lineFileClose(&lf);
carefulClose(&outputFileHandle);
}
Esempio n. 15
0
static double motifScoreHere(char *chrom, int start, int end,
	char *motifName, char *motifTable)
/* Return the bit score of the named motif against chrom:start-end, taken
 * on whichever strand dnaMotifBestStrand picks. */
{
struct dnaSeq *region = hDnaFromSeq(database, chrom, start, end, dnaLower);
struct dnaMotif *motif = loadDnaMotif(motifName, motifTable);
double bitScore;

if (dnaMotifBestStrand(motif, region->dna) == '-')
    reverseComplement(region->dna, region->size);
bitScore = dnaMotifBitScore(motif, region->dna);

dnaMotifFree(&motif);
dnaSeqFree(&region);
return bitScore;
}
Esempio n. 16
0
void annoAssemblyGetSeq(struct annoAssembly *aa, char *seqName, uint start, uint end,
			char *buf, size_t bufSize)
/* Copy bases start..end of seqName into buf; bufSize must be at least
 * end-start+1 chars in length.  The most recently used sequence stays in
 * aa->curSeq and is reloaded only when a different name is requested.
 * Aborts on coordinates outside the sequence or with start after end. */
{
uint seqLen;
uint wanted;

if (aa->curSeq == NULL || differentString(aa->curSeq->name, seqName))
    {
    /* Not the sequence in hand: drop it and load the requested one. */
    dnaSeqFree(&aa->curSeq);
    aa->curSeq = twoBitReadSeqFragLower(aa->tbf, seqName, 0, 0);
    }
seqLen = aa->curSeq->size;
if (!(end <= seqLen && start <= seqLen && start <= end))
    errAbort("annoAssemblyGetSeq: bad coords [%u,%u) (sequence %s size %u)",
	     start, end, seqName, seqLen);
wanted = end-start;
safencpy(buf, bufSize, aa->curSeq->dna+start, wanted);
}
Esempio n. 17
0
static void processProtSeq(FILE *fh, struct sqlConnection *conn, struct refSeqVerInfo *rsvi, struct hash *doneProts)
/* Write the protein sequence linked to rsvi->acc in refLink to fh; the name
 * already includes the version.  Accessions present in doneProts are
 * skipped, and each one written is added to it, so that no NP is
 * duplicated. */
{
char query[128];
char *protAcc;

sqlSafef(query, sizeof(query), "SELECT protAcc FROM refLink WHERE mrnaAcc = \"%s\"", rsvi->acc);
protAcc = sqlNeedQuickString(conn, query);

/* Nothing to write for an empty accession or one already handled. */
if (!isNotEmpty(protAcc) || hashLookup(doneProts, protAcc) != NULL)
    {
    freeMem(protAcc);
    return;
    }

struct dnaSeq *pep = hGenBankGetPepC(conn, protAcc, NULL);
if (pep == NULL)
    errAbort("failed to get %s from database", protAcc);
faWriteNext(fh, pep->name, pep->dna, pep->size);
dnaSeqFree(&pep);
hashAdd(doneProts, protAcc, NULL);
freeMem(protAcc);
}
struct cassetteSeq *cassetteSeqFromBed(struct bed *bed, int targetExon)
/* Construct a cassetteSeq from a bed using the targetExon.
 *   The blocks of bed are read with hChromSeq and joined into one sequence
 * named "chrom:start-end_name".  targetStart/targetEnd give the position
 * of block targetExon within that sequence, after reverse-complementing
 * when the bed is on the '-' strand.  Aborts if targetExon is not a valid
 * block index. */
{
struct cassetteSeq *cseq = NULL;
int i=0;
char buff[1024];
int bedSize=0;
int seqSize=0;
int targetStart=0;
/* Make sure the target exon is valid. */
if(targetExon < 0 || targetExon >= bed->blockCount) 
    errAbort("pickCassettePcrPrimers::cassetteSeqFromBed() - Got request "
	     "for %d targetExon, but only %d exons present in %s\n", 
	     targetExon, bed->blockCount, bed->name);

AllocVar(cseq);
AllocVar(cseq->seq);
snprintf(buff, sizeof(buff), "%s:%d-%d_%s", bed->chrom, bed->chromStart, bed->chromEnd, bed->name);
cseq->name = cloneString(buff);
bedSize = countBedSize(bed) + 1;
cseq->seq->dna = needMem(sizeof(char)*bedSize);
cseq->bed = cloneBed(bed);
for(i=0; i<bed->blockCount; i++)
    {
    struct dnaSeq *seq = NULL;
    int chromStart =  bed->chromStarts[i] + bed->chromStart;
    int chromEnd = bed->blockSizes[i] + chromStart;
    /* Bytes left in the buffer, including the terminating zero. */
    int room = bedSize - seqSize;
    if(room <= 0)
	errAbort("pickCassettePcrPrimers::cassetteSeqFromBed() - blocks of %s "
		 "do not fit in %d byte sequence buffer\n", bed->name, bedSize);
    seq = hChromSeq(bed->chrom, chromStart, chromEnd);
    /* Bounded copy: never write past the end of the buffer allocated above. */
    snprintf(cseq->seq->dna+seqSize, room, "%s", seq->dna);
    if(targetExon == i)
	targetStart = seqSize;
    seqSize += bed->blockSizes[i];
    dnaSeqFree(&seq);
    }
cseq->seq->size = seqSize;
if(sameString(bed->strand, "-")) 
    {
    /* After flipping, the target block is measured from the other end. */
    reverseComplement(cseq->seq->dna, cseq->seq->size);
    targetStart = cseq->seq->size - targetStart - bed->blockSizes[targetExon];
    }
cseq->targetStart = targetStart;
cseq->targetEnd = targetStart + bed->blockSizes[targetExon];
snprintf(cseq->strand, sizeof(cseq->strand), "%s", bed->strand);
return cseq;
}    
static void outputBlocks(FILE *pslOutFh, struct seqReader *seqReader, char *seqName, char strand,
                         int start, int end, int blockCount, unsigned *starts, unsigned *sizes)
/* Write the bases of every block to pslOutFh, each followed by a comma,
 * to fill one of the sequence columns.  Block starts are taken relative to
 * the fetched start..end fragment, which is reverse-complemented first
 * when strand is '-'. */
{
int wholeSize = 0;
struct dnaSeq *frag = seqReaderGet(seqReader, seqName, start, end, &wholeSize);
int fragOffset;
int blk;

if (strand != '-')
    fragOffset = start;
else
    {
    reverseComplement(frag->dna, frag->size);
    fragOffset = wholeSize - end;
    }
for (blk = 0; blk < blockCount; ++blk)
    {
    mustWrite(pslOutFh, frag->dna+starts[blk]-fragOffset, sizes[blk]);
    fputc(',', pslOutFh);
    }
dnaSeqFree(&frag);
}
void getAccMrna(char *acc, struct sqlConnection *conn, FILE *outFa)
/* Write the mRNA for accession acc to outFa, or its peptide when peptides
 * is set.  Depending on the cdsUpper, cdsUpperAll and inclVer settings the
 * CDS is upper-cased and the version is appended to the name.  A sequence
 * missing from the database is reported on stderr and skipped. */
{
HGID seqId;
char accBuf[512];
struct genbankCds cds;
struct dnaSeq *dna;
boolean cdsOk = TRUE;
boolean doWrite;
char *faSeq = hGetSeqAndId(conn, acc, &seqId);

if (faSeq == NULL)
    {
    fprintf(stderr, "%s\tsequence not in database\n", acc);
    return;
    }
dna = faFromMemText(faSeq);

/* The CDS is only looked up when something is going to use it. */
if (cdsUpper || peptides)
    cdsOk = getCds(conn, acc, dna->size, !cdsUpperAll, &cds);
if (cdsOk && cdsUpper)
    upperCaseCds(dna, &cds);

doWrite = (cdsOk || cdsUpperAll);
if (doWrite)
    {
    if (inclVer)
        {
        int ver = getVersion(conn, acc);
        safef(accBuf, sizeof(accBuf), "%s.%d", acc, ver);
        acc = accBuf;
        }
    if (!peptides)
        faWriteNext(outFa, acc, dna->dna, dna->size);
    else
        writePeptide(outFa, acc, dna, &cds);
    }

dnaSeqFree(&dna);
}
Esempio n. 21
0
static void processSeqsFromBed(struct twoBitFile *tbf, char *bedFileName, FILE *outFile)
/* Get sequences defined by beds.  Exclude introns.
 *   Each bed in bedFileName is written to outFile as one record, named
 * "chrom:start-end" when clBedPos is set and after the sequence otherwise.
 * Sequence is upper-cased first when noMask is set. */
{
struct bed *bed, *bedList = bedLoadAll(bedFileName);
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    struct dnaSeq *seq = twoBitAndBedToSeq(tbf, bed);
    char* seqName = NULL;
    /* Declared at loop scope because seqName may point at it until the
     * record is written; it used to live inside the if-block and was out
     * of scope by the time it was read. */
    char buf[1024];
    if (clBedPos) 
        {
        safef(buf, sizeof(buf), "%s:%d-%d", bed->chrom, bed->chromStart, bed->chromEnd);
        seqName = buf;
        }
    else
        seqName = seq->name;
    if (noMask)
        toUpperN(seq->dna, seq->size);
    faWriteNext(outFile, seqName, seq->dna, seq->size);
    dnaSeqFree(&seq);
    }
/* NOTE(review): bedList is never freed here - confirm whether that is intended. */
}
Esempio n. 22
0
void snpMask(char *nibFile, char *outFile)
/* snpMask - Load nibFile, replace the base at every snp on chromName with
 * the code returned by iupac(), then produce whichever outputs are switched
 * on: per-snp sequence (printSnps), the whole chromosome written to outFile
 * (printChrom) and genes (printGenes). */
{
struct dnaSeq *chromSeq = nibLoadAllMasked(NIB_MASK_MIXED, nibFile);
char *bases = chromSeq->dna;
struct snp *snpList = readSnpsFromChrom(chromName);
struct snp *cur;

printf("got all snps in %s\n", chromName);

/* Every substitution is made before anything is printed. */
for (cur = snpList; cur != NULL; cur = cur->next)
    {
    char *base = bases + cur->chromStart;
    *base = iupac(cur->name, cur->observed, *base);
    }

if (printSnps)
    for (cur = snpList; cur != NULL; cur = cur->next)
        printSnpSeq(cur, chromSeq);

if (printChrom)
    faWrite(outFile, chromName, chromSeq->dna, chromSeq->size);

snpFreeList(&snpList);

if (printGenes)
    doPrintGenes(chromName, chromSeq);

dnaSeqFree(&chromSeq);
}
Esempio n. 23
0
void fillInEstInfo(struct estOrientInfo *ei, struct dnaSeq *est, struct dnaSeq *geno,
	struct psl *psl)
/* Fill in ei from the alignment psl and the est and genomic sequences:
 * position and name, intron orientation, poly-A sizes on both strands and
 * poly-A signal positions on both strands.  Corrects EST according to
 * genome and may reverse complement's est as a side effect. */
{
static char *signals[] = {"aataaa", "attaaa"};
int signalCount = sizeof(signals)/sizeof(signals[0]);
int tailUnaligned, headUnaligned;
int sigIx;
struct dnaSeq *revEst;

ei->chrom = psl->tName;
ei->chromStart = psl->tStart;
ei->chromEnd = psl->tEnd;
ei->name = psl->qName;
ei->intronOrientation = findIntronOrientation(psl, geno);

/* Poly-A at the end, capped at the unaligned tail of the query. */
ei->sizePolyA = countCharAtEnd('a', est->dna, est->size);
tailUnaligned = psl->qSize - psl->qEnd;
if (ei->sizePolyA > tailUnaligned)
    ei->sizePolyA = tailUnaligned;

/* Poly-T at the start, capped at the unaligned head of the query. */
headUnaligned = psl->qStart;
ei->revSizePolyA = countCharAtStart('t', est->dna, est->size);
if (ei->revSizePolyA > headUnaligned)
    ei->revSizePolyA = headUnaligned;

correctEst(psl, est, geno);

/* Find poly A signal - the most common signal is tried first on both
 * strands, the less common one only if neither strand had a hit. */
revEst = cloneDnaSeq(est);
reverseComplement(revEst->dna, est->size);
for (sigIx = 0; sigIx < signalCount; ++sigIx)
    {
    ei->signalPos = findSignalPos(psl, est, ei->sizePolyA, signals[sigIx]);
    ei->revSignalPos = findSignalPos(psl, revEst, ei->revSizePolyA, signals[sigIx]);
    if (ei->signalPos != 0 || ei->revSignalPos != 0)
        break;
    }
dnaSeqFree(&revEst);
}
void createFastaFilesForBits(char *root, struct genomeBit *gbList, boolean addDummy)
/* Load all of the fasta records for the bits in the genome list into one
   fasta file. Uses .nib files as they are much more compact and allow random access.
   Nib files are located from root; the output file name is built from
   outputRoot and gbList.  Each record is named "chrom:start-end".  When
   addDummy is set a ten base "garbage" record is appended at the end. */
{
struct dnaSeq *seq = NULL;
struct genomeBit *gb = NULL;
FILE *faOut = NULL;
char *faFile = NULL;
char *nibFile = NULL;
assert(gbList);
faFile = fileNameFromGenomeBit(outputRoot, ".fa", gbList);
faOut = mustOpen(faFile, "w");
for(gb = gbList; gb != NULL; gb = gb->next)
    {
    char buff[256];
    snprintf(buff, sizeof(buff), "%s:%u-%u", gb->chrom, gb->chromStart, gb->chromEnd);
    nibFile = nibFileFromChrom(root, gb->chrom);
    seq = nibLoadPartMasked(NIB_MASK_MIXED, nibFile, gb->chromStart, gb->chromEnd-gb->chromStart);
    /* No running base count is kept: the old total was never read and cost
     * a strlen() over every sequence. */
    faWriteNext(faOut, buff, seq->dna, seq->size);
    dnaSeqFree(&seq);
    freez(&nibFile);
    }
/* Add a dummy fasta record so that avid will order and orient things for us.. */
if(addDummy)
    faWriteNext(faOut, "garbage", "nnnnnnnnnn", 10);
carefulClose(&faOut);
/** This bit is commented out as we are now using nnnn's as repeat masking */
/*  if(slCount(gbList) > 1) */
/*      repeatMaskFile(outputRoot, gbList); */
/*  else */
/*    fakeRepeatMaskFile(outputRoot, gbList); */
freez(&faFile);
}
Esempio n. 25
0
struct altGraphX *agFromGp(char *db, struct genePred *gp, struct sqlConnection *conn, 
			   int maxGap, FILE *out)
/** Create an altGraphX record by clustering psl records within coordinates
    specified by genePred record.
    Returns NULL when no alignments are found for gp, and also when none of
    the alignments that survive filtering is on the same strand as gp. */
{
struct altGraphX *ag = NULL;
struct dnaSeq *genoSeq = NULL;
struct ggMrnaAli *maList=NULL, *ma=NULL, *maNext=NULL, *maSameStrand=NULL;
struct psl *pslList = NULL, *psl = NULL, *pslCluster = NULL, *pslNext = NULL;
char *chrom = gp->chrom;
/* Start with an empty range; expandToMaxAlignment fills it in below. */
int chromStart = BIGNUM;
int chromEnd = -1;

verbose(2, "agFromGp on %s %s:%d-%d\n", gp->name, gp->chrom, gp->txStart, gp->txEnd);

pslList = getPsls(gp, conn);
verbose(3, "  got %d psls\n", slCount(pslList));
if(slCount(pslList) == 0)
    {
    verbose(2, "No available alignments for %s.", gp->name);
    return NULL;
    }
/* expand to find the furthest boundaries of alignments */
expandToMaxAlignment(pslList, chrom, &chromStart, &chromEnd);
verbose(3, "  expanded to %s:%d-%d\n", chrom, chromStart, chromEnd);

/* get the sequence */
genoSeq = dnaFromChrom(db, chrom, chromStart, chromEnd, dnaLower);

/* Keep alignments that have an intron (or all of them when singleExonOk is
 * set).  pslNext is saved first because psl is either moved onto
 * pslCluster or freed inside the loop. */
for(psl = pslList; psl != NULL; psl = pslNext)
    {
    pslNext = psl->next;
    if(singleExonOk || pslHasIntron(psl, genoSeq, chromStart))
	{
	slAddHead(&pslCluster, psl);
	}
    else 
	{
	/* NOTE(review): rejected psls are only freed when useChromKeeper is
	 * off; presumably the keeper owns them otherwise - confirm. */
	if(!useChromKeeper)
	    pslFree(&psl);
	}
    }
verbose(3, "  got %d psls after intron/singleExon check\n", slCount(pslCluster));
/* load and check the alignments */
maList = pslListToGgMrnaAliList(pslCluster, gp->chrom, chromStart, chromEnd, genoSeq, maxGap);
verbose(3, "  got %d in maList\n", slCount(maList));

/* Keep only alignments on the same strand as the gene prediction; the
 * others are freed here. */
for(ma = maList; ma != NULL; ma = maNext)
    {
    maNext = ma->next;
    verbose(4, "      ma->strand %s, gp->strand %s\n", ma->strand, gp->strand);
    if(ma->strand[0] == gp->strand[0])
	{
	slSafeAddHead(&maSameStrand, ma);
	}
    else
	ggMrnaAliFree(&ma);
    }
/* Entries were added at the head, so reverse to restore maList order. */
slReverse(&maSameStrand);

verbose(3, "  got %d in ma on same strand\n", slCount(maSameStrand));

/* If there is a cluster to work with create an geneGraph */
if(maSameStrand != NULL)
    {
    /* NOTE(review): genoSeq and maSameStrand are not freed on this path;
     * presumably agFromAlignments takes them over - confirm. */
    ag = agFromAlignments(db, maSameStrand, genoSeq, conn, chromStart, chromEnd,  out);
    }
else
    {
    dnaSeqFree(&genoSeq);
    ggMrnaAliFreeList(&maSameStrand);
    }

/* Only free psls if not using cache... */
if(!useChromKeeper)
    pslFreeList(&pslCluster);
return ag;
}
Esempio n. 26
0
/* Check a protein sequence, return FALSE if there is some reason it can't be
 * obtained or doesn't match.
 *   Reads recSize bytes (plus a few more, to look at what follows) at
 * offset faOff of extFile and checks that they form a single fasta record
 * whose name is protAcc.protVer and whose sequence is seqSize long.
 * Seek and read failures abort; a record that does not match is reported
 * with gbVerbMsg and gives FALSE. */
static boolean faCheckProtRec(char *protAcc, short protVer, struct extFile* extFile,
                              off_t faOff, unsigned seqSize, unsigned recSize)
{
static const int extraBytes = 8;  /* extra bytes to read to allow checking next record */
int askSize = recSize+extraBytes;
int readSize;
char *faBuf, *p, gotAcc[GB_ACC_BUFSZ];
short gotVer;
struct dnaSeq *protSeq;
FILE *fh = mustOpen(extFile->path, "r");

/* bounds have already been checked; so error if we can't read the bytes */
if (fseeko(fh, faOff, SEEK_SET) < 0)
    errnoAbort("%s: can't seek to %lld in %s", protAcc, (long long)faOff, extFile->path);
faBuf = needMem(askSize+1);
readSize = fread(faBuf, 1, askSize, fh);
/* fread never returns a negative count, so a failed read has to be
 * detected with ferror(). */
if (ferror(fh))
    errnoAbort("%s: read failed at %lld in %s", protAcc, (long long)faOff, extFile->path);
if (readSize < recSize)
    errAbort("%s: can't read %u bytes at %lld in %s", protAcc, recSize, (long long)faOff, extFile->path);
carefulClose(&fh);
faBuf[readSize] = '\0';

/* check that it starts with a '>' and that there are no extra bases after the
 * end of sequence */
if (faBuf[0] != '>')
    {
    gbVerbMsg(3, "%s: fasta record at %lld does not start with a '>': %s", protAcc, 
              (long long)faOff, extFile->path);
    freeMem(faBuf);
    return FALSE;
    }
p = skipLeadingSpaces(faBuf+recSize);
if (!((*p == '>') || (*p == '\0')))
    {
    gbVerbMsg(3, "%s: fasta record at %lld for %u has extra characters following the record: %s", protAcc, 
              (long long)faOff, recSize, extFile->path);
    freeMem(faBuf);
    return FALSE;
    }
/* NOTE(review): faBuf is not freed separately from here on; presumably
 * protSeq takes the buffer over and dnaSeqFree releases it - confirm. */
protSeq = faSeqFromMemText(faBuf, FALSE);
gotVer = gbSplitAccVer(protSeq->name, gotAcc);
if (!(sameString(gotAcc, protAcc) && (gotVer == protVer)))
    {
    gbVerbMsg(3, "%s: expected sequence %s.%d, found %s.%d in fasta record at %lld : %s", protAcc,
              protAcc, protVer, gotAcc, gotVer, (long long)faOff, extFile->path);
    dnaSeqFree(&protSeq);
    return FALSE;
    }

if (protSeq->size != seqSize)
    {
    gbVerbMsg(3, "%s: expected sequence of %u chars, got %d from fasta record at %lld : %s", protAcc,
              seqSize, protSeq->size, (long long)faOff, extFile->path);
    dnaSeqFree(&protSeq);
    return FALSE;
    }

dnaSeqFree(&protSeq);
return TRUE;
}
void checkBedMatchesSeqs(struct cassetteSeq *cseq, struct bed *bed)
/* Pull the first two block sequences of bed out of the database and check
   them against the primer sequences in cseq, warning with both
   orientations of everything when they disagree. The second block is
   always compared reverse complemented; which primer goes with which
   block depends on the strand:

   If gene is on '+' strand
   ++++++++++++++++++++++++++++++++++++++++++++->
   <--------------------------------------------
   
   llllllllll->                    <-rrrrrrrrrr (rev-comp)
   
   If gene is on '-' strand
   ++++++++++++++++++++++++++++++++++++++++++++->
   <--------------------------------------------
   
   rrrrrrrrrr->                    <-llllllllll (rev-comp)
*/
{
int firstStart = bed->chromStart;
int secStart = bed->chromStart + bed->chromStarts[1];
struct dnaSeq *firstSeq = hChromSeq(bed->chrom, firstStart, firstStart + bed->blockSizes[0]);
struct dnaSeq *secSeq = hChromSeq(bed->chrom, secStart, secStart + bed->blockSizes[1]);
char *expectFirst, *expectSec;
boolean goodFlag = TRUE;

/* Choose which primer each block has to match. */
if(sameString(bed->strand,"+")) 
    {
    expectFirst = cseq->leftPrimer;
    expectSec = cseq->rightPrimer;
    }
else
    {
    expectFirst = cseq->rightPrimer;
    expectSec = cseq->leftPrimer;
    }

/* Flip the second block for the comparison, then flip it back. */
reverseComplement(secSeq->dna, secSeq->size);
if(differentString(firstSeq->dna, expectFirst))
    goodFlag = FALSE;
if(differentString(secSeq->dna, expectSec))
    goodFlag = FALSE;
reverseComplement(secSeq->dna, secSeq->size);

if(!goodFlag) 
    {
    /* Report every sequence next to its reverse complement. */
    char *rightRev = cloneString(cseq->rightPrimer);
    char *leftRev = cloneString(cseq->leftPrimer);
    char *firstRev = cloneString(firstSeq->dna);
    char *secRev = cloneString(secSeq->dna);
    reverseComplement(rightRev, strlen(rightRev));
    reverseComplement(leftRev, strlen(leftRev));
    reverseComplement(firstRev, strlen(firstRev));
    reverseComplement(secRev, strlen(secRev));
    warn("Problem for bed; %s on strand %s", bed->name, bed->strand);
    warn("leftPrimer:\t%s\t%s", cseq->leftPrimer, leftRev);
    warn("rightPrimer:\t%s\t%s", cseq->rightPrimer, rightRev);
    warn("firstBlock:\t%s\t%s", firstSeq->dna, firstRev);
    warn("secBlock:\t%s\t%s", secSeq->dna, secRev);
    freez(&rightRev);
    freez(&leftRev);
    freez(&firstRev);
    freez(&secRev);
    }
dnaSeqFree(&firstSeq);
dnaSeqFree(&secSeq);
}
Esempio n. 28
0
void queryResponse(int sd, struct bzp *bzp, struct blatzIndex *indexList)
/* Respond to query message - read options and dna from socket,
 * and do alignment.
 *   sd        - socket the query is read from and the result is written to.
 *   bzp       - settings made at startup; a local copy is modified so that
 *               per-query options do not change the caller's structure.
 *   indexList - passed through to blatzAlign and blatzWriteChains.
 * If the query ends early truncatedQuery() is called with the stage that
 * failed (1 = options, 2 = sequence name, 3 = sequence) and nothing is
 * aligned.  Strings cloned for the out/mafQ/mafT options and the query
 * sequence are released on every exit path. */
{
struct bzp lbzp = *bzp;
struct dnaSeq *seq = NULL;
char buf[256], *line, *word;
char *out = NULL, *mafT = NULL, *mafQ = NULL;
char *name, *dna;
FILE *f;
struct chain *chainList;

/* First get options - overriding what got set at startup. */
for (;;)
    {
    if ((line = netGetString(sd, buf)) == NULL)
        {
        truncatedQuery(1);
        goto cleanup;
        }
    word = nextWord(&line);
    if (word == NULL)
        continue;       /* Defensive: never hand a NULL keyword to sameString. */
    line = skipLeadingSpaces(line);
    if (sameString(word, "seq"))
        break;
    else if (sameString(word, "rna"))
        lbzp.rna = TRUE;
    else if (sameString(word, "minScore"))
        lbzp.minScore = atoi(line);
    else if (sameString(word, "minGapless"))
        lbzp.minGapless = atoi(line);
    else if (sameString(word, "multiHits"))
        lbzp.multiHits = atoi(line);
    else if (sameString(word, "minChain"))
        lbzp.minChain = atoi(line);
    else if (sameString(word, "maxExtend"))
        lbzp.maxExtend = atoi(line);
    else if (sameString(word, "maxBandGap"))
        lbzp.maxBandGap = atoi(line);
    else if (sameString(word, "minExpand"))
        lbzp.minExpand = atoi(line);
    else if (sameString(word, "expandWindow"))
        lbzp.expandWindow = atoi(line);
    else if (sameString(word, "out"))
        {
        /* Drop any value from an earlier "out" line so it is not leaked. */
        freez(&out);
        lbzp.out = out = cloneString(line);
        }
    else if (sameString(word, "mafQ"))
        {
        freez(&mafQ);
        lbzp.mafQ = mafQ = cloneString(line);
        }
    else if (sameString(word, "mafT"))
        {
        freez(&mafT);
        lbzp.mafT = mafT = cloneString(line);
        }
    }

/* Get DNA into seq.  The name lives in buf, so it is cloned before use. */
name = netGetString(sd, buf);
if (name == NULL)
    {
    truncatedQuery(2);
    goto cleanup;
    }
dna = netGetHugeString(sd);
if (dna == NULL)
    {
    truncatedQuery(3);
    goto cleanup;
    }
AllocVar(seq);
seq->dna = dna;         /* seq takes ownership of the received string. */
seq->size = strlen(dna);
seq->name = cloneString(name);
bzpTime("Received %d bases in %s", seq->size, seq->name);
if (lbzp.rna)
    maskTailPolyA(seq->dna, seq->size);

/* Create alignments into chainList and write results. */
f = netFileFromSocket(sd);
chainList = blatzAlign(&lbzp, indexList, seq);
blatzWriteChains(&lbzp, &chainList, seq, 0, seq->size, seq->size, indexList, f);
bzpTime("sent result - %d chains", slCount(chainList));
carefulClose(&f);

cleanup:
/* Single exit: release everything this query allocated. */
if (seq != NULL)
    dnaSeqFree(&seq);
freez(&out);
freez(&mafQ);
freez(&mafT);
}
void snpValid()
/* Test snpMap --> dbSnpRs/affy for one assembly.
 *
 * For each chromosome (only the one named by chr when chr is set) every
 * snpMap row is looked up by name in the reference lists, which are walked
 * in the order affy120, dbSnpRs, affy10 as tracked by 'mode'.  When the name
 * is found the reference sequence is compared with chromosome DNA fetched
 * through checkAndFetchNib: first by counting mismatches against the forward
 * fetch and the reverse-complemented fetch, and if both have more than one
 * miss, by affine alignment whose per-base score is compared to threshold.
 * Per-chromosome counts and grand totals are printed to stdout.
 *
 * Uses db, chr, threshold and Verbose, which are defined outside this
 * function.  Takes no parameters and returns nothing. */
{
char *Org;
char *dbSnpTbl = NULL;

struct dbSnpRs *dbSnps = NULL;
struct dbSnpRs *dbSnp = NULL;

struct affy10KDetails *affy10s = NULL;
struct affy10KDetails *affy10  = NULL;

struct affy120KDetails *affy120s = NULL;
struct affy120KDetails *affy120  = NULL;

struct axtScoreScheme *simpleDnaScheme = NULL;

int match = 0;         /* good match of minimal acceptable quality */
int mismatch = 0;      /* unacceptable match quality */
int missing = 0;       /* unable to find rsId in dbSnpRs/affy */
int goodrc = 0;        /* matches after reverse-complement */
int assemblyDash = 0;  /* assembly context is just a single dash - (complex cases) */
int gapNib = 0;        /* nib returns n's, we are in the gap */

int totalMatch = 0;
int totalMismatch = 0;
int totalMissing = 0;
int totalGoodrc = 0;
int totalAssemblyDash = 0;
int totalGapNib = 0;

boolean affy = FALSE;

int mode = 3;          /* which reference list is current: 1=affy120, 2=dbSnpRs, 3=affy10 */
void *next = NULL;
char *id   = NULL;
char *seq  = NULL;
char affy120id[12];

int matchScore = 100;
int misMatchScore = 100;
int gapOpenPenalty = 400;
int gapExtendPenalty = 50;

int noDna = 0;
int snpMapRows = 0;

struct slName *cns = NULL;
struct slName *cn = NULL;

/* controls whether affy120k, affy10k, or dbSnpRs is used 
   currently affys are human only
*/
if (!hDbIsActive(db))
    {
    printf("Currently no support for db %s\n", db);
    return;
    }

hSetDb(db);

Org = hOrganism(db);

if (sameWord(Org,"Human"))
    {
    affy = TRUE;
    dbSnpTbl = "dbSnpRsHg";
    }
else if (sameWord(Org,"Mouse"))
    dbSnpTbl = "dbSnpRsMm";
else if (sameWord(Org,"Rat"))
    dbSnpTbl = "dbSnpRsRn";
else 
    {
    printf("Currently no support for Org %s\n", Org);
    return;
    }

simpleDnaScheme = axtScoreSchemeSimpleDna(matchScore, misMatchScore, gapOpenPenalty, gapExtendPenalty);

uglyf("dbSnp Table=%s \n",dbSnpTbl);

uglyf("Affy=%s \n", affy ? "TRUE" : "FALSE" );

dbSnps = readDbSnps(dbSnpTbl);
printf("read hgFixed.%s \n",dbSnpTbl);

if (affy)
    {
    affy10s = readAffy10();
    printf("read hgFixed.affy10KDetails \n");

    affy120s = readAffy120();
    printf("read hgFixed.affy120KDetails \n");
    }

cns = hAllChromNames();
if (!cns)
    {
    printf("testDb: hAllChromNames returned empty list \n");
    /* Release what was loaded above before bailing out. */
    dbSnpRsFreeList(&dbSnps);
    if (affy) 
        {
        affy10KDetailsFreeList(&affy10s);
        affy120KDetailsFreeList(&affy120s);
        }
    axtScoreSchemeFree(&simpleDnaScheme);
    return;
    }

for (cn = cns; cn != NULL; cn = cn->next)
    {
    struct dnaSeq *chromSeq = NULL;
    struct snpMap *snps = NULL;
    struct snpMap *snp = NULL;

    if (chr != NULL)
        if (!sameWord(chr,cn->name))
            continue;

    chromSeq = hLoadChrom(cn->name);
    printf("testDb: chrom %s :  size (%u) \n",cn->name,chromSeq->size);
    
    snps = readSnps(cn->name);
    printf("read %s.snpMap where chrom=%s \n",db,cn->name);

    /* Rewind the cursor of every reference list and restart the lookup mode
     * along with them; a mode left advanced by an earlier chromosome would
     * otherwise skip the lists that were just rewound. */
    dbSnp   = dbSnps; 
    affy10  = affy10s;
    affy120 = affy120s;
    if (affy)
        mode = 1; /* start on affy120 with numbers in snpMap.rsId */
    else
        mode = 2; /* start on dbSnps with "rs*" in snpMap.rsId */
    
    printf("=========================================================\n");
    for (snp = snps; snp != NULL; snp = snp->next)
        {
        int cmp = -1;
        char *nibDna=NULL;

        ++snpMapRows;

        /* Advance through the reference lists until the current entry's id
         * is not less than snp->name (cmp >= 0), or until all lists are
         * exhausted (cmp forced to 1). */
        while (cmp < 0)
            {
            /* Select the current list entry, moving on to the next list
             * whenever the current one has run out. */
            while (cmp < 0)
                {
                switch (mode)
                    {
                    case 1:
                        next = affy120; break;
                    case 2:
                        next = dbSnp; break;
                    case 3:
                        next = affy10; break;
                    }
                if (next != NULL) 
                    break;
                if (mode < 3)
                    ++mode;
                else
                    cmp = 1;
                }
                
            if (cmp < 0)
                {
                switch (mode)
                    {
                    case 1:
                        safef(affy120id, sizeof(affy120id), "%d", affy120->affyId); /* have int type but want string */
                        id = affy120id;
                        break;
                    case 2:
                        id = dbSnp->rsId; break;
                    case 3:
                        id = affy10->affyId; break;
                    }
                cmp=mystrcmp(id, snp->name);
                }
                
            if (cmp < 0) 
                {
                switch (mode)
                    {
                    case 1:
                        affy120 = affy120->next; break;
                    case 2:
                        dbSnp = dbSnp->next; break;
                    case 3:
                        affy10 = affy10->next; break;
                    }
                }
            }   

        if (cmp==0) 
            {
            int strand=1;
            char *rc = NULL;
            int m = 0;
            int lf = 0;  /* size of left flank context (lower case dna) */
            int rf = 0;  /* size of right flank context (lower case dna) */
            int ls = 0;  /* total size of assembly dna context plus actual region in dbSnpRs/affy */
            char *origSeq = NULL; /* use to display the original dnSnpRs.assembly seq */
            
            switch (mode)
                {
                case 1:
                    seq = affy120->sequenceA; break;
                case 2:
                    seq = dbSnp->assembly; break;
                case 3:
                    seq = affy10->sequenceA; break; 
                }
                
            if (sameString(seq,"-"))
                {
                ++assemblyDash;
                if (Verbose)
                    printf("(no assembly context) rsId=%s chrom=%s %u %u \n assembly=%s \n\n",
                      id,
                      snp->chrom,
                      snp->chromStart,
                      snp->chromEnd,
                      seq
                      );
                /* seq still points into the reference entry here, so there
                 * is nothing to free. */
                continue;
                }
        
            origSeq = seq;
            lf = leftFlank(origSeq);
            rf = rightFlank(origSeq);
            seq = cloneString(origSeq);   /* from here on seq is owned by this iteration */
            stripDashes(seq);      /* remove dashes indicating insert to simplify and correct processing of nib data */
            ls = strlen(seq);      /* used to be: lengthOneDash(seq); */
            
            nibDna = checkAndFetchNib(chromSeq, snp, lf, ls);
            if (nibDna==NULL) 
                {
                ++noDna;
                printf("no dna for %s %s %u %u \n",
                    snp->name,
                    snp->chrom,
                    snp->chromStart,
                    snp->chromEnd
                    );
                freez(&seq);
                continue;
                }
            
            if (allNs(nibDna))
                {
                ++gapNib;
                ++mismatch;
                if (Verbose)
                    printf("(nib gap) rsId=%s chrom=%s %u %u \n assembly=%s \n  snpMap=%s \n\n",
                      id,
                      snp->chrom,
                      snp->chromStart,
                      snp->chromEnd,
                      seq,
                      nibDna
                      );
                freez(&seq);
                freez(&nibDna);
                continue;
                }
                
            m = misses(seq,nibDna);
            if (m > 1)
                {
                /* Too many misses going forward: fetch again using the right
                 * flank size and compare its reverse complement. */
                rc = checkAndFetchNib(chromSeq, snp, rf, ls);
                if (rc==NULL) 
                    {
                    ++noDna;
                    printf("no dna for %s %s %u %u \n",
                        snp->name,
                        snp->chrom,
                        snp->chromStart,
                        snp->chromEnd
                        );
                    freez(&seq);
                    freez(&nibDna);
                    continue;
                    }
            
                reverseComplement(rc,strlen(rc));
                int n = misses(seq, rc);
                if (n < m) 
                    {
                    strand=-1;
                    m = n;
                    }
                }
            if (m <= 1)
                {
                ++match;
                if (strand < 1)
                    ++goodrc;
                }
            else
                {
                /* Neither strand matched by miss count (rc is non-NULL on
                 * this path); fall back to affine alignment. */
                struct dnaSeq query, target;
                struct axt *axtAln = NULL;
                int bestScore = 0; 
                ZeroVar(&query);
                query.dna = seq;
                query.size = strlen(query.dna);
                
                ZeroVar(&target);
                target.dna = nibDna;
                target.size = strlen(target.dna);
                axtAln = axtAffine(&query, &target, simpleDnaScheme);
                strand = 1;
                if (axtAln) 
                    {
                    bestScore = axtAln->score / ls;
                    }
                axtFree(&axtAln);
                
                if (bestScore < threshold)
                    {
                    ZeroVar(&target);
                    target.dna = rc;
                    target.size = strlen(target.dna);
                    axtAln = axtAffine(&query, &target, simpleDnaScheme);
                    if ((axtAln) && (bestScore < (axtAln->score / ls)))
                        {
                        strand = -1;
                        bestScore = axtAln->score / ls;
                        }
                    axtFree(&axtAln);
                    }
                
                if (bestScore >= threshold)
                    {
                    ++match;
                    if (strand < 1)
                        ++goodrc;
                    }
                else
                    {
                    ++mismatch;
                    }
                
                if ((bestScore < threshold) || Verbose) 
                    {
                    printf(
                        "score=%d misses=%u strand=%d rsId=%s chrom=%s %u %u lf=%d ls=%d \n"
                        " assembly=%s \n"
                        "   snpMap=%s \n"
                        "rc snpMap=%s \n"
                        "\n",
                      bestScore,
                      m,
                      strand,
                      id,
                      snp->chrom,
                      snp->chromStart,
                      snp->chromEnd,
                      lf,
                      ls,
                      seq,
                      nibDna,
                      rc
                      );
                    } 
                }
                
            freez(&rc);
            freez(&seq);
            }
        else
            {
            char snpLkup[10] = "";
            /* this id is missing from dbSnpRs/affy! */
            ++missing;
            switch (mode)
                {
                case 1:
                    safef(snpLkup,sizeof(snpLkup),"%s","affy120"); break;
                case 2:
                    safef(snpLkup,sizeof(snpLkup),"%s",dbSnpTbl); break;
                case 3:
                    safef(snpLkup,sizeof(snpLkup),"%s","affy10"); break;
                }
            if (Verbose)                
                printf("snpMap.name=%s is missing from %s (now at %s) \n\n",snp->name,snpLkup,id);
            }
        
        freez(&nibDna);
        }
    snpMapFreeList(&snps);

    dnaSeqFree(&chromSeq);  

    printf("\n\n\n Total matches for chrom %s:\n ",cn->name);
    printf("             matches: %u \n ",match);
    printf("          mismatches: %u \n",mismatch);
    printf("missing from dbSnpRs: %u \n",missing);
    printf("   rev compl matches: %u \n",goodrc);
    printf("        assembly = -: %u \n",assemblyDash);
    printf("         nib in gap : %u \n",gapNib);
     
    printf("\n\n=========================================\n");
    
    /* Fold this chromosome into the grand totals, then reset its counters. */
    totalMatch    += match;
    totalMismatch += mismatch;
    totalMissing  += missing;
    totalGoodrc   += goodrc;
    totalAssemblyDash += assemblyDash;
    totalGapNib   += gapNib;
    
    match        = 0;
    mismatch     = 0;
    missing      = 0;
    goodrc       = 0;
    assemblyDash = 0;
    gapNib       = 0;
    
    printf("\n");
    printf("\n");
    }

slFreeList(&cns);

dbSnpRsFreeList(&dbSnps);
if (affy) 
    {
    affy10KDetailsFreeList(&affy10s);
    affy120KDetailsFreeList(&affy120s);
    }

axtScoreSchemeFree(&simpleDnaScheme);

printf("\n\n\n Grand Totals:  \n ");
printf("             matches: %u \n ",totalMatch);
printf("          mismatches: %u \n",totalMismatch);
printf("missing from dbSnpRs: %u \n",totalMissing);
printf("   rev compl matches: %u \n",totalGoodrc);
printf("        assembly = -: %u \n",totalAssemblyDash);
printf("         nib in gap : %u \n",totalGapNib);

printf("\n       Total rows in snpMap: %u \n ",snpMapRows);
printf("\n        # no dna found for : %u \n ",noDna);

printf("\n\n=========================================\n");
}
void searchOneIndex(int fileCount, char *files[], struct genoFind *gf, char *outName, 
	boolean isProt, struct hash *maskHash, FILE *outFile, boolean showStatus)
/* Search all sequences in all files against single genoFind index.
 *   fileCount, files - the query files; each may be a .nib file, a two bit
 *                      spec, or anything else, which is read with
 *                      faMixedSpeedReadNext.
 *   gf, maskHash     - handed to searchOneMaskTrim for every sequence.
 *   outName          - not used by this function.
 *   isProt           - protein mode; aborts on .nib and two bit input.
 *   outFile          - receives the output header and the hits, and is
 *                      closed with carefulClose before returning.
 *   showStatus       - when set, print a summary of bases and sequences
 *                      searched to stdout. */
{
long long basesSearched = 0;
int seqsSearched = 0; 
int fileIx;

gfOutputHead(gvo, outFile);
for (fileIx = 0; fileIx < fileCount; ++fileIx)
    {
    char *path = files[fileIx];

    if (nibIsFile(path))
        {
        /* A .nib file holds one sequence, which is renamed after the file. */
        struct dnaSeq *nibSeq;

        if (isProt)
            errAbort("%s: Can't use .nib files with -prot or d=prot option\n", path);
        nibSeq = nibLoadAllMasked(NIB_MASK_MIXED, path);
        freez(&nibSeq->name);
        nibSeq->name = cloneString(path);
        searchOneMaskTrim(nibSeq, isProt, gf, outFile,
                          maskHash, &basesSearched, &seqsSearched);
        freeDnaSeq(&nibSeq);
        continue;
        }

    if (twoBitIsSpec(path))
        {
        struct twoBitSpec *spec = twoBitSpecNew(path);
        struct twoBitFile *twoBit = twoBitOpen(spec->fileName);

        if (isProt)
            errAbort("%s is a two bit file, which doesn't work for proteins.", 
                path);
        if (spec->seqs == NULL)
            {
            /* No sequences listed in the spec: search everything in the index. */
            struct twoBitIndex *entry = tbfIndexStart: NULL;
            }
        twoBitClose(&twoBit);
        continue;
        }

    /* Anything else goes through faMixedSpeedReadNext one record at a time. */
        {
        static struct dnaSeq faSeq;
        struct lineFile *faFile = lineFileOpen(path, TRUE);

        while (faMixedSpeedReadNext(faFile, &faSeq.dna, &faSeq.size, &faSeq.name))
            searchOneMaskTrim(&faSeq, isProt, gf, outFile,
                              maskHash, &basesSearched, &seqsSearched);
        lineFileClose(&faFile);
        }
    }
carefulClose(&outFile);
if (showStatus)
    printf("Searched %lld bases in %d sequences\n", basesSearched, seqsSearched);
}