Esempio n. 1
0
void scanGenes(char *chromName)
/* Cross-check SNP annotations against refGene for one chromosome.
 * For every refGene row on chromName, the SNPs returned by readSnps() for the
 * row's txStart..txEnd range are looked up in the global contigHash.  A
 * message is logged when a SNP has no hash entry, or when the value stored in
 * the hash differs from the refGene name. */
{
char query[512];
struct sqlConnection *conn = hAllocConn();
struct sqlResult *sr;
char **row;

sqlSafef(query, sizeof(query), "select name, txStart, txEnd from refGene where chrom = '%s'", chromName);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* Keep a private copy of the name for the duration of this row. */
    char *geneName = cloneString(row[0]);
    struct slName *snpList = readSnps(chromName, sqlUnsigned(row[1]), sqlUnsigned(row[2]));
    struct slName *snpPtr = NULL;

    for (snpPtr = snpList; snpPtr != NULL; snpPtr = snpPtr->next)
        {
        struct hashEl *hel = hashLookup(contigHash, snpPtr->name);
        if (hel == NULL)
            verbose(1, "no function for %s\n", snpPtr->name);
        else
            {
            /* hel->val is a generic pointer; give it a string type before
             * comparing and printing. */
            char *mrnaAcc = hel->val;
            if (!sameString(mrnaAcc, geneName))
                verbose(1, "mismatch for %s: refGene = %s, mrna_acc = %s\n", snpPtr->name, geneName, mrnaAcc);
            }
        }

    /* Release the per-row allocations; previously both leaked on every row.
     * NOTE(review): assumes readSnps() hands ownership of the list to the
     * caller -- confirm against its definition. */
    slFreeList(&snpList);
    freez(&geneName);
    }
sqlFreeResult(&sr);
hFreeConn(&conn);
}
Esempio n. 2
0
int main(int argc, char *argv[])
/* Read snp126.
 * Write coords of multiple alignments to snpMultiple.tab and counts to
 * snpMultiple.log.  Expects exactly one argument: the database name. */
{
if (argc != 2)
    usage();

/* Remember the database name globally and make it the active database. */
hSetDb(snpDb = argv[1]);

/* Both output files are opened before any SNP is read. */
outputFileHandle = mustOpen("snpMultiple.tab", "w");
logFileHandle = mustOpen("snpMultiple.log", "w");

readSnps();
writeResults();

// free hashes

carefulClose(&outputFileHandle);
carefulClose(&logFileHandle);

return 0;
}
int main(int argc, char *argv[])
/* Read snp125 and check it for exceptions.
 * Expects exactly one argument: the database name. */
{
if (argc != 2)
    usage();

/* Remember the database name globally and make it the active database. */
hSetDb(snpDb = argv[1]);

readSnps();
checkExceptions();

return 0;
}
void snpValid()
/* Test snpMap --> dbSnpRs/affy for one assembly.
 *
 * For each chromosome of the global db (only the one named by the global chr
 * when that is set), every snpMap row is paired with the dbSnpRs/affy record
 * that carries the same id.  The flanking sequence stored in that record is
 * compared with the assembly DNA, first by counting mismatches on both
 * strands and, if that fails, by affine alignment scored against the global
 * threshold.  Per-chromosome counts and grand totals are printed to stdout.
 *
 * The pairing is a single forward pass over the snpMap rows and the lookup
 * lists using mystrcmp(), so it presumably relies on all of them being
 * sorted in mystrcmp() order -- TODO confirm against the readers. */
{
char *Org;
char *dbSnpTbl = NULL;

struct dbSnpRs *dbSnps = NULL;   /* head of the dbSnpRs lookup list */
struct dbSnpRs *dbSnp = NULL;    /* cursor into dbSnps */

struct affy10KDetails *affy10s = NULL;
struct affy10KDetails *affy10  = NULL;

struct affy120KDetails *affy120s = NULL;
struct affy120KDetails *affy120  = NULL;

struct axtScoreScheme *simpleDnaScheme = NULL;

int match = 0;         /* good match of minimal acceptable quality */
int mismatch = 0;      /* unacceptable match quality */
int missing = 0;       /* unable to find rsId in dbSnpRs/affy */
int goodrc = 0;        /* matches after reverse-complement */
int assemblyDash = 0;  /* assembly context is just a single dash - (complex cases) */
int gapNib = 0;        /* nib returns n's, we are in the gap */

int totalMatch = 0;
int totalMismatch = 0;
int totalMissing = 0;
int totalGoodrc = 0;
int totalAssemblyDash = 0;
int totalGapNib = 0;

boolean affy = FALSE;

int mode = 3;          /* lookup list in use: 1 = affy120, 2 = dbSnpRs, 3 = affy10 */
void *next = NULL;
char *id   = NULL;
char *seq  = NULL;
char affy120id[12];

int matchScore = 100;
int misMatchScore = 100;
int gapOpenPenalty = 400;
int gapExtendPenalty = 50;

int noDna = 0;
int snpMapRows = 0;

struct slName *cns = NULL;
struct slName *cn = NULL;

/* controls whether affy120k, affy10k, or dbSnpRs is used
   currently affys are human only
*/
if (!hDbIsActive(db))
    {
    printf("Currently no support for db %s\n", db);
    return;
    }

hSetDb(db);

Org = hOrganism(db);

if (sameWord(Org,"Human"))
    affy = TRUE;

if (sameWord(Org,"Human"))
    dbSnpTbl = "dbSnpRsHg";
else if (sameWord(Org,"Mouse"))
    dbSnpTbl = "dbSnpRsMm";
else if (sameWord(Org,"Rat"))
    dbSnpTbl = "dbSnpRsRn";
else
    {
    printf("Currently no support for Org %s\n", Org);
    return;
    }

simpleDnaScheme = axtScoreSchemeSimpleDna(matchScore, misMatchScore, gapOpenPenalty, gapExtendPenalty);

uglyf("dbSnp Table=%s \n",dbSnpTbl);

uglyf("Affy=%s \n", affy ? "TRUE" : "FALSE" );

dbSnps = readDbSnps(dbSnpTbl);
printf("read hgFixed.%s \n",dbSnpTbl);

if (affy)
    {
    affy10s = readAffy10();
    printf("read hgFixed.affy10KDetails \n");

    affy120s = readAffy120();
    printf("read hgFixed.affy120KDetails \n");
    }

cns = hAllChromNames();
if (!cns)
    {
    printf("testDb: hAllChromNames returned empty list \n");
    /* Release what was loaded above before bailing out. */
    dbSnpRsFreeList(&dbSnps);
    if (affy)
        {
        affy10KDetailsFreeList(&affy10s);
        affy120KDetailsFreeList(&affy120s);
        }
    axtScoreSchemeFree(&simpleDnaScheme);
    return;
    }

for (cn = cns; cn != NULL; cn = cn->next)
    {
    struct dnaSeq *chromSeq = NULL;
    struct snpMap *snps = NULL;
    struct snpMap *snp = NULL;

    if (chr != NULL)
        if (!sameWord(chr,cn->name))
            continue;

    chromSeq = hLoadChrom(cn->name);
    printf("testDb: chrom %s :  size (%u) \n",cn->name,chromSeq->size);

    snps = readSnps(cn->name);
    printf("read %s.snpMap where chrom=%s \n",db,cn->name);

    /* Restart the lookup cursors at the list heads for this chromosome. */
    dbSnp   = dbSnps;
    affy10  = affy10s;
    affy120 = affy120s;

    /* The starting list must be reset together with the cursors; otherwise
     * every chromosome after the first would begin on whichever list the
     * previous chromosome happened to end on. */
    if (affy)
        {
        mode=1; /* start on affy120 with numbers in snpMap.rsId */
        }
    else
        {
        mode=2; /* start on dbSnps with "rs*" in snpMap.rsId */
        }

    printf("=========================================================\n");
    for (snp = snps; snp != NULL; snp = snp->next)
        {
        int cmp = -1;
        char *nibDna=NULL;

        ++snpMapRows;

        /* Advance through the lookup lists until the current id is no longer
         * less than snp->name.  When one list runs out the next mode is
         * tried; once all are exhausted cmp is forced to 1 ("missing"). */
        while (cmp < 0)
            {
            while (cmp < 0)
                {
                switch (mode)
                    {
                    case 1:
                        next = affy120; break;
                    case 2:
                        next = dbSnp; break;
                    case 3:
                        next = affy10; break;
                    }
                if (next == NULL)
                    {
                    switch (mode)
                        {
                        case 1:
                            ++mode; break;
                        case 2:
                            ++mode; break;
                        case 3:
                            cmp = 1; break;
                        }
                    }
                else
                    {
                    break;
                    }
                }

            if (cmp < 0)
                {
                switch (mode)
                    {
                    case 1:
                        safef(affy120id, sizeof(affy120id), "%d", affy120->affyId); /* have int type but want string */
                        id = affy120id;
                        break;
                    case 2:
                        id = dbSnp->rsId; break;
                    case 3:
                        id = affy10->affyId; break;
                    }
                cmp=mystrcmp(id, snp->name);
                }

            if (cmp < 0)
                {
                switch (mode)
                    {
                    case 1:
                        affy120 = affy120->next; break;
                    case 2:
                        dbSnp = dbSnp->next; break;
                    case 3:
                        affy10 = affy10->next; break;
                    }
                }
            }

        if (cmp==0)
            {
            int strand=1;
            char *rc = NULL;
            int m = 0;
            int lf = 0;  /* size of left flank context (lower case dna) */
            int rf = 0;  /* size of right flank context (lower case dna) */
            int ls = 0;  /* total size of assembly dna context plus actual region in dbSnpRs/affy */
            char *origSeq = NULL; /* use to display the original dnSnpRs.assembly seq */

            switch (mode)
                {
                case 1:
                    seq = affy120->sequenceA; break;
                case 2:
                    seq = dbSnp->assembly; break;
                case 3:
                    seq = affy10->sequenceA; break;
                }

            if (sameString(seq,"-"))
                {
                ++assemblyDash;
                if (Verbose)
                    printf("(no assembly context) rsId=%s chrom=%s %u %u \n assembly=%s \n\n",
                      id,
                      snp->chrom,
                      snp->chromStart,
                      snp->chromEnd,
                      seq
                      );
                /* seq still points into the lookup record here, so there is
                 * nothing to free. */
                continue;
                }

            origSeq = seq;
            lf = leftFlank(origSeq);
            rf = rightFlank(origSeq);
            seq = cloneString(origSeq);
            stripDashes(seq);      /* remove dashes indicating insert to simplify and correct processing of nib data */
            ls = strlen(seq);      /* used to be: lengthOneDash(seq); */

            nibDna = checkAndFetchNib(chromSeq, snp, lf, ls);
            if (nibDna==NULL)
                {
                ++noDna;
                printf("no dna for %s %s %u %u \n",
                    snp->name,
                    snp->chrom,
                    snp->chromStart,
                    snp->chromEnd
                    );
                freez(&seq);   /* the clone would otherwise leak */
                continue;
                }

            if (allNs(nibDna))
                {
                ++gapNib;
                ++mismatch;
                if (Verbose)
                    printf("(nib gap) rsId=%s chrom=%s %u %u \n assembly=%s \n  snpMap=%s \n\n",
                      id,
                      snp->chrom,
                      snp->chromStart,
                      snp->chromEnd,
                      seq,
                      nibDna
                      );
                freez(&seq);
                freez(&nibDna);
                continue;
                }

            m = misses(seq,nibDna);
            if (m > 1)
                {
                /* Forward strand is not good enough: fetch again using the
                 * right flank size and compare the reverse complement. */
                rc = checkAndFetchNib(chromSeq, snp, rf, ls);
                if (rc==NULL)
                    {
                    ++noDna;
                    printf("no dna for %s %s %u %u \n",
                        snp->name,
                        snp->chrom,
                        snp->chromStart,
                        snp->chromEnd
                        );
                    freez(&seq);
                    freez(&nibDna);
                    continue;
                    }

                reverseComplement(rc,strlen(rc));
                int n = misses(seq, rc);
                if (n < m)
                    {
                    strand=-1;
                    m = n;
                    }
                }
            if (m <= 1)
                {
                ++match;
                if (strand < 1)
                  ++goodrc;
                }
            else
                {
                /* More than one miss on both strands: fall back to affine
                 * alignment.  rc is non-NULL here because this branch is
                 * only reachable after the m > 1 fetch above succeeded. */
                struct dnaSeq query, target;
                struct axt *axtAln = NULL;
                int bestScore = 0;
                ZeroVar(&query);
                query.dna = seq;
                query.size = strlen(query.dna);

                ZeroVar(&target);
                target.dna = nibDna;
                target.size = strlen(target.dna);
                axtAln = axtAffine(&query, &target, simpleDnaScheme);
                strand = 1;
                if (axtAln)
                    {
                    bestScore = axtAln->score / ls;
                    }
                axtFree(&axtAln);

                if (bestScore < threshold)
                    {
                    /* Forward alignment is below threshold; try the reverse
                     * complement and keep it only if it scores higher. */
                    ZeroVar(&target);
                    target.dna = rc;
                    target.size = strlen(target.dna);
                    axtAln = axtAffine(&query, &target, simpleDnaScheme);
                    if ((axtAln) && (bestScore < (axtAln->score / ls)))
                        {
                        strand = -1;
                        bestScore = axtAln->score / ls;
                        }
                    axtFree(&axtAln);
                    }

                if (bestScore >= threshold)
                    {
                    ++match;
                    if (strand < 1)
                        ++goodrc;
                    }
                else
                    {
                    ++mismatch;
                    }

                if ((bestScore < threshold) || Verbose)
                    {
                    printf(
                        "score=%d misses=%d strand=%d rsId=%s chrom=%s %u %u lf=%d ls=%d \n"
                        " assembly=%s \n"
                        "   snpMap=%s \n"
                        "rc snpMap=%s \n"
                        "\n",
                      bestScore,
                      m,
                      strand,
                      id,
                      snp->chrom,
                      snp->chromStart,
                      snp->chromEnd,
                      lf,
                      ls,
                      seq,
                      nibDna,
                      rc
                      );
                    }

                }

            freez(&rc);
            freez(&seq);

            }
        else
            {
            char snpLkup[10] = "";
            /* this id is missing from dbSnpRs/affy! */
            ++missing;
            switch (mode)
                {
                case 1:
                    safef(snpLkup,sizeof(snpLkup),"%s","affy120"); break;
                case 2:
                    safef(snpLkup,sizeof(snpLkup),"%s",dbSnpTbl); break;
                case 3:
                    safef(snpLkup,sizeof(snpLkup),"%s","affy10"); break;
                }
            /* id is still NULL if every lookup list was empty from the
             * start; never hand NULL to %s. */
            if (Verbose)
                printf("snpMap.name=%s is missing from %s (now at %s) \n\n",snp->name,snpLkup,
                       (id != NULL) ? id : "(none)");
            }

        freez(&nibDna);

        }
    snpMapFreeList(&snps);

    dnaSeqFree(&chromSeq);

    printf("\n\n\n Total matches for chrom %s:\n ",cn->name);
    printf("             matches: %d \n ",match);
    printf("          mismatches: %d \n",mismatch);
    printf("missing from dbSnpRs: %d \n",missing);
    printf("   rev compl matches: %d \n",goodrc);
    printf("        assembly = -: %d \n",assemblyDash);
    printf("         nib in gap : %d \n",gapNib);

    printf("\n\n=========================================\n");

    totalMatch    += match;
    totalMismatch += mismatch;
    totalMissing  += missing;
    totalGoodrc   += goodrc;
    totalAssemblyDash += assemblyDash;
    totalGapNib   += gapNib;

    /* Per-chromosome counters start over for the next chromosome. */
    match        = 0;
    mismatch     = 0;
    missing      = 0;
    goodrc       = 0;
    assemblyDash = 0;
    gapNib       = 0;

    printf("\n");
    printf("\n");

    }

slFreeList(&cns);


dbSnpRsFreeList(&dbSnps);
if (affy)
    {
    affy10KDetailsFreeList(&affy10s);
    affy120KDetailsFreeList(&affy120s);
    }

axtScoreSchemeFree(&simpleDnaScheme);

printf("\n\n\n Grand Totals:  \n ");
printf("             matches: %d \n ",totalMatch);
printf("          mismatches: %d \n",totalMismatch);
printf("missing from dbSnpRs: %d \n",totalMissing);
printf("   rev compl matches: %d \n",totalGoodrc);
printf("        assembly = -: %d \n",totalAssemblyDash);
printf("         nib in gap : %d \n",totalGapNib);


printf("\n       Total rows in snpMap: %d \n ",snpMapRows);
printf("\n        # no dna found for : %d \n ",noDna);

printf("\n\n=========================================\n");

}
Esempio n. 5
0
void affyCheck()
/* affyCheck - read in all Affy SNPs, compare to lookupTable.
 * For every SNP returned by readSnps(), the first lookupTable row with the
 * same chrom/chromStart/chromEnd is loaded and its observed string compared
 * with the Affy one.  Differences in observed (and an accompanying +/-
 * strand disagreement) are logged at verbosity 1; everything else at 2.
 * Only the first matching row is examined; any further rows are discarded
 * when the result is freed. */
{
struct snpSimple *snps = NULL;
struct snpSimple *snp = NULL;
struct snpSimple *newSnp = NULL;
char query[512];
struct sqlConnection *conn = hAllocConn();
struct sqlResult *sr;
char **row;
int count = 0;
int obsLen = 0;

snps = readSnps();

verbose(1, "checking....\n");

for (snp = snps; snp != NULL; snp = snp->next)
    {
    count++;
    verbose(2, "----------------------\n");
    verbose(2, "%d: %s:%d-%d\n", count, snp->chrom, snp->chromStart, snp->chromEnd);
    sqlSafef(query, sizeof(query), "select name, chrom, chromStart, chromEnd, strand, observed from %s"
          " where chrom = '%s' and chromStart = %d and chromEnd = %d", lookupTable, snp->chrom, snp->chromStart, snp->chromEnd);
    sr = sqlGetResult(conn, query);
    row = sqlNextRow(sr);
    if (row == NULL)
        {
        verbose(2, "no matches for %s %s\n", affyTable, snp->name);
        sqlFreeResult(&sr);
        continue;
        }
    newSnp = snpSimpleLoad(row);
    verbose(2, "comparing %s %s to %s %s\n", affyTable, snp->name, lookupTable, newSnp->name);

    if (sameString(newSnp->observed, "n/a")
        || sameString(newSnp->observed, "t/n")
        || sameString(snp->observed, newSnp->observed))
        {
        /* Nothing to report: the lookup row is a placeholder or agrees. */
        }
    else if ((obsLen = strlen(newSnp->observed)) > 3)
        {
        verbose(2, "%s is not bi-allelic (%s)\n", newSnp->name, newSnp->observed);
        }
    else
        {
        verbose(1, "----------------------\n");
        verbose(1, "%d: %s:%d-%d\n", count, snp->chrom, snp->chromStart, snp->chromEnd);
        verbose(1, "comparing %s %s to %s %s\n", affyTable, snp->name, lookupTable, newSnp->name);
        verbose(1, "observed difference\n");
        verbose(1, "%s observed = %s, %s observed = %s\n", affyTable, snp->observed, lookupTable, newSnp->observed);

        /* Only an explicit +/- disagreement counts as a strand difference. */
        if ((snp->strand == '-' && newSnp->strand == '+')
            || (snp->strand == '+' && newSnp->strand == '-'))
            {
            verbose(1, "strand difference\n");
            verbose(1, "%s strand = %c, %s strand = %c\n", affyTable, snp->strand, lookupTable, newSnp->strand);
            }
        }

    /* Release the per-SNP allocations; previously both leaked on every
     * iteration.  Freeing the result also disposes of any rows not fetched,
     * which replaces the old drain loops.
     * NOTE(review): assumes snpSimpleLoad() returns a single element whose
     * next pointer is NULL, so the list free releases just that element --
     * confirm against its definition. */
    snpSimpleFreeList(&newSnp);
    sqlFreeResult(&sr);
    }

snpSimpleFreeList(&snps);
hFreeConn(&conn);

}