Esempio n. 1
0
struct fileInfo *listDirXExt(char *dir, char *pattern, boolean fullPath, boolean ignoreStatFailures)
/* Return list of files in dir matching wildcard pattern (every entry except
 * "." and ".." if pattern is NULL) with extra info, sorted with cmpFileInfo.
 * If fullPath is true then the path will be included in the name of each file.
 * Returns NULL if dir can't be opened.  When stat fails on an entry, either the
 * errno is recorded in that entry (ignoreStatFailures) or the program aborts.
 * Aborts if dir plus a file name does not fit in the internal path buffer. */
{
struct fileInfo *list = NULL, *el;
struct dirent *de;
DIR *d;
int dirNameSize = strlen(dir);
int fileNameOffset = dirNameSize+1;
char pathName[512];

if ((d = opendir(dir)) == NULL)
    return NULL;
/* Make sure "dir/" itself fits before copying it into the fixed-size buffer. */
if (fileNameOffset >= (int)sizeof(pathName))
    {
    closedir(d);
    errAbort("directory name too long in listDirX: %s", dir);
    }
memcpy(pathName, dir, dirNameSize);
pathName[dirNameSize] = '/';

while ((de = readdir(d)) != NULL)
    {
    char *fileName = de->d_name;
    if (differentString(fileName, ".") && differentString(fileName, ".."))
        {
        if (pattern == NULL || wildMatch(pattern, fileName))
            {
            struct stat st;
            bool isDir = FALSE;
            int statErrno = 0;
            /* Room is needed for the name plus its zero terminator. */
            if (fileNameOffset + strlen(fileName) >= sizeof(pathName))
                {
                closedir(d);
                errAbort("path too long in listDirX: %s/%s", dir, fileName);
                }
            strcpy(pathName+fileNameOffset, fileName);
            if (stat(pathName, &st) < 0)
                {
                if (ignoreStatFailures)
                    {
                    statErrno = errno;
                    /* st is undefined after a failed stat; zero it so that the
                     * size, time and directory flag recorded below are not garbage. */
                    memset(&st, 0, sizeof(st));
                    }
                else
                    errAbort("stat failed in listDirX");
                }
            if (S_ISDIR(st.st_mode))
                isDir = TRUE;
            if (fullPath)
                fileName = pathName;
            el = newFileInfo(fileName, st.st_size, isDir, statErrno, st.st_atime);
            slAddHead(&list, el);
            }
        }
    }
closedir(d);
slSort(&list, cmpFileInfo);
return list;
}
Esempio n. 2
0
static struct annoRow *asvNextRow(struct annoStreamer *sSelf, char *minChrom, uint minEnd,
				  struct lm *callerLm)
/* Return an annoRow built from the next VCF record that is not dropped by the
 * streamer's filters, or NULL if there are no more items or maxRecords has been
 * reached.  callerLm is handed on to annoRowFromStringArray. */
{
struct annoStreamVcf *self = (struct annoStreamVcf *)sSelf;
boolean wrongChrom = (minChrom != NULL && sSelf->chrom != NULL
		      && differentString(minChrom, sSelf->chrom));
if (wrongChrom)
    errAbort("annoStreamVcf %s: nextRow minChrom='%s' but region chrom='%s'",
	     sSelf->name, minChrom, sSelf->chrom);
boolean limitReached = (self->maxRecords > 0 && self->recordCount >= self->maxRecords);
if (limitReached)
    return NULL;
// Keep reading while rows fail the filter as left-join failures; stop on a
// right-join failure, a passing row, or EOF.
boolean rightFail = FALSE;
char **words = NULL;
for (;;)
    {
    words = nextRowUnfiltered(self, minChrom, minEnd);
    if (words == NULL)
	return NULL;
    if (!annoFilterRowFails(sSelf->filters, words, self->numCols, &rightFail))
	break;
    if (rightFail)
	break;
    }
struct vcfRecord *rec = self->record;
char *chrom = getProperChromName(self, rec->chrom);
self->recordCount++;
return annoRowFromStringArray(chrom, rec->chromStart, rec->chromEnd,
			      rightFail, words, self->numCols, callerLm);
}
Esempio n. 3
0
void findStanAlignments(char *db, char *stan, char *image, char *pslOut)
/* Load the records in file stan and, for every record whose type is not
 * "Control", call outputAlignmentForStan with a connection to db, the hash
 * filled from file image, and the output file pslOut.  Progress messages go
 * through warn() and a dot is printed to stdout every 10000 records. */
{
struct hash *iHash = newHash(5);
struct stanMad *smList = NULL, *sm = NULL;
FILE *out = mustOpen(pslOut, "w");
int count =0;
struct sqlConnection *conn = NULL;
warn("Getting sql Connection...");
conn = hAllocConn(db);
warn("Reading in image clones...");
readInImageHash(iHash, image);
warn("Loading Stanford Alignments..");
smList = stanMadLoadAll(stan);
warn("Finding best Alignments...");
for(sm = smList; sm != NULL; sm = sm->next)
    {
    if(differentString(sm->type,"Control"))
	{
	if((count++ % 10000) ==0)
	    {
	    printf(".");
	    fflush(stdout);
	    }
	outputAlignmentForStan(conn, sm, iHash, out);
	}
    }
printf("\n");
warn("Done. Cleaning up...");
stanMadFreeList(&smList);
freeHash(&iHash);
hFreeConn(&conn);
/* The output file was previously left open; close it so the data is flushed
 * and the handle released. */
carefulClose(&out);
}
Esempio n. 4
0
static char *normalizeType(char *type)
/* Strip quotation marks from type (in place) and convert common synonyms.
 * Returns a cloned string when type is "Factor" (any case) or when
 * cvTermNormalized() yields something different from type; otherwise returns
 * type itself.  A NULL type is returned unchanged. */
{
if (type == NULL)
    return type;

(void)stripChar(type,'\"');
if (sameWord(type,"Factor"))
    return cloneString(CV_TERM_ANTIBODY);

char *normalized = cloneString(cvTermNormalized(type));
if (!differentString(normalized,type))
    {
    /* Nothing changed, so hand back the caller's own string. */
    freeMem(normalized);
    return type;
    }
return normalized;
}
Esempio n. 5
0
struct hash *loadRegions(char *file)
/* Load bed regions from file, sort them with bedCmp, and return a hash keyed
 * by chrom whose values are the per-chrom sublists of the sorted list. */
{
struct hash *regionHash = newHash(6);
struct bed *regions = bedLoadNAll(file, outDir ? 4 : 3);
struct bed *chromHead = NULL, *cur = NULL, *following = NULL;

/* order by chrom, start */
slSort(&regions, bedCmp);
verbose(2, "found %d regions\n", slCount(regions));

chromHead = regions;
cur = regions;
while (cur != NULL)
    {
    verbose(3, "region %s:%d-%d\n", cur->chrom, cur->chromStart+1, cur->chromEnd);
    following = cur->next;
    /* cur ends a run when the list ends or the next bed is on another chrom. */
    if (following == NULL || differentString(cur->chrom, following->chrom))
	{
	cur->next = NULL;
	hashAdd(regionHash, cur->chrom, chromHead);
	verbose(2, "just added %d regions on %s\n", slCount(chromHead), cur->chrom);
	chromHead = following;
	}
    cur = following;
    }
return regionHash;
}
Esempio n. 6
0
boolean wigIsOverlayTypeAggregate(char *aggregate)
/* Return FALSE if aggregate is NULL, otherwise the result of comparing it
 * against WIG_AGGREGATE_NONE (true when they differ). */
{
return (aggregate == NULL) ? FALSE : differentString(aggregate, WIG_AGGREGATE_NONE);
}
Esempio n. 7
0
File: ra.c Progetto: bh0085/kent
struct hash *raReadWithFilter(char *fileName, char *keyField,char *filterKey,char *filterValue)
/* Return hash that contains all filtered ra records in file keyed by given field, which must exist.
 * The values of the hash are themselves hashes.  The filter is a key/value pair that must exist.
 * Example raReadWithFilter(file,"term","type","antibody"): returns hash of hashes of every term with type=antibody */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct hash *bigHash = hashNew(14);
struct hash *hash;
while ((hash = raNextRecord(lf)) != NULL)
    {
    char *key = hashFindVal(hash, keyField);
    if (key == NULL)
        errAbort("Couldn't find key field %s line %d of %s",
                keyField, lf->lineIx, lf->fileName);
    if (filterKey != NULL)
        {
        char *filter = hashFindVal(hash, filterKey);
        if (filter == NULL)
            {
            hashFree(&hash);
            continue;
            }
        if (filterValue != NULL && differentString(filterValue,filter))
            {
            hashFree(&hash);
            continue;
            }
        }
        hashAdd(bigHash, key, hash);
    }
lineFileClose(&lf);
if (hashNumEntries(bigHash) == 0)
    hashFree(&bigHash);
return bigHash;
}
Esempio n. 8
0
void removeElement(char *el, char ***array, unsigned *count)
/* Remove every element equal to el from an array of elements.  On return
 * *array points to a newly allocated array holding the remaining elements (NULL
 * if none remain) and *count is updated.  Does nothing when *count is 0.
 *
 * Elements are compared whole.  The earlier implementation deleted the text
 * "el," anywhere in the comma-joined list, so removing "ab" also mangled an
 * element such as "cab", and it decremented the count only once even if el
 * appeared several times. */
{
  char *arrayCurr, *arrayKept, *in, *out;
  int sizeOne, size;
  char **cArray, **rArray=NULL;

  if (*count > 0)
    {
      size_t elLen = strlen(el);
      size = *count;
      arrayCurr = sqlStringArrayToString(*array, *count);
      /* The filtered list can never be longer than the joined original. */
      AllocArray(arrayKept, strlen(arrayCurr) + 1);
      in = arrayCurr;
      out = arrayKept;
      while (*in != '\0')
	{
	  char *comma = strchr(in, ',');
	  size_t len = (comma != NULL) ? (size_t)(comma - in) : strlen(in);
	  size_t step = (comma != NULL) ? len + 1 : len;
	  /* As before, only a comma-terminated element can match. */
	  if (comma != NULL && len == elLen && strncmp(in, el, elLen) == 0)
	    size--;
	  else
	    {
	      memcpy(out, in, step);
	      out += step;
	    }
	  in += step;
	}
      *out = '\0';
      freeMem(arrayCurr);
      /* NOTE(review): arrayKept and cArray are not freed here because it is not
       * visible in this file whether sqlStringDynamicArray keeps pointers into
       * its input -- confirm and release them if it copies. */
      sqlStringDynamicArray(arrayKept, &cArray, &sizeOne);
      assert(sizeOne == size);
      *count = size;
      if (size > 0) 
	{
	  AllocArray(rArray, size);
	  CopyArray(cArray, rArray, size);
	  *array = rArray;
	}
      else
	*array = NULL;
    }
}
Esempio n. 9
0
struct slName *randomBigBedIds(char *table, struct sqlConnection *conn, int count)
/* Return some arbitrary IDs from a bigBed file: up to count names taken from a
 * shuffled sample of intervals.  A name is skipped only when it equals the one
 * most recently added.  Returns NULL if the bigBed has no name field (fewer
 * than 4 fields). */
{
/* Figure out bigBed file name and open it.  Get contents for first chromosome as an example. */
struct slName *idList = NULL;
char *fileName = bigBedFileName(table, conn);
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct bbiChromInfo *chromList = bbiChromList(bbi);
struct lm *lm = lmInit(0);
int orderedCount = count * 4;
if (orderedCount < 100)
    orderedCount = 100;
struct bigBedInterval *iv, *ivList = getNElements(bbi, chromList, lm, orderedCount);
shuffleList(&ivList);
// Make a list of item names from intervals.  row[] has exactly fieldCount
// slots, so row[3] only exists when the file has at least 4 fields.
if (bbi->fieldCount >= 4)
    {
    int outCount = 0;
    for (iv = ivList;  iv != NULL && outCount < count;  iv = iv->next)
	{
	char *row[bbi->fieldCount];
	char startBuf[16], endBuf[16];
	bigBedIntervalToRow(iv, chromList->name, startBuf, endBuf, row, bbi->fieldCount);
	if (idList == NULL || differentString(row[3], idList->name))
	    {
	    slAddHead(&idList, slNameNew(row[3]));
	    outCount++;
	    }
	}
    }
lmCleanup(&lm);
bbiFileClose(&bbi);
freeMem(fileName);
return idList;
}
Esempio n. 10
0
char *showTableField(struct trackDb *track, char *varName, boolean useJoiner)
/* Print the "table:" label and a drop-down of tables (all tables of the
 * selected db when track is NULL, otherwise the tables for track), followed by
 * a restriction-date link when one applies.  Returns the selected table name. */
{
struct slName *nameList = (track == NULL)
    ? tablesForDb(findSelDb())
    : cartTrackDbTablesForTrack(database, track, useJoiner);

/* The cart's choice is honored only if it is in our list; otherwise fall back
 * to the first table. */
char *selTable = cartUsualString(cart, varName, nameList->name);
if (!slNameInListUseCase(nameList, selTable))
    selTable = nameList->name;

/* Label and opening of the drop-down list. */
hPrintf("<B>table: </B>");
hPrintf("<SELECT NAME=\"%s\" %s>\n", varName, onChangeTable());

struct trackDb *selTdb = NULL;
struct slName *option;
for (option = nameList; option != NULL; option = option->next)
    {
    char *table = option->name;
    struct trackDb *tdb = NULL;
    if (track != NULL)
	tdb = findTdbForTable(database,track,table, ctLookupName);
    hPrintf("<OPTION VALUE=\"%s\"", table);

    // Related tables that are noGenome get disabled -- if a non-positional
    // table is selected then we output its entire contents.
    boolean disable = cartTrackDbIsNoGenome(database, table) &&
		      (track == NULL || differentString(track->table, table));
    if (disable)
	{
	hPrintf(" DISABLED"NO_GENOME_CLASS);
	}
    else if (sameString(selTable, table))
	{
	hPrintf(" SELECTED");
	selTdb = tdb;
	}

    /* Show the short label too when the table's tdb has one that differs from
     * the current track's. */
    boolean withLabel = (tdb != NULL) &&
		((curTrack == NULL) || differentWord(tdb->shortLabel, curTrack->shortLabel));
    if (withLabel)
	{
	hPrintf(">%s (%s)\n", tdb->shortLabel, table);
	}
    else
	{
	hPrintf(">%s\n", table);
	}
    }
hPrintf("</SELECT>\n");

if (!trackHubDatabase(database))
    {
    char *restrictDate = encodeRestrictionDateDisplay(database,selTdb);
    if (restrictDate)
	{
	hPrintf("<A HREF=\'%s\' TARGET=BLANK>restricted until:</A>&nbsp;%s",
		    ENCODE_DATA_RELEASE_POLICY, restrictDate);
	freeMem(restrictDate);
	}
    }
return selTable;
}
Esempio n. 11
0
static void printPgSiftItem(char *label, char *value)
/* Print one "<li>label: value</li>" line unless value is NULL or empty. */
{
if (value != NULL && differentString(value, ""))
    printf("<li>%s: %s</li>\n", label, value);
}

void printPgSiftPred (char *db, char *tableName, struct pgSnp *item)
/* Print the predictions for an hgc item click for a pgSnp track: every row of
 * tableName with the same chrom, chromStart and chromEnd as item is printed as
 * a SIFT prediction followed by a list of its non-empty fields. */
{
struct pgSiftPred *el;
struct sqlResult *sr;
char **row;
char query[512];
struct sqlConnection *conn = hAllocConn(db);

sqlSafef(query, sizeof(query), "select * from %s where chrom = '%s' and chromStart = %d and chromEnd = %d",
    tableName, item->chrom, item->chromStart, item->chromEnd);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* NOTE(review): el is never freed; release it here if pgSiftPred has a
     * matching free function. */
    el = pgSiftPredLoadWithNull(row);
    printf("<br><b>SIFT prediction</b>: %s\n", el->prediction);
    printf("<ul>");
    printPgSiftItem("Gene ID", el->geneId);
    printPgSiftItem("Gene name", el->geneName);
    printPgSiftItem("Gene description", el->geneDesc);
    printPgSiftItem("Protein family description", el->protFamDesc);
    printPgSiftItem("OMIM disease", el->omimDisease);
    printPgSiftItem("Average allele frequency", el->aveAlleleFreq);
    printPgSiftItem("CEU allele frequency", el->ceuAlleleFreq);
    printf("</ul>\n");
    }
/* Both the result and the connection were previously leaked. */
sqlFreeResult(&sr);
hFreeConn(&conn);
}
Esempio n. 12
0
static void asbwDoQuery(struct annoStreamBigWig *self, char *minChrom, uint minEnd)
/* Store results of an interval query. [Would be nice to make a streaming version of this.]
 * Fills self->intervalList (and points self->nextInterval at its head) with the
 * result of bigWigIntervalQuery on either the streamer's region, or -- when no
 * region chrom is set -- the next chromosome in self->chromList.  minChrom/minEnd,
 * if given, raise the start of the query.  Sets self->eof when a region query
 * returns nothing or when the chromosome list is exhausted. */
{
struct annoStreamer *sSelf = &(self->streamer);
// The local memory pool handed to bigWigIntervalQuery is created on first use.
if (self->intervalQueryLm == NULL)
    self->intervalQueryLm = lmInit(0);
if (sSelf->chrom != NULL)
    {
    // Single-region query.
    uint start = sSelf->regionStart;
    if (minChrom)
	{
	if (differentString(minChrom, sSelf->chrom))
	    errAbort("annoStreamBigWig %s: nextRow minChrom='%s' but region chrom='%s'",
		     sSelf->name, minChrom, sSelf->chrom);
	// Never start before minEnd.
	if (start < minEnd)
	    start = minEnd;
	}
    self->intervalList = bigWigIntervalQuery(self->bbi, sSelf->chrom, start, sSelf->regionEnd,
					     self->intervalQueryLm);
    // If there are no intervals in the query region, we're done.
    if (self->intervalList == NULL)
        self->eof = TRUE;
    }
else
    {
    // Genome-wide query: break it into chrom-by-chrom queries.
    // First call starts at the head of chromList; later calls advance by one.
    if (self->queryChrom == NULL)
	self->queryChrom = self->chromList;
    else
	self->queryChrom = self->queryChrom->next;
    if (minChrom != NULL)
	{
	// Skip chroms that precede minChrom
	// (strcmp order, so this presumably relies on chromList being sorted the same way -- TODO confirm)
	while (self->queryChrom != NULL && strcmp(self->queryChrom->name, minChrom) < 0)
	    self->queryChrom = self->queryChrom->next;
	}
    if (self->queryChrom == NULL)
	{
	// Ran off the end of the chromosome list.
	self->eof = TRUE;
	self->intervalList = NULL;
	}
    else
	{
	char *chrom = self->queryChrom->name;
	int start = 0;
	// minEnd only applies on minChrom itself; other chroms are queried from 0.
	if (minChrom != NULL && sameString(chrom, minChrom))
	    start = minEnd;
	uint end = self->queryChrom->size;
	// Note: eof is not set in this branch even if this chromosome yields no intervals.
	self->intervalList = bigWigIntervalQuery(self->bbi, chrom, start, end,
						 self->intervalQueryLm);
	}
    }
self->nextInterval = self->intervalList;
}
Esempio n. 13
0
static void gapSanityCheck(struct agpGap *gapList)
/* Walk gapList and print a WARNING (verbose level 1) for each gap that starts
 * before 0, ends past its chromosome size (looked up in cInfoHash), ends
 * exactly at the chromosome end without being of type "telomere", has
 * chromStart >= chromEnd, or starts at or before the end of the previous gap
 * on the same chromosome. */
{
int prevEnd = 0;
int prevStart = 0;
/* These point into the previous list element, which outlives the loop, so no
 * copy is needed (the earlier cloned chrom name was never freed). */
char *prevChr = NULL;
char *prevType = NULL;
struct agpGap *gap;

for (gap = gapList; gap; gap = gap->next)
    {
    int chrSize = hashIntVal(cInfoHash, gap->chrom);
    if (gap->chromStart < 0)
	verbose(1, "WARNING: gap chromStart < 0 at %s:%d-%d\n",
		gap->chrom, gap->chromStart, gap->chromEnd);
    if (gap->chromEnd > chrSize)
	verbose(1, "WARNING: gap chromEnd > chromSize(%d) "
	    "at %s:%d-%d\n", chrSize, gap->chrom,
		gap->chromStart, gap->chromEnd);
    if (gap->chromEnd == chrSize && differentString(gap->type, "telomere"))
	verbose(1, "WARNING: gap at end of chromosome not telomere "
	    "at %s:%d-%d, type: %s\n", gap->chrom,
		gap->chromStart, gap->chromEnd, gap->type);
    if (gap->chromStart >= gap->chromEnd)
	verbose(1, "WARNING: gap chromStart >= chromEnd at %s:%d-%d\n",
	    gap->chrom, gap->chromStart, gap->chromEnd);
    /* prevEnd > 0 doubles as "a previous gap has been seen", in which case
     * prevChr and prevType have been set too. */
    if (prevEnd > 0 && sameWord(prevChr, gap->chrom) &&
	    (prevEnd >= gap->chromStart))
	verbose(1,"WARNING: overlapping gap at "
	    "%s:%d-%d(%s) and %s:%d-%d(%s)\n",
		gap->chrom, prevStart, prevEnd, prevType, gap->chrom,
		    gap->chromStart, gap->chromEnd, gap->type);
    /* Remember this gap for the next round.  prevType used to be updated only
     * while prevEnd was still <= 0, so later overlap warnings reported a
     * stale type. */
    prevChr = gap->chrom;
    prevType = gap->type;
    prevStart = gap->chromStart;
    prevEnd = gap->chromEnd;
    }
}
struct consWiggle *consWiggleFind(char *db,struct trackDb *parent,char *table)
/* Return the conservation wiggle of parent whose table equals table, or NULL
 * if parent is NULL, is not of a "wigMaf" type, or has no such wiggle. */
{
struct consWiggle *wig;

if (parent == NULL)
    return NULL;
if (!startsWith("wigMaf", parent->type))
    return NULL;

for (wig = wigMafWiggles(db, parent); wig != NULL; wig = wig->next)
    {
    if (!differentString(wig->table,table))
        return wig;
    }
return NULL;
}
Esempio n. 15
0
void processPrimers(struct lineFile *pf, FILE *of)
/* Read and process isPCR file and sts locations.  Each line of pf must be a
 * 21-column psl whose query name has at least two '_'-separated parts; the
 * second part is the id looked up in stsHash.  Placements are accumulated on
 * the matching sts, and filterPrimersAndWrite(of, sts) is called whenever the
 * id changes and once more at the end. */
{
int lineSize, wordCount;
char *line;
char *words[21];
char *dbsts_name, *dbsts[4];
char *currDbsts = NULL;	/* Clone of the id being processed; NULL before the first line. */
struct sts *sts=NULL;
struct psl *psl;
struct place *place;

while (lineFileNext(pf, &line, &lineSize))
    {
    wordCount = chopTabs(line, words);
    if (wordCount != 21)
	errAbort("Bad line %d of %s\n", pf->lineIx, pf->fileName);
    psl = pslLoad(words);
    dbsts_name = cloneString(psl->qName);
    wordCount = chopByChar(dbsts_name, '_', dbsts, ArraySize(dbsts));
    /* dbsts[1] is only filled in when the name has at least two parts. */
    if (wordCount < 2)
	errAbort("Expecting '_' in query name %s line %d of %s\n",
		 psl->qName, pf->lineIx, pf->fileName);
    if (currDbsts == NULL || differentString(dbsts[1], currDbsts))
      {
	if (sts != NULL)
	  {
	    filterPrimersAndWrite(of, sts);
	    /* stsFree(&sts); */
	  }
	/* Always release the previous id; it used to leak whenever that id had
	 * not been found in stsHash. */
	freez(&currDbsts);
	currDbsts = cloneString(dbsts[1]);
	sts = NULL;
	if (hashLookup(stsHash, dbsts[1]))
	  sts = hashMustFindVal(stsHash, dbsts[1]);
      }
    /* dbsts[] points into dbsts_name and is not used past this point. */
    freez(&dbsts_name);
    if (sts)
      {
	AllocVar(place);
	/* Check if this psl record is already present */
	/* NOTE(review): place was just allocated, so place->psl is presumably
	 * empty and this test always passes -- confirm what list was intended. */
	if (!pslInList(place->psl, psl))
	  {
	    slAddHead(&place->psl, psl);
	    place->unali = calcUnali(sts, psl);
	    place->sizeDiff = calcSizeDiff(sts, psl);
	    place->badBits = calcBadBits(place);
	    if (place->sizeDiff < (200 - (place->badBits * 50)))
	      slAddHead(&sts->place, place);
	    else
	      placeFree(&place);
	  }
      }
    }
 if (sts != NULL)
   filterPrimersAndWrite(of, sts);
 freez(&currDbsts);
}
struct autoTest *autoTestLoad(char **row)
/* Build an autoTest from a row fetched with select * from autoTest.
 * Dispose of the result with autoTestFree().  Asserts that each dynamic array
 * column has the number of entries given by its count column. */
{
struct autoTest *ret;
int parsedSize;
char *aliasText, *threeDText;

AllocVar(ret);

/* The counts come first because the array columns are checked against them. */
ret->ptCount = sqlSigned(row[5]);
ret->difCount = sqlSigned(row[7]);
ret->valCount = sqlSigned(row[10]);

ret->id = sqlUnsigned(row[0]);
safecpy(ret->shortName, sizeof(ret->shortName), row[1]);
ret->longName = cloneString(row[2]);

/* The alias list is parsed from a private copy of the column. */
aliasText = cloneString(row[3]);
sqlStringArray(aliasText, ret->aliases, 3);

/* threeD is optional: left unset when the column is NULL or empty. */
threeDText = row[4];
if (threeDText != NULL && differentString(threeDText, ""))
   ret->threeD = pointCommaIn(&threeDText, NULL);

sqlShortDynamicArray(row[6], &ret->pts, &parsedSize);
assert(parsedSize == ret->ptCount);

sqlUbyteDynamicArray(row[8], &ret->difs, &parsedSize);
assert(parsedSize == ret->difCount);

sqlSignedArray(row[9], ret->xy, 2);

sqlStringDynamicArray(row[11], &ret->vals, &parsedSize);
assert(parsedSize == ret->valCount);

ret->dblVal = sqlDouble(row[12]);
ret->fltVal = sqlFloat(row[13]);

sqlDoubleDynamicArray(row[14], &ret->dblArray, &parsedSize);
assert(parsedSize == ret->valCount);

sqlFloatDynamicArray(row[15], &ret->fltArray, &parsedSize);
assert(parsedSize == ret->valCount);

return ret;
}
Esempio n. 17
0
char *vcfGetSlashSepAllelesFromWords(char **words, struct dyString *dy,
				     boolean *retSkippedFirstBase)
/* Overwrite dy with a /-separated allele string from VCF words (words[3] is the
 * reference allele, words[4] the comma-separated alternate alleles),
 * skipping the extra initial base that VCF requires for indel alleles if necessary.
 * An allele that is empty after skipping is written as "-".  If
 * retSkippedFirstBase is not NULL it is set to whether a base was skipped.
 * Return dy->string for convenience. */
{
dyStringClear(dy);
// VCF reference allele gets its own column:
char *refAllele = words[3];
char *altAlleles = words[4];
// First determine whether there is an extra initial base that we need to skip.
// An empty reference allele has no initial base; without this guard an empty
// ref paired with an empty alt would "match" on the terminator and
// refAllele[1] would be read past the end of the string.
boolean allStartSame = (refAllele[0] != '\0');
char *p;
while ((p = strchr(altAlleles, ',')) != NULL)
    {
    if (altAlleles[0] != refAllele[0])
	allStartSame = FALSE;
    altAlleles = p+1;
    }
if (altAlleles[0] != refAllele[0])
    allStartSame = FALSE;
int offset = allStartSame ? 1 : 0;
if (refAllele[offset] == '\0')
    dyStringAppendC(dy, '-');
else
    dyStringAppend(dy, refAllele+offset);
// VCF alternate alleles are comma-separated, make them /-separated:
altAlleles = words[4];
if (isNotEmpty(altAlleles) && differentString(altAlleles, "."))
    {
    // Now construct the string:
    while ((p = strchr(altAlleles, ',')) != NULL)
	{
	dyStringAppendC(dy, '/');
	int len = p - altAlleles - offset;
	if (len == 0)
	    dyStringAppendC(dy, '-');
	else
	    dyStringAppendN(dy, altAlleles+offset, len);
	altAlleles = p+1;
	}
    dyStringAppendC(dy, '/');
    int len = strlen(altAlleles) - offset;
    if (len == 0)
	dyStringAppendC(dy, '-');
    else
	dyStringAppendN(dy, altAlleles+offset, len);
    }
if (retSkippedFirstBase)
    *retSkippedFirstBase = offset;
return dy->string;
}
boolean sameExceptForSome(char **a, char **b, int size, bool *some)
/* Compare the first size strings of a and b pairwise.  Return TRUE if they are
 * the same everywhere except possibly at positions where the some array is TRUE. */
{
int ix = 0;
while (ix < size)
    {
    /* Positions flagged in some[] are exempt from the comparison. */
    if (!some[ix] && differentString(a[ix], b[ix]))
        return FALSE;
    ++ix;
    }
return TRUE;
}
int lfNamePositionCmp(const void *va, const void *vb)
	/* Compare based on name, then chromStart, used for
	sorting sample based tracks. */
{
	const struct linkedFeatures *a = *((struct linkedFeatures **)va);
	const struct linkedFeatures *b = *((struct linkedFeatures **)vb);
	int dif;
	char *tgName = NULL;
	if(sortGroupList != NULL)
		tgName = sortGroupList->shortLabel;
	if(tgName != NULL)
	{
		if(sameString(a->name, tgName) && differentString(b->name, tgName))
			return -1;
		if(sameString(b->name, tgName) && differentString(a->name, tgName))
			return 1;
	}
	dif = strcmp(a->name, b->name);
	if (dif == 0)
		dif = a->start - b->start;
	return dif;
}
Esempio n. 20
0
void doGenePreds(struct sqlConnection *conn, char *db, char *orthoDb, char *chrom, 
	    char *netTable, char *geneFileName, char *geneTableName,
	    char *outBedName, char *selectedFileName, int *foundCount, int *notFoundCount)	
/* Map over genePreds.  Genes are loaded from geneFileName if given, otherwise
 * from geneTableName on chrom.  Each gene on chrom is passed to
 * orthoBedFromGene; results with at least one exon are written to outBedName
 * (and the source gene to selectedFileName, if given) and counted in
 * *foundCount, the rest are counted in *notFoundCount.  If the cdsErrorFile
 * option is set, that file is (re)created containing only a header line. */
{
FILE *bedOut = NULL;
FILE *selectedOut = NULL;
FILE *cdsErrorFp = NULL;
struct genePred *gene = NULL, *geneList = NULL;

//init output files
if(optionExists("cdsErrorFile"))
{
    /* mustOpen aborts with a message on failure; the earlier unchecked fopen
     * would have passed a NULL stream to fprintf. */
    cdsErrorFp = mustOpen( optionVal("cdsErrorFile", NULL), "w" );
    fprintf( cdsErrorFp, "#name\tchrom\ttxStart\ttxEnd\tcdsStart\tcdsEnd\tstrand\texonCount\n" );
    carefulClose(&cdsErrorFp);
}

warn("Loading Gene Predictions.");
assert(outBedName);
if(geneFileName)
    geneList=genePredLoadAll(geneFileName);
else
    geneList=loadGeneFromTable(conn, geneTableName, chrom, 0, BIGNUM);
/* Convert genePreds. */
warn("Converting genes.");
bedOut = mustOpen(outBedName, "w");
if (selectedFileName != NULL)
    selectedOut = mustOpen(selectedFileName, "w");
for(gene = geneList; gene != NULL; gene = gene->next)
    {
    struct genePred *synGene = NULL;
    /* A gene file may hold other chromosomes; only chrom is handled here. */
    if(differentString(gene->chrom, chrom))
	continue;
    synGene = orthoBedFromGene(conn, db, orthoDb, netTable, gene);
    occassionalDot();
    if(synGene != NULL && synGene->exonCount > 0)
	{
	(*foundCount)++;
	genePredTabOut(synGene, bedOut);
        if (selectedOut != NULL)
            genePredTabOut(gene, selectedOut);
	}
    else
	(*notFoundCount)++;
    genePredFree(&synGene);
    }
carefulClose(&selectedOut);
carefulClose(&bedOut);
}
Esempio n. 21
0
void checkMetaTables(struct mdbObj *mdbObj, char *database)
/* For every object in the mdbObj list whose objType is "table", check that its
 * tableName exists in database.  Objects with no tableName, or whose table is
 * missing (and which have no "attic" var), get deleteThis set to TRUE; a
 * warning is issued for each problem found. */
{
struct sqlConnection *conn = sqlConnect(database);
struct mdbObj *obj;

verbose(1, "----------------------------------------------\n");
verbose(1, "Checking that tables specified in metaDb exist in database\n");
verbose(1, "----------------------------------------------\n");
for (obj = mdbObj; obj != NULL; obj = obj->next)
    {
    struct mdbVar *typeVar = hashFindVal(obj->varHash, "objType");
    if (typeVar == NULL)
        {
        warn("objType not found in object %s", obj->obj);
        continue;
        }
    /* Only objects describing tables are of interest. */
    if (differentString(typeVar->val, "table"))
        continue;

    obj->deleteThis = FALSE;
    struct mdbVar *nameVar = hashFindVal(obj->varHash, "tableName");
    if (nameVar == NULL)
        {
        obj->deleteThis = TRUE;
        warn("tableName not found in object %s", obj->obj);
        continue;
        }

    char *tableName = nameVar->val;
    if (!startsWith(obj->obj, tableName))
        warn("tableName %s does not start with object name %s", tableName, obj->obj);

    struct mdbVar *atticVar = hashFindVal(obj->varHash, "attic");
    if (sqlTableExists(conn, tableName))
        continue;

    /* A missing table only gets a warning when the object is in the attic. */
    if (atticVar)
        {
        warn("attic metaDb table %s not found in database %s", tableName, database);
        continue;
        }
    obj->deleteThis = TRUE;
    warn("metaDb table %s not found in database %s", tableName, database);
    }
sqlDisconnect(&conn);
}
char *mayRenameFile(struct sqlConnection *conn, char *table, char *oldFileName, char *newFileName, char *downDir)
// This is a table with a fileName field; it may point to the same file
// that we renamed earlier, so we don't want to rename it again.
// Reads the first fileName from table, checks that what getSymLinkFile() returns
// for it equals downDir/oldFileName (aborting otherwise), and returns a cloned
// copy of fileName with its last path component replaced by newFileName.
// As visible here, no unlink or link is actually performed: the !doTest
// branches only log.
{
char buffer[10 * 1024];
char fileName[10 * 1024];

// NOTE(review): table is formatted straight into the query with safef -- confirm it is trusted.
safef(buffer, sizeof buffer, "select fileName from %s limit 1", table);
if (sqlQuickQuery(conn, buffer, fileName, sizeof fileName) == NULL)
    errAbort("couldn't get fileName from table %s\n", table);

// NOTE(review): link is used below without a NULL check and is never freed;
// whether getSymLinkFile can return NULL or allocates is not visible here.
char *link = getSymLinkFile(fileName);

#ifdef NOTNOW
verbose(2,"got fileName %s\n", fileName);

FILE *f = fopen(fileName, "r");
if (f == NULL)
    errAbort("fileName %s from table %s can't be opened", fileName, table);
else
    fclose(f);
#endif

// buffer now holds the path the symlink is expected to point at.
safef(buffer, sizeof buffer, "%s/%s", downDir, oldFileName);
if (differentString(link, buffer))
    errAbort("symlink to '%s' in table '%s', is not the same as '%s'\n", link,
        table, buffer);


if (!doTest)
    {
    verbose(2, "unlinking file %s\n", fileName);
    }

// Overwrite everything after the last '/' in fileName with newFileName.
char *ptr = strrchr(fileName, '/');
if (ptr == NULL)
    errAbort("can't find '/' in %s\n", fileName);
ptr++;

// Space remaining in fileName from ptr to the end of the array.
int bufferLen = sizeof(fileName) - (ptr - fileName);
safef(ptr, bufferLen, "%s", newFileName);

// buffer becomes the new link target, used only in the log message below.
safef(buffer, sizeof buffer, "%s/%s", downDir, newFileName);

if (!doTest)
    {
    verbose(2, "linking %s to %s\n", buffer, fileName);
    }

return cloneString(fileName);
}
Esempio n. 23
0
char *getAllele2(char *allele1, struct hashEl *helCEU, struct hashEl *helCHB, 
                 struct hashEl *helJPT, struct hashEl *helYRI)
/* Return the second allele found: the first of "A", "C", "G", "T" (in that
 * order) that differs from allele1 and that confirmAllele() reports for at
 * least one of the four populations.
 * Returns "none" if there is no such allele (all populations are monomorphic).
 * The returned string is a constant and must not be freed. */
{
static char *candidates[] = {"A", "C", "G", "T"};
struct hashEl *populations[4];
int i, j;

populations[0] = helCEU;
populations[1] = helCHB;
populations[2] = helJPT;
populations[3] = helYRI;

for (i = 0; i < ArraySize(candidates); i++)
    {
    /* Every population is queried for every candidate, as before. */
    int count = 0;
    for (j = 0; j < ArraySize(populations); j++)
        {
        if (confirmAllele(populations[j], candidates[i]))
            count++;
        }
    if (count > 0 && differentString(allele1, candidates[i]))
        return candidates[i];
    }

/* no allele found, all populations are monomorphic */
return "none";
}
Esempio n. 24
0
struct slName *tdbListGetGroups(struct trackDb *tdbList)
// Returns a list of the groups (tdb->grp) found in the tdbList, built with
// slNameStore.  The store is skipped when a group equals the one stored last.
// FIXME: Should be moved to trackDbCustom and shared
{
struct slName *groupList = NULL;
char *lastGroup = "[]";  // placeholder used before any group has been stored
struct trackDb *tdb = tdbList;
while (tdb != NULL)
    {
    if (differentString(lastGroup,tdb->grp))
        {
        lastGroup = slNameStore(&groupList, tdb->grp);
        }
    tdb = tdb->next;
    }
return groupList;
}
Esempio n. 25
0
static void asdDoQuerySimple(struct annoStreamDb *self, char *minChrom, uint minEnd)
/* Build and run the SQL query for table items in the streamer's position range,
 * storing the sqlResult in self->sr and clearing self->needQuery.
 * If doing a whole genome query, just select all rows from table.
 * minChrom is only checked against the region chrom; minEnd is not used here. */
// NOTE: it would be possible to implement filters at this level, as in hgTables.
{
struct annoStreamer *streamer = &(self->streamer);
boolean hasWhere = FALSE;
// makeBaselineQuery supplies the start of the query and reports through
// hasWhere whether it already contains a where clause.
struct dyString *query = self->makeBaselineQuery(self, &hasWhere);
if (!streamer->positionIsGenome)
    {
    if (minChrom && differentString(minChrom, streamer->chrom))
	errAbort("annoStreamDb %s: nextRow minChrom='%s' but region chrom='%s'",
		 streamer->name, minChrom, streamer->chrom);
    if (self->hasBin)
	{
	// Results will be in bin order, but we can restore chromStart order by
	// accumulating initial coarse-bin items and merge-sorting them with
	// subsequent finest-bin items which will be in chromStart order.
	resetMergeState(self);
	self->mergeBins = TRUE;
	self->qLm = lmInit(0);
	}
    if (self->endFieldIndexName != NULL)
	// Don't let mysql use a (chrom, chromEnd) index because that messes up
	// sorting by chromStart.
	sqlDyStringPrintf(query, " IGNORE INDEX (%s)", self->endFieldIndexName);
    // Restrict to the region's chromosome, extending an existing where clause if there is one.
    sqlDyStringAppend(query, hasWhere ? " and " : " where ");
    sqlDyStringPrintf(query, "%s='%s'", self->chromField, streamer->chrom);
    int chromSize = annoAssemblySeqSize(streamer->assembly, streamer->chrom);
    // A start/end constraint is only added when the region is not the whole chromosome.
    if (streamer->regionStart != 0 || streamer->regionEnd != chromSize)
	{
	dyStringAppend(query, " and ");
	if (self->hasBin)
	    hAddBinToQuery(streamer->regionStart, streamer->regionEnd, query);
	// Overlap test: item start < region end and item end > region start.
	sqlDyStringPrintf(query, "%s < %u and %s > %u", self->startField, streamer->regionEnd,
		       self->endField, streamer->regionStart);
	}
    if (self->notSorted)
	sqlDyStringPrintf(query, " order by %s", self->startField);
    }
else if (self->notSorted)
    sqlDyStringPrintf(query, " order by %s,%s", self->chromField, self->startField);
if (self->maxOutRows > 0)
    dyStringPrintf(query, " limit %d", self->maxOutRows);
struct sqlResult *sr = sqlGetResult(self->conn, query->string);
dyStringFree(&query);
self->sr = sr;
self->needQuery = FALSE;
}
static unsigned getLabelTypes(struct track *tg)
/* Get set of labels to use, as a bit set of the use* flags.  Reads every cart
 * variable starting with "<track>.label"; a flag is set when the variable
 * with the corresponding suffix has a value other than "0".  If the cart has
 * no such variables, defaults to common name + accession and records that
 * default in the cart. */
{
unsigned labelSet = 0;

// label setting are on parent track
char prefix[256];
safef(prefix, sizeof(prefix), "%s.label", tg->tdb->track);
struct hashEl *labels = cartFindPrefix(cart, prefix);

if (labels == NULL)
    {
    // default to common name+accession and save this in cart so it makes sense in trackUi
    labelSet = useOrgCommon|useAcc;
    // The setting names are longer than the prefix, so this buffer must not be
    // smaller than prefix[] (it used to be 64 bytes against 128).
    char setting[256];
    safef(setting, sizeof(setting), "%s.label.acc", tg->tdb->track);
    cartSetBoolean(cart, setting, TRUE);
    safef(setting, sizeof(setting), "%s.label.orgCommon", tg->tdb->track);
    cartSetBoolean(cart, setting, TRUE);
    }

// Cart variable suffix -> label flag.  No suffix is a suffix of another, so
// at most one entry can match a given name.
struct
    {
    char *suffix;
    unsigned flag;
    } labelKinds[] =
    {
    {".orgCommon", useOrgCommon},
    {".orgAbbrv", useOrgAbbrv},
    {".db", useOrgDb},
    {".gene", useGene},
    {".acc", useAcc},
    };
struct hashEl *label;
for (label = labels; label != NULL; label = label->next)
    {
    int i;
    for (i = 0; i < ArraySize(labelKinds); i++)
        {
        if (endsWith(label->name, labelKinds[i].suffix))
            {
            if (differentString(label->val, "0"))
                labelSet |= labelKinds[i].flag;
            break;
            }
        }
    }
return labelSet;
}
Esempio n. 27
0
void annoAssemblyGetSeq(struct annoAssembly *aa, char *seqName, uint start, uint end,
			char *buf, size_t bufSize)
/* Copy bases [start,end) of sequence seqName to buf; bufSize must be at least
 * end-start+1 chars in length.  The most recently read sequence is kept in
 * aa->curSeq and reused while seqName stays the same.  Aborts on coordinates
 * outside the sequence or with start > end. */
{
boolean isCached = (aa->curSeq != NULL && !differentString(aa->curSeq->name, seqName));
if (!isCached)
    {
    /* Swap the cached sequence for the one requested. */
    dnaSeqFree(&aa->curSeq);
    aa->curSeq = twoBitReadSeqFragLower(aa->tbf, seqName, 0, 0);
    }
uint chromSize = aa->curSeq->size;
boolean badCoords = (end > chromSize || start > chromSize || start > end);
if (badCoords)
    errAbort("annoAssemblyGetSeq: bad coords [%u,%u) (sequence %s size %u)",
	     start, end, seqName, chromSize);
char *from = aa->curSeq->dna+start;
safencpy(buf, bufSize, from, end-start);
}
Esempio n. 28
0
void processSnps(char *snpTableName, struct hash *hapmapHash)
/* Read snpTable, lookup in hapmapHash.  Rows whose size is not exactly one
 * base are reported in hapmapOrtho.err.  Rows whose name is in hapmapHash with
 * matching chrom, start and end are written tab-separated to hapmapOrtho.tab;
 * all other rows are silently skipped. */
{
char query[512];
struct sqlConnection *conn = hAllocConn();
struct sqlResult *sr;
char **row;
struct hashEl *hel = NULL;
char *rsId = NULL;
int start = 0;
int end = 0;
FILE *outputFileHandle = mustOpen("hapmapOrtho.tab", "w");
FILE *errorFileHandle = mustOpen("hapmapOrtho.err", "w");
struct coords *coordItem = NULL;

sqlSafef(query, sizeof(query), 
    "select chrom, chromStart, chromEnd, name, orthoScore, strand, refUCSC, observed, "
    "orthoChrom, orthoStart, orthoEnd, orthoStrand, orthoAllele from %s", 
    snpTableName);
sr = sqlGetResult(conn, query);
while ((row = sqlNextRow(sr)) != NULL)
    {
    /* The name is only needed while this row is being handled, so use the row's
     * own string; it used to be cloned for every row and never freed. */
    rsId = row[3];
    start = sqlUnsigned(row[1]);
    end = sqlUnsigned(row[2]);
    /* all hapmap data is single base */
    /* don't include lift if it wasn't also single base */
    if (end != start + 1) 
        {
        fprintf(errorFileHandle, "skipping %s due to size %d\n", rsId, end-start);
	continue;
	}
    hel = hashLookup(hapmapHash, rsId);
    if (hel == NULL) continue;
    coordItem = (struct coords *)hel->val;
    if (differentString(coordItem->chrom, row[0])) continue;
    if (coordItem->start != start) continue;
    if (coordItem->end != end) continue;
    fprintf(outputFileHandle, "%s\t%s\t%s\t%s\t", row[0], row[1], row[2], rsId);
    fprintf(outputFileHandle, "%s\t%s\t%s\t%s\t", row[4], row[5], row[6], row[7]);
    fprintf(outputFileHandle, "%s\t%s\t%s\t%s\t%s\n", row[8], row[9], row[10], row[11], row[12]);
    }

carefulClose(&outputFileHandle);
carefulClose(&errorFileHandle);
sqlFreeResult(&sr);
hFreeConn(&conn);
}
void findNextDifferent(struct splatAli *list, struct splatAli **retNext, int *retSameCount)
/* Starting from the head of list (which must not be NULL), find the first
 * element whose readName differs from the head's.  Return it in retNext (NULL
 * if there is none), and the number of leading items that share the head's
 * readName, head included, in retSameCount. */
{
struct splatAli *cursor = list->next;
int matching = 1;	/* The head itself. */
while (cursor != NULL && !differentString(cursor->readName, list->readName))
    {
    ++matching;
    cursor = cursor->next;
    }
*retNext = cursor;
*retSameCount = matching;
}
Esempio n. 30
0
static void testOneSql(int start, int end, char *expected)
/* Generate the bin clause for (start, end) with hAddBinToQuery and, if expected
 * is not NULL, compare it against expected; a mismatch is reported at verbose
 * level 2 and counted in failureCount.  The generated SQL is always shown at
 * verbose level 3. */
{
struct dyString *sqlQuery = newDyString(1024);
hAddBinToQuery(start, end, sqlQuery);

if (expected != NULL && differentString(sqlQuery->string,expected))
    {
    verbose(2,"#\tERROR: SQL incorrect at (%d, %d)\n",
        start, end);
    ++failureCount;
    }

verbose(3,"# (%d, %d):\nsql:\"%s\",\n", start, end, sqlQuery->string);
freeDyString(&sqlQuery);
}