Ejemplo n.º 1
0
void getChromNamesAndDirForDb(char *db)
/* Look up the nibPath recorded for genome 'db' in the central dbDb table
 * and set two variables declared outside this function:
 *   dirName    - nibPath with its last path component removed and
 *                "/mixedNib/" appended (freshly allocated with needMem)
 *   chromNames - whatever hAllChromNames() returns
 * Aborts via errAbort() when the query yields nothing for db. */
{
static const char mixedNibSuffix[] = "/mixedNib/";
struct sqlConnection *conn = hConnectCentral();
char query[512];
char buff[512];
char *tmpMark = NULL;
size_t dirNameSize;

sqlSafef(query, sizeof(query), "select nibPath from dbDb where name='%s'", db);
if(sqlQuickQuery(conn, query, buff, sizeof(buff)) == NULL)
    errAbort("Couldn't find nib dir for genome %s\n", db);

/* Cut the path at its last slash, if it has one. */
tmpMark = strrchr(buff, '/');
if(tmpMark != NULL)
    *tmpMark = '\0';

/* Size the result from the actual path: sizeof(mixedNibSuffix) already
 * counts the terminating NUL, so the suffix can never be truncated the
 * way it could with a fixed 512 byte buffer. */
dirNameSize = strlen(buff) + sizeof(mixedNibSuffix);
dirName = needMem(dirNameSize);
snprintf(dirName, dirNameSize, "%s%s", buff, mixedNibSuffix);
chromNames = hAllChromNames();
hDisconnectCentral(&conn);
}
Ejemplo n.º 2
0
struct lm *lmInit(int blockSize)
/* Build a new local memory pool and return it.  A blockSize of zero or
 * less selects the 16k default.  The pool's first block is set up here
 * through newBlock(). */
{
struct lm *pool;
int firstBlockSize;
/* Alignment unit: the largest of sizeof(long), sizeof(double) and
 * sizeof(void *). */
int alignUnit = sizeof(long);
if (sizeof(double) > alignUnit)
    alignUnit = sizeof(double);
if (sizeof(void *) > alignUnit)
    alignUnit = sizeof(void *);

pool = needMem(sizeof(*pool));
firstBlockSize = (blockSize > 0) ? blockSize : (1<<14);
pool->blocks = NULL;
pool->blockSize = firstBlockSize;
/* allignAdd/allignMask are stored as (unit - 1) and its complement. */
pool->allignAdd = (alignUnit-1);
pool->allignMask = ~pool->allignAdd;
newBlock(pool, firstBlockSize);
return pool;
}
static struct slName *readList(char *fileName)
/* Read one-word rows from fileName and return them as an slName list
 * in file order. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct slName *head = NULL;
struct slName **tail = &head;
char *words[1];

while (lineFileRow(lf, words))
    {
    /* Node is sized as sizeof(struct) plus the word length. */
    struct slName *node = needMem(sizeof(*node) + strlen(words[0]));
    strcpy(node->name, words[0]);
    /* Append at the tail so no final reversal is needed. */
    node->next = NULL;
    *tail = node;
    tail = &node->next;
    }

lineFileClose(&lf);
return head;
}
Ejemplo n.º 4
0
char *expandRelativePath(char *baseDir, char *relPath)
/* Join baseDir and relPath, dropping one trailing directory of baseDir
 * for every leading "../" in relPath.  Both inputs are first passed
 * through undosPath().  Returns a newly allocated string, or NULL (after
 * a warning) when relPath has more "../" prefixes than baseDir has
 * directories. */
{
char *baseEnd = baseDir + strlen(baseDir);
char *rest = relPath;
char *joined;
int dirsLeft;
int keepLen;

undosPath(baseDir);
undosPath(relPath);
dirsLeft = countChars(baseDir, '/');
if (baseDir[0] == 0)
    dirsLeft = -1;

/* Each "../" consumed moves baseEnd back by one directory. */
for ( ; startsWith("../", rest); rest += 3, dirsLeft -= 1)
    {
    if (dirsLeft < 0)
        {
	warn("More ..'s in \"%s\" than directories in \"%s\"", relPath, baseDir);
	return NULL;
	}
    if (dirsLeft == 0)
        baseEnd = baseDir;
    else
        baseEnd = findSlashBefore(baseDir, baseEnd);
    }

keepLen = baseEnd - baseDir;
if (keepLen <= 0)
    return cloneString(rest);

/* kept part of baseDir + '/' + rest + terminating zero */
joined = needMem(strlen(rest) + 1 + keepLen + 1);
memcpy(joined, baseDir, keepLen);
joined[keepLen] = '/';
strcpy(joined + keepLen + 1, rest);
return joined;
}
static int bedSlotForField(char *name, struct hTableInfo *hti)
/* Return the result slot that field 'name' belongs to, or -1 when it
 * matches none of the hti field names.  Names are tried in slot order
 * and the first match wins.  The reserved bed field has no slot, so
 * slots from 8 on are one lower than the bed column they stand for. */
{
    if (sameString(name, hti->chromField))
        return 0;
    if (sameString(name, hti->startField))
        return 1;
    if (sameString(name, hti->endField))
        return 2;
    if (sameString(name, hti->nameField))
        return 3;
    if (sameString(name, hti->scoreField))
        return 4;
    if (sameString(name, hti->strandField))
        return 5;
    if (sameString(name, hti->cdsStartField))
        return 6;
    if (sameString(name, hti->cdsEndField))
        return 7;
    if (sameString(name, hti->countField))
        return 8;
    if (sameString(name, hti->endsSizesField))
        return 9;
    if (sameString(name, hti->startsField))
        return 10;
    return -1;
}

static int *getBedFieldIndices(struct joinedTables *joined,
                               struct hTableInfo *hti)
/* Walk joined->fieldList and note, for each bed-related field named in
 * hti, the position of the matching joined field.  Returns a needMem'd
 * array of 12 ints; slots with no matching field hold -1. */
{
    int *indices = needMem(sizeof(int) * 12);
    struct joinerDtf *field = joined->fieldList;
    int position;
    int slot;

    for (slot = 0;  slot < 12;  slot++)
        indices[slot] = -1;

    for (position = 0;  field != NULL;  field = field->next, position++)
    {
        slot = bedSlotForField(field->field, hti);
        if (slot >= 0)
            indices[slot] = position;
    }
    return indices;
}
Ejemplo n.º 6
0
struct shaResNode *shaNewNode(struct shaResList *list, char *name, void *data)
/* Allocate a node holding data and a private copy of name, give it a
 * link count of one, and insert it at the head of list. */
{
struct shaResNode *node = needMem(sizeof(*node));
struct shaResNode *oldHead = list->head;

/* Payload and bookkeeping. */
node->list = list;
node->data = data;
node->links = 1;
node->name = cloneString(name);

/* Splice in ahead of the previous head of the doubly-linked list. */
node->prev = NULL;
node->next = oldHead;
if (oldHead != NULL)
    oldHead->prev = node;
list->head = node;
return node;
}
Ejemplo n.º 7
0
void readAllWords(char *fileName, char ***retWords, int *retWordCount, char **retBuf)
/* Read the whole of fileName into one buffer and chop it into
 * white-space separated words.
 *   *retWords     - array of word pointers, or NULL when the file holds
 *                   no words
 *   *retWordCount - number of words
 *   *retBuf       - the buffer filled by readInGulp()
 * Both *retWords and *retBuf are allocated here. */
{
char *text = NULL;
size_t textSize;
char **wordArray = NULL;
int count;

readInGulp(fileName, &text, &textSize);

/* First pass only counts the words; second pass fills the array. */
count = chopByWhite(text, NULL, 0);
if (count != 0)
    {
    wordArray = needMem(count * sizeof(wordArray[0]));
    chopByWhite(text, wordArray, count);
    }

*retWords = wordArray;
*retWordCount = count;
*retBuf = text;
}
Ejemplo n.º 8
0
Archivo: main.c Proyecto: bowhan/kent
static char *expandTilde(char *fileName)
/* When fileName begins with "~/" return a newly allocated copy in which
 * the '~' is replaced by the value of $HOME; otherwise return fileName
 * itself.  Aborts if HOME is not set. */
{
char *homeDir;
char *expanded;
int homeLen, nameLen;

if (fileName[0] != '~' || fileName[1] != '/')
    return fileName;

homeDir = getenv("HOME");
if (homeDir == NULL)
    errAbort("Can't find home");
homeLen = strlen(homeDir);
nameLen = strlen(fileName);

/* Dropping the '~' frees exactly the byte needed for the terminating
 * zero, so homeLen + nameLen bytes hold the result. */
expanded = needMem(homeLen + nameLen);
memcpy(expanded, homeDir, homeLen);
strcpy(expanded + homeLen, fileName + 1);
return expanded;
}
void diffQuery(FILE *outFh, FILE *detailsFh, struct pslSets *ps, char *qName)
/* Diff the alignments of one query: for every match group that is not
 * the same across all sets, categorize it and print it to outFh, and
 * also to detailsFh when that is not NULL. */
{
/* Scratch array of one-character categories.  It is allocated on the
 * first call only, using that call's ps->numSets, plus one byte for a
 * terminating zero that makes it easy to view in a debugger. */
static char *categoryBuf = NULL;
struct pslMatches *group;

if (categoryBuf == NULL)
    categoryBuf = needMem(ps->numSets+1);

pslSetsMatchQuery(ps, qName);
for (group = ps->matches; group != NULL; group = group->next)
    {
    if (allMatchesSame(group))
        continue;
    categorizePsls(group, categoryBuf);
    prDiffMatches(outFh, ps, qName, group, categoryBuf);
    if (detailsFh != NULL)
        prDiffDetails(detailsFh, ps, qName, group, categoryBuf);
    }
}
Ejemplo n.º 10
0
static char * outMafTableDrop(struct cart *cart, struct sqlConnection *conn)
/* Print a drop-down list of the tables returned by
 * hTrackTablesOfType(conn, "wigMaf%%") and return the selected table
 * name.  The selection is the cart's hgtaCGIGeneMafTable value when it is
 * still in the list; otherwise getConservationTrackName(), falling back
 * to the first table, and that choice is stored in the cart.  Aborts
 * when there are no such tables. */
{
struct slName *mafTables = hTrackTablesOfType(conn, "wigMaf%%");
int tableCount = slCount(mafTables);
struct slName *el;
int ix = 0;

if (tableCount == 0)
    errAbort("There are no multiple alignments available for this genome.");

char **names = needMem(sizeof(char *) * tableCount);
char *chosen = cartOptionalString(cart, hgtaCGIGeneMafTable);

/* A value remembered in the cart only counts if it is in the list. */
if (chosen != NULL)
    {
    for (el = mafTables; el != NULL; el = el->next)
	{
	if (sameString(el->name, chosen))
	    break;
	}
    if (el == NULL)
	chosen = NULL;
    }

if (chosen == NULL)
    {
    chosen = getConservationTrackName(conn);
    if (chosen == NULL)
	chosen = mafTables->name;
    cartSetString(cart, hgtaCGIGeneMafTable, chosen);
    }

/* The same array serves as both arguments after the variable name. */
for (el = mafTables; el != NULL; el = el->next)
    names[ix++] = el->name;

printf("<B>MAF table: </B>\n");
cgiMakeDropListFull(hgtaCGIGeneMafTable, names, names,
    tableCount , chosen, onChangeGenome());

return chosen;
}
Ejemplo n.º 11
0
struct cirTreeFile *cirTreeFileAttach(char *fileName, struct udcFile *udc)
/* Attach to an r-tree index inside an already open udc file whose
 * current position is at the cirTree header.  Reads the header into a
 * new cirTreeFile and returns it.  fileName is stored by pointer, not
 * copied.  Aborts if the signature matches in neither byte order. */
{
struct cirTreeFile *tree = needMem(sizeof(*tree));
bits32 sig;
bits32 reservedWord;
boolean swapped = FALSE;

tree->fileName = fileName;
tree->udc = udc;

/* The signature tells us whether this is the right kind of file and
 * whether the rest of it needs byte swapping. */
udcMustReadOne(udc, sig);
if (sig != cirTreeSig)
    {
    sig = byteSwap32(sig);
    tree->isSwapped = TRUE;
    swapped = TRUE;
    if (sig != cirTreeSig)
       errAbort("%s is not a chromosome id r-tree index file", fileName);
    }

/* Fixed header fields, in file order. */
tree->blockSize = udcReadBits32(udc, swapped);
tree->itemCount = udcReadBits64(udc, swapped);
tree->startChromIx = udcReadBits32(udc, swapped);
tree->startBase = udcReadBits32(udc, swapped);
tree->endChromIx = udcReadBits32(udc, swapped);
tree->endBase = udcReadBits32(udc, swapped);
tree->fileSize = udcReadBits64(udc, swapped);
tree->itemsPerSlot = udcReadBits32(udc, swapped);

/* One reserved 32 bit word is read and discarded. */
udcMustReadOne(udc, reservedWord);

/* The file position after the header is recorded as the root offset. */
tree->rootOffset = udcTell(udc);

return tree;
}
Ejemplo n.º 12
0
static boolean geneDna(struct gff *gff, struct gffGene *gene,
    int leftExtra, int rightExtra, char **retDna, long *retDnaSize,
    int *retStartOffset)
/* Allocate a zero-terminated array and fill it with the dna of gene plus
 * up to leftExtra bases before it and rightExtra bases after it, clipped
 * to the range 0..gff->dnaSize.
 *   *retDna         - the allocated dna
 *   *retDnaSize     - number of bases in it (terminator not counted)
 *   *retStartOffset - offset of gene->start inside *retDna
 * Returns FALSE, leaving the outputs untouched, when the gene looks
 * unreasonable: size not in 1..999999, or nothing left after clipping. */
{
char *dna;
char *pt;
long geneSize;
long i;
long seqStart, seqEnd, seqSize;

/* Filter out unreasonable looking genes - input to this
 * program isn't totally clean. */
geneSize = gene->end - gene->start + 1;
if (geneSize <= 0 || geneSize >= 1000000)
    return FALSE;

/* Figure out extents of DNA we're going to return.
 * Return extra they ask for if possible, but clip
 * it to what is actually in GFF file. */
seqStart = gene->start - leftExtra;
seqEnd = gene->end + rightExtra + 1;
if (seqStart < 0)
    seqStart = 0;
if (seqEnd > gff->dnaSize)
    seqEnd = gff->dnaSize;
seqSize = seqEnd - seqStart;

/* A gene lying entirely outside the loaded dna clips to an empty or
 * negative range; treat it as one more unreasonable gene rather than
 * asking the allocator for a non-positive size. */
if (seqSize <= 0)
    return FALSE;

/* Allocate memory and fetch the dna. */
dna = needMem(seqSize+1);
pt = dna;
for (i=0; i<seqSize; i++)
    *pt++ = gff->dna[seqStart+i];
*pt = 0;

/* Report results back to caller. */
*retDna = dna;
*retDnaSize = seqSize;
*retStartOffset = (gene->start - seqStart);
return TRUE;
}
Ejemplo n.º 13
0
static int *calcFrames(struct genePred *gp)
/* Compute frames for a genePred that doesn't have them.  Returns a
 * needMem'd array with one entry per exon: the number of CDS bases seen
 * before that exon modulo 3, or -1 for an exon genePredCdsExon() reports
 * as having no CDS.  Exons are visited first to last for '+' strand and
 * last to first otherwise.  Free the resulting array. */
{
int *frames = needMem(gp->exonCount*sizeof(int));
int plusStrand = (gp->strand[0] == '+');
int exonIx = plusStrand ? 0 : gp->exonCount - 1;
int pastLast = plusStrand ? gp->exonCount : -1;
int step = plusStrand ? 1 : -1;
int cdsStart, cdsEnd;
int codingBases = 0;

while (exonIx != pastLast)
    {
    if (!genePredCdsExon(gp, exonIx, &cdsStart, &cdsEnd))
        frames[exonIx] = -1;
    else
        {
        frames[exonIx] = codingBases % 3;
        codingBases += (cdsEnd - cdsStart);
        }
    exonIx += step;
    }
return frames;
}
Ejemplo n.º 14
0
struct textLine *loadLines(char *fileName)
/* Read fileName with fgets() through a 512 byte buffer and return a list
 * of textLine structures, one per chunk read, in file order.  A line
 * longer than the buffer therefore becomes several list elements. */
{
char chunk[512];
struct textLine *head = NULL;
struct textLine **tail = &head;
FILE *f = mustOpen(fileName, "r");

while (fgets(chunk, sizeof(chunk), f) != NULL)
    {
    /* Element is sized as sizeof(struct) plus the text length. */
    struct textLine *el = needMem(sizeof(*el) + strlen(chunk));
    strcpy(el->line, chunk);
    /* Append at the tail so no final reversal is needed. */
    el->next = NULL;
    *tail = el;
    tail = &el->next;
    }
fclose(f);
return head;
}
Ejemplo n.º 15
0
char * projectString(char *s, char *ref, char refChar, char insertChar)
/* Return a newly allocated, zero-terminated string as long as 'ref' that
 * holds 'insertChar' at every position where 'ref' has 'refChar' and the
 * characters of 's', in order, everywhere else.  Aborts unless the
 * length of 's' equals the number of non-refChar positions in 'ref'. */
{
int i,j,size = strlen(ref);
int refCharCount = countChars(ref, refChar);
char *copy;

/* Validate before allocating so nothing is held when we abort. */
if (strlen(s) != (size_t)(size - refCharCount))
  errAbort("ERROR from rnautil::projectString: Input string 's' has wrong length.\n"); 

copy = (char *) needMem(size + 1);
for (i = 0, j = 0; i < size; i++)
    {
    if (ref[i] == refChar)
	copy[i] = insertChar;
    else
	{	
	copy[i] = s[j];
	j++;
	}
    }
/* Terminate explicitly rather than relying on the allocator to have
 * zeroed the buffer. */
copy[size] = '\0';
return copy;
}
Ejemplo n.º 16
0
struct expRecord *expRecordFromAffyAtlas(struct affyAtlas *aa)
/** Construct a simple experiment record from an affyAtlas record.
 * The name is "<annName>_<tissue>" (cut off if it would not fit in 255
 * characters), the description is the tissue, and the three extras are
 * chip (declared outside this function), annName and tissue.  Ids are
 * handed out sequentially from zero across calls. */
{
static int id = 0;
struct expRecord *er = NULL;
char name[256];
AllocVar(er);
/* Bounded formatting: annName and tissue come from input data and are
 * not guaranteed to fit in name[]. */
snprintf(name, sizeof(name), "%s_%s", aa->annName, aa->tissue);
er->id = id++;
er->name = cloneString(name);
er->description = cloneString(aa->tissue);
er->url = cloneString("http://www.affymetrix.com/analysis/index.affx");
er->ref = cloneString("http://www.gnf.org/");
er->credit = cloneString("http://www.gnf.org/");
er->numExtras = 3;
er->extras = needMem(sizeof(char*) * er->numExtras);
er->extras[0] = cloneString(chip);
er->extras[1] = cloneString(aa->annName);
er->extras[2] = cloneString(aa->tissue);
return er;
}
Ejemplo n.º 17
0
struct crTreeFile *crTreeFileOpen(char *fileName)
/* Open the chromosome r-tree index file fileName, check its signature,
 * read the top level header and attach the chromosome index and the
 * range index found at the offsets it gives.  Aborts if the signature
 * matches in neither byte order. */
{
struct udcFile *udc = udcFileOpen(fileName, udcDefaultDir());
struct crTreeFile *tree = needMem(sizeof(*tree));
bits32 sig;
bits32 reservedWord;
boolean swapped = FALSE;

/* From here on work with our own copy of the name; it is also what the
 * attached sub-indexes are given. */
tree->fileName = cloneString(fileName);
fileName = tree->fileName;
tree->udc = udc;

/* The signature tells us whether this is the right kind of file and
 * whether the rest of it needs byte swapping. */
udcMustReadOne(udc, sig);
if (sig != crTreeSig)
    {
    sig = byteSwap32(sig);
    tree->isSwapped = TRUE;
    swapped = TRUE;
    if (sig != crTreeSig)
       errAbort("%s is not a chromosome r-tree index file", fileName);
    }

/* One reserved word, then the offsets of the two indexes. */
udcMustReadOne(udc, reservedWord);
tree->chromOffset = udcReadBits64(udc, swapped);
tree->cirOffset = udcReadBits64(udc, swapped);

/* Chromosome index header. */
udcSeek(udc, tree->chromOffset);
tree->chromBpt = bptFileAttach(fileName, udc);

/* Range index header. */
udcSeek(udc, tree->cirOffset);
tree->cir = cirTreeFileAttach(fileName, udc);

return tree;
}
Ejemplo n.º 18
0
struct floatPic *floatPicNew(int width, int height)
/* Return a new floatPic of the given dimensions.  The image is one
 * allocation of 3*width floats per line; pic->lines[y] points at the
 * start of line y within it. */
{
long floatsPerLine = 3L * width;
long floatsInImage = floatsPerLine * height;
struct floatPic *pic = needMem(sizeof(struct floatPic));
int y;

pic->width = width;
pic->height = height;
pic->image = needHugeMem(floatsInImage * sizeof(float));

/* Line start array: each line begins floatsPerLine after the last. */
AllocArray(pic->lines, height);
for (y = 0; y < height; ++y)
    pic->lines[y] = pic->image + y * floatsPerLine;
return pic;
}
Ejemplo n.º 19
0
struct gffGene *gffDupeGeneAndSurrounds(struct gff *gff, struct gffGene *oldGene,
    int leftExtra, int rightExtra)
/* Make a duplicate of gene with extra DNA around coding region. 
 * gffFreeGene it when done. */
/* In a perhaps hair brained scheme to save some cycles,
 * the memory allocation of the intron and exon lists
 * is shared with that of the gffGene itself. */
{
struct gffGene *g;
int intronCount = slCount(oldGene->introns);
int exonCount = slCount(oldGene->exons);
int memSize = sizeof(*g) + (intronCount + exonCount) * sizeof(struct gffSegment);
char *memPt;
int firstExonOffset;


memPt = needMem(memSize);
g = (struct gffGene *)memPt;
memPt += sizeof(*g);
g->exons = (struct gffSegment *)memPt;
memPt += exonCount*sizeof(struct gffSegment);
g->introns = (struct gffSegment *)memPt;

g->next = NULL;
g->start = oldGene->start;
g->end = oldGene->end;
g->strand = oldGene->strand;
memcpy(g->name, oldGene->name, sizeof(g->name));
g->exons = dupeSegmentList(oldGene->exons, g->exons);
g->introns = dupeSegmentList(oldGene->introns, g->introns);
if (!geneDna(gff, oldGene, leftExtra, rightExtra, 
    &g->dna, &g->dnaSize, &firstExonOffset))
    {
    gffFreeGene(&g);
    return NULL;
    }
fixDirectionAndOffsets(g, g->dna, g->dnaSize, firstExonOffset);
return g;
}
static void checkExtRecord(struct seqFields *seq,
                           char *extPath)
/* Check the external file record for a sequence (slow). Assumes
 * that bounds have been sanity check for a file.
 * Reads seq->file_size bytes at seq->file_offset of extPath and reports
 * through gbError() when the seek fails, when the data does not start
 * with '>', or when the fasta name or size differs from what seq says. */
{
/* read range into buffer */
FILE *fh = mustOpen(extPath, "r");
char *faBuf;
char accVer[GB_ACC_BUFSZ];
struct dnaSeq *dnaSeq;
if (fseeko(fh, seq->file_offset, SEEK_SET) < 0)
    {
    gbError("%s: can't seek %s", seq->acc, extPath);
    carefulClose(&fh);
    /* Stop here: the file has been closed and there is nothing to
     * read.  Falling through would read from the closed handle. */
    return;
    }
faBuf = needMem(seq->file_size+1);
mustRead(fh, faBuf, seq->file_size);
faBuf[seq->file_size] = '\0';
carefulClose(&fh);

/* verify contents */
if (faBuf[0] != '>')
    {
    gbError("%s: gbExtFile offset %lld doesn't start a fasta record: %s",
            seq->acc, (long long)seq->file_offset, extPath);
    free(faBuf);
    return;
    }
/* NOTE(review): faBuf is not freed on this path; presumably
 * faFromMemText() keeps using the buffer - confirm. */
dnaSeq = faFromMemText(faBuf);
safef(accVer, sizeof(accVer), "%s.%d", seq->acc, seq->version);

if (!sameString(dnaSeq->name, accVer))
    gbError("%s: name in fasta header \"%s\" doesn't match expected \"%s\": %s",
            seq->acc, dnaSeq->name, accVer, extPath);
if (dnaSeq->size != seq->size)
    gbError("%s: size of fasta sequence (%d) doesn't match expected (%d): %s",
            seq->acc, dnaSeq->size, seq->size, extPath);
freeDnaSeq(&dnaSeq);
}
Ejemplo n.º 21
0
struct snp *snpLoad(char **row)
/* Load a snp from row fetched with select * from snp
 * from database.  Dispose of this with snpFree().
 * row[0] is the name, row[1] the chromStart, row[2] the strand and
 * row[3] the observed string.
 * Unless the strand is '+', observed is replaced by a copy in which
 * every second character (positions 0, 2, 4, ...) that is A, C, G or T
 * is complemented and written to the mirrored position; positions that
 * are not written keep the original character. */
{
struct snp *ret;
int obsLen, i;
char *obsComp;

AllocVar(ret);
ret->name = cloneString(row[0]);
ret->chromStart = atoi(row[1]);
/* strand is a single char: take the first character of the column.
 * strcpy() here would also store the terminating zero, one byte past
 * the field. */
ret->strand = row[2][0];
ret->observed   = cloneString(row[3]);

if (ret->strand == '+') return ret;

obsLen = strlen(ret->observed);
obsComp = needMem(obsLen + 1);
strcpy(obsComp, ret->observed);
for (i = 0; i < obsLen; i = i+2)
    {
    if (ret->observed[i] == 'A') obsComp[obsLen-i-1] = 'T';
    else if (ret->observed[i] == 'T') obsComp[obsLen-i-1] = 'A';
    else if (ret->observed[i] == 'C') obsComp[obsLen-i-1] = 'G';
    else if (ret->observed[i] == 'G') obsComp[obsLen-i-1] = 'C';
    }

if (snpMaskVerbose)
    {
    printf("negative strand detected for snp %s\n", ret->name);
    printf("original observed string = %s\n", ret->observed);
    printf("complemented observed string = %s\n", obsComp);
    }

/* NOTE(review): the original observed string is not released here. */
ret->observed=obsComp;
return ret;
}
Ejemplo n.º 22
0
void mafToProtein(char *dbName, char *mafTable, char *frameTable, 
    char *org,  char *speciesList, char *outName)
/* mafToProtein - output protein alignments using maf and frames. */
{
struct slName *geneNames = NULL;
struct slName *speciesNames = readList(speciesList);
FILE *f = mustOpen(outName, "w");

hSetDb(dbName);

newTableType = hHasField(frameTable, "isExonStart");

if (inExons && !newTableType)
    errAbort("must have new mafFrames type to output in exons");

if (geneList != NULL)
    geneNames = readList(geneList);
else if (geneName != NULL)
    {
    int len = strlen(geneName);
    geneNames = needMem(sizeof(*geneNames)+len);
    strcpy(geneNames->name, geneName);
    }
else
    geneNames = queryNames(dbName, frameTable, org);

for(; geneNames; geneNames = geneNames->next)
    {
    verbose(2, "outting  gene %s \n",geneNames->name);
    outGene(f, geneNames->name, dbName, mafTable, 
	frameTable, org, speciesNames);
    if (delay)
	{
	verbose(2, "delaying %d seconds\n",delay);
	sleep(delay);
	}
    }
}
Ejemplo n.º 23
0
char *accWithoutSuffix(char *acc) 
/* 
Return the accession without its version suffix, i.e. everything before
the first '.'.
Eg. NM_123456.1 becomes NM_123456
When acc contains no '.', acc itself is returned; otherwise the result is
a newly allocated string.
*/
{
char *dot = strchr(acc, '.');
char *stem;
int stemLen;

if (dot == NULL)
    return acc;

stemLen = dot - acc;   /* number of characters before the period */
stem = needMem(stemLen + 1);
memcpy(stem, acc, stemLen);
stem[stemLen] = 0;
return stem;
}
Ejemplo n.º 24
0
struct snpSimple *snpSimpleLoad(char **row)
/* Load a snpSimple from row fetched from snp table
 * in database.  Dispose of this with snpSimpleFree().
 * row[0..5] are name, chrom, chromStart, chromEnd, strand and observed.
 * Unless the strand is '+', observed is replaced by a copy in which
 * every second character (positions 0, 2, 4, ...) is looked up in
 * ntCompTable and written to the mirrored position, which complements
 * the alleles while preserving alphabetical order. */
{
struct snpSimple *ret;
int obsLen, i;
char *obsComp;

AllocVar(ret);
ret->name = cloneString(row[0]);
ret->chrom = cloneString(row[1]);
ret->chromStart = atoi(row[2]);
ret->chromEnd = atoi(row[3]);
/* strand is a single char: take the first character of the column.
 * strcpy() here would also store the terminating zero, one byte past
 * the field. */
ret->strand = row[4][0];
ret->observed   = cloneString(row[5]);

if (ret->strand == '+') return ret;

obsLen = strlen(ret->observed);
/* cloneString() allocates the copy itself; a separate needMem() before
 * it would only be leaked. */
obsComp = cloneString(ret->observed);
for (i = 0; i < obsLen; i = i+2)
    {
    /* Index through unsigned char so a byte above 127 cannot become a
     * negative table index. */
    unsigned char c = (unsigned char)ret->observed[i];
    obsComp[obsLen-i-1] = ntCompTable[c];
    }

verbose(2, "negative strand detected for snp %s\n", ret->name);
verbose(2, "original observed string = %s\n", ret->observed);
verbose(2, "complemented observed string = %s\n", obsComp);

freeMem(ret->observed);
ret->observed=obsComp;
return ret;
}
Ejemplo n.º 25
0
struct bptFile *bptFileAttach(char *fileName, struct udcFile *udc)
/* Attach to a b-plus tree index inside an already open udc file whose
 * current position is at the index header.  Reads the header into a new
 * bptFile and returns it.  fileName is stored by pointer, not copied.
 * Aborts if the signature matches in neither byte order. */
{
struct bptFile *index = needMem(sizeof(*index));
bits32 sig;
bits32 reservedWord;
boolean swapped = FALSE;

index->fileName = fileName;
index->udc = udc;

/* The signature tells us whether this is the right kind of file and
 * whether the rest of it needs byte swapping. */
udcMustReadOne(udc, sig);
if (sig != bptSig)
    {
    sig = byteSwap32(sig);
    index->isSwapped = TRUE;
    swapped = TRUE;
    if (sig != bptSig)
       errAbort("%s is not a bpt b-plus tree index file", fileName);
    }

/* Fixed header fields, in file order. */
index->blockSize = udcReadBits32(udc, swapped);
index->keySize = udcReadBits32(udc, swapped);
index->valSize = udcReadBits32(udc, swapped);
index->itemCount = udcReadBits64(udc, swapped);

/* Two reserved 32 bit words are read and discarded. */
udcMustReadOne(udc, reservedWord);
udcMustReadOne(udc, reservedWord);

/* The file position after the header is recorded as the root offset. */
index->rootOffset = udcTell(udc);

return index;
}
Ejemplo n.º 26
0
struct hash *newHashExt(int powerOfTwoSize, boolean useLocalMem)
/* Return a new hash table with 2^powerOfTwoSize slots; zero selects the
 * default power of 12.  When useLocalMem is set the hash gets its own
 * local memory pool from lmInit().  The table starts with autoExpand
 * on and the default expansion factor. */
{
struct hash *table = needMem(sizeof(*table));
int blockPower;

if (powerOfTwoSize == 0)
    powerOfTwoSize = 12;
assert(powerOfTwoSize <= hashMaxSize && powerOfTwoSize > 0);
table->powerOfTwoSize = powerOfTwoSize;
table->size = (1<<powerOfTwoSize);

/* The pool's block size follows the table size, clamped to the range
 * 2^8 (256 bytes) .. 2^16 (64k). */
blockPower = powerOfTwoSize;
if (blockPower < 8)
    blockPower = 8;
else if (blockPower > 16)
    blockPower = 16;
if (useLocalMem) 
    table->lm = lmInit(1<<blockPower);

table->mask = table->size-1;
AllocArray(table->table, table->size);
table->autoExpand = TRUE;
/* Expand when elCount > size*expansionFactor */
table->expansionFactor = defaultExpansionFactor;
return table;
}
struct expRecord *createExpRec(char *file, int expNum)
/* Build an expRecord with id expNum from a file name.  Two suffixes are
 * chopped from a copy of file and the result becomes the record name.
 * The part after that name's last '_' is the second extra ("DUPLICATE"
 * when there is no '_'), and the part before its first '_' is the first
 * extra.  description, url, reference and credit are taken from
 * variables declared outside this function. */
{
struct expRecord *rec = NULL;
char *base = cloneString(file);
char *tissuePart;
char *firstUnderscore;

chopSuffix(base);
chopSuffix(base);
AllocVar(rec);
rec->name = cloneString(base);

/* Cut base at its last underscore; what follows is the tissue. */
tissuePart = strrchr(base, '_');
if (tissuePart == NULL)
    tissuePart = "DUPLICATE";
else
    *tissuePart++ = '\0';

rec->id = expNum;
rec->description = description;
rec->url = url;
rec->ref = reference;
rec->credit = credit;
rec->numExtras = 2;
rec->extras = needMem(sizeof(char *) * rec->numExtras);

/* Cut what is left of base at its first underscore.  The extras point
 * into base (or at the literal above); they are not separate copies. */
firstUnderscore = strchr(base, '_');
if (firstUnderscore != NULL)
    *firstUnderscore = '\0';
rec->extras[0] = base;
rec->extras[1] = tissuePart;
return rec;
}
void foldIn(char *fileName, struct hash *hash, struct scoredFrag **pList)
/* Read the five-column rows of fileName and accumulate them per fragment
 * name (column 0).  A name not yet in hash gets a new scoredFrag that is
 * added to hash and to the head of *pList.  Columns 1-4 are added to
 * perfectCount, posCount, posTotal and total respectively. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *cols[5];

while (lineFileRow(lf, cols))
    {
    char *fragName = cols[0];
    struct scoredFrag *entry = hashFindVal(hash, fragName);
    if (entry == NULL)
        {
	/* First sighting of this name. */
	entry = needMem(sizeof(*entry) + strlen(fragName));
	strcpy(entry->frag, fragName);
	hashAdd(hash, fragName, entry);
	slAddHead(pList, entry);
	}
    entry->perfectCount += lineFileNeedNum(lf, cols, 1);
    entry->posCount += lineFileNeedNum(lf, cols, 2);
    entry->posTotal += atof(cols[3]);
    entry->total += atof(cols[4]);
    }
lineFileClose(&lf);
}
Ejemplo n.º 29
0
/**
Generates the data plot and associated html.
Builds a plot from seList/sgList with createSagePlot(), fills in its
output file name, size, tics, labels, title and axis maxima, turns it into
a command with gptGenerateCmd(), runs that through gptPlotFromCmd() and
prints the page around the resulting png.
*/
void doCountsPage(struct sageExp *seList, struct sage *sgList)
{
/* createSagePlot() supplies the plot below; nothing is allocated here. */
struct gnuPlot2D *gp = NULL;

char *cmd = NULL;
double xSize;
double ySize;
char *xTics = NULL;
char plotSize[256]; // = cloneString("set size .75,.75\n");
struct tempName pngTn;

chuckHtmlStart("Sage Graph");
printf("<center>");
makeTempName(&pngTn, "sageDat", ".png");
gp = createSagePlot(seList, sgList);
gp->fileName = pngTn.forCgi;
xTics = constructXticsFromExps(seList);
/* Plot size grows with the number of points and of sage entries, with
 * lower bounds of .75 and 1.25. */
xSize = 0.020*slCount(gp->gpList[0]);
ySize = 0.075*slCount(sgList);
if(ySize < 1.25) ySize = 1.25;
if(xSize <.75) xSize = .75;
sprintf(plotSize, "set size %g, %g\n", xSize,ySize);
gp->other = cloneString(plotSize);
dynamicStrncat(&gp->other, xTics);
gp->ylabel = cloneString("Median Counts");
gp->xlabel = cloneString("Experiment");
/* Cloned once, straight into the plot. */
gp->title = cloneString("Sage Data for Unigene Clusters");
gp->xMax = slCount(gp->gpList[0]);
gp->yMax = maxDataVal;
cmd = gptGenerateCmd(gp);
gptPlotFromCmd(cmd);
doPlotPrintOut(pngTn.forHtml);
htmlEnd();
}
void wigAsciiToBinary( int argc, char *argv[] )
/* Process each ascii data file named in argv[1] .. argv[argc-1].
 * For every input file a pair of output files is opened: binfile (the
 * ".wib" name) and wigfile (the ".wig" name).  The names come from
 * wibFile when it is set, otherwise from the input file's basename,
 * which must then start with "chr".
 * Each real input line is chopped into one, two or three words:
 *   value                - Offset advances by one
 *   offset value         - Offset is the number read, minus one
 *   chrom offset value   - as above; a chrom change calls output_row()
 * Values are collected in data_values[]/validData[] and output_row() is
 * called whenever binsize values have been counted, when positions skip
 * in a way that does not fit dataSpan, or when a gap would fill the rest
 * of the bin.
 * The function reads and updates many variables declared outside it
 * (name, chrom, wibFile, featureName, chromName, binfile, binsize,
 * dataSpan, Offset, bincount, fileOffset, chromStart, ...). */
{
int i = 0;				/* general purpose int counter	*/
struct lineFile *lf;			/* for line file utilities	*/
char * fileName;			/* the basename of the input file */
char *line = (char *) NULL;		/* to receive data input line	*/
char *words[4];				/* to split data input line	*/
int wordCount = 0;			/* result of split	*/
int validLines = 0;			/* counting only lines with data */
unsigned long long previousOffset = 0;	/* for data missing detection */
double dataValue = 0.0;				/* from data input	*/
char *wigfile = (char *) NULL;	/*	file name of wiggle database file */
boolean firstInChrom;		/* Is this the first line in chromosome? */

/*	for each input data file	*/
for (i = 1; i < argc; ++i)
    {
    verbose(2, "translating file: %s\n", argv[i]);

    fileName = basename(argv[i]);
    if (name)		/*	Is the name of this feature specified ?	*/
	{
	safef( featureName, sizeof(featureName) - 1, "%s", name);
	}
    if (chrom)		/*	Is the chrom name specified ? */
	{
	chromName = cloneString(chrom);
	if (! name)	/*	that names the feature too if not already */
	    safef( featureName, sizeof(featureName) - 1, "%s", chrom);
	}
    /*	Name mangling to determine output file name */
    if (wibFile)	/*	when specified, simply use it	*/
	{
	binfile = addSuffix(wibFile, ".wib");
	wigfile = addSuffix(wibFile, ".wig");
	} else {	/*	not specified, construct from input names */
	if (startsWith("chr",fileName))
	    {
	    char *tmpString;
	    tmpString = cloneString(fileName);
	    chopSuffix(tmpString);
	    binfile = addSuffix(tmpString, ".wib");
	    wigfile = addSuffix(tmpString, ".wig");
	    if (! chrom)	/*	if not already taken care of	*/
		chromName = cloneString(tmpString);
	    if (! name && ! chrom)	/*	if not already done	*/
		safef(featureName, sizeof(featureName) - 1, "%s", tmpString);
	    freeMem(tmpString);
	    } else {
	    errAbort("Can not determine output file name, no -wibFile specified\n");
	    }
	}

    verbose(2, "output files: %s, %s\n", binfile, wigfile);
    /*	Reset the per-file counters and get fresh bin buffers. */
    validLines = 0;	/* to count only lines with data */
    rowCount = 0;	/* to count rows output */
    bincount = 0;	/* to count up to binsize	*/
    fileOffset = 0;	/* current location within binary data file	*/
    fileOffsetBegin = 0;/* location in binary data file where this bin starts*/
    firstInChrom = TRUE;
    freeMem(data_values);
    freeMem(validData);
    data_values =  needMem( (size_t) (binsize * sizeof(double)));
    validData = needMem( (size_t) (binsize * sizeof(unsigned char)));
    overallLowerLimit = 1.0e+300;	/* for the complete set of data */
    overallUpperLimit = -1.0e+300;	/* for the complete set of data */
    binout = mustOpen(binfile,"w");	/*	binary data file	*/
    wigout = mustOpen(wigfile,"w");	/*	table row definition file */
    lf = lineFileOpen(argv[i], TRUE);	/*	input file	*/
    while (lineFileNextReal(lf, &line))
	{
	boolean readingFrameSlipped;
	char *valEnd;
	char *val;
	++validLines;
	wordCount = chopByWhite(line, words, ArraySize(words));
	if (wordCount == 1)
	    {
	    Offset += 1;
	    val = words[0];
	    }
	else if (wordCount == 2)
	    {
	    Offset = atoll(words[0]) - 1;
	    val = words[1];
	    }
	else if (wordCount == 3)
	    {
	    char *newChrom = words[0];
	    boolean sameChrom = (chromName == NULL || sameString(chromName, newChrom));
	    Offset = atoll(words[1]) - 1;
	    val = words[2];
	    if (!sameChrom)
		{
		output_row();
		firstInChrom = TRUE;
		freez(&chromName);
		}
	    if (chromName == NULL)
		chromName = cloneString(newChrom);
	    }
	else
	    {
	    /* NOTE(review): val is NULL from here on; the strtod() call
	     * below is only safe if badFormat() does not return - confirm. */
	    val = NULL;
	    badFormat(lf);
	    }
	/* NOTE(review): Offset is printed with %llu throughout; if it is
	 * declared unsigned this test can never be true - confirm. */
	if (Offset < 0)
	    errAbort("Illegal offset %llu at line %d of %s", Offset+1, lf->lineIx,
	    	lf->fileName);
	dataValue = strtod(val, &valEnd);
	/*	Optionally clamp the value to the range minVal .. maxVal */
	if(trimVals)
	    {
	    dataValue = max(minVal, dataValue);
	    dataValue = min(maxVal, dataValue);
	    }
	if ((*val == '\0') || (*valEnd != '\0'))
	    errAbort("Not a valid float at line %d: %s\n", lf->lineIx, val);
	/* see if this is the first time through, establish chromStart 	*/
	if (firstInChrom) {
	    chromStart = Offset;
	    verbose(2, "first offset: %llu\n", chromStart);
	}
	else if (!firstInChrom && (Offset <= previousOffset))
	    errAbort("ERROR: chrom positions not in order. line %d of %s\n"
	             "previous: %llu >= %llu <-current", 
		     lf->lineIx, lf->fileName, previousOffset+1, Offset+1);
	/* if we are working on a zoom level and the data is not exactly
	 * spaced according to the span, then we need to put each value
	 * in its own row in order to keep positioning correct for these
	 * data values.  The number of skipped bases has to be an even
	 * multiple of dataSpan
	 */
	readingFrameSlipped = FALSE;
	if (!firstInChrom && (dataSpan > 1))
	    {
	    int skippedBases;
	    int spansSkipped;
	    skippedBases = Offset - previousOffset;
	    spansSkipped = skippedBases / dataSpan;
	    if ((spansSkipped * dataSpan) != skippedBases)
		readingFrameSlipped = TRUE;
	    }
	if (readingFrameSlipped)
	    {
	    verbose(2, "data not spanning %llu bases, prev: %llu, this: %llu, at line: %d\n", dataSpan, previousOffset, Offset, lf->lineIx);
	    output_row();
	    chromStart = Offset;	/*	a full reset here	*/
	    }
	/*	Check to see if data is being skipped	*/
	else if ( (!firstInChrom) && (Offset > (previousOffset + dataSpan)) )
	    {
	    unsigned long long off;
	    unsigned long long fillSize;	/* number of bytes */
	    verbose(2, "missing data offsets: %llu - %llu\n",
		    previousOffset+1,Offset-1);
	    /*	If we are just going to fill the rest of this bin with
	     *  no data, then may as well stop here.  No need to fill
	     *  it with nothing.
	     */
	    fillSize = (Offset - (previousOffset + dataSpan)) / dataSpan;
	    verbose(2, "filling NO_DATA for %llu bytes at bincount: %llu\n", fillSize, bincount);
	    if (fillSize + bincount >= binsize)
		{
		verbose(2, "completing a bin due to  NO_DATA for %llu bytes, only %llu - %llu = %llu to go\n", fillSize, binsize, bincount, binsize - bincount);
		verbose(2, "Offset: %llu, previousOffset: %llu\n",
			Offset, previousOffset);
		output_row();
		chromStart = Offset;	/*	a full reset here	*/
	    } else {
		fillSize = 0;
		/*	fill missing data with NO_DATA indication	*/
		/*	Only the counters advance here; no array slot is
		 *	written for a skipped position.	*/
		for (off = previousOffset + dataSpan; off < Offset;
			off += dataSpan)
		    {
		    ++fillSize;
		    ++fileOffset;
		    ++bincount;	/*	count scores in this bin */
		    if (bincount >= binsize) break;
		    }
		verbose(2, "filled NO_DATA for %llu bytes at bincount: %llu\n", fillSize, bincount);
		/*	If that finished off this bin, output it
		 *	This most likely should not happen here.  The
		 *	check above: if (fillSize + bincount >= binsize) 
		 *	should have caught this case already.
		 */
		    if (bincount >= binsize)
			{
			output_row();
			chromStart = Offset;	/* a full reset here */
			}
	        }
	    }
	/*	With perhaps the missing data taken care of, back to the
	 *	real data.
	 */
	data_values[bincount] = dataValue;
	validData[bincount] = TRUE;
	++fileOffset;
	++bincount;	/*	count scores in this bin */
	/*	Is it time to output a row definition ? */
	if (bincount >= binsize)
	    {
	    output_row();
	    }
	previousOffset = Offset;
	firstInChrom = FALSE;
        }	/*	reading file input loop end	*/
    /*	Done with input file, any data points left in this bin ?	*/
    if (bincount)
	{
	output_row();
	}
    verbose(2, "fini: %s, read %d lines, table rows: %llu, data bytes: %lld\n",
	    argv[i], lf->lineIx, rowCount, fileOffset);
    verbose(1, "data limits: [%g:%g], range: %g\n", 
	overallLowerLimit, overallUpperLimit,
	overallUpperLimit - overallLowerLimit);
    /*	Close this file's streams and release its names so the next
     *	input file starts clean.	*/
    lineFileClose(&lf);
    fclose(binout);
    fclose(wigout);
    freeMem(binfile);
    freeMem(wigfile);
    freeMem(chromName);
    binfile = (char *) NULL;
    wigfile = (char *) NULL;
    chromName = (char *) NULL;
    }
return;
}