struct qaSeq *qacReadNext(FILE *f, boolean isSwapped)
/* Fetch the next record from an open .qac stream.  A record is read as a
 * name string, a 32-bit original size, a 32-bit compressed size and then
 * that many bytes of compressed data.  Returns NULL when readString() yields
 * no name; otherwise returns a newly allocated qaSeq whose qa buffer holds
 * the uncompressed data.  When isSwapped is set both sizes are byte-swapped
 * after reading. */
{
char *name = readString(f);
struct qaSeq *rec;
signed char *packed;
bits32 fullSize, packedSize;

if (name == NULL)
    return NULL;

/* The two sizes sit back to back in the file, original size first. */
mustReadOne(f, fullSize);
mustReadOne(f, packedSize);
if (isSwapped)
    {
    fullSize = byteSwap32(fullSize);
    packedSize = byteSwap32(packedSize);
    }

AllocVar(rec);
rec->name = name;
rec->size = fullSize;
rec->qa = needLargeMem(fullSize);

/* Pull in the compressed bytes, expand them, and drop the scratch copy. */
packed = needLargeMem(packedSize);
mustRead(f, packed, packedSize);
rleUncompress(packed, packedSize, rec->qa, fullSize);
freeMem(packed);
return rec;
}
Ejemplo n.º 2
0
static void getCloneDna(struct clone *clone, struct hash *fragHash)
/* Assemble clone->dna from the sequences in clone->faFile.  Each sequence
 * is looked up by name in fragHash and copied to the start..end range
 * recorded for that fragment, so the buffer is laid out by fragment
 * coordinates.  Aborts when a sequence has no fragment entry or when its
 * length differs from the fragment's span.  Only the ranges covered by
 * fragments and the final terminator byte are written here. */
{
struct dnaSeq *allSeqs = faReadAllDna(clone->faFile);
struct dnaSeq *cur;

clone->dna = needLargeMem(clone->size+1);
clone->dna[clone->size] = 0;
uglyf("GetCloneDna %s\n", clone->faFile);

cur = allSeqs;
while (cur != NULL)
    {
    struct frag *frag = hashFindVal(fragHash, cur->name);
    int span;

    if (frag == NULL)
        errAbort("Couldn't find %s from %s in trans files", cur->name, clone->faFile);
    assert(frag->end <= clone->size);
    span = frag->end - frag->start;
    assert(span >= 0);
    if (span != cur->size)
        errAbort("Size mismatch (%d vs %d) between trans and .ffa files on %s", 
		span, cur->size, frag->name);
    memcpy(clone->dna + frag->start,  cur->dna,  span);
    cur = cur->next;
    }
freeDnaSeqList(&allSeqs);
}
Ejemplo n.º 3
0
void *cloneMem(void *pt, size_t size)
/* Return a freshly allocated buffer of size bytes holding a copy of the
 * size bytes at pt. */
{
void *copy = needLargeMem(size);
/* memcpy hands back its destination, which is the new buffer. */
return memcpy(copy, pt, size);
}
Ejemplo n.º 4
0
void dlSort(struct dlList *list, 
	int (*compare )(const void *elem1,  const void *elem2))
/* Sort a dlList using qsort and a temporary array of node pointers.
 * The nodes themselves are reused: they are collected into the array,
 * sorted, and appended back onto the re-initialized list in sorted order.
 * Per the original notes, the compare function really receives pointers
 * to pointers of whatever type is stored in the val field of the nodes. */
{
int nodeCount = dlCount(list);
struct dlSorter *slots;
struct dlNode *cur;
int ix;

/* Nothing to reorder with fewer than two nodes. */
if (nodeCount <= 1)
    return;

slots = needLargeMem(nodeCount * sizeof(slots[0]));
cur = list->head;
for (ix = 0; ix < nodeCount; ++ix)
    {
    slots[ix].node = cur;
    cur = cur->next;
    }

/* dlNodeCmp is handed to qsort; the caller's comparison is published
 * through compareFunc just before the sort. */
compareFunc = compare;
qsort(slots, nodeCount, sizeof(slots[0]), dlNodeCmp);

dlListInit(list);
for (ix = 0; ix < nodeCount; ++ix)
    dlAddTail(list, slots[ix].node);
freeMem(slots);
}
Ejemplo n.º 5
0
void shuffleList(void *pList)
/* Put the elements of an slList into shuffled order.  Usage:
 *     shuffleList(&list)
 * where list is a pointer to a structure that begins with a next field.
 * Lists with fewer than two elements are left untouched. */
{
struct slList **headPt = (struct slList **)pList;
struct slList *head = *headPt;
int n = slCount(head);
struct slList **ptrs;
struct slList *cur;
int ix;

if (n <= 1)
    return;

/* Flatten the list into an array of element pointers. */
ptrs = needLargeMem(n * sizeof(*ptrs));
ix = 0;
for (cur = head; cur != NULL; cur = cur->next)
    ptrs[ix++] = cur;

/* Four shuffling passes over the pointer array. */
for (ix = 0; ix < 4; ++ix)
    shuffleArrayOfPointers(ptrs, n);

/* Push elements on the head in array order, then flip the list. */
head = NULL;
for (ix = 0; ix < n; ++ix)
    {
    ptrs[ix]->next = head;
    head = ptrs[ix];
    }
freeMem(ptrs);
slReverse(&head);
*headPt = head;
}
Ejemplo n.º 6
0
static void makeOligoHistogram(char *fileName, struct seqList *seqList, 
    int oligoSize, int **retTable, int *retTotal)
/* Count how often each oligo of oligoSize bases occurs.  Sequences are
 * taken from seqList when it is non-NULL; otherwise they are read one at a
 * time from the FA file fileName (pass NULL for whichever is unused).
 * The count table has 4^oligoSize int slots indexed by oligoVal() and is
 * returned in *retTable; the number of oligos counted goes to *retTotal.
 * Positions where oligoVal() is negative, or where a soft mask is present
 * and masked() is true, are not counted. */
{
int fromFile = (seqList == NULL);
FILE *in = NULL;
int slotCount = (1<<(oligoSize+oligoSize));
int byteCount = slotCount * sizeof(int);
int *counts = needLargeMem(byteCount);
struct seqList *nextEl = seqList;
int *mask = NULL;	/* Stays NULL when reading from file. */
int counted = 0;

if (fromFile)
    in = mustOpen(fileName, "rb");
memset(counts, 0, byteCount);

for (;;)
    {
    struct dnaSeq *seq;
    int lastStart;
    int pos;

    /* Fetch the next sequence from whichever source is in use. */
    if (fromFile)
        {
        seq = faReadOneDnaSeq(in, "", TRUE);
        if (seq == NULL)
            break;
        }
    else
        {
        if (nextEl == NULL)
            break;
        seq = nextEl->seq;
        mask = nextEl->softMask;
        nextEl = nextEl->next;
        }

    /* Slide a window of oligoSize bases along the sequence. */
    lastStart = seq->size - oligoSize;
    for (pos = 0; pos <= lastStart; ++pos)
        {
        int val;
        if (mask != NULL && masked(mask+pos, oligoSize))
            continue;
        val = oligoVal(seq->dna + pos, oligoSize);
        if (val >= 0)
            {
            counts[val] += 1;
            ++counted;
            }
        }

    /* Sequences read from file belong to us; list sequences do not. */
    if (fromFile)
        freeDnaSeq(&seq);
    }
carefulClose(&in);
*retTable = counts;
*retTotal = counted;
}
Ejemplo n.º 7
0
struct axt *createAxtGap(char *nibFile, char *chrom, 	
			 int start, int end, char strand)
/* Return an axt "null alignment" for chrom:start-end in which the query is
 * all deletes: tSym is a copy of the sequence loaded from nibFile and qSym
 * is a string of '-' characters.  Both symbol strings hold symCount
 * (end-start) characters followed by a zero terminator.
 * tName is set to the caller's chrom pointer and qName to a string literal;
 * neither is copied.  Requires start <= end. */
{
struct axt *axt;
int size = end-start;
char *gapSym = needLargeMem(size+1);
struct dnaSeq *seq = NULL;
int i;

/* Exactly size dashes, then the terminator.  (Filling size+1 dashes, as
 * was done before, left the string unterminated.) */
for (i = 0; i < size; ++i)
    gapSym[i] = '-';
gapSym[size] = 0;

AllocVar(axt);
axt->tName = chrom;
axt->tStart = start;
axt->tEnd = end;
axt->tStrand = strand;
axt->qName = "gap";
axt->qStart = 1;
axt->qEnd = size;
axt->qStrand = strand;
axt->symCount = size;
axt->score = 0;

/* Keep a private copy of the target bases and release the loaded
 * sequence so that it is not leaked. */
seq = nibLoadPart(nibFile, start,size);
axt->tSym = cloneMem(seq->dna, size+1);
freeDnaSeq(&seq);

/* gapSym is already a heap buffer of the right size, so hand it over
 * directly instead of cloning it and leaking the original. */
axt->qSym = gapSym;
return axt;
}
Ejemplo n.º 8
0
void slSort(void *pList, int (*compare )(const void *elem1,  const void *elem2))
/* Sort a singly linked list in place using qsort on a temporary array of
 * element pointers.  pList is the address of the list head; the head is
 * rewritten to point at the sorted list.  compare is called by qsort with
 * pointers to the array slots, i.e. pointers to element pointers.  Lists
 * with fewer than two elements are left untouched. */
{
struct slList **headPt = (struct slList **)pList;
struct slList *head = *headPt;
int n = slCount(head);
struct slList **ptrs;
struct slList *cur;
int ix;

if (n <= 1)
    return;

/* Flatten the list into an array of element pointers and sort that. */
ptrs = needLargeMem(n * sizeof(*ptrs));
ix = 0;
for (cur = head; cur != NULL; cur = cur->next)
    ptrs[ix++] = cur;
qsort(ptrs, n, sizeof(ptrs[0]), compare);

/* Push elements on the head in sorted order, then flip the list. */
head = NULL;
for (ix = 0; ix < n; ++ix)
    {
    ptrs[ix]->next = head;
    head = ptrs[ix];
    }
freeMem(ptrs);
slReverse(&head);
*headPt = head;
}
Ejemplo n.º 9
0
void *needLargeZeroedMem(size_t size)
/* Get size bytes from needLargeMem() and return them with every byte
 * cleared to zero. */
{
void *block = needLargeMem(size);
/* memset returns the pointer it was given. */
return memset(block, 0, size);
}
Ejemplo n.º 10
0
static void expandFaFastBuf(int bufPos)
/* Grow faFastBuf.  The first call (size still zero) allocates 64k.  Every
 * later call allocates a buffer twice the current size, copies the first
 * bufPos bytes of the old buffer into it and frees the old one. */
{
DNA *bigger;
int biggerSize;

if (faFastBufSize == 0)
    {
    /* First use: nothing to preserve. */
    faFastBufSize = 64 * 1024;
    faFastBuf = needLargeMem(faFastBufSize);
    return;
    }

biggerSize = faFastBufSize + faFastBufSize;
bigger = needLargeMem(biggerSize);
memcpy(bigger, faFastBuf, bufPos);
freeMem(faFastBuf);
faFastBuf = bigger;
faFastBufSize = biggerSize;
}
Ejemplo n.º 11
0
static struct nt4Seq *allocNt4(size_t baseCount, char *name)
/* Make a new nt4Seq with room for baseCount bases.  The bases buffer is
 * bits32PaddedSize(baseCount) bytes and is not filled in here; name is
 * duplicated with cloneString(). */
{
size_t packedBytes = bits32PaddedSize(baseCount);
struct nt4Seq *nt4 = needMem(sizeof(*nt4));

nt4->bases = needLargeMem(packedBytes);
nt4->baseCount = baseCount;
nt4->name = cloneString(name);
return nt4;
}
struct qaSeq *qaMakeConstant(char *name, int val, int size)
/* Build a qaSeq of the given size in which every quality value is val.
 * name is duplicated.  The qa buffer is allocated with one byte more than
 * size; only the first size bytes are set here. */
{
struct qaSeq *constQa;

AllocVar(constQa);
constQa->size = size;
constQa->name = cloneString(name);
constQa->qa = needLargeMem(size+1);
memset(constQa->qa, val, size);
return constQa;
}
Ejemplo n.º 13
0
void readInGulp(char *fileName, char **retBuf, size_t *retSize)
/* Load an entire file into one newly allocated buffer.  The buffer is one
 * byte longer than the file and that last byte is set to zero, so the
 * contents can be used as a C string.  The buffer is returned in *retBuf
 * and, when retSize is not NULL, the file size (excluding the added zero)
 * in *retSize. */
{
size_t byteCount = (size_t)fileSize(fileName);
FILE *in = mustOpen(fileName, "rb");
char *contents = needLargeMem(byteCount+1);

/* Publish the buffer before reading, as the original did. */
*retBuf = contents;
mustRead(in, contents, byteCount);
contents[byteCount] = 0;
fclose(in);
if (retSize != NULL)
    *retSize = byteCount;
}
Ejemplo n.º 14
0
void snpMaskGenes(char *nibFile, char *outFile)
/* snpMaskGenes - Print gene sequence, exons only, 
   using IUPAC codes for single base substitutions.
   Genes and their SNPs are read for the file-level chromName.  For each
   gene the transcribed region txStart..txEnd is loaded from nibFile, every
   SNP position is replaced by the code returned from iupac(), and the
   exons are written to outFile through printExons().  Aborts if closing
   outFile fails. */
{
struct genePred *genes = NULL;
struct genePred *gene = NULL;
struct dnaSeq *seq;
char *ptr;
struct snpSimple *snps = NULL;
struct snpSimple *snp = NULL;
int snpPos = 0;
int size = 0;
FILE *fileHandle = mustOpen(outFile, "w");

genes = readGenes(chromName);

for (gene = genes; gene != NULL; gene = gene->next)
    {
    verbose(4, "gene = %s\n", gene->name);

    snps = readSnpsFromGene(gene, chromName);

    size = gene->txEnd - gene->txStart;
    assert(size > 0);
    /* nibLoadPartMasked() returns its own dnaSeq.  Allocating one here
     * first, as was done before, only leaked it when seq was overwritten. */
    seq = nibLoadPartMasked(NIB_MASK_MIXED, nibFile, gene->txStart, size);

    ptr = seq->dna;

    /* do substitutions */
    /* including introns; doesn't take much time, keeps code clean */
    for (snp = snps; snp != NULL; snp = snp->next)
        {
	/* Position of the SNP relative to the start of the loaded region. */
	snpPos = snp->chromStart - gene->txStart;
	assert(snpPos >= 0);
	verbose(5, "before substitution %c\n", ptr[snpPos]);
        ptr[snpPos] = iupac(snp->name, snp->observed, ptr[snpPos]);
	verbose(5, "after substitution %c\n", ptr[snpPos]);
        }

    printExons(gene, seq, fileHandle);
    snpSimpleFreeList(&snps);
    dnaSeqFree(&seq);  
    }

geneFreeList(&genes);
if (fclose(fileHandle) != 0)
    errnoAbort("fclose failed");
}
Ejemplo n.º 15
0
Archivo: ps02.c Proyecto: bowhan/kent
struct patSpace *newPatSpace()
/* Allocate a patSpace and clear it to all zero bytes.  The clock1000()
 * calls bracket the allocation and the clearing; the readings are taken
 * but not reported or otherwise used here. */
{
struct patSpace *space;
long allocBegin, allocDone;
long clearBegin, clearDone;

allocBegin = clock1000();
space = needLargeMem(sizeof(*space));
allocDone = clock1000();

clearBegin = clock1000();
memset(space, 0, sizeof(*space));
clearDone = clock1000();

return space;
}
void qacWriteNext(FILE *f, struct qaSeq *qa)
/* Append one record to a .qac file: the name string, the original size,
 * the compressed size and then the compressed data produced by
 * rleCompress().  The scratch buffer used for compression is one and a
 * half times qa->size and is freed before returning. */
{
bits32 fullSize = qa->size;
int scratchSize = qa->size + (qa->size>>1);
signed char *scratch = needLargeMem(scratchSize);
bits32 packedSize = rleCompress(qa->qa, qa->size, scratch);

writeString(f, qa->name);
writeOne(f, fullSize);
writeOne(f, packedSize);
mustWrite(f, scratch, packedSize);
freeMem(scratch);
}
Ejemplo n.º 17
0
void _pf_cm_file_read(_pf_Stack *stack)
/* Read up to a requested number of bytes from a file into a new string.
 * stack[0] holds the file and stack[1] the byte count on entry; on return
 * stack[0] holds the resulting string.  The string's size is the number of
 * bytes fread() delivered, so it is zero at EOF and shorter than requested
 * near EOF.  A zero byte is stored just past the data. */
{
struct file *file = stack[0].v;
_pf_Int wanted = stack[1].Int;
void *space = needLargeMem(wanted+1);	/* One extra byte for the terminator. */
struct _pf_string *result = _pf_string_new(space, wanted);
_pf_Int got = fread(result->s, 1, wanted, file->f);

if (got < 0)
    got = 0;
result->size = got;
result->s[got] = 0;
stack[0].String = result;
}
Ejemplo n.º 18
0
int *makeRcTable(int oligoSize)
/* Build a lookup table that maps each packed oligo value to the packed
 * value of its reverse complement.  The table has 4^oligoSize entries and
 * is allocated here.  The unpacked oligo is held in a 17 byte local
 * buffer. */
{
int entryCount = (1<<(oligoSize+oligoSize));
int byteCount = entryCount * sizeof(int);
int *rcOf = needLargeMem(byteCount);
char bases[17];
int packed = 0;

while (packed < entryCount)
    {
    /* Expand to letters, flip, and pack again. */
    unpackVal(packed, oligoSize, bases);
    reverseComplement(bases, oligoSize);
    rcOf[packed] = oligoVal(bases, oligoSize);
    ++packed;
    }
return rcOf;
}
Ejemplo n.º 19
0
void printExons(struct genePred *gene, struct dnaSeq *seq, FILE *f)
/* Write the exons of gene, joined end to end, as one FA record named after
 * the gene.  seq->dna is indexed relative to gene->txStart: each exon is
 * copied from offset exonStart-txStart.  The joined sequence is built in a
 * temporary dnaSeq that is freed before returning. */
{
struct dnaSeq *spliced;
int exonIx;
int relStart, relEnd, exonLen;
int totalLen = 0;
int filled = 0;

verbose(3, "exonCount = %d\n", gene->exonCount);

/* Pass one: add up the exon lengths so the buffer can be sized. */
exonIx = 0;
while (exonIx < gene->exonCount)
    {
    relStart = gene->exonStarts[exonIx] - gene->txStart;
    relEnd   = gene->exonEnds[exonIx] - gene->txStart;
    exonLen = relEnd - relStart;
    assert (exonLen > 0);
    totalLen += exonLen;
    exonIx++;
    }

/* modeled after hgSeq.c */
AllocVar(spliced);
spliced->dna = needLargeMem(totalLen+1);
spliced->size = totalLen;

/* Pass two: copy each exon right behind the previous one. */
exonIx = 0;
while (exonIx < gene->exonCount)
    {
    relStart = gene->exonStarts[exonIx] - gene->txStart;
    relEnd   = gene->exonEnds[exonIx] - gene->txStart;
    exonLen = relEnd - relStart;
    verbose(4, "size = %d\n", exonLen);
    memcpy(spliced->dna+filled, seq->dna+relStart, exonLen);
    filled += exonLen;
    exonIx++;
    }

assert(filled == spliced->size);
spliced->dna[filled] = 0;
faWriteNext(f, gene->name, spliced->dna, spliced->size);
freeDnaSeq(&spliced);

}
Ejemplo n.º 20
0
/* Translate a nucleotide sequence into amino acids, starting at offset and
 * covering inSize bases (the rest of the sequence when inSize is 0 or too
 * large).  Codons are looked up with lookupUniqCodon() when doUniq is set,
 * otherwise with lookupCodon().  A codon that translates to 'X' and has a
 * dash in any of its three positions yields '-'.  A zero from the lookup
 * ends translation when stop is set and is written as 'Z' otherwise.
 * The result is zero terminated; its name field is not set here.
 */
static aaSeq *doTranslate(struct dnaSeq *inSeq, unsigned offset, 
    unsigned inSize, boolean stop, boolean doUniq)
{
DNA *bases = inSeq->dna;
aaSeq *protein;
AA *out;
int pos, lastCodonStart;
int aaCount = 0;

assert(offset <= inSeq->size);
if ((inSize == 0) || (inSize > (inSeq->size - offset)))
    inSize = inSeq->size - offset;
lastCodonStart = offset + inSize - 3;

AllocVar(protein);
protein->dna = out = needLargeMem(inSize/3+1);
for (pos = offset; pos <= lastCodonStart; pos += 3)
    {
    DNA *codon = bases + pos;
    AA aa = doUniq ? lookupUniqCodon(codon) : lookupCodon(codon);

    /* An unknown codon caused by a gap character becomes a gap. */
    if (aa == 'X' && (codon[0] == '-' || codon[1] == '-' || codon[2] == '-'))
	aa = '-';

    if (aa == 0)
	{
	if (stop)
	    break;
	aa = 'Z';
	}
    *out++ = aa;
    ++aaCount;
    }
*out = 0;
assert(aaCount <= inSize/3+1);
protein->size = aaCount;
return protein;
}
Ejemplo n.º 21
0
FILE *pipelineFile(struct pipeline *pl)
/* Return a FILE wrapped around the pipeline's file descriptor.  The FILE
 * is created on the first call and the same one is returned afterwards.
 * It belongs to the pipeline object, so callers must not close it.
 * Aborts if a lineFile has already been associated with the pipeline or
 * if fdopen() fails. */
{
char *mode;

/* Already created on an earlier call. */
if (pl->pipeFh != NULL)
    return pl->pipeFh;

if (pl->pipeLf != NULL)
    errAbort("can't call pipelineFile after having associated a lineFile with a pipeline");

if (pl->options & pipelineRead)
    mode = "r";
else
    mode = "w";
pl->pipeFh = fdopen(pl->pipeFd, mode);
if (pl->pipeFh == NULL)
    errnoAbort("fdopen failed for: %s", pl->procName);

/* Give the stream a fully buffered stdio buffer of FILE_BUF_SIZE bytes. */
pl->stdioBuf = needLargeMem(FILE_BUF_SIZE);
setvbuf(pl->pipeFh, pl->stdioBuf,  _IOFBF, FILE_BUF_SIZE);
return pl->pipeFh;
}
Ejemplo n.º 22
0
/* Read mergedSize bytes starting at firstBlock->offset in one request, then
 * hand each block from firstBlock up to (not including) afterBlock to
 * openBlock() together with its slice of the merged buffer.  Stops at the
 * first block for which openBlock() returns true and returns true in that
 * case; returns false when every block was processed.  The merged buffer is
 * freed before returning either way.  chrom is not used here. */
static bool downloadBlockRun(BigFileReaderData * data, char * chrom, struct fileOffsetSize * firstBlock, struct fileOffsetSize * afterBlock, bits64 mergedSize) {
	char * merged;
	char * cursor;
	struct fileOffsetSize * cur;
	bool stopped = false;

	udcSeek(data->udc, firstBlock->offset);
	merged = (char *) needLargeMem(mergedSize);
	udcMustRead(data->udc, merged, mergedSize);

	/* cursor walks through the merged buffer one block at a time */
	cursor = merged;
	for (cur = firstBlock; cur != afterBlock; cur = cur->next) {
		if (openBlock(data, cur, cursor)) {
			stopped = true;
			break;
		}
		cursor += cur->size;
	}

	freeMem(merged);
	return stopped;
}
Ejemplo n.º 23
0
Archivo: ps02.c Proyecto: bowhan/kent
int allocPatSpaceLists(struct patSpace *ps)
/* Allocate one pool holding all pattern lists and point each usable list
 * at its slice of the pool.  A pattern whose list size is maxPat or more
 * is treated as overused: it gets no space and its list pointer is left
 * alone.  Prints how many patterns were used and overused.
 * Returns the combined size of all usable lists. */
{
bits16 *listSizes = ps->listSizes;
bits16 **lists = ps->lists;
bits16 maxPat = ps->maxPat;
bits16 *pool;
int total = 0;
int kept = 0, dropped = 0;
int ix;

/* First pass: tally how much room the usable lists need. */
for (ix = 0; ix < patSpaceSize; ++ix)
    {
    int n = listSizes[ix];
    if (n >= maxPat)
        ++dropped;
    else
        {
        total += n;
        ++kept;
        }
    }
printf("%d patterns used, %d overused\n", kept, dropped);

pool = needLargeMem(total*sizeof(pool[0]));
ps->allocated = pool;

/* Second pass: carve the pool into consecutive slices. */
for (ix = 0; ix < patSpaceSize; ++ix)
    {
    int n = listSizes[ix];
    if (n < maxPat)
        {
        lists[ix] = pool;
        pool += n;
        }
    }
return total;
}
Ejemplo n.º 24
0
struct wabaChromHit *wchLoad(char *row[])
/* Build a wabaChromHit from a database row.  Columns 0-4 supply query,
 * chromStart, chromEnd, strand and milliScore.  Column 5 supplies the
 * squeezed symbols: chromEnd-chromStart bytes are copied from it and zero
 * terminated.  Written by hand because, per the original note, autoSql
 * cannot generate the squeezedSym handling. */
{
struct wabaChromHit *hit;
char *symCopy;
int symLen;

AllocVar(hit);
hit->query = cloneString(row[0]);
hit->chromStart = sqlUnsigned(row[1]);
hit->chromEnd = sqlUnsigned(row[2]);
hit->strand = row[3][0];
hit->milliScore = sqlUnsigned(row[4]);

/* The symbol string is as long as the chromosome range. */
symLen = hit->chromEnd - hit->chromStart;
symCopy = needLargeMem(symLen+1);
memcpy(symCopy, row[5], symLen);
symCopy[symLen] = 0;
hit->squeezedSym = symCopy;
return hit;
}
Ejemplo n.º 25
0
aaSeq *translateSeqN(struct dnaSeq *inSeq, unsigned offset, unsigned inSize, boolean stop)
/* Translate inSeq into a new amino acid sequence.  offset is the position
 * of the first base to translate; inSize is the number of bases to cover,
 * with 0 (or a value reaching past the end) meaning everything from offset
 * on.  A zero from lookupCodon() ends translation when stop is set and is
 * written as 'Z' otherwise.  The result is zero terminated and carries a
 * copy of inSeq's name. */
{
DNA *bases = inSeq->dna;
aaSeq *protein;
AA *out;
AA aa;
int pos, lastCodonStart;
int aaCount = 0;

assert(offset <= inSeq->size);
if ((inSize == 0) || (inSize > (inSeq->size - offset)))
    inSize = inSeq->size - offset;
lastCodonStart = offset + inSize - 3;

AllocVar(protein);
protein->dna = out = needLargeMem(inSize/3+1);

pos = offset;
while (pos <= lastCodonStart)
    {
    aa = lookupCodon(bases+pos);
    if (aa == 0)
	{
	if (stop)
	    break;
	aa = 'Z';
	}
    *out++ = aa;
    ++aaCount;
    pos += 3;
    }
*out = 0;
assert(aaCount <= inSize/3+1);
protein->size = aaCount;
protein->name = cloneString(inSeq->name);
return protein;
}
Ejemplo n.º 26
0
/* one parsed data line of a background file */
struct bgFileRow {
    char chrom[16];     /* chromosome name, at most 15 characters */
    long chromStart;
    long chromEnd;
    long number;
    long counts[16];    /* in file order: AA AC AG AT CA CC CG CT
                         *                GA GC GG GT TA TC TG TT */
};

/* if the stream starts with a '#', discard the rest of that line;
 * otherwise leave the stream where it was */
static void bgSkipHeader(FILE* f) {
    int ch = fgetc(f);

    if(ch == '#') {
        /* also stop at EOF so that a header without a newline cannot
         * loop forever */
        while(ch != '\n' && ch != EOF)
            ch = fgetc(f);
    } else if(ch != EOF)
        ungetc(ch, f);
}

/* parse the next 20 fields into row and return what fscanf returned:
 * 20 for a complete row, EOF when the input is exhausted, anything else
 * for an incomplete row */
static int bgReadRow(FILE* f, struct bgFileRow* row) {
    long* n = row->counts;

    return fscanf(f, "%15s\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t"
                     "%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld\t%ld",
            row->chrom, &row->chromStart, &row->chromEnd, &row->number,
            &n[0], &n[1], &n[2], &n[3], &n[4], &n[5], &n[6], &n[7],
            &n[8], &n[9], &n[10], &n[11], &n[12], &n[13], &n[14], &n[15]);
}

/* fill one background point from a parsed row: the position is the window
 * midpoint, the radius half the window length, and the score the fraction
 * of counts on the diagonal (AA, CC, GG, TT) */
static void bgFillPoint(bgPoint* point, struct bgFileRow* row) {
    long* n = row->counts;
    double same = ((double)n[0]) + n[5] + n[10] + n[15];
    double all = ((double)n[0]) + n[1] + n[2] + n[3] + n[4] + n[5] + n[6] + n[7]
                 + n[8] + n[9] + n[10] + n[11] + n[12] + n[13] + n[14] + n[15];

    point->position = (row->chromStart + row->chromEnd) / 2;
    point->score = same / all;
    point->number = row->number;
    point->radius = (row->chromEnd - row->chromStart) / 2;
}

/* mark a point as a list sentry at the given position */
static void bgSetSentry(bgPoint* point, long position) {
    point->position = position;
    point->score = -1;
    point->number = -1;
    point->radius = -1;
}

/* load the background from the given filename and put the number of windows
 * at the given address.  The returned array holds a begin sentry, one point
 * per data line, and an end sentry, and *numberOfWindows counts all of them
 * (data lines + 2).  An optional first line starting with '#' is skipped.
 * Aborts if the file has no data lines, if a data line cannot be parsed, or
 * if the windows are not all on the same chromosome. */
bgPoint* loadBackground(char* filename, long* numberOfWindows) {
    struct bgFileRow first;
    struct bgFileRow row;
    FILE* backgroundFile;
    bgPoint* backgroundData = 0;
    long windows = 0;
    long i;
    int got;

    *numberOfWindows = 0;
    backgroundFile = mustOpen(filename, "r");

    /* first pass: count how many windows there are.  Looping on the
     * fscanf result rather than on feof() also ends on a read error. */
    bgSkipHeader(backgroundFile);
    while((got = bgReadRow(backgroundFile, &row)) != EOF) {
        if(got == 20)
            windows++;
    }
    if(windows == 0)
        errAbort("no background windows found in file %s\n", filename);

    /* add two to account for the end and begin sentries */
    *numberOfWindows = windows + 2;
    backgroundData = needLargeMem((*numberOfWindows) * sizeof(bgPoint));

    /* second pass: read the data starting from the beginning of the file */
    rewind(backgroundFile);
    bgSkipHeader(backgroundFile);

    /* begin of list sentry */
    bgSetSentry(&backgroundData[0], -1);

    for(i = 1; i <= windows; i++) {
        /* the read is a real statement, not an assert() argument, so it
         * still happens when assertions are compiled out */
        if(bgReadRow(backgroundFile, &row) != 20)
            errAbort("malformed background window %ld in file %s\n",
                    i, filename);

        /* remember the first window to make sure that all the windows
         * are on the same chrom */
        if(i == 1)
            first = row;
        else if(!sameString(first.chrom, row.chrom))
            errAbort("all window do not come from the same chromosome "
                    "in file %s\n", filename);

        bgFillPoint(&backgroundData[i], &row);
    }

    /* end of list sentry; i is windows + 1 here, the last array slot */
    bgSetSentry(&backgroundData[i], LONG_MAX);

    fclose(backgroundFile);

    return backgroundData;
}
int bigWigIntervalDump(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end, int maxCount,
	FILE *out)
/* Print out info on bigWig parts that intersect chrom:start-end.   Set maxCount to 0 if you 
 * don't care how many are printed.  Returns number printed.
 * Aborts if bwf is not a bigWig file.  Blocks are uncompressed first when
 * the file has a non-zero uncompressBufSize.  Once a block's dump reaches
 * the remaining maxCount, no further blocks are read. */
{
if (bwf->typeSig != bigWigSig)
   errAbort("Trying to do bigWigIntervalDump on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir, 
	chrom, start, end, NULL);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
int printCount = 0;
int done = 0;	/* Set once maxCount has been reached. */

/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bwf->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bwf->uncompressBufSize);

/* This loop is a little complicated because we merge the read requests for efficiency, but we 
 * have to then go back through the data one unmerged block at a time. 
 * The done flag is needed because the break below only leaves the inner
 * loop: without it the outer loop would start over on the very same block
 * and never finish. */
for (block = blockList; block != NULL && !done; )
    {
    /* Find contigious blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (;block != afterGap; block = block->next)
        {
	/* Uncompress if necessary. */
	char *blockPt, *blockEnd;
	if (uncompressBuf)
	    {
	    blockPt = uncompressBuf;
	    int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize);
	    blockEnd = blockPt + uncSize;
	    }
	else
	    {
	    blockPt = blockBuf;
	    blockEnd = blockPt + block->size;
	    }

	/* Do the actual dump. */
	int oneCount = bigWigBlockDumpIntersectingRange(bwf->isSwapped, blockPt, blockEnd, 
		chrom, start, end, maxCount, out);

	/* Keep track of how many dumped, not exceeding maximum. */
	printCount += oneCount;
	if (maxCount != 0)
	    {
	    if (oneCount >= maxCount)
		{
		done = 1;
		break;
		}
	    maxCount -= oneCount;
	    }
	blockBuf += block->size;
	}
    freeMem(mergedBuf);
    }
freeMem(uncompressBuf);

slFreeList(&blockList);
return printCount;
}
struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
	struct lm *lm)
/* Get data for interval.  Return list allocated out of lm. 
 * Every item found in the overlapping blocks is clipped to start..end and
 * kept only if something is left after clipping.  Items are pushed on the
 * head of the list as they are found and the list is reversed once at the
 * end.  Aborts if bwf is not a bigWig file or a block has an unknown
 * section type. */
{
if (bwf->typeSig != bigWigSig)
   errAbort("Trying to do bigWigIntervalQuery on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct bbiInterval *el, *list = NULL;
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir, 
	chrom, start, end, NULL);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
float val;
int i;

/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bwf->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bwf->uncompressBufSize);

/* This loop is a little complicated because we merge the read requests for efficiency, but we 
 * have to then go back through the data one unmerged block at a time. */
for (block = blockList; block != NULL; )
    {
    /* Find contigious blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    /* The merged read runs from the first block to the end of beforeGap. */
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (;block != afterGap; block = block->next)
        {
	/* Uncompress if necessary. */
	char *blockPt, *blockEnd;
	if (uncompressBuf)
	    {
	    blockPt = uncompressBuf;
	    int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize);
	    blockEnd = blockPt + uncSize;
	    }
	else
	    {
	    blockPt = blockBuf;
	    blockEnd = blockPt + block->size;
	    }

	/* Deal with insides of block. */
	/* The header read and each memRead call below take &blockPt and are
	 * expected to move it forward (see the assert after the switch), so
	 * the order of the reads must match the layout of the data. */
	struct bwgSectionHead head;
	bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
	switch (head.type)
	    {
	    case bwgTypeBedGraph:
		{
		/* Each item is read as start, end, value. */
		for (i=0; i<head.itemCount; ++i)
		    {
		    bits32 s = memReadBits32(&blockPt, isSwapped);
		    bits32 e = memReadBits32(&blockPt, isSwapped);
		    val = memReadFloat(&blockPt, isSwapped);
		    if (s < start) s = start;
		    if (e > end) e = end;
		    if (s < e)
			{
			lmAllocVar(lm, el);
			el->start = s;
			el->end = e;
			el->val = val;
			slAddHead(&list, el);
			}
		    }
		break;
		}
	    case bwgTypeVariableStep:
		{
		/* Each item is read as start, value; the end is start plus
		 * the section's itemSpan. */
		for (i=0; i<head.itemCount; ++i)
		    {
		    bits32 s = memReadBits32(&blockPt, isSwapped);
		    bits32 e = s + head.itemSpan;
		    val = memReadFloat(&blockPt, isSwapped);
		    if (s < start) s = start;
		    if (e > end) e = end;
		    if (s < e)
			{
			lmAllocVar(lm, el);
			el->start = s;
			el->end = e;
			el->val = val;
			slAddHead(&list, el);
			}
		    }
		break;
		}
	    case bwgTypeFixedStep:
		{
		/* Only values are read; positions begin at head.start and
		 * advance by head.itemStep.  Clipping is done on copies so
		 * that the running s and e stay unclipped. */
		bits32 s = head.start;
		bits32 e = s + head.itemSpan;
		for (i=0; i<head.itemCount; ++i)
		    {
		    val = memReadFloat(&blockPt, isSwapped);
		    bits32 clippedS = s, clippedE = e;
		    if (clippedS < start) clippedS = start;
		    if (clippedE > end) clippedE = end;
		    if (clippedS < clippedE)
			{
			lmAllocVar(lm, el);
			el->start = clippedS;
			el->end = clippedE;
			el->val = val;
			slAddHead(&list, el);
			}
		    s += head.itemStep;
		    e += head.itemStep;
		    }
		break;
		}
	    default:
		internalErr();
		break;
	    }
	/* Every byte of the block must have been consumed. */
	assert(blockPt == blockEnd);
	blockBuf += block->size;
	}
    freeMem(mergedBuf);
    }
freeMem(uncompressBuf);
slFreeList(&blockList);
slReverse(&list);
return list;
}
Ejemplo n.º 29
0
static void fetchIntoBuf(struct bbiFile *bwf, char *chrom, bits32 start, bits32 end,
	struct bigWigValsOnChrom *chromVals)
/* Get data for interval and store it in chromVals: for every item found,
 * the covered positions are set in chromVals->covBuf and the value is
 * written to chromVals->valBuf at each covered position.  Nothing is
 * returned.  Aborts if bwf is not a bigWig file or a block has an unknown
 * section type. */
{
/* A lot of code duplicated with bigWigIntervalQuery, but here the clipping
 * is simplified since always working across full chromosome, and the output is
 * different.  Since both of these are in inner loops and speed critical, it's hard
 * to factor out without perhaps making it worse than the bit of duplication. */
if (bwf->typeSig != bigWigSig)
   errAbort("Trying to do fetchIntoBuf on a non big-wig file.");
bbiAttachUnzoomedCir(bwf);
struct fileOffsetSize *blockList = bbiOverlappingBlocks(bwf, bwf->unzoomedCir, 
	chrom, start, end, NULL);
struct fileOffsetSize *block, *beforeGap, *afterGap;
struct udcFile *udc = bwf->udc;
boolean isSwapped = bwf->isSwapped;
float val;
int i;
/* NOTE(review): positions read from the file index these buffers directly
 * with no clipping to start..end, so the buffers presumably span the whole
 * chromosome - confirm against the caller. */
Bits *covBuf = chromVals->covBuf;
double *valBuf = chromVals->valBuf;

/* Set up for uncompression optionally. */
char *uncompressBuf = NULL;
if (bwf->uncompressBufSize > 0)
    uncompressBuf = needLargeMem(bwf->uncompressBufSize);

/* This loop is a little complicated because we merge the read requests for efficiency, but we 
 * have to then go back through the data one unmerged block at a time. */
for (block = blockList; block != NULL; )
    {
    /* Find contigious blocks and read them into mergedBuf. */
    fileOffsetSizeFindGap(block, &beforeGap, &afterGap);
    bits64 mergedOffset = block->offset;
    /* The merged read runs from the first block to the end of beforeGap. */
    bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset;
    udcSeek(udc, mergedOffset);
    char *mergedBuf = needLargeMem(mergedSize);
    udcMustRead(udc, mergedBuf, mergedSize);
    char *blockBuf = mergedBuf;

    /* Loop through individual blocks within merged section. */
    for (;block != afterGap; block = block->next)
        {
	/* Uncompress if necessary. */
	char *blockPt, *blockEnd;
	if (uncompressBuf)
	    {
	    blockPt = uncompressBuf;
	    int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bwf->uncompressBufSize);
	    blockEnd = blockPt + uncSize;
	    }
	else
	    {
	    blockPt = blockBuf;
	    blockEnd = blockPt + block->size;
	    }

	/* Deal with insides of block. */
	/* The header read and each memRead call below take &blockPt and are
	 * expected to move it forward (see the assert after the switch), so
	 * the order of the reads must match the layout of the data. */
	struct bwgSectionHead head;
	bwgSectionHeadFromMem(&blockPt, &head, isSwapped);
	switch (head.type)
	    {
	    case bwgTypeBedGraph:
		{
		/* Each item is read as start, end, value. */
		for (i=0; i<head.itemCount; ++i)
		    {
		    bits32 s = memReadBits32(&blockPt, isSwapped);
		    bits32 e = memReadBits32(&blockPt, isSwapped);
		    bitSetRange(covBuf, s, e-s);
		    val = memReadFloat(&blockPt, isSwapped);
		    bits32 j;
		    for (j=s; j<e; ++j)
		        valBuf[j] = val;
		    }
		break;
		}
	    case bwgTypeVariableStep:
		{
		/* Each item is read as start, value; it covers itemSpan
		 * positions from start. */
		for (i=0; i<head.itemCount; ++i)
		    {
		    bits32 s = memReadBits32(&blockPt, isSwapped);
		    val = memReadFloat(&blockPt, isSwapped);
		    bitSetRange(covBuf, s, head.itemSpan);
		    bits32 e = s + head.itemSpan;
		    bits32 j;
		    for (j=s; j<e; ++j)
		        valBuf[j] = val;
		    }
		break;
		}
	    case bwgTypeFixedStep:
		{
		/* Do a little optimization for the most common and worst case - step1/span1 */
		if (head.itemStep == 1 && head.itemSpan == 1)
		    {
		    /* One value per position from head.start to head.end, so
		     * coverage is set once and values are read straight into
		     * place. */
		    bits32 s = head.start;
		    bits32 e = head.end;
		    bitSetRange(covBuf, s, e-s);
		    bits32 j;
		    for (j=s; j<e; ++j)
		        valBuf[j] = memReadFloat(&blockPt, isSwapped);
		    }
		else
		    {
		    /* General case: each value covers itemSpan positions and
		     * the window advances by itemStep per item. */
		    bits32 s = head.start;
		    bits32 e = s + head.itemSpan;
		    for (i=0; i<head.itemCount; ++i)
			{
			bitSetRange(covBuf, s, head.itemSpan);
			val = memReadFloat(&blockPt, isSwapped);
			bits32 j;
			for (j=s; j<e; ++j)
			    valBuf[j] = val;
			s += head.itemStep;
			e += head.itemStep;
			}
		    }
		break;
		}
	    default:
		internalErr();
		break;
	    }
	/* Every byte of the block must have been consumed. */
	assert(blockPt == blockEnd);
	blockBuf += block->size;
	}
    freeMem(mergedBuf);
    }
freeMem(uncompressBuf);
slFreeList(&blockList);
}
Ejemplo n.º 30
0
static void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as, 
	int itemsPerSlot, struct bbiBoundsArray *bounds, 
	int sectionCount, boolean doCompress, FILE *f, 
	int resTryCount, int resScales[], int resSizes[], 
	struct bbExIndexMaker *eim,  int bedCount,
	bits16 fieldCount, bits32 *retMaxBlockSize)
/* Read through lf, writing it in f.  Save starting points of blocks (every itemsPerSlot)
 * to boundsArray.
 *
 * Each input line is chopped into fieldCount words (on tabs or white space, depending on
 * the file-level tabSep flag), validated with loadAndValidateBed, and appended to an
 * in-memory stream as chromId, start, end, followed by the remaining fields separated by
 * tabs and a terminating zero byte.
 *
 * A block is finished when the input ends, when the chromosome changes, or when it
 * already holds itemsPerSlot items.  The stream is then written to f (through zCompress
 * when doCompress is set) and bounds[sectionIx] receives the block's file offset, chromId
 * and start/end range.
 *
 * Parameters:
 *   usageList      - list of chromosomes; input lines are expected to arrive grouped by
 *                    chromosome in the same order as this list (enforced with assert).
 *   lf, as         - input line file and the autoSql object passed to loadAndValidateBed.
 *   itemsPerSlot   - maximum number of items per block.
 *   bounds         - array filled with one entry per block written.
 *   sectionCount   - number of blocks expected; checked with assert at the end.
 *   doCompress     - if TRUE each block is compressed before being written.
 *   f              - output file.
 *   resTryCount, resScales, resSizes - for each of resTryCount resolutions, resSizes[i] is
 *                    incremented once per resScales[i]-sized step an item starts or
 *                    extends into.  Presumably used by the caller to size zoom levels -
 *                    confirm against caller.
 *   eim            - if non-NULL, receives the keys of every row and the offset/size of
 *                    every block.
 *   bedCount       - not referenced in this function.
 *   fieldCount     - number of fields expected on each input line.
 *   retMaxBlockSize - receives the largest uncompressed block size seen. */
{
int maxBlockSize = 0;
struct bbiChromUsage *usage = usageList;
char *line, *row[fieldCount+1];
int lastField = fieldCount-1;
int itemIx = 0, sectionIx = 0;
bits64 blockStartOffset = 0;
int startPos = 0, endPos = 0;
bits32 chromId = 0;
struct dyString *stream = dyStringNew(0);

/* Will keep track of some things that help us determine how much to reduce. */
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
boolean atEnd = FALSE, sameChrom = FALSE;
bits32 start = 0, end = 0;
char *chrom = NULL;
struct bed *bed;
AllocVar(bed);

/* Help keep track of which beds are in current chunk so as to write out
 * namedChunks to eim if need be. */
long sectionStartIx = 0, sectionEndIx = 0;

for (;;)
    {
    /* Get next line of input if any. */
    if (lineFileNextReal(lf, &line))
	{
	/* Chop up line and make sure the word count is right. */
	int wordCount;
	if (tabSep)
	    wordCount = chopTabs(line, row);
	else
	    wordCount = chopLine(line, row);
	lineFileExpectWords(lf, fieldCount, wordCount);

	loadAndValidateBed(row, bedN, fieldCount, lf, bed, as, FALSE);

	chrom = bed->chrom;
	start = bed->chromStart;
	end = bed->chromEnd;

	sameChrom = sameString(chrom, usage->name);
	}
    else  /* No next line */
	{
	atEnd = TRUE;
	}


    /* Check conditions that would end block and save block info and advance to next if need be. */
    if (atEnd || !sameChrom || itemIx >= itemsPerSlot)
        {
	/* Save stream to file, compressing if need be. */
	if (stream->stringSize > maxBlockSize)
	    maxBlockSize = stream->stringSize;
	if (doCompress)
            {
	    size_t maxCompSize = zCompBufSize(stream->stringSize);

            // keep around an area of scratch memory; it is static so it is reused
            // across blocks (and calls) and is intentionally never freed here
            static size_t compBufSize = 0;
            static char *compBuf = NULL;
            // check to see if buffer needed for compression is big enough
            if (compBufSize < maxCompSize)
                {
                // free up the old not-big-enough piece
                freez(&compBuf); // freez knows about NULL

                // get new scratch area
                compBufSize = maxCompSize;
                compBuf = needLargeMem(compBufSize);
                }

	    int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
	    mustWrite(f, compBuf, compSize);
	    }
	else
	    mustWrite(f, stream->string, stream->stringSize);
	dyStringClear(stream);

	/* Save block offset and size for all named chunks in this section. */
	if (eim != NULL)
	    {
	    bits64 blockEndOffset = ftell(f);
	    bbExIndexMakerAddOffsetSize(eim, blockStartOffset, blockEndOffset-blockStartOffset,
		sectionStartIx, sectionEndIx);
	    sectionStartIx = sectionEndIx;
	    }

	/* Save info on existing block. */
	struct bbiBoundsArray *b = &bounds[sectionIx];
	b->offset = blockStartOffset;
	b->range.chromIx = chromId;
	b->range.start = startPos;
	b->range.end = endPos;
	++sectionIx;
	itemIx = 0;

	/* The final block has been flushed; nothing more to read. */
	if (atEnd)
	    break;
	}

    /* Advance to next chromosome if need be and get chromosome id. */
    if (!sameChrom)
        {
	usage = usage->next;
	assert(usage != NULL);
	assert(sameString(chrom, usage->name));
	/* Zoom bookkeeping restarts on each chromosome. */
	for (resTry = 0; resTry < resTryCount; ++resTry)
	    resEnds[resTry] = 0;
	}
    chromId = usage->id;

    /* At start of block we save a lot of info. */
    if (itemIx == 0)
        {
	blockStartOffset = ftell(f);
	startPos = start;
	endPos = end;
	}
    /* Otherwise just update end. */
    else
        {
	if (endPos < end)
	    endPos = end;
	/* No need to update startPos since list is sorted. */
	}

    /* Save name into namedOffset if need be. */
    if (eim != NULL)
	{
	bbExIndexMakerAddKeysFromRow(eim, row, sectionEndIx);
	sectionEndIx += 1;
	}

    /* Write out data. */
    dyStringWriteOne(stream, chromId);
    dyStringWriteOne(stream, start);
    dyStringWriteOne(stream, end);
    if (fieldCount > 3)
        {
	int i;
	/* Write 3rd through next to last field and a tab separator. */
	for (i=3; i<lastField; ++i)
	    {
	    char *s = row[i];
	    dyStringAppend(stream, s);
	    dyStringAppendC(stream, '\t');
	    }
	/* Write last field and terminal zero */
	char *s = row[lastField];
	dyStringAppend(stream, s);
	}
    dyStringAppendC(stream, 0);

    itemIx += 1;

    /* Do zoom counting. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
	bits32 resEnd = resEnds[resTry];
	/* Item starts past the current step: count a new step beginning at start. */
	if (start >= resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = start + resScales[resTry];
	    }
	/* Item extends past the current step: count each further step it reaches into. */
	while (end > resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = resEnd + resScales[resTry];
	    }
	}
    }
assert(sectionIx == sectionCount);
freez(&bed);
/* Release the per-call accumulation buffer; it was previously leaked. */
dyStringFree(&stream);
*retMaxBlockSize = maxBlockSize;
}