Exemplo n.º 1
0
void readPairFile(struct lineFile *prf)
/* Read in pairs and initialize clone list.
 * Every line of prf must have exactly three tab-separated fields, otherwise
 * the program aborts:
 *   field 0 - comma-separated names, each added to the leftNames hash
 *   field 1 - comma-separated names, each added to the rightNames hash
 *   field 2 - clone name
 * A clone is created (and added to the clones hash and cloneList) the first
 * time its name is seen.  All names on one line share a single cloneName
 * record holding the clone name. */
{
  int lineSize, i;
  char *line;
  char *words[4];
  char *names[16];
  int wordCount, nameCount;
  struct clone *clone;
  struct cloneName *cloneName;

  while (lineFileNext(prf, &line, &lineSize))
    {
      int written;

      wordCount = chopTabs(line, words);
      if (wordCount != 3)
        errAbort("Bad line %d of %s\n", prf->lineIx, prf->fileName);
      if (!hashLookup(clones, words[2]))
        {
          clone = createClone(words[2], NULL, NULL);
          hashAdd(clones, words[2], clone);
          slAddHead(&cloneList, clone);
        }
      AllocVar(cloneName);
      /* Bounded copy: an over-long clone name must not overrun the buffer.
       * NOTE(review): assumes cloneName->name is a fixed-size char array (the
       * original sprintf wrote straight into the freshly allocated struct) --
       * confirm against the struct definition. */
      written = snprintf(cloneName->name, sizeof(cloneName->name), "%s", words[2]);
      if (written < 0 || (size_t)written >= sizeof(cloneName->name))
        errAbort("Clone name '%s' too long, line %d of %s\n",
                 words[2], prf->lineIx, prf->fileName);
      nameCount = chopCommas(words[0], names);
      for (i = 0; i < nameCount; i++)
        hashAdd(leftNames, names[i], cloneName);
      nameCount = chopCommas(words[1], names);
      for (i = 0; i < nameCount; i++)
        hashAdd(rightNames, names[i], cloneName);
    }
}
int findBedSize(char *fileName, struct lineFile **retLf)
/* Read first line of file and figure out how many words in it.
 * Input file could be stdin, in which case we really don't want to open,
 * read, and close it here.  So if retLf is non-NULL, return the open
 * linefile (having told it to reuse the line we just read).
 * If the file has no real line: returns 0 when ignoreEmpty is set,
 * otherwise aborts with the same "appears to be empty" message used for a
 * line with no words. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[64], *line = NULL;
int wordCount;

if (!lineFileNextReal(lf, &line))
    {
    /* Previously the non-ignoreEmpty case fell through and cloned an
     * uninitialized pointer; treat it as the empty-file error it is. */
    if (!ignoreEmpty)
        errAbort("%s appears to be empty", fileName);
    /* Don't leak the linefile: hand it back or close it, same as below. */
    if (retLf != NULL)
        *retLf = lf;
    else
        lineFileClose(&lf);
    return 0;
    }
/* Chop a private copy so the linefile's buffer stays intact for reuse. */
line = cloneString(line);
if (strictTab)
    wordCount = chopTabs(line, words);
else
    wordCount = chopLine(line, words);
if (wordCount == 0)
    errAbort("%s appears to be empty", fileName);
if (retLf != NULL)
    {
    lineFileReuse(lf);
    *retLf = lf;
    }
else
    lineFileClose(&lf);
freeMem(line);
return wordCount;
}
void readHugoMultiTable(char *fileName, struct hugoMulti **retList,
	struct hash **retIdHash, struct hash **retSymbolHash)
/* Load the tab-separated file into a list of hugoMulti records, in file
 * order, and index the records in two hashes: one keyed on omimId and one
 * keyed on symbol.  Records with an empty key are left out of that hash.
 * Lines that are empty or start with '#' are skipped; every other line must
 * have 11 fields. */
{
struct hash *byId = newHash(0);
struct hash *bySymbol = newHash(0);
struct hugoMulti *head = NULL;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *fields[16];
char *text;
int textSize;

while (lineFileNext(lf, &text, &textSize))
    {
    struct hugoMulti *rec;
    int fieldCount;

    /* Only lines with content that are not comments carry a record. */
    if (text[0] != 0 && text[0] != '#')
        {
        fieldCount = chopTabs(text, fields);
        lineFileExpectWords(lf, 11, fieldCount);
        rec = hugoMultiLoad(fields);
        slAddHead(&head, rec);
        if (rec->omimId[0] != 0)
            hashAdd(byId, rec->omimId, rec);
        if (rec->symbol[0] != 0)
            hashAdd(bySymbol, rec->symbol, rec);
        }
    }
lineFileClose(&lf);

/* Records were pushed on the head, so flip back to file order. */
slReverse(&head);
*retList = head;
*retIdHash = byId;
*retSymbolHash = bySymbol;
}
Exemplo n.º 4
0
struct psl *nextLmPsl(struct lineFile *lf, struct lm *lm)
/* Read the next line from lf and convert it to a psl via the lm-based
 * loaders.  Returns NULL at end of file, and also (after a warning) when
 * the line has neither 21 nor 23 tab-separated fields. */
{
char *fields[32];
char *text;
int textSize;

if (!lineFileNext(lf, &text, &textSize))
    return NULL;

switch (chopTabs(text, fields))
    {
    case 21:    /* plain psl */
        return pslLoadLm(fields, lm);
    case 23:    /* pslx: two extra columns */
        return pslxLoadLm(fields, lm);
    default:
        warn("Bad line %d of %s", lf->lineIx, lf->fileName);
        return NULL;
    }
}
Exemplo n.º 5
0
struct psl *nextPsl(struct lineFile *lf)
/* Read the next line from lf and convert it to a psl.  Returns NULL at end
 * of file, and also (after a warning) when the line has neither 21 nor 23
 * tab-separated fields. */
{
char *fields[32];
char *text;
int textSize;
int fieldCount;
struct psl *result = NULL;

if (lineFileNext(lf, &text, &textSize))
    {
    fieldCount = chopTabs(text, fields);
    if (fieldCount == 21)
        result = pslLoad(fields);       /* plain psl */
    else if (fieldCount == 23)
        result = pslxLoad(fields);      /* pslx: two extra columns */
    else
        warn("Bad line %d of %s", lf->lineIx, lf->fileName);
    }
return result;
}
boolean mgcStatusTblCopyRow(struct lineFile *inLf, FILE *outFh)
/* Read one row of a status table tab file from inLf and copy it to outFh
 * without fully parsing it.  Rows with fewer than MGCSTATUS_NUM_COLS columns
 * are padded with tabs up to that count.  Returns FALSE when there is no
 * further row to read, TRUE after a row has been written. */
{
char *text;
char *cols[MGCSTATUS_NUM_COLS];
int colCount, colIx;

if (!lineFileNextReal(inLf, &text))
    return FALSE;

colCount = chopTabs(text, cols);
colCount = min(colCount, MGCSTATUS_NUM_COLS);
lineFileExpectAtLeast(inLf, MGCSTATUS_MIN_NUM_COLS, colCount);

/* Copy the columns that are present, tab-separated. */
colIx = 0;
while (colIx < colCount)
    {
    if (colIx != 0)
        fputc('\t', outFh);
    fputs(cols[colIx], outFh);
    ++colIx;
    }

/* One extra tab for each column the row is missing. */
while (colIx < MGCSTATUS_NUM_COLS)
    {
    fputc('\t', outFh);
    ++colIx;
    }
fputc('\n', outFh);

return TRUE;
}
Exemplo n.º 7
0
void readPslFile(struct lineFile *pf)
/* Process all records in a psl file of mRNA alignments.
 * Every line must have exactly 21 tab-separated fields or the program
 * aborts.  A record whose qName is in neither leftNames nor rightNames is
 * discarded.  Otherwise the owning clone is looked up through the cloneName
 * stored in the hash, and, if the record passes the filter
 * (tBaseInsert < TINSERT, and either NORANDOM is off or tName is shorter
 * than 7 characters), it is wrapped by createPslAli and pushed onto the
 * clone's end1 (left names) or end2 (right names) list.
 * Records that are not kept are freed here. */
{
 int lineSize;
 char *line;
 char *words[32];
 int  wordCount;
 struct psl *psl;
 struct clone *clone;
 struct pslAli *pa = NULL;
 struct cloneName *cloneName;

 while (lineFileNext(pf, &line, &lineSize))
   {
     int isLeft;

     wordCount = chopTabs(line, words);
     if (wordCount != 21)
       errAbort("Bad line %d of %s\n", pf->lineIx, pf->fileName);
     psl = pslLoad(words);

     /* Left names take precedence when a name is in both hashes; remember
      * which side matched so it need not be looked up again below. */
     isLeft = (hashLookup(leftNames, psl->qName) != NULL);
     if (isLeft)
       cloneName = hashMustFindVal(leftNames, psl->qName);
     else if (hashLookup(rightNames, psl->qName))
       cloneName = hashMustFindVal(rightNames, psl->qName);
     else
       {
         /* Unknown read: nothing references this record, so release it. */
         pslFree(&psl);
         continue;
       }
     clone = hashMustFindVal(clones, cloneName->name);
     if ((psl->tBaseInsert < TINSERT) && ((!NORANDOM) || (strlen(psl->tName) < 7)))
       {
         pa = createPslAli(psl);
         if (isLeft)
           slAddHead(&(clone->end1), pa);
         else
           slAddHead(&(clone->end2), pa);
       }
     else
       {
         /* Filtered out: never handed to createPslAli, so free it here. */
         pslFree(&psl);
       }
   }
}
Exemplo n.º 8
0
void fixGdup(char *inName, char *outName)
/* fixGdup - Reformat genomic dups table a little.
 * Reads tab-separated rows from inName and writes reformatted rows to
 * outName.  Lines starting with '#' and lines with no fields are skipped;
 * every other line must have 15 fields.  Output columns, in order:
 *   input fields 0-2, then "field6:field7", then fields 4-8, then fields
 *   10 through the last.  Input fields 3 and 9 are not written.
 * Both files are closed before returning. */
{
struct lineFile *lf = lineFileOpen(inName, TRUE);
FILE *f = mustOpen(outName, "w");
int wordCount, lineSize;
char *words[32], *line;
int i;

while (lineFileNext(lf, &line, &lineSize))
    {
    if (line[0] == '#')
        continue;
    wordCount = chopTabs(line, words);
    if (wordCount == 0)
        continue;
    lineFileExpectWords(lf, 15, wordCount);
    for (i=0; i<3; ++i)
        fprintf(f, "%s\t", words[i]);
    fprintf(f, "%s:%s\t", words[6], words[7]);
    for (i=4; i<9; ++i)
        fprintf(f, "%s\t", words[i]);
    for (i=10; i<wordCount; ++i)
        {
        fprintf(f, "%s", words[i]);
        /* Last field ends the row; all others are tab-terminated. */
        if (i == wordCount-1)
            fprintf(f, "\n");
        else
            fprintf(f, "\t");
        }
    }
/* Release both files; previously neither was closed, so a failed flush of
 * the output went unnoticed. */
lineFileClose(&lf);
carefulClose(&f);
}
Exemplo n.º 9
0
void gffFileAdd(struct gffFile *gff, char *fileName, int baseOffset)
/* Read the GFF file fileName and add each of its rows to gff through
 * gffFileAddRow.  Lines starting with '#' and lines with no fields are
 * skipped.  After the file is read, gff's line, seq, source, feature,
 * group and geneId lists are each reversed. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *fields[9];
char *text;
int textSize;

while (lineFileNext(lf, &text, &textSize))
    {
    int fieldCount;

    if (text[0] == '#')
        continue;
    fieldCount = chopTabs(text, fields);
    if (fieldCount <= 0)
        continue;
    gffFileAddRow(gff, baseOffset, fields, fieldCount, lf->fileName, lf->lineIx);
    }

slReverse(&gff->lineList);
slReverse(&gff->seqList);
slReverse(&gff->sourceList);
slReverse(&gff->featureList);
slReverse(&gff->groupList);
slReverse(&gff->geneIdList);
lineFileClose(&lf);
}
void writeBedTab(char *fileName, struct bedStub *bedList, int bedSize)
/* Write out bed list to tab-separated file.
 * For each item: unless noBin is set, a bin column computed by hFindBin is
 * written first; then the item's saved line is chopped (on tabs if strictTab,
 * otherwise by chopLine) and its words are written tab-separated.  When
 * itemRgb is set and the ninth word contains a comma, it is converted with
 * bedParseRgb and written as a decimal integer.
 * Note that chopping modifies bed->line in place.  bedSize is not used here.
 * Any failed write, including a failed close, is reported through
 * writeFailed. */
{
struct bedStub *bed;
FILE *f = mustOpen(fileName, "w");
char *words[64];
int i, wordCount;
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    if (!noBin)
        if (fprintf(f, "%u\t", hFindBin(bed->chromStart, bed->chromEnd)) <= 0)
            writeFailed(fileName);
    if (strictTab)
        wordCount = chopTabs(bed->line, words);
    else
        wordCount = chopLine(bed->line, words);
    for (i=0; i<wordCount; ++i)
        {
        /*	new definition for old "reserved" field, now itemRgb */
        /*	and when itemRgb, it is a comma separated string r,g,b */
        if (itemRgb && (i == 8))
            {
            char *comma;
            /*  Allow comma separated list of rgb values here   */
            comma = strchr(words[8], ',');
            if (comma)
                {
                /* Named rgb so it no longer shadows the itemRgb flag. */
                int rgb = bedParseRgb(words[8]);
                if (-1 == rgb)
                    errAbort("ERROR: expecting r,g,b specification, "
                                "found: '%s'", words[8]);
                else
                    if (fprintf(f, "%d", rgb) <= 0)
                        writeFailed(fileName);

                verbose(2, "itemRgb: %s, rgb: %#x\n", words[8], rgb);
                }
            else
                if (fputs(words[i], f) == EOF)
                    writeFailed(fileName);
            }
        else
            if (fputs(words[i], f) == EOF)
                writeFailed(fileName);

        if (i == wordCount-1)
            {
            if (fputc('\n', f) == EOF)
                writeFailed(fileName);
            }
        else
            if (fputc('\t', f) == EOF)
                writeFailed(fileName);
        }
    }
/* Buffered data is flushed on close, so a failure here is a write failure. */
if (fclose(f) != 0)
    writeFailed(fileName);
}
Exemplo n.º 11
0
void pslReps(char *inName, char *bestAliName, char *repName)
/* Read the psl file inName and hand each run of consecutive alignments that
 * share a query name to doOneAcc, which is given the two output files
 * (bestAliName and repName).  doOneAcc is also called at the first name
 * change with the initial empty name and an empty list.  Progress messages
 * go to stdout unless either output is "stdout". */
{
struct lineFile *in = pslFileOpen(inName);
FILE *bestFile = mustOpen(bestAliName, "w");
FILE *repFile = mustOpen(repName, "w");
char *fields[32];
char *text;
int textSize;
int fieldCount;
struct psl *pending = NULL;     /* alignments of the current query, newest first */
struct psl *psl = NULL;
char curName[512];
int aliCount = 0;

quiet = sameString(bestAliName, "stdout") || sameString(repName, "stdout");
if (coverQSizeFile != NULL)
    loadCoverQSizes(coverQSizeFile);

if (!quiet)
    printf("Processing %s to %s and %s\n", inName, bestAliName, repName);
if (!noHead)
    pslWriteHead(bestFile);

curName[0] = 0;
while (lineFileNext(in, &text, &textSize))
    {
    /* A progress dot every 0x20000 alignments. */
    ++aliCount;
    if ((aliCount & 0x1ffff) == 0 && !quiet)
        {
        printf(".");
        fflush(stdout);
        }

    fieldCount = chopTabs(text, fields);
    switch (fieldCount)
        {
        case 21:
            psl = pslLoad(fields);
            break;
        case 23:
            psl = pslxLoad(fields);
            break;
        default:
            errAbort("Bad line %d of %s\n", in->lineIx, in->fileName);
        }

    /* Query name changed: flush the finished group and start a new one. */
    if (!sameString(curName, psl->qName))
        {
        doOneAcc(curName, pending, bestFile, repFile);
        pslFreeList(&pending);
        safef(curName, sizeof(curName), "%s", psl->qName);
        }
    slAddHead(&pending, psl);
    }

/* Flush the final group. */
doOneAcc(curName, pending, bestFile, repFile);
pslFreeList(&pending);

lineFileClose(&in);
fclose(bestFile);
fclose(repFile);
if (!quiet)
    printf("Processed %d alignments\n", aliCount);
}
Exemplo n.º 12
0
void processPrimers(struct lineFile *pf, FILE *of)
/* Read and process isPCR file and sts locations.
 * Every line of pf must be a 21-field psl.  The query name is split on '_'
 * and its second part is used as the key into stsHash.  Consecutive lines
 * with the same key are attached to the same sts as place records; when the
 * key changes (and at end of input) the current sts, if any, is passed to
 * filterPrimersAndWrite together with of.  Lines whose key is not in
 * stsHash are ignored.  Aborts on a line without 21 fields or on a query
 * name with fewer than two '_'-separated parts. */
{
int lineSize, wordCount;
char *line;
char *words[21];
char *dbsts_name, *dbsts[4], *currDbsts;
struct sts *sts=NULL;
struct psl *psl;
struct place *place;

/* Start from an owned empty string so currDbsts is always heap memory and
 * can be released uniformly. */
currDbsts = cloneString("");
while (lineFileNext(pf, &line, &lineSize))
    {
    wordCount = chopTabs(line, words);
    if (wordCount != 21)
        errAbort("Bad line %d of %s\n", pf->lineIx, pf->fileName);
    psl = pslLoad(words);
    dbsts_name = cloneString(psl->qName);
    wordCount = chopByChar(dbsts_name, '_', dbsts, ArraySize(dbsts));
    /* dbsts[1] is only set when the name really has a second part. */
    if (wordCount < 2)
        errAbort("Bad query name '%s' line %d of %s: expected at least two '_'-separated parts\n",
                 psl->qName, pf->lineIx, pf->fileName);
    if (differentString(dbsts[1], currDbsts))
      {
        if (sts != NULL)
            filterPrimersAndWrite(of, sts);
        /* stsFree(&sts); */
        freez(&currDbsts);
        currDbsts = cloneString(dbsts[1]);
        sts = NULL;
        if (hashLookup(stsHash, dbsts[1]))
          sts = hashMustFindVal(stsHash, dbsts[1]);
      }
    /* The pieces in dbsts[] were only needed for the key handling above. */
    freez(&dbsts_name);
    if (sts)
      {
        AllocVar(place);
        /* Check if this psl record is already present */
        /* NOTE(review): place was just allocated, so place->psl looks to be
         * an empty list here and this test probably never rejects anything;
         * confirm the intended list to search. */
        if (!pslInList(place->psl, psl))
          {
            slAddHead(&place->psl, psl);
            place->unali = calcUnali(sts, psl);
            place->sizeDiff = calcSizeDiff(sts, psl);
            place->badBits = calcBadBits(place);
            if (place->sizeDiff < (200 - (place->badBits * 50)))
              slAddHead(&sts->place, place);
            else
              placeFree(&place);
          }
      }
    }
if (sts != NULL)
    filterPrimersAndWrite(of, sts);
freez(&currDbsts);
}
Exemplo n.º 13
0
boolean gffHasGtfGroup(char *line)
/* Return TRUE if line has at least nine tab-separated fields and
 * isGtfGroup accepts the ninth.  The line itself is left untouched; the
 * check runs on a temporary copy. */
{
char *fields[10];
char *copy = cloneString(line);
int fieldCount = chopTabs(copy, fields);
boolean hasGroup = (fieldCount >= 9 && isGtfGroup(fields[8])) ? TRUE : FALSE;

freeMem(copy);
return hasGroup;
}
Exemplo n.º 14
0
char* restField(struct bigBedInterval *bb, int fieldIdx) 
/* Return a copy of tab-separated field number fieldIdx of bb->rest, or NULL
 * on error.  NULL is returned when fieldIdx is not positive (index 0 is the
 * name field, which is deliberately not returned), when bb->rest is NULL,
 * or when bb->rest has no such field.  The result is a fresh cloneString
 * copy. */
{
/* Reject 0 (name field) and negative indexes, which would read before the
 * start of restFields; also guard against an interval with no rest text. */
if (fieldIdx <= 0 || bb->rest == NULL)
    return NULL;
/* Chop a private copy so bb->rest is not modified. */
char *rest = cloneString(bb->rest);
char *restFields[256];
int restCount = chopTabs(rest, restFields);
char *field = NULL;
if (fieldIdx < restCount)
    field = cloneString(restFields[fieldIdx]);
freeMem(rest);
return field;
}
struct mgcStatusTbl *mgcStatusTblLoad(char *mgcStatusTab, unsigned opts)
/* Build a mgcStatusTbl from the tab file mgcStatusTab.  The table is created
 * with mgcStatusTblNew(opts); each row read from the file must have at
 * least MGCSTATUS_MIN_NUM_COLS columns and is passed to loadRow. */
{
struct mgcStatusTbl *table = mgcStatusTblNew(opts);
struct lineFile *lf = lineFileOpen(mgcStatusTab, TRUE);
char *cols[MGCSTATUS_NUM_COLS];
char *text;
int colCount;

for (;;)
    {
    if (!lineFileNextReal(lf, &text))
        break;
    colCount = chopTabs(text, cols);
    lineFileExpectAtLeast(lf, MGCSTATUS_MIN_NUM_COLS, colCount);
    loadRow(table, lf, cols, colCount);
    }
lineFileClose(&lf);
return table;
}
void loadOneBed(struct lineFile *lf, int bedSize, struct bedStub **pList)
/* Load one bed file.  Make sure all lines have bedSize fields.
 * Put results in *pList.
 * When hasBin is set the first word of each line is dropped before parsing.
 * Each accepted line yields a bedStub holding the chrom, start, end and an
 * unchopped copy of the line; stubs are pushed on the head of *pList, so
 * the list ends up in reverse file order.  Unless noStrict is set,
 * coordinates are sanity-checked and a bad line aborts the program. */
{
char *words[64], *line, *dupe;
int wordCount;
struct bedStub *bed;

verbose(1, "Reading %s\n", lf->fileName);
while (lineFileNextReal(lf, &line))
    {
    if (hasBin)
        nextWord(&line);
    /* Guard in case dropping the bin column left nothing to parse. */
    if (line == NULL)
        continue;
    /* Keep an unchopped copy: chopping below cuts line up in place. */
    dupe = cloneString(line);
    if (strictTab)
        wordCount = chopTabs(line, words);
    else
        wordCount = chopLine(line, words);
    /* ignore empty lines	*/
    if (0 == wordCount)
        {
        freeMem(dupe);      /* not stored anywhere, so release it */
        continue;
        }
    lineFileExpectWords(lf, bedSize, wordCount);
    AllocVar(bed);
    bed->chrom = cloneString(words[0]);
    bed->chromStart = lineFileNeedNum(lf, words, 1);
    bed->chromEnd = lineFileNeedNum(lf, words, 2);
    if (! noStrict)
        {
        if (bed->chromEnd < 1)
            errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n",
                     lf->lineIx, dupe);
        if (bed->chromStart == bed->chromEnd && !allowStartEqualEnd)
            errAbort("ERROR: line %d:'%s'\nchromStart == chromEnd (%d) (zero-length item)\n"
                     "Use -allowStartEqualEnd if that is legit (e.g. for insertion point).\n",
                     lf->lineIx, dupe, bed->chromStart);
        if (bed->chromStart > bed->chromEnd)
            errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n",
                     lf->lineIx, dupe, bed->chromStart, bed->chromEnd);
        }
    bed->line = dupe;
    slAddHead(pList, bed);
    }
}
Exemplo n.º 17
0
void verifyJoinedFormat(char *s)
/* Verify that s consists of lines with two tab-separated fields,
 * and that the second field has some n/a and some comma-separated lists.
 * Lines starting with '#' are skipped.  Problems are reported as soft
 * errors on tablesTestList->status; checking stops at the first line with
 * the wrong field count.  The text in s is cut up in place. */
{
char *e;
int lineIx = 0;
boolean gotCommas = FALSE, gotNa = FALSE;

while (s != NULL && s[0] != 0)
    {
    char *row[3];
    int fieldCount;
    ++lineIx;
    /* Terminate the current line and remember where the next one starts. */
    e = strchr(s, '\n');
    if (e != NULL)
       *e++ = 0;
    if (s[0] != '#')
        {
        fieldCount = chopTabs(s, row);
        if (fieldCount != 2)
            {
            qaStatusSoftError(tablesTestList->status, 
                    "Got %d fields line %d of  joined result, expected 2", 
                    fieldCount, lineIx);
            break;
            }
        if (sameString(row[1], "n/a"))
             gotNa = TRUE;
        /* Count commas in the second field.  After chopping, s holds only
         * the first field, so counting in s never looked at the list. */
        if (countChars(row[1], ',') >= 2)
             gotCommas = TRUE;
        }
    s = e;
    }
if (!gotCommas)
    qaStatusSoftError(tablesTestList->status, 
           "Expected some rows in join to have comma separated lists.");
if (!gotNa)
    qaStatusSoftError(tablesTestList->status, 
           "Expected some rows in joint to have n/a.");
}
Exemplo n.º 18
0
void motifFinder(char *database, char *name, int fileCount, char *files[])
/* motifFinder - find largest scoring motif in bed items. */
{
struct sqlConnection *conn = sqlConnect(database);
int fileNum;
char where[256];
struct chromInfo *ci  = createChromInfoList(NULL, database);
sqlSafefFrag(where, sizeof(where), "name = '%s'", name);
struct dnaMotif *motif = dnaMotifLoadWhere(conn, motifTable, where);
if(markovTable != NULL)
    dnaMotifMakeLog2(motif);
if(motif == NULL)
    errAbort("couldn't find motif '%s'", name);
for (fileNum = 0; fileNum < fileCount; fileNum++)
    {
    char *words[64], *line;
    char **row;
    struct lineFile *lf = lineFileOpen(files[fileNum], TRUE);
    while (lineFileNextReal(lf, &line))
        {
	int dnaLength, i, j, rowOffset, length, wordCount = chopTabs(line, words);
        unsigned chromSize;
        boolean markovFound = FALSE;
        double mark0[5];
        double mark2[5][5][5];
        struct dnaSeq *seq = NULL;
        char *dupe = NULL;
        if (0 == wordCount)
            continue;
        lineFileExpectAtLeast(lf, 3, wordCount);
        dupe = cloneString(line);
        char *chrom = words[0];
        int chromStart = lineFileNeedNum(lf, words, 1);
        if(markovTable != NULL)
            chromStart = max(2, chromStart);
        unsigned chromEnd = lineFileNeedNum(lf, words, 2);
        if (chromEnd < 1)
            errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n",
		     lf->lineIx, dupe);
        if (chromStart > chromEnd)
            errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n",
                     lf->lineIx, dupe, chromStart, chromEnd);
        length = chromEnd - chromStart;
        chromSize = getChromSize(ci, chrom);
        if(markovTable == NULL)
            {
            dnaLength = length;
            seq = hDnaFromSeq(database, chrom, chromStart, chromEnd, dnaUpper);
            if(uniformBackground)
                {
                int i;
                mark0[0] = 1;
                for(i = 1; i <= 4; i++)
                    mark0[i] = 0.25;
                }
            else
                {
                dnaMark0(seq, mark0, NULL);
                }
            }
        else
            {
            dnaLength = length + 4;
            if(chromStart - 2 + dnaLength > chromSize)
                // can't do analysis for potential peak hanging off the end of the chrom
                continue;
            seq = hDnaFromSeq(database, chrom, chromStart - 2, chromEnd + 2, dnaUpper);
            struct sqlResult *sr = hRangeQuery(conn, markovTable, chrom, chromStart,
                                               chromStart + 1, NULL, &rowOffset);
            if((row = sqlNextRow(sr)) != NULL)
                {
                dnaMark2Deserialize(row[rowOffset + 3], mark2);
                dnaMarkMakeLog2(mark2);
                markovFound = TRUE;
                }
            else
                errAbort("markov table '%s' is missing; non-markov analysis is current not supported", markovTable);
            sqlFreeResult(&sr);
            }
        struct bed6FloatScore *hits = NULL;
        for (i = 0; i < 2; i++)
            {
            double mark0Copy[5];
            char strand = i == 0 ? '+' : '-';
            for (j = 0; j <= 4; j++)
                mark0Copy[j] = mark0[j];
            if(strand == '-')
                {
                // reverse markov table too!
                double tmp;
                reverseComplement(seq->dna, dnaLength);
                tmp = mark0Copy[1];
                mark0Copy[1] = mark0Copy[3];
                mark0Copy[3] = tmp;
                tmp = mark0Copy[2];
                mark0Copy[2] = mark0Copy[4];
                mark0Copy[4] = tmp;
                }
            for (j = 0; j < length - motif->columnCount + 1; j++)
                // tricky b/c if(markovFound) then seq includes the two bytes on either side of actual sequence.
                {
                double score;
                if(markovFound)
                    score = dnaMotifBitScoreWithMarkovBg(motif, seq->dna + j, mark2);
                else
                    score = dnaMotifBitScoreWithMark0Bg(motif, seq->dna + j, mark0Copy);
                if(score >= minScoreCutoff)
                    {
                    int start;
                    if(strand == '-')
                        start = (chromEnd - j) - motif->columnCount;
                    else
                        start = chromStart + j;
                    struct bed6FloatScore *hit = NULL;

                    // Watch out for overlapping hits (on either strand; yes, I've seen that happen);
                    // we report only the highest scoring hit in this case.
                    // O(n^2) where n == number of motifs in a peak, but I expect n to be almost always very small.
                    if(!originalCoordinates)
                        {
                        for (hit = hits; hit != NULL; hit = hit->next)
                            {
                            if(hit->chromEnd > start && hit->chromStart <= (start + motif->columnCount))
                                {
                                verbose(3, "found overlapping hits: %d-%d overlaps with %d-%d\n", start, start + motif->columnCount, hit->chromStart, hit->chromEnd);
                                break;
                                }
                            }
                        }
                    if(hit == NULL || hit->score < score)
                        {
                        if(hit == NULL)
                            {
                            AllocVar(hit);
                            slAddHead(&hits, hit);
                            hit->chrom = cloneString(chrom);
                            }
                        hit->chromStart = originalCoordinates ? chromStart : start;
                        hit->chromEnd = originalCoordinates ? chromEnd : start + motif->columnCount;
                        hit->score = score;
                        hit->strand[0] = strand;
                        }
                    }
                verbose(3, "j: %d; score: %.2f\n", j, score);
                }
            }
        slSort(&hits, bed6FloatCmpDesc);
        int count;
        float currentPrior = prior;
        for(count = 1; hits != NULL; count++, hits = hits->next)
            {
            if(topOnly && count > topOnly)
                break;
            // Use a progressively weaker prior for hits with lower scores
            verbose(3, "count: %d; score: %.2f; prior: %.2f; log2(prior / (1 - prior)): %.2f\n", count, hits->score, currentPrior, log2(currentPrior / (1 - currentPrior)));
            if(hits->score >= minScoreCutoff - log2(currentPrior / (1 - currentPrior)))
                {
                printf("%s\t%d\t%d\t%s\t%.2f\t%c\n", chrom, originalCoordinates ? chromStart : hits->chromStart, 
                       originalCoordinates ? chromEnd : hits->chromStart + motif->columnCount, name, hits->score, hits->strand[0]);
                currentPrior = count == 1 ? priorBackoff : currentPrior * priorBackoff;
                if(count > 2)
                    verbose(3, "hit for count: %d at %s:%d-%d\n", count, chrom, hits->chromStart, hits->chromStart + motif->columnCount);
                }
            else
                break;
            }
        freeDnaSeq(&seq);
        freeMem(dupe);
        }
    lineFileClose(&lf);
    }
sqlDisconnect(&conn);
}
Exemplo n.º 19
0
static void bigBedClick(char *fileName, struct trackDb *tdb,
                     char *item, int start, int end, int bedSize)
/* Handle click in generic bigBed track.
 * Queries fileName for intervals on the cart's current chromosome around
 * start-end and prints details for every interval whose start and end match
 * exactly and, when bedSize > 3, whose name equals item.  If bedSize is 0
 * the file's own defined field count is used and the custom URL is shown.
 * Items scoring below the scoreFilter setting are not printed.  Prints a
 * "No item" message when nothing matched. */
{
boolean showUrl = FALSE;
char *chrom = cartString(cart, "c");

/* Open bigBed file and get interval list. */
struct bbiFile *bbi = bigBedFileOpen(fileName);
struct lm *lm = lmInit(0);
int ivStart = start, ivEnd = end;
if (start == end)
    {
    // item is an insertion; expand the search range from 0 bases to 2 so we catch it:
    ivStart = max(0, start-1);
    ivEnd++;
    }
struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chrom, ivStart, ivEnd, 0, lm);

/* Get bedSize if it's not already defined. */
if (bedSize == 0)
    {
    bedSize = bbi->definedFieldCount;
    showUrl = TRUE;
    }


char *scoreFilter = cartOrTdbString(cart, tdb, "scoreFilter", NULL);
int minScore = 0;
if (scoreFilter)
    minScore = atoi(scoreFilter);

/* Find particular item in list - matching start, and item if possible. */
boolean found = FALSE;
boolean firstTime = TRUE;
struct bigBedInterval *bb;
for (bb = bbList; bb != NULL; bb = bb->next)
    {
    if (!(bb->start == start && bb->end == end))
        continue;
    if (bedSize > 3)
        {
        char *name = cloneFirstWordByTab(bb->rest);
        boolean match = sameString(name, item);
        freez(&name);
        if (!match)
            continue;
        }

    found = TRUE;
    if (firstTime)
        printf("<BR>\n");
    int seq1Seq2Fields = 0;
    // check for seq1 and seq2 in columns 7+8 (eg, pairedTagAlign)
    boolean seq1Seq2 = sameOk(trackDbSetting(tdb, BASE_COLOR_USE_SEQUENCE), "seq1Seq2");
    if (seq1Seq2 && bedSize == 6)
        seq1Seq2Fields = 2;
    char *fields[bedSize+seq1Seq2Fields];
    char startBuf[16], endBuf[16];
    /* Private copy of the rest text, taken before the interval is turned
     * into a row; freed at the end of this iteration. */
    char *rest = cloneString(bb->rest);
    int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields,
                                           bedSize+seq1Seq2Fields);
    if (bbFieldCount != bedSize+seq1Seq2Fields)
        {
        errAbort("Disagreement between trackDb field count (%d) and %s fieldCount (%d)",
                bedSize, fileName, bbFieldCount);
        }
    struct bed *bed = bedLoadN(fields, bedSize);
    if (bedSize >= 6 && scoreFilter && bed->score < minScore)
        {
        freez(&rest);       /* skipping this item: don't leak its copy */
        continue;
        }
    if (showUrl && (bedSize >= 4))
        printCustomUrl(tdb, item, TRUE);
    bedPrintPos(bed, bedSize, tdb);

    // display seq1 and seq2
    if (seq1Seq2 && bedSize+seq1Seq2Fields == 8)
        printf("<table><tr><th>Sequence 1</th><th>Sequence 2</th></tr>"
               "<tr><td> %s </td><td> %s </td></tr></table>", fields[6], fields[7]);
    else if (isNotEmpty(rest))
        {
        char *restFields[256];
        int restCount = chopTabs(rest, restFields);
        /* The first bedSize-3 words of rest are BED fields; any beyond
         * that are extra fields. */
        int restBedFields = bedSize - 3;
        if (restCount > restBedFields)
            {
            if (0 == extraFieldsPrint(tdb,NULL,restFields + restBedFields,restCount - restBedFields))
                {
                int i;
                char label[20];
                safef(label, sizeof(label), "nonBedFieldsLabel");
                printf("<B>%s&nbsp;</B>",
                       trackDbSettingOrDefault(tdb, label, "Non-BED fields:"));
                for (i = restBedFields;  i < restCount;  i++)
                    printf("%s%s", (i > 0 ? "\t" : ""), restFields[i]);
                printf("<BR>\n");
                }
            }
        }
    if (isCustomTrack(tdb->track))
        {
        time_t timep = bbiUpdateTime(bbi);
        printBbiUpdateTime(&timep);
        }

    /* restFields pointed into rest; nothing uses them past this point. */
    freez(&rest);
    }

if (!found)
    {
    printf("No item %s starting at %d\n", emptyForNull(item), start);
    }

lmCleanup(&lm);
bbiFileClose(&bbi);
}
Exemplo n.º 20
0
void processRefSeq(char *database, char *faFile, char *raFile, char *pslFile, char *loc2refFile, 
	char *pepFile, char *mim2locFile)
/* hgRefSeqMrna - Load refSeq mRNA alignments and other info into 
 * refSeqGene table.
 * Steps, in order:
 *   1. Make sure the refLink, refGene, refPep and refMrna tables exist and
 *      delete their current contents.
 *   2. Read mim2locFile into a hash from its second column to the integer
 *      in its first column.
 *   3. Read raFile, building one refSeqInfo per record (acc and siz are
 *      required) and assigning gene/product name ids.
 *   4. Read loc2refFile to fill in locusLinkId, omimId and proteinAcc.
 *   5. Write refLink rows, reporting how many lack locus or protein ids.
 *   6. Read pslFile and write one refGene row for each alignment whose
 *      query is known and has a displayID in spXref2.
 *   7. Unless clTest is set, write the sequence tables and load all tab
 *      files into the database. */
{
struct lineFile *lf;
struct hash *raHash, *rsiHash = newHash(0);
struct hash *loc2mimHash = newHash(0);
struct refSeqInfo *rsiList = NULL, *rsi;
char *s, *line, *row[5];
int wordCount, dotMod = 0;
int noLocCount = 0;
int rsiCount = 0;
int noProtCount = 0;
struct psl *psl;
struct sqlConnection *conn = hgStartUpdate(database);
struct hash *productHash = loadNameTable(conn, "productName", 16);
struct hash *geneHash = loadNameTable(conn, "geneName", 16);
char *kgName = "refGene";

FILE *kgTab = hgCreateTabFile(".", kgName);
FILE *productTab = hgCreateTabFile(".", "productName");
FILE *geneTab = hgCreateTabFile(".", "geneName");
FILE *refLinkTab = hgCreateTabFile(".", "refLink");
FILE *refPepTab = hgCreateTabFile(".", "refPep");
FILE *refMrnaTab = hgCreateTabFile(".", "refMrna");

struct exon *exonList = NULL, *exon;
char *answer;
char cond_str[200];

/* Make refLink and other tables table if they don't exist already. */
sqlMaybeMakeTable(conn, "refLink", refLinkTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refLink");
sqlMaybeMakeTable(conn, "refGene", refGeneTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refGene");
sqlMaybeMakeTable(conn, "refPep", refPepTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refPep");
sqlMaybeMakeTable(conn, "refMrna", refMrnaTableDef);
sqlUpdate(conn, "NOSQLINJ delete from refMrna");

/* Scan through locus link to omim ID file and put in hash. */
    {
    char *row[2];

    printf("Scanning %s\n", mim2locFile);
    lf = lineFileOpen(mim2locFile, TRUE);
    while (lineFileRow(lf, row))
        {
        hashAdd(loc2mimHash, row[1], intToPt(atoi(row[0])));
        }
    lineFileClose(&lf);
    }

/* Scan through .ra file and make up start of refSeqInfo
 * objects in hash and list. */
printf("Scanning %s\n", raFile);
lf = lineFileOpen(raFile, TRUE);
while ((raHash = hashNextRa(lf)) != NULL)
    {
    if (clDots > 0 && ++dotMod == clDots )
        {
        dotMod = 0;
        dotOut();
        }
    AllocVar(rsi);
    slAddHead(&rsiList, rsi);
    if ((s = hashFindVal(raHash, "acc")) == NULL)
        errAbort("No acc near line %d of %s", lf->lineIx, lf->fileName);
    rsi->mrnaAcc = cloneString(s);
    if ((s = hashFindVal(raHash, "siz")) == NULL)
        errAbort("No siz near line %d of %s", lf->lineIx, lf->fileName);
    rsi->size = atoi(s);
    if ((s = hashFindVal(raHash, "gen")) != NULL)
        rsi->geneName = cloneString(s);
    //!!!else
      //!!!  warn("No gene name for %s", rsi->mrnaAcc);
    /* Without a cds entry only cdsEnd is set, to the full size. */
    if ((s = hashFindVal(raHash, "cds")) != NULL)
        parseCds(s, 0, rsi->size, &rsi->cdsStart, &rsi->cdsEnd);
    else
        rsi->cdsEnd = rsi->size;
    if ((s = hashFindVal(raHash, "ngi")) != NULL)
        rsi->ngi = atoi(s);

    rsi->geneNameId = putInNameTable(geneHash, geneTab, rsi->geneName);
    s = hashFindVal(raHash, "pro");
    if (s != NULL)
        rsi->productName = cloneString(s);
    rsi->productNameId = putInNameTable(productHash, productTab, s);
    hashAdd(rsiHash, rsi->mrnaAcc, rsi);

    freeHashAndVals(&raHash);
    }
lineFileClose(&lf);
if (clDots) printf("\n");

/* Scan through loc2ref filling in some gaps in rsi. */
printf("Scanning %s\n", loc2refFile);
lf = lineFileOpen(loc2refFile, TRUE);
while (lineFileNext(lf, &line, NULL))
    {
    char *mrnaAcc;

    if (line[0] == '#')
        continue;
    wordCount = chopTabs(line, row);
    if (wordCount < 5)
        errAbort("Expecting at least 5 tab-separated words line %d of %s",
                lf->lineIx, lf->fileName);
    mrnaAcc = row[1];
    mrnaAcc = accWithoutSuffix(mrnaAcc);

    if (mrnaAcc[2] != '_')
        warn("%s is and odd name %d of %s", 
                mrnaAcc, lf->lineIx, lf->fileName);
    if ((rsi = hashFindVal(rsiHash, mrnaAcc)) != NULL)
        {
        rsi->locusLinkId = lineFileNeedNum(lf, row, 0);
        rsi->omimId = ptToInt(hashFindVal(loc2mimHash, row[0]));
        rsi->proteinAcc = cloneString(accWithoutSuffix(row[4]));
        }
    }
lineFileClose(&lf);

/* Report how many seem to be missing from loc2ref file. 
 * Write out knownInfo file. */
printf("Writing %s\n", "refLink.tab");
for (rsi = rsiList; rsi != NULL; rsi = rsi->next)
    {
    ++rsiCount;
    if (rsi->locusLinkId == 0)
        ++noLocCount;
    if (rsi->proteinAcc == NULL)
        ++noProtCount;
    fprintf(refLinkTab, "%s\t%s\t%s\t%s\t%u\t%u\t%u\t%u\n",
        emptyForNull(rsi->geneName), 
        emptyForNull(rsi->productName),
        emptyForNull(rsi->mrnaAcc), 
        emptyForNull(rsi->proteinAcc),
        rsi->geneNameId, rsi->productNameId, 
        rsi->locusLinkId, rsi->omimId);
    }
if (noLocCount) 
    printf("Missing locusLinkIds for %d of %d\n", noLocCount, rsiCount);
if (noProtCount)
    printf("Missing protein accessions for %d of %d\n", noProtCount, rsiCount);

/* Process alignments and write them out as genes. */
lf = pslFileOpen(pslFile);
dotMod = 0;
while ((psl = pslNext(lf)) != NULL)
  {
  if (hashFindVal(rsiHash, psl->qName) != NULL)
    {
    if (clDots > 0 && ++dotMod == clDots )
        {
        dotMod = 0;
        dotOut();
        }
   
    sqlSafefFrag(cond_str, sizeof cond_str, "extAC='%s'", psl->qName);
    answer = sqlGetField(proteinDB, "spXref2", "displayID", cond_str);
               
    if (answer == NULL)
        {
        fprintf(stderr, "%s NOT FOUND.\n", psl->qName);
        fflush(stderr);
        }

    if (answer != NULL)
        {	
        struct genePred *gp = NULL;
        exonList = pslToExonList(psl);
        fprintf(kgTab, "%s\t%s\t%c\t%d\t%d\t",
        psl->qName, psl->tName, psl->strand[0], psl->tStart, psl->tEnd);
        rsi = hashMustFindVal(rsiHash, psl->qName);

        gp = genePredFromPsl(psl, rsi->cdsStart, rsi->cdsEnd, genePredStdInsertMergeSize);
        if (!gp)
            errAbort("Cannot convert psl (%s) to genePred.\n", psl->qName);

        fprintf(kgTab, "%d\t%d\t", gp->cdsStart, gp->cdsEnd);
        fprintf(kgTab, "%d\t", slCount(exonList));
    
        fflush(kgTab);
     
        /* Exon starts, then exon ends, each as a comma-terminated list. */
        for (exon = exonList; exon != NULL; exon = exon->next)
            fprintf(kgTab, "%d,", exon->start);
        fprintf(kgTab, "\t");
    
        for (exon = exonList; exon != NULL; exon = exon->next)
            fprintf(kgTab, "%d,", exon->end);
        fprintf(kgTab, "\n");
        slFreeList(&exonList);
        }
    }
  else
    {
    fprintf(stderr, "%s found in psl, but not in .fa or .ra data files.\n", psl->qName);
    fflush(stderr);
    }
  }
/* Close the psl file, like every other input file read above. */
lineFileClose(&lf);

if (clDots) printf("\n");

if (!clTest)
    {
    writeSeqTable(pepFile, refPepTab, FALSE, TRUE);
    writeSeqTable(faFile, refMrnaTab, FALSE, FALSE);
    }

carefulClose(&kgTab);
carefulClose(&productTab);
carefulClose(&geneTab);
carefulClose(&refLinkTab);
carefulClose(&refPepTab);
carefulClose(&refMrnaTab);

if (!clTest)
    {
    printf("Loading database with %s\n", kgName);
    fflush(stdout);
    
    hgLoadTabFile(conn, ".", kgName, NULL);

    printf("Loading database with %s\n", "productName");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "productName", NULL);
    
    printf("Loading database with %s\n", "geneName");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "geneName", NULL);
    
    printf("Loading database with %s\n", "refLink");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refLink", NULL);
    
    printf("Loading database with %s\n", "refPep");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refPep", NULL);
    
    printf("Loading database with %s\n", "refMrna");
    fflush(stdout);
    hgLoadTabFile(conn, ".", "refMrna", NULL);
    }
}
Exemplo n.º 21
0
static void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as, 
	int itemsPerSlot, struct bbiBoundsArray *bounds, 
	int sectionCount, boolean doCompress, FILE *f, 
	int resTryCount, int resScales[], int resSizes[], 
	struct bbExIndexMaker *eim,  int bedCount,
	bits16 fieldCount, bits32 *retMaxBlockSize)
/* Read through lf, writing it in f.  Save starting points of blocks (every itemsPerSlot)
 * to bounds.
 *
 * Each input line is chopped (by tabs or white space depending on the file-level
 * tabSep flag), validated with loadAndValidateBed, and appended to an in-memory
 * stream.  The stream is flushed to f, optionally compressed, whenever the
 * chromosome changes, itemsPerSlot items have accumulated, or input ends.
 *   usageList   - list of chromosomes; the code advances through it as the
 *                 chromosome name in the input changes
 *   bounds      - array that receives offset and range of each block written;
 *                 exactly sectionCount entries are expected to be filled
 *   resScales/resSizes - for each of the resTryCount candidate zoom scales,
 *                 resSizes[i] is incremented as items cross scale-sized windows
 *   eim         - if non-NULL, gets keys for each row and offset/size of each block
 *   retMaxBlockSize - receives the largest uncompressed block size seen
 * The bedCount parameter is not referenced in this function. */
{
int maxBlockSize = 0;
struct bbiChromUsage *usage = usageList;
char *line, *row[fieldCount+1];
int lastField = fieldCount-1;
int itemIx = 0, sectionIx = 0;
bits64 blockStartOffset = 0;
int startPos = 0, endPos = 0;
bits32 chromId = 0;
struct dyString *stream = dyStringNew(0);

/* Will keep track of some things that help us determine how much to reduce. */
bits32 resEnds[resTryCount];
int resTry;
for (resTry = 0; resTry < resTryCount; ++resTry)
    resEnds[resTry] = 0;
boolean atEnd = FALSE, sameChrom = FALSE;
bits32 start = 0, end = 0;
char *chrom = NULL;
struct bed *bed;
AllocVar(bed);

/* Help keep track of which beds are in current chunk so as to write out
 * namedChunks to eim if need be. */
long sectionStartIx = 0, sectionEndIx = 0;

for (;;)
    {
    /* Get next line of input if any. */
    if (lineFileNextReal(lf, &line))
	{
	/* Chop up line and make sure the word count is right. */
	int wordCount;
	if (tabSep)
	    wordCount = chopTabs(line, row);
	else
	    wordCount = chopLine(line, row);
	lineFileExpectWords(lf, fieldCount, wordCount);

	loadAndValidateBed(row, bedN, fieldCount, lf, bed, as, FALSE);

	chrom = bed->chrom;
	start = bed->chromStart;
	end = bed->chromEnd;

	sameChrom = sameString(chrom, usage->name);
	}
    else  /* No next line */
	{
	atEnd = TRUE;
	}


    /* Check conditions that would end block and save block info and advance to next if need be. */
    if (atEnd || !sameChrom || itemIx >= itemsPerSlot)
        {
	/* Save stream to file, compressing if need be. */
	if (stream->stringSize > maxBlockSize)
	    maxBlockSize = stream->stringSize;
	if (doCompress)
            {
	    size_t maxCompSize = zCompBufSize(stream->stringSize);

            // keep around an area of scratch memory; it is static so it is
            // reused by later blocks and later calls, and is never released
            // (size_t so the recorded capacity always matches what was allocated)
            static size_t compBufSize = 0;
            static char *compBuf = NULL;
            // check to see if buffer needed for compression is big enough
            if (compBufSize < maxCompSize)
                {
                // free up the old not-big-enough piece
                freez(&compBuf); // freez knows about NULL

                // get new scratch area
                compBufSize = maxCompSize;
                compBuf = needLargeMem(compBufSize);
                }

	    int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize);
	    mustWrite(f, compBuf, compSize);
	    }
	else
	    mustWrite(f, stream->string, stream->stringSize);
	dyStringClear(stream);

	/* Save block offset and size for all named chunks in this section. */
	if (eim != NULL)
	    {
	    bits64 blockEndOffset = ftell(f);
	    bbExIndexMakerAddOffsetSize(eim, blockStartOffset, blockEndOffset-blockStartOffset,
		sectionStartIx, sectionEndIx);
	    sectionStartIx = sectionEndIx;
	    }

	/* Save info on existing block. */
	struct bbiBoundsArray *b = &bounds[sectionIx];
	b->offset = blockStartOffset;
	b->range.chromIx = chromId;
	b->range.start = startPos;
	b->range.end = endPos;
	++sectionIx;
	itemIx = 0;

	if (atEnd)
	    break;
	}

    /* Advance to next chromosome if need be and get chromosome id. */
    if (!sameChrom)
        {
	usage = usage->next;
	assert(usage != NULL);
	assert(sameString(chrom, usage->name));
	/* Zoom windows restart on each new chromosome. */
	for (resTry = 0; resTry < resTryCount; ++resTry)
	    resEnds[resTry] = 0;
	}
    chromId = usage->id;

    /* At start of block we save a lot of info. */
    if (itemIx == 0)
        {
	blockStartOffset = ftell(f);
	startPos = start;
	endPos = end;
	}
    /* Otherwise just update end. */
    else
        {
	if (endPos < end)
	    endPos = end;
	/* No need to update startPos since list is sorted. */
	}

    /* Save name into namedOffset if need be. */
    if (eim != NULL)
	{
	bbExIndexMakerAddKeysFromRow(eim, row, sectionEndIx);
	sectionEndIx += 1;
	}

    /* Write out data. */
    dyStringWriteOne(stream, chromId);
    dyStringWriteOne(stream, start);
    dyStringWriteOne(stream, end);
    if (fieldCount > 3)
        {
	int i;
	/* Write 3rd through next to last field and a tab separator. */
	for (i=3; i<lastField; ++i)
	    {
	    char *s = row[i];
	    dyStringAppend(stream, s);
	    dyStringAppendC(stream, '\t');
	    }
	/* Write last field and terminal zero */
	char *s = row[lastField];
	dyStringAppend(stream, s);
	}
    dyStringAppendC(stream, 0);

    itemIx += 1;

    /* Do zoom counting: for each candidate scale, bump the count when this
     * item starts at or past the current window end, and once more for every
     * further scale-sized step needed to cover the item's end. */
    for (resTry = 0; resTry < resTryCount; ++resTry)
        {
	bits32 resEnd = resEnds[resTry];
	if (start >= resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = start + resScales[resTry];
	    }
	while (end > resEnd)
	    {
	    resSizes[resTry] += 1;
	    resEnds[resTry] = resEnd = resEnd + resScales[resTry];
	    }
	}
    }
assert(sectionIx == sectionCount);
freez(&bed);
/* The stream buffer was only scratch space for the current block. */
dyStringFree(&stream);
*retMaxBlockSize = maxBlockSize;
}
void liftTabbed(char *destFile, struct hash *liftHash, 
   int sourceCount, char *sources[],
   int ctgWord, int startWord, int endWord, 
   boolean doubleLift, int ctgWord2, int startWord2, int endWord2,
   int startOffset, int strandWord)
/* Generic lift a tab-separated file with contig, start, and end fields.
 * If doubleLift is TRUE, also lift second set of coordinates.
 *   destFile    - output file, created/truncated here
 *   liftHash    - passed to findLift to look up the lift spec for a contig
 *   sources     - sourceCount input file names, processed in order
 *   ctgWord, startWord, endWord - zero-based column indexes of the first
 *                 coordinate set; startWord/endWord are skipped when negative
 *   ctgWord2, startWord2, endWord2 - columns of the second set (doubleLift only)
 *   startOffset - adjustment applied to start when flipping a '-' strand spec
 *   strandWord  - column holding strand, flipped for '-' strand specs; ignored
 *                 when negative or past the end of the line
 * Lines starting with '#' and empty lines are skipped.  Lines whose contig has
 * no lift spec are skipped unless the file-level 'how' is carryMissing.  Unless
 * the file-level 'nosort' flag is set, output is sorted with bedInfoCmp before
 * being written.  Aborts if no line was lifted. */
{
int minFieldCount = max3(startWord, endWord, ctgWord) + 1;
int wordCount, lineSize;
char *words[128], *line, *source;
struct lineFile *lf;
FILE *f = mustOpen(destFile, "w");
int i,j;
int start = 0;
int end = 0;
int start2 = 0;
int end2 = 0;
char *contig, *chrom = NULL, *chrom2 = NULL;
struct liftSpec *spec;
static char buf[1024*16];
char *bufEnd = buf + sizeof(buf);
char *s;
int len;
struct bedInfo *biList = NULL, *bi;
boolean anyHits = FALSE;

if (doubleLift)
   {
   /* Indexes are zero-based, so the required word count is largest index + 1,
    * same as for the first coordinate set above. */
   int min2 = max3(ctgWord2, startWord2, endWord2) + 1;
   minFieldCount = max(minFieldCount, min2);
   }
for (i=0; i<sourceCount; ++i)
    {
    source = sources[i];
    lf = lineFileOpen(source, TRUE);
    verbose(1, "Lifting %s\n", source);
    while (lineFileNext(lf, &line, &lineSize))
	{
	if (line[0] == '#')
	    continue;
	wordCount = chopTabs(line, words);
	if (wordCount == 0)
	    continue;
	if (wordCount < minFieldCount)
	   errAbort("Expecting at least %d words line %d of %s", 
	   	minFieldCount, lf->lineIx, lf->fileName); 
	contig = words[ctgWord];
	contig = rmChromPrefix(contig);
	if (startWord >= 0)
	    start = lineFileNeedNum(lf, words, startWord);
	if (endWord >= 0)
	    end = lineFileNeedNum(lf, words, endWord);
	spec = findLift(liftHash, contig, lf);
	if (spec == NULL)
	    {
	    if (how == carryMissing)
		chrom = cloneString(contig);
	    else
		continue;
	    }
	else
	    {
	    chrom = spec->newName;
	    if (spec->strand == '-')
		{
		/* Mirror the interval within the old contig before offsetting. */
		int oldStart = start - startOffset,  oldEnd = end;
		start = spec->oldSize - oldEnd + startOffset;
		end = spec->oldSize - oldStart;
		if (strandWord >= 0 && strandWord < wordCount)
		    {
		    char strand = words[strandWord][0];
		    if (strand == '+')
		        words[strandWord] = "-";
		    else if (strand == '-')
		        words[strandWord] = "+";
		    }
		}
	    start += spec->offset;
	    end += spec->offset;
	    }
	if (doubleLift)
	    {
	    contig = words[ctgWord2];
	    start2 = lineFileNeedNum(lf, words, startWord2);
	    end2 = lineFileNeedNum(lf, words, endWord2);
	    spec = findLift(liftHash, contig, lf);
	    if (spec == NULL)
		{
		if (how == carryMissing)
		    chrom2 = cloneString(contig);
		else
		    errAbort("Couldn't find second contig in lift file at line %d of %s\n", lf->lineIx, lf->fileName);
		}
	    else
		{
		cantHandleSpecRevStrand(spec);
		chrom2 = spec->newName;
		start2 += spec->offset;
		end2 += spec->offset;
		}
	    }
	anyHits = TRUE;

	/* Rebuild the line in buf, substituting lifted values.  Every field is
	 * written with snprintf against the space actually remaining, so a long
	 * field aborts cleanly instead of running past the end of buf. */
	s = buf;
	for (j=0; j<wordCount; ++j)
	    {
	    int room, written;
	    /* s is always strictly inside buf here, so the tab itself fits. */
	    if (j != 0)
		*s++ = '\t';
	    room = (int)(bufEnd - s);
	    if (j == ctgWord)
		written = snprintf(s, (size_t)room, "%s", chrom);
	    else if (j == startWord)
	        written = snprintf(s, (size_t)room, "%d", start);
	    else if (j == endWord)
	        written = snprintf(s, (size_t)room, "%d", end);
	    else if (doubleLift && j == ctgWord2)
		written = snprintf(s, (size_t)room, "%s", chrom2);
	    else if (doubleLift && j == startWord2)
	        written = snprintf(s, (size_t)room, "%d", start2);
	    else if (doubleLift && j == endWord2)
	        written = snprintf(s, (size_t)room, "%d", end2);
	    else
	        written = snprintf(s, (size_t)room, "%s", words[j]);
	    /* written >= room means the field (plus its terminator) did not fit. */
	    if (written < 0 || written >= room)
	        errAbort("Line %d too long in %s", lf->lineIx, lf->fileName);
	    s += written;
	    }
	*s = 0;
        if (nosort)
            {
            fprintf(f, "%s\n", buf);
            }
        else
            {
            /* Keep the rebuilt line for sorting; the struct is allocated with
             * extra room so the text can be copied into bi->line. */
            len = s-buf;
            bi = needMem(sizeof(*bi) + len);
            bi->chrom = chrom;
            bi->start = start;
            bi->end = end;
            memcpy(bi->line, buf, len);
            slAddHead(&biList, bi);
            }
	}
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }
if (!nosort)
    {
    slSort(&biList, bedInfoCmp);
    for (bi = biList; bi != NULL; bi = bi->next)
        {
        fprintf(f, "%s\n", bi->line);
        }
    }
if (ferror(f))
    errAbort("error writing %s", destFile);
/* Buffered output is flushed at close, so a failure here is a write error too. */
if (fclose(f) != 0)
    errAbort("error writing %s", destFile);
if (!anyHits)
   errAbort("No lines lifted!");
}
Exemplo n.º 23
0
void loadOneBed(struct lineFile *lf, int bedSize, struct bedStub **pList)
/* Load one bed file.  Make sure all lines have the correct number of fields.
 * Put results in *pList.
 *
 * Each kept line becomes a bedStub holding the chromosome name, start and end,
 * plus a private copy of the unchopped line text.  Items are added at the head
 * of *pList, so the list ends up in reverse file order.  Behaviour is steered
 * by file-level settings referenced here: hasBin (skip a leading word),
 * strictTab (tab vs white-space chopping), type (extra validation), noStrict
 * and allowStartEqualEnd (coordinate sanity checks). */
{
char *words[64], *line, *dupe;
int wordCount;
struct bedStub *bed;
struct asObject *asObj = getAsObj(bedSize);
int fieldCount = getFieldCount(bedSize, asObj);
struct bed *validateBed;
AllocVar(validateBed);

verbose(1, "Reading %s\n", lf->fileName);
while (lineFileNextReal(lf, &line))
    {
    if (hasBin)
	nextWord(&line);
    /* Copy before chopping: the chop call below is given the same buffer, and
     * the intact text is what gets stored in the stub and shown in errors. */
    dupe = cloneString(line);
    if (strictTab)
	wordCount = chopTabs(line, words);
    else
	wordCount = chopLine(line, words);
    /* ignore empty lines, releasing the copy that will not be stored */
    if (0 == wordCount)
	{
	freez(&dupe);
	continue;
	}
    lineFileExpectWords(lf, fieldCount, wordCount);

    // TODO also, may need to add a flag to the validateBed() interface to support -allowNegativeScores when not isCt
    //  although can probably get away without it since usually -allowNegativeScores is used by ct which has already verified it.
    //  thus -allowNegativeScores is unlikely to be used with -type.
    if (type)  
	{
	loadAndValidateBed(words, typeBedN, fieldCount, lf, validateBed, asObj, FALSE);
	checkChromNameAndSize(lf, validateBed->chrom, validateBed->chromEnd);
	}

    AllocVar(bed);
    bed->chrom = cloneString(words[0]);
    bed->chromStart = lineFileNeedNum(lf, words, 1);
    bed->chromEnd = lineFileNeedNum(lf, words, 2);
    if (! noStrict)
	{
	if ((bed->chromEnd < 1) && !allowStartEqualEnd)
	    errAbort("ERROR: line %d:'%s'\nchromEnd is less than 1\n",
		     lf->lineIx, dupe);
	if (bed->chromStart == bed->chromEnd && !allowStartEqualEnd)
	    errAbort("ERROR: line %d:'%s'\nchromStart == chromEnd (%d) (zero-length item)\n"
		     "Use -allowStartEqualEnd if that is legit (e.g. for insertion point).\n",
		     lf->lineIx, dupe, bed->chromStart);
	if (bed->chromStart > bed->chromEnd)
	    errAbort("ERROR: line %d:'%s'\nchromStart after chromEnd (%d > %d)\n",
		     lf->lineIx, dupe, bed->chromStart, bed->chromEnd);
	}
    /* The stub takes ownership of the line copy. */
    bed->line = dupe;
    slAddHead(pList, bed);
    }

if (asObj)
    asObjectFreeList(&asObj);
freez(&validateBed);
}
Exemplo n.º 24
0
void writeBedTab(char *fileName, struct bedStub *bedList)
/* Write out bed list to tab-separated file.
 *
 * For each item the stored line is chopped into words (by tabs or white space
 * depending on the file-level strictTab flag) and the words are written back
 * separated by tabs.  Unless the file-level noBin flag is set, a bin number
 * from hFindBin is written first.  When the file-level itemRgb flag is set,
 * an "r,g,b" value in column 8 is converted with bedParseRgb and written as
 * a single integer.  When dotIsNull names a column, a "." there is written
 * as -1.  Chopping works on bed->line itself.  Any failed write, including
 * the final flush at close, is reported through writeFailed. */
{
struct bedStub *bed;
FILE *f = mustOpen(fileName, "w");
char *words[64];
int i, wordCount;
for (bed = bedList; bed != NULL; bed = bed->next)
    {
    if (!noBin)
        {
        // allow for zero-length at start of seq [bin code can't handle 0-0]
        unsigned end = (bed->chromEnd > 0) ? bed->chromEnd : 1;
        if (fprintf(f, "%u\t", hFindBin(bed->chromStart, end)) <= 0)
	    writeFailed(fileName);
        }
    if (strictTab)
	wordCount = chopTabs(bed->line, words);
    else
	wordCount = chopLine(bed->line, words);
    for (i=0; i<wordCount; ++i)
        {
	/*	new definition for old "reserved" field, now itemRgb */
	/*	and when itemRgb, it is a comma separated string r,g,b */
	if (itemRgb && (i == 8))
	    {
	    char *comma;
	    /*  Allow comma separated list of rgb values here   */
	    comma = strchr(words[8], ',');
	    if (comma)
		{
		/* Named rgb so it does not hide the itemRgb flag tested above. */
		int rgb = bedParseRgb(words[8]);
		if (-1 == rgb)
		    errAbort("ERROR: expecting r,g,b specification, "
				"found: '%s'", words[8]);
		else
		    {
		    if (fprintf(f, "%d", rgb) <= 0)
			writeFailed(fileName);
		    }

		verbose(2, "itemRgb: %s, rgb: %#x\n", words[8], rgb);
		}
	    else
		{
		if (fputs(words[i], f) == EOF)
		    writeFailed(fileName);
		}
	    }
	else if ((dotIsNull > 0) && (dotIsNull == i) && sameString(words[i],"."))
        /* If the . was used to represent NULL, replace with -1 in the tables */
	    {
	    if (fputs("-1", f) == EOF)
		writeFailed(fileName);
	    }
	else
	    {
	    if (fputs(words[i], f) == EOF)
		writeFailed(fileName);
	    }

	/* Newline after the last word, tab between words. */
	if (i == wordCount-1)
	    {
	    if (fputc('\n', f) == EOF)
		writeFailed(fileName);
	    }
	else
	    {
	    if (fputc('\t', f) == EOF)
		writeFailed(fileName);
	    }
	}
    }
/* Buffered data is flushed at close; a failure here means the file is incomplete. */
if (fclose(f) != 0)
    writeFailed(fileName);
}
Exemplo n.º 25
0
struct tagStorm *idfToStormTop(char *fileName)
/* Read an idf.txt format file and return a new tagStorm holding a single
 * top-level stanza with one tag per usable input line.  Lines with fewer
 * than two tab-separated fields are skipped. */
{
/* Tags whose values are gathered across several lines and emitted once at
 * the end: the AdditionalFile_Data1, _Data2, ... family (matched by prefix)
 * and the secondary accession tag, which may occur more than once. */
char *extraFilePrefix = "idf.Comment_AdditionalFile_Data";
char *secondAccTag = "idf.Comment_SecondaryAccession";

/* The storm starts out with one empty stanza that everything goes into. */
struct tagStorm *storm = tagStormNew(fileName);
struct tagStanza *stanza = tagStanzaNew(storm, NULL);

struct dyString *extraFiles = dyStringNew(0);
struct dyString *secondAccs = dyStringNew(0);
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct dyString *joined = dyStringNew(0);   /* scratch, reused for each line */
char *line;

while (lineFileNextReal(lf, &line))
    {
    char *fields[256];
    char tagName[256];
    char *firstVal;
    int fieldCount, fieldIx;

    /* Drop trailing white space before splitting on tabs. */
    eraseTrailingSpaces(line);
    fieldCount = chopTabs(line, fields);

    /* A completely full array means the line may have had more fields than fit. */
    if (fieldCount == ArraySize(fields))
        errAbort("Line %d of %s has too many fields", lf->lineIx, lf->fileName);
    if (fieldCount < 2)
	continue;

    /* First field names the tag; the rest are its values. */
    aeFieldToNormalField("idf.", trimSpaces(fields[0]), tagName, sizeof(tagName));
    firstVal = fields[1];

    /* Author list is already comma separated, so it is stored untouched. */
    if (sameString(tagName, "idf.Publication_Author_List"))
        {
	tagStanzaAppend(storm, stanza, tagName, firstVal);
	continue;
	}

    /* Gathered tags: remember the first value now, emit after the loop. */
    if (startsWith(extraFilePrefix, tagName))
        {
	csvEscapeAndAppend(extraFiles, firstVal);
	continue;
	}
    if (sameString(secondAccTag, tagName))
        {
	csvEscapeAndAppend(secondAccs, firstVal);
	continue;
	}

    /* Everything else: fold all values on the line into one csv string. */
    dyStringClear(joined);
    for (fieldIx = 1; fieldIx < fieldCount; ++fieldIx)
	csvEscapeAndAppend(joined, fields[fieldIx]);
    tagStanzaAppend(storm, stanza, tagName, joined->string);
    }

/* Emit the gathered tags, but only if something was actually collected. */
if (extraFiles->stringSize != 0)
    tagStanzaAppend(storm, stanza, extraFilePrefix, extraFiles->string);
if (secondAccs->stringSize != 0)
    tagStanzaAppend(storm, stanza, secondAccTag, secondAccs->string);

lineFileClose(&lf);
dyStringFree(&joined);
dyStringFree(&extraFiles);
dyStringFree(&secondAccs);
return storm;
}