Ejemplo n.º 1
0
void outputPartialTab(char *inTab, struct asObject *as, struct slPair *fieldList, char *outName)
/* Output columns in fieldList from inTab (described by as) into outName */
{
/* Open input and output. */
struct lineFile *lf = lineFileOpen(inTab, TRUE);
FILE *f = mustOpen(outName, "w");

/* Set up array for input fields with more than we expect for better error reporting. */
int oldFieldCount = slCount(as->columnList);
int newFieldCount = slCount(fieldList);
int allocFields = oldFieldCount+10;
char *words[allocFields];

/* Set up array for output fields that says where to find them in input. */
int *oldIx = makeNewToOldArray(as, fieldList);

/* Go through each line of input, outputting selected columns. */
int fieldCount;
while ((fieldCount = lineFileChopNextTab(lf, words, allocFields)) > 0)
    {
    lineFileExpectWords(lf, oldFieldCount, fieldCount);
    fprintf(f, "%s", words[oldIx[0]]);
    int i;
    for (i=1; i<newFieldCount; ++i)
	fprintf(f, "\t%s", words[oldIx[i]]);
    fprintf(f, "\n");
    }

/* Clean up and go home. */
freez(&oldIx);
carefulClose(&f);
lineFileClose(&lf);
}
void liftGenePredExt(char *destFile, struct hash *liftHash, int sourceCount, char *sources[])
/* Lift a genePred files. */
{
char *row[GENEPREDX_NUM_COLS];
struct lineFile* lf;
FILE* dest = mustOpen(destFile, "w");
int iSrc;
int colCount;

for (iSrc = 0; iSrc < sourceCount; iSrc++)
    {
    verbose(1, "Lifting %s\n", sources[iSrc]);
    lf = lineFileOpen(sources[iSrc], TRUE);
    while ((colCount = lineFileChopNextTab(lf, row, ArraySize(row))))
        {
        struct genePred* gp = genePredExtLoad(row, colCount);
        if (liftGenePredObj(liftHash, gp, lf))
            genePredTabOut(gp, dest);
        genePredFree(&gp);
        }
    lineFileClose(&lf);
    if (dots)
        verbose(1, "\n");
    }

carefulClose(&dest);
}
struct tenFields *parseTenFields(char *fileName)
/* Return list of partially parsed lines from GFF. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
struct tenFields *tfList = NULL, *tf;
char *words[11];
int wordCount, i;

while ((wordCount = lineFileChopNextTab(lf, words, ArraySize(words))) != 0)
    {
    if (wordCount != 10)
        errAbort("Expecting 10 tab separated fields got %d line %d of %s",
		wordCount, lf->lineIx, lf->fileName);
    AllocVar(tf);
    for (i=0; i<10; ++i)
	{
	char *s = words[i];
	if (i == 0 && sameString(s, "17")) /* 17 is a synonym for mitochondria */
	    s = cloneString("M");
	if (i != 6)
	    cgiDecode(s, s, strlen(s));
	tf->fields[i] = cloneString(s);
	}
    tf->lineIx = lf->lineIx;
    slAddHead(&tfList, tf);
    }
lineFileClose(&lf);
slReverse(&tfList);
return tfList;
}
static void readSizesFile(char *fileName, struct hash *sizes)
/* Read tab-separated file into hash. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[2];
while (lineFileChopNextTab(lf, row, ArraySize(row)))
    addSize(row[0], lineFileNeedNum(lf, row, 1), sizes);
lineFileClose(&lf);
}
Ejemplo n.º 5
0
void subColumn(char *asciiColumn, char *inFile, char *subFile, char *outFile)
/* subColumn - Substitute one column in a tab-separated file.. */
{
struct hash *subHash = hashTwoColumnFile(subFile);
int column = atoi(asciiColumn);
if (column == 0)
    usage();
else
    column -= 1;
char *row[1024*4];
struct lineFile *lf = lineFileOpen(inFile, TRUE);
FILE *f = mustOpen(outFile, "w");
int rowCount;
while ((rowCount = lineFileChopNextTab(lf, row, ArraySize(row))) > 0)
    {
    if (rowCount == ArraySize(row))
        errAbort("Too many columns (%d) line %d of  %s.", rowCount, lf->lineIx, lf->fileName);
    if (column >= rowCount)
        errAbort("Not enough columns (%d) line %d of  %s.", rowCount, lf->lineIx, lf->fileName);
    int i;
    for (i=0; i<rowCount; ++i)
	{
	char *s = row[i];
	if (i == column)
	    {
	    if (isList)
	        {
		s = subCommaList(subHash, s);
		}
	    else
		{
		char *sub = hashFindVal(subHash, s);
		if (sub == NULL)
		    {
		    if (fMiss)
			{
		        fprintf(fMiss, "%s\n", s);
			++missCount;
			}
		    else
			errAbort("%s not in %s line %d of %s", s, subFile, lf->lineIx, lf->fileName);
		    }
		else
		    s = sub;
		}
	    }
	fputs(s, f);
	if (i == rowCount-1)
	    fputc('\n', f);
	else
	    fputc('\t', f);
	}
    }
carefulClose(&f);
}
Ejemplo n.º 6
0
boolean lineFileNextRowTab(struct lineFile *lf, char *words[], int wordCount)
/* Return next non-blank line that doesn't start with '#' chopped into words
 * at tabs. Returns FALSE at EOF.  Aborts on error. */
{
int wordsRead;
wordsRead = lineFileChopNextTab(lf, words, wordCount);
if (wordsRead == 0)
    return FALSE;
if (wordsRead < wordCount)
    lineFileExpectWords(lf, wordCount, wordsRead);
return TRUE;
}
Ejemplo n.º 7
0
static struct genePred *fileNext(struct genePredReader* gpr)
/* read the next record from a file */
{
char *row[GENEPREDX_NUM_COLS];
int numFields;

while ((numFields = lineFileChopNextTab(gpr->lf, row, GENEPREDX_NUM_COLS)) > 0)
    {
    lineFileExpectAtLeast(gpr->lf, GENEPRED_NUM_COLS, numFields);
    if ((gpr->chrom == NULL) || (sameString(row[1], gpr->chrom)))
        return genePredExtLoad(row, numFields);
    }
return NULL;
}
Ejemplo n.º 8
0
static struct psl *fileNext(struct pslReader* pr)
/* read the next record from a file */
{
char *row[PSLX_NUM_COLS];
int numCols;

while ((numCols = lineFileChopNextTab(pr->lf, row, PSLX_NUM_COLS)) > 0)
    {
    lineFileExpectWords(pr->lf, (pr->isPslx ? PSLX_NUM_COLS : PSL_NUM_COLS), numCols);
    if ((pr->chrom == NULL) || (sameString(row[13], pr->chrom)))
        {
        if (pr->isPslx)
            return pslxLoad(row);
        else
            return pslLoad(row);
        }
    }
return NULL;
}
Ejemplo n.º 9
0
void outputUniqueOnSharedKey(char *inTab, struct asObject *as, struct asColumn *keyCol,
    struct slPair *fieldList, char *outTab, char *outErr)
/* Scan through tab-separated file inTab and output fields in fieldList to
 * outTab. Make sure there is only one row for each value of sharedKey field. 
 * If there would be multiple different rows in output with sharedKey, 
 * complain about it in outErr. */
{
/* Open input and output. */
struct lineFile *lf = lineFileOpen(inTab, TRUE);
FILE *f = mustOpen(outTab, "w");
FILE *fErr = mustOpen(outErr, "w");

/* Set up array for input fields with more than we expect for better error reporting. */
int oldFieldCount = slCount(as->columnList);
int newFieldCount = slCount(fieldList);
int allocFields = oldFieldCount+10;
char *words[allocFields];

/* Set up array for output fields that says where to find them in input. */
int *oldIx = makeNewToOldArray(as, fieldList);

/* Figure out index of key field. */
int keyIx = slIxFromElement(as->columnList, keyCol);

/* Go through each line of input, outputting selected columns. */
struct hash *uniqHash = hashNew(18); 
struct hash *errHash = hashNew(0);
struct dyString *dy = dyStringNew(1024);
int fieldCount;
while ((fieldCount = lineFileChopNextTab(lf, words, allocFields)) > 0)
    {
    lineFileExpectWords(lf, oldFieldCount, fieldCount);

    /* Collect possible output into dy. */
    dyStringClear(dy);
    dyStringPrintf(dy, "%s", words[oldIx[0]]);
    int i;
    for (i=1; i<newFieldCount; ++i)
	dyStringPrintf(dy,  "\t%s", words[oldIx[i]]);
    dyStringPrintf(dy, "\n");

    /* Check that this line is either unique for this key, or the same as previous lines
     * for the key. */
    char *key = words[keyIx];
    char *oldVal = hashFindVal(uniqHash, key);
    if (oldVal != NULL)
        {
	if (!sameString(oldVal, dy->string))
	    {
	    /* Error reporting is a little complex.  We want to output all lines associated
	     * with key, including the first one, but we only want to do first line once. */
	    if (!hashLookup(errHash, key))
	        {
		hashAdd(errHash, key, NULL);
		fputs(oldVal, fErr);
		}
	    fputs(dy->string, fErr);
	    }
	}
    else
	{
	hashAdd(uniqHash, key, cloneString(dy->string));
        fputs(dy->string, f);
	}
    }

/* Report error summary */
if (errHash->elCount > 0)
    {
    warn("Warning: %d shared keys have multiple values in table 2. See %s.\n"
         "Only first row for each key put in %s" , errHash->elCount, outErr, outTab);
    if (!mergeOk)
        noWarnAbort();
    }

/* Clean up and go home. */
freez(&oldIx);
carefulClose(&fErr);
carefulClose(&f);
lineFileClose(&lf);
}
Ejemplo n.º 10
0
void readDbstsPrimers(struct lineFile *dsf)
/* Read in primer and organism info from dbSTS.sts */
{
  struct primer *p;
  struct sts *s;
  char *words[8], *name, *org;
  int dbStsId, newId;

  orgHash = newHash(20);

  while (lineFileChopNextTab(dsf, words, 8))
    {
      /* Check that the organism is human, or at least that a human record 
	 has not already been read in for this dbSTS id */
      org = cloneString(words[7]);
      if (!hashLookup(orgHash, words[0]) || (sameString(org, "H**o sapiens\0")))
	hashAdd(orgHash, words[0], org);
      /* If not human, then don't process any further */
      if (differentString(org, "H**o sapiens\0"))
	continue;

      /* See if this dbSTS id is currently in use by a STS marker */
      dbStsId = sqlUnsigned(words[0]);
      if (hashLookup(ucscIdHash, words[0]))
	s = hashMustFindVal(ucscIdHash, words[0]);
      else
	s = NULL;
      /* See if primers already recorded for this dbSTS id from STS Info file */
      if (hashLookup(primerHash, words[0]))
	{
	  p = hashMustFindVal(primerHash, words[0]);
	  /* If STS marker not mapped, update primer information */
	  if (s == NULL && !s->mapped)
	    {
	      freez(&(p->left));
	      p->left = cloneString(words[1]);
	      freez(&(p->right));
	      p->left = cloneString(words[2]);
	      freez(&(p->dist));
	      p->left = cloneString(words[3]);
	    }
	}
      /* If no record of this primer, create one */
      else
	{
	  AllocVar(p);
	  p->next = NULL;
	  p->dbStsId = dbStsId;
	  p->left = cloneString(words[1]);
	  p->right = cloneString(words[2]);
	  p->dist = cloneString(words[3]);
	  if (s != NULL)
	    p->ucscId = s->si->identNo;
	  else
	    p->ucscId = 0;
	  name = cloneString(words[0]);
	  hashAdd(primerHash, name, p);
	}
      /* If dbSTS id linked to a STS marker already */
      if (s != NULL)
	{
	  /* If linked to ucsc record and record not mapped or doesn't have primer information,
	     update primer info */
	  if (((!s->mapped) || (sameString(s->si->leftPrimer, "\0"))) && (s->si->dbSTSid == dbStsId))
	    updatePrimerInfo(p, s);
	}
      /* If not linked to a ucsc record and human, check if the name is already in use */
      else if ((sameString(org, "H**o sapiens\0")) && (hashLookup(nameHash, words[4])))
	    {
	      s = hashMustFindVal(nameHash, words[4]);

	      /* Update the marker record with the dbSTS id and primer info if none exists */
	      if ((s->si->dbSTSid == 0) || (s->si->dbSTSid >= MAX_STS_ID) || 
		  (sameString(s->si->leftPrimer, "\0")))
		updatePrimerInfo(p, s);
	      /* If the record is already linked to another dbSTS id, add this to other list */
	      else 
		{
		  newId = sqlUnsigned(words[0]);
		  addElementInt(newId, &s->si->otherDbSTS, &s->si->otherDbstsCount);
		}
	      hashAdd(ucscIdHash, words[0], s);
	    }
    }
}	/*	void readDbstsPrimers(struct lineFile *dsf)	*/
Ejemplo n.º 11
0
void outputBedsFromPsls(struct hash *pslHash,char *bedOutName, char *expRecordOutName, 
			char *affyFileName, char *expFileName)
/** For each set of entries in affyFile find matching psl and create a bed. */
{
struct bed *bed = NULL, *b=NULL;
struct psl *pslList = NULL, *psl = NULL;
struct hash *expHash = NULL;
int numExps = 0;
int expCount = 0;
int i =0;
char *probeSet = NULL;
char *row[4];
char key[128];
struct slName *expNames = NULL, *name = NULL;
FILE *bedOut = NULL;
FILE *expRecordOut = NULL;
char *toDiffFileName = optionVal("toDiffFile", NULL);
FILE *toDiffOut = NULL;
struct lineFile *lf = NULL;
fillInExpHash(expFileName, &expHash, &expNames, &expCount);
lf = lineFileOpen(affyFileName, TRUE);
bedOut = mustOpen(bedOutName, "w");
if(toDiffFileName != NULL)
    toDiffOut = mustOpen(toDiffFileName, "w");

/* Loop through either adding experiments to beds or if new
   probeset create bed from psl and start over. */
while(lineFileChopNextTab(lf, row, sizeof(row)))
    {
    /* Do we have to make a new bed? */
    if(probeSet == NULL || differentWord(probeSet, row[0]))
	{
	occassionalDot();
	numExps = 0;
	/* If we have probeset print out the current beds. */
	if(probeSet != NULL)
	    {
	    for(b = bed; b != NULL; b = b->next)
		{
		int avgCount = 0;
		for(i = 0; i < b->expCount; i++)
		    if(b->expScores[i] != -10000)
			avgCount++;
		if(avgCount != 0 && b->score > 0)
		    b->score = log(b->score / avgCount) * 100;
		else
		    b->score = 0;
		bedTabOutN(b, 15, bedOut);
		if(toDiffOut != NULL)
		    outputToDiffRecord(b, expNames, toDiffOut);
		}
	    }
	bedFreeList(&bed);
	/* Lookup key in pslHash to find list of psl. */
	safef(key, sizeof(key), "%s", row[0]);
	pslList = hashFindVal(pslHash, key);
	/* Can have multiple psls. */
	for(psl = pslList; psl != NULL; psl = psl->next)
	    {
	    b = bedFromPsl(psl);
	    AllocArray(b->expIds, expCount );
	    AllocArray(b->expScores, expCount);
	    b->expCount = expCount;
	    initBedScores(b, expCount);
	    slAddHead(&bed, b);
	    }
	}
    if(bed != NULL)
	{
	/* Allocate larger arrays if necessary. */
	if(numExps > expCount)
	    {
	    errAbort("Supposed to be %d experiments but probeset %s has at least %d",
		     expCount, bed->name, numExps);
	    }
	for(b = bed; b != NULL; b = b->next)
	    {
	    int exp = hashIntVal(expHash, row[1]);
	    if(differentWord(row[3], "NaN"))
	       b->expScores[exp] = atof(row[3]);
	    if(differentWord(row[2], "NaN"))
	       b->score += atof(row[2]);
	    }
	numExps++;
	}
    freez(&probeSet);
    probeSet = cloneString(row[0]);
    }
expRecordOut = mustOpen(expRecordOutName, "w");
i = 0;
for(name = expNames; name != NULL; name = name->next)
    {
    subChar(name->name, ',', '_');	    
    subChar(name->name, ' ', '_');
    fprintf(expRecordOut, "%d\t%s\tuclaExp\tuclaExp\tuclaExp\tuclaExp\t1\t%s,\n", i++, name->name, name->name);
    }
hashFree(&expHash);
slFreeList(&expNames);
carefulClose(&expRecordOut);
carefulClose(&bedOut);
lineFileClose(&lf);
}