void convertPslFile(struct sqlConnection *conn, char *pslFile, FILE *genePredFh)
/* Convert the mRNA alignments in a PSL file to genePred objects.  Each
 * tab-separated row is passed, along with conn and genePredFh, to
 * convertPslFileRow(). */
{
char *pslCols[PSL_NUM_COLS];
struct lineFile *pslLf = pslFileOpen(pslFile);

for (;;)
    {
    if (!lineFileNextRowTab(pslLf, pslCols, PSL_NUM_COLS))
        break;
    convertPslFileRow(conn, pslCols, genePredFh);
    }
lineFileClose(&pslLf);
}
static void parseIndex(struct gbRelease* release, struct gbIgnore* ignore, char *path)
/* Open the ignore file at path and pass every tab-separated row of
 * IGIDX_NUM_COLS columns to parseRow(). */
{
struct lineFile* ignoreLf = lineFileOpen(path, TRUE);
char* cols[IGIDX_NUM_COLS];

while (lineFileNextRowTab(ignoreLf, cols, IGIDX_NUM_COLS))
    {
    parseRow(release, ignore, ignoreLf, cols);
    }
lineFileClose(&ignoreLf);
}
static struct hash *loadSizes(char *szFile)
/* Load a two-column, tab-separated file into a hash.  The first column is
 * the key and the second column, parsed as a signed integer, is the value.
 * The input file is closed before returning; the caller owns the hash. */
{
struct hash *sizes = hashNew(0);
struct lineFile *lf = lineFileOpen(szFile, TRUE);
char *row[2];

while (lineFileNextRowTab(lf, row, 2))
    hashAddInt(sizes, row[0], sqlSigned(row[1]));
lineFileClose(&lf);    /* close the input so the open file is not leaked */
return sizes;
}
void hgCeOrfToGene(char *database, char *geneNames, char *geneTable, char *table) /* hgCeOrfToGene - Make orfToGene table for C.elegans from * GENE_DUMPS/gene_names.txt. */ { struct lineFile *lf = lineFileOpen(geneNames, TRUE); struct sqlConnection *conn; struct sqlResult *sr; char query[256]; char **row; char *tempDir = "."; FILE *f = hgCreateTabFile(tempDir, table); char *words[4]; struct hash *orfHash = newHash(17); /* Make hash to look up gene names. */ while (lineFileNextRowTab(lf, words, ArraySize(words))) { char *gene = words[0]; char *orfs = words[3]; char *type = words[2]; char *orf[128]; int i, orfCount; if (sameString(type, "Gene")) { orfCount = chopString(orfs, ",", orf, ArraySize(orf)); if (orfCount >= ArraySize(orf)) errAbort("Too many ORFs line %d of %s", lf->lineIx, lf->fileName); for (i=0; i<orfCount; ++i) hashAdd(orfHash, orf[i], cloneString(gene)); } } lineFileClose(&lf); /* For each orf in gene table write out gene name if possible, * otherwise orf name. */ conn = sqlConnect(database); safef(query, sizeof(query), "select name from %s", geneTable); sr = sqlGetResult(conn,query); while ((row = sqlNextRow(sr)) != NULL) { char *orf = row[0]; char *gene = hashFindVal(orfHash, orf); if (gene == NULL) gene = orf; fprintf(f, "%s\t%s\n", orf, gene); } sqlFreeResult(&sr); createTable(conn, table, unique); hgLoadTabFile(conn, tempDir, table, &f); }
/* Read query names and sizes from a two-column tab-separated file into a
 * hash of querySizeCnt objects, registering each row via querySizeCntGet(). */
static struct hash* querySizeCntLoad(char *querySizeFile)
{
char *cols[2];
struct hash* sizeTbl = hashNew(queryHashPowTwo);
struct lineFile *sizeLf = lineFileOpen(querySizeFile, TRUE);

while (lineFileNextRowTab(sizeLf, cols, 2))
    {
    querySizeCntGet(sizeTbl, cols[0], sqlUnsigned(cols[1]));
    }
lineFileClose(&sizeLf);
return sizeTbl;
}
void loadCdsFile(char *cdsFile)
/* Read a two-column CDS file into the global hash gCdsTable.  The first
 * column is the key; the second column is copied into the hash's own
 * local memory pool. */
{
char *cols[2];
struct lineFile *cdsLf = lineFileOpen(cdsFile, TRUE);

gCdsTable = hashNew(20);
for (;;)
    {
    if (!lineFileNextRowTab(cdsLf, cols, 2))
        break;
    hashAdd(gCdsTable, cols[0], lmCloneString(gCdsTable->lm, cols[1]));
    }
lineFileClose(&cdsLf);
}
struct itemAttr* loadAttrFile(char* itemAttrFile) /* load itemAttr records into memory and sort */ { struct itemAttr *itemAttrs = NULL; struct lineFile *lf = lineFileOpen(itemAttrFile, TRUE); char *row[ITEMATTR_NUM_COLS]; while (lineFileNextRowTab(lf, row, ITEMATTR_NUM_COLS)) slSafeAddHead(&itemAttrs, itemAttrLoad(row)); lineFileClose(&lf); slSort(&itemAttrs, itemAttrCmp); return itemAttrs; }
void processFrameFile(FILE *sortFh, char *framesFile) /* read records from one frame file, adding bin and write to pipe to sort */ { struct lineFile *inLf = lineFileOpen(framesFile, TRUE); struct mafFrames mf; char *row[MAFFRAMES_NUM_COLS]; while (lineFileNextRowTab(inLf, row, MAFFRAMES_NUM_COLS)) { mafFramesStaticLoad(row, &mf); fprintf(sortFh, "%d\t", binFromRange(mf.chromStart, mf.chromEnd)); mafFramesTabOut(&mf, sortFh); } lineFileClose(&inLf); }
void loadCoverQSizes(char* coverQSizeFile)
/* Load coverage query sizes from a two-column tab file into the global
 * hash coverQSizes; the size is stored as an integer packed in the
 * value pointer. */
{
char *cols[2];
struct lineFile *sizeLf = lineFileOpen(coverQSizeFile, TRUE);

coverQSizes = hashNew(0);
for (;;)
    {
    int querySize;
    if (!lineFileNextRowTab(sizeLf, cols, 2))
        break;
    querySize = sqlSigned(cols[1]);
    hashAdd(coverQSizes, cols[0], intToPt(querySize));
    }
lineFileClose(&sizeLf);
}
struct hash *polyASizeLoadHash(char *fileName)
/* Load a tab-separated file of polyASize objects into a hash table.  Objects
 * are stored in the local memory of the hash, so they are freed when the
 * hash is freed. */
{
char *cols[POLYASIZE_NUM_COLS];
struct lineFile *pasLf = lineFileOpen(fileName, TRUE);
struct hash *pasHash = hashNew(22);

for (;;)
    {
    if (!lineFileNextRowTab(pasLf, cols, POLYASIZE_NUM_COLS))
        break;
    loadPolyASizeRec(pasHash, cols);
    }
lineFileClose(&pasLf);
return pasHash;
}
struct mahoney *vgLoadMahoneyList(char *fileName)
/* Load mahoney records from a tab-separated (not whitespace-separated)
 * file, run fixPrimers() on each, and return them in file order. */
{
char *cols[MAHONEY_NUM_COLS];
struct mahoney *mahoneyList = NULL;
struct lineFile *mahoneyLf = lineFileOpen(fileName, TRUE);

while (lineFileNextRowTab(mahoneyLf, cols, MAHONEY_NUM_COLS))
    {
    struct mahoney *rec = mahoneyLoad(cols);
    fixPrimers(rec);
    slAddHead(&mahoneyList, rec);
    }
lineFileClose(&mahoneyLf);

/* Records were pushed on the head, so flip back to file order. */
slReverse(&mahoneyList);
return mahoneyList;
}
void convertCelFile(struct lqRecord ***lqMatrix, struct hash *nmerHash, char *celFile, char *outputFormat) { struct lineFile *lf = lineFileOpen(celFile, TRUE); char *line=NULL; int lineSize, x, y, i,j; char *words[5]; struct lqRecord *pm = NULL, *mm=NULL; char *outFile = needMem(sizeof(char)*(strlen(celFile)+5)); FILE *out = NULL; int numCols=0, numRows=0; struct cel ***celMatrix = NULL; safef(outFile, strlen(celFile)+5, "%s.tab", celFile); out = mustOpen(outFile, "w"); parseCelRowsCols(lf, &numRows, &numCols); if(numCols == 0 || numRows == 0) errAbort("Couldn't find a 'Cols=' or a 'Rows=' in %s, is this a cel file?\n", celFile); /* Allocate the matrix. */ celMatrix = needMem(sizeof(struct lqRecord *)*numRows); for(i=0; i<numRows; i++) celMatrix[i] = needMem(sizeof(struct lqRecord *)*numCols); while(lineFileNextRowTab(lf, words, 5)) { struct cel *cel = parseCelRow(words); celMatrix[cel->x][cel->y] = cel; if(cel->x + 1 == numRows && cel->y +1 == numCols) break; } outputPairsFile(out, numRows, numCols, lqMatrix, celMatrix); /* Cleanup. */ for(i=0;i<numRows;i++) for(j=0; j<numCols; j++) if(celMatrix[i][j] != NULL) freez(&celMatrix[i][j]); for(i=0; i<numRows; i++) freez(&celMatrix[i]); freez(&celMatrix); lineFileClose(&lf); freez(&outFile); carefulClose(&out); }
struct stanMad *stanMadLoadAll(char *fileName)
/* Load all stanMad records from a tab-separated file, skipping any row
 * whose first column contains "EXP".  Records are returned in file order.
 * Dispose of the result with stanMadFreeList(). */
{
char *cols[40];
struct stanMad *madList = NULL;
struct lineFile *madLf = lineFileOpen(fileName, TRUE);

while (lineFileNextRowTab(madLf, cols, ArraySize(cols)))
    {
    if (strstr(cols[0], "EXP") == NULL)
        {
        struct stanMad *rec = stanMadLoad(cols);
        slAddHead(&madList, rec);
        }
    }
lineFileClose(&madLf);
slReverse(&madList);
return madList;
}
struct hash *hashNmerFile(char *file)
/* Read six-column tab-separated nmer alignment rows from file and return a
 * hash keyed by "<seq>-<name>".  The first record seen for a key is stored
 * in the hash; later records with the same key are appended to the tail of
 * that record's list. */
{
char *cols[6];
char key[256];
struct lineFile *nmerLf = lineFileOpen(file, TRUE);
struct hash *nmerHash = newHash(15);

for (;;)
    {
    struct nmerAlign *rec, *existing;
    if (!lineFileNextRowTab(nmerLf, cols, 6))
        break;
    rec = parseNmerAlignRow(cols);
    snprintf(key, sizeof(key), "%s-%s", rec->seq, rec->name);
    existing = hashFindVal(nmerHash, key);
    if (existing != NULL)
        slAddTail(&existing, rec);
    else
        hashAddUnique(nmerHash, key, rec);
    }
lineFileClose(&nmerLf);
return nmerHash;
}
struct mgcLibraryTbl *mgcLibraryTblLoad(char *fileName) /* load a file of mgcLibrary objects, building a hash by library id */ { struct mgcLibraryTbl *mlt; char *row[MGCLIBRARY_NUM_COLS]; char key[64]; struct lineFile *lf; AllocVar(mlt); mlt->idHash = hashNew(12); /* 4096 */ lf = gzLineFileOpen(fileName); while (lineFileNextRowTab(lf, row, MGCLIBRARY_NUM_COLS)) { struct mgcLibrary *mgcLibrary = mgcLibraryLoad(row); safef(key, sizeof(key), "%d", mgcLibrary->id_lib); hashAdd(mlt->idHash, key, mgcLibrary); } gzLineFileClose(&lf); return mlt; }
static void processOrgCatOi(struct gbSelect* select, unsigned orgCat)
/* Process the orientation-info (OI) file of an update for one organism
 * category.  OIs are only available for native, however this follows the
 * structure of the PSL code.  select->orgCats is temporarily set to orgCat
 * and restored before returning. */
{
char oiPath[PATH_LEN];
char *cols[EST_ORIENT_INFO_NUM_COLS];
struct lineFile* oiLf;
unsigned savedOrgCats = select->orgCats;

select->orgCats = orgCat;
gbAlignedGetPath(select, "oi.gz", NULL, oiPath);
oiLf = gzLineFileOpen(oiPath);
for (;;)
    {
    struct estOrientInfo* oi;
    if (!lineFileNextRowTab(oiLf, cols, EST_ORIENT_INFO_NUM_COLS))
        break;
    oi = estOrientInfoLoad(cols);
    processOi(select, oi);
    estOrientInfoFree(&oi);
    }
gzLineFileClose(&oiLf);
select->orgCats = savedOrgCats;
}
void migrateOrientInfos(struct migrateAligns* migrate, FILE* outOiFh)
/* Migrate estOrientInfo records from the previous selection's "oi.gz"
 * file, if that file exists, passing each record to migrateOrientInfo(). */
{
char prevOi[PATH_LEN];
char *cols[EST_ORIENT_INFO_NUM_COLS];
struct lineFile* prevOiLf;

gbAlignedGetPath(migrate->prevSelect, "oi.gz", NULL, prevOi);
if (!fileExists(prevOi))
    return;     /* nothing to migrate */

gbVerbEnter(2, "migrating from %s", prevOi);
prevOiLf = gzLineFileOpen(prevOi);
while (lineFileNextRowTab(prevOiLf, cols, EST_ORIENT_INFO_NUM_COLS))
    {
    struct estOrientInfo *oi = estOrientInfoLoad(cols);
    migrateOrientInfo(migrate, oi, prevOi, outOiFh);
    estOrientInfoFree(&oi);
    }
gzLineFileClose(&prevOiLf);
gbVerbLeave(2, "migrating from %s", prevOi);
}
void copyOrientInfos(struct gbSelect* select, FILE* outOiFh, struct recCounts* recCounts)
/* Copy an OI file from the work directory, if it exists, passing each
 * record to copyOrientInfo() together with recCounts so alignments can be
 * counted for the index. */
{
char workOi[PATH_LEN];
char *cols[EST_ORIENT_INFO_NUM_COLS];
struct lineFile* workOiLf;

gbAlignedGetPath(select, "oi", gWorkDir, workOi);
if (!fileExists(workOi))
    return;     /* no OI file in the work directory */

gbVerbEnter(2, "installing from %s", workOi);
workOiLf = gzLineFileOpen(workOi);
while (lineFileNextRowTab(workOiLf, cols, EST_ORIENT_INFO_NUM_COLS))
    {
    struct estOrientInfo *oi = estOrientInfoLoad(cols);
    copyOrientInfo(select, oi, workOi, outOiFh, recCounts);
    estOrientInfoFree(&oi);
    }
gzLineFileClose(&workOiLf);
gbVerbLeave(2, "installing from %s", workOi);
}
void hgLoadNetDist(char *inTab, char *db, char *outTable) { char *tempDir = "."; FILE *f = hgCreateTabFile(tempDir, outTable); struct sqlConnection *hConn = sqlConnect(db); FILE *missingFile=NULL; int missingCount=0; struct lineFile *lf=NULL; char *row[3]; int rowCount=3; if (sqlRemap) { fetchRemapInfo(db); missingHash = newHash(16); missingFile = mustOpen("missing.tab","w"); } /* read edges from file */ lf=lineFileOpen(inTab, TRUE); /* print final values, remapping if needed */ while (lineFileNextRowTab(lf, row, rowCount)) { char *geneI = row[0]; char *geneJ = row[1]; char *dij = row[2]; char *gi=NULL, *gj=NULL; if (sqlRemap) { /* it is possible for each id to have multiple remap values in hash */ struct hashEl *hi=NULL, *hj=NULL, *hjSave=NULL; hi = hashLookup(aliasHash,geneI); hj = hashLookup(aliasHash,geneJ); missingCount += handleMissing(hi, geneI, missingHash, missingFile); missingCount += handleMissing(hj, geneJ, missingHash, missingFile); hjSave = hj; /* do all combinations of i and j */ for(;hi;hi=hashLookupNext(hi)) { gi = (char *)hi->val; for(;hj;hj=hashLookupNext(hj)) { gj = (char *)hj->val; fprintf(f,"%s\t%s\t%s\n",gi,gj,dij); } hj = hjSave; /* reset it */ } } else { gi=geneI; gj=geneJ; fprintf(f,"%s\t%s\t%s\n",gi,gj,dij); } } lineFileClose(&lf); carefulClose(&f); if (sqlRemap) { carefulClose(&missingFile); if (missingCount == 0) unlink("missing.tab"); else printf("hgLoadNetDist %d id-remapping misses, see missing.tab\n", missingCount); } createTable(hConn, outTable); hgLoadTabFile(hConn, tempDir, outTable, &f); hgRemoveTabFile(tempDir, outTable); }
struct hash *makeProbeBed(char *inGff, char *outBed) /* Convert probe location GFF file to BED. */ { struct lineFile *lf = lineFileOpen(inGff, TRUE); char *row[9]; struct hash *hash = newHash(16); FILE *f = mustOpen(outBed, "w"); while (lineFileNextRowTab(lf, row, ArraySize(row))) { int chromIx = romanToArabicChrom(row[0], lf); int start = lineFileNeedNum(lf, row, 3) - 1; int end = lineFileNeedNum(lf, row, 4); char *s = row[8]; char *probe, *orf, *note; char *boundAt = "Bound at "; struct tfBinding *tfbList = NULL, *tfb; if (!startsWith("Probe ", s)) errAbort("Expecting 9th column to start with 'Probe ' line %d of %s", lf->lineIx, lf->fileName); probe = nextWord(&s); orf = nextWord(&s); chopOff(orf, ';'); note = nextWord(&s); if (!sameWord("Note", note)) errAbort("Expecting 'note' in 9th column line %d of %s", lf->lineIx, lf->fileName); s = skipLeadingSpaces(s); if (!parseQuotedString(s, s, NULL)) errAbort("Expecting quoted string in 9th column line %d of %s", lf->lineIx, lf->fileName); if (startsWith("Bad Probe", s)) continue; else if (startsWith("Not bound", s)) { /* Ok, we do nothing. 
*/ } else if (startsWith(boundAt, s)) { while (s != NULL && startsWith(boundAt, s)) { char *word, *by; double binding; s += strlen(boundAt); word = nextWord(&s); binding = atof(word); by = nextWord(&s); if (!sameString("by:", by)) errAbort("Expecting by: line %d of %s", lf->lineIx, lf->fileName); while ((word = nextWord(&s)) != NULL) { char lastChar = 0, *e; e = word + strlen(word) - 1; lastChar = *e; if (lastChar == ';' || lastChar == ',') *e = 0; AllocVar(tfb); tfb->binding = binding; tfb->tf = cloneString(word); slAddHead(&tfbList, tfb); if (lastChar == ';') break; } s = skipLeadingSpaces(s); } slReverse(&tfbList); } else { errAbort("Expecting %s in note line %d of %s", boundAt, lf->lineIx, lf->fileName); } fprintf(f, "chr%d\t%d\t%d\t", chromIx+1, start, end); fprintf(f, "%s\t%d\t", orf, slCount(tfbList)); for (tfb = tfbList; tfb != NULL; tfb = tfb->next) fprintf(f, "%s,", tfb->tf); fprintf(f, "\t"); for (tfb = tfbList; tfb != NULL; tfb = tfb->next) fprintf(f, "%4.3f,", tfb->binding); fprintf(f, "\n"); hashAdd(hash, orf, NULL); } lineFileClose(&lf); carefulClose(&f); return hash; }
void checkGff(char *gff, struct hash *chromHash) /* Check that CDS portions of GFF file have start * codons where they are supposed to. */ { struct lineFile *lf = lineFileOpen(gff, TRUE); char *row[10]; int cdsCount = 0, goodCount = 0, badCount = 0; verbose(2,"# scanning %d fields of gff file:\n#\t'%s'\n", gffRowCount, gff); while (lineFileNextRowTab(lf, row, gffRowCount)) { if (startsWith("CDS", row[2])) { int start = lineFileNeedNum(lf, row, 3) - 1; int end = lineFileNeedNum(lf, row, 4); int size = end-start; char strand = row[6][0]; char chrom[64]; struct dnaSeq *seq; char *startCodon; if (size < 1) { errAbort("start not before end line %d of %s", lf->lineIx, lf->fileName); } if (strand != '+' && strand != '-') { errAbort("Expecting strand got %s line %d of %s", row[6], lf->lineIx, lf->fileName); } if (startsWith("2-micron", row[0])) // need to stop processing here break; if (!startsWith("chr", row[0])) continue; if (startsWith("chrMito", row[0])) // change name to UCSC chrM safef(chrom, sizeof(chrom), "%s", "chrM"); else safef(chrom, sizeof(chrom), "%s", row[0]); if ((seq = hashFindVal(chromHash, chrom)) == NULL) errAbort("Unknown chromosome %s line %d of %s", row[0], lf->lineIx, lf->fileName); if (end > seq->size) { printf("end (%d) greater than %s size (%d) line %d of %s", end, chrom, seq->size, lf->lineIx, lf->fileName); ++badCount; continue; } startCodon = seq->dna + start; if (strand == '-') reverseComplement(startCodon, size); if (!startsWith("ATG", startCodon)) { char *s = startCodon; verbose(3,"# not ATG: %s:%d-%d\t%c%c%c\t%c\n", chrom, start, end, s[0], s[1], s[2], strand); ++badCount; } else ++goodCount; if (verboseLevel()>=4) { char *s = startCodon; if (gffRowCount > 9) printf("%s\t%d\t%c%c%c\t%c\t%s\n", chrom, start, s[0], s[1], s[2], strand, row[9]); else printf("%s\t%d\t%c%c%c\t%c\t%s\n", chrom, start, s[0], s[1], s[2], strand, row[8]); } if (strand == '-') reverseComplement(startCodon, size); ++cdsCount; } } lineFileClose(&lf); printf("# good %d, 
bad %d, total %d\n", goodCount, badCount, cdsCount); }
void initGapAid(char *gapFileName) /* Initialize gap aid structure for faster gap * computations. */ { int i, tableSize, startLong = -1; char *sizeDesc[2]; char *words[128]; if (gapFileName != NULL) { struct lineFile *lf = lineFileOpen(gapFileName, TRUE); int count; lineFileNextRowTab(lf, sizeDesc, 2); tableSize = atoi(sizeDesc[1]); AllocArray(gapInitPos,tableSize); AllocArray(gapInitQGap,tableSize); AllocArray(gapInitTGap,tableSize); AllocArray(gapInitBothGap,tableSize); while (count = lineFileChopNext(lf, words, tableSize+1)) { if (sameString(words[0],"smallSize")) { aid.smallSize = atoi(words[1]); } if (sameString(words[0],"position")) { for (i=0 ; i<count-1 ; i++) gapInitPos[i] = atoi(words[i+1]); } if (sameString(words[0],"qGap")) { for (i=0 ; i<count-1 ; i++) gapInitQGap[i] = atoi(words[i+1]); } if (sameString(words[0],"tGap")) { for (i=0 ; i<count-1 ; i++) gapInitTGap[i] = atoi(words[i+1]); } if (sameString(words[0],"bothGap")) { for (i=0 ; i<count-1 ; i++) gapInitBothGap[i] = atoi(words[i+1]); } } if (aid.smallSize == 0) errAbort("missing smallSize parameter in %s\n",gapFileName); lineFileClose(&lf); } else { /* if no gap file, then setup default values */ /* Set up to handle small values */ aid.smallSize = 111; tableSize = 11; AllocArray(gapInitPos,tableSize); AllocArray(gapInitQGap,tableSize); AllocArray(gapInitTGap,tableSize); AllocArray(gapInitBothGap,tableSize); for (i = 0 ; i < tableSize ; i++) { gapInitPos[i] = gapInitPosDefault[i]; gapInitTGap[i] = gapInitTGapDefault[i]; gapInitQGap[i] = gapInitQGapDefault[i]; gapInitBothGap[i] = gapInitBothGapDefault[i]; } } AllocArray(aid.qSmall, aid.smallSize); AllocArray(aid.tSmall, aid.smallSize); AllocArray(aid.bSmall, aid.smallSize); for (i=1; i<aid.smallSize; ++i) { aid.qSmall[i] = interpolate(i, gapInitPos, gapInitQGap, tableSize); aid.tSmall[i] = interpolate(i, gapInitPos, gapInitTGap, tableSize); aid.bSmall[i] = interpolate(i, gapInitPos, gapInitBothGap, tableSize); } /* Set up to handle intermediate 
values. */ for (i=0; i<tableSize; ++i) { if (aid.smallSize == gapInitPos[i]) { startLong = i; break; } } if (startLong < 0) errAbort("No position %d in initGapAid()\n", aid.smallSize); aid.longCount = tableSize - startLong; aid.qPosCount = tableSize - startLong; aid.tPosCount = tableSize - startLong; aid.bPosCount = tableSize - startLong; aid.longPos = cloneMem(gapInitPos + startLong, aid.longCount * sizeof(int)); aid.qLong = cloneMem(gapInitQGap + startLong, aid.qPosCount * sizeof(double)); aid.tLong = cloneMem(gapInitTGap + startLong, aid.tPosCount * sizeof(double)); aid.bLong = cloneMem(gapInitBothGap + startLong, aid.bPosCount * sizeof(double)); /* Set up to handle huge values. */ aid.qLastPos = aid.longPos[aid.qPosCount-1]; aid.tLastPos = aid.longPos[aid.tPosCount-1]; aid.bLastPos = aid.longPos[aid.bPosCount-1]; aid.qLastPosVal = aid.qLong[aid.qPosCount-1]; aid.tLastPosVal = aid.tLong[aid.tPosCount-1]; aid.bLastPosVal = aid.bLong[aid.bPosCount-1]; aid.qLastSlope = calcSlope(aid.qLastPosVal, aid.qLong[aid.qPosCount-2], aid.qLastPos, aid.longPos[aid.qPosCount-2]); aid.tLastSlope = calcSlope(aid.tLastPosVal, aid.tLong[aid.tPosCount-2], aid.tLastPos, aid.longPos[aid.tPosCount-2]); aid.bLastSlope = calcSlope(aid.bLastPosVal, aid.bLong[aid.bPosCount-2], aid.bLastPos, aid.longPos[aid.bPosCount-2]); // uglyf("qLastPos %d, qlastPosVal %f, qLastSlope %f\n", aid.qLastPos, aid.qLastPosVal, aid.qLastSlope); // uglyf("tLastPos %d, tlastPosVal %f, tLastSlope %f\n", aid.tLastPos, aid.tLastPosVal, aid.tLastSlope); // uglyf("bLastPos %d, blastPosVal %f, bLastSlope %f\n", aid.bLastPos, aid.bLastPosVal, aid.bLastSlope); }
void bioImageLoad(char *setRaFile, char *itemTabFile) /* bioImageLoad - Load data into bioImage database. */ { struct hash *raHash = raReadSingle(setRaFile); struct hash *rowHash; struct lineFile *lf = lineFileOpen(itemTabFile, TRUE); char *line, *words[256]; struct sqlConnection *conn = sqlConnect(database); int rowSize; int submissionSetId; struct hash *fullDirHash = newHash(0); struct hash *screenDirHash = newHash(0); struct hash *thumbDirHash = newHash(0); struct hash *treatmentHash = newHash(0); struct hash *bodyPartHash = newHash(0); struct hash *sliceTypeHash = newHash(0); struct hash *imageTypeHash = newHash(0); struct hash *sectionSetHash = newHash(0); struct dyString *dy = dyStringNew(0); /* Read first line of tab file, and from it get all the field names. */ if (!lineFileNext(lf, &line, NULL)) errAbort("%s appears to be empty", lf->fileName); if (line[0] != '#') errAbort("First line of %s needs to start with #, and then contain field names", lf->fileName); rowHash = hashRowOffsets(line+1); rowSize = rowHash->elCount; if (rowSize >= ArraySize(words)) errAbort("Too many fields in %s", lf->fileName); /* Check that have all required fields */ { char *fieldName; int i; for (i=0; i<ArraySize(requiredSetFields); ++i) { fieldName = requiredSetFields[i]; if (!hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s", fieldName, setRaFile); } for (i=0; i<ArraySize(requiredItemFields); ++i) { fieldName = requiredItemFields[i]; if (!hashLookup(rowHash, fieldName)) errAbort("Field %s is not in %s", fieldName, itemTabFile); } for (i=0; i<ArraySize(requiredFields); ++i) { fieldName = requiredFields[i]; if (!hashLookup(rowHash, fieldName) && !hashLookup(raHash, fieldName)) errAbort("Field %s is not in %s or %s", fieldName, setRaFile, itemTabFile); } } /* Create/find submission record. */ submissionSetId = saveSubmissionSet(conn, raHash); /* Process rest of tab file. 
*/ while (lineFileNextRowTab(lf, words, rowSize)) { int fullDir = cachedId(conn, "location", "name", fullDirHash, "fullDir", raHash, rowHash, words); int screenDir = cachedId(conn, "location", "name", screenDirHash, "screenDir", raHash, rowHash, words); int thumbDir = cachedId(conn, "location", "name", thumbDirHash, "thumbDir", raHash, rowHash, words); int bodyPart = cachedId(conn, "bodyPart", "name", bodyPartHash, "bodyPart", raHash, rowHash, words); int sliceType = cachedId(conn, "sliceType", "name", sliceTypeHash, "sliceType", raHash, rowHash, words); int imageType = cachedId(conn, "imageType", "name", imageTypeHash, "imageType", raHash, rowHash, words); int treatment = cachedId(conn, "treatment", "conditions", treatmentHash, "treatment", raHash, rowHash, words); char *fileName = getVal("fileName", raHash, rowHash, words, NULL); char *submitId = getVal("submitId", raHash, rowHash, words, NULL); char *taxon = getVal("taxon", raHash, rowHash, words, NULL); char *isEmbryo = getVal("isEmbryo", raHash, rowHash, words, NULL); char *age = getVal("age", raHash, rowHash, words, NULL); char *sectionSet = getVal("sectionSet", raHash, rowHash, words, ""); char *sectionIx = getVal("sectionIx", raHash, rowHash, words, "0"); char *gene = getVal("gene", raHash, rowHash, words, ""); char *locusLink = getVal("locusLink", raHash, rowHash, words, ""); char *refSeq = getVal("refSeq", raHash, rowHash, words, ""); char *genbank = getVal("genbank", raHash, rowHash, words, ""); char *priority = getVal("priority", raHash, rowHash, words, "200"); int sectionId = 0; int oldId; // char *xzy = getVal("xzy", raHash, rowHash, words, xzy); if (sectionSet[0] != 0 && !sameString(sectionSet, "0")) { struct hashEl *hel = hashLookup(sectionSetHash, sectionSet); if (hel != NULL) sectionId = ptToInt(hel->val); else { sqlUpdate(conn, "insert into sectionSet values(default)"); sectionId = sqlLastAutoId(conn); hashAdd(sectionSetHash, sectionSet, intToPt(sectionId)); } } dyStringClear(dy); 
dyStringAppend(dy, "select id from image "); dyStringPrintf(dy, "where fileName = '%s' ", fileName); dyStringPrintf(dy, "and fullLocation = %d", fullDir); oldId = sqlQuickNum(conn, dy->string); if (oldId != 0) { if (replace) { dyStringClear(dy); dyStringPrintf(dy, "delete from image where id = %d", oldId); sqlUpdate(conn, dy->string); } else errAbort("%s is already in database line %d of %s", fileName, lf->lineIx, lf->fileName); } dyStringClear(dy); dyStringAppend(dy, "insert into image set\n"); dyStringPrintf(dy, " id = default,\n"); dyStringPrintf(dy, " fileName = '%s',\n", fileName); dyStringPrintf(dy, " fullLocation = %d,\n", fullDir); dyStringPrintf(dy, " screenLocation = %d,\n", screenDir); dyStringPrintf(dy, " thumbLocation = %d,\n", thumbDir); dyStringPrintf(dy, " submissionSet = %d,\n", submissionSetId); dyStringPrintf(dy, " sectionSet = %d,\n", sectionId); dyStringPrintf(dy, " sectionIx = %s,\n", sectionIx); dyStringPrintf(dy, " submitId = '%s',\n", submitId); dyStringPrintf(dy, " gene = '%s',\n", gene); dyStringPrintf(dy, " locusLink = '%s',\n", locusLink); dyStringPrintf(dy, " refSeq = '%s',\n", refSeq); dyStringPrintf(dy, " genbank = '%s',\n", genbank); dyStringPrintf(dy, " priority = %s,\n", priority); dyStringPrintf(dy, " taxon = %s,\n", taxon); dyStringPrintf(dy, " isEmbryo = %s,\n", isEmbryo); dyStringPrintf(dy, " age = %s,\n", age); dyStringPrintf(dy, " bodyPart = %d,\n", bodyPart); dyStringPrintf(dy, " sliceType = %d,\n", sliceType); dyStringPrintf(dy, " imageType = %d,\n", imageType); dyStringPrintf(dy, " treatment = %d\n", treatment); sqlUpdate(conn, dy->string); } }