void checkAllBeds(struct bed **bedList, int expCount) /** check to make sure that all the beds have the same number of experiments associated with them */ { struct bed *bed = NULL; for(bed = *bedList; bed != NULL; ) { if(bed->expCount != expCount) { struct bed *tmp = NULL; if(bed->expCount != 0) { warn("Bed %s at %d has only %d exps, mark is %d", bed->name, slIxFromElement(*bedList, bed), bed->expCount, expCount); missingExpsCount++; } else noExpCount++; tmp = bed->next; slRemoveEl(bedList, bed); bed = tmp; } else bed = bed->next; } }
struct bbExIndexMaker *bbExIndexMakerNew(struct slName *extraIndexList, struct asObject *as) /* Return an index maker corresponding to extraIndexList. Checks that all fields * mentioned are in autoSql definition, and for now that they are all text fields. */ { /* Fill in scalar fields and return quickly if no extra indexes. */ struct bbExIndexMaker *eim; AllocVar(eim); eim->indexCount = slCount(extraIndexList); if (eim->indexCount == 0) return eim; // Not much to do in this case /* Allocate arrays according field count. */ AllocArray(eim->indexFields, eim->indexCount); AllocArray(eim->maxFieldSize, eim->indexCount); AllocArray(eim->chunkArrayArray, eim->indexCount); AllocArray(eim->fileOffsets, eim->indexCount); /* Loop through each field checking that it is indeed something we can index * and if so saving information about it */ int indexIx = 0; struct slName *name; for (name = extraIndexList; name != NULL; name = name->next) { struct asColumn *col = asColumnFind(as, name->name); if (col == NULL) errAbort("extraIndex field %s not a standard bed field or found in 'as' file.", name->name); if (!sameString(col->lowType->name, "string")) errAbort("Sorry for now can only index string fields."); eim->indexFields[indexIx] = slIxFromElement(as->columnList, col); ++indexIx; } return eim; }
struct bptFile *bigBedOpenExtraIndex(struct bbiFile *bbi, char *fieldName, int *retFieldIx) /* Return index associated with fieldName. Aborts if no such index. Optionally return * index in a row of this field. */ { struct udcFile *udc = bbi->udc; boolean isSwapped = bbi->isSwapped; struct asObject *as = bigBedAsOrDefault(bbi); struct asColumn *col = asColumnFind(as, fieldName); if (col == NULL) errAbort("No field %s in %s", fieldName, bbi->fileName); int colIx = slIxFromElement(as->columnList, col); if (retFieldIx != NULL) *retFieldIx = colIx; asObjectFree(&as); /* See if we have any extra indexes, and if so seek to there. */ bits64 offset = bbi->extraIndexListOffset; if (offset == 0) errAbort("%s has no indexes", bbi->fileName); udcSeek(udc, offset); /* Go through each extra index and see if it's a match */ int i; for (i=0; i<bbi->extraIndexCount; ++i) { bits16 type = udcReadBits16(udc, isSwapped); bits16 fieldCount = udcReadBits16(udc, isSwapped); bits64 fileOffset = udcReadBits64(udc, isSwapped); udcSeekCur(udc, 4); // skip over reserved bits if (type != 0) { warn("Don't understand type %d", type); internalErr(); } if (fieldCount == 1) { bits16 fieldId = udcReadBits16(udc, isSwapped); udcSeekCur(udc, 2); // skip over reserved bits if (fieldId == colIx) { udcSeek(udc, fileOffset); struct bptFile *bpt = bptFileAttach(bbi->fileName, udc); return bpt; } } else { warn("Not yet understanding indexes on multiple fields at once."); internalErr(); } } errAbort("%s is not indexed in %s", fieldName, bbi->fileName); return NULL; }
int altGraphXItemHeight(struct track *tg, void *item)
/* Return the pixel height of one item.
 *  - Dense visibility, or no hash stored in tg->customPt: tg->lineHeight.
 *  - Full visibility: the row count stored in the tg->customPt hash under the
 *    item's index in tg->items (as a decimal string), times tg->lineHeight.
 *  - Any other visibility: tg->heightPer. */
{
if (tg->limitedVis != tvDense && tg->customPt != NULL)
    {
    if (tg->limitedVis != tvFull)
        return tg->heightPer;

    /* The hash is keyed by the item's list position rendered as text. */
    char rowKey[128];
    safef(rowKey, sizeof(rowKey), "%d", slIxFromElement(tg->items, item));
    struct hash *rowHash = (struct hash *)tg->customPt;
    return hashIntVal(rowHash, rowKey) * tg->lineHeight;
    }
return tg->lineHeight;
}
int *makeNewToOldArray(struct asObject *as, struct slPair *fieldList) /* Return an array where we can lookup old index given new index. */ { int oldFieldCount = slCount(as->columnList); int newFieldCount = slCount(fieldList); int *oldIx; AllocArray(oldIx, newFieldCount); int i; struct slPair *fieldPair; for (i=0, fieldPair = fieldList; i<newFieldCount; ++i, fieldPair = fieldPair->next) { char *oldName = fieldPair->val; struct asColumn *col = asColumnFind(as, oldName); assert(col != NULL); /* We checked earlier but... */ int ix = slIxFromElement(as->columnList, col); assert(ix >= 0 && ix <= oldFieldCount); oldIx[i] = ix; } return oldIx; }
void outputUniqueOnSharedKey(char *inTab, struct asObject *as, struct asColumn *keyCol, struct slPair *fieldList, char *outTab, char *outErr) /* Scan through tab-separated file inTab and output fields in fieldList to * outTab. Make sure there is only one row for each value of sharedKey field. * If there would be multiple different rows in output with sharedKey, * complain about it in outErr. */ { /* Open input and output. */ struct lineFile *lf = lineFileOpen(inTab, TRUE); FILE *f = mustOpen(outTab, "w"); FILE *fErr = mustOpen(outErr, "w"); /* Set up array for input fields with more than we expect for better error reporting. */ int oldFieldCount = slCount(as->columnList); int newFieldCount = slCount(fieldList); int allocFields = oldFieldCount+10; char *words[allocFields]; /* Set up array for output fields that says where to find them in input. */ int *oldIx = makeNewToOldArray(as, fieldList); /* Figure out index of key field. */ int keyIx = slIxFromElement(as->columnList, keyCol); /* Go through each line of input, outputting selected columns. */ struct hash *uniqHash = hashNew(18); struct hash *errHash = hashNew(0); struct dyString *dy = dyStringNew(1024); int fieldCount; while ((fieldCount = lineFileChopNextTab(lf, words, allocFields)) > 0) { lineFileExpectWords(lf, oldFieldCount, fieldCount); /* Collect possible output into dy. */ dyStringClear(dy); dyStringPrintf(dy, "%s", words[oldIx[0]]); int i; for (i=1; i<newFieldCount; ++i) dyStringPrintf(dy, "\t%s", words[oldIx[i]]); dyStringPrintf(dy, "\n"); /* Check that this line is either unique for this key, or the same as previous lines * for the key. */ char *key = words[keyIx]; char *oldVal = hashFindVal(uniqHash, key); if (oldVal != NULL) { if (!sameString(oldVal, dy->string)) { /* Error reporting is a little complex. We want to output all lines associated * with key, including the first one, but we only want to do first line once. 
*/ if (!hashLookup(errHash, key)) { hashAdd(errHash, key, NULL); fputs(oldVal, fErr); } fputs(dy->string, fErr); } } else { hashAdd(uniqHash, key, cloneString(dy->string)); fputs(dy->string, f); } } /* Report error summary */ if (errHash->elCount > 0) { warn("Warning: %d shared keys have multiple values in table 2. See %s.\n" "Only first row for each key put in %s" , errHash->elCount, outErr, outTab); if (!mergeOk) noWarnAbort(); } /* Clean up and go home. */ freez(&oldIx); carefulClose(&fErr); carefulClose(&f); lineFileClose(&lf); }
struct bed *pathToBed(struct path *path, struct splice *splice,
                      int source, int sink, boolean spoofEnds)
/* Build a bed describing path within splice.  Each edge of the path that
 * pathEdgeType() reports as ggExon becomes one block.  When spoofEnds is TRUE
 * and the first and/or last edge of the path is not an exon, a 1bp block is
 * added at that end.  The bed name is "<splice name>.<path index>." followed by
 * the listed vertex numbers, plus "-Ex" / "-Inc" for path 0 / 1 of the
 * alt5Prime, alt3Prime, altRetInt and altCassette types.  Returns NULL (after
 * freeing the bed) if no blocks were produced. */
{
int *vertexIds = path->vertices;
int *positions = splice->vPositions;
unsigned char *vertexTypes = splice->vTypes;
struct dyString *nameBuf = newDyString(256);
struct bed *result = NULL;
int edgeIx = 0;
int k = 0;

AllocVar(result);
result->chrom = cloneString(splice->tName);
result->chromStart = BIGNUM;
result->chromEnd = 0;
safef(result->strand, sizeof(result->strand), "%s", splice->strand);
result->score = splice->type;
AllocArray(result->chromStarts, path->vCount);
AllocArray(result->blockSizes, path->vCount);

/* Leading fake exon: 1bp block just before the first vertex position. */
if (spoofEnds)
    {
    if (vertexIds[0] != source && vertexIds[1] <= splice->vCount
        && pathEdgeType(vertexTypes, vertexIds[0], vertexIds[1]) != ggExon)
        {
        int fakeStart = positions[vertexIds[0]] - 1;
        int edgeEnd = positions[vertexIds[1]];
        result->blockSizes[result->blockCount] = 1;
        result->chromStarts[result->blockCount] = fakeStart;
        result->chromStart = result->thickStart = min(result->chromStart, fakeStart);
        result->chromEnd = result->thickEnd = max(result->chromEnd, edgeEnd);
        result->blockCount++;
        }
    }

/* One block per exon edge along the path. */
for (edgeIx = 0; edgeIx < path->vCount - 1; edgeIx++)
    {
    if (vertexIds[edgeIx] == source || vertexIds[edgeIx] > splice->vCount)
        continue;
    if (pathEdgeType(vertexTypes, vertexIds[edgeIx], vertexIds[edgeIx+1]) != ggExon)
        continue;
    int exonStart = positions[vertexIds[edgeIx]];
    int exonEnd = positions[vertexIds[edgeIx+1]];
    result->blockSizes[result->blockCount] = exonEnd - exonStart;
    result->chromStarts[result->blockCount] = exonStart;
    result->chromStart = result->thickStart = min(result->chromStart, exonStart);
    result->chromEnd = result->thickEnd = max(result->chromEnd, exonEnd);
    result->blockCount++;
    }

/* Trailing fake exon: 1bp block starting at the last vertex position. */
edgeIx = path->vCount - 2;
if (spoofEnds)
    {
    if (vertexIds[edgeIx] != source && vertexIds[edgeIx+1] <= splice->vCount
        && pathEdgeType(vertexTypes, vertexIds[edgeIx], vertexIds[edgeIx+1]) != ggExon)
        {
        int fakeStart = positions[vertexIds[edgeIx+1]];
        int fakeEnd = positions[vertexIds[edgeIx+1]]+1;
        result->blockSizes[result->blockCount] = 1;
        result->chromStarts[result->blockCount] = fakeStart;
        result->chromStart = result->thickStart = min(result->chromStart, fakeStart);
        result->chromEnd = result->thickEnd = max(result->chromEnd, fakeEnd);
        result->blockCount++;
        }
    }

/* Compose the name: splice name, path index, then the vertices that are not
 * the sink and are not above splice->vCount. */
int pathIx = slIxFromElement(splice->paths, path);
dyStringPrintf(nameBuf, "%s.%d.", splice->name, pathIx);
for (k = 0; k < path->vCount; k++)
    {
    int vertex = path->vertices[k];
    if (vertex != sink && vertex <= splice->vCount)
        dyStringPrintf(nameBuf, "%d,", vertex);
    }
if (splice->type == alt5Prime || splice->type == alt3Prime ||
    splice->type == altRetInt || splice->type == altCassette)
    {
    if (pathIx == 0)
        dyStringPrintf(nameBuf, "-Ex");
    else if (pathIx == 1)
        dyStringPrintf(nameBuf, "-Inc");
    }
result->name = cloneString(nameBuf->string);

/* Block starts were recorded as absolute positions; make them relative to chromStart. */
for (k = 0; k < result->blockCount; k++)
    result->chromStarts[k] -= result->chromStart;

/* If we don't have any blocks, hand back NULL. */
if (result->blockCount == 0)
    bedFree(&result);
dyStringFree(&nameBuf);
return result;
}