struct psl *loadPslsFromDb(struct sqlConnection *conn, int numTables, char **tables, char *chrom, unsigned int chromStart, unsigned int chromEnd) /* load up all of the psls that align on a given section of the database */ { struct sqlResult *sr = NULL; char **row = NULL; int rowOffset = -100; struct psl *pslList = NULL; struct psl *psl = NULL; int i=0; /* for each table load up the relevant psls */ for(i = 0; i < numTables; i++) { sr = hRangeQuery(conn, tables[i], chrom, chromStart, chromEnd, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { psl = pslLoad(row+rowOffset); slSafeAddHead(&pslList, psl); if(weightMrna && (stringIn("refSeqAli",tables[i]) || stringIn("mrna", tables[i]))) { psl = clonePsl(psl); slSafeAddHead(&pslList, psl); } } sqlFreeResult(&sr); } slReverse(&pslList); return pslList; }
void faSimplify(char *inName, char *startPat, char *endPat, char *outName) /* faFlyBaseToUcsc - Convert Flybase peptide fasta file to UCSC format. */ { struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); char *line; int startSize = strlen(startPat); while (lineFileNext(lf, &line, NULL)) { if (line[0] == '>') { char *s = stringIn(startPat, line), *e; if (s == NULL) errAbort("No %s line %d of %s", startPat, lf->lineIx, lf->fileName); s += startSize; e = stringIn(endPat, s); if (e == NULL) errAbort("No %s line %d of %s", endPat, lf->lineIx, lf->fileName); *e = 0; fprintf(f, ">%s%s%s\n", prefix, s, suffix); } else { fprintf(f, "%s\n", line); } } carefulClose(&f); lineFileClose(&lf); }
static boolean isFivePrime(char *s) /* Return TRUE if s looks to have words five prime in it. */ { if (s == NULL) return FALSE; return stringIn("5'", s) || stringIn("Five prime", s) || stringIn("five prime", s) || stringIn("5 prime", s); }
static boolean isThreePrime(char *s) /* Return TRUE if s looks to have words three prime in it. */ { if (s == NULL) return FALSE; return stringIn("3'", s) || stringIn("Three prime", s) || stringIn("three prime", s) || stringIn("3 prime", s); }
QList<QString> extractNullTerminatedStrings(const void *src, size_t srcSize, QTextCodec *codec) { QList<QString> result; size_t ptr = static_cast<size_t>(0); size_t strStart = static_cast<size_t>(0); const quint8 *source = reinterpret_cast<const quint8 *>(src); while(ptr < srcSize) { if(source[ptr] == 0) { size_t strLen = ptr - strStart + static_cast<size_t>(1); if(strLen > static_cast<size_t>(1)) { QScopedArrayPointer<quint8> stringIn(new quint8[strLen]); memcpy( reinterpret_cast<void *>(stringIn.data()), reinterpret_cast<const void *>(source + strStart), strLen); strStart = ptr + static_cast<size_t>(1); QScopedPointer<QTextDecoder> decoder(codec->makeDecoder()); if (decoder.isNull()) { throw std::runtime_error("Unable to create text decoder"); } result.append(decoder->toUnicode(reinterpret_cast<const char *> ( stringIn.data()), static_cast<int> (strLen - static_cast<size_t>(1)))); } else { strStart = ptr + static_cast<size_t>(1); result.append(QString()); } } ptr++; } if(ptr - strStart > static_cast<size_t>(0)) { size_t strLen = ptr - strStart; QScopedArrayPointer<quint8> stringIn(new quint8[strLen]); memcpy(reinterpret_cast<void *>(stringIn.data()), reinterpret_cast<const void *>(source + strStart), strLen); QScopedPointer<QTextDecoder> decoder(codec->makeDecoder()); if (decoder.isNull()) { throw std::runtime_error("Unable to create text decoder"); } result.append(decoder->toUnicode(reinterpret_cast<const char *> ( stringIn.data()), strLen)); } return result; }
int endFromFileName(char *fileName)
/* Deduce which read of a pair (1 or 2) a file holds from its name.  The
 * markers are tried in order: "_R1_", "_R2_", "_1.", "_2.".  Calls errAbort
 * when none of them is present. */
{
static char *markers[] = { "_R1_", "_R2_", "_1.", "_2." };
static int ends[] = { 1, 2, 1, 2 };
int i;

for (i = 0; i < 4; ++i)
    {
    if (stringIn(markers[i], fileName))
        return ends[i];
    }
errAbort("Couldn't deduce paired end from file name %s", fileName);
return 0;
}
char* lookupName( struct sqlConnection *conn , char *id)
/* Return a freshly cloned display name for id.  If the bdgpGeneInfo table
 * exists, id (cut at its first '-') is looked up as bdgpName and, when a
 * symbol is found, the symbol (cut at the first "{}") is returned instead.
 * Otherwise the result is a clone of id.
 * NOTE(review): the conn argument is not used for the query; a separate
 * connection is allocated and released here. */
{
char *name = cloneString(id);
char infoTable[128];

safef(infoTable, sizeof(infoTable), "%sInfo","bdgpGene");
if (!hTableExists(infoTable))
    return(name);

struct sqlConnection *infoConn = hAllocConn();
char query[256];
char buf[64];
char *dash = strchr(name, '-');
char *symbol = NULL;

/* Only the part of the name before the first '-' is used as the key. */
if (dash != NULL)
    *dash = 0;
safef(query, sizeof(query),
      "select symbol from %s where bdgpName = '%s';", infoTable, name);
symbol = sqlQuickQuery(infoConn, query, buf, sizeof(buf));
hFreeConn(&infoConn);

if (symbol != NULL)
    {
    char *braces = stringIn("{}", symbol);
    if (braces != NULL)
        *braces = 0;
    freeMem(name);
    name = cloneString(symbol);
    }
return(name);
}
struct hash *hashTextFields(struct sqlConnection *conn, char *table)
/* Describe table and return a hash keyed by the names of the fields whose
 * type string contains "char", "blob" or "text".  All values are NULL. */
{
struct hash *textFields = hashNew(0);
struct sqlResult *sr = sqlDescribe(conn, table);
char **row;

for (row = sqlNextRow(sr); row != NULL; row = sqlNextRow(sr))
    {
    char *fieldName = row[0];
    char *fieldType = row[1];
    int isText = stringIn("char", fieldType)
              || stringIn("blob", fieldType)
              || stringIn("text", fieldType);
    if (isText)
        hashAdd(textFields, fieldName, NULL);
    }
sqlFreeResult(&sr);
return textFields;
}
long long currentVmPeak()
/* Return the number found on the VmPeak line of /proc/<pid>/status for this
 * process, or 0 if the file cannot be opened or no usable VmPeak line is
 * present.  A typical line is 'VmPeak: 62646196 kB'; the unit is not
 * examined (it seems to always be kB). */
{
long long vmPeak = 0;
char statusPath[256];
safef(statusPath, sizeof(statusPath), "/proc/%d/status", (int) getpid());

struct lineFile *lf = lineFileMayOpen(statusPath, TRUE);
if (lf == NULL)
    return vmPeak;

char *line;
while (lineFileNextReal(lf, &line))
    {
    if (stringIn("VmPeak", line))
        {
        char *words[3];
        int wordCount = chopByWhite(line, words, 3);
        /* The value is the 2nd word; skip the line rather than read an
         * unset words[1] if it is unexpectedly short. */
        if (wordCount >= 2)
            vmPeak = sqlLongLong(words[1]);
        break;
        }
    }
lineFileClose(&lf);
return vmPeak;
}
char *cellAbbreviation(char *cell) /* Return abbreviated version of cell-name */ { if (cellLetterHash == NULL) return cellAbbrevDefault(cell, FALSE); if (noLetterOk) { // strip qualifiers (follow the '+' char) char *plus = stringIn("+", cell); if (plus) *plus = 0; } char *val = hashFindVal(cellLetterHash, cell); if (val != NULL) return val; if (noLetterOk) return cellAbbrevDefault(cell, TRUE); if (noLetter) uglyf("cell %s isn't in %s\n", cell, cellLetter); else errAbort("cell %s isn't in %s\n", cell, cellLetter); return NULL; }
void noteIds(struct tenFields *tfList, char *inGff, FILE *cdsFile, FILE *otherFile)
/* Look for cases where the tenth field is of the form ID=XXX;Note=YYY.
 * In these cases the tenth field is cut at the ';' and the ninth field is
 * pointed at XXX.  The first time an ID is seen, its Note text (if any) is
 * cgi-decoded in place.
 * NOTE(review): inGff, cdsFile and otherFile are not used by the visible
 * code; nothing is written to either file here. */
{
struct tenFields *tf;
struct hash *uniqHash = newHash(19);

for (tf = tfList; tf != NULL; tf = tf->next)
    {
    char *attr = tf->fields[9];
    char *id, *rest;

    if (!startsWith("ID=", attr))
        continue;
    id = attr + 3;

    /* Terminate the ID before it is used as a hash key.  Doing the lookup
     * first would key on "XXX;Note=YYY" and leave repeated IDs uncut. */
    rest = strchr(id, ';');
    if (rest != NULL)
        *rest++ = 0;

    if (!hashLookup(uniqHash, id))
        {
        hashAdd(uniqHash, id, NULL);
        if (rest != NULL)
            {
            char *noteTag = stringIn("Note=", rest);
            if (noteTag != NULL)
                {
                char *note = noteTag + 5;    /* skip "Note=" */
                cgiDecode(note, note, strlen(note));
                }
            }
        }
    tf->fields[8] = id;
    }
hashFree(&uniqHash);
}
int laneFromFileName(char *fileName)
/* Deduce lane from file name.  Each "_L" in the name is examined in turn; if
 * exactly three characters follow it before the next '_' and all three are
 * digits, their numeric value is returned.  Returns 0 when no such pattern
 * exists (lanes start counting at 1). */
{
char *pat = "_L";
int patLen = strlen(pat);
char *scan;

for (scan = stringIn(pat, fileName); scan != NULL; scan = stringIn(pat, scan))
    {
    char *underscore;

    scan += patLen;
    underscore = strchr(scan, '_');
    if (underscore == NULL)
        break;
    if (underscore - scan == 3)
        {
        char digits[4];
        memcpy(digits, scan, 3);
        digits[3] = 0;
        if (isAllDigits(digits))
            return atoi(digits);
        }
    }
return 0;
}
void outputChunk(struct psl **pPslList, char *tempDir, int midIx, boolean noHead)
/* Write *pPslList to a file under tempDir and free the list.  Does nothing
 * for an empty list.  The file name comes from makeMidName, unless the
 * global chunkSize is 1, in which case it is tempDir/<qName>.psl using the
 * first psl's qName.  With the global stripVer set, that qName is first cut
 * in place at its first '.'.  A psl header is written unless noHead.
 * (The list is written in its current order; no sorting is done here.) */
{
struct psl *first = *pPslList;
struct psl *psl;
char fileName[512];
FILE *f;

if (first == NULL)
    return;     /* Empty. */

makeMidName(tempDir, midIx, fileName);
if (stripVer)
    {
    char *dot = stringIn(".", first->qName);
    if (dot != NULL)
        *dot = 0;
    }
if (chunkSize == 1)
    safef(fileName, sizeof(fileName), "%s/%s.psl", tempDir, first->qName);

f = mustOpen(fileName, "w");
if (!noHead)
    pslWriteHead(f);
for (psl = first; psl != NULL; psl = psl->next)
    pslTabOut(psl, f);
fclose(f);
pslFreeList(pPslList);
}
struct dyString * dyStringSub(char *orig, char *in, char *out)
/* Make up a duplicate of orig with all occurrences of in substituted
 * with out.  Returns NULL if orig is NULL.  An empty in matches nothing,
 * so the result is then a plain copy of orig. */
{
int inLen, outLen, origLen;
struct dyString *dy;
char *s, *e;

/* Must be checked before strlen(orig) is evaluated. */
if (orig == NULL)
    return NULL;

inLen = strlen(in);
outLen = strlen(out);
origLen = strlen(orig);
dy = newDyString(origLen + 2*outLen);

if (inLen == 0)
    {
    /* An empty pattern would match at every position without ever
     * advancing, so the loop below would never end. */
    dyStringAppendN(dy, orig, origLen);
    return dy;
    }

for (s = orig; ;)
    {
    e = stringIn(in, s);
    if (e == NULL)
        {
        /* No more matches: copy the tail and stop. */
        e = orig + origLen;
        dyStringAppendN(dy, s, e - s);
        break;
        }
    else
        {
        dyStringAppendN(dy, s, e - s);
        dyStringAppendN(dy, out, outLen);
        s = e + inLen;
        }
    }
return dy;
}
bool inclChrom(char *name)
/* Check if a chromosome should be included.  With the global noRandom set,
 * names ending in "_random", starting with "chrUn", or equal (sameWord) to
 * "chrNA" or "chrU" are excluded.  With the global noHap set, names
 * containing "_hap" are excluded.  Everything else is included. */
{
int excluded = 0;

if (noRandom)
    {
    if (endsWith(name, "_random")
        || startsWith("chrUn", name)
        || sameWord("chrNA", name)      /* danRer */
        || sameWord("chrU", name))      /* dm */
        excluded = 1;
    }
if (!excluded && noHap && stringIn("_hap", name))
    excluded = 1;
return !excluded;
}
int nearCountUniqAccRows(struct htmlPage *page)
/* Count number of unique rows in table containing just hyperlinked
 * accessions.  Returns -1 if page is NULL or the table start pattern is not
 * found.  Counting stops early (with a warning) at the first row that has no
 * text between '>' and '</a>'. */
{
char *s, *e, *row, *acc;
int count = 0;
int endRowPatLen;
struct hash *uniqHash;

if (page == NULL)
    return -1;

/* Set s to first row. */
s = stringIn(nearStartTablePat, page->htmlText);
if (s == NULL)
    return -1;
s += strlen(nearStartTablePat);

/* Allocate only after the early returns so nothing leaks on those paths. */
uniqHash = hashNew(0);
endRowPatLen = strlen(nearEndRowPat);
for (;;)
    {
    e = stringIn(nearEndRowPat, s);
    if (e == NULL)
        break;
    row = cloneStringZ(s, e-s);
    acc = qaStringBetween(row, ">", "</a>");
    if (acc == NULL)
        {
        warn("Can't find acc text between > and </a> while counting uniq row %s", row);
        freez(&row);
        break;
        }
    if (!hashLookup(uniqHash, acc))
        {
        hashAdd(uniqHash, acc, NULL);
        ++count;
        }
    freez(&row);
    freez(&acc);
    s = e + endRowPatLen;
    }
hashFree(&uniqHash);
return count;
}
boolean orgFits(struct hash *hash)
/* Return TRUE if the record passes the organism filter: either no organism
 * filter is set (global organism is NULL), or the record's "BF" value exists
 * and contains the organism string. */
{
char *bf;

if (organism == NULL)
    return TRUE;
bf = hashFindVal(hash, "BF");
return (bf != NULL && stringIn(organism, bf) != NULL);
}
int qaCountBetween(char *s, char *startPattern, char *endPattern, char *midPattern)
/* Count the number of midPatterns that occur between start and end pattern.
 * Every position after startPattern and before endPattern at which
 * midPattern begins is counted.  Returns 0 if either startPattern or the
 * following endPattern is missing. */
{
int count = 0;
char *e;

s = stringIn(startPattern, s);
if (s == NULL)
    return count;
s += strlen(startPattern);

e = stringIn(endPattern, s);
if (e == NULL)
    return count;   /* explicit, rather than comparing s against a NULL pointer */

for ( ; s < e; ++s)
    {
    if (startsWith(midPattern, s))
        ++count;
    }
return count;
}
void ans01MetaOut(FILE *f, char *midString)
/* Version of function used for Anshul's TFBS uniform peak calling ENCODE
 * Jan 2011 freeze.  Cuts midString in place at the first "0_VS_" and writes
 * a tab, "wgEncode", the shortened midString and a trailing "1" to f.
 * Aborts if the marker is absent. */
{
char *marker = "0_VS_";
char *cut = stringIn(marker, midString);

if (cut == NULL)
    errAbort("Can't find %s in %s\n", marker, midString);
cut[0] = 0;
fprintf(f, "\twgEncode%s1", midString);
}
void ans02MetaOut(FILE *f, char *midString)
/* Version of function used for Anshul's TFBS uniform peak calling ENCODE
 * June 2012 freeze.
 * NOTE: Including single-replicate data sets (Rep1).  This is different from
 * ans01.  Another difference is that the control dataset object is also
 * parsed out (_VS_).
 * Input string has common prefix stripped -- starts with lab/dataType, e.g.
 * 'HaibTfbs.*'.  Patterns are: *Rep[0-1].bam, *Rep[0-1]V[1-9].bam.
 * Rep0 is converted to Rep1 to obtain a valid UCSC object name (Rep0 is
 * Anshul's pooling convention).
 * midString is edited in place; two tab-prefixed "wgEncode" names
 * (experiment, then control) are written to f. */
{
char *repTag = "Rep";
char *controlTag = "bam_VS_wgEncode";
char *cursor;
char *endString;
char *controlRep;

/* Parse the experiment: force the replicate digit to 1 and end the string
 * right after it. */
cursor = stringIn(repTag, midString);
if (cursor == NULL)
    errAbort("Can't find %s in %s\n", repTag, midString);
cursor += strlen(repTag);
cursor[0] = '1';
cursor[1] = 0;
cursor += 2;

/* Now the control, searched for past the terminator just written. */
cursor = stringIn(controlTag, cursor);
if (cursor == NULL)
    errAbort("Can't find %s in %s\n", controlTag, midString);
endString = cursor + strlen(controlTag);

/* Force the control to Rep1 too; allow for no replicates (e.g. OpenChrom). */
controlRep = stringIn(repTag, endString);
if (controlRep != NULL)
    controlRep[strlen(repTag)] = '1';

fprintf(f, "\twgEncode%s\twgEncode%s", midString, endString);
}
static char *cloneAndCut(char *s, char *cutAt)
/* Return a clone of s.  If cutAt is non-NULL and occurs in the clone, the
 * clone is ended just before the first occurrence. */
{
char *clone = cloneString(s);
char *end = (cutAt != NULL) ? stringIn(cutAt, clone) : NULL;

if (end != NULL)
    *end = 0;
return clone;
}
// Decode the NUL-terminated string that starts at byte `offset` of `src`.
//
// src/srcSize   buffer to read from and its size in bytes
// offset        index of the first byte of the string
// codec         codec used for decoding; only needed for non-empty strings
// stringLength  receives the string's size in bytes INCLUDING the terminator
//
// Returns the decoded text, or an empty QString when the first byte at
// offset is already the terminator.  Throws std::runtime_error when no NUL
// is found in [offset, srcSize), when a non-empty string is to be decoded
// with a NULL codec, or when the codec cannot create a decoder.
QString getNullTerminatedString(const void *src, size_t offset, size_t srcSize,
                                QTextCodec *codec, size_t &stringLength)
{
    const char *bytes = reinterpret_cast<const char *>(src);

    size_t strEnd = offset;
    while (strEnd < srcSize && bytes[strEnd] != 0) {
        strEnd++;
    }
    if (strEnd >= srcSize) {
        // Ran off the buffer (or offset was already outside it).
        throw std::runtime_error("No end of string found");
    }

    const size_t payload = strEnd - offset;     // bytes before the terminator
    stringLength = payload + static_cast<size_t>(1);
    if (payload == 0) {
        return QString();
    }

    if (codec == NULL) {
        throw std::runtime_error("Codec is NULL");
    }
    QScopedPointer<QTextDecoder> decoder(codec->makeDecoder());
    if (decoder.isNull()) {
        throw std::runtime_error("Unable to create text decoder");
    }
    // Decode straight from the source buffer; no temporary copy is needed.
    return decoder->toUnicode(bytes + offset, static_cast<int>(payload));
}
void weedLines(char *weedFile, char *file, char *output, boolean invert, char *invertOutput)
/* weedLines - Selectively remove lines from file.
 * A line of file is "weeded" if it contains a word from weedFile: as a
 * whole word by default, or anywhere in the line when the "embedded" option
 * is set.  invert flips that decision.  Lines that are not weeded go to
 * output; weeded lines go to invertOutput when it is non-NULL and are
 * dropped otherwise.  All files are closed before returning. */
{
struct hash *hash = hashWordsInFile(weedFile, 16);
struct hashEl *weedList = hashElListHash(hash);
verbose(2, "%d words in weed file %s\n", hash->elCount, weedFile);
struct lineFile *lf = lineFileOpen(file, TRUE);
char *line, *word;
FILE *f = mustOpen(output, "w");
FILE *fInvert = NULL;
boolean embedded = optionExists("embedded");
if (invertOutput != NULL)
    fInvert = mustOpen(invertOutput, "w");

while (lineFileNext(lf, &line, NULL))
    {
    boolean doWeed = FALSE;
    char *dupe = NULL;
    if (embedded)
        {
        struct hashEl *hel;
        for (hel = weedList; hel != NULL; hel = hel->next)
            {
            if (stringIn(hel->name, line))
                {
                doWeed = TRUE;
                break;      /* one hit is enough */
                }
            }
        }
    else
        {
        /* nextWord chops up the line, so keep an intact copy for output. */
        dupe = cloneString(line);
        while ((word = nextWord(&line)) != NULL)
            {
            if (hashLookup(hash, word))
                {
                doWeed = TRUE;
                break;      /* one hit is enough */
                }
            }
        line = dupe;
        }
    if (invert)
        doWeed = !doWeed;
    if (!doWeed)
        fprintf(f, "%s\n", line);
    else
        {
        if (fInvert != NULL)
            fprintf(fInvert, "%s\n", line);
        }
    freez(&dupe);
    }

/* Release everything opened or allocated above. */
if (fInvert != NULL)
    carefulClose(&fInvert);
carefulClose(&f);
lineFileClose(&lf);
hashElFreeList(&weedList);
hashFree(&hash);
}
struct blastQuery *blastFileNextQuery(struct blastFile *bf) /* Read all alignments associated with next query. Return NULL at EOF. */ { char *line; struct blastQuery *bq; struct blastGappedAli *bga; AllocVar(bq); verbose(TRACE_LEVEL, "blastFileNextQuery\n"); /* find and parse Query= */ line = bfSearchForLine(bf, "Query="); if (line == NULL) return NULL; parseQueryLines(bf, line, bq); /* find and parse Database: */ line = bfSearchForLine(bf, "Database:"); if (line == NULL) bfUnexpectedEof(bf); parseDatabaseLines(bf, line, bq); /* Seek to beginning of first gapped alignment. */ for (;;) { line = bfNeedNextLine(bf); if (line[0] == '>') { lineFileReuse(bf->lf); break; } else if (isRoundLine(line)) parseRoundLine(line, bq); else if (stringIn("No hits found", line) != NULL) break; } /* Read in gapped alignments. */ while ((bga = blastFileNextGapped(bf, bq)) != NULL) { slAddHead(&bq->gapped, bga); } slReverse(&bq->gapped); if (verboseLevel() >= DUMP_LEVEL) { verbose(DUMP_LEVEL, "blastFileNextQuery result:\n"); blastQueryPrint(bq, stderr); } return bq; }
void replaceTextBetween(char *start, char *end, char *outerFile, char *middleFile)
/* replaceTextBetween - Replaces a section of text in the middle of a file.
 * Writes to stdout the contents of outerFile with the text between the
 * first occurrence of start and the next occurrence of end replaced by the
 * contents of middleFile.  With the global withEnds set, the start and end
 * patterns themselves are replaced too; otherwise they are kept.  Aborts if
 * either pattern is missing. */
{
/* Read outer file into memory. */
char *outer;
size_t outerSize;
readInGulp(outerFile, &outer, &outerSize);

/* Figure out the boundaries of the region we want to replace.  The end
 * pattern is searched for after the start pattern so the two matches can
 * never overlap (which matters when the patterns share text). */
char *s = stringIn(start, outer);
if (s == NULL)
    errAbort("Can't find '%s' in %s", start, outerFile);
char *e = stringIn(end, s + strlen(start));
if (e == NULL)
    errAbort("Can't find '%s' in %s", end, outerFile);
if (withEnds)
    e += strlen(end);
else
    s += strlen(start);

/* Read middle file into memory. */
char *middle;
size_t middleSize;
readInGulp(middleFile, &middle, &middleSize);

/* Write out file in three parts.  Sizes stay in size_t so that large
 * files are not truncated. */
size_t startSize = s - outer;
mustWrite(stdout, outer, startSize);
mustWrite(stdout, middle, middleSize);
size_t endSize = (outer + outerSize) - e;
mustWrite(stdout, e, endSize);

freeMem(middle);
freeMem(outer);
}
void loadPslsFromDatabase(struct sqlConnection *conn, char *db, char *chrom) /** Load all of the desired alignments into the chromkeeper structure from the desired pslTables. */ { int i = 0; struct sqlResult *sr = NULL; char **row = NULL; int rowOffset = 0; struct psl *pslList = NULL, *psl = NULL; for(i = 0; i < numDbTables; i++) { sr = hChromQuery(conn, dbTables[i], chrom, NULL, &rowOffset); while((row = sqlNextRow(sr)) != NULL) { psl = pslLoad(row+rowOffset); slAddHead(&pslList, psl); minPslStart = min(psl->tStart, minPslStart); maxPslEnd = max(psl->tEnd, maxPslEnd); /* This just adds the mrna twice to the list, cheat way to add more weight to certain tables. */ if(weightMrna && (stringIn("refSeqAli", dbTables[i]) || stringIn("mrna", dbTables[i]))) { psl = clonePsl(psl); slAddHead(&pslList, psl); } } sqlFreeResult(&sr); } chromPslBin = binKeeperNew(minPslStart, maxPslEnd); agxSeenBin = binKeeperNew(minPslStart, maxPslEnd); for(psl = pslList; psl != NULL; psl = psl->next) { binKeeperAdd(chromPslBin, psl->tStart, psl->tEnd, psl); } }
static void handleCID(char **html, char *ctMain) /* Handle CID replacements if needed */ { if (optionExists("cid") && ctMain && sameWord(ctMain,"text/html")) { struct hashEl *el, *list = hashElListHash(cidHash); for(el=list;el;el=el->next) { char *cid=addSuffix("cid:",el->name); if (stringIn(cid,*html)) { char *new = replaceChars(*html, cid, el->val); freez(html); *html = new; } freez(&cid); // support for content-location if (stringIn(el->name,*html)) { char *new = replaceChars(*html, el->name, el->val); freez(html); *html = new; } }
char *qaStringBetween(char *text, char *startPattern, char *endPattern)
/* Return text that occurs between startPattern and endPattern,
 * or NULL if no startPattern.  (Will return up to 100 characters
 * after startPattern if there is no endPattern.)  The result is a
 * newly cloned string. */
{
char *startMid = stringIn(startPattern, text);
char *endMid;
int midSize;

if (startMid == NULL)
    return NULL;
startMid += strlen(startPattern);

/* Look for endPattern in what follows startPattern (needle first,
 * haystack second, as in every other stringIn call). */
endMid = stringIn(endPattern, startMid);
if (endMid != NULL)
    midSize = endMid - startMid;
else
    {
    midSize = strlen(startMid);
    if (midSize > 100)
        midSize = 100;
    }
return cloneStringZ(startMid, midSize);
}
void testColInfo(struct htmlPage *dbPage, char *org, char *db, char *col)
/* Submit the colInfo request for col and record a soft error on the current
 * test status if the resulting page says no additional info is available.
 * The error report and page cleanup happen whether or not a page came back. */
{
struct htmlPage *infoPage = quickSubmit(dbPage, NULL, org, db, col, NULL,
                                        "colInfo", colInfoVarName, col);

if (infoPage != NULL
    && stringIn("No additional info available", infoPage->htmlText))
    {
    qaStatusSoftError(nearTestList->status,
                      "%s failed - no %s.html?", colInfoVarName, col);
    }
quickErrReport();
htmlPageFree(&infoPage);
}
// Decode a fixed-width string field of `length` bytes located at byte `pos`
// of `buffer`.
//
// If the field extends past bufferSize, only the bytes that exist are read
// and the rest of the field is treated as zero.  Decoding stops at the first
// zero byte inside the field (or at `length` if there is none).
// Throws std::runtime_error when codec is NULL or cannot create a decoder.
QString readFixedLengthStringFromBuffer(const void *buffer, size_t bufferSize,
                                        size_t pos, uint length,
                                        QTextCodec *codec)
{
    if (codec == NULL) {
        throw std::runtime_error("Codec is NULL");
    }

    const size_t wanted = static_cast<size_t>(length);
    // Value-initialised, so every byte not copied below is already zero.
    QScopedArrayPointer<quint8> stringIn(new quint8[wanted]());

    // Work out how many bytes really exist at pos.  Subtracting instead of
    // computing pos + length avoids size_t wrap-around for a huge pos.
    size_t available = static_cast<size_t>(0);
    if (pos < bufferSize) {
        available = bufferSize - pos;
        if (available > wanted) {
            available = wanted;
        }
    }
    if (available > static_cast<size_t>(0)) {
        memcpy(reinterpret_cast<void *>(stringIn.data()),
               reinterpret_cast<const void *>(
                   reinterpret_cast<const quint8 *>(buffer) + pos),
               available);
    }

    uint stringLength = qstrnlen(
        reinterpret_cast<const char *>(stringIn.data()), length);

    QScopedPointer<QTextDecoder> decoder(codec->makeDecoder());
    if (decoder.isNull()) {
        throw std::runtime_error("Unable to create text decoder");
    }
    return decoder->toUnicode(
        reinterpret_cast<const char *>(stringIn.data()),
        static_cast<int>(stringLength));
}