int ffCalcCdnaGapPenalty(int hGap, int nGap)
/* Compute the cDNA gap penalty for a gap of hGap in the haystack and
 * nGap in the needle.  Either gap may be negative, which means the
 * alignment steps backwards in that sequence.  The penalty starts at 2
 * and grows roughly with the number of binary digits of each gap. */
{
int penalty = 2;

/* Really long introns are discouraged, and even more so once they
 * exceed ffIntronMax. */
if (hGap > 400000)
    {
    penalty += (hGap - 400000)/3000;
    if (hGap > ffIntronMax)
        penalty += (hGap - ffIntronMax)/2000;
    }

/* Jumping back in the haystack is discouraged: the (negated) distance is
 * scaled by 8 and, once that exceeds 48, squared. */
if (hGap < 0)
    {
    hGap *= -8;
    if (hGap > 48)
        hGap *= hGap;
    }

/* Jumping back in the needle throws away previously aligned bases, so
 * charge one point per base and treat the needle gap as empty after that. */
if (nGap < 0)
    {
    penalty -= nGap;
    nGap = 0;
    }

penalty += digitsBaseTwo(hGap)/2;
if (nGap != 0)
    penalty += digitsBaseTwo(nGap);
else if (hGap > 30)
    penalty -= 1;    /* haystack-only gap longer than 30: small discount */
return penalty;
}
struct hashEl *hashAddN(struct hash *hash, char *name, int nameSize, void *val)
/* Add the first nameSize characters of name to hash, associated with val.
 * name does not need to be zero terminated.  Returns the new element.
 * The element and its copy of the name come from hash->lm when the hash
 * has a local memory pool, otherwise from AllocVar/cloneStringZ.
 * If the hash has autoExpand set and the element count exceeds
 * size * expansionFactor, the table is resized to the next power of two. */
{
struct hashEl *el;
if (hash->lm)
    {
    el = lmAlloc(hash->lm, sizeof(*el));
    el->name = lmAlloc(hash->lm, nameSize+1);
    memcpy(el->name, name, nameSize);
    el->name[nameSize] = 0;   /* terminate explicitly rather than rely on the allocator */
    }
else
    {
    AllocVar(el);
    el->name = cloneStringZ(name, nameSize);
    }

/* Hash the stored copy, not the caller's buffer: the caller's buffer may
 * continue past nameSize, and hashing those extra characters would file
 * the element under a bucket that a later lookup of el->name never visits. */
el->hashVal = hashString(el->name);
int bucket = el->hashVal & hash->mask;

el->val = val;
el->next = hash->table[bucket];
hash->table[bucket] = el;
hash->elCount += 1;
if (hash->autoExpand && hash->elCount > (int)(hash->size * hash->expansionFactor))
    {
    /* double the size */
    hashResize(hash, digitsBaseTwo(hash->size));
    }
return el;
}
struct hash *hashSizedForTable(struct sqlConnection *conn, char *table)
/* Create a new hash whose size is chosen from the row count of table.
 * Runs a "select count(*)" on conn, prints the row count to stdout, and
 * passes digitsBaseTwo(rowCount) + 1 to newHash as the size argument. */
{
char countQuery[256];
sqlSafef(countQuery, sizeof(countQuery), "select count(*) from %s", table);

int rowCount = sqlQuickNum(conn, countQuery);
printf("%s has %d rows\n", table, rowCount);

int powerOfTwoSize = digitsBaseTwo(rowCount) + 1;
return newHash(powerOfTwoSize);
}
struct hash *dnaSeqHash(struct dnaSeq *seqList)
/* Build a hash that maps each sequence's name to the sequence itself.
 * The hash is sized from the number of items in seqList (plus one);
 * names are added with hashAddUnique. */
{
struct hash *seqHash = hashNew(digitsBaseTwo(slCount(seqList) + 1));
struct dnaSeq *cur = seqList;
while (cur != NULL)
    {
    hashAddUnique(seqHash, cur->name, cur);
    cur = cur->next;
    }
return seqHash;
}
int gapPenalty(struct ffAli *left, struct ffAli *ali)
/* Calculate gap penalty between two consecutive alignment blocks using
 * exon scoring.  left is the block before the gap, ali the block after.
 * The needle gap must be non-negative (asserted).  The part of the gap
 * shared by needle and haystack, plus the haystack gap, is charged by its
 * number of binary digits; any needle excess is charged one point per
 * base; a base cost of 2 is always added. */
{
int nGap, hGap;
int minGap;

nGap = ali->nStart - left->nEnd;
assert(nGap >= 0);

/* Gap in the haystack runs from the END of the left block to the start of
 * this one, mirroring the needle computation above. */
hGap = ali->hStart - left->hEnd;
if (hGap < 0)
    {
    /* Blocks overlap in the haystack: count the overlap as extra needle
     * gap.  This must happen before hGap is clamped to zero. */
    nGap -= hGap;
    hGap = 0;
    }
minGap = (nGap < hGap ? nGap : hGap);
return (2 + digitsBaseTwo(hGap+minGap) + (nGap-minGap));
}
uint annoAssemblySeqSize(struct annoAssembly *aa, char *seqName) /* Return the number of bases in seq which must be in aa's twoBitFile. */ { if (aa->seqSizes == NULL) aa->seqSizes = hashNew(digitsBaseTwo(aa->tbf->seqCount)); struct hashEl *hel = hashLookup(aa->seqSizes, seqName); uint seqSize; if (hel != NULL) seqSize = (uint)(hel->val - NULL); else { seqSize = (uint)twoBitSeqSize(aa->tbf, seqName); char *pt = NULL; hashAdd(aa->seqSizes, seqName, pt + seqSize); } return seqSize; }
struct hash *hashKeyField(struct joinedTables *joined, int keyIx, struct joinerField *jf) /* Make a hash based on key field. */ { int hashSize = digitsBaseTwo(joined->rowCount); struct hash *hash = NULL; struct joinedRow *jr; if (hashSize > 20) hashSize = 20; hash = newHash(hashSize); for (jr = joined->rowList; jr != NULL; jr = jr->next) { struct slName *key; for (key = jr->keys[keyIx]; key != NULL; key = key->next) { if (jf->separator == NULL) { char *s = chopKey(jf->chopBefore, jf->chopAfter, key->name); if (s[0] != 0) hashAdd(hash, s, jr); } else { char *s = key->name, *e; char sep = jf->separator[0]; while (s != NULL && s[0] != 0) { e = strchr(s, sep); if (e != NULL) *e++ = 0; s = chopKey(jf->chopBefore, jf->chopAfter, s); if (s[0] != 0) hashAdd(hash, s, jr); s = e; } } } } return hash; }
struct hash *readKeyHash(char *db, struct joiner *joiner, struct joinerField *keyField, struct keyHitInfo **retList) /* Read key-field into hash. Check for dupes if need be. */ { struct sqlConnection *conn = sqlWarnConnect(db); struct hash *keyHash = NULL; struct keyHitInfo *khiList = NULL, *khi; if (conn == NULL) { return NULL; } else { struct slName *table; struct slName *tableList = getTablesForField(conn,keyField->splitPrefix, keyField->table, keyField->splitSuffix); int rowCount = totalTableRows(conn, tableList); int hashSize = digitsBaseTwo(rowCount)+1; char query[256], **row; struct sqlResult *sr; int itemCount = 0; int dupeCount = 0; char *dupe = NULL; if (rowCount > 0) { if (hashSize > hashMaxSize) hashSize = hashMaxSize; keyHash = hashNew(hashSize); for (table = tableList; table != NULL; table = table->next) { safef(query, sizeof(query), "select %s from %s", keyField->field, table->name); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *id = doChopsAndUpper(keyField, row[0]); if (hashLookup(keyHash, id)) { if (keyField->unique) { if (keyField->exclude == NULL || !slNameInList(keyField->exclude, id)) { if (dupeCount == 0) dupe = cloneString(id); ++dupeCount; } } } else { AllocVar(khi); hashAddSaveName(keyHash, id, khi, &khi->name); slAddHead(&khiList, khi); ++itemCount; } } sqlFreeResult(&sr); } if (dupe != NULL) { warn("Error: %d duplicates in %s.%s.%s including '%s'", dupeCount, db, keyField->table, keyField->field, dupe); freez(&dupe); } verbose(2, " %s.%s.%s - %d unique identifiers\n", db, keyField->table, keyField->field, itemCount); } slFreeList(&tableList); } sqlDisconnect(&conn); *retList = khiList; return keyHash; }