static void snapSoftToCloseHard(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxSnapSize, int maxUncheckedSnapSize, struct nibTwoCache *seqCache, char *chromName) /* Snap hard vertices to nearby soft vertices of same type. */ { struct lm *lm = lmInit(0); addWaysInAndOut(vertexTree, edgeTree, lm); struct dlList *vList = sortedListFromTree(vertexTree); struct dlNode *node; int snapCount = 0; for (node = vList->head; !dlEnd(node); node = node->next) { if (snapVertex(node, maxSnapSize, maxUncheckedSnapSize, seqCache, chromName)) { rbTreeRemove(vertexTree, node->val); ++snapCount; } } /* Clean up ways in and out since have removed some nodes. */ for (node = vList->head; !dlEnd(node); node = node->next) { struct vertex *v = node->val; v->waysIn = v->waysOut = NULL; } if (snapCount > 0) { verbose(3, "Snapped %d close edges, now have %d vertices\n", snapCount, vertexTree->n); updateForwardedEdges(edgeTree); } dlListFree(&vList); lmCleanup(&lm); }
void memTrackerEnd()
/* Shut down the current memory tracker: clear the global memTracker pointer, pop
 * the memory handler, then free the tracker's list, its handler and the tracker
 * itself.  Aborts if no tracker is active. */
{
struct memTracker *tracker = memTracker;

if (!tracker)
    errAbort("memTrackerEnd without memTrackerStart");

/* Detach the tracker and pop the handler first; the frees below happen after
 * popMemHandler() has run. */
memTracker = NULL;
popMemHandler();

dlListFree(&tracker->list);
freeMem(tracker->handler);
freeMem(tracker);
}
static void wordTreeMakeNonsense(struct wordTree *wt, int maxSize, char *firstWord, int maxOutputWords, FILE *f) /* Go spew out a bunch of words according to probabilities in tree. */ { struct dlList *ll = dlListNew(); int listSize = 0; int outputWords = 0; for (;;) { if (++outputWords > maxOutputWords) break; struct dlNode *node; char *word; /* Get next predicted word. */ if (listSize == 0) { AllocVar(node); ++listSize; word = firstWord; } else if (listSize >= maxSize) { node = dlPopHead(ll); word = predictNext(wt, ll); } else { word = predictNext(wt, ll); AllocVar(node); ++listSize; } node->val = word; dlAddTail(ll, node); if (word == NULL) break; /* Output last word in list. */ { node = ll->tail; word = node->val; fprintf(f, "%s", word); if (word[strlen(word)-1] == '.') fprintf(f, "\n"); else fprintf(f, " "); } } dlListFree(&ll); }
void synQueueFree(struct synQueue **pSq)
/* Tear down the synQueue that *pSq points to and set *pSq to NULL via freez().
 * Does nothing when *pSq is already NULL.  The caller must make sure no other
 * thread is still using the queue.  Dynamic memory held in the messages is not
 * freed here; use synQueueFreeAndVals for that. */
{
struct synQueue *doomed = *pSq;

if (doomed != NULL)
    {
    dlListFree(&doomed->queue);
    pthreadCondDestroy(&doomed->cond);
    pthreadMutexDestroy(&doomed->mutex);
    freez(pSq);
    }
}
void letterChain(char *inFile, char *outFile, int maxSize) /* letterChain - Make Markov chain of letters in text. */ { struct dlList *ll = dlListNew(); int llSize = 0; int c; FILE *in = mustOpen(inFile, "r"); FILE *out; struct dlNode *node; UBYTE *s; struct trie *trie; AllocVar(trie); while ((c = getc(in)) >= 0) { if (llSize < maxSize) { s = needMem(1); *s = c; dlAddValTail(ll, s); ++llSize; if (llSize == maxSize) addToTrie(trie, ll); } else { node = dlPopHead(ll); s = node->val; *s = c; dlAddTail(ll, node); addToTrie(trie, ll); } } if (llSize < maxSize) addToTrie(trie, ll); while ((node = dlPopHead(ll)) != NULL) { addToTrie(trie, ll); freeMem(node->val); freeMem(node); } dlListFree(&ll); carefulClose(&in); out = mustOpen(outFile, "w"); rDumpTrie(0, trie->useCount, trie, out); carefulClose(&out); }
static void wordTreeGenerateFile(struct wordStore *store, int maxSize, struct wordTree *firstWord, int maxOutputWords, char *fileName) /* Create file containing words base on tree probabilities. The wordTreeGenerateList does * most of work. */ { struct dlList *ll = wordTreeGenerateList(store, maxSize, firstWord, maxOutputWords); FILE *f = mustOpen(fileName, "w"); struct dlNode *node; for (node = ll->head; !dlEnd(node); node = node->next) { struct wordInfo *info = node->val; fprintf(f, "%s\n", info->word); } carefulClose(&f); dlListFree(&ll); }
struct wordTree *wordTreeForChainsInFile(char *fileName, int chainSize, struct lm *lm) /* Return a wordTree of all chains-of-words of length chainSize seen in file. * Allocate the structure in local memory pool lm. */ { /* Stuff for processing file a line at a time. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; /* We'll keep a chain of three or so words in a doubly linked list. */ struct dlNode *node; struct dlList *chain = dlListNew(); int curSize = 0; /* We'll build up the tree starting with an empty root node. */ struct wordTree *wt = wordTreeNew(""); int wordCount = 0; /* Save time/space by sharing stack between all "following" rbTrees. */ struct rbTreeNode **stack; lmAllocArray(lm, stack, 256); /* Loop through each line of input file, lowercasing the whole line, and then * looping through each word of line, stripping out special chars, and finally * processing each word. */ while (lineFileNext(lf, &line, NULL)) { if (lower) tolowers(line); while ((word = nextWord(&line)) != NULL) { if (unpunc) { stripChar(word, ','); stripChar(word, '.'); stripChar(word, ';'); stripChar(word, '-'); stripChar(word, '"'); stripChar(word, '?'); stripChar(word, '!'); stripChar(word, '('); stripChar(word, ')'); if (word[0] == 0) continue; } verbose(2, "%s\n", word); /* We come to this point in the code for each word in the file. * Here we want to maintain a chain of sequential words up to * chainSize long. We do this with a doubly-linked list structure. * For the first few words in the file we'll just build up the list, * only adding it to the tree when we finally do get to the desired * chain size. Once past the initial section of the file we'll be * getting rid of the first link in the chain as well as adding a new * last link in the chain with each new word we see. 
*/ if (curSize < chainSize) { dlAddValTail(chain, cloneString(word)); ++curSize; if (curSize == chainSize) addChainToTree(wt, chain, lm, stack); } else { /* Reuse doubly-linked-list node, but give it a new value, as we move * it from head to tail of list. */ node = dlPopHead(chain); freeMem(node->val); node->val = cloneString(word); dlAddTail(chain, node); addChainToTree(wt, chain, lm, stack); } ++wordCount; } } /* Handle last few words in file, where can't make a chain of full size. Need * a special case for file that has fewer than chain size words too. */ if (curSize < chainSize) addChainToTree(wt, chain, lm, stack); while ((node = dlPopHead(chain)) != NULL) { addChainToTree(wt, chain, lm, stack); freeMem(node->val); freeMem(node); } dlListFree(&chain); lineFileClose(&lf); return wt; }
struct wordStore *wordStoreForChainsInFile(char *fileName, int chainSize) /* Return a wordStore containing all words, and also all chains-of-words of length * chainSize seen in file. */ { /* Stuff for processing file a line at a time. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; /* We'll build up the tree starting with an empty root node. */ struct wordStore *store = wordStoreNew(chainSize); struct wordTree *wt = store->markovChains = wordTreeNew(wordStoreAdd(store, "")); /* Loop through each line of file, treating it as a separate read. There's * special cases at the beginning and end of line, and for short lines. In the * main case we'll be maintaining a chain (doubly linked list) of maxChainSize words, * popping off one word from the start, and adding one word to the end for each * new word we encounter. This list is added to the tree each iteration. */ while (lineFileNext(lf, &line, NULL)) { /* We'll keep a chain of three or so words in a doubly linked list. */ struct dlNode *node; struct dlList *chain = dlListNew(); int curSize = 0; int wordCount = 0; /* skipping the first word which is the read id */ word = nextWord(&line); while ((word = nextWord(&line)) != NULL) { struct wordInfo *info = wordStoreAdd(store, word); /* For the first few words in the file after ID, we'll just build up the chain, * only adding it to the tree when we finally do get to the desired * chain size. Once past the initial section of the file we'll be * getting rid of the first link in the chain as well as adding a new * last link in the chain with each new word we see. */ if (curSize < chainSize) { dlAddValTail(chain, info); ++curSize; if (curSize == chainSize) addChainToTree(wt, chain); } else { /* Reuse doubly-linked-list node, but give it a new value, as we move * it from head to tail of list. 
*/ node = dlPopHead(chain); node->val = info; dlAddTail(chain, node); addChainToTree(wt, chain); } ++wordCount; } /* Handle last few words in line, where can't make a chain of full size. Also handles * lines that have fewer than chain size words. */ if (curSize < chainSize) addChainToTree(wt, chain); while ((node = dlPopHead(chain)) != NULL) { if (!dlEmpty(chain)) addChainToTree(wt, chain); freeMem(node); } dlListFree(&chain); } lineFileClose(&lf); wordTreeSort(wt); // Make output of chain file prettier return store; }