struct synQueue *synQueueNew()
/* Allocate a synQueue, set up its mutex and condition variable, and attach
 * a freshly made doubly-linked list for the queued items.  Returns the new
 * queue. */
{
    struct synQueue *newQueue;

    AllocVar(newQueue);
    pthreadMutexInit(&newQueue->mutex);
    pthreadCondInit(&newQueue->cond);
    newQueue->queue = dlListNew();

    return newQueue;
}
static struct dlList *sortedListFromTree(struct rbTree *tree)
/* Return a new doubly-linked list holding the values of every item in tree,
 * appended in the order rbTreeItems() hands them back.  The temporary
 * reference list from rbTreeItems() is freed before returning; the values
 * themselves are shared with the tree, not copied. */
{
    struct slRef *items = rbTreeItems(tree);
    struct dlList *sorted = dlListNew();
    struct slRef *item = items;

    while (item != NULL)
        {
        dlAddValTail(sorted, item->val);
        item = item->next;
        }

    slFreeList(&items);
    return sorted;
}
static void wordTreeMakeNonsense(struct wordTree *wt, int maxSize, char *firstWord, int maxOutputWords, FILE *f) /* Go spew out a bunch of words according to probabilities in tree. */ { struct dlList *ll = dlListNew(); int listSize = 0; int outputWords = 0; for (;;) { if (++outputWords > maxOutputWords) break; struct dlNode *node; char *word; /* Get next predicted word. */ if (listSize == 0) { AllocVar(node); ++listSize; word = firstWord; } else if (listSize >= maxSize) { node = dlPopHead(ll); word = predictNext(wt, ll); } else { word = predictNext(wt, ll); AllocVar(node); ++listSize; } node->val = word; dlAddTail(ll, node); if (word == NULL) break; /* Output last word in list. */ { node = ll->tail; word = node->val; fprintf(f, "%s", word); if (word[strlen(word)-1] == '.') fprintf(f, "\n"); else fprintf(f, " "); } } dlListFree(&ll); }
static struct dlList *wordTreeGenerateList(struct wordStore *store, int maxSize, struct wordTree *firstWord, int maxOutputWords) /* Make a list of words based on probabilities in tree. */ { struct dlList *ll = dlListNew(); int chainSize = 0; int outputWords = 0; struct dlNode *chainStartNode = NULL; for (;;) { if (++outputWords > maxOutputWords) break; struct dlNode *node; struct wordTree *picked; /* Get next predicted word. */ AllocVar(node); if (chainSize == 0) { chainStartNode = node; ++chainSize; picked = firstWord; } else if (chainSize >= maxSize) { chainStartNode = chainStartNode->next; picked = predictNext(store, ll); } else { picked = predictNext(store, ll); ++chainSize; } if (picked == NULL) break; /* Add word from whatever level we fetched back to our output chain. */ struct wordInfo *info = picked->info; node->val = info; dlAddTail(ll, node); decrementOutputCountsInTree(picked); info->outTarget -= 1; info->outCount += 1; } verbose(2, "totUseZeroCount = %d\n", totUseZeroCount); return ll; }
void letterChain(char *inFile, char *outFile, int maxSize) /* letterChain - Make Markov chain of letters in text. */ { struct dlList *ll = dlListNew(); int llSize = 0; int c; FILE *in = mustOpen(inFile, "r"); FILE *out; struct dlNode *node; UBYTE *s; struct trie *trie; AllocVar(trie); while ((c = getc(in)) >= 0) { if (llSize < maxSize) { s = needMem(1); *s = c; dlAddValTail(ll, s); ++llSize; if (llSize == maxSize) addToTrie(trie, ll); } else { node = dlPopHead(ll); s = node->val; *s = c; dlAddTail(ll, node); addToTrie(trie, ll); } } if (llSize < maxSize) addToTrie(trie, ll); while ((node = dlPopHead(ll)) != NULL) { addToTrie(trie, ll); freeMem(node->val); freeMem(node); } dlListFree(&ll); carefulClose(&in); out = mustOpen(outFile, "w"); rDumpTrie(0, trie->useCount, trie, out); carefulClose(&out); }
void memTrackerStart() /* Push memory handler that will track blocks allocated so that * they can be automatically released with memTrackerEnd(). You * can have memTrackerStart one after the other, but memTrackerStart/End * need to nest. */ { struct memTracker *mt; if (memTracker != NULL) errAbort("multiple memTrackerStart calls"); AllocVar(mt); AllocVar(mt->handler); mt->handler->alloc = memTrackerAlloc; mt->handler->free = memTrackerFree; mt->handler->realloc = memTrackerRealloc; mt->list = dlListNew(); mt->parent = pushMemHandler(mt->handler); memTracker = mt; }
struct wordTree *wordTreeForChainsInFile(char *fileName, int chainSize, struct lm *lm) /* Return a wordTree of all chains-of-words of length chainSize seen in file. * Allocate the structure in local memory pool lm. */ { /* Stuff for processing file a line at a time. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; /* We'll keep a chain of three or so words in a doubly linked list. */ struct dlNode *node; struct dlList *chain = dlListNew(); int curSize = 0; /* We'll build up the tree starting with an empty root node. */ struct wordTree *wt = wordTreeNew(""); int wordCount = 0; /* Save time/space by sharing stack between all "following" rbTrees. */ struct rbTreeNode **stack; lmAllocArray(lm, stack, 256); /* Loop through each line of input file, lowercasing the whole line, and then * looping through each word of line, stripping out special chars, and finally * processing each word. */ while (lineFileNext(lf, &line, NULL)) { if (lower) tolowers(line); while ((word = nextWord(&line)) != NULL) { if (unpunc) { stripChar(word, ','); stripChar(word, '.'); stripChar(word, ';'); stripChar(word, '-'); stripChar(word, '"'); stripChar(word, '?'); stripChar(word, '!'); stripChar(word, '('); stripChar(word, ')'); if (word[0] == 0) continue; } verbose(2, "%s\n", word); /* We come to this point in the code for each word in the file. * Here we want to maintain a chain of sequential words up to * chainSize long. We do this with a doubly-linked list structure. * For the first few words in the file we'll just build up the list, * only adding it to the tree when we finally do get to the desired * chain size. Once past the initial section of the file we'll be * getting rid of the first link in the chain as well as adding a new * last link in the chain with each new word we see. 
*/ if (curSize < chainSize) { dlAddValTail(chain, cloneString(word)); ++curSize; if (curSize == chainSize) addChainToTree(wt, chain, lm, stack); } else { /* Reuse doubly-linked-list node, but give it a new value, as we move * it from head to tail of list. */ node = dlPopHead(chain); freeMem(node->val); node->val = cloneString(word); dlAddTail(chain, node); addChainToTree(wt, chain, lm, stack); } ++wordCount; } } /* Handle last few words in file, where can't make a chain of full size. Need * a special case for file that has fewer than chain size words too. */ if (curSize < chainSize) addChainToTree(wt, chain, lm, stack); while ((node = dlPopHead(chain)) != NULL) { addChainToTree(wt, chain, lm, stack); freeMem(node->val); freeMem(node); } dlListFree(&chain); lineFileClose(&lf); return wt; }
struct wordStore *wordStoreForChainsInFile(char *fileName, int chainSize) /* Return a wordStore containing all words, and also all chains-of-words of length * chainSize seen in file. */ { /* Stuff for processing file a line at a time. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; /* We'll build up the tree starting with an empty root node. */ struct wordStore *store = wordStoreNew(chainSize); struct wordTree *wt = store->markovChains = wordTreeNew(wordStoreAdd(store, "")); /* Loop through each line of file, treating it as a separate read. There's * special cases at the beginning and end of line, and for short lines. In the * main case we'll be maintaining a chain (doubly linked list) of maxChainSize words, * popping off one word from the start, and adding one word to the end for each * new word we encounter. This list is added to the tree each iteration. */ while (lineFileNext(lf, &line, NULL)) { /* We'll keep a chain of three or so words in a doubly linked list. */ struct dlNode *node; struct dlList *chain = dlListNew(); int curSize = 0; int wordCount = 0; /* skipping the first word which is the read id */ word = nextWord(&line); while ((word = nextWord(&line)) != NULL) { struct wordInfo *info = wordStoreAdd(store, word); /* For the first few words in the file after ID, we'll just build up the chain, * only adding it to the tree when we finally do get to the desired * chain size. Once past the initial section of the file we'll be * getting rid of the first link in the chain as well as adding a new * last link in the chain with each new word we see. */ if (curSize < chainSize) { dlAddValTail(chain, info); ++curSize; if (curSize == chainSize) addChainToTree(wt, chain); } else { /* Reuse doubly-linked-list node, but give it a new value, as we move * it from head to tail of list. 
*/ node = dlPopHead(chain); node->val = info; dlAddTail(chain, node); addChainToTree(wt, chain); } ++wordCount; } /* Handle last few words in line, where can't make a chain of full size. Also handles * lines that have fewer than chain size words. */ if (curSize < chainSize) addChainToTree(wt, chain); while ((node = dlPopHead(chain)) != NULL) { if (!dlEmpty(chain)) addChainToTree(wt, chain); freeMem(node); } dlListFree(&chain); } lineFileClose(&lf); wordTreeSort(wt); // Make output of chain file prettier return store; }