Пример #1
0
static void snapSoftToCloseHard(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxSnapSize,
	int maxUncheckedSnapSize, struct nibTwoCache *seqCache, char *chromName)
/* Snap hard vertices to nearby soft vertices of same type. */
{
struct lm *lm = lmInit(0);
addWaysInAndOut(vertexTree, edgeTree, lm);
struct dlList *vList = sortedListFromTree(vertexTree);
struct dlNode *node;
int snapCount = 0;
for (node = vList->head; !dlEnd(node); node = node->next)
    {
    if (snapVertex(node, maxSnapSize, maxUncheckedSnapSize, seqCache, chromName))
	{
	rbTreeRemove(vertexTree, node->val);
        ++snapCount;
	}
    }
/* Clean up ways in and out since have removed some nodes. */
for (node = vList->head; !dlEnd(node); node = node->next)
    {
    struct vertex *v = node->val;
    v->waysIn = v->waysOut = NULL;
    }
if (snapCount > 0)
    {
    verbose(3, "Snapped %d close edges, now have %d vertices\n", snapCount, vertexTree->n);
    updateForwardedEdges(edgeTree);
    }
dlListFree(&vList);
lmCleanup(&lm);
}
Пример #2
0
void memTrackerEnd()
/* Free any remaining blocks and pop tracker memory handler. */
{
struct memTracker *mt = memTracker;
if (mt == NULL)
    errAbort("memTrackerEnd without memTrackerStart");
memTracker = NULL;
popMemHandler();
dlListFree(&mt->list);
freeMem(mt->handler);
freeMem(mt);
}
Пример #3
0
static void wordTreeMakeNonsense(struct wordTree *wt, int maxSize, char *firstWord, 
	int maxOutputWords, FILE *f)
/* Go spew out a bunch of words according to probabilities in tree. */
{
struct dlList *ll = dlListNew();
int listSize = 0;
int outputWords = 0;

for (;;)
    {
    if (++outputWords > maxOutputWords)
        break;
    struct dlNode *node;
    char *word;

    /* Get next predicted word. */
    if (listSize == 0)
        {
	AllocVar(node);
	++listSize;
	word = firstWord;
	}
    else if (listSize >= maxSize)
	{
	node = dlPopHead(ll);
	word = predictNext(wt, ll);
	}
    else
	{
	word = predictNext(wt, ll);
	AllocVar(node);
	++listSize;
	}
    node->val = word;
    dlAddTail(ll, node);

    if (word == NULL)
         break;

    /* Output last word in list. */
	{
	node = ll->tail;
	word = node->val;
	fprintf(f, "%s", word);
	if (word[strlen(word)-1] == '.')
	    fprintf(f, "\n");
	else
	    fprintf(f, " ");
	}
    }
dlListFree(&ll);
}
Пример #4
0
void synQueueFree(struct synQueue **pSq)
/* Free up synQueue.  Be sure no other threads are using
 * it first though! This will not free any dynamic memory
 * in the messages.  Use synQueueFreeAndVals for that. */
{
struct synQueue *sq = *pSq;
if (sq == NULL)
    return;
dlListFree(&sq->queue);
pthreadCondDestroy(&sq->cond);
pthreadMutexDestroy(&sq->mutex);
freez(pSq);
}
Пример #5
0
void letterChain(char *inFile, char *outFile, int maxSize)
/* letterChain - Make Markov chain of letters in text. */
{
struct dlList *ll = dlListNew();
int llSize = 0;
int c;
FILE *in = mustOpen(inFile, "r");
FILE *out;
struct dlNode *node;
UBYTE *s;
struct trie *trie;

AllocVar(trie);

while ((c = getc(in)) >= 0)
    {
    if (llSize < maxSize)
        {
	s = needMem(1);
	*s = c;
	dlAddValTail(ll, s);
	++llSize;
	if (llSize == maxSize)
	    addToTrie(trie, ll);
	}
    else
        {
	node = dlPopHead(ll);
	s = node->val;
	*s = c;
	dlAddTail(ll, node);
	addToTrie(trie, ll);
	}
    }
if (llSize < maxSize)
    addToTrie(trie, ll);
while ((node = dlPopHead(ll)) != NULL)
    {
    addToTrie(trie, ll);
    freeMem(node->val);
    freeMem(node);
    }
dlListFree(&ll);
carefulClose(&in);

out = mustOpen(outFile, "w");
rDumpTrie(0, trie->useCount, trie, out);

carefulClose(&out);
}
Пример #6
0
static void wordTreeGenerateFile(struct wordStore *store, int maxSize, struct wordTree *firstWord, 
	int maxOutputWords, char *fileName)
/* Create file containing words base on tree probabilities.  The wordTreeGenerateList does
 * most of work. */
{
struct dlList *ll = wordTreeGenerateList(store, maxSize, firstWord, maxOutputWords);
FILE *f = mustOpen(fileName, "w");
struct dlNode *node;
for (node = ll->head; !dlEnd(node); node = node->next)
    {
    struct wordInfo *info = node->val;
    fprintf(f, "%s\n", info->word);
    }
carefulClose(&f);
dlListFree(&ll);
}
Пример #7
0
struct wordTree *wordTreeForChainsInFile(char *fileName, int chainSize, struct lm *lm)
/* Return a wordTree of all chains-of-words of length chainSize seen in file. 
 * Allocate the structure in local memory pool lm. */ 
{
/* Stuff for processing file a line at a time. */
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line, *word;

/* We'll keep a chain of three or so words in a doubly linked list. */
struct dlNode *node;
struct dlList *chain = dlListNew();
int curSize = 0;

/* We'll build up the tree starting with an empty root node. */
struct wordTree *wt = wordTreeNew("");	
int wordCount = 0;

/* Save time/space by sharing stack between all "following" rbTrees. */
struct rbTreeNode **stack;	
lmAllocArray(lm, stack, 256);

/* Loop through each line of input file, lowercasing the whole line, and then
 * looping through each word of line, stripping out special chars, and finally
 * processing each word. */
while (lineFileNext(lf, &line, NULL))
    {
    if (lower)
        tolowers(line);
    while ((word = nextWord(&line)) != NULL)
	{
	if (unpunc)
	    {
	    stripChar(word, ',');
	    stripChar(word, '.');
	    stripChar(word, ';');
	    stripChar(word, '-');
	    stripChar(word, '"');
	    stripChar(word, '?');
	    stripChar(word, '!');
	    stripChar(word, '(');
	    stripChar(word, ')');
	    if (word[0] == 0)
	         continue;
	    }
	verbose(2, "%s\n", word);

	/* We come to this point in the code for each word in the file. 
	 * Here we want to maintain a chain of sequential words up to
	 * chainSize long.  We do this with a doubly-linked list structure.
	 * For the first few words in the file we'll just build up the list,
	 * only adding it to the tree when we finally do get to the desired
	 * chain size.  Once past the initial section of the file we'll be
	 * getting rid of the first link in the chain as well as adding a new
	 * last link in the chain with each new word we see. */
	if (curSize < chainSize)
	    {
	    dlAddValTail(chain, cloneString(word));
	    ++curSize;
	    if (curSize == chainSize)
		addChainToTree(wt, chain, lm, stack);
	    }
	else
	    {
	    /* Reuse doubly-linked-list node, but give it a new value, as we move
	     * it from head to tail of list. */
	    node = dlPopHead(chain);
	    freeMem(node->val);
	    node->val = cloneString(word);
	    dlAddTail(chain, node);
	    addChainToTree(wt, chain, lm, stack);
	    }
	++wordCount;
	}
    }

/* Handle last few words in file, where can't make a chain of full size.  Need
 * a special case for file that has fewer than chain size words too. */
if (curSize < chainSize)
    addChainToTree(wt, chain, lm, stack);
while ((node = dlPopHead(chain)) != NULL)
    {
    addChainToTree(wt, chain, lm, stack);
    freeMem(node->val);
    freeMem(node);
    }
dlListFree(&chain);
lineFileClose(&lf);
return wt;
}
Пример #8
0
struct wordStore *wordStoreForChainsInFile(char *fileName, int chainSize)
/* Return a wordStore containing all words, and also all chains-of-words of length 
 * chainSize seen in file.  */
{
/* Stuff for processing file a line at a time. */
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *line, *word;

/* We'll build up the tree starting with an empty root node. */
struct wordStore *store = wordStoreNew(chainSize);
struct wordTree *wt = store->markovChains = wordTreeNew(wordStoreAdd(store, ""));	

/* Loop through each line of file, treating it as a separate read. There's 
 * special cases at the beginning and end of line, and for short lines.  In the
 * main case we'll be maintaining a chain (doubly linked list) of maxChainSize words, 
 * popping off one word from the start, and adding one word to the end for each
 * new word we encounter. This list is added to the tree each iteration. */
while (lineFileNext(lf, &line, NULL))
    {
    /* We'll keep a chain of three or so words in a doubly linked list. */
    struct dlNode *node;
    struct dlList *chain = dlListNew();
    int curSize = 0;
    int wordCount = 0;

    /* skipping the first word which is the read id */
    word = nextWord(&line);

    while ((word = nextWord(&line)) != NULL)
	{
	struct wordInfo *info = wordStoreAdd(store, word);
	 /* For the first few words in the file after ID, we'll just build up the chain,
	 * only adding it to the tree when we finally do get to the desired
	 * chain size.  Once past the initial section of the file we'll be
	 * getting rid of the first link in the chain as well as adding a new
	 * last link in the chain with each new word we see. */
	if (curSize < chainSize)
	    {
	    dlAddValTail(chain, info);
	    ++curSize;
	    if (curSize == chainSize)
		addChainToTree(wt, chain);
	    }
	else
	    {
	    /* Reuse doubly-linked-list node, but give it a new value, as we move
	     * it from head to tail of list. */
	    node = dlPopHead(chain);
	    node->val = info;
	    dlAddTail(chain, node);
	    addChainToTree(wt, chain);
	    }
	++wordCount;
	}
    /* Handle last few words in line, where can't make a chain of full size.  Also handles       
    * lines that have fewer than chain size words. */
    if (curSize < chainSize)
 	addChainToTree(wt, chain);
    while ((node = dlPopHead(chain)) != NULL)
	{
	if (!dlEmpty(chain))
	    addChainToTree(wt, chain);
	freeMem(node);
	}
    dlListFree(&chain);
    }
lineFileClose(&lf);

wordTreeSort(wt);  // Make output of chain file prettier
return store;
}