void dlCat(struct dlList *a, struct dlList *b) /* Move items from b to end of a. */ { struct dlNode *node; while ((node = dlPopHead(b)) != NULL) dlAddTail(a, node); }
void dlSort(struct dlList *list, int (*compare )(const void *elem1, const void *elem2)) /* Sort a singly linked list with Qsort and a temporary array. * The arguments to the compare function in real, non-void, life * are pointers to pointers of the type that is in the val field of * the nodes of the list. */ { int len = dlCount(list); if (len > 1) { /* Move val's onto an array, sort, and then put back into list. */ struct dlSorter *sorter = needLargeMem(len * sizeof(sorter[0])), *s; struct dlNode *node; int i; for (i=0, node = list->head; i<len; ++i, node = node->next) { s = &sorter[i]; s->node = node; } compareFunc = compare; qsort(sorter, len, sizeof(sorter[0]), dlNodeCmp); dlListInit(list); for (i=0; i<len; ++i) dlAddTail(list, sorter[i].node); freeMem(sorter); } }
struct dlNode *dlAddValTail(struct dlList *list, void *val) /* Create a node containing val and add to tail of list. */ { struct dlNode *node = AllocA(struct dlNode); node->val = val; dlAddTail(list, node); return node; }
void separateEntities(struct dlList *easyList, struct dlList *hardList) /* Separate out hard (overlapping) entities onto hard list. */ { struct dlNode *node, *next, *listNode; int sepCount = 0; dlSort(easyList, cmpEntities); for (node = easyList->head; node->next != NULL; node = next) { struct entity *ent = node->val; int eStart = ent->start, eEnd = ent->end; next = node->next; for (listNode = easyList->head; listNode->next != NULL; listNode = listNode->next) { struct entity *listEnt = listNode->val; int lStart = listEnt->start, lEnd = listEnt->end; int start = max(eStart, lStart); int end = min(eEnd, lEnd); int overlap = end - start; if (listEnt != ent && overlap > 0) { int eRefCount = slCount(ent->cdaRefList); int lRefCount = slCount(listEnt->cdaRefList); /* Move the one with less cDNA to the hard list. */ if (eRefCount > lRefCount) { dlRemove(listNode); dlAddTail(hardList, listNode); if (listNode == next) next = node; } else { dlRemove(node); dlAddTail(hardList, node); } ++sepCount; break; } } } }
static void wordTreeMakeNonsense(struct wordTree *wt, int maxSize, char *firstWord, int maxOutputWords, FILE *f) /* Go spew out a bunch of words according to probabilities in tree. */ { struct dlList *ll = dlListNew(); int listSize = 0; int outputWords = 0; for (;;) { if (++outputWords > maxOutputWords) break; struct dlNode *node; char *word; /* Get next predicted word. */ if (listSize == 0) { AllocVar(node); ++listSize; word = firstWord; } else if (listSize >= maxSize) { node = dlPopHead(ll); word = predictNext(wt, ll); } else { word = predictNext(wt, ll); AllocVar(node); ++listSize; } node->val = word; dlAddTail(ll, node); if (word == NULL) break; /* Output last word in list. */ { node = ll->tail; word = node->val; fprintf(f, "%s", word); if (word[strlen(word)-1] == '.') fprintf(f, "\n"); else fprintf(f, " "); } } dlListFree(&ll); }
static void *memTrackerAlloc(size_t size) /* Allocate extra memory for cookies and list node, and then * return memory block. */ { struct dlNode *node; size += sizeof (*node); node = memTracker->parent->alloc(size); if (node == NULL) return node; dlAddTail(memTracker->list, node); return (void*)(node+1); }
static struct dlList *wordTreeGenerateList(struct wordStore *store, int maxSize, struct wordTree *firstWord, int maxOutputWords) /* Make a list of words based on probabilities in tree. */ { struct dlList *ll = dlListNew(); int chainSize = 0; int outputWords = 0; struct dlNode *chainStartNode = NULL; for (;;) { if (++outputWords > maxOutputWords) break; struct dlNode *node; struct wordTree *picked; /* Get next predicted word. */ AllocVar(node); if (chainSize == 0) { chainStartNode = node; ++chainSize; picked = firstWord; } else if (chainSize >= maxSize) { chainStartNode = chainStartNode->next; picked = predictNext(store, ll); } else { picked = predictNext(store, ll); ++chainSize; } if (picked == NULL) break; /* Add word from whatever level we fetched back to our output chain. */ struct wordInfo *info = picked->info; node->val = info; dlAddTail(ll, node); decrementOutputCountsInTree(picked); info->outTarget -= 1; info->outCount += 1; } verbose(2, "totUseZeroCount = %d\n", totUseZeroCount); return ll; }
void letterChain(char *inFile, char *outFile, int maxSize) /* letterChain - Make Markov chain of letters in text. */ { struct dlList *ll = dlListNew(); int llSize = 0; int c; FILE *in = mustOpen(inFile, "r"); FILE *out; struct dlNode *node; UBYTE *s; struct trie *trie; AllocVar(trie); while ((c = getc(in)) >= 0) { if (llSize < maxSize) { s = needMem(1); *s = c; dlAddValTail(ll, s); ++llSize; if (llSize == maxSize) addToTrie(trie, ll); } else { node = dlPopHead(ll); s = node->val; *s = c; dlAddTail(ll, node); addToTrie(trie, ll); } } if (llSize < maxSize) addToTrie(trie, ll); while ((node = dlPopHead(ll)) != NULL) { addToTrie(trie, ll); freeMem(node->val); freeMem(node); } dlListFree(&ll); carefulClose(&in); out = mustOpen(outFile, "w"); rDumpTrie(0, trie->useCount, trie, out); carefulClose(&out); }
void dlAddMiddle(struct dlList *list, struct dlNode *node) /* Add node to approximate middle of list. */ { int listSize = dlCount(list); int midPos = listSize/2; if (midPos <= 0) dlAddTail(list, node); else { struct dlNode *ln = list->head; int i; for (i=0; i<midPos; ++i) ln = ln->next; dlAddAfter(ln, node); } }
static void *memTrackerRealloc(void *vpt, size_t size) /* Resize a memory block from memTrackerAlloc. */ { if (vpt == NULL) return memTrackerAlloc(size); else { struct dlNode *node = ((struct dlNode *)vpt)-1; size += sizeof(*node); dlRemove(node); node = memTracker->parent->realloc(node, size); if (node == NULL) return node; dlAddTail(memTracker->list, node); return (void*)(node+1); } }
void ccCp(char *source, char *dest, char *hostList) /* Copy source to dest on all files in hostList. */ { time_t startTime = time(NULL); time_t curTime, lastTime = 0; struct machine *machineList = NULL; struct netSwitch *nsList; struct machine *m, *m2; struct dlList *toDoList = newDlList(); /* We haven't done these. */ struct dlList *finishedList = newDlList(); /* All done here. */ struct dlList *sourceList = newDlList(); /* These are sources for copies. */ struct dlList *workingList = newDlList(); /* These are copying data to themselves. */ struct dlList *errList = newDlList(); /* These are messed up 3x or more. */ bool firstOk = FALSE; struct dlNode *finNode, *node, *sourceNode, *destNode; struct dyString *cmd = newDyString(256); int machineCount; int machinesFinished = 0; char *thisHost = getenv("HOST"); off_t size; int goodMachines; double grandTotal; /* Get host and switch info. */ readHosts(hostList, &machineList, &nsList); machineCount = slCount(machineList); /* Make sure file exists.... */ if (!fileExists(source)) errAbort("%s doesn't exist\n", source); size = fileSize(source); printf("Copying %s (%lld bytes) to %d machines\n", source, (unsigned long long)size, machineCount); /* Add everything to the to-do list. */ for (m = machineList; m != NULL; m = m->next) { dlAddValTail(toDoList, m); } /* Loop through to-do list trying to do first copy. */ for (node = toDoList->head; node->next != NULL; node = node->next) { m = node->val; dyStringClear(cmd); m = node->val; if (sameString(thisHost, m->name)) { if (sameString(source, dest)) { /* Hey, this is too easy. */ firstOk = TRUE; ++machinesFinished; break; } else { dyStringPrintf(cmd, "cp %s %s", source, dest); } } else { dyStringPrintf(cmd, "rcp %s %s:%s", source, m->name, dest); } if (system(cmd->string) == 0) { dlRemove(node); dlAddTail(finishedList, node); firstOk = TRUE; ++machinesFinished; break; } else /* some error in rcp */ { warn("Problem with %s\n", cmd->string); m->errCount += 1; } } /* Loop around launching child processes to copy and * wait for them to finish. */ while (machinesFinished < machineCount) { int pid; int status; /* Start all possible copies. */ while (matchMaker(finishedList, toDoList, &sourceNode, &destNode)) { dlAddTail(sourceList, sourceNode); dlAddTail(workingList, destNode); m = destNode->val; m->sourceNode = sourceNode; startCopy(sourceNode->val, destNode->val, dest, thisHost, cmd); } curTime = time(NULL); if (curTime - lastTime >= 3) { printf("%d finished in %d seconds, %d in progress, %d to start, %d errors, %d total\n", dlCount(finishedList) + dlCount(sourceList), (int)(curTime - startTime), dlCount(workingList), dlCount(toDoList), dlCount(errList), machineCount); lastTime = curTime; } /* Wait for a child to finish. Figure out which machine it is. */ pid = wait(&status); finNode = NULL; for (node = workingList->head; node->next != NULL; node = node->next) { m = node->val; if (m->pid == pid) { finNode = node; break; } } if (finNode == NULL) { errAbort("Returned from wait on unknown child %d\n", pid); continue; } m = finNode->val; m->pid = 0; dlRemove(finNode); dlRemove(m->sourceNode); m2 = m->sourceNode->val; if (m->netSwitch != m2->netSwitch) --crossSwitchCount; dlAddTail(finishedList, m->sourceNode); if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { /* Good return - move self and source node to finished list. */ ++machinesFinished; dlAddTail(finishedList, finNode); } else { /* Bad return. Increment error count, and maybe move it to * error list. */ if (++m->errCount >= maxErrCount) { ++machinesFinished; dlAddTail(errList, finNode); fprintf(stderr, "Gave up on %s\n", m->name); } else { dlAddMiddle(toDoList, finNode); fprintf(stderr, "Retry %d on %s\n", m->errCount, m->name); } } } if (!dlEmpty(errList)) { fprintf(stderr, "errors in:"); for (node = errList->head; node->next != NULL; node = node->next) { m = node->val; fprintf(stderr, " %s", m->name); } fprintf(stderr, "\n"); } goodMachines = dlCount(finishedList); grandTotal = (double)goodMachines * (double)size; printf("Copied to %d of %d machines (grand total %e bytes) in %d seconds\n", goodMachines, machineCount, grandTotal, (int)(time(NULL) - startTime)); }
int rudpReceiveTimeOut(struct rudp *ru, void *messageBuf, int bufSize, struct sockaddr_in *retFrom, int timeOut) /* Read message into buffer of given size. Returns actual size read on * success. On failure prints a warning, sets errno, and returns -1. * Also returns ip address of message source. If timeOut is nonzero, * it represents the timeout in milliseconds. It will set errno to * ETIMEDOUT in this case.*/ { char inBuf[udpEthMaxSize]; struct rudpHeader *head = (struct rudpHeader *)inBuf; struct rudpHeader ackHead; struct sockaddr_in sai; socklen_t saiSize = sizeof(sai); int readSize, err; assert(bufSize <= rudpMaxSize); ru->receiveCount += 1; for (;;) { if (timeOut != 0) { if (!readReadyWait(ru->socket, timeOut)) { warn("rudpReceive timed out\n"); errno = ETIMEDOUT; return -1; } } readSize = recvfrom(ru->socket, inBuf, sizeof(inBuf), 0, (struct sockaddr*)&sai, &saiSize); if (retFrom != NULL) *retFrom = sai; if (readSize < 0) { if (errno == EINTR) continue; warn("recvfrom error: %s", strerror(errno)); ru->failCount += 1; return readSize; } if (readSize < sizeof(*head)) { warn("rudpRecieve truncated message"); continue; } if (head->type != rudpData) { if (head->type != rudpAck) warn("skipping non-data message %d in rudpReceive", head->type); continue; } ackHead = *head; ackHead.type = rudpAck; err = sendto(ru->socket, &ackHead, sizeof(ackHead), 0, (struct sockaddr *)&sai, sizeof(sai)); if (err < 0) { warn("problem sending ack in rudpRecieve: %s", strerror(errno)); } readSize -= sizeof(*head); if (readSize > bufSize) { warn("read more bytes than have room for in rudpReceive"); readSize = bufSize; } memcpy(messageBuf, head+1, readSize); /* check for duplicate packet */ if (!ru->recvHash) { /* take advantage of new auto-expanding hashes */ ru->recvHash = newHashExt(4, FALSE); /* do not use local mem in hash */ ru->recvList = newDlList(); ru->recvCount = 0; } char hashKey[64]; char saiDottedQuad[17]; internetIpToDottedQuad(ntohl(sai.sin_addr.s_addr), saiDottedQuad); safef(hashKey, sizeof(hashKey), "%s-%d-%d-%d" , saiDottedQuad , head->pid , head->connId , head->id ); if (hashLookup(ru->recvHash, hashKey)) { warn("duplicate packet filtered out: %s", hashKey); continue; } long now = time(NULL); struct packetSeen *p; AllocVar(p); AllocVar(p->node); p->node->val = p; p->recvHashKey = hashStoreName(ru->recvHash, hashKey); p->lastChecked = now; dlAddTail(ru->recvList, p->node); ++ru->recvCount; sweepOutOldPacketsSeen(ru, now); ru->lastIdReceived = head->id; break; } return readSize; }
struct wordTree *wordTreeForChainsInFile(char *fileName, int chainSize, struct lm *lm) /* Return a wordTree of all chains-of-words of length chainSize seen in file. * Allocate the structure in local memory pool lm. */ { /* Stuff for processing file a line at a time. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; /* We'll keep a chain of three or so words in a doubly linked list. */ struct dlNode *node; struct dlList *chain = dlListNew(); int curSize = 0; /* We'll build up the tree starting with an empty root node. */ struct wordTree *wt = wordTreeNew(""); int wordCount = 0; /* Save time/space by sharing stack between all "following" rbTrees. */ struct rbTreeNode **stack; lmAllocArray(lm, stack, 256); /* Loop through each line of input file, lowercasing the whole line, and then * looping through each word of line, stripping out special chars, and finally * processing each word. */ while (lineFileNext(lf, &line, NULL)) { if (lower) tolowers(line); while ((word = nextWord(&line)) != NULL) { if (unpunc) { stripChar(word, ','); stripChar(word, '.'); stripChar(word, ';'); stripChar(word, '-'); stripChar(word, '"'); stripChar(word, '?'); stripChar(word, '!'); stripChar(word, '('); stripChar(word, ')'); if (word[0] == 0) continue; } verbose(2, "%s\n", word); /* We come to this point in the code for each word in the file. * Here we want to maintain a chain of sequential words up to * chainSize long. We do this with a doubly-linked list structure. * For the first few words in the file we'll just build up the list, * only adding it to the tree when we finally do get to the desired * chain size. Once past the initial section of the file we'll be * getting rid of the first link in the chain as well as adding a new * last link in the chain with each new word we see. */ if (curSize < chainSize) { dlAddValTail(chain, cloneString(word)); ++curSize; if (curSize == chainSize) addChainToTree(wt, chain, lm, stack); } else { /* Reuse doubly-linked-list node, but give it a new value, as we move * it from head to tail of list. */ node = dlPopHead(chain); freeMem(node->val); node->val = cloneString(word); dlAddTail(chain, node); addChainToTree(wt, chain, lm, stack); } ++wordCount; } } /* Handle last few words in file, where can't make a chain of full size. Need * a special case for file that has fewer than chain size words too. */ if (curSize < chainSize) addChainToTree(wt, chain, lm, stack); while ((node = dlPopHead(chain)) != NULL) { addChainToTree(wt, chain, lm, stack); freeMem(node->val); freeMem(node); } dlListFree(&chain); lineFileClose(&lf); return wt; }
struct wordStore *wordStoreForChainsInFile(char *fileName, int chainSize) /* Return a wordStore containing all words, and also all chains-of-words of length * chainSize seen in file. */ { /* Stuff for processing file a line at a time. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; /* We'll build up the tree starting with an empty root node. */ struct wordStore *store = wordStoreNew(chainSize); struct wordTree *wt = store->markovChains = wordTreeNew(wordStoreAdd(store, "")); /* Loop through each line of file, treating it as a separate read. There's * special cases at the beginning and end of line, and for short lines. In the * main case we'll be maintaining a chain (doubly linked list) of maxChainSize words, * popping off one word from the start, and adding one word to the end for each * new word we encounter. This list is added to the tree each iteration. */ while (lineFileNext(lf, &line, NULL)) { /* We'll keep a chain of three or so words in a doubly linked list. */ struct dlNode *node; struct dlList *chain = dlListNew(); int curSize = 0; int wordCount = 0; /* skipping the first word which is the read id */ word = nextWord(&line); while ((word = nextWord(&line)) != NULL) { struct wordInfo *info = wordStoreAdd(store, word); /* For the first few words in the file after ID, we'll just build up the chain, * only adding it to the tree when we finally do get to the desired * chain size. Once past the initial section of the file we'll be * getting rid of the first link in the chain as well as adding a new * last link in the chain with each new word we see. */ if (curSize < chainSize) { dlAddValTail(chain, info); ++curSize; if (curSize == chainSize) addChainToTree(wt, chain); } else { /* Reuse doubly-linked-list node, but give it a new value, as we move * it from head to tail of list. */ node = dlPopHead(chain); node->val = info; dlAddTail(chain, node); addChainToTree(wt, chain); } ++wordCount; } /* Handle last few words in line, where can't make a chain of full size. Also handles * lines that have fewer than chain size words. */ if (curSize < chainSize) addChainToTree(wt, chain); while ((node = dlPopHead(chain)) != NULL) { if (!dlEmpty(chain)) addChainToTree(wt, chain); freeMem(node); } dlListFree(&chain); } lineFileClose(&lf); wordTreeSort(wt); // Make output of chain file prettier return store; }