void synQueuePut(struct synQueue *sq, void *message)
/* Append message to the tail of sq->queue.  The append and the signal on
 * sq->cond both happen while sq->mutex is held; the mutex is released
 * before returning. */
{
    pthreadMutexLock(&sq->mutex);

    dlAddValTail(sq->queue, message);
    pthreadCondSignal(&sq->cond);

    pthreadMutexUnlock(&sq->mutex);
}
static struct dlList *sortedListFromTree(struct rbTree *tree)
/* Return a newly allocated doubly-linked list holding every value of tree,
 * in the order rbTreeItems() reports them (the original author notes this
 * order is sorted).  The intermediate reference list is freed here; the
 * values themselves are shared with the tree, not copied. */
{
    struct slRef *itemRefs = rbTreeItems(tree);
    struct dlList *sorted = dlListNew();
    struct slRef *cur = itemRefs;

    while (cur != NULL) {
        dlAddValTail(sorted, cur->val);
        cur = cur->next;
    }
    slFreeList(&itemRefs);
    return sorted;
}
void letterChain(char *inFile, char *outFile, int maxSize) /* letterChain - Make Markov chain of letters in text. */ { struct dlList *ll = dlListNew(); int llSize = 0; int c; FILE *in = mustOpen(inFile, "r"); FILE *out; struct dlNode *node; UBYTE *s; struct trie *trie; AllocVar(trie); while ((c = getc(in)) >= 0) { if (llSize < maxSize) { s = needMem(1); *s = c; dlAddValTail(ll, s); ++llSize; if (llSize == maxSize) addToTrie(trie, ll); } else { node = dlPopHead(ll); s = node->val; *s = c; dlAddTail(ll, node); addToTrie(trie, ll); } } if (llSize < maxSize) addToTrie(trie, ll); while ((node = dlPopHead(ll)) != NULL) { addToTrie(trie, ll); freeMem(node->val); freeMem(node); } dlListFree(&ll); carefulClose(&in); out = mustOpen(outFile, "w"); rDumpTrie(0, trie->useCount, trie, out); carefulClose(&out); }
void makeEntities(struct clonePair *pairList, struct dlList **entLists) /* Lump pairs of cDNAs into entities based on them having overlapping * and compatable cDNAs. */ { struct dlList *chromEntList; struct entity *compatableList, *entity; struct clonePair *pair; struct dlNode *node; int pairCount = 0; for (pair = pairList; pair != NULL; pair = pair->next) { if (++pairCount % 1000 == 0) printf("Processing pair %d\n", pairCount); chromEntList = entLists[pair->chromIx]; if ((compatableList = findCompatableEntities(chromEntList, pair)) != NULL) { compatableList = addToEntityList(compatableList, pair->p3); compatableList = addToEntityList(compatableList, pair->p5); mergeEntities(compatableList); } else { if (pair->p5) { entity = newEntity(pair->p5); if (pair->p3) { if (isCompatable(entity, pair->p3)) /* There are a few rare cases * where this isn't true. */ { entity = addToEntityList(entity, pair->p3); entity = mergeEntities(entity); } } } else { entity = newEntity(pair->p3); } node = dlAddValTail(chromEntList, entity); entity->node = node; } } }
struct dgNodeRef *dgConstrainedPriorityOrder(struct diGraph *dg) /* Return traversal of graph in priority order subject to * constraint that all parents must be output before * their children regardless of node priority. * Graph must be cycle free. */ { struct dlList *sortedList = newDlList(); struct dgNode *graphNode; struct dlNode *listNode; struct dgNodeRef *refList = NULL, *ref; if (dgHasCycles(dg)) errAbort("Call to dgConstrainedPriorityOrder on graph with cycles."); /* Make up list sorted by priority. */ for (graphNode = dg->nodeList; graphNode != NULL; graphNode = graphNode->next) { dlAddValTail(sortedList, graphNode); graphNode->visited = FALSE; } dlSort(sortedList, cmpPriority); /* Loop taking first member of list with no untraversed parents. */ while (!dlEmpty(sortedList)) { for (listNode = sortedList->head; listNode->next != NULL; listNode = listNode->next) { graphNode = listNode->val; if (dgParentsAllVisited(graphNode)) { dlRemove(listNode); freeMem(listNode); AllocVar(ref); ref->node = graphNode; slAddHead(&refList, ref); graphNode->visited = TRUE; break; } } } freeDlList(&sortedList); slReverse(&refList); return refList; }
struct dgEdge *dgConnectWithVal(struct diGraph *dg, struct dgNode *a, struct dgNode *b, void *val) /* Connect node a to node b and put val on edge. An error to * reconnect with a different val. */ { struct dgConnection *con; struct dgEdge *edge; struct dlNode *edgeOnList; /* Check to see if it's already there. */ if ((con = dgFindNodeInConList(a->nextList, b)) != NULL) { edge = con->edgeOnList->val; if (val != edge->val) warn("Trying to add new value to edge between %s and %s, ignoring", a->name, b->name); return edge; } /* Allocate edge and put on list. */ AllocVar(edge); edge->a = a; edge->b = b; edge->val = val; edgeOnList = dlAddValTail(dg->edgeList, edge); /* Connect nodes to each other. */ AllocVar(con); con->node = b; con->edgeOnList = edgeOnList; slAddHead(&a->nextList, con); AllocVar(con); con->node = a; con->edgeOnList = edgeOnList; slAddHead(&b->prevList, con); return edge; }
void figureSynteny(char *inName, FILE *out) /* Figure out synteny stats - how much in a row aligns. */ { FILE *in; char line[512]; int lineCount = 0; char *words[64]; int wordCount; char *firstWord; char *queryName = ""; struct contig *contig; struct dlNode *contigNode; struct dlList *contigList = newDlList(); int lineState = 0; /* Keeps track of groups of four lines. */ struct slName *queryNameList = NULL; int maxSymCount = 64*1024; char *qSymBuf = needMem(maxSymCount+1); char *tSymBuf = needMem(maxSymCount+1); char *hSymBuf = needMem(maxSymCount+1); int symCount = 0; int qSymLen, tSymLen, hSymLen; int bestSegScore; int lastQoff = -1; int i; in = mustOpen(inName, "r"); while (fgets(line, sizeof(line), in)) { ++lineCount; if ((lineCount%100000) == 0) { printf("Processing line %d of %s\n", lineCount, inName); } if (++lineState == 5) lineState = 0; wordCount = chopLine(line, words); if (wordCount <= 0) continue; firstWord = words[0]; if (sameString(firstWord, "Aligning")) { char *queryString; char *targetString; char queryStrand, targetStrand; char *parts[8]; int partCount; /* Do some preliminary checking of this line. */ if (wordCount < 6) errAbort("Short line %d of %s", lineCount, inName); queryString = words[1]; queryStrand = words[2][0]; targetString = words[4]; targetStrand = words[5][0]; /* Extract the name of the query sequence. If it's new, * then write out contigs on previous query we've accumulated * so far and start a new list. */ partCount = chopString(queryString, ":-", parts, ArraySize(parts)); if (!sameString(parts[0], queryName)) { /* Allocate new name and keep track of it. */ struct slName *newName = newSlName(parts[0]); slAddHead(&queryNameList, newName); /* Set last Segment for this clone to impossible val. */ bestSegScore = -0x3fffffff; lastQoff = -1; /* Write out old contigs and empty out contig list. 
*/ syntenyOnClone(queryName, contigList, out); freeContigList(&contigList); contigList = newDlList(); queryName = newName->name; } /* Make up a new contig, and fill it in with the data we * have so far about query. */ AllocVar(contig); contig->query = queryName; contig->qOffset = atoi(parts[1]); contig->qEndOffset = atoi(parts[2]); contig->qStrand = queryStrand; if (lastQoff != contig->qOffset) { lastQoff = contig->qOffset; bestSegScore = -0x3fffffff; } /* Parse target string and fill in contig with it's info. */ chopString(targetString, ":-", parts, ArraySize(parts)); contig->target = cloneString(parts[0]); contig->tOffset = atoi(parts[1]); contig->tEndOffset = atoi(parts[2]); contig->tStrand = targetStrand; /* We don't know start and end yet - set them to values * that will get easily replace by max/min. */ contig->qStart = contig->tStart = 0x3fffffff; lineState = -1; symCount = 0; } else if (sameString(firstWord, "best")) { if (wordCount < 3) errAbort("Short line %d of %s", lineCount, inName); contig->score = atoi(words[2]); if (contig->score > bestSegScore && contig->score >= minScore) { struct dlNode *tailNode; struct contig *tailContig; bestSegScore = contig->score; contig->isComplete = TRUE; contig->qSym = cloneStringZ(qSymBuf, symCount); contig->tSym = cloneStringZ(tSymBuf, symCount); contig->hSym = cloneStringZ(hSymBuf, symCount); contig->symCount = symCount; contig->qEnd = contig->qStart + countNonGap(qSymBuf, symCount); contig->tEnd = contig->tStart + countNonGap(tSymBuf, symCount); tailNode = contigList->tail; if (tailNode != NULL) { tailContig = tailNode->val; if (tailContig->qOffset == contig->qOffset) { freeContig(&tailContig); dlRemove(tailNode); freeMem(tailNode); } } contigNode = dlAddValTail(contigList, contig); } } else if (wordCount > 1 && isdigit(firstWord[0]) || firstWord[0] == '-') { int start, end; char *sym = words[1]; int symLen = strlen(sym); char firstChar = firstWord[0]; if (lineState != 0 && lineState != 2) errAbort("Bummer - phasing 
mismatch on lineState line %d of %s!\n", lineCount, inName); assert(lineState == 0 || lineState == 2); start = atoi(firstWord); end = start + symLen; if (symCount + symLen > maxSymCount) { errAbort("Single contig too long line %d of %s, can only handle up to %d symbols\n", lineCount, inName, maxSymCount); } if (lineState == 0) /* query symbols */ { qSymLen = symLen; if (isdigit(firstChar)) { start += contig->qOffset; end += contig->qOffset; contig->qStart = min(contig->qStart, start); contig->qEnd = max(contig->qEnd, end); } memcpy(qSymBuf+symCount, sym, symLen); } else /* target symbols */ { tSymLen = symLen; if (tSymLen != qSymLen) { errAbort("Target symbol size not same as query line %d of %s", lineCount, inName); } if (isdigit(firstChar)) { start += contig->tOffset; end += contig->tOffset; contig->tStart = min(contig->tStart, start); } memcpy(tSymBuf+symCount, sym, symLen); } } else if (firstWord[0] == '(') { lineState = -1; } else { assert(lineState == 1 || lineState == 3); if (lineState == 3) /* Hidden symbols. */ { char *sym = firstWord; int symLen = strlen(sym); hSymLen = symLen; if (hSymLen != qSymLen) { errAbort("Hidden symbol size not same as query line %d of %s", lineCount, inName); } memcpy(hSymBuf+symCount, sym, symLen); symCount += symLen; } } } syntenyOnClone(queryName, contigList, out); freeContigList(&contigList); fclose(in); slFreeList(&queryNameList); freeMem(qSymBuf); freeMem(tSymBuf); freeMem(hSymBuf); fprintf(out, "CloneSegCounts[] = \n"); for (i=0; i<ArraySize(cloneSegCounts); ++i) fprintf(out, "%d %d\n", i, cloneSegCounts[i]); fprintf(out, "\n"); fprintf(out, "kCounts[] = \n"); for (i=0; i<ArraySize(kCounts); ++i) fprintf(out, "%d %d\n", i, kCounts[i]); segAverageSize = round((double)segTotalSize/segCount); fprintf(out, "\n%d Segments, average size %d\n", segCount, segAverageSize); }
void synQueuePutUnprotected(struct synQueue *sq, void *message)
/* Append message to the tail of sq->queue without touching sq's mutex or
 * condition variable.  Offers no protection against multithreading
 * contention - intended for use before pthreads are launched, perhaps. */
{
    dlAddValTail(sq->queue, message);
}
struct dgNodeRef *dgFindPath(struct diGraph *dg, struct dgNode *a, struct dgNode *b) /* Find shortest path from a to b. Return NULL if can't be found. */ { struct dgNodeRef *refList = NULL, *ref; struct dgConnection *con; struct dgNode *node, *nNode; struct dlList *fifo; struct dlNode *ffNode; struct dgNode endNode; int fifoSize = 1; /* Do some quick and easy tests first to return if have no way out * of node A, or if B directly follows A. */ if (a->nextList == NULL) return NULL; if (a == b) { AllocVar(ref); ref->node = a; return ref; } if ((con = dgFindNodeInConList(a->nextList, b)) != NULL) { AllocVar(refList); refList->node = a; node = con->node; AllocVar(ref); ref->node = node; slAddTail(&refList, ref); return refList; } /* Set up for breadth first traversal. Will use a doubly linked * list as a fifo. */ for (node = dg->nodeList; node != NULL; node = node->next) node->tempEntry = NULL; fifo = newDlList(); dlAddValTail(fifo, a); a->tempEntry = &endNode; while ((ffNode = dlPopHead(fifo)) != NULL) { --fifoSize; node = ffNode->val; freeMem(ffNode); for (con = node->nextList; con != NULL; con = con->next) { nNode = con->node; if (nNode->tempEntry == NULL) { nNode->tempEntry = node; if (nNode == b) { while (nNode != &endNode && nNode != NULL) { AllocVar(ref); ref->node = nNode; slAddHead(&refList, ref); nNode = nNode->tempEntry; } break; } else { dlAddValTail(fifo, nNode); ++fifoSize; if (fifoSize > 100000) errAbort("Internal error in dgFindPath"); } } } } freeDlList(&fifo); return refList; }
void doRun(char *line, struct sockaddr_in *hubIp) /* Execute command. */ { char *jobMessage = cloneString(line); static char *args[1024]; int argCount; char hubDottedQuad[17]; nextRandom(); if (line == NULL) warn("Executing nothing..."); else if (!internetIpToDottedQuad(ntohl(hubIp->sin_addr.s_addr), hubDottedQuad)) warn("Can't convert ipToDottedQuad"); else { struct runJobMessage rjm; if (parseRunJobMessage(line, &rjm)) { int jobId = atoi(rjm.jobIdString); if (findRunningJob(jobId) == NULL && findFinishedJob(jobId) == NULL) { if (busyProcs < maxProcs) { int childPid; argCount = chopLine(rjm.command, args); if (argCount >= ArraySize(args)) warn("Too many arguments to run"); else { args[argCount] = NULL; if ((childPid = forkOrDie()) == 0) { /* Do JOB_ID substitutions */ struct subText *st = subTextNew("$JOB_ID", rjm.jobIdString); int i; rjm.in = subTextString(st, rjm.in); rjm.out = subTextString(st, rjm.out); rjm.err = subTextString(st, rjm.err); for (i=0; i<argCount; ++i) args[i] = subTextString(st, args[i]); execProc(hubDottedQuad, rjm.jobIdString, rjm.reserved, rjm.user, rjm.dir, rjm.in, rjm.out, rjm.err, rjm.ram, args[0], args); exit(0); } else { struct job *job; AllocVar(job); job->jobId = atoi(rjm.jobIdString); job->pid = childPid; job->startMessage = jobMessage; jobMessage = NULL; /* No longer own memory. */ job->node = dlAddValTail(jobsRunning, job); ++busyProcs; } } } else { warn("Trying to run when busy."); } } else { warn("Duplicate run-job %d\n", jobId); } } } freez(&jobMessage); }
void ccCp(char *source, char *dest, char *hostList) /* Copy source to dest on all files in hostList. */ { time_t startTime = time(NULL); time_t curTime, lastTime = 0; struct machine *machineList = NULL; struct netSwitch *nsList; struct machine *m, *m2; struct dlList *toDoList = newDlList(); /* We haven't done these. */ struct dlList *finishedList = newDlList(); /* All done here. */ struct dlList *sourceList = newDlList(); /* These are sources for copies. */ struct dlList *workingList = newDlList(); /* These are copying data to themselves. */ struct dlList *errList = newDlList(); /* These are messed up 3x or more. */ bool firstOk = FALSE; struct dlNode *finNode, *node, *sourceNode, *destNode; struct dyString *cmd = newDyString(256); int machineCount; int machinesFinished = 0; char *thisHost = getenv("HOST"); off_t size; int goodMachines; double grandTotal; /* Get host and switch info. */ readHosts(hostList, &machineList, &nsList); machineCount = slCount(machineList); /* Make sure file exists.... */ if (!fileExists(source)) errAbort("%s doesn't exist\n", source); size = fileSize(source); printf("Copying %s (%lld bytes) to %d machines\n", source, (unsigned long long)size, machineCount); /* Add everything to the to-do list. */ for (m = machineList; m != NULL; m = m->next) { dlAddValTail(toDoList, m); } /* Loop through to-do list trying to do first copy. */ for (node = toDoList->head; node->next != NULL; node = node->next) { m = node->val; dyStringClear(cmd); m = node->val; if (sameString(thisHost, m->name)) { if (sameString(source, dest)) { /* Hey, this is too easy. 
*/ firstOk = TRUE; ++machinesFinished; break; } else { dyStringPrintf(cmd, "cp %s %s", source, dest); } } else { dyStringPrintf(cmd, "rcp %s %s:%s", source, m->name, dest); } if (system(cmd->string) == 0) { dlRemove(node); dlAddTail(finishedList, node); firstOk = TRUE; ++machinesFinished; break; } else /* some error in rcp */ { warn("Problem with %s\n", cmd->string); m->errCount += 1; } } /* Loop around launching child processes to copy and * wait for them to finish. */ while (machinesFinished < machineCount) { int pid; int status; /* Start all possible copies. */ while (matchMaker(finishedList, toDoList, &sourceNode, &destNode)) { dlAddTail(sourceList, sourceNode); dlAddTail(workingList, destNode); m = destNode->val; m->sourceNode = sourceNode; startCopy(sourceNode->val, destNode->val, dest, thisHost, cmd); } curTime = time(NULL); if (curTime - lastTime >= 3) { printf("%d finished in %d seconds, %d in progress, %d to start, %d errors, %d total\n", dlCount(finishedList) + dlCount(sourceList), (int)(curTime - startTime), dlCount(workingList), dlCount(toDoList), dlCount(errList), machineCount); lastTime = curTime; } /* Wait for a child to finish. Figure out which machine it is. */ pid = wait(&status); finNode = NULL; for (node = workingList->head; node->next != NULL; node = node->next) { m = node->val; if (m->pid == pid) { finNode = node; break; } } if (finNode == NULL) { errAbort("Returned from wait on unknown child %d\n", pid); continue; } m = finNode->val; m->pid = 0; dlRemove(finNode); dlRemove(m->sourceNode); m2 = m->sourceNode->val; if (m->netSwitch != m2->netSwitch) --crossSwitchCount; dlAddTail(finishedList, m->sourceNode); if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { /* Good return - move self and source node to finished list. */ ++machinesFinished; dlAddTail(finishedList, finNode); } else { /* Bad return. Increment error count, and maybe move it to * error list. 
*/ if (++m->errCount >= maxErrCount) { ++machinesFinished; dlAddTail(errList, finNode); fprintf(stderr, "Gave up on %s\n", m->name); } else { dlAddMiddle(toDoList, finNode); fprintf(stderr, "Retry %d on %s\n", m->errCount, m->name); } } } if (!dlEmpty(errList)) { fprintf(stderr, "errors in:"); for (node = errList->head; node->next != NULL; node = node->next) { m = node->val; fprintf(stderr, " %s", m->name); } fprintf(stderr, "\n"); } goodMachines = dlCount(finishedList); grandTotal = (double)goodMachines * (double)size; printf("Copied to %d of %d machines (grand total %e bytes) in %d seconds\n", goodMachines, machineCount, grandTotal, (int)(time(NULL) - startTime)); }
struct wordTree *wordTreeForChainsInFile(char *fileName, int chainSize, struct lm *lm) /* Return a wordTree of all chains-of-words of length chainSize seen in file. * Allocate the structure in local memory pool lm. */ { /* Stuff for processing file a line at a time. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; /* We'll keep a chain of three or so words in a doubly linked list. */ struct dlNode *node; struct dlList *chain = dlListNew(); int curSize = 0; /* We'll build up the tree starting with an empty root node. */ struct wordTree *wt = wordTreeNew(""); int wordCount = 0; /* Save time/space by sharing stack between all "following" rbTrees. */ struct rbTreeNode **stack; lmAllocArray(lm, stack, 256); /* Loop through each line of input file, lowercasing the whole line, and then * looping through each word of line, stripping out special chars, and finally * processing each word. */ while (lineFileNext(lf, &line, NULL)) { if (lower) tolowers(line); while ((word = nextWord(&line)) != NULL) { if (unpunc) { stripChar(word, ','); stripChar(word, '.'); stripChar(word, ';'); stripChar(word, '-'); stripChar(word, '"'); stripChar(word, '?'); stripChar(word, '!'); stripChar(word, '('); stripChar(word, ')'); if (word[0] == 0) continue; } verbose(2, "%s\n", word); /* We come to this point in the code for each word in the file. * Here we want to maintain a chain of sequential words up to * chainSize long. We do this with a doubly-linked list structure. * For the first few words in the file we'll just build up the list, * only adding it to the tree when we finally do get to the desired * chain size. Once past the initial section of the file we'll be * getting rid of the first link in the chain as well as adding a new * last link in the chain with each new word we see. 
*/ if (curSize < chainSize) { dlAddValTail(chain, cloneString(word)); ++curSize; if (curSize == chainSize) addChainToTree(wt, chain, lm, stack); } else { /* Reuse doubly-linked-list node, but give it a new value, as we move * it from head to tail of list. */ node = dlPopHead(chain); freeMem(node->val); node->val = cloneString(word); dlAddTail(chain, node); addChainToTree(wt, chain, lm, stack); } ++wordCount; } } /* Handle last few words in file, where can't make a chain of full size. Need * a special case for file that has fewer than chain size words too. */ if (curSize < chainSize) addChainToTree(wt, chain, lm, stack); while ((node = dlPopHead(chain)) != NULL) { addChainToTree(wt, chain, lm, stack); freeMem(node->val); freeMem(node); } dlListFree(&chain); lineFileClose(&lf); return wt; }
struct wordStore *wordStoreForChainsInFile(char *fileName, int chainSize) /* Return a wordStore containing all words, and also all chains-of-words of length * chainSize seen in file. */ { /* Stuff for processing file a line at a time. */ struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line, *word; /* We'll build up the tree starting with an empty root node. */ struct wordStore *store = wordStoreNew(chainSize); struct wordTree *wt = store->markovChains = wordTreeNew(wordStoreAdd(store, "")); /* Loop through each line of file, treating it as a separate read. There's * special cases at the beginning and end of line, and for short lines. In the * main case we'll be maintaining a chain (doubly linked list) of maxChainSize words, * popping off one word from the start, and adding one word to the end for each * new word we encounter. This list is added to the tree each iteration. */ while (lineFileNext(lf, &line, NULL)) { /* We'll keep a chain of three or so words in a doubly linked list. */ struct dlNode *node; struct dlList *chain = dlListNew(); int curSize = 0; int wordCount = 0; /* skipping the first word which is the read id */ word = nextWord(&line); while ((word = nextWord(&line)) != NULL) { struct wordInfo *info = wordStoreAdd(store, word); /* For the first few words in the file after ID, we'll just build up the chain, * only adding it to the tree when we finally do get to the desired * chain size. Once past the initial section of the file we'll be * getting rid of the first link in the chain as well as adding a new * last link in the chain with each new word we see. */ if (curSize < chainSize) { dlAddValTail(chain, info); ++curSize; if (curSize == chainSize) addChainToTree(wt, chain); } else { /* Reuse doubly-linked-list node, but give it a new value, as we move * it from head to tail of list. 
*/ node = dlPopHead(chain); node->val = info; dlAddTail(chain, node); addChainToTree(wt, chain); } ++wordCount; } /* Handle last few words in line, where can't make a chain of full size. Also handles * lines that have fewer than chain size words. */ if (curSize < chainSize) addChainToTree(wt, chain); while ((node = dlPopHead(chain)) != NULL) { if (!dlEmpty(chain)) addChainToTree(wt, chain); freeMem(node); } dlListFree(&chain); } lineFileClose(&lf); wordTreeSort(wt); // Make output of chain file prettier return store; }