/*
 - zapSubtree - initialize the retry memory of a subtree to zeros
 ^ static void zapSubtree(struct vars *, struct subre *);
 */
static void
zapSubtree(
    struct vars *const v,
    struct subre *const t)
{
    /* A NULL node is a no-op, which also lets us recurse unconditionally. */
    if (t == NULL) {
        return;
    }

    assert(v->mem != NULL);

    /* Reset this node's retry memory to the "clean slate" state. */
    v->mem[t->retry] = 0;

    /* A capturing node also forgets its recorded subexpression range. */
    if (t->op == '(') {
        assert(t->subno > 0);
        v->pmatch[t->subno].rm_so = -1;
        v->pmatch[t->subno].rm_eo = -1;
    }

    /* Clear both children; NULL children are handled by the entry check. */
    zapSubtree(v, t->left);
    zapSubtree(v, t->right);
}
static size_t processNextInotifyEvent(int *inotifyFd, char *buf, int bufSize, int firstTry) { char fullPath[PATH_MAX]; struct inotify_event *ev; size_t evLen; int evCacheSlot; ev = (struct inotify_event *) buf; displayInotifyEvent(ev); if (ev->wd != -1 && !(ev->mask & IN_IGNORED)) { /* IN_Q_OVERFLOW has (ev->wd == -1) */ /* Skip IN_IGNORED, since it will come after an event that has already zapped the corresponding cache entry */ /* Cache consistency check; see the discussion of "intra-tree" rename() events */ evCacheSlot = findWatchChecked(ev->wd); if (evCacheSlot == -1) { /* Cache reached an inconsistent state */ *inotifyFd = reinitialize(*inotifyFd); /* Discard all remaining events in current read() buffer */ return INOTIFY_READ_BUF_LEN; } } evLen = sizeof(struct inotify_event) + ev->len; if ((ev->mask & IN_ISDIR) && (ev->mask & (IN_CREATE | IN_MOVED_TO))) { /* A new subdirectory was created, or a subdirectory was renamed into the tree; create watches for it, and all of its subdirectories. */ snprintf(fullPath, sizeof(fullPath), "%s/%s", wlCache[evCacheSlot].path, ev->name); logMessage(VB_BASIC, "Directory creation on wd %d: %s\n", ev->wd, fullPath); /* We only watch the new subtree if it has not already been cached. This deals with a race condition: * On the one hand, the following steps might occur: 1. The "child" directory is created. 2. The "grandchild" directory is created 3. We receive an IN_CREATE event for the creation of the "child" and create a watch and a cache entry for it. 4. To handle the possibility that step 2 came before step 3, we recursively walk through the descendants of the "child" directory, adding any subdirectories to the cache. * On the other hand, the following steps might occur: 1. The "child" directory is created. 3. We receive an IN_CREATE event for the creation of the "child" and create a watch and a cache entry for it. 3. The "grandchild" directory is created 4. 
During the recursive walk through the descendants of the "child" directory, we cache the "grandchild" and add a watch for it. 5. We receive the IN_CREATE event for the creation of the "grandchild". At this point, we should NOT create a cache entry and watch for the "grandchild" because they already exist. (Creating the watch for the second time is harmless, but adding a second cache for the grandchild would leave the cache in a confused state.) */ if (!pathnameInCache(fullPath)) watchSubtree(*inotifyFd, fullPath); } else if (ev->mask & IN_DELETE_SELF) { /* A directory was deleted. Remove the corresponding item from the cache. */ logMessage(VB_BASIC, "Clearing watchlist item %d (%s)\n", ev->wd, wlCache[evCacheSlot].path); if (isRootDirPath(wlCache[evCacheSlot].path)) zapRootDirPath(wlCache[evCacheSlot].path); markCacheSlotEmpty(evCacheSlot); /* No need to remove the watch; that happens automatically */ } else if ((ev->mask & (IN_MOVED_FROM | IN_ISDIR)) == (IN_MOVED_FROM | IN_ISDIR)) { /* We have a "moved from" event. To know how to deal with it, we need to determine whether there is a following "moved to" event with a matching cookie value (i.e., an "intra-tree" rename() where the source and destination are inside our monitored trees). If there is not, then we are dealing with a rename() out of our monitored tree(s). We assume that if this is an "intra-tree" rename() event, then the "moved to" event is the next event in the buffer returned by the current read(). (If we are already at the last event in this buffer, then we ask our caller to read a bit more, in the hope of getting the following IN_MOVED_TO event in the next read().) In most cases, the assumption holds. 
However, where multiple processes are manipulating the tree, we can can get event sequences such as the following: IN_MOVED_FROM (rename(x) by process A) IN_MOVED_FROM (rename(y) by process B) IN_MOVED_TO (rename(y) by process B) IN_MOVED_TO (rename(x) by process A) In principle, there may be arbitrarily complex variations on the above theme. Our assumption that related IN_MOVED_FROM and IN_MOVED_TO events are consecutive is broken by such scenarios. We could try to resolve this issue by extending the window we use to search for IN_MOVED_TO events beyond the next item in the queue. But this must be done heuristically (e.g., limiting the window to N events or to events read within X milliseconds), because sometimes we will have an unmatched IN_MOVED_FROM events that result from out-of-tree renames. The heuristic approach is therefore unavoidably racy: there is always a chance that we will fail to match up an IN_MOVED_FROM+IN_MOVED_TO event pair. So, this program takes the simple approach of assuming that an IN_MOVED_FROM+IN_MOVED_TO pair occupy consecutive events in the buffer returned by read(). When that assumption is wrong (and we therefore fail to recognize an intra-tree rename() event), then the rename will be treated as separate "moved from" and "moved to" events, with the result that some watch items and cache entries are zapped and re-created. This causes the watch descriptors in our cache to become inconsistent with the watch descriptors in as yet unread events, because the watches are re-created with different watch descriptor numbers. Once such an inconsistency occurs, then, at some later point, we will do a lookup for a watch descriptor returned by inotify, and find that it is not in our cache. When that happens, we reinitialize our cache with a fresh set of watch descriptors and re-create the inotify file descriptor, in order to bring our cache back into consistency with the filesystem. 
An alternative would be to cache the cookies of the (recent) IN_MOVED_FROM events for which which we did not find a matching IN_MOVED_TO event, and rebuild our watch cache when we find an IN_MOVED_TO event whose cookie matches one of the cached cookies. Yet another approach when we detect an out-of-tree rename would be to reinitialize the cache and create a new inotify file descriptor. (TODO: consider the fact that for a rename event, there won't be other events for the object between IN_MOVED_FROM and IN_MOVED_TO.) Rebuilding the watch cache is expensive if the monitored tree is large. So, there is a trade-off between how much effort we want to go to to avoid cache rebuilds versus how much effort we want to devote to matching up IN_MOVED_FROM+IN_MOVED_TO event pairs. At the one extreme we would do no search ahead for IN_MOVED_TO, with the result that every rename() potentially could trigger a cache rebuild. Limiting the search window to just the following event is a compromise that catches the vast majority of intra-tree renames and triggers relatively few cache rebuilds. */ struct inotify_event *nextEv; nextEv = (struct inotify_event *) (buf + evLen); if (((char *) nextEv < buf + bufSize) && (nextEv->mask & IN_MOVED_TO) && (nextEv->cookie == ev->cookie)) { int nextEvCacheSlot; /* We have a rename() event. We need to fix up the cached pathnames for the corresponding directory and all of its subdirectories. 
*/ nextEvCacheSlot = findWatchChecked(nextEv->wd); if (nextEvCacheSlot == -1) { /* Cache reached an inconsistent state */ *inotifyFd = reinitialize(*inotifyFd); /* Discard all remaining events in current read() buffer */ return INOTIFY_READ_BUF_LEN; } rewriteCachedPaths(wlCache[evCacheSlot].path, ev->name, wlCache[nextEvCacheSlot].path, nextEv->name); /* We have also processed the next (IN_MOVED_TO) event, so skip over it */ evLen += sizeof(struct inotify_event) + nextEv->len; } else if (((char *) nextEv < buf + bufSize) || !firstTry) { /* We got a "moved from" event without an accompanying "moved to" event. The directory has been moved outside the tree we are monitoring. We need to remove the watches and zap the cache entries for the moved directory and all of its subdirectories. */ logMessage(VB_NOISY, "MOVED_OUT: %p %p\n", wlCache[evCacheSlot].path, ev->name); logMessage(VB_NOISY, "firstTry = %d; remaining bytes = %d\n", firstTry, buf + bufSize - (char *) nextEv); snprintf(fullPath, sizeof(fullPath), "%s/%s", wlCache[evCacheSlot].path, ev->name); if (zapSubtree(*inotifyFd, fullPath) == -1) { /* Cache reached an inconsistent state */ *inotifyFd = reinitialize(*inotifyFd); /* Discard all remaining events in current read() buffer */ return INOTIFY_READ_BUF_LEN; } } else { logMessage(VB_NOISY, "HANGING IN_MOVED_FROM\n"); return -1; /* Tell our caller to do another read() */ } } else if (ev->mask & IN_Q_OVERFLOW) { static int overflowCnt = 0; overflowCnt++; logMessage(0, "Queue overflow (%d) (inotifyReadCnt = %d)\n", overflowCnt, inotifyReadCnt); /* When the queue overflows, some events are lost, at which point we've lost any chance of keeping our cache consistent with the state of the filesystem. So, discard this inotify file descriptor and create a new one, and zap and rebuild the cache. 
*/ *inotifyFd = reinitialize(*inotifyFd); /* Discard all remaining events in current read() buffer */ evLen = INOTIFY_READ_BUF_LEN; } else if (ev->mask & IN_UNMOUNT) { /* When a filesystem is unmounted, each of the watches on the is dropped, and an unmount and an ignore event are generated. There's nothing left for us to monitor, so we just zap the corresponding cache entry. */ logMessage(0, "Filesystem unmounted: %s\n", wlCache[evCacheSlot].path); markCacheSlotEmpty(evCacheSlot); /* No need to remove the watch; that happens automatically */ } else if (ev->mask & IN_MOVE_SELF && isRootDirPath(wlCache[evCacheSlot].path)) { /* If the root path moves to a new location in the same filesystem, then all cached pathnames become invalid, and we have no direct way of knowing the new name of the root path. We could in theory find the new name by caching the i-node of the root path on start-up and then trying to find a pathname that corresponds to that i-node. Instead, we'll keep things simple, and just cease monitoring it. */ logMessage(0, "Root path moved: %s\n", wlCache[evCacheSlot].path); zapRootDirPath(wlCache[evCacheSlot].path); if (zapSubtree(*inotifyFd, wlCache[evCacheSlot].path) == -1) { /* Cache reached an inconsistent state */ *inotifyFd = reinitialize(*inotifyFd); /* Discard all remaining events in current read() buffer */ return INOTIFY_READ_BUF_LEN; } } if (checkCache) checkCacheConsistency(); if (dumpCache) dumpCacheToLog(); return evLen; }
static void executeCommand(int *inotifyFd) { const int MAX_LINE = 100; ssize_t numRead; char line[MAX_LINE], arg[MAX_LINE]; char cmd; int j, cnt, ns, failures; struct stat sb; FILE *fp; numRead = read(STDIN_FILENO, line, MAX_LINE); if (numRead <= 0) { printf("bye!\n"); exit(EXIT_FAILURE); } line[numRead - 1] = '\0'; if (strlen(line) == 0) return; ns = sscanf(line, "%c %s\n", &cmd, arg); switch (cmd) { case 'a': /* Add/refresh a subtree */ cnt = zapSubtree(*inotifyFd, arg); if (cnt == 0) { logMessage(VB_BASIC, "Adding new subtree: %s\n", arg); } else { logMessage(VB_BASIC, "Zapped: %s, %d entries\n", arg, cnt); } watchSubtree(*inotifyFd, arg); break; case 'c': /* Check that all cached pathnames exist */ case 'C': cnt = 0; failures = 0; for (j = 0; j < cacheSize; j++) { if (wlCache[j].wd >= 0) { if (lstat(wlCache[j].path, &sb) == -1) { if (cmd == 'c') logMessage(VB_BASIC, "stat: [slot = %d; wd = %d] %s: %s\n", j, wlCache[j].wd, wlCache[j].path, strerror(errno)); failures++; } else if (!S_ISDIR(sb.st_mode)) { if (cmd == 'c') logMessage(0, "%s is not a directory\n", wlCache[j].path); exit(EXIT_FAILURE); } else { if (cmd == 'c') logMessage(VB_NOISY, "OK: [slot = %d; wd = %d] %s\n", j, wlCache[j].wd, wlCache[j].path); cnt++; } } } logMessage(0, "Successfully verified %d entries\n", cnt); logMessage(0, "Failures: %d\n", failures); break; case 'l': /* List entries in the cache */ cnt = 0; for (j = 0; j < cacheSize; j++) { if (wlCache[j].wd >= 0) { logMessage(0, "%d: %d %s\n", j, wlCache[j].wd, wlCache[j].path); cnt++; } } logMessage(VB_BASIC, "Total entries: %d\n", cnt); break; case 'q': /* Quit */ exit(EXIT_SUCCESS); case 'v': /* Set log verbosity level */ if (ns == 2) verboseMask = atoi(arg); else { verboseMask = !verboseMask; printf("%s\n", verboseMask ? "on" : "off"); } break; case 'd': /* Toggle cache dumping */ dumpCache = !dumpCache; printf("%s\n", dumpCache ? 
"on" : "off"); break; case 'x': /* Set toggle checking */ checkCache = !checkCache; printf("%s\n", checkCache ? "on" : "off"); break; case 'w': /* Write directory list to file */ /* We can compare the output from the below against the output from "find DIR -type d" to check whether the contents of the cache are consistent with the state of the filesystem */ fp = fopen(arg, "w+"); if (fp == NULL) perror("fopen"); for (j = 0; j < cacheSize; j++) if (wlCache[j].wd >= 0) fprintf(fp, "%s\n", wlCache[j].path); fclose(fp); break; case 'z': /* Stop watching a subtree, and zap its cache entries */ cnt = zapSubtree(*inotifyFd, arg); logMessage(VB_BASIC, "Zapped: %s, %d entries\n", arg, cnt); break; case '0': /* Rebuild cache */ close(*inotifyFd); *inotifyFd = reinitialize(-1); break; default: printf("Unrecognized command: %c\n", cmd); printf("Commands:\n"); printf("0 Rebuild cache\n"); printf("a path Add/refresh pathname watches and cache\n"); printf("c Verify cached pathnames\n"); printf("d Toggle cache dumping\n"); printf("l List cached pathnames\n"); printf("q Quit\n"); printf("v [n] Toggle/set verbose level for messages to stderr\n"); printf(" 0 = no messages\n"); printf(" 1 = basic messages\n"); printf(" 2 = verbose messages\n"); printf(" 3 = basic and verbose messages\n"); printf("w file Write directory list to file\n"); printf("x Toggle cache checking\n"); printf("z path Zap pathname and watches from cache\n"); break; } }
/*
 - complicatedReversedDissect - determine backref shortest-first subexpression
 - matches
 * The retry memory stores the offset of the trial midpoint from begin, plus 1
 * so that 0 uniquely means "clean slate".
 ^ static int complicatedReversedDissect(struct vars *, struct subre *, chr *, chr *);
 */
static int				/* regexec return code */
complicatedReversedDissect(
    struct vars *const v,
    struct subre *const t,
    chr *const begin,		/* beginning of relevant substring */
    chr *const end)		/* end of same */
{
    struct dfa *d, *d2;
    chr *mid;			/* trial split point between the two children */

    /* Only called for a concatenation whose left child prefers the
     * shortest match (SHORTER); both children must have compiled NFAs. */
    assert(t->op == '.');
    assert(t->left != NULL && t->left->cnfa.nstates > 0);
    assert(t->right != NULL && t->right->cnfa.nstates > 0);
    assert(t->left->flags&SHORTER);

    /*
     * Concatenation -- need to split the substring between parts.
     */
    d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
    if (ISERR()) {
        return v->err;
    }
    d2 = newDFA(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
    if (ISERR()) {
        /* Second DFA failed; release the first before bailing out. */
        freeDFA(d);
        return v->err;
    }
    MDEBUG(("cRev %d\n", t->retry));

    /*
     * Pick a tentative midpoint.  Retry memory of 0 means no previous
     * attempt; otherwise resume from the saved (offset + 1) midpoint.
     */
    if (v->mem[t->retry] == 0) {
        /* Shortest-first: start with the earliest end for the left child. */
        mid = shortest(v, d, begin, begin, end, NULL, NULL);
        if (mid == NULL) {
            freeDFA(d);
            freeDFA(d2);
            return REG_NOMATCH;
        }
        MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
        v->mem[t->retry] = (mid - begin) + 1;
    } else {
        mid = begin + (v->mem[t->retry] - 1);
        MDEBUG(("working midpoint %ld\n", LOFF(mid)));
    }

    /*
     * Iterate until satisfaction or failure.
     */
    for (;;) {
        /*
         * Try this midpoint on for size.  The right child must consume
         * exactly [mid, end) before we bother dissecting either side.
         */
        if (longest(v, d2, mid, end, NULL) == end) {
            int er = complicatedDissect(v, t->left, begin, mid);
            if (er == REG_OKAY) {
                er = complicatedDissect(v, t->right, mid, end);
                if (er == REG_OKAY) {
                    /*
                     * Satisfaction.
                     */
                    MDEBUG(("successful\n"));
                    freeDFA(d);
                    freeDFA(d2);
                    return REG_OKAY;
                }
            }
            /* Hard errors propagate immediately; REG_NOMATCH means
             * "try another midpoint". */
            if (er != REG_OKAY && er != REG_NOMATCH) {
                freeDFA(d);
                freeDFA(d2);
                return er;
            }
        }

        /*
         * That midpoint didn't work, find a new one.
         */
        if (mid == end) {
            /*
             * All possibilities exhausted.
             */
            MDEBUG(("%d no midpoint\n", t->retry));
            freeDFA(d);
            freeDFA(d2);
            return REG_NOMATCH;
        }
        /* Advance: next-shortest left-child match strictly beyond 'mid'. */
        mid = shortest(v, d, begin, mid+1, end, NULL, NULL);
        if (mid == NULL) {
            /*
             * Failed to find a new one.
             */
            MDEBUG(("%d failed midpoint\n", t->retry));
            freeDFA(d);
            freeDFA(d2);
            return REG_NOMATCH;
        }
        MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
        /* Record progress (offset + 1) and reset both children's retry
         * state so their searches restart from a clean slate. */
        v->mem[t->retry] = (mid - begin) + 1;
        zapSubtree(v, t->left);
        zapSubtree(v, t->right);
    }
}
/*
 - complicatedFindLoop - the heart of complicatedFind
 ^ static int complicatedFindLoop(struct vars *, struct cnfa *, struct colormap *,
 ^ struct dfa *, struct dfa *, chr **);
 */
static int
complicatedFindLoop(
    struct vars *const v,
    struct cnfa *const cnfa,
    struct colormap *const cm,
    struct dfa *const d,	/* forward DFA for the whole tree */
    struct dfa *const s,	/* search DFA used to locate candidate ranges */
    chr **const coldp)		/* where to put coldstart pointer */
{
    chr *begin, *end;
    chr *cold;			/* earliest hitend start seen ("cold start") */
    chr *open, *close;		/* Open and close of range of possible
				 * starts */
    chr *estart, *estop;	/* current window for the end of the match */
    int er, hitend;
    int shorter = v->g->tree->flags&SHORTER;	/* shortest-first tree? */

    assert(d != NULL && s != NULL);
    cold = NULL;
    close = v->start;
    do {
        /* Locate the next range [open, close] of candidate start points. */
        MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
        close = shortest(v, s, close, close, v->stop, &cold, NULL);
        if (close == NULL) {
            break;		/* NOTE BREAK */
        }
        assert(cold != NULL);
        open = cold;
        cold = NULL;
        MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
        for (begin = open; begin <= close; begin++) {
            MDEBUG(("\ncomplicatedFind trying at %ld\n", LOFF(begin)));
            estart = begin;
            estop = v->stop;
            for (;;) {
                /* Pick a tentative end per the tree's match preference. */
                if (shorter) {
                    end = shortest(v, d, begin, estart, estop, NULL, &hitend);
                } else {
                    end = longest(v, d, begin, estop, &hitend);
                }
                /* Remember the first start whose scan reached the stop
                 * point; it is reported through *coldp on failure. */
                if (hitend && cold == NULL) {
                    cold = begin;
                }
                if (end == NULL) {
                    break;	/* NOTE BREAK OUT */
                }
                MDEBUG(("tentative end %ld\n", LOFF(end)));
                /* Reset per-attempt match state before dissecting. */
                zapSubexpressions(v->pmatch, v->nmatch);
                zapSubtree(v, v->g->tree);
                er = complicatedDissect(v, v->g->tree, begin, end);
                if (er == REG_OKAY) {
                    /* Match confirmed; record the overall range. */
                    if (v->nmatch > 0) {
                        v->pmatch[0].rm_so = OFF(begin);
                        v->pmatch[0].rm_eo = OFF(end);
                    }
                    *coldp = cold;
                    return REG_OKAY;
                }
                /* Anything other than "no match" is a hard error. */
                if (er != REG_NOMATCH) {
                    ERR(er);
                    return er;
                }
                /* Stop when the end window is exhausted. */
                if ((shorter) ? end == estop : end == begin) {
                    break;
                }
                /*
                 * Go around and try again
                 */
                if (shorter) {
                    estart = end + 1;
                } else {
                    estop = end - 1;
                }
            }
        }
    } while (close < v->stop);
    *coldp = cold;
    return REG_NOMATCH;
}