/*
 * Index the file or directory tree rooted at pathname.
 *
 * If pathname names a file it is handed to Scan_File and that result is
 * returned. Otherwise pathname is opened as a directory and every entry
 * other than "." and ".." is scanned recursively.
 *
 * Returns 0 when the directory was read to its end, -1 if the path is too
 * long, the directory cannot be opened, or a directory read comes back
 * short. Failures inside a subtree are not propagated: the scan carries on
 * with the remaining entries.
 */
int Scan_TreeAndIndex(char *pathname, Index *ind, Pathstore *store,int discardDups) {
  // Compile-time constant so that nextpath below is a plain array, not a VLA.
  enum { MAXPATH = 1024 };

  if (Fileops_isfile(pathname)) {
    return Scan_File(pathname, ind, store, discardDups);
  }

  // Not a file, so it must be a directory: process all entries in it.
  // Leave 16 bytes of headroom for the '/' separator, one entry name and
  // the terminating NUL.
  if (strlen(pathname) > MAXPATH-16) {
    fprintf(stderr, "Too deep of directories %s\n", pathname);
    return -1;
  }
  numdirs++;

  int dirfd = Fileops_open(pathname);
  if (dirfd < 0) {
    fprintf(stderr, "Can't open pathname %s\n", pathname);
    return -1;
  }

  // For the root directory "/" drop the slash so that children are built as
  // "/name" rather than "//name". Checking the first character as well keeps
  // other one-character paths intact.
  if (pathname[0] == '/' && pathname[1] == 0) {
    pathname++;
  }

  int ret;
  while (1) {
    struct direntv6 dirent;
    ret = Fileops_read(dirfd, (char *)&dirent, sizeof(struct direntv6));
    if (ret == 0) {
      /* Done with directory */
      break;
    }
    if (ret != sizeof(struct direntv6)) {
      fprintf(stderr, "Error reading directory %s\n", pathname);
      ret = -1;
      break;
    }
    numdirents++;

    const char *n = dirent.d_name;
    if (n[0] == '.') {
      if ((n[1] == 0) || ((n[1] == '.') && (n[2] == 0))) {
        /* Skip over "." and ".." */
        continue;
      }
    }

    // d_name is read raw from the directory and is assumed to be a
    // fixed-size char array inside struct direntv6; a name that fills the
    // whole field has no terminating NUL. Bound the copy with a precision
    // so the read never runs past the field.
    char nextpath[MAXPATH];
    int len = snprintf(nextpath, sizeof nextpath, "%s/%.*s",
                       pathname, (int) sizeof dirent.d_name, n);
    if (len < 0 || (size_t) len >= sizeof nextpath) {
      fprintf(stderr, "Pathname too long under %s\n", pathname);
      continue;
    }

    // Result deliberately ignored: one bad entry does not stop the scan.
    (void) Scan_TreeAndIndex(nextpath, ind, store, discardDups);
  }

  Fileops_close(dirfd);
  return ret;
}
/*
 * Index every word of a single file.
 *
 * The pathname is first registered with the path store; a NULL result is
 * treated as a discarded duplicate and the file is skipped. Otherwise the
 * file is read one character at a time, and each maximal run of alphabetic
 * characters (capped at MAX_WORD_SIZE) is recorded in the index together
 * with the stored pathname and the offset reported by Fileops_tell.
 *
 * Returns -1 if the file cannot be opened, 0 in every other case
 * (including a skipped duplicate and a negative inode number).
 */
int Scan_File(char *inpathname, Index *ind, Pathstore *store, int discardDups) {
  // Save the pathname in the store
  char *pathname = Pathstore_path(store, inpathname,discardDups);
  if (pathname == NULL) {
    numdups++;
    DPRINTF('s',("Scan_Pathname discard dup (%s)\n", inpathname));
    return 0;
  }

  numfiles++;
  DPRINTF('s', ("Scan_Pathname(%s)\n", pathname));

  int fd = Fileops_open(pathname);
  if (fd < 0) {
    fprintf(stderr, "Can't open pathname %s\n", pathname);
    return -1;
  }

  int size = 0;
  int inum = Fileops_getinumber(fd, &size);
  // Diagnostic only: goes through the debug facility, not stdout.
  DPRINTF('s', ("Size: %d\n", size));

  if (!(inum < 0)) {
    // Sector-sized buffer handed to Fileops_getchar on every call.
    // NOTE(review): presumably a one-block read cache - confirm in Fileops.
    unsigned char buf[DISKIMG_SECTOR_SIZE];
    int blockNo = Fileops_getblockno(fd);
    int bytesMoved = Fileops_getblock(inum, blockNo, buf);

    int ch = Fileops_getchar(fd, inum, size, buf, &bytesMoved);
    numchars++;

    // Process words until a negative character ends the file.
    while (!(ch < 0)) {
      // Skip any leading non-alpha characters.
      while (!isalpha(ch)) {
        ch = Fileops_getchar(fd, inum, size, buf, &bytesMoved);
        if (ch < 0) {
          break;
        }
        numchars++;
      }
      if (ch < 0) {
        break;  // Ran out of input before another word started.
      }

      // Found a word - record it in the index.
      int offset = Fileops_tell(fd);
      int pos = 0;

      // Read the word until the word buffer is full, the file ends, or a
      // non-alpha character shows up. A run longer than MAX_WORD_SIZE is
      // split: the remainder is indexed as a further word on the next pass.
      char word[MAX_WORD_SIZE+1];
      while ((pos < MAX_WORD_SIZE) && !(ch < 0) && isalpha(ch)) {
        word[pos++] = ch;
        ch = Fileops_getchar(fd, inum, size, buf, &bytesMoved);
        numchars++;
      }
      numwords++;
      word[pos] = 0; // terminate string

      bool ok = Index_StoreEntry(ind, word, pathname, offset);
      assert(ok);
      (void) ok;  // keeps builds with NDEBUG free of an unused-variable warning
    }
  }

  Fileops_close(fd);
  return 0;
}
/*
 * Decide whether two pathnames name files with identical contents.
 *
 * Identical pathname strings are accepted immediately. Otherwise both files
 * are checksummed; only when the checksums agree are the files opened and
 * compared character by character.
 *
 * Returns 1 when the files are considered the same, 0 when they differ or
 * when either file cannot be checksummed or opened. Updates the counters
 * numcompares, numdiffchecksum, numsamefiles and numdifferentfiles.
 *
 * NOTE(review): Fileops_getchar is called with a single argument here while
 * Scan_File passes five (fd, inum, size, buf, &bytesMoved) - confirm which
 * form matches the current declaration.
 */
static int IsSameFile(Pathstore *store, char *pathname1, char *pathname2) {
  struct unixfilesystem *fs = (struct unixfilesystem *) (store->fshandle);
  char chksum1[CHKSUMFILE_SIZE];
  char chksum2[CHKSUMFILE_SIZE];

  numcompares++;

  // The very same pathname trivially names the same file.
  if (strcmp(pathname1, pathname2) == 0) {
    return 1;
  }

  /* Checksum both files; a failure on either side counts as "different". */
  if (chksumfile_bypathname(fs, pathname1, chksum1) < 0) {
    fprintf(stderr,"Can't checksum path %s\n", pathname1);
    return 0;
  }
  if (chksumfile_bypathname(fs, pathname2, chksum2) < 0) {
    fprintf(stderr,"Can't checksum path %s\n", pathname2);
    return 0;
  }

  // A zero result from chksumfile_compare is treated as a mismatch.
  if (chksumfile_compare(chksum1, chksum2) == 0) {
    numdiffchecksum++;
    return 0;
  }

  /* Checksums agree, so fall back to a full content comparison. */
  int fd1 = Fileops_open(pathname1);
  if (fd1 < 0) {
    fprintf(stderr, "Can't open path %s\n", pathname1);
    return 0;
  }
  int fd2 = Fileops_open(pathname2);
  if (fd2 < 0) {
    Fileops_close(fd1);
    fprintf(stderr, "Can't open path %s\n", pathname2);
    return 0;
  }

  // Read both files in lockstep. The loop stops on the first differing
  // character, or once both reads return -1 (then ch1 == ch2 == -1).
  int ch1, ch2;
  for (;;) {
    ch1 = Fileops_getchar(fd1);
    ch2 = Fileops_getchar(fd2);
    if (ch1 != ch2 || ch1 == -1) {
      break;
    }
  }

  Fileops_close(fd1);
  Fileops_close(fd2);

  int same = (ch1 == ch2);
  if (same) {
    numsamefiles++;
  } else {
    numdifferentfiles++;
  }
  return same;
}