/*
 * Scan one file and record every alphabetic word it contains in the index.
 *
 *   inpathname  - path of the file to scan
 *   ind         - index that receives one entry per word found
 *   store       - pathname store; the pointer it returns is what gets
 *                 handed to the index
 *   discardDups - passed straight through to Pathstore_path
 *
 * Returns -1 if the file cannot be opened. Returns 0 in every other case:
 * the file was scanned to the end, Pathstore_path returned NULL (counted in
 * numdups), or Fileops_getinumber reported a negative inumber.
 *
 * A "word" is a maximal run of isalpha() characters, cut at MAX_WORD_SIZE
 * characters; the remainder of a longer run is stored as further entries.
 */
int Scan_File(char *inpathname, Index *ind, Pathstore *store, int discardDups)
{
    // Save the pathname in the store
    char *pathname = Pathstore_path(store, inpathname, discardDups);
    if (pathname == NULL) {
        numdups++;
        DPRINTF('s', ("Scan_Pathname discard dup (%s)\n", inpathname));
        return 0;
    }
    numfiles++;
    DPRINTF('s', ("Scan_Pathname(%s)\n", pathname));

    int fd = Fileops_open(pathname);
    if (fd < 0) {
        fprintf(stderr, "Can't open pathname %s\n", pathname);
        return -1;
    }

    int size = 0;
    int inum = Fileops_getinumber(fd, &size);
    // Size trace goes through the debug macro like the other traces in this
    // function instead of being written to stdout for every scanned file.
    DPRINTF('s', ("Size: %d\n", size));
    if (inum < 0) {
        Fileops_close(fd);
        return 0;
    }

    // Load the block for the current file position; Fileops_getchar is then
    // handed the buffer and the byte count on every call.
    unsigned char buf[DISKIMG_SECTOR_SIZE];
    int blockNo = Fileops_getblockno(fd);
    int bytesMoved = Fileops_getblock(inum, blockNo, buf);

    int ch = Fileops_getchar(fd, inum, size, buf, &bytesMoved);
    numchars++;

    // Process words until we reach the end of the file
    while (ch >= 0) {
        // Skip any leading non-alpha characters
        while (!isalpha(ch)) {
            ch = Fileops_getchar(fd, inum, size, buf, &bytesMoved);
            if (ch < 0) {
                Fileops_close(fd);
                return 0;
            }
            numchars++;
        }

        // Found a word - record it in the index.
        int offset = Fileops_tell(fd);
        int pos = 0;

        // Read off the word until we hit the end of the word buffer,
        // the end of the file, or a non-alpha character.
        char word[MAX_WORD_SIZE + 1];
        while ((pos < MAX_WORD_SIZE) && (ch >= 0) && isalpha(ch)) {
            word[pos++] = ch;
            ch = Fileops_getchar(fd, inum, size, buf, &bytesMoved);
            numchars++;
        }
        numwords++;
        word[pos] = 0; // terminate string

        bool ok = Index_StoreEntry(ind, word, pathname, offset);
        assert(ok);
    }

    Fileops_close(fd);
    return 0;
}
/*
 * Implement the Unix read system call: copy up to `length` bytes from the
 * file open on `fd` into `buffer`.
 *
 * Returns the number of bytes stored in buffer. The count is smaller than
 * `length` when Fileops_getchar returns -1 before `length` bytes were read.
 * Returns -1 on error, i.e. when Fileops_getinumber reports a negative
 * inumber for fd.
 */
int Fileops_read(int fd, char *buffer, int length)
{
    int size = 0;

    numreads++;

    int inumber = Fileops_getinumber(fd, &size);
    if (inumber < 0) {
        // Report the failure as -1, as the header promises; returning 0 here
        // would be indistinguishable from a read at end-of-file.
        return -1;
    }

    // Load the block for the current file position; Fileops_getchar is then
    // handed the buffer and the byte count on every call.
    int blockNo = Fileops_getblockno(fd);
    unsigned char buf[DISKIMG_SECTOR_SIZE];
    int bytesMoved = Fileops_getblock(inumber, blockNo, buf);

    int i;
    for (i = 0; i < length; i++) {
        int ch = Fileops_getchar(fd, inumber, size, buf, &bytesMoved);
        if (ch == -1) {
            break;
        }
        buffer[i] = (char) ch;
    }
    return i;
}
/*
 * Implement the Unix read system call: pull up to `length` bytes from the
 * file open on `fd`, one Fileops_getchar call per byte, into `buffer`.
 *
 * Returns the number of bytes stored in buffer. Reading stops early at the
 * first -1 returned by Fileops_getchar.
 *
 * NOTE(review): the previous header said "Return -1 on err", but no path in
 * this body returns -1 - confirm the intended error contract.
 */
int Fileops_read(int fd, char *buffer, int length)
{
    int count = 0;

    numreads++;

    while (count < length) {
        int c = Fileops_getchar(fd);
        if (c == -1) {
            break;
        }
        buffer[count] = c;
        count++;
    }

    return count;
}
/*
 * Decide whether two pathnames name files with the same contents.
 *
 * Returns 1 when the pathnames are equal strings, or when the checksums
 * agree and a byte-by-byte comparison finds no difference. Returns 0 when
 * the contents differ, and also when either file cannot be checksummed or
 * opened (a message is written to stderr in those cases).
 */
static int IsSameFile(Pathstore *store, char *pathname1, char *pathname2)
{
    char sum1[CHKSUMFILE_SIZE];
    char sum2[CHKSUMFILE_SIZE];
    struct unixfilesystem *fs = (struct unixfilesystem *) (store->fshandle);

    numcompares++;

    /* Identical pathname strings trivially refer to the same file. */
    if (!strcmp(pathname1, pathname2)) {
        return 1;
    }

    /* Cheap filter first: checksum both files. */
    if (chksumfile_bypathname(fs, pathname1, sum1) < 0) {
        fprintf(stderr,"Can't checksum path %s\n", pathname1);
        return 0;
    }
    if (chksumfile_bypathname(fs, pathname2, sum2) < 0) {
        fprintf(stderr,"Can't checksum path %s\n", pathname2);
        return 0;
    }
    if (!chksumfile_compare(sum1, sum2)) {
        /* A zero result is treated as a checksum mismatch. */
        numdiffchecksum++;
        return 0;
    }

    /* Checksums agree, so confirm with a full content comparison. */
    int fd1 = Fileops_open(pathname1);
    if (fd1 < 0) {
        fprintf(stderr, "Can't open path %s\n", pathname1);
        return 0;
    }
    int fd2 = Fileops_open(pathname2);
    if (fd2 < 0) {
        Fileops_close(fd1);
        fprintf(stderr, "Can't open path %s\n", pathname2);
        return 0;
    }

    /*
     * Read both files in lock step. The loop ends on the first differing
     * pair (a != b) or once both reads return -1 together (a == b == -1).
     */
    int a;
    int b;
    for (;;) {
        a = Fileops_getchar(fd1);
        b = Fileops_getchar(fd2);
        if (a != b || a == -1) {
            break;
        }
    }

    Fileops_close(fd1);
    Fileops_close(fd2);

    int same = (a == b);
    if (same) {
        numsamefiles++;
    } else {
        numdifferentfiles++;
    }
    return same;
}