Exemplo n.º 1
0
/*
 * Report whether the store already holds an entry whose checksum equals the
 * checksum of the file named by pathname.
 *
 * Returns 1 as soon as an entry with an equal checksum is found. Returns 0
 * when no entry matches, or when pathname cannot be checksummed (a message
 * is written to stderr in that case).
 *
 * Only checksums are compared here; no byte-by-byte content comparison is
 * performed.
 */
static int
SameFileIsInStore(Pathstore *store, char *pathname)
{
  PathstoreElement *cur = store->elementList;
  struct unixfilesystem *fs = (struct unixfilesystem *) (store->fshandle);
  char target[CHKSUMFILE_SIZE];

  if (chksumfile_bypathname(fs, pathname, target) < 0) {
    fprintf(stderr,"Can't checksum path %s\n", pathname);
    return 0;
  }

  for (; cur != NULL; cur = cur->nextElement) {
    /* A non-zero result from chksumfile_compare is taken as "equal". */
    if (chksumfile_compare(target, cur->checksum)) {
      return 1;  // An entry with the same checksum is already stored
    }
  }

  return 0;  // Walked the whole list without a match
}
Exemplo n.º 2
0
/*
 * Store a pathname in the pathname store.
 *
 * store                 - the store to add to; store->fshandle is used as the
 *                         filesystem handle for checksumming.
 * pathname              - the path to record; the store keeps its own copy.
 * discardDuplicateFiles - when non-zero, the path is rejected if
 *                         SameFileIsInStore() reports a match.
 *
 * Returns the store's private copy of the pathname on success. Returns NULL
 * when the file was discarded as a duplicate, when memory allocation fails,
 * or when the file cannot be checksummed.
 *
 * The new element is linked into the list only after its checksum has been
 * computed, so the list never contains an element with an uninitialized
 * checksum for later duplicate checks to read.
 */
char*
Pathstore_path(Pathstore *store, char *pathname, int discardDuplicateFiles)
{
  PathstoreElement *e;
  struct unixfilesystem *fs = (struct unixfilesystem *) (store->fshandle);

  numstores++;

  if (discardDuplicateFiles) {
    if (SameFileIsInStore(store,pathname)) {
      numdups++;
      return NULL;
    }
  }

  e = malloc(sizeof *e);
  if (e == NULL) {
    return NULL;
  }

  e->pathname = strdup(pathname);
  if (e->pathname == NULL) {
    free(e);
    return NULL;
  }

  /* Fill in the checksum before publishing the element in the list. */
  if (chksumfile_bypathname(fs, pathname, e->checksum) < 0) {
    fprintf(stderr,"Can't checksum path %s\n", pathname);
    free(e->pathname);
    free(e);
    return NULL;
  }

  /* Element is fully initialized: push it onto the front of the list. */
  e->nextElement = store->elementList;
  store->elementList = e;

  return e->pathname;
}
Exemplo n.º 3
0
/*
 * Decide whether two pathnames refer to files with the same contents.
 *
 * Identical pathname strings are reported as the same file without touching
 * the filesystem. Otherwise both files are checksummed; if the checksums
 * differ the answer is "different", and if they agree the files are read
 * character by character to confirm.
 *
 * Returns 1 when the files are considered the same, 0 when they differ or
 * when a checksum or open fails (a message is written to stderr).
 *
 * Counters updated: numcompares on every call; numdiffchecksum when the
 * checksums differ; numsamefiles / numdifferentfiles according to the
 * outcome of the content comparison.
 */
static int
IsSameFile(Pathstore *store, char *pathname1, char *pathname2)
{
  struct unixfilesystem *fs = (struct unixfilesystem *) (store->fshandle);
  char sumA[CHKSUMFILE_SIZE];
  char sumB[CHKSUMFILE_SIZE];

  numcompares++;

  /* Same name, same file: nothing further to check. */
  if (!strcmp(pathname1, pathname2)) {
    return 1;
  }

  /* Cheap test first: compare the checksums of the two files. */
  if (chksumfile_bypathname(fs, pathname1, sumA) < 0) {
    fprintf(stderr,"Can't checksum path %s\n", pathname1);
    return 0;
  }
  if (chksumfile_bypathname(fs, pathname2, sumB) < 0) {
    fprintf(stderr,"Can't checksum path %s\n", pathname2);
    return 0;
  }
  if (!chksumfile_compare(sumA, sumB)) {
    numdiffchecksum++;
    return 0;  // Checksums differ, so the contents differ
  }

  /* Checksums agree; confirm with a full content comparison. */
  int fdA = Fileops_open(pathname1);
  if (fdA < 0) {
    fprintf(stderr, "Can't open path %s\n", pathname1);
    return 0;
  }
  int fdB = Fileops_open(pathname2);
  if (fdB < 0) {
    Fileops_close(fdA);
    fprintf(stderr, "Can't open path %s\n", pathname2);
    return 0;
  }

  int identical;
  for (;;) {
    int a = Fileops_getchar(fdA);
    int b = Fileops_getchar(fdB);

    if (a != b) {
      identical = 0;  // First position at which the two streams disagree
      break;
    }
    if (a == -1) {
      identical = 1;  // Both streams returned -1 together: no mismatch seen
      break;
    }
  }

  Fileops_close(fdA);
  Fileops_close(fdB);

  if (identical) {
    numsamefiles++;
  } else {
    numdifferentfiles++;
  }

  return identical;
}
Exemplo n.º 4
0
/*
 * Store a pathname in the pathname store.
 *
 * store                 - the store to add to; store->fshandle is used as the
 *                         filesystem handle for checksumming.
 * pathname              - the path to record; the store keeps its own copy.
 * discardDuplicateFiles - when non-zero, a path whose checksum is already in
 *                         the hash table is rejected.
 *
 * Returns the store's private copy of the pathname on success. Returns NULL
 * when the file was discarded as a duplicate, or when allocation,
 * checksumming or a hash-table operation fails.
 *
 * Storage strategy:
 *   - The very first pathname is kept as a bare PathstoreElement in
 *     store->elementList, with no checksum computed.
 *   - When the second pathname arrives, the first element is checksummed and
 *     moved into a newly created hash table, which then replaces
 *     store->elementList. All later pathnames go straight into that table.
 *
 * numfilesseen is advanced only after the corresponding state change has
 * actually happened, so a failed call never leaves store->elementList holding
 * something other than what numfilesseen says it holds.
 */
char*
Pathstore_path(Pathstore *store, char *pathname, int discardDuplicateFiles)
{
	char chksum1[CHKSUMFILE_SIZE];
	struct unixfilesystem *fs = (struct unixfilesystem *) (store->fshandle);
	PathstoreElement *entry;
	_LHASH *hashtable;

	numstores++;

	/* For 1 file case
	 * No hash table or checksum
	 */
	if (numfilesseen == 0) {
		entry = malloc(sizeof *entry);
		if (entry == NULL) {
			printf("memory problem\n");
			return NULL;
		}
		entry->pathname = strdup(pathname);
		if (entry->pathname == NULL) {
			free(entry);
			printf("memory problem 2\n");
			return NULL;
		}
		store->elementList = entry;
		numfilesseen++;
		return entry->pathname;
	}

	/* For >1 file
	 * Use hash table and checksums
	 */
	if (numfilesseen == 1) {
		/* Going from 1 file to 2: migrate the first entry into a table. */
		PathstoreElement *first = store->elementList;

		int err = chksumfile_bypathname(fs, first->pathname, chksum1);
		if (err < 0) {
			/* Report the path that actually failed: the first entry's. */
			fprintf(stderr,"Can't checksum path %s\n", first->pathname);
			return NULL;
		}
		memcpy(first->chksum, chksum1, CHKSUMFILE_SIZE);

		hashtable = lh_new(HashCallback, CompareCallback);
		if (hashtable == NULL) {
			/* Store is still in its valid single-element state. */
			printf("hash problem\n");
			return NULL;
		}

		/* The table exists: switch the store over to it. */
		store->elementList = (void *) hashtable;
		numfilesseen++;

		/* Seed the table with the first entry. */
		lh_insert(hashtable, (char *) first);
		if (lh_error(hashtable)) {
			/*
			 * first->pathname is deliberately not freed: it was returned
			 * to the caller of the call that stored the first file.
			 */
			free(first);
			printf("hash problem\n");
			return NULL;
		}
	} else {
		hashtable = (_LHASH*) (store->elementList);
	}

	/* Checksum of the pathname being added. */
	int err = chksumfile_bypathname(fs, pathname, chksum1);
	if (err < 0) {
		fprintf(stderr,"Can't checksum path %s\n", pathname);
		return NULL;
	}

	/*
	 * Lookup key: only chksum is filled in.
	 * NOTE(review): presumably HashCallback/CompareCallback look only at
	 * chksum - confirm against their definitions.
	 */
	PathstoreElement key;
	memcpy(key.chksum, chksum1, CHKSUMFILE_SIZE);

	/* If discarding duplicates, reject a checksum already in the table. */
	if (discardDuplicateFiles) {
		entry = lh_retrieve(hashtable, (char *) &key);
		if (entry != NULL) {
			numdups++;
			return NULL;
		}
	}

	/* Otherwise add a new entry. */
	entry = malloc(sizeof *entry);
	if (entry == NULL) {
		printf("memory problem\n");
		return NULL;
	}
	memcpy(entry->chksum, chksum1, CHKSUMFILE_SIZE);
	entry->pathname = strdup(pathname);
	if (entry->pathname == NULL) {
		free(entry);
		printf("memory problem 2\n");
		return NULL;
	}

	lh_insert(hashtable, (char *) entry);

	if (lh_error(hashtable)) {
		/* Release the pathname copy as well as the element. */
		free(entry->pathname);
		free(entry);
		printf("hash problem\n");
		return NULL;
	}

	return entry->pathname;
}
Exemplo n.º 5
0
/*
 * Output to the specified file the checksum of the specified pathname and
 * inode as well as all its children if it is a directory.
 *
 * This is used by the grading script, so be careful not to change its output
 * format.
 *
 * fs       - filesystem to read from.
 * pathname - path of the entry to dump.
 * inumber  - inode number that pathname is expected to resolve to.
 * f        - stream that receives one "Path ..." line per entry.
 *
 * The entry is checksummed both by inode number and by pathname; the line is
 * written only when both succeed and agree. Diagnostics go to stderr and the
 * function returns early without writing the line for that entry.
 *
 * For directories, every entry other than "." and ".." is dumped
 * recursively. A directory whose path is too long to have a child name
 * appended within MAXPATH bytes is reported on stderr and its children are
 * skipped, rather than overflowing the path buffer.
 */
void
DumpPathAndChildren(struct unixfilesystem *fs, const char *pathname, int inumber, FILE *f)
{
  enum { MAXPATH = 1024, MAXENTRIES = 10000 };

  struct inode in;
  if (inode_iget(fs, inumber, &in) < 0) {
    fprintf(stderr,"Can't read inode %d \n", inumber);
    return;
  }
  assert(in.i_mode & IALLOC);

  char chksum1[CHKSUMFILE_SIZE];
  if (chksumfile_byinumber(fs, inumber, chksum1) < 0) {
    fprintf(stderr,"Can't checksum inode %d path %s\n", inumber, pathname);
    return;
  }

  char chksum2[CHKSUMFILE_SIZE];
  if (chksumfile_bypathname(fs, pathname, chksum2) < 0) {
    fprintf(stderr,"Can't checksum inode %d path %s\n", inumber, pathname);
    return;
  }

  /* chksumfile_compare yields zero when the two checksums differ. */
  if (!chksumfile_compare(chksum1, chksum2)) {
    fprintf(stderr,"Pathname checksum of %s differs from inode %d\n", pathname, inumber);
    return;
  }

  char chksumstring[CHKSUMFILE_STRINGSIZE];
  chksumfile_cvt2string(chksum2, chksumstring);
  int size = inode_getsize(&in);
  fprintf(f, "Path %s %d mode 0x%x size %d checksum %s\n",pathname,inumber,in.i_mode, size, chksumstring);

  if (pathname[0] == '/' && pathname[1] == 0) {
    /*
     * pathname == "/": skip the slash so children are printed as "/name"
     * rather than "//name". Testing pathname[0] first avoids reading past
     * the terminator of an empty string.
     */
    pathname++;
  }

  if ((in.i_mode & IFMT) != IFDIR) {
    return;
  }

  /* Leave room for a '/', a child name and the terminator. */
  if (strlen(pathname) > MAXPATH-16) {
    fprintf(stderr, "Too deep of directories %s\n", pathname);
    return;
  }

  struct direntv6 direntries[MAXENTRIES];
  int numentries = GetDirEntries(fs, inumber, direntries, MAXENTRIES);

  for (int i = 0; i < numentries; i++) {
    char *n = direntries[i].d_name;
    if (n[0] == '.') {
      if ((n[1] == 0) || ((n[1] == '.') && (n[2] == 0))) {
        /* Skip over "." and ".." */
        continue;
      }
    }

    /* Bounded formatting: never write past the end of nextpath. */
    char nextpath[MAXPATH];
    int len = snprintf(nextpath, sizeof nextpath, "%s/%s", pathname, direntries[i].d_name);
    if (len < 0 || (size_t) len >= sizeof nextpath) {
      fprintf(stderr, "Too deep of directories %s\n", pathname);
      continue;
    }
    DumpPathAndChildren(fs, nextpath, direntries[i].d_inumber, f);
  }
}