int main(int argc, char **argv)
{
  SNP **snpmarkers ;
  Indiv **indivmarkers ;
  int numsnps, numindivs ;
  unsigned char *packg1, *packg2 ;

  int **snppos ;
  int *snpindx ;
  int  lsnplist, lindlist, numeg ;
  int i,j; 
  SNP *cupt, *cupt1, *cupt2, *cupt3 ;
  Indiv *indx ;

  int ch1, ch2 ;
  int fmnum , lmnum ;
  int num, n1, n2 ;
  int nkill = 0 ;
  int t, k, x ;

  int nignore, numrisks = 1 ;

  char **genolist ;
  int numgenolist ;
  int maxmiss ; 

  tersem = YES ;     // no snp counts

  readcommands(argc, argv) ;

  setomode(&outputmode, omode) ;
  packmode = YES ;
  settersemode(tersem) ;

  nums1 = 
    getsnps(snp1, &snpm1, 0.0, NULL, &nignore, numrisks) ;

  putped(1) ;
  freeped() ;

  nums2 = 
    getsnps(snp2, &snpm2, 0.0, NULL, &nignore, numrisks) ;

  putped(2) ;
  freeped() ;

  for (x=0; x<nums1; ++x)  {  
   cupt1 = snpm1[x] ;
   cupt1 -> tagnumber = -1 ;
  }
  for (x=0; x<nums2; ++x)  {  
   cupt2 = snpm2[x] ;
   t = x %1000 ;   
// if (t==0) printf("zz %d %d\n", x, nums2) ;

   k = snpindex(snpm1, nums1, cupt2 -> ID) ;  
   if (k<0) { 
    cupt2 -> ignore = YES ;
    continue ;
   }
   cupt1 = snpm1[k] ;
   cupt1 -> tagnumber = x ;
   t = checkmatch(cupt1, cupt2) ;
   if (t==1) continue ;
   if (t==2) {  
    cupt2 -> isrfake = YES ;
    continue ;
   }
   if (t<0) {  
    cupt1  -> ignore = cupt2 -> ignore = YES ;
    continue ;
   }
   printf("allele funny: %s", cupt1 -> ID) ;
   printalleles(cupt1, stdout) ;
   printalleles(cupt2, stdout) ;
   printnl() ;
   cupt1  -> ignore = cupt2 -> ignore = YES ;
   continue ;
  }
  freesnpindex() ;
  numi1 = getindivs(ind1, &indm1) ;
  numi2 = getindivs(ind2, &indm2) ;

  for (x=0; x<numi2; ++x) {  
   k = indindex(indm1, numi1, indm2[x] -> ID) ;
// this code could be modified to allow duplicate individuals
   if (k>=0) fatalx("dup ind: %s\n", indm2[x] -> ID) ;  // fix later?  
  }

  setgenotypename(&geno1, ind1) ;
  getped(1) ;
  getgenos(geno1, snpm1, indm1, 
     nums1, numi1, nignore) ;

  packg1 = (unsigned char *) getpackgenos() ;
  clearpackgenos() ;

  setgenotypename(&geno2, ind2) ;
  getped(2) ;
  getgenos(geno2, snpm2, indm2, 
     nums2, numi2, nignore) ;

  packg2 = (unsigned char *) getpackgenos() ;
  numindivs = mergeit(snpm1, snpm2, &indm1, indm2, nums1, nums2, numi1, numi2) ;

  snpmarkers = snpm1 ; 
  numsnps = nums1 ;
  indivmarkers = indm1 ; 

  free(packg1) ;
  free(packg2) ;

  outfiles(snpoutfilename, indoutfilename, genooutfilename, 
   snpmarkers, indivmarkers, numsnps, numindivs, packout, ogmode) ;

  printf("##end of mergeit run\n") ;
  return 0 ;
}
Example #2
0
/*
 * fdupes entry point (early revision).
 *
 * Parses the command-line options into the global `flags`, builds the
 * list of candidate files by calling grokdir() on every remaining
 * argument, and then walks that list, comparing each file against the
 * tree rooted at `checktree`.
 *
 * NOTE(review): this excerpt stops right after the scan loop; the rest
 * of the function (including its closing brace) is not visible here.
 */
int main(int argc, char **argv) {
  int x;
  int opt;
  FILE *file1;
  FILE *file2;
  file_t *files = NULL;
  file_t *curfile;
  file_t *match = NULL;
  filetree_t *checktree = NULL;
  int filecount = 0;
  int progress = 0;
 
  /* Long-option aliases; each maps onto the short option in the last
     column, and none takes an argument (has_arg is 0 throughout). */
  static struct option long_options[] = 
  {
    { "omitfirst", 0, 0, 'f' },
    { "recurse", 0, 0, 'r' },
    { "quiet", 0, 0, 'q' },
    { "sameline", 0, 0, '1' },
    { "size", 0, 0, 'S' },
    { "symlinks", 0, 0, 's' },
    { "hardlinks", 0, 0, 'H' },
    { "noempty", 0, 0, 'n' },
    { "delete", 0, 0, 'd' },
    { "version", 0, 0, 'v' },
    { "help", 0, 0, 'h' },
    { 0, 0, 0, 0 }
  };

  program_name = argv[0];

  /* Each recognised option sets one bit in the global `flags`, except
     -v and -h, which print and exit immediately. */
  while ((opt = getopt_long(argc, argv, "frq1SsHndvh", long_options, NULL)) != EOF) {
    switch (opt) {
    case 'f':
      SETFLAG(flags, F_OMITFIRST);
      break;
    case 'r':
      SETFLAG(flags, F_RECURSE);
      break;
    case 'q':
      SETFLAG(flags, F_HIDEPROGRESS);
      break;
    case '1':
      SETFLAG(flags, F_DSAMELINE);
      break;
    case 'S':
      SETFLAG(flags, F_SHOWSIZE);
      break;
    case 's':
      SETFLAG(flags, F_FOLLOWLINKS);
      break;
    case 'H':
      SETFLAG(flags, F_CONSIDERHARDLINKS);
      break;
    case 'n':
      SETFLAG(flags, F_EXCLUDEEMPTY);
      break;
    case 'd':
      SETFLAG(flags, F_DELETEFILES);
      break;
    case 'v':
      printf("fdupes %s\n", VERSION);
      exit(0);
    case 'h':
      /* Note: help exits with status 1, not 0. */
      help_text();
      exit(1);
    default:
      fprintf(stderr, "Try `fdupes --help' for more information\n");
      exit(1);
    }
  }

  /* At least one non-option argument is required. */
  if (optind >= argc) {
    errormsg("no directories specified\n");
    exit(1);
  }

  /* grokdir() appends to `files`; its return values are summed into
     filecount, which is used below as the progress denominator. */
  for (x = optind; x < argc; x++) filecount += grokdir(argv[x], &files);

  /* Nothing collected: exit successfully. */
  if (!files) exit(0);
  
  curfile = files;

  /* Walk the linked list of collected files. */
  while (curfile) {
    /* The first file seeds the tree; every later file is looked up with
       checkmatch(). `match` keeps its initial NULL on the seeding pass. */
    if (!checktree) 
#ifndef EXPERIMENTAL_RBTREE
      registerfile(&checktree, curfile);
#else
      registerfile(&checktree, NULL, TREE_ROOT, curfile);
#endif
    else 
      match = checkmatch(&checktree, checktree, curfile);

    /* A candidate match is confirmed by comparing the two open files. */
    if (match != NULL) {
      file1 = fopen(curfile->d_name, "rb");
      if (!file1) {
	/* Unreadable file: skip it. This path (and the one below) jumps
	   past the progress update at the bottom of the loop. */
	curfile = curfile->next;
	continue;
      }

      file2 = fopen(match->d_name, "rb");
      if (!file2) {
	fclose(file1);
	curfile = curfile->next;
	continue;
      }
 
      if (confirmmatch(file1, file2)) {
	/* Push curfile onto the front of match's duplicates chain. */
	match->hasdupes = 1;
        curfile->duplicates = match->duplicates;
        match->duplicates = curfile;
      }
      
      fclose(file1);
      fclose(file2);
    }

    curfile = curfile->next;

    /* Progress goes to stderr; "\r" rewrites the same terminal line. */
    if (!ISFLAG(flags, F_HIDEPROGRESS)) {
      fprintf(stderr, "\rProgress [%d/%d] %d%% ", progress, filecount,
       (int)((float) progress / (float) filecount * 100.0));
      progress++;
    }
  }
Example #3
0
/*
 * fdupes entry point.
 *
 * 1. Parses the command-line options into the global `flags`.
 * 2. Rejects incompatible combinations (--recurse with --recurse:,
 *    --summarize with --delete).
 * 3. Builds the candidate list by calling grokdir() on every non-option
 *    argument; with -R / --recurse:, F_RECURSE is switched on only for
 *    the arguments that follow that option.
 * 4. Walks the list, looking each file up with checkmatch() and
 *    confirming candidates with confirmmatch() before recording the
 *    pair via registerpair().
 * 5. Deletes, summarizes or prints the matches depending on the flags.
 * 6. Frees the file list, the cloned argument vector and the tree.
 *
 * Returns 0 on success; the error paths call exit(1) directly.
 */
int main(int argc, char **argv) {
  int x;
  int opt;
  FILE *file1;
  FILE *file2;
  file_t *files = NULL;
  file_t *curfile;
  file_t **match = NULL;
  filetree_t *checktree = NULL;
  int filecount = 0;
  int progress = 0;
  char **oldargv;
  int firstrecurse;
  
#ifndef OMIT_GETOPT_LONG
  /* Long-option aliases; each maps onto the short option in the last
     column. */
  static struct option long_options[] = 
  {
    { "omitfirst", 0, 0, 'f' },
    { "recurse", 0, 0, 'r' },
    { "recursive", 0, 0, 'r' },
    { "recurse:", 0, 0, 'R' },
    { "recursive:", 0, 0, 'R' },
    { "quiet", 0, 0, 'q' },
    { "sameline", 0, 0, '1' },
    { "size", 0, 0, 'S' },
    { "symlinks", 0, 0, 's' },
    { "hardlinks", 0, 0, 'H' },
    { "relink", 0, 0, 'l' },
    { "noempty", 0, 0, 'n' },
    { "delete", 0, 0, 'd' },
    { "version", 0, 0, 'v' },
    { "help", 0, 0, 'h' },
    { "noprompt", 0, 0, 'N' },
    { "summarize", 0, 0, 'm'},
    { "summary", 0, 0, 'm' },
    { 0, 0, 0, 0 }
  };
#define GETOPT getopt_long
#else
#define GETOPT getopt
#endif

  program_name = argv[0];

  /* Keep a copy of the argument vector for the nonoptafter() calls
     below, which take both the copy and argv. */
  oldargv = cloneargs(argc, argv);

  while ((opt = GETOPT(argc, argv, "frRq1Ss::HlndvhNm"
#ifndef OMIT_GETOPT_LONG
          , long_options, NULL
#endif
          )) != EOF) {
    switch (opt) {
    case 'f':
      SETFLAG(flags, F_OMITFIRST);
      break;
    case 'r':
      SETFLAG(flags, F_RECURSE);
      break;
    case 'R':
      SETFLAG(flags, F_RECURSEAFTER);
      break;
    case 'q':
      SETFLAG(flags, F_HIDEPROGRESS);
      break;
    case '1':
      SETFLAG(flags, F_DSAMELINE);
      break;
    case 'S':
      SETFLAG(flags, F_SHOWSIZE);
      break;
    case 's':
      SETFLAG(flags, F_FOLLOWLINKS);
      break;
    case 'H':
      SETFLAG(flags, F_CONSIDERHARDLINKS);
      break;
    case 'n':
      SETFLAG(flags, F_EXCLUDEEMPTY);
      break;
    case 'd':
      SETFLAG(flags, F_DELETEFILES);
      break;
    case 'v':
      printf("fdupes %s\n", VERSION);
      exit(0);
    case 'h':
      /* Note: help exits with status 1, not 0. */
      help_text();
      exit(1);
    case 'N':
      SETFLAG(flags, F_NOPROMPT);
      break;
    case 'm':
      SETFLAG(flags, F_SUMMARIZEMATCHES);
      break;

    default:
      fprintf(stderr, "Try `fdupes --help' for more information.\n");
      exit(1);
    }
  }

  /* At least one non-option argument is required. */
  if (optind >= argc) {
    errormsg("no directories specified\n");
    exit(1);
  }

  if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) {
    errormsg("options --recurse and --recurse: are not compatible\n");
    exit(1);
  }

  if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
    errormsg("options --summarize and --delete are not compatible\n");
    exit(1);
  }

  if (ISFLAG(flags, F_RECURSEAFTER)) {
    /* Locate the first argument after the long form, then fall back to
       the short form; a result equal to argc means "not found". */
    firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind);
    
    if (firstrecurse == argc)
      firstrecurse = nonoptafter("-R", argc, oldargv, argv, optind);

    if (firstrecurse == argc) {
      errormsg("-R option must be isolated from other options\n");
      exit(1);
    }

    /* F_RECURSE is not set for directories before --recurse: */
    for (x = optind; x < firstrecurse; x++)
      filecount += grokdir(argv[x], &files);

    /* Set F_RECURSE for directories after --recurse: */
    SETFLAG(flags, F_RECURSE);

    for (x = firstrecurse; x < argc; x++)
      filecount += grokdir(argv[x], &files);
  } else {
    for (x = optind; x < argc; x++)
      filecount += grokdir(argv[x], &files);
  }

  /* Nothing collected: blank the progress line and exit successfully. */
  if (!files) {
    if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
    exit(0);
  }
  
  curfile = files;

  /* Walk the linked list of collected files. */
  while (curfile) {
    /* The first file seeds the tree; every later file is looked up with
       checkmatch(). `match` keeps its initial NULL on the seeding pass. */
    if (!checktree) 
      registerfile(&checktree, curfile);
    else 
      match = checkmatch(&checktree, checktree, curfile);

    /* A candidate match is confirmed by comparing the two open files. */
    if (match != NULL) {
      file1 = fopen(curfile->d_name, "rb");
      if (!file1) {
        /* Unreadable file: skip it. This path (and the one below) jumps
           past the progress update at the bottom of the loop. */
        curfile = curfile->next;
        continue;
      }
      
      file2 = fopen((*match)->d_name, "rb");
      if (!file2) {
        fclose(file1);
        curfile = curfile->next;
        continue;
      }

      if (confirmmatch(file1, file2)) {
        registerpair(match, curfile, sort_pairs_by_mtime);
      }
      
      fclose(file1);
      fclose(file2);
    }

    curfile = curfile->next;

    /* Progress goes to stderr; "\r" rewrites the same terminal line. */
    if (!ISFLAG(flags, F_HIDEPROGRESS)) {
      fprintf(stderr, "\rProgress [%d/%d] %d%% ", progress, filecount,
       (int)((float) progress / (float) filecount * 100.0));
      progress++;
    }
  }

  /* Blank the progress line before producing real output. */
  if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");

  if (ISFLAG(flags, F_DELETEFILES)) {
    if (ISFLAG(flags, F_NOPROMPT)) {
      deletefiles(files, 0, 0);
    } else {
      /* Re-attach stdin to the terminal for the interactive prompt.
         freopen() rebinds the existing stream in place, so stdin must
         not be assigned to (it need not be a modifiable lvalue), and a
         failure has to be caught here rather than handing a NULL
         stream to deletefiles(). */
      if (freopen("/dev/tty", "r", stdin) == NULL) {
        errormsg("could not open terminal for input\n");
        exit(1);
      }
      deletefiles(files, 1, stdin);
    }
  } else if (ISFLAG(flags, F_SUMMARIZEMATCHES)) {
    summarizematches(files);
  } else {
    printmatches(files);
  }

  /* Release every list node together with the strings it holds. */
  while (files) {
    curfile = files->next;
    free(files->d_name);
    free(files->crcsignature);
    free(files->crcpartial);
    free(files);
    files = curfile;
  }

  for (x = 0; x < argc; x++)
    free(oldargv[x]);

  free(oldargv);

  purgetree(checktree);

  return 0;
}
Example #4
0
/*
 * Make sure entry->crcsignature is populated.
 *
 * If it is still NULL, the signature is obtained from getcrcsignature()
 * and copied into freshly allocated memory. Returns 1 when the signature
 * is available afterwards and 0 when getcrcsignature() returned NULL.
 * Running out of memory reports an error and terminates the program.
 */
static int checkmatch_cache_signature(file_t *entry)
{
  char *signature;

  if (entry->crcsignature != NULL)
    return 1;

  signature = getcrcsignature(entry->d_name);
  if (signature == NULL)
    return 0;

  entry->crcsignature = (char*) malloc(strlen(signature)+1);
  if (entry->crcsignature == NULL) {
    errormsg("out of memory\n");
    exit(1);
  }
  strcpy(entry->crcsignature, signature);

  return 1;
}

/*
 * Search the tree below `checktree` for a file that compares equal to
 * `file`, inserting `file` as a new leaf when none is found.
 *
 * Ordering is by file size first; only files of equal size are compared
 * by CRC signature (computed on demand and cached on the file_t).
 *
 * Returns the matching file_t, or NULL when
 *   - hard links are not being considered and `file` has the same inode
 *     as a node visited on the way down,
 *   - a CRC signature could not be obtained, or
 *   - no equal node exists (in which case `file` has been registered).
 *
 * `root` is only handed on to registerfile() in the EXPERIMENTAL_RBTREE
 * build.
 */
file_t *checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
{
  filetree_t *node = checktree;

  /* Descend iteratively; each pass handles exactly one tree node. */
  for (;;) {
    int order;
    off_t candidate_size;

    /* If inodes are equal one of the files is a hard link, which
       is usually not accidental. We don't want to flag them as
       duplicates, unless the user specifies otherwise. */
    if (!ISFLAG(flags, F_CONSIDERHARDLINKS) &&
        getinode(file->d_name) == node->file->inode)
      return NULL;

    candidate_size = filesize(file->d_name);

    if (candidate_size < node->file->size) {
      order = -1;
    } else if (candidate_size > node->file->size) {
      order = 1;
    } else {
      /* Same size: fall back to the signatures, node's file first. */
      if (!checkmatch_cache_signature(node->file))
        return NULL;
      if (!checkmatch_cache_signature(file))
        return NULL;

      order = strcmp(file->crcsignature, node->file->crcsignature);
    }

    if (order == 0)
      return node->file;

    if (order < 0) {
      if (node->left == NULL) {
#ifndef EXPERIMENTAL_RBTREE
        registerfile(&(node->left), file);
#else
        registerfile(root, node, TREE_LEFT, file);
#endif
        return NULL;
      }
      node = node->left;
    } else {
      if (node->right == NULL) {
#ifndef EXPERIMENTAL_RBTREE
        registerfile(&(node->right), file);
#else
        registerfile(root, node, TREE_RIGHT, file);
#endif
        return NULL;
      }
      node = node->right;
    }
  }
}