int main(int argc, char *argv[]) { int ch, i, stdinflag = 0; char cflag = 0, mflag = 0; char *outfile, *outpath = NULL; struct field *fldtab; size_t fldtab_sz, fld_cnt; size_t alloc_size; struct filelist filelist; int num_input_files; FILE *outfp = NULL; struct rlimit rl; struct stat st; setlocale(LC_ALL, ""); /* bump RLIMIT_NOFILE to maximum our hard limit allows */ if (getrlimit(RLIMIT_NOFILE, &rl) < 0) err(2, "getrlimit"); rl.rlim_cur = rl.rlim_max; if (setrlimit(RLIMIT_NOFILE, &rl) < 0) err(2, "setrlimit"); d_mask[REC_D = '\n'] = REC_D_F; d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; /* fldtab[0] is the global options. */ fldtab_sz = 3; fld_cnt = 0; alloc_size = fldtab_sz * sizeof(*fldtab); fldtab = malloc(alloc_size); if (fldtab == NULL) err(1, "Cannot allocate %zu bytes", alloc_size); memset(fldtab, 0, alloc_size); #define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux" /* Convert "+field" args to -f format */ fixit(&argc, argv, SORT_OPTS); if (!(tmpdir = getenv("TMPDIR"))) tmpdir = _PATH_TMP; while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) { switch (ch) { case 'b': fldtab[0].flags |= BI | BT; break; case 'c': cflag = 1; break; case 'D': /* Debug flags */ for (i = 0; optarg[i]; i++) debug_flags |= 1 << (optarg[i] & 31); break; case 'd': case 'f': case 'i': case 'n': case 'l': fldtab[0].flags |= optval(ch, 0); break; case 'H': /* -H was ; use merge sort for blocks of large files' */ /* That is now the default. */ break; case 'k': alloc_size = (fldtab_sz + 1) * sizeof(*fldtab); fldtab = realloc(fldtab, alloc_size); if (fldtab == NULL) err(1, "Cannot re-allocate %zu bytes", alloc_size); memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); fldtab_sz++; setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); break; case 'm': mflag = 1; break; case 'o': outpath = optarg; break; case 'r': REVERSE = 1; break; case 's': /* * Nominally 'stable sort', keep lines with equal keys * in input file order. (Default for NetBSD) * (-s for GNU sort compatibility.) */ posix_sort = 0; break; case 'S': /* * Reverse of -s! * This needs to enforce a POSIX sort where records * with equal keys are then sorted by the raw data. * Currently not implemented! * (using libc radixsort() v sradixsort() doesn't * have the desired effect.) */ posix_sort = 1; break; case 't': if (SEP_FLAG) usage("multiple field delimiters"); SEP_FLAG = 1; d_mask[' '] &= ~FLD_D; d_mask['\t'] &= ~FLD_D; d_mask[(u_char)*optarg] |= FLD_D; if (d_mask[(u_char)*optarg] & REC_D_F) errx(2, "record/field delimiter clash"); break; case 'R': if (REC_D != '\n') usage("multiple record delimiters"); REC_D = *optarg; if (REC_D == '\n') break; if (optarg[1] != '\0') { char *ep; int t = 0; if (optarg[0] == '\\') optarg++, t = 8; REC_D = (int)strtol(optarg, &ep, t); if (*ep != '\0' || REC_D < 0 || REC_D >= (int)__arraycount(d_mask)) errx(2, "invalid record delimiter %s", optarg); } d_mask['\n'] = d_mask[' ']; d_mask[REC_D] = REC_D_F; break; case 'T': /* -T tmpdir */ tmpdir = optarg; break; case 'u': UNIQUE = 1; break; case '?': default: usage(NULL); } } if (UNIQUE) /* Don't sort on raw record if keys match */ posix_sort = 0; if (cflag && argc > optind+1) errx(2, "too many input files for -c option"); if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { outpath = argv[argc-1]; argc -= 2; } if (mflag && argc - optind > (MAXFCT - (16+1))*16) errx(2, "too many input files for -m option"); for (i = optind; i < argc; i++) { /* allow one occurrence of /dev/stdin */ if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { if (stdinflag) warnx("ignoring extra \"%s\" in file list", argv[i]); else stdinflag = 1; /* change to /dev/stdin if '-' */ if (argv[i][0] == '-') { static char path_stdin[] = _PATH_STDIN; argv[i] = path_stdin; } } else if ((ch = access(argv[i], R_OK))) err(2, "%s", argv[i]); } if (fldtab[1].icol.num == 0) { /* No sort key specified */ if (fldtab[0].flags & (I|D|F|N|L)) { /* Modified - generate a key that covers the line */ fldtab[0].flags &= ~(BI|BT); setfield("1", &fldtab[++fld_cnt], fldtab->flags); fldreset(fldtab); } else { /* Unmodified, just compare the line */ SINGL_FLD = 1; fldtab[0].icol.num = 1; } } else { fldreset(fldtab); } settables(); if (optind == argc) { static const char * const names[] = { _PATH_STDIN, NULL }; filelist.names = names; num_input_files = 1; } else { filelist.names = (const char * const *) &argv[optind]; num_input_files = argc - optind; } if (cflag) { order(&filelist, fldtab); /* NOT REACHED */ } if (!outpath) { toutpath[0] = '\0'; /* path not used in this case */ outfile = outpath = toutpath; outfp = stdout; } else if (lstat(outpath, &st) == 0 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { /* output file exists and isn't character or block device */ struct sigaction act; static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0 }; int outfd; errno = 0; if (access(outpath, W_OK)) err(2, "%s", outpath); (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", outpath); if ((outfd = mkstemp(toutpath)) == -1) err(2, "Cannot create temporary file `%s'", toutpath); (void)atexit(cleanup); act.sa_handler = onsignal; (void) sigemptyset(&act.sa_mask); act.sa_flags = SA_RESTART | SA_RESETHAND; for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ sigaction(sigtable[i], &act, 0); outfile = toutpath; if ((outfp = fdopen(outfd, "w")) == NULL) err(2, "Cannot open temporary file `%s'", toutpath); } else { outfile = outpath; if ((outfp = fopen(outfile, "w")) == NULL) err(2, "output file %s", outfile); } if (mflag) fmerge(&filelist, num_input_files, outfp, fldtab); else fsort(&filelist, num_input_files, outfp, fldtab); if (outfile != outpath) { if (access(outfile, F_OK)) err(2, "%s", outfile); /* * Copy file permissions bits of the original file. * st is initialized above, when we create the * temporary spool file. */ if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { err(2, "cannot chmod %s: output left in %s", outpath, outfile); } (void)unlink(outpath); if (link(outfile, outpath)) err(2, "cannot link %s: output left in %s", outpath, outfile); (void)unlink(outfile); toutpath[0] = 0; } exit(0); }
void fsort(int binno, int depth, union f_handle infiles, int nfiles, FILE *outfp, struct field *ftbl) { u_char *weights, **keypos, *bufend, *tmpbuf; static u_char *buffer, **keylist; static size_t bufsize; int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0; int c, nelem; long sizes[NBINS+1]; union f_handle tfiles, mstart = {MAXFCT-16}; int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *); RECHEADER *crec; struct field tfield[2]; FILE *prevfp, *tailfp[FSORTMAX+1]; memset(tailfp, 0, sizeof(tailfp)); prevfp = outfp; memset(tfield, 0, sizeof(tfield)); if (ftbl[0].flags & R) tfield[0].weights = Rascii; else tfield[0].weights = ascii; tfield[0].icol.num = 1; weights = ftbl[0].weights; if (buffer == NULL) { bufsize = BUFSIZE; if ((buffer = malloc(bufsize)) == NULL || (keylist = calloc(MAXNUM, sizeof(u_char *))) == NULL) err(2, NULL); } bufend = buffer + bufsize - 1; if (binno >= 0) { tfiles.top = infiles.top + nfiles; get = getnext; } else { tfiles.top = 0; if (SINGL_FLD) get = makeline; else get = makekey; } for (;;) { memset(sizes, 0, sizeof(sizes)); c = ntfiles = 0; if (binno == weights[REC_D] && !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */ rd_append(weights[REC_D], infiles, nfiles, prevfp, buffer, bufend); break; } else if (binno == weights[REC_D]) { depth = 0; /* start over on flat weights */ ftbl = tfield; weights = ftbl[0].weights; } while (c != EOF) { keypos = keylist; nelem = 0; crec = (RECHEADER *) buffer; while ((c = get(binno, infiles, nfiles, crec, bufend, ftbl)) == 0) { *keypos++ = crec->data + depth; if (++nelem == MAXNUM) { c = BUFFEND; break; } crec = (RECHEADER *)((char *)crec + SALIGN(crec->length) + sizeof(TRECHEADER)); } /* * buffer was too small for data, allocate * a bigger buffer. */ if (c == BUFFEND && nelem == 0) { bufsize *= 2; tmpbuf = realloc(buffer, bufsize); if (!tmpbuf) err(2, "failed to realloc buffer"); crec = (RECHEADER *) (tmpbuf + ((u_char *)crec - buffer)); buffer = tmpbuf; bufend = buffer + bufsize - 1; continue; } if (c == BUFFEND || ntfiles || mfct) { /* push */ if (panic >= PANIC) { fstack[MAXFCT-16+mfct].fp = ftmp(); if (radixsort((const u_char **)keylist, nelem, weights, REC_D)) err(2, NULL); append(keylist, nelem, depth, fstack[ MAXFCT-16+mfct].fp, putrec, ftbl); mfct++; /* reduce number of open files */ if (mfct == 16 ||(c == EOF && ntfiles)) { fstack[tfiles.top + ntfiles].fp = ftmp(); fmerge(0, mstart, mfct, geteasy, fstack[tfiles.top+ntfiles].fp, putrec, ftbl); ntfiles++; mfct = 0; } } else { fstack[tfiles.top + ntfiles].fp= ftmp(); onepass(keylist, depth, nelem, sizes, weights, fstack[tfiles.top+ntfiles].fp); ntfiles++; } } } get = getnext; if (!ntfiles && !mfct) { /* everything in memory--pop */ if (nelem > 1) { if (STABLE) { i = sradixsort((const u_char **)keylist, nelem, weights, REC_D); } else { i = radixsort((const u_char **)keylist, nelem, weights, REC_D); } if (i) err(2, NULL); } append(keylist, nelem, depth, outfp, putline, ftbl); break; /* pop */ } if (panic >= PANIC) { if (!ntfiles) fmerge(0, mstart, mfct, geteasy, outfp, putline, ftbl); else fmerge(0, tfiles, ntfiles, geteasy, outfp, putline, ftbl); break; } total = maxb = lastb = 0; /* find if one bin dominates */ for (i = 0; i < NBINS; i++) if (sizes[i]) { if (sizes[i] > sizes[maxb]) maxb = i; lastb = i; total += sizes[i]; } if (sizes[maxb] < max((total / 2) , BUFSIZE)) maxb = lastb; /* otherwise pop after last bin */ fstack[tfiles.top].lastb = lastb; fstack[tfiles.top].maxb = maxb; /* start refining next level. */ get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */ for (i = 0; i < maxb; i++) { if (!sizes[i]) /* bin empty; step ahead file offset */ get(i, tfiles, ntfiles, crec, bufend, 0); else fsort(i, depth+1, tfiles, ntfiles, outfp, ftbl); } if (lastb != maxb) { if (prevfp != outfp) tailfp[panic] = prevfp; prevfp = ftmp(); for (i = maxb+1; i <= lastb; i++) if (!sizes[i]) get(i, tfiles, ntfiles, crec, bufend,0); else fsort(i, depth+1, tfiles, ntfiles, prevfp, ftbl); } /* sort biggest (or last) bin at this level */ depth++; panic++; binno = maxb; infiles.top = tfiles.top; /* getnext will free tfiles, */ nfiles = ntfiles; /* so overwrite them */ } if (prevfp != outfp) { concat(outfp, prevfp); fclose(prevfp); } for (i = panic; i >= 0; --i) if (tailfp[i]) { concat(outfp, tailfp[i]); fclose(tailfp[i]); } }