void fsort(int binno, int depth, union f_handle infiles, int nfiles, FILE *outfp, struct field *ftbl) { u_char *weights, **keypos, *bufend, *tmpbuf; static u_char *buffer, **keylist; static size_t bufsize; int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0; int c, nelem; long sizes[NBINS+1]; union f_handle tfiles, mstart = {MAXFCT-16}; int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *); RECHEADER *crec; struct field tfield[2]; FILE *prevfp, *tailfp[FSORTMAX+1]; memset(tailfp, 0, sizeof(tailfp)); prevfp = outfp; memset(tfield, 0, sizeof(tfield)); if (ftbl[0].flags & R) tfield[0].weights = Rascii; else tfield[0].weights = ascii; tfield[0].icol.num = 1; weights = ftbl[0].weights; if (buffer == NULL) { bufsize = BUFSIZE; if ((buffer = malloc(bufsize)) == NULL || (keylist = calloc(MAXNUM, sizeof(u_char *))) == NULL) err(2, NULL); } bufend = buffer + bufsize - 1; if (binno >= 0) { tfiles.top = infiles.top + nfiles; get = getnext; } else { tfiles.top = 0; if (SINGL_FLD) get = makeline; else get = makekey; } for (;;) { memset(sizes, 0, sizeof(sizes)); c = ntfiles = 0; if (binno == weights[REC_D] && !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */ rd_append(weights[REC_D], infiles, nfiles, prevfp, buffer, bufend); break; } else if (binno == weights[REC_D]) { depth = 0; /* start over on flat weights */ ftbl = tfield; weights = ftbl[0].weights; } while (c != EOF) { keypos = keylist; nelem = 0; crec = (RECHEADER *) buffer; while ((c = get(binno, infiles, nfiles, crec, bufend, ftbl)) == 0) { *keypos++ = crec->data + depth; if (++nelem == MAXNUM) { c = BUFFEND; break; } crec = (RECHEADER *)((char *)crec + SALIGN(crec->length) + sizeof(TRECHEADER)); } /* * buffer was too small for data, allocate * a bigger buffer. */ if (c == BUFFEND && nelem == 0) { bufsize *= 2; tmpbuf = realloc(buffer, bufsize); if (!tmpbuf) err(2, "failed to realloc buffer"); crec = (RECHEADER *) (tmpbuf + ((u_char *)crec - buffer)); buffer = tmpbuf; bufend = buffer + bufsize - 1; continue; } if (c == BUFFEND || ntfiles || mfct) { /* push */ if (panic >= PANIC) { fstack[MAXFCT-16+mfct].fp = ftmp(); if (radixsort((const u_char **)keylist, nelem, weights, REC_D)) err(2, NULL); append(keylist, nelem, depth, fstack[ MAXFCT-16+mfct].fp, putrec, ftbl); mfct++; /* reduce number of open files */ if (mfct == 16 ||(c == EOF && ntfiles)) { fstack[tfiles.top + ntfiles].fp = ftmp(); fmerge(0, mstart, mfct, geteasy, fstack[tfiles.top+ntfiles].fp, putrec, ftbl); ntfiles++; mfct = 0; } } else { fstack[tfiles.top + ntfiles].fp= ftmp(); onepass(keylist, depth, nelem, sizes, weights, fstack[tfiles.top+ntfiles].fp); ntfiles++; } } } get = getnext; if (!ntfiles && !mfct) { /* everything in memory--pop */ if (nelem > 1) { if (STABLE) { i = sradixsort((const u_char **)keylist, nelem, weights, REC_D); } else { i = radixsort((const u_char **)keylist, nelem, weights, REC_D); } if (i) err(2, NULL); } append(keylist, nelem, depth, outfp, putline, ftbl); break; /* pop */ } if (panic >= PANIC) { if (!ntfiles) fmerge(0, mstart, mfct, geteasy, outfp, putline, ftbl); else fmerge(0, tfiles, ntfiles, geteasy, outfp, putline, ftbl); break; } total = maxb = lastb = 0; /* find if one bin dominates */ for (i = 0; i < NBINS; i++) if (sizes[i]) { if (sizes[i] > sizes[maxb]) maxb = i; lastb = i; total += sizes[i]; } if (sizes[maxb] < max((total / 2) , BUFSIZE)) maxb = lastb; /* otherwise pop after last bin */ fstack[tfiles.top].lastb = lastb; fstack[tfiles.top].maxb = maxb; /* start refining next level. */ get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */ for (i = 0; i < maxb; i++) { if (!sizes[i]) /* bin empty; step ahead file offset */ get(i, tfiles, ntfiles, crec, bufend, 0); else fsort(i, depth+1, tfiles, ntfiles, outfp, ftbl); } if (lastb != maxb) { if (prevfp != outfp) tailfp[panic] = prevfp; prevfp = ftmp(); for (i = maxb+1; i <= lastb; i++) if (!sizes[i]) get(i, tfiles, ntfiles, crec, bufend,0); else fsort(i, depth+1, tfiles, ntfiles, prevfp, ftbl); } /* sort biggest (or last) bin at this level */ depth++; panic++; binno = maxb; infiles.top = tfiles.top; /* getnext will free tfiles, */ nfiles = ntfiles; /* so overwrite them */ } if (prevfp != outfp) { concat(outfp, prevfp); fclose(prevfp); } for (i = panic; i >= 0; --i) if (tailfp[i]) { concat(outfp, tailfp[i]); fclose(tailfp[i]); } }
void fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl) { RECHEADER **keylist; RECHEADER **keypos, **keyp; RECHEADER *buffer; size_t bufsize = DEFBUFSIZE; u_char *bufend; int mfct = 0; int c, nelem; get_func_t get; RECHEADER *crec; RECHEADER *nbuffer; FILE *fp, *tmp_fp; int file_no; int max_recs = DEBUG('m') ? 16 : MAXNUM; buffer = allocrec(NULL, bufsize); bufend = (u_char *)buffer + bufsize; /* Allocate double length keymap for radix_sort */ keylist = malloc(2 * max_recs * sizeof(*keylist)); if (buffer == NULL || keylist == NULL) err(2, "failed to malloc initial buffer or keylist"); if (SINGL_FLD) /* Key and data are one! */ get = makeline; else /* Key (merged key fields) added before data */ get = makekey; file_no = 0; fp = fopen(filelist->names[0], "r"); if (fp == NULL) err(2, "%s", filelist->names[0]); /* Loop through reads of chunk of input files that get sorted * and then merged together. */ for (;;) { keypos = keylist; nelem = 0; crec = buffer; makeline_copydown(crec); /* Loop reading records */ for (;;) { c = get(fp, crec, bufend, ftbl); /* 'c' is 0, EOF or BUFFEND */ if (c == 0) { /* Save start of key in input buffer */ *keypos++ = crec; if (++nelem == max_recs) { c = BUFFEND; break; } crec = (RECHEADER *)(crec->data + SALIGN(crec->length)); continue; } if (c == EOF) { /* try next file */ if (++file_no >= nfiles) /* no more files */ break; fp = fopen(filelist->names[file_no], "r"); if (fp == NULL) err(2, "%s", filelist->names[file_no]); continue; } if (nelem >= max_recs || (bufsize >= MAXBUFSIZE && nelem > 8)) /* Need to sort and save this lot of data */ break; /* c == BUFFEND, and we can process more data */ /* Allocate a larger buffer for this lot of data */ bufsize *= 2; nbuffer = allocrec(buffer, bufsize); if (!nbuffer) { err(2, "failed to realloc buffer to %zu bytes", bufsize); } /* patch up keylist[] */ for (keyp = &keypos[-1]; keyp >= keylist; keyp--) *keyp = nbuffer + (*keyp - buffer); crec = nbuffer + (crec - buffer); buffer = nbuffer; bufend = (u_char *)buffer + bufsize; } /* Sort this set of records */ radix_sort(keylist, keylist + max_recs, nelem); if (c == EOF && mfct == 0) { /* all the data is (sorted) in the buffer */ append(keylist, nelem, outfp, DEBUG('k') ? putkeydump : putline); break; } /* Save current data to a temporary file for a later merge */ if (nelem != 0) { tmp_fp = ftmp(); append(keylist, nelem, tmp_fp, putrec); save_for_merge(tmp_fp, geteasy, ftbl); } mfct = 1; if (c == EOF) { /* merge to output file */ merge_sort(outfp, DEBUG('k') ? putkeydump : putline, ftbl); break; } } free(keylist); keylist = NULL; free(buffer); buffer = NULL; }