Exemplo n.º 1
0
void
fsort(int binno, int depth, union f_handle infiles, int nfiles, FILE *outfp,
    struct field *ftbl)
{
	u_char *weights, **keypos, *bufend, *tmpbuf;
	static u_char *buffer, **keylist;
	static size_t bufsize;
	int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
	int c, nelem;
	long sizes[NBINS+1];
	union f_handle tfiles, mstart = {MAXFCT-16};
	int (*get)(int, union f_handle, int, RECHEADER *,
		u_char *, struct field *);
	RECHEADER *crec;
	struct field tfield[2];
	FILE *prevfp, *tailfp[FSORTMAX+1];

	memset(tailfp, 0, sizeof(tailfp));
	prevfp = outfp;
	memset(tfield, 0, sizeof(tfield));
	if (ftbl[0].flags & R)
		tfield[0].weights = Rascii;
	else
		tfield[0].weights = ascii;
	tfield[0].icol.num = 1;
	weights = ftbl[0].weights;
	if (buffer == NULL) {
		bufsize = BUFSIZE;
		if ((buffer = malloc(bufsize)) == NULL ||
		    (keylist = calloc(MAXNUM, sizeof(u_char *))) == NULL)
			err(2, NULL);
	}
	bufend = buffer + bufsize - 1;
	if (binno >= 0) {
		tfiles.top = infiles.top + nfiles;
		get = getnext;
	} else {
		tfiles.top = 0;
		if (SINGL_FLD)
			get = makeline;
		else
			get = makekey;
	}				
	for (;;) {
		memset(sizes, 0, sizeof(sizes));
		c = ntfiles = 0;
		if (binno == weights[REC_D] &&
		    !(SINGL_FLD && ftbl[0].flags & F)) {	/* pop */
			rd_append(weights[REC_D],
			    infiles, nfiles, prevfp, buffer, bufend);
			break;
		} else if (binno == weights[REC_D]) {
			depth = 0;		/* start over on flat weights */
			ftbl = tfield;
			weights = ftbl[0].weights;
		}
		while (c != EOF) {
			keypos = keylist;
			nelem = 0;
			crec = (RECHEADER *) buffer;
			while ((c = get(binno, infiles, nfiles, crec, bufend,
			    ftbl)) == 0) {
				*keypos++ = crec->data + depth;
				if (++nelem == MAXNUM) {
					c = BUFFEND;
					break;
				}
				crec = (RECHEADER *)((char *)crec +
				    SALIGN(crec->length) + sizeof(TRECHEADER));
			}
			/*
			 * buffer was too small for data, allocate
			 * a bigger buffer.
			 */
			if (c == BUFFEND && nelem == 0) {
				bufsize *= 2;
				tmpbuf = realloc(buffer, bufsize);
				if (!tmpbuf)
					err(2, "failed to realloc buffer");
				crec = (RECHEADER *)
				    (tmpbuf + ((u_char *)crec - buffer));
				buffer = tmpbuf;
				bufend = buffer + bufsize - 1;
				continue;
			}
			if (c == BUFFEND || ntfiles || mfct) {	/* push */
				if (panic >= PANIC) {
					fstack[MAXFCT-16+mfct].fp = ftmp();
					if (radixsort((const u_char **)keylist,
					    nelem, weights, REC_D))
						err(2, NULL);
					append(keylist, nelem, depth, fstack[
					 MAXFCT-16+mfct].fp, putrec, ftbl);
					mfct++;
					/* reduce number of open files */
					if (mfct == 16 ||(c == EOF && ntfiles)) {
						fstack[tfiles.top + ntfiles].fp
						    = ftmp();
						fmerge(0, mstart, mfct, geteasy,
						  fstack[tfiles.top+ntfiles].fp,
						  putrec, ftbl);
						ntfiles++;
						mfct = 0;
					}
				} else {
					fstack[tfiles.top + ntfiles].fp= ftmp();
					onepass(keylist, depth, nelem, sizes,
					weights, fstack[tfiles.top+ntfiles].fp);
					ntfiles++;
				}
			}
		}
		get = getnext;
		if (!ntfiles && !mfct) {	/* everything in memory--pop */
			if (nelem > 1) {
				if (STABLE) {
					i = sradixsort((const u_char **)keylist,
					    nelem, weights, REC_D);
				} else {
					i = radixsort((const u_char **)keylist,
					    nelem, weights, REC_D);
				}
				if (i)
					err(2, NULL);
			}
			append(keylist, nelem, depth, outfp, putline, ftbl);
			break;					/* pop */
		}
		if (panic >= PANIC) {
			if (!ntfiles)
				fmerge(0, mstart, mfct, geteasy,
				    outfp, putline, ftbl);
			else
				fmerge(0, tfiles, ntfiles, geteasy,
				    outfp, putline, ftbl);
			break;
		}
		total = maxb = lastb = 0;	/* find if one bin dominates */
		for (i = 0; i < NBINS; i++)
		  if (sizes[i]) {
			if (sizes[i] > sizes[maxb])
				maxb = i;
			lastb = i;
			total += sizes[i];
		}
		if (sizes[maxb] < max((total / 2) , BUFSIZE))
			maxb = lastb;	/* otherwise pop after last bin */
		fstack[tfiles.top].lastb = lastb;
		fstack[tfiles.top].maxb = maxb;

			/* start refining next level. */
		get(-1, tfiles, ntfiles, crec, bufend, 0);	/* rewind */
		for (i = 0; i < maxb; i++) {
			if (!sizes[i])	/* bin empty; step ahead file offset */
				get(i, tfiles, ntfiles, crec, bufend, 0);
			else
				fsort(i, depth+1, tfiles, ntfiles, outfp, ftbl);
		}
		if (lastb != maxb) {
			if (prevfp != outfp)
				tailfp[panic] = prevfp;
			prevfp = ftmp();
			for (i = maxb+1; i <= lastb; i++)
				if (!sizes[i])
					get(i, tfiles, ntfiles, crec, bufend,0);
				else
					fsort(i, depth+1, tfiles, ntfiles,
					    prevfp, ftbl);
		}

		/* sort biggest (or last) bin at this level */
		depth++;
		panic++;
		binno = maxb;
		infiles.top = tfiles.top;	/* getnext will free tfiles, */
		nfiles = ntfiles;		/* so overwrite them */
	}
	if (prevfp != outfp) {
		concat(outfp, prevfp);
		fclose(prevfp);
	}
	for (i = panic; i >= 0; --i)
		if (tailfp[i]) {
			concat(outfp, tailfp[i]);
			fclose(tailfp[i]);
		}
}
Exemplo n.º 2
0
void
fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
{
	RECHEADER **keylist;
	RECHEADER **keypos, **keyp;
	RECHEADER *buffer;
	size_t bufsize = DEFBUFSIZE;
	u_char *bufend;
	int mfct = 0;
	int c, nelem;
	get_func_t get;
	RECHEADER *crec;
	RECHEADER *nbuffer;
	FILE *fp, *tmp_fp;
	int file_no;
	int max_recs = DEBUG('m') ? 16 : MAXNUM;

	buffer = allocrec(NULL, bufsize);
	bufend = (u_char *)buffer + bufsize;
	/* Allocate double length keymap for radix_sort */
	keylist = malloc(2 * max_recs * sizeof(*keylist));
	if (buffer == NULL || keylist == NULL)
		err(2, "failed to malloc initial buffer or keylist");

	if (SINGL_FLD)
		/* Key and data are one! */
		get = makeline;
	else
		/* Key (merged key fields) added before data */
		get = makekey;

	file_no = 0;
	fp = fopen(filelist->names[0], "r");
	if (fp == NULL)
		err(2, "%s", filelist->names[0]);

	/* Loop through reads of chunk of input files that get sorted
	 * and then merged together. */
	for (;;) {
		keypos = keylist;
		nelem = 0;
		crec = buffer;
		makeline_copydown(crec);

		/* Loop reading records */
		for (;;) {
			c = get(fp, crec, bufend, ftbl);
			/* 'c' is 0, EOF or BUFFEND */
			if (c == 0) {
				/* Save start of key in input buffer */
				*keypos++ = crec;
				if (++nelem == max_recs) {
					c = BUFFEND;
					break;
				}
				crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
				continue;
			}
			if (c == EOF) {
				/* try next file */
				if (++file_no >= nfiles)
					/* no more files */
					break;
				fp = fopen(filelist->names[file_no], "r");
				if (fp == NULL)
					err(2, "%s", filelist->names[file_no]);
				continue;
			}
			if (nelem >= max_recs
			    || (bufsize >= MAXBUFSIZE && nelem > 8))
				/* Need to sort and save this lot of data */
				break;

			/* c == BUFFEND, and we can process more data */
			/* Allocate a larger buffer for this lot of data */
			bufsize *= 2;
			nbuffer = allocrec(buffer, bufsize);
			if (!nbuffer) {
				err(2, "failed to realloc buffer to %zu bytes",
					bufsize);
			}

			/* patch up keylist[] */
			for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
				*keyp = nbuffer + (*keyp - buffer);

			crec = nbuffer + (crec - buffer);
			buffer = nbuffer;
			bufend = (u_char *)buffer + bufsize;
		}

		/* Sort this set of records */
		radix_sort(keylist, keylist + max_recs, nelem);

		if (c == EOF && mfct == 0) {
			/* all the data is (sorted) in the buffer */
			append(keylist, nelem, outfp,
			    DEBUG('k') ? putkeydump : putline);
			break;
		}

		/* Save current data to a temporary file for a later merge */
		if (nelem != 0) {
			tmp_fp = ftmp();
			append(keylist, nelem, tmp_fp, putrec);
			save_for_merge(tmp_fp, geteasy, ftbl);
		}
		mfct = 1;

		if (c == EOF) {
			/* merge to output file */
			merge_sort(outfp, 
			    DEBUG('k') ? putkeydump : putline, ftbl);
			break;
		}
	}

	free(keylist);
	keylist = NULL;
	free(buffer);
	buffer = NULL;
}