/* Traverse the groups and put the elements into buckets. The
   parameter pos indicates the current position in the string.
   While walking the chain of unfinished groups (nextunf) we keep
   track of the previous group that is still on the chain, so that
   groups that are already sorted can be unlinked. The previously
   read character is recorded as well; in this way the elements are
   moved in blocks consisting of strings that have a common
   character in position pos. A group that is not split during this
   phase is left behind and not put into a bucket.

   g is only used as the predecessor of the first group visited;
   the traversal itself starts at g->nextunf. */
static void intobuckets(group g, bucket b[], int pos)
{
   group kept;           /* last group left on the unfinished chain */
   list cur, nxt;        /* element being examined and its successor */
   character c, runch;   /* character just read / character of the open run */
   boolean divided;      /* has the group been cut into several runs? */
   int runlen;           /* number of elements in the open run */

   resetmem(bucketmem);

   kept = g;
   for (g = g->nextunf; g != NULL; g = g->nextunf) {
      if (g->finis) {
         /* Already sorted: take the group off the chain. */
         kept->nextunf = g->nextunf;
         continue;
      }

      divided = FALSE;
      cur = g->head;
      runch = CHAR(cur->str, pos);
      runlen = 1;

      while ((nxt = cur->next) != NULL) {
         c = CHAR(nxt->str, pos);
         runlen++;
         if (c != runch) {
            /* nxt opens a new run; the block g->head..cur (without
               nxt, hence runlen-1) goes to the bucket of runch. */
            intobucket(b+runch, g->head, cur, runlen-1, g);
            g->head = nxt;
            divided = TRUE;
            runch = c;
            runlen = 1;
         }
         cur = nxt;
      }

      if (!divided) {
         /* Every element has the same character at pos, so the
            group keeps its list. It leaves the chain only when
            that character is an end mark. */
         if (IS_ENDMARK(runch)) {
            kept->nextunf = g->nextunf;
         } else {
            kept = g;
         }
         continue;
      }

      /* Hand over the last run as well; the group is now empty and
         stays on the chain. */
      intobucket(b+runch, g->head, cur, runlen, g);
      g->head = NULL;
      kept = g;
   }
}
/* Traverse the buckets in character order and put the elements
   back into their groups, one block (bucket entry) at a time.
   The groups are split and all finished groups are marked:
   a block is reported as finished when its character is an end
   mark, or when it holds fewer than INSERTBREAK elements, in which
   case it is first sorted with Insertsort (if it has more than one
   element). Every bucket slot is reset to NULL once emptied. */
static void intogroups(bucket b[], int pos)
{
   character c;
   bucket cur;
   boolean done;

   for (c = 0; c < CHARS; c++) {
      cur = b[c];
      if (cur == NULL) {
         continue;
      }

      while (cur != NULL) {
         done = IS_ENDMARK(c);
         if (!done && cur->size < INSERTBREAK) {
            /* Small block: finish it here instead of splitting
               it further in later passes. */
            if (cur->size > 1) {
               cur->head = Insertsort(cur->head, &cur->tail, pos);
            }
            done = TRUE;
         }
         intogroup(cur->tag, cur->head, cur->tail, done);
         cur = cur->next;
      }

      b[c] = NULL;
   }
}
/* Distribute the list a into buckets on the key that SHORT reads
   at position pos, then push every non-empty bucket onto the
   stack for position pos+2.

   Consecutive elements with the same key are handed to
   intobucket2 as one block. used1 and used2 start out as flag
   arrays that are passed to intobucket2 (presumably it flags the
   high and the low byte of each key seen -- confirm against
   intobucket2). They are then compacted in place into ascending
   lists of the flagged values, so that only combinations of used
   high and low bytes have to be inspected instead of all buckets. */
static inline void twobytes(list a, int pos)
{
   static bucket b[BUCKETS];   /* buckets, indexed by the key */
   int used1[CHARS];           /* flags, later the used high bytes */
   int used2[CHARS];           /* flags, later the used low bytes */
   int buckets1 = 0, buckets2 = 0;
   list t, tn;                 /* element being examined and its successor */
   character key, runkey, hibits;
   int runlen;                 /* number of elements in the open run */
   int i, j;

   for (i = 0; i < CHARS; i++) {
      used1[i] = FALSE;
      used2[i] = FALSE;
   }

   /* Into buckets: a always points at the first element of the
      open run, t at the last element read so far. */
   t = a;
   runkey = SHORT(t->str, pos);
   runlen = 1;
   while ((tn = t->next)) {
      key = SHORT(tn->str, pos);
      runlen++;
      if (key != runkey) {
         /* tn starts a new run, so the finished one has runlen-1
            elements. */
         intobucket2(b+runkey, a, t, runlen-1, runkey, used1, used2);
         a = tn;
         runkey = key;
         runlen = 1;
      }
      t = tn;
   }
   intobucket2(b+runkey, a, t, runlen, runkey, used1, used2);

   /* Compact the flags into lists of values. The write index never
      passes the read index, so no unread flag is overwritten. */
   for (i = 0; i < CHARS; i++) {
      if (used1[i]) {
         used1[buckets1] = i;
         buckets1++;
      }
      if (used2[i]) {
         used2[buckets2] = i;
         buckets2++;
      }
   }

   /* Put onto stack, in ascending key order. */
   for (i = 0; i < buckets1; i++) {
      hibits = used1[i] << 8;
      for (j = 0; j < buckets2; j++) {
         key = hibits | used2[j];
         if (!b[key].head) {
            continue;   /* this high/low combination never occurred */
         }
         if (IS_ENDMARK(key)) {
            b[key].size = 0;   /* finished */
         }
         ontostack(b+key, pos+2);
      }
   }
}