/*
 * Register every adjacent symbol pair of the text held in `rds`.
 *
 * Walks the sequence left to right.  A pair (code[pos], code[pos+1]) that
 * locatePair() does not know yet is handed to createPair(); for a known
 * pair, position `pos` is appended to the pair's occurrence chain (the
 * chain tail is tracked in `b_pos`) and incrementPair() is called.
 * Finishes with resetPQ(rds, 1).
 *
 * A text of length 0 or 1 contains no pair, so the loop body never runs.
 */
void initRDS(RDS *rds)
{
  SEQ *const text = rds->seq;
  const uint length = rds->txt_len;
  uint pos;

  for (pos = 0; pos + 1 < length; pos++) {
    const CODE left = text[pos].code;
    const CODE right = text[pos + 1].code;
    PAIR *entry = locatePair(rds, left, right);

    if (entry == NULL) {
      /* First occurrence of this pair: create its record at `pos`. */
      (void)createPair(rds, left, right, pos);
      continue;
    }

    /* Known pair: hook `pos` behind the current last occurrence. */
    text[pos].prev = entry->b_pos;
    text[pos].next = DUMMY_POS;
    text[entry->b_pos].next = pos;
    entry->b_pos = pos;
    incrementPair(rds, entry);
  }

  resetPQ(rds, 1);
}
/*
 * Replace the adjacent pair that starts at `target_pos` by `new_code` and
 * repair the pair bookkeeping around it.
 *
 * Let L, C, R, RR be the symbols at l_pos, target_pos, r_pos, rr_pos (the
 * positions returned by leftPos()/rightPos()).  The function
 *   - moves l_pos from pair (L, C) to pair (L, new_code),
 *   - writes new_code at target_pos and DUMMY_CODE at r_pos,
 *   - takes r_pos out of pair (R, RR),
 *   - stores links in the dummy cells between target_pos and rr_pos,
 *   - conditionally registers target_pos under pair (new_code, RR).
 *
 * Parameters:
 *   crds       - structure owning the sequence and the pair tables.
 *   new_code   - symbol that replaces the pair.
 *   target_pos - position of the left element of the pair being replaced.
 *
 * The caller must pass a position whose own code and right-neighbour code
 * are not DUMMY_CODE (asserted below).
 * NOTE(review): r_pos is used as an index without a DUMMY_POS check, so
 * the caller presumably guarantees a right neighbour exists - confirm.
 */
static void updateBlock(CRDS *crds, CODE new_code, uint target_pos) {
  SEQ *seq = crds->seq;
  uint l_pos, r_pos, rr_pos, nx_pos;
  CODE c_code, r_code, l_code, rr_code;
  PCODE c_pcode, r_pcode, l_pcode;
  PAIR *l_pair, *c_pair, *r_pair;

  /* Snapshot neighbours, codes and contexts before anything is modified. */
  l_pos = leftPos(crds, target_pos);
  r_pos = rightPos(crds, target_pos);
  rr_pos = rightPos(crds, r_pos);
  c_code = seq[target_pos].code;
  c_pcode = seq[target_pos].pcode;
  r_code = seq[r_pos].code;
  r_pcode = seq[r_pos].pcode;

  /* nx_pos: the position linked after target_pos in its chain; when that
     link is r_pos itself, follow one more link. */
  nx_pos = seq[target_pos].next;
  if (nx_pos == r_pos) {
    nx_pos = seq[nx_pos].next;
  }
  assert(c_code != DUMMY_CODE);
  assert(r_code != DUMMY_CODE);

  /* Left side: the pair starting at l_pos changes from (L, C) to
     (L, new_code). */
  if (l_pos != DUMMY_POS) {
    l_code = seq[l_pos].code;
    l_pcode = seq[l_pos].pcode;
    assert(seq[l_pos].code != DUMMY_CODE);
    removeLink(crds, l_pos);
    if ((l_pair = locatePair(crds, l_pcode, l_code, c_code)) != NULL) {
      /* If l_pos was recorded as the pair's f_pos, advance f_pos along the
         chain.  NOTE(review): relies on removeLink() leaving
         seq[l_pos].next readable - confirm. */
      if (l_pair->f_pos == l_pos) {
        l_pair->f_pos = seq[l_pos].next;
      }
      decrementPair(crds, l_pair);
    }
    if ((l_pair = locatePair(crds, l_pcode, l_code, new_code)) == NULL) {
      /* No record for (L, new_code) yet: clear the links and create one. */
      seq[l_pos].prev = DUMMY_POS;
      seq[l_pos].next = DUMMY_POS;
      createPair(crds, l_pcode, l_code, new_code, l_pos);
    } else {
      /* Existing record: append l_pos after the current b_pos. */
      seq[l_pos].prev = l_pair->b_pos;
      seq[l_pos].next = DUMMY_POS;
      seq[l_pair->b_pos].next = l_pos;
      l_pair->b_pos = l_pos;
      incrementPair(crds, l_pair);
    }
  }

  /* Unlink both elements of the replaced pair, then rewrite the text:
     target_pos carries new_code, r_pos becomes a dummy cell. */
  removeLink(crds, target_pos);
  removeLink(crds, r_pos);
  seq[target_pos].code = new_code;
  seq[r_pos].code = DUMMY_CODE;

  if (rr_pos != DUMMY_POS) {
    rr_code = seq[rr_pos].code;
    assert(rr_code != DUMMY_CODE);

    /* Right side: r_pos no longer starts an occurrence of (R, RR). */
    if ((r_pair = locatePair(crds, r_pcode, r_code, rr_code)) != NULL) {
      if (r_pair->f_pos == r_pos) {
        r_pair->f_pos = seq[r_pos].next;
      }
      decrementPair(crds, r_pair);
    }

    /* Store links in the dummy cells between target_pos and rr_pos: the
       first gap cell gets prev = rr_pos, the last gap cell gets
       next = target_pos.  With a one-cell gap both go into that cell.
       NOTE(review): presumably leftPos()/rightPos() use these to jump
       across the gap - confirm. */
    if (target_pos + 1 == rr_pos - 1) {
      seq[target_pos+1].prev = rr_pos;
      seq[target_pos+1].next = target_pos;
    } else {
      seq[target_pos+1].prev = rr_pos;
      seq[target_pos+1].next = DUMMY_POS;
      seq[rr_pos-1].prev = DUMMY_POS;
      seq[rr_pos-1].next = target_pos;
    }

    /* Register target_pos under (new_code, RR) only when the saved chain
       successor lies beyond rr_pos; otherwise target_pos is left unlinked.
       NOTE(review): presumably nx_pos <= rr_pos means rr_pos starts
       another occurrence that will itself be replaced - confirm. */
    if (nx_pos > rr_pos) {
      if ((c_pair = locatePair(crds, c_pcode, new_code, rr_code)) == NULL) {
        seq[target_pos].prev = seq[target_pos].next = DUMMY_POS;
        createPair(crds, c_pcode, new_code, rr_code, target_pos);
      } else {
        seq[target_pos].prev = c_pair->b_pos;
        seq[target_pos].next = DUMMY_POS;
        seq[c_pair->b_pos].next = target_pos;
        c_pair->b_pos = target_pos;
        incrementPair(crds, c_pair);
      }
    } else {
      seq[target_pos].next = seq[target_pos].prev = DUMMY_POS;
    }
  } else if (target_pos < crds->txt_len - 1) {
    /* Nothing to the right of the replaced pair, and target_pos is not the
       final index: point the cell after target_pos back at it and clear
       the links of r_pos. */
    assert(seq[target_pos+1].code == DUMMY_CODE);
    seq[target_pos+1].prev = DUMMY_POS;
    seq[target_pos+1].next = target_pos;
    seq[r_pos].prev = seq[r_pos].next = DUMMY_POS;
  }
}
/* Print a fatal construction error to stderr and terminate the process. */
static void crdsFatal(const char *msg)
{
  fprintf(stderr, "createCRDS: %s\n", msg);
  exit(EXIT_FAILURE);
}

/*
 * Allocate `count` objects of `size` bytes each; never returns NULL.
 * A zero-byte request is rounded up to one byte so that an empty array
 * (empty input, p_max == 0) still gets a valid pointer.
 */
static void *crdsAlloc(size_t count, size_t size)
{
  size_t bytes;
  void *mem;

  if (size != 0 && count > (size_t)-1 / size) {
    crdsFatal("allocation size overflow");
  }
  bytes = count * size;
  mem = malloc(bytes != 0 ? bytes : 1);
  if (mem == NULL) {
    crdsFatal("out of memory");
  }
  return mem;
}

/*
 * Build a CRDS from the whole content of `input`.
 *
 * Steps:
 *   1. Determine the stream length and read every byte into `seq`.
 *   2. Renumber the distinct byte values densely (char_table).
 *   3. Map each dense code to one of `mchar_size` classes (mchar_table);
 *      when there are more symbols than classes, symbols are taken in
 *      decreasing frequency and the class index sweeps up and down.
 *   4. Give each position a context id (pcode) computed by getContextID()
 *      from the classes of the `cont_len` preceding symbols; positions
 *      before the start of the text contribute HEAD_PCODE.
 *   5. Allocate one PQUE per context and register every adjacent pair.
 *
 * Parameters:
 *   input      - seekable stream; read from the beginning to the end.
 *   cont_len   - number of preceding symbols forming the context (may be 0).
 *   mchar_size - requested number of symbol classes; lowered to the number
 *                of distinct symbols when the text has fewer.
 *
 * Returns a newly allocated CRDS that references the seq, char_table,
 * mchar_table and p_que arrays allocated here.  Nothing is released by
 * this function.
 *
 * Failure handling: an unseekable stream, an input whose length does not
 * fit in `uint`, a zero context count, or an allocation failure prints a
 * message on stderr and terminates the process.
 */
static CRDS *createCRDS(FILE *input, uint cont_len, uint mchar_size)
{
  uint size_w;
  SEQ *seq;
  uint char_size;
  bool check_table[MAX_CHAR_SIZE];
  uint char_freq[MAX_CHAR_SIZE];
  CODE *char_table;
  uchar *mchar_table;
  /* Computed from the requested mchar_size, i.e. before it is lowered to
     char_size below; the table may therefore have spare contexts. */
  uint num_contexts = (uint)pow(mchar_size, cont_len);
  PQUE **p_que;
  uint p_max;
  CRDS *crds;

  /* num_contexts is a divisor in the p_max computation. */
  if (num_contexts == 0) {
    crdsFatal("number of contexts is zero (mchar_size must be non-zero)");
  }

  /* Measure the stream; ftell() reports failure as a negative value. */
  {
    long file_len = -1L;

    if (fseek(input, 0L, SEEK_END) == 0) {
      file_len = ftell(input);
    }
    if (file_len < 0) {
      crdsFatal("cannot determine input length (stream must be seekable)");
    }
    size_w = (uint)file_len;
    if ((long)size_w != file_len) {
      crdsFatal("input is too large");
    }
    rewind(input);
  }

  seq = (SEQ *)crdsAlloc(size_w, sizeof(SEQ));
  char_table = (CODE *)crdsAlloc(MAX_CHAR_SIZE, sizeof(CODE));
  mchar_table = (uchar *)crdsAlloc(MAX_CHAR_SIZE, sizeof(uchar));

  {
    uint i;
    for (i = 0; i < MAX_CHAR_SIZE; i++) {
      check_table[i] = false;
      char_table[i] = DUMMY_CODE;
      char_freq[i] = 0;
    }
  }

  /* Read the text.  getc() is received in an int so that EOF is told apart
     from every byte value, and the loop never writes past the `size_w`
     entries that were allocated. */
  char_size = 0;
  {
    int ch;
    uint n_read = 0;

    while (n_read < size_w && (ch = getc(input)) != EOF) {
      seq[n_read].code = (CODE)ch;
      seq[n_read].next = DUMMY_POS;
      seq[n_read].prev = DUMMY_POS;
      if (check_table[ch] == false) {
        check_table[ch] = true;
        char_size++;
      }
      n_read++;
    }
    /* A short read must not leave uninitialised entries in play. */
    size_w = n_read;
  }

  if (char_size <= mchar_size) {
    mchar_size = char_size;
  }

  /* Dense renumbering: the j-th distinct byte value (ascending) gets j. */
  {
    uint i, j;
    for (i = 0, j = 0; i < MAX_CHAR_SIZE; i++) {
      if (check_table[i] == true) {
        char_table[i] = (CODE)j++;
      }
    }
  }

  /* Recode the text and count frequencies of the dense codes. */
  {
    uint i;
    for (i = 0; i < size_w; i++) {
      seq[i].code = char_table[seq[i].code];
      char_freq[seq[i].code]++;
    }
  }

  if (mchar_size < char_size) {
    /* More symbols than classes: repeatedly pick the most frequent
       remaining symbol and give it class k, where k runs
       0,1,..,m-1,m-1,..,1,0,0,1,.. (m = mchar_size). */
    uint i, j;
    int k = 0;
    uchar max_code = 0;
    uint max;
    bool up_flag = true;

    for (i = 0; i < char_size; i++) {
      max = 0;
      for (j = 0; j < char_size; j++) {
        if (char_freq[j] > max) {
          max_code = (uchar)j;
          max = char_freq[j];
        }
      }
      char_freq[max_code] = 0; /* consumed */
      mchar_table[max_code] = k;
      if (up_flag == true) {
        k++;
        if ((uint)k == mchar_size) {
          k = (int)(mchar_size - 1);
          up_flag = false;
        }
      } else {
        k--;
        if (k < 0) {
          k = 0;
          up_flag = true;
        }
      }
    }
  } else {
    /* Every symbol is its own class. */
    uint i;
    for (i = 0; i < char_size; i++) {
      mchar_table[i] = i;
    }
  }

  /* Context id of each position.  The array has at least one element
     because a zero-length VLA is undefined behaviour. */
  {
    uint i, k;
    uchar context[cont_len > 0 ? cont_len : 1];
    CODE id;

    for (i = 0; i < size_w; i++) {
      for (k = 0; k < cont_len; k++) {
        /* context[k] describes the symbol `back` places left of i. */
        uint back = cont_len - k;
        if (back > i) {
          context[k] = HEAD_PCODE;
        } else {
          context[k] = mchar_table[seq[i - back].code];
        }
      }
      id = getContextID(mchar_size, cont_len, context);
      seq[i].pcode = id;
    }
  }

  /* NOTE(review): p_max becomes 0 when num_contexts exceeds
     ceil(sqrt(size_w)); confirm that the queue code tolerates an empty
     p_head array. */
  p_max = (uint)ceil(sqrt((double)size_w))/num_contexts;
  printf("p_max = %u\n", (unsigned int)p_max);

  /* One queue per context, with an empty hash table and empty heads. */
  {
    uint i, j;
    p_que = (PQUE **)crdsAlloc(num_contexts, sizeof(PQUE *));
    for (i = 0; i < num_contexts; i++) {
      p_que[i] = (PQUE *)crdsAlloc(1, sizeof(PQUE));
      p_que[i]->h_entry =
        (PAIR **)crdsAlloc(primes[INIT_HASH_NUM], sizeof(PAIR *));
      for (j = 0; j < primes[INIT_HASH_NUM]; j++) {
        p_que[i]->h_entry[j] = NULL;
      }
      p_que[i]->p_head = (PAIR **)crdsAlloc(p_max, sizeof(PAIR *));
      for (j = 0; j < p_max; j++) {
        p_que[i]->p_head[j] = NULL;
      }
      p_que[i]->h_num = INIT_HASH_NUM;
      p_que[i]->mp_pos = 0;
      p_que[i]->p_max = p_max;
      p_que[i]->num_pairs = 0;
    }
  }

  crds = (CRDS *)crdsAlloc(1, sizeof(CRDS));
  crds->txt_len = size_w;
  crds->char_size = char_size;
  crds->char_table = char_table;
  crds->mchar_size = mchar_size;
  crds->mchar_table = mchar_table;
  crds->cont_len = cont_len;
  crds->num_contexts = num_contexts;
  crds->seq = seq;
  crds->p_que = p_que;

  /* Register every adjacent pair under the context of its left element.
     `i + 1 < size_w` (rather than `i < size_w - 1`) keeps the bound from
     wrapping around when the input is empty. */
  {
    uint i;
    PCODE P;
    CODE A, B;
    PAIR *pair;

    for (i = 0; i + 1 < size_w; i++) {
      P = seq[i].pcode;
      A = seq[i].code;
      B = seq[i+1].code;
      if ((pair = locatePair(crds, P, A, B)) == NULL) {
        pair = createPair(crds, P, A, B, i);
      } else {
        /* Append position i after the pair's current last occurrence. */
        seq[i].prev = pair->b_pos;
        seq[i].next = DUMMY_POS;
        seq[pair->b_pos].next = i;
        pair->b_pos = i;
        incrementPair(crds, pair);
      }
    }
    /* NOTE(review): presumably discards pairs that occur only once in
       each context - confirm against deletePQ(). */
    for (i = 0; i < num_contexts; i++) {
      deletePQ(crds, 1, i);
    }
  }

  return crds;
}