value camlzip_deflate(value vzs, value srcbuf, value srcpos, value srclen, value dstbuf, value dstpos, value dstlen, value vflush) { z_stream * zs = ZStream_val(vzs); int retcode; long used_in, used_out; value res; zs->next_in = &Byte_u(srcbuf, Long_val(srcpos)); zs->avail_in = Long_val(srclen); zs->next_out = &Byte_u(dstbuf, Long_val(dstpos)); zs->avail_out = Long_val(dstlen); retcode = deflate(zs, camlzip_flush_table[Int_val(vflush)]); if (retcode < 0) camlzip_error("Zlib.deflate", vzs); used_in = Long_val(srclen) - zs->avail_in; used_out = Long_val(dstlen) - zs->avail_out; zs->next_in = NULL; /* not required, but cleaner */ zs->next_out = NULL; /* (avoid dangling pointers into Caml heap) */ res = caml_alloc_small(3, 0); Field(res, 0) = Val_bool(retcode == Z_STREAM_END); Field(res, 1) = Val_int(used_in); Field(res, 2) = Val_int(used_out); return res; }
CAMLprim value re_search_backward(value re, value str, value startpos) { unsigned char * starttxt = &Byte_u(str, 0); unsigned char * txt = &Byte_u(str, Long_val(startpos)); unsigned char * endtxt = &Byte_u(str, caml_string_length(str)); unsigned char * startchars; if (txt < starttxt || txt > endtxt) caml_invalid_argument("Str.search_backward"); if (Startchars(re) == -1) { do { if (re_match(re, starttxt, txt, endtxt, 0)) return re_alloc_groups(re, str); txt--; } while (txt >= starttxt); return Atom(0); } else { startchars = (unsigned char *) String_val(Field(Cpool(re), Startchars(re))); do { while (txt > starttxt && startchars[*txt] == 0) txt--; if (re_match(re, starttxt, txt, endtxt, 0)) return re_alloc_groups(re, str); txt--; } while (txt >= starttxt); return Atom(0); } }
CAMLprim value re_partial_match(value re, value str, value pos) { unsigned char * starttxt = &Byte_u(str, 0); unsigned char * txt = &Byte_u(str, Long_val(pos)); unsigned char * endtxt = &Byte_u(str, caml_string_length(str)); if (txt < starttxt || txt > endtxt) caml_invalid_argument("Str.string_partial_match"); if (re_match(re, starttxt, txt, endtxt, 1)) { return re_alloc_groups(re, str); } else { return Atom(0); } }
CAMLprim value caml_string_set(value str, value index, value newval) { intnat idx = Long_val(index); if (idx < 0 || idx >= caml_string_length(str)) caml_array_bound_error(); Byte_u(str, idx) = Int_val(newval); return Val_unit; }
/* Contrary to caml_md5_chan, this function releases the runtime lock. [fd] must be a file descriptor open for reading and not be nonblocking, otherwise the function might fail non-deterministically. */ CAMLprim value caml_md5_fd(value fd) { CAMLparam1 (fd); value res; struct MD5Context ctx; caml_enter_blocking_section(); { intnat bytes_read; char buffer[4096]; caml_MD5Init(&ctx); while (1){ bytes_read = read (Int_val(fd), buffer, sizeof(buffer)); if (bytes_read < 0) { if (errno == EINTR) continue; caml_leave_blocking_section(); uerror("caml_md5_fd", Nothing); } if (bytes_read == 0) break; caml_MD5Update (&ctx, (unsigned char *) buffer, bytes_read); } } caml_leave_blocking_section(); res = caml_alloc_string(16); caml_MD5Final(&Byte_u(res, 0), &ctx); CAMLreturn (res); }
CAMLprim value caml_lex_engine(struct lexing_table *tbl, value start_state, struct lexer_buffer *lexbuf) { int state, base, backtrk, c; state = Int_val(start_state); if (state >= 0) { /* First entry */ lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos; lexbuf->lex_last_action = Val_int(-1); } else { /* Reentry after refill */ state = -state - 1; } while(1) { /* Lookup base address or action number for current state */ base = Short(tbl->lex_base, state); if (base < 0) return Val_int(-base-1); /* See if it's a backtrack point */ backtrk = Short(tbl->lex_backtrk, state); if (backtrk >= 0) { lexbuf->lex_last_pos = lexbuf->lex_curr_pos; lexbuf->lex_last_action = Val_int(backtrk); } /* See if we need a refill */ if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){ if (lexbuf->lex_eof_reached == Val_bool (0)){ return Val_int(-state - 1); }else{ c = 256; } }else{ /* Read next input char */ c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos)); lexbuf->lex_curr_pos += 2; } /* Determine next state */ if (Short(tbl->lex_check, base + c) == state) state = Short(tbl->lex_trans, base + c); else state = Short(tbl->lex_default, state); /* If no transition on this char, return to last backtrack point */ if (state < 0) { lexbuf->lex_curr_pos = lexbuf->lex_last_pos; if (lexbuf->lex_last_action == Val_int(-1)) { caml_failwith("lexing: empty token"); } else { return lexbuf->lex_last_action; } }else{ /* Erase the EOF condition only if the EOF pseudo-character was consumed by the automaton (i.e. there was no backtrack above) */ if (c == 256) lexbuf->lex_eof_reached = Val_bool (0); } } }
CAMLprim value caml_sha256_final(value ctx) { CAMLparam1(ctx); CAMLlocal1(res); res = alloc_string(32); SHA256_finish(Context_val(ctx), &Byte_u(res, 0)); CAMLreturn(res); }
CAMLprim value caml_md5_final(value ctx) { CAMLparam1(ctx); CAMLlocal1(res); res = alloc_string(16); caml_MD5Final(&Byte_u(res, 0), Context_val(ctx)); CAMLreturn(res); }
void print_string(value v) { char *s; int i, size; s = (char *) v; size = string_length(v); printf("\""); for (i=0; i<size; i++) { unsigned char p = Byte_u(s, i); if ((p > 31) && (p < 128)) printf("%c", s[i]); else printf("%u", p); } printf("\""); return; }
CAMLprim value digest_array (value v_iarr) { CAMLparam1(v_iarr); CAMLlocal1(result); MD5Context context; int len = Bigarray_val(v_iarr)->dim[0]; unsigned char *buf = Data_bigarray_val(v_iarr); MD5Init (&context); while (len > 0) { int block = (len > 8192) ? 8192 : len; MD5Update (&context, buf, block); buf += block; len -= block; } result = alloc_string (16); MD5Final (&Byte_u(result, 0), &context); CAMLreturn(result); }
value get_next_char(struct lexer_buffer *lexbuf) { mlsize_t buffer_len, curr_pos; buffer_len = string_length(lexbuf->lex_buffer); assert(VAL_TO_LONG(lexbuf->lex_curr_pos) > 0); curr_pos = (mlsize_t) (VAL_TO_LONG(lexbuf->lex_curr_pos)); if (curr_pos >= buffer_len) { PUSH_ROOTS(r, 1); r[0] = (value) lexbuf; callback(lexbuf->refill_buff, (value) lexbuf); lexbuf = (struct lexer_buffer *) r[0]; assert(VAL_TO_LONG(lexbuf->lex_curr_pos) > 0); curr_pos = (mlsize_t) (VAL_TO_LONG(lexbuf->lex_curr_pos)); POP_ROOTS(); } lexbuf->lex_curr_pos += 2; return INT_TO_VAL(Byte_u(lexbuf->lex_buffer, curr_pos)); }
/* The bytecode interpreter for the NFA */ static int re_match(value re, unsigned char * starttxt, register unsigned char * txt, register unsigned char * endtxt, int accept_partial_match) { register value * pc; intnat instr; struct backtrack_stack * stack; union backtrack_point * sp; value cpool; value normtable; unsigned char c; union backtrack_point back; { int i; struct re_group * p; unsigned char ** q; for (p = &re_group[1], i = Numgroups(re); i > 1; i--, p++) p->start = p->end = NULL; for (q = &re_register[0], i = Numregisters(re); i > 0; i--, q++) *q = NULL; } pc = &Field(Prog(re), 0); stack = &initial_stack; sp = stack->point; cpool = Cpool(re); normtable = Normtable(re); re_group[0].start = txt; while (1) { instr = Long_val(*pc++); switch (Opcode(instr)) { case CHAR: if (txt == endtxt) goto prefix_match; if (*txt != Arg(instr)) goto backtrack; txt++; break; case CHARNORM: if (txt == endtxt) goto prefix_match; if (Byte_u(normtable, *txt) != Arg(instr)) goto backtrack; txt++; break; case STRING: { unsigned char * s = (unsigned char *) String_val(Field(cpool, Arg(instr))); while ((c = *s++) != 0) { if (txt == endtxt) goto prefix_match; if (c != *txt) goto backtrack; txt++; } break; } case STRINGNORM: { unsigned char * s = (unsigned char *) String_val(Field(cpool, Arg(instr))); while ((c = *s++) != 0) { if (txt == endtxt) goto prefix_match; if (c != Byte_u(normtable, *txt)) goto backtrack; txt++; } break; } case CHARCLASS: if (txt == endtxt) goto prefix_match; if (! In_bitset(String_val(Field(cpool, Arg(instr))), *txt, c)) goto backtrack; txt++; break; case BOL: if (txt > starttxt && txt[-1] != '\n') goto backtrack; break; case EOL: if (txt < endtxt && *txt != '\n') goto backtrack; break; case WORDBOUNDARY: /* At beginning and end of text: no At beginning of text: OK if current char is a letter At end of text: OK if previous char is a letter Otherwise: OK if previous char is a letter and current char not a letter or previous char is not a letter and current char is a letter */ if (txt == starttxt) { if (txt == endtxt) goto prefix_match; if (Is_word_letter(txt[0])) break; goto backtrack; } else if (txt == endtxt) { if (Is_word_letter(txt[-1])) break; goto backtrack; } else { if (Is_word_letter(txt[-1]) != Is_word_letter(txt[0])) break; goto backtrack; } case BEGGROUP: { int group_no = Arg(instr); struct re_group * group = &(re_group[group_no]); back.undo.loc = &(group->start); back.undo.val = group->start; group->start = txt; goto push; } case ENDGROUP: { int group_no = Arg(instr); struct re_group * group = &(re_group[group_no]); back.undo.loc = &(group->end); back.undo.val = group->end; group->end = txt; goto push; } case REFGROUP: { int group_no = Arg(instr); struct re_group * group = &(re_group[group_no]); unsigned char * s; if (group->start == NULL || group->end == NULL) goto backtrack; for (s = group->start; s < group->end; s++) { if (txt == endtxt) goto prefix_match; if (*s != *txt) goto backtrack; txt++; } break; } case ACCEPT: goto accept; case SIMPLEOPT: { char * set = String_val(Field(cpool, Arg(instr))); if (txt < endtxt && In_bitset(set, *txt, c)) txt++; break; } case SIMPLESTAR: { char * set = String_val(Field(cpool, Arg(instr))); while (txt < endtxt && In_bitset(set, *txt, c)) txt++; break; } case SIMPLEPLUS: { char * set = String_val(Field(cpool, Arg(instr))); if (txt == endtxt) goto prefix_match; if (! In_bitset(set, *txt, c)) goto backtrack; txt++; while (txt < endtxt && In_bitset(set, *txt, c)) txt++; break; } case GOTO: pc = pc + SignedArg(instr); break; case PUSHBACK: back.pos.pc = Set_tag(pc + SignedArg(instr)); back.pos.txt = txt; goto push; case SETMARK: { int reg_no = Arg(instr); unsigned char ** reg = &(re_register[reg_no]); back.undo.loc = reg; back.undo.val = *reg; *reg = txt; goto push; } case CHECKPROGRESS: { int reg_no = Arg(instr); if (re_register[reg_no] == txt) goto backtrack; break; } default: caml_fatal_error ("impossible case in re_match"); } /* Continue with next instruction */ continue; push: /* Push an item on the backtrack stack and continue with next instr */ if (sp == stack->point + BACKTRACK_STACK_BLOCK_SIZE) { struct backtrack_stack * newstack = caml_stat_alloc(sizeof(struct backtrack_stack)); newstack->previous = stack; stack = newstack; sp = stack->point; } *sp = back; sp++; continue; prefix_match: /* We get here when matching failed because the end of text was encountered. */ if (accept_partial_match) goto accept; backtrack: /* We get here when matching fails. Backtrack to most recent saved program point, undoing variable assignments on the way. */ while (1) { if (sp == stack->point) { struct backtrack_stack * prevstack = stack->previous; if (prevstack == NULL) return 0; caml_stat_free(stack); stack = prevstack; sp = stack->point + BACKTRACK_STACK_BLOCK_SIZE; } sp--; if (Tag_is_set(sp->pos.pc)) { pc = Clear_tag(sp->pos.pc); txt = sp->pos.txt; break; } else { *(sp->undo.loc) = sp->undo.val; } } continue; } accept: /* We get here when the regexp was successfully matched */ free_backtrack_stack(stack); re_group[0].end = txt; return 1; }
value camlzip_update_crc32(value crc, value buf, value pos, value len) { return caml_copy_int32(crc32((uint32) Int32_val(crc), &Byte_u(buf, Long_val(pos)), Long_val(len))); }
static void hash_aux(value obj) { unsigned char * p; mlsize_t i; tag_t tag; hash_univ_limit--; if (hash_univ_count < 0 || hash_univ_limit < 0) { if (safe) { fatal_error("hash: count limit exceeded\n"); } else { return; } } if (IS_LONG(obj)) { hash_univ_count--; Combine(VAL_TO_LONG(obj)); return; } /* Atoms are not in the heap, but it's better to hash their tag than to do nothing. */ if (Is_atom(obj)) { tag = Tag_val(obj); hash_univ_count--; Combine_small(tag); return; } /* Pointers into the heap are well-structured blocks. We can inspect the block contents. */ if (Is_in_heap(obj) || Is_young(obj)) { tag = Tag_val(obj); switch (tag) { case String_tag: hash_univ_count--; { mlsize_t len = string_length(obj); i = len <= 128 ? len : 128; // Hash on 128 first characters for (p = &Byte_u(obj, 0); i > 0; i--, p++) { Combine_small(*p); } // Hash on logarithmically many additional characters beyond 128 for (i = 1; i+127 < len; i *= 2) { Combine_small(Byte_u(obj, 127+i)); } break; } case Double_tag: /* For doubles, we inspect their binary representation, LSB first. The results are consistent among all platforms with IEEE floats. */ hash_univ_count--; #ifdef WORDS_BIGENDIAN for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double); i > 0; p--, i--) #else for (p = &Byte_u(obj, 0), i = sizeof(double); i > 0; p++, i--) #endif Combine_small(*p); break; case Abstract_tag: case Final_tag: /* We don't know anything about the contents of the block. Better do nothing. */ break; case Reference_tag: /* We can't hash on the heap address itself, since the reference block * may be moved (from the young generation to the old one). * But, we may follow the pointer. On cyclic structures this will * terminate because the hash_univ_count gets decremented. */ /* Poor idea to hash on the pointed-to structure, even so: it may change, * and hence the hash value of the value changes, although the ref * doesn't. * * This breaks most hash table implementations. sestoft 2000-02-20. */ if (safe) { fatal_error("hash: ref encountered\n"); } Combine_small(tag); hash_univ_count--; break; default: hash_univ_count--; Combine_small(tag); i = Wosize_val(obj); while (i != 0) { i--; hash_aux(Field(obj, i)); } break; } return; } /* Otherwise, obj is a pointer outside the heap, to an object with a priori unknown structure. Use its physical address as hash key. */ Combine((long) obj); }
value caml_bgzf_input(value bgzf, value buf, value ofs, value len) { CAMLparam4(bgzf,buf,ofs,len); CAMLreturn(Val_long(bgzf_read(BGZF_val(bgzf),&Byte_u(buf,Long_val(ofs)),Int_val(len)))); }
CAMLprim value caml_md5_update(value ctx, value src, value ofs, value len) { caml_MD5Update(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); return Val_unit; }
CAMLprim value caml_sha256_update(value ctx, value src, value ofs, value len) { SHA256_add_data(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len)); return Val_unit; }
static void hash_aux(value obj) { unsigned char * p; mlsize_t i, j; tag_t tag; hash_univ_limit--; if (hash_univ_count < 0 || hash_univ_limit < 0) return; again: if (Is_long(obj)) { hash_univ_count--; Combine(Long_val(obj)); return; } /* Pointers into the heap are well-structured blocks. So are atoms. We can inspect the block contents. */ Assert (Is_block (obj)); if (Is_in_value_area(obj)) { tag = Tag_val(obj); switch (tag) { case String_tag: hash_univ_count--; i = caml_string_length(obj); for (p = &Byte_u(obj, 0); i > 0; i--, p++) Combine_small(*p); break; case Double_tag: /* For doubles, we inspect their binary representation, LSB first. The results are consistent among all platforms with IEEE floats. */ hash_univ_count--; #ifdef ARCH_BIG_ENDIAN for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double); i > 0; p--, i--) #else for (p = &Byte_u(obj, 0), i = sizeof(double); i > 0; p++, i--) #endif Combine_small(*p); break; case Double_array_tag: hash_univ_count--; for (j = 0; j < Bosize_val(obj); j += sizeof(double)) { #ifdef ARCH_BIG_ENDIAN for (p = &Byte_u(obj, j + sizeof(double) - 1), i = sizeof(double); i > 0; p--, i--) #else for (p = &Byte_u(obj, j), i = sizeof(double); i > 0; p++, i--) #endif Combine_small(*p); } break; case Abstract_tag: /* We don't know anything about the contents of the block. Better do nothing. */ break; case Infix_tag: hash_aux(obj - Infix_offset_val(obj)); break; case Forward_tag: obj = Forward_val (obj); goto again; case Object_tag: hash_univ_count--; Combine(Oid_val(obj)); break; case Custom_tag: /* If no hashing function provided, do nothing */ if (Custom_ops_val(obj)->hash != NULL) { hash_univ_count--; Combine(Custom_ops_val(obj)->hash(obj)); } break; default: hash_univ_count--; Combine_small(tag); i = Wosize_val(obj); while (i != 0) { i--; hash_aux(Field(obj, i)); } break; } return; } /* Otherwise, obj is a pointer outside the heap, to an object with a priori unknown structure. Use its physical address as hash key. */ Combine((intnat) obj); }
CAMLprim value caml_string_get(value str, value index) { intnat idx = Long_val(index); if (idx < 0 || idx >= caml_string_length(str)) caml_array_bound_error(); return Val_int(Byte_u(str, idx)); }