示例#1
0
value camlzip_deflate(value vzs, value srcbuf, value srcpos, value srclen,
                      value dstbuf, value dstpos, value dstlen,
                      value vflush)
{
  z_stream * zs = ZStream_val(vzs);
  int retcode;
  long used_in, used_out;
  value res;

  zs->next_in = &Byte_u(srcbuf, Long_val(srcpos));
  zs->avail_in = Long_val(srclen);
  zs->next_out = &Byte_u(dstbuf, Long_val(dstpos));
  zs->avail_out = Long_val(dstlen);
  retcode = deflate(zs, camlzip_flush_table[Int_val(vflush)]);
  if (retcode < 0) camlzip_error("Zlib.deflate", vzs);
  used_in = Long_val(srclen) - zs->avail_in;
  used_out = Long_val(dstlen) - zs->avail_out;
  zs->next_in = NULL;         /* not required, but cleaner */
  zs->next_out = NULL;        /* (avoid dangling pointers into Caml heap) */
  res = caml_alloc_small(3, 0);
  Field(res, 0) = Val_bool(retcode == Z_STREAM_END);
  Field(res, 1) = Val_int(used_in);
  Field(res, 2) = Val_int(used_out);
  return res;
}
示例#2
0
CAMLprim value re_search_backward(value re, value str, value startpos)
{
  unsigned char * starttxt = &Byte_u(str, 0);
  unsigned char * txt = &Byte_u(str, Long_val(startpos));
  unsigned char * endtxt = &Byte_u(str, caml_string_length(str));
  unsigned char * startchars;

  if (txt < starttxt || txt > endtxt)
    caml_invalid_argument("Str.search_backward");
  if (Startchars(re) == -1) {
    do {
      if (re_match(re, starttxt, txt, endtxt, 0))
        return re_alloc_groups(re, str);
      txt--;
    } while (txt >= starttxt);
    return Atom(0);
  } else {
    startchars =
      (unsigned char *) String_val(Field(Cpool(re), Startchars(re)));
    do {
      while (txt > starttxt && startchars[*txt] == 0) txt--;
      if (re_match(re, starttxt, txt, endtxt, 0))
        return re_alloc_groups(re, str);
      txt--;
    } while (txt >= starttxt);
    return Atom(0);
  }
}
示例#3
0
CAMLprim value re_partial_match(value re, value str, value pos)
{
  unsigned char * starttxt = &Byte_u(str, 0);
  unsigned char * txt = &Byte_u(str, Long_val(pos));
  unsigned char * endtxt = &Byte_u(str, caml_string_length(str));

  if (txt < starttxt || txt > endtxt)
    caml_invalid_argument("Str.string_partial_match");
  if (re_match(re, starttxt, txt, endtxt, 1)) {
    return re_alloc_groups(re, str);
  } else {
    return Atom(0);
  }
}
示例#4
0
CAMLprim value caml_string_set(value str, value index, value newval)
{
  intnat idx = Long_val(index);
  if (idx < 0 || idx >= caml_string_length(str)) caml_array_bound_error();
  Byte_u(str, idx) = Int_val(newval);
  return Val_unit;
}
示例#5
0
/* Contrary to caml_md5_chan, this function releases the runtime lock.

   [fd] must be a file descriptor open for reading and not be
   nonblocking, otherwise the function might fail non-deterministically.
 */
CAMLprim value caml_md5_fd(value fd)
{
  CAMLparam1 (fd);
  value res;
  struct MD5Context ctx;
  caml_enter_blocking_section();
  {
    intnat bytes_read;
    char buffer[4096];

    caml_MD5Init(&ctx);
    while (1){
      bytes_read = read (Int_val(fd), buffer, sizeof(buffer));
      if (bytes_read < 0) {
        if (errno == EINTR) continue;
        caml_leave_blocking_section();
        uerror("caml_md5_fd", Nothing);
      }
      if (bytes_read == 0) break;
      caml_MD5Update (&ctx, (unsigned char *) buffer, bytes_read);
    }
  }
  caml_leave_blocking_section();
  res = caml_alloc_string(16);
  caml_MD5Final(&Byte_u(res, 0), &ctx);
  CAMLreturn (res);
}
示例#6
0
CAMLprim value caml_lex_engine(struct lexing_table *tbl, value start_state,
                               struct lexer_buffer *lexbuf)
{
  int state, base, backtrk, c;

  state = Int_val(start_state);
  if (state >= 0) {
    /* First entry */
    lexbuf->lex_last_pos = lexbuf->lex_start_pos = lexbuf->lex_curr_pos;
    lexbuf->lex_last_action = Val_int(-1);
  } else {
    /* Reentry after refill */
    state = -state - 1;
  }
  while(1) {
    /* Lookup base address or action number for current state */
    base = Short(tbl->lex_base, state);
    if (base < 0) return Val_int(-base-1);
    /* See if it's a backtrack point */
    backtrk = Short(tbl->lex_backtrk, state);
    if (backtrk >= 0) {
      lexbuf->lex_last_pos = lexbuf->lex_curr_pos;
      lexbuf->lex_last_action = Val_int(backtrk);
    }
    /* See if we need a refill */
    if (lexbuf->lex_curr_pos >= lexbuf->lex_buffer_len){
      if (lexbuf->lex_eof_reached == Val_bool (0)){
        return Val_int(-state - 1);
      }else{
        c = 256;
      }
    }else{
      /* Read next input char */
      c = Byte_u(lexbuf->lex_buffer, Long_val(lexbuf->lex_curr_pos));
      lexbuf->lex_curr_pos += 2;
    }
    /* Determine next state */
    if (Short(tbl->lex_check, base + c) == state)
      state = Short(tbl->lex_trans, base + c);
    else
      state = Short(tbl->lex_default, state);
    /* If no transition on this char, return to last backtrack point */
    if (state < 0) {
      lexbuf->lex_curr_pos = lexbuf->lex_last_pos;
      if (lexbuf->lex_last_action == Val_int(-1)) {
        caml_failwith("lexing: empty token");
      } else {
        return lexbuf->lex_last_action;
      }
    }else{
      /* Erase the EOF condition only if the EOF pseudo-character was
         consumed by the automaton (i.e. there was no backtrack above)
       */
      if (c == 256) lexbuf->lex_eof_reached = Val_bool (0);
    }
  }
}
示例#7
0
CAMLprim value caml_sha256_final(value ctx)
{
  CAMLparam1(ctx);
  CAMLlocal1(res);

  res = alloc_string(32);
  SHA256_finish(Context_val(ctx), &Byte_u(res, 0));
  CAMLreturn(res);
}
示例#8
0
CAMLprim value caml_md5_final(value ctx)
{
  CAMLparam1(ctx);
  CAMLlocal1(res);

  res = alloc_string(16);
  caml_MD5Final(&Byte_u(res, 0), Context_val(ctx));
  CAMLreturn(res);
}
示例#9
0
void print_string(value v)
{
    char *s;
    int i, size;

    s = (char *) v;
    size = string_length(v);

    printf("\"");
    for (i=0; i<size; i++)
    {
        unsigned char p = Byte_u(s, i);
        if ((p > 31) && (p < 128))
            printf("%c", s[i]);
        else
            printf("%u", p);
    }
    printf("\"");
    return;
}
示例#10
0
CAMLprim value digest_array (value v_iarr)
{
	CAMLparam1(v_iarr);
	CAMLlocal1(result);
	MD5Context context;
	int len = Bigarray_val(v_iarr)->dim[0];
	unsigned char *buf = Data_bigarray_val(v_iarr);

	MD5Init (&context);
	while (len > 0) {
		int block = (len > 8192) ? 8192 : len;
		MD5Update (&context, buf, block);
		buf += block;
		len -= block;
	}

	result = alloc_string (16);
	MD5Final (&Byte_u(result, 0), &context);
	CAMLreturn(result);
}
示例#11
0
value get_next_char(struct lexer_buffer *lexbuf)
{
	mlsize_t buffer_len, curr_pos;

	buffer_len = string_length(lexbuf->lex_buffer);
	assert(VAL_TO_LONG(lexbuf->lex_curr_pos) > 0);
	curr_pos = (mlsize_t) (VAL_TO_LONG(lexbuf->lex_curr_pos));
	if (curr_pos >= buffer_len) {
		PUSH_ROOTS(r, 1);
		r[0] = (value) lexbuf;
		callback(lexbuf->refill_buff, (value) lexbuf);
		lexbuf = (struct lexer_buffer *) r[0];
		assert(VAL_TO_LONG(lexbuf->lex_curr_pos) > 0);
		curr_pos = (mlsize_t) (VAL_TO_LONG(lexbuf->lex_curr_pos));
		POP_ROOTS();
	}
	lexbuf->lex_curr_pos += 2;

	return INT_TO_VAL(Byte_u(lexbuf->lex_buffer, curr_pos));
}
示例#12
0
/* The bytecode interpreter for the NFA */
static int re_match(value re,
                    unsigned char * starttxt,
                    register unsigned char * txt,
                    register unsigned char * endtxt,
                    int accept_partial_match)
{
  register value * pc;
  intnat instr;
  struct backtrack_stack * stack;
  union backtrack_point * sp;
  value cpool;
  value normtable;
  unsigned char c;
  union backtrack_point back;

  { int i;
    struct re_group * p;
    unsigned char ** q;
    for (p = &re_group[1], i = Numgroups(re); i > 1; i--, p++)
      p->start = p->end = NULL;
    for (q = &re_register[0], i = Numregisters(re); i > 0; i--, q++)
      *q = NULL;
  }

  pc = &Field(Prog(re), 0);
  stack = &initial_stack;
  sp = stack->point;
  cpool = Cpool(re);
  normtable = Normtable(re);
  re_group[0].start = txt;

  while (1) {
    instr = Long_val(*pc++);
    switch (Opcode(instr)) {
    case CHAR:
      if (txt == endtxt) goto prefix_match;
      if (*txt != Arg(instr)) goto backtrack;
      txt++;
      break;
    case CHARNORM:
      if (txt == endtxt) goto prefix_match;
      if (Byte_u(normtable, *txt) != Arg(instr)) goto backtrack;
      txt++;
      break;
    case STRING: {
      unsigned char * s =
        (unsigned char *) String_val(Field(cpool, Arg(instr)));
      while ((c = *s++) != 0) {
        if (txt == endtxt) goto prefix_match;
        if (c != *txt) goto backtrack;
        txt++;
      }
      break;
    }
    case STRINGNORM: {
      unsigned char * s =
        (unsigned char *) String_val(Field(cpool, Arg(instr)));
      while ((c = *s++) != 0) {
        if (txt == endtxt) goto prefix_match;
        if (c != Byte_u(normtable, *txt)) goto backtrack;
        txt++;
      }
      break;
    }
    case CHARCLASS:
      if (txt == endtxt) goto prefix_match;
      if (! In_bitset(String_val(Field(cpool, Arg(instr))), *txt, c))
        goto backtrack;
      txt++;
      break;
    case BOL:
      if (txt > starttxt && txt[-1] != '\n') goto backtrack;
      break;
    case EOL:
      if (txt < endtxt && *txt != '\n') goto backtrack;
      break;
    case WORDBOUNDARY:
      /* At beginning and end of text: no
         At beginning of text: OK if current char is a letter
         At end of text: OK if previous char is a letter
         Otherwise:
           OK if previous char is a letter and current char not a letter
           or previous char is not a letter and current char is a letter */
      if (txt == starttxt) {
        if (txt == endtxt) goto prefix_match;
        if (Is_word_letter(txt[0])) break;
        goto backtrack;
      } else if (txt == endtxt) {
        if (Is_word_letter(txt[-1])) break;
        goto backtrack;
      } else {
        if (Is_word_letter(txt[-1]) != Is_word_letter(txt[0])) break;
        goto backtrack;
      }
    case BEGGROUP: {
      int group_no = Arg(instr);
      struct re_group * group = &(re_group[group_no]);
      back.undo.loc = &(group->start);
      back.undo.val = group->start;
      group->start = txt;
      goto push;
    }
    case ENDGROUP: {
      int group_no = Arg(instr);
      struct re_group * group = &(re_group[group_no]);
      back.undo.loc = &(group->end);
      back.undo.val = group->end;
      group->end = txt;
      goto push;
    }
    case REFGROUP: {
      int group_no = Arg(instr);
      struct re_group * group = &(re_group[group_no]);
      unsigned char * s;
      if (group->start == NULL || group->end == NULL) goto backtrack;
      for (s = group->start; s < group->end; s++) {
        if (txt == endtxt) goto prefix_match;
        if (*s != *txt) goto backtrack;
        txt++;
      }
      break;
    }
    case ACCEPT:
      goto accept;
    case SIMPLEOPT: {
      char * set = String_val(Field(cpool, Arg(instr)));
      if (txt < endtxt && In_bitset(set, *txt, c)) txt++;
      break;
    }
    case SIMPLESTAR: {
      char * set = String_val(Field(cpool, Arg(instr)));
      while (txt < endtxt && In_bitset(set, *txt, c))
        txt++;
      break;
    }
    case SIMPLEPLUS: {
      char * set = String_val(Field(cpool, Arg(instr)));
      if (txt == endtxt) goto prefix_match;
      if (! In_bitset(set, *txt, c)) goto backtrack;
      txt++;
      while (txt < endtxt && In_bitset(set, *txt, c))
        txt++;
      break;
    }
    case GOTO:
      pc = pc + SignedArg(instr);
      break;
    case PUSHBACK:
      back.pos.pc = Set_tag(pc + SignedArg(instr));
      back.pos.txt = txt;
      goto push;
    case SETMARK: {
      int reg_no = Arg(instr);
      unsigned char ** reg = &(re_register[reg_no]);
      back.undo.loc = reg;
      back.undo.val = *reg;
      *reg = txt;
      goto push;
    }
    case CHECKPROGRESS: {
      int reg_no = Arg(instr);
      if (re_register[reg_no] == txt)
        goto backtrack;
      break;
    }
    default:
      caml_fatal_error ("impossible case in re_match");
    }
    /* Continue with next instruction */
    continue;

  push:
    /* Push an item on the backtrack stack and continue with next instr */
    if (sp == stack->point + BACKTRACK_STACK_BLOCK_SIZE) {
      struct backtrack_stack * newstack =
        caml_stat_alloc(sizeof(struct backtrack_stack));
      newstack->previous = stack;
      stack = newstack;
      sp = stack->point;
    }
    *sp = back;
    sp++;
    continue;

  prefix_match:
    /* We get here when matching failed because the end of text
       was encountered. */
    if (accept_partial_match) goto accept;

  backtrack:
    /* We get here when matching fails.  Backtrack to most recent saved
       program point, undoing variable assignments on the way. */
    while (1) {
      if (sp == stack->point) {
        struct backtrack_stack * prevstack = stack->previous;
        if (prevstack == NULL) return 0;
        caml_stat_free(stack);
        stack = prevstack;
        sp = stack->point + BACKTRACK_STACK_BLOCK_SIZE;
      }
      sp--;
      if (Tag_is_set(sp->pos.pc)) {
        pc = Clear_tag(sp->pos.pc);
        txt = sp->pos.txt;
        break;
      } else {
        *(sp->undo.loc) = sp->undo.val;
      }
    }
    continue;
  }

 accept:
  /* We get here when the regexp was successfully matched */
  free_backtrack_stack(stack);
  re_group[0].end = txt;
  return 1;
}
示例#13
0
value camlzip_update_crc32(value crc, value buf, value pos, value len)
{
  return caml_copy_int32(crc32((uint32) Int32_val(crc), 
                          &Byte_u(buf, Long_val(pos)),
                          Long_val(len)));
}
示例#14
0
static void hash_aux(value obj)
{
  unsigned char * p;
  mlsize_t i;
  tag_t tag;

  hash_univ_limit--;
  if (hash_univ_count < 0 || hash_univ_limit < 0) {
	  if (safe) {
		  fatal_error("hash: count limit exceeded\n");
	  } else {
		  return;
	  }
  }

  if (IS_LONG(obj)) {
	  hash_univ_count--;
	  Combine(VAL_TO_LONG(obj));
	  return;
  }

  /* Atoms are not in the heap, but it's better to hash their tag
     than to do nothing. */

  if (Is_atom(obj)) {
    tag = Tag_val(obj);
    hash_univ_count--;
    Combine_small(tag);
    return;
  }

  /* Pointers into the heap are well-structured blocks.
     We can inspect the block contents. */

  if (Is_in_heap(obj) || Is_young(obj)) {
    tag = Tag_val(obj);
    switch (tag) {
    case String_tag:
      hash_univ_count--;
      {
	      mlsize_t len = string_length(obj);
	      i = len <= 128 ? len : 128;
	      // Hash on 128 first characters
	      for (p = &Byte_u(obj, 0); i > 0; i--, p++) {
		      Combine_small(*p);
	      }
	      // Hash on logarithmically many additional characters beyond 128
	      for (i = 1; i+127 < len; i *= 2) {
		      Combine_small(Byte_u(obj, 127+i));
	      }
	      break;
      }
    case Double_tag:
      /* For doubles, we inspect their binary representation, LSB first.
         The results are consistent among all platforms with IEEE floats. */
      hash_univ_count--;
#ifdef WORDS_BIGENDIAN
      for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double);
           i > 0;
           p--, i--)
#else
      for (p = &Byte_u(obj, 0), i = sizeof(double);
           i > 0;
           p++, i--)
#endif
        Combine_small(*p);
      break;
    case Abstract_tag:
    case Final_tag:
      /* We don't know anything about the contents of the block.
         Better do nothing. */
      break;
    case Reference_tag:
      /* We can't hash on the heap address itself, since the reference block
       * may be moved (from the young generation to the old one).
       * But, we may follow the pointer.  On cyclic structures this will
       * terminate because the hash_univ_count gets decremented.
       */

      /* Poor idea to hash on the pointed-to structure, even so: it may change,
       * and hence the hash value of the value changes, although the ref
       * doesn't.
       *
       * This breaks most hash table implementations.  sestoft 2000-02-20.
       */

	    if (safe) {
		    fatal_error("hash: ref encountered\n");
	    }
	    Combine_small(tag);
	    hash_univ_count--;
      break;
    default:
      hash_univ_count--;
      Combine_small(tag);
      i = Wosize_val(obj);
      while (i != 0) {
        i--;
        hash_aux(Field(obj, i));
      }
      break;
    }
    return;
  }

  /* Otherwise, obj is a pointer outside the heap, to an object with
     a priori unknown structure. Use its physical address as hash key. */
  Combine((long) obj);
}
示例#15
0
value caml_bgzf_input(value bgzf, value buf, value ofs, value len) {
	CAMLparam4(bgzf,buf,ofs,len);
	CAMLreturn(Val_long(bgzf_read(BGZF_val(bgzf),&Byte_u(buf,Long_val(ofs)),Int_val(len))));
}
示例#16
0
CAMLprim value caml_md5_update(value ctx, value src, value ofs, value len)
{
  caml_MD5Update(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len));
  return Val_unit;
}
示例#17
0
CAMLprim value caml_sha256_update(value ctx, value src, value ofs, value len)
{
  SHA256_add_data(Context_val(ctx), &Byte_u(src, Long_val(ofs)), Long_val(len));
  return Val_unit;
}
示例#18
0
文件: hash.c 项目: OpenXT/ocaml
static void hash_aux(value obj)
{
  unsigned char * p;
  mlsize_t i, j;
  tag_t tag;

  hash_univ_limit--;
  if (hash_univ_count < 0 || hash_univ_limit < 0) return;

 again:
  if (Is_long(obj)) {
    hash_univ_count--;
    Combine(Long_val(obj));
    return;
  }

  /* Pointers into the heap are well-structured blocks. So are atoms.
     We can inspect the block contents. */

  Assert (Is_block (obj));  
  if (Is_in_value_area(obj)) {
    tag = Tag_val(obj);
    switch (tag) {
    case String_tag:
      hash_univ_count--;
      i = caml_string_length(obj);
      for (p = &Byte_u(obj, 0); i > 0; i--, p++)
        Combine_small(*p);
      break;
    case Double_tag:
      /* For doubles, we inspect their binary representation, LSB first.
         The results are consistent among all platforms with IEEE floats. */
      hash_univ_count--;
#ifdef ARCH_BIG_ENDIAN
      for (p = &Byte_u(obj, sizeof(double) - 1), i = sizeof(double);
           i > 0;
           p--, i--)
#else
      for (p = &Byte_u(obj, 0), i = sizeof(double);
           i > 0;
           p++, i--)
#endif
        Combine_small(*p);
      break;
    case Double_array_tag:
      hash_univ_count--;
      for (j = 0; j < Bosize_val(obj); j += sizeof(double)) {
#ifdef ARCH_BIG_ENDIAN
      for (p = &Byte_u(obj, j + sizeof(double) - 1), i = sizeof(double);
           i > 0;
           p--, i--)
#else
      for (p = &Byte_u(obj, j), i = sizeof(double);
           i > 0;
           p++, i--)
#endif
        Combine_small(*p);
      }
      break;
    case Abstract_tag:
      /* We don't know anything about the contents of the block.
         Better do nothing. */
      break;
    case Infix_tag:
      hash_aux(obj - Infix_offset_val(obj));
      break;
    case Forward_tag:
      obj = Forward_val (obj);
      goto again;
    case Object_tag:
      hash_univ_count--;
      Combine(Oid_val(obj));
      break;
    case Custom_tag:
      /* If no hashing function provided, do nothing */
      if (Custom_ops_val(obj)->hash != NULL) {
        hash_univ_count--;
        Combine(Custom_ops_val(obj)->hash(obj));
      }
      break;
    default:
      hash_univ_count--;
      Combine_small(tag);
      i = Wosize_val(obj);
      while (i != 0) {
        i--;
        hash_aux(Field(obj, i));
      }
      break;
    }
    return;
  }

  /* Otherwise, obj is a pointer outside the heap, to an object with
     a priori unknown structure. Use its physical address as hash key. */
  Combine((intnat) obj);
}
示例#19
0
CAMLprim value caml_string_get(value str, value index)
{
  intnat idx = Long_val(index);
  if (idx < 0 || idx >= caml_string_length(str)) caml_array_bound_error();
  return Val_int(Byte_u(str, idx));
}