int ccns_slice_name(struct ccn_charbuf *nm, struct ccns_slice *s) { struct ccn_charbuf *c; struct ccn_digest *digest = NULL; struct ccn_charbuf *hash = NULL; int res = 0; c = ccn_charbuf_create(); if (c == NULL) return (-1); res = append_slice(c, s); if (res < 0) goto Cleanup; digest = ccn_digest_create(CCN_DIGEST_SHA256); hash = ccn_charbuf_create_n(ccn_digest_size(digest)); if (hash == NULL) goto Cleanup; ccn_digest_init(digest); res |= ccn_digest_update(digest, c->buf, c->length); res |= ccn_digest_final(digest, hash->buf, hash->limit); if (res < 0) goto Cleanup; hash->length = hash->limit; if (ccn_name_from_uri(nm, "ccnx:/%C1.M.S.localhost/%C1.S.cs") < 0) res = -1; res |= ccn_name_append(nm, hash->buf, hash->length); Cleanup: ccn_charbuf_destroy(&c); ccn_digest_destroy(&digest); ccn_charbuf_destroy(&hash); return (res); }
static int write_slice(struct ccn *h, struct ccns_slice *slice, struct ccn_charbuf *name) { struct ccn_charbuf *content = NULL; unsigned char *cbuf = NULL; size_t clength = 0; struct ccn_charbuf *sw = NULL; struct ccn_charbuf *templ = NULL; struct ccn_charbuf *cob = NULL; struct ccn_signing_params sparm = CCN_SIGNING_PARAMS_INIT; struct ccn_closure *wc = NULL; int res; sw = ccn_charbuf_create_n(32 + name->length); if (sw == NULL) { res = -1; goto Cleanup; } ccn_charbuf_append_charbuf(sw, name); ccn_name_chop(sw, NULL, -1); // remove segment number ccn_name_from_uri(sw, "%C1.R.sw"); ccn_name_append_nonce(sw); // create and sign the content object cob = ccn_charbuf_create(); if (cob == NULL) { res = -1; goto Cleanup; } if (slice != NULL) { content = ccn_charbuf_create(); if (content == NULL) { res = -1; goto Cleanup; } res = append_slice(content, slice); if (res < 0) goto Cleanup; cbuf = content->buf; clength = content->length; } else { sparm.type = CCN_CONTENT_GONE; } sparm.sp_flags = CCN_SP_FINAL_BLOCK; res = ccn_sign_content(h, cob, name, &sparm, cbuf, clength); if (res < 0) goto Cleanup; // establish handler for interest in the slice content object wc = calloc(1, sizeof(*wc)); if (wc == NULL) { res = -1; goto Cleanup; } wc->p = &write_interest_handler; wc->data = cob; res = ccn_set_interest_filter(h, name, wc); if (res < 0) goto Cleanup; templ = make_scope1_template(); if (templ == NULL) { res = -1; goto Cleanup; } res = ccn_get(h, sw, templ, 1000, NULL, NULL, NULL, 0); if (res < 0) goto Cleanup; ccn_run(h, 1000); // give the repository a chance to fetch the data if (wc->intdata != 1) { res = -1; goto Cleanup; } res = 0; Cleanup: ccn_set_interest_filter(h, name, NULL); if (wc != NULL) free(wc); ccn_charbuf_destroy(&cob); ccn_charbuf_destroy(&content); ccn_charbuf_destroy(&sw); ccn_charbuf_destroy(&templ); return (res); }
/*
 * Parse a regexp Disjunction production and append the corresponding
 * bytecode to the compiler context's buffer.
 *
 * @param re_ctx      regexp compiler context (lexer, output buffer, limits)
 * @param expect_eof  non-zero at top level (pattern must end at EOF);
 *                    zero inside a group (must end at a closing paren)
 * @returns the character length of the disjunction if it matches a fixed
 *          number of characters, or -1 if it is "complex" (alternatives,
 *          quantifiers, captures, backreferences, lookaheads).  A simple
 *          result allows an enclosing quantifier to use the fast
 *          SQGREEDY/SQMINIMAL instructions.
 *
 * Errors (syntax errors, recursion/expansion limits) are thrown via
 * DUK_ERROR and do not return.
 */
static duk_int32_t parse_disjunction(duk_re_compiler_ctx *re_ctx, int expect_eof) {
	duk_int32_t atom_start_offset = -1;              /* bytecode start of last atom; -1 = no atom yet */
	duk_int32_t atom_char_length = 0;                /* negative -> complex atom */
	duk_int32_t unpatched_disjunction_split = -1;    /* offset of split awaiting a jump offset */
	duk_int32_t unpatched_disjunction_jump = -1;     /* offset of match-jump awaiting a jump offset */
	duk_uint32_t entry_offset = DUK_BUFLEN(re_ctx);  /* start of this disjunction's bytecode */
	duk_int32_t res = 0;                             /* -1 if disjunction is complex, char length if simple */

	if (re_ctx->recursion_depth >= re_ctx->recursion_limit) {
		DUK_ERROR(re_ctx->thr, DUK_ERR_INTERNAL_ERROR,
		          "regexp compiler recursion limit reached");
	}
	re_ctx->recursion_depth++;

	for (;;) {
		duk_int32_t new_atom_char_length;   /* char length of the atom parsed in this loop */
		duk_int32_t new_atom_start_offset;  /* bytecode start offset of the atom parsed in this loop
		                                     * (allows quantifiers to copy the atom bytecode)
		                                     */

		duk_lexer_parse_re_token(&re_ctx->lex, &re_ctx->curr_token);

		DUK_DDPRINT("re token: %d (num=%d, char=%c)",
		            re_ctx->curr_token.t,
		            re_ctx->curr_token.num,
		            (re_ctx->curr_token.num >= 0x20 && re_ctx->curr_token.num <= 0x7e) ?
		            (char) re_ctx->curr_token.num : '?');

		/* set by atom case clauses */
		new_atom_start_offset = -1;
		new_atom_char_length = -1;

		switch (re_ctx->curr_token.t) {
		case DUK_RETOK_DISJUNCTION: {
			/*
			 * The handling here is a bit tricky.  If a previous '|' has been
			 * processed, we have a pending split1 and a pending jump (for a
			 * previous match).  These need to be back-patched carefully.
			 * See docs for a detailed example.
			 */

			/* patch pending jump and split */
			if (unpatched_disjunction_jump >= 0) {
				duk_uint32_t offset;

				DUK_ASSERT(unpatched_disjunction_split >= 0);
				offset = unpatched_disjunction_jump;
				offset += insert_jump_offset(re_ctx,
				                             offset,
				                             DUK_BUFLEN(re_ctx) - offset);
				/* offset is now target of the pending split (right after jump) */
				insert_jump_offset(re_ctx,
				                   unpatched_disjunction_split,
				                   offset - unpatched_disjunction_split);
			}

			/* add a new pending split to the beginning of the entire disjunction */
			(void) insert_u32(re_ctx,
			                  entry_offset,
			                  DUK_REOP_SPLIT1);  /* prefer direct execution */
			unpatched_disjunction_split = entry_offset + 1;  /* +1 for opcode */

			/* add a new pending match jump for latest finished alternative */
			append_u32(re_ctx, DUK_REOP_JUMP);
			unpatched_disjunction_jump = DUK_BUFLEN(re_ctx);

			/* 'taint' result as complex */
			res = -1;
			break;
		}
		case DUK_RETOK_QUANTIFIER: {
			if (atom_start_offset < 0) {
				DUK_ERROR(re_ctx->thr, DUK_ERR_SYNTAX_ERROR,
				          "quantifier without preceding atom");
			}
			if (re_ctx->curr_token.qmin > re_ctx->curr_token.qmax) {
				DUK_ERROR(re_ctx->thr, DUK_ERR_SYNTAX_ERROR,
				          "quantifier values invalid (qmin > qmax)");
			}

			if (atom_char_length >= 0) {
				/*
				 * Simple atom
				 *
				 * If atom_char_length is zero, we'll have unbounded execution
				 * time for e.g. /()*x/.exec('x').  We can't just skip the
				 * match because it might have some side effects (for instance,
				 * if we allowed captures in simple atoms, the capture needs to
				 * happen).  The simple solution below is to force the
				 * quantifier to match at most once, since the additional
				 * matches have no effect.
				 */
				duk_int32_t atom_code_length;
				duk_uint32_t offset;
				duk_uint32_t qmin, qmax;

				qmin = re_ctx->curr_token.qmin;
				qmax = re_ctx->curr_token.qmax;
				if (atom_char_length == 0) {
					/* qmin and qmax will be 0 or 1 */
					if (qmin > 1) {
						qmin = 1;
					}
					if (qmax > 1) {
						qmax = 1;
					}
				}

				append_u32(re_ctx, DUK_REOP_MATCH);  /* complete 'sub atom' */
				atom_code_length = DUK_BUFLEN(re_ctx) - atom_start_offset;

				/* prepend the quantifier header before the atom bytecode */
				offset = atom_start_offset;
				if (re_ctx->curr_token.greedy) {
					offset += insert_u32(re_ctx, offset, DUK_REOP_SQGREEDY);
					offset += insert_u32(re_ctx, offset, qmin);
					offset += insert_u32(re_ctx, offset, qmax);
					offset += insert_u32(re_ctx, offset, atom_char_length);
					offset += insert_jump_offset(re_ctx, offset, atom_code_length);
				} else {
					offset += insert_u32(re_ctx, offset, DUK_REOP_SQMINIMAL);
					offset += insert_u32(re_ctx, offset, qmin);
					offset += insert_u32(re_ctx, offset, qmax);
					offset += insert_jump_offset(re_ctx, offset, atom_code_length);
				}
			} else {
				/*
				 * Complex atom
				 *
				 * The original code is used as a template, and removed at the
				 * end (this differs from the handling of simple quantifiers).
				 *
				 * NOTE: there is no current solution for empty atoms in
				 * complex quantifiers.  This would need some sort of a
				 * 'progress' instruction.
				 *
				 * XXX: impose limit on maximum result size, i.e.
				 * atom_code_len * atom_copies?
				 */
				duk_int32_t atom_code_length;
				duk_uint32_t atom_copies;
				duk_uint32_t tmp_qmin, tmp_qmax;

				/* pre-check how many atom copies we're willing to make
				 * (atom_copies not needed below)
				 */
				atom_copies = (re_ctx->curr_token.qmax == DUK_RE_QUANTIFIER_INFINITE) ?
				              re_ctx->curr_token.qmin : re_ctx->curr_token.qmax;
				if (atom_copies > DUK_RE_MAX_ATOM_COPIES) {
					DUK_ERROR(re_ctx->thr, DUK_ERR_INTERNAL_ERROR,
					          "quantifier expansion requires too many atom copies");
				}

				atom_code_length = DUK_BUFLEN(re_ctx) - atom_start_offset;

				/* insert the required matches (qmin) by copying the atom */
				tmp_qmin = re_ctx->curr_token.qmin;
				tmp_qmax = re_ctx->curr_token.qmax;
				while (tmp_qmin > 0) {
					append_slice(re_ctx, atom_start_offset, atom_code_length);
					tmp_qmin--;
					if (tmp_qmax != DUK_RE_QUANTIFIER_INFINITE) {
						tmp_qmax--;
					}
				}
				DUK_ASSERT(tmp_qmin == 0);

				/* insert code for matching the remainder - infinite or finite */
				if (tmp_qmax == DUK_RE_QUANTIFIER_INFINITE) {
					/* reuse last emitted atom for remaining 'infinite' quantifier */
					if (re_ctx->curr_token.qmin == 0) {
						/* Special case: original qmin was zero so there is
						 * nothing to repeat.  Emit an atom copy but jump over
						 * it here.
						 */
						append_u32(re_ctx, DUK_REOP_JUMP);
						append_jump_offset(re_ctx, atom_code_length);
						append_slice(re_ctx, atom_start_offset, atom_code_length);
					}
					if (re_ctx->curr_token.greedy) {
						append_u32(re_ctx, DUK_REOP_SPLIT2);  /* prefer jump */
					} else {
						append_u32(re_ctx, DUK_REOP_SPLIT1);  /* prefer direct */
					}
					append_jump_offset(re_ctx, -atom_code_length - 1);  /* -1 for opcode */
				} else {
					/*
					 * The remaining matches are emitted as sequence of SPLITs
					 * and atom copies; the SPLITs skip the remaining copies
					 * and match the sequel.  This sequence needs to be emitted
					 * starting from the last copy because the SPLITs are
					 * variable length due to the variable length skip offset.
					 * This causes a lot of memory copying now.
					 *
					 * Example structure (greedy, match maximum # atoms):
					 *
					 *   SPLIT1 LSEQ
					 *   (atom)
					 *   SPLIT1 LSEQ  ; <- the byte length of this instruction is needed
					 *   (atom)       ;    to encode the above SPLIT1 correctly
					 *   ...
					 * LSEQ:
					 */
					duk_uint32_t offset = DUK_BUFLEN(re_ctx);
					while (tmp_qmax > 0) {
						insert_slice(re_ctx, offset, atom_start_offset, atom_code_length);
						if (re_ctx->curr_token.greedy) {
							insert_u32(re_ctx, offset, DUK_REOP_SPLIT1);  /* prefer direct */
						} else {
							insert_u32(re_ctx, offset, DUK_REOP_SPLIT2);  /* prefer jump */
						}
						insert_jump_offset(re_ctx,
						                   offset + 1,  /* +1 for opcode */
						                   DUK_BUFLEN(re_ctx) - (offset + 1));
						tmp_qmax--;
					}
				}

				/* remove the original 'template' atom */
				remove_slice(re_ctx, atom_start_offset, atom_code_length);
			}

			/* 'taint' result as complex */
			res = -1;
			break;
		}
		case DUK_RETOK_ASSERT_START: {
			append_u32(re_ctx, DUK_REOP_ASSERT_START);
			break;
		}
		case DUK_RETOK_ASSERT_END: {
			append_u32(re_ctx, DUK_REOP_ASSERT_END);
			break;
		}
		case DUK_RETOK_ASSERT_WORD_BOUNDARY: {
			append_u32(re_ctx, DUK_REOP_ASSERT_WORD_BOUNDARY);
			break;
		}
		case DUK_RETOK_ASSERT_NOT_WORD_BOUNDARY: {
			append_u32(re_ctx, DUK_REOP_ASSERT_NOT_WORD_BOUNDARY);
			break;
		}
		case DUK_RETOK_ASSERT_START_POS_LOOKAHEAD:
		case DUK_RETOK_ASSERT_START_NEG_LOOKAHEAD: {
			duk_uint32_t offset;
			duk_uint32_t opcode = (re_ctx->curr_token.t == DUK_RETOK_ASSERT_START_POS_LOOKAHEAD) ?
			                      DUK_REOP_LOOKPOS : DUK_REOP_LOOKNEG;

			offset = DUK_BUFLEN(re_ctx);
			(void) parse_disjunction(re_ctx, 0);
			append_u32(re_ctx, DUK_REOP_MATCH);

			(void) insert_u32(re_ctx, offset, opcode);
			(void) insert_jump_offset(re_ctx,
			                          offset + 1,  /* +1 for opcode */
			                          DUK_BUFLEN(re_ctx) - (offset + 1));

			/* 'taint' result as complex -- this is conservative,
			 * as lookaheads do not backtrack.
			 */
			res = -1;
			break;
		}
		case DUK_RETOK_ATOM_PERIOD: {
			new_atom_char_length = 1;
			new_atom_start_offset = DUK_BUFLEN(re_ctx);
			append_u32(re_ctx, DUK_REOP_PERIOD);
			break;
		}
		case DUK_RETOK_ATOM_CHAR: {
			/* Note: successive characters could be joined into string matches
			 * but this is not trivial (consider e.g. '/xyz+/'); see docs for
			 * more discussion.
			 */
			duk_uint32_t ch;

			new_atom_char_length = 1;
			new_atom_start_offset = DUK_BUFLEN(re_ctx);
			append_u32(re_ctx, DUK_REOP_CHAR);
			ch = re_ctx->curr_token.num;
			if (re_ctx->re_flags & DUK_RE_FLAG_IGNORE_CASE) {
				ch = duk_unicode_re_canonicalize_char(re_ctx->thr, ch);
			}
			append_u32(re_ctx, ch);
			break;
		}
		case DUK_RETOK_ATOM_DIGIT:
		case DUK_RETOK_ATOM_NOT_DIGIT: {
			new_atom_char_length = 1;
			new_atom_start_offset = DUK_BUFLEN(re_ctx);
			append_u32(re_ctx,
			           (re_ctx->curr_token.t == DUK_RETOK_ATOM_DIGIT) ?
			           DUK_REOP_RANGES : DUK_REOP_INVRANGES);
			/* range count = table entries / 2 (each range is a lo,hi pair) */
			append_u32(re_ctx, sizeof(duk_unicode_re_ranges_digit) / (2 * sizeof(duk_uint16_t)));
			append_u16_list(re_ctx, duk_unicode_re_ranges_digit,
			                sizeof(duk_unicode_re_ranges_digit) / sizeof(duk_uint16_t));
			break;
		}
		case DUK_RETOK_ATOM_WHITE:
		case DUK_RETOK_ATOM_NOT_WHITE: {
			new_atom_char_length = 1;
			new_atom_start_offset = DUK_BUFLEN(re_ctx);
			append_u32(re_ctx,
			           (re_ctx->curr_token.t == DUK_RETOK_ATOM_WHITE) ?
			           DUK_REOP_RANGES : DUK_REOP_INVRANGES);
			append_u32(re_ctx, sizeof(duk_unicode_re_ranges_white) / (2 * sizeof(duk_uint16_t)));
			append_u16_list(re_ctx, duk_unicode_re_ranges_white,
			                sizeof(duk_unicode_re_ranges_white) / sizeof(duk_uint16_t));
			break;
		}
		case DUK_RETOK_ATOM_WORD_CHAR:
		case DUK_RETOK_ATOM_NOT_WORD_CHAR: {
			new_atom_char_length = 1;
			new_atom_start_offset = DUK_BUFLEN(re_ctx);
			append_u32(re_ctx,
			           (re_ctx->curr_token.t == DUK_RETOK_ATOM_WORD_CHAR) ?
			           DUK_REOP_RANGES : DUK_REOP_INVRANGES);
			append_u32(re_ctx, sizeof(duk_unicode_re_ranges_wordchar) / (2 * sizeof(duk_uint16_t)));
			append_u16_list(re_ctx, duk_unicode_re_ranges_wordchar,
			                sizeof(duk_unicode_re_ranges_wordchar) / sizeof(duk_uint16_t));
			break;
		}
		case DUK_RETOK_ATOM_BACKREFERENCE: {
			duk_uint32_t backref = (duk_uint32_t) re_ctx->curr_token.num;

			/* validated later against the final capture count */
			if (backref > re_ctx->highest_backref) {
				re_ctx->highest_backref = backref;
			}
			new_atom_char_length = -1;  /* mark as complex */
			new_atom_start_offset = DUK_BUFLEN(re_ctx);
			append_u32(re_ctx, DUK_REOP_BACKREFERENCE);
			append_u32(re_ctx, backref);
			break;
		}
		case DUK_RETOK_ATOM_START_CAPTURE_GROUP: {
			duk_uint32_t cap;

			new_atom_char_length = -1;  /* mark as complex (capture handling) */
			new_atom_start_offset = DUK_BUFLEN(re_ctx);
			cap = ++re_ctx->captures;
			append_u32(re_ctx, DUK_REOP_SAVE);
			append_u32(re_ctx, cap * 2);       /* even slot: group start */
			(void) parse_disjunction(re_ctx, 0);  /* retval (sub-atom char length) unused, tainted as complex above */
			append_u32(re_ctx, DUK_REOP_SAVE);
			append_u32(re_ctx, cap * 2 + 1);   /* odd slot: group end */
			break;
		}
		case DUK_RETOK_ATOM_START_NONCAPTURE_GROUP: {
			/* BUG FIX: the atom start offset must be recorded BEFORE parsing
			 * the group body.  Recording it afterwards made the offset point
			 * past the group's bytecode, so a following quantifier (which
			 * uses [atom_start_offset, buflen) as its copy template) would
			 * operate on a zero-length atom instead of the group.
			 */
			new_atom_start_offset = DUK_BUFLEN(re_ctx);
			new_atom_char_length = parse_disjunction(re_ctx, 0);
			break;
		}
		case DUK_RETOK_ATOM_START_CHARCLASS:
		case DUK_RETOK_ATOM_START_CHARCLASS_INVERTED: {
			/*
			 * Range parsing is done with a special lexer function which calls
			 * us for every range parsed.  This is different from how rest of
			 * the parsing works, but avoids a heavy, arbitrary size
			 * intermediate value type to hold the ranges.
			 *
			 * Another complication is the handling of character ranges when
			 * case insensitive matching is used (see docs for discussion).
			 * The range handler callback given to the lexer takes care of
			 * this as well.
			 *
			 * Note that duplicate ranges are not eliminated when parsing
			 * character classes, so that canonicalization of
			 *
			 *   [0-9a-fA-Fx-{]
			 *
			 * creates the result (note the duplicate ranges):
			 *
			 *   [0-9A-FA-FX-Z{-{]
			 *
			 * where [x-{] is split as a result of canonicalization.  The
			 * duplicate ranges are not a semantics issue: they work
			 * correctly.
			 */
			duk_uint32_t offset;

			DUK_DDPRINT("character class");

			/* insert ranges instruction, range count patched in later */
			new_atom_char_length = 1;
			new_atom_start_offset = DUK_BUFLEN(re_ctx);
			append_u32(re_ctx,
			           (re_ctx->curr_token.t == DUK_RETOK_ATOM_START_CHARCLASS) ?
			           DUK_REOP_RANGES : DUK_REOP_INVRANGES);
			offset = DUK_BUFLEN(re_ctx);  /* patch in range count later */

			/* parse ranges until character class ends */
			re_ctx->nranges = 0;  /* note: ctx-wide temporary */
			duk_lexer_parse_re_ranges(&re_ctx->lex, generate_ranges, (void *) re_ctx);

			/* insert range count */
			insert_u32(re_ctx, offset, re_ctx->nranges);
			break;
		}
		case DUK_RETOK_ATOM_END_GROUP: {
			if (expect_eof) {
				/* top-level disjunction must not contain a stray ')' */
				DUK_ERROR(re_ctx->thr, DUK_ERR_SYNTAX_ERROR,
				          "unexpected closing parenthesis");
			}
			goto done;
		}
		case DUK_RETOK_EOF: {
			if (!expect_eof) {
				/* an open group must be closed before the pattern ends */
				DUK_ERROR(re_ctx->thr, DUK_ERR_SYNTAX_ERROR,
				          "unexpected end of pattern");
			}
			goto done;
		}
		default: {
			DUK_ERROR(re_ctx->thr, DUK_ERR_SYNTAX_ERROR,
			          "unexpected token in regexp");
		}
		}

		/* a complex (new) atom taints the result */
		if (new_atom_start_offset >= 0) {
			if (new_atom_char_length < 0) {
				res = -1;
			} else if (res >= 0) {
				/* only advance if not tainted */
				res += new_atom_char_length;
			}
		}

		/* record previous atom info in case next token is a quantifier */
		atom_start_offset = new_atom_start_offset;
		atom_char_length = new_atom_char_length;
	}

 done:
	/* finish up pending jump and split for last alternative */
	if (unpatched_disjunction_jump >= 0) {
		duk_uint32_t offset;

		DUK_ASSERT(unpatched_disjunction_split >= 0);
		offset = unpatched_disjunction_jump;
		offset += insert_jump_offset(re_ctx,
		                             offset,
		                             DUK_BUFLEN(re_ctx) - offset);
		/* offset is now target of the pending split (right after jump) */
		insert_jump_offset(re_ctx,
		                   unpatched_disjunction_split,
		                   offset - unpatched_disjunction_split);
	}

	re_ctx->recursion_depth--;

	return res;
}