// encoding functions // for all: // dest_len is in code units. // returns number of code units put into dest buffer. int encode_ucs_to_utf8(const wchar_t* src, unsigned char* dest, int dest_len, int src_len) { // count length for null terminated source... if(src_len == 0) { src_len = (int)wcslen(src); } int destCapacity = dest_len; // while there is data in the source buffer, for (int idx = 0; idx < src_len; ++idx) { wchar_t cp = src[idx]; // check there is enough destination buffer to receive this encoded unit (exit loop & return if not) if (destCapacity < encoded_size(cp)) { break; } if (cp < 0x80) { *dest++ = (unsigned char)cp; --destCapacity; } else if (cp < 0x0800) { *dest++ = (unsigned char)((cp >> 6) | 0xC0); *dest++ = (unsigned char)((cp & 0x3F) | 0x80); destCapacity -= 2; } else if (cp < 0x10000)
/*
 * Emit the MATLAB function "<name>_encodedSize_nohash" for the struct `ls`.
 *
 * The generated function starts with "s = uint32(0);" and adds one term per
 * struct member:
 *   - when encoded_size() reports a value > -1 for the member type, the term
 *     is that constant multiplied by every dimension size of the member;
 *   - otherwise the term is a call to the member type's own
 *     _encodedSize_nohash function, wrapped in one nested "for" loop per
 *     dimension when the member is an array.
 *
 * NOTE(review): `sn` and the output stream used by emit()/emit_start()/
 * emit_continue()/emit_end() are not declared in this function; they are
 * presumably introduced by the start_file() macro -- confirm.
 *
 * Returns 0 once the whole function has been emitted.
 */
static int emit_encoded_size_nohash(lcmgen_t *lcm, lcm_struct_t *ls)
{
    start_file("_encodedSize_nohash");

    emit(0, "function s = %s_encodedSize_nohash(S)", sn);
    emit(1, "s = uint32(0);");

    for (unsigned int m = 0; m < g_ptr_array_size(ls->members); m++) {
        lcm_member_t *lm = (lcm_member_t *) g_ptr_array_index(ls->members, m);
        char *lm_tnc = dots_to_double_colons(lm->type->lctypename);
        int const encsize = encoded_size(lm_tnc);
        int const dimensions = g_ptr_array_size(lm->dimensions);

        if (encsize > -1) {
            /* known constant size: s = s + <size> * dim0 * dim1 ... */
            emit_start(1, "s = s + %d", encsize);
            for (int dx = 0; dx < dimensions; ++dx) {
                lcm_dimension_t *dim = (lcm_dimension_t *) g_ptr_array_index(lm->dimensions, dx);
                emit_continue(" * %s", dim->size);
            }
            emit_end(";");
        } else if (0 == dimensions) {
            /* scalar member of non-constant size: delegate to its own size function */
            emit(1, "s = s + %s_encodedSize_nohash(S.%s);", map_type_name(lm_tnc), lm->membername);
        } else {
            /* open one "for" loop per dimension, each nested one level deeper */
            for (int dx = 0; dx < dimensions; ++dx) {
                lcm_dimension_t *dim = (lcm_dimension_t *) g_ptr_array_index(lm->dimensions, dx);
                emit(1 + dx, "for dx%d = 1:%s", dx, dim->size);
            }

            /* loop body: sum the size of the element indexed by (dx0,...,dxN-1) */
            emit_start(1 + dimensions, "s = s + %s_encodedSize_nohash(S.%s(", map_type_name(lm_tnc), lm->membername);
            for (int dx = 0; dx < dimensions - 1; ++dx) {
                emit_continue("dx%d,", dx);
            }
            emit_end("dx%d));", dimensions - 1);

            /* close the loops, innermost first */
            for (int dx = dimensions; dx > 0; --dx) {
                emit(dx, "end");
            }
        }

        free(lm_tnc);
    }

    emit(0, "%%endfunction");
    emit(0, "");
    end_file();

    /* The function is declared to return int; never fall off the end.
     * NOTE(review): if end_file() already returns, this is unreachable but harmless. */
    return 0;
}
// build an internal buffer with the string encoded as utf8 (remains valid until string is modified). utf8* String::build_utf8_buff(void) const { size_type buffsize = encoded_size(ptr(), d_cplength) + 1; if (buffsize > d_encodedbufflen) { if (d_encodedbufflen > 0) { delete[] d_encodedbuff; } d_encodedbuff = new utf8[buffsize]; d_encodedbufflen = buffsize; } encode(ptr(), d_encodedbuff, buffsize, d_cplength); // always add a null at end d_encodedbuff[buffsize-1] = ((utf8)0); d_encodeddatlen = buffsize; return d_encodedbuff; }
// build an internal buffer with the string encoded as utf8 (remains valid until string is modified). utf8* String::build_utf8_buff(void) const { size_type buffsize = encoded_size(ptr(), d_cplength) + 1; if (buffsize > d_encodedbufflen) { if (d_encodedbufflen > 0) { CEGUI_DELETE_ARRAY_PT(d_encodedbuff, utf8, d_encodedbufflen, String); } d_encodedbuff = CEGUI_NEW_ARRAY_PT(utf8, buffsize, String); d_encodedbufflen = buffsize; } encode(ptr(), d_encodedbuff, buffsize, d_cplength); // always add a null at end d_encodedbuff[buffsize-1] = ((utf8)0); d_encodeddatlen = buffsize; return d_encodedbuff; }
/*
 * Percent-encode `postdata` into a newly allocated, NUL-terminated string.
 *
 * Alphanumeric bytes are copied unchanged; every other byte is written as
 * '%' followed by its two-digit lowercase hexadecimal value.
 *
 * The buffer holds encoded_size(postdata) + 1 bytes and comes from
 * cli_calloc().
 * NOTE(review): assumes encoded_size() counts 1 byte per alphanumeric input
 * byte and 3 per other byte, and that the caller releases the result --
 * confirm against encoded_size() and the callers.
 *
 * Returns NULL when encoded_size() reports 0 or the allocation fails.
 */
char *encode_data(const char *postdata)
{
    char *buf;
    size_t bufsz, i, j;

    bufsz = encoded_size(postdata);
    if (bufsz == 0)
        return NULL;

    buf = cli_calloc(1, bufsz + 1);
    if (!buf)
        return NULL;

    for (i = 0, j = 0; postdata[i] != '\0'; i++) {
        /* Work on the byte as unsigned char: passing a negative plain char
         * to isalnum() is undefined, and a negative value promoted to int
         * would make "%02x" print 8 hex digits and overrun the buffer. */
        const unsigned char c = (unsigned char)postdata[i];

        if (isalnum(c)) {
            buf[j++] = (char)c;
        } else {
            /* exactly 3 characters plus the terminating NUL */
            snprintf(buf + j, 4, "%%%02x", (unsigned int)c);
            j += 3;
        }
    }

    return buf;
}
/* Load ELF 'filename', parse the .eh_frame contents, and for each entry in the
 * second argument check whether its address is contained in the range of some
 * Frame Description Entry. If it does, fill in the function range of the
 * entry. In other words, try to assign start address and length of function
 * corresponding to each backtrace entry. We'll need that for the disassembly.
 *
 * Fails quietly - we should still be able to use the build ids.
 *
 * I wonder if this is really better than parsing eu-readelf text output.
 *
 * Parameters:
 *   filename - only used in log messages and passed to xelf_section_by_name.
 *   entries  - list of struct backtrace_entry; function_initial_loc and
 *              function_length are filled in for entries whose
 *              build_id_offset falls inside an FDE range.
 *   e        - libelf handle to read from.
 *
 * Returns a hash table with one element per FDE that was processed: the key
 * is the value returned by addr_alloc() for (start + executable base), the
 * value is the FDE address range stored directly in the pointer. Returns NULL
 * on error.
 */
static GHashTable *elf_iterate_fdes(const char *filename, GList *entries, Elf *e)
{
    const unsigned char *e_ident;
    Elf_Data *scn_data;
    GElf_Shdr shdr;
    GElf_Phdr phdr;
    size_t phnum;
    GHashTable *retval = NULL; /* NULL = error */

    e_ident = (unsigned char *)elf_getident(e, NULL);
    if (e_ident == NULL)
    {
        VERB1 log_elf_error("elf_getident", filename);
        return NULL;
    }

    /* Look up the .eh_frame section */
    if (!xelf_section_by_name(e, ".eh_frame", filename, &scn_data, &shdr))
    {
        VERB1 log("Section .eh_frame not found in %s", filename);
        return NULL;
    }

    /* Get the address at which the executable segment is loaded. If the
     * .eh_frame addresses are absolute, this is used to convert them to
     * relative to the beginning of executable segment. We are looking for the
     * first LOAD segment that is executable, I hope this is sufficient.
     */
    if (elf_getphdrnum(e, &phnum) != 0)
    {
        VERB1 log_elf_error("elf_getphdrnum", filename);
        return NULL;
    }

    uintptr_t exec_base;
    /* NOTE(review): 'i' is a signed int compared against the size_t phnum. */
    int i;
    for (i = 0; i < phnum; i++)
    {
        if (gelf_getphdr(e, i, &phdr) != &phdr)
        {
            VERB1 log_elf_error("gelf_getphdr", filename);
            return NULL;
        }

        if (phdr.p_type == PT_LOAD && phdr.p_flags & PF_X)
        {
            exec_base = (uintptr_t)phdr.p_vaddr;
            goto base_found;
        }
    }

    /* The loop finished without finding an executable LOAD segment. */
    VERB1 log("Can't determine executable base for '%s'", filename);
    return NULL;

base_found:
    VERB2 log("Executable base: %jx", (uintmax_t)exec_base);

    /* We now have a handle to .eh_frame data. We'll use dwarf_next_cfi to
     * iterate through all FDEs looking for those matching the addresses we
     * have.
     * Some info on .eh_frame can be found at http://www.airs.com/blog/archives/460
     * and in DWARF documentation for .debug_frame. The initial_location and
     * address_range decoding is 'inspired' by elfutils source.
     * XXX: If this linear scan is too slow, we can do binary search on
     * .eh_frame_hdr -- see http://www.airs.com/blog/archives/462
     */
    int ret;
    Dwarf_Off cfi_offset;
    Dwarf_Off cfi_offset_next = 0;
    Dwarf_CFI_Entry cfi;

    /* Per-CIE data remembered for decoding the FDEs that refer to it. */
    struct cie_encoding {
        Dwarf_Off cie_offset; /* offset of the CIE as passed to dwarf_next_cfi */
        int ptr_len;          /* result of encoded_size() for the FDE pointer encoding */
        bool pcrel;           /* set when the encoding has DW_EH_PE_pcrel */
    } *cie;
    GList *cie_list = NULL;

    /* Init hash table
     * keys are pointers to integers which we allocate with malloc
     * values stored directly
     * NOTE(review): g_int64_hash/g_int64_equal read 64 bits through the key
     * pointer while the keys are held as uintptr_t* -- confirm that
     * addr_alloc() always allocates at least 8 bytes. */
    GHashTable *hash = g_hash_table_new_full(g_int64_hash, g_int64_equal, free, NULL);

    while(1)
    {
        cfi_offset = cfi_offset_next;
        ret = dwarf_next_cfi(e_ident, scn_data, 1, cfi_offset, &cfi_offset_next, &cfi);

        if (ret > 0)
        {
            /* We're at the end. */
            break;
        }

        if (ret < 0)
        {
            /* Error. If cfi_offset_next was updated, we may skip the
             * erroneous cfi. */
            if (cfi_offset_next > cfi_offset)
            {
                continue;
            }
            VERB1 log("dwarf_next_cfi failed for %s: %s", filename, dwarf_errmsg(-1));
            goto ret_free;
        }

        if (dwarf_cfi_cie_p(&cfi))
        {
            /* Current CFI is a CIE. We store its offset and FDE encoding
             * attributes to be used when reading FDEs.
             */

            /* Default FDE encoding (i.e. no R in augmentation string) is
             * DW_EH_PE_absptr.
             */
            cie = btp_mallocz(sizeof(*cie));
            cie->cie_offset = cfi_offset;
            cie->ptr_len = encoded_size(DW_EH_PE_absptr, e_ident);

            /* Search the augmentation data for FDE pointer encoding.
             * Unfortunately, 'P' can come before 'R' (which we are looking
             * for), so we may have to parse the whole thing. See the
             * abovementioned blog post for details.
             */
            const char *aug = cfi.cie.augmentation;
            const uint8_t *augdata = cfi.cie.augmentation_data;
            bool skip_cie = 0;
            if (*aug == 'z')
            {
                aug++;
            }

            /* Walk the augmentation string and the augmentation data in step. */
            while (*aug != '\0')
            {
                if(*aug == 'R')
                {
                    /* FDE pointer encoding: this is what we came for, stop here. */
                    cie->ptr_len = encoded_size(*augdata, e_ident);

                    if (cie->ptr_len != 4 && cie->ptr_len != 8)
                    {
                        VERB1 log("Unknown FDE encoding (CIE %jx) in %s", (uintmax_t)cfi_offset, filename);
                        skip_cie = 1;
                    }
                    if ((*augdata & 0x70) == DW_EH_PE_pcrel)
                    {
                        cie->pcrel = 1;
                    }
                    break;
                }
                else if (*aug == 'L')
                {
                    /* 'L' occupies one byte of augmentation data. */
                    augdata++;
                }
                else if (*aug == 'P')
                {
                    /* 'P' occupies one encoding byte followed by 'size' bytes. */
                    unsigned size = encoded_size(*augdata, e_ident);
                    if (size == 0)
                    {
                        VERB1 log("Unknown size for personality encoding in %s", filename);
                        skip_cie = 1;
                        break;
                    }
                    augdata += (size + 1);
                }
                else
                {
                    VERB1 log("Unknown augmentation char in %s", filename);
                    skip_cie = 1;
                    break;
                }
                aug++;
            }

            /* A CIE we could not parse is not remembered; FDEs that refer to
             * it will be reported as "CIE not found" below. */
            if (skip_cie)
            {
                free(cie);
                continue;
            }

            cie_list = g_list_append(cie_list, cie);
        }
        else
        {
            /* Current CFI is an FDE.
             */
            GList *it = cie_list;
            cie = NULL;

            /* Find the CIE data that we should have saved earlier. XXX: We can
             * use hash table/tree to speed up the search, the number of CIEs
             * should usually be very low though. */
            while (it != NULL)
            {
                cie = it->data;

                /* In .eh_frame, CIE_pointer is relative, but libdw converts it
                 * to absolute offset. */
                if(cfi.fde.CIE_pointer == cie->cie_offset)
                {
                    break; /* Found. */
                }

                it = g_list_next(it);
            }

            if (it == NULL)
            {
                VERB1 log("CIE not found for FDE %jx in %s", (uintmax_t)cfi_offset, filename);
                continue;
            }

            /* Read the two numbers we need and if they are PC-relative,
             * compute the offset from VMA base
             */
            uintptr_t initial_location = fde_read_address(cfi.fde.start, cie->ptr_len);
            uintptr_t address_range = fde_read_address(cfi.fde.start+cie->ptr_len, cie->ptr_len);

            if (cie->pcrel)
            {
                /* We need to determine how long is the 'length' (and
                 * consequently CIE id) field of this FDE -- it can be either 4
                 * or 12 bytes long. */
                uintptr_t length = fde_read_address(scn_data->d_buf + cfi_offset, 4);
                uintptr_t skip = (length == 0xffffffffUL ? 12 : 4);

                /* Truncate the sum to the width of the encoded pointer. */
                uintptr_t mask = (cie->ptr_len == 4 ? 0xffffffffUL : 0xffffffffffffffffUL);
                initial_location += (uintptr_t)shdr.sh_offset + (uintptr_t)cfi_offset + 2*skip;
                initial_location &= mask;
            }
            else
            {
                /* Assuming that not pcrel means absolute address (what if the file is a library?).
                 * Convert to text-section-start-relative.
                 */
                initial_location -= exec_base;
            }

            /* Insert the pair into hash */
            uintptr_t *key = addr_alloc(initial_location + exec_base);
            g_hash_table_insert(hash, key, (gpointer)address_range);
            VERB3 log("FDE start: 0x%jx length: %u", (uintmax_t)*key, (unsigned)address_range);

            /* Iterate through the backtrace entries and check each address
             * member whether it belongs into the range given by current FDE.
             */
            for (it = entries; it != NULL; it = g_list_next(it))
            {
                struct backtrace_entry *entry = it->data;
                if (initial_location <= entry->build_id_offset
                    && entry->build_id_offset < initial_location + address_range)
                {
                    /* Convert to before-relocation absolute addresses, disassembler uses those. */
                    entry->function_initial_loc = exec_base + initial_location;
                    entry->function_length = address_range;
                    /*TODO: remove the entry from the list to save a bit of time in next iteration?*/
                }
            }
        }
    }

    retval = hash; /* success */

    /* Shared exit: the CIE list is always released; the hash table is only
     * destroyed when we are not returning it. */
ret_free:
    list_free_with_free(cie_list);
    if (retval == NULL)
        g_hash_table_destroy(hash);
    return retval;
}
/*
 * Build a struct cie describing the CIE held in `cfi`.
 *
 * The result records the CIE offset plus the size and PC-relativeness of the
 * pointer encoding that FDEs referring to this CIE use. Without an 'R'
 * augmentation the size is the one encoded_size() reports for
 * DW_EH_PE_absptr.
 *
 * On failure NULL is returned and *error_message receives a string produced
 * by sr_asprintf(); otherwise the newly allocated struct is returned.
 */
static struct cie *
read_cie(Dwarf_CFI_Entry *cfi, Dwarf_Off cfi_offset, unsigned char *e_ident, char **error_message)
{
    struct cie *result = sr_mallocz(sizeof(*result));
    result->cie_offset = cfi_offset;
    result->ptr_len = encoded_size(DW_EH_PE_absptr, e_ident);

    /* The augmentation string and the augmentation data are walked in step.
     * 'P' may precede the 'R' we are after, so every character has to be
     * understood well enough to skip its data. */
    const uint8_t *data = cfi->cie.augmentation_data;
    const char *p = cfi->cie.augmentation;

    if ('z' == *p)
        ++p;

    for (; *p != '\0'; ++p)
    {
        const char c = *p;

        if (c == 'L')
        {
            /* one byte of data, not interesting to us */
            ++data;
            continue;
        }

        if (c == 'P')
        {
            /* one encoding byte followed by the encoded personality pointer */
            unsigned size = encoded_size(*data, e_ident);
            if (size == 0)
            {
                *error_message = sr_asprintf("Unknown size for personality encoding (CIE %jx)",
                                             (uintmax_t)cfi_offset);
                free(result);
                return NULL;
            }

            data += size + 1;
            continue;
        }

        if (c == 'R')
        {
            /* FDE pointer encoding: nothing after it matters to us */
            result->ptr_len = encoded_size(*data, e_ident);
            if (!(result->ptr_len == 4 || result->ptr_len == 8))
            {
                *error_message = sr_asprintf("Unknown FDE encoding (CIE %jx)",
                                             (uintmax_t)cfi_offset);
                free(result);
                return NULL;
            }

            if (DW_EH_PE_pcrel == (*data & 0x70))
                result->pcrel = true;

            return result;
        }

        /* any other augmentation character cannot be skipped safely */
        *error_message = sr_asprintf("Unknown augmentation char (CIE %jx)",
                                     (uintmax_t)cfi_offset);
        free(result);
        return NULL;
    }

    return result;
}