// Decode buflen bytes at buf using the given encoding, and append the
// resulting characters to s.
//
// conv_flags: If DE_CONVFLAG_STOP_AT_NUL is set, only the bytes before the
//   first NUL are converted. For DE_ENCODING_UTF16LE/UTF16BE a NUL is a
//   16-bit code unit (at an even offset from buf) whose two bytes are both
//   zero; for every other encoding it is a single zero byte.
// encoding: DE_ENCODING_UTF8 and DE_ENCODING_UTF16LE/BE are decoded with
//   de_utf8_to_uchar()/de_utf16x_to_uchar(). Any other value is treated as a
//   one-byte-per-character encoding and mapped with de_char_to_unicode().
//
// A sequence that fails to decode is replaced by '_', and the input position
// advances by 1 byte (UTF-8, single-byte encodings) or 2 bytes (UTF-16).
// A buflen of zero or less appends nothing.
void ucstring_append_bytes(de_ucstring *s, const de_byte *buf, de_int64 buflen,
	unsigned int conv_flags, int encoding)
{
	int ret;
	int is_utf16;
	de_int64 pos = 0;
	de_int32 ch;
	de_int64 code_len;

	// Nothing to convert. This also guarantees that a negative length is
	// never cast to size_t below.
	if(buflen<=0) return;

	is_utf16 = (encoding==DE_ENCODING_UTF16LE || encoding==DE_ENCODING_UTF16BE);

	// Adjust buflen if necessary.
	if(conv_flags & DE_CONVFLAG_STOP_AT_NUL) {
		if(is_utf16) {
			// A lone zero byte is part of most UTF-16 characters (e.g. all of
			// ASCII), so the terminator has to be found one code unit at a time.
			de_int64 k;

			for(k=0; k+1<buflen; k+=2) {
				if(buf[k]==0 && buf[k+1]==0) {
					buflen = k;
					break;
				}
			}
		}
		else {
			const de_byte *nulp;

			nulp = de_memchr(buf, 0, (size_t)buflen);
			if(nulp) {
				buflen = nulp - buf;
			}
		}
	}

	while(pos<buflen) {
		if(encoding==DE_ENCODING_UTF8) {
			ret = de_utf8_to_uchar(&buf[pos], buflen-pos, &ch, &code_len);
			if(!ret) {
				ch = '_';
				code_len = 1;
			}
		}
		else if(is_utf16) {
			// The last argument selects little-endian (1) or big-endian (0).
			ret = de_utf16x_to_uchar(&buf[pos], buflen-pos, &ch, &code_len,
				(encoding==DE_ENCODING_UTF16LE) ? 1 : 0);
			if(!ret) {
				ch = '_';
				code_len = 2;
			}
		}
		else {
			ch = de_char_to_unicode(s->c, buf[pos], encoding);
			if(ch==DE_INVALID_CODEPOINT) {
				ch = '_';
			}
			code_len = 1;
		}

		ucstring_append_char(s, ch);
		pos += code_len;
	}
}
// create bitmap_font object static void do_make_image(deark *c, lctx *d) { struct de_bitmap_font *font = NULL; de_int64 i; de_int64 pos; font = de_create_bitmap_font(c); font->has_nonunicode_codepoints = 1; if(d->encoding!=DE_ENCODING_UNKNOWN) font->has_unicode_codepoints = 1; font->prefer_unicode = 0; font->nominal_width = (int)d->nominal_char_width; font->nominal_height = (int)d->char_height; font->num_chars = d->num_chars_stored; font->char_array = de_malloc(c, font->num_chars * sizeof(struct de_bitmap_font_char)); for(i=0; i<d->num_chars_stored; i++) { de_int64 char_width; de_int64 char_offset; de_int32 char_index; de_int64 num_tiles; de_int64 tile; de_int64 row; pos = d->hdrsize + d->char_entry_size*i; char_width = de_getui16le(pos); if(d->char_entry_size==6) char_offset = de_getui32le(pos+2); else char_offset = de_getui16le(pos+2); de_dbg2(c, "char[%d] width=%d offset=%d\n", (int)(d->first_char + i), (int)char_width, (int)char_offset); num_tiles = (char_width+7)/8; if(i == d->num_chars_stored-1) { // Arbitrarily put the "absolute space" char at codepoint 256, // and U+2002 EN SPACE (best I can do). font->char_array[i].codepoint_nonunicode = 256; font->char_array[i].codepoint_unicode = 0x2002; } else { char_index = (de_int32)d->first_char + (de_int32)i; font->char_array[i].codepoint_nonunicode = char_index; if(font->has_unicode_codepoints) { if(char_index<32 && d->dfCharSet==0) { // This kind of font usually doesn't have glyphs below 32. // If it does, assume that they are VT100 line drawing characters. 
font->char_array[i].codepoint_unicode = de_char_to_unicode(c, 95+char_index, DE_ENCODING_DEC_SPECIAL_GRAPHICS); } else { font->char_array[i].codepoint_unicode = de_char_to_unicode(c, char_index, d->encoding); } } } font->char_array[i].width = (int)char_width; font->char_array[i].height = (int)d->char_height; font->char_array[i].rowspan = num_tiles; font->char_array[i].bitmap = de_malloc(c, d->char_height * num_tiles); for(row=0; row<d->char_height; row++) { for(tile=0; tile<num_tiles; tile++) { font->char_array[i].bitmap[row * font->char_array[i].rowspan + tile] = de_getbyte(char_offset + tile*d->char_height + row); } } } de_font_bitmap_font_to_image(c, font, d->fi, 0); if(font) { if(font->char_array) { for(i=0; i<font->num_chars; i++) { de_free(c, font->char_array[i].bitmap); } de_free(c, font->char_array); } de_destroy_bitmap_font(c, font); } }
// Emit the text of one paragraph (pinfo->thisparalen bytes starting at
// pinfo->thisparapos) as HTML. Does nothing if there is no d->html_outf.
//
// Paragraphs with either of the 0x06 bits set in papflags are not decoded;
// a placeholder naming them a header or footer definition is written instead.
//
// Runs of spaces are counted rather than emitted immediately, and are written
// out when the next non-space byte is seen.
static void do_text_paragraph(deark *c, lctx *d, struct para_info *pinfo)
{
	i64 idx;

	if(!d->html_outf) return;

	if(pinfo->papflags & 0x06) {
		// TODO: Decode headers and footers somehow.
		do_emit_raw_sz(c, d, pinfo, "<p class=r>");
		if(pinfo->papflags & 0x01) {
			do_emit_raw_sz(c, d, pinfo, "footer");
		}
		else {
			do_emit_raw_sz(c, d, pinfo, "header");
		}
		do_emit_raw_sz(c, d, pinfo, " definition</p>\n");
		return;
	}

	// Reset the per-paragraph state.
	pinfo->in_para = 0;
	pinfo->in_span = 0;
	pinfo->has_content = 0;
	pinfo->space_count = 0;
	pinfo->xpos = 0;
	default_text_styles(&pinfo->text_styles_wanted);
	default_text_styles(&pinfo->text_styles_current);

	for(idx=0; idx<pinfo->thisparalen; idx++) {
		u8 b;

		b = de_getbyte(pinfo->thisparapos+idx);

		// A CR immediately followed by LF ends the current HTML paragraph.
		if(b==0x0d && idx<pinfo->thisparalen-1 &&
			de_getbyte(pinfo->thisparapos+idx+1)==0x0a)
		{
			idx++; // Also consume the LF
			ensure_in_para(c, d, pinfo);
			end_para(c, d, pinfo);
			continue;
		}

		// Flush any pending spaces before handling a non-space byte.
		if(b!=32 && pinfo->space_count>0) {
			int num_nbsp;
			int num_plain;

			if(pinfo->in_para || pinfo->space_count!=1) {
				// All spaces but the last one are made nonbreaking.
				num_nbsp = pinfo->space_count-1;
				num_plain = 1;
			}
			else {
				// A single space at the start of a paragraph is made nonbreaking.
				num_nbsp = 1;
				num_plain = 0;
			}

			ensure_in_para(c, d, pinfo);

			while(num_nbsp>0) {
				do_emit_codepoint(c, d, pinfo, 0xa0);
				num_nbsp--;
			}

			if(num_plain>0) {
				if(pinfo->xpos>70) {
					// This is not real word wrapping of the HTML source,
					// but it may be better than nothing.
					do_emit_raw_sz(c, d, pinfo, "\n");
				}
				else {
					do_emit_codepoint(c, d, pinfo, 32);
				}
			}

			pinfo->space_count=0;
		}

		switch(b) {
		case 9: // tab: emitted as U+2192, with tab_style set just for this character
			pinfo->text_styles_wanted.tab_style = 1;
			do_emit_codepoint(c, d, pinfo, 0x2192);
			pinfo->text_styles_wanted.tab_style = 0;
			break;
		case 10:
		case 11:
			ensure_in_para(c, d, pinfo);
			do_emit_raw_sz(c, d, pinfo, "<br>\n");
			pinfo->has_content = 1;
			break;
		case 12: // page break
			end_para(c, d, pinfo);
			do_emit_raw_sz(c, d, pinfo, "<hr>\n");
			break;
		case 31: // produces no output
			break;
		case 32: // counted now, emitted later
			pinfo->space_count++;
			break;
		default:
			if(b>=33) {
				i32 outcp;

				outcp = de_char_to_unicode(c, (i32)b, d->input_encoding);
				do_emit_codepoint(c, d, pinfo, outcp);
			}
			else {
				// Any other byte below 33 becomes U+FFFD.
				do_emit_codepoint(c, d, pinfo, 0xfffd);
			}
			break;
		}
	}

	end_para(c, d, pinfo);
}