/**
 * Filters a raw text field down to its displayable bytes and hands the
 * result to FromCharset().
 *
 * Byte handling, as implemented below:
 *  - 0x8f stops the scan,
 *  - 0x7f is skipped,
 *  - any byte with bit 5 or bit 6 set (code & 0x60) is copied verbatim,
 *  - 0x8a is emitted as a line feed,
 *  - every other byte is dropped.
 *
 * @param data    raw bytes to scan
 * @param size    number of bytes available in data
 * @param charset character set name forwarded to FromCharset()
 *
 * @return the value returned by FromCharset() for the filtered bytes, or
 *         NULL if the scratch buffer could not be allocated.
 */
static char *ParseText(const uint8_t *data, size_t size, const char *charset)
{
    /* Each input byte yields at most one output byte, so size is enough. */
    char *text = malloc(size);
    if (text == NULL)
        return NULL;

    size_t kept = 0;
    size_t pos = 0;

    while (pos < size)
    {
        const uint8_t code = data[pos++];

        if (code == 0x8f)
            break;
        if (code == 0x7f)
            continue;

        /* TODO: italics begin/end 0x80/0x81, underline begin/end 0x82/0x83 */
        if ((code & 0x60) != 0)
            text[kept++] = code;
        else if (code == 0x8a) /* 0x8a has neither bit set, hence the else */
            text[kept++] = '\n';
    }

    char *u8 = FromCharset(charset, text, kept);
    free(text);
    return u8;
}
/**
 * Keeps the displayable bytes of a raw text field and converts them with
 * FromCharset().
 *
 * The scan ends at the first 0x8f byte. 0x7f bytes are ignored, 0x8a becomes
 * a line feed, bytes with bit 5 or bit 6 set are copied unchanged, and
 * anything else is discarded.
 *
 * @param data    raw bytes to scan
 * @param size    number of bytes available in data
 * @param charset character set name forwarded to FromCharset()
 *
 * @return the value returned by FromCharset() for the filtered bytes, or
 *         NULL if the scratch buffer could not be allocated.
 */
static char *ParseText(const uint8_t *data, size_t size, const char *charset)
{
    /* At most one byte is written per byte read. */
    char *text = malloc(size);
    if (!text)
        return NULL;

    char *out = text;

    for (size_t i = 0; i != size; ++i)
    {
        const uint8_t code = data[i];

        if (code == 0x8f)
            break;
        if (code == 0x7f)
            continue;

        if (code == 0x8a)
            *out++ = '\n';
        else if (code & 0x60)
            *out++ = (char)code;
    }

    char *u8 = FromCharset(charset, text, (size_t)(out - text));
    free(text);
    return u8;
}
/**
 * Reads the next entry name from an open directory.
 *
 * @param dir directory stream to read from
 *
 * @return the value produced by FromCharset() for the entry name, or NULL
 *         when there is no further entry, when the scratch dirent cannot be
 *         allocated, or when readdir_r() fails (errno is then set to the
 *         code it returned).
 */
const char *vlc_readdir(DIR *dir)
{
    /* Beware that readdir_r() assumes <buf> is large enough to hold the result
     * dirent including the file name. A buffer overflow could occur otherwise.
     * In particular, pathconf() and _POSIX_NAME_MAX cannot be used here. */

    /* In the implementation of Innotek LIBC, aka kLIBC on OS/2,
     * fpathconf (_PC_NAME_MAX) is broken, and errno is set to EBADF.
     * Moreover, d_name is not the last member of struct dirent.
     * So just allocate as many as the size of struct dirent. */
#if 1
    long len = sizeof (struct dirent);
#else
    long len = fpathconf (dirfd (dir), _PC_NAME_MAX);
    len += offsetof (struct dirent, d_name) + 1;
#endif

    struct dirent *storage = malloc (len);
    if (unlikely(storage == NULL))
        return NULL;

    struct dirent *entry;
    char *name = NULL;

    const int err = readdir_r (dir, storage, &entry);
    if (err == 0)
    {
        /* entry is NULL once the end of the directory has been reached */
        if (entry != NULL)
            name = FromCharset ("", entry->d_name, strlen(entry->d_name));
    }
    else
        errno = err;

    free (storage);
    return name;
}
/**
 * Converts a string from the system locale character encoding to UTF-8.
 * The result is always a fresh heap allocation, even when no conversion is
 * needed (ASSUME_UTF8 builds simply duplicate the input).
 *
 * @param locale nul-terminated string to convert
 *
 * @return a nul-terminated UTF-8 string, or NULL in case of error.
 * The result must be released with free(), as with strdup().
 */
char *FromLocaleDup (const char *locale)
{
#ifndef ASSUME_UTF8
    const size_t length = strlen (locale);

    /* An empty charset name is passed for the locale encoding. */
    return FromCharset ("", locale, length);
#else
    return strdup (locale);
#endif
}
/**
 * Converts a string from the system locale character encoding to UTF-8.
 *
 * In ASSUME_UTF8 builds the input pointer itself is returned (with its
 * const qualifier cast away); otherwise the string goes through
 * FromCharset(), and a NULL input yields NULL.
 *
 * @param locale nul-terminated string to convert
 *
 * @return a nul-terminated UTF-8 string, or NULL in case of error.
 * To avoid a memory leak, pass the result to LocaleFree() when it is no
 * longer needed.
 */
char *FromLocale (const char *locale)
{
#ifndef ASSUME_UTF8
    if (locale == NULL)
        return NULL;

    return FromCharset ("", locale, strlen (locale));
#else
    return (char *)locale;
#endif
}
/**
 * Lua binding: converts a string from a named character set.
 *
 * Lua arguments: (1) character set name, (2) string to convert.
 * Pushes the converted string, or an empty string when FromCharset()
 * returns NULL. Calls vlclua_error() when fewer than two arguments are
 * given or when the input string is empty.
 *
 * @param L Lua state
 * @return 1 (one value pushed), or the result of vlclua_error()
 */
static int vlclua_from_charset( lua_State *L )
{
    if( lua_gettop( L ) < 2 )
        return vlclua_error( L );

    /* The input string is checked before the charset name, as both checks
     * may raise a Lua error. */
    size_t i_length;
    const char *psz_source = luaL_checklstring( L, 2, &i_length );
    if( i_length == 0 )
        return vlclua_error( L );

    const char *psz_encoding = luaL_checkstring( L, 1 );

    char *psz_converted = FromCharset( psz_encoding, psz_source, i_length );
    if( psz_converted != NULL )
        lua_pushstring( L, psz_converted );
    else
        lua_pushstring( L, "" );
    free( psz_converted );

    return 1;
}
/**
 * Tells whether the first 512 bytes of the source stream contain a
 * "<SmoothStreamingMedia" tag.
 *
 * If the data starts with a UTF-16 byte order mark, it is first converted
 * with FromCharset(); otherwise the raw bytes are searched directly.
 *
 * @param s stream whose p_source is peeked
 * @return true if the tag is found; false otherwise, including when fewer
 *         than 512 bytes are available or when allocation/conversion fails
 */
static bool isSmoothStreaming( stream_t *s )
{
    const uint8_t *p_data;
    if( stream_Peek( s->p_source, &p_data, 512 ) < 512 )
        return false;

    char *psz_raw = malloc( 512 );
    if( unlikely( psz_raw == NULL ) )
        return false;

    memcpy( psz_raw, p_data, 512 );
    /* Two trailing zero bytes, so that the copy is terminated */
    psz_raw[510] = '\0';
    psz_raw[511] = '\0';

    /* Pick the source encoding from the byte order mark, if any */
    const char *psz_encoding = NULL;
    if( memcmp( psz_raw, "\xFF\xFE", 2 ) == 0 )
        psz_encoding = "UTF-16LE";
    else if( memcmp( psz_raw, "\xFE\xFF", 2 ) == 0 )
        psz_encoding = "UTF-16BE";

    char *psz_text = psz_raw;
    if( psz_encoding != NULL )
    {
        psz_text = FromCharset( psz_encoding, psz_raw, 512 );
        free( psz_raw );
        if( psz_text == NULL )
            return false;
    }

    const bool b_found = strstr( psz_text, "<SmoothStreamingMedia" ) != NULL;
    free( psz_text );
    return b_found;
}
/**
 * Reads the next file name from an open directory.
 *
 * @param dir The directory that is being read
 *
 * @return a UTF-8 string of the directory entry. Use free() to release it.
 * If there are no more entries in the directory, NULL is returned.
 * If an error occurs, errno is set and NULL is returned.
 */
char *vlc_readdir( DIR *dir )
{
    /* Beware that readdir_r() assumes <buf> is large enough to hold the result
     * dirent including the file name. A buffer overflow could occur otherwise.
     * In particular, pathconf() and _POSIX_NAME_MAX cannot be used here. */
    long len = fpathconf (dirfd (dir), _PC_NAME_MAX);

#if defined(__OS2__) && defined(__INNOTEK_LIBC__)
    /* In the implementation of Innotek LIBC, aka kLIBC on OS/2,
     * fpathconf (_PC_NAME_MAX) is broken, and d_name is not the last member
     * of struct dirent.
     * So just allocate as many as the size of struct dirent. */
    len = sizeof (struct dirent);
#else
# ifdef NAME_MAX
    /* POSIX says there shall be room for NAME_MAX bytes at all times.
     * This also covers the -1 error return of fpathconf(). */
    len = (len < NAME_MAX) ? NAME_MAX : len;
# else
    /* OS is broken. Let's assume there are no files left. */
    if (len == -1)
        return NULL;
# endif
    len += offsetof (struct dirent, d_name) + 1;
#endif

    struct dirent *storage = malloc (len);
    if (unlikely(storage == NULL))
        return NULL;

    struct dirent *entry;
    char *name = NULL;

    const int err = readdir_r (dir, storage, &entry);
    if (err == 0)
    {
        /* entry is NULL once the end of the directory has been reached */
        if (entry != NULL)
        {
#ifdef __APPLE__
            name = FromCharset ("UTF-8-MAC", entry->d_name,
                                strlen (entry->d_name));
#else
            name = FromLocaleDup (entry->d_name);
#endif
        }
    }
    else
        errno = err;

    free (storage);
    return name;
}
/**
 * Collects the printable bytes of a raw text field and converts them with
 * FromCharset().
 *
 * Bytes in the ranges 0x20-0x7e and 0xa0-0xff are kept; 0x8f ends the scan;
 * every other byte is currently dropped (the markup handling for
 * 0x80-0x83 and 0x8a is compiled out).
 *
 * The intermediate buffer is released before returning, and any allocation
 * failure makes the function return NULL instead of carrying on with a
 * partially initialised buffer.
 *
 * @param data    raw bytes to scan
 * @param size    number of bytes available in data
 * @param charset character set name forwarded to FromCharset()
 *
 * @return the value returned by FromCharset() for the collected bytes, or
 *         NULL on allocation failure.
 */
static char *ParseText(uint8_t *data, int size, const char *charset)
{
    char *text = strdup("");
    if (text == NULL)
        return NULL;

    int text_size = 0;

    for (int i = 0; i < size; i++) {
        uint8_t code = data[i];

        if (code == 0x8f)
            break;

        char tmp[16] = "";
        char *t = tmp;
        if ((code >= 0x20 && code <= 0x7e) || code >= 0xa0)
            snprintf(tmp, sizeof(tmp), "%c", code);
#if 0
        else if (code == 0x80)
            snprintf(tmp, sizeof(tmp), "<i>");
        else if (code == 0x81)
            snprintf(tmp, sizeof(tmp), "</i>");
        else if (code == 0x82)
            snprintf(tmp, sizeof(tmp), "<u>");
        else if (code == 0x83)
            snprintf(tmp, sizeof(tmp), "</u>");
        else if (code == 0x8a)
            snprintf(tmp, sizeof(tmp), "\n");
#endif
        else {
            t = NULL;
        }

        if (!t)
            continue;

        size_t t_size = strlen(t);
        /* realloc_or_free() is expected (going by its name) to have released
         * the previous buffer when it returns NULL - TODO confirm. Either
         * way the accumulated text is gone, so give up rather than resume
         * with a stale text_size. */
        text = realloc_or_free(text, t_size + text_size + 1);
        if (!text)
            return NULL;

        memcpy(&text[text_size], t, t_size);
        text_size += t_size;
        text[text_size] = '\0';
    }

    /* FromCharset() returns its own buffer; the scratch copy must be freed
     * here or it leaks on every call. */
    char *u8 = FromCharset(charset, text, text_size);
    free(text);
    return u8;
}
/**
 * Tells whether the first 512 bytes of the source stream contain a
 * "<SmoothStreamingMedia" tag.
 *
 * The raw bytes are searched first. If the tag is not found and the data
 * starts with a UTF-16 byte order mark, the data is converted with
 * FromCharset() and searched again.
 *
 * The converted text is kept in its own pointer so that both buffers are
 * released, and a failed conversion is treated as "not found" rather than
 * being passed to strstr().
 *
 * @param s stream whose p_source is peeked
 * @return true if the tag is found; false otherwise, including when fewer
 *         than 512 bytes are available or when allocation/conversion fails
 */
static bool isSmoothStreaming( stream_t *s )
{
    const uint8_t *peek;
    const char *needle = "<SmoothStreamingMedia";

    int i_size = stream_Peek( s->p_source, &peek, 512 );
    if( i_size < 512 )
        return false;

    char *peeked = malloc( 512 );
    if( unlikely( !peeked ) )
        return false;

    memcpy( peeked, peek, 512 );
    /* Two trailing zero bytes, so that the copy is terminated */
    peeked[511] = peeked[510] = '\0';

    bool ret = strstr( peeked, needle ) != NULL;
    if( !ret )
    {
        /* maybe it's utf-16 encoding, should we also test other encodings? */
        const char *encoding = NULL;
        if( !memcmp( peeked, "\xFF\xFE", 2 ) )
            encoding = "UTF-16LE";
        else if( !memcmp( peeked, "\xFE\xFF", 2 ) )
            encoding = "UTF-16BE";

        if( encoding != NULL )
        {
            char *converted = FromCharset( encoding, peeked, 512 );
            if( converted != NULL )
            {
                ret = strstr( converted, needle ) != NULL;
                free( converted );
            }
        }
    }

    free( peeked );
    return ret;
}
/**
 * Decodes an encoded NSC string.
 *
 * Layout as read by this function, each byte being fetched with load_byte():
 *  - 1 byte: encoding type (only 1 and 2 are accepted),
 *  - 1 byte, then 4 bytes: read and discarded,
 *  - 4 bytes: payload length, most significant byte first,
 *  - <length> bytes: payload, handed to FromCharset() as UTF-16LE.
 *
 * @param p_demux   object used for error logging
 * @param p_encoded nul-terminated encoded string (at least 15 characters)
 *
 * @return the value returned by FromCharset() for the payload (to be
 *         released with free()), or NULL on any error.
 */
static char *nscdec( vlc_object_t *p_demux, char* p_encoded )
{
    unsigned int i;
    unsigned char tmp;
    unsigned char j, k;
    unsigned int length;
    unsigned char encoding_type;

    unsigned char *buf16;
    char *buf8;

    char *p_input = p_encoded;

    if( strlen( p_input ) < 15 )
    {
        msg_Err( p_demux, "input string less than 15 characters" );
        return NULL;
    }

    if( load_byte( 1, &encoding_type, &p_input, NULL, NULL ) )
    {
        msg_Err( p_demux, "unable to get NSC encoding type" );
        return NULL;
    }

    if( encoding_type != 1 && encoding_type != 2 )
    {
        msg_Err( p_demux, "encoding type %d is not supported",
                 encoding_type );
        return NULL;
    }

    j = k = 0;

    /* One byte, then four more, are consumed without being used */
    if( load_byte( encoding_type, &tmp, &p_input, &j, &k ) )
    {
        msg_Err( p_demux, "load_byte failed" );
        return NULL;
    }

    for( i = 0; i < 4; i++ )
    {
        if( load_byte( encoding_type, &tmp, &p_input, &j, &k ) )
        {
            msg_Err( p_demux, "load_byte failed" );
            return NULL;
        }
    }

    /* 32-bit payload length, most significant byte first */
    length = 0;
    for( i = 4; i; i-- )
    {
        if( load_byte( encoding_type, &tmp, &p_input, &j, &k ) )
        {
            msg_Err( p_demux, "load_byte failed" );
            return NULL;
        }
        /* Widen before shifting: an unsigned char promotes to int, and
         * shifting a value >= 0x80 left by 24 bits would overflow it. */
        length |= (unsigned int)tmp << ((i - 1) * 8);
    }

    if( length == 0 )
    {
        msg_Err( p_demux, "Length is 0" );
        return NULL;
    }

    buf16 = malloc( length );
    if( buf16 == NULL )
        return NULL;

    for( i = 0; i < length; i++ )
    {
        if( load_byte( encoding_type, &buf16[ i ], &p_input, &j, &k ) )
        {
            msg_Err( p_demux, "load_byte failed" );
            free( buf16 );
            return NULL;
        }
    }

    buf8 = FromCharset( "UTF-16LE", buf16, length );
    free( buf16 );
    if( buf8 == NULL )
    {
        msg_Err( p_demux, "iconv failed" );
        return NULL;
    }
    return buf8;
}
/**
 * Probes the stream for a TTML document and, on success, sets up the demuxer.
 *
 * Probing peeks at up to 2048 bytes (more than 32 are required), recognises
 * the document start by its first 8 (or 4) bytes, converts UTF-16 input with
 * FromCharset(), then looks for a "tt " element name and one of the known
 * TTML namespace declarations.
 *
 * Once the probe passes, the private state is allocated, the XML reader is
 * created, the document is read with ReadTTML(), timings are resolved and a
 * VLC_CODEC_TTML subtitle ES is added.
 *
 * @param p_this the demux object
 * @return VLC_SUCCESS on success, VLC_ENOMEM if the private state cannot be
 *         allocated, VLC_EGENERIC otherwise.
 */
int OpenDemux( vlc_object_t* p_this )
{
    demux_t *p_demux = (demux_t*)p_this;
    demux_sys_t *p_sys;

    const uint8_t *p_peek;
    ssize_t i_peek = vlc_stream_Peek( p_demux->s, &p_peek, 2048 );
    if( unlikely( i_peek <= 32 ) )
        return VLC_EGENERIC;

    /* By default the peeked bytes are searched as-is (UTF-8 case) */
    const char *psz_xml = (const char *) p_peek;
    size_t i_xml = i_peek;

    /* Try to probe without xml module/loading the full document */
    char *psz_alloc = NULL;
    switch( GetQWBE(p_peek) )
    {
        /* See RFC 3023 Part 4 */
        case UINT64_C(0xFFFE3C003F007800): /* UTF16 BOM<? */
        case UINT64_C(0xFFFE3C003F007400): /* UTF16 BOM<t */
        case UINT64_C(0xFEFF003C003F0078): /* UTF16 BOM<? */
        case UINT64_C(0xFEFF003C003F0074): /* UTF16 BOM<t */
            psz_alloc = FromCharset( "UTF-16", p_peek, i_peek );
            break;
        case UINT64_C(0x3C003F0078006D00): /* UTF16-LE <?xm */
        case UINT64_C(0x3C003F0074007400): /* UTF16-LE <tt */
            psz_alloc = FromCharset( "UTF-16LE", p_peek, i_peek );
            break;
        case UINT64_C(0x003C003F0078006D): /* UTF16-BE <?xm */
        case UINT64_C(0x003C003F00740074): /* UTF16-BE <tt */
            psz_alloc = FromCharset( "UTF-16BE", p_peek, i_peek );
            break;
        case UINT64_C(0xEFBBBF3C3F786D6C): /* UTF8 BOM<?xml */
        case UINT64_C(0x3C3F786D6C207665): /* UTF8 <?xml ve */
        case UINT64_C(0xEFBBBF3C74742078): /* UTF8 BOM<tt x*/
            break;
        default:
            /* 0x3C747420 is "<tt " */
            if(GetDWBE(p_peek) != UINT32_C(0x3C747420)) /* tt node without xml document marker */
                return VLC_EGENERIC;
    }

    /* If a conversion took place and succeeded, search the converted text.
     * If it failed, psz_alloc is NULL and the raw bytes are searched. */
    if( psz_alloc )
    {
        psz_xml = psz_alloc;
        i_xml = strlen( psz_alloc );
    }

    /* Simplified probing. Valid TTML must have a namespace declaration */
    const char *psz_tt = strnstr( psz_xml, "tt ", i_xml );
    /* "tt " must be preceded by '<' (plain element) or ':' (prefixed name) */
    if( !psz_tt || psz_tt == psz_xml ||
        (psz_tt[-1] != ':' && psz_tt[-1] != '<') )
    {
        free( psz_alloc );
        return VLC_EGENERIC;
    }
    else
    {
        const char * const rgsz[] =
        {
            "=\"http://www.w3.org/ns/ttml\"",
            "=\"http://www.w3.org/2004/11/ttaf1\"",
            "=\"http://www.w3.org/2006/04/ttaf1\"",
            "=\"http://www.w3.org/2006/10/ttaf1\"",
        };
        const char *psz_ns = NULL;
        /* NOTE(review): the search starts at psz_xml but its length is
         * shortened by the offset of psz_tt, so the last (psz_tt - psz_xml)
         * bytes are never examined. Starting at psz_tt may have been the
         * intent - confirm. */
        for( size_t i=0; i<ARRAY_SIZE(rgsz) && !psz_ns; i++ )
        {
            psz_ns = strnstr( psz_xml, rgsz[i],
                              i_xml - (psz_tt - psz_xml) );
        }
        free( psz_alloc );
        if( !psz_ns )
            return VLC_EGENERIC;
    }

    p_demux->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) );
    if( unlikely( p_sys == NULL ) )
        return VLC_ENOMEM;

    p_sys->b_first_time = true;
    p_sys->temporal_extent.i_type = TT_TIMINGS_PARALLEL;
    tt_time_Init( &p_sys->temporal_extent.begin );
    tt_time_Init( &p_sys->temporal_extent.end );
    tt_time_Init( &p_sys->temporal_extent.dur );
    p_sys->temporal_extent.begin.base = 0;

    p_sys->p_xml = xml_Create( p_demux );
    if( !p_sys->p_xml )
        goto error;

    p_sys->p_reader = xml_ReaderCreate( p_sys->p_xml, p_demux->s );
    if( !p_sys->p_reader )
        goto error;

#ifndef TTML_DEMUX_DEBUG
    /* Outside of debug builds, flag the reader object as quiet */
    p_sys->p_reader->obj.flags |= OBJECT_FLAGS_QUIET;
#endif

    if( ReadTTML( p_demux ) != VLC_SUCCESS )
        goto error;

    tt_timings_Resolve( (tt_basenode_t *) p_sys->p_rootnode, &p_sys->temporal_extent,
                        &p_sys->times.p_array, &p_sys->times.i_count );

#ifdef TTML_DEMUX_DEBUG
    /* Debug builds: dump the parsed node tree as text to the debug log */
    {
        struct vlc_memstream stream;

        if( vlc_memstream_open( &stream ) )
            goto error;

        tt_time_t t;
        tt_time_Init( &t );
        tt_node_ToText( &stream, (tt_basenode_t*)p_sys->p_rootnode, &t /* invalid */ );

        vlc_memstream_putc( &stream, '\0' );

        if( vlc_memstream_close( &stream ) == VLC_SUCCESS )
        {
            msg_Dbg( p_demux, "%s", stream.ptr );
            free( stream.ptr );
        }
    }
#endif

    p_demux->pf_demux = Demux;
    p_demux->pf_control = Control;

    es_format_t fmt;
    es_format_Init( &fmt, SPU_ES, VLC_CODEC_TTML );
    p_sys->p_es = es_out_Add( p_demux->out, &fmt );
    /* NOTE(review): fmt is not passed to es_format_Clean() on this error
     * path - confirm that nothing needs releasing right after Init. */
    if( !p_sys->p_es )
        goto error;

    es_format_Clean( &fmt );

    return VLC_SUCCESS;

error:
    /* Cleanup of the partially initialised state is left to CloseDemux() */
    CloseDemux( p_demux );

    return VLC_EGENERIC;
}
/**
 * Decodes one block into a text subpicture and queues it.
 *
 * Block layout as read here: a 16-bit big-endian byte length, the text
 * itself (converted from UTF-16 when it starts with a byte order mark),
 * then a sequence of atoms ('styl', 'drpo', 'drpt') carrying style data.
 *
 * The block is consumed on every path: it is released both after a
 * successful decode and whenever decoding is abandoned. A text length that
 * exceeds the block payload causes the block to be dropped.
 *
 * @param p_dec   decoder instance
 * @param p_block input block, or NULL (nothing to drain)
 * @return always VLCDEC_SUCCESS
 */
static int Decode( decoder_t *p_dec, block_t *p_block )
{
    subpicture_t *p_spu = NULL;

    if( p_block == NULL ) /* No Drain */
        return VLCDEC_SUCCESS;

    if( ( p_block->i_flags & (BLOCK_FLAG_CORRUPTED) ) ||
          p_block->i_buffer < sizeof(uint16_t) )
    {
        block_Release( p_block );
        return VLCDEC_SUCCESS;
    }

    uint8_t *p_buf = p_block->p_buffer;

    /* Read our raw string and create the styled segment for HTML */
    uint16_t i_psz_bytelength = GetWBE( p_buf );

    /* The announced text length comes from the stream: never read past the
     * end of the block. i_buffer >= sizeof(uint16_t) was checked above. */
    if( i_psz_bytelength > p_block->i_buffer - sizeof(uint16_t) )
    {
        block_Release( p_block );
        return VLCDEC_SUCCESS;
    }

    const uint8_t *p_pszstart = p_block->p_buffer + sizeof(uint16_t);
    char *psz_subtitle;
    if ( i_psz_bytelength > 2 &&
         ( !memcmp( p_pszstart, "\xFE\xFF", 2 ) || !memcmp( p_pszstart, "\xFF\xFE", 2 ) )
       )
    {
        psz_subtitle = FromCharset( "UTF-16", p_pszstart, i_psz_bytelength );
    }
    else
    {
        psz_subtitle = malloc( i_psz_bytelength + 1 );
        if ( psz_subtitle )
        {
            memcpy( psz_subtitle, p_pszstart, i_psz_bytelength );
            psz_subtitle[ i_psz_bytelength ] = '\0';
        }
    }
    if ( !psz_subtitle )
    {
        block_Release( p_block );
        return VLCDEC_SUCCESS;
    }
    p_buf += i_psz_bytelength + sizeof(uint16_t);

    /* Turn carriage returns into line feeds. Walk up to the terminator:
     * after a UTF-16 conversion the string length no longer matches
     * i_psz_bytelength. */
    for( char *psz = psz_subtitle; *psz != '\0'; psz++ )
        if ( *psz == '\r' ) *psz = '\n';

    tx3g_segment_t *p_segment3g = tx3g_segment_New( psz_subtitle );
    if ( !p_segment3g )
    {
        free( psz_subtitle );
        block_Release( p_block );
        return VLCDEC_SUCCESS;
    }
    p_segment3g->i_size = str8len( psz_subtitle );
    if ( p_dec->fmt_in.subs.p_style )
        p_segment3g->s->style = text_style_Duplicate( p_dec->fmt_in.subs.p_style );

    free( psz_subtitle );

    if ( !p_segment3g->s->psz_text )
    {
        text_segment_Delete( p_segment3g->s );
        free( p_segment3g );
        block_Release( p_block );
        return VLCDEC_SUCCESS;
    }

    /* Create the subpicture unit */
    p_spu = decoder_NewSubpictureText( p_dec );
    if( !p_spu )
    {
        text_segment_Delete( p_segment3g->s );
        free( p_segment3g );
        block_Release( p_block );
        return VLCDEC_SUCCESS;
    }
    subpicture_updater_sys_t *p_spu_sys = p_spu->updater.p_sys;

    /* Parse our styles */
    /* NOTE(review): the per-atom checks below compare the offset from the
     * start of the block with a constant, not the number of bytes left, and
     * p_buf is advanced by the full atom size after its 8-byte header was
     * already skipped. Left untouched here - confirm against the format
     * before tightening. */
    while( (size_t)(p_buf - p_block->p_buffer) + 8 < p_block->i_buffer )
    {
        uint32_t i_atomsize = GetDWBE( p_buf );
        vlc_fourcc_t i_atomtype = VLC_FOURCC(p_buf[4],p_buf[5],p_buf[6],p_buf[7]);
        p_buf += 8;
        switch( i_atomtype )
        {

        case VLC_FOURCC('s','t','y','l'):
        {
            if ( (size_t)(p_buf - p_block->p_buffer) < 14 ) break;
            uint16_t i_nbrecords = GetWBE(p_buf);
            uint16_t i_cur_record = 0;
            p_buf += 2;
            /* Each record is 12 bytes: start, end, 2 bytes not read here,
             * flags, font size, RGBA colour */
            while( i_cur_record++ < i_nbrecords )
            {
                if ( (size_t)(p_buf - p_block->p_buffer) < 12 ) break;
                uint16_t i_start = __MIN( GetWBE(p_buf), i_psz_bytelength - 1 );
                uint16_t i_end =  __MIN( GetWBE(p_buf + 2), i_psz_bytelength - 1 );

                text_style_t style;
                memset( &style, 0, sizeof(text_style_t) );
                style.i_style_flags = ConvertFlags( p_buf[6] );
                style.i_font_size = p_buf[7];
                style.i_font_color = GetDWBE(p_buf+8) >> 8;// RGBA -> RGB
                style.i_font_alpha = GetDWBE(p_buf+8) & 0xFF;
                style.i_features = STYLE_HAS_FONT_COLOR | STYLE_HAS_FONT_ALPHA;
                ApplySegmentStyle( &p_segment3g, i_start, i_end, &style );

                /* A single record also becomes the default style */
                if ( i_nbrecords == 1 )
                {
                    if ( p_buf[6] )
                    {
                        if( (p_spu_sys->p_default_style->i_style_flags = ConvertFlags( p_buf[6] )) )
                            p_spu_sys->p_default_style->i_features |= STYLE_HAS_FLAGS;
                    }
                    p_spu_sys->p_default_style->i_font_size = p_buf[7];
                    p_spu_sys->p_default_style->i_font_color = GetDWBE(p_buf+8) >> 8;// RGBA -> ARGB
                    /* NOTE(review): alpha is shifted by 24 here but not in
                     * the per-segment style above - confirm which is meant */
                    p_spu_sys->p_default_style->i_font_alpha = (GetDWBE(p_buf+8) & 0xFF) << 24;
                    p_spu_sys->p_default_style->i_features |= (STYLE_HAS_FONT_COLOR | STYLE_HAS_FONT_ALPHA);
                }

                p_buf += 12;
            }
        }   break;

        case VLC_FOURCC('d','r','p','o'):
            if ( (size_t)(p_buf - p_block->p_buffer) < 4 ) break;
            p_spu_sys->p_default_style->i_shadow_width = __MAX( GetWBE(p_buf), GetWBE(p_buf+2) );
            break;

        case VLC_FOURCC('d','r','p','t'):
            if ( (size_t)(p_buf - p_block->p_buffer) < 2 ) break;
            p_spu_sys->p_default_style->i_shadow_alpha = GetWBE(p_buf);
            p_spu_sys->p_default_style->i_features |= STYLE_HAS_SHADOW_ALPHA;
            break;

        default:
            break;

        }
        p_buf += i_atomsize;
    }

    p_spu->i_start    = p_block->i_pts;
    p_spu->i_stop     = p_block->i_pts + p_block->i_length;
    p_spu->b_ephemer  = (p_block->i_length == 0);
    p_spu->b_absolute = false;

    p_spu_sys->region.inner_align = SUBPICTURE_ALIGN_BOTTOM;

    FontSizeConvert( p_dec->fmt_in.subs.p_style, p_spu_sys->p_default_style );

    /* Unwrap: chain the text segments together and free the tx3g wrappers */
    text_segment_t *p_text_segments = p_segment3g->s;
    text_segment_t *p_cur = p_text_segments;
    while( p_segment3g )
    {
        FontSizeConvert( p_dec->fmt_in.subs.p_style, p_segment3g->s->style );

        tx3g_segment_t * p_old = p_segment3g;
        p_segment3g = p_segment3g->p_next3g;
        free( p_old );
        if( p_segment3g )
            p_cur->p_next = p_segment3g->s;
        p_cur = p_cur->p_next;
    }

    p_spu_sys->region.p_segments = p_text_segments;

    block_Release( p_block );

    decoder_QueueSub( p_dec, p_spu );
    return VLCDEC_SUCCESS;
}