/**
 * Converts a raw UTF-16BE buffer into a newly allocated, NUL-terminated
 * UTF-8 string.
 *
 * The UTF-16BE -> UTF-8 converter is cached in p_handle->iconv_u16be: it is
 * opened on first use and reset (all-NULL iconv call) on later uses.
 *
 * \param p_handle  handle holding the cached converter
 * \param p_buffer  UTF-16BE input bytes
 * \param i_buffer  number of input bytes
 * \return a heap string the caller must free(), or NULL if the input is
 *         empty, allocation fails or the conversion fails
 */
char * atsc_a65_Decode_simple_UTF16_string( atsc_a65_handle_t *p_handle, const uint8_t *p_buffer, size_t i_buffer )
{
    if( i_buffer < 1 )
        return NULL;

    if( !p_handle->iconv_u16be )
    {
        /* NOTE(review): only a NULL result is treated as failure here;
         * confirm which sentinel vlc_iconv_open() returns on error. */
        if ( !(p_handle->iconv_u16be = vlc_iconv_open("UTF-8", "UTF-16BE")) )
            return NULL;
    }
    else if ( VLC_ICONV_ERR == vlc_iconv( p_handle->iconv_u16be, NULL, NULL, NULL, NULL ) ) /* reset */
    {
        return NULL;
    }

    /* UTF-8 needs at most 3 bytes per 2 bytes of UTF-16; one extra byte is
     * reserved for the terminator so it never overwrites converted data. */
    const size_t i_target_buffer = i_buffer + i_buffer / 2 + 1;
    size_t i_target_remaining = i_target_buffer - 1;
    const char *psz_toconvert = (const char *) p_buffer;
    char *psz_converted_end;
    char *psz_converted = psz_converted_end = malloc( i_target_buffer );

    if( unlikely(!psz_converted) )
        return NULL;

    if( VLC_ICONV_ERR == vlc_iconv( p_handle->iconv_u16be, &psz_toconvert, &i_buffer,
                                    &psz_converted_end, &i_target_remaining ) )
    {
        /* Bail out right away: the buffer must not be touched once freed */
        free( psz_converted );
        return NULL;
    }

    /* iconv advanced psz_converted_end past the last byte it wrote */
    *psz_converted_end = '\0';

    return psz_converted;
}
/**
 * Appends a UTF-8 string to the buffer as a sequence of 16-bit
 * little-endian code units followed by a 16-bit zero terminator.
 *
 * A NULL string is written as the terminator alone. If memory cannot be
 * allocated, or the converter cannot be opened, the terminator alone is
 * written as well, so the output always ends with a zero code unit.
 *
 * \param p_buf  destination buffer
 * \param p_str  NUL-terminated UTF-8 string, or NULL
 */
void var_buffer_addUTF16( var_buffer_t *p_buf, const char *p_str )
{
    vlc_iconv_t iconv_handle;
    size_t i_in, i_out, i;
    char *psz_out, *psz_tmp;

    if( !p_str )
    {
        var_buffer_add16( p_buf, 0 );
        return;
    }

    i_in = strlen( p_str );
    i_out = i_in * 4;

    /* +2: room for the two zero bytes of the UTF-16 terminator, which also
     * covers the empty-string case where i_out is 0 */
    psz_out = psz_tmp = malloc( i_out + 2 );
    if( !psz_out )
    {
        var_buffer_add16( p_buf, 0 );
        return;
    }

    iconv_handle = vlc_iconv_open( "UTF-16LE", "UTF-8" );
    /* NOTE(review): (vlc_iconv_t)-1 is assumed to be the failure sentinel
     * of vlc_iconv_open() - confirm against its declaration. */
    if( iconv_handle != (vlc_iconv_t)(-1) )
    {
        vlc_iconv( iconv_handle, &p_str, &i_in, &psz_tmp, &i_out );
        vlc_iconv_close( iconv_handle );
    }

    /* psz_tmp points just past the converted data (or at the start of the
     * buffer if nothing was converted): terminate there */
    psz_tmp[0] = '\0';
    psz_tmp[1] = '\0';

    /* Emit every 16-bit unit up to and including the terminator */
    for( i = 0; ; i += 2 )
    {
        uint16_t v = GetWLE( &psz_out[i] );
        var_buffer_add16( p_buf, v );
        if( !v )
            break;
    }

    free( psz_out );
}
/*****************************************************************************
 * OpenDecoder: probe the decoder and return score
 *****************************************************************************
 * Accepts plain-text subtitle and ITU T.140 streams only. Sets up the
 * decoder private data, selects the source character encoding and, unless
 * that encoding is UTF-8, opens a converter to UTF-8.
 *****************************************************************************/
static int OpenDecoder( vlc_object_t *p_this )
{
    decoder_t *p_dec = (decoder_t*)p_this;

    /* Only these two codecs are handled here */
    if( p_dec->fmt_in.i_codec != VLC_CODEC_SUBT
     && p_dec->fmt_in.i_codec != VLC_CODEC_ITU_T140 )
        return VLC_EGENERIC;

    p_dec->pf_decode = DecodeBlock;
    p_dec->fmt_out.i_cat = SPU_ES;
    p_dec->fmt_out.i_codec = 0;

    /* Private decoder state (zero-filled by calloc) */
    decoder_sys_t *p_sys = calloc( 1, sizeof( *p_sys ) );
    p_dec->p_sys = p_sys;
    if( p_sys == NULL )
        return VLC_ENOMEM;

    p_sys->i_align = 0;
    p_sys->iconv_handle = (vlc_iconv_t)-1;
    p_sys->b_autodetect_utf8 = false;

    const char *psz_charset;
    char *psz_configured = NULL; /* owned copy of "subsdec-encoding", if kept */

    if( p_dec->fmt_in.i_codec == VLC_CODEC_ITU_T140 )
    {
        /* ITU T.140 is always using UTF-8 */
        psz_charset = "UTF-8";
    }
    else if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
    {
        /* 1. Encoding announced by the demuxer */
        psz_charset = p_dec->fmt_in.subs.psz_encoding;
        msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
                 psz_charset);
    }
    else
    {
        psz_configured = var_InheritString (p_dec, "subsdec-encoding");
        if (psz_configured == NULL)
        {
            /* 3. No configured value: fall back to the locale-dependent guess */
            /* xgettext:
               The Windows ANSI code page most commonly used for this language.
               VLC uses this as a guess of the subtitle files character set
               (if UTF-8 and UTF-16 autodetection fails).
               Western European languages normally use "CP1252", which is a
               Microsoft-variant of ISO 8859-1. That suits the Latin alphabet.
               Other scripts use other code pages.

               This MUST be a valid iconv character set. If unsure, please refer
               the VideoLAN translators mailing list. */
            psz_charset = vlc_pgettext("GetACP", "CP1252");
            msg_Dbg (p_dec, "trying default character encoding: %s",
                     psz_charset);
        }
        else
        {
            /* 2. Encoding from the configuration */
            msg_Dbg (p_dec, "trying configured character encoding: %s",
                     psz_configured);
            if (strcmp (psz_configured, "system") == 0)
            {
                free (psz_configured);
                psz_configured = NULL;
                /* iconv() treats "" as nl_langinfo(CODESET) */
                psz_charset = "";
            }
            else
            {
                psz_charset = psz_configured;
            }
        }

        /* UTF-8 autodetection is only considered when the demuxer gave no
         * encoding */
        if (var_InheritBool (p_dec, "subsdec-autodetect-utf8"))
        {
            msg_Dbg (p_dec, "using automatic UTF-8 detection");
            p_sys->b_autodetect_utf8 = true;
        }
    }

    /* A converter is needed for anything that is not already UTF-8 */
    const bool b_is_utf8 = !strcasecmp (psz_charset, "UTF-8")
                        || !strcasecmp (psz_charset, "utf8");
    if (!b_is_utf8)
    {
        p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset);
        if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
            msg_Err (p_dec, "cannot convert from %s: %s", psz_charset,
                     vlc_strerror_c(errno));
    }
    free (psz_configured);

    p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" );

    return VLC_SUCCESS;
}
/**
 * Reads one line of text from a stream.
 *
 * On the first read (stream position 0) a UTF-16 byte order mark switches
 * the stream to 2-byte characters and opens a converter to UTF-8. Data is
 * then consumed up to the first LF (or, failing that, CR) found in the
 * probed data; UTF-16 lines are converted to UTF-8 before being returned.
 *
 * \param s  stream to read from
 * \return a newly allocated NUL-terminated line without its trailing
 *         LF/CR (to be released with free()), or NULL when no data could
 *         be read, on allocation failure, or when the line reaches
 *         STREAM_LINE_MAX bytes. On the NULL path the text converter, if
 *         any, is closed.
 */
char *stream_ReadLine( stream_t *s )
{
    stream_priv_t *priv = (stream_priv_t *)s;
    char *p_line = NULL;
    int i_line = 0, i_read = 0;

    /* Let's fail quickly if this is a readdir access */
    if( s->pf_read == NULL )
        return NULL;

    for( ;; )
    {
        char *psz_eol;
        const uint8_t *p_data;
        int i_data;
        int64_t i_pos;

        /* Probe new data */
        i_data = stream_Peek( s, &p_data, STREAM_PROBE_LINE );
        if( i_data <= 0 ) break; /* No more data */

        /* BOM detection */
        i_pos = stream_Tell( s );
        if( i_pos == 0 && i_data >= 2 )
        {
            const char *psz_encoding = NULL;

            if( !memcmp( p_data, "\xFF\xFE", 2 ) )
            {
                psz_encoding = "UTF-16LE";
                priv->text.little_endian = true;
            }
            else if( !memcmp( p_data, "\xFE\xFF", 2 ) )
            {
                psz_encoding = "UTF-16BE";
            }

            /* Open the converter if we need it */
            if( psz_encoding != NULL )
            {
                msg_Dbg( s, "UTF-16 BOM detected" );
                priv->text.char_width = 2;
                priv->text.conv = vlc_iconv_open( "UTF-8", psz_encoding );
                if( priv->text.conv == (vlc_iconv_t)-1 )
                    msg_Err( s, "iconv_open failed" );
            }
        }

        if( i_data % priv->text.char_width )
        {
            /* keep i_char_width boundary */
            i_data = i_data - ( i_data % priv->text.char_width );
            msg_Warn( s, "the read is not i_char_width compatible");
        }

        if( i_data == 0 )
            break;

        /* Check if there is an EOL */
        if( priv->text.char_width == 1 )
        {
            /* UTF-8: 0A <LF> */
            psz_eol = memchr( p_data, '\n', i_data );
            if( psz_eol == NULL )
                /* UTF-8: 0D <CR> */
                psz_eol = memchr( p_data, '\r', i_data );
        }
        else
        {
            const uint8_t *p_last = p_data + i_data - priv->text.char_width;
            /* U16_AT() reads big-endian, so a little-endian LF (bytes 0A 00)
             * reads back as 0x0A00 and a big-endian LF (00 0A) as 0x000A. */
            uint16_t eol = priv->text.little_endian ? 0x0A00 : 0x000A;

            assert( priv->text.char_width == 2 );
            psz_eol = NULL;
            /* UTF-16: 000A <LF> */
            for( const uint8_t *p = p_data; p <= p_last; p += 2 )
            {
                if( U16_AT( p ) == eol )
                {
                    /* point at the second byte of the EOL character */
                    psz_eol = (char *)p + 1;
                    break;
                }
            }

            if( psz_eol == NULL )
            {
                /* UTF-16: 000D <CR> */
                eol = priv->text.little_endian ? 0x0D00 : 0x000D;
                for( const uint8_t *p = p_data; p <= p_last; p += 2 )
                {
                    if( U16_AT( p ) == eol )
                    {
                        psz_eol = (char *)p + 1;
                        break;
                    }
                }
            }
        }

        if( psz_eol )
        {
            /* Consume everything up to and including the EOL character */
            i_data = (psz_eol - (char *)p_data) + 1;
            p_line = realloc_or_free( p_line,
                        i_line + i_data + priv->text.char_width ); /* add \0 */
            if( !p_line )
                goto error;
            i_data = stream_Read( s, &p_line[i_line], i_data );
            if( i_data <= 0 ) break; /* Hmmm */
            i_line += i_data - priv->text.char_width; /* skip \n */
            i_read += i_data;

            /* We have our line */
            break;
        }

        /* Read data (+1 for easy \0 append) */
        p_line = realloc_or_free( p_line,
                    i_line + STREAM_PROBE_LINE + priv->text.char_width );
        if( !p_line )
            goto error;
        i_data = stream_Read( s, &p_line[i_line], STREAM_PROBE_LINE );
        if( i_data <= 0 ) break; /* Hmmm */
        i_line += i_data;
        i_read += i_data;

        if( i_read >= STREAM_LINE_MAX )
            goto error; /* line too long */
    }

    if( i_read > 0 )
    {
        /* Terminate with one NUL character of the stream's character width */
        memset(p_line + i_line, 0, priv->text.char_width);
        i_line += priv->text.char_width; /* the added \0 */

        if( priv->text.char_width > 1 )
        {
            int i_new_line = 0;
            size_t i_in = 0, i_out = 0;
            const char * p_in = NULL;
            char * p_out = NULL;
            char * psz_new_line = NULL;

            /* iconv */
            /* UTF-8 needs at most 150% of the buffer as many as UTF-16 */
            i_new_line = i_line * 3 / 2;
            psz_new_line = malloc( i_new_line );
            if( psz_new_line == NULL )
                goto error;
            i_in = (size_t)i_line;
            i_out = (size_t)i_new_line;
            p_in = p_line;
            p_out = psz_new_line;

            if( vlc_iconv( priv->text.conv, &p_in, &i_in, &p_out, &i_out ) == (size_t)-1 )
            {
                msg_Err( s, "iconv failed" );
                msg_Dbg( s, "original: %d, in %d, out %d", i_line, (int)i_in, (int)i_out );
            }
            free( p_line );
            p_line = psz_new_line;
            i_line = (size_t)i_new_line - i_out; /* does not include \0 */

            if( i_line == 0 )
            {
                /* Nothing was converted: return an empty line instead of
                 * writing the terminator before the start of the buffer.
                 * The buffer holds at least 3 bytes at this point. */
                p_line[0] = '\0';
                return p_line;
            }
        }

        /* Remove trailing LF/CR */
        while( i_line >= 2 && ( p_line[i_line-2] == '\r' ||
            p_line[i_line-2] == '\n') ) i_line--;

        /* Make sure the \0 is there */
        p_line[i_line-1] = '\0';

        return p_line;
    }

error:
    /* We failed to read any data, probably EOF */
    free( p_line );

    /* Drop the converter so it does not outlive the text being read */
    if( priv->text.conv != (vlc_iconv_t)(-1) )
    {
        vlc_iconv_close( priv->text.conv );
        priv->text.conv = (vlc_iconv_t)(-1);
    }
    return NULL;
}
/** * This function receives a string and creates a subpicture for it. It * also calculates the size needed for this string, and renders the * needed glyphs into memory. It is used as pf_add_string callback in * the vout method by this module */ static subpicture_t *RenderText( filter_t *p_filter, block_t *p_block ) { filter_sys_t *p_sys = p_filter->p_sys; subpicture_t *p_subpic = 0; subpicture_data_t *p_string = 0; line_desc_t *p_line = 0, *p_next = 0, *p_prev = 0; int i, i_pen_y, i_pen_x, i_error, i_glyph_index, i_previous; uint32_t *psz_unicode, *psz_unicode_orig = 0, i_char, *psz_line_start; int i_string_length; char *psz_string; vlc_iconv_t iconv_handle = (vlc_iconv_t)(-1); FT_BBox line; FT_BBox glyph_size; FT_Vector result; FT_Glyph tmp_glyph; /* Sanity check */ if( !p_block ) return NULL; psz_string = p_block->p_buffer; if( !psz_string || !*psz_string ) goto error; result.x = 0; result.y = 0; line.xMin = 0; line.xMax = 0; line.yMin = 0; line.yMax = 0; /* Create and initialize a subpicture */ p_subpic = p_filter->pf_sub_buffer_new( p_filter ); if( !p_subpic ) goto error; p_subpic->i_start = p_block->i_pts; p_subpic->i_stop = p_block->i_pts + p_block->i_length; p_subpic->b_ephemer = (p_block->i_length == 0); p_subpic->b_absolute = VLC_FALSE; /* Create and initialize private data for the subpicture */ p_string = malloc( sizeof(subpicture_data_t) ); if( !p_string ) { msg_Err( p_filter, "out of memory" ); goto error; } p_string->p_lines = 0; p_string->psz_text = strdup( psz_string ); psz_unicode = psz_unicode_orig = malloc( ( strlen(psz_string) + 1 ) * sizeof(uint32_t) ); if( psz_unicode == NULL ) { msg_Err( p_filter, "out of memory" ); goto error; } #if defined(WORDS_BIGENDIAN) iconv_handle = vlc_iconv_open( "UCS-4BE", "UTF-8" ); #else iconv_handle = vlc_iconv_open( "UCS-4LE", "UTF-8" ); #endif if( iconv_handle == (vlc_iconv_t)-1 ) { msg_Warn( p_filter, "unable to do convertion" ); goto error; } { char *p_in_buffer, *p_out_buffer; size_t i_in_bytes, i_out_bytes, 
i_out_bytes_left, i_ret; i_in_bytes = strlen( psz_string ); i_out_bytes = i_in_bytes * sizeof( uint32_t ); i_out_bytes_left = i_out_bytes; p_in_buffer = psz_string; p_out_buffer = (char *)psz_unicode; i_ret = vlc_iconv( iconv_handle, &p_in_buffer, &i_in_bytes, &p_out_buffer, &i_out_bytes_left ); vlc_iconv_close( iconv_handle ); if( i_in_bytes ) { msg_Warn( p_filter, "failed to convert string to unicode (%s), " "bytes left %d", strerror(errno), i_in_bytes ); goto error; } *(uint32_t*)p_out_buffer = 0; i_string_length = (i_out_bytes - i_out_bytes_left) / sizeof(uint32_t); } #if defined(HAVE_FRIBIDI) { uint32_t *p_fribidi_string; FriBidiCharType base_dir = FRIBIDI_TYPE_ON; p_fribidi_string = malloc( (i_string_length + 1) * sizeof(uint32_t) ); fribidi_log2vis( (FriBidiChar*)psz_unicode, i_string_length, &base_dir, (FriBidiChar*)p_fribidi_string, 0, 0, 0 ); free( psz_unicode_orig ); psz_unicode = psz_unicode_orig = p_fribidi_string; p_fribidi_string[ i_string_length ] = 0; } #endif /* Calculate relative glyph positions and a bounding box for the * entire string */ p_line = NewLine( psz_string ); if( p_line == NULL ) { msg_Err( p_filter, "out of memory" ); goto error; } p_string->p_lines = p_line; i_pen_x = 0; i_pen_y = 0; i_previous = 0; i = 0; psz_line_start = psz_unicode; #define face p_sys->p_face #define glyph face->glyph while( *psz_unicode ) { i_char = *psz_unicode++; if( i_char == '\r' ) /* ignore CR chars wherever they may be */ { continue; } if( i_char == '\n' ) { psz_line_start = psz_unicode; p_next = NewLine( psz_string ); if( p_next == NULL ) { msg_Err( p_filter, "out of memory" ); goto error; } p_line->p_next = p_next; p_line->i_width = line.xMax; p_line->i_height = face->size->metrics.height >> 6; p_line->pp_glyphs[ i ] = NULL; p_prev = p_line; p_line = p_next; result.x = __MAX( result.x, line.xMax ); result.y += face->size->metrics.height >> 6; i_pen_x = 0; i_previous = 0; line.xMin = 0; line.xMax = 0; line.yMin = 0; line.yMax = 0; i_pen_y += 
face->size->metrics.height >> 6; #if 0 msg_Dbg( p_filter, "Creating new line, i is %d", i ); #endif i = 0; continue; } i_glyph_index = FT_Get_Char_Index( face, i_char ); if( p_sys->i_use_kerning && i_glyph_index && i_previous ) { FT_Vector delta; FT_Get_Kerning( face, i_previous, i_glyph_index, ft_kerning_default, &delta ); i_pen_x += delta.x >> 6; }
/**
 * Decodes one segment of text and appends it, as UTF-8, to a growing
 * NUL-terminated string.
 *
 * \param p_handle     handle holding the cached UTF-16BE -> UTF-8 converter
 * \param p_src        segment bytes
 * \param i_src        number of segment bytes
 * \param ppsz_merg    in/out: the accumulated string (may be reallocated)
 * \param pi_mergmin1  in/out: length of the accumulated string, not
 *                     counting its terminator
 * \param i_mode       encoding mode of the segment
 * \return true if the segment was appended in full; false if the segment
 *         is empty, the mode is reserved or unsupported, memory could not
 *         be allocated, or the conversion failed. On a conversion failure
 *         the bytes converted so far are kept and the outputs are updated;
 *         on every other failure the outputs are left untouched.
 */
static bool convert_encoding_set( atsc_a65_handle_t *p_handle,
                                  const uint8_t *p_src, size_t i_src,
                                  char **ppsz_merg, size_t *pi_mergmin1,
                                  uint8_t i_mode )
{
    char *psz_dest = *ppsz_merg;
    size_t i_mergmin1 = *pi_mergmin1;
    bool b_ret = true;

    if( i_src == 0 )
        return false;

    /* First exclude reserved ranges */
    for( unsigned i=0; i<12; i+=2 )
    {
        if( i_mode >= ATSC_A65_MODE_RESERVED_RANGES[i] &&
            i_mode <= ATSC_A65_MODE_RESERVED_RANGES[i+1] )
            return false;
    }

    if( i_mode == ATSC_A65_MODE_UNICODE_RANGE_START ) /* Latin 1 */
    {
        /* The source bytes are appended as they are */
        char *psz_realloc = realloc( psz_dest, i_mergmin1 + i_src + 1 );
        if( !psz_realloc )
            return false;

        memcpy( &psz_realloc[i_mergmin1], p_src, i_src );
        psz_realloc[i_mergmin1 + i_src] = 0;
        psz_dest = psz_realloc;
        i_mergmin1 += i_src;
    }
    else if( i_mode > ATSC_A65_MODE_UNICODE_RANGE_START && /* 8 range prefix + 8 */
             i_mode <= ATSC_A65_MODE_UNICODE_RANGE_END )
    {
        if( !p_handle->iconv_u16be )
        {
            /* NOTE(review): only a NULL result is treated as failure here;
             * confirm which sentinel vlc_iconv_open() returns on error. */
            if ( !(p_handle->iconv_u16be = vlc_iconv_open("UTF-8", "UTF-16BE")) )
                return false;
        }
        else if ( VLC_ICONV_ERR == vlc_iconv( p_handle->iconv_u16be, NULL, NULL, NULL, NULL ) ) /* reset */
        {
            return false;
        }

        /* Widen each source byte to a 16-bit unit before conversion */
        char *psz16 = enlarge_to16( p_src, i_src, i_mode ); /* Maybe we can skip and feed iconv 2 by 2 */
        if( !psz16 )
            return false;

        char *psz_realloc = realloc( psz_dest, i_mergmin1 + (4 * i_src) + 1 );
        if( !psz_realloc )
        {
            /* psz_dest is still valid and unchanged: report the failure
             * instead of pretending the segment was appended */
            free( psz16 );
            return false;
        }

        const char *p_inbuf = psz16;
        char *p_outbuf = &psz_realloc[i_mergmin1];
        const size_t i_outbuf_size = i_src * 4;
        size_t i_inbuf_remain = i_src * 2;
        size_t i_outbuf_remain = i_outbuf_size;

        b_ret = ( VLC_ICONV_ERR != vlc_iconv( p_handle->iconv_u16be, &p_inbuf, &i_inbuf_remain,
                                                                     &p_outbuf, &i_outbuf_remain ) );
        psz_dest = psz_realloc;
        i_mergmin1 += (i_outbuf_size - i_outbuf_remain);
        /* Terminate right after the last converted byte; the allocation
         * above reserved one extra byte for this */
        psz_dest[i_mergmin1] = 0;

        free( psz16 );
    }
    else
    {
        /* Unsupported encodings */
        return false;
    }

    *ppsz_merg = psz_dest;
    *pi_mergmin1 = i_mergmin1;
    return b_ret;
}
/*****************************************************************************
 * OpenDecoder: probe the decoder and return score
 *****************************************************************************
 * Accepts 'subt' and 'ssa ' streams only. Allocates the decoder private
 * data, selects the source character encoding (demuxer suggestion first,
 * then the "subsdec-encoding" variable) and opens a converter to UTF-8
 * when one is needed. For 'ssa ' streams with "subsdec-formatted" set, the
 * SSA header found in the extra data is parsed.
 *
 * Returns VLC_EGENERIC for other codecs, VLC_ENOMEM if the private data
 * cannot be allocated and VLC_SUCCESS otherwise.
 *****************************************************************************/
static int OpenDecoder( vlc_object_t *p_this )
{
    decoder_t     *p_dec = (decoder_t*)p_this;
    decoder_sys_t *p_sys;
    vlc_value_t    val;

    if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
        p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
    {
        return VLC_EGENERIC;
    }

    p_dec->pf_decode_sub = DecodeBlock;

    /* Allocate the memory needed to store the decoder's structure */
    p_dec->p_sys = p_sys = malloc( sizeof( *p_sys ) );
    if( p_sys == NULL )
    {
        msg_Err( p_dec, "out of memory" );
        return VLC_ENOMEM;
    }

    /* init of p_sys */
    p_sys->i_align = 0;
    p_sys->iconv_handle = (vlc_iconv_t)-1;
    p_sys->b_autodetect_utf8 = VLC_FALSE;
    p_sys->b_ass = VLC_FALSE;
    p_sys->i_original_height = -1;
    p_sys->i_original_width = -1;
    p_sys->pp_ssa_styles = NULL;
    p_sys->i_ssa_styles = 0;

    if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
    {
        msg_Dbg( p_dec, "using demux suggested character encoding: %s",
                 p_dec->fmt_in.subs.psz_encoding );
        /* UTF-8 input needs no converter */
        if( strcmp( p_dec->fmt_in.subs.psz_encoding, "UTF-8" ) )
            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
    }
    else
    {
        var_Create( p_dec, "subsdec-encoding",
                    VLC_VAR_STRING | VLC_VAR_DOINHERIT );
        /* Start from a known value so a failed lookup cannot leave a stray
         * pointer behind */
        val.psz_string = NULL;
        var_Get( p_dec, "subsdec-encoding", &val );

        if( val.psz_string == NULL )
        {
            /* No value available: the string must not reach strcmp().
             * No converter is opened in that case. */
        }
        else if( !strcmp( val.psz_string, DEFAULT_NAME ) )
        {
            const char *psz_charset = GetFallbackEncoding();

            p_sys->b_autodetect_utf8 = var_CreateGetBool( p_dec,
                    "subsdec-autodetect-utf8" );

            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
            msg_Dbg( p_dec, "using fallback character encoding: %s", psz_charset );
        }
        else if( !strcmp( val.psz_string, "UTF-8" ) )
        {
            msg_Dbg( p_dec, "using enforced character encoding: UTF-8" );
        }
        else
        {
            msg_Dbg( p_dec, "using enforced character encoding: %s",
                     val.psz_string );
            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string );
            if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
            {
                msg_Warn( p_dec, "unable to do requested conversion" );
            }
        }
        free( val.psz_string );
    }

    var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
    var_Get( p_dec, "subsdec-align", &val );
    p_sys->i_align = val.i_int;

    if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') &&
        var_CreateGetBool( p_dec, "subsdec-formatted" ) )
    {
        if( p_dec->fmt_in.i_extra > 0 )
            ParseSSAHeader( p_dec );
    }

    return VLC_SUCCESS;
}
/**
 * Decodes an NSC-encoded string into a newly allocated UTF-8 string.
 *
 * The input is unpacked with load_byte(): one byte giving the encoding
 * type (only 1 and 2 are accepted), five bytes that are read and ignored,
 * a 4-byte big-endian payload length, then the payload itself, which is
 * converted from UTF-16LE to UTF-8.
 *
 * \param p_demux    object used for logging
 * \param p_encoded  NUL-terminated encoded string (at least 15 characters)
 * \return a heap string the caller must free(), or NULL on any error
 */
char *nscdec( vlc_object_t *p_demux, char* p_encoded )
{
    unsigned int i;
    unsigned char tmp;
    unsigned char j, k;
    unsigned int length;
    unsigned char encoding_type;

    vlc_iconv_t conv;
    size_t buf16_size;
    unsigned char *buf16;
    char *p_buf16;
    size_t buf8_capacity;
    size_t buf8_size;
    char *buf8;
    char *p_buf8;

    char *p_input = p_encoded;

    if( strlen( p_input ) < 15 )
    {
        msg_Err( p_demux, "input string less than 15 characters" );
        return NULL;
    }

    if( load_byte( 1, &encoding_type, &p_input, NULL, NULL ) )
    {
        msg_Err( p_demux, "unable to get NSC encoding type" );
        return NULL;
    }

    if( encoding_type != 1 && encoding_type != 2 )
    {
        msg_Err( p_demux, "encoding type %d is not supported",
                 encoding_type );
        return NULL;
    }

    /* Five header bytes are read and discarded */
    j = k = 0;

    if( load_byte( encoding_type, &tmp, &p_input, &j, &k ) )
    {
        msg_Err( p_demux, "load_byte failed" );
        return NULL;
    }

    for( i = 0; i < 4; i++ )
    {
        if( load_byte( encoding_type, &tmp, &p_input, &j, &k ) )
        {
            msg_Err( p_demux, "load_byte failed" );
            return NULL;
        }
    }

    /* Payload length, most significant byte first */
    length = 0;
    for( i = 4; i; i-- )
    {
        if( load_byte( encoding_type, &tmp, &p_input, &j, &k ) )
        {
            msg_Err( p_demux, "load_byte failed" );
            return NULL;
        }
        /* shift as unsigned: tmp would otherwise be promoted to signed int */
        length |= (unsigned int)tmp << ((i - 1) * 8);
    }

    if( length == 0 )
    {
        msg_Err( p_demux, "Length is 0" );
        return NULL;
    }

    buf16_size = length;
    buf16 = malloc( buf16_size );
    if( buf16 == NULL )
    {
        msg_Err( p_demux, "out of memory" );
        return NULL;
    }

    for( i = 0; i < length; i++ )
    {
        if( load_byte( encoding_type, &buf16[ i ], &p_input, &j, &k ) )
        {
            msg_Err( p_demux, "load_byte failed" );
            free( buf16 );
            return NULL;
        }
    }

    /* UTF-8 needs at most 3 bytes per 2 bytes of UTF-16; one more byte is
     * allocated for the terminator */
    buf8_capacity = (size_t)length + length / 2;
    buf8_size = buf8_capacity;
    buf8 = malloc( buf8_capacity + 1 );
    if( buf8 == NULL )
    {
        msg_Err( p_demux, "out of memory" );
        free( buf16 );
        return NULL;
    }

    conv = vlc_iconv_open( "UTF-8", "UTF-16LE" );
    if( conv == (vlc_iconv_t)-1 )
    {
        msg_Err( p_demux, "iconv_open failed" );
        free( buf16 );
        free( buf8 );
        return NULL;
    }

    p_buf8 = &buf8[ 0 ];
    p_buf16 = (char *)&buf16[ 0 ];

    /* The result is a size_t, so failure must be tested against
     * (size_t)-1; a "< 0" comparison can never be true */
    if( vlc_iconv( conv, &p_buf16, &buf16_size, &p_buf8, &buf8_size ) == (size_t)-1 )
    {
        msg_Err( p_demux, "iconv failed" );
        vlc_iconv_close( conv );
        free( buf16 );
        free( buf8 );
        return NULL;
    }

    /* buf8_size now holds the unused space: terminate after what was written */
    buf8[ buf8_capacity - buf8_size ] = '\0';

    vlc_iconv_close( conv );

    free( buf16 );
    return buf8;
}
/*****************************************************************************
 * OpenDecoder: probe the decoder and return score
 *****************************************************************************
 * Accepts plain-text, SSA and ITU T.140 subtitle streams. Sets up the
 * decoder private data, selects the source character encoding through a
 * chain of fallbacks and, unless that encoding is UTF-8, opens a converter
 * to UTF-8. For SSA streams with "subsdec-formatted" set, the SSA header
 * found in the extra data is parsed.
 *****************************************************************************/
static int OpenDecoder( vlc_object_t *p_this )
{
    decoder_t *p_dec = (decoder_t*)p_this;

    if( p_dec->fmt_in.i_codec != VLC_CODEC_SUBT
     && p_dec->fmt_in.i_codec != VLC_CODEC_SSA
     && p_dec->fmt_in.i_codec != VLC_CODEC_ITU_T140 )
        return VLC_EGENERIC;

    p_dec->pf_decode_sub = DecodeBlock;
    p_dec->fmt_out.i_cat = SPU_ES;
    p_dec->fmt_out.i_codec = 0;

    /* Private decoder state (zero-filled by calloc) */
    decoder_sys_t *p_sys = calloc( 1, sizeof( *p_sys ) );
    p_dec->p_sys = p_sys;
    if( p_sys == NULL )
        return VLC_ENOMEM;

    p_sys->i_align = 0;
    p_sys->iconv_handle = (vlc_iconv_t)-1;
    p_sys->b_autodetect_utf8 = false;
    p_sys->b_ass = false;
    p_sys->i_original_height = -1;
    p_sys->i_original_width = -1;
    TAB_INIT( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
    TAB_INIT( p_sys->i_images, p_sys->pp_images );

    /* Heap copy of the selected encoding name. Each step below only runs
     * while no name has been obtained yet, so a failed strdup() simply
     * hands over to the next step. */
    char *psz_enc = NULL;

    /* 1. Encoding implied by the codec or announced by the demuxer */
    if( p_dec->fmt_in.i_codec == VLC_CODEC_ITU_T140 )
    {
        /* ITU T.140 is always using UTF-8 */
        psz_enc = strdup( "UTF-8" );
    }
    else if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
    {
        const char *psz_demux_enc = p_dec->fmt_in.subs.psz_encoding;

        psz_enc = strdup (psz_demux_enc);
        msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
                 psz_demux_enc);
    }

    /* 2. Encoding from the configuration */
    if (!psz_enc)
    {
        psz_enc = var_InheritString (p_dec, "subsdec-encoding");
        if (psz_enc)
            msg_Dbg (p_dec, "trying configured character encoding: %s",
                     psz_enc);
        else
            msg_Dbg (p_dec, "trying configured character encoding: %s",
                     "not specified");

        if (psz_enc && strcmp (psz_enc, "system") == 0)
        {
            free (psz_enc);
            /* iconv() treats "" as nl_langinfo(CODESET) */
            psz_enc = strdup ("");
        }
    }

    /* 3. Locale-dependent guess, with optional UTF-8 autodetection */
    if (!psz_enc)
    {
        /* xgettext:
           The Windows ANSI code page most commonly used for this language.
           VLC uses this as a guess of the subtitle files character set
           (if UTF-8 and UTF-16 autodetection fails).
           Western European languages normally use "CP1252", which is a
           Microsoft-variant of ISO 8859-1. That suits the Latin alphabet.
           Other scripts use other code pages.

           This MUST be a valid iconv character set. If unsure, please refer
           the VideoLAN translators mailing list. */
        const char *acp = vlc_pgettext("GetACP", "CP1252");

        psz_enc = strdup (acp);
        if (psz_enc)
            msg_Dbg (p_dec, "trying default character encoding: %s", psz_enc);
        else
            msg_Dbg (p_dec, "trying default character encoding: %s",
                     "not specified");

        if (var_InheritBool (p_dec, "subsdec-autodetect-utf8"))
        {
            msg_Dbg (p_dec, "using automatic UTF-8 detection");
            p_sys->b_autodetect_utf8 = true;
        }
    }

    /* 4. Last resort: no character decoding, i.e. assume UTF-8 */
    if (!psz_enc)
    {
        psz_enc = strdup ("UTF-8");
        msg_Dbg (p_dec, "using UTF-8 character encoding" );
    }

    /* A converter is needed for anything that is not already UTF-8 */
    if (psz_enc != NULL)
    {
        const bool b_is_utf8 = !strcasecmp (psz_enc, "UTF-8")
                            || !strcasecmp (psz_enc, "utf8");
        if (!b_is_utf8)
        {
            p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_enc);
            if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
                msg_Err (p_dec, "cannot convert from %s: %m", psz_enc);
        }
    }
    free (psz_enc);

    p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" );

    if( p_dec->fmt_in.i_codec == VLC_CODEC_SSA
     && var_InheritBool( p_dec, "subsdec-formatted" )
     && p_dec->fmt_in.i_extra > 0 )
        ParseSSAHeader( p_dec );

    return VLC_SUCCESS;
}