Beispiel #1
0
char * atsc_a65_Decode_simple_UTF16_string( atsc_a65_handle_t *p_handle, const uint8_t *p_buffer, size_t i_buffer )
{
    if( i_buffer < 1 )
        return NULL;

    if( !p_handle->iconv_u16be )
    {
        if ( !(p_handle->iconv_u16be = vlc_iconv_open("UTF-8", "UTF-16BE")) )
            return NULL;
    }
    else if ( VLC_ICONV_ERR == vlc_iconv( p_handle->iconv_u16be, NULL, NULL, NULL, NULL ) ) /* reset */
    {
        return NULL;
    }

    const size_t i_target_buffer = i_buffer * 3 / 2;
    size_t i_target_remaining = i_target_buffer;
    const char *psz_toconvert = (const char *) p_buffer;
    char *psz_converted_end;
    char *psz_converted = psz_converted_end = malloc( i_target_buffer );

    if( unlikely(!psz_converted) )
        return NULL;

    if( VLC_ICONV_ERR == vlc_iconv( p_handle->iconv_u16be, &psz_toconvert, &i_buffer,
                                                           &psz_converted_end, &i_target_remaining ) )
    {
        free( psz_converted );
        psz_converted = NULL;
    }

    psz_converted[ i_target_buffer - i_target_remaining - 1 ] = 0;
    return psz_converted;
}
Beispiel #2
0
void var_buffer_addUTF16( var_buffer_t *p_buf, const char *p_str )
{
    unsigned int i;
    if( !p_str )
    {
        var_buffer_add16( p_buf, 0 );
    }
    else
    {
        vlc_iconv_t iconv_handle;
        size_t i_in = strlen( p_str );
        size_t i_out = i_in * 4;
        char *psz_out, *psz_tmp;

        psz_out = psz_tmp = malloc( i_out + 1 );
        iconv_handle = vlc_iconv_open( "UTF-16LE", "UTF-8" );
        vlc_iconv( iconv_handle, &p_str, &i_in, &psz_tmp, &i_out );
        vlc_iconv_close( iconv_handle );
        psz_tmp[0] = '\0';
        psz_tmp[1] = '\0';

        for( i = 0; ; i += 2 )
        {
            uint16_t v = GetWLE( &psz_out[i] );
            var_buffer_add16( p_buf, v );
            if( !v )
                break;
        }
        free( psz_out );
    }
}
Beispiel #3
0
/*****************************************************************************
 * OpenDecoder: probe the decoder and return score
 *****************************************************************************
 * Tries to launch a decoder and return score so that the interface is able
 * to chose.
 *****************************************************************************/
static int OpenDecoder( vlc_object_t *p_this )
{
    decoder_t     *p_dec = (decoder_t*)p_this;
    decoder_sys_t *p_sys;

    switch( p_dec->fmt_in.i_codec )
    {
        case VLC_CODEC_SUBT:
        case VLC_CODEC_ITU_T140:
            break;
        default:
            return VLC_EGENERIC;
    }

    p_dec->pf_decode = DecodeBlock;
    p_dec->fmt_out.i_cat = SPU_ES;
    p_dec->fmt_out.i_codec = 0;

    /* Allocate the memory needed to store the decoder's structure */
    p_dec->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) );
    if( p_sys == NULL )
        return VLC_ENOMEM;

    /* init of p_sys */
    p_sys->i_align = 0;
    p_sys->iconv_handle = (vlc_iconv_t)-1;
    p_sys->b_autodetect_utf8 = false;

    const char *encoding;
    char *var = NULL;

    /* First try demux-specified encoding */
    if( p_dec->fmt_in.i_codec == VLC_CODEC_ITU_T140 )
        encoding = "UTF-8"; /* IUT T.140 is always using UTF-8 */
    else
    if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
    {
        encoding = p_dec->fmt_in.subs.psz_encoding;
        msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
                 encoding);
    }
    else
    {
        /* Second, try configured encoding */
        if ((var = var_InheritString (p_dec, "subsdec-encoding")) != NULL)
        {
            msg_Dbg (p_dec, "trying configured character encoding: %s", var);
            if (!strcmp (var, "system"))
            {
                free (var);
                var = NULL;
                encoding = "";
                /* ^ iconv() treats "" as nl_langinfo(CODESET) */
            }
            else
                encoding = var;
        }
        else
        /* Third, try "local" encoding */
        {
        /* xgettext:
           The Windows ANSI code page most commonly used for this language.
           VLC uses this as a guess of the subtitle files character set
           (if UTF-8 and UTF-16 autodetection fails).
           Western European languages normally use "CP1252", which is a
           Microsoft-variant of ISO 8859-1. That suits the Latin alphabet.
           Other scripts use other code pages.

           This MUST be a valid iconv character set. If unsure, please refer
           the VideoLAN translators mailing list. */
            encoding = vlc_pgettext("GetACP", "CP1252");
            msg_Dbg (p_dec, "trying default character encoding: %s", encoding);
        }

        /* Check UTF-8 autodetection */
        if (var_InheritBool (p_dec, "subsdec-autodetect-utf8"))
        {
            msg_Dbg (p_dec, "using automatic UTF-8 detection");
            p_sys->b_autodetect_utf8 = true;
        }
    }

    if (strcasecmp (encoding, "UTF-8") && strcasecmp (encoding, "utf8"))
    {
        p_sys->iconv_handle = vlc_iconv_open ("UTF-8", encoding);
        if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
            msg_Err (p_dec, "cannot convert from %s: %s", encoding,
                     vlc_strerror_c(errno));
    }
    free (var);

    p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" );

    return VLC_SUCCESS;
}
Beispiel #4
0
char *stream_ReadLine( stream_t *s )
{
    stream_priv_t *priv = (stream_priv_t *)s;
    char *p_line = NULL;
    int i_line = 0, i_read = 0;

    /* Let's fail quickly if this is a readdir access */
    if( s->pf_read == NULL )
        return NULL;

    for( ;; )
    {
        char *psz_eol;
        const uint8_t *p_data;
        int i_data;
        int64_t i_pos;

        /* Probe new data */
        i_data = stream_Peek( s, &p_data, STREAM_PROBE_LINE );
        if( i_data <= 0 ) break; /* No more data */

        /* BOM detection */
        i_pos = stream_Tell( s );
        if( i_pos == 0 && i_data >= 2 )
        {
            const char *psz_encoding = NULL;

            if( !memcmp( p_data, "\xFF\xFE", 2 ) )
            {
                psz_encoding = "UTF-16LE";
                priv->text.little_endian = true;
            }
            else if( !memcmp( p_data, "\xFE\xFF", 2 ) )
            {
                psz_encoding = "UTF-16BE";
            }

            /* Open the converter if we need it */
            if( psz_encoding != NULL )
            {
                msg_Dbg( s, "UTF-16 BOM detected" );
                priv->text.char_width = 2;
                priv->text.conv = vlc_iconv_open( "UTF-8", psz_encoding );
                if( priv->text.conv == (vlc_iconv_t)-1 )
                    msg_Err( s, "iconv_open failed" );
            }
        }

        if( i_data % priv->text.char_width )
        {
            /* keep i_char_width boundary */
            i_data = i_data - ( i_data % priv->text.char_width );
            msg_Warn( s, "the read is not i_char_width compatible");
        }

        if( i_data == 0 )
            break;

        /* Check if there is an EOL */
        if( priv->text.char_width == 1 )
        {
            /* UTF-8: 0A <LF> */
            psz_eol = memchr( p_data, '\n', i_data );
            if( psz_eol == NULL )
                /* UTF-8: 0D <CR> */
                psz_eol = memchr( p_data, '\r', i_data );
        }
        else
        {
            const uint8_t *p_last = p_data + i_data - priv->text.char_width;
            uint16_t eol = priv->text.little_endian ? 0x0A00 : 0x00A0;

            assert( priv->text.char_width == 2 );
            psz_eol = NULL;
            /* UTF-16: 000A <LF> */
            for( const uint8_t *p = p_data; p <= p_last; p += 2 )
            {
                if( U16_AT( p ) == eol )
                {
                     psz_eol = (char *)p + 1;
                     break;
                }
            }

            if( psz_eol == NULL )
            {   /* UTF-16: 000D <CR> */
                eol = priv->text.little_endian ? 0x0D00 : 0x00D0;
                for( const uint8_t *p = p_data; p <= p_last; p += 2 )
                {
                    if( U16_AT( p ) == eol )
                    {
                        psz_eol = (char *)p + 1;
                        break;
                    }
                }
            }
        }

        if( psz_eol )
        {
            i_data = (psz_eol - (char *)p_data) + 1;
            p_line = realloc_or_free( p_line,
                        i_line + i_data + priv->text.char_width ); /* add \0 */
            if( !p_line )
                goto error;
            i_data = stream_Read( s, &p_line[i_line], i_data );
            if( i_data <= 0 ) break; /* Hmmm */
            i_line += i_data - priv->text.char_width; /* skip \n */;
            i_read += i_data;

            /* We have our line */
            break;
        }

        /* Read data (+1 for easy \0 append) */
        p_line = realloc_or_free( p_line,
                          i_line + STREAM_PROBE_LINE + priv->text.char_width );
        if( !p_line )
            goto error;
        i_data = stream_Read( s, &p_line[i_line], STREAM_PROBE_LINE );
        if( i_data <= 0 ) break; /* Hmmm */
        i_line += i_data;
        i_read += i_data;

        if( i_read >= STREAM_LINE_MAX )
            goto error; /* line too long */
    }

    if( i_read > 0 )
    {
        memset(p_line + i_line, 0, priv->text.char_width);
        i_line += priv->text.char_width; /* the added \0 */

        if( priv->text.char_width > 1 )
        {
            int i_new_line = 0;
            size_t i_in = 0, i_out = 0;
            const char * p_in = NULL;
            char * p_out = NULL;
            char * psz_new_line = NULL;

            /* iconv */
            /* UTF-8 needs at most 150% of the buffer as many as UTF-16 */
            i_new_line = i_line * 3 / 2;
            psz_new_line = malloc( i_new_line );
            if( psz_new_line == NULL )
                goto error;
            i_in = (size_t)i_line;
            i_out = (size_t)i_new_line;
            p_in = p_line;
            p_out = psz_new_line;

            if( vlc_iconv( priv->text.conv, &p_in, &i_in, &p_out, &i_out ) == (size_t)-1 )
            {
                msg_Err( s, "iconv failed" );
                msg_Dbg( s, "original: %d, in %d, out %d", i_line, (int)i_in, (int)i_out );
            }
            free( p_line );
            p_line = psz_new_line;
            i_line = (size_t)i_new_line - i_out; /* does not include \0 */
        }

        /* Remove trailing LF/CR */
        while( i_line >= 2 && ( p_line[i_line-2] == '\r' ||
            p_line[i_line-2] == '\n') ) i_line--;

        /* Make sure the \0 is there */
        p_line[i_line-1] = '\0';

        return p_line;
    }

error:
    /* We failed to read any data, probably EOF */
    free( p_line );

    /* */
    if( priv->text.conv != (vlc_iconv_t)(-1) )
    {
        vlc_iconv_close( priv->text.conv );
        priv->text.conv = (vlc_iconv_t)(-1);
    }
    return NULL;
}
Beispiel #5
0
/**
 * This function receives a string and creates a subpicture for it. It
 * also calculates the size needed for this string, and renders the
 * needed glyphs into memory. It is used as pf_add_string callback in
 * the vout method by this module
 */
static subpicture_t *RenderText( filter_t *p_filter, block_t *p_block )
{
    filter_sys_t *p_sys = p_filter->p_sys;
    subpicture_t *p_subpic = 0;
    subpicture_data_t *p_string = 0;
    line_desc_t  *p_line = 0, *p_next = 0, *p_prev = 0;
    int i, i_pen_y, i_pen_x, i_error, i_glyph_index, i_previous;
    uint32_t *psz_unicode, *psz_unicode_orig = 0, i_char, *psz_line_start;
    int i_string_length;
    char *psz_string;
    vlc_iconv_t iconv_handle = (vlc_iconv_t)(-1);

    FT_BBox line;
    FT_BBox glyph_size;
    FT_Vector result;
    FT_Glyph tmp_glyph;

    /* Sanity check */
    if( !p_block ) return NULL;
    psz_string = p_block->p_buffer;
    if( !psz_string || !*psz_string ) goto error;

    result.x = 0;
    result.y = 0;
    line.xMin = 0;
    line.xMax = 0;
    line.yMin = 0;
    line.yMax = 0;

    /* Create and initialize a subpicture */
    p_subpic = p_filter->pf_sub_buffer_new( p_filter );
    if( !p_subpic ) goto error;

    p_subpic->i_start = p_block->i_pts;
    p_subpic->i_stop = p_block->i_pts + p_block->i_length;
    p_subpic->b_ephemer = (p_block->i_length == 0);
    p_subpic->b_absolute = VLC_FALSE;

    /* Create and initialize private data for the subpicture */
    p_string = malloc( sizeof(subpicture_data_t) );
    if( !p_string )
    {
        msg_Err( p_filter, "out of memory" );
        goto error;
    }
    p_string->p_lines = 0;
    p_string->psz_text = strdup( psz_string );

    psz_unicode = psz_unicode_orig =
        malloc( ( strlen(psz_string) + 1 ) * sizeof(uint32_t) );
    if( psz_unicode == NULL )
    {
        msg_Err( p_filter, "out of memory" );
        goto error;
    }
#if defined(WORDS_BIGENDIAN)
    iconv_handle = vlc_iconv_open( "UCS-4BE", "UTF-8" );
#else
    iconv_handle = vlc_iconv_open( "UCS-4LE", "UTF-8" );
#endif
    if( iconv_handle == (vlc_iconv_t)-1 )
    {
        msg_Warn( p_filter, "unable to do convertion" );
        goto error;
    }

    {
        char *p_in_buffer, *p_out_buffer;
        size_t i_in_bytes, i_out_bytes, i_out_bytes_left, i_ret;
        i_in_bytes = strlen( psz_string );
        i_out_bytes = i_in_bytes * sizeof( uint32_t );
        i_out_bytes_left = i_out_bytes;
        p_in_buffer = psz_string;
        p_out_buffer = (char *)psz_unicode;
        i_ret = vlc_iconv( iconv_handle, &p_in_buffer, &i_in_bytes,
                           &p_out_buffer, &i_out_bytes_left );

        vlc_iconv_close( iconv_handle );

        if( i_in_bytes )
        {
            msg_Warn( p_filter, "failed to convert string to unicode (%s), "
                      "bytes left %d", strerror(errno), i_in_bytes );
            goto error;
        }
        *(uint32_t*)p_out_buffer = 0;
        i_string_length = (i_out_bytes - i_out_bytes_left) / sizeof(uint32_t);
    }

#if defined(HAVE_FRIBIDI)
    {
        uint32_t *p_fribidi_string;
        FriBidiCharType base_dir = FRIBIDI_TYPE_ON;
        p_fribidi_string = malloc( (i_string_length + 1) * sizeof(uint32_t) );
        fribidi_log2vis( (FriBidiChar*)psz_unicode, i_string_length,
                         &base_dir, (FriBidiChar*)p_fribidi_string, 0, 0, 0 );
        free( psz_unicode_orig );
        psz_unicode = psz_unicode_orig = p_fribidi_string;
        p_fribidi_string[ i_string_length ] = 0;
    }
#endif

    /* Calculate relative glyph positions and a bounding box for the
     * entire string */
    p_line = NewLine( psz_string );
    if( p_line == NULL )
    {
        msg_Err( p_filter, "out of memory" );
        goto error;
    }
    p_string->p_lines = p_line;
    i_pen_x = 0;
    i_pen_y = 0;
    i_previous = 0;
    i = 0;
    psz_line_start = psz_unicode;

#define face p_sys->p_face
#define glyph face->glyph

    while( *psz_unicode )
    {
        i_char = *psz_unicode++;
        if( i_char == '\r' ) /* ignore CR chars wherever they may be */
        {
            continue;
        }

        if( i_char == '\n' )
        {
            psz_line_start = psz_unicode;
            p_next = NewLine( psz_string );
            if( p_next == NULL )
            {
                msg_Err( p_filter, "out of memory" );
                goto error;
            }
            p_line->p_next = p_next;
            p_line->i_width = line.xMax;
            p_line->i_height = face->size->metrics.height >> 6;
            p_line->pp_glyphs[ i ] = NULL;
            p_prev = p_line;
            p_line = p_next;
            result.x = __MAX( result.x, line.xMax );
            result.y += face->size->metrics.height >> 6;
            i_pen_x = 0;
            i_previous = 0;
            line.xMin = 0;
            line.xMax = 0;
            line.yMin = 0;
            line.yMax = 0;
            i_pen_y += face->size->metrics.height >> 6;
#if 0
            msg_Dbg( p_filter, "Creating new line, i is %d", i );
#endif
            i = 0;
            continue;
        }

        i_glyph_index = FT_Get_Char_Index( face, i_char );
        if( p_sys->i_use_kerning && i_glyph_index
            && i_previous )
        {
            FT_Vector delta;
            FT_Get_Kerning( face, i_previous, i_glyph_index,
                            ft_kerning_default, &delta );
            i_pen_x += delta.x >> 6;

        }
Beispiel #6
0
static bool convert_encoding_set( atsc_a65_handle_t *p_handle,
                                  const uint8_t *p_src, size_t i_src,
                                  char **ppsz_merg, size_t *pi_mergmin1,
                                  uint8_t i_mode )
{
    char *psz_dest = *ppsz_merg;
    size_t i_mergmin1 = *pi_mergmin1;
    bool b_ret = true;

    if( i_src == 0 )
        return NULL;

    /* First exclude reserved ranges */
    for( unsigned i=0; i<12; i+=2 )
    {
        if( i_mode >= ATSC_A65_MODE_RESERVED_RANGES[i]   &&
            i_mode <= ATSC_A65_MODE_RESERVED_RANGES[i+1] )
            return false;
    }

    if( i_mode == ATSC_A65_MODE_UNICODE_RANGE_START ) /* Latin 1 */
    {
        char *psz_realloc = realloc( psz_dest, i_mergmin1 + i_src + 1 );
        if( psz_realloc )
        {
            psz_realloc[i_mergmin1 + i_src] = 0;
            memcpy( &psz_realloc[i_mergmin1], p_src, i_src );
            psz_dest = psz_realloc;
            i_mergmin1 += i_src;
        }
        else return false;
    }
    else if( i_mode > ATSC_A65_MODE_UNICODE_RANGE_START &&  /* 8 range prefix + 8 */
             i_mode <= ATSC_A65_MODE_UNICODE_RANGE_END )
    {
        if( !p_handle->iconv_u16be )
        {
            if ( !(p_handle->iconv_u16be = vlc_iconv_open("UTF-8", "UTF-16BE")) )
                return false;
        }
        else if ( VLC_ICONV_ERR == vlc_iconv( p_handle->iconv_u16be, NULL, NULL, NULL, NULL ) ) /* reset */
        {
            return false;
        }

        char *psz16 = enlarge_to16( p_src, i_src, i_mode ); /* Maybe we can skip and feed iconv 2 by 2 */
        if( psz16 )
        {
            char *psz_realloc = realloc( psz_dest, i_mergmin1 + (4 * i_src) + 1 );
            if( psz_realloc )
            {
                const char *p_inbuf = psz16;
                char *p_outbuf = &psz_realloc[i_mergmin1];
                const size_t i_outbuf_size = i_src * 4;
                size_t i_inbuf_remain = i_src * 2;
                size_t i_outbuf_remain = i_outbuf_size;
                b_ret = ( VLC_ICONV_ERR != vlc_iconv( p_handle->iconv_u16be, &p_inbuf, &i_inbuf_remain,
                                                                            &p_outbuf, &i_outbuf_remain ) );
                psz_dest = psz_realloc;
                i_mergmin1 += (i_outbuf_size - i_outbuf_remain);
                psz_dest[i_mergmin1 - 1] = 0;
            }
            free( psz16 );
        }
        else return false;
    }
    else
    {
        /* Unsupported encodings */
        return false;
    }

    *ppsz_merg = psz_dest;
    *pi_mergmin1 = i_mergmin1;
    return b_ret;
}
Beispiel #7
0
/*****************************************************************************
 * OpenDecoder: probe the decoder and return score
 *****************************************************************************
 * Tries to launch a decoder and return score so that the interface is able
 * to chose.
 *****************************************************************************/
static int OpenDecoder( vlc_object_t *p_this )
{
    decoder_t     *p_dec = (decoder_t*)p_this;
    decoder_sys_t *p_sys;
    vlc_value_t    val;

    if( p_dec->fmt_in.i_codec != VLC_FOURCC('s','u','b','t') &&
        p_dec->fmt_in.i_codec != VLC_FOURCC('s','s','a',' ') )
    {
        return VLC_EGENERIC;
    }

    p_dec->pf_decode_sub = DecodeBlock;

    /* Allocate the memory needed to store the decoder's structure */
    if( ( p_dec->p_sys = p_sys =
          (decoder_sys_t *)malloc(sizeof(decoder_sys_t)) ) == NULL )
    {
        msg_Err( p_dec, "out of memory" );
        return VLC_ENOMEM;
    }

    /* init of p_sys */
    p_sys->i_align = 0;
    p_sys->iconv_handle = (vlc_iconv_t)-1;
    p_sys->b_autodetect_utf8 = VLC_FALSE;
    p_sys->b_ass = VLC_FALSE;
    p_sys->i_original_height = -1;
    p_sys->i_original_width = -1;
    p_sys->pp_ssa_styles = NULL;
    p_sys->i_ssa_styles = 0;

    if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
    {
        msg_Dbg( p_dec, "using demux suggested character encoding: %s",
                 p_dec->fmt_in.subs.psz_encoding );
        if( strcmp( p_dec->fmt_in.subs.psz_encoding, "UTF-8" ) )
            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", p_dec->fmt_in.subs.psz_encoding );
    }
    else
    {
        var_Create( p_dec, "subsdec-encoding",
                    VLC_VAR_STRING | VLC_VAR_DOINHERIT );
        var_Get( p_dec, "subsdec-encoding", &val );
        if( !strcmp( val.psz_string, DEFAULT_NAME ) )
        {
            const char *psz_charset = GetFallbackEncoding();

            p_sys->b_autodetect_utf8 = var_CreateGetBool( p_dec,
                    "subsdec-autodetect-utf8" );

            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", psz_charset );
            msg_Dbg( p_dec, "using fallback character encoding: %s", psz_charset );
        }
        else if( !strcmp( val.psz_string, "UTF-8" ) )
        {
            msg_Dbg( p_dec, "using enforced character encoding: UTF-8" );
        }
        else if( val.psz_string )
        {
            msg_Dbg( p_dec, "using enforced character encoding: %s", val.psz_string );
            p_sys->iconv_handle = vlc_iconv_open( "UTF-8", val.psz_string );
            if( p_sys->iconv_handle == (vlc_iconv_t)-1 )
            {
                msg_Warn( p_dec, "unable to do requested conversion" );
            }
        }
        if( val.psz_string ) free( val.psz_string );
    }

    var_Create( p_dec, "subsdec-align", VLC_VAR_INTEGER | VLC_VAR_DOINHERIT );
    var_Get( p_dec, "subsdec-align", &val );
    p_sys->i_align = val.i_int;

    if( p_dec->fmt_in.i_codec == VLC_FOURCC('s','s','a',' ') && var_CreateGetBool( p_dec, "subsdec-formatted" ) )
    {
        if( p_dec->fmt_in.i_extra > 0 )
            ParseSSAHeader( p_dec );
    }

    return VLC_SUCCESS;
}
Beispiel #8
0
char *nscdec( vlc_object_t *p_demux, char* p_encoded )
{
    unsigned int i;
    unsigned char tmp;
    unsigned char j, k;
    unsigned int length;
    unsigned char encoding_type;

    vlc_iconv_t conv;
    size_t buf16_size;
    unsigned char *buf16;
    char *p_buf16;
    size_t buf8_size;
    char *buf8;
    char *p_buf8;

    char *p_input = p_encoded;

    if( strlen( p_input ) < 15 )
    {
        msg_Err( p_demux, "input string less than 15 characters" );
        return NULL;
    }

    if( load_byte( 1, &encoding_type, &p_input, NULL, NULL ) )
    {
        msg_Err( p_demux, "unable to get NSC encoding type" );
        return NULL;
    }

    if( encoding_type != 1 && encoding_type != 2 )
    {
        msg_Err( p_demux, "encoding type %d is not supported",
                 encoding_type );
        return NULL;
    }

    j = k = 0;

    if( load_byte( encoding_type, &tmp, &p_input, &j, &k ) )
    {
        msg_Err( p_demux, "load_byte failed" );
        return NULL;
    }

    for( i = 0; i < 4; i++ )
    {
        if( load_byte( encoding_type, &tmp, &p_input, &j, &k ) )
        {
            msg_Err( p_demux, "load_byte failed" );
            return NULL;
        }
    }

    length = 0;
    for( i = 4; i; i-- )
    {
        if( load_byte( encoding_type, &tmp, &p_input, &j, &k ) )
        {
            msg_Err( p_demux, "load_byte failed" );
            return NULL;
        }
        length |= tmp << ((i - 1) * 8);
    }

    if( length == 0 )
    {
        msg_Err( p_demux, "Length is 0" );
        return NULL;
    }

    buf16_size = length;
    buf16 = (unsigned char *)malloc( buf16_size );
    if( buf16 == NULL )
    {
        msg_Err( p_demux, "out of memory" );
        return NULL;
    }

    for( i = 0; i < length; i++ )
    {
        if( load_byte( encoding_type, &buf16[ i ], &p_input, &j, &k ) )
        {
            msg_Err( p_demux, "load_byte failed" );
            free( (void *)buf16 );
            return NULL;
        }
    }

    buf8_size = length;
    buf8 = (char *)malloc( buf8_size + 1 );
    if( buf8 == NULL )
    {
        msg_Err( p_demux, "out of memory" );
        free( (void *)buf16 );
        return NULL;
    }

    conv = vlc_iconv_open( "UTF-8", "UTF-16LE" );
    if( conv == (vlc_iconv_t)-1 )
    {
        msg_Err( p_demux, "iconv_open failed" );
        free( (void *)buf16 );
        free( (void *)buf8 );
        return NULL;
    }

    p_buf8 = &buf8[ 0 ];
    p_buf16 = (char *)&buf16[ 0 ];

    if( vlc_iconv( conv, &p_buf16, &buf16_size, &p_buf8, &buf8_size ) < 0 )
    {
        msg_Err( p_demux, "iconv failed" );
        return NULL;
    }
    else
    {
        buf8[ length - buf8_size ] = '\0';
    }

    vlc_iconv_close( conv );

    free( (void *)buf16 );
    return buf8;
}
Beispiel #9
0
/*****************************************************************************
 * OpenDecoder: probe the decoder and return score
 *****************************************************************************
 * Tries to launch a decoder and return score so that the interface is able
 * to chose.
 *****************************************************************************/
static int OpenDecoder( vlc_object_t *p_this )
{
    decoder_t     *p_dec = (decoder_t*)p_this;
    decoder_sys_t *p_sys;

    switch( p_dec->fmt_in.i_codec )
    {
        case VLC_CODEC_SUBT:
        case VLC_CODEC_SSA:
        case VLC_CODEC_ITU_T140:
            break;
        default:
            return VLC_EGENERIC;
    }

    p_dec->pf_decode_sub = DecodeBlock;
    p_dec->fmt_out.i_cat = SPU_ES;
    p_dec->fmt_out.i_codec = 0;

    /* Allocate the memory needed to store the decoder's structure */
    p_dec->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) );
    if( p_sys == NULL )
        return VLC_ENOMEM;

    /* init of p_sys */
    p_sys->i_align = 0;
    p_sys->iconv_handle = (vlc_iconv_t)-1;
    p_sys->b_autodetect_utf8 = false;
    p_sys->b_ass = false;
    p_sys->i_original_height = -1;
    p_sys->i_original_width = -1;
    TAB_INIT( p_sys->i_ssa_styles, p_sys->pp_ssa_styles );
    TAB_INIT( p_sys->i_images, p_sys->pp_images );

    char *psz_charset = NULL;

    /* First try demux-specified encoding */
    if( p_dec->fmt_in.i_codec == VLC_CODEC_ITU_T140 )
        psz_charset = strdup( "UTF-8" ); /* IUT T.140 is always using UTF-8 */
    else
    if( p_dec->fmt_in.subs.psz_encoding && *p_dec->fmt_in.subs.psz_encoding )
    {
        psz_charset = strdup (p_dec->fmt_in.subs.psz_encoding);
        msg_Dbg (p_dec, "trying demuxer-specified character encoding: %s",
                 p_dec->fmt_in.subs.psz_encoding ?
                 p_dec->fmt_in.subs.psz_encoding : "not specified");
    }

    /* Second, try configured encoding */
    if (psz_charset == NULL)
    {
        psz_charset = var_InheritString (p_dec, "subsdec-encoding");
        msg_Dbg (p_dec, "trying configured character encoding: %s",
                 psz_charset ? psz_charset : "not specified");
        if (psz_charset != NULL && !strcmp (psz_charset, "system"))
        {
            free (psz_charset);
            psz_charset = strdup ("");
            /* ^ iconv() treats "" as nl_langinfo(CODESET) */
        }
    }

    /* Third, try "local" encoding with optional UTF-8 autodetection */
    if (psz_charset == NULL)
    {
        /* xgettext:
           The Windows ANSI code page most commonly used for this language.
           VLC uses this as a guess of the subtitle files character set
           (if UTF-8 and UTF-16 autodetection fails).
           Western European languages normally use "CP1252", which is a
           Microsoft-variant of ISO 8859-1. That suits the Latin alphabet.
           Other scripts use other code pages.

           This MUST be a valid iconv character set. If unsure, please refer
           the VideoLAN translators mailing list. */
        const char *acp = vlc_pgettext("GetACP", "CP1252");

        psz_charset = strdup (acp);
        msg_Dbg (p_dec, "trying default character encoding: %s",
                 psz_charset ? psz_charset : "not specified");

        if (var_InheritBool (p_dec, "subsdec-autodetect-utf8"))
        {
            msg_Dbg (p_dec, "using automatic UTF-8 detection");
            p_sys->b_autodetect_utf8 = true;
        }
    }

    /* Forth, don't do character decoding, i.e. assume UTF-8 */
    if (psz_charset == NULL)
    {
        psz_charset = strdup ("UTF-8");
        msg_Dbg (p_dec, "using UTF-8 character encoding" );
    }

    if ((psz_charset != NULL)
     && strcasecmp (psz_charset, "UTF-8")
     && strcasecmp (psz_charset, "utf8"))
    {
        p_sys->iconv_handle = vlc_iconv_open ("UTF-8", psz_charset);
        if (p_sys->iconv_handle == (vlc_iconv_t)(-1))
            msg_Err (p_dec, "cannot convert from %s: %m", psz_charset);
    }
    free (psz_charset);

    p_sys->i_align = var_InheritInteger( p_dec, "subsdec-align" );

    if( p_dec->fmt_in.i_codec == VLC_CODEC_SSA
     && var_InheritBool( p_dec, "subsdec-formatted" ) )
    {
        if( p_dec->fmt_in.i_extra > 0 )
            ParseSSAHeader( p_dec );
    }

    return VLC_SUCCESS;
}