/**
 * Drop one reference to the node pointed at by *node and clear the caller's
 * pointer.
 *
 * When the reference count reaches zero the node's payload is torn down:
 *  - a WEBVTT_TEXT node releases its text string;
 *  - a valid internal node that carries internal data releases its CSS class
 *    list and annotation, releases every child in order, then frees the
 *    child array and the internal data record.
 * The node itself is freed afterwards.
 *
 * A NULL node, or a pointer to a NULL node, is ignored. In every other case
 * *node is set to 0 on return, whether or not the node was actually freed.
 */
WEBVTT_EXPORT void
webvtt_release_node( webvtt_node **node )
{
  webvtt_uint child;
  webvtt_node *target;

  if( !node || !*node ) {
    return;
  }

  target = *node;

  if( webvtt_deref( &target->refs ) != 0 ) {
    /* Other owners remain: only forget the caller's reference. */
    *node = 0;
    return;
  }

  if( target->kind == WEBVTT_TEXT ) {
    webvtt_release_string( &target->data.text );
  } else if( WEBVTT_IS_VALID_INTERNAL_NODE( target->kind )
             && target->data.internal_data ) {
    webvtt_release_stringlist( &target->data.internal_data->css_classes );
    webvtt_release_string( &target->data.internal_data->annotation );

    /* Children are released front to back before their array goes away. */
    child = 0;
    while( child < target->data.internal_data->length ) {
      webvtt_release_node( &target->data.internal_data->children[ child ] );
      child++;
    }

    webvtt_free( target->data.internal_data->children );
    webvtt_free( target->data.internal_data );
  }

  webvtt_free( target );
  *node = 0;
}
/**
 * Tokenizer handler for the START_TAG_CLASS state: collects the
 * dot-separated class names of a start tag into css_classes.
 *
 * Bytes are accumulated into a local buffer string. What happens next
 * depends on the byte under *position:
 *  - tab, form feed, space, line feed or carriage return: the buffer is
 *    pushed onto css_classes, the state becomes START_TAG_ANNOTATION and
 *    WEBVTT_SUCCESS is returned (without advancing past the byte);
 *  - '>' or the NUL byte: the buffer is pushed and WEBVTT_SUCCESS is
 *    returned (without advancing past the byte);
 *  - '.': the buffer is pushed and a fresh buffer is started for the next
 *    class name;
 *  - anything else is appended to the buffer.
 *
 * @param position     in/out cursor into the cue text.
 * @param token_state  in/out tokenizer state; the loop only runs while it
 *                     is START_TAG_CLASS.
 * @param css_classes  list that receives the collected class names.
 * @return WEBVTT_SUCCESS, or the failing status of a string/list operation.
 *
 * NOTE(review): the whitespace branch returns WEBVTT_SUCCESS after switching
 * to START_TAG_ANNOTATION; confirm the caller still runs the annotation
 * state, since a caller that only loops on WEBVTT_UNFINISHED would stop here.
 */
WEBVTT_INTERN webvtt_status
webvtt_class_state( webvtt_byte **position, webvtt_token_state *token_state,
                    webvtt_stringlist *css_classes )
{
  webvtt_string buffer;
  webvtt_status status = WEBVTT_SUCCESS;

  CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );

  for( ; *token_state == START_TAG_CLASS; (*position)++ ) {
    if( **position == UTF8_TAB || **position == UTF8_FORM_FEED ||
        **position == UTF8_SPACE || **position == UTF8_LINE_FEED ||
        **position == UTF8_CARRIAGE_RETURN) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      *token_state = START_TAG_ANNOTATION;
      /*
       * Drop the local reference exactly as the other branches do after a
       * push; returning without this leaked the buffer.
       */
      webvtt_release_string( &buffer );
      return WEBVTT_SUCCESS;
    } else if( **position == UTF8_GREATER_THAN || **position == UTF8_NULL_BYTE ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      return WEBVTT_SUCCESS;
    } else if( **position == UTF8_FULL_STOP ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      /*
       * The old buffer is already released, so a failed re-creation returns
       * directly instead of going through the dealloc label.
       */
      CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );
    } else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
  }

dealloc:
  webvtt_release_string( &buffer );

  return status;
}
/**
 * Destroy a cue-text token and clear the caller's pointer.
 *
 * Start tokens release their CSS class list, annotation and tag name; end
 * tokens release their tag name; text tokens release their text. Any other
 * token type (e.g. time stamp tokens) owns no extra allocations, so only the
 * token structure itself is freed.
 *
 * A NULL token, or a pointer to a NULL token, is ignored.
 */
WEBVTT_INTERN void
webvtt_delete_token( webvtt_cuetext_token **token )
{
  webvtt_cuetext_token *doomed;
  webvtt_start_token_data *start;

  if( !token || !*token ) {
    return;
  }

  doomed = *token;

  switch( doomed->token_type ) {
    case START_TOKEN:
      start = &doomed->start_token_data;
      webvtt_release_stringlist( &start->css_classes );
      webvtt_release_string( &start->annotations );
      webvtt_release_string( &doomed->tag_name );
      break;

    case END_TOKEN:
      webvtt_release_string( &doomed->tag_name );
      break;

    case TEXT_TOKEN:
      webvtt_release_string( &doomed->text );
      break;

    default:
      /* Nothing beyond the token structure itself to release. */
      break;
  }

  webvtt_free( doomed );
  *token = 0;
}
/**
 * Tokenizer handler for the START_TAG_CLASS state: collects the
 * dot-separated class names of a start tag into css_classes.
 *
 * Bytes are accumulated into a local buffer string. What happens next
 * depends on the byte under *position:
 *  - '\t', '\f', ' ', '\n' or '\r': the buffer is pushed onto css_classes,
 *    the state becomes START_TAG_ANNOTATION and WEBVTT_SUCCESS is returned
 *    (without advancing past the byte);
 *  - '>' or '\0': the buffer is pushed and WEBVTT_SUCCESS is returned
 *    (without advancing past the byte);
 *  - '.': the buffer is pushed and a fresh buffer is started for the next
 *    class name;
 *  - anything else is appended to the buffer.
 *
 * @param position     in/out cursor into the cue text.
 * @param token_state  in/out tokenizer state; the loop only runs while it
 *                     is START_TAG_CLASS.
 * @param css_classes  list that receives the collected class names.
 * @return WEBVTT_SUCCESS, or the failing status of a string/list operation.
 *
 * NOTE(review): the whitespace branch returns WEBVTT_SUCCESS after switching
 * to START_TAG_ANNOTATION; confirm the caller still runs the annotation
 * state, since a caller that only loops on WEBVTT_UNFINISHED would stop here.
 */
WEBVTT_INTERN webvtt_status
webvtt_class_state( webvtt_byte **position, webvtt_token_state *token_state,
                    webvtt_stringlist *css_classes )
{
  webvtt_string buffer;
  webvtt_status status = WEBVTT_SUCCESS;

  CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );

  for( ; *token_state == START_TAG_CLASS; (*position)++ ) {
    if( **position == '\t' || **position == '\f' ||
        **position == ' ' || **position == '\n' ||
        **position == '\r') {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      *token_state = START_TAG_ANNOTATION;
      /*
       * Drop the local reference exactly as the other branches do after a
       * push; returning without this leaked the buffer.
       */
      webvtt_release_string( &buffer );
      return WEBVTT_SUCCESS;
    } else if( **position == '>' || **position == '\0' ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      return WEBVTT_SUCCESS;
    } else if( **position == '.' ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      /*
       * The old buffer is already released, so a failed re-creation returns
       * directly instead of going through the dealloc label.
       */
      CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );
    } else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
  }

dealloc:
  webvtt_release_string( &buffer );

  return status;
}
/**
 * Drop one reference to the cue pointed at by *pcue.
 *
 * The caller's pointer is cleared first. If that was the last reference, the
 * cue's id and body strings and its node tree are released and the cue
 * structure is freed.
 *
 * A NULL pcue, or a pointer to a NULL cue, is ignored.
 */
WEBVTT_EXPORT void
webvtt_release_cue( webvtt_cue **pcue )
{
  webvtt_cue *released;

  if( !pcue || !*pcue ) {
    return;
  }

  released = *pcue;
  *pcue = 0;

  if( webvtt_deref( &released->refs ) != 0 ) {
    /* Still referenced elsewhere. */
    return;
  }

  webvtt_release_string( &released->id );
  webvtt_release_string( &released->body );
  webvtt_release_node( &released->node_head );
  webvtt_free( released );
}
/**
 * Split a cue-settings string into whitespace-separated words and apply each
 * word to the cue via webvtt_cue_set_setting_from_string().
 *
 * Only the part of the string before the first carriage return or line feed
 * is considered. A word that fails to apply does not abort processing: when
 * a parser is supplied and webvtt_error_for_status() maps the failure to an
 * error code, a warning is emitted at the word's line/column and the loop
 * moves on to the next word.
 *
 * @param self      optional parser; supplies the starting line/column, is
 *                  the target of warnings, and has its column updated on
 *                  success. May be NULL.
 * @param cue       cue that receives the settings.
 * @param settings  settings text to parse.
 * @return WEBVTT_INVALID_PARAM if cue or settings is NULL,
 *         WEBVTT_OUT_OF_MEMORY if a word could not be collected,
 *         WEBVTT_SUCCESS otherwise (even if individual settings were bad).
 */
WEBVTT_EXPORT webvtt_status
webvtt_cue_validate_set_settings( webvtt_parser self, webvtt_cue *cue,
                                  const webvtt_string *settings )
{
  int line = 1;
  int column = 0;
  int length;
  const char *text;
  int position = 0;
  webvtt_status s;

  if( !cue || !settings ) {
    return WEBVTT_INVALID_PARAM;
  }

  length = (int)webvtt_string_length( settings );
  text = webvtt_string_text( settings );

  /*
   * Stop at the FIRST end-of-line byte, whichever kind it is. Searching for
   * '\r' before '\n' picked the later position when a '\n' came first.
   * The NULL guard is defensive in case the string has no text buffer.
   */
  if( text ) {
    size_t eol = strcspn( text, "\r\n" );
    if( text[ eol ] != '\0' ) {
      length = (int)eol;
    }
  }

  if( self ) {
    line = self->line;
    column = self->column;
  }

  /**
   * http://www.w3.org/html/wg/drafts/html/master/single-page.html#split-a-string-on-spaces
   * 4. Skip whitespace
   */
  column += webvtt_string_skip_whitespace( settings, &position );

  while( position < length ) {
    webvtt_string word;
    const char *keyword;
    const char *end;
    int nwhite, ncol;

    /* Collect word (sequence of non-space characters terminated by space) */
    if( WEBVTT_FAILED( webvtt_string_collect_word( settings, &word,
                                                   &position ) ) ) {
      return WEBVTT_OUT_OF_MEMORY;
    }

    /* Skip trailing whitespace */
    nwhite = webvtt_string_skip_whitespace( settings, &position );

    /* Get the word text */
    keyword = webvtt_string_text( &word );

    /* Get pointer to end of the word (for chcount()) */
    end = keyword + webvtt_string_length( &word );

    /* Get the column count that needs to be skipped */
    ncol = webvtt_utf8_chcount( keyword, end );

    if( WEBVTT_FAILED( s = webvtt_cue_set_setting_from_string( cue,
                                                                keyword ) ) ) {
      if( self ) {
        /* Figure out which error to emit */
        webvtt_error error;
        if( webvtt_error_for_status( s, &error ) ) {
          /* There is no non-recoverable cue-setting error, so the loop is
             never aborted, regardless of what the error handler returns. */
          WARNING_AT( error, line, column );
        }
      }
    }

    /* Move column pointer beyond word and trailing whitespace */
    column += ncol + nwhite;
    webvtt_release_string( &word );
  }

  if( self ) {
    self->column = column;
  }

  return WEBVTT_SUCCESS;
}
/**
 * Read a single cue-text token starting at *position.
 *
 * The tokenizer starts in the DATA state and repeatedly dispatches to the
 * handler for the current state until a handler reports something other
 * than WEBVTT_UNFINISHED. On success, the state it stopped in selects the
 * kind of token that is created in *token:
 *  - DATA / ESCAPE                      -> text token
 *  - TAG / START_TAG / START_TAG_CLASS /
 *    START_TAG_ANNOTATION               -> start token
 *  - END_TAG                            -> end token
 *  - TIME_STAMP_TAG                     -> time stamp token
 *
 * All scratch strings and the class list are released before returning, on
 * every path, including allocation failures during set-up.
 *
 * @param position  in/out cursor into the cue text; advanced past the token
 *                  and past a closing '>' if one is under the cursor.
 * @param token     receives the newly created token on success.
 * @return WEBVTT_INVALID_PARAM if position or *position is NULL, the failing
 *         status of a set-up allocation or state handler,
 *         WEBVTT_INVALID_TOKEN_STATE for an unrecognised state, otherwise
 *         the status of the token-creation call.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer( webvtt_byte **position, webvtt_cuetext_token **token )
{
  webvtt_token_state token_state = DATA;
  webvtt_string result, annotation;
  webvtt_stringlist *css_classes = 0;
  webvtt_timestamp time_stamp = 0;
  webvtt_status status;

  if( !position || !*position ) {
    return WEBVTT_INVALID_PARAM;
  }

  /*
   * Acquire the scratch objects one at a time so that a failure only
   * releases what was actually created.
   */
  status = webvtt_create_string( 10, &result );
  if( status != WEBVTT_SUCCESS ) {
    return status;
  }

  status = webvtt_create_string( 10, &annotation );
  if( status != WEBVTT_SUCCESS ) {
    goto release_result;
  }

  status = webvtt_create_stringlist( &css_classes );
  if( status != WEBVTT_SUCCESS ) {
    goto release_annotation;
  }

  /**
   * Loop while the tokenizer is not finished.
   * Based on the state of the tokenizer enter a function to handle that
   * particular tokenizer state. Those functions will loop until they either
   * change the state of the tokenizer or reach a valid token end point.
   */
  status = WEBVTT_UNFINISHED;
  while( status == WEBVTT_UNFINISHED ) {
    switch( token_state ) {
      case DATA :
        status = webvtt_data_state( position, &token_state, &result );
        break;
      case ESCAPE:
        status = webvtt_escape_state( position, &token_state, &result );
        break;
      case TAG:
        status = webvtt_tag_state( position, &token_state, &result );
        break;
      case START_TAG:
        status = webvtt_start_tag_state( position, &token_state, &result );
        break;
      case START_TAG_CLASS:
        status = webvtt_class_state( position, &token_state, css_classes );
        break;
      case START_TAG_ANNOTATION:
        status = webvtt_annotation_state( position, &token_state, &annotation );
        break;
      case END_TAG:
        status = webvtt_end_tag_state( position, &token_state, &result );
        break;
      case TIME_STAMP_TAG:
        status = webvtt_timestamp_state( position, &token_state, &result );
        break;
      default:
        /* An unknown state would otherwise spin here forever. */
        status = WEBVTT_INVALID_TOKEN_STATE;
        break;
    }

    if( token_state == START_TAG_ANNOTATION ) {
      webvtt_skipwhite( position );
    }
  }

  /* Step over the '>' that closes a tag, if that is where we stopped. */
  if( **position == UTF8_GREATER_THAN ) {
    (*position)++;
  }

  if( status == WEBVTT_SUCCESS ) {
    /**
     * The state that the tokenizer left off on will tell us what kind of
     * token needs to be made.
     */
    if( token_state == DATA || token_state == ESCAPE ) {
      status = webvtt_create_text_token( token, &result );
    } else if( token_state == TAG || token_state == START_TAG ||
               token_state == START_TAG_CLASS ||
               token_state == START_TAG_ANNOTATION ) {
      /**
       * If the tag does not accept an annotation then release the current
       * annotation and initialize annotation to a safe empty state.
       */
      if( !tag_accepts_annotation( &result ) ) {
        webvtt_release_string( &annotation );
        webvtt_init_string( &annotation );
      }
      status = webvtt_create_start_token( token, &result, css_classes,
                                          &annotation );
    } else if( token_state == END_TAG ) {
      status = webvtt_create_end_token( token, &result );
    } else if( token_state == TIME_STAMP_TAG ) {
      /* NOTE(review): the result of parse_timestamp() is not checked. */
      parse_timestamp( webvtt_string_text( &result ), &time_stamp );
      status = webvtt_create_timestamp_token( token, time_stamp );
    } else {
      status = WEBVTT_INVALID_TOKEN_STATE;
    }
  }

  webvtt_release_stringlist( &css_classes );
release_annotation:
  webvtt_release_string( &annotation );
release_result:
  webvtt_release_string( &result );

  return status;
}
/**
 * Tokenizer handler for the ESCAPE state: interprets a character escape
 * sequence such as "&amp;" that was introduced by an ampersand in the DATA
 * state.
 *
 * The sequence is collected in a local buffer that is pre-seeded with '&'.
 * What happens next depends on the byte under *position:
 *  - NUL or '<': token boundary; the raw buffer is appended to result and
 *    the function returns with the cursor left on that byte;
 *  - '&': the sequence so far was malformed; the raw buffer is appended to
 *    result and a new buffer is started with this ampersand;
 *  - ';': end of the sequence. "&amp", "&lt", "&gt", "&rlm", "&lrm" and
 *    "&nbsp" are replaced by their interpretation; any other buffer is
 *    appended verbatim followed by the ';'. The state returns to DATA;
 *  - alphanumeric: appended to the buffer;
 *  - anything else: malformed sequence; the raw buffer and the byte are
 *    appended to result and the state returns to DATA.
 *
 * @param position     in/out cursor into the cue text.
 * @param token_state  in/out tokenizer state; the loop only runs while it
 *                     is ESCAPE.
 * @param result       string that receives the decoded text.
 * @return WEBVTT_UNFINISHED when control goes back to the DATA state,
 *         WEBVTT_SUCCESS at a token boundary, or the failing status of a
 *         string operation.
 */
WEBVTT_INTERN webvtt_status
webvtt_escape_state( webvtt_byte **position, webvtt_token_state *token_state,
                     webvtt_string *result )
{
  webvtt_string buffer;
  webvtt_status status;

  /*
   * Return directly on failure: the buffer does not exist yet, so it must
   * not be passed to webvtt_release_string() through the dealloc label.
   */
  status = webvtt_create_string( 1, &buffer );
  if( status != WEBVTT_SUCCESS ) {
    return status;
  }

  /**
   * Append ampersand here because the algorithm is not able to add it to the
   * buffer when it reads it in the DATA state tokenizer.
   */
  CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, UTF8_AMPERSAND ) );

  for( ; *token_state == ESCAPE; (*position)++ ) {
    /**
     * We have encountered a token termination point.
     * Append buffer to result and return success.
     */
    if( **position == UTF8_NULL_BYTE || **position == UTF8_LESS_THAN ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
      goto dealloc;
    }
    /**
     * This means we have encountered a malformed escape character sequence.
     * This means that we need to add that malformed text to the result and
     * recreate the buffer to prepare for a new escape sequence.
     */
    else if( **position == UTF8_AMPERSAND ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
      webvtt_release_string( &buffer );
      /* The buffer is already released; do not release it again on failure. */
      status = webvtt_create_string( 1, &buffer );
      if( status != WEBVTT_SUCCESS ) {
        return status;
      }
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
    /**
     * We've encountered the semicolon which is the end of an escape sequence.
     * Check if buffer contains a valid escape sequence and if it does append
     * the interpretation to result and change the state to DATA.
     */
    else if( **position == UTF8_SEMI_COLON ) {
      /*
       * The buffer holds the leading '&' plus the escape name, so each
       * literal must include the '&' and match the given length.
       */
      if( webvtt_string_is_equal( &buffer, "&amp", 4 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, '&' ) );
      } else if( webvtt_string_is_equal( &buffer, "&lt", 3 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, '<' ) );
      } else if( webvtt_string_is_equal( &buffer, "&gt", 3 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, '>' ) );
      } else if( webvtt_string_is_equal( &buffer, "&rlm", 4 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, rlm_replace, RLM_REPLACE_LENGTH ) );
      } else if( webvtt_string_is_equal( &buffer, "&lrm", 4 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, lrm_replace, LRM_REPLACE_LENGTH ) );
      } else if( webvtt_string_is_equal( &buffer, "&nbsp", 5 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, nbsp_replace, NBSP_REPLACE_LENGTH ) );
      } else {
        /* Unknown escape name: keep the text as written, ';' included. */
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) );
      }

      *token_state = DATA;
      status = WEBVTT_UNFINISHED;
    }
    /**
     * Character is alphanumeric. This means we are in the body of the escape
     * sequence.
     */
    else if( webvtt_isalphanum( **position ) ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
    /**
     * If we have not found an alphanumeric character then we have encountered
     * a malformed escape sequence. Add buffer to result and continue to parse
     * in DATA state.
     */
    else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) );
      status = WEBVTT_UNFINISHED;
      *token_state = DATA;
    }
  }

dealloc:
  webvtt_release_string( &buffer );

  return status;
}