/**
 * Map a cue-text tag name onto the node kind it denotes.
 *
 * Recognised names are the one-character tags compared against UTF8_B,
 * UTF8_I, UTF8_U, UTF8_C and UTF8_V, plus the names "ruby" and "rt".
 *
 * @param tag_name  Tag name to classify; must not be NULL.
 * @param kind      Receives the node kind. Written only when the function
 *                  returns WEBVTT_SUCCESS.
 *
 * @return WEBVTT_INVALID_PARAM if either pointer is NULL,
 *         WEBVTT_INVALID_TAG_NAME if the name is not recognised,
 *         WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_node_kind_from_tag_name( webvtt_string *tag_name,
                                webvtt_node_kind *kind )
{
  if( !tag_name || !kind ) {
    return WEBVTT_INVALID_PARAM;
  }

  if( webvtt_string_length( tag_name ) == 1 ) {
    switch( webvtt_string_text( tag_name )[0] ) {
      case( UTF8_B ):
        *kind = WEBVTT_BOLD;
        break;
      case( UTF8_I ):
        *kind = WEBVTT_ITALIC;
        break;
      case( UTF8_U ):
        *kind = WEBVTT_UNDERLINE;
        break;
      case( UTF8_C ):
        *kind = WEBVTT_CLASS;
        break;
      case( UTF8_V ):
        *kind = WEBVTT_VOICE;
        break;
      default:
        /*
         * An unknown one-character name must be rejected here; otherwise
         * the function would report success without ever writing *kind.
         */
        return WEBVTT_INVALID_TAG_NAME;
    }
  } else if( webvtt_string_is_equal( tag_name, "ruby", 4 ) ) {
    *kind = WEBVTT_RUBY;
  } else if( webvtt_string_is_equal( tag_name, "rt", 2 ) ) {
    *kind = WEBVTT_RUBY_TEXT;
  } else {
    return WEBVTT_INVALID_TAG_NAME;
  }

  return WEBVTT_SUCCESS;
}
/**
 * Split a cue-settings string into words and apply each one to a cue.
 *
 * Only the first line of the text is processed: scanning stops at the
 * earliest CR or LF. Every whitespace-separated word is handed to
 * webvtt_cue_set_setting_from_string(). A word that is rejected does not
 * stop processing; when a parser is supplied, the matching error (if
 * webvtt_error_for_status() provides one) is reported through WARNING_AT.
 *
 * @param self      Optional parser. When non-NULL its line and column are
 *                  used to position warnings, and its column is advanced
 *                  past the text that was consumed.
 * @param cue       Cue that receives the settings; must not be NULL.
 * @param settings  Settings text; must not be NULL.
 *
 * @return WEBVTT_INVALID_PARAM if cue or settings is NULL or settings has
 *         no text buffer, WEBVTT_OUT_OF_MEMORY if a word could not be
 *         collected, WEBVTT_SUCCESS otherwise (rejected settings are only
 *         reported, never returned).
 */
WEBVTT_EXPORT webvtt_status
webvtt_cue_validate_set_settings( webvtt_parser self, webvtt_cue *cue,
                                  const webvtt_string *settings )
{
  int line = 1;
  int column = 0;
  int length;
  int position = 0;
  const char *text;
  size_t first_line;
  webvtt_status s;

  if( !cue || !settings ) {
    return WEBVTT_INVALID_PARAM;
  }

  /* The text pointer is scanned below, so it must be valid. */
  text = webvtt_string_text( settings );
  if( !text ) {
    return WEBVTT_INVALID_PARAM;
  }

  /*
   * Limit processing to the first line. strcspn() finds the earliest CR or
   * LF, whichever comes first; if it stops on the terminating NUL there is
   * no line break and the full string length is used.
   */
  length = (int)webvtt_string_length( settings );
  first_line = strcspn( text, "\r\n" );
  if( text[ first_line ] != '\0' ) {
    length = (int)first_line;
  }

  if( self ) {
    line = self->line;
    column = self->column;
  }

  /**
   * http://www.w3.org/html/wg/drafts/html/master/single-page.html#split-a-string-on-spaces
   * 4. Skip whitespace
   */
  column += webvtt_string_skip_whitespace( settings, &position );

  while( position < length ) {
    webvtt_string word;
    const char *keyword;
    const char *end;
    int nwhite, ncol;

    /* Collect word (sequence of non-space characters terminated by space) */
    if( WEBVTT_FAILED( webvtt_string_collect_word( settings, &word,
                                                   &position ) ) ) {
      return WEBVTT_OUT_OF_MEMORY;
    }

    /* Skip the whitespace that follows the word. */
    nwhite = webvtt_string_skip_whitespace( settings, &position );

    /* Word text and one-past-the-end pointer (for the character count). */
    keyword = webvtt_string_text( &word );
    end = keyword + webvtt_string_length( &word );

    /* Number of columns the word occupies. */
    ncol = webvtt_utf8_chcount( keyword, end );

    if( WEBVTT_FAILED( s = webvtt_cue_set_setting_from_string( cue,
                                                               keyword ) ) ) {
      if( self ) {
        /* Figure out which error to emit */
        webvtt_error error;
        if( webvtt_error_for_status( s, &error ) ) {
          /*
           * There is no non-recoverable cue-setting error, so the loop is
           * never aborted, regardless of what the error handler returns.
           */
          WARNING_AT( error, line, column );
        }
      }
    }

    /* Move the column beyond the word and its trailing whitespace. */
    column += ncol + nwhite;
    webvtt_release_string( &word );
  }

  if( self ) {
    self->column = column;
  }

  return WEBVTT_SUCCESS;
}
/**
 * Parse a cue payload into a node tree rooted at cue->node_head.
 *
 * The payload text is fed to webvtt_cuetext_tokenizer() one token at a
 * time. End tokens move the current node back to its parent when their tag
 * name matches the current node; every other token is turned into a node
 * and attached to the current node. Whitespace is skipped after each
 * processed token.
 *
 * Line and length are currently not tracked.
 *
 * @param self      Parser; currently unused.
 * @param cue       Cue whose node_head is created and populated; must not
 *                  be NULL.
 * @param payload   Cue text to parse; must not be NULL and must have text.
 * @param finished  Currently unused.
 *
 * @return WEBVTT_INVALID_PARAM if cue or payload is NULL or the payload has
 *         no text, the failure status of webvtt_create_head_node() if the
 *         head node cannot be created, WEBVTT_SUCCESS otherwise. Tokenizer
 *         and node-creation failures are not reported.
 */
WEBVTT_INTERN webvtt_status
webvtt_parse_cuetext( webvtt_parser self, webvtt_cue *cue,
                      webvtt_string *payload, int finished )
{
  const webvtt_byte *cue_text;
  webvtt_status status;
  webvtt_byte *position;
  webvtt_node *current_node;
  webvtt_node *temp_node;
  webvtt_cuetext_token *token;
  webvtt_node_kind kind;

  /* Not used yet; referenced only to silence unused-parameter warnings. */
  ( void )self;
  ( void )finished;

  if( !cue || !payload ) {
    return WEBVTT_INVALID_PARAM;
  }

  cue_text = webvtt_string_text( payload );
  if( !cue_text ) {
    return WEBVTT_INVALID_PARAM;
  }

  if( WEBVTT_FAILED( status = webvtt_create_head_node( &cue->node_head ) ) ) {
    return status;
  }

  position = (webvtt_byte *)cue_text;
  current_node = cue->node_head;
  temp_node = NULL;
  token = NULL;

  /**
   * Routine taken from the W3C specification
   * http://dev.w3.org/html5/webvtt/#webvtt-cue-text-parsing-rules
   */
  while( *position != UTF8_NULL_BYTE ) {
    webvtt_delete_token( &token );

    /* Step 7. */
    switch( webvtt_cuetext_tokenizer( &position, &token ) ) {
      case( WEBVTT_UNFINISHED ):
        /* Error here. */
        break;

      /* Step 8. */
      case( WEBVTT_SUCCESS ):
        if( token->token_type == END_TOKEN ) {
          /**
           * An end token at the top of the tree has no start token to
           * close, so it is thrown away.
           */
          if( current_node->kind == WEBVTT_HEAD_NODE ) {
            continue;
          }

          /**
           * Seed kind with a value that cannot match here (current_node is
           * known not to be the head node), so a lookup that returns
           * without writing kind never causes an uninitialised read.
           */
          kind = WEBVTT_HEAD_NODE;

          /**
           * An end token whose tag name is not supported is thrown away.
           */
          if( webvtt_node_kind_from_tag_name( &token->tag_name, &kind ) ==
              WEBVTT_INVALID_TAG_NAME ) {
            continue;
          }

          /**
           * The end token matches the node currently being filled: move
           * back up to its parent and continue parsing.
           */
          if( current_node->kind == kind ) {
            current_node = current_node->parent;
          }
        } else {
          /**
           * Attempt to create a node from the token. On success attach it
           * to the current node, and descend into it if it is an internal
           * node type.
           */
          if( webvtt_create_node_from_token( token, &temp_node,
                                             current_node ) !=
              WEBVTT_SUCCESS ) {
            /* Do something here? */
          } else {
            webvtt_attach_node( current_node, temp_node );

            if( WEBVTT_IS_VALID_INTERNAL_NODE( temp_node->kind ) ) {
              current_node = temp_node;
            }

            /* Release the node as attaching it increases the count. */
            webvtt_release_node( &temp_node );
          }
        }
        break;

      default:
        /* Any other tokenizer status: the token is dropped. */
        break;
    }

    webvtt_skipwhite( &position );
  }

  webvtt_delete_token( &token );

  return WEBVTT_SUCCESS;
}
/**
 * Read one cue-text token starting at *position.
 *
 * A small state machine dispatches to one handler per tokenizer state until
 * a handler reports something other than WEBVTT_UNFINISHED. On success the
 * state the tokenizer stopped in decides which kind of token is created:
 * text, start, end or timestamp. The temporary strings and class list are
 * released before returning, whatever the outcome.
 *
 * NOTE(review): the results of webvtt_create_string() and
 * webvtt_create_stringlist() are not checked here - confirm whether they
 * can fail and how the buffers should be released in that case.
 *
 * @param position  In/out cursor into the cue text. Neither position nor
 *                  *position may be NULL. Advanced by the state handlers,
 *                  and by one more byte if it ends on UTF8_GREATER_THAN.
 * @param token     Receives the created token on success.
 *
 * @return WEBVTT_INVALID_PARAM if position or *position is NULL,
 *         WEBVTT_INVALID_TOKEN_STATE if the tokenizer is in a state it does
 *         not know, otherwise the status reported by the state handlers or
 *         by the token constructor.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer( webvtt_byte **position,
                          webvtt_cuetext_token **token )
{
  webvtt_token_state token_state = DATA;
  webvtt_string result, annotation;
  webvtt_stringlist *css_classes;
  webvtt_timestamp time_stamp = 0;
  webvtt_status status = WEBVTT_UNFINISHED;

  /* *position is dereferenced below, so both levels must be valid. */
  if( !position || !*position ) {
    return WEBVTT_INVALID_PARAM;
  }

  webvtt_create_string( 10, &result );
  webvtt_create_string( 10, &annotation );
  webvtt_create_stringlist( &css_classes );

  /**
   * Loop while the tokenizer is not finished.
   * Based on the state of the tokenizer enter a function to handle that
   * particular tokenizer state. Those functions will loop until they either
   * change the state of the tokenizer or reach a valid token end point.
   */
  while( status == WEBVTT_UNFINISHED ) {
    switch( token_state ) {
      case DATA:
        status = webvtt_data_state( position, &token_state, &result );
        break;
      case ESCAPE:
        status = webvtt_escape_state( position, &token_state, &result );
        break;
      case TAG:
        status = webvtt_tag_state( position, &token_state, &result );
        break;
      case START_TAG:
        status = webvtt_start_tag_state( position, &token_state, &result );
        break;
      case START_TAG_CLASS:
        status = webvtt_class_state( position, &token_state, css_classes );
        break;
      case START_TAG_ANNOTATION:
        status = webvtt_annotation_state( position, &token_state,
                                          &annotation );
        break;
      case END_TAG:
        status = webvtt_end_tag_state( position, &token_state, &result );
        break;
      case TIME_STAMP_TAG:
        status = webvtt_timestamp_state( position, &token_state, &result );
        break;
      default:
        /*
         * Unknown state: without this the status would stay
         * WEBVTT_UNFINISHED and the loop would never terminate.
         */
        status = WEBVTT_INVALID_TOKEN_STATE;
        break;
    }

    if( token_state == START_TAG_ANNOTATION ) {
      webvtt_skipwhite( position );
    }
  }

  /* Step over the '>' that closes a tag, if that is where we stopped. */
  if( **position == UTF8_GREATER_THAN ) {
    ( *position )++;
  }

  if( status == WEBVTT_SUCCESS ) {
    /**
     * The state that the tokenizer left off on will tell us what kind of
     * token needs to be made.
     */
    if( token_state == DATA || token_state == ESCAPE ) {
      status = webvtt_create_text_token( token, &result );
    } else if( token_state == TAG || token_state == START_TAG ||
               token_state == START_TAG_CLASS ||
               token_state == START_TAG_ANNOTATION ) {
      /**
       * If the tag does not accept an annotation then release the current
       * annotation and initialize annotation to a safe empty state.
       */
      if( !tag_accepts_annotation( &result ) ) {
        webvtt_release_string( &annotation );
        webvtt_init_string( &annotation );
      }
      status = webvtt_create_start_token( token, &result, css_classes,
                                          &annotation );
    } else if( token_state == END_TAG ) {
      status = webvtt_create_end_token( token, &result );
    } else if( token_state == TIME_STAMP_TAG ) {
      parse_timestamp( webvtt_string_text( &result ), &time_stamp );
      status = webvtt_create_timestamp_token( token, time_stamp );
    } else {
      status = WEBVTT_INVALID_TOKEN_STATE;
    }
  }

  /* Release the temporaries on every path before handing back the status. */
  webvtt_release_stringlist( &css_classes );
  webvtt_release_string( &result );
  webvtt_release_string( &annotation );

  return status;
}
/**
 * Parse a cue payload into a node tree rooted at cue->node_head.
 *
 * The payload text is fed to webvtt_cuetext_tokenizer() one token at a
 * time. End tokens move the current node back to its parent when their tag
 * name matches the current node; every other token is turned into a node
 * and attached to the current node.
 *
 * Line and length are currently not tracked.
 *
 * @param self      Parser; currently unused (see TODO below).
 * @param cue       Cue whose node_head is created and populated; must not
 *                  be NULL.
 * @param payload   Cue text to parse; must not be NULL and must have text.
 * @param finished  Currently unused.
 *
 * @return WEBVTT_INVALID_PARAM if cue or payload is NULL or the payload has
 *         no text, the failure status of webvtt_create_head_node() if the
 *         head node cannot be created, WEBVTT_SUCCESS otherwise. Tokenizer
 *         and node-creation failures are not reported.
 */
WEBVTT_INTERN webvtt_status
webvtt_parse_cuetext( webvtt_parser self, webvtt_cue *cue,
                      webvtt_string *payload, int finished )
{
  const webvtt_byte *cue_text;
  webvtt_status status;
  webvtt_byte *position;
  webvtt_node *current_node;
  webvtt_node *temp_node;
  webvtt_cuetext_token *token;
  webvtt_node_kind kind;

  /**
   * TODO: Use these parameters! 'finished' isn't really important
   * here, but 'self' certainly is as it lets us report syntax errors.
   *
   * For the time being, reference them so the compiler does not warn
   * about unused parameters.
   */
  ( void )self;
  ( void )finished;

  if( !cue || !payload ) {
    return WEBVTT_INVALID_PARAM;
  }

  cue_text = webvtt_string_text( payload );
  if( !cue_text ) {
    return WEBVTT_INVALID_PARAM;
  }

  if( WEBVTT_FAILED( status = webvtt_create_head_node( &cue->node_head ) ) ) {
    return status;
  }

  position = (webvtt_byte *)cue_text;
  current_node = cue->node_head;
  temp_node = NULL;
  token = NULL;

  /**
   * Routine taken from the W3C specification
   * http://dev.w3.org/html5/webvtt/#webvtt-cue-text-parsing-rules
   */
  while( *position != '\0' ) {
    webvtt_delete_token( &token );

    /* Step 7. */
    if( WEBVTT_FAILED( status = webvtt_cuetext_tokenizer( &position,
                                                          &token ) ) ) {
      /* Error here. The token is dropped and parsing carries on. */
      continue;
    }

    /* Succeeded... Process token */
    if( token->token_type == END_TOKEN ) {
      if( current_node->kind == WEBVTT_HEAD_NODE ) {
        /**
         * An end token at the top of the tree has no start token to close,
         * so it is thrown away.
         */
        continue;
      }

      /**
       * Seed kind with a value that cannot match here (current_node is
       * known not to be the head node), so a lookup that returns without
       * writing kind never causes an uninitialised read.
       */
      kind = WEBVTT_HEAD_NODE;

      if( webvtt_node_kind_from_tag_name( &token->tag_name, &kind ) ==
          WEBVTT_INVALID_TAG_NAME ) {
        /**
         * An end token whose tag name is not supported is thrown away.
         */
        continue;
      }

      if( current_node->kind == kind ) {
        /**
         * The end token matches the node currently being filled: move back
         * up to its parent and continue parsing.
         */
        current_node = current_node->parent;
      }
    } else {
      /**
       * Attempt to create a node from the token. On success attach it to
       * the current node, and descend into it if it is an internal node
       * type.
       */
      if( webvtt_create_node_from_token( token, &temp_node, current_node ) !=
          WEBVTT_SUCCESS ) {
        /* Do something here? */
      } else {
        webvtt_attach_node( current_node, temp_node );

        if( WEBVTT_IS_VALID_INTERNAL_NODE( temp_node->kind ) ) {
          current_node = temp_node;
        }

        /* Release the node as attaching it increases the count. */
        webvtt_release_node( &temp_node );
      }
    }
  }

  webvtt_delete_token( &token );

  return WEBVTT_SUCCESS;
}