/**
 * Tokenizer handler for the START_TAG_CLASS state.
 *
 * Consumes bytes from *position and accumulates them into a class-name
 * buffer. Every time a class name is terminated the buffer is pushed onto
 * css_classes:
 *   - '.'            ends the current class name and starts a new one;
 *   - whitespace     ends the class list and switches the tokenizer state to
 *                    START_TAG_ANNOTATION;
 *   - '>' or NUL     ends the class list (and the tag).
 *
 * On return from the whitespace and '>'/NUL cases *position still points at
 * the terminating byte, because the function returns before the loop
 * increment runs.
 *
 * @param position     In/out cursor into the cue text.
 * @param token_state  In/out tokenizer state.
 * @param css_classes  List that receives each class name that is read.
 * @return WEBVTT_SUCCESS once the class list has been terminated, otherwise
 *         the failure reported through CHECK_MEMORY_OP / CHECK_MEMORY_OP_JUMP.
 */
WEBVTT_INTERN webvtt_status
webvtt_class_state( webvtt_byte **position, webvtt_token_state *token_state,
                    webvtt_stringlist *css_classes )
{
  webvtt_string buffer;
  webvtt_status status = WEBVTT_SUCCESS;

  CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );

  for( ; *token_state == START_TAG_CLASS; (*position)++ ) {
    if( **position == UTF8_TAB || **position == UTF8_FORM_FEED ||
        **position == UTF8_SPACE || **position == UTF8_LINE_FEED ||
        **position == UTF8_CARRIAGE_RETURN ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      *token_state = START_TAG_ANNOTATION;
      /*
       * The list holds its own handle on the string after the push (the other
       * branches release right after pushing too), so the local handle has to
       * be dropped here as well or it is leaked.
       */
      webvtt_release_string( &buffer );
      /*
       * NOTE(review): webvtt_cuetext_tokenizer only keeps dispatching while
       * the handler status is WEBVTT_UNFINISHED, so returning WEBVTT_SUCCESS
       * here ends tokenizing before the annotation state runs. Confirm that
       * this is the intended status for this transition.
       */
      return WEBVTT_SUCCESS;
    } else if( **position == UTF8_GREATER_THAN || **position == UTF8_NULL_BYTE ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      return WEBVTT_SUCCESS;
    } else if( **position == UTF8_FULL_STOP ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      /*
       * Plain CHECK_MEMORY_OP (no jump): the buffer was just released, so the
       * dealloc path must not release it a second time.
       */
      CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );
    } else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
  }

dealloc:
  webvtt_release_string( &buffer );

  return status;
}
/**
 * Tokenizer handler for the START_TAG_CLASS state.
 *
 * Consumes bytes from *position and accumulates them into a class-name
 * buffer. Every time a class name is terminated the buffer is pushed onto
 * css_classes:
 *   - '.'                          ends the current class name and starts a
 *                                  new one;
 *   - '\t', '\f', ' ', '\n', '\r'  end the class list and switch the
 *                                  tokenizer state to START_TAG_ANNOTATION;
 *   - '>' or '\0'                  end the class list (and the tag).
 *
 * On return from the whitespace and '>'/'\0' cases *position still points at
 * the terminating byte, because the function returns before the loop
 * increment runs.
 *
 * @param position     In/out cursor into the cue text.
 * @param token_state  In/out tokenizer state.
 * @param css_classes  List that receives each class name that is read.
 * @return WEBVTT_SUCCESS once the class list has been terminated, otherwise
 *         the failure reported through CHECK_MEMORY_OP / CHECK_MEMORY_OP_JUMP.
 */
WEBVTT_INTERN webvtt_status
webvtt_class_state( webvtt_byte **position, webvtt_token_state *token_state,
                    webvtt_stringlist *css_classes )
{
  webvtt_string buffer;
  webvtt_status status = WEBVTT_SUCCESS;

  CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );

  for( ; *token_state == START_TAG_CLASS; (*position)++ ) {
    if( **position == '\t' || **position == '\f' || **position == ' ' ||
        **position == '\n' || **position == '\r' ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      *token_state = START_TAG_ANNOTATION;
      /*
       * The list holds its own handle on the string after the push (the other
       * branches release right after pushing too), so the local handle has to
       * be dropped here as well or it is leaked.
       */
      webvtt_release_string( &buffer );
      /*
       * NOTE(review): webvtt_cuetext_tokenizer only keeps dispatching while
       * the handler status is WEBVTT_UNFINISHED, so returning WEBVTT_SUCCESS
       * here ends tokenizing before the annotation state runs. Confirm that
       * this is the intended status for this transition.
       */
      return WEBVTT_SUCCESS;
    } else if( **position == '>' || **position == '\0' ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      return WEBVTT_SUCCESS;
    } else if( **position == '.' ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      /*
       * Plain CHECK_MEMORY_OP (no jump): the buffer was just released, so the
       * dealloc path must not release it a second time.
       */
      CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );
    } else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
  }

dealloc:
  webvtt_release_string( &buffer );

  return status;
}
/**
 * Read one cue text token starting at *position.
 *
 * The tokenizer starts in the DATA state and repeatedly dispatches to the
 * handler for the current state until a handler reports something other than
 * WEBVTT_UNFINISHED. The state the tokenizer stopped in then selects which
 * kind of token is created: text, start tag, end tag or time stamp.
 *
 * @param position  In/out cursor into the cue text; advanced as input is
 *                  consumed. A '>' left under the cursor when the handlers
 *                  finish is stepped over.
 * @param token     Receives the created token on success.
 * @return WEBVTT_INVALID_PARAM when position or *position is NULL,
 *         WEBVTT_INVALID_TOKEN_STATE when the tokenizer is in a state this
 *         function does not handle, the failure status of a scratch
 *         allocation or state handler, or otherwise the status of the token
 *         constructor that was called.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer( webvtt_byte **position, webvtt_cuetext_token **token )
{
  webvtt_token_state token_state = DATA;
  webvtt_string result, annotation;
  webvtt_stringlist *css_classes;
  webvtt_timestamp time_stamp = 0;
  webvtt_status status;

  /* *position is dereferenced below, so it has to be valid as well. */
  if( !position || !*position ) {
    return WEBVTT_INVALID_PARAM;
  }

  /*
   * Acquire the scratch objects one at a time. A failure unwinds only what
   * has already been created instead of carrying on with unusable objects.
   * NOTE(review): assumes webvtt_create_stringlist reports success as
   * WEBVTT_SUCCESS, like webvtt_create_string - confirm.
   */
  status = webvtt_create_string( 10, &result );
  if( status != WEBVTT_SUCCESS ) {
    return status;
  }

  status = webvtt_create_string( 10, &annotation );
  if( status != WEBVTT_SUCCESS ) {
    goto release_result;
  }

  status = webvtt_create_stringlist( &css_classes );
  if( status != WEBVTT_SUCCESS ) {
    goto release_annotation;
  }

  /**
   * Loop while the tokenizer is not finished.
   * Based on the state of the tokenizer enter a function to handle that
   * particular tokenizer state. Those functions will loop until they either
   * change the state of the tokenizer or reach a valid token end point.
   */
  status = WEBVTT_UNFINISHED;
  while( status == WEBVTT_UNFINISHED ) {
    switch( token_state ) {
      case DATA:
        status = webvtt_data_state( position, &token_state, &result );
        break;
      case ESCAPE:
        status = webvtt_escape_state( position, &token_state, &result );
        break;
      case TAG:
        status = webvtt_tag_state( position, &token_state, &result );
        break;
      case START_TAG:
        status = webvtt_start_tag_state( position, &token_state, &result );
        break;
      case START_TAG_CLASS:
        status = webvtt_class_state( position, &token_state, css_classes );
        break;
      case START_TAG_ANNOTATION:
        status = webvtt_annotation_state( position, &token_state, &annotation );
        break;
      case END_TAG:
        status = webvtt_end_tag_state( position, &token_state, &result );
        break;
      case TIME_STAMP_TAG:
        status = webvtt_timestamp_state( position, &token_state, &result );
        break;
      default:
        /*
         * No handler would run for this state, so status would stay
         * WEBVTT_UNFINISHED and the loop would never terminate.
         */
        status = WEBVTT_INVALID_TOKEN_STATE;
        break;
    }

    /*
     * Done after every handler call while in the annotation state, so the
     * annotation handler presumably starts on a non-whitespace byte.
     */
    if( token_state == START_TAG_ANNOTATION ) {
      webvtt_skipwhite( position );
    }
  }

  /* Step over the '>' that closed a tag so the next call starts after it. */
  if( **position == UTF8_GREATER_THAN ) {
    (*position)++;
  }

  if( status == WEBVTT_SUCCESS ) {
    /**
     * The state that the tokenizer left off on will tell us what kind of token
     * needs to be made.
     */
    if( token_state == DATA || token_state == ESCAPE ) {
      status = webvtt_create_text_token( token, &result );
    } else if( token_state == TAG || token_state == START_TAG ||
               token_state == START_TAG_CLASS ||
               token_state == START_TAG_ANNOTATION ) {
      /**
       * If the tag does not accept an annotation then release the current
       * annotation and initialize annotation to a safe empty state.
       */
      if( !tag_accepts_annotation( &result ) ) {
        webvtt_release_string( &annotation );
        webvtt_init_string( &annotation );
      }
      status = webvtt_create_start_token( token, &result, css_classes,
                                          &annotation );
    } else if( token_state == END_TAG ) {
      status = webvtt_create_end_token( token, &result );
    } else if( token_state == TIME_STAMP_TAG ) {
      /* time_stamp keeps its initial 0 unless parse_timestamp writes to it. */
      parse_timestamp( webvtt_string_text( &result ), &time_stamp );
      status = webvtt_create_timestamp_token( token, time_stamp );
    } else {
      status = WEBVTT_INVALID_TOKEN_STATE;
    }
  }

  webvtt_release_stringlist( &css_classes );
release_annotation:
  webvtt_release_string( &annotation );
release_result:
  webvtt_release_string( &result );

  return status;
}
/**
 * Tokenizer handler for the ESCAPE state.
 *
 * Collects the bytes that follow an ampersand into a scratch buffer and, when
 * a ';' is reached, replaces the recognised escape sequences (amp, lt, gt,
 * rlm, lrm, nbsp) with their interpretation in result. Anything that does not
 * form a recognised sequence is copied to result unchanged.
 *
 * @param position     In/out cursor into the cue text.
 * @param token_state  In/out tokenizer state; set back to DATA once the
 *                     escape sequence has been resolved or abandoned.
 * @param result       Text accumulated for the current token; appended to.
 * @return WEBVTT_UNFINISHED when the state was switched back to DATA,
 *         WEBVTT_SUCCESS when a NUL byte or '<' ended the token, otherwise the
 *         failure reported by a memory operation.
 */
WEBVTT_INTERN webvtt_status
webvtt_escape_state( webvtt_byte **position, webvtt_token_state *token_state,
                     webvtt_string *result )
{
  webvtt_string buffer;
  webvtt_status status = WEBVTT_SUCCESS;

  /*
   * Return directly on failure: there is no buffer to release yet, so the
   * dealloc path must not run.
   */
  status = webvtt_create_string( 1, &buffer );
  if( status != WEBVTT_SUCCESS ) {
    return status;
  }

  /**
   * Append ampersand here because the algorithm is not able to add it to the
   * buffer when it reads it in the DATA state tokenizer.
   */
  CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, UTF8_AMPERSAND ) );

  for( ; *token_state == ESCAPE; (*position)++ ) {
    /**
     * We have encountered a token termination point.
     * Append buffer to result and return success.
     */
    if( **position == UTF8_NULL_BYTE || **position == UTF8_LESS_THAN ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
      goto dealloc;
    }
    /**
     * A second ampersand means the sequence collected so far is malformed.
     * Add that malformed text to the result and recreate the buffer to
     * prepare for a new escape sequence.
     */
    else if( **position == UTF8_AMPERSAND ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
      webvtt_release_string( &buffer );
      /*
       * The buffer has just been released; on failure return directly so
       * that the dealloc path does not release it a second time.
       */
      status = webvtt_create_string( 1, &buffer );
      if( status != WEBVTT_SUCCESS ) {
        return status;
      }
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
    /**
     * We've encountered the semicolon which is the end of an escape sequence.
     * Check if buffer contains a valid escape sequence and if it does append
     * the interpretation to result and change the state to DATA.
     *
     * The buffer always starts with the ampersand appended above and never
     * contains the ';', so the literals compared against are the entity
     * names in that form; each length argument is the literal's length.
     */
    else if( **position == UTF8_SEMI_COLON ) {
      if( webvtt_string_is_equal( &buffer, "&amp", 4 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, '&' ) );
      } else if( webvtt_string_is_equal( &buffer, "&lt", 3 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, '<' ) );
      } else if( webvtt_string_is_equal( &buffer, "&gt", 3 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, '>' ) );
      } else if( webvtt_string_is_equal( &buffer, "&rlm", 4 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, rlm_replace,
                                                            RLM_REPLACE_LENGTH ) );
      } else if( webvtt_string_is_equal( &buffer, "&lrm", 4 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, lrm_replace,
                                                            LRM_REPLACE_LENGTH ) );
      } else if( webvtt_string_is_equal( &buffer, "&nbsp", 5 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, nbsp_replace,
                                                            NBSP_REPLACE_LENGTH ) );
      } else {
        /* Unknown sequence: keep the text, including the ';', as it is. */
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) );
      }

      *token_state = DATA;
      status = WEBVTT_UNFINISHED;
    }
    /**
     * Character is alphanumeric. This means we are in the body of the escape
     * sequence.
     */
    else if( webvtt_isalphanum( **position ) ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
    /**
     * If we have not found an alphanumeric character then we have encountered
     * a malformed escape sequence. Add buffer to result and continue to parse
     * in DATA state.
     */
    else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) );
      status = WEBVTT_UNFINISHED;
      *token_state = DATA;
    }
  }

dealloc:
  webvtt_release_string( &buffer );

  return status;
}