Example #1
0
WEBVTT_INTERN webvtt_status
webvtt_class_state( webvtt_byte **position, webvtt_token_state *token_state, 
                    webvtt_stringlist *css_classes )
{
  webvtt_string buffer;
  webvtt_status status = WEBVTT_SUCCESS;

  CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );

  for( ; *token_state == START_TAG_CLASS; (*position)++ ) {
    if( **position == UTF8_TAB || **position == UTF8_FORM_FEED ||
        **position == UTF8_SPACE || **position == UTF8_LINE_FEED ||
        **position == UTF8_CARRIAGE_RETURN) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      *token_state = START_TAG_ANNOTATION;
      return WEBVTT_SUCCESS;
    } else if( **position == UTF8_GREATER_THAN || **position == UTF8_NULL_BYTE ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      return WEBVTT_SUCCESS;
    } else if( **position == UTF8_FULL_STOP ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );
    } else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
  }

dealloc:
  webvtt_release_string( &buffer );

  return status;
}
Example #2
0
WEBVTT_INTERN webvtt_status
webvtt_class_state( webvtt_byte **position, webvtt_token_state *token_state, 
                    webvtt_stringlist *css_classes )
{
  webvtt_string buffer;
  webvtt_status status = WEBVTT_SUCCESS;

  CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );

  for( ; *token_state == START_TAG_CLASS; (*position)++ ) {
    if( **position == '\t' || **position == '\f' ||
        **position == ' ' || **position == '\n' ||
        **position == '\r') {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      *token_state = START_TAG_ANNOTATION;
      return WEBVTT_SUCCESS;
    } else if( **position == '>' || **position == '\0' ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      return WEBVTT_SUCCESS;
    } else if( **position == '.' ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_stringlist_push( css_classes, &buffer ) );
      webvtt_release_string( &buffer );
      CHECK_MEMORY_OP( webvtt_create_string( 1, &buffer ) );
    } else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
  }

dealloc:
  webvtt_release_string( &buffer );

  return status;
}
Example #3
0
/**
 * Need to set up differently.
 * Get a status in order to return at end and release memeory.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer( webvtt_byte **position, webvtt_cuetext_token **token )
{
  webvtt_token_state token_state = DATA;
  webvtt_string result, annotation;
  webvtt_stringlist *css_classes;
  webvtt_timestamp time_stamp = 0;
  webvtt_status status = WEBVTT_UNFINISHED;

  if( !position ) {
    return WEBVTT_INVALID_PARAM;
  }

  webvtt_create_string( 10, &result );
  webvtt_create_string( 10, &annotation );
  webvtt_create_stringlist( &css_classes );
  
  /**
   * Loop while the tokenizer is not finished.
   * Based on the state of the tokenizer enter a function to handle that
   * particular tokenizer state. Those functions will loop until they either
   * change the state of the tokenizer or reach a valid token end point.
   */
  while( status == WEBVTT_UNFINISHED ) {
    switch( token_state ) {
      case DATA :
        status = webvtt_data_state( position, &token_state, &result );
        break;
      case ESCAPE:
        status = webvtt_escape_state( position, &token_state, &result );
        break;
      case TAG:
        status = webvtt_tag_state( position, &token_state, &result );
        break;
      case START_TAG:
        status = webvtt_start_tag_state( position, &token_state, &result );
        break;
      case START_TAG_CLASS:
        status = webvtt_class_state( position, &token_state, css_classes );
        break;
      case START_TAG_ANNOTATION:
        status = webvtt_annotation_state( position, &token_state, &annotation );
        break;
      case END_TAG:
        status = webvtt_end_tag_state( position, &token_state, &result );
        break;
      case TIME_STAMP_TAG:
        status = webvtt_timestamp_state( position, &token_state, &result );
        break;
    }

    if( token_state == START_TAG_ANNOTATION ) {
      webvtt_skipwhite( position );
    }
  }

  if( **position == UTF8_GREATER_THAN )
  { (*position)++; }
  
  if( status == WEBVTT_SUCCESS ) {
    /**
     * The state that the tokenizer left off on will tell us what kind of token
     * needs to be made.
     */
    if( token_state == DATA || token_state == ESCAPE ) {
      status = webvtt_create_text_token( token, &result );
    } else if(token_state == TAG || token_state == START_TAG || token_state == START_TAG_CLASS ||
              token_state == START_TAG_ANNOTATION) {
      /**
      * If the tag does not accept an annotation then release the current 
      * annotation and intialize annotation to a safe empty state
      */
      if( !tag_accepts_annotation( &result ) ) {
        webvtt_release_string( &annotation );
        webvtt_init_string( &annotation );
      }
      status = webvtt_create_start_token( token, &result, css_classes, &annotation );
    } else if( token_state == END_TAG ) {
      status = webvtt_create_end_token( token, &result );
    } else if( token_state == TIME_STAMP_TAG ) {
      parse_timestamp( webvtt_string_text( &result ), &time_stamp );
      status = webvtt_create_timestamp_token( token, time_stamp );
    } else {
      status = WEBVTT_INVALID_TOKEN_STATE;
    }
  }
  
  webvtt_release_stringlist( &css_classes );
  webvtt_release_string( &result );
  webvtt_release_string( &annotation );
  
  return status;
}
Example #4
0
WEBVTT_INTERN webvtt_status
webvtt_escape_state( webvtt_byte **position, webvtt_token_state *token_state, 
                     webvtt_string *result )
{
  webvtt_string buffer;
  webvtt_status status = WEBVTT_SUCCESS;

  CHECK_MEMORY_OP_JUMP( status, webvtt_create_string( 1, &buffer ) );

  /**
   * Append ampersand here because the algorithm is not able to add it to the
   * buffer when it reads it in the DATA state tokenizer.
   */
  CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, UTF8_AMPERSAND ) );

  for( ; *token_state == ESCAPE; (*position)++ ) {
    /**
     * We have encountered a token termination point.
     * Append buffer to result and return success.
     */
    if( **position == UTF8_NULL_BYTE || **position == UTF8_LESS_THAN ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
      goto dealloc;
    }
    /**
     * This means we have enocuntered a malformed escape character sequence.
     * This means that we need to add that malformed text to the result and
     * recreate the buffer to prepare for a new escape sequence.
     */
    else if( **position == UTF8_AMPERSAND ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
      webvtt_release_string( &buffer );
      CHECK_MEMORY_OP_JUMP( status, webvtt_create_string( 1, &buffer ) );
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, *position[0] ) );
    }
    /**
     * We've encountered the semicolon which is the end of an escape sequence.
     * Check if buffer contains a valid escape sequence and if it does append
     * the interpretation to result and change the state to DATA.
     */
    else if( **position == UTF8_SEMI_COLON ) {
      if( webvtt_string_is_equal( &buffer, "&amp", 4 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, '&' ) );
      } else if( webvtt_string_is_equal( &buffer, "&lt", 3 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, '<' ) );
      } else if( webvtt_string_is_equal( &buffer, "&gt", 3 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, '>' ) );
      } else if( webvtt_string_is_equal( &buffer, "&rlm", 4 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, rlm_replace, RLM_REPLACE_LENGTH ) );
      } else if( webvtt_string_is_equal( &buffer, "&lrm", 4 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, lrm_replace, LRM_REPLACE_LENGTH ) );
      } else if( webvtt_string_is_equal( &buffer, "&nbsp", 5 ) ) {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append( result, nbsp_replace, NBSP_REPLACE_LENGTH ) );
      } else {
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
        CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) );
      }

      *token_state = DATA;
      status = WEBVTT_UNFINISHED;
    }
    /**
     * Character is alphanumeric. This means we are in the body of the escape
     * sequence.
     */
    else if( webvtt_isalphanum( **position ) ) {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( &buffer, **position ) );
    }
    /**
     * If we have not found an alphanumeric character then we have encountered
     * a malformed escape sequence. Add buffer to result and continue to parse
     * in DATA state.
     */
    else {
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_append_string( result, &buffer ) );
      CHECK_MEMORY_OP_JUMP( status, webvtt_string_putc( result, **position ) );
      status = WEBVTT_UNFINISHED;
      *token_state = DATA;
    }
  }

dealloc:
  webvtt_release_string( &buffer );

  return status;
}