Example #1
0
/**
 * Map a cue-text tag name onto the node kind it denotes.
 *
 * One-character names are matched against UTF8_B, UTF8_I, UTF8_U, UTF8_C and
 * UTF8_V; any other name is compared with "ruby" and "rt".
 *
 * @param tag_name name of the tag to look up.
 * @param kind     receives the node kind. Only written when the lookup
 *                 succeeds.
 * @return WEBVTT_INVALID_PARAM if either argument is NULL,
 *         WEBVTT_INVALID_TAG_NAME if the name is not recognised,
 *         WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_node_kind_from_tag_name( webvtt_string *tag_name, webvtt_node_kind *kind )
{
  if( !tag_name || !kind ) {
    return WEBVTT_INVALID_PARAM;
  }

  if( webvtt_string_length(tag_name) == 1 ) {
    switch( webvtt_string_text(tag_name)[0] ) {
      case( UTF8_B ):
        *kind = WEBVTT_BOLD;
        break;
      case( UTF8_I ):
        *kind = WEBVTT_ITALIC;
        break;
      case( UTF8_U ):
        *kind = WEBVTT_UNDERLINE;
        break;
      case( UTF8_C ):
        *kind = WEBVTT_CLASS;
        break;
      case( UTF8_V ):
        *kind = WEBVTT_VOICE;
        break;
      default:
        /* An unknown one-character tag must not report success, otherwise
           the caller would go on to read a kind that was never written. */
        return WEBVTT_INVALID_TAG_NAME;
    }
  } else if( webvtt_string_is_equal( tag_name, "ruby", 4 ) ) {
    *kind = WEBVTT_RUBY;
  } else if( webvtt_string_is_equal( tag_name, "rt", 2 ) ) {
    *kind = WEBVTT_RUBY_TEXT;
  } else {
    return WEBVTT_INVALID_TAG_NAME;
  }

  return WEBVTT_SUCCESS;
}
Example #2
0
File: cue.c Project: caitp/webvtt
/**
 * Split a cue-settings string into whitespace-separated words and apply each
 * word to the cue as a setting.
 *
 * Only the text up to the first carriage return or line feed is considered.
 * A setting that fails to apply does not stop processing: when a parser is
 * supplied the failure is reported through WARNING_AT at the current
 * line/column, and the loop moves on to the next word.
 *
 * @param self     parser used for error reporting and column tracking; may be
 *                 NULL, in which case nothing is reported. When non-NULL its
 *                 column is updated before returning WEBVTT_SUCCESS.
 * @param cue      cue that receives the settings.
 * @param settings string holding the settings text.
 * @return WEBVTT_INVALID_PARAM if cue or settings is NULL,
 *         WEBVTT_OUT_OF_MEMORY if a word could not be collected,
 *         WEBVTT_SUCCESS otherwise.
 */
WEBVTT_EXPORT webvtt_status
webvtt_cue_validate_set_settings( webvtt_parser self, webvtt_cue *cue,
                                  const webvtt_string *settings )
{
  int line = 1;
  int column = 0;
  int length;
  const char *text;
  const char *eol;
  int position = 0;
  webvtt_status s;
  if( !cue || !settings ) {
    return WEBVTT_INVALID_PARAM;
  }
  length = (int)webvtt_string_length( settings );
  text = webvtt_string_text( settings );
  /* Stop at whichever line terminator comes first. The text pointer is
     checked because searching a NULL string is undefined behaviour. */
  if( text && ( eol = strpbrk( text, "\r\n" ) ) ) {
    length = (int)( eol - text );
  }

  if( self ) {
    line = self->line;
    column = self->column;
  }

  /**
   * http://www.w3.org/html/wg/drafts/html/master/single-page.html#split-a-string-on-spaces
   * 4. Skip whitespace
   */
  column += webvtt_string_skip_whitespace( settings, &position );

  while( position < length ) {
    webvtt_string word;
    const char *keyword;
    const char *end;
    int nwhite, ncol;
    /* Collect word (sequence of non-space characters terminated by space) */
    if( WEBVTT_FAILED( webvtt_string_collect_word( settings, &word,
                       &position ) ) ) {
      return WEBVTT_OUT_OF_MEMORY;
    }
    /* Skip trailing whitespace */
    nwhite = webvtt_string_skip_whitespace( settings, &position );
    /* Get the word text */
    keyword = webvtt_string_text( &word );
    /* Get pointer to end of the word (for chcount()) */
    end = keyword + webvtt_string_length( &word );
    /* Get the column count that needs to be skipped */
    ncol = webvtt_utf8_chcount( keyword, end );
    if( WEBVTT_FAILED( s = webvtt_cue_set_setting_from_string( cue,
                       keyword ) ) ) {
      if( self ) {
        /* Figure out which error to emit */
        webvtt_error error;
        if( webvtt_error_for_status( s, &error ) ) {
          /* There is no non-recoverable cue-setting error.
             Therefore we do not want to abort the loop, regardless
             of the return value from the error handler. */
          WARNING_AT( error, line, column );
        }
      }
    }
    /* Move column pointer beyond word and trailing whitespace */
    column += ncol + nwhite;
    webvtt_release_string( &word );
  }

  if( self ) {
    self->column = column;
  }
  return WEBVTT_SUCCESS;
}
Example #3
0
/**
 * Turn the text of a cue payload into a tree of nodes hanging off
 * cue->node_head.
 *
 * Line and length are not tracked at the moment, and the self and finished
 * arguments are not used by this implementation.
 *
 * @return WEBVTT_INVALID_PARAM if cue is NULL or the payload has no text,
 *         the failing status if the head node cannot be created,
 *         WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_parse_cuetext( webvtt_parser self, webvtt_cue *cue, webvtt_string *payload, int finished )
{
  const webvtt_byte *text;
  webvtt_byte *cursor;
  webvtt_node *active;
  webvtt_node *created = NULL;
  webvtt_cuetext_token *token = NULL;
  webvtt_node_kind end_kind;
  webvtt_status status;

  if( !cue ) {
    return WEBVTT_INVALID_PARAM;
  }

  text = webvtt_string_text( payload );
  if( !text ) {
    return WEBVTT_INVALID_PARAM;
  }

  status = webvtt_create_head_node( &cue->node_head );
  if( WEBVTT_FAILED( status ) ) {
    return status;
  }

  cursor = (webvtt_byte *)text;
  active = cue->node_head;

  /**
   * Routine taken from the W3C specification
   * http://dev.w3.org/html5/webvtt/#webvtt-cue-text-parsing-rules
   */
  while( *cursor != UTF8_NULL_BYTE ) {
    /* Drop the token left over from the previous iteration. */
    webvtt_delete_token( &token );

    /* Step 7: fetch the next token. Anything but success is ignored. */
    if( webvtt_cuetext_tokenizer( &cursor, &token ) != WEBVTT_SUCCESS ) {
      webvtt_skipwhite( &cursor );
      continue;
    }

    /* Step 8: act on the token. */
    if( token->token_type == END_TOKEN ) {
      /* An end tag at the top of the tree has no start tag to close. */
      if( active->kind == WEBVTT_HEAD_NODE ) {
        continue;
      }

      /* An end tag whose name is not supported is thrown away. */
      if( webvtt_node_kind_from_tag_name( &token->tag_name, &end_kind )
          == WEBVTT_INVALID_TAG_NAME ) {
        continue;
      }

      /* A matching end tag closes the current node: step up to its parent. */
      if( active->kind == end_kind ) {
        active = active->parent;
      }
    } else if( webvtt_create_node_from_token( token, &created, active )
               == WEBVTT_SUCCESS ) {
      /* Hang the new node under the current one, and descend into it when
         it is an internal node. */
      webvtt_attach_node( active, created );

      if( WEBVTT_IS_VALID_INTERNAL_NODE( created->kind ) ) {
        active = created;
      }

      /* Attaching took its own reference, so give ours back. */
      webvtt_release_node( &created );
    }

    webvtt_skipwhite( &cursor );
  }

  webvtt_delete_token( &token );

  return WEBVTT_SUCCESS;
}
Example #4
0
/**
 * Read one cue-text token starting at *position.
 *
 * A small state machine calls the handler for the current tokenizer state
 * until a handler reports something other than WEBVTT_UNFINISHED. The state
 * the machine stopped in then decides which kind of token is created.
 *
 * @param position in/out cursor into the cue text. It is handed to the state
 *                 handlers, and stepped past a UTF8_GREATER_THAN byte if one
 *                 is current once the state machine stops.
 * @param token    receives the token created on success.
 * @return WEBVTT_INVALID_PARAM if position, *position or token is NULL,
 *         the failing status if a working buffer cannot be created,
 *         WEBVTT_INVALID_TOKEN_STATE if the tokenizer is in a state it does
 *         not know, otherwise the status reported by the state handlers or
 *         by the token constructor.
 */
WEBVTT_INTERN webvtt_status
webvtt_cuetext_tokenizer( webvtt_byte **position, webvtt_cuetext_token **token )
{
  webvtt_token_state token_state = DATA;
  webvtt_string result, annotation;
  webvtt_stringlist *css_classes = NULL;
  webvtt_timestamp time_stamp = 0;
  webvtt_status status;

  if( !position || !*position || !token ) {
    return WEBVTT_INVALID_PARAM;
  }

  /* Create the working buffers, undoing the earlier ones if a later one
     fails so that nothing is leaked or left uninitialised. */
  status = webvtt_create_string( 10, &result );
  if( status != WEBVTT_SUCCESS ) {
    return status;
  }
  status = webvtt_create_string( 10, &annotation );
  if( status != WEBVTT_SUCCESS ) {
    webvtt_release_string( &result );
    return status;
  }
  status = webvtt_create_stringlist( &css_classes );
  if( status != WEBVTT_SUCCESS ) {
    webvtt_release_string( &annotation );
    webvtt_release_string( &result );
    return status;
  }

  status = WEBVTT_UNFINISHED;

  /**
   * Loop while the tokenizer is not finished.
   * Based on the state of the tokenizer enter a function to handle that
   * particular tokenizer state. Those functions will loop until they either
   * change the state of the tokenizer or reach a valid token end point.
   */
  while( status == WEBVTT_UNFINISHED ) {
    switch( token_state ) {
      case DATA :
        status = webvtt_data_state( position, &token_state, &result );
        break;
      case ESCAPE:
        status = webvtt_escape_state( position, &token_state, &result );
        break;
      case TAG:
        status = webvtt_tag_state( position, &token_state, &result );
        break;
      case START_TAG:
        status = webvtt_start_tag_state( position, &token_state, &result );
        break;
      case START_TAG_CLASS:
        status = webvtt_class_state( position, &token_state, css_classes );
        break;
      case START_TAG_ANNOTATION:
        status = webvtt_annotation_state( position, &token_state, &annotation );
        break;
      case END_TAG:
        status = webvtt_end_tag_state( position, &token_state, &result );
        break;
      case TIME_STAMP_TAG:
        status = webvtt_timestamp_state( position, &token_state, &result );
        break;
      default:
        /* No handler would ever run for an unknown state, so the loop
           could not make progress; stop with an error instead. */
        status = WEBVTT_INVALID_TOKEN_STATE;
        break;
    }

    if( token_state == START_TAG_ANNOTATION ) {
      webvtt_skipwhite( position );
    }
  }

  /* Step over the '>' that closes a tag, if the cursor is resting on one. */
  if( **position == UTF8_GREATER_THAN ) {
    (*position)++;
  }

  if( status == WEBVTT_SUCCESS ) {
    /**
     * The state that the tokenizer left off on will tell us what kind of token
     * needs to be made.
     */
    if( token_state == DATA || token_state == ESCAPE ) {
      status = webvtt_create_text_token( token, &result );
    } else if( token_state == TAG || token_state == START_TAG ||
               token_state == START_TAG_CLASS ||
               token_state == START_TAG_ANNOTATION ) {
      /**
       * If the tag does not accept an annotation then release the current
       * annotation and initialize annotation to a safe empty state.
       */
      if( !tag_accepts_annotation( &result ) ) {
        webvtt_release_string( &annotation );
        webvtt_init_string( &annotation );
      }
      status = webvtt_create_start_token( token, &result, css_classes, &annotation );
    } else if( token_state == END_TAG ) {
      status = webvtt_create_end_token( token, &result );
    } else if( token_state == TIME_STAMP_TAG ) {
      parse_timestamp( webvtt_string_text( &result ), &time_stamp );
      status = webvtt_create_timestamp_token( token, time_stamp );
    } else {
      status = WEBVTT_INVALID_TOKEN_STATE;
    }
  }

  webvtt_release_stringlist( &css_classes );
  webvtt_release_string( &result );
  webvtt_release_string( &annotation );

  return status;
}
Example #5
0
/**
 * Turn the text of a cue payload into a tree of nodes hanging off
 * cue->node_head.
 *
 * Line and length are not tracked at the moment.
 *
 * @return WEBVTT_INVALID_PARAM if cue is NULL or the payload has no text,
 *         the failing status if the head node cannot be created,
 *         WEBVTT_SUCCESS otherwise.
 */
WEBVTT_INTERN webvtt_status
webvtt_parse_cuetext( webvtt_parser self, webvtt_cue *cue, webvtt_string *payload, int finished )
{
  const webvtt_byte *text;
  webvtt_byte *cursor;
  webvtt_node *active;
  webvtt_node *created = NULL;
  webvtt_cuetext_token *token = NULL;
  webvtt_node_kind end_kind;
  webvtt_status status;

  /**
   * TODO: Use these parameters! 'finished' is not very important here, but
   * 'self' is what would let syntax errors be reported.
   *
   * Until then, the casts below keep the compiler from warning about
   * unused parameters.
   */
  ( void )self;
  ( void )finished;

  if( !cue ) {
    return WEBVTT_INVALID_PARAM;
  }

  text = webvtt_string_text( payload );
  if( !text ) {
    return WEBVTT_INVALID_PARAM;
  }

  status = webvtt_create_head_node( &cue->node_head );
  if( WEBVTT_FAILED( status ) ) {
    return status;
  }

  cursor = (webvtt_byte *)text;
  active = cue->node_head;

  /**
   * Routine taken from the W3C specification
   * http://dev.w3.org/html5/webvtt/#webvtt-cue-text-parsing-rules
   */
  while( *cursor != '\0' ) {
    /* Drop the token left over from the previous iteration. */
    webvtt_delete_token( &token );

    /* Step 7: fetch the next token. A failed read is simply skipped. */
    if( WEBVTT_FAILED( webvtt_cuetext_tokenizer( &cursor, &token ) ) ) {
      continue;
    }

    if( token->token_type != END_TOKEN ) {
      /**
       * Try to build a node from the token. On success it is attached under
       * the current node, and becomes the current node itself when it is an
       * internal node type.
       */
      if( webvtt_create_node_from_token( token, &created, active )
          == WEBVTT_SUCCESS ) {
        webvtt_attach_node( active, created );

        if( WEBVTT_IS_VALID_INTERNAL_NODE( created->kind ) ) {
          active = created;
        }

        /* Attaching took its own reference, so give ours back. */
        webvtt_release_node( &created );
      }
      continue;
    }

    /* An end tag at the top of the tree has no start tag to close. */
    if( active->kind == WEBVTT_HEAD_NODE ) {
      continue;
    }

    /* An end tag whose name is not supported is thrown away. */
    if( webvtt_node_kind_from_tag_name( &token->tag_name, &end_kind )
        == WEBVTT_INVALID_TAG_NAME ) {
      continue;
    }

    /* A matching end tag closes the current node: step up to its parent. */
    if( active->kind == end_kind ) {
      active = active->parent;
    }
  }

  webvtt_delete_token( &token );

  return WEBVTT_SUCCESS;
}