Пример #1
0
/**
 * Feed one chunk of a multipart body to the parser state machine.
 *
 * The chunk is walked byte by byte; p->state selects the handling of each
 * byte and is updated as the grammar advances, so the function can be
 * called repeatedly with consecutive chunks of the same body.
 *
 * NOTE(review): NOTIFY_CB and EMIT_DATA_CB are macros defined outside this
 * block; presumably they invoke user callbacks and may return early --
 * confirm against their definition.
 *
 * @param p    parser; its state, index, boundary_length, multipart_boundary
 *             and lookbehind members are read and/or written here.
 * @param buf  bytes to parse.
 * @param len  number of bytes available in buf.
 * @return     len when every byte was accepted; the index of the first
 *             rejected byte on a syntax error; 0 when p->state holds an
 *             unknown value.
 */
size_t multipart_parser_execute(multipart_parser    *p,
                                const char          *buf,
                                size_t              len
                                )
{
    size_t      i       = 0;    /* index of the byte being examined */
    size_t      mark    = 0;    /* start of the data run not yet emitted */
    char        c, cl;
    int         is_last = 0;    /* non-zero on the final byte of this chunk */


    while ( i < len )
    {
        c       = buf[i];
        is_last = (i == (len - 1) );

        switch ( p->state )
        {
            case s_start:
                multipart_log("s_start");
                p->index    = 0;
                p->state    = s_start_boundary;


            /* fallthrough */
            case s_start_boundary:
                multipart_log("s_start_boundary");

                /* index == boundary_length: whole boundary matched, expect CR */
                if ( p->index == p->boundary_length )
                {
                    if ( c != CR )
                    {
                        return (i);
                    }

                    p->index++;
                    break;
                }
                /* index == boundary_length + 1: CR seen, expect LF */
                else if ( p->index == (p->boundary_length + 1) )
                {
                    if ( c != LF )
                    {
                        return (i);
                    }

                    p->index    = 0;
                    NOTIFY_CB(part_data_begin);
                    p->state    = s_header_field_start;
                    break;
                }

                /* still inside the boundary: bytes must match exactly */
                if ( c != p->multipart_boundary[p->index] )
                {
                    return (i);
                }

                p->index++;
                break;

            case s_header_field_start:
                multipart_log("s_header_field_start");
                mark        = i;
                p->state    = s_header_field;


            /* fallthrough */
            case s_header_field:
                multipart_log("s_header_field");

                /* CR where a header name would start/continue: end of headers */
                if ( c == CR )
                {
                    p->state = s_headers_almost_done;
                    break;
                }

                if ( c == ':' )
                {
                    EMIT_DATA_CB(header_field, buf + mark, i - mark);
                    p->state = s_header_value_start;
                    break;
                }

                /*
                 * <ctype.h> functions require a value representable as
                 * unsigned char; passing a negative plain char is undefined.
                 */
                cl = (char) tolower( (unsigned char) c);

                if ( (c != '-') && ( (cl < 'a') || (cl > 'z') ) )
                {
                    multipart_log("invalid character in header name");

                    return (i);
                }

                /* chunk ends inside the name: flush what we have so far */
                if ( is_last )
                {
                    EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
                }

                break;

            case s_headers_almost_done:
                multipart_log("s_headers_almost_done");

                if ( c != LF )
                {
                    return (i);
                }

                p->state = s_part_data_start;
                break;

            case s_header_value_start:
                multipart_log("s_header_value_start");

                /* skip spaces between ':' and the value */
                if ( c == ' ' )
                {
                    break;
                }

                mark        = i;
                p->state    = s_header_value;


            /* fallthrough */
            case s_header_value:
                multipart_log("s_header_value");

                if ( c == CR )
                {
                    EMIT_DATA_CB(header_value, buf + mark, i - mark);
                    p->state = s_header_value_almost_done;
                    break;
                }

                /* chunk ends inside the value: flush what we have so far */
                if ( is_last )
                {
                    EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
                }

                break;

            case s_header_value_almost_done:
                multipart_log("s_header_value_almost_done");

                if ( c != LF )
                {
                    return (i);
                }

                p->state    = s_header_field_start;
                break;

            case s_part_data_start:
                multipart_log("s_part_data_start");
                NOTIFY_CB(headers_complete);
                mark        = i;
                p->state    = s_part_data;


            /* fallthrough */
            case s_part_data:
                multipart_log("s_part_data");

                /* a CR may start the CRLF that precedes a boundary */
                if ( c == CR )
                {
                    EMIT_DATA_CB(part_data, buf + mark, i - mark);
                    mark                = i;
                    p->state            = s_part_data_almost_boundary;
                    p->lookbehind[0]    = CR;
                    break;
                }

                if ( is_last )
                {
                    EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
                }

                break;

            case s_part_data_almost_boundary:
                multipart_log("s_part_data_almost_boundary");

                if ( c == LF )
                {
                    p->state            = s_part_data_boundary;
                    p->lookbehind[1]    = LF;
                    p->index            = 0;
                    break;
                }

                /*
                 * The CR was ordinary data: emit it from lookbehind, then
                 * step i back so the ++i below re-examines this same byte
                 * in s_part_data.
                 */
                EMIT_DATA_CB(part_data, p->lookbehind, 1);
                p->state    = s_part_data;
                mark        = i--;
                break;

            case s_part_data_boundary:
                multipart_log("s_part_data_boundary");

                if ( p->multipart_boundary[p->index] != c )
                {
                    /*
                     * False alarm: CR LF plus the p->index boundary bytes
                     * matched so far were data. Emit them and re-examine
                     * the current byte as part data.
                     */
                    EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
                    p->state    = s_part_data;
                    mark        = i--;
                    break;
                }

                p->lookbehind[2 + p->index] = c;

                if ( (++p->index) == p->boundary_length )
                {
                    NOTIFY_CB(part_data_end);
                    p->state = s_part_data_almost_end;
                }

                break;

            case s_part_data_almost_end:
                multipart_log("s_part_data_almost_end");

                /* '-' after a boundary: possibly the closing "--" */
                if ( c == '-' )
                {
                    p->state = s_part_data_final_hyphen;
                    break;
                }

                /* CR after a boundary: another part follows */
                if ( c == CR )
                {
                    p->state = s_part_data_end;
                    break;
                }

                return (i);

            case s_part_data_final_hyphen:
                multipart_log("s_part_data_final_hyphen");

                if ( c == '-' )
                {
                    NOTIFY_CB(body_end);
                    p->state = s_end;
                    break;
                }

                return (i);

            case s_part_data_end:
                multipart_log("s_part_data_end");

                if ( c == LF )
                {
                    p->state = s_header_field_start;
                    NOTIFY_CB(part_data_begin);
                    break;
                }

                return (i);

            case s_end:
                /* bytes after the closing boundary are logged and skipped */
                multipart_log("s_end: %02X", (int) c);
                break;

            default:
                multipart_log("Multipart parser unrecoverable error");

                return (0);
        }

        ++i;
    }

    return (len);
}
/**
 * Feed one chunk of a multipart body to the parser state machine.
 *
 * Bytes are examined one at a time; p->state selects the handling and is
 * updated as parsing advances, so consecutive chunks can be passed in
 * successive calls. Unlike a strict parser, bytes that do not match the
 * opening boundary are skipped as preamble until a CR LF is seen.
 *
 * NOTE(review): NOTIFY_CB and EMIT_DATA_CB are macros defined outside this
 * block; presumably they dispatch through `settings` and may return early
 * -- confirm against their definition.
 *
 * @param p         parser; its state, index, boundary_length,
 *                  multipart_boundary and lookbehind members are used here.
 * @param settings  not referenced directly in this body (presumably used
 *                  by the callback macros -- confirm).
 * @param buf       bytes to parse.
 * @param len       number of bytes available in buf.
 * @return          len when every byte was accepted; the index of the first
 *                  rejected byte on a syntax error; 0 when p->state holds an
 *                  unknown value.
 */
size_t multipart_parser_c_execute(multipart_parser_c* p, const multipart_parser_c_settings* settings, const char *buf, size_t len) {
  size_t i = 0;     /* index of the byte being examined */
  size_t mark = 0;  /* start of the data run not yet emitted */
  char c, cl;
  int is_last = 0;  /* non-zero on the final byte of this chunk */

  while (i < len) {
    c = buf[i];
    is_last = (i == (len - 1));
    switch (p->state) {
      case s_start:
        multipart_log("s_start");
        p->index = 0;
        NOTIFY_CB(message_begin);
        p->state = s_start_boundary;

      /* fallthrough */
      case s_start_boundary:
        multipart_log("s_start_boundary");
        /* index == boundary_length: whole boundary matched, expect CR */
        if (p->index == p->boundary_length) {
          if (c != CR) {
            return i;
          }
          p->index++;
          break;
        } else if (p->index == (p->boundary_length + 1)) {
          /* CR seen, expect LF */
          if (c != LF) {
            return i;
          }
          p->index = 0;
          NOTIFY_CB(part_begin);
          p->state = s_header_field_start;
          break;
        }

        /* if the starting boundary doesn't match, assume we are reading the preamble */
        if (c != p->multipart_boundary[p->index]) {
          p->index = 0;

          if (c == CR) { /* smallest preamble is CR LF */
            p->state = s_preamble_almost_boundary;
          } else {
            p->state = s_preamble;
          }
          break;
        }

        p->index++;
        break;

      case s_preamble:
        /* skip preamble bytes until a CR that may end the line */
        if (c == CR) {
          p->state = s_preamble_almost_boundary;
        }
        break;

      case s_preamble_almost_boundary:
        /* CR LF: try to match the boundary again on the next line */
        if (c == LF) {
          p->state = s_start_boundary;
        } else {
          p->state = s_preamble;
        }
        break;

      case s_header_field_start:
        multipart_log("s_header_field_start");
        mark = i;
        p->state = s_header_field;

      /* fallthrough */
      case s_header_field:
        multipart_log("s_header_field");
        /* CR where a header name would start/continue: end of headers */
        if (c == CR) {
          p->state = s_headers_almost_done;
          break;
        }

        if (c == ':') {
          EMIT_DATA_CB(header_field, buf + mark, i - mark);
          p->state = s_header_value_start;
          break;
        }

        /*
         * <ctype.h> functions require a value representable as unsigned
         * char; passing a negative plain char is undefined behaviour.
         */
        cl = (char) tolower((unsigned char) c);
        if ((c != '-') && (cl < 'a' || cl > 'z')) {
          multipart_log("invalid character in header name");
          return i;
        }
        /* chunk ends inside the name: flush what we have so far */
        if (is_last) {
          EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
        }
        break;

      case s_headers_almost_done:
        multipart_log("s_headers_almost_done");
        if (c != LF) {
          return i;
        }

        p->state = s_part_data_start;
        break;

      case s_header_value_start:
        multipart_log("s_header_value_start");
        /* skip spaces between ':' and the value */
        if (c == ' ') {
          break;
        }

        mark = i;
        p->state = s_header_value;

      /* fallthrough */
      case s_header_value:
        multipart_log("s_header_value");
        if (c == CR) {
          EMIT_DATA_CB(header_value, buf + mark, i - mark);
          p->state = s_header_value_almost_done;
          break;
        }
        /* chunk ends inside the value: flush what we have so far */
        if (is_last) {
          EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
        }
        break;

      case s_header_value_almost_done:
        multipart_log("s_header_value_almost_done");
        if (c != LF) {
          return i;
        }
        p->state = s_header_field_start;
        break;

      case s_part_data_start:
        multipart_log("s_part_data_start");
        NOTIFY_CB(headers_complete);
        mark = i;
        p->state = s_part_data;

      /* fallthrough */
      case s_part_data:
        multipart_log("s_part_data");
        /* a CR may start the CRLF that precedes a boundary */
        if (c == CR) {
          EMIT_DATA_CB(part_data, buf + mark, i - mark);
          mark = i;
          p->state = s_part_data_almost_boundary;
          p->lookbehind[0] = CR;
          break;
        }
        if (is_last) {
          EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
        }
        break;

      case s_part_data_almost_boundary:
        multipart_log("s_part_data_almost_boundary");
        if (c == LF) {
          p->state = s_part_data_boundary;
          p->lookbehind[1] = LF;
          p->index = 0;
          break;
        }
        /*
         * The CR was ordinary data: emit it from lookbehind, then step i
         * back so the ++i below re-examines this byte in s_part_data.
         */
        EMIT_DATA_CB(part_data, p->lookbehind, 1);
        p->state = s_part_data;
        mark = i--;
        break;

      case s_part_data_boundary:
        multipart_log("s_part_data_boundary");
        if (p->multipart_boundary[p->index] != c) {
          /*
           * False alarm: CR LF plus the p->index boundary bytes matched so
           * far were data. Emit them and re-examine the current byte.
           */
          EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
          p->state = s_part_data;
          mark = i--;
          break;
        }
        p->lookbehind[2 + p->index] = c;
        if ((++p->index) == p->boundary_length) {
          NOTIFY_CB(part_complete);
          p->state = s_part_data_almost_end;
        }
        break;

      case s_part_data_almost_end:
        multipart_log("s_part_data_almost_end");
        /* '-' after a boundary: possibly the closing "--" */
        if (c == '-') {
          p->state = s_part_data_final_hyphen;
          break;
        }
        /* CR after a boundary: another part follows */
        if (c == CR) {
          p->state = s_part_complete;
          break;
        }
        return i;

      case s_part_data_final_hyphen:
        multipart_log("s_part_data_final_hyphen");
        if (c == '-') {
          NOTIFY_CB(message_complete);
          p->state = s_end;
          break;
        }
        return i;

      case s_part_complete:
        multipart_log("s_part_complete");
        if (c == LF) {
          p->state = s_header_field_start;
          NOTIFY_CB(part_begin);
          break;
        }
        return i;

      case s_end:
        /* bytes after the closing boundary are logged and skipped */
        multipart_log("s_end: %02X", (int) c);
        break;

      default:
        multipart_log("Multipart parser unrecoverable error");
        return 0;
    }
    ++i;
  }

  return len;
}
Пример #3
0
/**
 * Feed one chunk of a multipart body to the parser state machine.
 *
 * Bytes are examined one at a time; p->state selects the handling and is
 * updated as parsing advances, so consecutive chunks can be passed in
 * successive calls.
 *
 * NOTE(review): NOTIFY_CB and EMIT_DATA_CB are macros defined outside this
 * block; presumably they invoke user callbacks and may return early --
 * confirm against their definition.
 *
 * @param p    parser; its state, index, boundary_length, multipart_boundary
 *             and lookbehind members are read and/or written here.
 * @param buf  bytes to parse.
 * @param len  number of bytes available in buf.
 * @return     len when every byte was accepted; the index of the first
 *             rejected byte on a syntax error; 0 when p->state holds an
 *             unknown value.
 */
size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
  size_t i = 0;     /* index of the byte being examined */
  size_t mark = 0;  /* start of the data run not yet emitted */
  char c, cl;
  int is_last = 0;  /* non-zero on the final byte of this chunk */

  while (i < len) {
    c = buf[i];
    is_last = (i == (len - 1));
    switch (p->state) {
      case s_start:
        multipart_log("s_start");
        p->index = 0;
        p->state = s_start_boundary;

      /* fallthrough */
      case s_start_boundary:
        multipart_log("s_start_boundary");
        /* index == boundary_length: whole boundary matched, expect CR */
        if (p->index == p->boundary_length) {
          if (c != CR) {
            return i;
          }
          p->index++;
          break;
        }
        // The encapsulation boundary and a CR have been seen, so LF must
        // come next.
        else if (p->index == (p->boundary_length + 1)) {
          if (c != LF) {
            return i;
          }
          p->index = 0;
          NOTIFY_CB(part_data_begin);
          p->state = s_header_field_start;
          break;
        }
        // FIXME: quoting section 7.2.1 of
        // http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html :
        // The Content-Type field for multipart entities requires one parameter,
        // "boundary", which is used to specify the encapsulation boundary. The
        // encapsulation boundary is defined as a line consisting entirely of
        // two hyphen characters ("-", decimal code 45) followed by the boundary
        // parameter value from the Content-Type header field.
        //
        // The "encapsulation boundary" and the "boundary parameter value" are
        // different things; so, apart from matching p->multipart_boundary
        // (i.e. the "boundary parameter value"), the two leading hyphens
        // would have to be matched separately.
        if (c != p->multipart_boundary[p->index]) {
          return i;
        }
        p->index++;
        break;

      case s_header_field_start:
        multipart_log("s_header_field_start");
        mark = i;
        p->state = s_header_field;

      /* fallthrough */
      case s_header_field:
        multipart_log("s_header_field");
        /* CR where a header name would start/continue: end of headers */
        if (c == CR) {
          p->state = s_headers_almost_done;
          break;
        }

        if (c == ':') {
          EMIT_DATA_CB(header_field, buf + mark, i - mark);
          p->state = s_header_value_start;
          break;
        }

        /*
         * <ctype.h> functions require a value representable as unsigned
         * char; passing a negative plain char is undefined behaviour.
         *
         * '-' is accepted here rather than through a separate early
         * `break`, so that a name fragment ending in '-' on the last byte
         * of the chunk is still flushed below instead of being dropped.
         */
        cl = (char) tolower((unsigned char) c);
        if ((c != '-') && (cl < 'a' || cl > 'z')) {
          multipart_log("invalid character in header name");
          return i;
        }
        /* chunk ends inside the name: flush what we have so far */
        if (is_last) {
          EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
        }
        break;

      case s_headers_almost_done:
        multipart_log("s_headers_almost_done");
        if (c != LF) {
          return i;
        }

        p->state = s_part_data_start;
        break;

      case s_header_value_start:
        multipart_log("s_header_value_start");
        /* skip spaces between ':' and the value */
        if (c == ' ') {
          break;
        }

        mark = i;
        p->state = s_header_value;

      /* fallthrough */
      case s_header_value:
        multipart_log("s_header_value");
        if (c == CR) {
          EMIT_DATA_CB(header_value, buf + mark, i - mark);
          p->state = s_header_value_almost_done;
          /*
           * Stop here: without this break a CR on the last byte of the
           * chunk would emit the value a second time, CR included.
           */
          break;
        }
        /* chunk ends inside the value: flush what we have so far */
        if (is_last) {
          EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
        }
        break;

      case s_header_value_almost_done:
        multipart_log("s_header_value_almost_done");
        if (c != LF) {
          return i;
        }
        p->state = s_header_field_start;
        break;

      case s_part_data_start:
        multipart_log("s_part_data_start");
        NOTIFY_CB(headers_complete);
        mark = i;
        p->state = s_part_data;

      /* fallthrough */
      case s_part_data:
        multipart_log("s_part_data");
        /* a CR may start the CRLF that precedes a boundary */
        if (c == CR) {
          EMIT_DATA_CB(part_data, buf + mark, i - mark);
          mark = i;
          p->state = s_part_data_almost_boundary;
          p->lookbehind[0] = CR;
          break;
        }
        if (is_last) {
          EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
        }
        break;

      case s_part_data_almost_boundary:
        multipart_log("s_part_data_almost_boundary");
        if (c == LF) {
          p->state = s_part_data_boundary;
          p->lookbehind[1] = LF;
          p->index = 0;
          break;
        }
        /*
         * The CR was ordinary data: emit it from lookbehind, then step i
         * back so the ++i below re-examines this byte in s_part_data.
         */
        EMIT_DATA_CB(part_data, p->lookbehind, 1);
        p->state = s_part_data;
        mark = i--;
        break;

      case s_part_data_boundary:
        multipart_log("s_part_data_boundary");
        if (p->multipart_boundary[p->index] != c) {
          /*
           * False alarm: CR LF plus the p->index boundary bytes matched so
           * far were data. Emit them and re-examine the current byte.
           */
          EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
          p->state = s_part_data;
          mark = i--;
          break;
        }
        p->lookbehind[2 + p->index] = c;
        if ((++p->index) == p->boundary_length) {
          NOTIFY_CB(part_data_end);
          p->state = s_part_data_almost_end;
        }
        break;

      case s_part_data_almost_end:
        multipart_log("s_part_data_almost_end");
        /* '-' after a boundary: possibly the closing "--" */
        if (c == '-') {
          p->state = s_part_data_final_hyphen;
          break;
        }
        /* CR after a boundary: another part follows */
        if (c == CR) {
          p->state = s_part_data_end;
          break;
        }
        return i;

      case s_part_data_final_hyphen:
        multipart_log("s_part_data_final_hyphen");
        if (c == '-') {
          NOTIFY_CB(body_end);
          p->state = s_end;
          break;
        }
        return i;

      case s_part_data_end:
        multipart_log("s_part_data_end");
        if (c == LF) {
          p->state = s_header_field_start;
          NOTIFY_CB(part_data_begin);
          break;
        }
        return i;

      case s_end:
        /* bytes after the closing boundary are logged and skipped */
        multipart_log("s_end: %02X", (int) c);
        break;

      default:
        multipart_log("Multipart parser unrecoverable error");
        return 0;
    }
    ++i;
  }

  return len;
}
/**
 * Feed one chunk of a multipart body to the parser state machine.
 *
 * Bytes are examined one at a time; p->state selects the handling and is
 * updated as parsing advances, so consecutive chunks can be passed in
 * successive calls.
 *
 * NOTE(review): NOTIFY_CB and EMIT_DATA_CB are macros defined outside this
 * block; presumably they invoke user callbacks and may return early --
 * confirm against their definition.
 *
 * @param p    parser; its state, index, boundary_length, multipart_boundary
 *             and lookbehind members are read and/or written here.
 * @param buf  bytes to parse.
 * @param len  number of bytes available in buf (0 is allowed and parses
 *             nothing).
 * @return     number of bytes parsed: len when every byte was accepted; the
 *             index of the first rejected byte on a syntax error; 0 when
 *             p->state holds an unknown value.
 */
size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) {
  size_t i = 0;     /* index of the byte being examined */
  size_t mark = 0;  /* start of the data run not yet emitted */
  char c;
  int is_last = 0;  /* non-zero on the final byte of this chunk */

  /*
   * Loop on the index, not on is_last: that keeps an empty buffer from
   * being read at all, and lets a byte that is re-dispatched via `i--`
   * below still be processed when it is the last byte of the chunk.
   */
  while (i < len) {
    c = buf[i];
    is_last = (i == (len - 1));
    switch (p->state) {
      case s_start:
        multipart_log("s_start");
        p->index = 0;
        p->state = s_start_boundary;

      /* fallthrough */
      case s_start_boundary:
        multipart_log("s_start_boundary");
        //Check to see if one past the end of the boundary
        if (p->index == p->boundary_length) {
          //If not properly terminated, then return immediately
          if (c != CR) {
            return i;
          }
          p->index++;
          break;
        } else if (p->index == (p->boundary_length + 1)) {
          //CR seen, expect LF
          if (c != LF) {
            return i;
          }
          p->index = 0;
          NOTIFY_CB(part_data_begin);
          p->state = s_header_field_start;
          break;
        }
        //Still inside the boundary: bytes must match exactly
        if (c != p->multipart_boundary[p->index]) {
          return i;
        }
        p->index++;
        break;

      case s_header_field_start:
        multipart_log("s_header_field_start");
        mark = i;
        p->state = s_header_field;

      /* fallthrough */
      case s_header_field:
        multipart_log("s_header_field");
        //CR where a header name would start/continue: end of headers
        if (c == CR) {
          p->state = s_headers_almost_done;
          break;
        }

        if (c == '-') {
          //Chunk ends inside the name: flush what we have so far
          if (is_last) {
            EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
          }

          break;
        }

        if (c == ':') {
          EMIT_DATA_CB(header_field, buf + mark, i - mark);
          p->state = s_header_value_start;
          break;
        }

        //<ctype.h> functions require a value representable as unsigned
        //char; passing a negative plain char is undefined behaviour.
        if (!isalpha((unsigned char) c)) {
          multipart_log("invalid character in header name");
          return i;
        }
        if (is_last) {
          EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1);
        }
        break;

      case s_headers_almost_done:
        multipart_log("s_headers_almost_done");
        if (c != LF) {
          return i;
        }

        p->state = s_part_data_start;
        break;

      case s_header_value_start:
        multipart_log("s_header_value_start");
        //Skip spaces between ':' and the value
        if (c == ' ') {
          break;
        }

        mark = i;
        p->state = s_header_value;

      /* fallthrough */
      case s_header_value:
        multipart_log("s_header_value");
        if (c == CR) {
          EMIT_DATA_CB(header_value, buf + mark, i - mark);
          p->state = s_header_value_almost_done;
          //Stop here: without this break a CR on the last byte of the
          //chunk would emit the value a second time, CR included.
          break;
        }
        //Chunk ends inside the value: flush what we have so far
        if (is_last) {
          EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1);
        }
        break;

      case s_header_value_almost_done:
        multipart_log("s_header_value_almost_done");
        if (c != LF) {
          return i;
        }
        NOTIFY_CB(header_value_end);
        p->state = s_header_field_start;
        break;

      case s_part_data_start:
        multipart_log("s_part_data_start");
        NOTIFY_CB(headers_complete);
        mark = i;
        p->state = s_part_data;

      /* fallthrough */
      case s_part_data:
        multipart_log("s_part_data");
        //A CR may start the CRLF that precedes a boundary
        if (c == CR) {
          EMIT_DATA_CB(part_data, buf + mark, i - mark);
          mark = i;
          p->state = s_part_data_almost_boundary;
          p->lookbehind[0] = CR;
          break;
        }
        if (is_last) {
          EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
        }
        break;

      case s_part_data_almost_boundary:
        multipart_log("s_part_data_almost_boundary");
        if (c == LF) {
          p->state = s_part_data_boundary;
          p->lookbehind[1] = LF;
          p->index = 0;
          break;
        }
        //The CR was ordinary data: emit it from lookbehind, then step i
        //back so the ++i below re-examines this byte in s_part_data.
        EMIT_DATA_CB(part_data, p->lookbehind, 1);
        p->state = s_part_data;
        mark = i--;
        break;

      case s_part_data_boundary:
        multipart_log("s_part_data_boundary");
        if (p->multipart_boundary[p->index] != c) {
          //False alarm: CR LF plus the p->index boundary bytes matched so
          //far were data. Emit them and re-examine the current byte.
          EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index);
          p->state = s_part_data;
          mark = i--;
          break;
        }
        p->lookbehind[2 + p->index] = c;
        if ((++p->index) == p->boundary_length) {
          NOTIFY_CB(part_data_end);
          p->state = s_part_data_almost_end;
        }
        break;

      case s_part_data_almost_end:
        multipart_log("s_part_data_almost_end");
        //'-' after a boundary: possibly the closing "--"
        if (c == '-') {
          p->state = s_part_data_final_hyphen;
          break;
        }
        //CR after a boundary: another part follows
        if (c == CR) {
          p->state = s_part_data_end;
          break;
        }
        return i;

      case s_part_data_final_hyphen:
        multipart_log("s_part_data_final_hyphen");
        if (c == '-') {
          NOTIFY_CB(body_end);
          p->state = s_end;
          break;
        }
        return i;

      case s_part_data_end:
        multipart_log("s_part_data_end");
        if (c == LF) {
          p->state = s_header_field_start;
          NOTIFY_CB(part_data_begin);
          break;
        }
        return i;

      case s_end:
        //Bytes after the closing boundary are logged and skipped
        multipart_log("s_end: %02X", (int) c);
        break;

      default:
        multipart_log("Multipart parser unrecoverable error");
        return 0;
    }
    ++i;
  }

  return len;
}