Fix the problem with HTTP requests containing '\0' and fix the query string in the URL

Fork of mbed-http by sandbox

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers http_parser.c Source File

http_parser.c

00001 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
00002  *
00003  * Additional changes are licensed under the same terms as NGINX and
00004  * copyright Joyent, Inc. and other Node contributors. All rights reserved.
00005  *
00006  * Permission is hereby granted, free of charge, to any person obtaining a copy
00007  * of this software and associated documentation files (the "Software"), to
00008  * deal in the Software without restriction, including without limitation the
00009  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
00010  * sell copies of the Software, and to permit persons to whom the Software is
00011  * furnished to do so, subject to the following conditions:
00012  *
00013  * The above copyright notice and this permission notice shall be included in
00014  * all copies or substantial portions of the Software.
00015  *
00016  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00017  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00018  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00019  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00020  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00021  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
00022  * IN THE SOFTWARE.
00023  */
00024 #include "http_parser.h"
00025 #include <assert.h>
00026 #include <stddef.h>
00027 #include <ctype.h>
00028 #include <stdlib.h>
00029 #include <string.h>
00030 #include <limits.h>
00031 
/* Fallback for toolchains whose <limits.h> does not provide ULLONG_MAX. */
#if !defined(ULLONG_MAX)
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of a and b. The selected argument is evaluated twice. */
#if !defined(MIN)
# define MIN(a,b) ((b) > (a) ? (a) : (b))
#endif

/* Element count of a true array (not of a pointer). */
#if !defined(ARRAY_SIZE)
# define ARRAY_SIZE(a) (sizeof(a) / sizeof(*(a)))
#endif

/* Test bit i of a bitmap stored as an array of bytes:
 * byte index is i / 8, bit index inside the byte is i % 8. Yields 0 or 1.
 */
#if !defined(BIT_AT)
# define BIT_AT(a, i)                                                \
  ((((unsigned int) (a)[(unsigned int) (i) >> 3] >>                  \
     ((unsigned int) (i) & 7)) & 1u) != 0)
#endif

/* a[i] when i is inside the array, otherwise the fallback value v. */
#if !defined(ELEM_AT)
# define ELEM_AT(a, i, v) (ARRAY_SIZE(a) > (unsigned int) (i) ? (a)[(i)] : (v))
#endif
00053 
/* Record error code e on the parser (expects a local `parser` pointer). */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while(0)

/* The state machine works on a local copy of the state, `p_state`.
 * CURRENT_STATE() reads it, UPDATE_STATE() overwrites it, and RETURN()
 * stores it back into parser->state before returning V.
 *
 * NOTE: UPDATE_STATE() and RETURN() expand with their own trailing ';', so
 * `UPDATE_STATE(x);` produces an extra empty statement. Call sites are
 * written against that, so it is kept as is; do not use either macro as the
 * unbraced body of an if/else.
 */
#define CURRENT_STATE() p_state
#define UPDATE_STATE(V) p_state = (enum state) (V);
#define RETURN(V)                                                    \
do {                                                                 \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0);

/* Process the current byte again under the (updated) state. The expansion
 * must end here: a trailing line continuation would splice whatever follows
 * the definition into the macro.
 */
#define REEXECUTE()                                                  \
  goto reexecute;
00068 
00069 
/* Branch-prediction hints: forwarded to __builtin_expect when __GNUC__ is
 * defined, plain pass-through of the expression otherwise.
 */
#if defined(__GNUC__)
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
00077 
00078 
/* Invoke the notify callback settings->on_<FOR> when one is installed.
 *
 * The local state is stored into parser->state before the call and read back
 * afterwards. A non-zero callback result records HPE_CB_<FOR>; whenever the
 * parser errno is not HPE_OK after the call, the enclosing function
 * returns ER.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (LIKELY(settings->on_##FOR)) {                                  \
    parser->state = CURRENT_STATE();                                 \
    if (UNLIKELY(settings->on_##FOR(parser) != 0)) {                 \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
    UPDATE_STATE(parser->state);                                     \
                                                                     \
    /* Either the callback failed or the parser got paused: leave */ \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Notify variant that counts the current byte as consumed on early return */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Notify variant that leaves the current byte unconsumed on early return */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
00103 
/* Invoke the data callback settings->on_<FOR> with the LEN bytes starting at
 * <FOR>_mark. Nothing happens when the mark is not set.
 *
 * The local state is stored into parser->state before the call and read back
 * afterwards. A non-zero callback result records HPE_CB_<FOR>; whenever the
 * parser errno is not HPE_OK after the call, the enclosing function returns
 * ER (the mark is then left untouched). Otherwise the mark is cleared.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark != NULL) {                                          \
    if (LIKELY(settings->on_##FOR)) {                                \
      parser->state = CURRENT_STATE();                               \
      if (UNLIKELY(settings->on_##FOR(parser, FOR##_mark, (LEN))     \
                   != 0)) {                                          \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
      UPDATE_STATE(parser->state);                                   \
                                                                     \
      /* Either the callback failed or the parser got paused */      \
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Data variant that counts the current byte as consumed on early return */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Data variant that leaves the current byte unconsumed on early return */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
00134 
/* Point <FOR>_mark at the current byte `p`, unless the mark is already set */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (FOR##_mark == 0) {                                             \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
00142 
/* Cap the total size of the HTTP headers (status line included) at
 * HTTP_MAX_HEADER_SIZE. The purpose is to protect embedders against
 * denial-of-service attacks in which the attacker sends a never-ending
 * header that the embedder keeps buffering.
 *
 * Arguably embedders should check this themselves, but most won't bother,
 * so it is done here on their behalf. HTTP_MAX_HEADER_SIZE is still far
 * bigger than any reasonable request or response, so day-to-day operation
 * should never be affected.
 *
 * Adds V to parser->nread; once the total exceeds the limit, records
 * HPE_HEADER_OVERFLOW and jumps to the enclosing function's `error` label.
 */
#define COUNT_HEADER_SIZE(V)                                         \
do {                                                                 \
  parser->nread += (V);                                              \
  if (UNLIKELY((HTTP_MAX_HEADER_SIZE) < parser->nread)) {            \
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
    goto error;                                                      \
  }                                                                  \
} while (0)
00162 
00163 
/* Lower-case spellings of the header names and header values the parser
 * singles out (presumably compared against lower-cased input by the
 * h_matching_* header states -- confirm at the use sites).
 */
#define PROXY_CONNECTION    "proxy-connection"
#define CONNECTION          "connection"
#define CONTENT_LENGTH      "content-length"
#define TRANSFER_ENCODING   "transfer-encoding"
#define UPGRADE             "upgrade"
#define CHUNKED             "chunked"
#define KEEP_ALIVE          "keep-alive"
#define CLOSE               "close"
00172 
00173 
00174 static const char *method_strings[] =
00175   {
00176 #define XX(num, name, string) #string,
00177   HTTP_METHOD_MAP(XX)
00178 #undef XX
00179   };
00180 
00181 
/* Token characters as defined by RFC 2616, mapped to lower case.
 *
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * tokens[c] is 0 when c is not a token character; otherwise it is c itself,
 * with 'A'-'Z' folded to 'a'-'z'. Entries 0x80-0xff are not listed and are
 * therefore 0.
 */
static const char tokens[256] = {
  /* 0x00-0x07: control characters */
  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x08-0x0f: control characters (HT, LF and CR among them) */
  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x10-0x17: control characters */
  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x18-0x1f: control characters */
  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x20-0x27: SP ! " # $ % & ' */
  0,   '!', 0,   '#', '$', '%', '&', '\'',
  /* 0x28-0x2f: ( ) * + , - . / */
  0,   0,   '*', '+', 0,   '-', '.', 0,
  /* 0x30-0x37: digits 0 to 7 */
  '0', '1', '2', '3', '4', '5', '6', '7',
  /* 0x38-0x3f: 8 9 : ; < = > ? */
  '8', '9', 0,   0,   0,   0,   0,   0,
  /* 0x40-0x47: @ and A to G */
  0,   'a', 'b', 'c', 'd', 'e', 'f', 'g',
  /* 0x48-0x4f: H to O */
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50-0x57: P to W */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  /* 0x58-0x5f: X Y Z [ \ ] ^ _ */
  'x', 'y', 'z', 0,   0,   0,   '^', '_',
  /* 0x60-0x67: ` and a to g */
  '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  /* 0x68-0x6f: h to o */
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70-0x77: p to w */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  /* 0x78-0x7f: x y z { | } ~ DEL */
  'x', 'y', 'z', 0,   '|', 0,   '~', 0
};
00222 
00223 
/* Value of a hexadecimal digit, indexed by byte value; -1 marks a byte that
 * is not a hex digit.
 *
 * All 256 entries are spelled out on purpose: entries left out of the
 * initialiser would be zero-initialised, which would make every byte in
 * 0x80-0xff look like the valid digit '0' instead of an invalid character.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80-0xff: never hex digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
00234 
00235 
/* T(v) contributes bit mask v only in lenient (non-strict) builds. */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of the characters accepted as ordinary URL characters: character c
 * is represented by bit (c & 7) of byte (c >> 3). Only the 16 bytes covering
 * 0x00-0x7f are listed; the remaining 16 bytes are zero.
 */
static const uint8_t normal_url_char[32] = {
  /* 0x00-0x07: control characters */
  0x00,
  /* 0x08-0x0f: HT (0x09) and FF (0x0c) are accepted in lenient builds only */
  T(0x02) | T(0x10),
  /* 0x10-0x17: control characters */
  0x00,
  /* 0x18-0x1f: control characters */
  0x00,
  /* 0x20-0x27: everything except SP and '#' */
  0xf6,
  /* 0x28-0x2f */
  0xff,
  /* 0x30-0x37 */
  0xff,
  /* 0x38-0x3f: everything except '?' */
  0x7f,
  /* 0x40-0x47 */
  0xff,
  /* 0x48-0x4f */
  0xff,
  /* 0x50-0x57 */
  0xff,
  /* 0x58-0x5f */
  0xff,
  /* 0x60-0x67 */
  0xff,
  /* 0x68-0x6f */
  0xff,
  /* 0x70-0x77 */
  0xff,
  /* 0x78-0x7f: everything except DEL */
  0x7f
};

#undef T
00278 
/* States of the message parser. The numeric order matters: see the note
 * after s_headers_done.
 */
enum state {
  s_dead = 1, /* important that this is > 0 */

  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status_start,
  s_res_status,
  s_res_line_almost_done,

  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  s_header_field_start,
  s_header_field,
  s_header_value_discard_ws,
  s_header_value_discard_ws_almost_done,
  s_header_value_discard_lws,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};


/* True while `state` is one of the header states, i.e. not past
 * s_headers_done. The argument is parenthesised so that any expression
 * (e.g. a conditional expression) can be passed safely.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
00361 
00362 
/* States used while recognising header names and header values. */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  /* a header name is being matched */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* a header name has been matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* a header value is being matched */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_token_start,
  h_matching_connection_keep_alive,
  h_matching_connection_close,
  h_matching_connection_upgrade,
  h_matching_connection_token,

  /* a header value has been matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close,
  h_connection_upgrade
};
00392 
/* States for parsing the userinfo / host / port portion of a URL. */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,
  s_http_host_port_start,
  s_http_host_port
};
00408 
/* Character-class macros. TOKEN, IS_URL_CHAR and IS_HOST_CHAR depend on
 * HTTP_PARSER_STRICT.
 *
 * Every macro parenthesises its argument, so any expression may be passed.
 * Most of them evaluate the argument more than once, though, so the
 * expression must be free of side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20, which maps 'A'-'Z' onto 'a'-'z' */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that never treats SP as a token, whatever the build mode */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Lenient mode: SP counts as a token, bytes with the high bit set count as
 * URL characters, and '_' is allowed in host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is acceptable inside a header: CR, LF, HT, a visible
 * (printable) US-ASCII character or SP, or %x80-FF. Rejected are the
 * remaining control characters (0-31) and DEL (127).
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
00444 
/* State in which a new message begins, chosen from parser->type. */
#define start_state (parser->type != HTTP_REQUEST ? s_start_res : s_start_req)


#if HTTP_PARSER_STRICT
/* Strict build: a true `cond` records HPE_STRICT and jumps to the enclosing
 * function's `error` label.
 */
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
/* Strict build: the next message is only accepted on a keep-alive parser */
# define NEW_MESSAGE() (!http_should_keep_alive(parser) ? s_dead : start_state)
#else
/* Lenient build: the check expands to nothing (cond is not evaluated) and a
 * new message may always start.
 */
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
00461 
00462 
00463 /* Map errno values to strings for human-readable output */
00464 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
00465 static struct {
00466   const char *name;
00467   const char *description;
00468 } http_strerror_tab[] = {
00469   HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
00470 };
00471 #undef HTTP_STRERROR_GEN
00472 /* Forward declaration. Non-static, so presumably also declared in http_parser.h -- confirm. */
00473 int http_message_needs_eof(const http_parser *parser);
00474 
00475 /* Our URL parser.
00476  *
00477  * This is designed to be shared by http_parser_execute() for URL validation,
00478  * hence it has a state transition + byte-for-byte interface. In addition, it
00479  * is meant to be embedded in http_parser_parse_url(), which does the dirty
00480  * work of turning state transitions URL components for its API.
00481  *
00482  * This function should only be invoked with non-space characters. It is
00483  * assumed that the caller cares about (and can detect) the transition between
00484  * URL and non-URL states by looking for these.
00485  */
00486 static enum state
00487 parse_url_char(enum state s, const char ch)
00488 {
00489   if (ch == ' ' || ch == '\r' || ch == '\n') {
00490     return s_dead;
00491   }
00492 
00493 #if HTTP_PARSER_STRICT
00494   if (ch == '\t' || ch == '\f') {
00495     return s_dead;
00496   }
00497 #endif
00498 
00499   switch (s) {
00500     case s_req_spaces_before_url:
00501       /* Proxied requests are followed by scheme of an absolute URI (alpha).
00502        * All methods except CONNECT are followed by '/' or '*'.
00503        */
00504 
00505       if (ch == '/' || ch == '*') {
00506         return s_req_path;
00507       }
00508 
00509       if (IS_ALPHA(ch)) {
00510         return s_req_schema;
00511       }
00512 
00513       break;
00514 
00515     case s_req_schema:
00516       if (IS_ALPHA(ch)) {
00517         return s;
00518       }
00519 
00520       if (ch == ':') {
00521         return s_req_schema_slash;
00522       }
00523 
00524       break;
00525 
00526     case s_req_schema_slash:
00527       if (ch == '/') {
00528         return s_req_schema_slash_slash;
00529       }
00530 
00531       break;
00532 
00533     case s_req_schema_slash_slash:
00534       if (ch == '/') {
00535         return s_req_server_start;
00536       }
00537 
00538       break;
00539 
00540     case s_req_server_with_at:
00541       if (ch == '@') {
00542         return s_dead;
00543       }
00544 
00545     /* FALLTHROUGH */
00546     case s_req_server_start:
00547     case s_req_server:
00548       if (ch == '/') {
00549         return s_req_path;
00550       }
00551 
00552       if (ch == '?') {
00553         return s_req_query_string_start;
00554       }
00555 
00556       if (ch == '@') {
00557         return s_req_server_with_at;
00558       }
00559 
00560       if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
00561         return s_req_server;
00562       }
00563 
00564       break;
00565 
00566     case s_req_path:
00567       if (IS_URL_CHAR(ch)) {
00568         return s;
00569       }
00570 
00571       switch (ch) {
00572         case '?':
00573           return s_req_query_string_start;
00574 
00575         case '#':
00576           return s_req_fragment_start;
00577       }
00578 
00579       break;
00580 
00581     case s_req_query_string_start:
00582     case s_req_query_string:
00583       if (IS_URL_CHAR(ch)) {
00584         return s_req_query_string;
00585       }
00586 
00587       switch (ch) {
00588         case '?':
00589           /* allow extra '?' in query string */
00590           return s_req_query_string;
00591 
00592         case '#':
00593           return s_req_fragment_start;
00594       }
00595 
00596       break;
00597 
00598     case s_req_fragment_start:
00599       if (IS_URL_CHAR(ch)) {
00600         return s_req_fragment;
00601       }
00602 
00603       switch (ch) {
00604         case '?':
00605           return s_req_fragment;
00606 
00607         case '#':
00608           return s;
00609       }
00610 
00611       break;
00612 
00613     case s_req_fragment:
00614       if (IS_URL_CHAR(ch)) {
00615         return s;
00616       }
00617 
00618       switch (ch) {
00619         case '?':
00620         case '#':
00621           return s;
00622       }
00623 
00624       break;
00625 
00626     default:
00627       break;
00628   }
00629 
00630   /* We should never fall out of the switch above unless there's an error */
00631   return s_dead;
00632 }
00633 
00634 size_t http_parser_execute (http_parser *parser,
00635                             const http_parser_settings *settings,
00636                             const char *data,
00637                             size_t len)
00638 {
00639   char c, ch;
00640   int8_t unhex_val;
00641   const char *p = data;
00642   const char *header_field_mark = 0;
00643   const char *header_value_mark = 0;
00644   const char *url_mark = 0;
00645   const char *body_mark = 0;
00646   const char *status_mark = 0;
00647   enum state p_state = (enum state) parser->state;
00648   const unsigned int lenient = parser->lenient_http_headers;
00649 
00650   /* We're in an error state. Don't bother doing anything. */
00651   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
00652     return 0;
00653   }
00654 
00655   if (len == 0) {
00656     switch (CURRENT_STATE()) {
00657       case s_body_identity_eof:
00658         /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
00659          * we got paused.
00660          */
00661         CALLBACK_NOTIFY_NOADVANCE(message_complete);
00662         return 0;
00663 
00664       case s_dead:
00665       case s_start_req_or_res:
00666       case s_start_res:
00667       case s_start_req:
00668         return 0;
00669 
00670       default:
00671         SET_ERRNO(HPE_INVALID_EOF_STATE);
00672         return 1;
00673     }
00674   }
00675 
00676 
00677   if (CURRENT_STATE() == s_header_field)
00678     header_field_mark = data;
00679   if (CURRENT_STATE() == s_header_value)
00680     header_value_mark = data;
00681   switch (CURRENT_STATE()) {
00682   case s_req_path:
00683   case s_req_schema:
00684   case s_req_schema_slash:
00685   case s_req_schema_slash_slash:
00686   case s_req_server_start:
00687   case s_req_server:
00688   case s_req_server_with_at:
00689   case s_req_query_string_start:
00690   case s_req_query_string:
00691   case s_req_fragment_start:
00692   case s_req_fragment:
00693     url_mark = data;
00694     break;
00695   case s_res_status:
00696     status_mark = data;
00697     break;
00698   default:
00699     break;
00700   }
00701 
00702   for (p=data; p != data + len; p++) {
00703     ch = *p;
00704 
00705     if (PARSING_HEADER(CURRENT_STATE()))
00706       COUNT_HEADER_SIZE(1);
00707 
00708 reexecute:
00709     switch (CURRENT_STATE()) {
00710 
00711       case s_dead:
00712         /* this state is used after a 'Connection: close' message
00713          * the parser will error out if it reads another message
00714          */
00715         if (LIKELY(ch == CR || ch == LF))
00716           break;
00717 
00718         SET_ERRNO(HPE_CLOSED_CONNECTION);
00719         goto error;
00720 
00721       case s_start_req_or_res:
00722       {
00723         if (ch == CR || ch == LF)
00724           break;
00725         parser->flags = 0;
00726         parser->content_length = ULLONG_MAX;
00727 
00728         if (ch == 'H') {
00729           UPDATE_STATE(s_res_or_resp_H);
00730 
00731           CALLBACK_NOTIFY(message_begin);
00732         } else {
00733           parser->type = HTTP_REQUEST;
00734           UPDATE_STATE(s_start_req);
00735           REEXECUTE();
00736         }
00737 
00738         break;
00739       }
00740 
00741       case s_res_or_resp_H:
00742         if (ch == 'T') {
00743           parser->type = HTTP_RESPONSE;
00744           UPDATE_STATE(s_res_HT);
00745         } else {
00746           if (UNLIKELY(ch != 'E')) {
00747             SET_ERRNO(HPE_INVALID_CONSTANT);
00748             goto error;
00749           }
00750 
00751           parser->type = HTTP_REQUEST;
00752           parser->method = HTTP_HEAD;
00753           parser->index = 2;
00754           UPDATE_STATE(s_req_method);
00755         }
00756         break;
00757 
00758       case s_start_res:
00759       {
00760         parser->flags = 0;
00761         parser->content_length = ULLONG_MAX;
00762 
00763         switch (ch) {
00764           case 'H':
00765             UPDATE_STATE(s_res_H);
00766             break;
00767 
00768           case CR:
00769           case LF:
00770             break;
00771 
00772           default:
00773             SET_ERRNO(HPE_INVALID_CONSTANT);
00774             goto error;
00775         }
00776 
00777         CALLBACK_NOTIFY(message_begin);
00778         break;
00779       }
00780 
00781       case s_res_H:
00782         STRICT_CHECK(ch != 'T');
00783         UPDATE_STATE(s_res_HT);
00784         break;
00785 
00786       case s_res_HT:
00787         STRICT_CHECK(ch != 'T');
00788         UPDATE_STATE(s_res_HTT);
00789         break;
00790 
00791       case s_res_HTT:
00792         STRICT_CHECK(ch != 'P');
00793         UPDATE_STATE(s_res_HTTP);
00794         break;
00795 
00796       case s_res_HTTP:
00797         STRICT_CHECK(ch != '/');
00798         UPDATE_STATE(s_res_first_http_major);
00799         break;
00800 
00801       case s_res_first_http_major:
00802         if (UNLIKELY(ch < '0' || ch > '9')) {
00803           SET_ERRNO(HPE_INVALID_VERSION);
00804           goto error;
00805         }
00806 
00807         parser->http_major = ch - '0';
00808         UPDATE_STATE(s_res_http_major);
00809         break;
00810 
00811       /* major HTTP version or dot */
00812       case s_res_http_major:
00813       {
00814         if (ch == '.') {
00815           UPDATE_STATE(s_res_first_http_minor);
00816           break;
00817         }
00818 
00819         if (!IS_NUM(ch)) {
00820           SET_ERRNO(HPE_INVALID_VERSION);
00821           goto error;
00822         }
00823 
00824         parser->http_major *= 10;
00825         parser->http_major += ch - '0';
00826 
00827         if (UNLIKELY(parser->http_major > 999)) {
00828           SET_ERRNO(HPE_INVALID_VERSION);
00829           goto error;
00830         }
00831 
00832         break;
00833       }
00834 
00835       /* first digit of minor HTTP version */
00836       case s_res_first_http_minor:
00837         if (UNLIKELY(!IS_NUM(ch))) {
00838           SET_ERRNO(HPE_INVALID_VERSION);
00839           goto error;
00840         }
00841 
00842         parser->http_minor = ch - '0';
00843         UPDATE_STATE(s_res_http_minor);
00844         break;
00845 
00846       /* minor HTTP version or the space before the status code */
00847       case s_res_http_minor:
00848       {
00849         if (ch == ' ') {
00850           UPDATE_STATE(s_res_first_status_code);
00851           break;
00852         }
00853 
00854         if (UNLIKELY(!IS_NUM(ch))) {
00855           SET_ERRNO(HPE_INVALID_VERSION);
00856           goto error;
00857         }
00858 
00859         parser->http_minor *= 10;
00860         parser->http_minor += ch - '0';
00861 
00862         if (UNLIKELY(parser->http_minor > 999)) {
00863           SET_ERRNO(HPE_INVALID_VERSION);
00864           goto error;
00865         }
00866 
00867         break;
00868       }
00869 
00870       case s_res_first_status_code:
00871       {
00872         if (!IS_NUM(ch)) {
00873           if (ch == ' ') {
00874             break;
00875           }
00876 
00877           SET_ERRNO(HPE_INVALID_STATUS);
00878           goto error;
00879         }
00880         parser->status_code = ch - '0';
00881         UPDATE_STATE(s_res_status_code);
00882         break;
00883       }
00884 
00885       case s_res_status_code:
00886       {
00887         if (!IS_NUM(ch)) {
00888           switch (ch) {
00889             case ' ':
00890               UPDATE_STATE(s_res_status_start);
00891               break;
00892             case CR:
00893               UPDATE_STATE(s_res_line_almost_done);
00894               break;
00895             case LF:
00896               UPDATE_STATE(s_header_field_start);
00897               break;
00898             default:
00899               SET_ERRNO(HPE_INVALID_STATUS);
00900               goto error;
00901           }
00902           break;
00903         }
00904 
00905         parser->status_code *= 10;
00906         parser->status_code += ch - '0';
00907 
00908         if (UNLIKELY(parser->status_code > 999)) {
00909           SET_ERRNO(HPE_INVALID_STATUS);
00910           goto error;
00911         }
00912 
00913         break;
00914       }
00915 
00916       case s_res_status_start:
00917       {
00918         if (ch == CR) {
00919           UPDATE_STATE(s_res_line_almost_done);
00920           break;
00921         }
00922 
00923         if (ch == LF) {
00924           UPDATE_STATE(s_header_field_start);
00925           break;
00926         }
00927 
00928         MARK(status);
00929         UPDATE_STATE(s_res_status);
00930         parser->index = 0;
00931         break;
00932       }
00933 
00934       case s_res_status:
00935         if (ch == CR) {
00936           UPDATE_STATE(s_res_line_almost_done);
00937           CALLBACK_DATA(status);
00938           break;
00939         }
00940 
00941         if (ch == LF) {
00942           UPDATE_STATE(s_header_field_start);
00943           CALLBACK_DATA(status);
00944           break;
00945         }
00946 
00947         break;
00948 
00949       case s_res_line_almost_done:
00950         STRICT_CHECK(ch != LF);
00951         UPDATE_STATE(s_header_field_start);
00952         break;
00953 
00954       case s_start_req:
00955       {
00956         if (ch == CR || ch == LF)
00957           break;
00958         parser->flags = 0;
00959         parser->content_length = ULLONG_MAX;
00960 
00961         if (UNLIKELY(!IS_ALPHA(ch))) {
00962           SET_ERRNO(HPE_INVALID_METHOD);
00963           goto error;
00964         }
00965 
00966         parser->method = (enum http_method) 0;
00967         parser->index = 1;
00968         switch (ch) {
00969           case 'A': parser->method = HTTP_ACL; break;
00970           case 'B': parser->method = HTTP_BIND; break;
00971           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
00972           case 'D': parser->method = HTTP_DELETE; break;
00973           case 'G': parser->method = HTTP_GET; break;
00974           case 'H': parser->method = HTTP_HEAD; break;
00975           case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
00976           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
00977           case 'N': parser->method = HTTP_NOTIFY; break;
00978           case 'O': parser->method = HTTP_OPTIONS; break;
00979           case 'P': parser->method = HTTP_POST;
00980             /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
00981             break;
00982           case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
00983           case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
00984           case 'T': parser->method = HTTP_TRACE; break;
00985           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
00986           default:
00987             SET_ERRNO(HPE_INVALID_METHOD);
00988             goto error;
00989         }
00990         UPDATE_STATE(s_req_method);
00991 
00992         CALLBACK_NOTIFY(message_begin);
00993 
00994         break;
00995       }
00996 
00997       case s_req_method:
00998       {
00999         const char *matcher;
01000         if (UNLIKELY(ch == '\0')) {
01001           SET_ERRNO(HPE_INVALID_METHOD);
01002           goto error;
01003         }
01004 
01005         matcher = method_strings[parser->method];
01006         if (ch == ' ' && matcher[parser->index] == '\0') {
01007           UPDATE_STATE(s_req_spaces_before_url);
01008         } else if (ch == matcher[parser->index]) {
01009           ; /* nada */
01010         } else if (IS_ALPHA(ch)) {
01011 
01012           switch (parser->method << 16 | parser->index << 8 | ch) {
01013 #define XX(meth, pos, ch, new_meth) \
01014             case (HTTP_##meth << 16 | pos << 8 | ch): \
01015               parser->method = HTTP_##new_meth; break;
01016 
01017             XX(POST,      1, 'U', PUT)
01018             XX(POST,      1, 'A', PATCH)
01019             XX(CONNECT,   1, 'H', CHECKOUT)
01020             XX(CONNECT,   2, 'P', COPY)
01021             XX(MKCOL,     1, 'O', MOVE)
01022             XX(MKCOL,     1, 'E', MERGE)
01023             XX(MKCOL,     2, 'A', MKACTIVITY)
01024             XX(MKCOL,     3, 'A', MKCALENDAR)
01025             XX(SUBSCRIBE, 1, 'E', SEARCH)
01026             XX(REPORT,    2, 'B', REBIND)
01027             XX(POST,      1, 'R', PROPFIND)
01028             XX(PROPFIND,  4, 'P', PROPPATCH)
01029             XX(PUT,       2, 'R', PURGE)
01030             XX(LOCK,      1, 'I', LINK)
01031             XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
01032             XX(UNLOCK,    2, 'B', UNBIND)
01033             XX(UNLOCK,    3, 'I', UNLINK)
01034 #undef XX
01035 
01036             default:
01037               SET_ERRNO(HPE_INVALID_METHOD);
01038               goto error;
01039           }
01040         } else if (ch == '-' &&
01041                    parser->index == 1 &&
01042                    parser->method == HTTP_MKCOL) {
01043           parser->method = HTTP_MSEARCH;
01044         } else {
01045           SET_ERRNO(HPE_INVALID_METHOD);
01046           goto error;
01047         }
01048 
01049         ++parser->index;
01050         break;
01051       }
01052 
01053       case s_req_spaces_before_url:
01054       {
01055         if (ch == ' ') break;
01056 
01057         MARK(url);
01058         if (parser->method == HTTP_CONNECT) {
01059           UPDATE_STATE(s_req_server_start);
01060         }
01061 
01062         UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
01063         if (UNLIKELY(CURRENT_STATE() == s_dead)) {
01064           SET_ERRNO(HPE_INVALID_URL);
01065           goto error;
01066         }
01067 
01068         break;
01069       }
01070 
01071       case s_req_schema:
01072       case s_req_schema_slash:
01073       case s_req_schema_slash_slash:
01074       case s_req_server_start:
01075       {
01076         switch (ch) {
01077           /* No whitespace allowed here */
01078           case ' ':
01079           case CR:
01080           case LF:
01081             SET_ERRNO(HPE_INVALID_URL);
01082             goto error;
01083           default:
01084             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
01085             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
01086               SET_ERRNO(HPE_INVALID_URL);
01087               goto error;
01088             }
01089         }
01090 
01091         break;
01092       }
01093 
01094       case s_req_server:
01095       case s_req_server_with_at:
01096       case s_req_path:
01097       case s_req_query_string_start:
01098       case s_req_query_string:
01099       case s_req_fragment_start:
01100       case s_req_fragment:
01101       {
01102         switch (ch) {
01103           case ' ':
01104             UPDATE_STATE(s_req_http_start);
01105             CALLBACK_DATA(url);
01106             break;
01107           case CR:
01108           case LF:
01109             parser->http_major = 0;
01110             parser->http_minor = 9;
01111             UPDATE_STATE((ch == CR) ?
01112               s_req_line_almost_done :
01113               s_header_field_start);
01114             CALLBACK_DATA(url);
01115             break;
01116           default:
01117             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
01118             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
01119               SET_ERRNO(HPE_INVALID_URL);
01120               goto error;
01121             }
01122         }
01123         break;
01124       }
01125 
01126       case s_req_http_start:
01127         switch (ch) {
01128           case 'H':
01129             UPDATE_STATE(s_req_http_H);
01130             break;
01131           case ' ':
01132             break;
01133           default:
01134             SET_ERRNO(HPE_INVALID_CONSTANT);
01135             goto error;
01136         }
01137         break;
01138 
01139       case s_req_http_H:
01140         STRICT_CHECK(ch != 'T');
01141         UPDATE_STATE(s_req_http_HT);
01142         break;
01143 
01144       case s_req_http_HT:
01145         STRICT_CHECK(ch != 'T');
01146         UPDATE_STATE(s_req_http_HTT);
01147         break;
01148 
01149       case s_req_http_HTT:
01150         STRICT_CHECK(ch != 'P');
01151         UPDATE_STATE(s_req_http_HTTP);
01152         break;
01153 
01154       case s_req_http_HTTP:
01155         STRICT_CHECK(ch != '/');
01156         UPDATE_STATE(s_req_first_http_major);
01157         break;
01158 
01159       /* first digit of major HTTP version */
01160       case s_req_first_http_major:
01161         if (UNLIKELY(ch < '1' || ch > '9')) {
01162           SET_ERRNO(HPE_INVALID_VERSION);
01163           goto error;
01164         }
01165 
01166         parser->http_major = ch - '0';
01167         UPDATE_STATE(s_req_http_major);
01168         break;
01169 
01170       /* major HTTP version or dot */
01171       case s_req_http_major:
01172       {
01173         if (ch == '.') {
01174           UPDATE_STATE(s_req_first_http_minor);
01175           break;
01176         }
01177 
01178         if (UNLIKELY(!IS_NUM(ch))) {
01179           SET_ERRNO(HPE_INVALID_VERSION);
01180           goto error;
01181         }
01182 
01183         parser->http_major *= 10;
01184         parser->http_major += ch - '0';
01185 
01186         if (UNLIKELY(parser->http_major > 999)) {
01187           SET_ERRNO(HPE_INVALID_VERSION);
01188           goto error;
01189         }
01190 
01191         break;
01192       }
01193 
01194       /* first digit of minor HTTP version */
01195       case s_req_first_http_minor:
01196         if (UNLIKELY(!IS_NUM(ch))) {
01197           SET_ERRNO(HPE_INVALID_VERSION);
01198           goto error;
01199         }
01200 
01201         parser->http_minor = ch - '0';
01202         UPDATE_STATE(s_req_http_minor);
01203         break;
01204 
01205       /* minor HTTP version or end of request line */
01206       case s_req_http_minor:
01207       {
01208         if (ch == CR) {
01209           UPDATE_STATE(s_req_line_almost_done);
01210           break;
01211         }
01212 
01213         if (ch == LF) {
01214           UPDATE_STATE(s_header_field_start);
01215           break;
01216         }
01217 
01218         /* XXX allow spaces after digit? */
01219 
01220         if (UNLIKELY(!IS_NUM(ch))) {
01221           SET_ERRNO(HPE_INVALID_VERSION);
01222           goto error;
01223         }
01224 
01225         parser->http_minor *= 10;
01226         parser->http_minor += ch - '0';
01227 
01228         if (UNLIKELY(parser->http_minor > 999)) {
01229           SET_ERRNO(HPE_INVALID_VERSION);
01230           goto error;
01231         }
01232 
01233         break;
01234       }
01235 
01236       /* end of request line */
01237       case s_req_line_almost_done:
01238       {
01239         if (UNLIKELY(ch != LF)) {
01240           SET_ERRNO(HPE_LF_EXPECTED);
01241           goto error;
01242         }
01243 
01244         UPDATE_STATE(s_header_field_start);
01245         break;
01246       }
01247 
01248       case s_header_field_start:
01249       {
01250         if (ch == CR) {
01251           UPDATE_STATE(s_headers_almost_done);
01252           break;
01253         }
01254 
01255         if (ch == LF) {
01256           /* they might be just sending \n instead of \r\n so this would be
01257            * the second \n to denote the end of headers*/
01258           UPDATE_STATE(s_headers_almost_done);
01259           REEXECUTE();
01260         }
01261 
01262         c = TOKEN(ch);
01263 
01264         if (UNLIKELY(!c)) {
01265           SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
01266           goto error;
01267         }
01268 
01269         MARK(header_field);
01270 
01271         parser->index = 0;
01272         UPDATE_STATE(s_header_field);
01273 
01274         switch (c) {
01275           case 'c':
01276             parser->header_state = h_C;
01277             break;
01278 
01279           case 'p':
01280             parser->header_state = h_matching_proxy_connection;
01281             break;
01282 
01283           case 't':
01284             parser->header_state = h_matching_transfer_encoding;
01285             break;
01286 
01287           case 'u':
01288             parser->header_state = h_matching_upgrade;
01289             break;
01290 
01291           default:
01292             parser->header_state = h_general;
01293             break;
01294         }
01295         break;
01296       }
01297 
01298       case s_header_field:
01299       {
01300         const char* start = p;
01301         for (; p != data + len; p++) {
01302           ch = *p;
01303           c = TOKEN(ch);
01304 
01305           if (!c)
01306             break;
01307 
01308           switch (parser->header_state) {
01309             case h_general:
01310               break;
01311 
01312             case h_C:
01313               parser->index++;
01314               parser->header_state = (c == 'o' ? h_CO : h_general);
01315               break;
01316 
01317             case h_CO:
01318               parser->index++;
01319               parser->header_state = (c == 'n' ? h_CON : h_general);
01320               break;
01321 
01322             case h_CON:
01323               parser->index++;
01324               switch (c) {
01325                 case 'n':
01326                   parser->header_state = h_matching_connection;
01327                   break;
01328                 case 't':
01329                   parser->header_state = h_matching_content_length;
01330                   break;
01331                 default:
01332                   parser->header_state = h_general;
01333                   break;
01334               }
01335               break;
01336 
01337             /* connection */
01338 
01339             case h_matching_connection:
01340               parser->index++;
01341               if (parser->index > sizeof(CONNECTION)-1
01342                   || c != CONNECTION[parser->index]) {
01343                 parser->header_state = h_general;
01344               } else if (parser->index == sizeof(CONNECTION)-2) {
01345                 parser->header_state = h_connection;
01346               }
01347               break;
01348 
01349             /* proxy-connection */
01350 
01351             case h_matching_proxy_connection:
01352               parser->index++;
01353               if (parser->index > sizeof(PROXY_CONNECTION)-1
01354                   || c != PROXY_CONNECTION[parser->index]) {
01355                 parser->header_state = h_general;
01356               } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
01357                 parser->header_state = h_connection;
01358               }
01359               break;
01360 
01361             /* content-length */
01362 
01363             case h_matching_content_length:
01364               parser->index++;
01365               if (parser->index > sizeof(CONTENT_LENGTH)-1
01366                   || c != CONTENT_LENGTH[parser->index]) {
01367                 parser->header_state = h_general;
01368               } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
01369                 parser->header_state = h_content_length;
01370               }
01371               break;
01372 
01373             /* transfer-encoding */
01374 
01375             case h_matching_transfer_encoding:
01376               parser->index++;
01377               if (parser->index > sizeof(TRANSFER_ENCODING)-1
01378                   || c != TRANSFER_ENCODING[parser->index]) {
01379                 parser->header_state = h_general;
01380               } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
01381                 parser->header_state = h_transfer_encoding;
01382               }
01383               break;
01384 
01385             /* upgrade */
01386 
01387             case h_matching_upgrade:
01388               parser->index++;
01389               if (parser->index > sizeof(UPGRADE)-1
01390                   || c != UPGRADE[parser->index]) {
01391                 parser->header_state = h_general;
01392               } else if (parser->index == sizeof(UPGRADE)-2) {
01393                 parser->header_state = h_upgrade;
01394               }
01395               break;
01396 
01397             case h_connection:
01398             case h_content_length:
01399             case h_transfer_encoding:
01400             case h_upgrade:
01401               if (ch != ' ') parser->header_state = h_general;
01402               break;
01403 
01404             default:
01405               assert(0 && "Unknown header_state");
01406               break;
01407           }
01408         }
01409 
01410         COUNT_HEADER_SIZE(p - start);
01411 
01412         if (p == data + len) {
01413           --p;
01414           break;
01415         }
01416 
01417         if (ch == ':') {
01418           UPDATE_STATE(s_header_value_discard_ws);
01419           CALLBACK_DATA(header_field);
01420           break;
01421         }
01422 
01423         SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
01424         goto error;
01425       }
01426 
01427       case s_header_value_discard_ws:
01428         if (ch == ' ' || ch == '\t') break;
01429 
01430         if (ch == CR) {
01431           UPDATE_STATE(s_header_value_discard_ws_almost_done);
01432           break;
01433         }
01434 
01435         if (ch == LF) {
01436           UPDATE_STATE(s_header_value_discard_lws);
01437           break;
01438         }
01439 
01440         /* FALLTHROUGH */
01441 
01442       case s_header_value_start:
01443       {
01444         MARK(header_value);
01445 
01446         UPDATE_STATE(s_header_value);
01447         parser->index = 0;
01448 
01449         c = LOWER(ch);
01450 
01451         switch (parser->header_state) {
01452           case h_upgrade:
01453             parser->flags |= F_UPGRADE;
01454             parser->header_state = h_general;
01455             break;
01456 
01457           case h_transfer_encoding:
01458             /* looking for 'Transfer-Encoding: chunked' */
01459             if ('c' == c) {
01460               parser->header_state = h_matching_transfer_encoding_chunked;
01461             } else {
01462               parser->header_state = h_general;
01463             }
01464             break;
01465 
01466           case h_content_length:
01467             if (UNLIKELY(!IS_NUM(ch))) {
01468               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
01469               goto error;
01470             }
01471 
01472             if (parser->flags & F_CONTENTLENGTH) {
01473               SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
01474               goto error;
01475             }
01476 
01477             parser->flags |= F_CONTENTLENGTH;
01478             parser->content_length = ch - '0';
01479             break;
01480 
01481           case h_connection:
01482             /* looking for 'Connection: keep-alive' */
01483             if (c == 'k') {
01484               parser->header_state = h_matching_connection_keep_alive;
01485             /* looking for 'Connection: close' */
01486             } else if (c == 'c') {
01487               parser->header_state = h_matching_connection_close;
01488             } else if (c == 'u') {
01489               parser->header_state = h_matching_connection_upgrade;
01490             } else {
01491               parser->header_state = h_matching_connection_token;
01492             }
01493             break;
01494 
01495           /* Multi-value `Connection` header */
01496           case h_matching_connection_token_start:
01497             break;
01498 
01499           default:
01500             parser->header_state = h_general;
01501             break;
01502         }
01503         break;
01504       }
01505 
01506       case s_header_value:
01507       {
01508         const char* start = p;
01509         enum header_states h_state = (enum header_states) parser->header_state;
01510         for (; p != data + len; p++) {
01511           ch = *p;
01512           if (ch == CR) {
01513             UPDATE_STATE(s_header_almost_done);
01514             parser->header_state = h_state;
01515             CALLBACK_DATA(header_value);
01516             break;
01517           }
01518 
01519           if (ch == LF) {
01520             UPDATE_STATE(s_header_almost_done);
01521             COUNT_HEADER_SIZE(p - start);
01522             parser->header_state = h_state;
01523             CALLBACK_DATA_NOADVANCE(header_value);
01524             REEXECUTE();
01525           }
01526 
01527           if (!lenient && !IS_HEADER_CHAR(ch)) {
01528             SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
01529             goto error;
01530           }
01531 
01532           c = LOWER(ch);
01533 
01534           switch (h_state) {
01535             case h_general:
01536             {
01537               const char* p_cr;
01538               const char* p_lf;
01539               size_t limit = data + len - p;
01540 
01541               limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
01542 
01543               p_cr = (const char*) memchr(p, CR, limit);
01544               p_lf = (const char*) memchr(p, LF, limit);
01545               if (p_cr != NULL) {
01546                 if (p_lf != NULL && p_cr >= p_lf)
01547                   p = p_lf;
01548                 else
01549                   p = p_cr;
01550               } else if (UNLIKELY(p_lf != NULL)) {
01551                 p = p_lf;
01552               } else {
01553                 p = data + len;
01554               }
01555               --p;
01556 
01557               break;
01558             }
01559 
01560             case h_connection:
01561             case h_transfer_encoding:
01562               assert(0 && "Shouldn't get here.");
01563               break;
01564 
01565             case h_content_length:
01566             {
01567               uint64_t t;
01568 
01569               if (ch == ' ') break;
01570 
01571               if (UNLIKELY(!IS_NUM(ch))) {
01572                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
01573                 parser->header_state = h_state;
01574                 goto error;
01575               }
01576 
01577               t = parser->content_length;
01578               t *= 10;
01579               t += ch - '0';
01580 
01581               /* Overflow? Test against a conservative limit for simplicity. */
01582               if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
01583                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
01584                 parser->header_state = h_state;
01585                 goto error;
01586               }
01587 
01588               parser->content_length = t;
01589               break;
01590             }
01591 
01592             /* Transfer-Encoding: chunked */
01593             case h_matching_transfer_encoding_chunked:
01594               parser->index++;
01595               if (parser->index > sizeof(CHUNKED)-1
01596                   || c != CHUNKED[parser->index]) {
01597                 h_state = h_general;
01598               } else if (parser->index == sizeof(CHUNKED)-2) {
01599                 h_state = h_transfer_encoding_chunked;
01600               }
01601               break;
01602 
01603             case h_matching_connection_token_start:
01604               /* looking for 'Connection: keep-alive' */
01605               if (c == 'k') {
01606                 h_state = h_matching_connection_keep_alive;
01607               /* looking for 'Connection: close' */
01608               } else if (c == 'c') {
01609                 h_state = h_matching_connection_close;
01610               } else if (c == 'u') {
01611                 h_state = h_matching_connection_upgrade;
01612               } else if (STRICT_TOKEN(c)) {
01613                 h_state = h_matching_connection_token;
01614               } else if (c == ' ' || c == '\t') {
01615                 /* Skip lws */
01616               } else {
01617                 h_state = h_general;
01618               }
01619               break;
01620 
01621             /* looking for 'Connection: keep-alive' */
01622             case h_matching_connection_keep_alive:
01623               parser->index++;
01624               if (parser->index > sizeof(KEEP_ALIVE)-1
01625                   || c != KEEP_ALIVE[parser->index]) {
01626                 h_state = h_matching_connection_token;
01627               } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
01628                 h_state = h_connection_keep_alive;
01629               }
01630               break;
01631 
01632             /* looking for 'Connection: close' */
01633             case h_matching_connection_close:
01634               parser->index++;
01635               if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
01636                 h_state = h_matching_connection_token;
01637               } else if (parser->index == sizeof(CLOSE)-2) {
01638                 h_state = h_connection_close;
01639               }
01640               break;
01641 
01642             /* looking for 'Connection: upgrade' */
01643             case h_matching_connection_upgrade:
01644               parser->index++;
01645               if (parser->index > sizeof(UPGRADE) - 1 ||
01646                   c != UPGRADE[parser->index]) {
01647                 h_state = h_matching_connection_token;
01648               } else if (parser->index == sizeof(UPGRADE)-2) {
01649                 h_state = h_connection_upgrade;
01650               }
01651               break;
01652 
01653             case h_matching_connection_token:
01654               if (ch == ',') {
01655                 h_state = h_matching_connection_token_start;
01656                 parser->index = 0;
01657               }
01658               break;
01659 
01660             case h_transfer_encoding_chunked:
01661               if (ch != ' ') h_state = h_general;
01662               break;
01663 
01664             case h_connection_keep_alive:
01665             case h_connection_close:
01666             case h_connection_upgrade:
01667               if (ch == ',') {
01668                 if (h_state == h_connection_keep_alive) {
01669                   parser->flags |= F_CONNECTION_KEEP_ALIVE;
01670                 } else if (h_state == h_connection_close) {
01671                   parser->flags |= F_CONNECTION_CLOSE;
01672                 } else if (h_state == h_connection_upgrade) {
01673                   parser->flags |= F_CONNECTION_UPGRADE;
01674                 }
01675                 h_state = h_matching_connection_token_start;
01676                 parser->index = 0;
01677               } else if (ch != ' ') {
01678                 h_state = h_matching_connection_token;
01679               }
01680               break;
01681 
01682             default:
01683               UPDATE_STATE(s_header_value);
01684               h_state = h_general;
01685               break;
01686           }
01687         }
01688         parser->header_state = h_state;
01689 
01690         COUNT_HEADER_SIZE(p - start);
01691 
01692         if (p == data + len)
01693           --p;
01694         break;
01695       }
01696 
01697       case s_header_almost_done:
01698       {
01699         if (UNLIKELY(ch != LF)) {
01700           SET_ERRNO(HPE_LF_EXPECTED);
01701           goto error;
01702         }
01703 
01704         UPDATE_STATE(s_header_value_lws);
01705         break;
01706       }
01707 
01708       case s_header_value_lws:
01709       {
01710         if (ch == ' ' || ch == '\t') {
01711           UPDATE_STATE(s_header_value_start);
01712           REEXECUTE();
01713         }
01714 
01715         /* finished the header */
01716         switch (parser->header_state) {
01717           case h_connection_keep_alive:
01718             parser->flags |= F_CONNECTION_KEEP_ALIVE;
01719             break;
01720           case h_connection_close:
01721             parser->flags |= F_CONNECTION_CLOSE;
01722             break;
01723           case h_transfer_encoding_chunked:
01724             parser->flags |= F_CHUNKED;
01725             break;
01726           case h_connection_upgrade:
01727             parser->flags |= F_CONNECTION_UPGRADE;
01728             break;
01729           default:
01730             break;
01731         }
01732 
01733         UPDATE_STATE(s_header_field_start);
01734         REEXECUTE();
01735       }
01736 
01737       case s_header_value_discard_ws_almost_done:
01738       {
01739         STRICT_CHECK(ch != LF);
01740         UPDATE_STATE(s_header_value_discard_lws);
01741         break;
01742       }
01743 
01744       case s_header_value_discard_lws:
01745       {
01746         if (ch == ' ' || ch == '\t') {
01747           UPDATE_STATE(s_header_value_discard_ws);
01748           break;
01749         } else {
01750           switch (parser->header_state) {
01751             case h_connection_keep_alive:
01752               parser->flags |= F_CONNECTION_KEEP_ALIVE;
01753               break;
01754             case h_connection_close:
01755               parser->flags |= F_CONNECTION_CLOSE;
01756               break;
01757             case h_connection_upgrade:
01758               parser->flags |= F_CONNECTION_UPGRADE;
01759               break;
01760             case h_transfer_encoding_chunked:
01761               parser->flags |= F_CHUNKED;
01762               break;
01763             default:
01764               break;
01765           }
01766 
01767           /* header value was empty */
01768           MARK(header_value);
01769           UPDATE_STATE(s_header_field_start);
01770           CALLBACK_DATA_NOADVANCE(header_value);
01771           REEXECUTE();
01772         }
01773       }
01774 
01775       case s_headers_almost_done:
01776       {
01777         STRICT_CHECK(ch != LF);
01778 
01779         if (parser->flags & F_TRAILING) {
01780           /* End of a chunked request */
01781           UPDATE_STATE(s_message_done);
01782           CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
01783           REEXECUTE();
01784         }
01785 
01786         /* Cannot use chunked encoding and a content-length header together
01787            per the HTTP specification. */
01788         if ((parser->flags & F_CHUNKED) &&
01789             (parser->flags & F_CONTENTLENGTH)) {
01790           SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
01791           goto error;
01792         }
01793 
01794         UPDATE_STATE(s_headers_done);
01795 
01796         /* Set this here so that on_headers_complete() callbacks can see it */
01797         parser->upgrade =
01798           ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
01799            (F_UPGRADE | F_CONNECTION_UPGRADE) ||
01800            parser->method == HTTP_CONNECT);
01801 
01802         /* Here we call the headers_complete callback. This is somewhat
01803          * different than other callbacks because if the user returns 1, we
01804          * will interpret that as saying that this message has no body. This
01805          * is needed for the annoying case of recieving a response to a HEAD
01806          * request.
01807          *
01808          * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
01809          * we have to simulate it by handling a change in errno below.
01810          */
01811         if (settings->on_headers_complete) {
01812           switch (settings->on_headers_complete(parser)) {
01813             case 0:
01814               break;
01815 
01816             case 2:
01817               parser->upgrade = 1;
01818 
01819             case 1:
01820               parser->flags |= F_SKIPBODY;
01821               break;
01822 
01823             default:
01824               SET_ERRNO(HPE_CB_headers_complete);
01825               RETURN(p - data); /* Error */
01826           }
01827         }
01828 
01829         if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
01830           RETURN(p - data);
01831         }
01832 
01833         REEXECUTE();
01834       }
01835 
01836       case s_headers_done:
01837       {
01838         int hasBody;
01839         STRICT_CHECK(ch != LF);
01840 
01841         parser->nread = 0;
01842 
01843         hasBody = parser->flags & F_CHUNKED ||
01844           (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
01845         if (parser->upgrade && (parser->method == HTTP_CONNECT ||
01846                                 (parser->flags & F_SKIPBODY) || !hasBody)) {
01847           /* Exit, the rest of the message is in a different protocol. */
01848           UPDATE_STATE(NEW_MESSAGE());
01849           CALLBACK_NOTIFY(message_complete);
01850           RETURN((p - data) + 1);
01851         }
01852 
01853         if (parser->flags & F_SKIPBODY) {
01854           UPDATE_STATE(NEW_MESSAGE());
01855           CALLBACK_NOTIFY(message_complete);
01856         } else if (parser->flags & F_CHUNKED) {
01857           /* chunked encoding - ignore Content-Length header */
01858           UPDATE_STATE(s_chunk_size_start);
01859         } else {
01860           if (parser->content_length == 0) {
01861             /* Content-Length header given but zero: Content-Length: 0\r\n */
01862             UPDATE_STATE(NEW_MESSAGE());
01863             CALLBACK_NOTIFY(message_complete);
01864           } else if (parser->content_length != ULLONG_MAX) {
01865             /* Content-Length header given and non-zero */
01866             UPDATE_STATE(s_body_identity);
01867           } else {
01868             if (!http_message_needs_eof(parser)) {
01869               /* Assume content-length 0 - read the next */
01870               UPDATE_STATE(NEW_MESSAGE());
01871               CALLBACK_NOTIFY(message_complete);
01872             } else {
01873               /* Read body until EOF */
01874               UPDATE_STATE(s_body_identity_eof);
01875             }
01876           }
01877         }
01878 
01879         break;
01880       }
01881 
01882       case s_body_identity:
01883       {
01884         uint64_t to_read = MIN(parser->content_length,
01885                                (uint64_t) ((data + len) - p));
01886 
01887         assert(parser->content_length != 0
01888             && parser->content_length != ULLONG_MAX);
01889 
01890         /* The difference between advancing content_length and p is because
01891          * the latter will automaticaly advance on the next loop iteration.
01892          * Further, if content_length ends up at 0, we want to see the last
01893          * byte again for our message complete callback.
01894          */
01895         MARK(body);
01896         parser->content_length -= to_read;
01897         p += to_read - 1;
01898 
01899         if (parser->content_length == 0) {
01900           UPDATE_STATE(s_message_done);
01901 
01902           /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
01903            *
01904            * The alternative to doing this is to wait for the next byte to
01905            * trigger the data callback, just as in every other case. The
01906            * problem with this is that this makes it difficult for the test
01907            * harness to distinguish between complete-on-EOF and
01908            * complete-on-length. It's not clear that this distinction is
01909            * important for applications, but let's keep it for now.
01910            */
01911           CALLBACK_DATA_(body, p - body_mark + 1, p - data);
01912           REEXECUTE();
01913         }
01914 
01915         break;
01916       }
01917 
01918       /* read until EOF */
01919       case s_body_identity_eof:
01920         MARK(body);
01921         p = data + len - 1;
01922 
01923         break;
01924 
01925       case s_message_done:
01926         UPDATE_STATE(NEW_MESSAGE());
01927         CALLBACK_NOTIFY(message_complete);
01928         if (parser->upgrade) {
01929           /* Exit, the rest of the message is in a different protocol. */
01930           RETURN((p - data) + 1);
01931         }
01932         break;
01933 
01934       case s_chunk_size_start:
01935       {
01936         assert(parser->nread == 1);
01937         assert(parser->flags & F_CHUNKED);
01938 
01939         unhex_val = unhex[(unsigned char)ch];
01940         if (UNLIKELY(unhex_val == -1)) {
01941           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
01942           goto error;
01943         }
01944 
01945         parser->content_length = unhex_val;
01946         UPDATE_STATE(s_chunk_size);
01947         break;
01948       }
01949 
01950       case s_chunk_size:
01951       {
01952         uint64_t t;
01953 
01954         assert(parser->flags & F_CHUNKED);
01955 
01956         if (ch == CR) {
01957           UPDATE_STATE(s_chunk_size_almost_done);
01958           break;
01959         }
01960 
01961         unhex_val = unhex[(unsigned char)ch];
01962 
01963         if (unhex_val == -1) {
01964           if (ch == ';' || ch == ' ') {
01965             UPDATE_STATE(s_chunk_parameters);
01966             break;
01967           }
01968 
01969           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
01970           goto error;
01971         }
01972 
01973         t = parser->content_length;
01974         t *= 16;
01975         t += unhex_val;
01976 
01977         /* Overflow? Test against a conservative limit for simplicity. */
01978         if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
01979           SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
01980           goto error;
01981         }
01982 
01983         parser->content_length = t;
01984         break;
01985       }
01986 
01987       case s_chunk_parameters:
01988       {
01989         assert(parser->flags & F_CHUNKED);
01990         /* just ignore this shit. TODO check for overflow */
01991         if (ch == CR) {
01992           UPDATE_STATE(s_chunk_size_almost_done);
01993           break;
01994         }
01995         break;
01996       }
01997 
01998       case s_chunk_size_almost_done:
01999       {
02000         assert(parser->flags & F_CHUNKED);
02001         STRICT_CHECK(ch != LF);
02002 
02003         parser->nread = 0;
02004 
02005         if (parser->content_length == 0) {
02006           parser->flags |= F_TRAILING;
02007           UPDATE_STATE(s_header_field_start);
02008         } else {
02009           UPDATE_STATE(s_chunk_data);
02010         }
02011         CALLBACK_NOTIFY(chunk_header);
02012         break;
02013       }
02014 
02015       case s_chunk_data:
02016       {
02017         uint64_t to_read = MIN(parser->content_length,
02018                                (uint64_t) ((data + len) - p));
02019 
02020         assert(parser->flags & F_CHUNKED);
02021         assert(parser->content_length != 0
02022             && parser->content_length != ULLONG_MAX);
02023 
02024         /* See the explanation in s_body_identity for why the content
02025          * length and data pointers are managed this way.
02026          */
02027         MARK(body);
02028         parser->content_length -= to_read;
02029         p += to_read - 1;
02030 
02031         if (parser->content_length == 0) {
02032           UPDATE_STATE(s_chunk_data_almost_done);
02033         }
02034 
02035         break;
02036       }
02037 
02038       case s_chunk_data_almost_done:
02039         assert(parser->flags & F_CHUNKED);
02040         assert(parser->content_length == 0);
02041         STRICT_CHECK(ch != CR);
02042         UPDATE_STATE(s_chunk_data_done);
02043         CALLBACK_DATA(body);
02044         break;
02045 
02046       case s_chunk_data_done:
02047         assert(parser->flags & F_CHUNKED);
02048         STRICT_CHECK(ch != LF);
02049         parser->nread = 0;
02050         UPDATE_STATE(s_chunk_size_start);
02051         CALLBACK_NOTIFY(chunk_complete);
02052         break;
02053 
02054       default:
02055         assert(0 && "unhandled state");
02056         SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
02057         goto error;
02058     }
02059   }
02060 
02061   /* Run callbacks for any marks that we have leftover after we ran our of
02062    * bytes. There should be at most one of these set, so it's OK to invoke
02063    * them in series (unset marks will not result in callbacks).
02064    *
02065    * We use the NOADVANCE() variety of callbacks here because 'p' has already
02066    * overflowed 'data' and this allows us to correct for the off-by-one that
02067    * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
02068    * value that's in-bounds).
02069    */
02070 
02071   assert(((header_field_mark ? 1 : 0) +
02072           (header_value_mark ? 1 : 0) +
02073           (url_mark ? 1 : 0)  +
02074           (body_mark ? 1 : 0) +
02075           (status_mark ? 1 : 0)) <= 1);
02076 
02077   CALLBACK_DATA_NOADVANCE(header_field);
02078   CALLBACK_DATA_NOADVANCE(header_value);
02079   CALLBACK_DATA_NOADVANCE(url);
02080   CALLBACK_DATA_NOADVANCE(body);
02081   CALLBACK_DATA_NOADVANCE(status);
02082 
02083   RETURN(len);
02084 
02085 error:
02086   if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
02087     SET_ERRNO(HPE_UNKNOWN);
02088   }
02089 
02090   RETURN(p - data);
02091 }
02092 
02093 
02094 /* Does the parser need to see an EOF to find the end of the message? */
02095 int
02096 http_message_needs_eof (const http_parser *parser)
02097 {
02098   if (parser->type == HTTP_REQUEST) {
02099     return 0;
02100   }
02101 
02102   /* See RFC 2616 section 4.4 */
02103   if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
02104       parser->status_code == 204 ||     /* No Content */
02105       parser->status_code == 304 ||     /* Not Modified */
02106       parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
02107     return 0;
02108   }
02109 
02110   if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
02111     return 0;
02112   }
02113 
02114   return 1;
02115 }
02116 
02117 
02118 int
02119 http_should_keep_alive (const http_parser *parser)
02120 {
02121   if (parser->http_major > 0 && parser->http_minor > 0) {
02122     /* HTTP/1.1 */
02123     if (parser->flags & F_CONNECTION_CLOSE) {
02124       return 0;
02125     }
02126   } else {
02127     /* HTTP/1.0 or earlier */
02128     if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
02129       return 0;
02130     }
02131   }
02132 
02133   return !http_message_needs_eof(parser);
02134 }
02135 
02136 
02137 const char *
02138 http_method_str (enum http_method m)
02139 {
02140   return ELEM_AT(method_strings, m, "<unknown>");
02141 }
02142 
02143 
02144 void
02145 http_parser_init (http_parser *parser, enum http_parser_type t)
02146 {
02147   void *data = parser->data; /* preserve application data */
02148   memset(parser, 0, sizeof(*parser));
02149   parser->data = data;
02150   parser->type = t;
02151   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
02152   parser->http_errno = HPE_OK;
02153 }
02154 
02155 void
02156 http_parser_settings_init(http_parser_settings *settings)
02157 {
02158   memset(settings, 0, sizeof(*settings));
02159 }
02160 
02161 const char *
02162 http_errno_name(enum http_errno err) {
02163   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
02164   return http_strerror_tab[err].name;
02165 }
02166 
02167 const char *
02168 http_errno_description(enum http_errno err) {
02169   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
02170   return http_strerror_tab[err].description;
02171 }
02172 
02173 static enum http_host_state
02174 http_parse_host_char(enum http_host_state s, const char ch) {
02175   switch(s) {
02176     case s_http_userinfo:
02177     case s_http_userinfo_start:
02178       if (ch == '@') {
02179         return s_http_host_start;
02180       }
02181 
02182       if (IS_USERINFO_CHAR(ch)) {
02183         return s_http_userinfo;
02184       }
02185       break;
02186 
02187     case s_http_host_start:
02188       if (ch == '[') {
02189         return s_http_host_v6_start;
02190       }
02191 
02192       if (IS_HOST_CHAR(ch)) {
02193         return s_http_host;
02194       }
02195 
02196       break;
02197 
02198     case s_http_host:
02199       if (IS_HOST_CHAR(ch)) {
02200         return s_http_host;
02201       }
02202 
02203     /* FALLTHROUGH */
02204     case s_http_host_v6_end:
02205       if (ch == ':') {
02206         return s_http_host_port_start;
02207       }
02208 
02209       break;
02210 
02211     case s_http_host_v6:
02212       if (ch == ']') {
02213         return s_http_host_v6_end;
02214       }
02215 
02216     /* FALLTHROUGH */
02217     case s_http_host_v6_start:
02218       if (IS_HEX(ch) || ch == ':' || ch == '.') {
02219         return s_http_host_v6;
02220       }
02221 
02222       if (s == s_http_host_v6 && ch == '%') {
02223         return s_http_host_v6_zone_start;
02224       }
02225       break;
02226 
02227     case s_http_host_v6_zone:
02228       if (ch == ']') {
02229         return s_http_host_v6_end;
02230       }
02231 
02232     /* FALLTHROUGH */
02233     case s_http_host_v6_zone_start:
02234       /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
02235       if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
02236           ch == '~') {
02237         return s_http_host_v6_zone;
02238       }
02239       break;
02240 
02241     case s_http_host_port:
02242     case s_http_host_port_start:
02243       if (IS_NUM(ch)) {
02244         return s_http_host_port;
02245       }
02246 
02247       break;
02248 
02249     default:
02250       break;
02251   }
02252   return s_http_host_dead;
02253 }
02254 
02255 static int
02256 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
02257   enum http_host_state s;
02258 
02259   const char *p;
02260   size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
02261 
02262   assert(u->field_set & (1 << UF_HOST));
02263 
02264   u->field_data[UF_HOST].len = 0;
02265 
02266   s = found_at ? s_http_userinfo_start : s_http_host_start;
02267 
02268   for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
02269     enum http_host_state new_s = http_parse_host_char(s, *p);
02270 
02271     if (new_s == s_http_host_dead) {
02272       return 1;
02273     }
02274 
02275     switch(new_s) {
02276       case s_http_host:
02277         if (s != s_http_host) {
02278           u->field_data[UF_HOST].off = p - buf;
02279         }
02280         u->field_data[UF_HOST].len++;
02281         break;
02282 
02283       case s_http_host_v6:
02284         if (s != s_http_host_v6) {
02285           u->field_data[UF_HOST].off = p - buf;
02286         }
02287         u->field_data[UF_HOST].len++;
02288         break;
02289 
02290       case s_http_host_v6_zone_start:
02291       case s_http_host_v6_zone:
02292         u->field_data[UF_HOST].len++;
02293         break;
02294 
02295       case s_http_host_port:
02296         if (s != s_http_host_port) {
02297           u->field_data[UF_PORT].off = p - buf;
02298           u->field_data[UF_PORT].len = 0;
02299           u->field_set |= (1 << UF_PORT);
02300         }
02301         u->field_data[UF_PORT].len++;
02302         break;
02303 
02304       case s_http_userinfo:
02305         if (s != s_http_userinfo) {
02306           u->field_data[UF_USERINFO].off = p - buf ;
02307           u->field_data[UF_USERINFO].len = 0;
02308           u->field_set |= (1 << UF_USERINFO);
02309         }
02310         u->field_data[UF_USERINFO].len++;
02311         break;
02312 
02313       default:
02314         break;
02315     }
02316     s = new_s;
02317   }
02318 
02319   /* Make sure we don't end somewhere unexpected */
02320   switch (s) {
02321     case s_http_host_start:
02322     case s_http_host_v6_start:
02323     case s_http_host_v6:
02324     case s_http_host_v6_zone_start:
02325     case s_http_host_v6_zone:
02326     case s_http_host_port_start:
02327     case s_http_userinfo:
02328     case s_http_userinfo_start:
02329       return 1;
02330     default:
02331       break;
02332   }
02333 
02334   return 0;
02335 }
02336 
02337 void
02338 http_parser_url_init(struct http_parser_url *u) {
02339   memset(u, 0, sizeof(*u));
02340 }
02341 
02342 int
02343 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
02344                       struct http_parser_url *u)
02345 {
02346   enum state s;
02347   const char *p;
02348   enum http_parser_url_fields uf, old_uf;
02349   int found_at = 0;
02350 
02351   u->port = u->field_set = 0;
02352   s = is_connect ? s_req_server_start : s_req_spaces_before_url;
02353   old_uf = UF_MAX;
02354 
02355   for (p = buf; p < buf + buflen; p++) {
02356     s = parse_url_char(s, *p);
02357 
02358     /* Figure out the next field that we're operating on */
02359     switch (s) {
02360       case s_dead:
02361         return 1;
02362 
02363       /* Skip delimeters */
02364       case s_req_schema_slash:
02365       case s_req_schema_slash_slash:
02366       case s_req_server_start:
02367       case s_req_query_string_start:
02368       case s_req_fragment_start:
02369         continue;
02370 
02371       case s_req_schema:
02372         uf = UF_SCHEMA;
02373         break;
02374 
02375       case s_req_server_with_at:
02376         found_at = 1;
02377 
02378       /* FALLTROUGH */
02379       case s_req_server:
02380         uf = UF_HOST;
02381         break;
02382 
02383       case s_req_path:
02384         uf = UF_PATH;
02385         break;
02386 
02387       case s_req_query_string:
02388         uf = UF_QUERY;
02389         break;
02390 
02391       case s_req_fragment:
02392         uf = UF_FRAGMENT;
02393         break;
02394 
02395       default:
02396         assert(!"Unexpected state");
02397         return 1;
02398     }
02399 
02400     /* Nothing's changed; soldier on */
02401     if (uf == old_uf) {
02402       u->field_data[uf].len++;
02403       continue;
02404     }
02405 
02406     u->field_data[uf].off = p - buf;
02407     u->field_data[uf].len = 1;
02408 
02409     u->field_set |= (1 << uf);
02410     old_uf = uf;
02411   }
02412 
02413   /* host must be present if there is a schema */
02414   /* parsing http:///toto will fail */
02415   if ((u->field_set & (1 << UF_SCHEMA)) &&
02416       (u->field_set & (1 << UF_HOST)) == 0) {
02417     return 1;
02418   }
02419 
02420   if (u->field_set & (1 << UF_HOST)) {
02421     if (http_parse_host(buf, u, found_at) != 0) {
02422       return 1;
02423     }
02424   }
02425 
02426   /* CONNECT requests can only contain "hostname:port" */
02427   if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
02428     return 1;
02429   }
02430 
02431   if (u->field_set & (1 << UF_PORT)) {
02432     /* Don't bother with endp; we've already validated the string */
02433     unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
02434 
02435     /* Ports have a max value of 2^16 */
02436     if (v > 0xffff) {
02437       return 1;
02438     }
02439 
02440     u->port = (uint16_t) v;
02441   }
02442 
02443   return 0;
02444 }
02445 
02446 void
02447 http_parser_pause(http_parser *parser, int paused) {
02448   /* Users should only be pausing/unpausing a parser that is not in an error
02449    * state. In non-debug builds, there's not much that we can do about this
02450    * other than ignore it.
02451    */
02452   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
02453       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
02454     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
02455   } else {
02456     assert(0 && "Attempting to pause parser in error state");
02457   }
02458 }
02459 
02460 int
02461 http_body_is_final(const struct http_parser *parser) {
02462     return parser->state == s_message_done;
02463 }
02464 
02465 unsigned long
02466 http_parser_version(void) {
02467   return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
02468          HTTP_PARSER_VERSION_MINOR * 0x00100 |
02469          HTTP_PARSER_VERSION_PATCH * 0x00001;
02470 }