This library is used to make HTTP and HTTPS calls from mbed OS 5 applications.
Fork of mbed-http.
http_parser.c
00001 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev 00002 * 00003 * Additional changes are licensed under the same terms as NGINX and 00004 * copyright Joyent, Inc. and other Node contributors. All rights reserved. 00005 * 00006 * Permission is hereby granted, free of charge, to any person obtaining a copy 00007 * of this software and associated documentation files (the "Software"), to 00008 * deal in the Software without restriction, including without limitation the 00009 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 00010 * sell copies of the Software, and to permit persons to whom the Software is 00011 * furnished to do so, subject to the following conditions: 00012 * 00013 * The above copyright notice and this permission notice shall be included in 00014 * all copies or substantial portions of the Software. 00015 * 00016 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00017 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00018 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 00019 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00020 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 00021 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 00022 * IN THE SOFTWARE. 00023 */ 00024 #include "http_parser.h" 00025 #include <assert.h> 00026 #include <stddef.h> 00027 #include <ctype.h> 00028 #include <stdlib.h> 00029 #include <string.h> 00030 #include <limits.h> 00031 00032 #ifndef ULLONG_MAX 00033 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ 00034 #endif 00035 00036 #ifndef MIN 00037 # define MIN(a,b) ((a) < (b) ? 
(a) : (b)) 00038 #endif 00039 00040 #ifndef ARRAY_SIZE 00041 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) 00042 #endif 00043 00044 #ifndef BIT_AT 00045 # define BIT_AT(a, i) \ 00046 (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ 00047 (1 << ((unsigned int) (i) & 7)))) 00048 #endif 00049 00050 #ifndef ELEM_AT 00051 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v)) 00052 #endif 00053 00054 #define SET_ERRNO(e) \ 00055 do { \ 00056 parser->http_errno = (e); \ 00057 } while(0) 00058 00059 #define CURRENT_STATE() p_state 00060 #define UPDATE_STATE(V) p_state = (enum state) (V); 00061 #define RETURN(V) \ 00062 do { \ 00063 parser->state = CURRENT_STATE(); \ 00064 return (V); \ 00065 } while (0); 00066 #define REEXECUTE() \ 00067 goto reexecute; \ 00068 00069 00070 #ifdef __GNUC__ 00071 # define LIKELY(X) __builtin_expect(!!(X), 1) 00072 # define UNLIKELY(X) __builtin_expect(!!(X), 0) 00073 #else 00074 # define LIKELY(X) (X) 00075 # define UNLIKELY(X) (X) 00076 #endif 00077 00078 00079 /* Run the notify callback FOR, returning ER if it fails */ 00080 #define CALLBACK_NOTIFY_(FOR, ER) \ 00081 do { \ 00082 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 00083 \ 00084 if (LIKELY(settings->on_##FOR)) { \ 00085 parser->state = CURRENT_STATE(); \ 00086 if (UNLIKELY(0 != settings->on_##FOR(parser))) { \ 00087 SET_ERRNO(HPE_CB_##FOR); \ 00088 } \ 00089 UPDATE_STATE(parser->state); \ 00090 \ 00091 /* We either errored above or got paused; get out */ \ 00092 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ 00093 return (ER); \ 00094 } \ 00095 } \ 00096 } while (0) 00097 00098 /* Run the notify callback FOR and consume the current byte */ 00099 #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) 00100 00101 /* Run the notify callback FOR and don't consume the current byte */ 00102 #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) 00103 00104 /* Run data callback FOR with LEN bytes, returning ER if it 
fails */ 00105 #define CALLBACK_DATA_(FOR, LEN, ER) \ 00106 do { \ 00107 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 00108 \ 00109 if (FOR##_mark) { \ 00110 if (LIKELY(settings->on_##FOR)) { \ 00111 parser->state = CURRENT_STATE(); \ 00112 if (UNLIKELY(0 != \ 00113 settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \ 00114 SET_ERRNO(HPE_CB_##FOR); \ 00115 } \ 00116 UPDATE_STATE(parser->state); \ 00117 \ 00118 /* We either errored above or got paused; get out */ \ 00119 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ 00120 return (ER); \ 00121 } \ 00122 } \ 00123 FOR##_mark = NULL; \ 00124 } \ 00125 } while (0) 00126 00127 /* Run the data callback FOR and consume the current byte */ 00128 #define CALLBACK_DATA(FOR) \ 00129 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) 00130 00131 /* Run the data callback FOR and don't consume the current byte */ 00132 #define CALLBACK_DATA_NOADVANCE(FOR) \ 00133 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) 00134 00135 /* Set the mark FOR; non-destructive if mark is already set */ 00136 #define MARK(FOR) \ 00137 do { \ 00138 if (!FOR##_mark) { \ 00139 FOR##_mark = p; \ 00140 } \ 00141 } while (0) 00142 00143 /* Don't allow the total size of the HTTP headers (including the status 00144 * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect 00145 * embedders against denial-of-service attacks where the attacker feeds 00146 * us a never-ending header that the embedder keeps buffering. 00147 * 00148 * This check is arguably the responsibility of embedders but we're doing 00149 * it on the embedder's behalf because most won't bother and this way we 00150 * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger 00151 * than any reasonable request or response so this should never affect 00152 * day-to-day operation. 
00153 */ 00154 #define COUNT_HEADER_SIZE(V) \ 00155 do { \ 00156 parser->nread += (V); \ 00157 if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \ 00158 SET_ERRNO(HPE_HEADER_OVERFLOW); \ 00159 goto error; \ 00160 } \ 00161 } while (0) 00162 00163 00164 #define PROXY_CONNECTION "proxy-connection" 00165 #define CONNECTION "connection" 00166 #define CONTENT_LENGTH "content-length" 00167 #define TRANSFER_ENCODING "transfer-encoding" 00168 #define UPGRADE "upgrade" 00169 #define CHUNKED "chunked" 00170 #define KEEP_ALIVE "keep-alive" 00171 #define CLOSE "close" 00172 00173 00174 static const char *method_strings[] = 00175 { 00176 #define XX(num, name, string) #string, 00177 HTTP_METHOD_MAP(XX) 00178 #undef XX 00179 }; 00180 00181 00182 /* Tokens as defined by rfc 2616. Also lowercases them. 00183 * token = 1*<any CHAR except CTLs or separators> 00184 * separators = "(" | ")" | "<" | ">" | "@" 00185 * | "," | ";" | ":" | "\" | <"> 00186 * | "/" | "[" | "]" | "?" | "=" 00187 * | "{" | "}" | SP | HT 00188 */ 00189 static const char tokens[256] = { 00190 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ 00191 0, 0, 0, 0, 0, 0, 0, 0, 00192 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ 00193 0, 0, 0, 0, 0, 0, 0, 0, 00194 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ 00195 0, 0, 0, 0, 0, 0, 0, 0, 00196 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ 00197 0, 0, 0, 0, 0, 0, 0, 0, 00198 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ 00199 0, '!', 0, '#', '$', '%', '&', '\'', 00200 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ 00201 0, 0, '*', '+', 0, '-', '.', 0, 00202 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ 00203 '0', '1', '2', '3', '4', '5', '6', '7', 00204 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ 00205 '8', '9', 0, 0, 0, 0, 0, 0, 00206 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ 00207 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 00208 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ 00209 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 00210 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ 00211 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 00212 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ 00213 'x', 'y', 'z', 0, 0, 0, '^', '_', 00214 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ 00215 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 00216 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ 00217 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 00218 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ 00219 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 00220 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ 00221 'x', 'y', 'z', 0, '|', 0, '~', 0 }; 00222 00223 00224 static const int8_t unhex[256] = 00225 {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 00226 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 00227 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 00228 , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 00229 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 00230 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 00231 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 00232 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 00233 }; 00234 00235 00236 #if HTTP_PARSER_STRICT 00237 # define T(v) 0 00238 #else 00239 # define T(v) v 00240 #endif 00241 00242 00243 static const uint8_t normal_url_char[32] = { 00244 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ 00245 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, 00246 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ 00247 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, 00248 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ 00249 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, 00250 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ 00251 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, 00252 /* 32 sp 33 ! 
34 " 35 # 36 $ 37 % 38 & 39 ' */ 00253 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, 00254 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ 00255 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 00256 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ 00257 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 00258 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ 00259 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, 00260 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ 00261 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 00262 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ 00263 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 00264 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ 00265 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 00266 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ 00267 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 00268 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ 00269 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 00270 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ 00271 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 00272 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ 00273 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 00274 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ 00275 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; 00276 00277 #undef T 00278 00279 enum state 00280 { s_dead = 1 /* important that this is > 0 */ 00281 00282 , s_start_req_or_res 00283 , s_res_or_resp_H 00284 , s_start_res 00285 , s_res_H 00286 , s_res_HT 00287 , s_res_HTT 00288 , s_res_HTTP 00289 , s_res_first_http_major 00290 , s_res_http_major 00291 , s_res_first_http_minor 00292 , s_res_http_minor 00293 , s_res_first_status_code 00294 , s_res_status_code 00295 , s_res_status_start 00296 , s_res_status 00297 , s_res_line_almost_done 00298 00299 , s_start_req 00300 00301 , s_req_method 00302 , s_req_spaces_before_url 00303 , s_req_schema 00304 , s_req_schema_slash 00305 , s_req_schema_slash_slash 00306 , s_req_server_start 00307 , s_req_server 00308 , s_req_server_with_at 00309 , s_req_path 00310 , s_req_query_string_start 00311 , s_req_query_string 00312 , s_req_fragment_start 00313 , 
s_req_fragment 00314 , s_req_http_start 00315 , s_req_http_H 00316 , s_req_http_HT 00317 , s_req_http_HTT 00318 , s_req_http_HTTP 00319 , s_req_first_http_major 00320 , s_req_http_major 00321 , s_req_first_http_minor 00322 , s_req_http_minor 00323 , s_req_line_almost_done 00324 00325 , s_header_field_start 00326 , s_header_field 00327 , s_header_value_discard_ws 00328 , s_header_value_discard_ws_almost_done 00329 , s_header_value_discard_lws 00330 , s_header_value_start 00331 , s_header_value 00332 , s_header_value_lws 00333 00334 , s_header_almost_done 00335 00336 , s_chunk_size_start 00337 , s_chunk_size 00338 , s_chunk_parameters 00339 , s_chunk_size_almost_done 00340 00341 , s_headers_almost_done 00342 , s_headers_done 00343 00344 /* Important: 's_headers_done' must be the last 'header' state. All 00345 * states beyond this must be 'body' states. It is used for overflow 00346 * checking. See the PARSING_HEADER() macro. 00347 */ 00348 00349 , s_chunk_data 00350 , s_chunk_data_almost_done 00351 , s_chunk_data_done 00352 00353 , s_body_identity 00354 , s_body_identity_eof 00355 00356 , s_message_done 00357 }; 00358 00359 00360 #define PARSING_HEADER(state) (state <= s_headers_done) 00361 00362 00363 enum header_states 00364 { h_general = 0 00365 , h_C 00366 , h_CO 00367 , h_CON 00368 00369 , h_matching_connection 00370 , h_matching_proxy_connection 00371 , h_matching_content_length 00372 , h_matching_transfer_encoding 00373 , h_matching_upgrade 00374 00375 , h_connection 00376 , h_content_length 00377 , h_transfer_encoding 00378 , h_upgrade 00379 00380 , h_matching_transfer_encoding_chunked 00381 , h_matching_connection_token_start 00382 , h_matching_connection_keep_alive 00383 , h_matching_connection_close 00384 , h_matching_connection_upgrade 00385 , h_matching_connection_token 00386 00387 , h_transfer_encoding_chunked 00388 , h_connection_keep_alive 00389 , h_connection_close 00390 , h_connection_upgrade 00391 }; 00392 00393 enum http_host_state 00394 { 00395 
s_http_host_dead = 1 00396 , s_http_userinfo_start 00397 , s_http_userinfo 00398 , s_http_host_start 00399 , s_http_host_v6_start 00400 , s_http_host 00401 , s_http_host_v6 00402 , s_http_host_v6_end 00403 , s_http_host_v6_zone_start 00404 , s_http_host_v6_zone 00405 , s_http_host_port_start 00406 , s_http_host_port 00407 }; 00408 00409 /* Macros for character classes; depends on strict-mode */ 00410 #define CR '\r' 00411 #define LF '\n' 00412 #define LOWER(c) (unsigned char)(c | 0x20) 00413 #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') 00414 #define IS_NUM(c) ((c) >= '0' && (c) <= '9') 00415 #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) 00416 #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) 00417 #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ 00418 (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ 00419 (c) == ')') 00420 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ 00421 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ 00422 (c) == '$' || (c) == ',') 00423 00424 #define STRICT_TOKEN(c) (tokens[(unsigned char)c]) 00425 00426 #if HTTP_PARSER_STRICT 00427 #define TOKEN(c) (tokens[(unsigned char)c]) 00428 #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) 00429 #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') 00430 #else 00431 #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c]) 00432 #define IS_URL_CHAR(c) \ 00433 (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) 00434 #define IS_HOST_CHAR(c) \ 00435 (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') 00436 #endif 00437 00438 /** 00439 * Verify that a char is a valid visible (printable) US-ASCII 00440 * character or %x80-FF 00441 **/ 00442 #define IS_HEADER_CHAR(ch) \ 00443 (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127)) 00444 00445 #define start_state (parser->type == HTTP_REQUEST ? 
s_start_req : s_start_res) 00446 00447 00448 #if HTTP_PARSER_STRICT 00449 # define STRICT_CHECK(cond) \ 00450 do { \ 00451 if (cond) { \ 00452 SET_ERRNO(HPE_STRICT); \ 00453 goto error; \ 00454 } \ 00455 } while (0) 00456 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead) 00457 #else 00458 # define STRICT_CHECK(cond) 00459 # define NEW_MESSAGE() start_state 00460 #endif 00461 00462 00463 /* Map errno values to strings for human-readable output */ 00464 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, 00465 static struct { 00466 const char *name; 00467 const char *description; 00468 } http_strerror_tab[] = { 00469 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) 00470 }; 00471 #undef HTTP_STRERROR_GEN 00472 00473 int http_message_needs_eof(const http_parser *parser); 00474 00475 /* Our URL parser. 00476 * 00477 * This is designed to be shared by http_parser_execute() for URL validation, 00478 * hence it has a state transition + byte-for-byte interface. In addition, it 00479 * is meant to be embedded in http_parser_parse_url(), which does the dirty 00480 * work of turning state transitions URL components for its API. 00481 * 00482 * This function should only be invoked with non-space characters. It is 00483 * assumed that the caller cares about (and can detect) the transition between 00484 * URL and non-URL states by looking for these. 00485 */ 00486 static enum state 00487 parse_url_char(enum state s, const char ch) 00488 { 00489 if (ch == ' ' || ch == '\r' || ch == '\n') { 00490 return s_dead; 00491 } 00492 00493 #if HTTP_PARSER_STRICT 00494 if (ch == '\t' || ch == '\f') { 00495 return s_dead; 00496 } 00497 #endif 00498 00499 switch (s) { 00500 case s_req_spaces_before_url: 00501 /* Proxied requests are followed by scheme of an absolute URI (alpha). 00502 * All methods except CONNECT are followed by '/' or '*'. 
00503 */ 00504 00505 if (ch == '/' || ch == '*') { 00506 return s_req_path; 00507 } 00508 00509 if (IS_ALPHA(ch)) { 00510 return s_req_schema; 00511 } 00512 00513 break; 00514 00515 case s_req_schema: 00516 if (IS_ALPHA(ch)) { 00517 return s; 00518 } 00519 00520 if (ch == ':') { 00521 return s_req_schema_slash; 00522 } 00523 00524 break; 00525 00526 case s_req_schema_slash: 00527 if (ch == '/') { 00528 return s_req_schema_slash_slash; 00529 } 00530 00531 break; 00532 00533 case s_req_schema_slash_slash: 00534 if (ch == '/') { 00535 return s_req_server_start; 00536 } 00537 00538 break; 00539 00540 case s_req_server_with_at: 00541 if (ch == '@') { 00542 return s_dead; 00543 } 00544 00545 /* FALLTHROUGH */ 00546 case s_req_server_start: 00547 case s_req_server: 00548 if (ch == '/') { 00549 return s_req_path; 00550 } 00551 00552 if (ch == '?') { 00553 return s_req_query_string_start; 00554 } 00555 00556 if (ch == '@') { 00557 return s_req_server_with_at; 00558 } 00559 00560 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { 00561 return s_req_server; 00562 } 00563 00564 break; 00565 00566 case s_req_path: 00567 if (IS_URL_CHAR(ch)) { 00568 return s; 00569 } 00570 00571 switch (ch) { 00572 case '?': 00573 return s_req_query_string_start; 00574 00575 case '#': 00576 return s_req_fragment_start; 00577 } 00578 00579 break; 00580 00581 case s_req_query_string_start: 00582 case s_req_query_string: 00583 if (IS_URL_CHAR(ch)) { 00584 return s_req_query_string; 00585 } 00586 00587 switch (ch) { 00588 case '?': 00589 /* allow extra '?' 
in query string */ 00590 return s_req_query_string; 00591 00592 case '#': 00593 return s_req_fragment_start; 00594 } 00595 00596 break; 00597 00598 case s_req_fragment_start: 00599 if (IS_URL_CHAR(ch)) { 00600 return s_req_fragment; 00601 } 00602 00603 switch (ch) { 00604 case '?': 00605 return s_req_fragment; 00606 00607 case '#': 00608 return s; 00609 } 00610 00611 break; 00612 00613 case s_req_fragment: 00614 if (IS_URL_CHAR(ch)) { 00615 return s; 00616 } 00617 00618 switch (ch) { 00619 case '?': 00620 case '#': 00621 return s; 00622 } 00623 00624 break; 00625 00626 default: 00627 break; 00628 } 00629 00630 /* We should never fall out of the switch above unless there's an error */ 00631 return s_dead; 00632 } 00633 00634 size_t http_parser_execute (http_parser *parser, 00635 const http_parser_settings *settings, 00636 const char *data, 00637 size_t len) 00638 { 00639 char c, ch; 00640 int8_t unhex_val; 00641 const char *p = data; 00642 const char *header_field_mark = 0; 00643 const char *header_value_mark = 0; 00644 const char *url_mark = 0; 00645 const char *body_mark = 0; 00646 const char *status_mark = 0; 00647 enum state p_state = (enum state) parser->state; 00648 const unsigned int lenient = parser->lenient_http_headers; 00649 00650 /* We're in an error state. Don't bother doing anything. */ 00651 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 00652 return 0; 00653 } 00654 00655 if (len == 0) { 00656 switch (CURRENT_STATE()) { 00657 case s_body_identity_eof: 00658 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if 00659 * we got paused. 
00660 */ 00661 CALLBACK_NOTIFY_NOADVANCE(message_complete); 00662 return 0; 00663 00664 case s_dead: 00665 case s_start_req_or_res: 00666 case s_start_res: 00667 case s_start_req: 00668 return 0; 00669 00670 default: 00671 SET_ERRNO(HPE_INVALID_EOF_STATE); 00672 return 1; 00673 } 00674 } 00675 00676 00677 if (CURRENT_STATE() == s_header_field) 00678 header_field_mark = data; 00679 if (CURRENT_STATE() == s_header_value) 00680 header_value_mark = data; 00681 switch (CURRENT_STATE()) { 00682 case s_req_path: 00683 case s_req_schema: 00684 case s_req_schema_slash: 00685 case s_req_schema_slash_slash: 00686 case s_req_server_start: 00687 case s_req_server: 00688 case s_req_server_with_at: 00689 case s_req_query_string_start: 00690 case s_req_query_string: 00691 case s_req_fragment_start: 00692 case s_req_fragment: 00693 url_mark = data; 00694 break; 00695 case s_res_status: 00696 status_mark = data; 00697 break; 00698 default: 00699 break; 00700 } 00701 00702 for (p=data; p != data + len; p++) { 00703 ch = *p; 00704 00705 if (PARSING_HEADER(CURRENT_STATE())) 00706 COUNT_HEADER_SIZE(1); 00707 00708 reexecute: 00709 switch (CURRENT_STATE()) { 00710 00711 case s_dead: 00712 /* this state is used after a 'Connection: close' message 00713 * the parser will error out if it reads another message 00714 */ 00715 if (LIKELY(ch == CR || ch == LF)) 00716 break; 00717 00718 SET_ERRNO(HPE_CLOSED_CONNECTION); 00719 goto error; 00720 00721 case s_start_req_or_res: 00722 { 00723 if (ch == CR || ch == LF) 00724 break; 00725 parser->flags = 0; 00726 parser->content_length = ULLONG_MAX; 00727 00728 if (ch == 'H') { 00729 UPDATE_STATE(s_res_or_resp_H); 00730 00731 CALLBACK_NOTIFY(message_begin); 00732 } else { 00733 parser->type = HTTP_REQUEST; 00734 UPDATE_STATE(s_start_req); 00735 REEXECUTE(); 00736 } 00737 00738 break; 00739 } 00740 00741 case s_res_or_resp_H: 00742 if (ch == 'T') { 00743 parser->type = HTTP_RESPONSE; 00744 UPDATE_STATE(s_res_HT); 00745 } else { 00746 if (UNLIKELY(ch != 
'E')) { 00747 SET_ERRNO(HPE_INVALID_CONSTANT); 00748 goto error; 00749 } 00750 00751 parser->type = HTTP_REQUEST; 00752 parser->method = HTTP_HEAD; 00753 parser->index = 2; 00754 UPDATE_STATE(s_req_method); 00755 } 00756 break; 00757 00758 case s_start_res: 00759 { 00760 parser->flags = 0; 00761 parser->content_length = ULLONG_MAX; 00762 00763 switch (ch) { 00764 case 'H': 00765 UPDATE_STATE(s_res_H); 00766 break; 00767 00768 case CR: 00769 case LF: 00770 break; 00771 00772 default: 00773 SET_ERRNO(HPE_INVALID_CONSTANT); 00774 goto error; 00775 } 00776 00777 CALLBACK_NOTIFY(message_begin); 00778 break; 00779 } 00780 00781 case s_res_H: 00782 STRICT_CHECK(ch != 'T'); 00783 UPDATE_STATE(s_res_HT); 00784 break; 00785 00786 case s_res_HT: 00787 STRICT_CHECK(ch != 'T'); 00788 UPDATE_STATE(s_res_HTT); 00789 break; 00790 00791 case s_res_HTT: 00792 STRICT_CHECK(ch != 'P'); 00793 UPDATE_STATE(s_res_HTTP); 00794 break; 00795 00796 case s_res_HTTP: 00797 STRICT_CHECK(ch != '/'); 00798 UPDATE_STATE(s_res_first_http_major); 00799 break; 00800 00801 case s_res_first_http_major: 00802 if (UNLIKELY(ch < '0' || ch > '9')) { 00803 SET_ERRNO(HPE_INVALID_VERSION); 00804 goto error; 00805 } 00806 00807 parser->http_major = ch - '0'; 00808 UPDATE_STATE(s_res_http_major); 00809 break; 00810 00811 /* major HTTP version or dot */ 00812 case s_res_http_major: 00813 { 00814 if (ch == '.') { 00815 UPDATE_STATE(s_res_first_http_minor); 00816 break; 00817 } 00818 00819 if (!IS_NUM(ch)) { 00820 SET_ERRNO(HPE_INVALID_VERSION); 00821 goto error; 00822 } 00823 00824 parser->http_major *= 10; 00825 parser->http_major += ch - '0'; 00826 00827 if (UNLIKELY(parser->http_major > 999)) { 00828 SET_ERRNO(HPE_INVALID_VERSION); 00829 goto error; 00830 } 00831 00832 break; 00833 } 00834 00835 /* first digit of minor HTTP version */ 00836 case s_res_first_http_minor: 00837 if (UNLIKELY(!IS_NUM(ch))) { 00838 SET_ERRNO(HPE_INVALID_VERSION); 00839 goto error; 00840 } 00841 00842 parser->http_minor = ch - '0'; 
00843 UPDATE_STATE(s_res_http_minor); 00844 break; 00845 00846 /* minor HTTP version or end of request line */ 00847 case s_res_http_minor: 00848 { 00849 if (ch == ' ') { 00850 UPDATE_STATE(s_res_first_status_code); 00851 break; 00852 } 00853 00854 if (UNLIKELY(!IS_NUM(ch))) { 00855 SET_ERRNO(HPE_INVALID_VERSION); 00856 goto error; 00857 } 00858 00859 parser->http_minor *= 10; 00860 parser->http_minor += ch - '0'; 00861 00862 if (UNLIKELY(parser->http_minor > 999)) { 00863 SET_ERRNO(HPE_INVALID_VERSION); 00864 goto error; 00865 } 00866 00867 break; 00868 } 00869 00870 case s_res_first_status_code: 00871 { 00872 if (!IS_NUM(ch)) { 00873 if (ch == ' ') { 00874 break; 00875 } 00876 00877 SET_ERRNO(HPE_INVALID_STATUS); 00878 goto error; 00879 } 00880 parser->status_code = ch - '0'; 00881 UPDATE_STATE(s_res_status_code); 00882 break; 00883 } 00884 00885 case s_res_status_code: 00886 { 00887 if (!IS_NUM(ch)) { 00888 switch (ch) { 00889 case ' ': 00890 UPDATE_STATE(s_res_status_start); 00891 break; 00892 case CR: 00893 UPDATE_STATE(s_res_line_almost_done); 00894 break; 00895 case LF: 00896 UPDATE_STATE(s_header_field_start); 00897 break; 00898 default: 00899 SET_ERRNO(HPE_INVALID_STATUS); 00900 goto error; 00901 } 00902 break; 00903 } 00904 00905 parser->status_code *= 10; 00906 parser->status_code += ch - '0'; 00907 00908 if (UNLIKELY(parser->status_code > 999)) { 00909 SET_ERRNO(HPE_INVALID_STATUS); 00910 goto error; 00911 } 00912 00913 break; 00914 } 00915 00916 case s_res_status_start: 00917 { 00918 if (ch == CR) { 00919 UPDATE_STATE(s_res_line_almost_done); 00920 break; 00921 } 00922 00923 if (ch == LF) { 00924 UPDATE_STATE(s_header_field_start); 00925 break; 00926 } 00927 00928 MARK(status); 00929 UPDATE_STATE(s_res_status); 00930 parser->index = 0; 00931 break; 00932 } 00933 00934 case s_res_status: 00935 if (ch == CR) { 00936 UPDATE_STATE(s_res_line_almost_done); 00937 CALLBACK_DATA(status); 00938 break; 00939 } 00940 00941 if (ch == LF) { 00942 
UPDATE_STATE(s_header_field_start); 00943 CALLBACK_DATA(status); 00944 break; 00945 } 00946 00947 break; 00948 00949 case s_res_line_almost_done: 00950 STRICT_CHECK(ch != LF); 00951 UPDATE_STATE(s_header_field_start); 00952 break; 00953 00954 case s_start_req: 00955 { 00956 if (ch == CR || ch == LF) 00957 break; 00958 parser->flags = 0; 00959 parser->content_length = ULLONG_MAX; 00960 00961 if (UNLIKELY(!IS_ALPHA(ch))) { 00962 SET_ERRNO(HPE_INVALID_METHOD); 00963 goto error; 00964 } 00965 00966 parser->method = (enum http_method) 0; 00967 parser->index = 1; 00968 switch (ch) { 00969 case 'A': parser->method = HTTP_ACL; break; 00970 case 'B': parser->method = HTTP_BIND; break; 00971 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; 00972 case 'D': parser->method = HTTP_DELETE; break; 00973 case 'G': parser->method = HTTP_GET; break; 00974 case 'H': parser->method = HTTP_HEAD; break; 00975 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break; 00976 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; 00977 case 'N': parser->method = HTTP_NOTIFY; break; 00978 case 'O': parser->method = HTTP_OPTIONS; break; 00979 case 'P': parser->method = HTTP_POST; 00980 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ 00981 break; 00982 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break; 00983 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break; 00984 case 'T': parser->method = HTTP_TRACE; break; 00985 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break; 00986 default: 00987 SET_ERRNO(HPE_INVALID_METHOD); 00988 goto error; 00989 } 00990 UPDATE_STATE(s_req_method); 00991 00992 CALLBACK_NOTIFY(message_begin); 00993 00994 break; 00995 } 00996 00997 case s_req_method: 00998 { 00999 const char *matcher; 01000 if (UNLIKELY(ch == '\0')) { 01001 SET_ERRNO(HPE_INVALID_METHOD); 01002 goto error; 01003 } 01004 01005 matcher = method_strings[parser->method]; 01006 if (ch 
== ' ' && matcher[parser->index] == '\0') { 01007 UPDATE_STATE(s_req_spaces_before_url); 01008 } else if (ch == matcher[parser->index]) { 01009 ; /* nada */ 01010 } else if (IS_ALPHA(ch)) { 01011 01012 switch (parser->method << 16 | parser->index << 8 | ch) { 01013 #define XX(meth, pos, ch, new_meth) \ 01014 case (HTTP_##meth << 16 | pos << 8 | ch): \ 01015 parser->method = HTTP_##new_meth; break; 01016 01017 XX(POST, 1, 'U', PUT) 01018 XX(POST, 1, 'A', PATCH) 01019 XX(CONNECT, 1, 'H', CHECKOUT) 01020 XX(CONNECT, 2, 'P', COPY) 01021 XX(MKCOL, 1, 'O', MOVE) 01022 XX(MKCOL, 1, 'E', MERGE) 01023 XX(MKCOL, 2, 'A', MKACTIVITY) 01024 XX(MKCOL, 3, 'A', MKCALENDAR) 01025 XX(SUBSCRIBE, 1, 'E', SEARCH) 01026 XX(REPORT, 2, 'B', REBIND) 01027 XX(POST, 1, 'R', PROPFIND) 01028 XX(PROPFIND, 4, 'P', PROPPATCH) 01029 XX(PUT, 2, 'R', PURGE) 01030 XX(LOCK, 1, 'I', LINK) 01031 XX(UNLOCK, 2, 'S', UNSUBSCRIBE) 01032 XX(UNLOCK, 2, 'B', UNBIND) 01033 XX(UNLOCK, 3, 'I', UNLINK) 01034 #undef XX 01035 01036 default: 01037 SET_ERRNO(HPE_INVALID_METHOD); 01038 goto error; 01039 } 01040 } else if (ch == '-' && 01041 parser->index == 1 && 01042 parser->method == HTTP_MKCOL) { 01043 parser->method = HTTP_MSEARCH; 01044 } else { 01045 SET_ERRNO(HPE_INVALID_METHOD); 01046 goto error; 01047 } 01048 01049 ++parser->index; 01050 break; 01051 } 01052 01053 case s_req_spaces_before_url: 01054 { 01055 if (ch == ' ') break; 01056 01057 MARK(url); 01058 if (parser->method == HTTP_CONNECT) { 01059 UPDATE_STATE(s_req_server_start); 01060 } 01061 01062 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 01063 if (UNLIKELY(CURRENT_STATE() == s_dead)) { 01064 SET_ERRNO(HPE_INVALID_URL); 01065 goto error; 01066 } 01067 01068 break; 01069 } 01070 01071 case s_req_schema: 01072 case s_req_schema_slash: 01073 case s_req_schema_slash_slash: 01074 case s_req_server_start: 01075 { 01076 switch (ch) { 01077 /* No whitespace allowed here */ 01078 case ' ': 01079 case CR: 01080 case LF: 01081 SET_ERRNO(HPE_INVALID_URL); 
01082 goto error; 01083 default: 01084 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 01085 if (UNLIKELY(CURRENT_STATE() == s_dead)) { 01086 SET_ERRNO(HPE_INVALID_URL); 01087 goto error; 01088 } 01089 } 01090 01091 break; 01092 } 01093 01094 case s_req_server: 01095 case s_req_server_with_at: 01096 case s_req_path: 01097 case s_req_query_string_start: 01098 case s_req_query_string: 01099 case s_req_fragment_start: 01100 case s_req_fragment: 01101 { 01102 switch (ch) { 01103 case ' ': 01104 UPDATE_STATE(s_req_http_start); 01105 CALLBACK_DATA(url); 01106 break; 01107 case CR: 01108 case LF: 01109 parser->http_major = 0; 01110 parser->http_minor = 9; 01111 UPDATE_STATE((ch == CR) ? 01112 s_req_line_almost_done : 01113 s_header_field_start); 01114 CALLBACK_DATA(url); 01115 break; 01116 default: 01117 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 01118 if (UNLIKELY(CURRENT_STATE() == s_dead)) { 01119 SET_ERRNO(HPE_INVALID_URL); 01120 goto error; 01121 } 01122 } 01123 break; 01124 } 01125 01126 case s_req_http_start: 01127 switch (ch) { 01128 case 'H': 01129 UPDATE_STATE(s_req_http_H); 01130 break; 01131 case ' ': 01132 break; 01133 default: 01134 SET_ERRNO(HPE_INVALID_CONSTANT); 01135 goto error; 01136 } 01137 break; 01138 01139 case s_req_http_H: 01140 STRICT_CHECK(ch != 'T'); 01141 UPDATE_STATE(s_req_http_HT); 01142 break; 01143 01144 case s_req_http_HT: 01145 STRICT_CHECK(ch != 'T'); 01146 UPDATE_STATE(s_req_http_HTT); 01147 break; 01148 01149 case s_req_http_HTT: 01150 STRICT_CHECK(ch != 'P'); 01151 UPDATE_STATE(s_req_http_HTTP); 01152 break; 01153 01154 case s_req_http_HTTP: 01155 STRICT_CHECK(ch != '/'); 01156 UPDATE_STATE(s_req_first_http_major); 01157 break; 01158 01159 /* first digit of major HTTP version */ 01160 case s_req_first_http_major: 01161 if (UNLIKELY(ch < '1' || ch > '9')) { 01162 SET_ERRNO(HPE_INVALID_VERSION); 01163 goto error; 01164 } 01165 01166 parser->http_major = ch - '0'; 01167 UPDATE_STATE(s_req_http_major); 01168 break; 01169 01170 
/* major HTTP version or dot */ 01171 case s_req_http_major: 01172 { 01173 if (ch == '.') { 01174 UPDATE_STATE(s_req_first_http_minor); 01175 break; 01176 } 01177 01178 if (UNLIKELY(!IS_NUM(ch))) { 01179 SET_ERRNO(HPE_INVALID_VERSION); 01180 goto error; 01181 } 01182 01183 parser->http_major *= 10; 01184 parser->http_major += ch - '0'; 01185 01186 if (UNLIKELY(parser->http_major > 999)) { 01187 SET_ERRNO(HPE_INVALID_VERSION); 01188 goto error; 01189 } 01190 01191 break; 01192 } 01193 01194 /* first digit of minor HTTP version */ 01195 case s_req_first_http_minor: 01196 if (UNLIKELY(!IS_NUM(ch))) { 01197 SET_ERRNO(HPE_INVALID_VERSION); 01198 goto error; 01199 } 01200 01201 parser->http_minor = ch - '0'; 01202 UPDATE_STATE(s_req_http_minor); 01203 break; 01204 01205 /* minor HTTP version or end of request line */ 01206 case s_req_http_minor: 01207 { 01208 if (ch == CR) { 01209 UPDATE_STATE(s_req_line_almost_done); 01210 break; 01211 } 01212 01213 if (ch == LF) { 01214 UPDATE_STATE(s_header_field_start); 01215 break; 01216 } 01217 01218 /* XXX allow spaces after digit? 
*/ 01219 01220 if (UNLIKELY(!IS_NUM(ch))) { 01221 SET_ERRNO(HPE_INVALID_VERSION); 01222 goto error; 01223 } 01224 01225 parser->http_minor *= 10; 01226 parser->http_minor += ch - '0'; 01227 01228 if (UNLIKELY(parser->http_minor > 999)) { 01229 SET_ERRNO(HPE_INVALID_VERSION); 01230 goto error; 01231 } 01232 01233 break; 01234 } 01235 01236 /* end of request line */ 01237 case s_req_line_almost_done: 01238 { 01239 if (UNLIKELY(ch != LF)) { 01240 SET_ERRNO(HPE_LF_EXPECTED); 01241 goto error; 01242 } 01243 01244 UPDATE_STATE(s_header_field_start); 01245 break; 01246 } 01247 01248 case s_header_field_start: 01249 { 01250 if (ch == CR) { 01251 UPDATE_STATE(s_headers_almost_done); 01252 break; 01253 } 01254 01255 if (ch == LF) { 01256 /* they might be just sending \n instead of \r\n so this would be 01257 * the second \n to denote the end of headers*/ 01258 UPDATE_STATE(s_headers_almost_done); 01259 REEXECUTE(); 01260 } 01261 01262 c = TOKEN(ch); 01263 01264 if (UNLIKELY(!c)) { 01265 SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 01266 goto error; 01267 } 01268 01269 MARK(header_field); 01270 01271 parser->index = 0; 01272 UPDATE_STATE(s_header_field); 01273 01274 switch (c) { 01275 case 'c': 01276 parser->header_state = h_C; 01277 break; 01278 01279 case 'p': 01280 parser->header_state = h_matching_proxy_connection; 01281 break; 01282 01283 case 't': 01284 parser->header_state = h_matching_transfer_encoding; 01285 break; 01286 01287 case 'u': 01288 parser->header_state = h_matching_upgrade; 01289 break; 01290 01291 default: 01292 parser->header_state = h_general; 01293 break; 01294 } 01295 break; 01296 } 01297 01298 case s_header_field: 01299 { 01300 const char* start = p; 01301 for (; p != data + len; p++) { 01302 ch = *p; 01303 c = TOKEN(ch); 01304 01305 if (!c) 01306 break; 01307 01308 switch (parser->header_state) { 01309 case h_general: 01310 break; 01311 01312 case h_C: 01313 parser->index++; 01314 parser->header_state = (c == 'o' ? 
h_CO : h_general); 01315 break; 01316 01317 case h_CO: 01318 parser->index++; 01319 parser->header_state = (c == 'n' ? h_CON : h_general); 01320 break; 01321 01322 case h_CON: 01323 parser->index++; 01324 switch (c) { 01325 case 'n': 01326 parser->header_state = h_matching_connection; 01327 break; 01328 case 't': 01329 parser->header_state = h_matching_content_length; 01330 break; 01331 default: 01332 parser->header_state = h_general; 01333 break; 01334 } 01335 break; 01336 01337 /* connection */ 01338 01339 case h_matching_connection: 01340 parser->index++; 01341 if (parser->index > sizeof(CONNECTION)-1 01342 || c != CONNECTION[parser->index]) { 01343 parser->header_state = h_general; 01344 } else if (parser->index == sizeof(CONNECTION)-2) { 01345 parser->header_state = h_connection; 01346 } 01347 break; 01348 01349 /* proxy-connection */ 01350 01351 case h_matching_proxy_connection: 01352 parser->index++; 01353 if (parser->index > sizeof(PROXY_CONNECTION)-1 01354 || c != PROXY_CONNECTION[parser->index]) { 01355 parser->header_state = h_general; 01356 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { 01357 parser->header_state = h_connection; 01358 } 01359 break; 01360 01361 /* content-length */ 01362 01363 case h_matching_content_length: 01364 parser->index++; 01365 if (parser->index > sizeof(CONTENT_LENGTH)-1 01366 || c != CONTENT_LENGTH[parser->index]) { 01367 parser->header_state = h_general; 01368 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { 01369 parser->header_state = h_content_length; 01370 } 01371 break; 01372 01373 /* transfer-encoding */ 01374 01375 case h_matching_transfer_encoding: 01376 parser->index++; 01377 if (parser->index > sizeof(TRANSFER_ENCODING)-1 01378 || c != TRANSFER_ENCODING[parser->index]) { 01379 parser->header_state = h_general; 01380 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { 01381 parser->header_state = h_transfer_encoding; 01382 } 01383 break; 01384 01385 /* upgrade */ 01386 01387 case 
h_matching_upgrade: 01388 parser->index++; 01389 if (parser->index > sizeof(UPGRADE)-1 01390 || c != UPGRADE[parser->index]) { 01391 parser->header_state = h_general; 01392 } else if (parser->index == sizeof(UPGRADE)-2) { 01393 parser->header_state = h_upgrade; 01394 } 01395 break; 01396 01397 case h_connection: 01398 case h_content_length: 01399 case h_transfer_encoding: 01400 case h_upgrade: 01401 if (ch != ' ') parser->header_state = h_general; 01402 break; 01403 01404 default: 01405 assert(0 && "Unknown header_state"); 01406 break; 01407 } 01408 } 01409 01410 COUNT_HEADER_SIZE(p - start); 01411 01412 if (p == data + len) { 01413 --p; 01414 break; 01415 } 01416 01417 if (ch == ':') { 01418 UPDATE_STATE(s_header_value_discard_ws); 01419 CALLBACK_DATA(header_field); 01420 break; 01421 } 01422 01423 SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 01424 goto error; 01425 } 01426 01427 case s_header_value_discard_ws: 01428 if (ch == ' ' || ch == '\t') break; 01429 01430 if (ch == CR) { 01431 UPDATE_STATE(s_header_value_discard_ws_almost_done); 01432 break; 01433 } 01434 01435 if (ch == LF) { 01436 UPDATE_STATE(s_header_value_discard_lws); 01437 break; 01438 } 01439 01440 /* FALLTHROUGH */ 01441 01442 case s_header_value_start: 01443 { 01444 MARK(header_value); 01445 01446 UPDATE_STATE(s_header_value); 01447 parser->index = 0; 01448 01449 c = LOWER(ch); 01450 01451 switch (parser->header_state) { 01452 case h_upgrade: 01453 parser->flags |= F_UPGRADE; 01454 parser->header_state = h_general; 01455 break; 01456 01457 case h_transfer_encoding: 01458 /* looking for 'Transfer-Encoding: chunked' */ 01459 if ('c' == c) { 01460 parser->header_state = h_matching_transfer_encoding_chunked; 01461 } else { 01462 parser->header_state = h_general; 01463 } 01464 break; 01465 01466 case h_content_length: 01467 if (UNLIKELY(!IS_NUM(ch))) { 01468 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 01469 goto error; 01470 } 01471 01472 if (parser->flags & F_CONTENTLENGTH) { 01473 
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); 01474 goto error; 01475 } 01476 01477 parser->flags |= F_CONTENTLENGTH; 01478 parser->content_length = ch - '0'; 01479 break; 01480 01481 case h_connection: 01482 /* looking for 'Connection: keep-alive' */ 01483 if (c == 'k') { 01484 parser->header_state = h_matching_connection_keep_alive; 01485 /* looking for 'Connection: close' */ 01486 } else if (c == 'c') { 01487 parser->header_state = h_matching_connection_close; 01488 } else if (c == 'u') { 01489 parser->header_state = h_matching_connection_upgrade; 01490 } else { 01491 parser->header_state = h_matching_connection_token; 01492 } 01493 break; 01494 01495 /* Multi-value `Connection` header */ 01496 case h_matching_connection_token_start: 01497 break; 01498 01499 default: 01500 parser->header_state = h_general; 01501 break; 01502 } 01503 break; 01504 } 01505 01506 case s_header_value: 01507 { 01508 const char* start = p; 01509 enum header_states h_state = (enum header_states) parser->header_state; 01510 for (; p != data + len; p++) { 01511 ch = *p; 01512 if (ch == CR) { 01513 UPDATE_STATE(s_header_almost_done); 01514 parser->header_state = h_state; 01515 CALLBACK_DATA(header_value); 01516 break; 01517 } 01518 01519 if (ch == LF) { 01520 UPDATE_STATE(s_header_almost_done); 01521 COUNT_HEADER_SIZE(p - start); 01522 parser->header_state = h_state; 01523 CALLBACK_DATA_NOADVANCE(header_value); 01524 REEXECUTE(); 01525 } 01526 01527 if (!lenient && !IS_HEADER_CHAR(ch)) { 01528 SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 01529 goto error; 01530 } 01531 01532 c = LOWER(ch); 01533 01534 switch (h_state) { 01535 case h_general: 01536 { 01537 const char* p_cr; 01538 const char* p_lf; 01539 size_t limit = data + len - p; 01540 01541 limit = MIN(limit, HTTP_MAX_HEADER_SIZE); 01542 01543 p_cr = (const char*) memchr(p, CR, limit); 01544 p_lf = (const char*) memchr(p, LF, limit); 01545 if (p_cr != NULL) { 01546 if (p_lf != NULL && p_cr >= p_lf) 01547 p = p_lf; 01548 else 01549 p = p_cr; 01550 
} else if (UNLIKELY(p_lf != NULL)) { 01551 p = p_lf; 01552 } else { 01553 p = data + len; 01554 } 01555 --p; 01556 01557 break; 01558 } 01559 01560 case h_connection: 01561 case h_transfer_encoding: 01562 assert(0 && "Shouldn't get here."); 01563 break; 01564 01565 case h_content_length: 01566 { 01567 uint64_t t; 01568 01569 if (ch == ' ') break; 01570 01571 if (UNLIKELY(!IS_NUM(ch))) { 01572 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 01573 parser->header_state = h_state; 01574 goto error; 01575 } 01576 01577 t = parser->content_length; 01578 t *= 10; 01579 t += ch - '0'; 01580 01581 /* Overflow? Test against a conservative limit for simplicity. */ 01582 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) { 01583 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 01584 parser->header_state = h_state; 01585 goto error; 01586 } 01587 01588 parser->content_length = t; 01589 break; 01590 } 01591 01592 /* Transfer-Encoding: chunked */ 01593 case h_matching_transfer_encoding_chunked: 01594 parser->index++; 01595 if (parser->index > sizeof(CHUNKED)-1 01596 || c != CHUNKED[parser->index]) { 01597 h_state = h_general; 01598 } else if (parser->index == sizeof(CHUNKED)-2) { 01599 h_state = h_transfer_encoding_chunked; 01600 } 01601 break; 01602 01603 case h_matching_connection_token_start: 01604 /* looking for 'Connection: keep-alive' */ 01605 if (c == 'k') { 01606 h_state = h_matching_connection_keep_alive; 01607 /* looking for 'Connection: close' */ 01608 } else if (c == 'c') { 01609 h_state = h_matching_connection_close; 01610 } else if (c == 'u') { 01611 h_state = h_matching_connection_upgrade; 01612 } else if (STRICT_TOKEN(c)) { 01613 h_state = h_matching_connection_token; 01614 } else if (c == ' ' || c == '\t') { 01615 /* Skip lws */ 01616 } else { 01617 h_state = h_general; 01618 } 01619 break; 01620 01621 /* looking for 'Connection: keep-alive' */ 01622 case h_matching_connection_keep_alive: 01623 parser->index++; 01624 if (parser->index > sizeof(KEEP_ALIVE)-1 01625 || c 
!= KEEP_ALIVE[parser->index]) { 01626 h_state = h_matching_connection_token; 01627 } else if (parser->index == sizeof(KEEP_ALIVE)-2) { 01628 h_state = h_connection_keep_alive; 01629 } 01630 break; 01631 01632 /* looking for 'Connection: close' */ 01633 case h_matching_connection_close: 01634 parser->index++; 01635 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { 01636 h_state = h_matching_connection_token; 01637 } else if (parser->index == sizeof(CLOSE)-2) { 01638 h_state = h_connection_close; 01639 } 01640 break; 01641 01642 /* looking for 'Connection: upgrade' */ 01643 case h_matching_connection_upgrade: 01644 parser->index++; 01645 if (parser->index > sizeof(UPGRADE) - 1 || 01646 c != UPGRADE[parser->index]) { 01647 h_state = h_matching_connection_token; 01648 } else if (parser->index == sizeof(UPGRADE)-2) { 01649 h_state = h_connection_upgrade; 01650 } 01651 break; 01652 01653 case h_matching_connection_token: 01654 if (ch == ',') { 01655 h_state = h_matching_connection_token_start; 01656 parser->index = 0; 01657 } 01658 break; 01659 01660 case h_transfer_encoding_chunked: 01661 if (ch != ' ') h_state = h_general; 01662 break; 01663 01664 case h_connection_keep_alive: 01665 case h_connection_close: 01666 case h_connection_upgrade: 01667 if (ch == ',') { 01668 if (h_state == h_connection_keep_alive) { 01669 parser->flags |= F_CONNECTION_KEEP_ALIVE; 01670 } else if (h_state == h_connection_close) { 01671 parser->flags |= F_CONNECTION_CLOSE; 01672 } else if (h_state == h_connection_upgrade) { 01673 parser->flags |= F_CONNECTION_UPGRADE; 01674 } 01675 h_state = h_matching_connection_token_start; 01676 parser->index = 0; 01677 } else if (ch != ' ') { 01678 h_state = h_matching_connection_token; 01679 } 01680 break; 01681 01682 default: 01683 UPDATE_STATE(s_header_value); 01684 h_state = h_general; 01685 break; 01686 } 01687 } 01688 parser->header_state = h_state; 01689 01690 COUNT_HEADER_SIZE(p - start); 01691 01692 if (p == data + len) 01693 --p; 
01694 break; 01695 } 01696 01697 case s_header_almost_done: 01698 { 01699 if (UNLIKELY(ch != LF)) { 01700 SET_ERRNO(HPE_LF_EXPECTED); 01701 goto error; 01702 } 01703 01704 UPDATE_STATE(s_header_value_lws); 01705 break; 01706 } 01707 01708 case s_header_value_lws: 01709 { 01710 if (ch == ' ' || ch == '\t') { 01711 UPDATE_STATE(s_header_value_start); 01712 REEXECUTE(); 01713 } 01714 01715 /* finished the header */ 01716 switch (parser->header_state) { 01717 case h_connection_keep_alive: 01718 parser->flags |= F_CONNECTION_KEEP_ALIVE; 01719 break; 01720 case h_connection_close: 01721 parser->flags |= F_CONNECTION_CLOSE; 01722 break; 01723 case h_transfer_encoding_chunked: 01724 parser->flags |= F_CHUNKED; 01725 break; 01726 case h_connection_upgrade: 01727 parser->flags |= F_CONNECTION_UPGRADE; 01728 break; 01729 default: 01730 break; 01731 } 01732 01733 UPDATE_STATE(s_header_field_start); 01734 REEXECUTE(); 01735 } 01736 01737 case s_header_value_discard_ws_almost_done: 01738 { 01739 STRICT_CHECK(ch != LF); 01740 UPDATE_STATE(s_header_value_discard_lws); 01741 break; 01742 } 01743 01744 case s_header_value_discard_lws: 01745 { 01746 if (ch == ' ' || ch == '\t') { 01747 UPDATE_STATE(s_header_value_discard_ws); 01748 break; 01749 } else { 01750 switch (parser->header_state) { 01751 case h_connection_keep_alive: 01752 parser->flags |= F_CONNECTION_KEEP_ALIVE; 01753 break; 01754 case h_connection_close: 01755 parser->flags |= F_CONNECTION_CLOSE; 01756 break; 01757 case h_connection_upgrade: 01758 parser->flags |= F_CONNECTION_UPGRADE; 01759 break; 01760 case h_transfer_encoding_chunked: 01761 parser->flags |= F_CHUNKED; 01762 break; 01763 default: 01764 break; 01765 } 01766 01767 /* header value was empty */ 01768 MARK(header_value); 01769 UPDATE_STATE(s_header_field_start); 01770 CALLBACK_DATA_NOADVANCE(header_value); 01771 REEXECUTE(); 01772 } 01773 } 01774 01775 case s_headers_almost_done: 01776 { 01777 STRICT_CHECK(ch != LF); 01778 01779 if (parser->flags & 
F_TRAILING) { 01780 /* End of a chunked request */ 01781 UPDATE_STATE(s_message_done); 01782 CALLBACK_NOTIFY_NOADVANCE(chunk_complete); 01783 REEXECUTE(); 01784 } 01785 01786 /* Cannot use chunked encoding and a content-length header together 01787 per the HTTP specification. */ 01788 if ((parser->flags & F_CHUNKED) && 01789 (parser->flags & F_CONTENTLENGTH)) { 01790 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); 01791 goto error; 01792 } 01793 01794 UPDATE_STATE(s_headers_done); 01795 01796 /* Set this here so that on_headers_complete() callbacks can see it */ 01797 parser->upgrade = 01798 ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) == 01799 (F_UPGRADE | F_CONNECTION_UPGRADE) || 01800 parser->method == HTTP_CONNECT); 01801 01802 /* Here we call the headers_complete callback. This is somewhat 01803 * different than other callbacks because if the user returns 1, we 01804 * will interpret that as saying that this message has no body. This 01805 * is needed for the annoying case of recieving a response to a HEAD 01806 * request. 01807 * 01808 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so 01809 * we have to simulate it by handling a change in errno below. 
01810 */ 01811 if (settings->on_headers_complete) { 01812 switch (settings->on_headers_complete(parser)) { 01813 case 0: 01814 break; 01815 01816 case 2: 01817 parser->upgrade = 1; 01818 01819 case 1: 01820 parser->flags |= F_SKIPBODY; 01821 break; 01822 01823 default: 01824 SET_ERRNO(HPE_CB_headers_complete); 01825 RETURN(p - data); /* Error */ 01826 } 01827 } 01828 01829 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 01830 RETURN(p - data); 01831 } 01832 01833 REEXECUTE(); 01834 } 01835 01836 case s_headers_done: 01837 { 01838 int hasBody; 01839 STRICT_CHECK(ch != LF); 01840 01841 parser->nread = 0; 01842 01843 hasBody = parser->flags & F_CHUNKED || 01844 (parser->content_length > 0 && parser->content_length != ULLONG_MAX); 01845 if (parser->upgrade && (parser->method == HTTP_CONNECT || 01846 (parser->flags & F_SKIPBODY) || !hasBody)) { 01847 /* Exit, the rest of the message is in a different protocol. */ 01848 UPDATE_STATE(NEW_MESSAGE()); 01849 CALLBACK_NOTIFY(message_complete); 01850 RETURN((p - data) + 1); 01851 } 01852 01853 if (parser->flags & F_SKIPBODY) { 01854 UPDATE_STATE(NEW_MESSAGE()); 01855 CALLBACK_NOTIFY(message_complete); 01856 } else if (parser->flags & F_CHUNKED) { 01857 /* chunked encoding - ignore Content-Length header */ 01858 UPDATE_STATE(s_chunk_size_start); 01859 } else { 01860 if (parser->content_length == 0) { 01861 /* Content-Length header given but zero: Content-Length: 0\r\n */ 01862 UPDATE_STATE(NEW_MESSAGE()); 01863 CALLBACK_NOTIFY(message_complete); 01864 } else if (parser->content_length != ULLONG_MAX) { 01865 /* Content-Length header given and non-zero */ 01866 UPDATE_STATE(s_body_identity); 01867 } else { 01868 if (!http_message_needs_eof(parser)) { 01869 /* Assume content-length 0 - read the next */ 01870 UPDATE_STATE(NEW_MESSAGE()); 01871 CALLBACK_NOTIFY(message_complete); 01872 } else { 01873 /* Read body until EOF */ 01874 UPDATE_STATE(s_body_identity_eof); 01875 } 01876 } 01877 } 01878 01879 break; 01880 } 01881 01882 case 
s_body_identity: 01883 { 01884 uint64_t to_read = MIN(parser->content_length, 01885 (uint64_t) ((data + len) - p)); 01886 01887 assert(parser->content_length != 0 01888 && parser->content_length != ULLONG_MAX); 01889 01890 /* The difference between advancing content_length and p is because 01891 * the latter will automaticaly advance on the next loop iteration. 01892 * Further, if content_length ends up at 0, we want to see the last 01893 * byte again for our message complete callback. 01894 */ 01895 MARK(body); 01896 parser->content_length -= to_read; 01897 p += to_read - 1; 01898 01899 if (parser->content_length == 0) { 01900 UPDATE_STATE(s_message_done); 01901 01902 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. 01903 * 01904 * The alternative to doing this is to wait for the next byte to 01905 * trigger the data callback, just as in every other case. The 01906 * problem with this is that this makes it difficult for the test 01907 * harness to distinguish between complete-on-EOF and 01908 * complete-on-length. It's not clear that this distinction is 01909 * important for applications, but let's keep it for now. 01910 */ 01911 CALLBACK_DATA_(body, p - body_mark + 1, p - data); 01912 REEXECUTE(); 01913 } 01914 01915 break; 01916 } 01917 01918 /* read until EOF */ 01919 case s_body_identity_eof: 01920 MARK(body); 01921 p = data + len - 1; 01922 01923 break; 01924 01925 case s_message_done: 01926 UPDATE_STATE(NEW_MESSAGE()); 01927 CALLBACK_NOTIFY(message_complete); 01928 if (parser->upgrade) { 01929 /* Exit, the rest of the message is in a different protocol. 
*/ 01930 RETURN((p - data) + 1); 01931 } 01932 break; 01933 01934 case s_chunk_size_start: 01935 { 01936 assert(parser->nread == 1); 01937 assert(parser->flags & F_CHUNKED); 01938 01939 unhex_val = unhex[(unsigned char)ch]; 01940 if (UNLIKELY(unhex_val == -1)) { 01941 SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 01942 goto error; 01943 } 01944 01945 parser->content_length = unhex_val; 01946 UPDATE_STATE(s_chunk_size); 01947 break; 01948 } 01949 01950 case s_chunk_size: 01951 { 01952 uint64_t t; 01953 01954 assert(parser->flags & F_CHUNKED); 01955 01956 if (ch == CR) { 01957 UPDATE_STATE(s_chunk_size_almost_done); 01958 break; 01959 } 01960 01961 unhex_val = unhex[(unsigned char)ch]; 01962 01963 if (unhex_val == -1) { 01964 if (ch == ';' || ch == ' ') { 01965 UPDATE_STATE(s_chunk_parameters); 01966 break; 01967 } 01968 01969 SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 01970 goto error; 01971 } 01972 01973 t = parser->content_length; 01974 t *= 16; 01975 t += unhex_val; 01976 01977 /* Overflow? Test against a conservative limit for simplicity. */ 01978 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) { 01979 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 01980 goto error; 01981 } 01982 01983 parser->content_length = t; 01984 break; 01985 } 01986 01987 case s_chunk_parameters: 01988 { 01989 assert(parser->flags & F_CHUNKED); 01990 /* just ignore this shit. 
TODO check for overflow */ 01991 if (ch == CR) { 01992 UPDATE_STATE(s_chunk_size_almost_done); 01993 break; 01994 } 01995 break; 01996 } 01997 01998 case s_chunk_size_almost_done: 01999 { 02000 assert(parser->flags & F_CHUNKED); 02001 STRICT_CHECK(ch != LF); 02002 02003 parser->nread = 0; 02004 02005 if (parser->content_length == 0) { 02006 parser->flags |= F_TRAILING; 02007 UPDATE_STATE(s_header_field_start); 02008 } else { 02009 UPDATE_STATE(s_chunk_data); 02010 } 02011 CALLBACK_NOTIFY(chunk_header); 02012 break; 02013 } 02014 02015 case s_chunk_data: 02016 { 02017 uint64_t to_read = MIN(parser->content_length, 02018 (uint64_t) ((data + len) - p)); 02019 02020 assert(parser->flags & F_CHUNKED); 02021 assert(parser->content_length != 0 02022 && parser->content_length != ULLONG_MAX); 02023 02024 /* See the explanation in s_body_identity for why the content 02025 * length and data pointers are managed this way. 02026 */ 02027 MARK(body); 02028 parser->content_length -= to_read; 02029 p += to_read - 1; 02030 02031 if (parser->content_length == 0) { 02032 UPDATE_STATE(s_chunk_data_almost_done); 02033 } 02034 02035 break; 02036 } 02037 02038 case s_chunk_data_almost_done: 02039 assert(parser->flags & F_CHUNKED); 02040 assert(parser->content_length == 0); 02041 STRICT_CHECK(ch != CR); 02042 UPDATE_STATE(s_chunk_data_done); 02043 CALLBACK_DATA(body); 02044 break; 02045 02046 case s_chunk_data_done: 02047 assert(parser->flags & F_CHUNKED); 02048 STRICT_CHECK(ch != LF); 02049 parser->nread = 0; 02050 UPDATE_STATE(s_chunk_size_start); 02051 CALLBACK_NOTIFY(chunk_complete); 02052 break; 02053 02054 default: 02055 assert(0 && "unhandled state"); 02056 SET_ERRNO(HPE_INVALID_INTERNAL_STATE); 02057 goto error; 02058 } 02059 } 02060 02061 /* Run callbacks for any marks that we have leftover after we ran our of 02062 * bytes. There should be at most one of these set, so it's OK to invoke 02063 * them in series (unset marks will not result in callbacks). 
02064 * 02065 * We use the NOADVANCE() variety of callbacks here because 'p' has already 02066 * overflowed 'data' and this allows us to correct for the off-by-one that 02067 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' 02068 * value that's in-bounds). 02069 */ 02070 02071 assert(((header_field_mark ? 1 : 0) + 02072 (header_value_mark ? 1 : 0) + 02073 (url_mark ? 1 : 0) + 02074 (body_mark ? 1 : 0) + 02075 (status_mark ? 1 : 0)) <= 1); 02076 02077 CALLBACK_DATA_NOADVANCE(header_field); 02078 CALLBACK_DATA_NOADVANCE(header_value); 02079 CALLBACK_DATA_NOADVANCE(url); 02080 CALLBACK_DATA_NOADVANCE(body); 02081 CALLBACK_DATA_NOADVANCE(status); 02082 02083 RETURN(len); 02084 02085 error: 02086 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { 02087 SET_ERRNO(HPE_UNKNOWN); 02088 } 02089 02090 RETURN(p - data); 02091 } 02092 02093 02094 /* Does the parser need to see an EOF to find the end of the message? */ 02095 int 02096 http_message_needs_eof (const http_parser *parser) 02097 { 02098 if (parser->type == HTTP_REQUEST) { 02099 return 0; 02100 } 02101 02102 /* See RFC 2616 section 4.4 */ 02103 if (parser->status_code / 100 == 1 || /* 1xx e.g. 
Continue */ 02104 parser->status_code == 204 || /* No Content */ 02105 parser->status_code == 304 || /* Not Modified */ 02106 parser->flags & F_SKIPBODY) { /* response to a HEAD request */ 02107 return 0; 02108 } 02109 02110 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { 02111 return 0; 02112 } 02113 02114 return 1; 02115 } 02116 02117 02118 int 02119 http_should_keep_alive (const http_parser *parser) 02120 { 02121 if (parser->http_major > 0 && parser->http_minor > 0) { 02122 /* HTTP/1.1 */ 02123 if (parser->flags & F_CONNECTION_CLOSE) { 02124 return 0; 02125 } 02126 } else { 02127 /* HTTP/1.0 or earlier */ 02128 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { 02129 return 0; 02130 } 02131 } 02132 02133 return !http_message_needs_eof(parser); 02134 } 02135 02136 02137 const char * 02138 http_method_str (enum http_method m) 02139 { 02140 return ELEM_AT(method_strings, m, "<unknown>"); 02141 } 02142 02143 02144 void 02145 http_parser_init (http_parser *parser, enum http_parser_type t) 02146 { 02147 void *data = parser->data; /* preserve application data */ 02148 memset(parser, 0, sizeof(*parser)); 02149 parser->data = data; 02150 parser->type = t; 02151 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? 
s_start_res : s_start_req_or_res)); 02152 parser->http_errno = HPE_OK; 02153 } 02154 02155 void 02156 http_parser_settings_init(http_parser_settings *settings) 02157 { 02158 memset(settings, 0, sizeof(*settings)); 02159 } 02160 02161 const char * 02162 http_errno_name(enum http_errno err) { 02163 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); 02164 return http_strerror_tab[err].name; 02165 } 02166 02167 const char * 02168 http_errno_description(enum http_errno err) { 02169 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); 02170 return http_strerror_tab[err].description; 02171 } 02172 02173 static enum http_host_state 02174 http_parse_host_char(enum http_host_state s, const char ch) { 02175 switch(s) { 02176 case s_http_userinfo: 02177 case s_http_userinfo_start: 02178 if (ch == '@') { 02179 return s_http_host_start; 02180 } 02181 02182 if (IS_USERINFO_CHAR(ch)) { 02183 return s_http_userinfo; 02184 } 02185 break; 02186 02187 case s_http_host_start: 02188 if (ch == '[') { 02189 return s_http_host_v6_start; 02190 } 02191 02192 if (IS_HOST_CHAR(ch)) { 02193 return s_http_host; 02194 } 02195 02196 break; 02197 02198 case s_http_host: 02199 if (IS_HOST_CHAR(ch)) { 02200 return s_http_host; 02201 } 02202 02203 /* FALLTHROUGH */ 02204 case s_http_host_v6_end: 02205 if (ch == ':') { 02206 return s_http_host_port_start; 02207 } 02208 02209 break; 02210 02211 case s_http_host_v6: 02212 if (ch == ']') { 02213 return s_http_host_v6_end; 02214 } 02215 02216 /* FALLTHROUGH */ 02217 case s_http_host_v6_start: 02218 if (IS_HEX(ch) || ch == ':' || ch == '.') { 02219 return s_http_host_v6; 02220 } 02221 02222 if (s == s_http_host_v6 && ch == '%') { 02223 return s_http_host_v6_zone_start; 02224 } 02225 break; 02226 02227 case s_http_host_v6_zone: 02228 if (ch == ']') { 02229 return s_http_host_v6_end; 02230 } 02231 02232 /* FALLTHROUGH */ 02233 case s_http_host_v6_zone_start: 02234 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ 02235 if 
(IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' || 02236 ch == '~') { 02237 return s_http_host_v6_zone; 02238 } 02239 break; 02240 02241 case s_http_host_port: 02242 case s_http_host_port_start: 02243 if (IS_NUM(ch)) { 02244 return s_http_host_port; 02245 } 02246 02247 break; 02248 02249 default: 02250 break; 02251 } 02252 return s_http_host_dead; 02253 } 02254 02255 static int 02256 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { 02257 enum http_host_state s; 02258 02259 const char *p; 02260 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; 02261 02262 assert(u->field_set & (1 << UF_HOST)); 02263 02264 u->field_data[UF_HOST].len = 0; 02265 02266 s = found_at ? s_http_userinfo_start : s_http_host_start; 02267 02268 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { 02269 enum http_host_state new_s = http_parse_host_char(s, *p); 02270 02271 if (new_s == s_http_host_dead) { 02272 return 1; 02273 } 02274 02275 switch(new_s) { 02276 case s_http_host: 02277 if (s != s_http_host) { 02278 u->field_data[UF_HOST].off = p - buf; 02279 } 02280 u->field_data[UF_HOST].len++; 02281 break; 02282 02283 case s_http_host_v6: 02284 if (s != s_http_host_v6) { 02285 u->field_data[UF_HOST].off = p - buf; 02286 } 02287 u->field_data[UF_HOST].len++; 02288 break; 02289 02290 case s_http_host_v6_zone_start: 02291 case s_http_host_v6_zone: 02292 u->field_data[UF_HOST].len++; 02293 break; 02294 02295 case s_http_host_port: 02296 if (s != s_http_host_port) { 02297 u->field_data[UF_PORT].off = p - buf; 02298 u->field_data[UF_PORT].len = 0; 02299 u->field_set |= (1 << UF_PORT); 02300 } 02301 u->field_data[UF_PORT].len++; 02302 break; 02303 02304 case s_http_userinfo: 02305 if (s != s_http_userinfo) { 02306 u->field_data[UF_USERINFO].off = p - buf ; 02307 u->field_data[UF_USERINFO].len = 0; 02308 u->field_set |= (1 << UF_USERINFO); 02309 } 02310 u->field_data[UF_USERINFO].len++; 02311 break; 02312 02313 
default: 02314 break; 02315 } 02316 s = new_s; 02317 } 02318 02319 /* Make sure we don't end somewhere unexpected */ 02320 switch (s) { 02321 case s_http_host_start: 02322 case s_http_host_v6_start: 02323 case s_http_host_v6: 02324 case s_http_host_v6_zone_start: 02325 case s_http_host_v6_zone: 02326 case s_http_host_port_start: 02327 case s_http_userinfo: 02328 case s_http_userinfo_start: 02329 return 1; 02330 default: 02331 break; 02332 } 02333 02334 return 0; 02335 } 02336 02337 void 02338 http_parser_url_init(struct http_parser_url *u) { 02339 memset(u, 0, sizeof(*u)); 02340 } 02341 02342 int 02343 http_parser_parse_url(const char *buf, size_t buflen, int is_connect, 02344 struct http_parser_url *u) 02345 { 02346 enum state s; 02347 const char *p; 02348 enum http_parser_url_fields uf, old_uf; 02349 int found_at = 0; 02350 02351 u->port = u->field_set = 0; 02352 s = is_connect ? s_req_server_start : s_req_spaces_before_url; 02353 old_uf = UF_MAX; 02354 02355 for (p = buf; p < buf + buflen; p++) { 02356 s = parse_url_char(s, *p); 02357 02358 /* Figure out the next field that we're operating on */ 02359 switch (s) { 02360 case s_dead: 02361 return 1; 02362 02363 /* Skip delimeters */ 02364 case s_req_schema_slash: 02365 case s_req_schema_slash_slash: 02366 case s_req_server_start: 02367 case s_req_query_string_start: 02368 case s_req_fragment_start: 02369 continue; 02370 02371 case s_req_schema: 02372 uf = UF_SCHEMA; 02373 break; 02374 02375 case s_req_server_with_at: 02376 found_at = 1; 02377 02378 /* FALLTROUGH */ 02379 case s_req_server: 02380 uf = UF_HOST; 02381 break; 02382 02383 case s_req_path: 02384 uf = UF_PATH; 02385 break; 02386 02387 case s_req_query_string: 02388 uf = UF_QUERY; 02389 break; 02390 02391 case s_req_fragment: 02392 uf = UF_FRAGMENT; 02393 break; 02394 02395 default: 02396 assert(!"Unexpected state"); 02397 return 1; 02398 } 02399 02400 /* Nothing's changed; soldier on */ 02401 if (uf == old_uf) { 02402 u->field_data[uf].len++; 02403 
continue; 02404 } 02405 02406 u->field_data[uf].off = p - buf; 02407 u->field_data[uf].len = 1; 02408 02409 u->field_set |= (1 << uf); 02410 old_uf = uf; 02411 } 02412 02413 /* host must be present if there is a schema */ 02414 /* parsing http:///toto will fail */ 02415 if ((u->field_set & (1 << UF_SCHEMA)) && 02416 (u->field_set & (1 << UF_HOST)) == 0) { 02417 return 1; 02418 } 02419 02420 if (u->field_set & (1 << UF_HOST)) { 02421 if (http_parse_host(buf, u, found_at) != 0) { 02422 return 1; 02423 } 02424 } 02425 02426 /* CONNECT requests can only contain "hostname:port" */ 02427 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { 02428 return 1; 02429 } 02430 02431 if (u->field_set & (1 << UF_PORT)) { 02432 /* Don't bother with endp; we've already validated the string */ 02433 unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); 02434 02435 /* Ports have a max value of 2^16 */ 02436 if (v > 0xffff) { 02437 return 1; 02438 } 02439 02440 u->port = (uint16_t) v; 02441 } 02442 02443 return 0; 02444 } 02445 02446 void 02447 http_parser_pause(http_parser *parser, int paused) { 02448 /* Users should only be pausing/unpausing a parser that is not in an error 02449 * state. In non-debug builds, there's not much that we can do about this 02450 * other than ignore it. 02451 */ 02452 if (HTTP_PARSER_ERRNO(parser) == HPE_OK || 02453 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { 02454 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); 02455 } else { 02456 assert(0 && "Attempting to pause parser in error state"); 02457 } 02458 } 02459 02460 int 02461 http_body_is_final(const struct http_parser *parser) { 02462 return parser->state == s_message_done; 02463 } 02464 02465 unsigned long 02466 http_parser_version(void) { 02467 return HTTP_PARSER_VERSION_MAJOR * 0x10000 | 02468 HTTP_PARSER_VERSION_MINOR * 0x00100 | 02469 HTTP_PARSER_VERSION_PATCH * 0x00001; 02470 }
Generated on Fri Jul 15 2022 18:07:13 by
