diff --git a/CHANGES/12231.bugfix.rst b/CHANGES/12231.bugfix.rst new file mode 100644 index 00000000000..cd74bd1e7e5 --- /dev/null +++ b/CHANGES/12231.bugfix.rst @@ -0,0 +1,2 @@ +Adjusted pure-Python request header value validation to align with RFC 9110 control-character handling, while preserving lax response parser behavior, and added regression tests for Host/header control-character cases. +-- by :user:`rodrigobnogueira`. diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 02d253ac871..b9d527503b3 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -67,6 +67,10 @@ # token = 1*tchar _TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~") TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+") +# https://www.rfc-editor.org/rfc/rfc9110#section-5.5-5 +_FIELD_VALUE_FORBIDDEN_CTL_RE: Final[Pattern[str]] = re.compile( + r"[\x00-\x08\x0a-\x1f\x7f]" +) VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII) DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII) HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+") @@ -184,7 +188,10 @@ def parse_headers( value = bvalue.decode("utf-8", "surrogateescape") # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 - if "\n" in value or "\r" in value or "\x00" in value: + if self._lax: + if "\n" in value or "\r" in value or "\x00" in value: + raise InvalidHeader(bvalue) + elif _FIELD_VALUE_FORBIDDEN_CTL_RE.search(value): raise InvalidHeader(bvalue) headers.add(name, value) diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 28fc3aac03d..b20be7434a4 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -235,6 +235,9 @@ def test_bad_header_name( "Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2 "Foo\t: bar", "\xffoo: bar", + "Foo: abc\x01def", # CTL bytes forbidden per RFC 9110 §5.5 + "Foo: abc\x7fdef", # DEL is also a CTL byte + "Foo: abc\x1fdef", ), ) def 
test_bad_headers(parser: HttpRequestParser, hdr: str) -> None: @@ -243,6 +246,13 @@ def test_bad_headers(parser: HttpRequestParser, hdr: str) -> None: parser.feed_data(text) +def test_ctl_host_header_bad_characters(parser: HttpRequestParser) -> None: + """CTL byte in Host header must be rejected.""" + text = b"GET /test HTTP/1.1\r\nHost: trusted.example\x01@bad.test\r\n\r\n" + with pytest.raises(http_exceptions.BadHttpMessage): + parser.feed_data(text) + + def test_unpaired_surrogate_in_header_py( loop: asyncio.AbstractEventLoop, protocol: BaseProtocol ) -> None: