|
9 | 9 | import tempfile |
10 | 10 | from io import BytesIO |
11 | 11 | from numbers import Number |
| 12 | +from email.message import Message |
| 13 | +from typing import Dict, Union, Tuple |
12 | 14 |
|
13 | 15 | # Unique missing object. |
14 | 16 | _missing = object() |
|
76 | 78 | QUOTE = b'"'[0] |
77 | 79 |
|
78 | 80 |
|
def parse_options_header(value: Union[str, bytes]) -> Tuple[bytes, Dict[bytes, bytes]]:
    """
    Parses a Content-Type header into a value in the following format:
        (content_type, {parameters})

    :param value: The raw header value.  ``bytes`` input is decoded as
                  latin-1 before parsing.  A falsy value (empty string,
                  empty bytes, ``None``) yields ``(b'', {})``.
    :return: A 2-tuple of the content type and a dictionary of its options,
             with everything encoded back to latin-1 ``bytes``.  When the
             header has no ``;`` the content type is lower-cased and
             stripped; otherwise it is returned as the parser produced it.
    :raises TypeError: if ``value`` is truthy but neither ``str`` nor
                       ``bytes``.
    """
    # Uses email.message.Message to parse the header as described in PEP 594.
    # Ref: https://peps.python.org/pep-0594/#cgi
    if not value:
        return (b'', {})

    # If we are passed bytes, we assume that it conforms to WSGI, encoding in latin-1.
    if isinstance(value, bytes):  # pragma: no cover
        value = value.decode('latin-1')

    # Raise explicitly rather than ``assert``: asserts vanish under ``-O``.
    if not isinstance(value, str):
        raise TypeError(
            'header value must be str or bytes, not %s' % type(value).__name__
        )

    # If we have no options, return the string as-is.
    if ';' not in value:
        return (value.lower().strip().encode('latin-1'), {})

    # Let the stdlib split the header into [(ctype, ''), (key, value), ...].
    message = Message()
    message['content-type'] = value
    params = message.get_params()
    # Defensive only: a header containing ';' is expected to yield at least
    # the content-type entry.
    if not params:
        return (b'', {})
    ctype = params[0][0].encode('latin-1')

    options: Dict[bytes, bytes] = {}
    for name, param_value in params[1:]:
        # RFC 2231 extended parameters (e.g. filename*=utf-8''x.txt) come back
        # as a (charset, language, value) 3-tuple; the last item is the value.
        if isinstance(param_value, tuple):
            param_value = param_value[-1]

        # If the value is a filename, we need to fix a bug on IE6 that sends
        # the full file path instead of the filename.
        if name == 'filename':
            if param_value[1:3] == ':\\' or param_value[:2] == '\\\\':
                param_value = param_value.split('\\')[-1]

        options[name.encode('latin-1')] = param_value.encode('latin-1')

    return ctype, options
118 | 120 |
|
119 | 121 |
|
|
0 commit comments