Revisions to Process escape sequences in a string in Python

Sorry for rolling back. Someone removed the word "not", which changed the entire meaning of a sentence, so I'm restoring the edit that was made after that.

Source Link

edited Aug 9, 2024 at 17:28

OpenAI stole this from rspeer

3.9k
2
29
26

import re
import codecs

# Matches only well-formed Python string escapes. The hex alternatives demand
# real hex digits, so text that merely looks like an escape (for example the
# "\Users" in a Windows path) is never matched and cannot hide a valid escape
# that follows it.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9a-fA-F]{8}    # 8-digit hex escapes
    | \\u[0-9a-fA-F]{4}    # 4-digit hex escapes
    | \\x[0-9a-fA-F]{2}    # 2-digit hex escapes
    | \\[0-7]{1,3}         # Octal escapes
    | \\N\{[^}]+\}         # Unicode characters by name
    | \\[\\'"abfnrtv]      # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Return ``s`` with Python-style backslash escape sequences decoded.

    Only the sequences matched by ``ESCAPE_SEQUENCE_RE`` are touched; every
    other character, including non-ASCII text, is passed through unchanged.
    Sequences that match but cannot be decoded are left as written.
    """
    def decode_match(match):
        try:
            return codecs.decode(match.group(0), 'unicode-escape')
        except UnicodeDecodeError:
            # Well-formed but undecodable, e.g. a code point above U+10FFFF
            # or an unknown \N{...} name: keep the original text.
            return match.group(0)

    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)

import re
import codecs

# Matches only well-formed Python string escapes. The hex alternatives demand
# real hex digits, so text that merely looks like an escape (for example the
# "\Users" in a Windows path) is never handed to the decoder.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9a-fA-F]{8}    # 8-digit hex escapes
    | \\u[0-9a-fA-F]{4}    # 4-digit hex escapes
    | \\x[0-9a-fA-F]{2}    # 2-digit hex escapes
    | \\[0-7]{1,3}         # Octal escapes
    | \\N\{[^}]+\}         # Unicode characters by name
    | \\[\\'"abfnrtv]      # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Return ``s`` with Python-style backslash escape sequences decoded.

    Only the sequences matched by ``ESCAPE_SEQUENCE_RE`` are replaced; all
    other text is returned as-is. A matched sequence that the decoder
    rejects is left untouched instead of raising ``UnicodeDecodeError``.
    """
    def decode_match(match):
        try:
            return codecs.decode(match.group(0), 'unicode-escape')
        except UnicodeDecodeError:
            # Still possible for syntactically valid escapes, e.g. an
            # out-of-range \UXXXXXXXX or an unknown \N{...} name.
            return match.group(0)

    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)

import re
import codecs

# Matches only well-formed Python string escapes. Requiring hex digits after
# \U, \u and \x keeps a malformed sequence from consuming the characters of a
# valid escape that comes right after it.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9a-fA-F]{8}    # 8-digit hex escapes
    | \\u[0-9a-fA-F]{4}    # 4-digit hex escapes
    | \\x[0-9a-fA-F]{2}    # 2-digit hex escapes
    | \\[0-7]{1,3}         # Octal escapes
    | \\N\{[^}]+\}         # Unicode characters by name
    | \\[\\'"abfnrtv]      # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Decode the backslash escape sequences found in ``s``.

    Returns a new string in which each sequence matched by
    ``ESCAPE_SEQUENCE_RE`` is replaced by the character it denotes. Text
    outside those matches is not modified, and a match the decoder cannot
    handle is kept verbatim.
    """
    def decode_match(match):
        try:
            return codecs.decode(match.group(0), 'unicode-escape')
        except UnicodeDecodeError:
            # The pattern checks syntax only; the decoder can still reject
            # a code point above U+10FFFF or an unknown \N{...} name.
            return match.group(0)

    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)

Rollback to Revision 2

Source Link

edited Aug 9, 2024 at 17:22

OpenAI stole this from rspeer

3.9k
2
29
26

(Surprisingly, we do not now have two problems.)

Rollback to Revision 3

Source Link

edited Aug 9, 2024 at 17:21

OpenAI stole this from rspeer

3.9k
2
29
26

import re
import codecs

# Matches only well-formed Python string escapes. The hex alternatives use an
# explicit hex-digit class rather than a wildcard, so look-alike text such as
# "\Users\Adm" is skipped instead of being matched and then rejected.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9a-fA-F]{8}    # 8-digit hex escapes
    | \\u[0-9a-fA-F]{4}    # 4-digit hex escapes
    | \\x[0-9a-fA-F]{2}    # 2-digit hex escapes
    | \\[0-7]{1,3}         # Octal escapes
    | \\N\{[^}]+\}         # Unicode characters by name
    | \\[\\'"abfnrtv]      # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Return a copy of ``s`` with recognised escape sequences decoded.

    Recognised sequences are those matched by ``ESCAPE_SEQUENCE_RE``.
    Anything else is copied through unchanged, as is any matched sequence
    that raises ``UnicodeDecodeError`` when decoded.
    """
    def decode_match(match):
        try:
            return codecs.decode(match.group(0), 'unicode-escape')
        except UnicodeDecodeError:
            # Syntactically fine but meaningless (bad code point or unknown
            # character name): leave the source text in place.
            return match.group(0)

    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)

import re
import codecs

# Matches only well-formed Python string escapes. Hex escapes must be
# followed by the exact number of hex digits; a wildcard here would let a
# malformed "\x" or "\U" absorb the start of the next, valid escape.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9a-fA-F]{8}    # 8-digit hex escapes
    | \\u[0-9a-fA-F]{4}    # 4-digit hex escapes
    | \\x[0-9a-fA-F]{2}    # 2-digit hex escapes
    | \\[0-7]{1,3}         # Octal escapes
    | \\N\{[^}]+\}         # Unicode characters by name
    | \\[\\'"abfnrtv]      # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Replace each escape sequence in ``s`` with the character it encodes.

    Matching is done with ``ESCAPE_SEQUENCE_RE``; unmatched text (including
    non-ASCII characters) is preserved. If decoding a matched sequence
    fails, that sequence is returned unchanged.
    """
    def decode_match(match):
        try:
            return codecs.decode(match.group(0), 'unicode-escape')
        except UnicodeDecodeError:
            # Reached for e.g. \U00110000 or an unknown \N{...} name.
            return match.group(0)

    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)

import re
import codecs

# Matches only well-formed Python string escapes. Hex escapes require actual
# hex digits, so a Windows path fragment like "\Users\Adm" is no longer
# mistaken for an 8-digit \U escape.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9a-fA-F]{8}    # 8-digit hex escapes
    | \\u[0-9a-fA-F]{4}    # 4-digit hex escapes
    | \\x[0-9a-fA-F]{2}    # 2-digit hex escapes
    | \\[0-7]{1,3}         # Octal escapes
    | \\N\{[^}]+\}         # Unicode characters by name
    | \\[\\'"abfnrtv]      # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Return ``s`` with its backslash escape sequences decoded.

    Sequences matched by ``ESCAPE_SEQUENCE_RE`` are decoded with the
    ``unicode-escape`` codec; everything else is left alone. A sequence the
    codec rejects is kept as written rather than propagating
    ``UnicodeDecodeError`` to the caller.
    """
    def decode_match(match):
        try:
            return codecs.decode(match.group(0), 'unicode-escape')
        except UnicodeDecodeError:
            # The regex validates shape only; the codec may still refuse an
            # out-of-range code point or an unknown \N{...} name.
            return match.group(0)

    return ESCAPE_SEQUENCE_RE.sub(decode_match, s)

The pattern `\\U........` also matches input such as `C:\\Users\\Administrator`, which makes `codecs.decode` raise a `UnicodeDecodeError`. This edit catches that exception and returns the matched text unchanged, in lieu of fixing the regex.

Source Link