-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
BUG, ENH: Read Data From Password-Protected URLs and allow self-signed SSL certs #16910
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
8b6e426 145c7f4 9473316 9c7524d 598cf7b 3b454dd d359b2d eb03fd3 cbe3f49 437e0a2 7b034b8 0a4607a 209dd58 f520f7b File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -184,8 +184,8 @@ def _stringify_path(filepath_or_buffer): | |
| | ||
| | ||
| def get_filepath_or_buffer(filepath_or_buffer, encoding=None, | ||
| compression=None, username=None, | ||
| password=None, verify_ssl=None): | ||
| compression=None, auth=None, | ||
| verify_ssl=None): | ||
| """ | ||
| If the filepath_or_buffer is a url, translate and return the buffer. | ||
| Otherwise passthrough. | ||
| | @@ -194,11 +194,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, | |
| ---------- | ||
| filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path), | ||
| or buffer | ||
| support 'https://username:password@fqdn.com:port/aaa.csv' | ||
| supports 'https://username:password@fqdn.com:port/aaa.csv' | ||
| ||
| encoding : the encoding to use to decode py3 bytes, default is 'utf-8' | ||
| compression: | ||
| ||
| username: Authentication username (for https basic auth) | ||
| password: Authentication password (for https basic auth) | ||
| auth: (str,str), default None. (username, password) for HTTP(s) basic auth | ||
| verify_ssl: Default True. If False, allow self signed and invalid SSL | ||
| certificates for https | ||
| | ||
| Member There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
`<var_name> : <data_type>, <defaults> <description>` Author There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually the | ||
| | @@ -210,8 +209,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, | |
| | ||
| if _is_url(filepath_or_buffer): | ||
| ureq, kwargs = get_urlopen_args(filepath_or_buffer, | ||
| uname=username, | ||
| pwd=password, | ||
| auth=auth, | ||
| verify_ssl=verify_ssl) | ||
| req = _urlopen(ureq, **kwargs) | ||
| content_encoding = req.headers.get('Content-Encoding', None) | ||
| | @@ -262,16 +260,45 @@ def file_path_to_url(path): | |
| } | ||
| | ||
| | ||
| def split_uname_from_url(url_with_uname): | ||
| o = parse_url(url_with_uname) | ||
| usrch = '{}:{}@{}'.format(o.username, o.password, o.hostname) | ||
| url_no_usrpwd = url_with_uname.replace(usrch, o.hostname) | ||
| return o.username, o.password, url_no_usrpwd | ||
| | ||
| | ||
| def get_urlopen_args(url_with_uname, uname=None, pwd=None, verify_ssl=True): | ||
| def get_urlopen_args(url_with_uname, auth=None, verify_ssl=True): | ||
| def split_auth_from_url(url_with_uname): | ||
| o = parse_url(url_with_uname) | ||
| usrch = '{}:{}@{}'.format(o.username, o.password, o.hostname) | ||
| url_no_usrpwd = url_with_uname.replace(usrch, o.hostname) | ||
| return (o.username, o.password), url_no_usrpwd | ||
| | ||
| def get_urlopen_args_py2(uname, pwd, url_no_usrpwd, verify_ssl=True): | ||
| req = Request(url_no_usrpwd) | ||
| upstr = '{}:{}'.format(uname, pwd) | ||
| base64string = base64.encodestring(upstr).replace('\n', '') | ||
| req.add_header("Authorization", "Basic {}".format(base64string)) | ||
| # I hope pandas can support self signed certs too | ||
| kwargs = {} | ||
| if verify_ssl not in [None, True]: | ||
| kwargs['context'] = ssl._create_unverified_context() | ||
| return req, kwargs | ||
| | ||
| def get_urlopen_args_py3(uname, pwd, url_no_usrpwd, verify_ssl=True): | ||
| # not using urllib.request Request for PY3 because | ||
| # this looks like better code from extensibility purpose | ||
| passman = HTTPPasswordMgrWithDefaultRealm() | ||
| passman.add_password(None, url_no_usrpwd, uname, pwd) | ||
| authhandler = HTTPBasicAuthHandler(passman) | ||
| if verify_ssl in [None, True]: | ||
| opener = build_opener(authhandler) | ||
| else: | ||
| context = ssl.create_default_context() | ||
| context.check_hostname = False | ||
| context.verify_mode = ssl.CERT_NONE | ||
| opener = build_opener(authhandler, HTTPSHandler(context=context)) | ||
| install_opener(opener) | ||
| return url_no_usrpwd, {} | ||
| | ||
| Member There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See my comment here to patch the formatting for your parameters listed above. | ||
| uname = pwd = None | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you have a username without a password (yes?)? Of course, a password without a username should be banned. Where do you raise on this? | ||
| if auth and len(auth) == 2: | ||
| uname, pwd = auth | ||
| if not uname and not pwd: | ||
| uname, pwd, url_no_usrpwd = split_uname_from_url(url_with_uname) | ||
| (uname, pwd), url_no_usrpwd = split_auth_from_url(url_with_uname) | ||
| else: | ||
| url_no_usrpwd = url_with_uname | ||
| if compat.PY3: | ||
| | @@ -282,33 +309,6 @@ def get_urlopen_args(url_with_uname, uname=None, pwd=None, verify_ssl=True): | |
| return req, kwargs | ||
| | ||
| | ||
| def get_urlopen_args_py2(uname, pwd, url_no_usrpwd, verify_ssl=True): | ||
| req = Request(url_no_usrpwd) | ||
| upstr = '{}:{}'.format(uname, pwd) | ||
| base64string = base64.encodestring(upstr).replace('\n', '') | ||
| req.add_header("Authorization", "Basic {}".format(base64string)) | ||
| # I hope pandas can support self signed certs too | ||
| kwargs = {} | ||
| if verify_ssl not in [None, True]: | ||
| kwargs['context'] = ssl._create_unverified_context() | ||
| return req, kwargs | ||
| | ||
| | ||
| def get_urlopen_args_py3(uname, pwd, url_no_usrpwd, verify_ssl=True): | ||
| passman = HTTPPasswordMgrWithDefaultRealm() | ||
| passman.add_password(None, url_no_usrpwd, uname, pwd) | ||
| authhandler = HTTPBasicAuthHandler(passman) | ||
| if verify_ssl in [None, True]: | ||
| opener = build_opener(authhandler) | ||
| else: | ||
| context = ssl.create_default_context() | ||
| context.check_hostname = False | ||
| context.verify_mode = ssl.CERT_NONE | ||
| opener = build_opener(authhandler, HTTPSHandler(context=context)) | ||
| install_opener(opener) | ||
| return url_no_usrpwd, {} | ||
| | ||
| | ||
| def _infer_compression(filepath_or_buffer, compression): | ||
| """ | ||
| Get the compression method for filepath_or_buffer. If compression='infer', | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -391,12 +391,11 @@ def _read(filepath_or_buffer, kwds): | |
| kwds['encoding'] = encoding | ||
| | ||
| compression = kwds.get('compression') | ||
| username = kwds.get('username', None) | ||
| password = kwds.get('password', None) | ||
| auth = kwds.get('auth', None) | ||
| verify_ssl = kwds.get('verify_ssl', None) | ||
| compression = _infer_compression(filepath_or_buffer, compression) | ||
| filepath_or_buffer, _, compression = get_filepath_or_buffer( | ||
| filepath_or_buffer, encoding, compression, username, password, | ||
| filepath_or_buffer, encoding, compression, auth, | ||
| verify_ssl) | ||
| kwds['compression'] = compression | ||
| | ||
| | @@ -580,9 +579,8 @@ def parser_f(filepath_or_buffer, | |
| memory_map=False, | ||
| float_precision=None, | ||
| | ||
| # Basic auth (http/https) | ||
| username=None, | ||
| password=None, | ||
| # Basic auth (http/https) (username, password) | ||
| ||
| auth=None, | ||
| | ||
| # skip verify self signed SSL certificates | ||
| ||
| verify_ssl=None): | ||
| | @@ -667,8 +665,7 @@ def parser_f(filepath_or_buffer, | |
| infer_datetime_format=infer_datetime_format, | ||
| skip_blank_lines=skip_blank_lines, | ||
| | ||
| username=username, | ||
| password=password, | ||
| auth=auth, | ||
| verify_ssl=verify_ssl | ||
| ) | ||
| | ||
| | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is misaligned; it needs to be part of the sentence above.