Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Change username pwd to auth (username, password)
  • Loading branch information
Sky NSS committed Jul 14, 2017
commit 145c7f4b9f6ff2c1e677432f8134057a7c41d2d4
86 changes: 43 additions & 43 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,8 @@ def _stringify_path(filepath_or_buffer):


def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
compression=None, username=None,
password=None, verify_ssl=None):
compression=None, auth=None,
verify_ssl=None):
"""
If the filepath_or_buffer is a url, translate and return the buffer.
Otherwise passthrough.
Expand All @@ -194,11 +194,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
----------
filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
or buffer
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is misaligned. needs to be part of the sentence above.

support 'https://username:password@fqdn.com:port/aaa.csv'
supports 'https://username:password@fqdn.com:port/aaa.csv'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please clarify this and add a versionadded tag (0.21.0).

encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
compression:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

compression : string, default None
    # the explanation is indented on the next line
auth : (string, string), default None
    username, password......

Same for verify_ssl.

add a versionadded tag

username: Authentication username (for https basic auth)
password: Authentication password (for https basic auth)
auth: (str,str), default None. (username, password) for HTTP(s) basic auth
verify_ssl: Default True. If False, allow self signed and invalid SSL
certificates for https

Copy link
Member

@gfyoung gfyoung Jul 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Why is the compression field empty?

  2. The formatting for auth and verify_ssl should be patched. The general format is the following:

<var_name> : <data_type>, <defaults> <description>
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, the compression parameter already existed but was not documented. I simply added it to the docstring and left its description empty because I was not familiar enough with it to write good documentation. I'll fix the rest.

Expand All @@ -210,8 +209,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,

if _is_url(filepath_or_buffer):
ureq, kwargs = get_urlopen_args(filepath_or_buffer,
uname=username,
pwd=password,
auth=auth,
verify_ssl=verify_ssl)
req = _urlopen(ureq, **kwargs)
content_encoding = req.headers.get('Content-Encoding', None)
Expand Down Expand Up @@ -262,16 +260,45 @@ def file_path_to_url(path):
}


def split_uname_from_url(url_with_uname):
    """
    Separate HTTP basic-auth credentials from a URL.

    Parameters
    ----------
    url_with_uname : str
        URL that may embed credentials in its network location, e.g.
        'https://username:password@fqdn.com:port/aaa.csv'.

    Returns
    -------
    username : str or None
        User name found in the URL, None when there is none.
    password : str or None
        Password found in the URL, None when there is none.
    url_no_usrpwd : str
        The input URL with the ``user[:password]@`` part removed. The input
        is returned unchanged when it carries no credentials.
    """
    o = parse_url(url_with_uname)
    # Everything before the last '@' of the netloc is user info. Working on
    # the raw netloc (instead of rebuilding 'user:pwd@host' from the parsed
    # pieces) also covers URLs without a password, hosts written in upper
    # case (``hostname`` is lower-cased by the parser) and URLs without a
    # host, for which ``hostname`` is None.
    _, at_sign, host_and_port = o.netloc.rpartition('@')
    if not at_sign:
        return None, None, url_with_uname
    # The netloc directly follows the scheme, so its first occurrence in
    # the URL is the one to rewrite.
    url_no_usrpwd = url_with_uname.replace(o.netloc, host_and_port, 1)
    return o.username, o.password, url_no_usrpwd


def get_urlopen_args(url_with_uname, uname=None, pwd=None, verify_ssl=True):
def get_urlopen_args(url_with_uname, auth=None, verify_ssl=True):
def split_auth_from_url(url_with_uname):
o = parse_url(url_with_uname)
usrch = '{}:{}@{}'.format(o.username, o.password, o.hostname)
url_no_usrpwd = url_with_uname.replace(usrch, o.hostname)
return (o.username, o.password), url_no_usrpwd

def get_urlopen_args_py2(uname, pwd, url_no_usrpwd, verify_ssl=True):
req = Request(url_no_usrpwd)
upstr = '{}:{}'.format(uname, pwd)
base64string = base64.encodestring(upstr).replace('\n', '')
req.add_header("Authorization", "Basic {}".format(base64string))
# I hope pandas can support self signed certs too
kwargs = {}
if verify_ssl not in [None, True]:
kwargs['context'] = ssl._create_unverified_context()
return req, kwargs

def get_urlopen_args_py3(uname, pwd, url_no_usrpwd, verify_ssl=True):
# not using urllib.request Request for PY3 because
# this looks like better code from extensibility purpose
passman = HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url_no_usrpwd, uname, pwd)
authhandler = HTTPBasicAuthHandler(passman)
if verify_ssl in [None, True]:
opener = build_opener(authhandler)
else:
context = ssl.create_default_context()
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE
opener = build_opener(authhandler, HTTPSHandler(context=context))
install_opener(opener)
return url_no_usrpwd, {}

Copy link
Member

@gfyoung gfyoung Jul 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comment here to patch the formatting for your parameters listed above.

uname = pwd = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you have a username w/o a password (yes?), but of course a password w/o a username should be banned. where do you raise on this?

if auth and len(auth) == 2:
uname, pwd = auth
if not uname and not pwd:
uname, pwd, url_no_usrpwd = split_uname_from_url(url_with_uname)
(uname, pwd), url_no_usrpwd = split_auth_from_url(url_with_uname)
else:
url_no_usrpwd = url_with_uname
if compat.PY3:
Expand All @@ -282,33 +309,6 @@ def get_urlopen_args(url_with_uname, uname=None, pwd=None, verify_ssl=True):
return req, kwargs


def get_urlopen_args_py2(uname, pwd, url_no_usrpwd, verify_ssl=True):
    """
    Build the ``urlopen`` arguments for a request using HTTP basic auth.

    Parameters
    ----------
    uname : str or None
        User name for HTTP(s) basic auth.
    pwd : str or None
        Password for HTTP(s) basic auth.
    url_no_usrpwd : str
        URL to request, without embedded credentials.
    verify_ssl : bool or None, default True
        None and True keep certificate verification enabled. Any other
        value disables it (allows self signed certificates).

    Returns
    -------
    req : Request
        Request for ``url_no_usrpwd``. It carries an ``Authorization``
        header only when a user name or a password was given.
    kwargs : dict
        Extra keyword arguments for ``urlopen``; holds a non-verifying
        ``context`` when certificate verification is disabled.
    """
    req = Request(url_no_usrpwd)
    # Only send credentials that were actually supplied; formatting None
    # would otherwise transmit the literal text "None:None".
    if uname or pwd:
        upstr = '{}:{}'.format(uname or '', pwd or '')
        if not isinstance(upstr, bytes):
            upstr = upstr.encode('utf-8')
        # b64encode never inserts newlines, so no stripping is needed.
        base64string = base64.b64encode(upstr).decode('ascii')
        req.add_header("Authorization", "Basic {}".format(base64string))
    kwargs = {}
    if verify_ssl not in [None, True]:
        context = ssl.create_default_context()
        # check_hostname must be switched off before verify_mode can be
        # set to CERT_NONE.
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE
        kwargs['context'] = context
    return req, kwargs


def get_urlopen_args_py3(uname, pwd, url_no_usrpwd, verify_ssl=True):
    """
    Install a basic-auth aware opener and return the ``urlopen`` arguments.

    Parameters
    ----------
    uname : str or None
        User name registered for ``url_no_usrpwd``.
    pwd : str or None
        Password registered for ``url_no_usrpwd``.
    url_no_usrpwd : str
        URL to request, without embedded credentials.
    verify_ssl : bool or None, default True
        None and True keep the default HTTPS handling. Any other value adds
        an HTTPS handler whose context skips certificate and host name
        verification.

    Returns
    -------
    url_no_usrpwd : str
        The URL, passed through unchanged.
    kwargs : dict
        Always empty; the configuration lives in the installed opener.
    """
    password_mgr = HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, url_no_usrpwd, uname, pwd)
    handlers = [HTTPBasicAuthHandler(password_mgr)]
    if verify_ssl not in [None, True]:
        unverified = ssl.create_default_context()
        # check_hostname must be switched off before verify_mode can be
        # set to CERT_NONE.
        unverified.check_hostname = False
        unverified.verify_mode = ssl.CERT_NONE
        handlers.append(HTTPSHandler(context=unverified))
    # NOTE(review): install_opener replaces the default opener used by
    # urlopen, so these handlers stay in effect for later urlopen calls too.
    install_opener(build_opener(*handlers))
    return url_no_usrpwd, {}


def _infer_compression(filepath_or_buffer, compression):
"""
Get the compression method for filepath_or_buffer. If compression='infer',
Expand Down
6 changes: 2 additions & 4 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,7 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
if not isinstance(io, ExcelFile):
io = ExcelFile(io,
engine=engine,
username=kwds.get('username', None),
password=kwds.get('password', None),
auth=kwds.get('auth', None),
verify_ssl=kwds.get('verify_ssl', None))

return io._parse_excel(
Expand Down Expand Up @@ -264,8 +263,7 @@ def __init__(self, io, **kwds):
if _is_url(self._io):
verify_ssl = kwds.get('verify_ssl', None)
ureq, kwargs = get_urlopen_args(self._io,
uname=kwds.get('username', None),
pwd=kwds.get('password', None),
auth=kwds.get('auth', None),
verify_ssl=verify_ssl)
io = _urlopen(ureq, **kwargs)
elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
Expand Down
32 changes: 12 additions & 20 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,21 +117,20 @@ def _get_skiprows(skiprows):
type(skiprows).__name__)


def _read(obj, username=None, password=None, verify_ssl=None):
def _read(obj, auth=None, verify_ssl=None):
"""Try to read from a url, file or string.

Parameters
----------
obj : str, unicode, or file-like
username: username for http basic auth
password: password for http basic auth
auth: None or (username, password) for http basic auth
verify_ssl: Default True. Set to False to disable cert verification
Returns
-------
raw_text : str
"""
if _is_url(obj):
ureq, kwargs = get_urlopen_args(obj, username, password, verify_ssl)
ureq, kwargs = get_urlopen_args(obj, auth, verify_ssl)
with urlopen(ureq, **kwargs) as url:
text = url.read()
elif hasattr(obj, 'read'):
Expand Down Expand Up @@ -191,14 +190,13 @@ class _HtmlFrameParser(object):
functionality.
"""

def __init__(self, io, match, attrs, encoding, username=None,
password=None, verify_ssl=None):
def __init__(self, io, match, attrs, encoding, auth=None,
verify_ssl=None):
self.io = io
self.match = match
self.attrs = attrs
self.encoding = encoding
self.username = username
self.password = password
self.auth = auth
self.verify_ssl = verify_ssl

def parse_tables(self):
Expand Down Expand Up @@ -452,8 +450,7 @@ def _parse_tables(self, doc, match, attrs):
return result

def _setup_build_doc(self):
raw_text = _read(self.io, self.username,
self.password, self.verify_ssl)
raw_text = _read(self.io, self.auth, self.verify_ssl)
if not raw_text:
raise ValueError('No text parsed from document: %s' % self.io)
return raw_text
Expand Down Expand Up @@ -743,8 +740,7 @@ def _parse(flavor, io, match, attrs, encoding, **kwargs):
p = parser(io, compiled_match,
attrs,
encoding,
username=kwargs.get('username', None),
password=kwargs.get('password', None),
auth=kwargs.get('auth', None),
verify_ssl=kwargs.get('verify_ssl', None))
try:
tables = p.parse_tables()
Expand All @@ -768,7 +764,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
skiprows=None, attrs=None, parse_dates=False,
tupleize_cols=False, thousands=',', encoding=None,
decimal='.', converters=None, na_values=None,
keep_default_na=True, username=None, password=None,
keep_default_na=True, auth=None,
verify_ssl=False):
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.

Expand Down Expand Up @@ -870,11 +866,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,

.. versionadded:: 0.19.0

username : str, default None
username for HTTP(s) basic auth

password : str, default None
password for HTTP(s) basic auth
auth: (str,str), default None. (username, password) for HTTP(s) basic auth

verify_ssl : bool, default True
If False, ssl certificate is not verified (allow self signed SSL certs)
Expand Down Expand Up @@ -926,5 +918,5 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
parse_dates=parse_dates, tupleize_cols=tupleize_cols,
thousands=thousands, attrs=attrs, encoding=encoding,
decimal=decimal, converters=converters, na_values=na_values,
keep_default_na=keep_default_na, username=username,
password=password, verify_ssl=verify_ssl)
keep_default_na=keep_default_na, auth=auth,
verify_ssl=verify_ssl)
8 changes: 3 additions & 5 deletions pandas/io/json/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def write(self):
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
convert_axes=True, convert_dates=True, keep_default_dates=True,
numpy=False, precise_float=False, date_unit=None, encoding=None,
lines=False, username=None, password=None, verify_ssl=None):
lines=False, auth=None, verify_ssl=None):
"""
Convert a JSON string to pandas object

Expand Down Expand Up @@ -263,8 +263,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,

.. versionadded:: 0.19.0

username: str, default None. Authentication username for HTTP(s) basic auth
password: str, default None. Authentication password for HTTP(s) basic auth
auth: (str,str), default None. (username, password) for HTTP(s) basic auth
verify_ssl: boolean, default None (True).
If False, allow self signed SSL certificates

Expand Down Expand Up @@ -327,8 +326,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,

filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf,
encoding=encoding,
username=username,
password=password,
auth=auth,
verify_ssl=verify_ssl)
if isinstance(filepath_or_buffer, compat.string_types):
try:
Expand Down
13 changes: 5 additions & 8 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,12 +391,11 @@ def _read(filepath_or_buffer, kwds):
kwds['encoding'] = encoding

compression = kwds.get('compression')
username = kwds.get('username', None)
password = kwds.get('password', None)
auth = kwds.get('auth', None)
verify_ssl = kwds.get('verify_ssl', None)
compression = _infer_compression(filepath_or_buffer, compression)
filepath_or_buffer, _, compression = get_filepath_or_buffer(
filepath_or_buffer, encoding, compression, username, password,
filepath_or_buffer, encoding, compression, auth,
verify_ssl)
kwds['compression'] = compression

Expand Down Expand Up @@ -580,9 +579,8 @@ def parser_f(filepath_or_buffer,
memory_map=False,
float_precision=None,

# Basic auth (http/https)
username=None,
password=None,
# Basic auth (http/https) (username, password)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove this comment. Your documentation of the parameters in the docstring should make this clear.

auth=None,

# skip verify self signed SSL certificates
Copy link
Member

@gfyoung gfyoung Jul 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my comment above. You should also be able to remove this comment.

verify_ssl=None):
Expand Down Expand Up @@ -667,8 +665,7 @@ def parser_f(filepath_or_buffer,
infer_datetime_format=infer_datetime_format,
skip_blank_lines=skip_blank_lines,

username=username,
password=password,
auth=auth,
verify_ssl=verify_ssl
)

Expand Down
11 changes: 0 additions & 11 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,17 +190,6 @@ def test_write_fspath_hdf5(self):

tm.assert_frame_equal(result, expected)

def test_split_url_extract_uname_pwd(self):
    # Each case: (url with credentials, expected user name,
    #             expected password, expected url without credentials)
    cases = [
        ('https://aaa:bbb@ccc.com:1010/aaa.txt',
         'aaa',
         'bbb',
         'https://ccc.com:1010/aaa.txt'),
    ]
    for url, uname, pwd, nurl in cases:
        got_uname, got_pwd, got_url = common.split_uname_from_url(url)
        assert got_url == nurl
        assert got_uname == uname
        assert got_pwd == pwd


class TestMMapWrapper(object):

Expand Down