Change username pwd to auth (username, password)

pandas-dev · skynss · Jul 13, 2017 · Jul 13, 2017 · Jul 13, 2017 · Jul 13, 2017
commit 145c7f4b9f6ff2c1e677432f8134057a7c41d2d4
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -184,8 +184,8 @@ def _stringify_path(filepath_or_buffer):
 
 
 def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
- compression=None, username=None,
- password=None, verify_ssl=None):
+ compression=None, auth=None,
+ verify_ssl=None):
  """
  If the filepath_or_buffer is a url, translate and return the buffer.
  Otherwise passthrough.
@@ -194,11 +194,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
  ----------
  filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
  or buffer
- support 'https://username:password@fqdn.com:port/aaa.csv'
+ supports 'https://username:password@fqdn.com:port/aaa.csv'
  encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
  compression:
- username: Authentication username (for https basic auth)
- password: Authentication password (for https basic auth)
+ auth: (str,str), default None. (username, password) for HTTP(s) basic auth
  verify_ssl: Default True. If False, allow self signed and invalid SSL
  certificates for https
 
@@ -210,8 +209,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
 
  if _is_url(filepath_or_buffer):
  ureq, kwargs = get_urlopen_args(filepath_or_buffer,
- uname=username,
- pwd=password,
+ auth=auth,
  verify_ssl=verify_ssl)
  req = _urlopen(ureq, **kwargs)
  content_encoding = req.headers.get('Content-Encoding', None)
@@ -262,16 +260,45 @@ def file_path_to_url(path):
 }
 
 
-def split_uname_from_url(url_with_uname):
- o = parse_url(url_with_uname)
- usrch = '{}:{}@{}'.format(o.username, o.password, o.hostname)
- url_no_usrpwd = url_with_uname.replace(usrch, o.hostname)
- return o.username, o.password, url_no_usrpwd
-
-
-def get_urlopen_args(url_with_uname, uname=None, pwd=None, verify_ssl=True):
+def get_urlopen_args(url_with_uname, auth=None, verify_ssl=True):
+ def split_auth_from_url(url_with_uname):
+ o = parse_url(url_with_uname)
+ usrch = '{}:{}@{}'.format(o.username, o.password, o.hostname)
+ url_no_usrpwd = url_with_uname.replace(usrch, o.hostname)
+ return (o.username, o.password), url_no_usrpwd
+
+ def get_urlopen_args_py2(uname, pwd, url_no_usrpwd, verify_ssl=True):
+ req = Request(url_no_usrpwd)
+ upstr = '{}:{}'.format(uname, pwd)
+ base64string = base64.encodestring(upstr).replace('\n', '')
+ req.add_header("Authorization", "Basic {}".format(base64string))
+ # skip certificate verification (e.g. self-signed certs) if disabled
+ kwargs = {}
+ if verify_ssl not in [None, True]:
+ kwargs['context'] = ssl._create_unverified_context()
+ return req, kwargs
+
+ def get_urlopen_args_py3(uname, pwd, url_no_usrpwd, verify_ssl=True):
+ # PY3 installs an opener with a basic-auth handler instead of building
+ # a urllib.request Request; handlers are easier to extend later
+ passman = HTTPPasswordMgrWithDefaultRealm()
+ passman.add_password(None, url_no_usrpwd, uname, pwd)
+ authhandler = HTTPBasicAuthHandler(passman)
+ if verify_ssl in [None, True]:
+ opener = build_opener(authhandler)
+ else:
+ context = ssl.create_default_context()
+ context.check_hostname = False
+ context.verify_mode = ssl.CERT_NONE
+ opener = build_opener(authhandler, HTTPSHandler(context=context))
+ install_opener(opener)
+ return url_no_usrpwd, {}
+
+ uname = pwd = None
+ if auth and len(auth) == 2:
+ uname, pwd = auth
  if not uname and not pwd:
- uname, pwd, url_no_usrpwd = split_uname_from_url(url_with_uname)
+ (uname, pwd), url_no_usrpwd = split_auth_from_url(url_with_uname)
  else:
  url_no_usrpwd = url_with_uname
  if compat.PY3:
@@ -282,33 +309,6 @@ def get_urlopen_args(url_with_uname, uname=None, pwd=None, verify_ssl=True):
  return req, kwargs
 
 
-def get_urlopen_args_py2(uname, pwd, url_no_usrpwd, verify_ssl=True):
- req = Request(url_no_usrpwd)
- upstr = '{}:{}'.format(uname, pwd)
- base64string = base64.encodestring(upstr).replace('\n', '')
- req.add_header("Authorization", "Basic {}".format(base64string))
- # I hope pandas can support self signed certs too
- kwargs = {}
- if verify_ssl not in [None, True]:
- kwargs['context'] = ssl._create_unverified_context()
- return req, kwargs
-
-
-def get_urlopen_args_py3(uname, pwd, url_no_usrpwd, verify_ssl=True):
- passman = HTTPPasswordMgrWithDefaultRealm()
- passman.add_password(None, url_no_usrpwd, uname, pwd)
- authhandler = HTTPBasicAuthHandler(passman)
- if verify_ssl in [None, True]:
- opener = build_opener(authhandler)
- else:
- context = ssl.create_default_context()
- context.check_hostname = False
- context.verify_mode = ssl.CERT_NONE
- opener = build_opener(authhandler, HTTPSHandler(context=context))
- install_opener(opener)
- return url_no_usrpwd, {}
-
-
 def _infer_compression(filepath_or_buffer, compression):
  """
  Get the compression method for filepath_or_buffer. If compression='infer',

diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -212,8 +212,7 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
  if not isinstance(io, ExcelFile):
  io = ExcelFile(io,
  engine=engine,
- username=kwds.get('username', None),
- password=kwds.get('password', None),
+ auth=kwds.get('auth', None),
  verify_ssl=kwds.get('verify_ssl', None))
 
  return io._parse_excel(
@@ -264,8 +263,7 @@ def __init__(self, io, **kwds):
  if _is_url(self._io):
  verify_ssl = kwds.get('verify_ssl', None)
  ureq, kwargs = get_urlopen_args(self._io,
- uname=kwds.get('username', None),
- pwd=kwds.get('password', None),
+ auth=kwds.get('auth', None),
  verify_ssl=verify_ssl)
  io = _urlopen(ureq, **kwargs)
  elif not isinstance(self.io, (ExcelFile, xlrd.Book)):

diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -117,21 +117,20 @@ def _get_skiprows(skiprows):
  type(skiprows).__name__)
 
 
-def _read(obj, username=None, password=None, verify_ssl=None):
+def _read(obj, auth=None, verify_ssl=None):
  """Try to read from a url, file or string.
 
  Parameters
  ----------
  obj : str, unicode, or file-like
- username: username for http basic auth
- password: password for http basic auth
+ auth: None or (username, password) for http basic auth
  verify_ssl: Default True. Set to False to disable cert verification
  Returns
  -------
  raw_text : str
  """
  if _is_url(obj):
- ureq, kwargs = get_urlopen_args(obj, username, password, verify_ssl)
+ ureq, kwargs = get_urlopen_args(obj, auth, verify_ssl)
  with urlopen(ureq, **kwargs) as url:
  text = url.read()
  elif hasattr(obj, 'read'):
@@ -191,14 +190,13 @@ class _HtmlFrameParser(object):
  functionality.
  """
 
- def __init__(self, io, match, attrs, encoding, username=None,
- password=None, verify_ssl=None):
+ def __init__(self, io, match, attrs, encoding, auth=None,
+ verify_ssl=None):
  self.io = io
  self.match = match
  self.attrs = attrs
  self.encoding = encoding
- self.username = username
- self.password = password
+ self.auth = auth
  self.verify_ssl = verify_ssl
 
  def parse_tables(self):
@@ -452,8 +450,7 @@ def _parse_tables(self, doc, match, attrs):
  return result
 
  def _setup_build_doc(self):
- raw_text = _read(self.io, self.username,
- self.password, self.verify_ssl)
+ raw_text = _read(self.io, self.auth, self.verify_ssl)
  if not raw_text:
  raise ValueError('No text parsed from document: %s' % self.io)
  return raw_text
@@ -743,8 +740,7 @@ def _parse(flavor, io, match, attrs, encoding, **kwargs):
  p = parser(io, compiled_match,
  attrs,
  encoding,
- username=kwargs.get('username', None),
- password=kwargs.get('password', None),
+ auth=kwargs.get('auth', None),
  verify_ssl=kwargs.get('verify_ssl', None))
  try:
  tables = p.parse_tables()
@@ -768,7 +764,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
  skiprows=None, attrs=None, parse_dates=False,
  tupleize_cols=False, thousands=',', encoding=None,
  decimal='.', converters=None, na_values=None,
- keep_default_na=True, username=None, password=None,
+ keep_default_na=True, auth=None,
  verify_ssl=False):
  r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
 
@@ -870,11 +866,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
 
  .. versionadded:: 0.19.0
 
- username : str, default None
- username for HTTP(s) basic auth
-
- password : str, default None
- password for HTTP(s) basic auth
+ auth: (str,str), default None. (username, password) for HTTP(s) basic auth
 
  verify_ssl : bool, default True
  If False, ssl certificate is not verified (allow self signed SSL certs)
@@ -926,5 +918,5 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
  parse_dates=parse_dates, tupleize_cols=tupleize_cols,
  thousands=thousands, attrs=attrs, encoding=encoding,
  decimal=decimal, converters=converters, na_values=na_values,
- keep_default_na=keep_default_na, username=username,
- password=password, verify_ssl=verify_ssl)
+ keep_default_na=keep_default_na, auth=auth,
+ verify_ssl=verify_ssl)
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
@@ -174,7 +174,7 @@ def write(self):
 def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
  convert_axes=True, convert_dates=True, keep_default_dates=True,
  numpy=False, precise_float=False, date_unit=None, encoding=None,
- lines=False, username=None, password=None, verify_ssl=None):
+ lines=False, auth=None, verify_ssl=None):
  """
  Convert a JSON string to pandas object
 
@@ -263,8 +263,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
 
  .. versionadded:: 0.19.0
 
- username: str, default None. Authentication username for HTTP(s) basic auth
- passowrd: str, default None. Authentication password for HTTP(s) basic auth
+ auth: (str,str), default None. (username, password) for HTTP(s) basic auth
  verify_ssl: boolean, default None (True).
  If false, allow self siged SSL certificates
 
@@ -327,8 +326,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
 
  filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf,
  encoding=encoding,
- username=username,
- password=password,
+ auth=auth,
  verify_ssl=verify_ssl)
  if isinstance(filepath_or_buffer, compat.string_types):
  try:

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -391,12 +391,11 @@ def _read(filepath_or_buffer, kwds):
  kwds['encoding'] = encoding
 
  compression = kwds.get('compression')
- username = kwds.get('username', None)
- password = kwds.get('password', None)
+ auth = kwds.get('auth', None)
  verify_ssl = kwds.get('verify_ssl', None)
  compression = _infer_compression(filepath_or_buffer, compression)
  filepath_or_buffer, _, compression = get_filepath_or_buffer(
- filepath_or_buffer, encoding, compression, username, password,
+ filepath_or_buffer, encoding, compression, auth,
  verify_ssl)
  kwds['compression'] = compression
 
@@ -580,9 +579,8 @@ def parser_f(filepath_or_buffer,
  memory_map=False,
  float_precision=None,
 
- # Basic auth (http/https)
- username=None,
- password=None,
+ # Basic auth (http/https) (username, password)
+ auth=None,
 
  # skip verify self signed SSL certificates
  verify_ssl=None):
@@ -667,8 +665,7 @@ def parser_f(filepath_or_buffer,
  infer_datetime_format=infer_datetime_format,
  skip_blank_lines=skip_blank_lines,
 
- username=username,
- password=password,
+ auth=auth,
  verify_ssl=verify_ssl
  )
 

diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -190,17 +190,6 @@ def test_write_fspath_hdf5(self):
 
  tm.assert_frame_equal(result, expected)
 
- def test_split_url_extract_uname_pwd(self):
- for url, uname, pwd, nurl in [('https://aaa:bbb@ccc.com:1010/aaa.txt',
- 'aaa',
- 'bbb',
- 'https://ccc.com:1010/aaa.txt'
- )]:
- un, p, u = common.split_uname_from_url(url)
- assert u == nurl
- assert un == uname
- assert p == pwd
-
 
 class TestMMapWrapper(object):