akx
diff --git a/doc/source/whatsnew/v0.16.0.txt
Lines changed: 1 addition & 1 deletion b/doc/source/whatsnew/v0.16.0.txt
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/io/stata.py
Lines changed: 11 additions & 5 deletions b/pandas/io/stata.py
Lines changed: 11 additions & 5 deletions
diff --git a/pandas/io/tests/test_stata.py
Lines changed: 0 additions & 1 deletion b/pandas/io/tests/test_stata.py
Lines changed: 0 additions & 1 deletion
@@ -106,7 +106,7 @@ Bug Fixes
 - Bug in left ``join`` on multi-index with ``sort=True`` or null values (:issue:`9210`).
 
 
-
+- Fixed character encoding bug in ``read_stata`` and ``StataReader`` when loading data from a URL (:issue:`9231`).
 
 
 
 
@@ -44,7 +44,7 @@ def read_stata(filepath_or_buffer, convert_dates=True,
  Read value labels and convert columns to Categorical/Factor variables
  encoding : string, None or encoding
  Encoding used to parse the files. Note that Stata doesn't
- support unicode. None defaults to cp1252.
+ support unicode. None defaults to iso-8859-1.
  index : identifier of index column
  identifier of column that should be used as index of the DataFrame
  convert_missing : boolean, defaults to False
@@ -683,7 +683,7 @@ def get_base_missing_value(cls, dtype):
 
 
 class StataParser(object):
- _default_encoding = 'cp1252'
+ _default_encoding = 'iso-8859-1'
 
  def __init__(self, encoding):
  self._encoding = encoding
@@ -823,10 +823,10 @@ class StataReader(StataParser):
  Path to .dta file or object implementing a binary read() functions
  encoding : string, None or encoding
  Encoding used to parse the files. Note that Stata doesn't
- support unicode. None defaults to cp1252.
+ support unicode. None defaults to iso-8859-1.
  """
 
- def __init__(self, path_or_buf, encoding='cp1252'):
+ def __init__(self, path_or_buf, encoding='iso-8859-1'):
  super(StataReader, self).__init__(encoding)
  self.col_sizes = ()
  self._has_string_data = False
@@ -841,7 +841,13 @@ def __init__(self, path_or_buf, encoding='cp1252'):
  if isinstance(path_or_buf, (str, compat.text_type, bytes)):
  self.path_or_buf = open(path_or_buf, 'rb')
  else:
- self.path_or_buf = path_or_buf
+ # Copy to BytesIO, and ensure no encoding
+ contents = path_or_buf.read()
+ try:
+ contents = contents.encode(self._default_encoding)
+ except:
+ pass
+ self.path_or_buf = BytesIO(contents)
 
  self._read_header()
 
 
@@ -889,7 +889,6 @@ def test_categorical_ordering(self):
  tm.assert_equal(False, parsed_115_unordered[col].cat.ordered)
  tm.assert_equal(False, parsed_117_unordered[col].cat.ordered)
 
-
 if __name__ == '__main__':
  nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
  exit=False)
Original file line number	Diff line number	Diff line change
`@@ -106,7 +106,7 @@ Bug Fixes`
`106`	`106`	- Bug in left ``join`` on multi-index with ``sort=True`` or null values (:issue:`9210`).
`107`	`107`
`108`	`108`
`109`		`-`
	`109`	+- Fixed character encoding bug in ``read_stata`` and ``StataReader`` when loading data from a URL (:issue:`9231`).
`110`	`110`
`111`	`111`
`112`	`112`