@@ -44,7 +44,7 @@ def read_stata(filepath_or_buffer, convert_dates=True,
4444 Read value labels and convert columns to Categorical/Factor variables
4545 encoding : string, None or encoding
4646 Encoding used to parse the files. Note that Stata doesn't
47- support unicode. None defaults to cp1252 .
47+ support unicode. None defaults to iso-8859-1 .
4848 index : identifier of index column
4949 identifier of column that should be used as index of the DataFrame
5050 convert_missing : boolean, defaults to False
@@ -683,7 +683,7 @@ def get_base_missing_value(cls, dtype):
683683
684684
685685class StataParser (object ):
686- _default_encoding = 'cp1252 '
686+ _default_encoding = 'iso-8859-1 '
687687
688688 def __init__ (self , encoding ):
689689 self ._encoding = encoding
@@ -823,10 +823,10 @@ class StataReader(StataParser):
823823 Path to .dta file or object implementing a binary read() functions
824824 encoding : string, None or encoding
825825 Encoding used to parse the files. Note that Stata doesn't
826- support unicode. None defaults to cp1252 .
826+ support unicode. None defaults to iso-8859-1 .
827827 """
828828
829- def __init__ (self , path_or_buf , encoding = 'cp1252 ' ):
829+ def __init__ (self , path_or_buf , encoding = 'iso-8859-1 ' ):
830830 super (StataReader , self ).__init__ (encoding )
831831 self .col_sizes = ()
832832 self ._has_string_data = False
@@ -841,7 +841,13 @@ def __init__(self, path_or_buf, encoding='cp1252'):
841841 if isinstance (path_or_buf , (str , compat .text_type , bytes )):
842842 self .path_or_buf = open (path_or_buf , 'rb' )
843843 else :
844- self .path_or_buf = path_or_buf
844+ # Copy to BytesIO, and ensure no encoding
845+ contents = path_or_buf .read ()
846+ try :
847+ contents = contents .encode (self ._default_encoding )
848+ except :
849+ pass
850+ self .path_or_buf = BytesIO (contents )
845851
846852 self ._read_header ()
847853
0 commit comments