@@ -358,23 +358,16 @@ def read_excel(io,
358358 ** kwds )
359359
360360
361- class ExcelFile (object ):
362- """
363- Class for parsing tabular excel sheets into DataFrame objects.
364- Uses xlrd. See read_excel for more documentation
365-
366- Parameters
367- ----------
368- io : string, path object (pathlib.Path or py._path.local.LocalPath),
369- file-like object or xlrd workbook
370- If a string or path object, expected to be a path to xls or xlsx file
371- engine : string, default None
372- If io is not a buffer or path, this must be set to identify io.
373- Acceptable values are None or xlrd
374- """
361+ class _XlrdReader (object ):
375362
376- def __init__ (self , io , ** kwds ):
363+ def __init__ (self , filepath_or_buffer ):
364+ """Reader using xlrd engine.
377365
366+ Parameters
367+ ----------
368+ filepath_or_buffer : string, path object or Workbook
369+ Object to be parsed.
370+ """
378371 err_msg = "Install xlrd >= 1.0.0 for Excel support"
379372
380373 try :
@@ -386,46 +379,39 @@ def __init__(self, io, **kwds):
386379 raise ImportError (err_msg +
387380 ". Current version " + xlrd .__VERSION__ )
388381
389- # could be a str, ExcelFile, Book, etc.
390- self .io = io
391- # Always a string
392- self ._io = _stringify_path (io )
393-
394- engine = kwds .pop ('engine' , None )
395-
396- if engine is not None and engine != 'xlrd' :
397- raise ValueError ("Unknown engine: {engine}" .format (engine = engine ))
398-
399- # If io is a url, want to keep the data as bytes so can't pass
400- # to get_filepath_or_buffer()
401- if _is_url (self ._io ):
402- io = _urlopen (self ._io )
403- elif not isinstance (self .io , (ExcelFile , xlrd .Book )):
404- io , _ , _ , _ = get_filepath_or_buffer (self ._io )
405-
406- if engine == 'xlrd' and isinstance (io , xlrd .Book ):
407- self .book = io
408- elif not isinstance (io , xlrd .Book ) and hasattr (io , "read" ):
382+ # If filepath_or_buffer is a url, want to keep the data as bytes so
383+ # can't pass to get_filepath_or_buffer()
384+ if _is_url (filepath_or_buffer ):
385+ filepath_or_buffer = _urlopen (filepath_or_buffer )
386+ elif not isinstance (filepath_or_buffer , (ExcelFile , xlrd .Book )):
387+ filepath_or_buffer , _ , _ , _ = get_filepath_or_buffer (
388+ filepath_or_buffer )
389+
390+ if isinstance (filepath_or_buffer , xlrd .Book ):
391+ self .book = filepath_or_buffer
392+ elif not isinstance (filepath_or_buffer , xlrd .Book ) and hasattr (
393+ filepath_or_buffer , "read" ):
409394 # N.B. xlrd.Book has a read attribute too
410- if hasattr (io , 'seek' ):
395+ if hasattr (filepath_or_buffer , 'seek' ):
411396 try :
412397 # GH 19779
413- io .seek (0 )
398+ filepath_or_buffer .seek (0 )
414399 except UnsupportedOperation :
415400 # HTTPResponse does not support seek()
416401 # GH 20434
417402 pass
418403
419- data = io .read ()
404+ data = filepath_or_buffer .read ()
420405 self .book = xlrd .open_workbook (file_contents = data )
421- elif isinstance (self . _io , compat .string_types ):
422- self .book = xlrd .open_workbook (self . _io )
406+ elif isinstance (filepath_or_buffer , compat .string_types ):
407+ self .book = xlrd .open_workbook (filepath_or_buffer )
423408 else :
424409 raise ValueError ('Must explicitly set engine if not passing in'
425410 ' buffer or path for io.' )
426411
427- def __fspath__ (self ):
428- return self ._io
@property
def sheet_names(self):
    """Sheet names, as reported by ``self.book.sheet_names()``."""
    workbook = self.book
    return workbook.sheet_names()
429415
430416 def parse (self ,
431417 sheet_name = 0 ,
@@ -434,12 +420,13 @@ def parse(self,
434420 index_col = None ,
435421 usecols = None ,
436422 squeeze = False ,
437- converters = None ,
423+ dtype = None ,
438424 true_values = None ,
439425 false_values = None ,
440426 skiprows = None ,
441427 nrows = None ,
442428 na_values = None ,
429+ verbose = False ,
443430 parse_dates = False ,
444431 date_parser = None ,
445432 thousands = None ,
@@ -448,72 +435,9 @@ def parse(self,
448435 convert_float = True ,
449436 mangle_dupe_cols = True ,
450437 ** kwds ):
451- """
452- Parse specified sheet(s) into a DataFrame
453-
454- Equivalent to read_excel(ExcelFile, ...) See the read_excel
455- docstring for more info on accepted parameters
456- """
457-
458- # Can't use _deprecate_kwarg since sheetname=None has a special meaning
459- if is_integer (sheet_name ) and sheet_name == 0 and 'sheetname' in kwds :
460- warnings .warn ("The `sheetname` keyword is deprecated, use "
461- "`sheet_name` instead" , FutureWarning , stacklevel = 2 )
462- sheet_name = kwds .pop ("sheetname" )
463- elif 'sheetname' in kwds :
464- raise TypeError ("Cannot specify both `sheet_name` "
465- "and `sheetname`. Use just `sheet_name`" )
466-
467- return self ._parse_excel (sheet_name = sheet_name ,
468- header = header ,
469- names = names ,
470- index_col = index_col ,
471- usecols = usecols ,
472- squeeze = squeeze ,
473- converters = converters ,
474- true_values = true_values ,
475- false_values = false_values ,
476- skiprows = skiprows ,
477- nrows = nrows ,
478- na_values = na_values ,
479- parse_dates = parse_dates ,
480- date_parser = date_parser ,
481- thousands = thousands ,
482- comment = comment ,
483- skipfooter = skipfooter ,
484- convert_float = convert_float ,
485- mangle_dupe_cols = mangle_dupe_cols ,
486- ** kwds )
487-
488- def _parse_excel (self ,
489- sheet_name = 0 ,
490- header = 0 ,
491- names = None ,
492- index_col = None ,
493- usecols = None ,
494- squeeze = False ,
495- dtype = None ,
496- true_values = None ,
497- false_values = None ,
498- skiprows = None ,
499- nrows = None ,
500- na_values = None ,
501- verbose = False ,
502- parse_dates = False ,
503- date_parser = None ,
504- thousands = None ,
505- comment = None ,
506- skipfooter = 0 ,
507- convert_float = True ,
508- mangle_dupe_cols = True ,
509- ** kwds ):
510438
511439 _validate_header_arg (header )
512440
513- if 'chunksize' in kwds :
514- raise NotImplementedError ("chunksize keyword of read_excel "
515- "is not implemented" )
516-
517441 from xlrd import (xldate , XL_CELL_DATE ,
518442 XL_CELL_ERROR , XL_CELL_BOOLEAN ,
519443 XL_CELL_NUMBER )
@@ -563,7 +487,7 @@ def _parse_cell(cell_contents, cell_typ):
563487 sheets = sheet_name
564488 ret_dict = True
565489 elif sheet_name is None :
566- sheets = self .sheet_names
490+ sheets = self .book . sheet_names ()
567491 ret_dict = True
568492 else :
569493 sheets = [sheet_name ]
@@ -678,9 +602,111 @@ def _parse_cell(cell_contents, cell_typ):
678602 else :
679603 return output [asheetname ]
680604
605+
606+ class ExcelFile (object ):
607+ """
608+ Class for parsing tabular excel sheets into DataFrame objects.
609+ Uses xlrd. See read_excel for more documentation
610+
611+ Parameters
612+ ----------
613+ io : string, path object (pathlib.Path or py._path.local.LocalPath),
614+ file-like object or xlrd workbook
615+ If a string or path object, expected to be a path to xls or xlsx file.
616+ engine : string, default None
617+ If io is not a buffer or path, this must be set to identify io.
618+ Acceptable values are None or ``xlrd``.
619+ """
620+
621+ _engines = {
622+ 'xlrd' : _XlrdReader ,
623+ }
624+
def __init__(self, io, engine=None):
    """Validate ``engine``, remember ``io`` and build the engine reader.

    Parameters
    ----------
    io : object
        Source to read from; stored unchanged on ``self.io``.
    engine : string, default None
        Key into ``self._engines``. ``None`` selects ``'xlrd'``.

    Raises
    ------
    ValueError
        If ``engine`` is not a key of ``self._engines``.
    """
    selected = 'xlrd' if engine is None else engine
    if selected not in self._engines:
        raise ValueError("Unknown engine: {engine}".format(engine=selected))

    # could be a str, ExcelFile, Book, etc.
    self.io = io
    # Result of _stringify_path(io); presumably a plain string whenever
    # io is path-like -- confirm against _stringify_path.
    self._io = _stringify_path(io)

    # The reader receives the stringified form, not the raw ``io``.
    reader_cls = self._engines[selected]
    self._reader = reader_cls(self._io)
637+
def __fspath__(self):
    """Return ``self._io`` as this object's file-system path."""
    path = self._io
    return path
640+
def parse(self,
          sheet_name=0,
          header=0,
          names=None,
          index_col=None,
          usecols=None,
          squeeze=False,
          converters=None,
          true_values=None,
          false_values=None,
          skiprows=None,
          nrows=None,
          na_values=None,
          parse_dates=False,
          date_parser=None,
          thousands=None,
          comment=None,
          skipfooter=0,
          convert_float=True,
          mangle_dupe_cols=True,
          **kwds):
    """
    Parse specified sheet(s) into a DataFrame

    Equivalent to read_excel(ExcelFile, ...) See the read_excel
    docstring for more info on accepted parameters. After handling the
    deprecated ``sheetname`` keyword and rejecting ``chunksize``, every
    argument is forwarded by keyword to ``self._reader.parse``.

    Raises
    ------
    TypeError
        If ``sheetname`` is passed together with a ``sheet_name`` other
        than the default integer 0.
    NotImplementedError
        If ``chunksize`` is passed.
    """

    # Can't use _deprecate_kwarg since sheetname=None has a special meaning
    if 'sheetname' in kwds:
        sheet_name_is_default = is_integer(sheet_name) and sheet_name == 0
        if not sheet_name_is_default:
            raise TypeError("Cannot specify both `sheet_name` "
                            "and `sheetname`. Use just `sheet_name`")
        warnings.warn("The `sheetname` keyword is deprecated, use "
                      "`sheet_name` instead", FutureWarning, stacklevel=2)
        sheet_name = kwds.pop("sheetname")

    if 'chunksize' in kwds:
        raise NotImplementedError("chunksize keyword of read_excel "
                                  "is not implemented")

    # Named parameters can never also appear in ``kwds``, so merging the
    # two mappings cannot overwrite any explicit argument.
    reader_kwargs = dict(sheet_name=sheet_name,
                         header=header,
                         names=names,
                         index_col=index_col,
                         usecols=usecols,
                         squeeze=squeeze,
                         converters=converters,
                         true_values=true_values,
                         false_values=false_values,
                         skiprows=skiprows,
                         nrows=nrows,
                         na_values=na_values,
                         parse_dates=parse_dates,
                         date_parser=date_parser,
                         thousands=thousands,
                         comment=comment,
                         skipfooter=skipfooter,
                         convert_float=convert_float,
                         mangle_dupe_cols=mangle_dupe_cols)
    reader_kwargs.update(kwds)
    return self._reader.parse(**reader_kwargs)
702+
@property
def book(self):
    """Workbook object held by the engine reader (``self._reader.book``)."""
    reader = self._reader
    return reader.book
706+
@property
def sheet_names(self):
    """Sheet names, as exposed by the engine reader's ``sheet_names``."""
    reader = self._reader
    return reader.sheet_names
684710
685711 def close (self ):
686712 """close io if necessary"""
0 commit comments