88from datetime import date , datetime
99from io import StringIO
1010
11- from dateutil .parser import parse
11+ from dateutil .parser import parse as du_parse
12+ from hypothesis import given , settings , strategies as st
1213import numpy as np
1314import pytest
1415import pytz
1516
1617from pandas ._libs .tslib import Timestamp
1718from pandas ._libs .tslibs import parsing
18- from pandas .compat import lrange
19+ from pandas ._libs .tslibs .parsing import parse_datetime_string
20+ from pandas .compat import is_platform_windows , lrange
1921from pandas .compat .numpy import np_array_datetime64_compat
2022
2123import pandas as pd
2628import pandas .io .date_converters as conv
2729import pandas .io .parsers as parsers
2830
# Baseline datetime supplied as the ``default`` argument when calling
# dateutil's parser in the hypothesis-driven comparison test.
_DEFAULT_DATETIME = datetime(1, 1, 1)

# Hypothesis strategy that generates the datetimes under test.  On Windows
# the generated values are bounded below by 1900-01-01.
# NOTE(review): presumably because earlier years cannot be formatted with
# strftime on that platform -- confirm.
date_strategy = (
    st.datetimes(min_value=datetime(1900, 1, 1))
    if is_platform_windows()
    else st.datetimes()
)
39+
2940
3041def test_separator_date_conflict (all_parsers ):
3142 # Regression test for gh-4678
@@ -439,7 +450,7 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
439450"""
440451 if "dayfirst" in kwargs :
441452 df = parser .read_csv (StringIO (data ), names = ["time" , "Q" , "NTU" ],
442- date_parser = lambda d : parse (d , ** kwargs ),
453+ date_parser = lambda d : du_parse (d , ** kwargs ),
443454 header = 0 , index_col = 0 , parse_dates = True ,
444455 na_values = ["NA" ])
445456 exp_index = Index ([datetime (2010 , 1 , 31 ), datetime (2010 , 2 , 1 ),
@@ -451,7 +462,7 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
451462 msg = "got an unexpected keyword argument 'day_first'"
452463 with pytest .raises (TypeError , match = msg ):
453464 parser .read_csv (StringIO (data ), names = ["time" , "Q" , "NTU" ],
454- date_parser = lambda d : parse (d , ** kwargs ),
465+ date_parser = lambda d : du_parse (d , ** kwargs ),
455466 skiprows = [0 ], index_col = 0 , parse_dates = True ,
456467 na_values = ["NA" ])
457468
@@ -849,3 +860,82 @@ def test_parse_timezone(all_parsers):
849860
850861 expected = DataFrame (expected_data )
851862 tm .assert_frame_equal (result , expected )
863+
864+
@pytest.mark.parametrize("date_string", [
    "32/32/2019",
    "02/30/2019",
    "13/13/2019",
    "13/2019",
    "a3/11/2018",
    "10/11/2o17",
])
def test_invalid_parse_delimited_date(all_parsers, date_string):
    """Check that invalid delimited dates are returned unparsed by read_csv.

    Each parametrized string is fed to ``read_csv`` as a single-cell,
    header-less CSV with column 0 requested via ``parse_dates``.  The
    expected frame holds the raw input text with ``object`` dtype.
    """
    unparsed = all_parsers.read_csv(
        StringIO(date_string), header=None, parse_dates=[0]
    )
    tm.assert_frame_equal(
        unparsed, DataFrame({0: [date_string]}, dtype="object")
    )
879+
880+
@pytest.mark.parametrize("date_string,dayfirst,expected", [
    # Leading field is 13, which cannot be a month: the string is expected
    # to be read as day/month/year whatever ``dayfirst`` says.
    ("13/02/2019", False, datetime(2019, 2, 13)),
    ("13/02/2019", True, datetime(2019, 2, 13)),
    # Second field is 13, which cannot be a month: the string is expected
    # to be read as month/day/year whatever ``dayfirst`` says.
    ("02/13/2019", False, datetime(2019, 2, 13)),
    ("02/13/2019", True, datetime(2019, 2, 13)),
    # Both readings are valid dates; ``dayfirst=True`` selects
    # day/month/year.
    ("04/02/2019", True, datetime(2019, 2, 4)),
])
def test_parse_delimited_date_swap(all_parsers, date_string,
                                   dayfirst, expected):
    """Check day/month resolution of delimited dates parsed by read_csv.

    The single-cell, header-less input is parsed with the given
    ``dayfirst`` flag and column 0 requested via ``parse_dates``; the
    result must be a ``datetime64[ns]`` frame holding ``expected``.
    """
    wanted = DataFrame({0: [expected]}, dtype="datetime64[ns]")
    parsed = all_parsers.read_csv(StringIO(date_string), header=None,
                                  dayfirst=dayfirst, parse_dates=[0])
    tm.assert_frame_equal(parsed, wanted)
898+
899+
900+ def _helper_hypothesis_delimited_date (call , date_string , ** kwargs ):
901+ msg , result = None , None
902+ try :
903+ result = call (date_string , ** kwargs )
904+ except ValueError as er :
905+ msg = str (er )
906+ pass
907+ return msg , result
908+
909+
@given(date_strategy)
@settings(deadline=None)
@pytest.mark.parametrize("delimiter", list(" -./"))
@pytest.mark.parametrize("dayfirst", [True, False])
@pytest.mark.parametrize("date_format", [
    "%d %m %Y",
    "%m %d %Y",
    "%m %Y",
    "%Y %m %d",
    "%y %m %d",
    "%Y%m%d",
    "%y%m%d",
])
def test_hypothesis_delimited_date(date_format, dayfirst,
                                   delimiter, test_datetime):
    """Compare ``parse_datetime_string`` with dateutil on generated dates.

    A hypothesis-generated datetime is rendered with ``date_format`` after
    every space in the format has been replaced by ``delimiter``.  The
    resulting string is parsed by both ``parse_datetime_string`` and
    dateutil's ``parse``; the two must either return equal values or fail
    with the same ``ValueError`` message.
    """
    if date_format == "%m %Y" and delimiter == ".":
        # Adjacent literals keep the skip reason free of the indentation
        # whitespace that a backslash continuation inside the string
        # would embed in the message.
        pytest.skip("parse_datetime_string cannot reliably tell whether "
                    "e.g. %m.%Y is a float or a date, thus we skip it")
    date_string = test_datetime.strftime(date_format.replace(' ', delimiter))

    # "out" is the parser under test, "in" is the dateutil reference call.
    except_out_dateutil, result = _helper_hypothesis_delimited_date(
        parse_datetime_string, date_string,
        dayfirst=dayfirst)
    except_in_dateutil, expected = _helper_hypothesis_delimited_date(
        du_parse, date_string,
        default=_DEFAULT_DATETIME,
        dayfirst=dayfirst, yearfirst=False)

    assert except_out_dateutil == except_in_dateutil
    assert result == expected
0 commit comments