Fixes for datapythonista's review

pandas-dev · jreback · Jan 2, 2020 · Dec 26, 2019 · Dec 26, 2019 · Dec 29, 2019
commit c19a2a5bc55697a2e9d79b51cb8492b6cbc7a958
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -101,7 +101,11 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
  RET=$(($RET + $?)) ; echo $MSG "DONE"
 
  MSG='Check for use of not concatenated strings' ; echo $MSG
- $BASE_DIR/scripts/validate_string_concatenation.py pandas
+ if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+ $BASE_DIR/scripts/validate_string_concatenation.py --format="[error]{source_path}:{line_number}:String unnecessarily split in two by black. Please merge them manually." .
+ else
+ $BASE_DIR/scripts/validate_string_concatenation.py .
+ fi
  RET=$(($RET + $?)) ; echo $MSG "DONE"
 
  echo "isort --version-number"

diff --git a/scripts/validate_string_concatenation.py b/scripts/validate_string_concatenation.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 """
 GH #30454
 
@@ -12,7 +12,6 @@
 ... "baz"
 ... )
 
-
 into this:
 
 >>> foo = ("bar " "baz")
@@ -22,65 +21,102 @@
 so we are checking it here.
 """
 
+import argparse
 import os
 import sys
 import token
 import tokenize
-from typing import FrozenSet, Generator, List
+from typing import Dict, Generator, List
+
+FILE_EXTENSIONS_TO_CHECK = (".py", ".pyx", ".pyx.ini", ".pxd")
 
-FILE_EXTENSIONS_TO_CHECK: FrozenSet[str] = frozenset(
- (".pxd", ".py", ".pyx", ".pyx.ini")
-)
 
+def main(source_path: str, output_format: str) -> bool:
+ """
+ Main entry point of the script.
+
+ Parameters
+ ----------
+ source_path : str, default '.'
+ Source path representing path to a file/directory.
+ output_format : str
+ Output format of the script.
+
+ Returns
+ -------
+ bool
+ True if found any strings that needs to be concatenated.
 
-def strings_to_concatenate(file_path: str) -> Generator[str, None, None]:
+ Raises
+ ------
+ ValueError
+ If the `source_path` is not pointing to existing file/directory.
+ """
+ if not os.path.exists(source_path):
+ raise ValueError(
+ "Please enter a valid path, pointing to a valid file/directory."
+ )
+
+ is_failed: bool = False
+
+ if os.path.isfile(source_path):
+ for values in strings_to_concatenate(source_path):
+ is_failed = True
+ print(output_format.format(**values))
+
+ for subdir, _, files in os.walk(source_path):
+ for file_name in files:
+ if any(
+ file_name.endswith(extension) for extension in FILE_EXTENSIONS_TO_CHECK
+ ):
+ for values in strings_to_concatenate(os.path.join(subdir, file_name)):
+ is_failed = True
+ print(output_format.format(**values))
- print(output_format.format(**values))
+ msg = "String unnecessarily split in two by black. Please merge them manually."
+ print(output_format.format(source_path=source_path, line_number=line_number, msg=msg))
- print(output_format.format(**values))
+ msg = "String unnecessarily split in two by black. Please merge them manually."
+ print(output_format.format(source_path=source_path, line_number=line_number, msg=msg))
+ return is_failed
+
+
+def strings_to_concatenate(source_path: str) -> Generator[Dict[str, str], None, None]:
  """
  Yielding the strings that needs to be concatenated in a given file.
 
  Parameters
  ----------
- file_path : str
+ source_path : str
  File path pointing to a single file.
 
  Yields
  ------
- str
- Message containing info about the string that needs to be concatenated.
+ dict of {str: str}
+ Containing:
+ source_path
+ Source file path.
+ line_number
+ Line number of unconcatenated string.
  """
- with open(file_path, "r") as file_name:
+ with open(source_path, "r") as file_name:
  tokens: List = list(tokenize.generate_tokens(file_name.readline))
 
  for current_token, next_token in zip(tokens, tokens[1:]):
  if current_token[0] == next_token[0] == token.STRING:
- line_number = current_token[2][0]
- start = current_token[1]
- end = next_token[1]
- yield f"{file_path}:{line_number}:\t between {start} and {end}\n"
+ yield {"source_path": source_path, "line_number": current_token[2][0]}
 
 
 if __name__ == "__main__":
- path: str = sys.argv[1]
-
- if not os.path.exists(path):
- raise ValueError("Please enter a valid path, to a file/directory.")
-
- failed: bool = False
-
- if os.path.isfile(path):
- for msg in strings_to_concatenate(path):
- if msg:
- failed = True
- print(msg)
-
- for subdir, _, files in os.walk(path):
- for file_name in files:
- if any(
- file_name.endswith(extension) for extension in FILE_EXTENSIONS_TO_CHECK
- ):
- file_extension = os.path.join(subdir, file_name)
-
- for msg in strings_to_concatenate(os.path.join(subdir, file_name)):
- if msg:
- failed = True
- print(msg)
- sys.exit(failed)
+ parser = argparse.ArgumentParser(description="Validate concatenated strings")
+
+ parser.add_argument(
+ "path", nargs="?", default=".", help="Source path of file/directory to check."
+ )
+ parser.add_argument(
+ "--format",
+ "-f",
+ default=(
+ "{source_path}:{line_number}:String unnecessarily split in two by black. "
+ "Please merge them manually."
+ ),
+ help="Output format of the unconcatenated strings.",
+ )
+
+ args = parser.parse_args()
+
+ sys.exit(main(source_path=args.path, output_format=args.format))