* Corrected small typos around resistor bands.
* Due to failing CI, the test generator script needed alterations. The generated-vs-submitted diff now skips the first three lines of each file so that the generation date is not picked up and flagged as needing regeneration. Sadly, a workaround was also needed to keep Python difflib from noting the difference anyway and producing an empty "false positive" diff. All templates and test files also had to be altered so that the first three lines of every test file are always the autogeneration comment and date. Hopefully, this stops the CI failures without creating any subtle additional bugs.
* Touched up the bowling template to add back the error-raising utility.
* Touched up the two-bucket template to add back the error-raising utility.

[no important files changed]
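
The header-skipping comparison described above can be sketched roughly as
follows (an illustration only; the file names are hypothetical and the real
logic lives in check_template further down):

    import difflib

    with open("two_fer_test.py") as f:
        current = f.readlines()[3:]        # drop the 3-line autogeneration header
    with open("regenerated_test.py") as f:
        regenerated = f.readlines()[3:]
    diff = list(difflib.unified_diff(current, regenerated))
    in_sync = not diff                     # an empty diff counts as "in sync"
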
456 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
"""
Generates exercise test suites using an exercise's canonical-data.json
(found in problem-specifications) and $exercise/.meta/template.j2.
If either does not exist, generation will not be attempted.

Usage:
    generate_tests.py                   Generates tests for all exercises
    generate_tests.py two-fer           Generates tests for two-fer exercise
    generate_tests.py t*                Generates tests for all exercises matching t*

    generate_tests.py --check           Checks if test files are out of sync with templates
    generate_tests.py --check two-fer   Checks if two-fer test file is out of sync with template
"""
import sys

from githelp import Repo

_py = sys.version_info
if _py.major < 3 or (_py.major == 3 and _py.minor < 7):
    print("Python version must be at least 3.7")
    sys.exit(1)

import argparse
from datetime import datetime
from datetime import timezone
import difflib
import filecmp
import importlib.util
import json
import logging
from pathlib import Path, PurePath, PureWindowsPath
import re
import shutil
from itertools import repeat
from string import punctuation, whitespace
from subprocess import check_call
from tempfile import NamedTemporaryFile
from textwrap import wrap
from typing import Any, Dict, List, NoReturn, Union

# Tomli was adopted into the standard library in Python 3.11 and renamed to tomllib.
# This avoids CI failures for Python < 3.11.2.
try:
    import tomllib
except ModuleNotFoundError:
    import tomli as tomllib

from jinja2 import Environment, FileSystemLoader, TemplateNotFound, UndefinedError
from dateutil.parser import parse

from githelp import clone_if_missing, Repo
from data import TestsTOML

VERSION = "0.3.0"

TypeJSON = Dict[str, Any]

PROBLEM_SPEC_REPO = "https://github.com/exercism/problem-specifications.git"
DEFAULT_SPEC_LOCATION = Path(".problem-specifications")
RGX_WORDS = re.compile(r"[-_\s]|(?=[A-Z])")

logging.basicConfig()
logger = logging.getLogger("generator")
logger.setLevel(logging.WARN)


def replace_all(string: str, chars: Union[str, List[str]], rep: str) -> str:
    """
    Replace any char in chars with rep, reduce runs and strip terminal ends.
    """
    trans = str.maketrans(dict(zip(chars, repeat(rep))))
    return re.sub("{0}+".format(re.escape(rep)), rep, string.translate(trans)).strip(
        rep
    )


def to_snake(string: str, wordchars_only: bool = False) -> str:
    """
    Convert pretty much anything to snake_case.

    By default whitespace and punctuation will be converted
    to underscores as well; pass wordchars_only=True to preserve these as is.
    """
    clean = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", string)
    clean = re.sub("([a-z0-9])([A-Z])", r"\1_\2", clean).lower()
    return clean if wordchars_only else replace_all(clean, whitespace + punctuation, "_")


def camel_case(string: str) -> str:
    """
    Convert pretty much anything to CamelCase.
    """
    return "".join(w.title() for w in to_snake(string).split("_"))
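
# A couple of illustrative conversions (examples only, not executed here):
#   to_snake("Two Fer")   -> "two_fer"
#   camel_case("two-fer") -> "TwoFer"

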
def wrap_overlong(string: str, width: int = 70) -> List[str]:
    """
    Break an overly long string literal into escaped lines.
    """
    return ["{0!r} \\".format(w) for w in wrap(string, width)]


def parse_datetime(string: str, strip_module: bool = False) -> datetime:
    """
    Parse a (hopefully ISO 8601) datestamp to a datetime object and
    return its repr for use in a jinja2 template.

    If used the template will need to import the datetime module.

        import datetime

    However if strip_module is True then the template will need to
    import the datetime _class_ instead.

        from datetime import datetime
    """
    result = repr(parse(string))
    if strip_module:
        return result.replace("datetime.", "", 1)
    return result
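
# For example (illustrative; the exact repr depends on the installed dateutil):
#   parse_datetime("2019-01-25T08:11:00Z", strip_module=True)
#   -> "datetime(2019, 1, 25, 8, 11, tzinfo=tzutc())"

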
INVALID_ESCAPE_RE = re.compile(
    r"""
    \\(?!                    # a backslash NOT followed by
    newline                  # the literal newline
    |[                       # OR precisely one of
    \\                       # another backslash
    '                        # the single quote
    "                        # the double quote
    a                        # the ASCII bell
    b                        # the ASCII backspace
    f                        # the ASCII formfeed
    n                        # the ASCII linefeed
    r                        # the ASCII carriage return
    t                        # the ASCII horizontal tab
    v                        # the ASCII vertical tab
    ]|                       # OR
    o(?:[0-8]{1,3})          # an octal value
    |                        # OR
    x(?:[0-9A-Fa-f]{2})      # a hexadecimal value
    |                        # OR
    N                        # a unicode char name composed of
    \{                       # an opening brace
    [A-Z][A-Z\ \-]*[A-Z]     # uppercase WORD, WORDs (or WORD-WORDs)
    \}                       # and a closing brace
    |                        # OR
    u(?:[0-9A-Fa-f]{4})      # a 16-bit unicode char
    |                        # OR
    U(?:[0-9A-Fa-f]{8})      # a 32-bit unicode char
    )""", flags=re.VERBOSE)


def escape_invalid_escapes(string: str) -> str:
    """
    Some canonical data includes invalid escape sequences, which
    need to be properly escaped before template render.
    """
    return INVALID_ESCAPE_RE.sub(r"\\\\", string)


ALL_VALID = r"\newline\\\'\"\a\b\f\n\r\t\v\o123" \
            r"\xFF\N{GREATER-THAN SIGN}\u0394\U00000394"

assert ALL_VALID == escape_invalid_escapes(ALL_VALID)


def get_tested_properties(spec: TypeJSON) -> List[str]:
    """
    Get set of tested properties from spec. Include nested cases.
    """
    props = set()
    for case in spec["cases"]:
        if "property" in case:
            props.add(case["property"])
        if "cases" in case:
            props.update(get_tested_properties(case))
    return sorted(props)


def error_case(case: TypeJSON) -> bool:
    return (
        "expected" in case
        and isinstance(case["expected"], dict)
        and "error" in case["expected"]
    )


def has_error_case(cases: List[TypeJSON]) -> bool:
    cases = cases[:]
    while cases:
        case = cases.pop(0)
        if error_case(case):
            return True
        cases.extend(case.get("cases", []))
    return False
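
# For reference, an error case in canonical data looks roughly like this
# (an abbreviated, assumed example):
#   {"uuid": "...", "description": "...", "expected": {"error": "message"}}

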
def regex_replace(s: str, find: str, repl: str) -> str:
    return re.sub(find, repl, s)


def regex_find(s: str, find: str) -> List[Any]:
    return re.findall(find, s)


def regex_split(s: str, find: str) -> List[str]:
    return re.split(find, s)


def filter_test_cases(cases: List[TypeJSON], opts: TestsTOML) -> List[TypeJSON]:
    """
    Returns a filtered copy of `cases` where only cases whose UUID is marked True in
    `opts` are included.
    """
    filtered = []
    for case in cases:
        if "uuid" in case:
            uuid = case["uuid"]
            case_opts = opts.cases.get(uuid, None)
            if case_opts is not None and case_opts.include:
                filtered.append(case)
            else:
                logger.debug(f"uuid {uuid} either missing or not marked for include")
        elif "cases" in case:
            subfiltered = filter_test_cases(case["cases"], opts)
            if subfiltered:
                case_copy = dict(case)
                case_copy["cases"] = subfiltered
                filtered.append(case_copy)
    return filtered
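
# The .meta/tests.toml consumed here is assumed to look roughly like the
# following (one table per canonical case UUID, with include defaulting to true):
#   [a1b2c3d4-0000-4000-8000-000000000000]
#   description = "some case description"
#   include = false

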
def load_canonical(exercise: str, spec_path: Path, test_opts: TestsTOML) -> TypeJSON:
    """
    Loads the canonical data for an exercise as a nested dictionary
    """
    full_path = spec_path / "exercises" / exercise / "canonical-data.json"
    with full_path.open() as f:
        spec = json.load(f)
    spec["cases"] = filter_test_cases(spec["cases"], test_opts)
    spec["properties"] = get_tested_properties(spec)
    return spec


def load_additional_tests(exercise: Path) -> List[TypeJSON]:
    """
    Loads additional tests from .meta/additional_tests.json
    """
    full_path = exercise / ".meta/additional_tests.json"
    try:
        with full_path.open() as f:
            data = json.load(f)
        return data.get("cases", [])
    except FileNotFoundError:
        return []
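
# When present, .meta/additional_tests.json is expected to hold track-specific
# cases under a top-level "cases" key, e.g. {"cases": [...]}.

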
def format_file(path: Path) -> NoReturn:
    """
    Runs black auto-formatter on file at path
    """
    check_call(["black", "-q", path])


def check_template(slug: str, tests_path: Path, tmpfile: Path):
    """Generate a new test file and diff against the existing file.

    Note: The timestamp in each test file creates issues with
    Python difflib, so the first three lines (the autogeneration
    comment and date) are skipped when the files are prepped for diff.

    This rather crude skipping can produce an empty "false positive"
    diff, so an empty diff is treated as a pass.
    """

    try:
        check_ok = True
        if not tmpfile.is_file():
            logger.debug(f"{slug}: tmp file {tmpfile} not found")
            check_ok = False
        if not tests_path.is_file():
            logger.debug(f"{slug}: tests file {tests_path} not found")
            check_ok = False
        if check_ok and not filecmp.cmp(tmpfile, tests_path):
            with tests_path.open() as f:
                current_lines = f.readlines()[3:]
            with tmpfile.open() as f:
                rendered_lines = f.readlines()[3:]

            diff = list(difflib.unified_diff(
                current_lines,
                rendered_lines,
                fromfile=f"[current] {tests_path.name}",
                tofile=f"[generated] {tmpfile.name}",
                lineterm="\n",
            ))
            if not diff:
                check_ok = True
            else:
                logger.debug(f"{slug}: ##### DIFF START #####")
                for line in diff:
                    logger.debug(line.strip())
                logger.debug(f"{slug}: ##### DIFF END #####")
                check_ok = False
        if not check_ok:
            logger.error(
                f"{slug}: check failed; tests must be regenerated with bin/generate_tests.py"
            )
            return False
        logger.debug(f"{slug}: check passed")
    finally:
        logger.debug(f"{slug}: removing tmp file {tmpfile}")
        tmpfile.unlink()
    return True


def generate_exercise(env: Environment, spec_path: Path, exercise: Path, check: bool = False):
    """
    Renders test suite for exercise and if check is:
    True: verifies that current tests file matches rendered
    False: saves rendered to tests file
    """
    slug = exercise.name
    meta_dir = exercise / ".meta"
    plugins_module = None
    plugins_name = "plugins"
    plugins_source = meta_dir / f"{plugins_name}.py"
    try:
        if plugins_source.is_file():
            plugins_spec = importlib.util.spec_from_file_location(
                plugins_name, plugins_source
            )
            plugins_module = importlib.util.module_from_spec(plugins_spec)
            sys.modules[plugins_name] = plugins_module
            plugins_spec.loader.exec_module(plugins_module)
        try:
            test_opts = TestsTOML.load(meta_dir / "tests.toml")
        except FileNotFoundError:
            logger.error(f"{slug}: tests.toml not found; skipping.")
            return True

        spec = load_canonical(slug, spec_path, test_opts)
        additional_tests = load_additional_tests(exercise)
        spec["additional_cases"] = additional_tests
        template_path = exercise.relative_to("exercises") / ".meta/template.j2"

        # See https://github.com/pallets/jinja/issues/767 for why this is needed on Windows systems.
        if "\\" in str(template_path):
            template_path = PureWindowsPath(template_path).as_posix()

        template = env.get_template(str(template_path))
        tests_path = exercise / f"{to_snake(slug)}_test.py"
        spec["has_error_case"] = has_error_case(spec["cases"])

        if plugins_module is not None:
            spec[plugins_name] = plugins_module
        logger.debug(f"{slug}: attempting render")
        rendered = template.render(**spec)
        with NamedTemporaryFile("w", delete=False) as tmp:
            logger.debug(f"{slug}: writing render to tmp file {tmp.name}")
            tmpfile = Path(tmp.name)
            tmp.write(rendered)
        try:
            logger.debug(f"{slug}: formatting tmp file {tmpfile}")
            format_file(tmpfile)
        except FileNotFoundError as e:
            logger.error(f"{slug}: the black utility must be installed")
            return False

        if check:
            return check_template(slug, tests_path, tmpfile)
        else:
            logger.debug(f"{slug}: moving tmp file {tmpfile}->{tests_path}")
            shutil.move(tmpfile, tests_path)
            print(f"{slug} generated at {tests_path}")
    except (TypeError, UndefinedError, SyntaxError) as e:
        logger.debug(str(e))
        logger.error(f"{slug}: generation failed")
        return False
    except TemplateNotFound as e:
        logger.debug(str(e))
        logger.info(f"{slug}: no template found; skipping")
    except FileNotFoundError as e:
        logger.debug(str(e))
        logger.info(f"{slug}: no canonical data found; skipping")
    return True


def generate(
    exercise_glob: str,
    spec_path: Path = DEFAULT_SPEC_LOCATION,
    stop_on_failure: bool = False,
    check: bool = False,
    **_,
):
    """
    Primary entry point. Generates test files for all exercises matching exercise_glob
    """
    # black must be installed or all test files will error
    if not shutil.which("black"):
        logger.error("the black utility must be installed")
        sys.exit(1)
    loader = FileSystemLoader(["config", "exercises"])
    env = Environment(loader=loader, keep_trailing_newline=True)
    env.filters["to_snake"] = to_snake
    env.filters["camel_case"] = camel_case
    env.filters["wrap_overlong"] = wrap_overlong
    env.filters["regex_replace"] = regex_replace
    env.filters["regex_find"] = regex_find
    env.filters["regex_split"] = regex_split
    env.filters["zip"] = zip
    env.filters["parse_datetime"] = parse_datetime
    env.filters["escape_invalid_escapes"] = escape_invalid_escapes
    env.globals["current_date"] = datetime.now(tz=timezone.utc).date()
    env.tests["error_case"] = error_case
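    # Templates can then use these registrations; a hypothetical snippet:
    #   def test_{{ case["description"] | to_snake }}(self):
    #   {% if case is error_case %} ... {% endif %}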
    result = True
    for exercise in sorted(Path("exercises/practice").glob(exercise_glob)):
        if not generate_exercise(env, spec_path, exercise, check):
            result = False
            if stop_on_failure:
                break
    if not result:
        sys.exit(1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("exercise_glob", nargs="?", default="*", metavar="EXERCISE")
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s {} for Python {}".format(VERSION, sys.version.split("\n")[0]),
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    parser.add_argument(
        "-p",
        "--spec-path",
        default=DEFAULT_SPEC_LOCATION,
        type=Path,
        help=(
            "path to clone of exercism/problem-specifications " "(default: %(default)s)"
        ),
    )
    parser.add_argument("--stop-on-failure", action="store_true")
    parser.add_argument(
        "--check",
        action="store_true",
        help="check if tests are up-to-date, but do not modify test files",
    )
    opts = parser.parse_args()
    if opts.verbose:
        logger.setLevel(logging.DEBUG)
    with clone_if_missing(repo=Repo.ProblemSpecifications, directory=opts.spec_path):
        generate(**opts.__dict__)