CoCalc -- run_doctest.py

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/docs/run_doctest.py
⁶⁹³⁹ views
1
"""
2
Run all doctest examples of the `polars` module using Python's built-in doctest module.
3

4
How to check examples: run this script; if it exits with code 0, all is good.
Otherwise, the errors will be reported.
6

7
How to modify behaviour for doctests:
1. if you would like code to be run and output checked: add the output below the code
   block
2. if you would like code to be run (and thus checked that it does not fail), but the
   output not to be checked: add `# doctest: +IGNORE_RESULT` to the code block. You may
   still add example output.
3. if you would not like code to run: add `# doctest: +SKIP`. You may still add example
   output.
15

16
Notes
17
-----
18
* Doctest does not have a built-in IGNORE_RESULT directive. We have a number of tests
19
  where we want to ensure that the code runs, but the output may be random by design, or
20
  not interesting for us to check. To allow for this behaviour, a custom output checker
21
  has been created, see below.
22
* The doctests depend on the exact string representation staying the same. This may not
23
  be true in the future. For instance, in the past, the printout of DataFrames has
24
  changed from rounded corners to less rounded corners. To facilitate such a change,
25
  whilst not immediately having to add IGNORE_RESULT directives everywhere or changing
26
  all outputs, set `IGNORE_RESULT_ALL=True` below. Do note that this does mean no output
27
  is being checked anymore.
28
"""
29

30
from __future__ import annotations
31

32
import doctest
33
import importlib
34
import re
35
import sys
36
import unittest
37
import warnings
38
from pathlib import Path
39
from tempfile import TemporaryDirectory
40
from typing import TYPE_CHECKING, Any
41

42
import polars as pl
43

44
if TYPE_CHECKING:
45
    from collections.abc import Iterator
46
    from types import ModuleType
47

48

49
if sys.version_info < (3, 12):
    # Tests that print an OrderedDict fail (e.g. DataFrame.schema) as the repr
    # has changed in Python 3.12
    warnings.warn(
        "Certain doctests may fail when running on a Python version below 3.12."
        " Update your Python version to 3.12 or later to make sure all tests pass.",
        stacklevel=2,
    )

# Associate specific doctest method names with optional modules.
# If the module can be imported in this environment those doctests will
# run; if it cannot, their doctests are skipped.
OPTIONAL_MODULES_AND_METHODS: dict[str, set[str]] = {
    "jax": {"to_jax"},
    "torch": {"to_torch"},
}
# Names of the optional modules that could NOT be imported here.
OPTIONAL_MODULES: set[str] = set()
# Method names whose doctests are dropped because their module is missing.
SKIP_METHODS: set[str] = set()

for mod, methods in OPTIONAL_MODULES_AND_METHODS.items():
    try:
        importlib.import_module(mod)
    except ImportError:  # noqa: PERF203
        # module unavailable: remember it and skip the doctests that need it
        SKIP_METHODS.update(methods)
        OPTIONAL_MODULES.add(mod)
74

75

76
def doctest_teardown(d: doctest.DocTest) -> None:
    """
    Reset the global polars configuration after a doctest has run.

    Parameters
    ----------
    d
        The doctest that just finished. Unused; it is only part of the signature
        that `doctest` expects from a `tearDown` callback.
    """
    # don't let config changes leak between tests
    pl.Config.restore_defaults()
79

80

81
def modules_in_path(p: Path) -> Iterator[ModuleType]:
    """
    Import and yield every Python module found (recursively) under package dir `p`.

    Parameters
    ----------
    p
        Directory of the package to scan. Its name is used as the top-level
        package name when building the dotted import paths.

    Yields
    ------
    ModuleType
        Each module that could be imported.

    Raises
    ------
    ImportError
        If a module fails to import and the error message does not mention one
        of the unavailable optional modules listed in `OPTIONAL_MODULES`.
    """
    for file in p.rglob("*.py"):
        # Construct path as string for import, for instance "dataframe.frame".
        # (The -3 drops the ".py")
        file_name_import = ".".join(file.relative_to(p).parts)[:-3]
        try:
            temp_module = importlib.import_module(p.name + "." + file_name_import)
        except ImportError as err:  # noqa: PERF203
            # Only tolerate failures caused by a missing optional dependency;
            # escape the name so it is always matched literally.
            message = str(err)
            if not any(
                re.search(rf"\b{re.escape(mod)}\b", message)
                for mod in OPTIONAL_MODULES
            ):
                raise
            continue
        # yield outside the `try` so only the import itself is guarded
        yield temp_module
92

93

94
class FilteredTestSuite(unittest.TestSuite):
    """
    Test suite that drops tests whose method name is listed in `SKIP_METHODS`.

    The members of this suite are expected to be suites themselves (one per
    module). On iteration, each member's private `_tests` list is replaced by
    a filtered copy before the member is yielded.
    """

    def __iter__(self) -> Iterator[Any]:
        """Yield each member suite after filtering out the skipped tests."""
        for suite in self._tests:
            kept = []
            for test in suite._tests:  # type: ignore[attr-defined]
                # the last dotted component of the test id is the method name
                method_name = test.id().rsplit(".", 1)[-1]
                if method_name not in SKIP_METHODS:
                    kept.append(test)
            suite._tests = kept  # type: ignore[attr-defined]
            yield suite
103

104

105
if __name__ == "__main__":
    # set to True to just run the code, and do not check any output.
    # Will still report errors if the code is invalid
    IGNORE_RESULT_ALL = False

    # Below the implementation of the IGNORE_RESULT directive
    # You can ignore the result of a doctest by adding "doctest: +IGNORE_RESULT" into
    # the code block. The difference with SKIP is that if the code errors on running,
    # that will still be reported.
    IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")

    # Set doctests to fail on warnings
    warnings.simplefilter("error", Warning)
    warnings.filterwarnings(
        "ignore",
        message="datetime.datetime.utcfromtimestamp\\(\\) is deprecated.*",
        category=DeprecationWarning,
    )
    warnings.filterwarnings(
        "ignore",
        message="datetime.datetime.utcnow\\(\\) is deprecated.*",
        category=DeprecationWarning,
    )

    # keep a handle on the stock checker before it is replaced below
    OutputChecker = doctest.OutputChecker

    class IgnoreResultOutputChecker(OutputChecker):
        """Python doctest output checker with support for IGNORE_RESULT."""

        def check_output(self, want: str, got: str, optionflags: Any) -> bool:
            """
            Return True iff the actual output matches the expected output.

            Always returns True when `IGNORE_RESULT_ALL` is set or when the
            example carries the IGNORE_RESULT option flag.
            """
            if IGNORE_RESULT_ALL or (IGNORE_RESULT & optionflags):
                return True
            return super().check_output(want, got, optionflags)

    # doctest instantiates `doctest.OutputChecker` when no checker is supplied,
    # so replacing the module attribute makes every runner use the custom checker
    doctest.OutputChecker = IgnoreResultOutputChecker  # type: ignore[misc]

    # Want to be relaxed about whitespace, strict on True vs 1, and allow '...' pattern.
    # Combine the real flag values instead of overwriting the doctest constants, so
    # the flags keep their own bits and per-example directives still work.
    #
    # If REPORT_NDIFF is added to these flags, it will report on line by line,
    # character by character, differences. The disadvantage is that you cannot just
    # copy the output directly into the docstring.
    doctest_optionflags = (
        doctest.NORMALIZE_WHITESPACE
        | doctest.DONT_ACCEPT_TRUE_FOR_1
        | doctest.ELLIPSIS
    )

    # __file__ returns the __init__.py, so grab the parent
    src_dir = Path(pl.__file__).parent

    # The temporary directory is exposed to the doctests as `dirpath`; it is removed
    # (with any artifacts written into it) when the `with` block exits
    with TemporaryDirectory() as tmpdir:
        # collect all tests
        tests = [
            doctest.DocTestSuite(
                m,
                extraglobs={"pl": pl, "dirpath": Path(tmpdir)},
                tearDown=doctest_teardown,
                optionflags=doctest_optionflags,
            )
            for m in modules_in_path(src_dir)
        ]
        test_suite = FilteredTestSuite(tests)

        # run doctests and report
        result = unittest.TextTestRunner().run(test_suite)

        # Succeed only if something ran and there were neither failures nor errors
        # (errors cover e.g. exceptions raised from the tearDown hook)
        success_flag = result.testsRun > 0 and result.wasSuccessful()
        sys.exit(int(not success_flag))
177

178
Product

Resources

Company