CoCalc -- conftest.py

GitHub Repository: yt-project/yt
Path: blob/main/conftest.py
⁹²⁵ views
1
import os
2
import shutil
3
import sys
4
import tempfile
5
from importlib.metadata import version
6
from importlib.util import find_spec
7
from pathlib import Path
8

9
import pytest
10
import yaml
11
from packaging.version import Version
12

13
from yt.config import ytcfg
14
from yt.utilities.answer_testing.testing_utilities import (
15
    _compare_raw_arrays,
16
    _hash_results,
17
    _save_raw_arrays,
18
    _save_result,
19
    _streamline_for_io,
20
    data_dir_load,
21
)
22

23
NUMPY_VERSION = Version(version("numpy"))
24
PILLOW_VERSION = Version(version("Pillow"))
25
MATPLOTLIB_VERSION = Version(version("matplotlib"))
26

27
# setuptools does not ship with the standard lib starting in Python 3.12, so we need to
28
# be resilient if it's not available at runtime
29
if find_spec("setuptools") is not None:
30
    SETUPTOOLS_VERSION = Version(version("setuptools"))
31
else:
32
    SETUPTOOLS_VERSION = None
33

34
if find_spec("pandas") is not None:
35
    PANDAS_VERSION = Version(version("pandas"))
36
else:
37
    PANDAS_VERSION = None
38

39

40
def pytest_addoption(parser):
41
    """
42
    Lets options be passed to test functions.
43
    """
44
    parser.addoption(
45
        "--with-answer-testing",
46
        action="store_true",
47
    )
48
    parser.addoption(
49
        "--answer-store",
50
        action="store_true",
51
    )
52
    parser.addoption(
53
        "--answer-raw-arrays",
54
        action="store_true",
55
    )
56
    parser.addoption(
57
        "--raw-answer-store",
58
        action="store_true",
59
    )
60
    parser.addoption(
61
        "--force-overwrite",
62
        action="store_true",
63
    )
64
    parser.addoption(
65
        "--no-hash",
66
        action="store_true",
67
    )
68
    parser.addoption("--local-dir", default=None, help="Where answers are saved.")
69
    # Tell pytest about the local-dir option in the ini files. This
70
    # option is used for creating the answer directory on CI
71
    parser.addini(
72
        "local-dir",
73
        default=str(Path(__file__).parent / "answer-store"),
74
        help="answer directory.",
75
    )
76
    parser.addini(
77
        "test_data_dir",
78
        default=ytcfg.get("yt", "test_data_dir"),
79
        help="Directory where data for tests is stored.",
80
    )
81

82

83
def pytest_configure(config):
84
    r"""
85
    Reads in the tests/tests.yaml file. This file contains a list of
86
    each answer test's answer file (including the changeset number).
87
    """
88
    # Register custom marks for answer tests and big data
89
    config.addinivalue_line("markers", "answer_test: Run the answer tests.")
90
    config.addinivalue_line(
91
        "markers", "big_data: Run answer tests that require large data files."
92
    )
93
    for value in (
94
        # treat most warnings as errors
95
        "error",
96
        # >>> warnings emitted by testing frameworks, or in testing contexts
97
        # we still have some yield-based tests, awaiting for transition into pytest
98
        "ignore::pytest.PytestCollectionWarning",
99
        # matplotlib warnings related to the Agg backend which is used in CI, not much we can do about it
100
        "ignore:Matplotlib is currently using agg, which is a non-GUI backend, so cannot show the figure.:UserWarning",
101
        r"ignore:tight_layout.+falling back to Agg renderer:UserWarning",
102
        #
103
        # >>> warnings from wrong values passed to numpy
104
        # these should normally be curated out of the test suite but they are too numerous
105
        # to deal with in a reasonable time at the moment.
106
        "ignore:invalid value encountered in log10:RuntimeWarning",
107
        "ignore:divide by zero encountered in log10:RuntimeWarning",
108
        #
109
        # >>> there are many places in yt (most notably at the frontend level)
110
        # where we open files but never explicitly close them
111
        # Although this is in general bad practice, it can be intentional and
112
        # justified in contexts where reading speeds should be optimized.
113
        # It is not clear at the time of writing how to approach this,
114
        # so I'm going to ignore this class of warnings altogether for now.
115
        "ignore:unclosed file.*:ResourceWarning",
116
    ):
117
        config.addinivalue_line("filterwarnings", value)
118

119
    if SETUPTOOLS_VERSION is not None and SETUPTOOLS_VERSION >= Version("67.3.0"):
120
        # may be triggered by multiple dependencies
121
        # see https://github.com/glue-viz/glue/issues/2364
122
        # see https://github.com/matplotlib/matplotlib/issues/25244
123
        config.addinivalue_line(
124
            "filterwarnings",
125
            r"ignore:(Deprecated call to `pkg_resources\.declare_namespace\('.*'\)`\.\n)?"
126
            r"Implementing implicit namespace packages \(as specified in PEP 420\) "
127
            r"is preferred to `pkg_resources\.declare_namespace`\.:DeprecationWarning",
128
        )
129

130
    if SETUPTOOLS_VERSION is not None and SETUPTOOLS_VERSION >= Version("67.5.0"):
131
        # may be triggered by multiple dependencies
132
        # see https://github.com/glue-viz/glue/issues/2364
133
        # see https://github.com/matplotlib/matplotlib/issues/25244
134
        config.addinivalue_line(
135
            "filterwarnings",
136
            "ignore:pkg_resources is deprecated as an API:DeprecationWarning",
137
        )
138

139
    if NUMPY_VERSION >= Version("1.25"):
140
        if find_spec("h5py") is not None and (
141
            Version(version("h5py")) < Version("3.9")
142
        ):
143
            # https://github.com/h5py/h5py/pull/2242
144
            config.addinivalue_line(
145
                "filterwarnings",
146
                "ignore:`product` is deprecated as of NumPy 1.25.0:DeprecationWarning",
147
            )
148

149
    if PILLOW_VERSION >= Version("11.3.0") and MATPLOTLIB_VERSION <= Version("3.10.3"):
150
        # patched upstream: https://github.com/matplotlib/matplotlib/pull/30221
151
        config.addinivalue_line(
152
            "filterwarnings",
153
            r"ignore:'mode' parameter is deprecated:DeprecationWarning",
154
        )
155

156
    if PANDAS_VERSION is not None and PANDAS_VERSION >= Version("2.2.0"):
157
        config.addinivalue_line(
158
            "filterwarnings",
159
            r"ignore:\s*Pyarrow will become a required dependency of pandas:DeprecationWarning",
160
        )
161

162
    if sys.version_info >= (3, 12):
163
        # already patched (but not released) upstream:
164
        # https://github.com/dateutil/dateutil/pull/1285
165
        config.addinivalue_line(
166
            "filterwarnings",
167
            r"ignore:datetime\.datetime\.utcfromtimestamp\(\) is deprecated:DeprecationWarning",
168
        )
169

170
        if find_spec("ratarmount"):
171
            # On Python 3.12+, there is a deprecation warning when calling os.fork()
172
            # in a multi-threaded process. We use this mechanism to mount archives.
173
            config.addinivalue_line(
174
                "filterwarnings",
175
                r"ignore:This process \(pid=\d+\) is multi-threaded, use of fork\(\) "
176
                r"may lead to deadlocks in the child\."
177
                ":DeprecationWarning",
178
            )
179

180
    if find_spec("datatree"):
181
        # the cf_radial dependency arm-pyart<=1.9.2 installs the now deprecated
182
        # xarray-datatree package (which imports as datatree), which triggers
183
        # a bunch of runtimewarnings when importing xarray.
184
        # https://github.com/yt-project/yt/pull/5042#issuecomment-2457797694
185
        config.addinivalue_line(
186
            "filterwarnings",
187
            "ignore:" r"Engine.*loading failed.*" ":RuntimeWarning",
188
        )
189

190

191
def pytest_collection_modifyitems(config, items):
192
    r"""
193
    Decide which tests to skip based on command-line options.
194
    """
195
    # Set up the skip marks
196
    skip_answer = pytest.mark.skip(reason="--with-answer-testing not set.")
197
    skip_unit = pytest.mark.skip(reason="Running answer tests, so skipping unit tests.")
198
    skip_big = pytest.mark.skip(reason="--answer-big-data not set.")
199
    # Loop over every collected test function
200
    for item in items:
201
        # If it's an answer test and the appropriate CL option hasn't
202
        # been set, skip it
203
        if "answer_test" in item.keywords and not config.getoption(
204
            "--with-answer-testing"
205
        ):
206
            item.add_marker(skip_answer)
207
        # If it's an answer test that requires big data and the CL
208
        # option hasn't been set, skip it
209
        if (
210
            "big_data" in item.keywords
211
            and not config.getoption("--with-answer-testing")
212
            and not config.getoption("--answer-big-data")
213
        ):
214
            item.add_marker(skip_big)
215
        if "answer_test" not in item.keywords and config.getoption(
216
            "--with-answer-testing"
217
        ):
218
            item.add_marker(skip_unit)
219

220

221
def pytest_itemcollected(item):
222
    # Customize pytest-mpl decorator to add sensible defaults
223

224
    mpl_marker = item.get_closest_marker("mpl_image_compare")
225
    if mpl_marker is not None:
226
        # in a future version, pytest-mpl may gain an option for doing this:
227
        # https://github.com/matplotlib/pytest-mpl/pull/181
228
        mpl_marker.kwargs.setdefault("tolerance", 0.5)
229

230

231
def _param_list(request):
232
    r"""
233
    Saves the non-ds, non-fixture function arguments for saving to
234
    the answer file.
235
    """
236
    # pytest treats parameterized arguments as fixtures, so there's no
237
    # clean way to separate them out from other other fixtures (that I
238
    # know of), so we do it explicitly
239
    blacklist = [
240
        "hashing",
241
        "answer_file",
242
        "request",
243
        "answer_compare",
244
        "temp_dir",
245
        "orbit_traj",
246
        "etc_traj",
247
    ]
248
    test_params = {}
249
    for key, val in request.node.funcargs.items():
250
        if key not in blacklist:
251
            # For plotwindow, the callback arg is a tuple and the second
252
            # element contains a memory address, so we need to drop it.
253
            # The first element is the callback name, which is all that's
254
            # needed
255
            if key == "callback":
256
                val = val[0]
257
            test_params[key] = str(val)
258
    # Convert python-specific data objects (such as tuples) to a more
259
    # io-friendly format (in order to not have python-specific anchors
260
    # in the answer yaml file)
261
    test_params = _streamline_for_io(test_params)
262
    return test_params
263

264

265
def _get_answer_files(request):
266
    """
267
    Gets the path to where the hashed and raw answers are saved.
268
    """
269
    answer_file = f"{request.cls.__name__}_{request.cls.answer_version}.yaml"
270
    raw_answer_file = f"{request.cls.__name__}_{request.cls.answer_version}.h5"
271
    # Add the local-dir aspect of the path. If there's a command line value,
272
    # have that override the ini file value
273
    clLocalDir = request.config.getoption("--local-dir")
274
    iniLocalDir = request.config.getini("local-dir")
275
    if clLocalDir is not None:
276
        answer_file = os.path.join(os.path.expanduser(clLocalDir), answer_file)
277
        raw_answer_file = os.path.join(os.path.expanduser(clLocalDir), raw_answer_file)
278
    else:
279
        answer_file = os.path.join(os.path.expanduser(iniLocalDir), answer_file)
280
        raw_answer_file = os.path.join(os.path.expanduser(iniLocalDir), raw_answer_file)
281
    # Make sure we don't overwrite unless we mean to
282
    overwrite = request.config.getoption("--force-overwrite")
283
    storing = request.config.getoption("--answer-store")
284
    raw_storing = request.config.getoption("--raw-answer-store")
285
    raw = request.config.getoption("--answer-raw-arrays")
286
    if os.path.exists(answer_file) and storing and not overwrite:
287
        raise FileExistsError(
288
            "Use `--force-overwrite` to overwrite an existing answer file."
289
        )
290
    if os.path.exists(raw_answer_file) and raw_storing and raw and not overwrite:
291
        raise FileExistsError(
292
            "Use `--force-overwrite` to overwrite an existing raw answer file."
293
        )
294
    # If we do mean to overwrite, do so here by deleting the original file
295
    if os.path.exists(answer_file) and storing and overwrite:
296
        os.remove(answer_file)
297
    if os.path.exists(raw_answer_file) and raw_storing and raw and overwrite:
298
        os.remove(raw_answer_file)
299
    print(os.path.abspath(answer_file))
300
    return answer_file, raw_answer_file
301

302

303
@pytest.fixture(scope="function")
304
def hashing(request):
305
    r"""
306
    Handles initialization, generation, and saving of answer test
307
    result hashes.
308
    """
309
    no_hash = request.config.getoption("--no-hash")
310
    store_hash = request.config.getoption("--answer-store")
311
    raw = request.config.getoption("--answer-raw-arrays")
312
    raw_store = request.config.getoption("--raw-answer-store")
313
    # This check is so that, when checking if the answer file exists in
314
    # _get_answer_files, we don't continuously fail. With this check,
315
    # _get_answer_files is called once per class, despite this having function
316
    # scope
317
    if request.cls.answer_file is None:
318
        request.cls.answer_file, request.cls.raw_answer_file = _get_answer_files(
319
            request
320
        )
321
    if not no_hash and not store_hash and request.cls.saved_hashes is None:
322
        try:
323
            with open(request.cls.answer_file) as fd:
324
                request.cls.saved_hashes = yaml.safe_load(fd)
325
        except FileNotFoundError:
326
            module_filename = f"{request.function.__module__.replace('.', os.sep)}.py"
327
            with open(f"generate_test_{os.getpid()}.txt", "a") as fp:
328
                fp.write(f"{module_filename}::{request.cls.__name__}\n")
329
            pytest.fail(msg="Answer file not found.", pytrace=False)
330
    request.cls.hashes = {}
331
    # Load the saved answers if we're comparing. We don't do this for the raw
332
    # answers because those are huge
333
    yield
334
    # Get arguments and their values passed to the test (e.g., axis, field, etc.)
335
    params = _param_list(request)
336
    # Hash the test results. Don't save to request.cls.hashes so we still have
337
    # raw data, in case we want to work with that
338
    hashes = _hash_results(request.cls.hashes)
339
    # Add the other test parameters
340
    hashes.update(params)
341
    # Add the function name as the "master" key to the hashes dict
342
    hashes = {request.node.name: hashes}
343
    # Save hashes
344
    if not no_hash and store_hash:
345
        _save_result(hashes, request.cls.answer_file)
346
    # Compare hashes
347
    elif not no_hash and not store_hash:
348
        try:
349
            for test_name, test_hash in hashes.items():
350
                assert test_name in request.cls.saved_hashes
351
                assert test_hash == request.cls.saved_hashes[test_name]
352
        except AssertionError:
353
            pytest.fail(f"Comparison failure: {request.node.name}", pytrace=False)
354
    # Save raw data
355
    if raw and raw_store:
356
        _save_raw_arrays(
357
            request.cls.hashes, request.cls.raw_answer_file, request.node.name
358
        )
359
    # Compare raw data. This is done one test at a time because the
360
    # arrays can get quite large and storing everything in memory would
361
    # be bad
362
    if raw and not raw_store:
363
        _compare_raw_arrays(
364
            request.cls.hashes, request.cls.raw_answer_file, request.node.name
365
        )
366

367

368
@pytest.fixture(scope="function")
369
def temp_dir():
370
    r"""
371
    Creates a temporary directory needed by certain tests.
372
    """
373
    curdir = os.getcwd()
374
    if int(os.environ.get("GENERATE_YTDATA", 0)):
375
        tmpdir = os.getcwd()
376
    else:
377
        tmpdir = tempfile.mkdtemp()
378
    os.chdir(tmpdir)
379
    yield tmpdir
380
    os.chdir(curdir)
381
    if tmpdir != curdir:
382
        shutil.rmtree(tmpdir)
383

384

385
@pytest.fixture(scope="class")
386
def ds(request):
387
    # data_dir_load can take the cls, args, and kwargs. These optional
388
    # arguments, if present,  are given in a dictionary as the second
389
    # element of the list
390
    if isinstance(request.param, str):
391
        ds_fn = request.param
392
        opts = {}
393
    else:
394
        ds_fn, opts = request.param
395
    try:
396
        return data_dir_load(
397
            ds_fn, cls=opts.get("cls"), args=opts.get("args"), kwargs=opts.get("kwargs")
398
        )
399
    except FileNotFoundError:
400
        return pytest.skip(f"Data file: `{request.param}` not found.")
401

402

403
@pytest.fixture(scope="class")
404
def field(request):
405
    """
406
    Fixture for returning the field. Needed because indirect=True is
407
    used for loading the datasets.
408
    """
409
    return request.param
410

411

412
@pytest.fixture(scope="class")
413
def dobj(request):
414
    """
415
    Fixture for returning the ds_obj. Needed because indirect=True is
416
    used for loading the datasets.
417
    """
418
    return request.param
419

420

421
@pytest.fixture(scope="class")
422
def axis(request):
423
    """
424
    Fixture for returning the axis. Needed because indirect=True is
425
    used for loading the datasets.
426
    """
427
    return request.param
428

429

430
@pytest.fixture(scope="class")
431
def weight(request):
432
    """
433
    Fixture for returning the weight_field. Needed because
434
    indirect=True is used for loading the datasets.
435
    """
436
    return request.param
437

438

439
@pytest.fixture(scope="class")
440
def ds_repr(request):
441
    """
442
    Fixture for returning the string representation of a dataset.
443
    Needed because indirect=True is used for loading the datasets.
444
    """
445
    return request.param
446

447

448
@pytest.fixture(scope="class")
449
def Npart(request):
450
    """
451
    Fixture for returning the number of particles in a dataset.
452
    Needed because indirect=True is used for loading the datasets.
453
    """
454
    return request.param
455

456
Product

Resources

Company