author    Lexi Winter <lexi@le-fay.org>  2025-06-29 19:25:29 +0100
committer Lexi Winter <lexi@le-fay.org>  2025-06-29 19:25:29 +0100
commit    bc524d70253a4ab2fe40c3ca3e5666e267c0a4d1 (patch)
tree      1e629e7b46b1d9972a973bc93fd100bcebd395be /tests/TestScripts/testSharding.py
Diffstat (limited to 'tests/TestScripts/testSharding.py')
-rwxr-xr-x  tests/TestScripts/testSharding.py  165
1 file changed, 165 insertions, 0 deletions
diff --git a/tests/TestScripts/testSharding.py b/tests/TestScripts/testSharding.py
new file mode 100755
index 0000000..fa6f94d
--- /dev/null
+++ b/tests/TestScripts/testSharding.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+
+# Copyright Catch2 Authors
+# Distributed under the Boost Software License, Version 1.0.
+# (See accompanying file LICENSE.txt or copy at
+# https://www.boost.org/LICENSE_1_0.txt)
+
+# SPDX-License-Identifier: BSL-1.0
+
+"""
+This test script verifies that sharding partitions the tests correctly, without
+changing which tests are run overall. This is done by running the binary
+multiple times: once to list all the tests, once per shard to list the tests
+for that shard, and once again per shard to execute the tests. The sharded
+lists are compared to the full list to ensure that no tests are skipped or
+duplicated, and that the order remains the same.
+"""
+
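+# Usage: testSharding.py <path to the Catch2 self-test binary>
+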
+import random
+import subprocess
+import sys
+import xml.etree.ElementTree as ET
+
+from collections import namedtuple
+
+from typing import Dict, List, Optional
+
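+# The same randomly chosen seed is passed to every invocation via --rng-seed,
+# so the randomised test order is identical across the full run and all shards.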
+seed = random.randint(0, 2 ** 32 - 1)
+number_of_shards = 5
+
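+# Every invocation shares this base command line: XML reporter output (so it
+# can be parsed below), random test ordering with the fixed seed, and a fixed
+# test spec so each run selects the same subset of the self-tests.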
+def make_base_commandline(self_test_exe):
+ return [
+ self_test_exe,
+ '--reporter', 'xml',
+ '--order', 'rand',
+ '--rng-seed', str(seed),
+ "[generators]~[benchmarks]~[.]"
+ ]
+
+
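+# Lists the names of the tests the binary would run (optionally restricted to
+# a shard via extra_args), parsed from the XML produced by --list-tests.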
+def list_tests(self_test_exe: str, extra_args: Optional[List[str]] = None):
+ cmd = make_base_commandline(self_test_exe) + ['--list-tests']
+ if extra_args:
+ cmd.extend(extra_args)
+
+ try:
+ ret = subprocess.run(cmd,
+ stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE,
+ timeout = 10,
+ check = True,
+ universal_newlines = True)
+    except subprocess.CalledProcessError as ex:
+        print('Could not list tests:\n{}'.format(ex.stderr))
+        raise
+
+ if ret.stderr:
+ raise RuntimeError("Unexpected error output:\n" + ret.stderr)
+
+ root = ET.fromstring(ret.stdout)
+ result = [elem.text for elem in root.findall('./TestCase/Name')]
+
+ if len(result) < 2:
+ raise RuntimeError("Unexpectedly few tests listed (got {})".format(
+ len(result)))
+
+ return result
+
+
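+# Runs the selected tests (optionally restricted to a shard via extra_args)
+# and returns the names of the executed test cases parsed from the XML report.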
+def execute_tests(self_test_exe: str, extra_args: Optional[List[str]] = None):
+ cmd = make_base_commandline(self_test_exe)
+ if extra_args:
+ cmd.extend(extra_args)
+
+ try:
+ ret = subprocess.run(cmd,
+ stdout = subprocess.PIPE,
+ stderr = subprocess.PIPE,
+ timeout = 10,
+ check = True,
+ universal_newlines = True)
+    except subprocess.CalledProcessError as ex:
+        print('Could not run tests:\n{}'.format(ex.stderr))
+        raise
+
+ if ret.stderr:
+        raise RuntimeError("Unexpected error output:\n" + ret.stderr)
+
+ root = ET.fromstring(ret.stdout)
+ result = [elem.attrib["name"] for elem in root.findall('./TestCase')]
+
+ if len(result) < 2:
+ raise RuntimeError("Unexpectedly few tests listed (got {})".format(
+ len(result)))
+
+ return result
+
+
+def test_sharded_listing(self_test_exe: str) -> Dict[int, List[str]]:
+ """
+    Asks the test binary for a list of all tests, and also for the list of
+    tests in each shard.
+
+    The combined shard listings are then checked against the full list of
+    all tests.
+
+    Returns a dictionary of shard index => listed tests for later use.
+ """
+ all_tests = list_tests(self_test_exe)
+ big_shard_tests = list_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])
+
+ assert all_tests == big_shard_tests, (
+ "No-sharding test list does not match the listing of big shard:\nNo shard:\n{}\n\nWith shard:\n{}\n".format(
+ '\n'.join(all_tests),
+ '\n'.join(big_shard_tests)
+ )
+ )
+
+ shard_listings = dict()
+ for shard_idx in range(number_of_shards):
+ shard_listings[shard_idx] = list_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])
+
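+    # The shards together must account for every test exactly once.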
+ shard_sizes = [len(v) for v in shard_listings.values()]
+ assert len(all_tests) == sum(shard_sizes)
+
+    # Check that the shards have roughly equal sizes (e.g. we do not end up
+    # with all tests in a single shard while the others are empty)
+    differences = [abs(x1 - x2) for x1, x2 in zip(shard_sizes, shard_sizes[1:])]
+    assert all(diff <= 1 for diff in differences), "Shard sizes are not balanced: {}".format(shard_sizes)
+
+ combined_shards = [inner for outer in shard_listings.values() for inner in outer]
+ assert all_tests == combined_shards, (
+ "All tests and combined shards disagree.\nNo shard:\n{}\n\nCombined:\n{}\n\n".format(
+ '\n'.join(all_tests),
+ '\n'.join(combined_shards)
+ )
+ )
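+
+    # Store the full, unsharded listing under index -1 so the execution phase
+    # can compare the full run against it as well.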
+ shard_listings[-1] = all_tests
+
+ return shard_listings
+
+
+def test_sharded_execution(self_test_exe: str, listings: Dict[int, List[str]]):
+ """
+ Runs the test binary and checks that the executed tests match the
+ previously listed tests.
+
+    This is also done for each shard index, checking that the tests executed
+    in each shard match that shard's earlier listing.
+ """
+ all_tests = execute_tests(self_test_exe)
+ big_shard_tests = execute_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])
+ assert all_tests == big_shard_tests
+
+ assert listings[-1] == all_tests
+
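+    # Each shard's executed tests must match that shard's earlier listing.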
+ for shard_idx in range(number_of_shards):
+ assert listings[shard_idx] == execute_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])
+
+
+def main():
+ self_test_exe, = sys.argv[1:]
+ listings = test_sharded_listing(self_test_exe)
+ test_sharded_execution(self_test_exe, listings)
+
+if __name__ == '__main__':
+ sys.exit(main())