From 4e1556ed4d43da1f930b3fcf0fc20d827a34f3d2 Mon Sep 17 00:00:00 2001 From: Patrick Hilhorst Date: Sat, 1 Jan 2022 22:35:20 +0100 Subject: [PATCH] nixos/test-driver: add polling_condition --- .../writing-nixos-tests.section.md | 51 ++ .../writing-nixos-tests.section.xml | 858 ++++++++++-------- nixos/lib/test-driver/test_driver/driver.py | 40 +- nixos/lib/test-driver/test_driver/machine.py | 6 + .../test_driver/polling_condition.py | 90 ++ nixos/tests/vscodium.nix | 50 +- 6 files changed, 679 insertions(+), 416 deletions(-) create mode 100644 nixos/lib/test-driver/test_driver/polling_condition.py diff --git a/nixos/doc/manual/development/writing-nixos-tests.section.md b/nixos/doc/manual/development/writing-nixos-tests.section.md index d9749d37da79..7de57d0d2a37 100644 --- a/nixos/doc/manual/development/writing-nixos-tests.section.md +++ b/nixos/doc/manual/development/writing-nixos-tests.section.md @@ -88,6 +88,8 @@ starting them in parallel: start_all() ``` +## Machine objects {#ssec-machine-objects} + The following methods are available on machine objects: `start` @@ -313,3 +315,52 @@ repository): # fmt: on ''; ``` + +## Failing tests early {#ssec-failing-tests-early} + +To fail tests early when certain invariables are no longer met (instead of waiting for the build to time out), the decorator `polling_condition` is provided. For example, if we are testing a program `foo` that should not quit after being started, we might write the following: + +```py +@polling_condition +def foo_running(): + machine.succeed("pgrep -x foo") + + +machine.succeed("foo --start") +machine.wait_until_succeeds("pgrep -x foo") + +with foo_running: + ... # Put `foo` through its paces +``` + + +`polling_condition` takes the following (optional) arguments: + +`seconds_interval` + +: + specifies how often the condition should be polled: + + ```py + @polling_condition(seconds_interval=10) + def foo_running(): + machine.succeed("pgrep -x foo") + ``` + +`description` + +: + is used in the log when the condition is checked. If this is not provided, the description is pulled from the docstring of the function. These two are therefore equivalent: + + ```py + @polling_condition + def foo_running(): + "check that foo is running" + machine.succeed("pgrep -x foo") + ``` + + ```py + @polling_condition(description="check that foo is running") + def foo_running(): + machine.succeed("pgrep -x foo") + ``` diff --git a/nixos/doc/manual/from_md/development/writing-nixos-tests.section.xml b/nixos/doc/manual/from_md/development/writing-nixos-tests.section.xml index 0d523681b639..45c9c40c6095 100644 --- a/nixos/doc/manual/from_md/development/writing-nixos-tests.section.xml +++ b/nixos/doc/manual/from_md/development/writing-nixos-tests.section.xml @@ -117,407 +117,413 @@ if not "Linux" in machine.succeed("uname"): start_all() - - The following methods are available on machine objects: - - - - - start - - - - Start the virtual machine. This method is asynchronous — it - does not wait for the machine to finish booting. - - - - - - shutdown - - - - Shut down the machine, waiting for the VM to exit. - - - - - - crash - - - - Simulate a sudden power failure, by telling the VM to exit - immediately. - - - - - - block - - - - Simulate unplugging the Ethernet cable that connects the - machine to the other machines. - - - - - - unblock - - - - Undo the effect of block. - - - - - - screenshot - - - - Take a picture of the display of the virtual machine, in PNG - format. The screenshot is linked from the HTML log. - - - - - - get_screen_text_variants - - - - Return a list of different interpretations of what is - currently visible on the machine's screen using optical - character recognition. The number and order of the - interpretations is not specified and is subject to change, but - if no exception is raised at least one will be returned. - - +
+ Machine objects + + The following methods are available on machine objects: + + + + + start + + - This requires passing enableOCR to the - test attribute set. + Start the virtual machine. This method is asynchronous — it + does not wait for the machine to finish booting. - - - - - - get_screen_text - - - - Return a textual representation of what is currently visible - on the machine's screen using optical character recognition. - - + + + + + shutdown + + - This requires passing enableOCR to the - test attribute set. + Shut down the machine, waiting for the VM to exit. - - - - - - send_monitor_command - - - - Send a command to the QEMU monitor. This is rarely used, but - allows doing stuff such as attaching virtual USB disks to a - running machine. - - - - - - send_key - - - - Simulate pressing keys on the virtual keyboard, e.g., - send_key("ctrl-alt-delete"). - - - - - - send_chars - - - - Simulate typing a sequence of characters on the virtual - keyboard, e.g., - send_chars("foobar\n") will type - the string foobar followed by the Enter - key. - - - - - - execute - - - - Execute a shell command, returning a list - (status, stdout). If the command detaches, - it must close stdout, as execute will wait - for this to consume all output reliably. This can be achieved - by redirecting stdout to stderr >&2, - to /dev/console, - /dev/null or a file. Examples of detaching - commands are sleep 365d &, where the - shell forks a new process that can write to stdout and - xclip -i, where the - xclip command itself forks without closing - stdout. Takes an optional parameter - check_return that defaults to - True. Setting this parameter to - False will not check for the return code - and return -1 instead. This can be used for commands that shut - down the VM and would therefore break the pipe that would be - used for retrieving the return code. - - - - - - succeed - - - - Execute a shell command, raising an exception if the exit - status is not zero, otherwise returning the standard output. - Commands are run with set -euo pipefail - set: - - - - - If several commands are separated by ; - and one fails, the command as a whole will fail. - - - - - For pipelines, the last non-zero exit status will be - returned (if there is one, zero will be returned - otherwise). - - - - - Dereferencing unset variables fail the command. - - - - - It will wait for stdout to be closed. See - execute for the implications. - - - - - - - - fail - - - - Like succeed, but raising an exception if - the command returns a zero status. - - - - - - wait_until_succeeds - - - - Repeat a shell command with 1-second intervals until it - succeeds. - - - - - - wait_until_fails - - - - Repeat a shell command with 1-second intervals until it fails. - - - - - - wait_for_unit - - - - Wait until the specified systemd unit has reached the - active state. - - - - - - wait_for_file - - - - Wait until the specified file exists. - - - - - - wait_for_open_port - - - - Wait until a process is listening on the given TCP port (on - localhost, at least). - - - - - - wait_for_closed_port - - - - Wait until nobody is listening on the given TCP port. - - - - - - wait_for_x - - - - Wait until the X11 server is accepting connections. - - - - - - wait_for_text - - - - Wait until the supplied regular expressions matches the - textual contents of the screen by using optical character - recognition (see get_screen_text and - get_screen_text_variants). - - + + + + + crash + + - This requires passing enableOCR to the - test attribute set. + Simulate a sudden power failure, by telling the VM to exit + immediately. - - - - - - wait_for_console_text - - - - Wait until the supplied regular expressions match a line of - the serial console output. This method is useful when OCR is - not possibile or accurate enough. - - - - - - wait_for_window - - - - Wait until an X11 window has appeared whose name matches the - given regular expression, e.g., - wait_for_window("Terminal"). - - - - - - copy_from_host - - - - Copies a file from host to machine, e.g., - copy_from_host("myfile", "/etc/my/important/file"). - - - The first argument is the file on the host. The file needs to - be accessible while building the nix derivation. The second - argument is the location of the file on the machine. - - - - - - systemctl - - - - Runs systemctl commands with optional - support for systemctl --user - - + + + + + block + + + + Simulate unplugging the Ethernet cable that connects the + machine to the other machines. + + + + + + unblock + + + + Undo the effect of block. + + + + + + screenshot + + + + Take a picture of the display of the virtual machine, in PNG + format. The screenshot is linked from the HTML log. + + + + + + get_screen_text_variants + + + + Return a list of different interpretations of what is + currently visible on the machine's screen using optical + character recognition. The number and order of the + interpretations is not specified and is subject to change, + but if no exception is raised at least one will be returned. + + + + This requires passing enableOCR to the + test attribute set. + + + + + + + get_screen_text + + + + Return a textual representation of what is currently visible + on the machine's screen using optical character recognition. + + + + This requires passing enableOCR to the + test attribute set. + + + + + + + send_monitor_command + + + + Send a command to the QEMU monitor. This is rarely used, but + allows doing stuff such as attaching virtual USB disks to a + running machine. + + + + + + send_key + + + + Simulate pressing keys on the virtual keyboard, e.g., + send_key("ctrl-alt-delete"). + + + + + + send_chars + + + + Simulate typing a sequence of characters on the virtual + keyboard, e.g., + send_chars("foobar\n") will + type the string foobar followed by the + Enter key. + + + + + + execute + + + + Execute a shell command, returning a list + (status, stdout). If the command + detaches, it must close stdout, as + execute will wait for this to consume all + output reliably. This can be achieved by redirecting stdout + to stderr >&2, to + /dev/console, + /dev/null or a file. Examples of + detaching commands are sleep 365d &, + where the shell forks a new process that can write to stdout + and xclip -i, where the + xclip command itself forks without + closing stdout. Takes an optional parameter + check_return that defaults to + True. Setting this parameter to + False will not check for the return code + and return -1 instead. This can be used for commands that + shut down the VM and would therefore break the pipe that + would be used for retrieving the return code. + + + + + + succeed + + + + Execute a shell command, raising an exception if the exit + status is not zero, otherwise returning the standard output. + Commands are run with set -euo pipefail + set: + + + + + If several commands are separated by + ; and one fails, the command as a + whole will fail. + + + + + For pipelines, the last non-zero exit status will be + returned (if there is one, zero will be returned + otherwise). + + + + + Dereferencing unset variables fail the command. + + + + + It will wait for stdout to be closed. See + execute for the implications. + + + + + + + + fail + + + + Like succeed, but raising an exception if + the command returns a zero status. + + + + + + wait_until_succeeds + + + + Repeat a shell command with 1-second intervals until it + succeeds. + + + + + + wait_until_fails + + + + Repeat a shell command with 1-second intervals until it + fails. + + + + + + wait_for_unit + + + + Wait until the specified systemd unit has reached the + active state. + + + + + + wait_for_file + + + + Wait until the specified file exists. + + + + + + wait_for_open_port + + + + Wait until a process is listening on the given TCP port (on + localhost, at least). + + + + + + wait_for_closed_port + + + + Wait until nobody is listening on the given TCP port. + + + + + + wait_for_x + + + + Wait until the X11 server is accepting connections. + + + + + + wait_for_text + + + + Wait until the supplied regular expressions matches the + textual contents of the screen by using optical character + recognition (see get_screen_text and + get_screen_text_variants). + + + + This requires passing enableOCR to the + test attribute set. + + + + + + + wait_for_console_text + + + + Wait until the supplied regular expressions match a line of + the serial console output. This method is useful when OCR is + not possibile or accurate enough. + + + + + + wait_for_window + + + + Wait until an X11 window has appeared whose name matches the + given regular expression, e.g., + wait_for_window("Terminal"). + + + + + + copy_from_host + + + + Copies a file from host to machine, e.g., + copy_from_host("myfile", "/etc/my/important/file"). + + + The first argument is the file on the host. The file needs + to be accessible while building the nix derivation. The + second argument is the location of the file on the machine. + + + + + + systemctl + + + + Runs systemctl commands with optional + support for systemctl --user + + machine.systemctl("list-jobs --no-pager") # runs `systemctl list-jobs --no-pager` machine.systemctl("list-jobs --no-pager", "any-user") # spawns a shell for `any-user` and runs `systemctl --user list-jobs --no-pager` - - - - - shell_interact - - - - Allows you to directly interact with the guest shell. This - should only be used during test development, not in production - tests. Killing the interactive session with - Ctrl-d or Ctrl-c also - ends the guest session. - - - - - - To test user units declared by - systemd.user.services the optional - user argument can be used: - - + + + + + shell_interact + + + + Allows you to directly interact with the guest shell. This + should only be used during test development, not in + production tests. Killing the interactive session with + Ctrl-d or Ctrl-c also + ends the guest session. + + + + + + To test user units declared by + systemd.user.services the optional + user argument can be used: + + machine.start() machine.wait_for_x() machine.wait_for_unit("xautolock.service", "x-session-user") - - This applies to systemctl, - get_unit_info, wait_for_unit, - start_job and stop_job. - - - For faster dev cycles it's also possible to disable the code-linters - (this shouldn't be commited though): - - + + This applies to systemctl, + get_unit_info, + wait_for_unit, start_job and + stop_job. + + + For faster dev cycles it's also possible to disable the + code-linters (this shouldn't be commited though): + + import ./make-test-python.nix { skipLint = true; machine = @@ -531,13 +537,13 @@ import ./make-test-python.nix { ''; } - - This will produce a Nix warning at evaluation time. To fully disable - the linter, wrap the test script in comment directives to disable - the Black linter directly (again, don't commit this within the - Nixpkgs repository): - - + + This will produce a Nix warning at evaluation time. To fully + disable the linter, wrap the test script in comment directives to + disable the Black linter directly (again, don't commit this within + the Nixpkgs repository): + + testScript = '' # fmt: off @@ -545,4 +551,66 @@ import ./make-test-python.nix { # fmt: on ''; +
+
+ Failing tests early + + To fail tests early when certain invariables are no longer met + (instead of waiting for the build to time out), the decorator + polling_condition is provided. For example, if + we are testing a program foo that should not + quit after being started, we might write the following: + + +@polling_condition +def foo_running(): + machine.succeed("pgrep -x foo") + + +machine.succeed("foo --start") +machine.wait_until_succeeds("pgrep -x foo") + +with foo_running: + ... # Put `foo` through its paces + + + polling_condition takes the following + (optional) arguments: + + + seconds_interval + + + : specifies how often the condition should be polled: + + +```py +@polling_condition(seconds_interval=10) +def foo_running(): + machine.succeed("pgrep -x foo") +``` + + + description + + + : is used in the log when the condition is checked. If this is not + provided, the description is pulled from the docstring of the + function. These two are therefore equivalent: + + +```py +@polling_condition +def foo_running(): + "check that foo is running" + machine.succeed("pgrep -x foo") +``` + +```py +@polling_condition(description="check that foo is running") +def foo_running(): + machine.succeed("pgrep -x foo") +``` + +
diff --git a/nixos/lib/test-driver/test_driver/driver.py b/nixos/lib/test-driver/test_driver/driver.py index f3af98537ad6..e22f9ee7a757 100644 --- a/nixos/lib/test-driver/test_driver/driver.py +++ b/nixos/lib/test-driver/test_driver/driver.py @@ -1,12 +1,13 @@ from contextlib import contextmanager from pathlib import Path -from typing import Any, Dict, Iterator, List +from typing import Any, Dict, Iterator, List, Union, Optional, Callable, ContextManager import os import tempfile from test_driver.logger import rootlog from test_driver.machine import Machine, NixStartScript, retry from test_driver.vlan import VLan +from test_driver.polling_condition import PollingCondition class Driver: @@ -16,6 +17,7 @@ class Driver: tests: str vlans: List[VLan] machines: List[Machine] + polling_conditions: List[Callable] def __init__( self, @@ -36,12 +38,15 @@ class Driver: for s in scripts: yield NixStartScript(s) + self.polling_conditions = [] + self.machines = [ Machine( start_command=cmd, keep_vm_state=keep_vm_state, name=cmd.machine_name, tmp_dir=tmp_dir, + fail_early=self.fail_early, ) for cmd in cmd(start_scripts) ] @@ -84,6 +89,7 @@ class Driver: retry=retry, serial_stdout_off=self.serial_stdout_off, serial_stdout_on=self.serial_stdout_on, + polling_condition=self.polling_condition, Machine=Machine, # for typing ) machine_symbols = {m.name: m for m in self.machines} @@ -159,3 +165,35 @@ class Driver: def serial_stdout_off(self) -> None: rootlog._print_serial_logs = False + + def fail_early(self) -> bool: + return any(not f() for f in self.polling_conditions) + + def polling_condition( + self, + fun_: Optional[Callable] = None, + *, + seconds_interval: float = 2.0, + description: Optional[str] = None, + ) -> Union[Callable[[Callable], ContextManager], ContextManager]: + driver = self + + class Poll: + def __init__(self, fun: Callable): + self.condition = PollingCondition( + fun, + seconds_interval, + description, + ).check + + def __enter__(self) -> None: + driver.polling_conditions.append(self.condition) + + def __exit__(self, a, b, c) -> None: # type: ignore + res = driver.polling_conditions.pop() + assert res is self.condition + + if fun_ is None: + return Poll + else: + return Poll(fun_) diff --git a/nixos/lib/test-driver/test_driver/machine.py b/nixos/lib/test-driver/test_driver/machine.py index b3dbe5126fcc..dbf9fd244861 100644 --- a/nixos/lib/test-driver/test_driver/machine.py +++ b/nixos/lib/test-driver/test_driver/machine.py @@ -17,6 +17,7 @@ import threading import time from test_driver.logger import rootlog +from test_driver.polling_condition import PollingCondition, coopmulti CHAR_TO_KEY = { "A": "shift-a", @@ -318,6 +319,7 @@ class Machine: # Store last serial console lines for use # of wait_for_console_text last_lines: Queue = Queue() + fail_early: Callable def __repr__(self) -> str: return f"" @@ -329,12 +331,14 @@ class Machine: name: str = "machine", keep_vm_state: bool = False, allow_reboot: bool = False, + fail_early: Callable = lambda: False, ) -> None: self.tmp_dir = tmp_dir self.keep_vm_state = keep_vm_state self.allow_reboot = allow_reboot self.name = name self.start_command = start_command + self.fail_early = fail_early # set up directories self.shared_dir = self.tmp_dir / "shared-xchg" @@ -405,6 +409,7 @@ class Machine: break return answer + @coopmulti def send_monitor_command(self, command: str) -> str: with self.nested("sending monitor command: {}".format(command)): message = ("{}\n".format(command)).encode() @@ -506,6 +511,7 @@ class Machine: break return "".join(output_buffer) + @coopmulti def execute( self, command: str, check_return: bool = True, timeout: Optional[int] = 900 ) -> Tuple[int, str]: diff --git a/nixos/lib/test-driver/test_driver/polling_condition.py b/nixos/lib/test-driver/test_driver/polling_condition.py new file mode 100644 index 000000000000..f38dea71376e --- /dev/null +++ b/nixos/lib/test-driver/test_driver/polling_condition.py @@ -0,0 +1,90 @@ +from typing import Callable, Optional, Any, List, Dict +from functools import wraps + +import time + +from .logger import rootlog + + +class PollingConditionFailed(Exception): + pass + + +def coopmulti(fun: Callable, *, machine: Any = None) -> Callable: + assert not (fun is None and machine is None) + + def inner(fun_: Callable) -> Any: + @wraps(fun_) + def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any: + this_machine = args[0] if machine is None else machine + + if this_machine.fail_early(): # type: ignore + raise PollingConditionFailed("Action interrupted early...") + + return fun_(*args, **kwargs) + + return wrapper + + if fun is None: + return inner + else: + return inner(fun) + + +class PollingCondition: + condition: Callable[[], bool] + seconds_interval: float + description: Optional[str] + + last_called: float + entered: bool + + def __init__( + self, + condition: Callable[[], Optional[bool]], + seconds_interval: float = 2.0, + description: Optional[str] = None, + ): + self.condition = condition # type: ignore + self.seconds_interval = seconds_interval + + if description is None: + self.description = condition.__doc__ + else: + self.description = str(description) + + self.last_called = float("-inf") + self.entered = False + + def check(self) -> bool: + if self.entered or not self.overdue: + return True + + with self, rootlog.nested(self.nested_message): + rootlog.info(f"Time since last: {time.monotonic() - self.last_called:.2f}s") + try: + res = self.condition() # type: ignore + except Exception: + res = False + res = res is None or res + rootlog.info(f"Polling condition {'succeeded' if res else 'failed'}") + return res + + @property + def nested_message(self) -> str: + nested_message = ["Checking polling condition"] + if self.description is not None: + nested_message.append(repr(self.description)) + + return " ".join(nested_message) + + @property + def overdue(self) -> bool: + return self.last_called + self.seconds_interval < time.monotonic() + + def __enter__(self) -> None: + self.entered = True + + def __exit__(self, exc_type, exc_value, traceback) -> None: # type: ignore + self.entered = False + self.last_called = time.monotonic() diff --git a/nixos/tests/vscodium.nix b/nixos/tests/vscodium.nix index 43a0d61c856f..66baea73ec62 100644 --- a/nixos/tests/vscodium.nix +++ b/nixos/tests/vscodium.nix @@ -34,36 +34,46 @@ let }; enableOCR = true; testScript = '' + @polling_condition + def codium_running(): + machine.succeed('pgrep -x codium') + + start_all() machine.wait_for_unit('graphical.target') machine.wait_until_succeeds('pgrep -x codium') - # Wait until vscodium is visible. "File" is in the menu bar. - machine.wait_for_text('File') - machine.screenshot('start_screen') + with codium_running: + # Wait until vscodium is visible. "File" is in the menu bar. + machine.wait_for_text('Get Started') + machine.screenshot('start_screen') - test_string = 'testfile' + test_string = 'testfile' - # Create a new file - machine.send_key('ctrl-n') - machine.wait_for_text('Untitled') - machine.screenshot('empty_editor') + # Create a new file + machine.send_key('ctrl-n') + machine.wait_for_text('Untitled') + machine.screenshot('empty_editor') - # Type a string - machine.send_chars(test_string) - machine.wait_for_text(test_string) - machine.screenshot('editor') + # Type a string + machine.send_chars(test_string) + machine.wait_for_text(test_string) + machine.screenshot('editor') - # Save the file - machine.send_key('ctrl-s') - machine.wait_for_text('Save') - machine.screenshot('save_window') - machine.send_key('ret') + # Save the file + machine.send_key('ctrl-s') + machine.wait_for_text('Save') + machine.screenshot('save_window') + machine.send_key('ret') - # (the default filename is the first line of the file) - machine.wait_for_file(f'/home/alice/{test_string}') + # (the default filename is the first line of the file) + machine.wait_for_file(f'/home/alice/{test_string}') + + machine.send_key('ctrl-q') + machine.wait_until_fails('pgrep -x codium') ''; }); -in builtins.mapAttrs (k: v: mkTest k v { }) tests +in +builtins.mapAttrs (k: v: mkTest k v { }) tests