From 4e1556ed4d43da1f930b3fcf0fc20d827a34f3d2 Mon Sep 17 00:00:00 2001
From: Patrick Hilhorst <git@hilhorst.be>
Date: Sat, 1 Jan 2022 22:35:20 +0100
Subject: [PATCH] nixos/test-driver: add polling_condition

---
 .../writing-nixos-tests.section.md            |  51 ++
 .../writing-nixos-tests.section.xml           | 858 ++++++++++--------
 nixos/lib/test-driver/test_driver/driver.py   |  40 +-
 nixos/lib/test-driver/test_driver/machine.py  |   6 +
 .../test_driver/polling_condition.py          |  90 ++
 nixos/tests/vscodium.nix                      |  50 +-
 6 files changed, 679 insertions(+), 416 deletions(-)
 create mode 100644 nixos/lib/test-driver/test_driver/polling_condition.py
diff --git a/nixos/doc/manual/development/writing-nixos-tests.section.md b/nixos/doc/manual/development/writing-nixos-tests.section.md
index d9749d37da79..7de57d0d2a37 100644
--- a/nixos/doc/manual/development/writing-nixos-tests.section.md
+++ b/nixos/doc/manual/development/writing-nixos-tests.section.md
@@ -88,6 +88,8 @@ starting them in parallel:
 start_all()
 ```
 
+## Machine objects {#ssec-machine-objects}
+
 The following methods are available on machine objects:
 
 `start`
@@ -313,3 +315,52 @@ repository):
       # fmt: on
     '';
 ```
+
+## Failing tests early {#ssec-failing-tests-early}
+
+To fail tests early when certain invariants are no longer met (instead of waiting for the build to time out), the decorator `polling_condition` is provided. For example, if we are testing a program `foo` that should not quit after being started, we might write the following:
+
+```py
+@polling_condition
+def foo_running():
+    machine.succeed("pgrep -x foo")
+
+
+machine.succeed("foo --start")
+machine.wait_until_succeeds("pgrep -x foo")
+
+with foo_running:
+    ...  # Put `foo` through its paces
+```
+
+
+`polling_condition` takes the following (optional) arguments:
+
+`seconds_interval`
+
+:
+    specifies how often the condition should be polled:
+
+    ```py
+    @polling_condition(seconds_interval=10)
+    def foo_running():
+        machine.succeed("pgrep -x foo")
+    ```
+
+`description`
+
+:
+    is used in the log when the condition is checked. If this is not provided, the description is pulled from the docstring of the function. These two are therefore equivalent:
+
+    ```py
+    @polling_condition
+    def foo_running():
+        "check that foo is running"
+        machine.succeed("pgrep -x foo")
+    ```
+
+    ```py
+    @polling_condition(description="check that foo is running")
+    def foo_running():
+        machine.succeed("pgrep -x foo")
+    ```
diff --git a/nixos/doc/manual/from_md/development/writing-nixos-tests.section.xml b/nixos/doc/manual/from_md/development/writing-nixos-tests.section.xml
index 0d523681b639..45c9c40c6095 100644
--- a/nixos/doc/manual/from_md/development/writing-nixos-tests.section.xml
+++ b/nixos/doc/manual/from_md/development/writing-nixos-tests.section.xml
@@ -117,407 +117,413 @@ if not &quot;Linux&quot; in machine.succeed(&quot;uname&quot;):
   <programlisting language="python">
 start_all()
 </programlisting>
-  <para>
-    The following methods are available on machine objects:
-  </para>
-  <variablelist>
-    <varlistentry>
-      <term>
-        <literal>start</literal>
-      </term>
-      <listitem>
-        <para>
-          Start the virtual machine. This method is asynchronous — it
-          does not wait for the machine to finish booting.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>shutdown</literal>
-      </term>
-      <listitem>
-        <para>
-          Shut down the machine, waiting for the VM to exit.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>crash</literal>
-      </term>
-      <listitem>
-        <para>
-          Simulate a sudden power failure, by telling the VM to exit
-          immediately.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>block</literal>
-      </term>
-      <listitem>
-        <para>
-          Simulate unplugging the Ethernet cable that connects the
-          machine to the other machines.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>unblock</literal>
-      </term>
-      <listitem>
-        <para>
-          Undo the effect of <literal>block</literal>.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>screenshot</literal>
-      </term>
-      <listitem>
-        <para>
-          Take a picture of the display of the virtual machine, in PNG
-          format. The screenshot is linked from the HTML log.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>get_screen_text_variants</literal>
-      </term>
-      <listitem>
-        <para>
-          Return a list of different interpretations of what is
-          currently visible on the machine's screen using optical
-          character recognition. The number and order of the
-          interpretations is not specified and is subject to change, but
-          if no exception is raised at least one will be returned.
-        </para>
-        <note>
+  <section xml:id="ssec-machine-objects">
+    <title>Machine objects</title>
+    <para>
+      The following methods are available on machine objects:
+    </para>
+    <variablelist>
+      <varlistentry>
+        <term>
+          <literal>start</literal>
+        </term>
+        <listitem>
           <para>
-            This requires passing <literal>enableOCR</literal> to the
-            test attribute set.
+            Start the virtual machine. This method is asynchronous — it
+            does not wait for the machine to finish booting.
           </para>
-        </note>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>get_screen_text</literal>
-      </term>
-      <listitem>
-        <para>
-          Return a textual representation of what is currently visible
-          on the machine's screen using optical character recognition.
-        </para>
-        <note>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>shutdown</literal>
+        </term>
+        <listitem>
           <para>
-            This requires passing <literal>enableOCR</literal> to the
-            test attribute set.
+            Shut down the machine, waiting for the VM to exit.
           </para>
-        </note>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>send_monitor_command</literal>
-      </term>
-      <listitem>
-        <para>
-          Send a command to the QEMU monitor. This is rarely used, but
-          allows doing stuff such as attaching virtual USB disks to a
-          running machine.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>send_key</literal>
-      </term>
-      <listitem>
-        <para>
-          Simulate pressing keys on the virtual keyboard, e.g.,
-          <literal>send_key(&quot;ctrl-alt-delete&quot;)</literal>.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>send_chars</literal>
-      </term>
-      <listitem>
-        <para>
-          Simulate typing a sequence of characters on the virtual
-          keyboard, e.g.,
-          <literal>send_chars(&quot;foobar\n&quot;)</literal> will type
-          the string <literal>foobar</literal> followed by the Enter
-          key.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>execute</literal>
-      </term>
-      <listitem>
-        <para>
-          Execute a shell command, returning a list
-          <literal>(status, stdout)</literal>. If the command detaches,
-          it must close stdout, as <literal>execute</literal> will wait
-          for this to consume all output reliably. This can be achieved
-          by redirecting stdout to stderr <literal>&gt;&amp;2</literal>,
-          to <literal>/dev/console</literal>,
-          <literal>/dev/null</literal> or a file. Examples of detaching
-          commands are <literal>sleep 365d &amp;</literal>, where the
-          shell forks a new process that can write to stdout and
-          <literal>xclip -i</literal>, where the
-          <literal>xclip</literal> command itself forks without closing
-          stdout. Takes an optional parameter
-          <literal>check_return</literal> that defaults to
-          <literal>True</literal>. Setting this parameter to
-          <literal>False</literal> will not check for the return code
-          and return -1 instead. This can be used for commands that shut
-          down the VM and would therefore break the pipe that would be
-          used for retrieving the return code.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>succeed</literal>
-      </term>
-      <listitem>
-        <para>
-          Execute a shell command, raising an exception if the exit
-          status is not zero, otherwise returning the standard output.
-          Commands are run with <literal>set -euo pipefail</literal>
-          set:
-        </para>
-        <itemizedlist>
-          <listitem>
-            <para>
-              If several commands are separated by <literal>;</literal>
-              and one fails, the command as a whole will fail.
-            </para>
-          </listitem>
-          <listitem>
-            <para>
-              For pipelines, the last non-zero exit status will be
-              returned (if there is one, zero will be returned
-              otherwise).
-            </para>
-          </listitem>
-          <listitem>
-            <para>
-              Dereferencing unset variables fail the command.
-            </para>
-          </listitem>
-          <listitem>
-            <para>
-              It will wait for stdout to be closed. See
-              <literal>execute</literal> for the implications.
-            </para>
-          </listitem>
-        </itemizedlist>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>fail</literal>
-      </term>
-      <listitem>
-        <para>
-          Like <literal>succeed</literal>, but raising an exception if
-          the command returns a zero status.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_until_succeeds</literal>
-      </term>
-      <listitem>
-        <para>
-          Repeat a shell command with 1-second intervals until it
-          succeeds.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_until_fails</literal>
-      </term>
-      <listitem>
-        <para>
-          Repeat a shell command with 1-second intervals until it fails.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_for_unit</literal>
-      </term>
-      <listitem>
-        <para>
-          Wait until the specified systemd unit has reached the
-          <quote>active</quote> state.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_for_file</literal>
-      </term>
-      <listitem>
-        <para>
-          Wait until the specified file exists.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_for_open_port</literal>
-      </term>
-      <listitem>
-        <para>
-          Wait until a process is listening on the given TCP port (on
-          <literal>localhost</literal>, at least).
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_for_closed_port</literal>
-      </term>
-      <listitem>
-        <para>
-          Wait until nobody is listening on the given TCP port.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_for_x</literal>
-      </term>
-      <listitem>
-        <para>
-          Wait until the X11 server is accepting connections.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_for_text</literal>
-      </term>
-      <listitem>
-        <para>
-          Wait until the supplied regular expressions matches the
-          textual contents of the screen by using optical character
-          recognition (see <literal>get_screen_text</literal> and
-          <literal>get_screen_text_variants</literal>).
-        </para>
-        <note>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>crash</literal>
+        </term>
+        <listitem>
           <para>
-            This requires passing <literal>enableOCR</literal> to the
-            test attribute set.
+            Simulate a sudden power failure, by telling the VM to exit
+            immediately.
           </para>
-        </note>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_for_console_text</literal>
-      </term>
-      <listitem>
-        <para>
-          Wait until the supplied regular expressions match a line of
-          the serial console output. This method is useful when OCR is
-          not possibile or accurate enough.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>wait_for_window</literal>
-      </term>
-      <listitem>
-        <para>
-          Wait until an X11 window has appeared whose name matches the
-          given regular expression, e.g.,
-          <literal>wait_for_window(&quot;Terminal&quot;)</literal>.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>copy_from_host</literal>
-      </term>
-      <listitem>
-        <para>
-          Copies a file from host to machine, e.g.,
-          <literal>copy_from_host(&quot;myfile&quot;, &quot;/etc/my/important/file&quot;)</literal>.
-        </para>
-        <para>
-          The first argument is the file on the host. The file needs to
-          be accessible while building the nix derivation. The second
-          argument is the location of the file on the machine.
-        </para>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>systemctl</literal>
-      </term>
-      <listitem>
-        <para>
-          Runs <literal>systemctl</literal> commands with optional
-          support for <literal>systemctl --user</literal>
-        </para>
-        <programlisting language="python">
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>block</literal>
+        </term>
+        <listitem>
+          <para>
+            Simulate unplugging the Ethernet cable that connects the
+            machine to the other machines.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>unblock</literal>
+        </term>
+        <listitem>
+          <para>
+            Undo the effect of <literal>block</literal>.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>screenshot</literal>
+        </term>
+        <listitem>
+          <para>
+            Take a picture of the display of the virtual machine, in PNG
+            format. The screenshot is linked from the HTML log.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>get_screen_text_variants</literal>
+        </term>
+        <listitem>
+          <para>
+            Return a list of different interpretations of what is
+            currently visible on the machine's screen using optical
+            character recognition. The number and order of the
+            interpretations is not specified and is subject to change,
+            but if no exception is raised at least one will be returned.
+          </para>
+          <note>
+            <para>
+              This requires passing <literal>enableOCR</literal> to the
+              test attribute set.
+            </para>
+          </note>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>get_screen_text</literal>
+        </term>
+        <listitem>
+          <para>
+            Return a textual representation of what is currently visible
+            on the machine's screen using optical character recognition.
+          </para>
+          <note>
+            <para>
+              This requires passing <literal>enableOCR</literal> to the
+              test attribute set.
+            </para>
+          </note>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>send_monitor_command</literal>
+        </term>
+        <listitem>
+          <para>
+            Send a command to the QEMU monitor. This is rarely used, but
+            allows doing stuff such as attaching virtual USB disks to a
+            running machine.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>send_key</literal>
+        </term>
+        <listitem>
+          <para>
+            Simulate pressing keys on the virtual keyboard, e.g.,
+            <literal>send_key(&quot;ctrl-alt-delete&quot;)</literal>.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>send_chars</literal>
+        </term>
+        <listitem>
+          <para>
+            Simulate typing a sequence of characters on the virtual
+            keyboard, e.g.,
+            <literal>send_chars(&quot;foobar\n&quot;)</literal> will
+            type the string <literal>foobar</literal> followed by the
+            Enter key.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>execute</literal>
+        </term>
+        <listitem>
+          <para>
+            Execute a shell command, returning a list
+            <literal>(status, stdout)</literal>. If the command
+            detaches, it must close stdout, as
+            <literal>execute</literal> will wait for this to consume all
+            output reliably. This can be achieved by redirecting stdout
+            to stderr <literal>&gt;&amp;2</literal>, to
+            <literal>/dev/console</literal>,
+            <literal>/dev/null</literal> or a file. Examples of
+            detaching commands are <literal>sleep 365d &amp;</literal>,
+            where the shell forks a new process that can write to stdout
+            and <literal>xclip -i</literal>, where the
+            <literal>xclip</literal> command itself forks without
+            closing stdout. Takes an optional parameter
+            <literal>check_return</literal> that defaults to
+            <literal>True</literal>. Setting this parameter to
+            <literal>False</literal> will not check for the return code
+            and return -1 instead. This can be used for commands that
+            shut down the VM and would therefore break the pipe that
+            would be used for retrieving the return code.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>succeed</literal>
+        </term>
+        <listitem>
+          <para>
+            Execute a shell command, raising an exception if the exit
+            status is not zero, otherwise returning the standard output.
+            Commands are run with <literal>set -euo pipefail</literal>
+            set:
+          </para>
+          <itemizedlist>
+            <listitem>
+              <para>
+                If several commands are separated by
+                <literal>;</literal> and one fails, the command as a
+                whole will fail.
+              </para>
+            </listitem>
+            <listitem>
+              <para>
+                For pipelines, the last non-zero exit status will be
+                returned (if there is one, zero will be returned
+                otherwise).
+              </para>
+            </listitem>
+            <listitem>
+              <para>
+                Dereferencing unset variables fail the command.
+              </para>
+            </listitem>
+            <listitem>
+              <para>
+                It will wait for stdout to be closed. See
+                <literal>execute</literal> for the implications.
+              </para>
+            </listitem>
+          </itemizedlist>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>fail</literal>
+        </term>
+        <listitem>
+          <para>
+            Like <literal>succeed</literal>, but raising an exception if
+            the command returns a zero status.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_until_succeeds</literal>
+        </term>
+        <listitem>
+          <para>
+            Repeat a shell command with 1-second intervals until it
+            succeeds.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_until_fails</literal>
+        </term>
+        <listitem>
+          <para>
+            Repeat a shell command with 1-second intervals until it
+            fails.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_for_unit</literal>
+        </term>
+        <listitem>
+          <para>
+            Wait until the specified systemd unit has reached the
+            <quote>active</quote> state.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_for_file</literal>
+        </term>
+        <listitem>
+          <para>
+            Wait until the specified file exists.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_for_open_port</literal>
+        </term>
+        <listitem>
+          <para>
+            Wait until a process is listening on the given TCP port (on
+            <literal>localhost</literal>, at least).
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_for_closed_port</literal>
+        </term>
+        <listitem>
+          <para>
+            Wait until nobody is listening on the given TCP port.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_for_x</literal>
+        </term>
+        <listitem>
+          <para>
+            Wait until the X11 server is accepting connections.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_for_text</literal>
+        </term>
+        <listitem>
+          <para>
+            Wait until the supplied regular expressions matches the
+            textual contents of the screen by using optical character
+            recognition (see <literal>get_screen_text</literal> and
+            <literal>get_screen_text_variants</literal>).
+          </para>
+          <note>
+            <para>
+              This requires passing <literal>enableOCR</literal> to the
+              test attribute set.
+            </para>
+          </note>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_for_console_text</literal>
+        </term>
+        <listitem>
+          <para>
+            Wait until the supplied regular expressions match a line of
+            the serial console output. This method is useful when OCR is
+            not possibile or accurate enough.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>wait_for_window</literal>
+        </term>
+        <listitem>
+          <para>
+            Wait until an X11 window has appeared whose name matches the
+            given regular expression, e.g.,
+            <literal>wait_for_window(&quot;Terminal&quot;)</literal>.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>copy_from_host</literal>
+        </term>
+        <listitem>
+          <para>
+            Copies a file from host to machine, e.g.,
+            <literal>copy_from_host(&quot;myfile&quot;, &quot;/etc/my/important/file&quot;)</literal>.
+          </para>
+          <para>
+            The first argument is the file on the host. The file needs
+            to be accessible while building the nix derivation. The
+            second argument is the location of the file on the machine.
+          </para>
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>systemctl</literal>
+        </term>
+        <listitem>
+          <para>
+            Runs <literal>systemctl</literal> commands with optional
+            support for <literal>systemctl --user</literal>
+          </para>
+          <programlisting language="python">
 machine.systemctl(&quot;list-jobs --no-pager&quot;) # runs `systemctl list-jobs --no-pager`
 machine.systemctl(&quot;list-jobs --no-pager&quot;, &quot;any-user&quot;) # spawns a shell for `any-user` and runs `systemctl --user list-jobs --no-pager`
 </programlisting>
-      </listitem>
-    </varlistentry>
-    <varlistentry>
-      <term>
-        <literal>shell_interact</literal>
-      </term>
-      <listitem>
-        <para>
-          Allows you to directly interact with the guest shell. This
-          should only be used during test development, not in production
-          tests. Killing the interactive session with
-          <literal>Ctrl-d</literal> or <literal>Ctrl-c</literal> also
-          ends the guest session.
-        </para>
-      </listitem>
-    </varlistentry>
-  </variablelist>
-  <para>
-    To test user units declared by
-    <literal>systemd.user.services</literal> the optional
-    <literal>user</literal> argument can be used:
-  </para>
-  <programlisting language="python">
+        </listitem>
+      </varlistentry>
+      <varlistentry>
+        <term>
+          <literal>shell_interact</literal>
+        </term>
+        <listitem>
+          <para>
+            Allows you to directly interact with the guest shell. This
+            should only be used during test development, not in
+            production tests. Killing the interactive session with
+            <literal>Ctrl-d</literal> or <literal>Ctrl-c</literal> also
+            ends the guest session.
+          </para>
+        </listitem>
+      </varlistentry>
+    </variablelist>
+    <para>
+      To test user units declared by
+      <literal>systemd.user.services</literal> the optional
+      <literal>user</literal> argument can be used:
+    </para>
+    <programlisting language="python">
 machine.start()
 machine.wait_for_x()
 machine.wait_for_unit(&quot;xautolock.service&quot;, &quot;x-session-user&quot;)
 </programlisting>
-  <para>
-    This applies to <literal>systemctl</literal>,
-    <literal>get_unit_info</literal>, <literal>wait_for_unit</literal>,
-    <literal>start_job</literal> and <literal>stop_job</literal>.
-  </para>
-  <para>
-    For faster dev cycles it's also possible to disable the code-linters
-    (this shouldn't be commited though):
-  </para>
-  <programlisting language="bash">
+    <para>
+      This applies to <literal>systemctl</literal>,
+      <literal>get_unit_info</literal>,
+      <literal>wait_for_unit</literal>, <literal>start_job</literal> and
+      <literal>stop_job</literal>.
+    </para>
+    <para>
+      For faster dev cycles it's also possible to disable the
+      code-linters (this shouldn't be commited though):
+    </para>
+    <programlisting language="bash">
 import ./make-test-python.nix {
   skipLint = true;
   machine =
@@ -531,13 +537,13 @@ import ./make-test-python.nix {
     '';
 }
 </programlisting>
-  <para>
-    This will produce a Nix warning at evaluation time. To fully disable
-    the linter, wrap the test script in comment directives to disable
-    the Black linter directly (again, don't commit this within the
-    Nixpkgs repository):
-  </para>
-  <programlisting language="bash">
+    <para>
+      This will produce a Nix warning at evaluation time. To fully
+      disable the linter, wrap the test script in comment directives to
+      disable the Black linter directly (again, don't commit this within
+      the Nixpkgs repository):
+    </para>
+    <programlisting language="bash">
   testScript =
     ''
       # fmt: off
@@ -545,4 +551,66 @@ import ./make-test-python.nix {
       # fmt: on
     '';
 </programlisting>
+  </section>
+  <section xml:id="ssec-failing-tests-early">
+    <title>Failing tests early</title>
+    <para>
+      To fail tests early when certain invariants are no longer met
+      (instead of waiting for the build to time out), the decorator
+      <literal>polling_condition</literal> is provided. For example, if
+      we are testing a program <literal>foo</literal> that should not
+      quit after being started, we might write the following:
+    </para>
+    <programlisting language="python">
+@polling_condition
+def foo_running():
+    machine.succeed(&quot;pgrep -x foo&quot;)
+
+
+machine.succeed(&quot;foo --start&quot;)
+machine.wait_until_succeeds(&quot;pgrep -x foo&quot;)
+
+with foo_running:
+    ...  # Put `foo` through its paces
+</programlisting>
+    <para>
+      <literal>polling_condition</literal> takes the following
+      (optional) arguments:
+    </para>
+    <para>
+      <literal>seconds_interval</literal>
+    </para>
+    <para>
+      : specifies how often the condition should be polled:
+    </para>
+    <programlisting>
+```py
+@polling_condition(seconds_interval=10)
+def foo_running():
+    machine.succeed(&quot;pgrep -x foo&quot;)
+```
+</programlisting>
+    <para>
+      <literal>description</literal>
+    </para>
+    <para>
+      : is used in the log when the condition is checked. If this is not
+      provided, the description is pulled from the docstring of the
+      function. These two are therefore equivalent:
+    </para>
+    <programlisting>
+```py
+@polling_condition
+def foo_running():
+    &quot;check that foo is running&quot;
+    machine.succeed(&quot;pgrep -x foo&quot;)
+```
+
+```py
+@polling_condition(description=&quot;check that foo is running&quot;)
+def foo_running():
+    machine.succeed(&quot;pgrep -x foo&quot;)
+```
+</programlisting>
+  </section>
 </section>
diff --git a/nixos/lib/test-driver/test_driver/driver.py b/nixos/lib/test-driver/test_driver/driver.py
index f3af98537ad6..e22f9ee7a757 100644
--- a/nixos/lib/test-driver/test_driver/driver.py
+++ b/nixos/lib/test-driver/test_driver/driver.py
@@ -1,12 +1,13 @@
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Any, Dict, Iterator, List
+from typing import Any, Dict, Iterator, List, Union, Optional, Callable, ContextManager
 import os
 import tempfile
 
 from test_driver.logger import rootlog
 from test_driver.machine import Machine, NixStartScript, retry
 from test_driver.vlan import VLan
+from test_driver.polling_condition import PollingCondition
 
 
 class Driver:
@@ -16,6 +17,7 @@ class Driver:
     tests: str
     vlans: List[VLan]
     machines: List[Machine]
+    polling_conditions: List[Callable]
 
     def __init__(
         self,
@@ -36,12 +38,15 @@ class Driver:
             for s in scripts:
                 yield NixStartScript(s)
 
+        self.polling_conditions = []
+
         self.machines = [
             Machine(
                 start_command=cmd,
                 keep_vm_state=keep_vm_state,
                 name=cmd.machine_name,
                 tmp_dir=tmp_dir,
+                fail_early=self.fail_early,
             )
             for cmd in cmd(start_scripts)
         ]
@@ -84,6 +89,7 @@ class Driver:
             retry=retry,
             serial_stdout_off=self.serial_stdout_off,
             serial_stdout_on=self.serial_stdout_on,
+            polling_condition=self.polling_condition,
             Machine=Machine,  # for typing
         )
         machine_symbols = {m.name: m for m in self.machines}
@@ -159,3 +165,35 @@ class Driver:
 
     def serial_stdout_off(self) -> None:
         rootlog._print_serial_logs = False
+
+    def fail_early(self) -> bool:  # True once any registered check fails
+        return not all(check() for check in self.polling_conditions)
+
+    def polling_condition(
+        self,
+        fun_: Optional[Callable] = None,
+        *,
+        seconds_interval: float = 2.0,
+        description: Optional[str] = None,
+    ) -> Union[Callable[[Callable], ContextManager], ContextManager]:
+        """Decorator (bare or with keywords) yielding a polled-condition context."""
+        owner = self
+
+        class Poll:
+            def __init__(self, fun: Callable):
+                checker = PollingCondition(fun, seconds_interval, description)
+                self.condition = checker.check
+
+            def __enter__(self) -> None:
+                # From here on the check is part of what fail_early() evaluates.
+                owner.polling_conditions.append(self.condition)
+
+            def __exit__(self, a, b, c) -> None:  # type: ignore
+                # Contexts are expected to unwind in reverse order of entry.
+                popped = owner.polling_conditions.pop()
+                assert popped is self.condition
+
+        # Called with keyword arguments only: hand back the decorator itself.
+        if fun_ is None:
+            return Poll
+        return Poll(fun_)
diff --git a/nixos/lib/test-driver/test_driver/machine.py b/nixos/lib/test-driver/test_driver/machine.py
index b3dbe5126fcc..dbf9fd244861 100644
--- a/nixos/lib/test-driver/test_driver/machine.py
+++ b/nixos/lib/test-driver/test_driver/machine.py
@@ -17,6 +17,7 @@ import threading
 import time
 
 from test_driver.logger import rootlog
+from test_driver.polling_condition import PollingCondition, coopmulti
 
 CHAR_TO_KEY = {
     "A": "shift-a",
@@ -318,6 +319,7 @@ class Machine:
     # Store last serial console lines for use
     # of wait_for_console_text
     last_lines: Queue = Queue()
+    fail_early: Callable
 
     def __repr__(self) -> str:
         return f"<Machine '{self.name}'>"
@@ -329,12 +331,14 @@ class Machine:
         name: str = "machine",
         keep_vm_state: bool = False,
         allow_reboot: bool = False,
+        fail_early: Callable = lambda: False,
     ) -> None:
         self.tmp_dir = tmp_dir
         self.keep_vm_state = keep_vm_state
         self.allow_reboot = allow_reboot
         self.name = name
         self.start_command = start_command
+        self.fail_early = fail_early
 
         # set up directories
         self.shared_dir = self.tmp_dir / "shared-xchg"
@@ -405,6 +409,7 @@ class Machine:
                     break
             return answer
 
+    @coopmulti
     def send_monitor_command(self, command: str) -> str:
         with self.nested("sending monitor command: {}".format(command)):
             message = ("{}\n".format(command)).encode()
@@ -506,6 +511,7 @@ class Machine:
                 break
         return "".join(output_buffer)
 
+    @coopmulti
     def execute(
         self, command: str, check_return: bool = True, timeout: Optional[int] = 900
     ) -> Tuple[int, str]:
diff --git a/nixos/lib/test-driver/test_driver/polling_condition.py b/nixos/lib/test-driver/test_driver/polling_condition.py
new file mode 100644
index 000000000000..f38dea71376e
--- /dev/null
+++ b/nixos/lib/test-driver/test_driver/polling_condition.py
@@ -0,0 +1,90 @@
+from typing import Callable, Optional, Any, List, Dict
+from functools import wraps
+
+import time
+
+from .logger import rootlog
+
+
+class PollingConditionFailed(Exception):
+    """Raised when an action is aborted because a polling condition failed."""
+
+
+def coopmulti(fun: Optional[Callable] = None, *, machine: Any = None) -> Callable:
+    """Guard an action: raise PollingConditionFailed if `fail_early()` is true.
+
+    Use bare on methods (the first argument is then taken as the machine) or as
+    `@coopmulti(machine=m)`; the latter needs `fun` to default to None.
+    """
+    assert not (fun is None and machine is None)
+
+    def inner(fun_: Callable) -> Any:
+        @wraps(fun_)
+        def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any:
+            this_machine = args[0] if machine is None else machine
+            if this_machine.fail_early():  # type: ignore
+                raise PollingConditionFailed("Action interrupted early...")
+            return fun_(*args, **kwargs)
+
+        return wrapper
+
+    return inner if fun is None else inner(fun)
+
+
+class PollingCondition:
+    """A condition that is re-run at most once per `seconds_interval` seconds."""
+
+    condition: Callable[[], bool]
+    seconds_interval: float
+    description: Optional[str]
+    last_called: float  # time.monotonic() when the previous check finished
+    entered: bool  # True during a check; nested check() calls then return True
+
+    def __init__(
+        self,
+        condition: Callable[[], Optional[bool]],
+        seconds_interval: float = 2.0,
+        description: Optional[str] = None,
+    ):
+        self.condition = condition  # type: ignore
+        self.seconds_interval = seconds_interval
+        # Fall back to the condition's docstring when no description is given.
+        self.description = (
+            condition.__doc__ if description is None else str(description)
+        )
+        self.last_called = float("-inf")  # so the very first check is overdue
+        self.entered = False
+
+    def check(self) -> bool:
+        """Run the condition if it is due; report False only on a failed run."""
+        if self.entered or not self.overdue:
+            return True
+
+        with self, rootlog.nested(self.nested_message):
+            rootlog.info(f"Time since last: {time.monotonic() - self.last_called:.2f}s")
+            try:
+                res = self.condition()  # type: ignore
+            except Exception:
+                res = False  # a raising condition counts as failed
+            res = res is None or bool(res)  # None (no return value) means success
+            rootlog.info(f"Polling condition {'succeeded' if res else 'failed'}")
+            return res
+
+    @property
+    def nested_message(self) -> str:
+        """Log heading for a check; includes the description if set."""
+        if self.description is None:
+            return "Checking polling condition"
+        return f"Checking polling condition {self.description!r}"
+
+    @property
+    def overdue(self) -> bool:
+        """Whether `seconds_interval` has elapsed since the last check."""
+        return self.last_called + self.seconds_interval < time.monotonic()
+
+    def __enter__(self) -> None:
+        self.entered = True
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:  # type: ignore
+        self.entered = False
+        self.last_called = time.monotonic()
diff --git a/nixos/tests/vscodium.nix b/nixos/tests/vscodium.nix
index 43a0d61c856f..66baea73ec62 100644
--- a/nixos/tests/vscodium.nix
+++ b/nixos/tests/vscodium.nix
@@ -34,36 +34,46 @@ let
       };
       enableOCR = true;
       testScript = ''
+        @polling_condition
+        def codium_running():
+            machine.succeed('pgrep -x codium')
+
+
         start_all()
 
         machine.wait_for_unit('graphical.target')
         machine.wait_until_succeeds('pgrep -x codium')
 
-        # Wait until vscodium is visible. "File" is in the menu bar.
-        machine.wait_for_text('File')
-        machine.screenshot('start_screen')
+        with codium_running:
+            # Wait until vscodium is visible. "Get Started" is on the start screen.
+            machine.wait_for_text('Get Started')
+            machine.screenshot('start_screen')
 
-        test_string = 'testfile'
+            test_string = 'testfile'
 
-        # Create a new file
-        machine.send_key('ctrl-n')
-        machine.wait_for_text('Untitled')
-        machine.screenshot('empty_editor')
+            # Create a new file
+            machine.send_key('ctrl-n')
+            machine.wait_for_text('Untitled')
+            machine.screenshot('empty_editor')
 
-        # Type a string
-        machine.send_chars(test_string)
-        machine.wait_for_text(test_string)
-        machine.screenshot('editor')
+            # Type a string
+            machine.send_chars(test_string)
+            machine.wait_for_text(test_string)
+            machine.screenshot('editor')
 
-        # Save the file
-        machine.send_key('ctrl-s')
-        machine.wait_for_text('Save')
-        machine.screenshot('save_window')
-        machine.send_key('ret')
+            # Save the file
+            machine.send_key('ctrl-s')
+            machine.wait_for_text('Save')
+            machine.screenshot('save_window')
+            machine.send_key('ret')
 
-        # (the default filename is the first line of the file)
-        machine.wait_for_file(f'/home/alice/{test_string}')
+            # (the default filename is the first line of the file)
+            machine.wait_for_file(f'/home/alice/{test_string}')
+
+        machine.send_key('ctrl-q')
+        machine.wait_until_fails('pgrep -x codium')
       '';
     });
 
-in builtins.mapAttrs (k: v: mkTest k v { }) tests
+in
+builtins.mapAttrs (k: v: mkTest k v { }) tests