Merge pull request #259065 from tweag/fileset.difference

`lib.fileset.difference`: init
This commit is contained in:
Silvan Mosberger 2023-11-01 19:40:45 +01:00 committed by GitHub
commit fc28c5e5b7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 231 additions and 0 deletions

View file

@ -9,6 +9,7 @@ let
_fileFilter
_printFileset
_intersection
_difference
;
inherit (builtins)
@ -368,6 +369,58 @@ If a directory does not recursively contain any file, it is omitted from the sto
(elemAt filesets 0)
(elemAt filesets 1);
/*
The file set containing all files from the first file set that are not in the second file set.
See also [Difference (set theory)](https://en.wikipedia.org/wiki/Complement_(set_theory)#Relative_complement).
The given file sets are evaluated as lazily as possible,
with the first argument being evaluated first if needed.
Type:
union :: FileSet -> FileSet -> FileSet
Example:
# Create a file set containing all files from the current directory,
# except ones under ./tests
difference ./. ./tests
let
# A set of Nix-related files
nixFiles = unions [ ./default.nix ./nix ./tests/default.nix ];
in
# Create a file set containing all files under ./tests, except ones in `nixFiles`,
# meaning only without ./tests/default.nix
difference ./tests nixFiles
*/
difference =
# The positive file set.
# The result can only contain files that are also in this file set.
#
# This argument can also be a path,
# which gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
positive:
# The negative file set.
# The result will never contain files that are also in this file set.
#
# This argument can also be a path,
# which gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
negative:
let
filesets = _coerceMany "lib.fileset.difference" [
{
context = "first argument (positive set)";
value = positive;
}
{
context = "second argument (negative set)";
value = negative;
}
];
in
_difference
(elemAt filesets 0)
(elemAt filesets 1);
/*
Incrementally evaluate and trace a file set in a pretty way.
This function is only intended for debugging purposes.

View file

@ -651,6 +651,86 @@ rec {
# In all other cases it's the rhs
rhs;
# Compute the set difference between two file sets.
# The filesets must already be coerced and validated to be in the same filesystem root.
# Type: Fileset -> Fileset -> Fileset
_difference = positive: negative:
let
# The common base components prefix, e.g.
# (/foo/bar, /foo/bar/baz) -> /foo/bar
# (/foo/bar, /foo/baz) -> /foo
commonBaseComponentsLength =
# TODO: Have a `lib.lists.commonPrefixLength` function such that we don't need the list allocation from commonPrefix here
length (
commonPrefix
positive._internalBaseComponents
negative._internalBaseComponents
);
# We need filesetTree's with the same base to be able to compute the difference between them
# This here is the filesetTree from the negative file set, but for a base path that matches the positive file set.
# Examples:
# For `difference /foo /foo/bar`, `negativeTreeWithPositiveBase = { bar = "directory"; }`
# because under the base path of `/foo`, only `bar` from the negative file set is included
# For `difference /foo/bar /foo`, `negativeTreeWithPositiveBase = "directory"`
# because under the base path of `/foo/bar`, everything from the negative file set is included
# For `difference /foo /bar`, `negativeTreeWithPositiveBase = null`
# because under the base path of `/foo`, nothing from the negative file set is included
negativeTreeWithPositiveBase =
if commonBaseComponentsLength == length positive._internalBaseComponents then
# The common prefix is the same as the positive base path, so the second path is equal or longer.
# We need to _shorten_ the negative filesetTree to the same base path as the positive one
# E.g. for `difference /foo /foo/bar` the common prefix is /foo, equal to the positive file set's base
# So we need to shorten the base of the tree for the negative argument from /foo/bar to just /foo
_shortenTreeBase positive._internalBaseComponents negative
else if commonBaseComponentsLength == length negative._internalBaseComponents then
# The common prefix is the same as the negative base path, so the first path is longer.
# We need to lengthen the negative filesetTree to the same base path as the positive one.
# E.g. for `difference /foo/bar /foo` the common prefix is /foo, equal to the negative file set's base
# So we need to lengthen the base of the tree for the negative argument from /foo to /foo/bar
_lengthenTreeBase positive._internalBaseComponents negative
else
# The common prefix is neither the first nor the second path.
# This means there's no overlap between the two file sets,
# and nothing from the negative argument should get removed from the positive one
# E.g for `difference /foo /bar`, we remove nothing to get the same as `/foo`
null;
resultingTree =
_differenceTree
positive._internalBase
positive._internalTree
negativeTreeWithPositiveBase;
in
# If the first file set is empty, we can never have any files in the result
if positive._internalIsEmptyWithoutBase then
_emptyWithoutBase
# If the second file set is empty, nothing gets removed, so the result is just the first file set
else if negative._internalIsEmptyWithoutBase then
positive
else
# We use the positive file set base for the result,
# because only files from the positive side may be included,
# which is what base path is for
_create positive._internalBase resultingTree;
# Computes the set difference of two filesetTree's
# Type: Path -> filesetTree -> filesetTree
_differenceTree = path: lhs: rhs:
# If the lhs doesn't have any files, or the right hand side includes all files
if lhs == null || isString rhs then
# The result will always be empty
null
# If the right hand side has no files
else if rhs == null then
# The result is always the left hand side, because nothing gets removed
lhs
else
# Otherwise we always have two attribute sets to recurse into
mapAttrs (name: lhsValue:
_differenceTree (path + "/${name}") lhsValue (rhs.${name} or null)
) (_directoryEntries path lhs);
_fileFilter = predicate: fileset:
let
recurse = path: tree:

View file

@ -684,6 +684,104 @@ tree=(
)
checkFileset 'intersection (unions [ ./a/b ./c/d ./c/e ]) (unions [ ./a ./c/d/f ./c/e ])'
## Difference
# Subtracting something from itself results in nothing
tree=(
[a]=0
)
checkFileset 'difference ./. ./.'
# The tree of the second argument should not be evaluated if not needed
checkFileset 'difference _emptyWithoutBase (_create ./. (abort "This should not be used!"))'
checkFileset 'difference (_create ./. null) (_create ./. (abort "This should not be used!"))'
# Subtracting nothing gives the same thing back
tree=(
[a]=1
)
checkFileset 'difference ./. _emptyWithoutBase'
checkFileset 'difference ./. (_create ./. null)'
# Subtracting doesn't influence the base path
mkdir a b
touch {a,b}/x
expectEqual 'toSource { root = ./a; fileset = difference ./a ./b; }' 'toSource { root = ./a; fileset = ./a; }'
rm -rf -- *
# Also not the other way around
mkdir a
expectFailure 'toSource { root = ./a; fileset = difference ./. ./a; }' 'lib.fileset.toSource: `fileset` could contain files in '"$work"', which is not under the `root` \('"$work"'/a\). Potential solutions:
\s*- Set `root` to '"$work"' or any directory higher up. This changes the layout of the resulting store path.
\s*- Set `fileset` to a file set that cannot contain files outside the `root` \('"$work"'/a\). This could change the files included in the result.'
rm -rf -- *
# Difference actually works
# We test all combinations of ./., ./a, ./a/x and ./b
tree=(
[a/x]=0
[a/y]=0
[b]=0
[c]=0
)
checkFileset 'difference ./. ./.'
checkFileset 'difference ./a ./.'
checkFileset 'difference ./a/x ./.'
checkFileset 'difference ./b ./.'
checkFileset 'difference ./a ./a'
checkFileset 'difference ./a/x ./a'
checkFileset 'difference ./a/x ./a/x'
checkFileset 'difference ./b ./b'
tree=(
[a/x]=0
[a/y]=0
[b]=1
[c]=1
)
checkFileset 'difference ./. ./a'
tree=(
[a/x]=1
[a/y]=1
[b]=0
[c]=0
)
checkFileset 'difference ./a ./b'
tree=(
[a/x]=1
[a/y]=0
[b]=0
[c]=0
)
checkFileset 'difference ./a/x ./b'
tree=(
[a/x]=0
[a/y]=1
[b]=0
[c]=0
)
checkFileset 'difference ./a ./a/x'
tree=(
[a/x]=0
[a/y]=0
[b]=1
[c]=0
)
checkFileset 'difference ./b ./a'
checkFileset 'difference ./b ./a/x'
tree=(
[a/x]=0
[a/y]=1
[b]=1
[c]=1
)
checkFileset 'difference ./. ./a/x'
tree=(
[a/x]=1
[a/y]=1
[b]=0
[c]=1
)
checkFileset 'difference ./. ./b'
## File filter