Merge master into staging-next

commit 7da057ad4e, authored by github-actions[bot] on 2021-09-18 12:01:25 +00:00, committed by GitHub
18 changed files with 348 additions and 69 deletions


@ -37,6 +37,13 @@
PostgreSQL now defaults to major version 13.
</para>
</listitem>
<listitem>
<para>
spark now defaults to spark 3, updated from 2. A
<link xlink:href="https://spark.apache.org/docs/latest/core-migration-guide.html#upgrading-from-core-24-to-30">migration
guide</link> is available.
</para>
</listitem>
<listitem>
<para>
Activation scripts can now opt in to be run when running
@ -260,6 +267,12 @@
entry</link>.
</para>
</listitem>
<listitem>
<para>
<link xlink:href="https://spark.apache.org/">spark</link>, a
unified analytics engine for large-scale data processing.
</para>
</listitem>
</itemizedlist>
</section>
<section xml:id="sec-release-21.11-incompatibilities">


@ -14,6 +14,8 @@ In addition to numerous new and upgraded packages, this release has the followin
- PostgreSQL now defaults to major version 13.
- spark now defaults to spark 3, updated from 2. A [migration guide](https://spark.apache.org/docs/latest/core-migration-guide.html#upgrading-from-core-24-to-30) is available.
- Activation scripts can now opt in to be run when running `nixos-rebuild dry-activate` and detect the dry activation by reading `$NIXOS_ACTION`.
  This allows activation scripts to output what they would change if the activation was really run.
  The users/modules activation script supports this and outputs some of its actions.
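
A minimal sketch of an activation script that opts in to dry activation, for the item above; the script name `example` is hypothetical and the opt-in attribute name `supportsDryActivation` is an assumption, not something stated in these notes:

    system.activationScripts.example = {
      supportsDryActivation = true; # assumption: the opt-in flag for dry-activate
      text = ''
        if [ "$NIXOS_ACTION" = "dry-activate" ]; then
          echo "example: would regenerate /etc/example.conf"
        else
          echo generated > /etc/example.conf
        fi
      '';
    };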
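
And for the Spark default change above, a minimal sketch of staying on Spark 2, assuming the `spark2` attribute and the `services.spark.package` option added in this release:

    services.spark.worker.enable = true;
    services.spark.package = pkgs.spark2; # keep the 2.4.x series instead of the new Spark 3 default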
@ -82,6 +84,8 @@ In addition to numerous new and upgraded packages, this release has the followin
or sends them to a downstream service for further analysis.
Documented in [its manual entry](#module-services-parsedmarc).
- [spark](https://spark.apache.org/), a unified analytics engine for large-scale data processing.
## Backward Incompatibilities {#sec-release-21.11-incompatibilities}
- The `paperless` module and package have been removed. All users should migrate to the


@ -297,6 +297,7 @@
./services/cluster/kubernetes/pki.nix
./services/cluster/kubernetes/proxy.nix
./services/cluster/kubernetes/scheduler.nix
./services/cluster/spark/default.nix
./services/computing/boinc/client.nix
./services/computing/foldingathome/client.nix
./services/computing/slurm/slurm.nix


@ -0,0 +1,162 @@
{config, pkgs, lib, ...}:
let
cfg = config.services.spark;
in
with lib;
{
options = {
services.spark = {
master = {
enable = mkEnableOption "Spark master service";
bind = mkOption {
type = types.str;
description = "Address the spark master binds to.";
default = "127.0.0.1";
example = "0.0.0.0";
};
restartIfChanged = mkOption {
type = types.bool;
description = ''
Automatically restart master service on config change.
This can be set to false to defer restarts on clusters running critical applications.
Please consider the security implications of inadvertently running an older version,
and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
'';
default = true;
};
extraEnvironment = mkOption {
type = types.attrsOf types.str;
description = "Extra environment variables to pass to spark master. See spark-standalone documentation.";
default = {};
example = {
SPARK_MASTER_WEBUI_PORT = "8181";
SPARK_MASTER_OPTS = "-Dspark.deploy.defaultCores=5";
};
};
};
worker = {
enable = mkEnableOption "Spark worker service";
workDir = mkOption {
type = types.path;
description = "Spark worker work dir.";
default = "/var/lib/spark";
};
master = mkOption {
type = types.str;
description = "Address of the spark master.";
default = "127.0.0.1:7077";
};
restartIfChanged = mkOption {
type = types.bool;
description = ''
Automatically restart worker service on config change.
This can be set to false to defer restarts on clusters running critical applications.
Please consider the security implications of inadvertently running an older version,
and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
'';
default = true;
};
extraEnvironment = mkOption {
type = types.attrsOf types.str;
description = "Extra environment variables to pass to spark worker.";
default = {};
example = {
SPARK_WORKER_CORES = "5";
SPARK_WORKER_MEMORY = "2g";
};
};
};
confDir = mkOption {
type = types.path;
description = "Spark configuration directory. Spark will use the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory.";
default = "${cfg.package}/lib/${cfg.package.untarDir}/conf";
defaultText = literalExample "\${cfg.package}/lib/\${cfg.package.untarDir}/conf";
};
logDir = mkOption {
type = types.path;
description = "Spark log directory.";
default = "/var/log/spark";
};
package = mkOption {
type = types.package;
description = "Spark package.";
default = pkgs.spark;
defaultText = "pkgs.spark";
example = literalExample ''pkgs.spark.overrideAttrs (super: rec {
pname = "spark";
version = "2.4.4";
src = pkgs.fetchzip {
url = "mirror://apache/spark/"''${pname}-''${version}/''${pname}-''${version}-bin-without-hadoop.tgz";
sha256 = "1a9w5k0207fysgpxx6db3a00fs5hdc2ncx99x4ccy2s0v5ndc66g";
};
})'';
};
};
};
config = lib.mkIf (cfg.worker.enable || cfg.master.enable) {
environment.systemPackages = [ cfg.package ];
systemd = {
services = {
spark-master = lib.mkIf cfg.master.enable {
path = with pkgs; [ procps openssh nettools ];
description = "spark master service.";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
restartIfChanged = cfg.master.restartIfChanged;
environment = cfg.master.extraEnvironment // {
SPARK_MASTER_HOST = cfg.master.bind;
SPARK_CONF_DIR = cfg.confDir;
SPARK_LOG_DIR = cfg.logDir;
};
serviceConfig = {
Type = "forking";
User = "spark";
Group = "spark";
WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-master.sh";
ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-master.sh";
TimeoutSec = 300;
StartLimitBurst=10;
Restart = "always";
};
};
spark-worker = lib.mkIf cfg.worker.enable {
path = with pkgs; [ procps openssh nettools rsync ];
description = "spark master service.";
after = [ "network.target" ];
wantedBy = [ "multi-user.target" ];
restartIfChanged = cfg.worker.restartIfChanged;
environment = cfg.worker.extraEnvironment // {
SPARK_MASTER = cfg.worker.master;
SPARK_CONF_DIR = cfg.confDir;
SPARK_LOG_DIR = cfg.logDir;
SPARK_WORKER_DIR = cfg.worker.workDir;
};
serviceConfig = {
Type = "forking";
User = "spark";
WorkingDirectory = "${cfg.package}/lib/${cfg.package.untarDir}";
ExecStart = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/start-worker.sh spark://${cfg.worker.master}";
ExecStop = "${cfg.package}/lib/${cfg.package.untarDir}/sbin/stop-worker.sh";
TimeoutSec = 300;
StartLimitBurst=10;
Restart = "always";
};
};
};
tmpfiles.rules = [
"d '${cfg.worker.workDir}' - spark spark - -"
"d '${cfg.logDir}' - spark spark - -"
];
};
users = {
users.spark = {
description = "spark user.";
group = "spark";
isSystemUser = true;
};
groups.spark = { };
};
};
}
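
A minimal sketch of wiring the module up on two machines, mirroring the VM test below; the host name `master` is only illustrative:

# on the master node
services.spark.master = {
  enable = true;
  bind = "0.0.0.0";
};
networking.firewall.allowedTCPPorts = [ 7077 8080 ];

# on each worker node
services.spark.worker = {
  enable = true;
  master = "master:7077";
};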


@ -0,0 +1,28 @@
import ../make-test-python.nix ({...}: {
name = "spark";
nodes = {
worker = { nodes, pkgs, ... }: {
virtualisation.memorySize = 1024;
services.spark.worker = {
enable = true;
master = "master:7077";
};
};
master = { config, pkgs, ... }: {
services.spark.master = {
enable = true;
bind = "0.0.0.0";
};
networking.firewall.allowedTCPPorts = [ 22 7077 8080 ];
};
};
testScript = ''
master.wait_for_unit("spark-master.service")
worker.wait_for_unit("spark-worker.service")
worker.copy_from_host("${./spark_sample.py}", "/spark_sample.py")
assert "<title>Spark Master at spark://" in worker.succeed("curl -sSfkL http://master:8080/")
worker.succeed("spark-submit --master spark://master:7077 --executor-memory 512m --executor-cores 1 /spark_sample.py")
'';
})
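
Assuming the test is registered like other NixOS tests (the attribute name `nixosTests.spark` is an assumption), it can be run from a nixpkgs checkout with something like:

nix-build -A nixosTests.spark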


@ -0,0 +1,40 @@
from pyspark.sql import Row, SparkSession
from pyspark.sql import functions as F
from pyspark.sql.functions import udf
from pyspark.sql.types import *
from pyspark.sql.functions import explode
# Split a weight into chunks of 10.0 plus any remainder, e.g. 23.5 -> [10.0, 10.0, 3.5].
def explode_col(weight):
    return int(weight//10) * [10.0] + ([] if weight%10==0 else [weight%10])
spark = SparkSession.builder.getOrCreate()
dataSchema = [
StructField("feature_1", FloatType()),
StructField("feature_2", FloatType()),
StructField("bias_weight", FloatType())
]
data = [
Row(0.1, 0.2, 10.32),
Row(0.32, 1.43, 12.8),
Row(1.28, 1.12, 0.23)
]
df = spark.createDataFrame(spark.sparkContext.parallelize(data), StructType(dataSchema))
normalizing_constant = 100
sum_bias_weight = df.select(F.sum('bias_weight')).collect()[0][0]
normalizing_factor = normalizing_constant / sum_bias_weight
df = df.withColumn('normalized_bias_weight', df.bias_weight * normalizing_factor)
df = df.drop('bias_weight')
df = df.withColumnRenamed('normalized_bias_weight', 'bias_weight')
my_udf = udf(lambda x: explode_col(x), ArrayType(FloatType()))
df1 = df.withColumn('explode_val', my_udf(df.bias_weight))
df1 = df1.withColumn("explode_val_1", explode(df1.explode_val)).drop("explode_val")
df1 = df1.drop('bias_weight').withColumnRenamed('explode_val_1', 'bias_weight')
df1.show()
assert(df1.count() == 12)


@ -2,13 +2,13 @@
mkDerivation rec {
pname = "gpxsee";
version = "9.5";
version = "9.6";
src = fetchFromGitHub {
owner = "tumic0";
repo = "GPXSee";
rev = version;
sha256 = "sha256-KYw3RXdL/iiE2zFbrDzRWe8jdLYbF6gvOFAGyWgd3GM=";
sha256 = "sha256-Yj8lR8zgIV+gshea7rzLbMF84n1nyN3DytiIkr3B274=";
};
patches = (substituteAll {


@ -18,9 +18,9 @@
}
},
"beta": {
"version": "94.0.4606.41",
"sha256": "0kdi8rbly002abg9jwkmv6xmfcv7g0rm68lbdsnhr0biy7wvadij",
"sha256bin64": "0ayhppzwc4ia36hpakqg9rh3cggw33c7d2zkfaias3zm14k86ga2",
"version": "94.0.4606.50",
"sha256": "1aqy9bvypx66bvn5p15g94p47yfbal8mixs1d0j82pznqnqgph1z",
"sha256bin64": "07hq4qnbgq6m43zhipgy84yhiiy1fs6ffjkgsi8ixhr9b5pipzpv",
"deps": {
"gn": {
"version": "2021-08-11",
@ -31,9 +31,9 @@
}
},
"dev": {
"version": "95.0.4636.4",
"sha256": "1rhkmja9p2a8cxnjsrs0ipzajxwgp3c8q3903rp8ns1g579c4g3j",
"sha256bin64": "0l89iqi95fa10m3jdhbvfdjf4x88gscx6imkhy2x2ax669d19606",
"version": "95.0.4638.10",
"sha256": "0pgd5k24yly9fqpzigc5qqx6lvn6m95fjp7294cgmk0132icx71j",
"sha256bin64": "1gfaal3yxmi1n2nvfp39xp82g8vykzm0fjbdk0c1wh4gvlq2xx85",
"deps": {
"gn": {
"version": "2021-08-11",


@ -1,56 +1,75 @@
The old single-version derivation (removed):

{ lib, stdenv, fetchzip, makeWrapper, jre, pythonPackages, coreutils, hadoop
, RSupport? true, R
}:

with lib;

stdenv.mkDerivation rec {
  pname = "spark";
  version = "2.4.4";

  src = fetchzip {
    url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
    sha256 = "1a9w5k0207fysgpxx6db3a00fs5hdc2ncx99x4ccy2s0v5ndc66g";
  };

  nativeBuildInputs = [ makeWrapper ];
  buildInputs = [ jre pythonPackages.python pythonPackages.numpy ]
    ++ optional RSupport R;

  untarDir = "${pname}-${version}-bin-without-hadoop";
  installPhase = ''
    mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
    mv * $out/lib/${untarDir}

    sed -e 's/INFO, console/WARN, console/' < \
      $out/lib/${untarDir}/conf/log4j.properties.template > \
      $out/lib/${untarDir}/conf/log4j.properties

    cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
    export JAVA_HOME="${jre}"
    export SPARK_HOME="$out/lib/${untarDir}"
    export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
    export PYSPARK_PYTHON="${pythonPackages.python}/bin/${pythonPackages.python.executable}"
    export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
    ${optionalString RSupport
      ''export SPARKR_R_SHELL="${R}/bin/R"
      export PATH=$PATH:"${R}/bin/R"''}
    EOF

    for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
      makeWrapper "$n" "$out/bin/$(basename $n)"
      substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
    done
    ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
  '';

  meta = {
    description = "Apache Spark is a fast and general engine for large-scale data processing";
    homepage = "http://spark.apache.org";
    license = lib.licenses.asl20;
    platforms = lib.platforms.all;
    maintainers = with maintainers; [ thoughtpolice offline kamilchm ];
    repositories.git = "git://git.apache.org/spark.git";
  };
}

The new multi-version expression (added), building spark3 and spark2 from one shared helper:

{ lib, stdenv, fetchzip, makeWrapper, jdk8, python3Packages, extraPythonPackages ? [], coreutils, hadoop
, RSupport? true, R
}:

with lib;

let
  spark = { pname, version, src }:
    stdenv.mkDerivation rec {
      inherit pname version src;
      nativeBuildInputs = [ makeWrapper ];
      buildInputs = [ jdk8 python3Packages.python ]
        ++ extraPythonPackages
        ++ optional RSupport R;

      untarDir = "${pname}-${version}";
      installPhase = ''
        mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
        mv * $out/lib/${untarDir}

        cp $out/lib/${untarDir}/conf/log4j.properties{.template,}

        cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
        export JAVA_HOME="${jdk8}"
        export SPARK_HOME="$out/lib/${untarDir}"
        export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
        export PYSPARK_PYTHON="${python3Packages.python}/bin/${python3Packages.python.executable}"
        export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
        ${optionalString RSupport ''
          export SPARKR_R_SHELL="${R}/bin/R"
          export PATH="\$PATH:${R}/bin"''}
        EOF

        for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
          makeWrapper "$n" "$out/bin/$(basename $n)"
          substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
        done
        for n in $(find $out/lib/${untarDir}/sbin -type f); do
          # Spark deprecated scripts with "slave" in the name.
          # This line adds forward compatibility with the nixos spark module for
          # older versions of spark that don't have the new "worker" scripts.
          ln -s "$n" $(echo "$n" | sed -r 's/slave(s?).sh$/worker\1.sh/g') || true
        done
        ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
      '';

      meta = {
        description = "Apache Spark is a fast and general engine for large-scale data processing";
        homepage = "http://spark.apache.org";
        license = lib.licenses.asl20;
        platforms = lib.platforms.all;
        maintainers = with maintainers; [ thoughtpolice offline kamilchm illustris ];
        repositories.git = "git://git.apache.org/spark.git";
      };
    };
in {
  spark3 = spark rec {
    pname = "spark";
    version = "3.1.2";

    src = fetchzip {
      url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
      sha256 = "1bgh2y6jm7wqy6yc40rx68xkki31i3jiri2yixb1bm0i9pvsj9yf";
    };
  };
  spark2 = spark rec {
    pname = "spark";
    version = "2.4.8";

    src = fetchzip {
      url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
      sha256 = "1mkyq0gz9fiav25vr0dba5ivp0wh0mh7kswwnx8pvsmb6wbwyfxv";
    };
  };
}
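
A short sketch of tweaking the new expression from an overlay; the arguments `RSupport` and `extraPythonPackages` come from the expression above, while the overlay itself is only illustrative:

self: super: {
  spark = super.spark3.override {
    RSupport = false;
    extraPythonPackages = [ super.python3Packages.numpy ];
  };
}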


@ -2,13 +2,13 @@
stdenv.mkDerivation rec {
pname = "rdkafka";
version = "1.7.0";
version = "1.8.0";
src = fetchFromGitHub {
owner = "edenhill";
repo = "librdkafka";
rev = "v${version}";
sha256 = "sha256-NLlg9S3bn5rAFyRa1ETeQGhFJYb/1y2ZiDylOy7xNbY=";
sha256 = "sha256-LTO27UQqGHOEOXaw2Aln1i37ZaXIAKK8r7P2WAx8nIQ=";
};
nativeBuildInputs = [ pkg-config python3 ];


@ -11,7 +11,11 @@ stdenv.mkDerivation {
createFindlibDestdir = true;
configurePhase = "ocaml setup.ml -configure --prefix $out";
configurePhase = ''
runHook preConfigure
ocaml setup.ml -configure --prefix $out
runHook postConfigure
'';
meta = {
description = "Extensible Markdown library and tool in OCaml";


@ -3,13 +3,13 @@
rustPlatform.buildRustPackage rec {
pname = "trunk";
version = "0.10.0";
version = "0.13.1";
src = fetchFromGitHub {
owner = "thedodd";
repo = "trunk";
rev = "v${version}";
sha256 = "W6d05MKquG1QFkvofqWk94+6j5q8yuAjNgZFG3Z3kNo=";
sha256 = "sha256-pFF3x4vfouqO49q+MVyvYS40cH8cVn4yB61o14K6ABY=";
};
nativeBuildInputs = [ pkg-config ];
@ -17,7 +17,10 @@ rustPlatform.buildRustPackage rec {
then [ libiconv CoreServices Security ]
else [ openssl ];
cargoSha256 = "sha256-0ehz0ETNA2gOvTJUu8uq5H+bv4VXOJMq6AA8kn65m/Q=";
# requires network
checkFlags = [ "--skip=tools::tests::download_and_install_binaries" ];
cargoSha256 = "sha256-Faj0xZkGTs5z5vMfr2BwN1/xm5vopewI9ZWkOhyPq9c=";
meta = with lib; {
homepage = "https://github.com/thedodd/trunk";


@ -292,12 +292,14 @@ let
/* vim-plug is an extremely popular vim plugin manager.
*/
/* Remove repeated "/." suffixes from a path */
stripDots = path: lib.head (builtins.split "(/\\.)*$" path);
plugImpl =
(''
source ${vimPlugins.vim-plug.rtp}/plug.vim
silent! call plug#begin('/dev/null')
'' + (lib.concatMapStringsSep "\n" (pkg: "Plug '${pkg.rtp}'") plug.plugins) + ''
'' + (lib.concatMapStringsSep "\n" (pkg: "Plug '${stripDots pkg.rtp}'") plug.plugins) + ''
call plug#end()
'');
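
For illustration (the store path is made up), `stripDots` turns "/nix/store/abc-vim-plugin/./." into "/nix/store/abc-vim-plugin", so the generated Plug lines no longer end in "/.".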


@ -9,7 +9,7 @@
stdenv.mkDerivation rec {
pname = "slurm";
version = "21.08.0.1";
version = "21.08.1.1";
# N.B. We use github release tags instead of https://www.schedmd.com/downloads.php
# because the latter does not keep older releases.
@ -18,7 +18,7 @@ stdenv.mkDerivation rec {
repo = "slurm";
# The release tags use - instead of .
rev = "${pname}-${builtins.replaceStrings ["."] ["-"] version}";
sha256 = "0f1i64vby1qa2y9gv9a9x595s58p6dpw4yhljbgrc2wr7glvnfi3";
sha256 = "13fd72ifa1ar8yjavdywzvlk4fzxzhybi30pzcdvjnijc9y52j4m";
};
outputs = [ "out" "dev" ];


@ -7,14 +7,14 @@
beamPackages.mixRelease rec {
pname = "pleroma";
version = "2.4.0";
version = "2.4.1";
src = fetchFromGitLab {
domain = "git.pleroma.social";
owner = "pleroma";
repo = "pleroma";
rev = "v${version}";
sha256 = "sha256-1zp/qVk2K3q8AtkfXab0MBAHaQnY5enVtfdu64FFPhg=";
sha256 = "sha256-XYZIf8/Vznl4FvVAOy5GVfTBTCwhfUol/3vWWIDwIxQ=";
};
mixNixDeps = import ./mix.nix {


@ -2,13 +2,13 @@
buildGoModule rec {
pname = "tar2ext4";
version = "0.8.21";
version = "0.8.22";
src = fetchFromGitHub {
owner = "microsoft";
repo = "hcsshim";
rev = "v${version}";
sha256 = "sha256-oYCL6agif/BklMY5/ub6PExS6D/ZlTxi1QaabMOsEfw=";
sha256 = "sha256-z8w/xzNEebnQJTO4H5PlU5W+69MY1wQwmuz5inXBl1k=";
};
sourceRoot = "source/cmd/tar2ext4";


@ -12,11 +12,11 @@
stdenv.mkDerivation rec {
pname = "crowdin-cli";
version = "3.6.5";
version = "3.7.0";
src = fetchurl {
url = "https://github.com/crowdin/${pname}/releases/download/${version}/${pname}.zip";
sha256 = "sha256-9bw85dFazlcbZhx/bUoxW4iIsw+RdE3d+O5Cj3Obb7A=";
sha256 = "sha256-2TQL5k2ndckFjOOXNz7clVpwPUMStR4xgd1P+qUhNC8=";
};
nativeBuildInputs = [ installShellFiles makeWrapper unzip ];


@ -13146,7 +13146,10 @@ with pkgs;
self = pkgsi686Linux.callPackage ../development/interpreters/self { };
spark = callPackage ../applications/networking/cluster/spark { };
inherit (callPackages ../applications/networking/cluster/spark { hadoop = hadoop_3_1; })
spark3
spark2;
spark = spark3;
sparkleshare = callPackage ../applications/version-management/sparkleshare { };