# Apache Spark, packaged from the upstream "bin-without-hadoop" binary tarball.
#
# Arguments:
#   jre            - Java runtime exported as JAVA_HOME in conf/spark-env.sh.
#   pythonPackages - supplies the interpreter used for PYSPARK_PYTHON (plus numpy).
#   hadoop         - its `hadoop classpath` output becomes SPARK_DIST_CLASSPATH.
#   coreutils      - provides the absolute `dirname` patched into the launchers.
#   RSupport, R    - when RSupport is true (the default), R is added to the
#                    build inputs and wired into spark-env.sh for SparkR.
{ stdenv, fetchzip, makeWrapper, jre, pythonPackages, coreutils, hadoop
, RSupport? true, R
}:

with stdenv.lib;

stdenv.mkDerivation rec {

  pname = "spark";
  version = "2.4.4";

  src = fetchzip {
    url = "mirror://apache/spark/${pname}-${version}/${pname}-${version}-bin-without-hadoop.tgz";
    sha256 = "1a9w5k0207fysgpxx6db3a00fs5hdc2ncx99x4ccy2s0v5ndc66g";
  };

  buildInputs = [ makeWrapper jre pythonPackages.python pythonPackages.numpy ]
    ++ optional RSupport R;

  # Directory name under $out/lib that holds the whole distribution.
  untarDir = "${pname}-${version}-bin-without-hadoop";

  # Install steps:
  #  1. Move the unpacked distribution to $out/lib/${untarDir}.
  #  2. Derive conf/log4j.properties from the shipped template, lowering the
  #     console log level from INFO to WARN.
  #  3. Generate conf/spark-env.sh. The here-document delimiter is unquoted, so
  #     the shell expands `$out`, `$PYTHONPATH` and `$(hadoop classpath)` at
  #     BUILD time and bakes the results into the file; only variables written
  #     as `\$NAME` are left for expansion when spark-env.sh is sourced.
  #     PYTHONPATH therefore becomes "<runtime value>:<build-time value>"
  #     (presumably so that the numpy build input stays importable - confirm).
  #     PATH must be escaped the same way and must be extended with R's bin
  #     *directory*, not with the path of the R executable itself.
  #  4. Wrap every extension-less file of the distribution's bin/ into $out/bin
  #     and replace `dirname` in the original script with an absolute
  #     coreutils path.
  #  5. Symlink the assembly jar into $out/share/java.
  #     NOTE(review): the glob lib/spark-assembly-*.jar looks like the Spark 1.x
  #     layout; if it matches nothing for this version, `ln` creates a dangling
  #     link with a literal `*` in its name - confirm against the tarball.
  installPhase = ''
    mkdir -p $out/{lib/${untarDir}/conf,bin,/share/java}
    mv * $out/lib/${untarDir}

    sed -e 's/INFO, console/WARN, console/' < \
       $out/lib/${untarDir}/conf/log4j.properties.template > \
       $out/lib/${untarDir}/conf/log4j.properties

    cat > $out/lib/${untarDir}/conf/spark-env.sh <<- EOF
    export JAVA_HOME="${jre}"
    export SPARK_HOME="$out/lib/${untarDir}"
    export SPARK_DIST_CLASSPATH=$(${hadoop}/bin/hadoop classpath)
    export PYSPARK_PYTHON="${pythonPackages.python}/bin/${pythonPackages.python.executable}"
    export PYTHONPATH="\$PYTHONPATH:$PYTHONPATH"
    ${optionalString RSupport ''
      export SPARKR_R_SHELL="${R}/bin/R"
      export PATH="\$PATH:${R}/bin"''}
    EOF

    for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
      makeWrapper "$n" "$out/bin/$(basename $n)"
      substituteInPlace "$n" --replace dirname ${coreutils.out}/bin/dirname
    done
    ln -s $out/lib/${untarDir}/lib/spark-assembly-*.jar $out/share/java
  '';

  meta = {
    description = "Apache Spark is a fast and general engine for large-scale data processing";
    homepage = "http://spark.apache.org";
    license = licenses.asl20;
    platforms = platforms.all;
    maintainers = with maintainers; [ thoughtpolice offline kamilchm ];
    repositories.git = "git://git.apache.org/spark.git";
  };
}