From af17bfdedf94dc6e7fd34739ec827c915689a80c Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Sat, 14 Jul 2018 13:20:37 +0200 Subject: [PATCH] pythonPackages.scikitlearn: apply `max_iter` patch from scikitlearn master (#43483) See https://github.com/scikit-learn/scikit-learn/pull/10723 This fixes the build of `scikitlearn` on nixpkgs master and nixos-unstable. The failure is an upstream issue (see https://github.com/scikit-learn/scikit-learn/issues/10619) that was mainly caused by changes to the environment and has since been fixed on scikit-learn master. Closes #43466 --- .../python-modules/scikitlearn/default.nix | 4 + ...ld-be-less-than-max_iter-using-lbgfs.patch | 73 +++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 pkgs/development/python-modules/scikitlearn/n_iter-should-be-less-than-max_iter-using-lbgfs.patch diff --git a/pkgs/development/python-modules/scikitlearn/default.nix b/pkgs/development/python-modules/scikitlearn/default.nix index dc63fe7d104d..edaf7cd90cc5 100644 --- a/pkgs/development/python-modules/scikitlearn/default.nix +++ b/pkgs/development/python-modules/scikitlearn/default.nix @@ -14,6 +14,10 @@ buildPythonPackage rec { sha256 = "5ca0ad32ee04abe0d4ba02c8d89d501b4e5e0304bdf4d45c2e9875a735b323a0"; }; + # basically https://github.com/scikit-learn/scikit-learn/pull/10723, + # but rebased onto 0.19.1 + patches = [ ./n_iter-should-be-less-than-max_iter-using-lbgfs.patch ]; + buildInputs = [ nose pillow gfortran glibcLocales ]; propagatedBuildInputs = [ numpy scipy numpy.blas ]; diff --git a/pkgs/development/python-modules/scikitlearn/n_iter-should-be-less-than-max_iter-using-lbgfs.patch b/pkgs/development/python-modules/scikitlearn/n_iter-should-be-less-than-max_iter-using-lbgfs.patch new file mode 100644 index 000000000000..67309a673d08 --- /dev/null +++ b/pkgs/development/python-modules/scikitlearn/n_iter-should-be-less-than-max_iter-using-lbgfs.patch @@ -0,0 +1,73 @@ +diff --git a/sklearn/linear_model/huber.py 
b/sklearn/linear_model/huber.py +index e17dc1e..665654d 100644 +--- a/sklearn/linear_model/huber.py ++++ b/sklearn/linear_model/huber.py +@@ -181,7 +181,11 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator): + + n_iter_ : int + Number of iterations that fmin_l_bfgs_b has run for. +- Not available if SciPy version is 0.9 and below. ++ ++ .. versionchanged:: 0.20 ++ ++ In SciPy <= 1.0.0 the number of lbfgs iterations may exceed ++ ``max_iter``. ``n_iter_`` will now report at most ``max_iter``. + + outliers_ : array, shape (n_samples,) + A boolean mask which is set to True where the samples are identified +@@ -272,7 +276,9 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator): + raise ValueError("HuberRegressor convergence failed:" + " l-BFGS-b solver terminated with %s" + % dict_['task'].decode('ascii')) +- self.n_iter_ = dict_.get('nit', None) ++ # In scipy <= 1.0.0, nit may exceed maxiter. ++ # See https://github.com/scipy/scipy/issues/7854. ++ self.n_iter_ = min(dict_.get('nit', None), self.max_iter) + self.scale_ = parameters[-1] + if self.fit_intercept: + self.intercept_ = parameters[-2] +diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py +index 8646c9a..c72a7d9 100644 +--- a/sklearn/linear_model/logistic.py ++++ b/sklearn/linear_model/logistic.py +@@ -718,7 +718,9 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True, + warnings.warn("lbfgs failed to converge. Increase the number " + "of iterations.") + try: +- n_iter_i = info['nit'] - 1 ++ # In scipy <= 1.0.0, nit may exceed maxiter. ++ # See https://github.com/scipy/scipy/issues/7854. ++ n_iter_i = min(info['nit'], max_iter) + except: + n_iter_i = info['funcalls'] - 1 + elif solver == 'newton-cg': +@@ -1115,6 +1117,11 @@ class LogisticRegression(BaseEstimator, LinearClassifierMixin, + it returns only 1 element. For liblinear solver, only the maximum + number of iteration across all classes is given. + ++ .. 
versionchanged:: 0.20 ++ ++ In SciPy <= 1.0.0 the number of lbfgs iterations may exceed ++ ``max_iter``. ``n_iter_`` will now report at most ``max_iter``. ++ + See also + -------- + SGDClassifier : incrementally trained logistic regression (when given +diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py +index 08f4fdf..ca1092f 100644 +--- a/sklearn/linear_model/tests/test_huber.py ++++ b/sklearn/linear_model/tests/test_huber.py +@@ -42,6 +42,13 @@ def test_huber_equals_lr_for_high_epsilon(): + assert_almost_equal(huber.intercept_, lr.intercept_, 2) + + ++def test_huber_max_iter(): ++ X, y = make_regression_with_outliers() ++ huber = HuberRegressor(max_iter=1) ++ huber.fit(X, y) ++ assert huber.n_iter_ == huber.max_iter ++ ++ + def test_huber_gradient(): + # Test that the gradient calculated by _huber_loss_and_gradient is correct + rng = np.random.RandomState(1)