@@ -0,0 +1,5 @@
"""This module contains least-squares algorithms."""
|
||||
from .least_squares import least_squares
|
||||
from .lsq_linear import lsq_linear
|
||||
|
||||
__all__ = ['least_squares', 'lsq_linear']

venv/lib/python3.12/site-packages/scipy/optimize/_lsq/bvls.py
@@ -0,0 +1,183 @@
"""Bounded-variable least-squares algorithm."""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm, lstsq
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .common import print_header_linear, print_iteration_linear
|
||||
|
||||
|
||||
def compute_kkt_optimality(g, on_bound):
|
||||
"""Compute the maximum violation of KKT conditions."""
|
||||
g_kkt = g * on_bound
|
||||
free_set = on_bound == 0
|
||||
g_kkt[free_set] = np.abs(g[free_set])
|
||||
return np.max(g_kkt)
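# A minimal, hypothetical usage sketch (the helper name is illustrative and
# not part of this module): with one free variable and two variables sitting
# on bounds, the reported optimality is the largest KKT violation.
def _example_kkt_optimality():
    import numpy as np
    g = np.array([0.5, -2.0, 1.0])
    on_bound = np.array([0, -1, 1])   # free, at lower bound, at upper bound
    # The free variable contributes |g| = 0.5; the bounded ones contribute
    # g * on_bound = 2.0 and 1.0, so the maximum violation is 2.0.
    return compute_kkt_optimality(g, on_bound)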
|
||||
|
||||
|
||||
def bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose, rcond=None):
|
||||
m, n = A.shape
|
||||
|
||||
x = x_lsq.copy()
|
||||
on_bound = np.zeros(n)
|
||||
|
||||
mask = x <= lb
|
||||
x[mask] = lb[mask]
|
||||
on_bound[mask] = -1
|
||||
|
||||
mask = x >= ub
|
||||
x[mask] = ub[mask]
|
||||
on_bound[mask] = 1
|
||||
|
||||
free_set = on_bound == 0
|
||||
active_set = ~free_set
|
||||
free_set, = np.nonzero(free_set)
|
||||
|
||||
r = A.dot(x) - b
|
||||
cost = 0.5 * np.dot(r, r)
|
||||
initial_cost = cost
|
||||
g = A.T.dot(r)
|
||||
|
||||
cost_change = None
|
||||
step_norm = None
|
||||
iteration = 0
|
||||
|
||||
if verbose == 2:
|
||||
print_header_linear()
|
||||
|
||||
# This is the initialization loop. The requirement is that the
# least-squares solution on free variables is feasible before BVLS starts.
# One possible initialization is to set all variables to lower or upper
# bounds, but many iterations may be required from this state later on.
# The implemented ad hoc procedure should intuitively give a better initial
# state: find the least-squares solution on the current free variables;
# if it is feasible, stop; otherwise, set the violating variables to their
# corresponding bounds and continue on the reduced set of free variables.
|
||||
|
||||
while free_set.size > 0:
|
||||
if verbose == 2:
|
||||
optimality = compute_kkt_optimality(g, on_bound)
|
||||
print_iteration_linear(iteration, cost, cost_change, step_norm,
|
||||
optimality)
|
||||
|
||||
iteration += 1
|
||||
x_free_old = x[free_set].copy()
|
||||
|
||||
A_free = A[:, free_set]
|
||||
b_free = b - A.dot(x * active_set)
|
||||
z = lstsq(A_free, b_free, rcond=rcond)[0]
|
||||
|
||||
lbv = z < lb[free_set]
|
||||
ubv = z > ub[free_set]
|
||||
v = lbv | ubv
|
||||
|
||||
if np.any(lbv):
|
||||
ind = free_set[lbv]
|
||||
x[ind] = lb[ind]
|
||||
active_set[ind] = True
|
||||
on_bound[ind] = -1
|
||||
|
||||
if np.any(ubv):
|
||||
ind = free_set[ubv]
|
||||
x[ind] = ub[ind]
|
||||
active_set[ind] = True
|
||||
on_bound[ind] = 1
|
||||
|
||||
ind = free_set[~v]
|
||||
x[ind] = z[~v]
|
||||
|
||||
r = A.dot(x) - b
|
||||
cost_new = 0.5 * np.dot(r, r)
|
||||
cost_change = cost - cost_new
|
||||
cost = cost_new
|
||||
g = A.T.dot(r)
|
||||
step_norm = norm(x[free_set] - x_free_old)
|
||||
|
||||
if np.any(v):
|
||||
free_set = free_set[~v]
|
||||
else:
|
||||
break
|
||||
|
||||
if max_iter is None:
|
||||
max_iter = n
|
||||
max_iter += iteration
|
||||
|
||||
termination_status = None
|
||||
|
||||
# Main BVLS loop.
|
||||
|
||||
optimality = compute_kkt_optimality(g, on_bound)
|
||||
for iteration in range(iteration, max_iter): # BVLS Loop A
|
||||
if verbose == 2:
|
||||
print_iteration_linear(iteration, cost, cost_change,
|
||||
step_norm, optimality)
|
||||
|
||||
if optimality < tol:
|
||||
termination_status = 1
|
||||
|
||||
if termination_status is not None:
|
||||
break
|
||||
|
||||
move_to_free = np.argmax(g * on_bound)
|
||||
on_bound[move_to_free] = 0
|
||||
|
||||
while True: # BVLS Loop B
|
||||
|
||||
free_set = on_bound == 0
|
||||
active_set = ~free_set
|
||||
free_set, = np.nonzero(free_set)
|
||||
|
||||
x_free = x[free_set]
|
||||
x_free_old = x_free.copy()
|
||||
lb_free = lb[free_set]
|
||||
ub_free = ub[free_set]
|
||||
|
||||
A_free = A[:, free_set]
|
||||
b_free = b - A.dot(x * active_set)
|
||||
z = lstsq(A_free, b_free, rcond=rcond)[0]
|
||||
|
||||
lbv, = np.nonzero(z < lb_free)
|
||||
ubv, = np.nonzero(z > ub_free)
|
||||
v = np.hstack((lbv, ubv))
|
||||
|
||||
if v.size > 0:
|
||||
alphas = np.hstack((
|
||||
lb_free[lbv] - x_free[lbv],
|
||||
ub_free[ubv] - x_free[ubv])) / (z[v] - x_free[v])
|
||||
|
||||
i = np.argmin(alphas)
|
||||
i_free = v[i]
|
||||
alpha = alphas[i]
|
||||
|
||||
x_free *= 1 - alpha
|
||||
x_free += alpha * z
|
||||
x[free_set] = x_free
|
||||
|
||||
if i < lbv.size:
|
||||
on_bound[free_set[i_free]] = -1
|
||||
else:
|
||||
on_bound[free_set[i_free]] = 1
|
||||
else:
|
||||
x_free = z
|
||||
x[free_set] = x_free
|
||||
break
|
||||
|
||||
step_norm = norm(x_free - x_free_old)
|
||||
|
||||
r = A.dot(x) - b
|
||||
cost_new = 0.5 * np.dot(r, r)
|
||||
cost_change = cost - cost_new
|
||||
|
||||
if cost_change < tol * cost:
|
||||
termination_status = 2
|
||||
cost = cost_new
|
||||
|
||||
g = A.T.dot(r)
|
||||
optimality = compute_kkt_optimality(g, on_bound)
|
||||
|
||||
if termination_status is None:
|
||||
termination_status = 0
|
||||
|
||||
return OptimizeResult(
|
||||
x=x, fun=r, cost=cost, optimality=optimality, active_mask=on_bound,
|
||||
nit=iteration + 1, status=termination_status,
|
||||
initial_cost=initial_cost)
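# A minimal, hypothetical sketch (assumes SciPy and NumPy are installed; the
# helper name is illustrative): this routine is normally reached through the
# public `scipy.optimize.lsq_linear` interface with ``method='bvls'``.
def _example_bvls_via_lsq_linear():
    import numpy as np
    from scipy.optimize import lsq_linear
    rng = np.random.default_rng(0)
    A = rng.standard_normal((20, 5))
    b = rng.standard_normal(20)
    res = lsq_linear(A, b, bounds=(-0.1, 0.1), method='bvls')
    # res.x stays within [-0.1, 0.1]; res.active_mask marks variables on bounds.
    return res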

venv/lib/python3.12/site-packages/scipy/optimize/_lsq/common.py
@@ -0,0 +1,733 @@
"""Functions used by least-squares algorithms."""
|
||||
from math import copysign
|
||||
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
|
||||
from scipy.linalg import cho_factor, cho_solve, LinAlgError
|
||||
from scipy.sparse import issparse
|
||||
from scipy.sparse.linalg import LinearOperator, aslinearoperator
|
||||
|
||||
|
||||
EPS = np.finfo(float).eps
|
||||
|
||||
|
||||
# Functions related to a trust-region problem.
|
||||
|
||||
|
||||
def intersect_trust_region(x, s, Delta):
|
||||
"""Find the intersection of a line with the boundary of a trust region.
|
||||
|
||||
This function solves the quadratic equation with respect to t
|
||||
||(x + s*t)||**2 = Delta**2.
|
||||
|
||||
Returns
|
||||
-------
|
||||
t_neg, t_pos : tuple of float
|
||||
Negative and positive roots.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If `s` is zero or `x` is not within the trust region.
|
||||
"""
|
||||
a = np.dot(s, s)
|
||||
if a == 0:
|
||||
raise ValueError("`s` is zero.")
|
||||
|
||||
b = np.dot(x, s)
|
||||
|
||||
c = np.dot(x, x) - Delta**2
|
||||
if c > 0:
|
||||
raise ValueError("`x` is not within the trust region.")
|
||||
|
||||
d = np.sqrt(b*b - a*c) # Root from one fourth of the discriminant.
|
||||
|
||||
# Computations below avoid loss of significance, see "Numerical Recipes".
|
||||
q = -(b + copysign(d, b))
|
||||
t1 = q / a
|
||||
t2 = c / q
|
||||
|
||||
if t1 < t2:
|
||||
return t1, t2
|
||||
else:
|
||||
return t2, t1
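# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): the line x + t*s starting at (1, 0) along (0, 1) crosses the
# circle of radius 2 at t = -sqrt(3) and t = +sqrt(3).
def _example_intersect_trust_region():
    import numpy as np
    x = np.array([1.0, 0.0])
    s = np.array([0.0, 1.0])
    t_neg, t_pos = intersect_trust_region(x, s, 2.0)
    return np.allclose([t_neg, t_pos], [-np.sqrt(3), np.sqrt(3)])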
|
||||
|
||||
|
||||
def solve_lsq_trust_region(n, m, uf, s, V, Delta, initial_alpha=None,
|
||||
rtol=0.01, max_iter=10):
|
||||
"""Solve a trust-region problem arising in least-squares minimization.
|
||||
|
||||
This function implements a method described by J. J. More [1]_ and used
|
||||
in MINPACK, but it relies on a single SVD of Jacobian instead of series
|
||||
of Cholesky decompositions. Before running this function, compute:
|
||||
``U, s, VT = svd(J, full_matrices=False)``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n : int
|
||||
Number of variables.
|
||||
m : int
|
||||
Number of residuals.
|
||||
uf : ndarray
|
||||
Computed as U.T.dot(f).
|
||||
s : ndarray
|
||||
Singular values of J.
|
||||
V : ndarray
|
||||
Transpose of VT.
|
||||
Delta : float
|
||||
Radius of a trust region.
|
||||
initial_alpha : float, optional
|
||||
Initial guess for alpha, which might be available from a previous
|
||||
iteration. If None, determined automatically.
|
||||
rtol : float, optional
|
||||
Stopping tolerance for the root-finding procedure. Namely, the
|
||||
solution ``p`` will satisfy ``abs(norm(p) - Delta) < rtol * Delta``.
|
||||
max_iter : int, optional
|
||||
Maximum allowed number of iterations for the root-finding procedure.
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : ndarray, shape (n,)
|
||||
Found solution of a trust-region problem.
|
||||
alpha : float
|
||||
Positive value such that (J.T*J + alpha*I)*p = -J.T*f.
|
||||
Sometimes called Levenberg-Marquardt parameter.
|
||||
n_iter : int
|
||||
Number of iterations made by root-finding procedure. Zero means
|
||||
that Gauss-Newton step was selected as the solution.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
|
||||
and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes
|
||||
in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
|
||||
"""
|
||||
def phi_and_derivative(alpha, suf, s, Delta):
|
||||
"""Function of which to find zero.
|
||||
|
||||
It is defined as "norm of regularized (by alpha) least-squares
|
||||
solution minus `Delta`". Refer to [1]_.
|
||||
"""
|
||||
denom = s**2 + alpha
|
||||
p_norm = norm(suf / denom)
|
||||
phi = p_norm - Delta
|
||||
phi_prime = -np.sum(suf ** 2 / denom**3) / p_norm
|
||||
return phi, phi_prime
|
||||
|
||||
suf = s * uf
|
||||
|
||||
# Check if J has full rank and try Gauss-Newton step.
|
||||
if m >= n:
|
||||
threshold = EPS * m * s[0]
|
||||
full_rank = s[-1] > threshold
|
||||
else:
|
||||
full_rank = False
|
||||
|
||||
if full_rank:
|
||||
p = -V.dot(uf / s)
|
||||
if norm(p) <= Delta:
|
||||
return p, 0.0, 0
|
||||
|
||||
alpha_upper = norm(suf) / Delta
|
||||
|
||||
if full_rank:
|
||||
phi, phi_prime = phi_and_derivative(0.0, suf, s, Delta)
|
||||
alpha_lower = -phi / phi_prime
|
||||
else:
|
||||
alpha_lower = 0.0
|
||||
|
||||
if initial_alpha is None or not full_rank and initial_alpha == 0:
|
||||
alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
|
||||
else:
|
||||
alpha = initial_alpha
|
||||
|
||||
for it in range(max_iter):
|
||||
if alpha < alpha_lower or alpha > alpha_upper:
|
||||
alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
|
||||
|
||||
phi, phi_prime = phi_and_derivative(alpha, suf, s, Delta)
|
||||
|
||||
if phi < 0:
|
||||
alpha_upper = alpha
|
||||
|
||||
ratio = phi / phi_prime
|
||||
alpha_lower = max(alpha_lower, alpha - ratio)
|
||||
alpha -= (phi + Delta) * ratio / Delta
|
||||
|
||||
if np.abs(phi) < rtol * Delta:
|
||||
break
|
||||
|
||||
p = -V.dot(suf / (s**2 + alpha))
|
||||
|
||||
# Make the norm of p equal to Delta; p is changed only slightly during this.
# It is done to prevent p from lying outside the trust region (which can
# cause problems later).
|
||||
p *= Delta / norm(p)
|
||||
|
||||
return p, alpha, it + 1
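# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): feed the solver the SVD of a random Jacobian and check that
# the returned step respects the trust-region radius up to the default
# relative tolerance.
def _example_solve_lsq_trust_region():
    import numpy as np
    from numpy.linalg import svd, norm
    rng = np.random.default_rng(0)
    J = rng.standard_normal((10, 3))
    f = rng.standard_normal(10)
    U, s, VT = svd(J, full_matrices=False)
    Delta = 0.5
    p, alpha, n_iter = solve_lsq_trust_region(3, 10, U.T.dot(f), s, VT.T, Delta)
    return norm(p) <= Delta * (1 + 0.01)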
|
||||
|
||||
|
||||
def solve_trust_region_2d(B, g, Delta):
|
||||
"""Solve a general trust-region problem in 2 dimensions.
|
||||
|
||||
The problem is reformulated as a 4th order algebraic equation,
|
||||
the solution of which is found by numpy.roots.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
B : ndarray, shape (2, 2)
|
||||
Symmetric matrix, defines a quadratic term of the function.
|
||||
g : ndarray, shape (2,)
|
||||
Defines a linear term of the function.
|
||||
Delta : float
|
||||
Radius of a trust region.
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : ndarray, shape (2,)
|
||||
Found solution.
|
||||
newton_step : bool
|
||||
Whether the returned solution is the Newton step which lies within
|
||||
the trust region.
|
||||
"""
|
||||
try:
|
||||
R, lower = cho_factor(B)
|
||||
p = -cho_solve((R, lower), g)
|
||||
if np.dot(p, p) <= Delta**2:
|
||||
return p, True
|
||||
except LinAlgError:
|
||||
pass
|
||||
|
||||
a = B[0, 0] * Delta**2
|
||||
b = B[0, 1] * Delta**2
|
||||
c = B[1, 1] * Delta**2
|
||||
|
||||
d = g[0] * Delta
|
||||
f = g[1] * Delta
|
||||
|
||||
coeffs = np.array(
|
||||
[-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
|
||||
t = np.roots(coeffs) # Can handle leading zeros.
|
||||
t = np.real(t[np.isreal(t)])
|
||||
|
||||
p = Delta * np.vstack((2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
|
||||
value = 0.5 * np.sum(p * B.dot(p), axis=0) + np.dot(g, p)
|
||||
i = np.argmin(value)
|
||||
p = p[:, i]
|
||||
|
||||
return p, False
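# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): for a positive definite B the Newton step -B^{-1} g = (0.5, 1)
# fits inside a radius of 10, so it is returned and `newton_step` is True.
def _example_solve_trust_region_2d():
    import numpy as np
    B = np.array([[2.0, 0.0], [0.0, 1.0]])
    g = np.array([-1.0, -1.0])
    p, newton_step = solve_trust_region_2d(B, g, 10.0)
    return newton_step and np.allclose(p, [0.5, 1.0])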
|
||||
|
||||
|
||||
def update_tr_radius(Delta, actual_reduction, predicted_reduction,
|
||||
step_norm, bound_hit):
|
||||
"""Update the radius of a trust region based on the cost reduction.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Delta : float
|
||||
New radius.
|
||||
ratio : float
|
||||
Ratio between actual and predicted reductions.
|
||||
"""
|
||||
if predicted_reduction > 0:
|
||||
ratio = actual_reduction / predicted_reduction
|
||||
elif predicted_reduction == actual_reduction == 0:
|
||||
ratio = 1
|
||||
else:
|
||||
ratio = 0
|
||||
|
||||
if ratio < 0.25:
|
||||
Delta = 0.25 * step_norm
|
||||
elif ratio > 0.75 and bound_hit:
|
||||
Delta *= 2.0
|
||||
|
||||
return Delta, ratio
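# A minimal, hypothetical sketch with hand-picked numbers (the helper name is
# illustrative): the actual reduction closely matches the prediction
# (ratio = 0.9) and the step hit the trust-region boundary, so the radius is
# doubled.
def _example_update_tr_radius():
    Delta, ratio = update_tr_radius(1.0, actual_reduction=0.9,
                                    predicted_reduction=1.0, step_norm=1.0,
                                    bound_hit=True)
    return Delta == 2.0 and ratio == 0.9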
|
||||
|
||||
|
||||
# Construction and minimization of quadratic functions.
|
||||
|
||||
|
||||
def build_quadratic_1d(J, g, s, diag=None, s0=None):
|
||||
"""Parameterize a multivariate quadratic function along a line.
|
||||
|
||||
The resulting univariate quadratic function is given as follows::
|
||||
|
||||
f(t) = 0.5 * (s0 + s*t).T * (J.T*J + diag) * (s0 + s*t) +
|
||||
g.T * (s0 + s*t)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
J : ndarray, sparse matrix or LinearOperator shape (m, n)
|
||||
Jacobian matrix, affects the quadratic term.
|
||||
g : ndarray, shape (n,)
|
||||
Gradient, defines the linear term.
|
||||
s : ndarray, shape (n,)
|
||||
Direction vector of a line.
|
||||
diag : None or ndarray with shape (n,), optional
|
||||
Additional diagonal part, affects the quadratic term.
If None, assumed to be 0.
|
||||
s0 : None or ndarray with shape (n,), optional
|
||||
Initial point. If None, assumed to be 0.
|
||||
|
||||
Returns
|
||||
-------
|
||||
a : float
|
||||
Coefficient for t**2.
|
||||
b : float
|
||||
Coefficient for t.
|
||||
c : float
|
||||
Free term. Returned only if `s0` is provided.
|
||||
"""
|
||||
v = J.dot(s)
|
||||
a = np.dot(v, v)
|
||||
if diag is not None:
|
||||
a += np.dot(s * diag, s)
|
||||
a *= 0.5
|
||||
|
||||
b = np.dot(g, s)
|
||||
|
||||
if s0 is not None:
|
||||
u = J.dot(s0)
|
||||
b += np.dot(u, v)
|
||||
c = 0.5 * np.dot(u, u) + np.dot(g, s0)
|
||||
if diag is not None:
|
||||
b += np.dot(s0 * diag, s)
|
||||
c += 0.5 * np.dot(s0 * diag, s0)
|
||||
return a, b, c
|
||||
else:
|
||||
return a, b
|
||||
|
||||
|
||||
def minimize_quadratic_1d(a, b, lb, ub, c=0):
|
||||
"""Minimize a 1-D quadratic function subject to bounds.
|
||||
|
||||
The free term `c` is 0 by default. Bounds must be finite.
|
||||
|
||||
Returns
|
||||
-------
|
||||
t : float
|
||||
Minimum point.
|
||||
y : float
|
||||
Minimum value.
|
||||
"""
|
||||
t = [lb, ub]
|
||||
if a != 0:
|
||||
extremum = -0.5 * b / a
|
||||
if lb < extremum < ub:
|
||||
t.append(extremum)
|
||||
t = np.asarray(t)
|
||||
y = t * (a * t + b) + c
|
||||
min_index = np.argmin(y)
|
||||
return t[min_index], y[min_index]
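# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): build the 1-D quadratic along direction s and minimize it on
# [-2, 2]. Here a = 0.5 * ||J s||**2 = 0.5 and b = g.T s = 1.0, so the
# unconstrained minimum t = -b / (2 a) = -1.0 lies inside the interval.
def _example_quadratic_along_a_line():
    import numpy as np
    J = np.array([[1.0, 0.0], [0.0, 2.0]])
    g = np.array([1.0, 1.0])
    s = np.array([1.0, 0.0])
    a, b = build_quadratic_1d(J, g, s)
    t, y = minimize_quadratic_1d(a, b, -2.0, 2.0)
    return np.isclose(t, -1.0) and np.isclose(y, -0.5)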
|
||||
|
||||
|
||||
def evaluate_quadratic(J, g, s, diag=None):
|
||||
"""Compute values of a quadratic function arising in least squares.
|
||||
|
||||
The function is 0.5 * s.T * (J.T * J + diag) * s + g.T * s.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
J : ndarray, sparse matrix or LinearOperator, shape (m, n)
|
||||
Jacobian matrix, affects the quadratic term.
|
||||
g : ndarray, shape (n,)
|
||||
Gradient, defines the linear term.
|
||||
s : ndarray, shape (k, n) or (n,)
|
||||
Array containing steps as rows.
|
||||
diag : ndarray, shape (n,), optional
|
||||
Additional diagonal part, affects the quadratic term.
If None, assumed to be 0.
|
||||
|
||||
Returns
|
||||
-------
|
||||
values : ndarray with shape (k,) or float
|
||||
Values of the function. If `s` was 2-D, then ndarray is
|
||||
returned, otherwise, float is returned.
|
||||
"""
|
||||
if s.ndim == 1:
|
||||
Js = J.dot(s)
|
||||
q = np.dot(Js, Js)
|
||||
if diag is not None:
|
||||
q += np.dot(s * diag, s)
|
||||
else:
|
||||
Js = J.dot(s.T)
|
||||
q = np.sum(Js**2, axis=0)
|
||||
if diag is not None:
|
||||
q += np.sum(diag * s**2, axis=1)
|
||||
|
||||
l = np.dot(s, g)
|
||||
|
||||
return 0.5 * q + l
|
||||
|
||||
|
||||
# Utility functions to work with bound constraints.
|
||||
|
||||
|
||||
def in_bounds(x, lb, ub):
|
||||
"""Check if a point lies within bounds."""
|
||||
return np.all((x >= lb) & (x <= ub))
|
||||
|
||||
|
||||
def step_size_to_bound(x, s, lb, ub):
|
||||
"""Compute the minimum step size required to reach a bound.
|
||||
|
||||
The function computes a positive scalar t, such that x + s * t is on
|
||||
the bound.
|
||||
|
||||
Returns
|
||||
-------
|
||||
step : float
|
||||
Computed step. Non-negative value.
|
||||
hits : ndarray of int with shape of x
|
||||
Each element indicates whether a corresponding variable reaches the
|
||||
bound:
|
||||
|
||||
* 0 - the bound was not hit.
|
||||
* -1 - the lower bound was hit.
|
||||
* 1 - the upper bound was hit.
|
||||
"""
|
||||
non_zero = np.nonzero(s)
|
||||
s_non_zero = s[non_zero]
|
||||
steps = np.empty_like(x)
|
||||
steps.fill(np.inf)
|
||||
with np.errstate(over='ignore'):
|
||||
steps[non_zero] = np.maximum((lb - x)[non_zero] / s_non_zero,
|
||||
(ub - x)[non_zero] / s_non_zero)
|
||||
min_step = np.min(steps)
|
||||
return min_step, np.equal(steps, min_step) * np.sign(s).astype(int)
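# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): moving from the origin along (1, 2) inside the box
# [-1, 1] x [-1, 1], the second coordinate reaches its upper bound first,
# after a step of 0.5.
def _example_step_size_to_bound():
    import numpy as np
    x = np.zeros(2)
    s = np.array([1.0, 2.0])
    step, hits = step_size_to_bound(x, s, -np.ones(2), np.ones(2))
    return step == 0.5 and list(hits) == [0, 1]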
|
||||
|
||||
|
||||
def find_active_constraints(x, lb, ub, rtol=1e-10):
|
||||
"""Determine which constraints are active in a given point.
|
||||
|
||||
The threshold is computed using `rtol` and the absolute value of the
|
||||
closest bound.
|
||||
|
||||
Returns
|
||||
-------
|
||||
active : ndarray of int with shape of x
|
||||
Each component shows whether the corresponding constraint is active:
|
||||
|
||||
* 0 - a constraint is not active.
|
||||
* -1 - a lower bound is active.
|
||||
* 1 - an upper bound is active.
|
||||
"""
|
||||
active = np.zeros_like(x, dtype=int)
|
||||
|
||||
if rtol == 0:
|
||||
active[x <= lb] = -1
|
||||
active[x >= ub] = 1
|
||||
return active
|
||||
|
||||
lower_dist = x - lb
|
||||
upper_dist = ub - x
|
||||
|
||||
lower_threshold = rtol * np.maximum(1, np.abs(lb))
|
||||
upper_threshold = rtol * np.maximum(1, np.abs(ub))
|
||||
|
||||
lower_active = (np.isfinite(lb) &
|
||||
(lower_dist <= np.minimum(upper_dist, lower_threshold)))
|
||||
active[lower_active] = -1
|
||||
|
||||
upper_active = (np.isfinite(ub) &
|
||||
(upper_dist <= np.minimum(lower_dist, upper_threshold)))
|
||||
active[upper_active] = 1
|
||||
|
||||
return active
|
||||
|
||||
|
||||
def make_strictly_feasible(x, lb, ub, rstep=1e-10):
|
||||
"""Shift a point to the interior of a feasible region.
|
||||
|
||||
Each element of the returned vector is at least at a relative distance
|
||||
`rstep` from the closest bound. If ``rstep=0`` then `np.nextafter` is used.
|
||||
"""
|
||||
x_new = x.copy()
|
||||
|
||||
active = find_active_constraints(x, lb, ub, rstep)
|
||||
lower_mask = np.equal(active, -1)
|
||||
upper_mask = np.equal(active, 1)
|
||||
|
||||
if rstep == 0:
|
||||
x_new[lower_mask] = np.nextafter(lb[lower_mask], ub[lower_mask])
|
||||
x_new[upper_mask] = np.nextafter(ub[upper_mask], lb[upper_mask])
|
||||
else:
|
||||
x_new[lower_mask] = (lb[lower_mask] +
|
||||
rstep * np.maximum(1, np.abs(lb[lower_mask])))
|
||||
x_new[upper_mask] = (ub[upper_mask] -
|
||||
rstep * np.maximum(1, np.abs(ub[upper_mask])))
|
||||
|
||||
tight_bounds = (x_new < lb) | (x_new > ub)
|
||||
x_new[tight_bounds] = 0.5 * (lb[tight_bounds] + ub[tight_bounds])
|
||||
|
||||
return x_new
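# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): the first variable sits exactly on its lower bound and the
# second on its upper bound, so both are reported as active and then nudged
# strictly inside the feasible region.
def _example_active_constraints_and_feasibility():
    import numpy as np
    lb = np.array([0.0, -np.inf])
    ub = np.array([1.0, 1.0])
    x = np.array([0.0, 1.0])
    active = find_active_constraints(x, lb, ub)            # [-1, 1]
    x_new = make_strictly_feasible(x, lb, ub, rstep=1e-10)
    return list(active) == [-1, 1] and np.all((x_new > lb) & (x_new < ub))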
|
||||
|
||||
|
||||
def CL_scaling_vector(x, g, lb, ub):
|
||||
"""Compute Coleman-Li scaling vector and its derivatives.
|
||||
|
||||
Components of a vector v are defined as follows::
|
||||
|
||||
| ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
|
||||
v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
|
||||
| 1, otherwise
|
||||
|
||||
According to this definition v[i] >= 0 for all i. It differs from the
|
||||
definition in paper [1]_ (eq. (2.2)), where the absolute value of v is
|
||||
used. Both definitions are equivalent down the line.
|
||||
Derivatives of v with respect to x take value 1, -1 or 0 depending on a
|
||||
case.
|
||||
|
||||
Returns
|
||||
-------
|
||||
v : ndarray with shape of x
|
||||
Scaling vector.
|
||||
dv : ndarray with shape of x
|
||||
Derivatives of v[i] with respect to x[i], diagonal elements of v's
|
||||
Jacobian.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] M.A. Branch, T.F. Coleman, and Y. Li, "A Subspace, Interior,
|
||||
and Conjugate Gradient Method for Large-Scale Bound-Constrained
|
||||
Minimization Problems," SIAM Journal on Scientific Computing,
|
||||
Vol. 21, Number 1, pp 1-23, 1999.
|
||||
"""
|
||||
v = np.ones_like(x)
|
||||
dv = np.zeros_like(x)
|
||||
|
||||
mask = (g < 0) & np.isfinite(ub)
|
||||
v[mask] = ub[mask] - x[mask]
|
||||
dv[mask] = -1
|
||||
|
||||
mask = (g > 0) & np.isfinite(lb)
|
||||
v[mask] = x[mask] - lb[mask]
|
||||
dv[mask] = 1
|
||||
|
||||
return v, dv
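# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): the first gradient component is positive, so its scaling is
# the distance to the lower bound; the second is negative, so the distance to
# the upper bound is used instead.
def _example_cl_scaling_vector():
    import numpy as np
    x = np.array([0.25, 0.75])
    g = np.array([1.0, -1.0])
    v, dv = CL_scaling_vector(x, g, np.zeros(2), np.ones(2))
    return list(v) == [0.25, 0.25] and list(dv) == [1.0, -1.0]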
|
||||
|
||||
|
||||
def reflective_transformation(y, lb, ub):
|
||||
"""Compute reflective transformation and its gradient."""
|
||||
if in_bounds(y, lb, ub):
|
||||
return y, np.ones_like(y)
|
||||
|
||||
lb_finite = np.isfinite(lb)
|
||||
ub_finite = np.isfinite(ub)
|
||||
|
||||
x = y.copy()
|
||||
g_negative = np.zeros_like(y, dtype=bool)
|
||||
|
||||
mask = lb_finite & ~ub_finite
|
||||
x[mask] = np.maximum(y[mask], 2 * lb[mask] - y[mask])
|
||||
g_negative[mask] = y[mask] < lb[mask]
|
||||
|
||||
mask = ~lb_finite & ub_finite
|
||||
x[mask] = np.minimum(y[mask], 2 * ub[mask] - y[mask])
|
||||
g_negative[mask] = y[mask] > ub[mask]
|
||||
|
||||
mask = lb_finite & ub_finite
|
||||
d = ub - lb
|
||||
t = np.remainder(y[mask] - lb[mask], 2 * d[mask])
|
||||
x[mask] = lb[mask] + np.minimum(t, 2 * d[mask] - t)
|
||||
g_negative[mask] = t > d[mask]
|
||||
|
||||
g = np.ones_like(y)
|
||||
g[g_negative] = -1
|
||||
|
||||
return x, g
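# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): a point 0.3 above the upper bound of the box [0, 1] is
# reflected back to 0.7, and the corresponding gradient sign flips.
def _example_reflective_transformation():
    import numpy as np
    y = np.array([1.3])
    x, g = reflective_transformation(y, np.array([0.0]), np.array([1.0]))
    return np.isclose(x[0], 0.7) and g[0] == -1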
|
||||
|
||||
|
||||
# Functions to display algorithm's progress.
|
||||
|
||||
|
||||
def print_header_nonlinear():
|
||||
print("{:^15}{:^15}{:^15}{:^15}{:^15}{:^15}"
|
||||
.format("Iteration", "Total nfev", "Cost", "Cost reduction",
|
||||
"Step norm", "Optimality"))
|
||||
|
||||
|
||||
def print_iteration_nonlinear(iteration, nfev, cost, cost_reduction,
|
||||
step_norm, optimality):
|
||||
if cost_reduction is None:
|
||||
cost_reduction = " " * 15
|
||||
else:
|
||||
cost_reduction = f"{cost_reduction:^15.2e}"
|
||||
|
||||
if step_norm is None:
|
||||
step_norm = " " * 15
|
||||
else:
|
||||
step_norm = f"{step_norm:^15.2e}"
|
||||
|
||||
print("{:^15}{:^15}{:^15.4e}{}{}{:^15.2e}"
|
||||
.format(iteration, nfev, cost, cost_reduction,
|
||||
step_norm, optimality))
|
||||
|
||||
|
||||
def print_header_linear():
|
||||
print("{:^15}{:^15}{:^15}{:^15}{:^15}"
|
||||
.format("Iteration", "Cost", "Cost reduction", "Step norm",
|
||||
"Optimality"))
|
||||
|
||||
|
||||
def print_iteration_linear(iteration, cost, cost_reduction, step_norm,
|
||||
optimality):
|
||||
if cost_reduction is None:
|
||||
cost_reduction = " " * 15
|
||||
else:
|
||||
cost_reduction = f"{cost_reduction:^15.2e}"
|
||||
|
||||
if step_norm is None:
|
||||
step_norm = " " * 15
|
||||
else:
|
||||
step_norm = f"{step_norm:^15.2e}"
|
||||
|
||||
print(f"{iteration:^15}{cost:^15.4e}{cost_reduction}{step_norm}{optimality:^15.2e}")
|
||||
|
||||
|
||||
# Simple helper functions.
|
||||
|
||||
|
||||
def compute_grad(J, f):
|
||||
"""Compute gradient of the least-squares cost function."""
|
||||
if isinstance(J, LinearOperator):
|
||||
return J.rmatvec(f)
|
||||
else:
|
||||
return J.T.dot(f)
|
||||
|
||||
|
||||
def compute_jac_scale(J, scale_inv_old=None):
|
||||
"""Compute variables scale based on the Jacobian matrix."""
|
||||
if issparse(J):
|
||||
scale_inv = np.asarray(J.power(2).sum(axis=0)).ravel()**0.5
|
||||
else:
|
||||
scale_inv = np.sum(J**2, axis=0)**0.5
|
||||
|
||||
if scale_inv_old is None:
|
||||
scale_inv[scale_inv == 0] = 1
|
||||
else:
|
||||
scale_inv = np.maximum(scale_inv, scale_inv_old)
|
||||
|
||||
return 1 / scale_inv, scale_inv
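# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): the column norms of J are 5 and 1, so the variable scales
# are their inverses, 0.2 and 1.0.
def _example_compute_jac_scale():
    import numpy as np
    J = np.array([[3.0, 0.0],
                  [4.0, 1.0]])
    scale, scale_inv = compute_jac_scale(J)
    return np.allclose(scale, [0.2, 1.0]) and np.allclose(scale_inv, [5.0, 1.0])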
|
||||
|
||||
|
||||
def left_multiplied_operator(J, d):
|
||||
"""Return diag(d) J as LinearOperator."""
|
||||
J = aslinearoperator(J)
|
||||
|
||||
def matvec(x):
|
||||
return d * J.matvec(x)
|
||||
|
||||
def matmat(X):
|
||||
return d[:, np.newaxis] * J.matmat(X)
|
||||
|
||||
def rmatvec(x):
|
||||
return J.rmatvec(x.ravel() * d)
|
||||
|
||||
return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
|
||||
rmatvec=rmatvec)
|
||||
|
||||
|
||||
def right_multiplied_operator(J, d):
|
||||
"""Return J diag(d) as LinearOperator."""
|
||||
J = aslinearoperator(J)
|
||||
|
||||
def matvec(x):
|
||||
return J.matvec(np.ravel(x) * d)
|
||||
|
||||
def matmat(X):
|
||||
return J.matmat(X * d[:, np.newaxis])
|
||||
|
||||
def rmatvec(x):
|
||||
return d * J.rmatvec(x)
|
||||
|
||||
return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
|
||||
rmatvec=rmatvec)
|
||||
|
||||
|
||||
def regularized_lsq_operator(J, diag):
|
||||
"""Return a matrix arising in regularized least squares as LinearOperator.
|
||||
|
||||
The matrix is
|
||||
[ J ]
|
||||
[ D ]
|
||||
where D is diagonal matrix with elements from `diag`.
|
||||
"""
|
||||
J = aslinearoperator(J)
|
||||
m, n = J.shape
|
||||
|
||||
def matvec(x):
|
||||
return np.hstack((J.matvec(x), diag * x))
|
||||
|
||||
def rmatvec(x):
|
||||
x1 = x[:m]
|
||||
x2 = x[m:]
|
||||
return J.rmatvec(x1) + diag * x2
|
||||
|
||||
return LinearOperator((m + n, n), matvec=matvec, rmatvec=rmatvec)
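# A minimal, hypothetical sketch (assumes only NumPy/SciPy; the helper name is
# illustrative): the operator stacks J on top of diag(d), so its matvec
# returns J @ x followed by d * x.
def _example_regularized_lsq_operator():
    import numpy as np
    J = np.array([[1.0, 2.0],
                  [3.0, 4.0]])
    d = np.array([0.1, 0.2])
    op = regularized_lsq_operator(J, d)
    return np.allclose(op.matvec(np.ones(2)), [3.0, 7.0, 0.1, 0.2])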
|
||||
|
||||
|
||||
def right_multiply(J, d, copy=True):
|
||||
"""Compute J diag(d).
|
||||
|
||||
If `copy` is False, `J` is modified in place (unless being LinearOperator).
|
||||
"""
|
||||
if copy and not isinstance(J, LinearOperator):
|
||||
J = J.copy()
|
||||
|
||||
if issparse(J):
|
||||
J.data *= d.take(J.indices, mode='clip') # scikit-learn recipe.
|
||||
elif isinstance(J, LinearOperator):
|
||||
J = right_multiplied_operator(J, d)
|
||||
else:
|
||||
J *= d
|
||||
|
||||
return J
|
||||
|
||||
|
||||
def left_multiply(J, d, copy=True):
|
||||
"""Compute diag(d) J.
|
||||
|
||||
If `copy` is False, `J` is modified in place (unless being LinearOperator).
|
||||
"""
|
||||
if copy and not isinstance(J, LinearOperator):
|
||||
J = J.copy()
|
||||
|
||||
if issparse(J):
|
||||
J.data *= np.repeat(d, np.diff(J.indptr)) # scikit-learn recipe.
|
||||
elif isinstance(J, LinearOperator):
|
||||
J = left_multiplied_operator(J, d)
|
||||
else:
|
||||
J *= d[:, np.newaxis]
|
||||
|
||||
return J
|
||||
|
||||
|
||||
def check_termination(dF, F, dx_norm, x_norm, ratio, ftol, xtol):
|
||||
"""Check termination condition for nonlinear least squares."""
|
||||
ftol_satisfied = dF < ftol * F and ratio > 0.25
|
||||
xtol_satisfied = dx_norm < xtol * (xtol + x_norm)
|
||||
|
||||
if ftol_satisfied and xtol_satisfied:
|
||||
return 4
|
||||
elif ftol_satisfied:
|
||||
return 2
|
||||
elif xtol_satisfied:
|
||||
return 3
|
||||
else:
|
||||
return None
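# A minimal, hypothetical sketch with hand-picked numbers (the helper name is
# illustrative): the relative cost reduction is far below `ftol` while the
# step is still large relative to `xtol`, so only the `ftol` condition fires
# and status 2 is returned.
def _example_check_termination():
    status = check_termination(dF=1e-12, F=1.0, dx_norm=1e-3, x_norm=1.0,
                               ratio=0.5, ftol=1e-8, xtol=1e-8)
    return status == 2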
|
||||
|
||||
|
||||
def scale_for_robust_loss_function(J, f, rho):
|
||||
"""Scale Jacobian and residuals for a robust loss function.
|
||||
|
||||
Arrays are modified in place.
|
||||
"""
|
||||
J_scale = rho[1] + 2 * rho[2] * f**2
|
||||
J_scale[J_scale < EPS] = EPS
|
||||
J_scale **= 0.5
|
||||
|
||||
f *= rho[1] / J_scale
|
||||
|
||||
return left_multiply(J, J_scale, copy=False), f

venv/lib/python3.12/site-packages/scipy/optimize/_lsq/dogbox.py
@@ -0,0 +1,331 @@
"""
|
||||
Dogleg algorithm with rectangular trust regions for least-squares minimization.
|
||||
|
||||
The description of the algorithm can be found in [Voglis]_. The algorithm does
trust-region iterations, but the shape of trust regions is rectangular as
opposed to conventional elliptical. The intersection of a trust region and
an initial feasible region is again some rectangle. Thus, on each iteration a
bound-constrained quadratic optimization problem is solved.

A quadratic problem is solved by the well-known dogleg approach, where the
function is minimized along a piecewise-linear "dogleg" path [NumOpt]_,
Chapter 4. If the Jacobian is not rank-deficient then the function is
decreasing along this path, and optimization amounts to simply following it
as long as a point stays within the bounds. A constrained Cauchy step (along
the anti-gradient) is considered for safety in rank-deficient cases, in which
convergence might be slow.

If during iterations some variable hits the initial bound and the component
of the anti-gradient points outside the feasible region, then the next dogleg
step won't make any progress. At this state such variables satisfy first-order
optimality conditions and are excluded before the next dogleg step is
computed.

The Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for
dense Jacobian matrices) or by the iterative procedure
`scipy.sparse.linalg.lsmr` (for dense and sparse matrices, or for a Jacobian
given as a LinearOperator). The second option allows solving very large
problems (up to a couple of million residuals on a regular PC), provided the
Jacobian matrix is sufficiently sparse. Note, however, that dogbox is not
well suited to problems with a large number of constraints, because variables
are excluded and included on each iteration (the required number of function
evaluations might be high or the accuracy of a solution will be poor), so its
large-scale usage is probably limited to unconstrained problems.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
|
||||
Approach for Unconstrained and Bound Constrained Nonlinear
|
||||
Optimization", WSEAS International Conference on Applied
|
||||
Mathematics, Corfu, Greece, 2004.
|
||||
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition".
|
||||
"""
|
||||
import numpy as np
|
||||
from numpy.linalg import lstsq, norm
|
||||
|
||||
from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .common import (
|
||||
step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
|
||||
build_quadratic_1d, minimize_quadratic_1d, compute_grad,
|
||||
compute_jac_scale, check_termination, scale_for_robust_loss_function,
|
||||
print_header_nonlinear, print_iteration_nonlinear)
|
||||
|
||||
|
||||
def lsmr_operator(Jop, d, active_set):
|
||||
"""Compute LinearOperator to use in LSMR by dogbox algorithm.
|
||||
|
||||
`active_set` mask is used to exclude active variables from computations
of matrix-vector products.
|
||||
"""
|
||||
m, n = Jop.shape
|
||||
|
||||
def matvec(x):
|
||||
x_free = x.ravel().copy()
|
||||
x_free[active_set] = 0
|
||||
return Jop.matvec(x * d)
|
||||
|
||||
def rmatvec(x):
|
||||
r = d * Jop.rmatvec(x)
|
||||
r[active_set] = 0
|
||||
return r
|
||||
|
||||
return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)
|
||||
|
||||
|
||||
def find_intersection(x, tr_bounds, lb, ub):
|
||||
"""Find intersection of trust-region bounds and initial bounds.
|
||||
|
||||
Returns
|
||||
-------
|
||||
lb_total, ub_total : ndarray with shape of x
|
||||
Lower and upper bounds of the intersection region.
|
||||
orig_l, orig_u : ndarray of bool with shape of x
|
||||
True means that an original bound is taken as a corresponding bound
|
||||
in the intersection region.
|
||||
tr_l, tr_u : ndarray of bool with shape of x
|
||||
True means that a trust-region bound is taken as a corresponding bound
|
||||
in the intersection region.
|
||||
"""
|
||||
lb_centered = lb - x
|
||||
ub_centered = ub - x
|
||||
|
||||
lb_total = np.maximum(lb_centered, -tr_bounds)
|
||||
ub_total = np.minimum(ub_centered, tr_bounds)
|
||||
|
||||
orig_l = np.equal(lb_total, lb_centered)
|
||||
orig_u = np.equal(ub_total, ub_centered)
|
||||
|
||||
tr_l = np.equal(lb_total, -tr_bounds)
|
||||
tr_u = np.equal(ub_total, tr_bounds)
|
||||
|
||||
return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u
|
||||
|
||||
|
||||
def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
|
||||
"""Find dogleg step in a rectangular region.
|
||||
|
||||
Returns
|
||||
-------
|
||||
step : ndarray, shape (n,)
|
||||
Computed dogleg step.
|
||||
bound_hits : ndarray of int, shape (n,)
|
||||
Each component shows whether a corresponding variable hits the
|
||||
initial bound after the step is taken:
|
||||
* 0 - a variable doesn't hit the bound.
|
||||
* -1 - lower bound is hit.
|
||||
* 1 - upper bound is hit.
|
||||
tr_hit : bool
|
||||
Whether the step hit the boundary of the trust-region.
|
||||
"""
|
||||
lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
|
||||
x, tr_bounds, lb, ub
|
||||
)
|
||||
bound_hits = np.zeros_like(x, dtype=int)
|
||||
|
||||
if in_bounds(newton_step, lb_total, ub_total):
|
||||
return newton_step, bound_hits, False
|
||||
|
||||
to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)
|
||||
|
||||
# The classical dogleg algorithm would check whether the Cauchy step fits
# into the bounds, and just return its constrained version if not. But in a
# rectangular trust region it makes sense to try to improve the constrained
# Cauchy step too. Thus, we don't distinguish these two cases.
|
||||
|
||||
cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g
|
||||
|
||||
step_diff = newton_step - cauchy_step
|
||||
step_size, hits = step_size_to_bound(cauchy_step, step_diff,
|
||||
lb_total, ub_total)
|
||||
bound_hits[(hits < 0) & orig_l] = -1
|
||||
bound_hits[(hits > 0) & orig_u] = 1
|
||||
tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)
|
||||
|
||||
return cauchy_step + step_size * step_diff, bound_hits, tr_hit
|
||||
|
||||
|
||||
def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
|
||||
loss_function, tr_solver, tr_options, verbose):
|
||||
f = f0
|
||||
f_true = f.copy()
|
||||
nfev = 1
|
||||
|
||||
J = J0
|
||||
njev = 1
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
cost = 0.5 * np.sum(rho[0])
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
else:
|
||||
cost = 0.5 * np.dot(f, f)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J)
|
||||
else:
|
||||
scale, scale_inv = x_scale, 1 / x_scale
|
||||
|
||||
Delta = norm(x0 * scale_inv, ord=np.inf)
|
||||
if Delta == 0:
|
||||
Delta = 1.0
|
||||
|
||||
on_bound = np.zeros_like(x0, dtype=int)
|
||||
on_bound[np.equal(x0, lb)] = -1
|
||||
on_bound[np.equal(x0, ub)] = 1
|
||||
|
||||
x = x0
|
||||
step = np.empty_like(x0)
|
||||
|
||||
if max_nfev is None:
|
||||
max_nfev = x0.size * 100
|
||||
|
||||
termination_status = None
|
||||
iteration = 0
|
||||
step_norm = None
|
||||
actual_reduction = None
|
||||
|
||||
if verbose == 2:
|
||||
print_header_nonlinear()
|
||||
|
||||
while True:
|
||||
active_set = on_bound * g < 0
|
||||
free_set = ~active_set
|
||||
|
||||
g_free = g[free_set]
|
||||
g_full = g.copy()
|
||||
g[active_set] = 0
|
||||
|
||||
g_norm = norm(g, ord=np.inf)
|
||||
if g_norm < gtol:
|
||||
termination_status = 1
|
||||
|
||||
if verbose == 2:
|
||||
print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
|
||||
step_norm, g_norm)
|
||||
|
||||
if termination_status is not None or nfev == max_nfev:
|
||||
break
|
||||
|
||||
x_free = x[free_set]
|
||||
lb_free = lb[free_set]
|
||||
ub_free = ub[free_set]
|
||||
scale_free = scale[free_set]
|
||||
|
||||
# Compute (Gauss-)Newton and build quadratic model for Cauchy step.
|
||||
if tr_solver == 'exact':
|
||||
J_free = J[:, free_set]
|
||||
newton_step = lstsq(J_free, -f, rcond=-1)[0]
|
||||
|
||||
# Coefficients for the quadratic model along the anti-gradient.
|
||||
a, b = build_quadratic_1d(J_free, g_free, -g_free)
|
||||
elif tr_solver == 'lsmr':
|
||||
Jop = aslinearoperator(J)
|
||||
|
||||
# We compute the lsmr step in scaled variables and then transform it
# back to the original variables. If lsmr gave the exact least-squares
# solution, this would be equivalent to not doing any transformation,
# but in practice it works better this way.
|
||||
|
||||
# We pass active_set to make computations as if we selected
|
||||
# the free subset of J columns, but without actually doing any
|
||||
# slicing, which is expensive for sparse matrices and impossible
|
||||
# for LinearOperator.
|
||||
|
||||
lsmr_op = lsmr_operator(Jop, scale, active_set)
|
||||
newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
|
||||
newton_step *= scale_free
|
||||
|
||||
# Components of g for active variables were zeroed, so this call
|
||||
# is correct and equivalent to using J_free and g_free.
|
||||
a, b = build_quadratic_1d(Jop, g, -g)
|
||||
|
||||
actual_reduction = -1.0
|
||||
while actual_reduction <= 0 and nfev < max_nfev:
|
||||
tr_bounds = Delta * scale_free
|
||||
|
||||
step_free, on_bound_free, tr_hit = dogleg_step(
|
||||
x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)
|
||||
|
||||
step.fill(0.0)
|
||||
step[free_set] = step_free
|
||||
|
||||
if tr_solver == 'exact':
|
||||
predicted_reduction = -evaluate_quadratic(J_free, g_free,
|
||||
step_free)
|
||||
elif tr_solver == 'lsmr':
|
||||
predicted_reduction = -evaluate_quadratic(Jop, g, step)
|
||||
|
||||
# gh11403 ensure that solution is fully within bounds.
|
||||
x_new = np.clip(x + step, lb, ub)
|
||||
|
||||
f_new = fun(x_new)
|
||||
nfev += 1
|
||||
|
||||
step_h_norm = norm(step * scale_inv, ord=np.inf)
|
||||
|
||||
if not np.all(np.isfinite(f_new)):
|
||||
Delta = 0.25 * step_h_norm
|
||||
continue
|
||||
|
||||
# Usual trust-region step quality estimation.
|
||||
if loss_function is not None:
|
||||
cost_new = loss_function(f_new, cost_only=True)
|
||||
else:
|
||||
cost_new = 0.5 * np.dot(f_new, f_new)
|
||||
actual_reduction = cost - cost_new
|
||||
|
||||
Delta, ratio = update_tr_radius(
|
||||
Delta, actual_reduction, predicted_reduction,
|
||||
step_h_norm, tr_hit
|
||||
)
|
||||
|
||||
step_norm = norm(step)
|
||||
termination_status = check_termination(
|
||||
actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
|
||||
|
||||
if termination_status is not None:
|
||||
break
|
||||
|
||||
if actual_reduction > 0:
|
||||
on_bound[free_set] = on_bound_free
|
||||
|
||||
x = x_new
|
||||
# Set variables exactly at the boundary.
|
||||
mask = on_bound == -1
|
||||
x[mask] = lb[mask]
|
||||
mask = on_bound == 1
|
||||
x[mask] = ub[mask]
|
||||
|
||||
f = f_new
|
||||
f_true = f.copy()
|
||||
|
||||
cost = cost_new
|
||||
|
||||
J = jac(x, f)
|
||||
njev += 1
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J, scale_inv)
|
||||
else:
|
||||
step_norm = 0
|
||||
actual_reduction = 0
|
||||
|
||||
iteration += 1
|
||||
|
||||
if termination_status is None:
|
||||
termination_status = 0
|
||||
|
||||
return OptimizeResult(
|
||||
x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
|
||||
active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)
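# A minimal, hypothetical sketch (assumes SciPy and NumPy are installed; the
# helper name is illustrative): this solver is normally reached through the
# public `scipy.optimize.least_squares` interface with ``method='dogbox'``,
# here fitting a two-parameter exponential decay subject to simple bounds.
def _example_dogbox_via_least_squares():
    import numpy as np
    from scipy.optimize import least_squares

    def residuals(p, t, y):
        return p[0] * np.exp(p[1] * t) - y

    t = np.linspace(0, 1, 20)
    y = 2.0 * np.exp(-1.5 * t)
    res = least_squares(residuals, x0=[1.0, -0.1], args=(t, y),
                        bounds=([0.0, -5.0], [10.0, 0.0]), method='dogbox')
    # res.x should be close to the generating parameters [2.0, -1.5].
    return res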

Binary file not shown.

@@ -0,0 +1,967 @@
"""Generic interface for least-squares minimization."""
|
||||
from warnings import warn
|
||||
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
|
||||
from scipy.sparse import issparse
|
||||
from scipy.sparse.linalg import LinearOperator
|
||||
from scipy.optimize import _minpack, OptimizeResult
|
||||
from scipy.optimize._numdiff import approx_derivative, group_columns
|
||||
from scipy.optimize._minimize import Bounds
|
||||
|
||||
from .trf import trf
|
||||
from .dogbox import dogbox
|
||||
from .common import EPS, in_bounds, make_strictly_feasible
|
||||
|
||||
|
||||
TERMINATION_MESSAGES = {
|
||||
-1: "Improper input parameters status returned from `leastsq`",
|
||||
0: "The maximum number of function evaluations is exceeded.",
|
||||
1: "`gtol` termination condition is satisfied.",
|
||||
2: "`ftol` termination condition is satisfied.",
|
||||
3: "`xtol` termination condition is satisfied.",
|
||||
4: "Both `ftol` and `xtol` termination conditions are satisfied."
|
||||
}
|
||||
|
||||
|
||||
FROM_MINPACK_TO_COMMON = {
|
||||
0: -1, # Improper input parameters from MINPACK.
|
||||
1: 2,
|
||||
2: 3,
|
||||
3: 4,
|
||||
4: 1,
|
||||
5: 0
|
||||
# There are 6, 7, 8 for too small tolerance parameters,
|
||||
# but we guard against it by checking ftol, xtol, gtol beforehand.
|
||||
}
|
||||
|
||||
|
||||
def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step):
|
||||
n = x0.size
|
||||
|
||||
if diff_step is None:
|
||||
epsfcn = EPS
|
||||
else:
|
||||
epsfcn = diff_step**2
|
||||
|
||||
# Compute MINPACK's `diag`, which is inverse of our `x_scale` and
|
||||
# ``x_scale='jac'`` corresponds to ``diag=None``.
|
||||
if isinstance(x_scale, str) and x_scale == 'jac':
|
||||
diag = None
|
||||
else:
|
||||
diag = 1 / x_scale
|
||||
|
||||
full_output = True
|
||||
col_deriv = False
|
||||
factor = 100.0
|
||||
|
||||
if jac is None:
|
||||
if max_nfev is None:
|
||||
# n squared to account for Jacobian evaluations.
|
||||
max_nfev = 100 * n * (n + 1)
|
||||
x, info, status = _minpack._lmdif(
|
||||
fun, x0, (), full_output, ftol, xtol, gtol,
|
||||
max_nfev, epsfcn, factor, diag)
|
||||
else:
|
||||
if max_nfev is None:
|
||||
max_nfev = 100 * n
|
||||
x, info, status = _minpack._lmder(
|
||||
fun, jac, x0, (), full_output, col_deriv,
|
||||
ftol, xtol, gtol, max_nfev, factor, diag)
|
||||
|
||||
f = info['fvec']
|
||||
|
||||
if callable(jac):
|
||||
J = jac(x)
|
||||
else:
|
||||
J = np.atleast_2d(approx_derivative(fun, x))
|
||||
|
||||
cost = 0.5 * np.dot(f, f)
|
||||
g = J.T.dot(f)
|
||||
g_norm = norm(g, ord=np.inf)
|
||||
|
||||
nfev = info['nfev']
|
||||
njev = info.get('njev', None)
|
||||
|
||||
status = FROM_MINPACK_TO_COMMON[status]
|
||||
active_mask = np.zeros_like(x0, dtype=int)
|
||||
|
||||
return OptimizeResult(
|
||||
x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm,
|
||||
active_mask=active_mask, nfev=nfev, njev=njev, status=status)
|
||||
|
||||
|
||||
def prepare_bounds(bounds, n):
|
||||
lb, ub = (np.asarray(b, dtype=float) for b in bounds)
|
||||
if lb.ndim == 0:
|
||||
lb = np.resize(lb, n)
|
||||
|
||||
if ub.ndim == 0:
|
||||
ub = np.resize(ub, n)
|
||||
|
||||
return lb, ub
|
||||
|
||||
|
||||
def check_tolerance(ftol, xtol, gtol, method):
|
||||
def check(tol, name):
|
||||
if tol is None:
|
||||
tol = 0
|
||||
elif tol < EPS:
|
||||
warn(f"Setting `{name}` below the machine epsilon ({EPS:.2e}) effectively "
|
||||
f"disables the corresponding termination condition.",
|
||||
stacklevel=3)
|
||||
return tol
|
||||
|
||||
ftol = check(ftol, "ftol")
|
||||
xtol = check(xtol, "xtol")
|
||||
gtol = check(gtol, "gtol")
|
||||
|
||||
if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
|
||||
raise ValueError("All tolerances must be higher than machine epsilon "
|
||||
f"({EPS:.2e}) for method 'lm'.")
|
||||
elif ftol < EPS and xtol < EPS and gtol < EPS:
|
||||
raise ValueError("At least one of the tolerances must be higher than "
|
||||
f"machine epsilon ({EPS:.2e}).")
|
||||
|
||||
return ftol, xtol, gtol
|
||||
|
||||
|
||||
def check_x_scale(x_scale, x0):
|
||||
if isinstance(x_scale, str) and x_scale == 'jac':
|
||||
return x_scale
|
||||
|
||||
try:
|
||||
x_scale = np.asarray(x_scale, dtype=float)
|
||||
valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
|
||||
except (ValueError, TypeError):
|
||||
valid = False
|
||||
|
||||
if not valid:
|
||||
raise ValueError("`x_scale` must be 'jac' or array_like with "
|
||||
"positive numbers.")
|
||||
|
||||
if x_scale.ndim == 0:
|
||||
x_scale = np.resize(x_scale, x0.shape)
|
||||
|
||||
if x_scale.shape != x0.shape:
|
||||
raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")
|
||||
|
||||
return x_scale
|
||||
|
||||
|
||||
def check_jac_sparsity(jac_sparsity, m, n):
|
||||
if jac_sparsity is None:
|
||||
return None
|
||||
|
||||
if not issparse(jac_sparsity):
|
||||
jac_sparsity = np.atleast_2d(jac_sparsity)
|
||||
|
||||
if jac_sparsity.shape != (m, n):
|
||||
raise ValueError("`jac_sparsity` has wrong shape.")
|
||||
|
||||
return jac_sparsity, group_columns(jac_sparsity)
|
||||
|
||||
|
||||
# Loss functions.
|
||||
|
||||
|
||||
def huber(z, rho, cost_only):
|
||||
mask = z <= 1
|
||||
rho[0, mask] = z[mask]
|
||||
rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
|
||||
if cost_only:
|
||||
return
|
||||
rho[1, mask] = 1
|
||||
rho[1, ~mask] = z[~mask]**-0.5
|
||||
rho[2, mask] = 0
|
||||
rho[2, ~mask] = -0.5 * z[~mask]**-1.5
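# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): below z = 1 the Huber loss equals the plain squared residual,
# above it the loss grows like 2*sqrt(z) - 1 and its first derivative decays.
def _example_huber_branches():
    import numpy as np
    z = np.array([0.25, 4.0])
    rho = np.empty((3, z.size))
    huber(z, rho, cost_only=False)
    return np.allclose(rho[0], [0.25, 3.0]) and np.allclose(rho[1], [1.0, 0.5])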
|
||||
|
||||
|
||||
def soft_l1(z, rho, cost_only):
|
||||
t = 1 + z
|
||||
rho[0] = 2 * (t**0.5 - 1)
|
||||
if cost_only:
|
||||
return
|
||||
rho[1] = t**-0.5
|
||||
rho[2] = -0.5 * t**-1.5
|
||||
|
||||
|
||||
def cauchy(z, rho, cost_only):
|
||||
rho[0] = np.log1p(z)
|
||||
if cost_only:
|
||||
return
|
||||
t = 1 + z
|
||||
rho[1] = 1 / t
|
||||
rho[2] = -1 / t**2
|
||||
|
||||
|
||||
def arctan(z, rho, cost_only):
|
||||
rho[0] = np.arctan(z)
|
||||
if cost_only:
|
||||
return
|
||||
t = 1 + z**2
|
||||
rho[1] = 1 / t
|
||||
rho[2] = -2 * z / t**2
|
||||
|
||||
|
||||
IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
|
||||
cauchy=cauchy, arctan=arctan)
|
||||
|
||||
|
||||
def construct_loss_function(m, loss, f_scale):
|
||||
if loss == 'linear':
|
||||
return None
|
||||
|
||||
if not callable(loss):
|
||||
loss = IMPLEMENTED_LOSSES[loss]
|
||||
rho = np.empty((3, m))
|
||||
|
||||
def loss_function(f, cost_only=False):
|
||||
z = (f / f_scale) ** 2
|
||||
loss(z, rho, cost_only=cost_only)
|
||||
if cost_only:
|
||||
return 0.5 * f_scale ** 2 * np.sum(rho[0])
|
||||
rho[0] *= f_scale ** 2
|
||||
rho[2] /= f_scale ** 2
|
||||
return rho
|
||||
else:
|
||||
def loss_function(f, cost_only=False):
|
||||
z = (f / f_scale) ** 2
|
||||
rho = loss(z)
|
||||
if cost_only:
|
||||
return 0.5 * f_scale ** 2 * np.sum(rho[0])
|
||||
rho[0] *= f_scale ** 2
|
||||
rho[2] /= f_scale ** 2
|
||||
return rho
|
||||
|
||||
return loss_function
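# A minimal, hypothetical sketch (assumes only NumPy; the helper name is
# illustrative): with the 'cauchy' loss a large residual contributes
# log1p(z) rather than z to the cost, so the robust cost is far smaller than
# the plain least-squares cost 0.5 * sum(f**2).
def _example_robust_loss_scaling():
    import numpy as np
    f = np.array([0.1, 3.0])                      # one inlier, one outlier
    loss = construct_loss_function(f.size, 'cauchy', f_scale=1.0)
    robust_cost = loss(f, cost_only=True)
    return robust_cost < 0.5 * np.sum(f**2)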
|
||||
|
||||
|
||||
def least_squares(
|
||||
fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
|
||||
ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
|
||||
f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
|
||||
jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
|
||||
"""Solve a nonlinear least-squares problem with bounds on the variables.
|
||||
|
||||
Given the residuals f(x) (an m-D real function of n real
|
||||
variables) and the loss function rho(s) (a scalar function), `least_squares`
|
||||
finds a local minimum of the cost function F(x)::
|
||||
|
||||
minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
|
||||
subject to lb <= x <= ub
|
||||
|
||||
The purpose of the loss function rho(s) is to reduce the influence of
|
||||
outliers on the solution.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fun : callable
|
||||
Function which computes the vector of residuals, with the signature
|
||||
``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
|
||||
respect to its first argument. The argument ``x`` passed to this
|
||||
function is an ndarray of shape (n,) (never a scalar, even for n=1).
|
||||
It must allocate and return a 1-D array_like of shape (m,) or a scalar.
|
||||
If the argument ``x`` is complex or the function ``fun`` returns
|
||||
complex residuals, it must be wrapped in a real function of real
|
||||
arguments, as shown at the end of the Examples section.
|
||||
x0 : array_like with shape (n,) or float
|
||||
Initial guess on independent variables. If float, it will be treated
|
||||
as a 1-D array with one element. When `method` is 'trf', the initial
|
||||
guess might be slightly adjusted to lie sufficiently within the given
|
||||
`bounds`.
|
||||
jac : {'2-point', '3-point', 'cs', callable}, optional
|
||||
Method of computing the Jacobian matrix (an m-by-n matrix, where
|
||||
element (i, j) is the partial derivative of f[i] with respect to
|
||||
x[j]). The keywords select a finite difference scheme for numerical
|
||||
estimation. The scheme '3-point' is more accurate, but requires
|
||||
twice as many operations as '2-point' (default). The scheme 'cs'
|
||||
uses complex steps, and while potentially the most accurate, it is
|
||||
applicable only when `fun` correctly handles complex inputs and
|
||||
can be analytically continued to the complex plane. Method 'lm'
|
||||
always uses the '2-point' scheme. If callable, it is used as
|
||||
``jac(x, *args, **kwargs)`` and should return a good approximation
|
||||
(or the exact value) for the Jacobian as an array_like (np.atleast_2d
|
||||
is applied), a sparse matrix (csr_matrix preferred for performance) or
|
||||
a `scipy.sparse.linalg.LinearOperator`.
|
||||
bounds : 2-tuple of array_like or `Bounds`, optional
|
||||
There are two ways to specify bounds:
|
||||
|
||||
1. Instance of `Bounds` class
|
||||
2. Lower and upper bounds on independent variables. Defaults to no
|
||||
bounds. Each array must match the size of `x0` or be a scalar,
|
||||
in the latter case a bound will be the same for all variables.
|
||||
Use ``np.inf`` with an appropriate sign to disable bounds on all
|
||||
or some variables.
|
||||
method : {'trf', 'dogbox', 'lm'}, optional
|
||||
Algorithm to perform minimization.
|
||||
|
||||
* 'trf' : Trust Region Reflective algorithm, particularly suitable
|
||||
for large sparse problems with bounds. Generally robust method.
|
||||
* 'dogbox' : dogleg algorithm with rectangular trust regions,
|
||||
typical use case is small problems with bounds. Not recommended
|
||||
for problems with rank-deficient Jacobian.
|
||||
* 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
|
||||
Doesn't handle bounds and sparse Jacobians. Usually the most
|
||||
efficient method for small unconstrained problems.
|
||||
|
||||
Default is 'trf'. See Notes for more information.
|
||||
ftol : float or None, optional
|
||||
Tolerance for termination by the change of the cost function. Default
|
||||
is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
|
||||
and there was an adequate agreement between a local quadratic model and
|
||||
the true model in the last step.
|
||||
|
||||
If None and 'method' is not 'lm', the termination by this condition is
|
||||
disabled. If 'method' is 'lm', this tolerance must be higher than
|
||||
machine epsilon.
|
||||
xtol : float or None, optional
|
||||
Tolerance for termination by the change of the independent variables.
|
||||
Default is 1e-8. The exact condition depends on the `method` used:
|
||||
|
||||
* For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``.
|
||||
* For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
|
||||
a trust-region radius and ``xs`` is the value of ``x``
|
||||
scaled according to `x_scale` parameter (see below).
|
||||
|
||||
If None and 'method' is not 'lm', the termination by this condition is
|
||||
disabled. If 'method' is 'lm', this tolerance must be higher than
|
||||
machine epsilon.
|
||||
gtol : float or None, optional
|
||||
Tolerance for termination by the norm of the gradient. Default is 1e-8.
|
||||
The exact condition depends on a `method` used:
|
||||
|
||||
* For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
|
||||
``g_scaled`` is the value of the gradient scaled to account for
|
||||
the presence of the bounds [STIR]_.
|
||||
* For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
|
||||
``g_free`` is the gradient with respect to the variables which
|
||||
are not in the optimal state on the boundary.
|
||||
* For 'lm' : the maximum absolute value of the cosine of angles
|
||||
between columns of the Jacobian and the residual vector is less
|
||||
than `gtol`, or the residual vector is zero.
|
||||
|
||||
If None and 'method' is not 'lm', the termination by this condition is
|
||||
disabled. If 'method' is 'lm', this tolerance must be higher than
|
||||
machine epsilon.
|
||||
x_scale : array_like or 'jac', optional
|
||||
Characteristic scale of each variable. Setting `x_scale` is equivalent
|
||||
to reformulating the problem in scaled variables ``xs = x / x_scale``.
|
||||
An alternative view is that the size of a trust region along jth
|
||||
dimension is proportional to ``x_scale[j]``. Improved convergence may
|
||||
be achieved by setting `x_scale` such that a step of a given size
|
||||
along any of the scaled variables has a similar effect on the cost
|
||||
function. If set to 'jac', the scale is iteratively updated using the
|
||||
inverse norms of the columns of the Jacobian matrix (as described in
|
||||
[JJMore]_).
|
||||
loss : str or callable, optional
|
||||
Determines the loss function. The following keyword values are allowed:
|
||||
|
||||
* 'linear' (default) : ``rho(z) = z``. Gives a standard
|
||||
least-squares problem.
|
||||
* 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
|
||||
approximation of l1 (absolute value) loss. Usually a good
|
||||
choice for robust least squares.
|
||||
* 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
|
||||
similarly to 'soft_l1'.
|
||||
* 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens outliers
|
||||
influence, but may cause difficulties in optimization process.
|
||||
* 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
|
||||
a single residual, has properties similar to 'cauchy'.
|
||||
|
||||
If callable, it must take a 1-D ndarray ``z=f**2`` and return an
|
||||
array_like with shape (3, m) where row 0 contains function values,
|
||||
row 1 contains first derivatives and row 2 contains second
|
||||
derivatives. Method 'lm' supports only 'linear' loss.
|
||||
f_scale : float, optional
|
||||
Value of soft margin between inlier and outlier residuals, default
|
||||
is 1.0. The loss function is evaluated as follows
|
||||
``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`,
|
||||
and ``rho`` is determined by `loss` parameter. This parameter has
|
||||
no effect with ``loss='linear'``, but for other `loss` values it is
|
||||
of crucial importance.
|
||||
max_nfev : None or int, optional
|
||||
Maximum number of function evaluations before the termination.
|
||||
If None (default), the value is chosen automatically:
|
||||
|
||||
* For 'trf' and 'dogbox' : 100 * n.
|
||||
* For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1)
|
||||
otherwise (because 'lm' counts function calls in Jacobian
|
||||
estimation).
|
||||
|
||||
diff_step : None or array_like, optional
|
||||
Determines the relative step size for the finite difference
|
||||
approximation of the Jacobian. The actual step is computed as
|
||||
``x * diff_step``. If None (default), then `diff_step` is taken to be
|
||||
a conventional "optimal" power of machine epsilon for the finite
|
||||
difference scheme used [NR]_.
|
||||
tr_solver : {None, 'exact', 'lsmr'}, optional
|
||||
Method for solving trust-region subproblems, relevant only for 'trf'
|
||||
and 'dogbox' methods.
|
||||
|
||||
* 'exact' is suitable for not very large problems with dense
|
||||
Jacobian matrices. The computational complexity per iteration is
|
||||
comparable to a singular value decomposition of the Jacobian
|
||||
matrix.
|
||||
* 'lsmr' is suitable for problems with sparse and large Jacobian
|
||||
matrices. It uses the iterative procedure
|
||||
`scipy.sparse.linalg.lsmr` for finding a solution of a linear
|
||||
least-squares problem and only requires matrix-vector product
|
||||
evaluations.
|
||||
|
||||
If None (default), the solver is chosen based on the type of Jacobian
|
||||
returned on the first iteration.
|
||||
tr_options : dict, optional
|
||||
Keyword options passed to trust-region solver.
|
||||
|
||||
* ``tr_solver='exact'``: `tr_options` are ignored.
|
||||
* ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
|
||||
Additionally, ``method='trf'`` supports 'regularize' option
|
||||
(bool, default is True), which adds a regularization term to the
|
||||
normal equation, which improves convergence if the Jacobian is
|
||||
rank-deficient [Byrd]_ (eq. 3.4).
|
||||
|
||||
jac_sparsity : {None, array_like, sparse matrix}, optional
|
||||
Defines the sparsity structure of the Jacobian matrix for finite
|
||||
difference estimation, its shape must be (m, n). If the Jacobian has
|
||||
only few non-zero elements in *each* row, providing the sparsity
|
||||
structure will greatly speed up the computations [Curtis]_. A zero
|
||||
entry means that a corresponding element in the Jacobian is identically
|
||||
zero. If provided, forces the use of 'lsmr' trust-region solver.
|
||||
If None (default), then dense differencing will be used. Has no effect
|
||||
for 'lm' method.
|
||||
verbose : {0, 1, 2}, optional
|
||||
Level of algorithm's verbosity:
|
||||
|
||||
* 0 (default) : work silently.
|
||||
* 1 : display a termination report.
|
||||
* 2 : display progress during iterations (not supported by 'lm'
|
||||
method).
|
||||
|
||||
args, kwargs : tuple and dict, optional
|
||||
Additional arguments passed to `fun` and `jac`. Both empty by default.
|
||||
The calling signature is ``fun(x, *args, **kwargs)`` and the same for
|
||||
`jac`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : OptimizeResult
|
||||
`OptimizeResult` with the following fields defined:
|
||||
|
||||
x : ndarray, shape (n,)
|
||||
Solution found.
|
||||
cost : float
|
||||
Value of the cost function at the solution.
|
||||
fun : ndarray, shape (m,)
|
||||
Vector of residuals at the solution.
|
||||
jac : ndarray, sparse matrix or LinearOperator, shape (m, n)
|
||||
Modified Jacobian matrix at the solution, in the sense that J^T J
|
||||
is a Gauss-Newton approximation of the Hessian of the cost function.
|
||||
The type is the same as the one used by the algorithm.
|
||||
grad : ndarray, shape (n,)
|
||||
Gradient of the cost function at the solution.
|
||||
optimality : float
|
||||
First-order optimality measure. In unconstrained problems, it is
|
||||
always the uniform norm of the gradient. In constrained problems,
|
||||
it is the quantity which was compared with `gtol` during iterations.
|
||||
active_mask : ndarray of int, shape (n,)
|
||||
Each component shows whether a corresponding constraint is active
|
||||
(that is, whether a variable is at the bound):
|
||||
|
||||
* 0 : a constraint is not active.
|
||||
* -1 : a lower bound is active.
|
||||
* 1 : an upper bound is active.
|
||||
|
||||
Might be somewhat arbitrary for 'trf' method as it generates a
|
||||
sequence of strictly feasible iterates and `active_mask` is
|
||||
determined within a tolerance threshold.
|
||||
nfev : int
|
||||
Number of function evaluations done. Methods 'trf' and 'dogbox' do
|
||||
not count function calls for numerical Jacobian approximation, as
|
||||
opposed to 'lm' method.
|
||||
njev : int or None
|
||||
Number of Jacobian evaluations done. If numerical Jacobian
|
||||
approximation is used in 'lm' method, it is set to None.
|
||||
status : int
|
||||
The reason for algorithm termination:
|
||||
|
||||
* -1 : improper input parameters status returned from MINPACK.
|
||||
* 0 : the maximum number of function evaluations is exceeded.
|
||||
* 1 : `gtol` termination condition is satisfied.
|
||||
* 2 : `ftol` termination condition is satisfied.
|
||||
* 3 : `xtol` termination condition is satisfied.
|
||||
* 4 : Both `ftol` and `xtol` termination conditions are satisfied.
|
||||
|
||||
message : str
|
||||
Verbal description of the termination reason.
|
||||
success : bool
|
||||
True if one of the convergence criteria is satisfied (`status` > 0).
|
||||
|
||||
See Also
|
||||
--------
|
||||
leastsq : A legacy wrapper for the MINPACK implementation of the
|
||||
Levenberg-Marquardt algorithm.
|
||||
curve_fit : Least-squares minimization applied to a curve-fitting problem.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares
|
||||
algorithms implemented in MINPACK (lmder, lmdif). It runs the
|
||||
Levenberg-Marquardt algorithm formulated as a trust-region type algorithm.
|
||||
The implementation is based on the paper [JJMore]_; it is very robust and
|
||||
efficient with a lot of smart tricks. It should be your first choice
|
||||
for unconstrained problems. Note that it doesn't support bounds. Also,
|
||||
it doesn't work when m < n.
|
||||
|
||||
Method 'trf' (Trust Region Reflective) is motivated by the process of
|
||||
solving a system of equations that constitutes the first-order optimality
|
||||
condition for a bound-constrained minimization problem as formulated in
|
||||
[STIR]_. The algorithm iteratively solves trust-region subproblems
|
||||
augmented by a special diagonal quadratic term and with trust-region shape
|
||||
determined by the distance from the bounds and the direction of the
|
||||
gradient. These enhancements help to avoid making steps directly into bounds
|
||||
and efficiently explore the whole space of variables. To further improve
|
||||
convergence, the algorithm considers search directions reflected from the
|
||||
bounds. To obey theoretical requirements, the algorithm keeps iterates
|
||||
strictly feasible. With dense Jacobians trust-region subproblems are
|
||||
solved by an exact method very similar to the one described in [JJMore]_
|
||||
(and implemented in MINPACK). The difference from the MINPACK
|
||||
implementation is that a singular value decomposition of a Jacobian
|
||||
matrix is done once per iteration, instead of a QR decomposition and series
|
||||
of Givens rotation eliminations. For large sparse Jacobians a 2-D subspace
|
||||
approach of solving trust-region subproblems is used [STIR]_, [Byrd]_.
|
||||
The subspace is spanned by a scaled gradient and an approximate
|
||||
Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no
|
||||
constraints are imposed the algorithm is very similar to MINPACK and has
|
||||
generally comparable performance. The algorithm works quite robustly in
unbounded and bounded problems, thus it is chosen as the default algorithm.
|
||||
|
||||
Method 'dogbox' operates in a trust-region framework, but considers
|
||||
rectangular trust regions as opposed to conventional ellipsoids [Voglis]_.
|
||||
The intersection of a current trust region and initial bounds is again
|
||||
rectangular, so on each iteration a quadratic minimization problem subject
|
||||
to bound constraints is solved approximately by Powell's dogleg method
|
||||
[NumOpt]_. The required Gauss-Newton step can be computed exactly for
|
||||
dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large
|
||||
sparse Jacobians. The algorithm is likely to exhibit slow convergence when
|
||||
the rank of the Jacobian is less than the number of variables. The algorithm
|
||||
often outperforms 'trf' in bounded problems with a small number of
|
||||
variables.
|
||||
|
||||
Robust loss functions are implemented as described in [BA]_. The idea
|
||||
is to modify a residual vector and a Jacobian matrix on each iteration
|
||||
such that computed gradient and Gauss-Newton Hessian approximation match
|
||||
the true gradient and Hessian approximation of the cost function. Then
|
||||
the algorithm proceeds in a normal way, i.e., robust loss functions are
|
||||
implemented as a simple wrapper over standard least-squares algorithms.
|
||||
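For example, with ``rho`` evaluated at ``z = f**2``, the gradient of the
robustified cost ``0.5 * sum(rho(f**2))`` follows from the chain rule.
A small sketch (``loss_fn`` is a hypothetical helper returning the
(3, m) array of ``rho`` and its two derivatives, not the internal code)::

    z = f**2                      # squared residuals, shape (m,)
    rho = loss_fn(z)              # rows: rho(z), rho'(z), rho''(z)
    cost = 0.5 * np.sum(rho[0])
    grad = J.T.dot(rho[1] * f)    # gradient of the robust cost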
|
||||
.. versionadded:: 0.17.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
|
||||
and Conjugate Gradient Method for Large-Scale Bound-Constrained
|
||||
Minimization Problems," SIAM Journal on Scientific Computing,
|
||||
Vol. 21, Number 1, pp 1-23, 1999.
|
||||
.. [NR] William H. Press et. al., "Numerical Recipes. The Art of Scientific
|
||||
Computing. 3rd edition", Sec. 5.7.
|
||||
.. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
|
||||
solution of the trust region problem by minimization over
|
||||
two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
|
||||
1988.
|
||||
.. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
|
||||
sparse Jacobian matrices", Journal of the Institute of
|
||||
Mathematics and its Applications, 13, pp. 117-120, 1974.
|
||||
.. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
|
||||
and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
|
||||
Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
|
||||
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region
|
||||
Dogleg Approach for Unconstrained and Bound Constrained
|
||||
Nonlinear Optimization", WSEAS International Conference on
|
||||
Applied Mathematics, Corfu, Greece, 2004.
|
||||
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization,
|
||||
2nd edition", Chapter 4.
|
||||
.. [BA] B. Triggs et. al., "Bundle Adjustment - A Modern Synthesis",
|
||||
Proceedings of the International Workshop on Vision Algorithms:
|
||||
Theory and Practice, pp. 298-372, 1999.
|
||||
|
||||
Examples
|
||||
--------
|
||||
In this example we find a minimum of the Rosenbrock function without bounds
|
||||
on independent variables.
|
||||
|
||||
>>> import numpy as np
|
||||
>>> def fun_rosenbrock(x):
|
||||
... return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])
|
||||
|
||||
Notice that we only provide the vector of the residuals. The algorithm
|
||||
constructs the cost function as a sum of squares of the residuals, which
|
||||
gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.
|
||||
|
||||
>>> from scipy.optimize import least_squares
|
||||
>>> x0_rosenbrock = np.array([2, 2])
|
||||
>>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
|
||||
>>> res_1.x
|
||||
array([ 1., 1.])
|
||||
>>> res_1.cost
|
||||
9.8669242910846867e-30
|
||||
>>> res_1.optimality
|
||||
8.8928864934219529e-14
|
||||
|
||||
We now constrain the variables, in such a way that the previous solution
becomes infeasible. Specifically, we require that ``x[1] >= 1.5`` and leave
``x[0]`` unconstrained. To this end, we specify the `bounds` parameter
to `least_squares` in the form ``bounds=([-np.inf, 1.5], np.inf)``.
|
||||
|
||||
We also provide the analytic Jacobian:
|
||||
|
||||
>>> def jac_rosenbrock(x):
|
||||
... return np.array([
|
||||
... [-20 * x[0], 10],
|
||||
... [-1, 0]])
|
||||
|
||||
Putting this all together, we see that the new solution lies on the bound:
|
||||
|
||||
>>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
|
||||
... bounds=([-np.inf, 1.5], np.inf))
|
||||
>>> res_2.x
|
||||
array([ 1.22437075, 1.5 ])
|
||||
>>> res_2.cost
|
||||
0.025213093946805685
|
||||
>>> res_2.optimality
|
||||
1.5885401433157753e-07
|
||||
|
||||
Now we solve a system of equations (i.e., the cost function should be zero
|
||||
at a minimum) for a Broyden tridiagonal vector-valued function of 100000
|
||||
variables:
|
||||
|
||||
>>> def fun_broyden(x):
|
||||
... f = (3 - x) * x + 1
|
||||
... f[1:] -= x[:-1]
|
||||
... f[:-1] -= 2 * x[1:]
|
||||
... return f
|
||||
|
||||
The corresponding Jacobian matrix is sparse. We tell the algorithm to
|
||||
estimate it by finite differences and provide the sparsity structure of
|
||||
Jacobian to significantly speed up this process.
|
||||
|
||||
>>> from scipy.sparse import lil_matrix
|
||||
>>> def sparsity_broyden(n):
|
||||
... sparsity = lil_matrix((n, n), dtype=int)
|
||||
... i = np.arange(n)
|
||||
... sparsity[i, i] = 1
|
||||
... i = np.arange(1, n)
|
||||
... sparsity[i, i - 1] = 1
|
||||
... i = np.arange(n - 1)
|
||||
... sparsity[i, i + 1] = 1
|
||||
... return sparsity
|
||||
...
|
||||
>>> n = 100000
|
||||
>>> x0_broyden = -np.ones(n)
|
||||
...
|
||||
>>> res_3 = least_squares(fun_broyden, x0_broyden,
|
||||
... jac_sparsity=sparsity_broyden(n))
|
||||
>>> res_3.cost
|
||||
4.5687069299604613e-23
|
||||
>>> res_3.optimality
|
||||
1.1650454296851518e-11
|
||||
|
||||
Let's also solve a curve fitting problem using a robust loss function to
|
||||
take care of outliers in the data. Define the model function as
|
||||
``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
|
||||
observation and a, b, c are parameters to estimate.
|
||||
|
||||
First, define the function which generates the data with noise and
|
||||
outliers, define the model parameters, and generate data:
|
||||
|
||||
>>> from numpy.random import default_rng
|
||||
>>> rng = default_rng()
|
||||
>>> def gen_data(t, a, b, c, noise=0., n_outliers=0, seed=None):
|
||||
... rng = default_rng(seed)
|
||||
...
|
||||
... y = a + b * np.exp(t * c)
|
||||
...
|
||||
... error = noise * rng.standard_normal(t.size)
|
||||
... outliers = rng.integers(0, t.size, n_outliers)
|
||||
... error[outliers] *= 10
|
||||
...
|
||||
... return y + error
|
||||
...
|
||||
>>> a = 0.5
|
||||
>>> b = 2.0
|
||||
>>> c = -1
|
||||
>>> t_min = 0
|
||||
>>> t_max = 10
|
||||
>>> n_points = 15
|
||||
...
|
||||
>>> t_train = np.linspace(t_min, t_max, n_points)
|
||||
>>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)
|
||||
|
||||
Define function for computing residuals and initial estimate of
|
||||
parameters.
|
||||
|
||||
>>> def fun(x, t, y):
|
||||
... return x[0] + x[1] * np.exp(x[2] * t) - y
|
||||
...
|
||||
>>> x0 = np.array([1.0, 1.0, 0.0])
|
||||
|
||||
Compute a standard least-squares solution:
|
||||
|
||||
>>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))
|
||||
|
||||
Now compute two solutions with two different robust loss functions. The
|
||||
parameter `f_scale` is set to 0.1, meaning that inlier residuals should
|
||||
not significantly exceed 0.1 (the noise level used).
|
||||
|
||||
>>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
|
||||
... args=(t_train, y_train))
|
||||
>>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
|
||||
... args=(t_train, y_train))
|
||||
|
||||
And, finally, plot all the curves. We see that by selecting an appropriate
|
||||
`loss` we can get estimates close to optimal even in the presence of
|
||||
strong outliers. But keep in mind that generally it is recommended to try
|
||||
'soft_l1' or 'huber' losses first (if at all necessary) as the other two
|
||||
options may cause difficulties in the optimization process.
|
||||
|
||||
>>> t_test = np.linspace(t_min, t_max, n_points * 10)
|
||||
>>> y_true = gen_data(t_test, a, b, c)
|
||||
>>> y_lsq = gen_data(t_test, *res_lsq.x)
|
||||
>>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
|
||||
>>> y_log = gen_data(t_test, *res_log.x)
|
||||
...
|
||||
>>> import matplotlib.pyplot as plt
|
||||
>>> plt.plot(t_train, y_train, 'o')
|
||||
>>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
|
||||
>>> plt.plot(t_test, y_lsq, label='linear loss')
|
||||
>>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
|
||||
>>> plt.plot(t_test, y_log, label='cauchy loss')
|
||||
>>> plt.xlabel("t")
|
||||
>>> plt.ylabel("y")
|
||||
>>> plt.legend()
|
||||
>>> plt.show()
|
||||
|
||||
In the next example, we show how complex-valued residual functions of
|
||||
complex variables can be optimized with ``least_squares()``. Consider the
|
||||
following function:
|
||||
|
||||
>>> def f(z):
|
||||
... return z - (0.5 + 0.5j)
|
||||
|
||||
We wrap it into a function of real variables that returns real residuals
|
||||
by simply handling the real and imaginary parts as independent variables:
|
||||
|
||||
>>> def f_wrap(x):
|
||||
... fx = f(x[0] + 1j*x[1])
|
||||
... return np.array([fx.real, fx.imag])
|
||||
|
||||
Thus, instead of the original m-D complex function of n complex
|
||||
variables we optimize a 2m-D real function of 2n real variables:
|
||||
|
||||
>>> from scipy.optimize import least_squares
|
||||
>>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1]))
|
||||
>>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j
|
||||
>>> z
|
||||
(0.49999999999925893+0.49999999999925893j)
|
||||
|
||||
"""
|
||||
if method not in ['trf', 'dogbox', 'lm']:
|
||||
raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.")
|
||||
|
||||
if jac not in ['2-point', '3-point', 'cs'] and not callable(jac):
|
||||
raise ValueError("`jac` must be '2-point', '3-point', 'cs' or "
|
||||
"callable.")
|
||||
|
||||
if tr_solver not in [None, 'exact', 'lsmr']:
|
||||
raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.")
|
||||
|
||||
if loss not in IMPLEMENTED_LOSSES and not callable(loss):
|
||||
raise ValueError("`loss` must be one of {} or a callable."
|
||||
.format(IMPLEMENTED_LOSSES.keys()))
|
||||
|
||||
if method == 'lm' and loss != 'linear':
|
||||
raise ValueError("method='lm' supports only 'linear' loss function.")
|
||||
|
||||
if verbose not in [0, 1, 2]:
|
||||
raise ValueError("`verbose` must be in [0, 1, 2].")
|
||||
|
||||
if max_nfev is not None and max_nfev <= 0:
|
||||
raise ValueError("`max_nfev` must be None or positive integer.")
|
||||
|
||||
if np.iscomplexobj(x0):
|
||||
raise ValueError("`x0` must be real.")
|
||||
|
||||
x0 = np.atleast_1d(x0).astype(float)
|
||||
|
||||
if x0.ndim > 1:
|
||||
raise ValueError("`x0` must have at most 1 dimension.")
|
||||
|
||||
if isinstance(bounds, Bounds):
|
||||
lb, ub = bounds.lb, bounds.ub
|
||||
bounds = (lb, ub)
|
||||
else:
|
||||
if len(bounds) == 2:
|
||||
lb, ub = prepare_bounds(bounds, x0.shape[0])
|
||||
else:
|
||||
raise ValueError("`bounds` must contain 2 elements.")
|
||||
|
||||
if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)):
|
||||
raise ValueError("Method 'lm' doesn't support bounds.")
|
||||
|
||||
if lb.shape != x0.shape or ub.shape != x0.shape:
|
||||
raise ValueError("Inconsistent shapes between bounds and `x0`.")
|
||||
|
||||
if np.any(lb >= ub):
|
||||
raise ValueError("Each lower bound must be strictly less than each "
|
||||
"upper bound.")
|
||||
|
||||
if not in_bounds(x0, lb, ub):
|
||||
raise ValueError("`x0` is infeasible.")
|
||||
|
||||
x_scale = check_x_scale(x_scale, x0)
|
||||
|
||||
ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)
|
||||
|
||||
if method == 'trf':
|
||||
x0 = make_strictly_feasible(x0, lb, ub)
|
||||
|
||||
def fun_wrapped(x):
|
||||
return np.atleast_1d(fun(x, *args, **kwargs))
|
||||
|
||||
f0 = fun_wrapped(x0)
|
||||
|
||||
if f0.ndim != 1:
|
||||
raise ValueError("`fun` must return at most 1-d array_like. "
|
||||
f"f0.shape: {f0.shape}")
|
||||
|
||||
if not np.all(np.isfinite(f0)):
|
||||
raise ValueError("Residuals are not finite in the initial point.")
|
||||
|
||||
n = x0.size
|
||||
m = f0.size
|
||||
|
||||
if method == 'lm' and m < n:
|
||||
raise ValueError("Method 'lm' doesn't work when the number of "
|
||||
"residuals is less than the number of variables.")
|
||||
|
||||
loss_function = construct_loss_function(m, loss, f_scale)
|
||||
if callable(loss):
|
||||
rho = loss_function(f0)
|
||||
if rho.shape != (3, m):
|
||||
raise ValueError("The return value of `loss` callable has wrong "
|
||||
"shape.")
|
||||
initial_cost = 0.5 * np.sum(rho[0])
|
||||
elif loss_function is not None:
|
||||
initial_cost = loss_function(f0, cost_only=True)
|
||||
else:
|
||||
initial_cost = 0.5 * np.dot(f0, f0)
|
||||
|
||||
if callable(jac):
|
||||
J0 = jac(x0, *args, **kwargs)
|
||||
|
||||
if issparse(J0):
|
||||
J0 = J0.tocsr()
|
||||
|
||||
def jac_wrapped(x, _=None):
|
||||
return jac(x, *args, **kwargs).tocsr()
|
||||
|
||||
elif isinstance(J0, LinearOperator):
|
||||
def jac_wrapped(x, _=None):
|
||||
return jac(x, *args, **kwargs)
|
||||
|
||||
else:
|
||||
J0 = np.atleast_2d(J0)
|
||||
|
||||
def jac_wrapped(x, _=None):
|
||||
return np.atleast_2d(jac(x, *args, **kwargs))
|
||||
|
||||
else: # Estimate Jacobian by finite differences.
|
||||
if method == 'lm':
|
||||
if jac_sparsity is not None:
|
||||
raise ValueError("method='lm' does not support "
|
||||
"`jac_sparsity`.")
|
||||
|
||||
if jac != '2-point':
|
||||
warn(f"jac='{jac}' works equivalently to '2-point' for method='lm'.",
|
||||
stacklevel=2)
|
||||
|
||||
J0 = jac_wrapped = None
|
||||
else:
|
||||
if jac_sparsity is not None and tr_solver == 'exact':
|
||||
raise ValueError("tr_solver='exact' is incompatible "
|
||||
"with `jac_sparsity`.")
|
||||
|
||||
jac_sparsity = check_jac_sparsity(jac_sparsity, m, n)
|
||||
|
||||
def jac_wrapped(x, f):
|
||||
J = approx_derivative(fun, x, rel_step=diff_step, method=jac,
|
||||
f0=f, bounds=bounds, args=args,
|
||||
kwargs=kwargs, sparsity=jac_sparsity)
|
||||
if J.ndim != 2: # J is guaranteed not sparse.
|
||||
J = np.atleast_2d(J)
|
||||
|
||||
return J
|
||||
|
||||
J0 = jac_wrapped(x0, f0)
|
||||
|
||||
if J0 is not None:
|
||||
if J0.shape != (m, n):
|
||||
raise ValueError(
|
||||
f"The return value of `jac` has wrong shape: expected {(m, n)}, "
|
||||
f"actual {J0.shape}."
|
||||
)
|
||||
|
||||
if not isinstance(J0, np.ndarray):
|
||||
if method == 'lm':
|
||||
raise ValueError("method='lm' works only with dense "
|
||||
"Jacobian matrices.")
|
||||
|
||||
if tr_solver == 'exact':
|
||||
raise ValueError(
|
||||
"tr_solver='exact' works only with dense "
|
||||
"Jacobian matrices.")
|
||||
|
||||
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
|
||||
if isinstance(J0, LinearOperator) and jac_scale:
|
||||
raise ValueError("x_scale='jac' can't be used when `jac` "
|
||||
"returns LinearOperator.")
|
||||
|
||||
if tr_solver is None:
|
||||
if isinstance(J0, np.ndarray):
|
||||
tr_solver = 'exact'
|
||||
else:
|
||||
tr_solver = 'lsmr'
|
||||
|
||||
if method == 'lm':
|
||||
result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol,
|
||||
max_nfev, x_scale, diff_step)
|
||||
|
||||
elif method == 'trf':
|
||||
result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
|
||||
gtol, max_nfev, x_scale, loss_function, tr_solver,
|
||||
tr_options.copy(), verbose)
|
||||
|
||||
elif method == 'dogbox':
|
||||
if tr_solver == 'lsmr' and 'regularize' in tr_options:
|
||||
warn("The keyword 'regularize' in `tr_options` is not relevant "
|
||||
"for 'dogbox' method.",
|
||||
stacklevel=2)
|
||||
tr_options = tr_options.copy()
|
||||
del tr_options['regularize']
|
||||
|
||||
result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol,
|
||||
xtol, gtol, max_nfev, x_scale, loss_function,
|
||||
tr_solver, tr_options, verbose)
|
||||
|
||||
result.message = TERMINATION_MESSAGES[result.status]
|
||||
result.success = result.status > 0
|
||||
|
||||
if verbose >= 1:
|
||||
print(result.message)
|
||||
print("Function evaluations {}, initial cost {:.4e}, final cost "
|
||||
"{:.4e}, first-order optimality {:.2e}."
|
||||
.format(result.nfev, initial_cost, result.cost,
|
||||
result.optimality))
|
||||
|
||||
return result
|
||||
@ -0,0 +1,362 @@
|
||||
"""Linear least squares with bound constraints on independent variables."""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
from scipy.sparse import issparse, csr_matrix
|
||||
from scipy.sparse.linalg import LinearOperator, lsmr
|
||||
from scipy.optimize import OptimizeResult
|
||||
from scipy.optimize._minimize import Bounds
|
||||
|
||||
from .common import in_bounds, compute_grad
|
||||
from .trf_linear import trf_linear
|
||||
from .bvls import bvls
|
||||
|
||||
|
||||
def prepare_bounds(bounds, n):
|
||||
if len(bounds) != 2:
|
||||
raise ValueError("`bounds` must contain 2 elements.")
|
||||
lb, ub = (np.asarray(b, dtype=float) for b in bounds)
|
||||
|
||||
if lb.ndim == 0:
|
||||
lb = np.resize(lb, n)
|
||||
|
||||
if ub.ndim == 0:
|
||||
ub = np.resize(ub, n)
|
||||
|
||||
return lb, ub
|
||||
|
||||
|
||||
TERMINATION_MESSAGES = {
|
||||
-1: "The algorithm was not able to make progress on the last iteration.",
|
||||
0: "The maximum number of iterations is exceeded.",
|
||||
1: "The first-order optimality measure is less than `tol`.",
|
||||
2: "The relative change of the cost function is less than `tol`.",
|
||||
3: "The unconstrained solution is optimal."
|
||||
}
|
||||
|
||||
|
||||
def lsq_linear(A, b, bounds=(-np.inf, np.inf), method='trf', tol=1e-10,
|
||||
lsq_solver=None, lsmr_tol=None, max_iter=None,
|
||||
verbose=0, *, lsmr_maxiter=None,):
|
||||
r"""Solve a linear least-squares problem with bounds on the variables.
|
||||
|
||||
Given an m-by-n design matrix A and a target vector b with m elements,
|
||||
`lsq_linear` solves the following optimization problem::
|
||||
|
||||
minimize 0.5 * ||A x - b||**2
|
||||
subject to lb <= x <= ub
|
||||
|
||||
This optimization problem is convex, hence a found minimum (if iterations
|
||||
have converged) is guaranteed to be global.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : array_like, sparse matrix or LinearOperator, shape (m, n)
|
||||
Design matrix. Can be `scipy.sparse.linalg.LinearOperator`.
|
||||
b : array_like, shape (m,)
|
||||
Target vector.
|
||||
bounds : 2-tuple of array_like or `Bounds`, optional
|
||||
Lower and upper bounds on parameters. Defaults to no bounds.
|
||||
There are two ways to specify the bounds:
|
||||
|
||||
- Instance of `Bounds` class.
|
||||
|
||||
- 2-tuple of array_like: Each element of the tuple must be either
|
||||
an array with the length equal to the number of parameters, or a
|
||||
scalar (in which case the bound is taken to be the same for all
|
||||
parameters). Use ``np.inf`` with an appropriate sign to disable
|
||||
bounds on all or some parameters.
|
||||
|
||||
method : 'trf' or 'bvls', optional
|
||||
Method to perform minimization.
|
||||
|
||||
* 'trf' : Trust Region Reflective algorithm adapted for a linear
|
||||
least-squares problem. This is an interior-point-like method
|
||||
and the required number of iterations is weakly correlated with
|
||||
the number of variables.
|
||||
* 'bvls' : Bounded-variable least-squares algorithm. This is
|
||||
an active set method, which requires the number of iterations
|
||||
comparable to the number of variables. Can't be used when `A` is
|
||||
sparse or LinearOperator.
|
||||
|
||||
Default is 'trf'.
|
||||
tol : float, optional
|
||||
Tolerance parameter. The algorithm terminates if a relative change
|
||||
of the cost function is less than `tol` on the last iteration.
|
||||
Additionally, the first-order optimality measure is considered:
|
||||
|
||||
* ``method='trf'`` terminates if the uniform norm of the gradient,
|
||||
scaled to account for the presence of the bounds, is less than
|
||||
`tol`.
|
||||
* ``method='bvls'`` terminates if Karush-Kuhn-Tucker conditions
|
||||
are satisfied within `tol` tolerance.
|
||||
|
||||
lsq_solver : {None, 'exact', 'lsmr'}, optional
|
||||
Method of solving unbounded least-squares problems throughout
|
||||
iterations:
|
||||
|
||||
* 'exact' : Use dense QR or SVD decomposition approach. Can't be
|
||||
used when `A` is sparse or LinearOperator.
|
||||
* 'lsmr' : Use `scipy.sparse.linalg.lsmr` iterative procedure
|
||||
which requires only matrix-vector product evaluations. Can't
|
||||
be used with ``method='bvls'``.
|
||||
|
||||
If None (default), the solver is chosen based on type of `A`.
|
||||
lsmr_tol : None, float or 'auto', optional
|
||||
Tolerance parameters 'atol' and 'btol' for `scipy.sparse.linalg.lsmr`.
|
||||
If None (default), it is set to ``1e-2 * tol``. If 'auto', the
|
||||
tolerance will be adjusted based on the optimality of the current
|
||||
iterate, which can speed up the optimization process, but is not always
|
||||
reliable.
|
||||
max_iter : None or int, optional
|
||||
Maximum number of iterations before termination. If None (default), it
|
||||
is set to 100 for ``method='trf'`` or to the number of variables for
|
||||
``method='bvls'`` (not counting iterations for 'bvls' initialization).
|
||||
verbose : {0, 1, 2}, optional
|
||||
Level of algorithm's verbosity:
|
||||
|
||||
* 0 : work silently (default).
|
||||
* 1 : display a termination report.
|
||||
* 2 : display progress during iterations.
|
||||
lsmr_maxiter : None or int, optional
|
||||
Maximum number of iterations for the lsmr least squares solver,
|
||||
if it is used (by setting ``lsq_solver='lsmr'``). If None (default), it
|
||||
uses lsmr's default of ``min(m, n)`` where ``m`` and ``n`` are the
|
||||
number of rows and columns of `A`, respectively. Has no effect if
|
||||
``lsq_solver='exact'``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
OptimizeResult with the following fields defined:
|
||||
x : ndarray, shape (n,)
|
||||
Solution found.
|
||||
cost : float
|
||||
Value of the cost function at the solution.
|
||||
fun : ndarray, shape (m,)
|
||||
Vector of residuals at the solution.
|
||||
optimality : float
|
||||
First-order optimality measure. The exact meaning depends on `method`,
|
||||
refer to the description of `tol` parameter.
|
||||
active_mask : ndarray of int, shape (n,)
|
||||
Each component shows whether a corresponding constraint is active
|
||||
(that is, whether a variable is at the bound):
|
||||
|
||||
* 0 : a constraint is not active.
|
||||
* -1 : a lower bound is active.
|
||||
* 1 : an upper bound is active.
|
||||
|
||||
Might be somewhat arbitrary for the `trf` method as it generates a
|
||||
sequence of strictly feasible iterates and active_mask is determined
|
||||
within a tolerance threshold.
|
||||
unbounded_sol : tuple
|
||||
Unbounded least squares solution tuple returned by the least squares
|
||||
solver (set with `lsq_solver` option). If `lsq_solver` is not set or is
|
||||
set to ``'exact'``, the tuple contains an ndarray of shape (n,) with
|
||||
the unbounded solution, an ndarray with the sum of squared residuals,
|
||||
an int with the rank of `A`, and an ndarray with the singular values
|
||||
of `A` (see NumPy's ``linalg.lstsq`` for more information). If
|
||||
`lsq_solver` is set to ``'lsmr'``, the tuple contains an ndarray of
|
||||
shape (n,) with the unbounded solution, an int with the exit code,
|
||||
an int with the number of iterations, and five floats with
|
||||
various norms and the condition number of `A` (see SciPy's
|
||||
``sparse.linalg.lsmr`` for more information). This output can be
|
||||
useful for determining the convergence of the least squares solver,
|
||||
particularly the iterative ``'lsmr'`` solver. The unbounded least
|
||||
squares problem is to minimize ``0.5 * ||A x - b||**2``.
|
||||
nit : int
|
||||
Number of iterations. Zero if the unconstrained solution is optimal.
|
||||
status : int
|
||||
Reason for algorithm termination:
|
||||
|
||||
* -1 : the algorithm was not able to make progress on the last
|
||||
iteration.
|
||||
* 0 : the maximum number of iterations is exceeded.
|
||||
* 1 : the first-order optimality measure is less than `tol`.
|
||||
* 2 : the relative change of the cost function is less than `tol`.
|
||||
* 3 : the unconstrained solution is optimal.
|
||||
|
||||
message : str
|
||||
Verbal description of the termination reason.
|
||||
success : bool
|
||||
True if one of the convergence criteria is satisfied (`status` > 0).
|
||||
|
||||
See Also
|
||||
--------
|
||||
nnls : Linear least squares with non-negativity constraint.
|
||||
least_squares : Nonlinear least squares with bounds on the variables.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The algorithm first computes the unconstrained least-squares solution by
|
||||
`numpy.linalg.lstsq` or `scipy.sparse.linalg.lsmr` depending on
|
||||
`lsq_solver`. This solution is returned as optimal if it lies within the
|
||||
bounds.
|
||||
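In pseudocode (a simplified mirror of the implementation below)::

    x_lsq = np.linalg.lstsq(A, b, rcond=None)[0]   # or lsmr(A, b)[0]
    if in_bounds(x_lsq, lb, ub):
        return x_lsq            # status 3: unconstrained solution is optimal
    # otherwise run 'trf' or 'bvls' starting from x_lsq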
|
||||
Method 'trf' runs the adaptation of the algorithm described in [STIR]_ for
|
||||
a linear least-squares problem. The iterations are essentially the same as
|
||||
in the nonlinear least-squares algorithm, but as the quadratic function
|
||||
model is always accurate, we don't need to track or modify the radius of
|
||||
a trust region. The line search (backtracking) is used as a safety net
|
||||
when a selected step does not decrease the cost function. A more detailed
description of the algorithm can be found in `scipy.optimize.least_squares`.
|
||||
|
||||
Method 'bvls' runs a Python implementation of the algorithm described in
|
||||
[BVLS]_. The algorithm maintains active and free sets of variables; on
each iteration it chooses a new variable to move from the active set to the
free set and then solves the unconstrained least-squares problem on the free
variables. This algorithm is guaranteed to give an accurate solution
eventually, but may require up to n iterations for a problem with n
variables. Additionally, an ad-hoc initialization procedure is
implemented that determines which variables to set free or active
initially. It takes some number of iterations before the actual BVLS starts,
|
||||
but can significantly reduce the number of further iterations.
|
||||
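A small dense problem on which 'bvls' can be used (illustrative only;
any small feasible problem behaves the same way)::

    >>> import numpy as np
    >>> from scipy.optimize import lsq_linear
    >>> A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    >>> b = np.array([6.0, 7.0, 8.0])
    >>> res = lsq_linear(A, b, bounds=([0.0, 0.0], [1.0, 1.0]),
    ...                  method='bvls')
    >>> bool(np.all((res.x >= 0) & (res.x <= 1)))
    True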
|
||||
References
|
||||
----------
|
||||
.. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
|
||||
and Conjugate Gradient Method for Large-Scale Bound-Constrained
|
||||
Minimization Problems," SIAM Journal on Scientific Computing,
|
||||
Vol. 21, Number 1, pp 1-23, 1999.
|
||||
.. [BVLS] P. B. Stark and R. L. Parker, "Bounded-Variable Least-Squares:
|
||||
an Algorithm and Applications", Computational Statistics, 10,
|
||||
129-141, 1995.
|
||||
|
||||
Examples
|
||||
--------
|
||||
In this example, a problem with a large sparse matrix and bounds on the
|
||||
variables is solved.
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from scipy.sparse import rand
|
||||
>>> from scipy.optimize import lsq_linear
|
||||
>>> rng = np.random.default_rng()
|
||||
...
|
||||
>>> m = 20000
|
||||
>>> n = 10000
|
||||
...
|
||||
>>> A = rand(m, n, density=1e-4, random_state=rng)
|
||||
>>> b = rng.standard_normal(m)
|
||||
...
|
||||
>>> lb = rng.standard_normal(n)
|
||||
>>> ub = lb + 1
|
||||
...
|
||||
>>> res = lsq_linear(A, b, bounds=(lb, ub), lsmr_tol='auto', verbose=1)
|
||||
# may vary
|
||||
The relative change of the cost function is less than `tol`.
|
||||
Number of iterations 16, initial cost 1.5039e+04, final cost 1.1112e+04,
|
||||
first-order optimality 4.66e-08.
|
||||
"""
|
||||
if method not in ['trf', 'bvls']:
|
||||
raise ValueError("`method` must be 'trf' or 'bvls'")
|
||||
|
||||
if lsq_solver not in [None, 'exact', 'lsmr']:
|
||||
raise ValueError("`solver` must be None, 'exact' or 'lsmr'.")
|
||||
|
||||
if verbose not in [0, 1, 2]:
|
||||
raise ValueError("`verbose` must be in [0, 1, 2].")
|
||||
|
||||
if issparse(A):
|
||||
A = csr_matrix(A)
|
||||
elif not isinstance(A, LinearOperator):
|
||||
A = np.atleast_2d(np.asarray(A))
|
||||
|
||||
if method == 'bvls':
|
||||
if lsq_solver == 'lsmr':
|
||||
raise ValueError("method='bvls' can't be used with "
|
||||
"lsq_solver='lsmr'")
|
||||
|
||||
if not isinstance(A, np.ndarray):
|
||||
raise ValueError("method='bvls' can't be used with `A` being "
|
||||
"sparse or LinearOperator.")
|
||||
|
||||
if lsq_solver is None:
|
||||
if isinstance(A, np.ndarray):
|
||||
lsq_solver = 'exact'
|
||||
else:
|
||||
lsq_solver = 'lsmr'
|
||||
elif lsq_solver == 'exact' and not isinstance(A, np.ndarray):
|
||||
raise ValueError("`exact` solver can't be used when `A` is "
|
||||
"sparse or LinearOperator.")
|
||||
|
||||
if len(A.shape) != 2: # No ndim for LinearOperator.
|
||||
raise ValueError("`A` must have at most 2 dimensions.")
|
||||
|
||||
if max_iter is not None and max_iter <= 0:
|
||||
raise ValueError("`max_iter` must be None or positive integer.")
|
||||
|
||||
m, n = A.shape
|
||||
|
||||
b = np.atleast_1d(b)
|
||||
if b.ndim != 1:
|
||||
raise ValueError("`b` must have at most 1 dimension.")
|
||||
|
||||
if b.size != m:
|
||||
raise ValueError("Inconsistent shapes between `A` and `b`.")
|
||||
|
||||
if isinstance(bounds, Bounds):
|
||||
lb = bounds.lb
|
||||
ub = bounds.ub
|
||||
else:
|
||||
lb, ub = prepare_bounds(bounds, n)
|
||||
|
||||
if lb.shape != (n,) or ub.shape != (n,):
|
||||
raise ValueError("Bounds have wrong shape.")
|
||||
|
||||
if np.any(lb >= ub):
|
||||
raise ValueError("Each lower bound must be strictly less than each "
|
||||
"upper bound.")
|
||||
|
||||
if lsmr_maxiter is not None and lsmr_maxiter < 1:
|
||||
raise ValueError("`lsmr_maxiter` must be None or positive integer.")
|
||||
|
||||
if not ((isinstance(lsmr_tol, float) and lsmr_tol > 0) or
|
||||
lsmr_tol in ('auto', None)):
|
||||
raise ValueError("`lsmr_tol` must be None, 'auto', or positive float.")
|
||||
|
||||
if lsq_solver == 'exact':
|
||||
unbd_lsq = np.linalg.lstsq(A, b, rcond=-1)
|
||||
elif lsq_solver == 'lsmr':
|
||||
first_lsmr_tol = lsmr_tol # tol of first call to lsmr
|
||||
if lsmr_tol is None or lsmr_tol == 'auto':
|
||||
first_lsmr_tol = 1e-2 * tol # default if lsmr_tol not defined
|
||||
unbd_lsq = lsmr(A, b, maxiter=lsmr_maxiter,
|
||||
atol=first_lsmr_tol, btol=first_lsmr_tol)
|
||||
x_lsq = unbd_lsq[0] # extract the solution from the least squares solver
|
||||
|
||||
if in_bounds(x_lsq, lb, ub):
|
||||
r = A @ x_lsq - b
|
||||
cost = 0.5 * np.dot(r, r)
|
||||
termination_status = 3
|
||||
termination_message = TERMINATION_MESSAGES[termination_status]
|
||||
g = compute_grad(A, r)
|
||||
g_norm = norm(g, ord=np.inf)
|
||||
|
||||
if verbose > 0:
|
||||
print(termination_message)
|
||||
print(f"Final cost {cost:.4e}, first-order optimality {g_norm:.2e}")
|
||||
|
||||
return OptimizeResult(
|
||||
x=x_lsq, fun=r, cost=cost, optimality=g_norm,
|
||||
active_mask=np.zeros(n), unbounded_sol=unbd_lsq,
|
||||
nit=0, status=termination_status,
|
||||
message=termination_message, success=True)
|
||||
|
||||
if method == 'trf':
|
||||
res = trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol,
|
||||
max_iter, verbose, lsmr_maxiter=lsmr_maxiter)
|
||||
elif method == 'bvls':
|
||||
res = bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose)
|
||||
|
||||
res.unbounded_sol = unbd_lsq
|
||||
res.message = TERMINATION_MESSAGES[res.status]
|
||||
res.success = res.status > 0
|
||||
|
||||
if verbose > 0:
|
||||
print(res.message)
|
||||
print(
|
||||
f"Number of iterations {res.nit}, initial cost {res.initial_cost:.4e}, "
|
||||
f"final cost {res.cost:.4e}, first-order optimality {res.optimality:.2e}."
|
||||
)
|
||||
|
||||
del res.initial_cost
|
||||
|
||||
return res
|
||||
560
venv/lib/python3.12/site-packages/scipy/optimize/_lsq/trf.py
Normal file
@ -0,0 +1,560 @@
|
||||
"""Trust Region Reflective algorithm for least-squares optimization.
|
||||
|
||||
The algorithm is based on ideas from paper [STIR]_. The main idea is to
|
||||
account for the presence of the bounds by appropriate scaling of the variables (or,
|
||||
equivalently, changing a trust-region shape). Let's introduce a vector v:
|
||||
|
||||
| ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
|
||||
v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
|
||||
| 1, otherwise
|
||||
|
||||
where g is the gradient of a cost function and lb, ub are the bounds. Its
|
||||
components are distances to the bounds at which the anti-gradient points (if
|
||||
this distance is finite). Define a scaling matrix D = diag(v**0.5).
|
||||
First-order optimality conditions can be stated as
|
||||
|
||||
D^2 g(x) = 0.
|
||||
|
||||
This means that the gradient components should be zero for strictly interior
variables and, for variables on a bound, the corresponding gradient component
must point inside the feasible region.
|
||||
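A sketch of how v and D could be computed (illustration only; the actual
helper used below is CL_scaling_vector from common.py, which also returns
the derivative information dv needed later)::

    import numpy as np

    def scaling_vector(x, g, lb, ub):
        v = np.ones_like(x)
        mask = (g < 0) & (ub < np.inf)
        v[mask] = ub[mask] - x[mask]
        mask = (g > 0) & (lb > -np.inf)
        v[mask] = x[mask] - lb[mask]
        return v

    # The implementation below also folds `x_scale` into d = v**0.5 * scale.
    d = scaling_vector(x, g, lb, ub)**0.5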
|
||||
Now consider this system of equations as a new optimization problem. If the
|
||||
point x is strictly interior (not on the bound), then the left-hand side is
|
||||
differentiable and the Newton step for it satisfies
|
||||
|
||||
(D^2 H + diag(g) Jv) p = -D^2 g
|
||||
|
||||
where H is the Hessian matrix (or its J^T J approximation in least squares),
|
||||
Jv is the Jacobian matrix of v with components -1, 1 or 0, such that all
|
||||
elements of matrix C = diag(g) Jv are non-negative. Introduce the change
|
||||
of the variables x = D x_h (_h would be "hat" in LaTeX). In the new variables,
|
||||
we have a Newton step satisfying
|
||||
|
||||
B_h p_h = -g_h,
|
||||
|
||||
where B_h = D H D + C, g_h = D g. In least squares B_h = J_h^T J_h, where
|
||||
J_h = J D. Note that J_h and g_h are proper Jacobian and gradient with respect
|
||||
to "hat" variables. To guarantee global convergence we formulate a
|
||||
trust-region problem based on the Newton step in the new variables:
|
||||
|
||||
0.5 * p_h^T B_h p_h + g_h^T p_h -> min, ||p_h|| <= Delta
|
||||
|
||||
In the original space B = H + D^{-1} C D^{-1}, and the equivalent trust-region
|
||||
problem is
|
||||
|
||||
0.5 * p^T B p + g^T p -> min, ||D^{-1} p|| <= Delta
|
||||
|
||||
Here, the meaning of the matrix D becomes more clear: it alters the shape
|
||||
of a trust-region, such that large steps towards the bounds are not allowed.
|
||||
In the implementation, the trust-region problem is solved in "hat" space,
|
||||
but handling of the bounds is done in the original space (see below and read
|
||||
the code).
|
||||
|
||||
The introduction of the matrix D doesn't allow the bounds to be ignored: the
algorithm must keep iterates strictly feasible (to satisfy the aforementioned
differentiability requirement), and the parameter theta controls the step back
from the boundary (see the code for details).
|
||||
|
||||
The algorithm does another important trick. If the trust-region solution
|
||||
doesn't fit into the bounds, then a reflected (from a firstly encountered
|
||||
bound) search direction is considered. For motivation and analysis refer to
|
||||
the [STIR]_ paper (and other papers of the authors). In practice, it doesn't need
much justification: the algorithm simply chooses the best step among
|
||||
three: a constrained trust-region step, a reflected step and a constrained
|
||||
Cauchy step (a minimizer along -g_h in "hat" space, or -D^2 g in the original
|
||||
space).
|
||||
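Schematically, the reflection amounts to flipping the components of the
step that first hit a bound (this mirrors select_step below)::

    r_h = p_h.copy()
    r_h[hits.astype(bool)] *= -1   # flip components that hit a bound first
    r = d * r_h                    # reflected direction in the original space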
|
||||
Another feature is that a trust-region radius control strategy is modified to
|
||||
account for the appearance of the diagonal C matrix (called diag_h in the code).
|
||||
|
||||
Note that all the described peculiarities vanish when we consider
|
||||
problems without bounds (the algorithm becomes a standard trust-region type
|
||||
algorithm very similar to ones implemented in MINPACK).
|
||||
|
||||
The implementation supports two methods of solving the trust-region problem.
|
||||
The first, called 'exact', applies SVD to the Jacobian and then solves the problem
very accurately using the algorithm described in [JJMore]_. It is not
applicable to large problems. The second, called 'lsmr', uses the 2-D subspace
approach (sometimes called "indefinite dogleg"), where the problem is solved
in a subspace spanned by the gradient and the approximate Gauss-Newton step
found by ``scipy.sparse.linalg.lsmr``. A 2-D trust-region problem is
reformulated as a 4th order algebraic equation and solved very accurately by
``numpy.roots``. The subspace approach allows solving very large problems
(up to a couple of million residuals on a regular PC), provided the Jacobian
|
||||
matrix is sufficiently sparse.
|
||||
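Schematically, the 2-D subspace reduction used in the 'lsmr' branch below
looks as follows (the bound-constrained solver additionally includes the
diag_h term)::

    S = np.vstack((g_h, gn_h)).T          # span{gradient, Gauss-Newton step}
    S, _ = qr(S, mode='economic')         # orthonormal basis of the subspace
    JS = J_h.dot(S)
    B_S = np.dot(JS.T, JS)                # reduced Gauss-Newton Hessian
    g_S = S.T.dot(g_h)                    # reduced gradient
    p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
    p_h = S.dot(p_S)                      # step in the full "hat" space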
|
||||
References
|
||||
----------
|
||||
.. [STIR] Branch, M.A., T.F. Coleman, and Y. Li, "A Subspace, Interior,
|
||||
and Conjugate Gradient Method for Large-Scale Bound-Constrained
|
||||
Minimization Problems," SIAM Journal on Scientific Computing,
|
||||
Vol. 21, Number 1, pp 1-23, 1999.
|
||||
.. [JJMore] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
|
||||
and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
"""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
from scipy.linalg import svd, qr
|
||||
from scipy.sparse.linalg import lsmr
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .common import (
|
||||
step_size_to_bound, find_active_constraints, in_bounds,
|
||||
make_strictly_feasible, intersect_trust_region, solve_lsq_trust_region,
|
||||
solve_trust_region_2d, minimize_quadratic_1d, build_quadratic_1d,
|
||||
evaluate_quadratic, right_multiplied_operator, regularized_lsq_operator,
|
||||
CL_scaling_vector, compute_grad, compute_jac_scale, check_termination,
|
||||
update_tr_radius, scale_for_robust_loss_function, print_header_nonlinear,
|
||||
print_iteration_nonlinear)
|
||||
|
||||
|
||||
def trf(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
|
||||
loss_function, tr_solver, tr_options, verbose):
|
||||
# For efficiency, it makes sense to run the simplified version of the
|
||||
# algorithm when no bounds are imposed. We decided to write the two
|
||||
# separate functions. It violates the DRY principle, but it keeps the
# individual functions as readable as possible.
|
||||
if np.all(lb == -np.inf) and np.all(ub == np.inf):
|
||||
return trf_no_bounds(
|
||||
fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev, x_scale,
|
||||
loss_function, tr_solver, tr_options, verbose)
|
||||
else:
|
||||
return trf_bounds(
|
||||
fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
|
||||
loss_function, tr_solver, tr_options, verbose)
|
||||
|
||||
|
||||
def select_step(x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta):
|
||||
"""Select the best step according to Trust Region Reflective algorithm."""
|
||||
if in_bounds(x + p, lb, ub):
|
||||
p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)
|
||||
return p, p_h, -p_value
|
||||
|
||||
p_stride, hits = step_size_to_bound(x, p, lb, ub)
|
||||
|
||||
# Compute the reflected direction.
|
||||
r_h = np.copy(p_h)
|
||||
r_h[hits.astype(bool)] *= -1
|
||||
r = d * r_h
|
||||
|
||||
# Restrict trust-region step, such that it hits the bound.
|
||||
p *= p_stride
|
||||
p_h *= p_stride
|
||||
x_on_bound = x + p
|
||||
|
||||
# The reflected direction will first cross either the feasible-region
# boundary or the trust-region boundary.
|
||||
_, to_tr = intersect_trust_region(p_h, r_h, Delta)
|
||||
to_bound, _ = step_size_to_bound(x_on_bound, r, lb, ub)
|
||||
|
||||
# Find lower and upper bounds on a step size along the reflected
|
||||
# direction, considering the strict feasibility requirement. There is no
|
||||
# single correct way to do that, the chosen approach seems to work best
|
||||
# on test problems.
|
||||
r_stride = min(to_bound, to_tr)
|
||||
if r_stride > 0:
|
||||
r_stride_l = (1 - theta) * p_stride / r_stride
|
||||
if r_stride == to_bound:
|
||||
r_stride_u = theta * to_bound
|
||||
else:
|
||||
r_stride_u = to_tr
|
||||
else:
|
||||
r_stride_l = 0
|
||||
r_stride_u = -1
|
||||
|
||||
# Check if reflection step is available.
|
||||
if r_stride_l <= r_stride_u:
|
||||
a, b, c = build_quadratic_1d(J_h, g_h, r_h, s0=p_h, diag=diag_h)
|
||||
r_stride, r_value = minimize_quadratic_1d(
|
||||
a, b, r_stride_l, r_stride_u, c=c)
|
||||
r_h *= r_stride
|
||||
r_h += p_h
|
||||
r = r_h * d
|
||||
else:
|
||||
r_value = np.inf
|
||||
|
||||
# Now correct p_h to make it strictly interior.
|
||||
p *= theta
|
||||
p_h *= theta
|
||||
p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)
|
||||
|
||||
ag_h = -g_h
|
||||
ag = d * ag_h
|
||||
|
||||
to_tr = Delta / norm(ag_h)
|
||||
to_bound, _ = step_size_to_bound(x, ag, lb, ub)
|
||||
if to_bound < to_tr:
|
||||
ag_stride = theta * to_bound
|
||||
else:
|
||||
ag_stride = to_tr
|
||||
|
||||
a, b = build_quadratic_1d(J_h, g_h, ag_h, diag=diag_h)
|
||||
ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride)
|
||||
ag_h *= ag_stride
|
||||
ag *= ag_stride
|
||||
|
||||
if p_value < r_value and p_value < ag_value:
|
||||
return p, p_h, -p_value
|
||||
elif r_value < p_value and r_value < ag_value:
|
||||
return r, r_h, -r_value
|
||||
else:
|
||||
return ag, ag_h, -ag_value
|
||||
|
||||
|
||||
def trf_bounds(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev,
|
||||
x_scale, loss_function, tr_solver, tr_options, verbose):
|
||||
x = x0.copy()
|
||||
|
||||
f = f0
|
||||
f_true = f.copy()
|
||||
nfev = 1
|
||||
|
||||
J = J0
|
||||
njev = 1
|
||||
m, n = J.shape
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
cost = 0.5 * np.sum(rho[0])
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
else:
|
||||
cost = 0.5 * np.dot(f, f)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J)
|
||||
else:
|
||||
scale, scale_inv = x_scale, 1 / x_scale
|
||||
|
||||
v, dv = CL_scaling_vector(x, g, lb, ub)
|
||||
v[dv != 0] *= scale_inv[dv != 0]
|
||||
Delta = norm(x0 * scale_inv / v**0.5)
|
||||
if Delta == 0:
|
||||
Delta = 1.0
|
||||
|
||||
g_norm = norm(g * v, ord=np.inf)
|
||||
|
||||
f_augmented = np.zeros(m + n)
|
||||
if tr_solver == 'exact':
|
||||
J_augmented = np.empty((m + n, n))
|
||||
elif tr_solver == 'lsmr':
|
||||
reg_term = 0.0
|
||||
regularize = tr_options.pop('regularize', True)
|
||||
|
||||
if max_nfev is None:
|
||||
max_nfev = x0.size * 100
|
||||
|
||||
alpha = 0.0 # "Levenberg-Marquardt" parameter
|
||||
|
||||
termination_status = None
|
||||
iteration = 0
|
||||
step_norm = None
|
||||
actual_reduction = None
|
||||
|
||||
if verbose == 2:
|
||||
print_header_nonlinear()
|
||||
|
||||
while True:
|
||||
v, dv = CL_scaling_vector(x, g, lb, ub)
|
||||
|
||||
g_norm = norm(g * v, ord=np.inf)
|
||||
if g_norm < gtol:
|
||||
termination_status = 1
|
||||
|
||||
if verbose == 2:
|
||||
print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
|
||||
step_norm, g_norm)
|
||||
|
||||
if termination_status is not None or nfev == max_nfev:
|
||||
break
|
||||
|
||||
# Now compute variables in "hat" space. Here, we also account for
|
||||
# scaling introduced by `x_scale` parameter. This part is a bit tricky,
|
||||
# you have to write down the formulas and see how the trust-region
|
||||
# problem is formulated when the two types of scaling are applied.
|
||||
# The idea is that first we apply `x_scale` and then apply Coleman-Li
|
||||
# approach in the new variables.
|
||||
|
||||
# v is recomputed in the variables after applying `x_scale`, note that
|
||||
# components which were identically 1 are not affected.
|
||||
v[dv != 0] *= scale_inv[dv != 0]
|
||||
|
||||
# Here, we apply two types of scaling.
|
||||
d = v**0.5 * scale
|
||||
|
||||
# C = diag(g * scale) Jv
|
||||
diag_h = g * dv * scale
|
||||
|
||||
# After all this has been done, we continue normally.
|
||||
|
||||
# "hat" gradient.
|
||||
g_h = d * g
|
||||
|
||||
f_augmented[:m] = f
|
||||
if tr_solver == 'exact':
|
||||
J_augmented[:m] = J * d
|
||||
J_h = J_augmented[:m] # Memory view.
|
||||
J_augmented[m:] = np.diag(diag_h**0.5)
|
||||
U, s, V = svd(J_augmented, full_matrices=False)
|
||||
V = V.T
|
||||
uf = U.T.dot(f_augmented)
|
||||
elif tr_solver == 'lsmr':
|
||||
J_h = right_multiplied_operator(J, d)
|
||||
|
||||
if regularize:
|
||||
a, b = build_quadratic_1d(J_h, g_h, -g_h, diag=diag_h)
|
||||
to_tr = Delta / norm(g_h)
|
||||
ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
|
||||
reg_term = -ag_value / Delta**2
|
||||
|
||||
lsmr_op = regularized_lsq_operator(J_h, (diag_h + reg_term)**0.5)
|
||||
gn_h = lsmr(lsmr_op, f_augmented, **tr_options)[0]
|
||||
S = np.vstack((g_h, gn_h)).T
|
||||
S, _ = qr(S, mode='economic')
|
||||
JS = J_h.dot(S) # LinearOperator does dot too.
|
||||
B_S = np.dot(JS.T, JS) + np.dot(S.T * diag_h, S)
|
||||
g_S = S.T.dot(g_h)
|
||||
|
||||
# theta controls how far the step backs off from the bounds.
|
||||
theta = max(0.995, 1 - g_norm)
|
||||
|
||||
actual_reduction = -1
|
||||
while actual_reduction <= 0 and nfev < max_nfev:
|
||||
if tr_solver == 'exact':
|
||||
p_h, alpha, n_iter = solve_lsq_trust_region(
|
||||
n, m, uf, s, V, Delta, initial_alpha=alpha)
|
||||
elif tr_solver == 'lsmr':
|
||||
p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
|
||||
p_h = S.dot(p_S)
|
||||
|
||||
p = d * p_h # Trust-region solution in the original space.
|
||||
step, step_h, predicted_reduction = select_step(
|
||||
x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta)
|
||||
|
||||
x_new = make_strictly_feasible(x + step, lb, ub, rstep=0)
|
||||
f_new = fun(x_new)
|
||||
nfev += 1
|
||||
|
||||
step_h_norm = norm(step_h)
|
||||
|
||||
if not np.all(np.isfinite(f_new)):
|
||||
Delta = 0.25 * step_h_norm
|
||||
continue
|
||||
|
||||
# Usual trust-region step quality estimation.
|
||||
if loss_function is not None:
|
||||
cost_new = loss_function(f_new, cost_only=True)
|
||||
else:
|
||||
cost_new = 0.5 * np.dot(f_new, f_new)
|
||||
actual_reduction = cost - cost_new
|
||||
Delta_new, ratio = update_tr_radius(
|
||||
Delta, actual_reduction, predicted_reduction,
|
||||
step_h_norm, step_h_norm > 0.95 * Delta)
|
||||
|
||||
step_norm = norm(step)
|
||||
termination_status = check_termination(
|
||||
actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
|
||||
if termination_status is not None:
|
||||
break
|
||||
|
||||
alpha *= Delta / Delta_new
|
||||
Delta = Delta_new
|
||||
|
||||
if actual_reduction > 0:
|
||||
x = x_new
|
||||
|
||||
f = f_new
|
||||
f_true = f.copy()
|
||||
|
||||
cost = cost_new
|
||||
|
||||
J = jac(x, f)
|
||||
njev += 1
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J, scale_inv)
|
||||
else:
|
||||
step_norm = 0
|
||||
actual_reduction = 0
|
||||
|
||||
iteration += 1
|
||||
|
||||
if termination_status is None:
|
||||
termination_status = 0
|
||||
|
||||
active_mask = find_active_constraints(x, lb, ub, rtol=xtol)
|
||||
return OptimizeResult(
|
||||
x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
|
||||
active_mask=active_mask, nfev=nfev, njev=njev,
|
||||
status=termination_status)
|
||||
|
||||
|
||||
def trf_no_bounds(fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev,
|
||||
x_scale, loss_function, tr_solver, tr_options, verbose):
|
||||
x = x0.copy()
|
||||
|
||||
f = f0
|
||||
f_true = f.copy()
|
||||
nfev = 1
|
||||
|
||||
J = J0
|
||||
njev = 1
|
||||
m, n = J.shape
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
cost = 0.5 * np.sum(rho[0])
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
else:
|
||||
cost = 0.5 * np.dot(f, f)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J)
|
||||
else:
|
||||
scale, scale_inv = x_scale, 1 / x_scale
|
||||
|
||||
Delta = norm(x0 * scale_inv)
|
||||
if Delta == 0:
|
||||
Delta = 1.0
|
||||
|
||||
if tr_solver == 'lsmr':
|
||||
reg_term = 0
|
||||
damp = tr_options.pop('damp', 0.0)
|
||||
regularize = tr_options.pop('regularize', True)
|
||||
|
||||
if max_nfev is None:
|
||||
max_nfev = x0.size * 100
|
||||
|
||||
alpha = 0.0 # "Levenberg-Marquardt" parameter
|
||||
|
||||
termination_status = None
|
||||
iteration = 0
|
||||
step_norm = None
|
||||
actual_reduction = None
|
||||
|
||||
if verbose == 2:
|
||||
print_header_nonlinear()
|
||||
|
||||
while True:
|
||||
g_norm = norm(g, ord=np.inf)
|
||||
if g_norm < gtol:
|
||||
termination_status = 1
|
||||
|
||||
if verbose == 2:
|
||||
print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
|
||||
step_norm, g_norm)
|
||||
|
||||
if termination_status is not None or nfev == max_nfev:
|
||||
break
|
||||
|
||||
d = scale
|
||||
g_h = d * g
|
||||
|
||||
if tr_solver == 'exact':
|
||||
J_h = J * d
|
||||
U, s, V = svd(J_h, full_matrices=False)
|
||||
V = V.T
|
||||
uf = U.T.dot(f)
|
||||
elif tr_solver == 'lsmr':
|
||||
J_h = right_multiplied_operator(J, d)
|
||||
|
||||
if regularize:
|
||||
a, b = build_quadratic_1d(J_h, g_h, -g_h)
|
||||
to_tr = Delta / norm(g_h)
|
||||
ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
|
||||
reg_term = -ag_value / Delta**2
|
||||
|
||||
damp_full = (damp**2 + reg_term)**0.5
|
||||
gn_h = lsmr(J_h, f, damp=damp_full, **tr_options)[0]
|
||||
S = np.vstack((g_h, gn_h)).T
|
||||
S, _ = qr(S, mode='economic')
|
||||
JS = J_h.dot(S)
|
||||
B_S = np.dot(JS.T, JS)
|
||||
g_S = S.T.dot(g_h)
|
||||
|
||||
        actual_reduction = -1
        while actual_reduction <= 0 and nfev < max_nfev:
            if tr_solver == 'exact':
                step_h, alpha, n_iter = solve_lsq_trust_region(
                    n, m, uf, s, V, Delta, initial_alpha=alpha)
            elif tr_solver == 'lsmr':
                p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
                step_h = S.dot(p_S)

            predicted_reduction = -evaluate_quadratic(J_h, g_h, step_h)
            step = d * step_h
            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step_h)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

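            # Grow or shrink the trust region based on how well the actual
            # reduction agrees with the predicted one; the step counts as
            # hitting the boundary when its norm exceeds 0.95 * Delta.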
            Delta_new, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, step_h_norm > 0.95 * Delta)

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
            if termination_status is not None:
                break

            alpha *= Delta / Delta_new
            Delta = Delta_new

        if actual_reduction > 0:
            x = x_new

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    active_mask = np.zeros_like(x)
    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev,
        status=termination_status)
@ -0,0 +1,249 @@
"""The adaptation of the Trust Region Reflective algorithm for a linear
least-squares problem."""
import numpy as np
from numpy.linalg import norm
from scipy.linalg import qr, solve_triangular
from scipy.sparse.linalg import lsmr
from scipy.optimize import OptimizeResult

from .givens_elimination import givens_elimination
from .common import (
    EPS, step_size_to_bound, find_active_constraints, in_bounds,
    make_strictly_feasible, build_quadratic_1d, evaluate_quadratic,
    minimize_quadratic_1d, CL_scaling_vector, reflective_transformation,
    print_header_linear, print_iteration_linear, compute_grad,
    regularized_lsq_operator, right_multiplied_operator)


def regularized_lsq_with_qr(m, n, R, QTb, perm, diag, copy_R=True):
    """Solve regularized least squares using information from QR-decomposition.

    The initial problem is to solve the following system in a least-squares
    sense::

        A x = b
        D x = 0

    where D is a diagonal matrix. The method is based on a QR decomposition
    of the form A P = Q R, where P is a column permutation matrix, Q is an
    orthogonal matrix and R is an upper triangular matrix.

    Parameters
    ----------
    m, n : int
        Initial shape of A.
    R : ndarray, shape (n, n)
        Upper triangular matrix from QR decomposition of A.
    QTb : ndarray, shape (n,)
        First n components of Q^T b.
    perm : ndarray, shape (n,)
        Array defining column permutation of A, such that the ith column of
        P is the perm[i]-th column of the identity matrix.
    diag : ndarray, shape (n,)
        Array containing diagonal elements of D.

    Returns
    -------
    x : ndarray, shape (n,)
        Found least-squares solution.
    """
    if copy_R:
        R = R.copy()
    v = QTb.copy()

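    # Eliminate the diagonal regularization rows D (permuted to match the
    # columns of R) with Givens rotations, folding them into R and v.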
    givens_elimination(R, v, diag[perm])

    abs_diag_R = np.abs(np.diag(R))
    threshold = EPS * max(m, n) * np.max(abs_diag_R)
    nns, = np.nonzero(abs_diag_R > threshold)

    R = R[np.ix_(nns, nns)]
    v = v[nns]

    x = np.zeros(n)
    x[perm[nns]] = solve_triangular(R, v)

    return x


def backtracking(A, g, x, p, theta, p_dot_g, lb, ub):
    """Find an appropriate step size using backtracking line search."""
    alpha = 1
    while True:
        x_new, _ = reflective_transformation(x + alpha * p, lb, ub)
        step = x_new - x
        cost_change = -evaluate_quadratic(A, g, step)
        if cost_change > -0.1 * alpha * p_dot_g:
            break
        alpha *= 0.5

    active = find_active_constraints(x_new, lb, ub)
    if np.any(active != 0):
        x_new, _ = reflective_transformation(x + theta * alpha * p, lb, ub)
        x_new = make_strictly_feasible(x_new, lb, ub, rstep=0)
        step = x_new - x
        cost_change = -evaluate_quadratic(A, g, step)

    return x, step, cost_change


def select_step(x, A_h, g_h, c_h, p, p_h, d, lb, ub, theta):
    """Select the best step according to Trust Region Reflective algorithm."""
    if in_bounds(x + p, lb, ub):
        return p

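    # Otherwise compare three candidates: the step truncated at the bound
    # (p), the step reflected off that bound (r), and a bounded anti-gradient
    # step (ag); the one with the lowest quadratic-model value is returned.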
    p_stride, hits = step_size_to_bound(x, p, lb, ub)
    r_h = np.copy(p_h)
    r_h[hits.astype(bool)] *= -1
    r = d * r_h

    # Restrict step, such that it hits the bound.
    p *= p_stride
    p_h *= p_stride
    x_on_bound = x + p

    # Find the step size along reflected direction.
    r_stride_u, _ = step_size_to_bound(x_on_bound, r, lb, ub)

    # Stay interior.
    r_stride_l = (1 - theta) * r_stride_u
    r_stride_u *= theta

    if r_stride_u > 0:
        a, b, c = build_quadratic_1d(A_h, g_h, r_h, s0=p_h, diag=c_h)
        r_stride, r_value = minimize_quadratic_1d(
            a, b, r_stride_l, r_stride_u, c=c)
        r_h = p_h + r_h * r_stride
        r = d * r_h
    else:
        r_value = np.inf

    # Now correct p_h to make it strictly interior.
    p_h *= theta
    p *= theta
    p_value = evaluate_quadratic(A_h, g_h, p_h, diag=c_h)

    ag_h = -g_h
    ag = d * ag_h
    ag_stride_u, _ = step_size_to_bound(x, ag, lb, ub)
    ag_stride_u *= theta
    a, b = build_quadratic_1d(A_h, g_h, ag_h, diag=c_h)
    ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride_u)
    ag *= ag_stride

    if p_value < r_value and p_value < ag_value:
        return p
    elif r_value < p_value and r_value < ag_value:
        return r
    else:
        return ag


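# Note: this is the solver used by scipy.optimize.lsq_linear with
# method='trf' when the unconstrained solution is infeasible. A minimal,
# purely illustrative call (hypothetical values; the lsq_linear driver
# normally prepares x_lsq and the options):
#
#     A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
#     b = np.array([1.0, 2.0, 3.0])
#     lb, ub = np.zeros(2), np.ones(2)
#     x_lsq = np.linalg.lstsq(A, b, rcond=None)[0]
#     res = trf_linear(A, b, x_lsq, lb, ub, tol=1e-10, lsq_solver='exact',
#                      lsmr_tol=None, max_iter=None, verbose=0)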
def trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol,
               max_iter, verbose, *, lsmr_maxiter=None):
    m, n = A.shape
    x, _ = reflective_transformation(x_lsq, lb, ub)
    x = make_strictly_feasible(x, lb, ub, rstep=0.1)

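    # A is fixed for a linear problem, so the pivoted QR factorization
    # (or the LSMR work arrays) can be prepared once, outside the main loop.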
    if lsq_solver == 'exact':
        QT, R, perm = qr(A, mode='economic', pivoting=True)
        QT = QT.T

        if m < n:
            R = np.vstack((R, np.zeros((n - m, n))))

        QTr = np.zeros(n)
        k = min(m, n)
    elif lsq_solver == 'lsmr':
        r_aug = np.zeros(m + n)
        auto_lsmr_tol = False
        if lsmr_tol is None:
            lsmr_tol = 1e-2 * tol
        elif lsmr_tol == 'auto':
            auto_lsmr_tol = True

    r = A.dot(x) - b
    g = compute_grad(A, r)
    cost = 0.5 * np.dot(r, r)
    initial_cost = cost

    termination_status = None
    step_norm = None
    cost_change = None

    if max_iter is None:
        max_iter = 100

    if verbose == 2:
        print_header_linear()

    for iteration in range(max_iter):
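        # Coleman-Li scaling: v accounts for the distance to the active
        # bounds and dv is its derivative; they define the scaled gradient
        # and the diagonal term of the quadratic model.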
        v, dv = CL_scaling_vector(x, g, lb, ub)
        g_scaled = g * v
        g_norm = norm(g_scaled, ord=np.inf)
        if g_norm < tol:
            termination_status = 1

        if verbose == 2:
            print_iteration_linear(iteration, cost, cost_change,
                                   step_norm, g_norm)

        if termination_status is not None:
            break

        diag_h = g * dv
        diag_root_h = diag_h ** 0.5
        d = v ** 0.5
        g_h = d * g

        A_h = right_multiplied_operator(A, d)
        if lsq_solver == 'exact':
            QTr[:k] = QT.dot(r)
            p_h = -regularized_lsq_with_qr(m, n, R * d[perm], QTr, perm,
                                           diag_root_h, copy_R=False)
        elif lsq_solver == 'lsmr':
            lsmr_op = regularized_lsq_operator(A_h, diag_root_h)
            r_aug[:m] = r
            if auto_lsmr_tol:
                eta = 1e-2 * min(0.5, g_norm)
                lsmr_tol = max(EPS, min(0.1, eta * g_norm))
            p_h = -lsmr(lsmr_op, r_aug, maxiter=lsmr_maxiter,
                        atol=lsmr_tol, btol=lsmr_tol)[0]

        p = d * p_h

        p_dot_g = np.dot(p, g)
        if p_dot_g > 0:
            termination_status = -1

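        # theta controls how close to the bounds a step may go; it tends to 1
        # as the scaled gradient norm shrinks, keeping iterates strictly
        # interior.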
        theta = 1 - min(0.005, g_norm)
        step = select_step(x, A_h, g_h, diag_h, p, p_h, d, lb, ub, theta)
        cost_change = -evaluate_quadratic(A, g, step)

        # Perhaps almost never executed: the idea is that `p` is a descent
        # direction, so an acceptable cost decrease must be found by simple
        # "backtracking", otherwise the algorithm's logic would break.
        if cost_change < 0:
            x, step, cost_change = backtracking(
                A, g, x, p, theta, p_dot_g, lb, ub)
        else:
            x = make_strictly_feasible(x + step, lb, ub, rstep=0)

        step_norm = norm(step)
        r = A.dot(x) - b
        g = compute_grad(A, r)

        if cost_change < tol * cost:
            termination_status = 2

        cost = 0.5 * np.dot(r, r)

    if termination_status is None:
        termination_status = 0

    active_mask = find_active_constraints(x, lb, ub, rtol=tol)

    return OptimizeResult(
        x=x, fun=r, cost=cost, optimality=g_norm, active_mask=active_mask,
        nit=iteration + 1, status=termination_status,
        initial_cost=initial_cost)