import math
import types
import typing as typ
import warnings

import pyxu.abc as pxa
import pyxu.info.ptype as pxt
import pyxu.operator as pxo
import pyxu.opt.stop as pxst

__all__ = [
    *("CondatVu", "CV"),
    "PD3O",
    *("ChambollePock", "CP"),
    *("LorisVerhoeven", "LV"),
    *("DavisYin", "DY"),
    *("DouglasRachford", "DR"),
    "ADMM",
    *("ForwardBackward", "FB"),
    *("ProximalPoint", "PP"),
]


class _PrimalDualSplitting(pxa.Solver):
    r"""
    Base class for Primal-Dual Splitting (PDS) solvers.
    """
    TuningSpec = typ.Literal[1, 2, 3]  #: valid tuning_strategy values

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc] = None,
        g: typ.Optional[pxa.ProxFunc] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        K: typ.Optional[pxa.DiffMap] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
        super().__init__(**kwargs)
        if (f is None) and (g is None) and (h is None):
            msg = " ".join(
                [
                    "Cannot minimize always-0 functional.",
                    "At least one of Parameter[f, g, h] must be specified.",
                ]
            )
            raise ValueError(msg)

        if f is not None:
            primal_dim_shape = f.dim_shape
        elif g is not None:
            primal_dim_shape = g.dim_shape
        else:
            primal_dim_shape = h.dim_shape

        if h is not None:
            dual_dim_shape = h.dim_shape
        elif K is not None:  # if `h` is None but `K` is given, __init__ raises below; this value is never used.
            dual_dim_shape = K.codim_shape
        else:
            dual_dim_shape = primal_dim_shape

        self._f = pxo.NullFunc(dim_shape=primal_dim_shape) if (f is None) else f
        self._g = pxo.NullFunc(dim_shape=primal_dim_shape) if (g is None) else g
        self._h = pxo.NullFunc(dim_shape=dual_dim_shape) if (h is None) else h
        if h is not None:
            self._K = pxo.IdentityOp(dim_shape=h.dim_shape) if (K is None) else K
        else:
            if K is None:
                K_dim_shape = f.dim_shape if f is not None else g.dim_shape
                self._K = pxo.NullOp(dim_shape=K_dim_shape, codim_shape=K_dim_shape)
            else:
                raise ValueError("Optional argument ``h`` must be specified if ``K`` is not None.")
        self._objective_func = self._f + self._g + (self._h * self._K)

    def m_init(
        self,
        x0: pxt.NDArray,
        z0: typ.Optional[pxt.NDArray] = None,
        tau: typ.Optional[pxt.Real] = None,
        sigma: typ.Optional[pxt.Real] = None,
        rho: typ.Optional[pxt.Real] = None,
        beta: typ.Optional[pxt.Real] = None,
        tuning_strategy: TuningSpec = 1,
    ):
        mst = self._mstate  # shorthand
        mst["x"] = x0
        mst["z"] = self._set_dual_variable(z0)
        self._tuning_strategy = int(tuning_strategy)
        self._beta = self._set_beta(beta)
        gamma = self._set_gamma(tuning_strategy)
        mst["tau"], mst["sigma"], delta = self._set_step_sizes(tau, sigma, gamma)
        mst["rho"] = self._set_momentum_term(rho, delta)

    def m_step(self):
        raise NotImplementedError

    def default_stop_crit(self) -> pxa.StoppingCriterion:
        stop_crit_x = pxst.RelError(
            eps=1e-4,
            var="x",
            f=None,
            norm=2,
            satisfy_all=True,
        )
        stop_crit_z = pxst.RelError(
            eps=1e-4,
            var="z",
            f=None,
            norm=2,
            satisfy_all=True,
        )
        return stop_crit_x & stop_crit_z if self._h._name != "NullFunc" else stop_crit_x

    def solution(self, which: typ.Literal["primal", "dual"] = "primal") -> pxt.NDArray:
        data, _ = self.stats()
        if which == "primal":
            assert "x" in data.keys(), "Primal variable x was not logged (declare it in log_var to log it)."
        elif which == "dual":
            assert "z" in data.keys(), "Dual variable z was not logged (declare it in log_var to log it)."
        else:
            raise ValueError(f"Parameter which must be one of ['primal', 'dual'], got: {which}.")
        return data.get("x") if which == "primal" else data.get("z")

    def objective_func(self) -> pxt.NDArray:
        return self._objective_func(self._mstate["x"])

    def _set_beta(self, beta: typ.Optional[pxt.Real]) -> pxt.Real:
        r"""
        Sets the Lipschitz constant.

        Returns
        -------
        beta: Real
            Lipschitz constant.
        """
        if beta is None:
            if math.isfinite(dl := self._f.diff_lipschitz):
                return dl
            else:
                msg = "beta: automatic inference not supported for operators with unbounded Lipschitz gradients."
                raise ValueError(msg)
        else:
            return beta

    def _set_dual_variable(self, z: typ.Optional[pxt.NDArray]) -> pxt.NDArray:
        r"""
        Initialize the dual variable if it is :py:obj:`None` by mapping the primal variable through the operator K.

        Returns
        -------
        NDArray
            Initialized dual variable.
        """
        if z is None:
            return self._K(self._mstate["x"].copy())
        else:
            return z

    def _set_gamma(self, tuning_strategy: typ.Literal[1, 2, 3]) -> pxt.Real:
        r"""
        Sets the gamma parameter according to the tuning strategy.

        Returns
        -------
        gamma: Real
            Gamma parameter.
        """
        return self._beta if tuning_strategy != 2 else self._beta / 1.9

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        raise NotImplementedError

    def _set_momentum_term(
        self,
        rho: typ.Optional[pxt.Real],
        delta: pxt.Real,
    ) -> pxt.Real:
        r"""
        Sets the momentum term according to Theorem 8.2 in [PSA]_.

        Returns
        -------
        rho: Real
            Momentum term.

        Notes
        -----
        The :math:`O(1/\sqrt{k})` objective functional convergence rate of Theorem 1 in [dPSA]_ holds for
        :math:`\rho=1`.
        """
        if rho is None:
            rho = 1.0 if self._tuning_strategy != 3 else delta - 0.1
        else:
            assert rho <= delta, f"Parameter rho must be smaller than delta: {rho} > {delta}."
        return rho


_PDS = _PrimalDualSplitting  #: shorthand


class CondatVu(_PrimalDualSplitting):
    r"""
    Condat-Vu primal-dual splitting algorithm.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.CV`.

    The *Condat-Vu (CV)* primal-dual method is described in [CVS]_. (This particular implementation is based on the
    pseudo-code Algorithm 7.1 provided in [FuncSphere]_ Chapter 7, Section 1.)

    It can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathcal{K} \mathbf{x})},

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.

    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one or more of the terms :math:`\mathcal{F}`, :math:`\mathcal{G}` or
      :math:`\mathcal{H}` is zero.

    * The algorithm has convergence guarantees when :math:`\mathcal{H}` is composed with a *linear operator*
      :math:`\mathbf{K}`. When :math:`\mathcal{F}=0`, convergence can be proven for *non-linear differentiable maps*
      :math:`\mathcal{K}` (see [NLCP]_). Note that :py:class:`~pyxu.opt.solver.CondatVu` does not yet support
      automatic selection of hyperparameters for the case of *non-linear differentiable maps* :math:`\mathcal{K}`.

    * Assume that either of the following holds:

      * :math:`\beta>0` and:

        - :math:`\gamma \geq \frac{\beta}{2}`,
        - :math:`\frac{1}{\tau}-\sigma\Vert\mathbf{K}\Vert_{2}^2\geq \gamma`,
        - :math:`\rho \in ]0,\delta[`, where :math:`\delta:=2-\frac{\beta}{2}\gamma^{-1}\in[1,2[` (:math:`\delta=2` is
          possible when :math:`\mathcal{F}` is *quadratic* and :math:`\gamma \geq \beta`, see [PSA]_).

      * :math:`\beta=0` and:

        - :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2\leq 1`,
        - :math:`\rho \in ]0,2[`.

      Then there exists a pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M` solution
      s.t. the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and
      :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star`
      respectively, i.e.

      .. math::

         \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and}
         \quad
         \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Differentiable map or linear operator :math:`\mathcal{K}`.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See section below for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    .. rubric:: Default hyperparameter values

    This class supports three strategies to automatically set the hyperparameters (see [PSA]_ for more details and
    numerical experiments comparing the performance of the three strategies):

    - ``tuning_strategy == 1``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=1` (no relaxation).

      This is the most standard way of setting the parameters in the literature.
    - ``tuning_strategy == 2``: :math:`\gamma = \beta/1.9` (large step sizes) and :math:`\rho=1` (no relaxation).

      This strategy favours large step sizes, forbidding the use of overrelaxation. When :math:`\beta=0`, it coincides
      with the first strategy.
    - ``tuning_strategy == 3``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=\delta - 0.1 > 1` (overrelaxation).

      This strategy chooses smaller step sizes, but performs overrelaxation.

    Once :math:`\gamma` is chosen, the convergence speed of the algorithm is improved by choosing :math:`\sigma` and
    :math:`\tau` as large as possible and relatively well-balanced -- so that both the primal and dual variables
    converge at the same pace. Whenever possible, we therefore choose perfectly balanced parameters :math:`\sigma=\tau`
    saturating the convergence inequalities for a given value of :math:`\gamma`.

    * For :math:`\beta>0` and :math:`\mathcal{H}\neq 0` this yields:

      .. math::

         \frac{1}{\tau}-\tau\Vert\mathbf{K}\Vert_{2}^2= \gamma \quad\Longleftrightarrow\quad -\tau^2\Vert\mathbf{K}\Vert_{2}^2-\gamma\tau+1=0,

      which admits one positive root

      .. math::

         \tau=\sigma=\frac{1}{\Vert\mathbf{K}\Vert_{2}^2}\left(-\frac{\gamma}{2}+\sqrt{\frac{\gamma^2}{4}+\Vert\mathbf{K}\Vert_{2}^2}\right).

    * For :math:`\beta>0` and :math:`\mathcal{H}=0` this yields: :math:`\tau=1/\gamma.`

    * For :math:`\beta=0` this yields: :math:`\tau=\sigma=\Vert\mathbf{K}\Vert_{2}^{-1}`.

    When :math:`\tau` is provided (:math:`\tau = \tau_{1}`), but not :math:`\sigma`, the latter is chosen as:

    .. math::

       \frac{1}{\tau_{1}}-\sigma\Vert\mathbf{K}\Vert_{2}^2= \gamma \quad\Longleftrightarrow\quad \sigma=\left(\frac{1}{\tau_{1}}-\gamma\right)\frac{1}{\Vert\mathbf{K}\Vert_{2}^2}.

    When :math:`\sigma` is provided (:math:`\sigma = \sigma_{1}`), but not :math:`\tau`, the latter is chosen as:

    .. math::

       \frac{1}{\tau}-\sigma_{1}\Vert\mathbf{K}\Vert_{2}^2= \gamma \quad\Longleftrightarrow\quad \tau=\frac{1}{\left(\gamma+\sigma_{1}\Vert\mathbf{K}\Vert_{2}^2\right)}.

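    As a quick numerical sanity check of the balanced choice above -- a minimal sketch, with illustrative values for
    :math:`\gamma` and :math:`\Vert\mathbf{K}\Vert_{2}` -- the positive root indeed saturates the convergence
    inequality:

    .. code-block:: python

       import math

       gamma, L = 1.0, 2.0  # illustrative values of gamma and of the operator norm of K
       # Positive root of -tau^2 L^2 - gamma tau + 1 = 0 (balanced tau = sigma):
       tau = sigma = (-(gamma / 2) + math.sqrt(gamma**2 / 4 + L**2)) / L**2
       # The inequality 1/tau - sigma L^2 >= gamma is saturated:
       assert math.isclose(1 / tau - sigma * L**2, gamma)
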
    Warning
    -------
    When values are provided for both :math:`\tau` and :math:`\sigma`, it is assumed that the latter satisfy the
    convergence inequalities, but no check is explicitly performed. Automatic selection of hyperparameters for the case
    of non-linear differentiable maps :math:`\mathcal{K}` is not supported yet.

    Example
    -------
    Consider the following optimisation problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}_+^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda_1 \|\mathbf{D}\mathbf{x}\|_1\quad+\quad\lambda_2 \|\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{N\times N}` the discrete derivative operator and
    :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda_1,\lambda_2>0.` This problem can
    be solved via PDS with :math:`\mathcal{F}(\mathbf{x})=
    \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{G}(\mathbf{x})=\lambda_2\|\mathbf{x}\|_1,` :math:`\mathcal{H}(\mathbf{x})=\lambda_1 \|\mathbf{x}\|_1`
    and :math:`\mathbf{K}=\mathbf{D}`.

    .. plot::

       import matplotlib.pyplot as plt
       import numpy as np
       import pyxu.operator as pxo
       from pyxu.operator import SubSample, PartialDerivative
       from pyxu.opt.solver import CV

       x = np.repeat(np.asarray([0, 2, 1, 3, 0, 2, 0]), 10)
       N = x.size

       D = PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       downsample = SubSample(N, slice(None, None, 3))
       y = downsample(x)
       loss = (1 / 2) * pxo.SquaredL2Norm(y.size).argshift(-y)
       F = loss * downsample

       cv = CV(f=F, g=0.01 * pxo.L1Norm(N), h=0.1 * pxo.L1Norm(N), K=D)
       x0, z0 = np.zeros((2, N))
       cv.fit(x0=x0, z0=z0)
       x_recons = cv.solution()

       plt.figure()
       plt.stem(x, linefmt="C0-", markerfmt="C0o")
       mask_ids = np.where(downsample.adjoint(np.ones_like(y)))[0]
       markerline, stemlines, baseline = plt.stem(mask_ids, y, linefmt="C3-", markerfmt="C3o")
       markerline.set_markerfacecolor("none")
       plt.stem(x_recons, linefmt="C1--", markerfmt="C1s")
       plt.legend(["Ground truth", "Observation", "CV Estimate"])
       plt.show()

    See Also
    --------
    :py:class:`~pyxu.opt.solver.CV`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.ChambollePock`,
    :py:class:`~pyxu.opt.solver.DouglasRachford`
    """

    def m_step(self):
        mst = self._mstate
        x_temp = self._g.prox(
            mst["x"] - mst["tau"] * self._f.grad(mst["x"]) - mst["tau"] * self._K.jacobian(mst["x"]).adjoint(mst["z"]),
            tau=mst["tau"],
        )
        if self._h._name != "NullFunc":
            u = 2 * x_temp - mst["x"]
            z_temp = self._h.fenchel_prox(
                mst["z"] + mst["sigma"] * self._K(u),
                sigma=mst["sigma"],
            )
            mst["z"] = mst["rho"] * z_temp + (1 - mst["rho"]) * mst["z"]
        mst["x"] = mst["rho"] * x_temp + (1 - mst["rho"]) * mst["x"]

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: typ.Optional[pxt.Real],
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of the parameter :math:`\delta`.
        """
        tau = None if tau == 0 else tau
        sigma = None if sigma == 0 else sigma
        if not isinstance(self._K, pxa.LinOp) and ((tau is None) or (sigma is None)):
            # Automatic selection requires the operator norm of K, hence a linear K.
            msg = (
                f"Automatic selection of parameters is only supported in the case in which K is a linear operator. "
                f"Got operator of type {self._K.__class__}."
            )
            raise ValueError(msg)

        if (tau is not None) and (sigma is None):
            assert tau > 0, f"Parameter tau must be positive, got {tau}."
            if self._h._name == "NullFunc":
                assert tau <= 1 / gamma, f"Parameter tau must be smaller than 1/gamma: {tau} > {1 / gamma}."
                sigma = 0
            else:
                if math.isfinite(self._K.lipschitz):
                    sigma = ((1 / tau) - gamma) * (1 / self._K.lipschitz**2)
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is not None):
            assert sigma > 0, f"Parameter sigma must be positive, got {sigma}."
            if self._h._name == "NullFunc":
                tau = 1 / gamma
            else:
                if math.isfinite(self._K.lipschitz):
                    tau = 1 / (gamma + (sigma * self._K.lipschitz**2))
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is None):
            if self._beta > 0:
                if self._h._name == "NullFunc":
                    tau = 1 / gamma
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau = sigma = (1 / (self._K.lipschitz) ** 2) * (
                            (-gamma / 2) + math.sqrt((gamma**2 / 4) + self._K.lipschitz**2)
                        )
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
            else:
                if self._h._name == "NullFunc":
                    tau = 1
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau = sigma = 1 / self._K.lipschitz
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
        delta = (
            2
            if (self._beta == 0 or (isinstance(self._f, pxa.QuadraticFunc) and gamma <= self._beta))
            else 2 - self._beta / (2 * gamma)
        )
        return tau, sigma, delta


CV = CondatVu  #: Alias of :py:class:`~pyxu.opt.solver.CondatVu`.


class PD3O(_PrimalDualSplitting):
    r"""
    Primal-Dual Three-Operator Splitting (PD3O) algorithm.

    The *Primal-Dual three Operator splitting (PD3O)* method is described in [PD3O]_.

    It can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\Psi(\mathbf{x}):=\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathcal{K} \mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.

    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible* -- i.e. there exists at least one solution.

    * The algorithm is still valid if one or more of the terms :math:`\mathcal{F}`, :math:`\mathcal{G}` or
      :math:`\mathcal{H}` is zero.

    * The algorithm has convergence guarantees for the case in which :math:`\mathcal{H}` is composed with a *linear
      operator* :math:`\mathbf{K}`. When :math:`\mathcal{F}=0`, convergence can be proven for *non-linear
      differentiable maps* :math:`\mathcal{K}`. (See [NLCP]_.) Note that this class does not yet support automatic
      selection of hyperparameters for the case of *non-linear differentiable maps* :math:`\mathcal{K}`.

    * Assume that the following holds:

      * :math:`\gamma\geq\frac{\beta}{2}`,
      * :math:`\tau \in ]0, \frac{1}{\gamma}[`,
      * :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2 \leq 1`,
      * :math:`\delta = 2-\beta\tau/2 \in [1, 2[` and :math:`\rho \in (0, \delta]`.

      Then there exists a pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M` solution
      s.t. the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and
      :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star`
      respectively (Theorem 8.2 of [PSA]_), i.e.

      .. math::

         \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and} \quad \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0.

      Furthermore, when :math:`\rho=1`, the objective functional sequence
      :math:`\left(\Psi(\mathbf{x}_n)\right)_{n\in\mathbb{N}}` can be shown to converge towards its minimum
      :math:`\Psi^\ast` with rate :math:`o(1/\sqrt{n})` (Theorem 1 of [dPSA]_):

      .. math::

         \Psi(\mathbf{x}_n) - \Psi^\ast = o(1/\sqrt{n}).

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Differentiable map or linear operator :math:`\mathcal{K}`.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See section below for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    .. rubric:: Default hyperparameter values

    This class supports three strategies to automatically set the hyperparameters (see [PSA]_ for more details and
    numerical experiments comparing the performance of the three strategies):

    - ``tuning_strategy == 1``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=1` (no relaxation).

      This is the most standard way of setting the parameters in the literature.
    - ``tuning_strategy == 2``: :math:`\gamma = \beta/1.9` (large step sizes) and :math:`\rho=1` (no relaxation).

      This strategy favours large step sizes, forbidding the use of overrelaxation. When :math:`\beta=0`, it coincides
      with the first strategy.
    - ``tuning_strategy == 3``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=\delta - 0.1 > 1` (overrelaxation).

      This strategy chooses smaller step sizes, but performs overrelaxation.

    Once :math:`\gamma` is chosen, the convergence speed of the algorithm is improved by choosing :math:`\sigma` and
    :math:`\tau` as large as possible and relatively well-balanced -- so that both the primal and dual variables
    converge at the same pace. Whenever possible, we therefore choose perfectly balanced parameters :math:`\sigma=\tau`
    saturating the convergence inequalities for a given value of :math:`\gamma`.

    In practice, the following linear program is solved:

    .. math::

       (\tau, \, \sigma) = \operatorname{arg} \max_{(\tau^{*}, \, \sigma^{*})} \quad & \operatorname{log}(\tau^{*}) + \operatorname{log}(\sigma^{*})\\
       \text{s.t.} \quad & \operatorname{log}(\tau^{*}) + \operatorname{log}(\sigma^{*}) \leq -2\operatorname{log}(\Vert\mathbf{K}\Vert_{2})\\
       & \operatorname{log}(\tau^{*}) \leq -\operatorname{log}(\gamma)\\
       & \operatorname{log}(\tau^{*}) = \operatorname{log}(\sigma^{*}).

    When :math:`\tau \leq 1/\gamma` is given (i.e., :math:`\tau=\tau_{1}`), but not :math:`\sigma`, the latter is chosen
    as:

    .. math::

       \tau_{1}\sigma\Vert\mathbf{K}\Vert_{2}^2= 1 \quad\Longleftrightarrow\quad \sigma=\frac{1}{\tau_{1}\Vert\mathbf{K}\Vert_{2}^{2}}.

    When :math:`\sigma` is given (i.e., :math:`\sigma=\sigma_{1}`), but not :math:`\tau`, the latter is chosen as:

    .. math::

       \tau = \min \left\{\frac{1}{\gamma}, \frac{1}{\sigma_{1}\Vert\mathbf{K}\Vert_{2}^{2}}\right\}.

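    With the equality constraint :math:`\tau^{*}=\sigma^{*}`, this program admits the closed-form solution
    :math:`\tau=\sigma=\min\{1/\gamma,\; \sqrt{0.99}/\Vert\mathbf{K}\Vert_{2}\}` (the internal implementation adds a
    :math:`0.99` safety margin to the first constraint). A minimal sketch reproducing it with
    :py:func:`scipy.optimize.linprog`, using illustrative values for :math:`\gamma` and :math:`\Vert\mathbf{K}\Vert_{2}`:

    .. code-block:: python

       import numpy as np
       from scipy.optimize import linprog

       gamma, L = 1.0, 2.0  # illustrative values of gamma and of the operator norm of K
       res = linprog(
           c=[-1, -1],  # maximize log(tau) + log(sigma)
           A_ub=[[1, 1], [1, 0]],
           b_ub=[np.log(0.99) - 2 * np.log(L), np.log(1 / gamma)],
           A_eq=[[1, -1]],  # log(tau) = log(sigma)
           b_eq=[0],
           bounds=(None, None),
       )
       tau, sigma = np.exp(res.x)
       assert np.isclose(tau, min(1 / gamma, np.sqrt(0.99) / L))
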
    Warning
    -------
    When values are provided for both :math:`\tau` and :math:`\sigma`, it is assumed that the latter satisfy the
    convergence inequalities, but no check is explicitly performed. Automatic selection of hyperparameters for the case
    of non-linear differentiable maps :math:`\mathcal{K}` is not supported yet.

    Example
    -------
    Consider the following optimisation problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}_+^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda_1 \|\mathbf{D}\mathbf{x}\|_1\quad+\quad\lambda_2 \|\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{N\times N}` the discrete derivative operator and
    :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda_1,\lambda_2>0.` This problem can
    be solved via PD3O with :math:`\mathcal{F}(\mathbf{x})=
    \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{G}(\mathbf{x})=\lambda_2\|\mathbf{x}\|_1,` :math:`\mathcal{H}(\mathbf{x})=\lambda_1 \|\mathbf{x}\|_1`
    and :math:`\mathbf{K}=\mathbf{D}`.

    .. plot::

       import matplotlib.pyplot as plt
       import numpy as np
       import pyxu.operator as pxo
       from pyxu.operator import SubSample, PartialDerivative
       from pyxu.opt.solver import PD3O

       x = np.repeat(np.asarray([0, 2, 1, 3, 0, 2, 0]), 10)
       N = x.size

       D = PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       downsample = SubSample(N, slice(None, None, 3))
       y = downsample(x)
       loss = (1 / 2) * pxo.SquaredL2Norm(y.size).argshift(-y)
       F = loss * downsample

       pd3o = PD3O(f=F, g=0.01 * pxo.L1Norm(N), h=0.1 * pxo.L1Norm(N), K=D)
       x0, z0 = np.zeros((2, N))
       pd3o.fit(x0=x0, z0=z0)
       x_recons = pd3o.solution()

       plt.figure()
       plt.stem(x, linefmt="C0-", markerfmt="C0o")
       mask_ids = np.where(downsample.adjoint(np.ones_like(y)))[0]
       markerline, stemlines, baseline = plt.stem(mask_ids, y, linefmt="C3-", markerfmt="C3o")
       markerline.set_markerfacecolor("none")
       plt.stem(x_recons, linefmt="C1--", markerfmt="C1s")
       plt.legend(["Ground truth", "Observation", "PD3O Estimate"])
       plt.show()
    """

    def m_init(
        self,
        x0: pxt.NDArray,
        z0: typ.Optional[pxt.NDArray] = None,
        tau: typ.Optional[pxt.Real] = None,
        sigma: typ.Optional[pxt.Real] = None,
        rho: typ.Optional[pxt.Real] = None,
        tuning_strategy: _PDS.TuningSpec = 1,
    ):
        super().m_init(
            x0=x0,
            z0=z0,
            tau=tau,
            sigma=sigma,
            rho=rho,
            tuning_strategy=tuning_strategy,
        )

        # If u0 == x0, the first step would leave x unchanged and the solver would stop at the first iteration.
        if self._g._name == self._h._name == "NullFunc":
            self._mstate["u"] = x0 * 1.01
        else:
            self._mstate["u"] = x0.copy()

    def m_step(self):
        # Slightly more efficient rewriting of iterations (216) of [PSA]_ with M=1.
        # Faster than (185) since only one call to the adjoint and one to the gradient per iteration.
        mst = self._mstate
        mst["x"] = self._g.prox(
            mst["u"] - mst["tau"] * self._K.jacobian(mst["u"]).adjoint(mst["z"]),
            tau=mst["tau"],
        )
        u_temp = mst["x"] - mst["tau"] * self._f.grad(mst["x"])
        if self._h._name != "NullFunc":
            z_temp = self._h.fenchel_prox(
                mst["z"] + mst["sigma"] * self._K(mst["x"] + u_temp - mst["u"]),
                sigma=mst["sigma"],
            )
            mst["z"] = (1 - mst["rho"]) * mst["z"] + mst["rho"] * z_temp
        mst["u"] = (1 - mst["rho"]) * mst["u"] + mst["rho"] * u_temp

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of :math:`\delta`.
        """
        tau = None if tau == 0 else tau
        sigma = None if sigma == 0 else sigma
        if not isinstance(self._K, pxa.LinOp) and ((tau is None) or (sigma is None)):
            # Automatic selection requires the operator norm of K, hence a linear K.
            msg = (
                f"Automatic selection of parameters is only supported in the case in which K is a linear operator. "
                f"Got operator of type {self._K.__class__}."
            )
            raise ValueError(msg)

        if (tau is not None) and (sigma is None):
            assert 0 < tau <= 1 / gamma, "tau must be positive and smaller than 1/gamma."
            if self._h._name == "NullFunc":
                sigma = 0
            else:
                if math.isfinite(self._K.lipschitz):
                    sigma = 1 / (tau * self._K.lipschitz**2)
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is not None):
            assert sigma > 0, f"sigma must be positive, got {sigma}."
            if self._h._name == "NullFunc":
                tau = 1 / gamma
            else:
                if math.isfinite(self._K.lipschitz):
                    tau = min(1 / (sigma * self._K.lipschitz**2), 1 / gamma)
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is None):
            if self._beta > 0:
                if self._h._name == "NullFunc":
                    tau = 1 / gamma
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau, sigma = self._optimize_step_sizes(gamma)
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
            else:
                if self._h._name == "NullFunc":
                    tau = 1
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau = sigma = 1 / self._K.lipschitz
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
        delta = 2 if self._beta == 0 else 2 - self._beta * tau / 2
        return tau, sigma, delta

    def _optimize_step_sizes(self, gamma: pxt.Real) -> tuple[pxt.Real, pxt.Real]:
        r"""
        Optimize the primal/dual step sizes.

        Parameters
        ----------
        gamma: Real
            Gamma parameter.

        Returns
        -------
        Tuple[Real, Real]
            Sensible primal/dual step sizes.
        """
        import numpy as np
        from scipy.optimize import linprog

        c = np.array([-1, -1])
        A_ub = np.array([[1, 1], [1, 0]])
        b_ub = np.array([np.log(0.99) - 2 * np.log(self._K.lipschitz), np.log(1 / gamma)])
        A_eq = np.array([[1, -1]])
        b_eq = np.array([0])
        result = linprog(
            c=c,
            A_ub=A_ub,
            b_ub=b_ub,
            A_eq=A_eq,
            b_eq=b_eq,
            bounds=(None, None),
        )
        if not result.success:
            warnings.warn("Automatic parameter selection has not converged.", UserWarning)
        return np.exp(result.x)


def ChambollePock(
    g: typ.Optional[pxa.ProxFunc] = None,
    h: typ.Optional[pxa.ProxFunc] = None,
    K: typ.Optional[pxa.DiffMap] = None,
    base: typ.Type[_PrimalDualSplitting] = CondatVu,
    **kwargs,
):
    r"""
    Chambolle-Pock primal-dual splitting method.

    Parameters
    ----------
    g: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{G}`.
    h: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{H}`.
    K: :py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, None
        Differentiable map or linear operator :math:`\mathcal{K}`.
    base: :py:class:`~pyxu.opt.solver.CondatVu`, :py:class:`~pyxu.opt.solver.PD3O`
        Specifies the base primal-dual algorithm.
        (Default = :py:class:`~pyxu.opt.solver.CondatVu`)
    \*\*kwargs: ~collections.abc.Mapping
        Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.


    The *Chambolle-Pock (CP) primal-dual splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}),}

    where:

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.
    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    * Automatic selection of parameters is not supported for *non-linear differentiable maps* :math:`\mathcal{K}`.

    * The *Chambolle-Pock (CP) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{F}=0` in
      :py:class:`~pyxu.opt.solver.CondatVu` or :py:class:`~pyxu.opt.solver.PD3O`. Chambolle and Pock originally
      introduced the algorithm without relaxation (:math:`\rho=1`) [CPA]_. Relaxed versions have been proposed
      afterwards [PSA]_. Chambolle-Pock's algorithm is also known as the *Primal-Dual Hybrid Gradient (PDHG)*
      algorithm. It can be seen as a preconditioned ADMM method [CPA]_.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See `base` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

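    Example
    -------
    A minimal usage sketch (illustrative operators only; hyperparameters are then set automatically):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import CP

       N = 100
       y = np.random.default_rng(0).standard_normal(N)
       g = (1 / 2) * pxo.SquaredL2Norm(N).argshift(-y)  # G(x) = 0.5 * ||x - y||_2^2
       h = 0.1 * pxo.L1Norm(N)                          # H(z) = 0.1 * ||z||_1
       K = pxo.PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       cp = CP(g=g, h=h, K=K)
       cp.fit(x0=np.zeros(N))
       x_hat = cp.solution()
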
    See Also
    --------
    :py:func:`~pyxu.opt.solver.CP`,
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """
    kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
    obj = base(
        f=None,
        g=g,
        h=h,
        K=K,
        **kwargs,
    )
    obj.__repr__ = lambda _: "ChambollePock"
    return obj


CP = ChambollePock  #: Alias of :py:func:`~pyxu.opt.solver.ChambollePock`.


class LorisVerhoeven(PD3O):
    r"""
    Loris-Verhoeven splitting algorithm.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.LV`.

    The *Loris-Verhoeven (LV) primal-dual splitting* can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and
      *convex function* with *simple proximal operator*.

    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{F}` or :math:`\mathcal{H}` is zero.

    * Automatic selection of parameters is not supported for *non-linear differentiable maps* :math:`\mathcal{K}`.

    * The *Loris-Verhoeven (LV) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{G}=0` in
      :py:class:`~pyxu.opt.solver.PD3O`.

    * In the specific case where :math:`\mathcal{F}` is *quadratic*, then one can set :math:`\rho \in ]0,\delta[` with
      :math:`\delta=2`. (See Theorem 4.3 in [PSA]_.)

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Differentiable map or linear operator :math:`\mathcal{K}`.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See :py:class:`~pyxu.opt.solver.PD3O` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

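    Example
    -------
    A minimal usage sketch (illustrative operators only; ``f`` must be differentiable):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import LV

       N = 100
       y = np.random.default_rng(0).standard_normal(N)
       f = (1 / 2) * pxo.SquaredL2Norm(N).argshift(-y)  # F(x) = 0.5 * ||x - y||_2^2
       h = 0.1 * pxo.L1Norm(N)                          # H(z) = 0.1 * ||z||_1
       K = pxo.PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       lv = LV(f=f, h=h, K=K)
       lv.fit(x0=np.zeros(N))
       x_hat = lv.solution()
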
    See Also
    --------
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.DavisYin`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        K: typ.Optional[pxa.DiffMap] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
        super().__init__(
            f=f,
            g=None,
            h=h,
            K=K,
            **kwargs,
        )

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: typ.Optional[pxt.Real],
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of the parameter :math:`\delta`.
        """
        tau, sigma, _ = super()._set_step_sizes(tau=tau, sigma=sigma, gamma=gamma)
        delta = 2 if (self._beta == 0 or isinstance(self._f, pxa.QuadraticFunc)) else 2 - self._beta / (2 * gamma)
        return tau, sigma, delta


LV = LorisVerhoeven  #: Alias of :py:class:`~pyxu.opt.solver.LorisVerhoeven`.


class DavisYin(PD3O):
    r"""
    Davis-Yin primal-dual splitting method.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.DY`.

    The *Davis-Yin (DY) primal-dual splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    * The *Davis-Yin primal-dual splitting* method can be obtained by choosing :math:`\mathcal{K}=\mathbf{I}`
      (identity) and :math:`\tau=1/\sigma` in :py:class:`~pyxu.opt.solver.PD3O`, up to a suitable change of variable
      [PSA]_.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., N) initial point(s) for the dual variable.
      If ``None`` (default), then use ``x0`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See :py:class:`~pyxu.opt.solver.PD3O` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

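    Example
    -------
    A minimal usage sketch (illustrative functionals only). Note that :math:`\tau` fully determines the dual step
    size, since :math:`\sigma = 1/\tau` internally:

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import DY

       N = 100
       y = np.random.default_rng(0).standard_normal(N)
       f = (1 / 2) * pxo.SquaredL2Norm(N).argshift(-y)  # F(x) = 0.5 * ||x - y||_2^2
       g = 0.1 * pxo.L1Norm(N)                          # G(x) = 0.1 * ||x||_1
       h = 0.05 * pxo.L1Norm(N)                         # H(x): second proximable term

       dy = DY(f=f, g=g, h=h)
       dy.fit(x0=np.zeros(N), tau=0.5)  # sigma = 1/tau is set automatically
       x_hat = dy.solution()
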
    See Also
    --------
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.LorisVerhoeven`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc],
        g: typ.Optional[pxa.ProxFunc] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
        super().__init__(
            f=f,
            g=g,
            h=h,
            K=None,
            **kwargs,
        )

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of :math:`\delta`.
        """
        if tau is not None:
            assert 0 < tau <= 1 / gamma, "tau must be positive and smaller than 1/gamma."
        else:
            tau = 1.0 if self._beta == 0 else 1 / gamma

        delta = 2.0 if self._beta == 0 else 2 - self._beta * tau / 2

        return tau, 1 / tau, delta


DY = DavisYin  #: Alias of :py:class:`~pyxu.opt.solver.DavisYin`.


def DouglasRachford(
    g: typ.Optional[pxa.ProxFunc] = None,
    h: typ.Optional[pxa.ProxFunc] = None,
    base: typ.Type[_PrimalDualSplitting] = CondatVu,
    **kwargs,
):
    r"""
    Douglas-Rachford splitting algorithm.

    Parameters
    ----------
    g: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{G}`.
    h: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{H}`.
    base: :py:class:`~pyxu.opt.solver.CondatVu`, :py:class:`~pyxu.opt.solver.PD3O`
        Specifies the base primal-dual algorithm.
        (Default = :py:class:`~pyxu.opt.solver.CondatVu`)
    \*\*kwargs: ~collections.abc.Mapping
        Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.


    The *Douglas-Rachford (DR) primal-dual splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{x}),}

    where :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
    :math:`\mathcal{H}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
    *convex functions* with *simple proximal operators*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    * The *Douglas-Rachford (DR) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{F}=0`,
      :math:`\mathbf{K}=\mathbf{Id}` and :math:`\tau=1/\sigma` in :py:class:`~pyxu.opt.solver.CondatVu` or
      :py:class:`~pyxu.opt.solver.PD3O`. Douglas and Rachford originally introduced the algorithm without relaxation
      (:math:`\rho=1`), but relaxed versions have been proposed afterwards [PSA]_. When :math:`\rho=1`,
      Douglas-Rachford's algorithm is *functionally equivalent* to ADMM (up to a change of variable, see [PSA]_ for a
      derivation).

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., N) initial point(s) for the dual variable.
      If ``None`` (default), then use ``x0`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size. Defaults to 1.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

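    Example
    -------
    A minimal usage sketch (illustrative functionals only):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import DR

       N = 100
       y = np.random.default_rng(0).standard_normal(N)
       g = (1 / 2) * pxo.SquaredL2Norm(N).argshift(-y)  # G(x) = 0.5 * ||x - y||_2^2
       h = 0.1 * pxo.L1Norm(N)                          # H(x) = 0.1 * ||x||_1

       dr = DR(g=g, h=h)
       dr.fit(x0=np.zeros(N))
       x_hat = dr.solution()
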
    See Also
    --------
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.ForwardBackward`
    """
    kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
    obj = base(f=None, g=g, h=h, K=None, **kwargs)
    obj.__repr__ = lambda _: "DouglasRachford"

    def _set_step_sizes_custom(
        obj: _PrimalDualSplitting,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        tau = 1.0 if tau is None else tau
        delta = 2.0
        return tau, 1 / tau, delta

    obj._set_step_sizes = types.MethodType(_set_step_sizes_custom, obj)
    return obj


DR = DouglasRachford  #: Alias of :py:func:`~pyxu.opt.solver.DouglasRachford`.


class ADMM(_PDS):
    r"""
    Alternating Direction Method of Multipliers.

    The *Alternating Direction Method of Multipliers (ADMM)* can be used to solve problems of the form:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}^N} \quad \mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K}\mathbf{x}),

    where (see below for additional details on the assumptions):

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and
      *convex functional*, either with *simple proximal operator* or *Lipschitz-differentiable*,
    * :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and
      *convex functional* with *simple proximal operator*,
    * :math:`\mathbf{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *linear operator* with **operator norm**
      :math:`\Vert{\mathbf{K}}\Vert_2`.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if either :math:`\mathcal{F}` or :math:`\mathcal{H}` is zero.

    * When :math:`\mathbf{K} = \mathbf{I}_{N}`, ADMM is equivalent to the :py:func:`~pyxu.opt.solver.DouglasRachford`
      method (up to a change of variable, see [PSA]_ for a derivation).

    * This is an implementation of the algorithm described in Section 5.4 of [PSA]_, which handles the non-smooth
      composite term :math:`\mathcal{H}(\mathbf{K}\mathbf{x})` by means of a change of variable and an infimal
      postcomposition trick. The update equation of this algorithm involves the following
      :math:`\mathbf{x}`-minimization step:

      .. math::
         :label: eq:x_minimization

         \mathcal{V} = \operatorname*{arg\,min}_{\mathbf{x} \in \mathbb{R}^N} \quad \mathcal{F}(\mathbf{x}) + \frac1{2 \tau}
         \Vert \mathbf{K} \mathbf{x} - \mathbf{a} \Vert_2^2,

      where :math:`\tau` is the primal step size and :math:`\mathbf{a} \in \mathbb{R}^M` is an iteration-dependent
      vector.

      The following cases are covered in this implementation:

      - :math:`\mathbf{K}` is ``None`` (i.e. the identity operator) and :math:`\mathcal{F}` is a
        :py:class:`~pyxu.abc.ProxFunc`. Then, :math:numref:`eq:x_minimization` has a single solution provided by
        :math:`\operatorname*{prox}_{\tau \mathcal{F}}(\mathbf{a})`. This yields the *classical ADMM algorithm*
        described in Section 5.3 of [PSA]_ (i.e. without the postcomposition trick).

      - :math:`\mathcal{F}` is a :py:class:`~pyxu.abc.QuadraticFunc`, i.e. :math:`\mathcal{F}(\mathbf{x})=\frac{1}{2}
        \langle\mathbf{x}, \mathbf{Q}\mathbf{x}\rangle + \mathbf{c}^T\mathbf{x} + t`. Then the unique solution to
        :math:numref:`eq:x_minimization` is obtained by solving a linear system of the form:

        .. math::
           :label: eq:linear_system

           \Big( \mathbf{Q} + \frac1\tau \mathbf{K}^* \mathbf{K} \Big) \mathbf{x} =
           \frac1\tau \mathbf{K}^\ast\mathbf{a}-\mathbf{c}, \qquad \mathbf{x} \in \mathbb{R}^N.

        This linear system is solved via a sub-iterative :py:class:`~pyxu.opt.solver.CG` algorithm involving the
        repeated application of :math:`\mathbf{Q}` and :math:`\mathbf{K}^{*}\mathbf{K}`. This sub-iterative scheme may
        be computationally intensive if these operators do not admit fast matrix-free implementations. (A small
        numerical illustration of this linear system is given after these remarks.)

      - :math:`\mathcal{F}` is a :py:class:`~pyxu.abc.DiffFunc`. Then, :math:numref:`eq:x_minimization` is solved via a
        sub-iterative :py:class:`~pyxu.opt.solver.NLCG` algorithm involving repeated evaluation of
        :math:`\nabla\mathcal{F}` and :math:`\mathbf{K}^{*}\mathbf{K}`. This sub-iterative scheme may be costly if
        these operators cannot be evaluated with fast algorithms. In this scenario, the use of multiple initial points
        in :py:meth:`~pyxu.abc.Solver.fit` is not supported.

      The user may also provide a *custom* callable solver :math:`s: \mathbb{R}^M \times \mathbb{R} \to \mathbb{R}^N`,
      taking as input :math:`(\mathbf{a}, \tau)` and solving :math:numref:`eq:x_minimization`, i.e. :math:`s(\mathbf{a},
      \tau) \in \mathcal{V}`. (See example below.) If :py:class:`~pyxu.opt.solver.ADMM` is initialized with such a
      solver, then the latter is used to solve :math:numref:`eq:x_minimization` regardless of whether one of the
      above-mentioned cases is met.

    * Unlike traditional implementations of ADMM, :py:class:`~pyxu.opt.solver.ADMM` supports relaxation, i.e.
      :math:`\rho\neq 1`.

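    As a quick numerical check of the quadratic case above -- a minimal NumPy sketch with illustrative random data,
    not the actual sub-iterative :py:class:`~pyxu.opt.solver.CG` implementation -- solving
    :math:numref:`eq:linear_system` indeed cancels the gradient of the objective in :math:numref:`eq:x_minimization`:

    .. code-block:: python

       import numpy as np

       rng = np.random.default_rng(0)
       N, M, tau = 5, 3, 0.7
       A = rng.standard_normal((N, N))
       Q = A @ A.T + np.eye(N)  # symmetric positive-definite Hessian of F
       c = rng.standard_normal(N)
       K = rng.standard_normal((M, N))
       a = rng.standard_normal(M)

       # Solve (Q + K^T K / tau) x = K^T a / tau - c.
       x = np.linalg.solve(Q + K.T @ K / tau, K.T @ a / tau - c)

       # Stationarity of F(x) + ||K x - a||^2 / (2 tau): the gradient vanishes at x.
       grad = Q @ x + c + K.T @ (K @ x - a) / tau
       assert np.allclose(grad, 0)
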
1410 Parameters (``__init__()``)
1411 ---------------------------
1412 * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
1413 --
1414 Differentiable or proximable function :math:`\mathcal{F}`.
1415 * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
1416 --
1417 Proximable function :math:`\mathcal{H}`.
1418 * **K** (:py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
1419 --
1420 Linear operator :math:`\mathbf{K}`.
1421 * **solver** (:py:class:`~collections.abc.Callable`, :py:obj:`None`)
1422 --
1423 Custom callable to solve the :math:`\mathbf{x}`-minimization step :math:numref:`eq:x_minimization`.
1424
1425 If provided, `solver` must have the `NumPy signature
1426 <https://numpy.org/neps/nep-0020-gufunc-signature-enhancement.html>`_ ``(n), (1) -> (n)``.
1427 * **solver_kwargs** (:py:class:`~collections.abc.Mapping`)
1428 --
1429 Keyword parameters passed to the ``__init__()`` method of sub-iterative :py:class:`~pyxu.opt.solver.CG` or
1430 :py:class:`~pyxu.opt.solver.NLCG` solvers.
1431
1432 `solver_kwargs` is ignored if `solver` provided.
1433 * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
1434 --
1435 Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter for relaxation.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See the base class for more details.
    * **solver_kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Keyword parameters passed to the ``fit()`` method of the sub-iterative
      :py:class:`~pyxu.opt.solver.CG` or :py:class:`~pyxu.opt.solver.NLCG` solvers.

      `solver_kwargs` is ignored if `solver` was provided in ``__init__()``.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    Warning
    -------
    The ``tuning_strategy`` documentation above refers the reader to the base class for details, but the base class
    does not currently document the available strategies.

    Example
    -------
    Consider the following optimization problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}^N} \frac{1}{2} \left\| \mathbf{y} - \mathbf{G}\mathbf{x} \right\|_2^2 \quad + \quad \lambda \|\mathbf{D}\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{(N-2)\times N}` the discrete second-order derivative operator,
    :math:`\mathbf{G}\in\mathbb{R}^{M\times N}`, :math:`\mathbf{y}\in\mathbb{R}^M`, and :math:`\lambda>0`. This
    problem can be solved via ADMM with :math:`\mathcal{F}(\mathbf{x})= \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{H}(\mathbf{x})=\lambda \|\mathbf{D}\mathbf{x}\|_1`, and :math:`\mathbf{K}=\mathbf{D}`. Since the
    functional :math:`\mathcal{F}` is quadratic, the :math:`\mathbf{x}`-minimization step amounts to solving a
    linear system of the form :math:numref:`eq:linear_system`. Here, we demonstrate how to provide a custom solver,
    which applies a precomputed matrix inverse, for this step. Without it, a sub-iterative
    :py:class:`~pyxu.opt.solver.CG` algorithm would be used automatically.

    .. plot::

       import matplotlib.pyplot as plt
       import numpy as np
       import pyxu.abc as pxa
       import pyxu.operator as pxo
       import scipy as sp
       from pyxu.opt.solver import ADMM

       N = 100  # Dimension of the problem

       # Generate piecewise-linear ground truth
       x_gt = np.array([10, 25, 60, 90])  # Knot locations
       a_gt = np.array([2, -4, 3, -2])  # Amplitudes of the knots
       gt = np.zeros(N)  # Ground-truth signal
       for n in range(len(x_gt)):
           gt[x_gt[n] :] += a_gt[n] * np.arange(N - x_gt[n]) / N

       # Generate data (noisy samples at random locations)
       M = 20  # Number of data points
       rng = np.random.default_rng(seed=0)
       x_samp = rng.choice(np.arange(N // M), size=M) + np.arange(N, step=N // M)  # Sampling locations
       sigma = 2 * 1e-2  # Noise standard deviation
       y = gt[x_samp] + sigma * rng.standard_normal(size=M)  # Noisy data points

       # Data-fidelity term
       subsamp_mat = sp.sparse.lil_matrix((M, N))
       for i in range(M):
           subsamp_mat[i, x_samp[i]] = 1
       G = pxa.LinOp.from_array(subsamp_mat.tocsr())
       F = 1 / 2 * pxo.SquaredL2Norm(dim=y.size).argshift(-y) * G
       F.diff_lipschitz = F.estimate_diff_lipschitz(method="svd")

       # Regularization term (promotes sparse second derivatives)
       deriv_mat = sp.sparse.diags(diagonals=[1, -2, 1], offsets=[0, 1, 2], shape=(N - 2, N))
       D = pxa.LinOp.from_array(deriv_mat)
       _lambda = 1e-1  # Regularization parameter
       H = _lambda * pxo.L1Norm(dim=D.codim)

       # Custom solver for the x-minimization step of ADMM
       tau = 1 / _lambda  # Internal ADMM parameter
       # Inverse operator to solve the linear system (eq:linear_system) directly
       A_inv = sp.linalg.inv(G.gram().asarray() + (1 / tau) * D.gram().asarray())


       def solver_ADMM(arr, tau):
           # Implements s(a, tau) = A_inv @ ((1/tau) * D^T a + G^T y)
           b = (1 / tau) * D.adjoint(arr) + G.adjoint(y)
           return A_inv @ b.squeeze()


       # Solve the optimization problem
       admm = ADMM(f=F, h=H, K=D, solver=solver_ADMM, show_progress=False)  # with custom solver
       admm.fit(x0=np.zeros(N), tau=tau)
       x_opt = admm.solution()  # Reconstructed signal

       # Plots
       plt.figure()
       plt.plot(np.arange(N), gt, label="Ground truth")
       plt.plot(x_samp, y, "kx", label="Noisy data points")
       plt.plot(np.arange(N), x_opt, label="Reconstructed signal")
       plt.legend()

    See Also
    --------
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.Func] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        K: typ.Optional[pxa.DiffMap] = None,
        solver: typ.Optional[typ.Callable[[pxt.NDArray, float], pxt.NDArray]] = None,
        solver_kwargs: typ.Optional[dict] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "u", "z")))

        x_update_solver = "custom"  # Method for the x-minimization step
        g = None
        if solver is None:
            if f is None:
                if h is None:
                    msg = " ".join(
                        [
                            "Cannot minimize always-0 functional.",
                            "At least one of Parameter[f, h] must be specified.",
                        ]
                    )
                    raise ValueError(msg)
                if K is None:  # Prox scenario (classical ADMM)
                    f = pxo.NullFunc(dim_shape=h.dim_shape)
                else:  # Sub-iterative CG scenario
                    f = pxa.QuadraticFunc(
                        dim_shape=h.dim_shape,
                        codim_shape=(1,),
                        Q=pxo.NullOp(dim_shape=h.dim_shape, codim_shape=h.dim_shape),
                        c=pxo.NullFunc(dim_shape=h.dim_shape),
                    )
            if f.has(pxa.Property.PROXIMABLE) and K is None:
                x_update_solver = "prox"
                g = f  # In this case, f corresponds to g in the _PDS terminology
                f = None
            elif isinstance(f, pxa.QuadraticFunc):
                x_update_solver = "cg"
                self._K_gram = K.gram()
                warnings.warn(
                    "A sub-iterative conjugate gradient algorithm is used for the x-minimization step "
                    "of ADMM. This might be computationally expensive.",
                    UserWarning,
                )
            elif f.has(pxa.Property.DIFFERENTIABLE_FUNCTION):
                x_update_solver = "nlcg"
                self._K_gram = K.gram()
                warnings.warn(
                    "A sub-iterative non-linear conjugate gradient algorithm is used for the "
                    "x-minimization step of ADMM. This might be computationally expensive.",
                    UserWarning,
                )
            else:
                raise TypeError(
                    "Unsupported scenario: f must either be a ProxFunc (in which case K must be None), a "
                    "QuadraticFunc, or a DiffFunc. If none of these scenarios holds, a solver must be provided "
                    "for the x-minimization step of ADMM."
                )
        self._solver = solver
        self._x_update_solver = x_update_solver
        self._init_kwargs = solver_kwargs if solver_kwargs is not None else dict(show_progress=False)

        super().__init__(
            f=f,
            g=g,
            h=h,
            K=K,
            **kwargs,
        )

    def m_init(
        self,
        x0: pxt.NDArray,
        z0: typ.Optional[pxt.NDArray] = None,
        tau: typ.Optional[pxt.Real] = None,
        rho: typ.Optional[pxt.Real] = None,
        tuning_strategy: _PDS.TuningSpec = 1,
        solver_kwargs: typ.Optional[dict] = None,
        **kwargs,
    ):
        super().m_init(
            x0=x0,
            z0=z0,
            tau=tau,
            sigma=None,
            rho=rho,
            tuning_strategy=tuning_strategy,
        )
        mst = self._mstate  # shorthand
        mst["u"] = self._K(x0)

        # fit() kwargs of the sub-iterative solver
        if solver_kwargs is None:
            solver_kwargs = dict()
        self._fit_kwargs = solver_kwargs

    def m_step(self):
        # Algorithm (145) in [PSA]. Paper -> code correspondence: L -> K, K -> -Id, c -> 0, y -> u, v -> z, g -> h
        mst = self._mstate
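        # With this correspondence, one iteration performs the updates
        #   x^{k+1} = argmin_x F(x) + (1/(2*tau)) * ||K(x) - (u^k - z^k)||_2^2   (eq:x_minimization, a = u^k - z^k)
        #   u^{k+1} = prox_{tau*h}(K(x^{k+1}) + z_temp),  where  z_temp = z^k + K(x^{k+1}) - u^k
        #   z^{k+1} = z_temp + (rho - 1) * (K(x^{k+1}) - u^{k+1})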
        mst["x"] = self._x_update(mst["u"] - mst["z"], tau=mst["tau"])
        z_temp = mst["z"] + self._K(mst["x"]) - mst["u"]
        if self._h._name != "NullFunc":
            mst["u"] = self._h.prox(self._K(mst["x"]) + z_temp, tau=mst["tau"])
        mst["z"] = z_temp + (mst["rho"] - 1) * (self._K(mst["x"]) - mst["u"])

    def _x_update(self, arr: pxt.NDArray, tau: float) -> pxt.NDArray:
        if self._x_update_solver == "custom":
            return self._solver(arr, tau)
        elif self._x_update_solver == "prox":
            return self._g.prox(arr, tau=tau)
        elif self._x_update_solver == "cg":
            from pyxu.opt.solver import CG

            # Solve the normal equations (Q + (1/tau) K*K) x = (1/tau) K*(arr) - c  (eq:linear_system).
            b = (1 / tau) * self._K.adjoint(arr) - self._f._c.grad(arr)
            A = self._f._Q + (1 / tau) * self._K_gram
            slvr = CG(A=A, **self._init_kwargs)
            slvr.fit(b=b, x0=self._mstate["x"].copy(), **self._fit_kwargs)  # Initialize CG with the previous iterate
            return slvr.solution()
        elif self._x_update_solver == "nlcg":
            from pyxu.opt.solver import NLCG

            # Minimize F(x) + (1/tau) * [(1/2) ||K(x)||^2 - <K*(arr), x>], i.e. eq:x_minimization up to a constant.
            c = pxa.LinFunc.from_array(-self._K.adjoint(arr))
            quad_func = pxa.QuadraticFunc(dim_shape=self._f.dim_shape, codim_shape=(1,), Q=self._K_gram, c=c)
            slvr = NLCG(f=self._f + (1 / tau) * quad_func, **self._init_kwargs)
            slvr.fit(x0=self._mstate["x"].copy(), **self._fit_kwargs)  # Initialize NLCG with the previous iterate
            return slvr.solution()

    def solution(self, which: typ.Literal["primal", "primal_h", "dual"] = "primal") -> pxt.NDArray:
        data, _ = self.stats()
        if which == "primal":
            assert "x" in data.keys(), "Primal variable x in dom(g) was not logged (declare it in log_var to log it)."
            return data.get("x")
        elif which == "primal_h":
            assert "u" in data.keys(), "Primal variable u in dom(h) was not logged (declare it in log_var to log it)."
            return data.get("u")
        elif which == "dual":
            assert "z" in data.keys(), "Dual variable z was not logged (declare it in log_var to log it)."
            return data.get("z") / self._mstate["tau"]  # Undo the tau-scaling of the dual variable
        else:
            raise ValueError(f"Parameter which must be one of ['primal', 'primal_h', 'dual'], got: {which}.")

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        if tau is not None:
            assert tau > 0, f"Parameter tau must be positive, got {tau}."
        else:
            tau = 1.0
        delta = 2.0
        return tau, 1 / tau, delta  # ADMM constrains the dual step size to sigma = 1/tau


class ForwardBackward(CondatVu):
    r"""
    Forward-backward splitting algorithm.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.FB`.

    The *Forward-backward (FB) splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.
    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous*,
      *convex* function with a *simple proximal operator*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{F}` or :math:`\mathcal{G}` is zero.

    * The *Forward-backward (FB) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{H}=0` in
      :py:class:`~pyxu.opt.solver.CondatVu`. The algorithm was originally introduced without relaxation
      (:math:`\rho=1`) [FB]_; relaxed versions have been proposed afterwards [PSA]_. The Forward-backward algorithm
      is also known as the *Proximal Gradient Descent (PGD)* algorithm. For the accelerated version of PGD, use
      :py:class:`~pyxu.opt.solver.PGD`.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., N) initial point(s) for the dual variable.
      If ``None`` (default), then use ``x0`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See :py:class:`~pyxu.opt.solver.CondatVu` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.
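
    Example
    -------
    A minimal sketch on a toy denoising problem, mirroring the API usage of the ADMM example above (the data and
    parameter values are illustrative, not prescriptive):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import FB

       N = 50
       rng = np.random.default_rng(seed=0)
       y = rng.standard_normal(N)  # noisy measurements

       # Differentiable data-fidelity term: F(x) = (1/2) ||x - y||_2^2
       f = 1 / 2 * pxo.SquaredL2Norm(dim=N).argshift(-y)
       f.diff_lipschitz = 1.0  # the gradient of F is 1-Lipschitz
       # Proximable regularizer: G(x) = 0.1 * ||x||_1
       g = 1e-1 * pxo.L1Norm(dim=N)

       fb = FB(f=f, g=g, show_progress=False)
       fb.fit(x0=np.zeros(N))
       x_opt = fb.solution()  # soft-thresholded estimate of y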

    See Also
    --------
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc] = None,
        g: typ.Optional[pxa.ProxFunc] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x",)))
        super().__init__(
            f=f,
            g=g,
            h=None,
            K=None,
            **kwargs,
        )


FB = ForwardBackward  #: Alias of :py:class:`~pyxu.opt.solver.ForwardBackward`.


def ProximalPoint(
    g: typ.Optional[pxa.ProxFunc] = None,
    base: typ.Type[_PrimalDualSplitting] = CondatVu,
    **kwargs,
):
    r"""
    Proximal-point method.

    This solver is also accessible via the alias :py:func:`~pyxu.opt.solver.PP`.

    The *Proximal-point (PP)* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{G}(\mathbf{x}),}

    where :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower
    semicontinuous*, *convex* function with a *simple proximal operator*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The *Proximal-point* algorithm can be obtained by choosing :math:`\mathcal{F}=0` and :math:`\mathcal{H}=0` in
      :py:class:`~pyxu.opt.solver.CondatVu` or :py:class:`~pyxu.opt.solver.PD3O`. The original version of the
      algorithm was introduced without relaxation (:math:`\rho=1`) [PP]_. Relaxed versions have been proposed
      afterwards [PSA]_.

    Parameters (``__init__()``)
    ---------------------------
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **base** (:py:class:`~pyxu.opt.solver.CondatVu`, :py:class:`~pyxu.opt.solver.PD3O`)
      --
      Base primal-dual algorithm from which the mathematical updates are inherited
      (default = :py:class:`~pyxu.opt.solver.CondatVu`).
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.
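
    Example
    -------
    A minimal sketch (the functional and parameter values are illustrative):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import PP

       N = 10
       g = pxo.L1Norm(dim=N)  # proximable objective

       pp = PP(g=g, show_progress=False)
       pp.fit(x0=np.ones(N), tau=1.0)
       x_opt = pp.solution()  # approaches 0, the minimizer of the L1 norm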

    See Also
    --------
    :py:func:`~pyxu.opt.solver.PP`,
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """
    kwargs.update(log_var=kwargs.get("log_var", ("x",)))
    obj = base(
        f=None,
        g=g,
        h=None,
        K=None,
        **kwargs,
    )

    # Bind a bespoke __repr__ so that explicit calls to obj.__repr__() report "ProximalPoint"
    # rather than the base solver's name.
    obj.__repr__ = types.MethodType(lambda _: "ProximalPoint", obj)
    return obj


PP = ProximalPoint  #: Alias of :py:func:`~pyxu.opt.solver.ProximalPoint`.