Source code for pyxu.opt.solver.pds

import math
import types
import typing as typ
import warnings

import pyxu.abc as pxa
import pyxu.info.ptype as pxt
import pyxu.operator as pxo
import pyxu.opt.stop as pxst

__all__ = [
    *("CondatVu", "CV"),
    "PD3O",
    *("ChambollePock", "CP"),
    *("LorisVerhoeven", "LV"),
    *("DavisYin", "DY"),
    *("DouglasRachford", "DR"),
    "ADMM",
    *("ForwardBackward", "FB"),
    *("ProximalPoint", "PP"),
]


class _PrimalDualSplitting(pxa.Solver):
    r"""
    Base class for Primal-Dual Splitting (PDS) solvers.
    """
    TuningSpec = typ.Literal[1, 2, 3]  #: valid tuning_strategy values

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc] = None,
        g: typ.Optional[pxa.ProxFunc] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        K: typ.Optional[pxa.DiffMap] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
        super().__init__(**kwargs)
        if (f is None) and (g is None) and (h is None):
            msg = " ".join(
                [
                    "Cannot minimize always-0 functional.",
                    "At least one of Parameter[f, g, h] must be specified.",
                ]
            )
            raise ValueError(msg)

        if f is not None:
            primal_dim_shape = f.dim_shape
        elif g is not None:
            primal_dim_shape = g.dim_shape
        else:
            primal_dim_shape = h.dim_shape

        if h is not None:
            dual_dim_shape = h.dim_shape
        elif K is not None:  # todo: isn't this elif-clause useless? For which solver is it triggered?
            dual_dim_shape = K.codim_shape
        else:
            dual_dim_shape = primal_dim_shape

        self._f = pxo.NullFunc(dim_shape=primal_dim_shape) if (f is None) else f
        self._g = pxo.NullFunc(dim_shape=primal_dim_shape) if (g is None) else g
        self._h = pxo.NullFunc(dim_shape=dual_dim_shape) if (h is None) else h
        if h is not None:
            self._K = pxo.IdentityOp(dim_shape=h.dim_shape) if (K is None) else K
        else:
            if K is None:
                K_dim_shape = f.dim_shape if f is not None else g.dim_shape
                self._K = pxo.NullOp(dim_shape=K_dim_shape, codim_shape=K_dim_shape)
            else:
                raise ValueError("Optional argument ``h`` must be specified if ``K`` is provided.")
        self._objective_func = self._f + self._g + (self._h * self._K)
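        # The NullFunc/NullOp placeholders above keep the composite objective F(x) + G(x) + H(Kx)
        # well-defined even when some of the terms were not supplied.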

    def m_init(
        self,
        x0: pxt.NDArray,
        z0: typ.Optional[pxt.NDArray] = None,
        tau: typ.Optional[pxt.Real] = None,
        sigma: typ.Optional[pxt.Real] = None,
        rho: typ.Optional[pxt.Real] = None,
        beta: typ.Optional[pxt.Real] = None,
        tuning_strategy: TuningSpec = 1,
    ):
        mst = self._mstate  # shorthand
        mst["x"] = x0
        mst["z"] = self._set_dual_variable(z0)
        self._tuning_strategy = int(tuning_strategy)
        self._beta = self._set_beta(beta)
        gamma = self._set_gamma(tuning_strategy)
        mst["tau"], mst["sigma"], delta = self._set_step_sizes(tau, sigma, gamma)
        mst["rho"] = self._set_momentum_term(rho, delta)

    def m_step(self):
        raise NotImplementedError

    def default_stop_crit(self) -> pxa.StoppingCriterion:
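        # By default, stop once the relative update of the primal (and, when H != 0, the dual)
        # iterate falls below 1e-4 in the 2-norm.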
        stop_crit_x = pxst.RelError(
            eps=1e-4,
            var="x",
            f=None,
            norm=2,
            satisfy_all=True,
        )
        stop_crit_z = pxst.RelError(
            eps=1e-4,
            var="z",
            f=None,
            norm=2,
            satisfy_all=True,
        )
        return stop_crit_x & stop_crit_z if self._h._name != "NullFunc" else stop_crit_x

    def solution(self, which: typ.Literal["primal", "dual"] = "primal") -> pxt.NDArray:
        data, _ = self.stats()
        if which == "primal":
            assert "x" in data.keys(), "Primal variable x was not logged (declare it in log_var to log it)."
        elif which == "dual":
            assert "z" in data.keys(), "Dual variable z was not logged (declare it in log_var to log it)."
        else:
            raise ValueError(f"Parameter which must be one of ['primal', 'dual'], got: {which}.")
        return data.get("x") if which == "primal" else data.get("z")

    def objective_func(self) -> pxt.NDArray:
        return self._objective_func(self._mstate["x"])

    def _set_beta(self, beta: typ.Optional[pxt.Real]) -> pxt.Real:
        r"""
        Sets the Lipschitz constant.

        Returns
        -------
        beta: Real
            Lipschitz constant.
        """
        if beta is None:
            if math.isfinite(dl := self._f.diff_lipschitz):
                return dl
            else:
                msg = "beta: automatic inference not supported for operators with unbounded Lipschitz gradients."
                raise ValueError(msg)
        else:
            return beta

    def _set_dual_variable(self, z: typ.Optional[pxt.NDArray]) -> pxt.NDArray:
        r"""
        Initialize the dual variable if it is :py:obj:`None` by mapping the primal variable through the operator K.

        Returns
        -------
        NDArray
            Initialized dual variable.
        """
        if z is None:
            return self._K(self._mstate["x"].copy())
        else:
            return z

    def _set_gamma(self, tuning_strategy: typ.Literal[1, 2, 3]) -> pxt.Real:
        r"""
        Sets the gamma parameter according to the tuning strategy.

        Returns
        -------
        gamma: Real
            Gamma parameter.
        """
        return self._beta if tuning_strategy != 2 else self._beta / 1.9

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        raise NotImplementedError

    def _set_momentum_term(
        self,
        rho: typ.Optional[pxt.Real],
        delta: pxt.Real,
    ) -> pxt.Real:
        r"""
        Sets the momentum term according to Theorem 8.2 in [PSA]_.

        Returns
        -------
        rho: Real
            Momentum term.

        Notes
        -----
        The :math:`o(1/\sqrt{k})` objective-functional convergence rate of Theorem 1 in [dPSA]_ holds for :math:`\rho=1`.
        """
        if rho is None:
            rho = 1.0 if self._tuning_strategy != 3 else delta - 0.1
        else:
            assert rho <= delta, f"Parameter rho must not exceed delta: {rho} > {delta}."
        return rho


_PDS = _PrimalDualSplitting  #: shorthand


class CondatVu(_PrimalDualSplitting):
    r"""
    Condat-Vu primal-dual splitting algorithm.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.CV`.

    The *Condat-Vu (CV)* primal-dual method is described in [CVS]_. (This particular implementation is based on the
    pseudo-code Algorithm 7.1 provided in [FuncSphere]_ Chapter 7, Section 1.)

    It can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathcal{K} \mathbf{x})},

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.

    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one or more of the terms :math:`\mathcal{F}`, :math:`\mathcal{G}` or
      :math:`\mathcal{H}` is zero.

    * The algorithm has convergence guarantees when :math:`\mathcal{H}` is composed with a *linear operator*
      :math:`\mathbf{K}`. When :math:`\mathcal{F}=0`, convergence can be proven for *non-linear differentiable maps*
      :math:`\mathcal{K}` (see [NLCP]_). Note that :py:class:`~pyxu.opt.solver.CondatVu` does not yet support
      automatic selection of hyperparameters for the case of *non-linear differentiable maps* :math:`\mathcal{K}`.

    * Assume that either of the following holds:

      * :math:`\beta>0` and:

        - :math:`\gamma \geq \frac{\beta}{2}`,
        - :math:`\frac{1}{\tau}-\sigma\Vert\mathbf{K}\Vert_{2}^2\geq \gamma`,
        - :math:`\rho \in ]0,\delta[`, where :math:`\delta:=2-\frac{\beta}{2}\gamma^{-1}\in[1,2[` (:math:`\delta=2`
          is possible when :math:`\mathcal{F}` is *quadratic* and :math:`\gamma \geq \beta`, see [PSA]_).

      * :math:`\beta=0` and:

        - :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2\leq 1`,
        - :math:`\rho \in ]0,2[`.

      Then there exists a solution pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M`
      s.t. the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and
      :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star`
      respectively, i.e.

      .. math::

         \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and}
         \quad
         \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Differentiable map or linear operator :math:`\mathcal{K}`.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See section below for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    .. rubric:: Default hyperparameter values

    This class supports three strategies to automatically set the hyperparameters (see [PSA]_ for more details and
    numerical experiments comparing the performance of the three strategies):

    - ``tuning_strategy == 1``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=1` (no relaxation).

      This is the most standard way of setting the parameters in the literature.
    - ``tuning_strategy == 2``: :math:`\gamma = \beta/1.9` (large step sizes) and :math:`\rho=1` (no relaxation).

      This strategy favours large step sizes, forbidding the use of overrelaxation. When :math:`\beta=0`, it
      coincides with the first strategy.
    - ``tuning_strategy == 3``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=\delta - 0.1 > 1`
      (overrelaxation).

      This strategy chooses smaller step sizes, but performs overrelaxation.

    Once :math:`\gamma` is chosen, the convergence speed of the algorithm is improved by choosing :math:`\sigma` and
    :math:`\tau` as large as possible and relatively well-balanced -- so that both the primal and dual variables
    converge at the same pace. Whenever possible, we therefore choose perfectly balanced parameters
    :math:`\sigma=\tau` saturating the convergence inequalities for a given value of :math:`\gamma`.

    * For :math:`\beta>0` and :math:`\mathcal{H}\neq 0` this yields:

      .. math::

         \frac{1}{\tau}-\tau\Vert\mathbf{K}\Vert_{2}^2= \gamma \quad\Longleftrightarrow\quad -\tau^2\Vert\mathbf{K}\Vert_{2}^2-\gamma\tau+1=0,

      which admits one positive root

      .. math::

         \tau=\sigma=\frac{1}{\Vert\mathbf{K}\Vert_{2}^2}\left(-\frac{\gamma}{2}+\sqrt{\frac{\gamma^2}{4}+\Vert\mathbf{K}\Vert_{2}^2}\right).

    * For :math:`\beta>0` and :math:`\mathcal{H}=0` this yields: :math:`\tau=1/\gamma.`

    * For :math:`\beta=0` this yields: :math:`\tau=\sigma=\Vert\mathbf{K}\Vert_{2}^{-1}`.
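
    The balanced choice above can be verified numerically. A minimal sketch in plain Python, where ``beta`` and
    ``K_norm`` are illustrative values (not quantities computed by the solver):

    .. code-block:: python

       import math

       beta = 2.0    # assumed Lipschitz constant of grad(F)
       gamma = beta  # tuning_strategy == 1
       K_norm = 1.5  # assumed operator norm of K

       tau = sigma = (1 / K_norm**2) * (-gamma / 2 + math.sqrt(gamma**2 / 4 + K_norm**2))
       # tau = sigma saturates the convergence inequality 1/tau - sigma * ||K||^2 >= gamma:
       assert abs(1 / tau - sigma * K_norm**2 - gamma) < 1e-9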

    When :math:`\tau` is provided (:math:`\tau = \tau_{1}`), but not :math:`\sigma`, the latter is chosen as:

    .. math::

       \frac{1}{\tau_{1}}-\sigma\Vert\mathbf{K}\Vert_{2}^2= \gamma \quad\Longleftrightarrow\quad \sigma=\left(\frac{1}{\tau_{1}}-\gamma\right)\frac{1}{\Vert\mathbf{K}\Vert_{2}^2}.

    When :math:`\sigma` is provided (:math:`\sigma = \sigma_{1}`), but not :math:`\tau`, the latter is chosen as:

    .. math::

       \frac{1}{\tau}-\sigma_{1}\Vert\mathbf{K}\Vert_{2}^2= \gamma \quad\Longleftrightarrow\quad \tau=\frac{1}{\left(\gamma+\sigma_{1}\Vert\mathbf{K}\Vert_{2}^2\right)}.

    Warning
    -------
    When values are provided for both :math:`\tau` and :math:`\sigma`, it is assumed that they satisfy the
    convergence inequalities, but no check is explicitly performed. Automatic selection of hyperparameters for the
    case of non-linear differentiable maps :math:`\mathcal{K}` is not supported yet.

    Example
    -------
    Consider the following optimisation problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}_+^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda_1 \|\mathbf{D}\mathbf{x}\|_1\quad+\quad\lambda_2 \|\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{N\times N}` the discrete derivative operator and
    :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda_1,\lambda_2>0.` This problem
    can be solved via PDS with :math:`\mathcal{F}(\mathbf{x})=
    \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{G}(\mathbf{x})=\lambda_2\|\mathbf{x}\|_1,`
    :math:`\mathcal{H}(\mathbf{x})=\lambda_1 \|\mathbf{x}\|_1` and :math:`\mathbf{K}=\mathbf{D}`.

    .. plot::

       import matplotlib.pyplot as plt
       import numpy as np
       import pyxu.operator as pxo
       from pyxu.operator import SubSample, PartialDerivative
       from pyxu.opt.solver import CV

       x = np.repeat(np.asarray([0, 2, 1, 3, 0, 2, 0]), 10)
       N = x.size

       D = PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       downsample = SubSample(N, slice(None, None, 3))
       y = downsample(x)
       loss = (1 / 2) * pxo.SquaredL2Norm(y.size).argshift(-y)
       F = loss * downsample

       cv = CV(f=F, g=0.01 * pxo.L1Norm(N), h=0.1 * pxo.L1Norm(N), K=D)
       x0, z0 = np.zeros((2, N))
       cv.fit(x0=x0, z0=z0)
       x_recons = cv.solution()

       plt.figure()
       plt.stem(x, linefmt="C0-", markerfmt="C0o")
       mask_ids = np.where(downsample.adjoint(np.ones_like(y)))[0]
       markerline, stemlines, baseline = plt.stem(mask_ids, y, linefmt="C3-", markerfmt="C3o")
       markerline.set_markerfacecolor("none")
       plt.stem(x_recons, linefmt="C1--", markerfmt="C1s")
       plt.legend(["Ground truth", "Observation", "CV Estimate"])
       plt.show()

    See Also
    --------
    :py:class:`~pyxu.opt.solver.CV`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.ChambollePock`,
    :py:class:`~pyxu.opt.solver.DouglasRachford`
    """

    def m_step(self):
        mst = self._mstate
        x_temp = self._g.prox(
            mst["x"] - mst["tau"] * self._f.grad(mst["x"]) - mst["tau"] * self._K.jacobian(mst["x"]).adjoint(mst["z"]),
            tau=mst["tau"],
        )
        if not self._h._name == "NullFunc":
            u = 2 * x_temp - mst["x"]
            z_temp = self._h.fenchel_prox(
                mst["z"] + mst["sigma"] * self._K(u),
                sigma=mst["sigma"],
            )
            mst["z"] = mst["rho"] * z_temp + (1 - mst["rho"]) * mst["z"]
        mst["x"] = mst["rho"] * x_temp + (1 - mst["rho"]) * mst["x"]

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: typ.Optional[pxt.Real],
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of the parameter :math:`\delta`.
        """
        # todo: this method in general is quite dirty.
        if not issubclass(self._K.__class__, pxa.LinOp):
            # todo: wrong! Must raise only if tau/sigma = None
            msg = (
                f"Automatic selection of parameters is only supported in the case in which K is a linear operator. "
                f"Got operator of type {self._K.__class__}."
            )
            raise ValueError(msg)
        tau = None if tau == 0 else tau
        sigma = None if sigma == 0 else sigma

        if (tau is not None) and (sigma is None):
            assert tau > 0, f"Parameter tau must be positive, got {tau}."
            if self._h._name == "NullFunc":
                assert tau <= 1 / gamma, f"Parameter tau must be smaller than 1/gamma: {tau} > {1 / gamma}."
                sigma = 0
            else:
                if math.isfinite(self._K.lipschitz):
                    sigma = ((1 / tau) - gamma) * (1 / self._K.lipschitz**2)
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is not None):
            assert sigma > 0
            if self._h._name == "NullFunc":
                tau = 1 / gamma
            else:
                if math.isfinite(self._K.lipschitz):
                    tau = 1 / (gamma + (sigma * self._K.lipschitz**2))
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is None):
            if self._beta > 0:
                if self._h._name == "NullFunc":
                    tau = 1 / gamma
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau = sigma = (1 / (self._K.lipschitz) ** 2) * (
                            (-gamma / 2) + math.sqrt((gamma**2 / 4) + self._K.lipschitz**2)
                        )
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
            else:
                if self._h._name == "NullFunc":
                    tau = 1
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau = sigma = 1 / self._K.lipschitz
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
        delta = (
            2
            if (self._beta == 0 or (isinstance(self._f, pxa.QuadraticFunc) and gamma <= self._beta))
            else 2 - self._beta / (2 * gamma)
        )
        return tau, sigma, delta


CV = CondatVu  #: Alias of :py:class:`~pyxu.opt.solver.CondatVu`.


class PD3O(_PrimalDualSplitting):
    r"""
    Primal-Dual Three-Operator Splitting (PD3O) algorithm.

    The *Primal-Dual three Operator splitting (PD3O)* method is described in [PD3O]_.

    It can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\Psi(\mathbf{x}):=\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathcal{K} \mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.

    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible* -- i.e. there exists at least one solution.

    * The algorithm is still valid if one or more of the terms :math:`\mathcal{F}`, :math:`\mathcal{G}` or
      :math:`\mathcal{H}` is zero.

    * The algorithm has convergence guarantees for the case in which :math:`\mathcal{H}` is composed with a *linear
      operator* :math:`\mathbf{K}`. When :math:`\mathcal{F}=0`, convergence can be proven for *non-linear
      differentiable maps* :math:`\mathcal{K}`. (See [NLCP]_.) Note that this class does not yet support automatic
      selection of hyperparameters for the case of *non-linear differentiable maps* :math:`\mathcal{K}`.

    * Assume that the following holds:

      * :math:`\gamma\geq\frac{\beta}{2}`,
      * :math:`\tau \in ]0, \frac{1}{\gamma}[`,
      * :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2 \leq 1`,
      * :math:`\delta = 2-\beta\tau/2 \in [1, 2[` and :math:`\rho \in (0, \delta]`.

      Then there exists a solution pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M`
      s.t. the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and
      :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star`
      respectively (Theorem 8.2 of [PSA]_), i.e.

      .. math::

         \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and} \quad \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0.

      Furthermore, when :math:`\rho=1`, the objective functional sequence
      :math:`\left(\Psi(\mathbf{x}_n)\right)_{n\in\mathbb{N}}` can be shown to converge towards its minimum
      :math:`\Psi^\ast` with rate :math:`o(1/\sqrt{n})` (Theorem 1 of [dPSA]_):

      .. math::

         \Psi(\mathbf{x}_n) - \Psi^\ast = o(1/\sqrt{n}).

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Differentiable map or linear operator :math:`\mathcal{K}`.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See section below for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    .. rubric:: Default hyperparameter values

    This class supports three strategies to automatically set the hyperparameters (see [PSA]_ for more details and
    numerical experiments comparing the performance of the three strategies):

    - ``tuning_strategy == 1``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=1` (no relaxation).

      This is the most standard way of setting the parameters in the literature.
    - ``tuning_strategy == 2``: :math:`\gamma = \beta/1.9` (large step sizes) and :math:`\rho=1` (no relaxation).

      This strategy favours large step sizes, forbidding the use of overrelaxation. When :math:`\beta=0`, it
      coincides with the first strategy.
    - ``tuning_strategy == 3``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=\delta - 0.1 > 1`
      (overrelaxation).

      This strategy chooses smaller step sizes, but performs overrelaxation.

    Once :math:`\gamma` is chosen, the convergence speed of the algorithm is improved by choosing :math:`\sigma` and
    :math:`\tau` as large as possible and relatively well-balanced -- so that both the primal and dual variables
    converge at the same pace. Whenever possible, we therefore choose perfectly balanced parameters
    :math:`\sigma=\tau` saturating the convergence inequalities for a given value of :math:`\gamma`.

    In practice, the following linear programming optimization problem is solved:

    .. math::

       (\tau, \, \sigma) = \operatorname{arg} \max_{(\tau^{*}, \, \sigma^{*})} \quad & \operatorname{log}(\tau^{*}) + \operatorname{log}(\sigma^{*})\\
       \text{s.t.} \quad & \operatorname{log}(\tau^{*}) + \operatorname{log}(\sigma^{*}) \leq -2\operatorname{log}(\Vert\mathbf{K}\Vert_{2})\\
       & \operatorname{log}(\tau^{*}) \leq -\operatorname{log}(\gamma)\\
       & \operatorname{log}(\tau^{*}) = \operatorname{log}(\sigma^{*}).

    When :math:`\tau \leq 1/\gamma` is given (i.e., :math:`\tau=\tau_{1}`), but not :math:`\sigma`, the latter is
    chosen as:

    .. math::

       \tau_{1}\sigma\Vert\mathbf{K}\Vert_{2}^2= 1 \quad\Longleftrightarrow\quad \sigma=\frac{1}{\tau_{1}\Vert\mathbf{K}\Vert_{2}^{2}}.

    When :math:`\sigma` is given (i.e., :math:`\sigma=\sigma_{1}`), but not :math:`\tau`, the latter is chosen as:

    .. math::

       \tau = \min \left\{\frac{1}{\gamma}, \frac{1}{\sigma_{1}\Vert\mathbf{K}\Vert_{2}^{2}}\right\}.

    Warning
    -------
    When values are provided for both :math:`\tau` and :math:`\sigma`, it is assumed that they satisfy the
    convergence inequalities, but no check is explicitly performed. Automatic selection of hyperparameters for the
    case of non-linear differentiable maps :math:`\mathcal{K}` is not supported yet.

    Example
    -------
    Consider the following optimisation problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}_+^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda_1 \|\mathbf{D}\mathbf{x}\|_1\quad+\quad\lambda_2 \|\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{N\times N}` the discrete derivative operator and
    :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda_1,\lambda_2>0.` This problem
    can be solved via PD3O with :math:`\mathcal{F}(\mathbf{x})=
    \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{G}(\mathbf{x})=\lambda_2\|\mathbf{x}\|_1,`
    :math:`\mathcal{H}(\mathbf{x})=\lambda_1 \|\mathbf{x}\|_1` and :math:`\mathbf{K}=\mathbf{D}`.

    .. plot::

       import matplotlib.pyplot as plt
       import numpy as np
       import pyxu.operator as pxo
       from pyxu.operator import SubSample, PartialDerivative
       from pyxu.opt.solver import PD3O

       x = np.repeat(np.asarray([0, 2, 1, 3, 0, 2, 0]), 10)
       N = x.size

       D = PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       downsample = SubSample(N, slice(None, None, 3))
       y = downsample(x)
       loss = (1 / 2) * pxo.SquaredL2Norm(y.size).argshift(-y)
       F = loss * downsample

       pd3o = PD3O(f=F, g=0.01 * pxo.L1Norm(N), h=0.1 * pxo.L1Norm(N), K=D)
       x0, z0 = np.zeros((2, N))
       pd3o.fit(x0=x0, z0=z0)
       x_recons = pd3o.solution()

       plt.figure()
       plt.stem(x, linefmt="C0-", markerfmt="C0o")
       mask_ids = np.where(downsample.adjoint(np.ones_like(y)))[0]
       markerline, stemlines, baseline = plt.stem(mask_ids, y, linefmt="C3-", markerfmt="C3o")
       markerline.set_markerfacecolor("none")
       plt.stem(x_recons, linefmt="C1--", markerfmt="C1s")
       plt.legend(["Ground truth", "Observation", "PD3O Estimate"])
       plt.show()
    """

    def m_init(
        self,
        x0: pxt.NDArray,
        z0: typ.Optional[pxt.NDArray] = None,
        tau: typ.Optional[pxt.Real] = None,
        sigma: typ.Optional[pxt.Real] = None,
        rho: typ.Optional[pxt.Real] = None,
        tuning_strategy: _PDS.TuningSpec = 1,
    ):
        super().m_init(
            x0=x0,
            z0=z0,
            tau=tau,
            sigma=sigma,
            rho=rho,
            tuning_strategy=tuning_strategy,
        )

        # if x0 == u0, the first step wouldn't change x and the solver would stop at the first iteration
        if self._g._name == self._h._name == "NullFunc":
            self._mstate["u"] = x0 * 1.01
        else:
            self._mstate["u"] = x0.copy()

    def m_step(self):
        # Slightly more efficient rewriting of iterations (216) of [PSA] with M=1.
        # Faster than (185) since only one call to the adjoint and the gradient per iteration.
        mst = self._mstate
        mst["x"] = self._g.prox(
            mst["u"] - mst["tau"] * self._K.jacobian(mst["u"]).adjoint(mst["z"]),
            tau=mst["tau"],
        )
        u_temp = mst["x"] - mst["tau"] * self._f.grad(mst["x"])
        if not self._h._name == "NullFunc":
            z_temp = self._h.fenchel_prox(
                mst["z"] + mst["sigma"] * self._K(mst["x"] + u_temp - mst["u"]),
                sigma=mst["sigma"],
            )
            mst["z"] = (1 - mst["rho"]) * mst["z"] + mst["rho"] * z_temp
        mst["u"] = (1 - mst["rho"]) * mst["u"] + mst["rho"] * u_temp

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of :math:`\delta`.
        """
        if not issubclass(self._K.__class__, pxa.LinOp):
            msg = (
                f"Automatic selection of parameters is only supported in the case in which K is a linear operator. "
                f"Got operator of type {self._K.__class__}."
            )
            raise ValueError(msg)
        tau = None if tau == 0 else tau
        sigma = None if sigma == 0 else sigma

        if (tau is not None) and (sigma is None):
            assert 0 < tau <= 1 / gamma, "tau must be positive and smaller than 1/gamma."
            if self._h._name == "NullFunc":
                sigma = 0
            else:
                if math.isfinite(self._K.lipschitz):
                    sigma = 1 / (tau * self._K.lipschitz**2)
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is not None):
            assert sigma > 0, f"sigma must be positive, got {sigma}."
            if self._h._name == "NullFunc":
                tau = 1 / gamma
            else:
                if math.isfinite(self._K.lipschitz):
                    tau = min(1 / (sigma * self._K.lipschitz**2), 1 / gamma)
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is None):
            if self._beta > 0:
                if self._h._name == "NullFunc":
                    tau = 1 / gamma
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau, sigma = self._optimize_step_sizes(gamma)
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
            else:
                if self._h._name == "NullFunc":
                    tau = 1
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau = sigma = 1 / self._K.lipschitz
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
        delta = 2 if self._beta == 0 else 2 - self._beta * tau / 2
        return tau, sigma, delta

    def _optimize_step_sizes(self, gamma: pxt.Real) -> tuple[pxt.Real, pxt.Real]:
        r"""
        Optimize the primal/dual step sizes.

        Parameters
        ----------
        gamma: Real
            Gamma parameter.

        Returns
        -------
        Tuple[Real, Real]
            Sensible primal/dual step sizes.
        """
        import numpy as np
        from scipy.optimize import linprog

        c = np.array([-1, -1])
        A_ub = np.array([[1, 1], [1, 0]])
        b_ub = np.array([np.log(0.99) - 2 * np.log(self._K.lipschitz), np.log(1 / gamma)])
        A_eq = np.array([[1, -1]])
        b_eq = np.array([0])
        result = linprog(
            c=c,
            A_ub=A_ub,
            b_ub=b_ub,
            A_eq=A_eq,
            b_eq=b_eq,
            bounds=(None, None),
        )
        if not result.success:
            warnings.warn("Automatic parameter selection has not converged.", UserWarning)
        return np.exp(result.x)

def ChambollePock(
    g: typ.Optional[pxa.ProxFunc] = None,
    h: typ.Optional[pxa.ProxFunc] = None,
    K: typ.Optional[pxa.DiffMap] = None,
    base: typ.Type[_PrimalDualSplitting] = CondatVu,
    **kwargs,
):
    r"""
    Chambolle-Pock primal-dual splitting method.

    Parameters
    ----------
    g: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{G}`.
    h: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{H}`.
    K: :py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, None
        Differentiable map or linear operator :math:`\mathcal{K}`.
    base: :py:class:`~pyxu.opt.solver.CondatVu`, :py:class:`~pyxu.opt.solver.PD3O`
        Specifies the base primal-dual algorithm.
        (Default = :py:class:`~pyxu.opt.solver.CondatVu`)
    \*\*kwargs: ~collections.abc.Mapping
        Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.


    The *Chambolle-Pock (CP) primal-dual splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}),}

    where:

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.
    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    * Automatic selection of parameters is not supported for *non-linear differentiable maps* :math:`\mathcal{K}`.

    * The *Chambolle-Pock (CP) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{F}=0` in
      :py:class:`~pyxu.opt.solver.CondatVu` or :py:class:`~pyxu.opt.solver.PD3O`. Chambolle and Pock originally
      introduced the algorithm without relaxation (:math:`\rho=1`) [CPA]_. Relaxed versions have been proposed
      afterwards [PSA]_. Chambolle-Pock's algorithm is also known as the *Primal-Dual Hybrid Gradient (PDHG)*
      algorithm. It can be seen as a preconditioned ADMM method [CPA]_.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See `base` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    See Also
    --------
    :py:func:`~pyxu.opt.solver.CP`,
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """
    kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
    obj = base(
        f=None,
        g=g,
        h=h,
        K=K,
        **kwargs,
    )
    obj.__repr__ = lambda _: "ChambollePock"
    return obj
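
# Minimal usage sketch for ChambollePock (TV-flavoured toy problem; operators and weights
# below are illustrative assumptions, mirroring the CondatVu docstring example):
#
#   import numpy as np
#   import pyxu.operator as pxo
#   from pyxu.opt.solver import CP
#
#   N = 16
#   y = np.ones(N)
#   D = pxo.PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))
#   cp = CP(g=(1 / 2) * pxo.SquaredL2Norm(N).argshift(-y), h=0.1 * pxo.L1Norm(N), K=D)
#   cp.fit(x0=np.zeros(N))
#   x_hat = cp.solution()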


CP = ChambollePock  #: Alias of :py:func:`~pyxu.opt.solver.ChambollePock`.


class LorisVerhoeven(PD3O):
    r"""
    Loris-Verhoeven splitting algorithm.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.LV`.

    The *Loris-Verhoeven (LV) primal-dual splitting* can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and
      *convex function* with *simple proximal operator*.

    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{F}` or :math:`\mathcal{H}` is zero.

    * Automatic selection of parameters is not supported for *non-linear differentiable maps* :math:`\mathcal{K}`.

    * The *Loris-Verhoeven (LV) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{G}=0` in
      :py:class:`~pyxu.opt.solver.PD3O`.

    * In the specific case where :math:`\mathcal{F}` is *quadratic*, one can set :math:`\rho \in ]0,\delta[` with
      :math:`\delta=2`. (See Theorem 4.3 in [PSA]_.)

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Differentiable map or linear operator :math:`\mathcal{K}`.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See :py:class:`~pyxu.opt.solver.PD3O` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    See Also
    --------
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.DavisYin`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        K: typ.Optional[pxa.DiffMap] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
        super().__init__(
            f=f,
            g=None,
            h=h,
            K=K,
            **kwargs,
        )

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: typ.Optional[pxt.Real],
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of the parameter :math:`\delta`.
        """
        tau, sigma, _ = super()._set_step_sizes(tau=tau, sigma=sigma, gamma=gamma)
        delta = 2 if (self._beta == 0 or isinstance(self._f, pxa.QuadraticFunc)) else 2 - self._beta / (2 * gamma)
        return tau, sigma, delta
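
# Minimal usage sketch for LorisVerhoeven (illustrative problem; mirrors the PD3O docstring
# example with g omitted -- values are assumptions, not tested defaults):
#
#   import numpy as np
#   import pyxu.operator as pxo
#   from pyxu.opt.solver import LV
#
#   N = 16
#   y = np.ones(N)
#   D = pxo.PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))
#   lv = LV(f=(1 / 2) * pxo.SquaredL2Norm(N).argshift(-y), h=0.1 * pxo.L1Norm(N), K=D)
#   lv.fit(x0=np.zeros(N))
#   x_hat = lv.solution()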


LV = LorisVerhoeven  #: Alias of :py:class:`~pyxu.opt.solver.LorisVerhoeven`.


class DavisYin(PD3O):
    r"""
    Davis-Yin primal-dual splitting method.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.DY`.

    The *Davis-Yin (DY) primal-dual splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    * The *Davis-Yin primal-dual splitting* method can be obtained by choosing :math:`\mathcal{K}=\mathbf{I}`
      (identity) and :math:`\tau=1/\sigma` in :py:class:`~pyxu.opt.solver.PD3O`, up to a suitable change of variable
      [PSA]_.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., N) initial point(s) for the dual variable.
      If ``None`` (default), then use ``x0`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See :py:class:`~pyxu.opt.solver.PD3O` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.
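
    Example
    -------
    A minimal usage sketch (operators and weights below are illustrative assumptions, mirroring the
    :py:class:`~pyxu.opt.solver.CondatVu` docstring example with :math:`\mathcal{K}` the identity):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import DY

       N = 16
       y = np.ones(N)
       f = (1 / 2) * pxo.SquaredL2Norm(N).argshift(-y)  # smooth data-fidelity term
       dy = DY(f=f, g=0.05 * pxo.L1Norm(N), h=0.05 * pxo.L1Norm(N))
       dy.fit(x0=np.zeros(N))
       x_hat = dy.solution()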

    See Also
    --------
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.LorisVerhoeven`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc],
        g: typ.Optional[pxa.ProxFunc] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
        super().__init__(
            f=f,
            g=g,
            h=h,
            K=None,
            **kwargs,
        )

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of :math:`\delta`.
        """
        if tau is not None:
            assert 0 < tau <= 1 / gamma, "tau must be positive and smaller than 1/gamma."
        else:
            tau = 1.0 if self._beta == 0 else 1 / gamma

        delta = 2.0 if self._beta == 0 else 2 - self._beta * tau / 2

        return tau, 1 / tau, delta


DY = DavisYin  #: Alias of :py:class:`~pyxu.opt.solver.DavisYin`.


def DouglasRachford(
    g: typ.Optional[pxa.ProxFunc] = None,
    h: typ.Optional[pxa.ProxFunc] = None,
    base: typ.Type[_PrimalDualSplitting] = CondatVu,
    **kwargs,
):
    r"""
    Douglas-Rachford splitting algorithm.

    Parameters
    ----------
    g: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{G}`.
    h: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{H}`.
    base: :py:class:`~pyxu.opt.solver.CondatVu`, :py:class:`~pyxu.opt.solver.PD3O`
        Specifies the base primal-dual algorithm.
        (Default = :py:class:`~pyxu.opt.solver.CondatVu`)
    \*\*kwargs: ~collections.abc.Mapping
        Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.


    The *Douglas-Rachford (DR) primal-dual splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{x}),}

    where :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
    :math:`\mathcal{H}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
    *convex functions* with *simple proximal operators*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    * The *Douglas-Rachford (DR) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{F}=0`,
      :math:`\mathbf{K}=\mathbf{Id}` and :math:`\tau=1/\sigma` in :py:class:`~pyxu.opt.solver.CondatVu` or
      :py:class:`~pyxu.opt.solver.PD3O`. Douglas and Rachford originally introduced the algorithm without relaxation
      (:math:`\rho=1`), but relaxed versions have been proposed afterwards [PSA]_. When :math:`\rho=1`,
      Douglas-Rachford's algorithm is *functionally equivalent* to ADMM (up to a change of variable, see [PSA]_ for a
      derivation).

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., N) initial point(s) for the dual variable.
      If ``None`` (default), then use ``x0`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size. Defaults to 1.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    See Also
    --------
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.ForwardBackward`
    """
    kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
    obj = base(f=None, g=g, h=h, K=None, **kwargs)
    obj.__repr__ = lambda _: "DouglasRachford"

    def _set_step_sizes_custom(
        obj: typ.Type[_PrimalDualSplitting],
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        tau = 1.0 if tau is None else tau
        delta = 2.0
        return tau, 1 / tau, delta

    obj._set_step_sizes = types.MethodType(_set_step_sizes_custom, obj)
    return obj
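
# Minimal usage sketch for DouglasRachford (data-fidelity/regularizer pair below is an
# illustrative assumption):
#
#   import numpy as np
#   import pyxu.operator as pxo
#   from pyxu.opt.solver import DR
#
#   N = 16
#   y = np.ones(N)
#   dr = DR(g=(1 / 2) * pxo.SquaredL2Norm(N).argshift(-y), h=0.1 * pxo.L1Norm(N))
#   dr.fit(x0=np.zeros(N))  # tau defaults to 1
#   x_hat = dr.solution()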


DR = DouglasRachford  #: Alias of :py:func:`~pyxu.opt.solver.DouglasRachford`.


class ADMM(_PDS):
    r"""
    Alternating Direction Method of Multipliers.

    The *Alternating Direction Method of Multipliers (ADMM)* can be used to solve problems of the form:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}^N} \quad \mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K}\mathbf{x}),

    where (see below for additional details on the assumptions):

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and
      *convex functional*, either with *simple proximal operator* or *Lipschitz-differentiable*,
    * :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and
      *convex functional* with *simple proximal operator*,
    * :math:`\mathbf{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *linear operator* with **operator norm**
      :math:`\Vert{\mathbf{K}}\Vert_2`.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if either :math:`\mathcal{F}` or :math:`\mathcal{H}` is zero.

    * When :math:`\mathbf{K} = \mathbf{I}_{N}`, ADMM is equivalent to the
      :py:func:`~pyxu.opt.solver.DouglasRachford` method (up to a change of variable, see [PSA]_ for a derivation).

    * This is an implementation of the algorithm described in Section 5.4 of [PSA]_, which handles the non-smooth
      composite term :math:`\mathcal{H}(\mathbf{K}\mathbf{x})` by means of a change of variable and an infimal
      postcomposition trick. The update equation of this algorithm involves the following
      :math:`\mathbf{x}`-minimization step:

      .. math::
         :label: eq:x_minimization

         \mathcal{V} = \operatorname*{arg\,min}_{\mathbf{x} \in \mathbb{R}^N} \quad \mathcal{F}(\mathbf{x}) +
         \frac{1}{2 \tau} \Vert \mathbf{K} \mathbf{x} - \mathbf{a} \Vert_2^2,

      where :math:`\tau` is the primal step size and :math:`\mathbf{a} \in \mathbb{R}^M` is an iteration-dependent
      vector.

      The following cases are covered in this implementation:

      - :math:`\mathbf{K}` is ``None`` (i.e. the identity operator) and :math:`\mathcal{F}` is a
        :py:class:`~pyxu.abc.ProxFunc`. Then, :math:numref:`eq:x_minimization` has a single solution provided by
        :math:`\operatorname*{prox}_{\tau \mathcal{F}}(\mathbf{a})`. This yields the *classical ADMM algorithm*
        described in Section 5.3 of [PSA]_ (i.e. without the postcomposition trick).

      - :math:`\mathcal{F}` is a :py:class:`~pyxu.abc.QuadraticFunc`, i.e.
        :math:`\mathcal{F}(\mathbf{x})=\frac{1}{2} \langle\mathbf{x}, \mathbf{Q}\mathbf{x}\rangle +
        \mathbf{c}^T\mathbf{x} + t`. Then the unique solution to :math:numref:`eq:x_minimization` is obtained by
        solving a linear system of the form:

        .. math::
           :label: eq:linear_system

           \Big( \mathbf{Q} + \frac{1}{\tau} \mathbf{K}^* \mathbf{K} \Big) \mathbf{x} =
           \frac{1}{\tau} \mathbf{K}^\ast\mathbf{a}-\mathbf{c}, \qquad \mathbf{x} \in \mathbb{R}^N.

        This linear system is solved via a sub-iterative :py:class:`~pyxu.opt.solver.CG` algorithm involving the
        repeated application of :math:`\mathbf{Q}` and :math:`\mathbf{K}^{*}\mathbf{K}`. This sub-iterative scheme
        may be computationally intensive if these operators do not admit fast matrix-free implementations.

      - :math:`\mathcal{F}` is a :py:class:`~pyxu.abc.DiffFunc`. Then, :math:numref:`eq:x_minimization` is solved
        via a sub-iterative :py:class:`~pyxu.opt.solver.NLCG` algorithm involving repeated evaluation of
        :math:`\nabla\mathcal{F}` and :math:`\mathbf{K}^{*}\mathbf{K}`. This sub-iterative scheme may be costly if
        these operators cannot be evaluated with fast algorithms. In this scenario, the use of multiple initial
        points in :py:meth:`~pyxu.abc.Solver.fit` is not supported.

      The user may also provide a *custom* callable solver :math:`s: \mathbb{R}^M \times \mathbb{R} \to
      \mathbb{R}^N`, taking as input :math:`(\mathbf{a}, \tau)` and solving :math:numref:`eq:x_minimization`, i.e.
      :math:`s(\mathbf{a}, \tau) \in \mathcal{V}`. (See example below.) If :py:class:`~pyxu.opt.solver.ADMM` is
      initialized with such a solver, then the latter is used to solve :math:numref:`eq:x_minimization` regardless
      of whether one of the above-mentioned cases is met.

    * Unlike traditional implementations of ADMM, :py:class:`~pyxu.opt.solver.ADMM` supports relaxation, i.e.
      :math:`\rho\neq 1`.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Differentiable or proximable function :math:`\mathcal{F}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Linear operator :math:`\mathbf{K}`.
    * **solver** (:py:class:`~collections.abc.Callable`, :py:obj:`None`)
      --
      Custom callable to solve the :math:`\mathbf{x}`-minimization step :math:numref:`eq:x_minimization`.

      If provided, `solver` must have the `NumPy signature
      <https://numpy.org/neps/nep-0020-gufunc-signature-enhancement.html>`_ ``(n), (1) -> (n)``.
    * **solver_kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Keyword parameters passed to the ``__init__()`` method of sub-iterative :py:class:`~pyxu.opt.solver.CG` or
      :py:class:`~pyxu.opt.solver.NLCG` solvers.

      `solver_kwargs` is ignored if `solver` is provided.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter for relaxation.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See base class for more details.
    * **solver_kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Keyword parameters passed to the ``fit()`` method of sub-iterative :py:class:`~pyxu.opt.solver.CG` or
      :py:class:`~pyxu.opt.solver.NLCG` solvers.

      `solver_kwargs` is ignored if `solver` was provided in ``__init__()``.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.
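
    A quick sketch of the *classical ADMM* scenario (:math:`\mathbf{K}` omitted, :math:`\mathcal{F}` proximable;
    operators and weights below are illustrative assumptions):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import ADMM

       N = 16
       y = np.ones(N)
       admm = ADMM(f=(1 / 2) * pxo.SquaredL2Norm(N).argshift(-y), h=0.1 * pxo.L1Norm(N))
       admm.fit(x0=np.zeros(N))  # the x-minimization step reduces to prox evaluations
       x_hat = admm.solution()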
    Example
    -------
    Consider the following optimization problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad
       \lambda \|\mathbf{D}\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{N\times N}` the discrete second-order derivative operator,
    :math:`\mathbf{G}\in\mathbb{R}^{M\times N}, \, \mathbf{y}\in\mathbb{R}^M, \lambda>0.` This problem can be solved
    via ADMM with :math:`\mathcal{F}(\mathbf{x})= \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{H}(\mathbf{x})=\lambda \|\mathbf{D}\mathbf{x}\|_1,` and :math:`\mathbf{K}=\mathbf{D}`. The
    functional :math:`\mathcal{F}` being quadratic, the :math:`\mathbf{x}`-minimization step consists in solving a
    linear system of the form :math:numref:`eq:linear_system`. Here, we demonstrate how to provide a custom solver
    for this step, which applies a precomputed matrix inverse. Without it, a sub-iterative
    :py:class:`~pyxu.opt.solver.CG` algorithm would have been used automatically instead.

    .. plot::

       import matplotlib.pyplot as plt
       import numpy as np
       import pyxu.abc as pxa
       import pyxu.operator as pxo
       import scipy as sp
       from pyxu.opt.solver import ADMM

       N = 100  # Dimension of the problem

       # Generate piecewise-linear ground truth
       x_gt = np.array([10, 25, 60, 90])  # Knot locations
       a_gt = np.array([2, -4, 3, -2])  # Amplitudes of the knots
       gt = np.zeros(N)  # Ground-truth signal
       for n in range(len(x_gt)):
           gt[x_gt[n] :] += a_gt[n] * np.arange(N - x_gt[n]) / N

       # Generate data (noisy samples at random locations)
       M = 20  # Number of data points
       rng = np.random.default_rng(seed=0)
       x_samp = rng.choice(np.arange(N // M), size=M) + np.arange(N, step=N // M)  # sampling locations
       sigma = 2 * 1e-2  # noise standard deviation
       y = gt[x_samp] + sigma * rng.standard_normal(size=M)  # noisy data points

       # Data-fidelity term
       subsamp_mat = sp.sparse.lil_matrix((M, N))
       for i in range(M):
           subsamp_mat[i, x_samp[i]] = 1
       G = pxa.LinOp.from_array(subsamp_mat.tocsr())
       F = 1 / 2 * pxo.SquaredL2Norm(dim=y.size).argshift(-y) * G
       F.diff_lipschitz = F.estimate_diff_lipschitz(method="svd")

       # Regularization term (promotes sparse second derivatives)
       deriv_mat = sp.sparse.diags(diagonals=[1, -2, 1], offsets=[0, 1, 2], shape=(N - 2, N))
       D = pxa.LinOp.from_array(deriv_mat)
       _lambda = 1e-1  # regularization parameter
       H = _lambda * pxo.L1Norm(dim=D.codim)

       # Solver for ADMM
       tau = 1 / _lambda  # internal ADMM parameter
       # Inverse operator to solve the linear system
       A_inv = sp.linalg.inv(G.gram().asarray() + (1 / tau) * D.gram().asarray())

       def solver_ADMM(arr, tau):
           b = (1 / tau) * D.adjoint(arr) + G.adjoint(y)
           return A_inv @ b.squeeze()

       # Solve optimization problem
       admm = ADMM(f=F, h=H, K=D, solver=solver_ADMM, show_progress=False)  # ADMM with custom solver
       admm.fit(x0=np.zeros(N), tau=tau)
       x_opt = admm.solution()  # reconstructed signal

       # Plots
       plt.figure()
       plt.plot(np.arange(N), gt, label="Ground truth")
       plt.plot(x_samp, y, "kx", label="Noisy data points")
       plt.plot(np.arange(N), x_opt, label="Reconstructed signal")
       plt.legend()
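    Once fitted, all three ADMM variables can be retrieved via ``solution()`` (a short sketch, reusing the ``admm``
    object from the example above):

    .. code-block:: python

       x_opt = admm.solution(which="primal")    # primal variable x, lives in dom(f)
       u_opt = admm.solution(which="primal_h")  # primal variable u, lives in dom(h)
       z_opt = admm.solution(which="dual")      # dual variable, i.e. z / tau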
    See Also
    --------
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.Func] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        K: typ.Optional[pxa.DiffMap] = None,
        solver: typ.Optional[typ.Callable[[pxt.NDArray, float], pxt.NDArray]] = None,
        solver_kwargs: typ.Optional[dict] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "u", "z")))

        x_update_solver = "custom"  # Method for the x-minimization step
        g = None
        if solver is None:
            if f is None:
                if h is None:
                    msg = " ".join(
                        [
                            "Cannot minimize always-0 functional.",
                            "At least one of Parameter[f, h] must be specified.",
                        ]
                    )
                    raise ValueError(msg)
                if K is None:  # Prox scenario (classical ADMM)
                    f = pxo.NullFunc(h.dim_shape)
                else:  # Sub-iterative CG scenario
                    f = pxa.QuadraticFunc(
                        dim_shape=h.dim_shape,
                        codim_shape=(1,),
                        Q=pxo.NullOp(dim_shape=h.dim_shape, codim_shape=h.dim_shape),
                        c=pxo.NullFunc(dim_shape=h.dim_shape),
                    )
            if f.has(pxa.Property.PROXIMABLE) and K is None:
                x_update_solver = "prox"
                g = f  # In this case, f corresponds to g in the _PDS terminology
                f = None
            elif isinstance(f, pxa.QuadraticFunc):
                x_update_solver = "cg"
                self._K_gram = K.gram()
                warnings.warn(
                    "A sub-iterative conjugate gradient algorithm is used for the x-minimization step "
                    "of ADMM. This might be computationally expensive.",
                    UserWarning,
                )
            elif f.has(pxa.Property.DIFFERENTIABLE_FUNCTION):
                x_update_solver = "nlcg"
                self._K_gram = K.gram()
                warnings.warn(
                    "A sub-iterative non-linear conjugate gradient algorithm is used for the "
                    "x-minimization step of ADMM. This might be computationally expensive.",
                    UserWarning,
                )
            else:
                raise TypeError(
                    "Unsupported scenario: f must either be a ProxFunc (in which case K must be None), a "
                    "QuadraticFunc, or a DiffFunc. If none of these scenarios holds, a solver must be provided "
                    "for the x-minimization step of ADMM."
                )
        self._solver = solver
        self._x_update_solver = x_update_solver
        self._init_kwargs = solver_kwargs if solver_kwargs is not None else dict(show_progress=False)

        super().__init__(
            f=f,
            g=g,
            h=h,
            K=K,
            **kwargs,
        )

    def m_init(
        self,
        x0: pxt.NDArray,
        z0: typ.Optional[pxt.NDArray] = None,
        tau: typ.Optional[pxt.Real] = None,
        rho: typ.Optional[pxt.Real] = None,
        tuning_strategy: _PDS.TuningSpec = 1,
        solver_kwargs: typ.Optional[dict] = None,
        **kwargs,
    ):
        super().m_init(
            x0=x0,
            z0=z0,
            tau=tau,
            sigma=None,
            rho=rho,
            tuning_strategy=tuning_strategy,
        )
        mst = self._mstate  # shorthand
        mst["u"] = self._K(x0)

        # Fit kwargs of sub-iterative solver
        if solver_kwargs is None:
            solver_kwargs = dict()
        self._fit_kwargs = solver_kwargs
    def m_step(self):
        # Algorithm (145) in [PSA].
        # Paper -> code correspondence: L -> K, K -> -Id, c -> 0, y -> u, v -> z, g -> h.
        mst = self._mstate
        mst["x"] = self._x_update(mst["u"] - mst["z"], tau=mst["tau"])
        z_temp = mst["z"] + self._K(mst["x"]) - mst["u"]
        if self._h._name != "NullFunc":
            mst["u"] = self._h.prox(self._K(mst["x"]) + z_temp, tau=mst["tau"])
        mst["z"] = z_temp + (mst["rho"] - 1) * (self._K(mst["x"]) - mst["u"])

    def _x_update(self, arr: pxt.NDArray, tau: float) -> pxt.NDArray:
        if self._x_update_solver == "custom":
            return self._solver(arr, tau)
        elif self._x_update_solver == "prox":
            return self._g.prox(arr, tau=tau)
        elif self._x_update_solver == "cg":
            from pyxu.opt.solver import CG

            b = (1 / tau) * self._K.adjoint(arr) - self._f._c.grad(arr)
            A = self._f._Q + (1 / tau) * self._K_gram
            slvr = CG(A=A, **self._init_kwargs)
            slvr.fit(b=b, x0=self._mstate["x"].copy(), **self._fit_kwargs)  # Initialize CG with previous iterate
            return slvr.solution()
        elif self._x_update_solver == "nlcg":
            from pyxu.opt.solver import NLCG

            c = pxa.LinFunc.from_array(-self._K.adjoint(arr))
            quad_func = pxa.QuadraticFunc(dim_shape=self._f.dim_shape, codim_shape=(1,), Q=2 * self._K_gram, c=c)
            slvr = NLCG(f=self._f + (1 / tau) * quad_func, **self._init_kwargs)
            slvr.fit(x0=self._mstate["x"].copy(), **self._fit_kwargs)  # Initialize NLCG with previous iterate
            return slvr.solution()

    def solution(self, which: typ.Literal["primal", "primal_h", "dual"] = "primal") -> pxt.NDArray:
        data, _ = self.stats()
        if which == "primal":
            assert "x" in data.keys(), "Primal variable x in dom(g) was not logged (declare it in log_var to log it)."
            return data.get("x")
        elif which == "primal_h":
            assert "u" in data.keys(), "Primal variable u in dom(h) was not logged (declare it in log_var to log it)."
            return data.get("u")
        elif which == "dual":
            assert "z" in data.keys(), "Dual variable z was not logged (declare it in log_var to log it)."
            return data.get("z") / self._mstate["tau"]
        else:
            raise ValueError(f"Parameter which must be one of ['primal', 'primal_h', 'dual'], got: {which}.")

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        if tau is not None:
            assert tau > 0, f"Parameter tau must be positive, got {tau}."
        else:
            tau = 1.0
        delta = 2.0
        return tau, 1 / tau, delta

class ForwardBackward(CondatVu):
    r"""
    Forward-backward splitting algorithm.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.FB`.

    The *Forward-backward (FB) splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`,
    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous*,
      *convex function* with *simple proximal operator*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{F}` or :math:`\mathcal{G}` is zero.

    * The *Forward-backward (FB) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{H}=0` in
      :py:class:`~pyxu.opt.solver.CondatVu`. Mercier originally introduced the algorithm without relaxation
      (:math:`\rho=1`) [FB]_. Relaxed versions have been proposed afterwards [PSA]_. The Forward-backward algorithm
      is also known as the *Proximal Gradient Descent (PGD)* algorithm. For the accelerated version of PGD, use
      :py:class:`~pyxu.opt.solver.PGD`.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., N) initial point(s) for the dual variable.
      If ``None`` (default), then use ``x0`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See :py:class:`~pyxu.opt.solver.CondatVu` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.
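    Note
    ----
    In its unrelaxed form (:math:`\rho=1`), the FB update reduces to the classical proximal-gradient step

    .. math::

       \mathbf{x}_{k+1} = \operatorname*{prox}_{\tau \mathcal{G}}
       \big(\mathbf{x}_k - \tau\nabla\mathcal{F}(\mathbf{x}_k)\big).

    A minimal usage sketch follows; the data ``y`` and the regularization weight are purely illustrative, and the
    ``dim=``-style constructor arguments mirror the ADMM example above:

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import FB

       N = 10
       y = np.linspace(0, 1, N)                           # illustrative data
       f = 1 / 2 * pxo.SquaredL2Norm(dim=N).argshift(-y)  # smooth term F(x) = 1/2 ||x - y||^2
       g = 0.1 * pxo.L1Norm(dim=N)                        # proximable term G(x) = 0.1 ||x||_1

       slvr = FB(f=f, g=g, show_progress=False)
       slvr.fit(x0=np.zeros(N))
       x_opt = slvr.solution()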
    See Also
    --------
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc] = None,
        g: typ.Optional[pxa.ProxFunc] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x",)))
        super().__init__(
            f=f,
            g=g,
            h=None,
            K=None,
            **kwargs,
        )

FB = ForwardBackward  #: Alias of :py:class:`~pyxu.opt.solver.ForwardBackward`.

def ProximalPoint(
    g: typ.Optional[pxa.ProxFunc] = None,
    base: typ.Type[_PrimalDualSplitting] = CondatVu,
    **kwargs,
):
    r"""
    Proximal-point method.

    The *Proximal-point (PP)* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{G}(\mathbf{x}),}

    where :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower
    semicontinuous*, *convex function* with *simple proximal operator*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The *Proximal-point* algorithm can be obtained by choosing :math:`\mathcal{F}=0` and :math:`\mathcal{H}=0` in
      :py:class:`~pyxu.opt.solver.CondatVu` or :py:class:`~pyxu.opt.solver.PD3O`. The original version of the
      algorithm was introduced without relaxation (:math:`\rho=1`) [PP]_. Relaxed versions have been proposed
      afterwards [PSA]_.

    Parameters (``__init__()``)
    ---------------------------
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **base** (:py:class:`~pyxu.opt.solver.CondatVu`, :py:class:`~pyxu.opt.solver.PD3O`)
      --
      Base primal-dual algorithm from which the mathematical updates are inherited.
      (Default = :py:class:`~pyxu.opt.solver.CondatVu`)
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    See Also
    --------
    :py:class:`~pyxu.opt.solver.PP`,
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """
    kwargs.update(log_var=kwargs.get("log_var", ("x",)))
    obj = base(
        f=None,
        g=g,
        h=None,
        K=None,
        **kwargs,
    )

    # Bind via types.MethodType so the lambda receives the instance when called as
    # obj.__repr__(); a bare lambda assigned to the instance would be called with no argument.
    obj.__repr__ = types.MethodType(lambda _: "ProximalPoint", obj)
    return obj

PP = ProximalPoint  #: Alias of :py:func:`~pyxu.opt.solver.ProximalPoint`.