import math
import types
import typing as typ
import warnings

import pyxu.abc as pxa
import pyxu.info.ptype as pxt
import pyxu.operator as pxo
import pyxu.opt.stop as pxst

__all__ = [
    *("CondatVu", "CV"),
    "PD3O",
    *("ChambollePock", "CP"),
    *("LorisVerhoeven", "LV"),
    *("DavisYin", "DY"),
    *("DouglasRachford", "DR"),
    "ADMM",
    *("ForwardBackward", "FB"),
    *("ProximalPoint", "PP"),
]


class _PrimalDualSplitting(pxa.Solver):
    r"""
    Base class for Primal-Dual Splitting (PDS) solvers.
    """
    TuningSpec = typ.Literal[1, 2, 3]  #: valid tuning_strategy values

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc] = None,
        g: typ.Optional[pxa.ProxFunc] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        K: typ.Optional[pxa.DiffMap] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
        super().__init__(**kwargs)
        if (f is None) and (g is None) and (h is None):
            msg = " ".join(
                [
                    "Cannot minimize always-0 functional.",
                    "At least one of Parameter[f, g, h] must be specified.",
                ]
            )
            raise ValueError(msg)

        if f is not None:
            primal_dim_shape = f.dim_shape
        elif g is not None:
            primal_dim_shape = g.dim_shape
        else:
            primal_dim_shape = h.dim_shape

        if h is not None:
            dual_dim_shape = h.dim_shape
        elif K is not None:  # if `h` is None but `K` is given, __init__ raises below; this value is never used.
            dual_dim_shape = K.codim_shape
        else:
            dual_dim_shape = primal_dim_shape

        self._f = pxo.NullFunc(dim_shape=primal_dim_shape) if (f is None) else f
        self._g = pxo.NullFunc(dim_shape=primal_dim_shape) if (g is None) else g
        self._h = pxo.NullFunc(dim_shape=dual_dim_shape) if (h is None) else h
        if h is not None:
            self._K = pxo.IdentityOp(dim_shape=h.dim_shape) if (K is None) else K
        else:
            if K is None:
                K_dim_shape = f.dim_shape if f is not None else g.dim_shape
                self._K = pxo.NullOp(dim_shape=K_dim_shape, codim_shape=K_dim_shape)
            else:
                raise ValueError("Optional argument ``h`` must be specified if ``K`` is not None.")
        self._objective_func = self._f + self._g + (self._h * self._K)

    def m_init(
        self,
        x0: pxt.NDArray,
        z0: typ.Optional[pxt.NDArray] = None,
        tau: typ.Optional[pxt.Real] = None,
        sigma: typ.Optional[pxt.Real] = None,
        rho: typ.Optional[pxt.Real] = None,
        beta: typ.Optional[pxt.Real] = None,
        tuning_strategy: TuningSpec = 1,
    ):
        mst = self._mstate  # shorthand
        mst["x"] = x0
        mst["z"] = self._set_dual_variable(z0)
        self._tuning_strategy = int(tuning_strategy)
        self._beta = self._set_beta(beta)
        gamma = self._set_gamma(tuning_strategy)
        mst["tau"], mst["sigma"], delta = self._set_step_sizes(tau, sigma, gamma)
        mst["rho"] = self._set_momentum_term(rho, delta)

    def m_step(self):
        raise NotImplementedError

    def default_stop_crit(self) -> pxa.StoppingCriterion:
        stop_crit_x = pxst.RelError(
            eps=1e-4,
            var="x",
            f=None,
            norm=2,
            satisfy_all=True,
        )
        stop_crit_z = pxst.RelError(
            eps=1e-4,
            var="z",
            f=None,
            norm=2,
            satisfy_all=True,
        )
        return stop_crit_x & stop_crit_z if self._h._name != "NullFunc" else stop_crit_x

    def solution(self, which: typ.Literal["primal", "dual"] = "primal") -> pxt.NDArray:
        data, _ = self.stats()
        if which == "primal":
            assert "x" in data.keys(), "Primal variable x was not logged (declare it in log_var to log it)."
        elif which == "dual":
            assert "z" in data.keys(), "Dual variable z was not logged (declare it in log_var to log it)."
        else:
            raise ValueError(f"Parameter which must be one of ['primal', 'dual'], got: {which}.")
        return data.get("x") if which == "primal" else data.get("z")

    def objective_func(self) -> pxt.NDArray:
        return self._objective_func(self._mstate["x"])

    def _set_beta(self, beta: typ.Optional[pxt.Real]) -> pxt.Real:
        r"""
        Sets the Lipschitz constant.

        Returns
        -------
        beta: Real
            Lipschitz constant.
        """
        if beta is None:
            if math.isfinite(dl := self._f.diff_lipschitz):
                return dl
            else:
                msg = "beta: automatic inference not supported for operators with unbounded Lipschitz gradients."
                raise ValueError(msg)
        else:
            return beta

    def _set_dual_variable(self, z: typ.Optional[pxt.NDArray]) -> pxt.NDArray:
        r"""
        Initialize the dual variable if it is :py:obj:`None` by mapping the primal variable through the operator K.

        Returns
        -------
        NDArray
            Initialized dual variable.
        """
        if z is None:
            return self._K(self._mstate["x"].copy())
        else:
            return z

    def _set_gamma(self, tuning_strategy: typ.Literal[1, 2, 3]) -> pxt.Real:
        r"""
        Sets the gamma parameter according to the tuning strategy.

        Returns
        -------
        gamma: Real
            Gamma parameter.
        """
        return self._beta if tuning_strategy != 2 else self._beta / 1.9

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        raise NotImplementedError

    def _set_momentum_term(
        self,
        rho: typ.Optional[pxt.Real],
        delta: pxt.Real,
    ) -> pxt.Real:
        r"""
        Sets the momentum term according to Theorem 8.2 in [PSA]_.

        Returns
        -------
        rho: Real
            Momentum term.

        Notes
        -----
        The :math:`O(1/\sqrt{k})` objective functional convergence rate of Theorem 1 in [dPSA]_ holds for
        :math:`\rho=1`.
        """
        if rho is None:
            rho = 1.0 if self._tuning_strategy != 3 else delta - 0.1
        else:
            assert rho <= delta, f"Parameter rho must be smaller than delta: {rho} > {delta}."
        return rho


_PDS = _PrimalDualSplitting  #: shorthand


class CondatVu(_PrimalDualSplitting):
    r"""
    Condat-Vu primal-dual splitting algorithm.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.CV`.

    The *Condat-Vu (CV)* primal-dual method is described in [CVS]_. (This particular implementation is based on the
    pseudo-code Algorithm 7.1 provided in [FuncSphere]_ Chapter 7, Section 1.)

    It can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathcal{K} \mathbf{x})},

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.

    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one or more of the terms :math:`\mathcal{F}`, :math:`\mathcal{G}` or
      :math:`\mathcal{H}` is zero.

    * The algorithm has convergence guarantees when :math:`\mathcal{H}` is composed with a *linear operator*
      :math:`\mathbf{K}`. When :math:`\mathcal{F}=0`, convergence can be proven for *non-linear differentiable maps*
      :math:`\mathcal{K}` (see [NLCP]_). Note that :py:class:`~pyxu.opt.solver.CondatVu` does not yet support
      automatic selection of hyperparameters for the case of *non-linear differentiable maps* :math:`\mathcal{K}`.

    * Assume that either of the following holds:

      * :math:`\beta>0` and:

        - :math:`\gamma \geq \frac{\beta}{2}`,
        - :math:`\frac{1}{\tau}-\sigma\Vert\mathbf{K}\Vert_{2}^2\geq \gamma`,
        - :math:`\rho \in ]0,\delta[`, where :math:`\delta:=2-\frac{\beta}{2}\gamma^{-1}\in[1,2[` (:math:`\delta=2` is
          possible when :math:`\mathcal{F}` is *quadratic* and :math:`\gamma \geq \beta`, see [PSA]_).

      * :math:`\beta=0` and:

        - :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2\leq 1`,
        - :math:`\rho \in ]0,2[`.

      Then there exists a pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M` solution
      s.t. the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and
      :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star`
      respectively, i.e.

      .. math::

         \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and}
         \quad
         \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Differentiable map or linear operator :math:`\mathcal{K}`.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See section below for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    .. rubric:: Default hyperparameter values

    This class supports three strategies to automatically set the hyperparameters (see [PSA]_ for more details and
    numerical experiments comparing the performance of the three strategies):

    - ``tuning_strategy == 1``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=1` (no relaxation).

      This is the most standard way of setting the parameters in the literature.
    - ``tuning_strategy == 2``: :math:`\gamma = \beta/1.9` (large step sizes) and :math:`\rho=1` (no relaxation).

      This strategy favours large step sizes, forbidding the use of overrelaxation. When :math:`\beta=0`, it coincides
      with the first strategy.
    - ``tuning_strategy == 3``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=\delta - 0.1 > 1` (overrelaxation).

      This strategy chooses smaller step sizes, but performs overrelaxation.

    Once :math:`\gamma` is chosen, the convergence speed of the algorithm is improved by choosing :math:`\sigma` and
    :math:`\tau` as large as possible and relatively well-balanced -- so that both the primal and dual variables
    converge at the same pace. Whenever possible, we therefore choose perfectly balanced parameters :math:`\sigma=\tau`
    saturating the convergence inequalities for a given value of :math:`\gamma`.

    * For :math:`\beta>0` and :math:`\mathcal{H}\neq 0` this yields:

      .. math::

         \frac{1}{\tau}-\tau\Vert\mathbf{K}\Vert_{2}^2= \gamma \quad\Longleftrightarrow\quad -\tau^2\Vert\mathbf{K}\Vert_{2}^2-\gamma\tau+1=0,

      which admits one positive root

      .. math::

         \tau=\sigma=\frac{1}{\Vert\mathbf{K}\Vert_{2}^2}\left(-\frac{\gamma}{2}+\sqrt{\frac{\gamma^2}{4}+\Vert\mathbf{K}\Vert_{2}^2}\right).

    * For :math:`\beta>0` and :math:`\mathcal{H}=0` this yields: :math:`\tau=1/\gamma.`

    * For :math:`\beta=0` this yields: :math:`\tau=\sigma=\Vert\mathbf{K}\Vert_{2}^{-1}`.

    When :math:`\tau` is provided (:math:`\tau = \tau_{1}`), but not :math:`\sigma`, the latter is chosen as:

    .. math::

       \frac{1}{\tau_{1}}-\sigma\Vert\mathbf{K}\Vert_{2}^2= \gamma \quad\Longleftrightarrow\quad \sigma=\left(\frac{1}{\tau_{1}}-\gamma\right)\frac{1}{\Vert\mathbf{K}\Vert_{2}^2}.

    When :math:`\sigma` is provided (:math:`\sigma = \sigma_{1}`), but not :math:`\tau`, the latter is chosen as:

    .. math::

       \frac{1}{\tau}-\sigma_{1}\Vert\mathbf{K}\Vert_{2}^2= \gamma \quad\Longleftrightarrow\quad \tau=\frac{1}{\left(\gamma+\sigma_{1}\Vert\mathbf{K}\Vert_{2}^2\right)}.

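    As a quick numerical sanity check of the balanced choice above -- a minimal sketch, with illustrative values for
    :math:`\gamma` and :math:`\Vert\mathbf{K}\Vert_{2}` -- the positive root indeed saturates the convergence
    inequality:

    .. code-block:: python

       import math

       gamma, L = 1.0, 2.0  # illustrative values of gamma and of the operator norm of K
       # Positive root of -tau^2 L^2 - gamma tau + 1 = 0 (balanced tau = sigma):
       tau = sigma = (-(gamma / 2) + math.sqrt(gamma**2 / 4 + L**2)) / L**2
       # The inequality 1/tau - sigma L^2 >= gamma is saturated:
       assert math.isclose(1 / tau - sigma * L**2, gamma)
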
    Warning
    -------
    When values are provided for both :math:`\tau` and :math:`\sigma`, it is assumed that the latter satisfy the
    convergence inequalities, but no check is explicitly performed. Automatic selection of hyperparameters for the case
    of non-linear differentiable maps :math:`\mathcal{K}` is not supported yet.

    Example
    -------
    Consider the following optimisation problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}_+^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda_1 \|\mathbf{D}\mathbf{x}\|_1\quad+\quad\lambda_2 \|\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{N\times N}` the discrete derivative operator and
    :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda_1,\lambda_2>0.` This problem can
    be solved via PDS with :math:`\mathcal{F}(\mathbf{x})=
    \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{G}(\mathbf{x})=\lambda_2\|\mathbf{x}\|_1,` :math:`\mathcal{H}(\mathbf{x})=\lambda_1 \|\mathbf{x}\|_1`
    and :math:`\mathbf{K}=\mathbf{D}`.

    .. plot::

       import matplotlib.pyplot as plt
       import numpy as np
       import pyxu.operator as pxo
       from pyxu.operator import SubSample, PartialDerivative
       from pyxu.opt.solver import CV

       x = np.repeat(np.asarray([0, 2, 1, 3, 0, 2, 0]), 10)
       N = x.size

       D = PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       downsample = SubSample(N, slice(None, None, 3))
       y = downsample(x)
       loss = (1 / 2) * pxo.SquaredL2Norm(y.size).argshift(-y)
       F = loss * downsample

       cv = CV(f=F, g=0.01 * pxo.L1Norm(N), h=0.1 * pxo.L1Norm(N), K=D)
       x0, z0 = np.zeros((2, N))
       cv.fit(x0=x0, z0=z0)
       x_recons = cv.solution()

       plt.figure()
       plt.stem(x, linefmt="C0-", markerfmt="C0o")
       mask_ids = np.where(downsample.adjoint(np.ones_like(y)))[0]
       markerline, stemlines, baseline = plt.stem(mask_ids, y, linefmt="C3-", markerfmt="C3o")
       markerline.set_markerfacecolor("none")
       plt.stem(x_recons, linefmt="C1--", markerfmt="C1s")
       plt.legend(["Ground truth", "Observation", "CV Estimate"])
       plt.show()

    See Also
    --------
    :py:class:`~pyxu.opt.solver.CV`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.ChambollePock`,
    :py:class:`~pyxu.opt.solver.DouglasRachford`
    """

    def m_step(self):
        mst = self._mstate
        x_temp = self._g.prox(
            mst["x"] - mst["tau"] * self._f.grad(mst["x"]) - mst["tau"] * self._K.jacobian(mst["x"]).adjoint(mst["z"]),
            tau=mst["tau"],
        )
        if self._h._name != "NullFunc":
            u = 2 * x_temp - mst["x"]
            z_temp = self._h.fenchel_prox(
                mst["z"] + mst["sigma"] * self._K(u),
                sigma=mst["sigma"],
            )
            mst["z"] = mst["rho"] * z_temp + (1 - mst["rho"]) * mst["z"]
        mst["x"] = mst["rho"] * x_temp + (1 - mst["rho"]) * mst["x"]

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: typ.Optional[pxt.Real],
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of the parameter :math:`\delta`.
        """
        tau = None if tau == 0 else tau
        sigma = None if sigma == 0 else sigma
        if not isinstance(self._K, pxa.LinOp) and ((tau is None) or (sigma is None)):
            # Automatic selection requires the operator norm of K, hence a linear K.
            msg = (
                f"Automatic selection of parameters is only supported in the case in which K is a linear operator. "
                f"Got operator of type {self._K.__class__}."
            )
            raise ValueError(msg)

        if (tau is not None) and (sigma is None):
            assert tau > 0, f"Parameter tau must be positive, got {tau}."
            if self._h._name == "NullFunc":
                assert tau <= 1 / gamma, f"Parameter tau must be smaller than 1/gamma: {tau} > {1 / gamma}."
                sigma = 0
            else:
                if math.isfinite(self._K.lipschitz):
                    sigma = ((1 / tau) - gamma) * (1 / self._K.lipschitz**2)
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is not None):
            assert sigma > 0, f"Parameter sigma must be positive, got {sigma}."
            if self._h._name == "NullFunc":
                tau = 1 / gamma
            else:
                if math.isfinite(self._K.lipschitz):
                    tau = 1 / (gamma + (sigma * self._K.lipschitz**2))
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is None):
            if self._beta > 0:
                if self._h._name == "NullFunc":
                    tau = 1 / gamma
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau = sigma = (1 / (self._K.lipschitz) ** 2) * (
                            (-gamma / 2) + math.sqrt((gamma**2 / 4) + self._K.lipschitz**2)
                        )
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
            else:
                if self._h._name == "NullFunc":
                    tau = 1
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau = sigma = 1 / self._K.lipschitz
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
        delta = (
            2
            if (self._beta == 0 or (isinstance(self._f, pxa.QuadraticFunc) and gamma <= self._beta))
            else 2 - self._beta / (2 * gamma)
        )
        return tau, sigma, delta


CV = CondatVu  #: Alias of :py:class:`~pyxu.opt.solver.CondatVu`.


class PD3O(_PrimalDualSplitting):
    r"""
    Primal-Dual Three-Operator Splitting (PD3O) algorithm.

    The *Primal-Dual three Operator splitting (PD3O)* method is described in [PD3O]_.

    It can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\Psi(\mathbf{x}):=\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathcal{K} \mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.

    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible* -- i.e. there exists at least one solution.

    * The algorithm is still valid if one or more of the terms :math:`\mathcal{F}`, :math:`\mathcal{G}` or
      :math:`\mathcal{H}` is zero.

    * The algorithm has convergence guarantees for the case in which :math:`\mathcal{H}` is composed with a *linear
      operator* :math:`\mathbf{K}`. When :math:`\mathcal{F}=0`, convergence can be proven for *non-linear
      differentiable maps* :math:`\mathcal{K}`. (See [NLCP]_.) Note that this class does not yet support automatic
      selection of hyperparameters for the case of *non-linear differentiable maps* :math:`\mathcal{K}`.

    * Assume that the following holds:

      * :math:`\gamma\geq\frac{\beta}{2}`,
      * :math:`\tau \in ]0, \frac{1}{\gamma}[`,
      * :math:`\tau\sigma\Vert\mathbf{K}\Vert_{2}^2 \leq 1`,
      * :math:`\delta = 2-\beta\tau/2 \in [1, 2[` and :math:`\rho \in (0, \delta]`.

      Then there exists a pair :math:`(\mathbf{x}^\star,\mathbf{z}^\star)\in\mathbb{R}^N\times \mathbb{R}^M` solution
      s.t. the primal and dual sequences of estimates :math:`(\mathbf{x}_n)_{n\in\mathbb{N}}` and
      :math:`(\mathbf{z}_n)_{n\in\mathbb{N}}` *converge* towards :math:`\mathbf{x}^\star` and :math:`\mathbf{z}^\star`
      respectively (Theorem 8.2 of [PSA]_), i.e.

      .. math::

         \lim_{n\rightarrow +\infty}\Vert\mathbf{x}^\star-\mathbf{x}_n\Vert_2=0, \quad \text{and} \quad \lim_{n\rightarrow +\infty}\Vert\mathbf{z}^\star-\mathbf{z}_n\Vert_2=0.

      Furthermore, when :math:`\rho=1`, the objective functional sequence
      :math:`\left(\Psi(\mathbf{x}_n)\right)_{n\in\mathbb{N}}` can be shown to converge towards its minimum
      :math:`\Psi^\ast` with rate :math:`o(1/\sqrt{n})` (Theorem 1 of [dPSA]_):

      .. math::

         \Psi(\mathbf{x}_n) - \Psi^\ast = o(1/\sqrt{n}).

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Differentiable map or linear operator :math:`\mathcal{K}`.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See section below for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    .. rubric:: Default hyperparameter values

    This class supports three strategies to automatically set the hyperparameters (see [PSA]_ for more details and
    numerical experiments comparing the performance of the three strategies):

    - ``tuning_strategy == 1``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=1` (no relaxation).

      This is the most standard way of setting the parameters in the literature.
    - ``tuning_strategy == 2``: :math:`\gamma = \beta/1.9` (large step sizes) and :math:`\rho=1` (no relaxation).

      This strategy favours large step sizes, forbidding the use of overrelaxation. When :math:`\beta=0`, it coincides
      with the first strategy.
    - ``tuning_strategy == 3``: :math:`\gamma = \beta` (safe step sizes) and :math:`\rho=\delta - 0.1 > 1` (overrelaxation).

      This strategy chooses smaller step sizes, but performs overrelaxation.

    Once :math:`\gamma` is chosen, the convergence speed of the algorithm is improved by choosing :math:`\sigma` and
    :math:`\tau` as large as possible and relatively well-balanced -- so that both the primal and dual variables
    converge at the same pace. Whenever possible, we therefore choose perfectly balanced parameters :math:`\sigma=\tau`
    saturating the convergence inequalities for a given value of :math:`\gamma`.

    In practice, the following linear program is solved:

    .. math::

       (\tau, \, \sigma) = \operatorname{arg} \max_{(\tau^{*}, \, \sigma^{*})} \quad & \operatorname{log}(\tau^{*}) + \operatorname{log}(\sigma^{*})\\
       \text{s.t.} \quad & \operatorname{log}(\tau^{*}) + \operatorname{log}(\sigma^{*}) \leq -2\operatorname{log}(\Vert\mathbf{K}\Vert_{2})\\
       & \operatorname{log}(\tau^{*}) \leq -\operatorname{log}(\gamma)\\
       & \operatorname{log}(\tau^{*}) = \operatorname{log}(\sigma^{*}).

    When :math:`\tau \leq 1/\gamma` is given (i.e., :math:`\tau=\tau_{1}`), but not :math:`\sigma`, the latter is chosen
    as:

    .. math::

       \tau_{1}\sigma\Vert\mathbf{K}\Vert_{2}^2= 1 \quad\Longleftrightarrow\quad \sigma=\frac{1}{\tau_{1}\Vert\mathbf{K}\Vert_{2}^{2}}.

    When :math:`\sigma` is given (i.e., :math:`\sigma=\sigma_{1}`), but not :math:`\tau`, the latter is chosen as:

    .. math::

       \tau = \min \left\{\frac{1}{\gamma}, \frac{1}{\sigma_{1}\Vert\mathbf{K}\Vert_{2}^{2}}\right\}.

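    With the equality constraint :math:`\tau^{*}=\sigma^{*}`, this program admits the closed-form solution
    :math:`\tau=\sigma=\min\{1/\gamma,\; \sqrt{0.99}/\Vert\mathbf{K}\Vert_{2}\}` (the internal implementation adds a
    :math:`0.99` safety margin to the first constraint). A minimal sketch reproducing it with
    :py:func:`scipy.optimize.linprog`, using illustrative values for :math:`\gamma` and :math:`\Vert\mathbf{K}\Vert_{2}`:

    .. code-block:: python

       import numpy as np
       from scipy.optimize import linprog

       gamma, L = 1.0, 2.0  # illustrative values of gamma and of the operator norm of K
       res = linprog(
           c=[-1, -1],  # maximize log(tau) + log(sigma)
           A_ub=[[1, 1], [1, 0]],
           b_ub=[np.log(0.99) - 2 * np.log(L), np.log(1 / gamma)],
           A_eq=[[1, -1]],  # log(tau) = log(sigma)
           b_eq=[0],
           bounds=(None, None),
       )
       tau, sigma = np.exp(res.x)
       assert np.isclose(tau, min(1 / gamma, np.sqrt(0.99) / L))
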
    Warning
    -------
    When values are provided for both :math:`\tau` and :math:`\sigma`, it is assumed that the latter satisfy the
    convergence inequalities, but no check is explicitly performed. Automatic selection of hyperparameters for the case
    of non-linear differentiable maps :math:`\mathcal{K}` is not supported yet.

    Example
    -------
    Consider the following optimisation problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}_+^N}\frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2\quad+\quad\lambda_1 \|\mathbf{D}\mathbf{x}\|_1\quad+\quad\lambda_2 \|\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{N\times N}` the discrete derivative operator and
    :math:`\mathbf{G}\in\mathbb{R}^{L\times N}, \, \mathbf{y}\in\mathbb{R}^L, \lambda_1,\lambda_2>0.` This problem can
    be solved via PD3O with :math:`\mathcal{F}(\mathbf{x})=
    \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{G}(\mathbf{x})=\lambda_2\|\mathbf{x}\|_1,` :math:`\mathcal{H}(\mathbf{x})=\lambda_1 \|\mathbf{x}\|_1`
    and :math:`\mathbf{K}=\mathbf{D}`.

    .. plot::

       import matplotlib.pyplot as plt
       import numpy as np
       import pyxu.operator as pxo
       from pyxu.operator import SubSample, PartialDerivative
       from pyxu.opt.solver import PD3O

       x = np.repeat(np.asarray([0, 2, 1, 3, 0, 2, 0]), 10)
       N = x.size

       D = PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       downsample = SubSample(N, slice(None, None, 3))
       y = downsample(x)
       loss = (1 / 2) * pxo.SquaredL2Norm(y.size).argshift(-y)
       F = loss * downsample

       pd3o = PD3O(f=F, g=0.01 * pxo.L1Norm(N), h=0.1 * pxo.L1Norm(N), K=D)
       x0, z0 = np.zeros((2, N))
       pd3o.fit(x0=x0, z0=z0)
       x_recons = pd3o.solution()

       plt.figure()
       plt.stem(x, linefmt="C0-", markerfmt="C0o")
       mask_ids = np.where(downsample.adjoint(np.ones_like(y)))[0]
       markerline, stemlines, baseline = plt.stem(mask_ids, y, linefmt="C3-", markerfmt="C3o")
       markerline.set_markerfacecolor("none")
       plt.stem(x_recons, linefmt="C1--", markerfmt="C1s")
       plt.legend(["Ground truth", "Observation", "PD3O Estimate"])
       plt.show()
    """

    def m_init(
        self,
        x0: pxt.NDArray,
        z0: typ.Optional[pxt.NDArray] = None,
        tau: typ.Optional[pxt.Real] = None,
        sigma: typ.Optional[pxt.Real] = None,
        rho: typ.Optional[pxt.Real] = None,
        tuning_strategy: _PDS.TuningSpec = 1,
    ):
        super().m_init(
            x0=x0,
            z0=z0,
            tau=tau,
            sigma=sigma,
            rho=rho,
            tuning_strategy=tuning_strategy,
        )

        # If u0 == x0, the first step would leave x unchanged and the solver would stop at the first iteration.
        if self._g._name == self._h._name == "NullFunc":
            self._mstate["u"] = x0 * 1.01
        else:
            self._mstate["u"] = x0.copy()

    def m_step(self):
        # Slightly more efficient rewriting of iterations (216) of [PSA]_ with M=1.
        # Faster than (185) since only one call to the adjoint and one to the gradient per iteration.
        mst = self._mstate
        mst["x"] = self._g.prox(
            mst["u"] - mst["tau"] * self._K.jacobian(mst["u"]).adjoint(mst["z"]),
            tau=mst["tau"],
        )
        u_temp = mst["x"] - mst["tau"] * self._f.grad(mst["x"])
        if self._h._name != "NullFunc":
            z_temp = self._h.fenchel_prox(
                mst["z"] + mst["sigma"] * self._K(mst["x"] + u_temp - mst["u"]),
                sigma=mst["sigma"],
            )
            mst["z"] = (1 - mst["rho"]) * mst["z"] + mst["rho"] * z_temp
        mst["u"] = (1 - mst["rho"]) * mst["u"] + mst["rho"] * u_temp

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of :math:`\delta`.
        """
        tau = None if tau == 0 else tau
        sigma = None if sigma == 0 else sigma
        if not isinstance(self._K, pxa.LinOp) and ((tau is None) or (sigma is None)):
            # Automatic selection requires the operator norm of K, hence a linear K.
            msg = (
                f"Automatic selection of parameters is only supported in the case in which K is a linear operator. "
                f"Got operator of type {self._K.__class__}."
            )
            raise ValueError(msg)

        if (tau is not None) and (sigma is None):
            assert 0 < tau <= 1 / gamma, "tau must be positive and smaller than 1/gamma."
            if self._h._name == "NullFunc":
                sigma = 0
            else:
                if math.isfinite(self._K.lipschitz):
                    sigma = 1 / (tau * self._K.lipschitz**2)
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is not None):
            assert sigma > 0, f"sigma must be positive, got {sigma}."
            if self._h._name == "NullFunc":
                tau = 1 / gamma
            else:
                if math.isfinite(self._K.lipschitz):
                    tau = min(1 / (sigma * self._K.lipschitz**2), 1 / gamma)
                else:
                    msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                    raise ValueError(msg)
        elif (tau is None) and (sigma is None):
            if self._beta > 0:
                if self._h._name == "NullFunc":
                    tau = 1 / gamma
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau, sigma = self._optimize_step_sizes(gamma)
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
            else:
                if self._h._name == "NullFunc":
                    tau = 1
                    sigma = 0
                else:
                    if math.isfinite(self._K.lipschitz):
                        tau = sigma = 1 / self._K.lipschitz
                    else:
                        msg = "Please compute the Lipschitz constant of the linear operator K by calling its method 'estimate_lipschitz()'"
                        raise ValueError(msg)
        delta = 2 if self._beta == 0 else 2 - self._beta * tau / 2
        return tau, sigma, delta

    def _optimize_step_sizes(self, gamma: pxt.Real) -> tuple[pxt.Real, pxt.Real]:
        r"""
        Optimize the primal/dual step sizes.

        Parameters
        ----------
        gamma: Real
            Gamma parameter.

        Returns
        -------
        Tuple[Real, Real]
            Sensible primal/dual step sizes.
        """
        import numpy as np
        from scipy.optimize import linprog

        c = np.array([-1, -1])
        A_ub = np.array([[1, 1], [1, 0]])
        b_ub = np.array([np.log(0.99) - 2 * np.log(self._K.lipschitz), np.log(1 / gamma)])
        A_eq = np.array([[1, -1]])
        b_eq = np.array([0])
        result = linprog(
            c=c,
            A_ub=A_ub,
            b_ub=b_ub,
            A_eq=A_eq,
            b_eq=b_eq,
            bounds=(None, None),
        )
        if not result.success:
            warnings.warn("Automatic parameter selection has not converged.", UserWarning)
        return np.exp(result.x)


def ChambollePock(
    g: typ.Optional[pxa.ProxFunc] = None,
    h: typ.Optional[pxa.ProxFunc] = None,
    K: typ.Optional[pxa.DiffMap] = None,
    base: typ.Type[_PrimalDualSplitting] = CondatVu,
    **kwargs,
):
    r"""
    Chambolle-Pock primal-dual splitting method.

    Parameters
    ----------
    g: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{G}`.
    h: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{H}`.
    K: :py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, None
        Differentiable map or linear operator :math:`\mathcal{K}`.
    base: :py:class:`~pyxu.opt.solver.CondatVu`, :py:class:`~pyxu.opt.solver.PD3O`
        Specifies the base primal-dual algorithm.
        (Default = :py:class:`~pyxu.opt.solver.CondatVu`)
    \*\*kwargs: ~collections.abc.Mapping
        Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.


    The *Chambolle-Pock (CP) primal-dual splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}),}

    where:

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.
    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    * Automatic selection of parameters is not supported for *non-linear differentiable maps* :math:`\mathcal{K}`.

    * The *Chambolle-Pock (CP) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{F}=0` in
      :py:class:`~pyxu.opt.solver.CondatVu` or :py:class:`~pyxu.opt.solver.PD3O`. Chambolle and Pock originally
      introduced the algorithm without relaxation (:math:`\rho=1`) [CPA]_. Relaxed versions have been proposed
      afterwards [PSA]_. Chambolle-Pock's algorithm is also known as the *Primal-Dual Hybrid Gradient (PDHG)*
      algorithm. It can be seen as a preconditioned ADMM method [CPA]_.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See `base` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

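    Example
    -------
    A minimal usage sketch (illustrative operators only; hyperparameters are then set automatically):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import CP

       N = 100
       y = np.random.default_rng(0).standard_normal(N)
       g = (1 / 2) * pxo.SquaredL2Norm(N).argshift(-y)  # G(x) = 0.5 * ||x - y||_2^2
       h = 0.1 * pxo.L1Norm(N)                          # H(z) = 0.1 * ||z||_1
       K = pxo.PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       cp = CP(g=g, h=h, K=K)
       cp.fit(x0=np.zeros(N))
       x_hat = cp.solution()
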
    See Also
    --------
    :py:func:`~pyxu.opt.solver.CP`,
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """
    kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
    obj = base(
        f=None,
        g=g,
        h=h,
        K=K,
        **kwargs,
    )
    obj.__repr__ = lambda _: "ChambollePock"
    return obj


CP = ChambollePock  #: Alias of :py:func:`~pyxu.opt.solver.ChambollePock`.


class LorisVerhoeven(PD3O):
    r"""
    Loris-Verhoeven splitting algorithm.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.LV`.

    The *Loris-Verhoeven (LV) primal-dual splitting* can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K} \mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and
      *convex function* with *simple proximal operator*.

    * :math:`\mathcal{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *differentiable map* (e.g. a *linear operator*
      :math:`\mathbf{K}`), with **operator norm**:

      .. math::

         \Vert{\mathcal{K}}\Vert_2=\sup_{\mathbf{x}\in\mathbb{R}^N,\Vert\mathbf{x}\Vert_2=1} \Vert\mathcal{K}(\mathbf{x})\Vert_2.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{F}` or :math:`\mathcal{H}` is zero.

    * Automatic selection of parameters is not supported for *non-linear differentiable maps* :math:`\mathcal{K}`.

    * The *Loris-Verhoeven (LV) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{G}=0` in
      :py:class:`~pyxu.opt.solver.PD3O`.

    * In the specific case where :math:`\mathcal{F}` is *quadratic*, then one can set :math:`\rho \in ]0,\delta[` with
      :math:`\delta=2`. (See Theorem 4.3 in [PSA]_.)

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **K** (:py:class:`~pyxu.abc.DiffMap`, :py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
      --
      Differentiable map or linear operator :math:`\mathcal{K}`.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See :py:class:`~pyxu.opt.solver.PD3O` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

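    Example
    -------
    A minimal usage sketch (illustrative operators only; ``f`` must be differentiable):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import LV

       N = 100
       y = np.random.default_rng(0).standard_normal(N)
       f = (1 / 2) * pxo.SquaredL2Norm(N).argshift(-y)  # F(x) = 0.5 * ||x - y||_2^2
       h = 0.1 * pxo.L1Norm(N)                          # H(z) = 0.1 * ||z||_1
       K = pxo.PartialDerivative.finite_difference(dim_shape=(N,), order=(1,))

       lv = LV(f=f, h=h, K=K)
       lv.fit(x0=np.zeros(N))
       x_hat = lv.solution()
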
    See Also
    --------
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.DavisYin`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        K: typ.Optional[pxa.DiffMap] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
        super().__init__(
            f=f,
            g=None,
            h=h,
            K=K,
            **kwargs,
        )

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: typ.Optional[pxt.Real],
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of the parameter :math:`\delta`.
        """
        tau, sigma, _ = super()._set_step_sizes(tau=tau, sigma=sigma, gamma=gamma)
        delta = 2 if (self._beta == 0 or isinstance(self._f, pxa.QuadraticFunc)) else 2 - self._beta / (2 * gamma)
        return tau, sigma, delta


LV = LorisVerhoeven  #: Alias of :py:class:`~pyxu.opt.solver.LorisVerhoeven`.


class DavisYin(PD3O):
    r"""
    Davis-Yin primal-dual splitting method.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.DY`.

    The *Davis-Yin (DY) primal-dual splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.

    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
      :math:`\mathcal{H}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
      *convex functions* with *simple proximal operators*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    * The *Davis-Yin primal-dual splitting* method can be obtained by choosing :math:`\mathcal{K}=\mathbf{I}`
      (identity) and :math:`\tau=1/\sigma` in :py:class:`~pyxu.opt.solver.PD3O`, up to a suitable change of variable
      [PSA]_.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{H}`.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., N) initial point(s) for the dual variable.
      If ``None`` (default), then use ``x0`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **sigma** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Dual step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (default to 1).
      See :py:class:`~pyxu.opt.solver.PD3O` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

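    Example
    -------
    A minimal usage sketch (illustrative functionals only). Note that :math:`\tau` fully determines the dual step
    size, since :math:`\sigma = 1/\tau` internally:

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import DY

       N = 100
       y = np.random.default_rng(0).standard_normal(N)
       f = (1 / 2) * pxo.SquaredL2Norm(N).argshift(-y)  # F(x) = 0.5 * ||x - y||_2^2
       g = 0.1 * pxo.L1Norm(N)                          # G(x) = 0.1 * ||x||_1
       h = 0.05 * pxo.L1Norm(N)                         # H(x): second proximable term

       dy = DY(f=f, g=g, h=h)
       dy.fit(x0=np.zeros(N), tau=0.5)  # sigma = 1/tau is set automatically
       x_hat = dy.solution()
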
    See Also
    --------
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.LorisVerhoeven`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc],
        g: typ.Optional[pxa.ProxFunc] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
        super().__init__(
            f=f,
            g=g,
            h=h,
            K=None,
            **kwargs,
        )

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        r"""
        Set the primal/dual step sizes.

        Returns
        -------
        Tuple[Real, Real, Real]
            Sensible primal/dual step sizes and value of :math:`\delta`.
        """
        if tau is not None:
            assert 0 < tau <= 1 / gamma, "tau must be positive and smaller than 1/gamma."
        else:
            tau = 1.0 if self._beta == 0 else 1 / gamma

        delta = 2.0 if self._beta == 0 else 2 - self._beta * tau / 2

        return tau, 1 / tau, delta


DY = DavisYin  #: Alias of :py:class:`~pyxu.opt.solver.DavisYin`.


def DouglasRachford(
    g: typ.Optional[pxa.ProxFunc] = None,
    h: typ.Optional[pxa.ProxFunc] = None,
    base: typ.Type[_PrimalDualSplitting] = CondatVu,
    **kwargs,
):
    r"""
    Douglas-Rachford splitting algorithm.

    Parameters
    ----------
    g: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{G}`.
    h: :py:class:`~pyxu.abc.ProxFunc`, None
        Proximable function :math:`\mathcal{H}`.
    base: :py:class:`~pyxu.opt.solver.CondatVu`, :py:class:`~pyxu.opt.solver.PD3O`
        Specifies the base primal-dual algorithm.
        (Default = :py:class:`~pyxu.opt.solver.CondatVu`)
    \*\*kwargs: ~collections.abc.Mapping
        Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.


    The *Douglas-Rachford (DR) primal-dual splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \mathcal{G}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{x}),}

    where :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` and
    :math:`\mathcal{H}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` are *proper*, *lower semicontinuous* and
    *convex functions* with *simple proximal operators*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{G}` or :math:`\mathcal{H}` is zero.

    * The *Douglas-Rachford (DR) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{F}=0`,
      :math:`\mathbf{K}=\mathbf{Id}` and :math:`\tau=1/\sigma` in :py:class:`~pyxu.opt.solver.CondatVu` or
      :py:class:`~pyxu.opt.solver.PD3O`. Douglas and Rachford originally introduced the algorithm without relaxation
      (:math:`\rho=1`), but relaxed versions have been proposed afterwards [PSA]_. When :math:`\rho=1`,
      Douglas-Rachford's algorithm is *functionally equivalent* to ADMM (up to a change of variable, see [PSA]_ for a
      derivation).

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., N) initial point(s) for the dual variable.
      If ``None`` (default), then use ``x0`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size. Defaults to 1.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

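    Example
    -------
    A minimal usage sketch (illustrative functionals only):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import DR

       N = 100
       y = np.random.default_rng(0).standard_normal(N)
       g = (1 / 2) * pxo.SquaredL2Norm(N).argshift(-y)  # G(x) = 0.5 * ||x - y||_2^2
       h = 0.1 * pxo.L1Norm(N)                          # H(x) = 0.1 * ||x||_1

       dr = DR(g=g, h=h)
       dr.fit(x0=np.zeros(N))
       x_hat = dr.solution()
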
    See Also
    --------
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.ForwardBackward`
    """
    kwargs.update(log_var=kwargs.get("log_var", ("x", "z")))
    obj = base(f=None, g=g, h=h, K=None, **kwargs)
    obj.__repr__ = lambda _: "DouglasRachford"

    def _set_step_sizes_custom(
        obj: _PrimalDualSplitting,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        tau = 1.0 if tau is None else tau
        delta = 2.0
        return tau, 1 / tau, delta

    obj._set_step_sizes = types.MethodType(_set_step_sizes_custom, obj)
    return obj


DR = DouglasRachford  #: Alias of :py:func:`~pyxu.opt.solver.DouglasRachford`.


class ADMM(_PDS):
    r"""
    Alternating Direction Method of Multipliers.

    The *Alternating Direction Method of Multipliers (ADMM)* can be used to solve problems of the form:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}^N} \quad \mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{H}(\mathbf{K}\mathbf{x}),

    where (see below for additional details on the assumptions):

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and
      *convex functional*, either with *simple proximal operator* or *Lipschitz-differentiable*,
    * :math:`\mathcal{H}:\mathbb{R}^M\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous* and
      *convex functional* with *simple proximal operator*,
    * :math:`\mathbf{K}:\mathbb{R}^N\rightarrow \mathbb{R}^M` is a *linear operator* with **operator norm**
      :math:`\Vert{\mathbf{K}}\Vert_2`.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if either :math:`\mathcal{F}` or :math:`\mathcal{H}` is zero.

    * When :math:`\mathbf{K} = \mathbf{I}_{N}`, ADMM is equivalent to the :py:func:`~pyxu.opt.solver.DouglasRachford`
      method (up to a change of variable, see [PSA]_ for a derivation).

    * This is an implementation of the algorithm described in Section 5.4 of [PSA]_, which handles the non-smooth
      composite term :math:`\mathcal{H}(\mathbf{K}\mathbf{x})` by means of a change of variable and an infimal
      postcomposition trick. The update equation of this algorithm involves the following
      :math:`\mathbf{x}`-minimization step:

      .. math::
         :label: eq:x_minimization

         \mathcal{V} = \operatorname*{arg\,min}_{\mathbf{x} \in \mathbb{R}^N} \quad \mathcal{F}(\mathbf{x}) + \frac1{2 \tau}
         \Vert \mathbf{K} \mathbf{x} - \mathbf{a} \Vert_2^2,

      where :math:`\tau` is the primal step size and :math:`\mathbf{a} \in \mathbb{R}^M` is an iteration-dependent
      vector.

      The following cases are covered in this implementation:

      - :math:`\mathbf{K}` is ``None`` (i.e. the identity operator) and :math:`\mathcal{F}` is a
        :py:class:`~pyxu.abc.ProxFunc`. Then, :math:numref:`eq:x_minimization` has a single solution provided by
        :math:`\operatorname*{prox}_{\tau \mathcal{F}}(\mathbf{a})`. This yields the *classical ADMM algorithm*
        described in Section 5.3 of [PSA]_ (i.e. without the postcomposition trick).

      - :math:`\mathcal{F}` is a :py:class:`~pyxu.abc.QuadraticFunc`, i.e. :math:`\mathcal{F}(\mathbf{x})=\frac{1}{2}
        \langle\mathbf{x}, \mathbf{Q}\mathbf{x}\rangle + \mathbf{c}^T\mathbf{x} + t`. Then the unique solution to
        :math:numref:`eq:x_minimization` is obtained by solving a linear system of the form:

        .. math::
           :label: eq:linear_system

           \Big( \mathbf{Q} + \frac1\tau \mathbf{K}^* \mathbf{K} \Big) \mathbf{x} =
           \frac1\tau \mathbf{K}^\ast\mathbf{a}-\mathbf{c}, \qquad \mathbf{x} \in \mathbb{R}^N.

        This linear system is solved via a sub-iterative :py:class:`~pyxu.opt.solver.CG` algorithm involving the
        repeated application of :math:`\mathbf{Q}` and :math:`\mathbf{K}^{*}\mathbf{K}`. This sub-iterative scheme may
        be computationally intensive if these operators do not admit fast matrix-free implementations. (A small
        numerical illustration of this linear system is given after these remarks.)

      - :math:`\mathcal{F}` is a :py:class:`~pyxu.abc.DiffFunc`. Then, :math:numref:`eq:x_minimization` is solved via a
        sub-iterative :py:class:`~pyxu.opt.solver.NLCG` algorithm involving repeated evaluation of
        :math:`\nabla\mathcal{F}` and :math:`\mathbf{K}^{*}\mathbf{K}`. This sub-iterative scheme may be costly if
        these operators cannot be evaluated with fast algorithms. In this scenario, the use of multiple initial points
        in :py:meth:`~pyxu.abc.Solver.fit` is not supported.

      The user may also provide a *custom* callable solver :math:`s: \mathbb{R}^M \times \mathbb{R} \to \mathbb{R}^N`,
      taking as input :math:`(\mathbf{a}, \tau)` and solving :math:numref:`eq:x_minimization`, i.e. :math:`s(\mathbf{a},
      \tau) \in \mathcal{V}`. (See example below.) If :py:class:`~pyxu.opt.solver.ADMM` is initialized with such a
      solver, then the latter is used to solve :math:numref:`eq:x_minimization` regardless of whether one of the
      above-mentioned cases is met.

    * Unlike traditional implementations of ADMM, :py:class:`~pyxu.opt.solver.ADMM` supports relaxation, i.e.
      :math:`\rho\neq 1`.

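    As a quick numerical check of the quadratic case above -- a minimal NumPy sketch with illustrative random data,
    not the actual sub-iterative :py:class:`~pyxu.opt.solver.CG` implementation -- solving
    :math:numref:`eq:linear_system` indeed cancels the gradient of the objective in :math:numref:`eq:x_minimization`:

    .. code-block:: python

       import numpy as np

       rng = np.random.default_rng(0)
       N, M, tau = 5, 3, 0.7
       A = rng.standard_normal((N, N))
       Q = A @ A.T + np.eye(N)  # symmetric positive-definite Hessian of F
       c = rng.standard_normal(N)
       K = rng.standard_normal((M, N))
       a = rng.standard_normal(M)

       # Solve (Q + K^T K / tau) x = K^T a / tau - c.
       x = np.linalg.solve(Q + K.T @ K / tau, K.T @ a / tau - c)

       # Stationarity of F(x) + ||K x - a||^2 / (2 tau): the gradient vanishes at x.
       grad = Q @ x + c + K.T @ (K @ x - a) / tau
       assert np.allclose(grad, 0)
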
1410 Parameters (``__init__()``)
1411 ---------------------------
1412 * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
1413 --
1414 Differentiable or proximable function :math:`\mathcal{F}`.
1415 * **h** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
1416 --
1417 Proximable function :math:`\mathcal{H}`.
1418 * **K** (:py:class:`~pyxu.abc.LinOp`, :py:obj:`None`)
1419 --
1420 Linear operator :math:`\mathbf{K}`.
1421 * **solver** (:py:class:`~collections.abc.Callable`, :py:obj:`None`)
1422 --
1423 Custom callable to solve the :math:`\mathbf{x}`-minimization step :math:numref:`eq:x_minimization`.
1424
1425 If provided, `solver` must have the `NumPy signature
1426 <https://numpy.org/neps/nep-0020-gufunc-signature-enhancement.html>`_ ``(n), (1) -> (n)``.
1427 * **solver_kwargs** (:py:class:`~collections.abc.Mapping`)
1428 --
1429 Keyword parameters passed to the ``__init__()`` method of sub-iterative :py:class:`~pyxu.opt.solver.CG` or
1430 :py:class:`~pyxu.opt.solver.NLCG` solvers.
1431
1432 `solver_kwargs` is ignored if `solver` provided.
1433 * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
1434 --
1435 Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., M) initial point(s) for the dual variable.
      If ``None`` (default), then use ``K(x0)`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter for relaxation.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See the base class for more details.
    * **solver_kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Keyword parameters passed to the ``fit()`` method of the sub-iterative
      :py:class:`~pyxu.opt.solver.CG` or :py:class:`~pyxu.opt.solver.NLCG` solvers.

      `solver_kwargs` is ignored if `solver` was provided in ``__init__()``.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.

    Warning
    -------
    The ``tuning_strategy`` documentation above refers the reader to the base class for details, but the base class
    does not currently document the available strategies.

    Example
    -------
    Consider the following optimization problem:

    .. math::

       \min_{\mathbf{x}\in\mathbb{R}^N} \frac{1}{2} \left\| \mathbf{y} - \mathbf{G}\mathbf{x} \right\|_2^2 \quad + \quad \lambda \|\mathbf{D}\mathbf{x}\|_1,

    with :math:`\mathbf{D}\in\mathbb{R}^{(N-2)\times N}` the discrete second-order derivative operator,
    :math:`\mathbf{G}\in\mathbb{R}^{M\times N}`, :math:`\mathbf{y}\in\mathbb{R}^M`, and :math:`\lambda>0`. This
    problem can be solved via ADMM with :math:`\mathcal{F}(\mathbf{x})= \frac{1}{2}\left\|\mathbf{y}-\mathbf{G}\mathbf{x}\right\|_2^2`,
    :math:`\mathcal{H}(\mathbf{x})=\lambda \|\mathbf{D}\mathbf{x}\|_1`, and :math:`\mathbf{K}=\mathbf{D}`. Since the
    functional :math:`\mathcal{F}` is quadratic, the :math:`\mathbf{x}`-minimization step amounts to solving a
    linear system of the form :math:numref:`eq:linear_system`. Here, we demonstrate how to provide a custom solver,
    which applies a precomputed matrix inverse, for this step. Without it, a sub-iterative
    :py:class:`~pyxu.opt.solver.CG` algorithm would be used automatically.

    .. plot::

       import matplotlib.pyplot as plt
       import numpy as np
       import pyxu.abc as pxa
       import pyxu.operator as pxo
       import scipy as sp
       from pyxu.opt.solver import ADMM

       N = 100  # Dimension of the problem

       # Generate piecewise-linear ground truth
       x_gt = np.array([10, 25, 60, 90])  # Knot locations
       a_gt = np.array([2, -4, 3, -2])  # Amplitudes of the knots
       gt = np.zeros(N)  # Ground-truth signal
       for n in range(len(x_gt)):
           gt[x_gt[n] :] += a_gt[n] * np.arange(N - x_gt[n]) / N

       # Generate data (noisy samples at random locations)
       M = 20  # Number of data points
       rng = np.random.default_rng(seed=0)
       x_samp = rng.choice(np.arange(N // M), size=M) + np.arange(N, step=N // M)  # Sampling locations
       sigma = 2 * 1e-2  # Noise standard deviation
       y = gt[x_samp] + sigma * rng.standard_normal(size=M)  # Noisy data points

       # Data-fidelity term
       subsamp_mat = sp.sparse.lil_matrix((M, N))
       for i in range(M):
           subsamp_mat[i, x_samp[i]] = 1
       G = pxa.LinOp.from_array(subsamp_mat.tocsr())
       F = 1 / 2 * pxo.SquaredL2Norm(dim=y.size).argshift(-y) * G
       F.diff_lipschitz = F.estimate_diff_lipschitz(method="svd")

       # Regularization term (promotes sparse second derivatives)
       deriv_mat = sp.sparse.diags(diagonals=[1, -2, 1], offsets=[0, 1, 2], shape=(N - 2, N))
       D = pxa.LinOp.from_array(deriv_mat)
       _lambda = 1e-1  # Regularization parameter
       H = _lambda * pxo.L1Norm(dim=D.codim)

       # Custom solver for the x-minimization step of ADMM
       tau = 1 / _lambda  # Internal ADMM parameter
       # Inverse operator to solve the linear system (eq:linear_system) directly
       A_inv = sp.linalg.inv(G.gram().asarray() + (1 / tau) * D.gram().asarray())


       def solver_ADMM(arr, tau):
           # Implements s(a, tau) = A_inv @ ((1/tau) * D^T a + G^T y)
           b = (1 / tau) * D.adjoint(arr) + G.adjoint(y)
           return A_inv @ b.squeeze()


       # Solve the optimization problem
       admm = ADMM(f=F, h=H, K=D, solver=solver_ADMM, show_progress=False)  # with custom solver
       admm.fit(x0=np.zeros(N), tau=tau)
       x_opt = admm.solution()  # Reconstructed signal

       # Plots
       plt.figure()
       plt.plot(np.arange(N), gt, label="Ground truth")
       plt.plot(x_samp, y, "kx", label="Noisy data points")
       plt.plot(np.arange(N), x_opt, label="Reconstructed signal")
       plt.legend()

    See Also
    --------
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.Func] = None,
        h: typ.Optional[pxa.ProxFunc] = None,
        K: typ.Optional[pxa.DiffMap] = None,
        solver: typ.Optional[typ.Callable[[pxt.NDArray, float], pxt.NDArray]] = None,
        solver_kwargs: typ.Optional[dict] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x", "u", "z")))

        x_update_solver = "custom"  # Method for the x-minimization step
        g = None
        if solver is None:
            if f is None:
                if h is None:
                    msg = " ".join(
                        [
                            "Cannot minimize always-0 functional.",
                            "At least one of Parameter[f, h] must be specified.",
                        ]
                    )
                    raise ValueError(msg)
                if K is None:  # Prox scenario (classical ADMM)
                    f = pxo.NullFunc(dim_shape=h.dim_shape)
                else:  # Sub-iterative CG scenario
                    f = pxa.QuadraticFunc(
                        dim_shape=h.dim_shape,
                        codim_shape=(1,),
                        Q=pxo.NullOp(dim_shape=h.dim_shape, codim_shape=h.dim_shape),
                        c=pxo.NullFunc(dim_shape=h.dim_shape),
                    )
            if f.has(pxa.Property.PROXIMABLE) and K is None:
                x_update_solver = "prox"
                g = f  # In this case, f corresponds to g in the _PDS terminology
                f = None
            elif isinstance(f, pxa.QuadraticFunc):
                x_update_solver = "cg"
                self._K_gram = K.gram()
                warnings.warn(
                    "A sub-iterative conjugate gradient algorithm is used for the x-minimization step "
                    "of ADMM. This might be computationally expensive.",
                    UserWarning,
                )
            elif f.has(pxa.Property.DIFFERENTIABLE_FUNCTION):
                x_update_solver = "nlcg"
                self._K_gram = K.gram()
                warnings.warn(
                    "A sub-iterative non-linear conjugate gradient algorithm is used for the "
                    "x-minimization step of ADMM. This might be computationally expensive.",
                    UserWarning,
                )
            else:
                raise TypeError(
                    "Unsupported scenario: f must either be a ProxFunc (in which case K must be None), a "
                    "QuadraticFunc, or a DiffFunc. If none of these scenarios holds, a solver must be provided "
                    "for the x-minimization step of ADMM."
                )
        self._solver = solver
        self._x_update_solver = x_update_solver
        self._init_kwargs = solver_kwargs if solver_kwargs is not None else dict(show_progress=False)

        super().__init__(
            f=f,
            g=g,
            h=h,
            K=K,
            **kwargs,
        )

    def m_init(
        self,
        x0: pxt.NDArray,
        z0: typ.Optional[pxt.NDArray] = None,
        tau: typ.Optional[pxt.Real] = None,
        rho: typ.Optional[pxt.Real] = None,
        tuning_strategy: _PDS.TuningSpec = 1,
        solver_kwargs: typ.Optional[dict] = None,
        **kwargs,
    ):
        super().m_init(
            x0=x0,
            z0=z0,
            tau=tau,
            sigma=None,
            rho=rho,
            tuning_strategy=tuning_strategy,
        )
        mst = self._mstate  # shorthand
        mst["u"] = self._K(x0)

        # fit() kwargs of the sub-iterative solver
        if solver_kwargs is None:
            solver_kwargs = dict()
        self._fit_kwargs = solver_kwargs

    def m_step(self):
        # Algorithm (145) in [PSA]. Paper -> code correspondence: L -> K, K -> -Id, c -> 0, y -> u, v -> z, g -> h
        mst = self._mstate
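        # With this correspondence, one iteration performs the updates
        #   x^{k+1} = argmin_x F(x) + (1/(2*tau)) * ||K(x) - (u^k - z^k)||_2^2   (eq:x_minimization, a = u^k - z^k)
        #   u^{k+1} = prox_{tau*h}(K(x^{k+1}) + z_temp),  where  z_temp = z^k + K(x^{k+1}) - u^k
        #   z^{k+1} = z_temp + (rho - 1) * (K(x^{k+1}) - u^{k+1})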
        mst["x"] = self._x_update(mst["u"] - mst["z"], tau=mst["tau"])
        z_temp = mst["z"] + self._K(mst["x"]) - mst["u"]
        if self._h._name != "NullFunc":
            mst["u"] = self._h.prox(self._K(mst["x"]) + z_temp, tau=mst["tau"])
        mst["z"] = z_temp + (mst["rho"] - 1) * (self._K(mst["x"]) - mst["u"])

    def _x_update(self, arr: pxt.NDArray, tau: float) -> pxt.NDArray:
        if self._x_update_solver == "custom":
            return self._solver(arr, tau)
        elif self._x_update_solver == "prox":
            return self._g.prox(arr, tau=tau)
        elif self._x_update_solver == "cg":
            from pyxu.opt.solver import CG

            # Solve the normal equations (Q + (1/tau) K*K) x = (1/tau) K*(arr) - c  (eq:linear_system).
            b = (1 / tau) * self._K.adjoint(arr) - self._f._c.grad(arr)
            A = self._f._Q + (1 / tau) * self._K_gram
            slvr = CG(A=A, **self._init_kwargs)
            slvr.fit(b=b, x0=self._mstate["x"].copy(), **self._fit_kwargs)  # Initialize CG with the previous iterate
            return slvr.solution()
        elif self._x_update_solver == "nlcg":
            from pyxu.opt.solver import NLCG

            # Minimize F(x) + (1/tau) * [(1/2) ||K(x)||^2 - <K*(arr), x>], i.e. eq:x_minimization up to a constant.
            c = pxa.LinFunc.from_array(-self._K.adjoint(arr))
            quad_func = pxa.QuadraticFunc(dim_shape=self._f.dim_shape, codim_shape=(1,), Q=self._K_gram, c=c)
            slvr = NLCG(f=self._f + (1 / tau) * quad_func, **self._init_kwargs)
            slvr.fit(x0=self._mstate["x"].copy(), **self._fit_kwargs)  # Initialize NLCG with the previous iterate
            return slvr.solution()

    def solution(self, which: typ.Literal["primal", "primal_h", "dual"] = "primal") -> pxt.NDArray:
        data, _ = self.stats()
        if which == "primal":
            assert "x" in data.keys(), "Primal variable x in dom(g) was not logged (declare it in log_var to log it)."
            return data.get("x")
        elif which == "primal_h":
            assert "u" in data.keys(), "Primal variable u in dom(h) was not logged (declare it in log_var to log it)."
            return data.get("u")
        elif which == "dual":
            assert "z" in data.keys(), "Dual variable z was not logged (declare it in log_var to log it)."
            return data.get("z") / self._mstate["tau"]  # Undo the tau-scaling of the dual variable
        else:
            raise ValueError(f"Parameter which must be one of ['primal', 'primal_h', 'dual'], got: {which}.")

    def _set_step_sizes(
        self,
        tau: typ.Optional[pxt.Real],
        sigma: typ.Optional[pxt.Real],
        gamma: pxt.Real,
    ) -> tuple[pxt.Real, pxt.Real, pxt.Real]:
        if tau is not None:
            assert tau > 0, f"Parameter tau must be positive, got {tau}."
        else:
            tau = 1.0
        delta = 2.0
        return tau, 1 / tau, delta  # ADMM constrains the dual step size to sigma = 1/tau


class ForwardBackward(CondatVu):
    r"""
    Forward-backward splitting algorithm.

    This solver is also accessible via the alias :py:class:`~pyxu.opt.solver.FB`.

    The *Forward-backward (FB) splitting* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{F}(\mathbf{x})\;\;+\;\;\mathcal{G}(\mathbf{x}),}

    where:

    * :math:`\mathcal{F}:\mathbb{R}^N\rightarrow \mathbb{R}` is *convex* and *differentiable*, with
      :math:`\beta`-*Lipschitz continuous* gradient, for some :math:`\beta\in[0,+\infty[`.
    * :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower semicontinuous*,
      *convex* function with a *simple proximal operator*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The algorithm is still valid if one of the terms :math:`\mathcal{F}` or :math:`\mathcal{G}` is zero.

    * The *Forward-backward (FB) primal-dual splitting* method can be obtained by choosing :math:`\mathcal{H}=0` in
      :py:class:`~pyxu.opt.solver.CondatVu`. The algorithm was originally introduced without relaxation
      (:math:`\rho=1`) [FB]_; relaxed versions have been proposed afterwards [PSA]_. The Forward-backward algorithm
      is also known as the *Proximal Gradient Descent (PGD)* algorithm. For the accelerated version of PGD, use
      :py:class:`~pyxu.opt.solver.PGD`.

    Parameters (``__init__()``)
    ---------------------------
    * **f** (:py:class:`~pyxu.abc.DiffFunc`, :py:obj:`None`)
      --
      Differentiable function :math:`\mathcal{F}`.
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **z0** (:py:attr:`~pyxu.info.ptype.NDArray`, :py:obj:`None`)
      --
      (..., N) initial point(s) for the dual variable.
      If ``None`` (default), then use ``x0`` as the initial point for the dual variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **beta** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Lipschitz constant :math:`\beta` of :math:`\nabla\mathcal{F}`.
      If not provided, it will be automatically estimated.
    * **tuning_strategy** (1, 2, 3)
      --
      Strategy to be employed when setting the hyperparameters (defaults to 1).
      See :py:class:`~pyxu.opt.solver.CondatVu` for more details.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.
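
    Example
    -------
    A minimal sketch on a toy denoising problem, mirroring the API usage of the ADMM example above (the data and
    parameter values are illustrative, not prescriptive):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import FB

       N = 50
       rng = np.random.default_rng(seed=0)
       y = rng.standard_normal(N)  # noisy measurements

       # Differentiable data-fidelity term: F(x) = (1/2) ||x - y||_2^2
       f = 1 / 2 * pxo.SquaredL2Norm(dim=N).argshift(-y)
       f.diff_lipschitz = 1.0  # the gradient of F is 1-Lipschitz
       # Proximable regularizer: G(x) = 0.1 * ||x||_1
       g = 1e-1 * pxo.L1Norm(dim=N)

       fb = FB(f=f, g=g, show_progress=False)
       fb.fit(x0=np.zeros(N))
       x_opt = fb.solution()  # soft-thresholded estimate of y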

    See Also
    --------
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """

    def __init__(
        self,
        f: typ.Optional[pxa.DiffFunc] = None,
        g: typ.Optional[pxa.ProxFunc] = None,
        **kwargs,
    ):
        kwargs.update(log_var=kwargs.get("log_var", ("x",)))
        super().__init__(
            f=f,
            g=g,
            h=None,
            K=None,
            **kwargs,
        )


FB = ForwardBackward  #: Alias of :py:class:`~pyxu.opt.solver.ForwardBackward`.


def ProximalPoint(
    g: typ.Optional[pxa.ProxFunc] = None,
    base: typ.Type[_PrimalDualSplitting] = CondatVu,
    **kwargs,
):
    r"""
    Proximal-point method.

    This solver is also accessible via the alias :py:func:`~pyxu.opt.solver.PP`.

    The *Proximal-point (PP)* method can be used to solve problems of the form:

    .. math::

       {\min_{\mathbf{x}\in\mathbb{R}^N} \;\mathcal{G}(\mathbf{x}),}

    where :math:`\mathcal{G}:\mathbb{R}^N\rightarrow \mathbb{R}\cup\{+\infty\}` is a *proper*, *lower
    semicontinuous*, *convex* function with a *simple proximal operator*.

    Remarks
    -------
    * The problem is *feasible*, i.e. there exists at least one solution.

    * The *Proximal-point* algorithm can be obtained by choosing :math:`\mathcal{F}=0` and :math:`\mathcal{H}=0` in
      :py:class:`~pyxu.opt.solver.CondatVu` or :py:class:`~pyxu.opt.solver.PD3O`. The original version of the
      algorithm was introduced without relaxation (:math:`\rho=1`) [PP]_. Relaxed versions have been proposed
      afterwards [PSA]_.

    Parameters (``__init__()``)
    ---------------------------
    * **g** (:py:class:`~pyxu.abc.ProxFunc`, :py:obj:`None`)
      --
      Proximable function :math:`\mathcal{G}`.
    * **base** (:py:class:`~pyxu.opt.solver.CondatVu`, :py:class:`~pyxu.opt.solver.PD3O`)
      --
      Base primal-dual algorithm from which the mathematical updates are inherited
      (default = :py:class:`~pyxu.opt.solver.CondatVu`).
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.__init__`.

    Parameters (``fit()``)
    ----------------------
    * **x0** (:py:attr:`~pyxu.info.ptype.NDArray`)
      --
      (..., N) initial point(s) for the primal variable.
    * **tau** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Primal step size.
    * **rho** (:py:attr:`~pyxu.info.ptype.Real`, :py:obj:`None`)
      --
      Momentum parameter.
    * **\*\*kwargs** (:py:class:`~collections.abc.Mapping`)
      --
      Other keyword parameters passed on to :py:meth:`pyxu.abc.Solver.fit`.
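
    Example
    -------
    A minimal sketch (the functional and parameter values are illustrative):

    .. code-block:: python

       import numpy as np
       import pyxu.operator as pxo
       from pyxu.opt.solver import PP

       N = 10
       g = pxo.L1Norm(dim=N)  # proximable objective

       pp = PP(g=g, show_progress=False)
       pp.fit(x0=np.ones(N), tau=1.0)
       x_opt = pp.solution()  # approaches 0, the minimizer of the L1 norm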

    See Also
    --------
    :py:func:`~pyxu.opt.solver.PP`,
    :py:class:`~pyxu.opt.solver.CondatVu`,
    :py:class:`~pyxu.opt.solver.PD3O`,
    :py:class:`~pyxu.opt.solver.PGD`,
    :py:func:`~pyxu.opt.solver.ChambollePock`,
    :py:func:`~pyxu.opt.solver.DouglasRachford`
    """
    kwargs.update(log_var=kwargs.get("log_var", ("x",)))
    obj = base(
        f=None,
        g=g,
        h=None,
        K=None,
        **kwargs,
    )

    # Bind a bespoke __repr__ so that explicit calls to obj.__repr__() report "ProximalPoint"
    # rather than the base solver's name.
    obj.__repr__ = types.MethodType(lambda _: "ProximalPoint", obj)
    return obj


PP = ProximalPoint  #: Alias of :py:func:`~pyxu.opt.solver.ProximalPoint`.