Group lasso with overlapΒΆ

Comparison of solvers for a least squares with overlapping group lasso regularization.

../_images/sphx_glr_plot_overlapping_group_lasso_001.png

Out:

0.401250471401398
beta = 0
beta = 0.001
beta = 0.01
Achieved relative tolerance at iteration 4850
beta = 0.1
Achieved relative tolerance at iteration 675
Achieved relative tolerance at iteration 531

import numpy as np
from scipy.sparse import linalg as splinalg
from sklearn import datasets
import pylab as plt
import copt as cp

np.random.seed(0)

# .. generate some data ..

n_samples, n_features = 100, 100
groups = [np.arange(8 * i, 8 * i + 10) for i in range(n_features // 8)]


ground_truth = np.zeros(n_features)
ground_truth[groups[4]] = 1
ground_truth[groups[5]] = 0.5


A = np.random.randn(n_samples, n_features)
p = 0.5
for i in range(1, n_features):
    A[:, i] = p * A[:, i] + (1 - p) * A[:, i-1]
A[:, 0] /= np.sqrt(1 - p ** 2)
sigma = 1.
b = A.dot(ground_truth) + sigma * np.random.randn(n_samples)


# .. compute the step-size ..
max_iter = 5000
s = splinalg.svds(A, k=1, return_singular_vectors=False)[0]
step_size = 1. / cp.utils.get_lipschitz(A, 'square')
print(step_size)
f = cp.utils.SquareLoss(A, b)

# .. run the solver for different values ..
# .. of the regularization parameter beta ..
all_betas = [0, 1e-3, 1e-2, 1e-1]
all_trace_ls, all_trace_nols, all_trace_pdhg_nols, all_trace_pdhg = [], [], [], []
all_trace_ls_time, all_trace_nols_time, all_trace_pdhg_nols_time, all_trace_pdhg_time = [], [], [], []
out_img = []
for i, beta in enumerate(all_betas):
    print('beta = %s' % beta)
    groups1, groups2 = groups[::2], groups[1::2].copy()
    G1 = cp.utils.GroupL1(beta, groups1)
    G2 = cp.utils.GroupL1(beta, groups2)

    def loss(x):
        return f(x) + G1(x) + G2(x)

    cb_tosls = cp.utils.Trace()
    x0 = np.zeros(n_features)
    cb_tosls(x0)
    tos_ls = cp.minimize_TOS(
        f.func_grad, x0, G1.prox, G2.prox, step_size=3 * step_size,
        max_iter=max_iter, tol=1e-14, verbose=1,
        callback=cb_tosls)
    trace_ls = np.array([loss(x) for x in cb_tosls.trace_x])
    all_trace_ls.append(trace_ls)
    all_trace_ls_time.append(cb_tosls.trace_time)

    cb_tos = cp.utils.Trace()
    x0 = np.zeros(n_features)
    cb_tos(x0)
    tos = cp.minimize_TOS(
        f.func_grad, x0, G1.prox, G2.prox,
        step_size=step_size,
        max_iter=max_iter, tol=1e-14, verbose=1,
        line_search=True, callback=cb_tos, h_Lipschitz=beta)
    trace_nols = np.array([loss(x) for x in cb_tos.trace_x])
    all_trace_nols.append(trace_nols)
    all_trace_nols_time.append(cb_tos.trace_time)
    out_img.append(tos.x)

    cb_pdhg = cp.utils.Trace()
    x0 = np.zeros(n_features)
    cb_pdhg(x0)
    pdhg = cp.gradient.minimize_PDHG(
        f.func_grad, x0, G1.prox, G2.prox,
        callback=cb_pdhg, max_iter=max_iter,
        step_size=step_size,
        step_size2=(1. / step_size) / 2, tol=0, line_search=False)
    trace_pdhg = np.array([loss(x) for x in cb_pdhg.trace_x])
    all_trace_pdhg.append(trace_pdhg)
    all_trace_pdhg_time.append(cb_pdhg.trace_time)

    cb_pdhg_nols = cp.utils.Trace()
    x0 = np.zeros(n_features)
    cb_pdhg_nols(x0)
    pdhg_nols = cp.gradient.minimize_PDHG(
        f.func_grad, x0, G1.prox, G2.prox,
        callback=cb_pdhg_nols, max_iter=max_iter,
        step_size=step_size,
        step_size2=(1. / step_size) / 2, tol=0, line_search=False)
    trace_pdhg_nols = np.array([loss(x) for x in cb_pdhg_nols.trace_x])
    all_trace_pdhg_nols.append(trace_pdhg_nols)
    all_trace_pdhg_nols_time.append(cb_pdhg_nols.trace_time)
    #

# .. plot the results ..
fig, ax = plt.subplots(2, 4, sharey=False)
xlim = [0.02, 0.02, 0.1]
for i, beta in enumerate(all_betas):
    # ax[0, i].set_title(r'$\lambda=%s$' % beta)
    # ax[0, i].set_title(r'$\lambda=%s$' % beta)
    # ax[0, i].plot(out_img[i])
    # ax[0, i].plot(ground_truth)
    # ax[0, i].set_xticks(())
    # ax[0, i].set_yticks(())

    fmin = min(np.min(all_trace_ls[i]), np.min(all_trace_nols[i]))
    scale = 1. # all_trace_ls[i][0] - fmin
    plot_tos, = ax[1, i].plot(
        (all_trace_ls[i] - fmin) / scale,
        lw=4, marker='o', markevery=100,
        markersize=10)

    plot_nols, = ax[1, i].plot(
        (all_trace_nols[i] - fmin) / scale,
        lw=4, marker='h', markevery=100,
        markersize=10)

    plot_pdhg, = ax[1, i].plot(
        (all_trace_pdhg[i] - fmin) / scale,
        lw=4, marker='^', markevery=100,
        markersize=10)

    plot_pdhg_nols, = ax[1, i].plot(
        (all_trace_pdhg_nols[i] - fmin) / scale,
        lw=4, marker='d', markevery=100,
        markersize=10)

    ax[1, i].set_xlabel('Iterations')
    ax[1, i].set_yscale('log')
    ax[1, i].set_ylim((1e-14, None))
    ax[1, i].grid(True)


plt.gcf().subplots_adjust(bottom=0.15)
plt.figlegend(
    (plot_tos, plot_nols, plot_pdhg, plot_pdhg_nols),
    ('TOS with line search', 'TOS without line search', 'PDHG LS', 'PDHG no LS'), ncol=5,
    scatterpoints=1,
    loc=(-0.00, -0.0), frameon=False,
    bbox_to_anchor=[0.05, 0.01])

ax[1, 0].set_ylabel('Objective minus optimum')
plt.show()

Total running time of the script: ( 8 minutes 40.017 seconds)

Gallery generated by Sphinx-Gallery