Source code for pccg.CL

import torch
import numpy as np
from scipy import optimize


[docs]
def contrastive_learning(log_q_noise, log_q_data,
                         basis_noise, basis_data,
                         options = {'disp': True,
                                    'gtol': 1e-5}
):

    """
    Contrastive learning coefficients

    Parameters
    ----------
    log_q_noise: 1-dimensional tensor
        the logrithm of probability density for noise data under the noise distribution
    log_q_data: 1-dimensional tensor
        the logrithm of probability density for target data under the noise distribution
    basis_noise: 2-dimensional tensor
        the design matrix contraining basis values of noise data for compute the logrithm of probablity density for the target distribution
    basis_data: 2-dimensional tensor
        the design matrix contraining basis values of target data for compute the logrithm of probablity density for the target distribution

    Returns
    -------
    alpha: 
    
    """
    
    assert(basis_noise.shape[-1] == basis_data.shape[-1])
    assert(len(log_q_noise) == basis_noise.shape[0])
    assert(len(log_q_data) == basis_data.shape[0])

    basis_size = basis_noise.shape[-1]
    alphas = torch.zeros(basis_size, dtype=torch.float64)
    F = torch.zeros(1, dtype=torch.float64)

    x_init = np.concatenate([alphas.data.numpy(), F])
    
    def compute_loss_and_grad(x):
        alphas = torch.tensor(x[0:basis_size], requires_grad = True)
        F = torch.tensor(x[-1], requires_grad = True)

        u_data = torch.matmul(basis_data, alphas)
        u_noise = torch.matmul(basis_noise, alphas)

        num_samples_p = basis_data.shape[0]
        num_samples_q = basis_noise.shape[0]

        nu = F.new_tensor([num_samples_q / num_samples_p])
        
        log_p_data = - (u_data - F) - torch.log(nu)
        log_p_noise = - (u_noise - F) - torch.log(nu)

        log_q = torch.cat([log_q_noise, log_q_data])
        log_p = torch.cat([log_p_noise, log_p_data])

        logit = log_p - log_q
        target = torch.cat([torch.zeros_like(log_q_noise),
                            torch.ones_like(log_q_data)])
        loss = torch.nn.functional.binary_cross_entropy_with_logits(
            logit, target)               
        loss.backward()
        
        grad = torch.cat([alphas.grad, F.grad[None]]).numpy()

        return loss.item(), grad

    loss, grad = compute_loss_and_grad(x_init)
    
    
    results = optimize.minimize(compute_loss_and_grad,
                                x_init,
                                jac=True,
                                method='L-BFGS-B',
                                options = options)
    x = results['x']
    
    # x, f, d = optimize.fmin_l_bfgs_b(compute_loss_and_grad,
    #                                  x_init,
    #                                  iprint = 1,
    #                                  pgtol = 1e-6,
    #                                  factr = 100)

    alphas = x[0:basis_size]
    F = x[-1]

    return torch.from_numpy(alphas), F


def contrastive_learning_numpy(log_q_noise, log_q_data,
                               basis_noise, basis_data):

    """
    Contrastive learning coefficients

    Parameters
    ----------
    log_q_noise: 1-dimensional array
        the logrithm of probability density for noise data under the noise distribution
    log_q_data: 1-dimensional array
        the logrithm of probability density for target data under the noise distribution
    basis_noise: 2-dimensional array
        the design matrix contraining basis values of noise data for compute the logrithm of probablity density for the target distribution
    basis_data: 2-dimensional array
        the design matrix contraining basis values of target data for compute the logrithm of probablity density for the target distribution

    Returns
    -------
    alpha: 
    
    """
    
    assert(basis_noise.shape[-1] == basis_data.shape[-1])
    assert(len(log_q_noise) == basis_noise.shape[0])
    assert(len(log_q_data) == basis_data.shape[0])

    basis_size = basis_noise.shape[-1]
    alphas = np.zeros(basis_size)
    F = np.zeros(1)
    
    x_init = np.concatenate([alphas, F])

    log_q = np.concatenate([log_q_noise, log_q_data])
    y = np.concatenate([np.zeros_like(log_q_noise),
                        np.ones_like(log_q_data)])

    basis = np.concatenate([basis_noise, basis_data])

    num_samples_p = basis_data.shape[0]
    num_samples_q = basis_noise.shape[0]

    log_nu = np.log(num_samples_q / float(num_samples_p))
    
    def compute_loss_and_grad(x):
        alphas = x[0:basis_size]
        F = x[-1]

        ## compute loss = -(y*h - np.log(1 + np.exp(h)))
        h = -(np.matmul(basis, alphas) - F) - log_q - log_nu
        loss = -(y*h - (np.maximum(h, 0) + np.log(1 + np.exp(-np.abs(h)))))
        loss = np.mean(loss, 0)

        ## compute gradients
        p = 1. / (1 + np.exp(-np.abs(h)))
        p[h < 0] = 1 - p[h < 0]

        grad_alphas = np.matmul(basis.T, y - p) / y.shape[0]
        grad_F = -np.mean(y - p, keepdims = True)
        
        grad = np.concatenate([grad_alphas, grad_F])

        return loss, grad

    loss, grad = compute_loss_and_grad(x_init)
    x, f, d = optimize.fmin_l_bfgs_b(compute_loss_and_grad,
                                     x_init,
                                     iprint = 1)

    alphas = x[0:basis_size]
    F = x[-1]

    return alphas, F