Source code for salamanca.ineq

import math

import numpy as np

from scipy.stats import norm, lognorm
from scipy.special import erf, erfinv

from salamanca.utils import AttrObject

# empirical limits with gini = 0.99
MAX_THEIL = 6.64


[docs]def gini_to_std(g):
    r"""Returns :math:`\sigma = 2 * erf^{-1}(g)`"""
    return 2.0 * erfinv(g)


[docs]def std_to_gini(s):
    r"""Returns :math:`g = erf( \frac{\sigma}{2} )`"""
    return erf(0.5 * s)


[docs]def theil_to_std(t):
    r"""Returns :math:`\sigma = \sqrt{2t}`"""
    return (2 * t) ** 0.5


[docs]def std_to_theil(s):
    r"""Returns :math:`t = \frac{\sigma^2}{2}`"""
    return s ** 2 / 2.0


def _theil_empirical_constants():
    """
    These quadractic constants were established by Narasimha to translate between
    Thiel values calculated from deciles to those calculated from Ginis
    """
    return 0.216, 0.991, 0.003


[docs]def gini_to_theil(g, empirical=False):
    r"""Translate gini to theil

    .. math::

       t(g) = \sqrt{2} \Phi^{-1} \left( \frac{1 + g}{2} \right)

    Where :math:`\Phi` is cumulative distribution function (CDF) of the standard
    normal distribution.

    Parameters
    ----------
    g : numeric or array-like
        gini coefficient(s)
    empirical : bool, optional, default: False
        whether to use empirical relationship for theil

    """
    if not (np.all(g > 0) and np.all(g < 1)):
        raise ValueError('Gini not within (0, 1)')

    s = gini_to_std(g)
    t = std_to_theil(s)
    if empirical:
        # quadratic method root finding
        a, b, c = _theil_empirical_constants()
        t = (-b + (b ** 2 - 4 * a * (c - t)) ** 0.5) / (2 * a)

    if not (np.all(t < MAX_THEIL) and np.all(t > 0)):
        raise ValueError('Theil not within (0, 2.88): {}'.format(t))
    return t


[docs]def theil_to_gini(t, empirical=False):
    r"""Translate theil to gini

    .. math::

       t(g) = \sqrt{2} \Phi^{-1} \left( \frac{1 + g}{2} \right)

    Where :math:`\Phi` is cumulative distribution function (CDF) of the standard
    normal distribution.

    Parameters
    ----------
    t : numeric or array-like
        theil coefficient(s)
    empirical : bool, optional, default: False
        whether to use empirical relationship for theil
    """
    if not (np.all(t < MAX_THEIL) and np.all(t > 0)):
        raise ValueError('Theil not within (0, 2.88): {}'.format(t))

    if empirical:
        a, b, c = _theil_empirical_constants()
        t = a * t ** 2 + b * t + c
    s = theil_to_std(t)
    g = std_to_gini(s)

    if not (np.all(g > 0) and np.all(g < 1)):
        raise ValueError('Gini not within (0, 1)')
    return g


[docs]class LogNormalData(AttrObject):
    """Object for storing and updating LogNormal distribution data"""

    def _has(self, x):
        return getattr(self, x, None) is None

    def add_defaults(self, copy=True):
        # add new defaults for kwargs here
        defaults = {
            'inc': 1,
            'mean': True,
            'gini': None,
            'theil': None,
        }
        return self.update(copy=copy, override=False, **defaults)

    def check(self):
        x = ('theil', 'gini')
        bad = all(self._has(_) for _ in x)
        if bad:
            raise ValueError('Must use either theil or gini')
        bad = all(not self._has(_) for _ in x)
        if bad:
            raise ValueError('Cannot use both theil and gini')

        x = ('inc', 'mean')
        for _ in x:
            if self._has(_):
                raise ValueError('Must declare value for ' + _)
        return self


[docs]class LogNormal(object):
    """
    .. _lognorm-docs:

    An interfrace to the log-normal distribution.

    For mathematical descriptions see ScipyLogNorm_.

    Parameters
    ----------
    inc  : numeric, optional, default: 1
    mean : bool, optional, default: True
        whether income is representative of mean (True) or median (False)
    gini : numeric, optional
    theil : numeric, optional
    """

    def __init__(self, **kwargs):
        """See lognorm-docs_ for possible arguments"""
        self.init_data = LogNormalData(**kwargs)

[docs]    def params(self, **kwargs):
        """Returns (shape, scale) tuple for use in scipy.stats.lognorm"""
        data = (
            self.init_data
            .update(**kwargs)
            .add_defaults(copy=True)
            .check()
        )

        shape = gini_to_std(data.gini) if data.theil is None \
            else theil_to_std(data.theil)
        # scale assumes a median value, adjust is made if income is mean income
        scale = np.exp(np.log(data.inc) - shape ** 2 / 2) if data.mean \
            else data.inc
        return shape, scale

[docs]    def pdf(self, x, **kwargs):
        """See ScipyLogNorm_"""
        shape, scale = self.params(**kwargs)
        return lognorm.pdf(x, shape, scale=scale)

[docs]    def ppf(self, x, **kwargs):
        """See ScipyLogNorm_"""
        shape, scale = self.params(**kwargs)
        return lognorm.ppf(x, shape, scale=scale)

[docs]    def cdf(self, x, **kwargs):
        """See ScipyLogNorm_"""
        shape, scale = self.params(**kwargs)
        return lognorm.cdf(x, shape, scale=scale)

[docs]    def below_threshold(self, threshold, **kwargs):
        """Returns the fraction of population below a given income threshold (equivalent
        to `cdf`).

        Parameters
        ----------
        threshold : numeric
            income threshold
        """
        return self.cdf(threshold, **kwargs)

[docs]    def lorenz(self, x, **kwargs):
        r"""The Lorenz curve for log-normal distributions is defined as:

        .. math::

           L(x) = \Phi \left( \Phi^{-1} (x) - \sigma \right)

        Where :math:`\Phi` is cumulative distribution function (CDF) of the standard
        normal distribution.
        """
        shape, scale = self.params(**kwargs)
        return norm.cdf(norm.ppf(x) - shape)

[docs]    def mean(self, **kwargs):
        """See ScipyLogNorm_"""
        shape, scale = self.params(**kwargs)
        return lognorm.mean(shape, scale=scale)

[docs]    def median(self, **kwargs):
        """See ScipyLogNorm_"""
        shape, scale = self.params(**kwargs)
        return lognorm.median(shape, scale=scale)

[docs]    def var(self, **kwargs):
        """See ScipyLogNorm_"""
        shape, scale = self.params(**kwargs)
        return lognorm.var(shape, scale=scale)

[docs]    def std(self, **kwargs):
        """See ScipyLogNorm_"""
        shape, scale = self.params(**kwargs)
        return lognorm.std(shape, scale=scale)