# meu_framework.py
# Copyright (C) 2006, 2007 Martin Jansche
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, distribute with modifications, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name(s) of the above copyright
# holders shall not be used in advertising or otherwise to promote the
# sale, use or other dealings in this Software without prior written
# authorization.

"""
A proof-of-concept implementation of the algorithms in the paper
"A Maximum Expected Utility Framework for Binary Sequence Labeling"
(ACL 2007).
"""

from __future__ import division
from itertools import izip
from math import exp, log, floor, ceil

import double
import select


######################################################################
##
##  Decoding algorithms
##

# auxiliary method for exact decoding
def exact_expectF(z, p, beta=1):
    assert len(z) == len(p)
    g = F_Score(z, beta)
    return evaluate_Eg(g, p)

# auxiliary method for inexact decoding
def inexact_expectF(z, p, beta=1, L=lambda n: 2*n+1):
    assert len(z) == len(p)
    g = F_Score(z, beta)
    limit = max(1, L(len(z)))
    E_approx,E_max,error,Q = evaluate_inexact_Eg(g, p, limit)
    return E_approx

#
# This is Algorithm 1 in the paper:
#
def max_EF_decode(p, expectF=exact_expectF):
    I = list(enumerate(p))
    I.sort(lambda a,b: cmp(b[1], a[1]))
    z = [0 for _ in p]
    a = -1  # the paper uses 1-based indexing, but Python is 0-based
    v = expectF(z, p)
    for j in xrange(len(p)):
        i,pi = I[j]
        assert p[i] == pi
        assert z[i] == 0
        z[i] = 1         # switch on the i-th bit
        u = expectF(z, p)
        ##print j, z, u
        if u > v:
            a = j
            v = u
    ##print a
    for zi in z:
        assert zi == 1
    for j in xrange(a+1, len(p)):
        i,pi = I[j]
        z[i] = 0         # switch off the i-th bit
    return (z, v)


######################################################################
##
##  Simple utility/loss functions
##

#
# This is Algorithm 2 in the paper:
#
class F_Score:
    def __init__(self, z, beta=1):
        self.z = z
        self.beta = beta
        return

    def start(self):
        (A,T) = (0,0)
        assert self._state_invariant(A, T)
        return (A,T)

    def transition(self, (A,T), i, yi):
        assert self._state_invariant(A, T)
        if yi == 1:
            T += 1
            if self.z[i] == 1:
                A += 1
        assert self._state_invariant(A, T)
        return (A,T)

    def a(self, (A,T)):
        assert self._state_invariant(A, T)
        F = fdiv((self.beta + 1) * A,
                 dot(self.z, self.z) + self.beta * T)
        assert 0 <= F and F <= 1
        return F

    def _state_invariant(self, A, T):
        """The invariant of the index/state set K."""
        return (0 <= A
                and A <= T
                and T <= len(self.z)
                and A <= dot(self.z, self.z))

#
# Hamming loss, discussed in Section 3.5 of the paper:
#
class Hamming_Loss:
    def __init__(self, z, threshold=None):
        self.z = z
        if threshold is None:
          self.threshold = len(z)
        else:
          self.threshold = threshold
        assert self.threshold >= 1
        return

    def start(self):
        return 0

    def transition(self, k, i, yi):
        assert k <= self.threshold
        if self.z[i] != yi and k + 1 <= self.threshold:
            return k + 1
        else:
            return k

    def a(self, k):
        assert k <= self.threshold
        return k

#
# Zero-one loss, a special case of Hamming loss counting up to a
# threshold of one.
#
def ZeroOne_Loss(z):
    return Hamming_Loss(z, threshold=1)

#
# P_k loss, also mentioned in Section 3.5 of the paper:
#
class Pk_Loss:
    def __init__(self, z, k):
        assert len(z) >= k
        self.z = z
        self.k = k
        return

    def start(self):
        return (0, ('#',) * self.k)

    def transition(self, (mf,window), i, yi):
        if i >= self.k:
            if 1 in self.z[i-self.k:i]:
                if 1 not in window:
                    mf += 1
            elif 1 in window:
                mf += 1
        window = window[1:] + (yi,)
        return (mf, window)

    def a(self, (mf,window)):
        return fdiv(mf, len(self.z) - self.k)


######################################################################
##
##  Evaluating a function and its expectation at a given point
##

#
# This is Algorithm 3 in the paper:
#
def evaluate_g(g, y):
    k = g.start()
    for i,yi in enumerate(y):
        k = g.transition(k, i, yi)
    return g.a(k)

#
# This is Algorithm 4 in the paper:
#
def evaluate_Eg(g, p):
    # NB: This method is general and works with all loss/utility
    # functions that fit into the present framework.
    n = len(p)
    M = {}
    M[g.start()] = 1
    for i in xrange(n):
        N = {}
        for (k,P) in M.iteritems():
            # transition on yi = 0
            k0 = g.transition(k, i, 0)
            if k0 not in N:
                N[k0] = 0
            N[k0] += P * (1 - p[i])
            # transition on yi = 1
            k1 = g.transition(k, i, 1)
            if k1 not in N:
                N[k1] = 0
            N[k1] += P * p[i]
        M = N

    E = 0
    for (k,P) in M.iteritems():
        E += g.a(k) * P
    return E

#
# This is the inexact method discussed in Section 4:
#
def evaluate_inexact_Eg(g, p, lim):
    # NB: This method is partially specialized to the F-score.  It can
    # be used with all loss/utility functions that fit into the
    # present framework, but error estimates are only available for
    # the expected F-score.
    assert lim >= 1

    def limit(M_items):
        m = len(M_items) - int(lim)
        if m > 0:
            select.select(m, M_items, lambda (k1,P1),(k2,P2): cmp(P1,P2))
            J = M_items[m:]
            assert len(J) == int(lim)
        else:
            J = M_items
        assert 1 <= len(J) and len(J) <= lim
        return J

    n = len(p)
    Q = 0  # number of expanded states
    M = {}
    M[g.start()] = 1
    for i in xrange(n):
        assert len(M) <= 2 * lim
        N = {}
        for (k,P) in limit(M.items()):
            Q += 1
            # transition on yi = 0
            k0 = g.transition(k, i, 0)
            if k0 not in N:
                N[k0] = 0
            N[k0] += P * (1 - p[i])
            # transition on yi = 1
            k1 = g.transition(k, i, 1)
            if k1 not in N:
                N[k1] = 0
            N[k1] += P * p[i]
        M = N

    T = 0  # total probability mass covered by the expanded states
    E = 0
    for (k,P) in limit(M.items()):
        Q += 1
        T += P
        E += g.a(k) * P

    if isinstance(g, F_Score):  # HACK!
        # For the F-score, we can estimate the error of the inexact
        # computation as follows.  In the worst case, the probability
        # mass 1-T of unseen states corresponds to a single state with
        # coefficient 1 (all coefficients a_k must lie in the interval
        # [0;1]).  This provides a loose upper bound E_max on the
        # exact expectation.
        E_max = min(1, E + max(0, 1 - T))
        error = abs(E_max - E)
    else:
        E_max = None
        error = None
    # Check that the number of states is within the specified limit:
    assert Q <= (n + 1) * lim
    return E, E_max, error, Q


######################################################################
##
##  Auxiliary functions
##

def fdiv(x, y):
    """Nonstandard floating point division, differing from IEEE
    semantics in that 0/0 == 1.  Needless to say this is inconsistent,
    but makes sense when computing the F score, where 0/0 can arise as
    a limiting case."""
    if x == 0 and y == 0:
        if double.signbit(x) == double.signbit(y):
            return 1.0
        else:
            return -1.0
    else:
        return double.fdiv(x, y)


def dot(xs, ys):
    """Generic dot product in arbitrary coordinate space."""
    d = 0
    for x,y in izip(xs, ys):
        d += x * y
    return d

## eof