#! /usr/bin/env python3
#
# generate Zipf distribution
#
# method taken from:
#   Luc Devroye,
#  "Non-Uniform Random Variate Generation"
#  p. 550-551.
#  Springer 1986
#
# the method works for an infinite bound, the finite bound condition has been
# added.

a = 1.1
N = 1000000
M = 1

import sys
if len(sys.argv) >= 3:
    a = float(sys.argv[1])
    N = int(sys.argv[2])
if len(sys.argv) >= 4:
    M = int(sys.argv[3])

# beware, a close to 1 and n small (eg 100) leads to large number of iterations
# i.e. rejection probability is high when a -> 1
# - 1.001: 280
# - 1.002: 139.2
# - 1.005:  55.9
# - 1.010:  28.4
# - 1.020:  14.8
# - 1.050:   6.2
# - 1.100:   3.5
# however if n is larger the number of iterations decreases significantly

from random import random
from math import exp

def zipfgen(a, N):
    assert a > 1.0, "a must be greater than 1"
    b = 2.0 ** (a - 1.0)
    i = 0 # count iterations
    while True:
        i += 1
        u, v = random(), random()
        try:
            x = int(u ** (- 1.0 / (a - 1.0)))
            t = (1.0 + 1.0 / x) ** (a - 1.0)
            # reject if too large or out of bound
            if v * x * (t - 1.0) / (b - 1.0) <= t / b and x <= N:
                break
        except OverflowError: # on u ** ...
            pass
    return (x, i)

if M == 1:
    x, i = zipfgen(a, N)
    print("X = %d (%d)" % (x, i))
else:
    c = [0 for i in range(0, N)]
    cost = 0
    for i in range(0, M):
        x, i = zipfgen(a, N)
        # assert 1 <= x and x <= N, "x = %d" % x
        cost += i
        c[x-1] += 1
    print("c = %s (%f iterations per draw)" % (c, cost/M))
