#! /usr/bin/env python3
#
# generate Zipf distribution
#
# method taken from:
# Luc Devroye,
# "Non-Uniform Random Variate Generation"
# p. 550-551.
# Springer 1986
#
# the method works for an infinite bound, the finite bound condition has been
# added.
a = 1.1
N = 1000000
M = 1
import sys
if len(sys.argv) >= 3:
a = float(sys.argv[1])
N = int(sys.argv[2])
if len(sys.argv) >= 4:
M = int(sys.argv[3])
# beware, a close to 1 and n small (eg 100) leads to large number of iterations
# i.e. rejection probability is high when a -> 1
# - 1.001: 280
# - 1.002: 139.2
# - 1.005: 55.9
# - 1.010: 28.4
# - 1.020: 14.8
# - 1.050: 6.2
# - 1.100: 3.5
# however if n is larger the number of iterations decreases significantly
from random import random
from math import exp
def zipfgen(a, N):
assert a > 1.0, "a must be greater than 1"
b = 2.0 ** (a - 1.0)
i = 0 # count iterations
while True:
i += 1
u, v = random(), random()
try:
x = int(u ** (- 1.0 / (a - 1.0)))
t = (1.0 + 1.0 / x) ** (a - 1.0)
# reject if too large or out of bound
if v * x * (t - 1.0) / (b - 1.0) <= t / b and x <= N:
break
except OverflowError: # on u ** ...
pass
return (x, i)
if M == 1:
x, i = zipfgen(a, N)
print("X = %d (%d)" % (x, i))
else:
c = [0 for i in range(0, N)]
cost = 0
for i in range(0, M):
x, i = zipfgen(a, N)
# assert 1 <= x and x <= N, "x = %d" % x
cost += i
c[x-1] += 1
print("c = %s (%f iterations per draw)" % (c, cost/M))