weighted random in python

edderkop · March 1, 2018, 10:36am

here is a piece of code i created to generate loot from a loot table where every object has a weight that decides how often it gets picked.


import random

def genLootFromTable(count=2,table=None):
        if table:
            tmp = []
            out = []
            
            for item in table:
               itm = table[item]
               t = [ item ] * itm
               tmp += t
            
            for c in range(count):
                out += [random.choice(tmp)]
            
            return out

the loot table used is a dictionary like this


loottable = {"stone":200,"stick":200,"Apple":50,"Super Rare Ring":1,"Silver Armor":10}

working example


import random

def genLootFromTable(count=2,table=None):
        if table:
            tmp = []
            out = []
            
            for item in table:
               itm = table[item]
               t = [ item ] * itm
               tmp += t
            
            for c in range(count):
                out += [random.choice(tmp)]
            
            return out

#--- testing start -----

loottable = {"stone":200,"stick":200,"Apple":50,"Super Rare Ring":1,"Silver Armor":10}

out = {}
loot = genLootFromTable(1000,loottable)

for item in loot:
    if item in out:
        out[item] += 1
    else:
        out[item] = 1
          
print(out)

#--- testing end -----

here the output shows the distribution over 1000 iterations

sample output: {‘stick’: 416, ‘stone’: 438, ‘Apple’: 124, ‘Super Rare Ring’: 2, ‘Silver Armor’: 20}

edderkop · March 1, 2018, 10:52am

most common use is this


lootcount = 3 
loot = genLootFromTable(lootcount,loottable)

#output
print(loot)

sample output : [‘stick’, ‘stone’, ‘stone’]

agoose77 · March 4, 2018, 7:11am

Cool!
A heads up, Python3.6 now includes support for this in the random module with random.choices

edderkop · March 4, 2018, 7:58am

yes i know, but i wanted it to be useful in older versions of blender to

edderkop · March 4, 2018, 9:40am

here is a version that works almost the same as the one in python 3.6, but this one works in older versions of python (i have tested it under python 2.7, 3.5 and 3.6)


from random import random,choice
from bisect import bisect
import operator

def accumulate(iterable, func=operator.add):
    it = iter(iterable)
    try:
        total = next(it)
    except StopIteration:
        return
    yield total
    for element in it:
        total = func(total, element)
        yield total
        
def weightedRandom(items, weights=None, count=1):

        if weights is None:
            total = len(items)
            return [items[int(random() * total)] for i in range(count)]
        
        accumulated_weights = list(accumulate(weights))

        if len(accumulated_weights) != len(items):
            raise ValueError('The number of weights does not match the items')
        
        total = accumulated_weights[-1]
        return [items[bisect(accumulated_weights, random() * total)] for i in range(count)]

def genLootFromTable(count=2,table=None):
    out = [None]
    if table:
        out = weightedRandom(list(table.keys()),list(table.values()),count)
        
    return out
    
#--- testing start -----

loottable = {"stone":50,"stick":50,"Apple":50,"Super Rare Ring":1,"Silver Armor":10}

loot = genLootFromTable(3,loottable)
        
print(loot)

wkk · March 6, 2018, 6:33am


test = dict(
    a = 100,
    b = 5,
    c = 1,
)

import random

def weighted_choice(data):
    total = sum(data.values())
    x = random.random() * total

    current = 0
    for item, weight in data.items():
        current += weight
        if x &lt;= current:
            return item

    return item

I feel like this would be slightly easier ?

I tested using this:


stats = dict()
for i in range(1000000):
    item = weighted_choice(test)
    stats.setdefault(item, 0)
    stats[item] += 1

print(stats, sum(stats.values()))

# RESULTS:
# {'a': 943419, 'b': 47160, 'c': 9421} 1000000

edderkop · March 6, 2018, 11:38am

wkk:


test = dict(
    a = 100,
    b = 5,
    c = 1,
)

import random

def weighted_choice(data):
    total = sum(data.values())
    x = random.random() * total

    current = 0
    for item, weight in data.items():
        current += weight
        if x &lt;= current:
            return item

    return item

I feel like this would be slightly easier ?

I tested using this:


stats = dict()
for i in range(1000000):
    item = weighted_choice(test)
    stats.setdefault(item, 0)
    stats[item] += 1

print(stats, sum(stats.values()))

# RESULTS:
# {'a': 943419, 'b': 47160, 'c': 9421} 1000000

this is some nice code and it works as intended, so i took a good look at it and made a few test and i discovered that it is not that fast ,the code i posted in #5 is almost twice as fast

here is some test results. (i used the timeit module for this)

python3 test.py
{‘stick’: 36, ‘Silver Armor’: 7, ‘stone’: 28, ‘Apple’: 29} 100
0.00014356199972098693
{‘stone’: 30, ‘stick’: 33, ‘Apple’: 31, ‘Silver Armor’: 4, ‘Super Rare Ring’: 2} 100
0.0002355879987590015

python3 test.py
{‘stone’: 36, ‘stick’: 34, ‘Apple’: 27, ‘Silver Armor’: 3} 100
0.00012723799591185525
{‘Apple’: 32, ‘stone’: 36, ‘stick’: 25, ‘Silver Armor’: 6, ‘Super Rare Ring’: 1} 100
0.0002105270032188855

python3 test.py
{‘stick’: 310379, ‘Apple’: 310919, ‘stone’: 310433, ‘Silver Armor’: 62124, ‘Super Rare Ring’: 6145} 1000000
0.7358932580027613
{‘Silver Armor’: 62359, ‘Apple’: 311052, ‘stone’: 310701, ‘stick’: 309805, ‘Super Rare Ring’: 6083} 1000000
1.64791057700495

python3 test.py
{‘Apple’: 310332, ‘stone’: 310754, ‘stick’: 310318, ‘Silver Armor’: 62356, ‘Super Rare Ring’: 6240} 1000000
0.7834161549981218
{‘Apple’: 311073, ‘stone’: 310514, ‘stick’: 310050, ‘Silver Armor’: 62150, ‘Super Rare Ring’: 6213} 1000000
1.6628896110050846

wkk · March 6, 2018, 2:34pm

Indeed, although it depends on what grounds you measured…

At first I timed my version against yours comparing multiple executions: mine is 4x faster.
But indeed, once we generate multiple items using your argument ‘count’ yours is faster (thanks to bisect)

Yet I felt like yours was kinda ugly x)

Also, doing list(table.values()) and list(table.keys()) makes me worry about the fact that there is no garranty that the relation key/value is kept between the indexes of the two lists.

What I mean is that dicts ensure a correspondance between a key and a value. But using the .keys() and .values() might break it.

So I tried my best to fix it:


def fast_weighted_choices(data, count = 1):

    # ensuring a correct correspondance between key/value
    items = [item for item in data]
    values = [data[item] for item in items]

    # accumulating the weights for bisect
    current = 0
    accumulated = []
    for value in values:
        current += value
        accumulated.append(current)

    # returning a random item using bisect
    total = accumulated[-1]
    return [items[bisect(accumulated, total * random())]
        for _ in range(count)]

This is also very slightly faster than your version. Although at the end of the day, its the same idea behind.

Gist with my tests:

gist.github.com

https://gist.github.com/marechal-p/457b378abcb939c2a8b5db6b0ba35bc9

result_on_wkk_pc.txt

one, many time
edderkop 0.46010318212813817
wkk fast 0.4306279644637152
wkk 0.1075775262068176

many, just once
edderkop 0.054590656858598985
wkk fast 0.0520779658673689
wkk 0.1094092187746678

weighted_choice_tests.py

from random import random, choice
from bisect import bisect
import operator

def accumulate(iterable, func=operator.add):
    it = iter(iterable)
    try:
        total = next(it)
    except StopIteration:
        return

This file has been truncated. show original

Also, returning [None] looks weird to me, why not returning an empty list ?


def genLootFromTable(count=2, table=None):
    out = []
    if table:
        items = [item for item in table]
        values = [table[item] for item in items]
        out = weightedRandom(items, values, count)
    return out

edderkop · March 7, 2018, 1:02am

yes its kind of ugly but that’s not whats important and the None in there is a leftover from something i was trying so just ignore it.

and about the 2 list thing ,there is a good reason for this and if you look closely you can see it do a different thing based on if you omit the
weighted list or not.

edderkop · March 7, 2018, 1:42am

i have update my code do to some good feedback from WKnight02

here is the current version


def choices(items, weights=None, count=1):

        if weights is None:
            total = len(items)
            return [items[int(random() * total)] for i in range(count)]
        
        # accumulating the weights for bisect
        current = 0
        accumulated_weights = []
        for value in weights:
            current += value
            accumulated_weights.append(current)

        if len(accumulated_weights) != len(items):
            raise ValueError('The number of weights does not match the items')
        
        total = accumulated_weights[-1]
        return [items[bisect(accumulated_weights, random() * total)] for i in range(count)]

usage:


items = ["stick","stone","pot"]
weights = [5,5,1]
out =  choices(items,weights,count=5)

# -or- if weights is omitted  
# this is basically the same as " out = [random.choice(items) for i in range(count) ] "

out =  choices(items,count=5)