Jupyter Snippet CB2nd 01_slow

Jupyter Snippet CB2nd 01_slow

5.1. Knowing Python to write faster code

import random
# Shared input for sum1/sum2/sum3 (and for building `strings` below):
# 100000 floats drawn with random.normalvariate(0, 1).
l = [random.normalvariate(0,1) for i in range(100000)]
def sum1():
    """Return the sum of the module-level list ``l`` using an index loop.

    Intentionally un-idiomatic baseline: each iteration produces an index
    from ``range(len(l))`` and then subscripts ``l[i]``. It is the slowest
    of the three summation variants in the timings recorded in this
    notebook (compare ``sum2`` and ``sum3``).

    Returns:
        The accumulated total, starting from the integer ``0`` (so an empty
        ``l`` yields ``0``).
    """
    # BAD: not Pythonic and slow
    res = 0
    # Index-based traversal: one range item plus one list lookup per element.
    for i in range(len(l)):
        res = res + l[i]
    return res
sum1()
319.346
%timeit sum1()
6.64 ms ± 69.1 µs per loop (mean ± std. dev. of 7 runs,
    100 loops each)
def sum2():
    """Return the sum of the module-level list ``l`` by iterating over it.

    Iterates over the elements directly instead of indexing, which the
    recorded timings show to be faster than ``sum1``, but the accumulation
    still runs as an explicit Python-level loop (compare ``sum3``).

    Returns:
        The accumulated total, starting from the integer ``0`` (so an empty
        ``l`` yields ``0``).
    """
    # STILL BAD
    res = 0
    # Direct iteration avoids the l[i] lookup, but each addition is still
    # a separate Python statement.
    for x in l:
        res = res + x
    return res
sum2()
319.346
%timeit sum2()
3.3 ms ± 54.7 µs per loop (mean ± std. dev. of 7 runs,
    100 loops each)
def sum3():
    """Return the sum of the module-level list ``l`` via the built-in ``sum``.

    This is the recommended variant: the whole loop is delegated to the
    built-in, and it is the fastest of the three in the recorded timings.
    """
    # GOOD
    return sum(l)
sum3()
319.346
%timeit sum3()
391 µs ± 840 ns per loop (mean ± std. dev. of 7 runs,
    1000 loops each)
# Shared input for concat1/concat2: every value of `l` formatted as text
# with three digits after the decimal point.
strings = ['%.3f' % x for x in l]
strings[:3]
['-0.056', '-0.417', '-0.357']
def concat1():
    """Join the module-level ``strings`` with ``', '`` by repeated ``+``.

    Intentionally un-idiomatic baseline: a new string is built with ``+`` on
    every iteration. In the timings recorded in this notebook it is far
    slower than ``concat2``, which produces the same text.

    Returns:
        All items of ``strings`` separated by ``', '``.

    Raises:
        IndexError: if ``strings`` is empty, because the first element is
            read unconditionally to seed the result.
    """
    # BAD: not Pythonic
    # Seed with the first item so no separator is emitted before it.
    cat = strings[0]
    # strings[1:] makes a copy of the rest of the list before iterating.
    for s in strings[1:]:
        cat = cat + ', ' + s
    return cat
concat1()[:24]
'-0.056, -0.417, -0.357, '
%timeit concat1()
1.31 s ± 12.1 ms per loop (mean ± std. dev. of 7 runs,
    1 loop each)
def concat2():
    """Join the module-level ``strings`` with ``', '`` using ``str.join``.

    This is the recommended variant: a single ``join`` call replaces the
    explicit concatenation loop of ``concat1`` and is much faster in the
    recorded timings.

    Returns:
        All items of ``strings`` separated by ``', '``.
    """
    # GOOD
    return ', '.join(strings)
concat2()[:24]
'-0.056, -0.417, -0.357, '
%timeit concat2()
797 µs ± 13.7 µs per loop (mean ± std. dev. of 7 runs,
    1000 loops each)
# NOTE: rebinds `l`. From here on it holds 100000 integers drawn with
# random.randint(0, 100) (both ends inclusive), the input for hist1/hist2/hist3.
l = [random.randint(0, 100) for _ in range(100000)]
def hist1():
    """Count how often each value occurs in the module-level list ``l``.

    Baseline variant using a plain ``dict`` with an explicit membership test
    to initialise each key. It is the slowest of the three histogram
    variants in the timings recorded in this notebook.

    Returns:
        A ``dict`` mapping each distinct value of ``l`` to its number of
        occurrences.
    """
    # BAD
    count = {}
    for x in l:
        # We need to initialize every number
        # the first time it appears in the list.
        if x not in count:
            count[x] = 0
        count[x] += 1
    return count
hist1()
{0: 979,
 1: 971,
 2: 990,
 ...
 99: 995,
 100: 1009}
%timeit hist1()
8.7 ms ± 27.6 µs per loop (mean ± std. dev. of 7 runs,
    100 loops each)
from collections import defaultdict
def hist2():
    """Count how often each value occurs in the module-level list ``l``.

    Intermediate variant: a ``defaultdict(int)`` removes the explicit
    "is this key present yet?" test that ``hist1`` performs, but the
    counting loop itself is still written out by hand.

    Returns:
        A ``defaultdict`` (with ``int`` as default factory) mapping each
        distinct value of ``l`` to its number of occurrences.
    """
    # BETTER
    count = defaultdict(int)
    for x in l:
        # The key is created and the value
        # initialized at 0 when needed.
        count[x] += 1
    return count
hist2()
defaultdict(int,
            {0: 979,
             1: 971,
             ...
             99: 995,
             100: 1009})
%timeit hist2()
6.82 ms ± 217 µs per loop (mean ± std. dev. of 7 runs,
    100 loops each)
from collections import Counter
def hist3():
    """Count how often each value occurs in the module-level list ``l``.

    This is the recommended variant: ``collections.Counter`` does the whole
    tally in one call and is the fastest of the three in the recorded
    timings.

    Returns:
        A ``Counter`` mapping each distinct value of ``l`` to its number of
        occurrences.
    """
    # GOOD
    return Counter(l)
hist3()
Counter({0: 979,
         1: 971,
         ...
         99: 995,
         100: 1009})
%timeit hist3()
3.69 ms ± 105 µs per loop (mean ± std. dev. of 7 runs,
    100 loops each)