Jupyter Snippet CB2nd 01_slow
Jupyter Snippet CB2nd 01_slow
5.1. Knowing Python to write faster code
import random
# 100000 samples from a normal distribution (mean 0, standard deviation 1).
l = [random.normalvariate(0, 1) for _ in range(100000)]
def sum1():
    """Return the sum of the module-level list ``l`` using an index loop.

    Deliberately un-idiomatic: this is the slow baseline of the timing
    comparison, so the ``range(len(l))`` loop is kept on purpose.
    """
    # BAD: not Pythonic and slow
    res = 0
    for i in range(len(l)):
        res = res + l[i]
    return res
sum1()
319.346
%timeit sum1()
6.64 ms ± 69.1 µs per loop (mean ± std. dev. of 7 runs,
100 loops each)
def sum2():
    """Return the sum of the module-level list ``l`` by iterating over it.

    Iterates the elements directly instead of indexing, but still
    accumulates in a Python-level loop; kept as the intermediate step of
    the timing comparison.
    """
    # STILL BAD
    res = 0
    for x in l:
        res = res + x
    return res
sum2()
319.346
%timeit sum2()
3.3 ms ± 54.7 µs per loop (mean ± std. dev. of 7 runs,
100 loops each)
def sum3():
    """Return the sum of the module-level list ``l`` with the built-in ``sum``."""
    # GOOD
    return sum(l)
sum3()
319.346
%timeit sum3()
391 µs ± 840 ns per loop (mean ± std. dev. of 7 runs,
1000 loops each)
# Format every value of ``l`` with three decimals; ``concat1`` and
# ``concat2`` join these strings.
strings = ['%.3f' % x for x in l]
strings[:3]
['-0.056', '-0.417', '-0.357']
def concat1():
    """Return the items of the module-level list ``strings`` joined by ', '.

    Builds the result with repeated ``+`` concatenation; deliberately kept
    as the slow baseline of the timing comparison. Reads ``strings[0]``
    first, so ``strings`` must not be empty.
    """
    # BAD: not Pythonic
    cat = strings[0]
    for s in strings[1:]:
        cat = cat + ', ' + s
    return cat
concat1()[:24]
'-0.056, -0.417, -0.357, '
%timeit concat1()
1.31 s ± 12.1 ms per loop (mean ± std. dev. of 7 runs,
1 loop each)
def concat2():
    """Return the items of the module-level list ``strings`` joined by ', '."""
    # GOOD
    return ', '.join(strings)
concat2()[:24]
'-0.056, -0.417, -0.357, '
%timeit concat2()
797 µs ± 13.7 µs per loop (mean ± std. dev. of 7 runs,
1000 loops each)
# Rebind ``l`` to 100000 random integers between 0 and 100 (both included);
# the histogram functions that follow count these values.
l = [random.randint(0, 100) for _ in range(100000)]
def hist1():
    """Return a dict mapping each value in the module-level list ``l`` to its count.

    Uses a plain dict with an explicit membership test; deliberately kept
    as the baseline of the timing comparison.
    """
    # BAD
    count = {}
    for x in l:
        # We need to initialize every number
        # the first time it appears in the list.
        if x not in count:
            count[x] = 0
        # Runs for every element, not only the first occurrence.
        count[x] += 1
    return count
hist1()
{0: 979,
1: 971,
2: 990,
...
99: 995,
100: 1009}
%timeit hist1()
8.7 ms ± 27.6 µs per loop (mean ± std. dev. of 7 runs,
100 loops each)
from collections import defaultdict
def hist2():
    """Return a ``defaultdict(int)`` mapping each value in ``l`` to its count.

    ``l`` is the module-level list; missing keys start at 0 thanks to the
    ``int`` default factory, so no explicit initialization is needed.
    """
    # BETTER
    count = defaultdict(int)
    for x in l:
        # The key is created and the value
        # initialized at 0 when needed.
        count[x] += 1
    return count
hist2()
defaultdict(int,
{0: 979,
1: 971,
...
99: 995,
100: 1009})
%timeit hist2()
6.82 ms ± 217 µs per loop (mean ± std. dev. of 7 runs,
100 loops each)
from collections import Counter
def hist3():
    """Return a ``Counter`` mapping each value in the module-level list ``l`` to its count."""
    # GOOD
    return Counter(l)
hist3()
Counter({0: 979,
1: 971,
...
99: 995,
100: 1009})
%timeit hist3()
3.69 ms ± 105 µs per loop (mean ± std. dev. of 7 runs,
100 loops each)