Jupyter Snippet NP ch18-code-listing
Jupyter Snippet NP ch18-code-listing
Chapter 18: Code listing
Robert Johansson
Source code listings for Numerical Python - Scientific Computing and Data Science Applications with Numpy, SciPy and Matplotlib (ISBN 978-1-484242-45-2).
Imports
from __future__ import print_function
import numpy as np
np.random.seed(0)
import pandas as pd
import csv
import json
import h5py
import tables
import pickle
# import cPickle
import msgpack
CSV
%%writefile playerstats-2013-2014.csv
# 2013-2014 / Regular Season / All Skaters / Summary / Points
Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%
1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5
2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0
3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9
4,Tyler Seguin,DAL,C,80,37,47,84,+16,18,11,25,0,0,8,0,294,12.6,19:20,23.4,41.5
5,Corey Perry,ANA,R,81,43,39,82,+32,65,8,18,0,0,9,1,280,15.4,19:28,23.2,36.0
Overwriting playerstats-2013-2014.csv
%%writefile playerstats-2013-2014-top30.csv
# 2013-2014 / Regular Season / All Skaters / Summary / Points
Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%
1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5
2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0
3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9
4,Tyler Seguin,DAL,C,80,37,47,84,+16,18,11,25,0,0,8,0,294,12.6,19:20,23.4,41.5
5,Corey Perry,ANA,R,81,43,39,82,+32,65,8,18,0,0,9,1,280,15.4,19:28,23.2,36.0
6,Phil Kessel,TOR,R,82,37,43,80,-5,27,8,20,0,0,6,0,305,12.1,20:39,24.5,14.3
7,Taylor Hall,EDM,L,75,27,53,80,-15,44,7,17,0,1,1,1,250,10.8,20:00,25.4,45.7
8,Alex Ovechkin,WSH,L,78,51,28,79,-35,48,24,39,0,1,10,3,386,13.2,20:32,21.8,66.7
9,Joe Pavelski,SJS,C,82,41,38,79,+23,32,16,31,1,2,3,0,225,18.2,19:51,27.1,56.0
10,Jamie Benn,DAL,L,81,34,45,79,+21,64,5,19,1,3,3,1,279,12.2,19:09,25.0,52.8
11,Nicklas Backstrom,WSH,C,82,18,61,79,-20,54,6,44,1,1,1,0,196,9.2,19:48,23.3,50.4
12,Patrick Sharp,CHI,L,82,34,44,78,+13,40,10,25,0,0,3,1,313,10.9,18:53,22.7,54.6
13,Joe Thornton,SJS,C,82,11,65,76,+20,32,2,19,0,1,3,1,122,9.0,18:55,26.3,56.1
14,Erik Karlsson,OTT,D,82,20,54,74,-15,36,5,31,0,0,1,0,257,7.8,27:04,28.6,0.0
15,Evgeni Malkin,PIT,C,60,23,49,72,+10,62,7,30,0,0,3,0,191,12.0,20:03,21.4,48.8
16,Patrick Marleau,SJS,L,82,33,37,70,+0,18,11,23,2,2,4,0,285,11.6,20:31,27.3,52.9
17,Anze Kopitar,LAK,C,82,29,41,70,+34,24,10,23,0,0,9,2,200,14.5,20:53,25.4,53.3
18,Matt Duchene,COL,C,71,23,47,70,+8,19,5,17,0,0,6,1,217,10.6,18:29,22.0,50.3
19,Martin St. Louis,"TBL, NYR",R,81,30,39,69,+13,10,9,21,1,2,5,1,204,14.7,20:56,25.7,40.7
20,Patrick Kane,CHI,R,69,29,40,69,+7,22,10,25,0,0,6,0,227,12.8,19:36,22.9,50.0
21,Blake Wheeler,WPG,R,82,28,41,69,+4,63,8,19,0,0,4,2,225,12.4,18:41,24.0,37.5
22,Kyle Okposo,NYI,R,71,27,42,69,-9,51,5,15,0,0,4,1,195,13.8,20:26,22.2,47.5
23,David Krejci,BOS,C,80,19,50,69,+39,28,3,19,0,0,6,1,169,11.2,19:07,21.3,51.2
24,Chris Kunitz,PIT,L,78,35,33,68,+25,66,13,22,0,0,8,0,218,16.1,19:09,22.2,75.0
25,Jonathan Toews,CHI,C,76,28,40,68,+26,34,5,15,3,5,5,0,193,14.5,20:28,25.9,57.2
26,Thomas Vanek,"BUF, NYI, MTL",L,78,27,41,68,+7,46,8,18,0,0,4,0,248,10.9,19:21,21.6,43.5
27,Jaromir Jagr,NJD,R,82,24,43,67,+16,46,5,17,0,0,6,1,231,10.4,19:09,22.8,0.0
28,John Tavares,NYI,C,59,24,42,66,-6,40,8,25,0,0,4,0,188,12.8,21:14,22.3,49.1
29,Jason Spezza,OTT,C,75,23,43,66,-26,46,9,22,0,0,5,0,223,10.3,18:12,23.8,54.0
30,Jordan Eberle,EDM,R,80,28,37,65,-11,18,7,20,1,1,4,1,200,14.0,19:32,25.4,38.1
Overwriting playerstats-2013-2014-top30.csv
!head -n 5 playerstats-2013-2014-top30.csv
# 2013-2014 / Regular Season / All Skaters / Summary / Points
Rank,Player,Team,Pos,GP,G,A,P,+/-,PIM,PPG,PPP,SHG,SHP,GW,OT,S,S%,TOI/GP,Shift/GP,FO%
1,Sidney Crosby,PIT,C,80,36,68,104,+18,46,11,38,0,0,5,1,259,13.9,21:58,24.0,52.5
2,Ryan Getzlaf,ANA,C,77,31,56,87,+28,31,5,23,0,0,7,1,204,15.2,21:17,25.2,49.0
3,Claude Giroux,PHI,C,82,28,58,86,+7,46,7,37,0,0,7,1,223,12.6,20:26,25.1,52.9
# Read the whole CSV file into a list of rows; every row is a list of strings.
# newline="" hands line-ending handling to the csv module (needed for quoted
# fields that contain newlines), as the csv documentation recommends.
with open("playerstats-2013-2014.csv", newline="") as f:
    rows = list(csv.reader(f))
rows[1][1:6]
['Player', 'Team', 'Pos', 'GP', 'G']
rows[2][1:6]
['Sidney Crosby', 'PIT', 'C', '80', '36']
# 100 rows of three normally distributed values.
data = np.random.standard_normal((100, 3))
# `comments` is the string savetxt puts in front of the header. Here it carries
# a "# ..." title line plus a newline, so the column names "x, y, z" end up on
# their own, unprefixed second line of the file.
np.savetxt(
    "data.csv",
    data,
    delimiter=",",
    header="x, y, z",
    comments="# Random x, y, z coordinates\n",
)
!head -n 5 data.csv
# Random x, y, z coordinates
x, y, z
1.764052345967664026e+00,4.001572083672232938e-01,9.787379841057392005e-01
2.240893199201457797e+00,1.867557990149967484e+00,-9.772778798764110153e-01
9.500884175255893682e-01,-1.513572082976978872e-01,-1.032188517935578448e-01
data_load = np.loadtxt("data.csv", skiprows=2, delimiter=",")
data_load[1,:]
array([ 2.2408932 , 1.86755799, -0.97727788])
data_load.dtype
dtype('float64')
(data == data_load).all()
True
data = np.loadtxt("playerstats-2013-2014.csv", skiprows=2, delimiter=",", dtype=bytes)
data[0][1:6]
array([b'Sidney Crosby', b'PIT', b'C', b'80', b'36'], dtype='|S13')
np.loadtxt("playerstats-2013-2014.csv", skiprows=2, delimiter=",", usecols=[6,7,8])
array([[ 68., 104., 18.],
[ 56., 87., 28.],
[ 58., 86., 7.],
[ 47., 84., 16.],
[ 39., 82., 32.]])
df = pd.read_csv("playerstats-2013-2014.csv", skiprows=1)
df = df.set_index("Rank")
df[["Player", "GP", "G", "A", "P"]]
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 5 entries, 1 to 5
Data columns (total 20 columns):
Player 5 non-null object
Team 5 non-null object
Pos 5 non-null object
GP 5 non-null int64
G 5 non-null int64
A 5 non-null int64
P 5 non-null int64
+/- 5 non-null int64
PIM 5 non-null int64
PPG 5 non-null int64
PPP 5 non-null int64
SHG 5 non-null int64
SHP 5 non-null int64
GW 5 non-null int64
OT 5 non-null int64
S 5 non-null int64
S% 5 non-null float64
TOI/GP 5 non-null object
Shift/GP 5 non-null float64
FO% 5 non-null float64
dtypes: float64(3), int64(13), object(4)
memory usage: 840.0+ bytes
df[["Player", "GP", "G", "A", "P"]].to_csv("playerstats-2013-2014-subset.csv")
!head -n 5 playerstats-2013-2014-subset.csv
Rank,Player,GP,G,A,P
1,Sidney Crosby,80,36,68,104
2,Ryan Getzlaf,77,31,56,87
3,Claude Giroux,82,28,58,86
4,Tyler Seguin,80,37,47,84
HDF5
h5py
import h5py
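# mode: "w" (create, truncate if exists), "r" (read-only), "w-" (create, fail if exists), "r+" (read/write, file must exist), "a" (read/write, create if missing)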
f = h5py.File("data.h5", "w")
f.mode
'r+'
f.flush()
f.close()
f = h5py.File("data.h5", "w")
f.name
'/'
grp1 = f.create_group("experiment1")
grp1.name
'/experiment1'
grp2_meas = f.create_group("experiment2/measurement")
grp2_meas.name
'/experiment2/measurement'
grp2_sim = f.create_group("experiment2/simulation")
grp2_sim.name
'/experiment2/simulation'
f["/experiment1"]
<HDF5 group "/experiment1" (0 members)>
f["/experiment2/simulation"]
<HDF5 group "/experiment2/simulation" (0 members)>
grp_expr2 = f["/experiment2"]
grp_expr2['simulation']
<HDF5 group "/experiment2/simulation" (0 members)>
list(f.keys())
['experiment1', 'experiment2']
list(f.items())
[('experiment1', <HDF5 group "/experiment1" (0 members)>),
('experiment2', <HDF5 group "/experiment2" (2 members)>)]
f.visit(lambda x: print(x))
experiment1
experiment2
experiment2/measurement
experiment2/simulation
f.visititems(lambda name, value: print(name, value))
experiment1 <HDF5 group "/experiment1" (0 members)>
experiment2 <HDF5 group "/experiment2" (2 members)>
experiment2/measurement <HDF5 group "/experiment2/measurement" (0 members)>
experiment2/simulation <HDF5 group "/experiment2/simulation" (0 members)>
"experiment1" in f
True
"simulation" in f["experiment2"]
True
"experiment3" in f
False
f.flush()
!h5ls -r data.h5
/ Group
/experiment1 Group
/experiment2 Group
/experiment2/measurement Group
/experiment2/simulation Group
data1 = np.arange(10)
data2 = np.random.randn(100, 100)
f["array1"] = data1
f["/experiment2/measurement/meas1"] = data2
f.visititems(lambda name, value: print(name, value))
array1 <HDF5 dataset "array1": shape (10,), type "<i8">
experiment1 <HDF5 group "/experiment1" (0 members)>
experiment2 <HDF5 group "/experiment2" (2 members)>
experiment2/measurement <HDF5 group "/experiment2/measurement" (1 members)>
experiment2/measurement/meas1 <HDF5 dataset "meas1": shape (100, 100), type "<f8">
experiment2/simulation <HDF5 group "/experiment2/simulation" (0 members)>
ds = f["array1"]
ds
<HDF5 dataset "array1": shape (10,), type "<i8">
ds.name
'/array1'
ds.dtype
dtype('int64')
ds.shape
(10,)
ds.len()
10
# Read the entire dataset into memory. ds[()] replaces the deprecated
# Dataset.value attribute, which h5py 3.x no longer provides.
ds[()]
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ds = f["/experiment2/measurement/meas1"]
ds
<HDF5 dataset "meas1": shape (100, 100), type "<f8">
ds.dtype
dtype('<f8')
ds.shape
(100, 100)
data_full = ds[...]
type(data_full)
numpy.ndarray
data_full.shape
(100, 100)
data_col = ds[:, 0]
data_col.shape
(100,)
ds[10:20:3, 10:20:3]
array([[ 0.60270766, -0.34804638, -0.813596 , -1.29737966],
[ 0.91320192, -1.06343294, 0.22734595, 0.52759738],
[ 1.25774422, -0.32775492, 1.4849256 , 0.28005786],
[-0.84907287, -0.30000358, 1.79691852, -0.19871506]])
ds[[1,2,3], :].shape
(3, 100)
ds[[1,2,3], :].shape
(3, 100)
mask = ds[:, 0] > 2.0
mask.shape, mask.dtype
((100,), dtype('bool'))
ds[mask, 0]
array([2.04253623, 2.1041854 , 2.05689385])
ds[mask, :5]
array([[ 2.04253623, -0.91946118, 0.11467003, -0.1374237 , 1.36552692],
[ 2.1041854 , 0.22725706, -1.1291663 , -0.28133197, -0.7394167 ],
[ 2.05689385, 0.18041971, -0.06670925, -0.02835398, 0.48480475]])
# create empty data sets, assign and update datasets
ds = f.create_dataset("array2", data=np.random.randint(10, size=10))
ds
<HDF5 dataset "array2": shape (10,), type "<i8">
# Read the whole dataset back as an array (Dataset.value is deprecated).
ds[()]
array([0, 2, 2, 4, 7, 3, 7, 2, 4, 1])
ds = f.create_dataset("/experiment2/simulation/data1", shape=(5, 5), fillvalue=-1)
ds
<HDF5 dataset "data1": shape (5, 5), type "<f4">
# Nothing has been written yet, so reading the dataset returns the fill value
# everywhere. ds[()] is the supported spelling of the deprecated ds.value.
ds[()]
array([[-1., -1., -1., -1., -1.],
[-1., -1., -1., -1., -1.],
[-1., -1., -1., -1., -1.],
[-1., -1., -1., -1., -1.],
[-1., -1., -1., -1., -1.]], dtype=float32)
ds = f.create_dataset("/experiment1/simulation/data1", shape=(5000, 5000, 5000),
fillvalue=0, compression='gzip')
ds
<HDF5 dataset "data1": shape (5000, 5000, 5000), type "<f4">
ds[:, 0, 0] = np.random.rand(5000)
ds[1, :, 0] += np.random.rand(5000)
ds[:2, :5, 0]
array([[0.6939344 , 0. , 0. , 0. , 0. ],
[1.4819994 , 0.01639538, 0.54387355, 0.11130908, 0.9928771 ]],
dtype=float32)
ds.fillvalue
0.0
f["experiment1"].visititems(lambda name, value: print(name, value))
simulation <HDF5 group "/experiment1/simulation" (1 members)>
simulation/data1 <HDF5 dataset "data1": shape (5000, 5000, 5000), type "<f4">
# Uncompressed size of the full dataset in GiB, taken from the dataset's
# metadata (element count x bytes per element) without reading any data.
float(ds.size * ds.dtype.itemsize) / (1024**3)  # GiB
465.66128730773926
f.flush()
f.filename
'data.h5'
!ls -lh data.h5
-rw-r--r-- 1 rob staff 357K May 6 16:11 data.h5
del f["/experiment1/simulation/data1"]
f["experiment1"].visititems(lambda name, value: print(name, value))
simulation <HDF5 group "/experiment1/simulation" (0 members)>
f.close()
# attributes
# Reopen the existing file for reading and writing. The mode is passed
# explicitly: without it h5py 3.x opens the file read-only, and the attribute
# assignments that follow would fail.
f = h5py.File("data.h5", "a")
f.attrs
<Attributes of HDF5 object at 4768620880>
f.attrs["desc"] = "Result sets from experiments and simulations"
f["experiment1"].attrs["date"] = "2015-1-1"
f["experiment2"].attrs["date"] = "2015-1-2"
f["experiment2/simulation/data1"].attrs["k"] = 1.5
f["experiment2/simulation/data1"].attrs["T"] = 1000
list(f["experiment1"].attrs.keys())
['date']
list(f["experiment2/simulation/data1"].attrs.items())
[('T', 1000), ('k', 1.5)]
"T" in f["experiment2/simulation/data1"].attrs
True
del f["experiment2/simulation/data1"].attrs["T"]
"T" in f["experiment2/simulation/data1"].attrs
False
f["experiment2/simulation/data1"].attrs["t"] = np.array([1, 2, 3])
f["experiment2/simulation/data1"].attrs["t"]
array([1, 2, 3])
f.close()
pytables
df = pd.read_csv("playerstats-2013-2014-top30.csv", skiprows=1)
df = df.set_index("Rank")
df[["Player", "Pos", "GP", "P", "G", "A", "S%", "Shift/GP"]].head(5)
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
f = tables.open_file("playerstats-2013-2014.h5", mode="w")
grp = f.create_group("/", "season_2013_2014", title="NHL player statistics for the 2013/2014 season")
grp
/season_2013_2014 (Group) 'NHL player statistics for the 2013/2014 season'
children := []
f.root
/ (RootGroup) ''
children := ['season_2013_2014' (Group)]
class PlayerStat(tables.IsDescription):
    """Record layout for one row of the player-statistics table."""

    player = tables.StringCol(20, dflt="")          # fixed-width string, 20 bytes
    position = tables.StringCol(1, dflt="C")        # single-character position code
    games_played = tables.UInt8Col(dflt=0)
    points = tables.UInt16Col(dflt=0)
    goals = tables.UInt16Col(dflt=0)
    assists = tables.UInt16Col(dflt=0)
    shooting_percentage = tables.Float64Col(dflt=0.0)
    shifts_per_game_played = tables.Float64Col(dflt=0.0)
top30_table = f.create_table(grp, 'top30', PlayerStat, "Top 30 point leaders")
playerstat = top30_table.row
type(playerstat)
tables.tableextension.Row
# Copy the selected DataFrame columns into the table, one record per player.
# The Row object is reused: fill its fields, then append() queues the record.
for index, row_series in df.iterrows():
    playerstat["player"] = row_series["Player"]
    playerstat["position"] = row_series["Pos"]
    playerstat["games_played"] = row_series["GP"]
    playerstat["points"] = row_series["P"]
    playerstat["goals"] = row_series["G"]
    playerstat["assists"] = row_series["A"]
    playerstat["shooting_percentage"] = row_series["S%"]
    playerstat["shifts_per_game_played"] = row_series["Shift/GP"]
    playerstat.append()

# Flush the queued records to the file once, after the loop.
top30_table.flush()
top30_table.cols.player[:5]
array([b'Sidney Crosby', b'Ryan Getzlaf', b'Claude Giroux',
b'Tyler Seguin', b'Corey Perry'], dtype='|S20')
top30_table.cols.points[:5]
array([104, 87, 86, 84, 82], dtype=uint16)
def print_playerstat(row):
    """Print one player record as a tab-separated line.

    The line holds the player name right-aligned in 20 columns, followed by
    points, goals and assists.

    ``row`` is any object that supports ``row[key]`` for the keys "player",
    "points", "goals" and "assists". The player name may be ``bytes`` (decoded
    as UTF-8) or already a ``str``.
    """
    name = row["player"]
    if isinstance(name, bytes):
        # String columns are read back as bytes; decode them for display.
        name = name.decode('UTF-8')
    print("%20s\t%s\t%s\t%s" %
          (name, row["points"], row["goals"], row["assists"]))
# Walk over every row of the table and print a summary line for each one.
for row in top30_table.iterrows():
    print_playerstat(row)
Sidney Crosby 104 36 68
Ryan Getzlaf 87 31 56
Claude Giroux 86 28 58
Tyler Seguin 84 37 47
Corey Perry 82 43 39
Phil Kessel 80 37 43
Taylor Hall 80 27 53
Alex Ovechkin 79 51 28
Joe Pavelski 79 41 38
Jamie Benn 79 34 45
Nicklas Backstrom 79 18 61
Patrick Sharp 78 34 44
Joe Thornton 76 11 65
Erik Karlsson 74 20 54
Evgeni Malkin 72 23 49
Patrick Marleau 70 33 37
Anze Kopitar 70 29 41
Matt Duchene 70 23 47
Martin St. Louis 69 30 39
Patrick Kane 69 29 40
Blake Wheeler 69 28 41
Kyle Okposo 69 27 42
David Krejci 69 19 50
Chris Kunitz 68 35 33
Jonathan Toews 68 28 40
Thomas Vanek 68 27 41
Jaromir Jagr 67 24 43
John Tavares 66 24 42
Jason Spezza 66 23 43
Jordan Eberle 65 28 37
# Select rows with a condition string: players with 76 to 80 points.
for row in top30_table.where("(points > 75) & (points <= 80)"):
    print_playerstat(row)
Phil Kessel 80 37 43
Taylor Hall 80 27 53
Alex Ovechkin 79 51 28
Joe Pavelski 79 41 38
Jamie Benn 79 34 45
Nicklas Backstrom 79 18 61
Patrick Sharp 78 34 44
Joe Thornton 76 11 65
# Conditions can combine several columns: more than 40 goals but under 80 points.
for row in top30_table.where("(goals > 40) & (points < 80)"):
    print_playerstat(row)
Alex Ovechkin 79 51 28
Joe Pavelski 79 41 38
f
File(filename=playerstats-2013-2014.h5, title='', mode='w', root_uep='/', filters=Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None))
/ (RootGroup) ''
/season_2013_2014 (Group) 'NHL player statistics for the 2013/2014 season'
/season_2013_2014/top30 (Table(30,)) 'Top 30 point leaders'
description := {
"assists": UInt16Col(shape=(), dflt=0, pos=0),
"games_played": UInt8Col(shape=(), dflt=0, pos=1),
"goals": UInt16Col(shape=(), dflt=0, pos=2),
"player": StringCol(itemsize=20, shape=(), dflt=b'', pos=3),
"points": UInt16Col(shape=(), dflt=0, pos=4),
"position": StringCol(itemsize=1, shape=(), dflt=b'C', pos=5),
"shifts_per_game_played": Float64Col(shape=(), dflt=0.0, pos=6),
"shooting_percentage": Float64Col(shape=(), dflt=0.0, pos=7)}
byteorder := 'little'
chunkshape := (1489,)
f.flush()
f.close()
!h5ls -rv playerstats-2013-2014.h5
Opened "playerstats-2013-2014.h5" with sec2 driver.
/ Group
Attribute: CLASS scalar
Type: 5-byte null-terminated UTF-8 string
Data: "GROUP"
Attribute: PYTABLES_FORMAT_VERSION scalar
Type: 3-byte null-terminated UTF-8 string
Data: "2.1"
Attribute: TITLE null
Type: 1-byte null-terminated UTF-8 string
Attribute: VERSION scalar
Type: 3-byte null-terminated UTF-8 string
Data: "1.0"
Location: 1:96
Links: 1
/season_2013_2014 Group
Attribute: CLASS scalar
Type: 5-byte null-terminated UTF-8 string
Data: "GROUP"
Attribute: TITLE scalar
Type: 46-byte null-terminated UTF-8 string
Data: "NHL player statistics for the 2013/2014 season"
Attribute: VERSION scalar
Type: 3-byte null-terminated UTF-8 string
Data: "1.0"
Location: 1:1024
Links: 1
/season_2013_2014/top30 Dataset {30/Inf}
Attribute: CLASS scalar
Type: 5-byte null-terminated UTF-8 string
Data: "TABLE"
Attribute: FIELD_0_FILL scalar
Type: native unsigned short
Data: 0
Attribute: FIELD_0_NAME scalar
Type: 7-byte null-terminated UTF-8 string
Data: "assists"
Attribute: FIELD_1_FILL scalar
Type: native unsigned char
Data: 0
Attribute: FIELD_1_NAME scalar
Type: 12-byte null-terminated UTF-8 string
Data: "games_played"
Attribute: FIELD_2_FILL scalar
Type: native unsigned short
Data: 0
Attribute: FIELD_2_NAME scalar
Type: 5-byte null-terminated UTF-8 string
Data: "goals"
Attribute: FIELD_3_FILL scalar
Type: 1-byte null-terminated ASCII string
Data: ""
Attribute: FIELD_3_NAME scalar
Type: 6-byte null-terminated UTF-8 string
Data: "player"
Attribute: FIELD_4_FILL scalar
Type: native unsigned short
Data: 0
Attribute: FIELD_4_NAME scalar
Type: 6-byte null-terminated UTF-8 string
Data: "points"
Attribute: FIELD_5_FILL scalar
Type: 1-byte null-terminated ASCII string
Data: "C"
Attribute: FIELD_5_NAME scalar
Type: 8-byte null-terminated UTF-8 string
Data: "position"
Attribute: FIELD_6_FILL scalar
Type: native double
Data: 0
Attribute: FIELD_6_NAME scalar
Type: 22-byte null-terminated UTF-8 string
Data: "shifts_per_game_played"
Attribute: FIELD_7_FILL scalar
Type: native double
Data: 0
Attribute: FIELD_7_NAME scalar
Type: 19-byte null-terminated UTF-8 string
Data: "shooting_percentage"
Attribute: NROWS scalar
Type: native long
Data: 30
Attribute: TITLE scalar
Type: 20-byte null-terminated UTF-8 string
Data: "Top 30 point leaders"
Attribute: VERSION scalar
Type: 3-byte null-terminated UTF-8 string
Data: "2.7"
Location: 1:2264
Links: 1
Chunks: {1489} 65516 bytes
Storage: 1320 logical bytes, 65516 allocated bytes, 2.01% utilization
Type: struct {
"assists" +0 native unsigned short
"games_played" +2 native unsigned char
"goals" +3 native unsigned short
"player" +5 20-byte null-terminated ASCII string
"points" +25 native unsigned short
"position" +27 1-byte null-terminated ASCII string
"shifts_per_game_played" +28 native double
"shooting_percentage" +36 native double
} 44 bytes
H5tools-DIAG: Error detected in HDF5:tools (1.8.14) thread 0:
#000: h5tools_dump.c line 1843 in h5tools_dump_mem(): H5Sis_simple failed
major: Failure in tools library
minor: error in function
Pandas hdfstore
import pandas as pd
store = pd.HDFStore('store.h5')
df = pd.DataFrame(np.random.rand(5,5))
store["df1"] = df
df = pd.read_csv("playerstats-2013-2014-top30.csv", skiprows=1)
store["df2"] = df
store.keys()
['/df1', '/df2']
'df2' in store
True
df = store["df1"]
store.root
/ (RootGroup) ''
children := ['df1' (Group), 'df2' (Group)]
store.close()
# Inspect the file written by HDFStore with h5py. It is only read here, so it
# is opened explicitly read-only instead of relying on the default mode.
f = h5py.File("store.h5", "r")
f.visititems(lambda x, y: print(x, "\t" * int(3 - len(str(x))//8), y))
df1 <HDF5 group "/df1" (4 members)>
df1/axis0 <HDF5 dataset "axis0": shape (5,), type "<i8">
df1/axis1 <HDF5 dataset "axis1": shape (5,), type "<i8">
df1/block0_items <HDF5 dataset "block0_items": shape (5,), type "<i8">
df1/block0_values <HDF5 dataset "block0_values": shape (5, 5), type "<f8">
df2 <HDF5 group "/df2" (8 members)>
df2/axis0 <HDF5 dataset "axis0": shape (21,), type "|S8">
df2/axis1 <HDF5 dataset "axis1": shape (30,), type "<i8">
df2/block0_items <HDF5 dataset "block0_items": shape (3,), type "|S8">
df2/block0_values <HDF5 dataset "block0_values": shape (30, 3), type "<f8">
df2/block1_items <HDF5 dataset "block1_items": shape (14,), type "|S4">
df2/block1_values <HDF5 dataset "block1_values": shape (30, 14), type "<i8">
df2/block2_items <HDF5 dataset "block2_items": shape (4,), type "|S6">
df2/block2_values <HDF5 dataset "block2_values": shape (1,), type "|O">
# Read the complete dataset; [()] replaces the deprecated .value attribute.
f["/df2/block0_items"][()]
array([b'S%', b'Shift/GP', b'FO%'], dtype='|S8')
f["/df2/block0_values"][:3]
array([[13.9, 24. , 52.5],
[15.2, 25.2, 49. ],
[12.6, 25.1, 52.9]])
# Read the complete dataset; [()] replaces the deprecated .value attribute.
f["/df2/block1_items"][()]
array([b'Rank', b'GP', b'G', b'A', b'P', b'+/-', b'PIM', b'PPG', b'PPP',
b'SHG', b'SHP', b'GW', b'OT', b'S'], dtype='|S4')
f["/df2/block1_values"][:3, :5]
array([[ 1, 80, 36, 68, 104],
[ 2, 77, 31, 56, 87],
[ 3, 82, 28, 58, 86]])
JSON
data = ["string", 1.0, 2, None]
data_json = json.dumps(data)
data_json
'["string", 1.0, 2, null]'
# Decode the JSON text back into Python objects and display the round-tripped
# value (not the original list), so the cell really shows what loads() returns.
data = json.loads(data_json)
data
['string', 1.0, 2, None]
data[0]
'string'
data = {"one": 1, "two": 2.0, "three": "three"}
data_json = json.dumps(data)
print(data_json)
{"one": 1, "two": 2.0, "three": "three"}
data = json.loads(data_json)
data["two"]
2.0
data["three"]
'three'
# Serialize a dict of lists with one-space indentation so that the nesting is
# visible in the printed JSON text.
data = {}
data["one"] = [1]
data["two"] = [1, 2]
data["three"] = [1, 2, 3]
data_json = json.dumps(data, indent=1)
print(data_json)
{
"one": [
1
],
"two": [
1,
2
],
"three": [
1,
2,
3
]
}
# A nested structure mixing lists, a dict, tuples and a string. JSON has no
# tuple type, so the tuples are written out as arrays.
data = {"one": [1],
        "two": {"one": 1, "two": 2},
        "three": [(1,), (1, 2), (1, 2, 3)],
        "four": "a text string"}

# json.dump writes straight to an open text file (json.dumps returns a str).
with open("data.json", "w") as f:
    json.dump(data, f)
!cat data.json
{"one": [1], "two": {"one": 1, "two": 2}, "three": [[1], [1, 2], [1, 2, 3]], "four": "a text string"}
# Parse the JSON file back into Python objects.
with open("data.json", "r") as f:
    data_from_file = json.load(f)
data_from_file["two"]
{'one': 1, 'two': 2}
data_from_file["three"]
[[1], [1, 2], [1, 2, 3]]
!head -n 20 tokyo-metro.json
{
"C": {
"color": "#149848",
"transfers": [
[
"C3",
"F15"
],
[
"C4",
"Z2"
],
[
"C4",
"G2"
],
[
"C7",
"M14"
],
!wc tokyo-metro.json
1471 1508 27638 tokyo-metro.json
# Load the metro network description; the top-level object becomes a dict.
with open("tokyo-metro.json", "r") as f:
    data = json.load(f)
data.keys()
dict_keys(['C', 'G', 'F', 'H', 'M', 'N', 'T', 'Y', 'Z'])
data["C"].keys()
dict_keys(['color', 'transfers', 'travel_times'])
data["C"]["color"]
'#149848'
data["C"]["transfers"]
[['C3', 'F15'],
['C4', 'Z2'],
['C4', 'G2'],
['C7', 'M14'],
['C7', 'N6'],
['C7', 'G6'],
['C8', 'M15'],
['C8', 'H6'],
['C9', 'H7'],
['C9', 'Y18'],
['C11', 'T9'],
['C11', 'M18'],
['C11', 'Z8'],
['C12', 'M19'],
['C18', 'H21']]
[(s, e, tt) for s, e, tt in data["C"]["travel_times"] if tt == 1]
[('C3', 'C4', 1), ('C7', 'C8', 1), ('C9', 'C10', 1)]
data
{'C': {'color': '#149848',
'transfers': [['C3', 'F15'],
['C4', 'Z2'],
['C4', 'G2'],
['C7', 'M14'],
['C7', 'N6'],
['C7', 'G6'],
['C8', 'M15'],
['C8', 'H6'],
['C9', 'H7'],
['C9', 'Y18'],
['C11', 'T9'],
['C11', 'M18'],
['C11', 'Z8'],
['C12', 'M19'],
['C18', 'H21']],
'travel_times': [['C1', 'C2', 2],
['C2', 'C3', 2],
['C3', 'C4', 1],
['C4', 'C5', 2],
['C5', 'C6', 2],
['C6', 'C7', 2],
['C7', 'C8', 1],
['C8', 'C9', 3],
['C9', 'C10', 1],
['C10', 'C11', 2],
['C11', 'C12', 2],
['C12', 'C13', 2],
['C13', 'C14', 2],
['C14', 'C15', 2],
['C15', 'C16', 2],
['C16', 'C17', 3],
['C17', 'C18', 3],
['C18', 'C19', 3]]},
'G': {'color': '#f59230',
'transfers': [['G1', 'Z1'],
['G1', 'F16'],
['G2', 'Z2'],
['G2', 'C4'],
['G4', 'Z3'],
['G5', 'M13'],
['G5', 'Y16'],
['G5', 'Z4'],
['G5', 'N7'],
['G6', 'N6'],
['G6', 'M14'],
['G6', 'C7'],
['G9', 'M16'],
['G9', 'H8'],
['G11', 'T10'],
['G12', 'Z9'],
['G15', 'H16'],
['G16', 'H17']],
'travel_times': [['G1', 'G2', 2],
['G2', 'G3', 1],
['G3', 'G4', 2],
['G4', 'G5', 2],
['G5', 'G6', 2],
['G6', 'G7', 2],
['G7', 'G8', 2],
['G8', 'G9', 2],
['G9', 'G10', 1],
['G10', 'G11', 2],
['G11', 'G12', 2],
['G12', 'G13', 1],
['G13', 'G14', 2],
['G14', 'G15', 2],
['G15', 'G16', 1],
['G16', 'G17', 2],
['G17', 'G18', 1],
['G18', 'G19', 2]]},
'F': {'color': '#b96528',
'transfers': [['F1', 'Y1'],
['F2', 'Y2'],
['F3', 'Y3'],
['F4', 'Y4'],
['F5', 'Y5'],
['F6', 'Y6'],
['F7', 'Y7'],
['F8', 'Y8'],
['F9', 'Y9'],
['F9', 'M25'],
['F13', 'M9'],
['F15', 'C3'],
['F16', 'Z1'],
['F16', 'G1']],
'travel_times': [['F1', 'F2', 3],
['F2', 'F3', 2],
['F3', 'F4', 3],
['F4', 'F5', 2],
['F5', 'F6', 2],
['F6', 'F7', 2],
['F7', 'F8', 2],
['F8', 'F9', 2],
['F9', 'F10', 3],
['F10', 'F11', 2],
['F11', 'F12', 2],
['F12', 'F13', 2],
['F13', 'F14', 3],
['F14', 'F15', 2],
['F15', 'F16', 2]]},
'H': {'color': '#9cacb5',
'transfers': [['H6', 'M15'],
['H6', 'C8'],
['H7', 'Y18'],
['H7', 'C9'],
['H8', 'M16'],
['H8', 'G9'],
['H12', 'T11'],
['H16', 'G15'],
['H17', 'G16'],
['H21', 'C18']],
'travel_times': [['H1', 'H2', 3],
['H2', 'H3', 3],
['H3', 'H4', 3],
['H4', 'H5', 3],
['H5', 'H6', 2],
['H6', 'H7', 3],
['H7', 'H8', 1],
['H8', 'H9', 2],
['H9', 'H10', 2],
['H10', 'H11', 2],
['H11', 'H12', 1],
['H12', 'H13', 3],
['H13', 'H14', 1],
['H14', 'H15', 2],
['H15', 'H16', 2],
['H16', 'H17', 1],
['H17', 'H18', 2],
['H18', 'H19', 2],
['H19', 'H20', 2],
['H20', 'H21', 3]]},
'M': {'color': '#ff0000',
'transfers': [['M9', 'F13'],
['M12', 'N8'],
['M13', 'G5'],
['M13', 'Y16'],
['M13', 'Z4'],
['M13', 'N7'],
['M14', 'C7'],
['M14', 'G6'],
['M14', 'N6'],
['M15', 'H6'],
['M15', 'C8'],
['M16', 'G9'],
['M16', 'H8'],
['M18', 'T9'],
['M18', 'C11'],
['M18', 'Z8'],
['M19', 'C12'],
['M22', 'N11'],
['M25', 'Y9'],
['M25', 'F9']],
'travel_times': [['M1', 'M2', 2],
['M2', 'M3', 2],
['M3', 'M4', 2],
['M4', 'M5', 2],
['M5', 'M6', 2],
['M6', 'M7', 2],
['M7', 'M8', 2],
['M8', 'M9', 2],
['M9', 'M10', 1],
['M10', 'M11', 2],
['M11', 'M12', 2],
['M12', 'M13', 3],
['M13', 'M14', 2],
['M14', 'M15', 1],
['M15', 'M16', 3],
['M16', 'M17', 2],
['M17', 'M18', 2],
['M18', 'M19', 2],
['M19', 'M20', 1],
['M20', 'M21', 2],
['M21', 'M22', 2],
['M22', 'M23', 3],
['M23', 'M24', 2],
['M24', 'M25', 3],
['m3', 'm4', 2],
['m4', 'm5', 2],
['m5', 'M6', 2]]},
'N': {'color': '#1aaca9',
'transfers': [['N1', 'T1'],
['N2', 'T2'],
['N3', 'T3'],
['N6', 'G6'],
['N6', 'M14'],
['N6', 'C7'],
['N7', 'Y16'],
['N7', 'Z4'],
['N7', 'G5'],
['N7', 'M13'],
['N8', 'M12'],
['N9', 'Y14'],
['N10', 'Y13'],
['N10', 'T6'],
['N11', 'M22']],
'travel_times': [['N1', 'N2', 2],
['N2', 'N3', 2],
['N3', 'N4', 2],
['N4', 'N5', 2],
['N5', 'N6', 2],
['N6', 'N7', 2],
['N7', 'N8', 2],
['N8', 'N9', 2],
['N9', 'N10', 2],
['N10', 'N11', 2],
['N11', 'N12', 3],
['N12', 'N13', 2],
['N13', 'N14', 2],
['N14', 'N15', 3],
['N15', 'N16', 1],
['N16', 'N17', 3],
['N17', 'N18', 2],
['N18', 'N19', 2]]},
'T': {'color': '#1aa7d8',
'transfers': [['T6', 'N10'],
['T6', 'Y13'],
['T7', 'Z6'],
['T9', 'M18'],
['T9', 'C11'],
['T9', 'Z8'],
['T10', 'G11'],
['T11', 'H12']],
'travel_times': [['T1', 'T2', 0],
['T2', 'T3', 3],
['T3', 'T4', 6],
['T4', 'T5', 9],
['T5', 'T6', 11],
['T6', 'T7', 13],
['T7', 'T8', 14],
['T8', 'T9', 16],
['T9', 'T10', 18],
['T10', 'T11', 20],
['T11', 'T12', 21],
['T12', 'T13', 24],
['T13', 'T14', 26],
['T14', 'T15', 27],
['T15', 'T16', 30],
['T16', 'T17', 33],
['T17', 'T18', 35],
['T18', 'T19', 37],
['T19', 'T20', 39],
['T20', 'T21', 41],
['T21', 'T22', 43],
['T22', 'T23', 46],
['T23', 'T24', 49]]},
'Y': {'color': '#ede7c3',
'transfers': [['Y1', 'F1'],
['Y2', 'F2'],
['Y3', 'F3'],
['Y4', 'F4'],
['Y5', 'F5'],
['Y6', 'F6'],
['Y7', 'F7'],
['Y8', 'F8'],
['Y9', 'F9'],
['Y9', 'M25'],
['Y13', 'T6'],
['Y13', 'N10'],
['Y14', 'N9'],
['Y16', 'Z4'],
['Y16', 'N7'],
['Y16', 'G5'],
['Y16', 'M13'],
['Y18', 'H7'],
['Y18', 'C9']],
'travel_times': [['Y1', 'Y2', 4],
['Y2', 'Y3', 2],
['Y3', 'Y4', 3],
['Y4', 'Y5', 2],
['Y5', 'Y6', 2],
['Y6', 'Y7', 2],
['Y7', 'Y8', 2],
['Y8', 'Y9', 3],
['Y9', 'Y10', 2],
['Y10', 'Y11', 2],
['Y11', 'Y12', 2],
['Y12', 'Y13', 3],
['Y13', 'Y14', 2],
['Y14', 'Y15', 2],
['Y15', 'Y16', 1],
['Y16', 'Y17', 2],
['Y17', 'Y18', 2],
['Y18', 'Y19', 2],
['Y19', 'Y20', 2],
['Y20', 'Y21', 2],
['Y21', 'Y22', 2],
['Y22', 'Y23', 3],
['Y23', 'Y24', 2]]},
'Z': {'color': '#a384bf',
'transfers': [['Z1', 'F16'],
['Z1', 'G1'],
['Z2', 'C4'],
['Z2', 'G2'],
['Z3', 'G4'],
['Z4', 'Y16'],
['Z4', 'N7'],
['Z4', 'M13'],
['Z4', 'G5'],
['Z6', 'T7'],
['Z8', 'M18'],
['Z8', 'C11'],
['Z8', 'T9'],
['Z9', 'G12']],
'travel_times': [['Z1', 'Z2', 3],
['Z2', 'Z3', 2],
['Z3', 'Z4', 2],
['Z4', 'Z5', 2],
['Z5', 'Z6', 2],
['Z6', 'Z7', 2],
['Z7', 'Z8', 2],
['Z8', 'Z9', 2],
['Z9', 'Z10', 3],
['Z10', 'Z11', 3],
['Z11', 'Z12', 3],
['Z12', 'Z13', 2],
['Z13', 'Z14', 2]]}}
!ls -lh tokyo-metro.json
-rw-r--r-- 1 rob staff 27K Mar 25 2018 tokyo-metro.json
data_pack = msgpack.packb(data)
# del data
type(data_pack)
bytes
len(data_pack)
3021
# msgpack produces bytes, so the file must be opened in binary mode.
with open("tokyo-metro.msgpack", "wb") as f:
    f.write(data_pack)
!ls -lh tokyo-metro.msgpack
-rw-r--r-- 1 rob staff 3.0K May 6 16:12 tokyo-metro.msgpack
# Read the packed bytes back; the file is only needed for the read itself.
with open("tokyo-metro.msgpack", "rb") as f:
    data_msgpack = f.read()

# NOTE(review): the recorded output shows bytes keys after unpacking, which
# suggests a msgpack version whose unpackb defaults to raw=True. Passing
# raw=False would return str keys -- confirm against the installed version.
data = msgpack.unpackb(data_msgpack)
list(data.keys())
[b'C', b'G', b'F', b'H', b'M', b'N', b'T', b'Y', b'Z']
# Serialize the dict with pickle; pickle files are binary, hence "wb".
with open("tokyo-metro.pickle", "wb") as f:
    pickle.dump(data, f)

# Drop the in-memory object so that a later load has to come from the file.
del data
!ls -lh tokyo-metro.pickle
-rw-r--r-- 1 rob staff 8.5K May 6 16:12 tokyo-metro.pickle
# Restore the object from the pickle file.
# NOTE: pickle.load can execute arbitrary code; only unpickle files you trust.
with open("tokyo-metro.pickle", "rb") as f:
    data = pickle.load(f)
data.keys()
dict_keys([b'C', b'G', b'F', b'H', b'M', b'N', b'T', b'Y', b'Z'])
Versions
%reload_ext version_information
%version_information numpy, pandas, csv, json, tables, h5py, msgpack