NBA metric height and weight (python code)

Python 3.6 using Jupyter Notebook

# %load ../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import seaborn as sns

from sklearn.preprocessing import scale
import sklearn.linear_model as skl_lm
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
import statsmodels.formula.api as smf

%matplotlib inline‘seaborn-white’)

# Load one CSV per birth cohort. Replace the "/…/" placeholder in each path
# with the directory that holds the files on your machine.
a = pd.read_csv('/…/1913-1933nba.csv') # add your location for your file in …

b = pd.read_csv('/…/1934-1959nba.csv') # add your location for your file in …

c = pd.read_csv('/…/1960-1979 nba.csv') # add your location for your file in …

d = pd.read_csv('/…/1980-1997 nba.csv') # add your location for your file in …

# Draw one first-order regression line (height against weight) per cohort on
# the same axes. Each cohort gets its own scatter colour, given as a matplotlib
# single-letter colour code. x and y are passed by keyword because current
# seaborn releases no longer accept them positionally.
for cohort, colour in ((a, 'g'), (b, 'r'), (c, 'b'), (d, 'y')):
    sns.regplot(
        x=cohort.weight_lbs,
        y=cohort.height_ft,
        order=1,
        ci=None,  # no confidence band around the fitted line
        scatter_kws={'color': colour, 's': 12},
    )

# Simple linear regression of height on weight for the 1913-1933 cohort.
regr = skl_lm.LinearRegression()

# scikit-learn expects a 2-D feature matrix, hence the reshape to one column.
X = a.weight_lbs.values.reshape(-1, 1)
y = a.height_ft
regr.fit(X, y)

# regression coefficient for 1913-1933 (4.24)
# NOTE(review): 4.24 is presumably regr.intercept_ (height in ft at zero weight)
# rather than the slope regr.coef_[0]; with the means quoted below (6.3 ft,
# 192.7 lbs) an intercept of 4.24 implies a slope of about 0.011 ft per lb.
# Confirm against the fitted model.

# Summary statistics for the earliest and the latest cohort.
a[['weight_lbs', 'height_ft']].describe()
# 1913-1933 note 6.3 ft mean and 192.7 lbs mean

d[['weight_lbs', 'height_ft']].describe()
# 1980-1997 note 6.59 ft mean and 219.9 lbs mean
# increase of +0.03 ft in the mean and +6.8 lbs in the mean
# NOTE(review): the means quoted above differ by +0.29 ft and +27.2 lbs between
# the first and last cohort. The +6.8 lbs figure matches 27.2 lbs spread over
# four cohorts; the +0.03 ft figure does not follow from these numbers — confirm.

# Create a coordinate grid
# NOTE(review): the ranges look swapped relative to the data (weights are
# around 200 lbs, heights around 6 ft) — confirm the intended axis extents.
weight_lbs = np.arange(0, 50)
height_ft = np.arange(0, 300)

# With indexing='xy', B1 and B2 both have shape (height_ft.size, weight_lbs.size).
B1, B2 = np.meshgrid(weight_lbs, height_ft, indexing='xy')

# Evaluate the fitted plane at every grid point in one vectorised expression
# (equivalent to filling a zero array element by element).
# NOTE(review): this needs a model fitted on two predictors. The X built from
# a.weight_lbs alone has a single column, in which case regr.coef_ has one
# entry and regr.coef_[1] raises IndexError — confirm which model is intended.
Z = regr.intercept_ + B1 * regr.coef_[0] + B2 * regr.coef_[1]

# Create plot
fig = plt.figure(figsize=(12, 8))
fig.suptitle('NBA players born between 1910 – 1997', fontsize=20)

# Ask the figure for a 3-D axes instead of calling axes3d.Axes3D(fig) directly:
# newer matplotlib releases no longer attach a directly constructed Axes3D to
# the figure, which leaves the plot empty.
ax = fig.add_subplot(111, projection='3d')

# Semi-transparent regression surface, then one scatter layer per cohort using
# the same colour codes as the 2-D regression plots.
ax.plot_surface(B1, B2, Z, rstride=10, cstride=5, alpha=0.4)
for cohort, colour in ((a, 'g'), (b, 'r'), (c, 'b'), (d, 'y')):
    ax.scatter3D(cohort.weight_lbs, cohort.height_ft, cohort.born, c=colour)


# Hexbin joint plot of weight against height for the 1913-1933 cohort, with the
# marginal distribution of each variable on the sides.
sns.jointplot(x='weight_lbs', y='height_ft', data=a, kind='hex')
# Note: the Pearson r measures the strength of the linear relationship between
# the two variables; the value reported for this cohort is 0.79.

Leave a Reply

Fill in your details below or click an icon to log in: Logo

You are commenting using your account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s