San Francisco Police Department traffic stops data 2017 Python code

http://sanfranciscopolice.org/data#trafficstops

Data file used: "Stops by Race and Ethnicity – data (2017)", available from the link above.

# %load ../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import seaborn as sns

from sklearn.preprocessing import scale
import sklearn.linear_model as skl_lm
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
import statsmodels.formula.api as smf

# IPython/Jupyter magic: render matplotlib figures inline in the notebook.
# NOTE(review): this line is not plain Python; it only runs under IPython/Jupyter.
%matplotlib inline
# Apply the 'seaborn-white' matplotlib style sheet to subsequent figures.
# NOTE(review): newer matplotlib releases appear to have renamed the seaborn
# style sheets — confirm the name against the installed version.
plt.style.use('seaborn-white')


# Load the 2017 SFPD traffic-stop data. The downloaded file was renamed to
# 'sfpd2017.csv' so that it is easier to load; '/.../' is a placeholder path.
df = pd.read_csv('/.../sfpd2017.csv')
df.head()
# (df.head() previews the first rows; in a notebook the result is displayed.)

# Print the column/dtype/non-null summary, then draw a heatmap of the
# null mask to find missing data in the data set.
df.info()
sns.heatmap(df.isnull(),yticklabels=False,cbar=False,cmap='viridis')
# Start a new 15x9 figure with an overall title for the demographic charts.
fig = plt.figure(figsize=(15,9))
fig.suptitle('SFPD demographic chart', fontsize=20)

# NOTE(review): the plotting calls below look like they were separate notebook
# cells. Run back-to-back in one script, the seaborn calls draw onto the
# current axes and will overlay each other — confirm the intended layout.
sns.set_style('whitegrid')
# Stop counts per race category, split by sex.
sns.countplot(x='Race_description',hue='Sex',data=df,palette='RdBu_r')
# Histogram of age (rows with missing Age dropped, no KDE curve, 50 bins).
sns.distplot(df['Age'].dropna(),kde=False,color='darkred',bins=50)
# Stop counts per race category, then per sex.
sns.countplot(x='Race_description',data=df)
sns.countplot(x='Sex',data=df)
# New 12x7 figure: distribution of age within each race category.
plt.figure(figsize=(12, 7))
sns.boxplot(x='Race_description',y='Age',data=df,palette='winter')
# Histogram of the Race_description column via pandas' Series.hist.
df['Race_description'].hist(color='green',bins=40,figsize=(8,4))
# Hexbin demo on synthetic data: 1000 rows of standard-normal values in two
# columns that merely reuse the names 'Race_description' and 'Age'.
# The frame is bound to its own name so that it does not overwrite the SFPD
# data in `df`; previously `df` was rebound here, which discarded the loaded
# data and made the lmplot below fail because 'Time_hour' and 'Sex' no longer
# existed.
demo_df = pd.DataFrame(np.random.randn(1000, 2), columns=['Race_description', 'Age'])
demo_df.plot.hexbin(x='Race_description',y='Age',gridsize=25,cmap='Oranges')

# Linear-regression plots of Age against Time_hour on the real SFPD data:
# one facet column per race category, coloured by sex.
# NOTE(review): newer seaborn releases name the facet-height parameter
# `height` instead of `size` — confirm against the installed version.
sns.lmplot(x='Time_hour',y='Age',data=df,col='Race_description',hue='Sex',palette='coolwarm',
          aspect=0.6,size=8)

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. ( Log Out / Change )

Google photo

You are commenting using your Google account. ( Log Out / Change )

Twitter picture

You are commenting using your Twitter account. ( Log Out / Change )

Facebook photo

You are commenting using your Facebook account. ( Log Out / Change )

Connecting to %s