import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import stats
data = pd.read_csv('clean_murder_data.csv')
# stateAbbr = pd.read_csv("StatesAbbr.csv")
data.head()
print(data.columns)
print(stateAbbr.columns)
relationship_df = data.groupby(['Relationship']).CaseID.count().reset_index()
relationship_df
# Total Murders by state since 1976
totalMurders = data.groupby('State')['CaseID'].count()
print(totalMurders.head())
totalMurders_df = totalMurders.reset_index().rename(columns={'CaseID':'TotalMurders'})
print(totalMurders_df.head())
# Number of UNSOLVED homicides by state since 1976
unsolved = data.loc[data['Solved'] == 'No']
unsolved = unsolved.groupby('State')['CaseID'].count()
print(unsolved.head())
unsolved_df = unsolved.reset_index().rename(columns={'CaseID':'TotalUnsolved'})
print(unsolved_df.head())
# Percent of UNSOLVED homicides by state since 1976
percentUnsolved = (unsolved/totalMurders)*100
percentUnsolved = percentUnsolved.round(1)
print(percentUnsolved.round(1).head())
percentUnsolved_df = pd.DataFrame(percentUnsolved).reset_index().rename(columns={'CaseID':'PercentUnsolved'})
print(percentUnsolved_df.head())
# Number of SOLVED homicides by state since 1976
solved = data.loc[data['Solved'] == 'Yes']
solved = solved.groupby('State')['CaseID'].count()
print(solved.head())
solved_df = solved.reset_index().rename(columns={'CaseID':'TotalSolved'})
print(solved_df.head())
# Percent of SOLVED homicides by state since 1976
percentSolved = (solved/totalMurders)*100
percentSolved = percentSolved.round(1)
print(percentSolved.head())
percentSolved_df = pd.DataFrame(percentSolved).reset_index().rename(columns={'CaseID':'PercentSolved'})
print(percentSolved_df.head())
# stateWeapon= data.groupby(['State','Weapon'])["CaseID"].count().reset_index()
# print(stateWeapon.head())
# stateWeapon = stateWeapon.sort_values(["CaseID"], ascending=False)
# stateWeapon_df = stateWeapon.drop_duplicates(["State"])
# print(stateWeapon_df.head())
# Merge all State data to create final csv
state_db = stateAbbr.merge(totalMurders_df,on="State")
state_db = state_db.merge(unsolved_df, on="State")
state_db = state_db.merge(percentUnsolved_df, on="State")
state_db = state_db.merge(solved_df, on="State")
state_db = state_db.merge(percentSolved_df, on="State")
print(state_db.head())
state_db.to_csv("state_db.csv", index=False)
def month_string_to_number(string):
m = {
'jan': 1,
'feb': 2,
'mar': 3,
'apr':4,
'may':5,
'jun':6,
'jul':7,
'aug':8,
'sep':9,
'oct':10,
'nov':11,
'dec':12
}
s = string.strip()[:3].lower()
try:
out = m[s]
return out
except:
raise ValueError('Not a month')
stateMurdersByMonth = data.groupby(['State','Month'])['CaseID'].count().reset_index()
#print(stateMurdersByMonth.head())
stateMurdersByMonth['MonthNums']=stateMurdersByMonth['Month'].map(month_string_to_number)
stateMurdersByMonth = stateMurdersByMonth.rename(columns={'CaseID':'TotalMurders'})
stateMurdersByMonth = stateMurdersByMonth.sort_values(['State',"MonthNums"], ascending=True)
plotala=stateMurdersByMonth.loc[stateMurdersByMonth['State']=='North Carolina']
print(plotala)
plt.errorbar(plotala.MonthNums, plotala['TotalMurders'],
linestyle='--', marker='.', markersize=8)
pivot = stateMurdersByMonth.pivot(index='MonthNums', columns='State',values='TotalMurders')
print(pivot.head())
pivot = pivot.reset_index()
for state in stateAbbr['State']:
plt.errorbar(pivot.MonthNums, pivot[state], fmt='2',
linestyle='--', marker='.', markersize=8,capsize=4)
plt.show()
relationship_df = data.groupby(['Relationship']).CaseID.count().reset_index()
relationship_df
weapon_df = data.groupby(['Weapon']).CaseID.count().reset_index()
weapon_df
circumstance_df = data.groupby(['Circumstance']).CaseID.count().reset_index()
circumstance_df
year_df = data.groupby(['Year']).CaseID.count().reset_index()
year_df
NC_df = data.loc[data["StateAbbr"]=="NC"]
agency_df = NC_df.groupby(['Agency']).CaseID.count().reset_index()
agency_df.sort_values(['CaseID'], ascending=False)
MSA_df = NC_df.groupby(['MSA']).CaseID.count().reset_index()
MSA_df.sort_values(['CaseID'], ascending=False)
rel_state = data.groupby(['State','Relationship'])["CaseID"].count().reset_index()
relationship2=rel_state.sort_values(["CaseID","Relationship"], ascending=False)
relbystate=relationship2.drop_duplicates(["State"])
relbystate.head(5)
off_state = data.groupby(['State','OffRace'])["CaseID"].count().reset_index()
off2=off_state.sort_values(["CaseID","OffRace"], ascending=False)
offracebystate=off2.drop_duplicates(["State"])
offracebystate.head(51)
vic_state = data.groupby(['State','VicRace'])["CaseID"].count().reset_index()
vic2=vic_state.sort_values(["CaseID","VicRace"], ascending=False)
vicracebystate=vic2.drop_duplicates(["State"])
vicracebystate.head(51)
unsolved=data.loc[data["Solved"]=='No']
age_state = data.groupby(['State','VicAge'])["CaseID"].count().reset_index()
age2=age_state.sort_values(["CaseID","VicAge"], ascending=False)
vicagebystate=age2.drop_duplicates(["State"])
vicagebystate=vicagebystate.rename(columns={'CaseID':'Murders','VicAge': 'VictimAge'})
vicagebystate.head(51)
vicagebystate.plot(x='VictimAge',y='Murders',kind='Bar')
weapon_counts = data["Weapon"].value_counts().reset_index()
weaponcounts2 = weapon_counts.rename(columns={'Weapon':'Murders'})
weapon=weaponcounts2.rename(columns={"index":"Weapon"})
weapon
weapons=weapon.set_index("Weapon")
weapons2=weapons.head(20)
weapons2.plot(kind='bar',y='Murders',figsize=(30,10),fontsize=25,legend=False,color="red")
plt.xlabel("Weapon",fontsize=30)
plt.ylabel('Murders',fontsize=30)
plt.ylim(ymax=125000)
plt.title('Murder Weapons',fontsize=50)
plt.show()
savefig('weapon.png',transparent=True)
#vic age by state graph bar??
#vic sex of unsolved murders
csv_path = "clean_murder_data.csv"
data_df = pd.read_csv (csv_path)
data_df.head()
data2=data_df.loc[data_df['Solved']=='No']