Messing with data¶

import pandas as pd
import numpy as np
import seaborn as sns

from matplotlib import pyplot as plt
from scipy import stats

data = pd.read_csv('clean_murder_data.csv')
# stateAbbr = pd.read_csv("StatesAbbr.csv")

data.head()

/Users/randi/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2698: DtypeWarning: Columns (15,19) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

print(data.columns)
print(stateAbbr.columns)
relationship_df = data.groupby(['Relationship']).CaseID.count().reset_index()
relationship_df

Index(['ID', 'CNTYFIPS', 'Ori', 'State', 'Agency', 'Agentype', 'Source',
       'Solved', 'Year', 'StateName', 'Month', 'Incident', 'ActionType',
       'Homicide', 'Situation', 'VicAge', 'VicSex', 'VicRace', 'VicEthnic',
       'OffAge', 'OffSex', 'OffRace', 'OffEthnic', 'Weapon', 'Relationship',
       'Circumstance', 'Subcircum', 'VicCount', 'OffCount', 'FileDate',
       'fstate', 'MSA'],
      dtype='object')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-8f83de2b1d42> in <module>()
      1 print(data.columns)
----> 2 print(stateAbbr.columns)
      3 relationship_df = data.groupby(['Relationship']).CaseID.count().reset_index()
      4 relationship_df

NameError: name 'stateAbbr' is not defined

Creating State database¶

# Total Murders by state since 1976
totalMurders = data.groupby('State')['CaseID'].count()
print(totalMurders.head())
totalMurders_df = totalMurders.reset_index().rename(columns={'CaseID':'TotalMurders'})
print(totalMurders_df.head())

# Number of UNSOLVED homicides by state since 1976
unsolved = data.loc[data['Solved'] == 'No']
unsolved = unsolved.groupby('State')['CaseID'].count()
print(unsolved.head())
unsolved_df = unsolved.reset_index().rename(columns={'CaseID':'TotalUnsolved'})
print(unsolved_df.head())

# Percent of UNSOLVED homicides by state since 1976
percentUnsolved = (unsolved/totalMurders)*100
percentUnsolved = percentUnsolved.round(1)
print(percentUnsolved.round(1).head())
percentUnsolved_df = pd.DataFrame(percentUnsolved).reset_index().rename(columns={'CaseID':'PercentUnsolved'})
print(percentUnsolved_df.head())

# Number of SOLVED homicides by state since 1976
solved = data.loc[data['Solved'] == 'Yes']
solved = solved.groupby('State')['CaseID'].count()
print(solved.head())
solved_df = solved.reset_index().rename(columns={'CaseID':'TotalSolved'})
print(solved_df.head())

# Percent of SOLVED homicides by state since 1976
percentSolved = (solved/totalMurders)*100
percentSolved = percentSolved.round(1)
print(percentSolved.head())
percentSolved_df = pd.DataFrame(percentSolved).reset_index().rename(columns={'CaseID':'PercentSolved'})
print(percentSolved_df.head())

# stateWeapon= data.groupby(['State','Weapon'])["CaseID"].count().reset_index()
# print(stateWeapon.head())
# stateWeapon = stateWeapon.sort_values(["CaseID"], ascending=False)
# stateWeapon_df = stateWeapon.drop_duplicates(["State"])
# print(stateWeapon_df.head())

# Merge all State data to create final csv
state_db = stateAbbr.merge(totalMurders_df,on="State")
state_db = state_db.merge(unsolved_df, on="State")
state_db = state_db.merge(percentUnsolved_df, on="State")
state_db = state_db.merge(solved_df, on="State")
state_db = state_db.merge(percentSolved_df, on="State")
print(state_db.head())
state_db.to_csv("state_db.csv", index=False)

Testing for Murder data by month¶

def month_string_to_number(string):
    m = {
        'jan': 1,
        'feb': 2,
        'mar': 3,
        'apr':4,
         'may':5,
         'jun':6,
         'jul':7,
         'aug':8,
         'sep':9,
         'oct':10,
         'nov':11,
         'dec':12
        }
    s = string.strip()[:3].lower()

    try:
        out = m[s]
        return out
    except:
        raise ValueError('Not a month')

stateMurdersByMonth = data.groupby(['State','Month'])['CaseID'].count().reset_index()
#print(stateMurdersByMonth.head())
stateMurdersByMonth['MonthNums']=stateMurdersByMonth['Month'].map(month_string_to_number)
stateMurdersByMonth = stateMurdersByMonth.rename(columns={'CaseID':'TotalMurders'})
stateMurdersByMonth = stateMurdersByMonth.sort_values(['State',"MonthNums"], ascending=True)

plotala=stateMurdersByMonth.loc[stateMurdersByMonth['State']=='North Carolina']
print(plotala)
plt.errorbar(plotala.MonthNums, plotala['TotalMurders'], 
                 linestyle='--', marker='.', markersize=8)

pivot = stateMurdersByMonth.pivot(index='MonthNums', columns='State',values='TotalMurders')
print(pivot.head())
pivot = pivot.reset_index()
for state in stateAbbr['State']:
    plt.errorbar(pivot.MonthNums, pivot[state], fmt='2', 
                 linestyle='--', marker='.', markersize=8,capsize=4)
plt.show()

relationship_df = data.groupby(['Relationship']).CaseID.count().reset_index()
relationship_df

weapon_df = data.groupby(['Weapon']).CaseID.count().reset_index()
weapon_df

circumstance_df = data.groupby(['Circumstance']).CaseID.count().reset_index()
circumstance_df

year_df = data.groupby(['Year']).CaseID.count().reset_index()
year_df

NC_df = data.loc[data["StateAbbr"]=="NC"]
agency_df = NC_df.groupby(['Agency']).CaseID.count().reset_index()
agency_df.sort_values(['CaseID'], ascending=False)

MSA_df = NC_df.groupby(['MSA']).CaseID.count().reset_index()
MSA_df.sort_values(['CaseID'], ascending=False)

rel_state = data.groupby(['State','Relationship'])["CaseID"].count().reset_index()
relationship2=rel_state.sort_values(["CaseID","Relationship"], ascending=False)
relbystate=relationship2.drop_duplicates(["State"])
relbystate.head(5)

off_state = data.groupby(['State','OffRace'])["CaseID"].count().reset_index()
off2=off_state.sort_values(["CaseID","OffRace"], ascending=False)
offracebystate=off2.drop_duplicates(["State"])
offracebystate.head(51)

vic_state = data.groupby(['State','VicRace'])["CaseID"].count().reset_index()
vic2=vic_state.sort_values(["CaseID","VicRace"], ascending=False)
vicracebystate=vic2.drop_duplicates(["State"])
vicracebystate.head(51)

unsolved=data.loc[data["Solved"]=='No']
age_state = data.groupby(['State','VicAge'])["CaseID"].count().reset_index()
age2=age_state.sort_values(["CaseID","VicAge"], ascending=False)
vicagebystate=age2.drop_duplicates(["State"])
vicagebystate=vicagebystate.rename(columns={'CaseID':'Murders','VicAge': 'VictimAge'})
vicagebystate.head(51)

vicagebystate.plot(x='VictimAge',y='Murders',kind='Bar')

weapon_counts = data["Weapon"].value_counts().reset_index()
weaponcounts2 = weapon_counts.rename(columns={'Weapon':'Murders'})
weapon=weaponcounts2.rename(columns={"index":"Weapon"})
weapon

weapons=weapon.set_index("Weapon")
weapons2=weapons.head(20)
weapons2.plot(kind='bar',y='Murders',figsize=(30,10),fontsize=25,legend=False,color="red")
plt.xlabel("Weapon",fontsize=30)
plt.ylabel('Murders',fontsize=30)
plt.ylim(ymax=125000)
plt.title('Murder Weapons',fontsize=50)
plt.show()
savefig('weapon.png',transparent=True)

#vic age by state graph bar??
#vic sex of unsolved murders

csv_path = "clean_murder_data.csv"
data_df = pd.read_csv (csv_path)
data_df.head()

/Users/randi/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2698: DtypeWarning: Columns (15,19) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

data2=data_df.loc[data_df['Solved']=='No']

	ID	CNTYFIPS	Ori	State	Agency	Agentype	Source	Solved	Year	StateName	...	OffEthnic	Weapon	Relationship	Circumstance	Subcircum	FileDate	fstate	MSA
0	197601001AKASP00	Juneau, AK	AKASP00	Alaska	State Troopers	Primary state LE	FBI	Yes	1976	ALASKA	...	Unknown or not reported	Knife or cutting instrument	Husband	Other arguments	NaN	30180	Alaska	Rural Alaska
1	197601001AL00102	Jefferson, AL	AL00102	Alabama	Birmingham	Municipal police	FBI	Yes	1976	ALA	...	Unknown or not reported	Shotgun	Acquaintance	Felon killed by private citizen	Felon killed in commission of a crime	30180	Alabama	Birmingham-Hoover, AL
2	197601001AL00104	Jefferson, AL	AL00104	Alabama	Fairfield	Municipal police	FBI	Yes	1976	ALA	...	Unknown or not reported	Shotgun	Wife	Other	NaN	30180	Alabama	Birmingham-Hoover, AL
3	197601001AL00106	Jefferson, AL	AL00106	Alabama	Leeds	Municipal police	FBI	Yes	1976	ALA	...	Unknown or not reported	Knife or cutting instrument	Brother	Other arguments	NaN	30180	Alabama	Birmingham-Hoover, AL
4	197601001AL00201	Mobile, AL	AL00201	Alabama	Mobile	Municipal police	FBI	Yes	1976	ALA	...	Unknown or not reported	Strangulation - hanging	Acquaintance	Circumstances undetermined	NaN	30180	Alabama	Mobile, AL

	ID	CNTYFIPS	Ori	State	Agency	Agentype	Source	Solved	Year	StateName	...	OffEthnic	Weapon	Relationship	Circumstance	Subcircum	FileDate	fstate	MSA
0	197601001AKASP00	Juneau, AK	AKASP00	Alaska	State Troopers	Primary state LE	FBI	Yes	1976	ALASKA	...	Unknown or not reported	Knife or cutting instrument	Husband	Other arguments	NaN	30180	Alaska	Rural Alaska
1	197601001AL00102	Jefferson, AL	AL00102	Alabama	Birmingham	Municipal police	FBI	Yes	1976	ALA	...	Unknown or not reported	Shotgun	Acquaintance	Felon killed by private citizen	Felon killed in commission of a crime	30180	Alabama	Birmingham-Hoover, AL
2	197601001AL00104	Jefferson, AL	AL00104	Alabama	Fairfield	Municipal police	FBI	Yes	1976	ALA	...	Unknown or not reported	Shotgun	Wife	Other	NaN	30180	Alabama	Birmingham-Hoover, AL
3	197601001AL00106	Jefferson, AL	AL00106	Alabama	Leeds	Municipal police	FBI	Yes	1976	ALA	...	Unknown or not reported	Knife or cutting instrument	Brother	Other arguments	NaN	30180	Alabama	Birmingham-Hoover, AL
4	197601001AL00201	Mobile, AL	AL00201	Alabama	Mobile	Municipal police	FBI	Yes	1976	ALA	...	Unknown or not reported	Strangulation - hanging	Acquaintance	Circumstances undetermined	NaN	30180	Alabama	Mobile, AL