Messing with data¶

import pandas as pd
import numpy as np
import seaborn as sns

from matplotlib import pyplot as plt
from scipy import stats

data = pd.read_csv('clean_murder_data.csv')
stateAbbr = pd.read_csv("StatesAbbr.csv")

data.head()

C:\Users\Zachk\Anaconda3\envs\PythonData\lib\site-packages\IPython\core\interactiveshell.py:2728: DtypeWarning: Columns (16,20) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

print(data.columns)
print(stateAbbr.columns)
relationship_df = data.groupby(['Relationship']).CaseID.count().reset_index()
relationship_df

Index(['Unnamed: 0', 'CaseID', 'CNTYFIPS', 'Ori', 'State', 'Agency',
       'Agentype', 'Source', 'Solved', 'Year', 'StateName', 'Month',
       'Incident', 'ActionType', 'Homicide', 'Situation', 'VicAge', 'VicSex',
       'VicRace', 'VicEthnic', 'OffAge', 'OffSex', 'OffRace', 'OffEthnic',
       'Weapon', 'Relationship', 'Circumstance', 'Subcircum', 'VicCount',
       'OffCount', 'FileDate', 'fstate', 'MSA', 'StateAbbr'],
      dtype='object')
Index(['State', 'Abbreviation'], dtype='object')

Creating State database¶

# Total Murders by state since 1976
totalMurders = data.groupby('State')['CaseID'].count()
print(totalMurders.head())
totalMurders_df = totalMurders.reset_index().rename(columns={'CaseID':'TotalMurders'})
print(totalMurders_df.head())

State
Alabama        15687
Alaska          1952
Arizona        14482
Arkansas        8081
California    114621
Name: CaseID, dtype: int64
        State  TotalMurders
0     Alabama         15687
1      Alaska          1952
2     Arizona         14482
3    Arkansas          8081
4  California        114621

# Number of UNSOLVED homicides by state since 1976
unsolved = data.loc[data['Solved'] == 'No']
unsolved = unsolved.groupby('State')['CaseID'].count()
print(unsolved.head())
unsolved_df = unsolved.reset_index().rename(columns={'CaseID':'TotalUnsolved'})
print(unsolved_df.head())

State
Alabama        3350
Alaska          362
Arizona        4065
Arkansas       1217
California    41459
Name: CaseID, dtype: int64
        State  TotalUnsolved
0     Alabama           3350
1      Alaska            362
2     Arizona           4065
3    Arkansas           1217
4  California          41459

# Percent of UNSOLVED homicides by state since 1976
percentUnsolved = (unsolved/totalMurders)*100
percentUnsolved = percentUnsolved.round(1)
print(percentUnsolved.round(1).head())
percentUnsolved_df = pd.DataFrame(percentUnsolved).reset_index().rename(columns={'CaseID':'PercentUnsolved'})
print(percentUnsolved_df.head())

State
Alabama       21.4
Alaska        18.5
Arizona       28.1
Arkansas      15.1
California    36.2
Name: CaseID, dtype: float64
        State  PercentUnsolved
0     Alabama             21.4
1      Alaska             18.5
2     Arizona             28.1
3    Arkansas             15.1
4  California             36.2

# Number of SOLVED homicides by state since 1976
solved = data.loc[data['Solved'] == 'Yes']
solved = solved.groupby('State')['CaseID'].count()
print(solved.head())
solved_df = solved.reset_index().rename(columns={'CaseID':'TotalSolved'})
print(solved_df.head())

State
Alabama       12337
Alaska         1590
Arizona       10417
Arkansas       6864
California    73162
Name: CaseID, dtype: int64
        State  TotalSolved
0     Alabama        12337
1      Alaska         1590
2     Arizona        10417
3    Arkansas         6864
4  California        73162

# Percent of SOLVED homicides by state since 1976
percentSolved = (solved/totalMurders)*100
percentSolved = percentSolved.round(1)
print(percentSolved.head())
percentSolved_df = pd.DataFrame(percentSolved).reset_index().rename(columns={'CaseID':'PercentSolved'})
print(percentSolved_df.head())

State
Alabama       78.6
Alaska        81.5
Arizona       71.9
Arkansas      84.9
California    63.8
Name: CaseID, dtype: float64
        State  PercentSolved
0     Alabama           78.6
1      Alaska           81.5
2     Arizona           71.9
3    Arkansas           84.9
4  California           63.8

# stateWeapon= data.groupby(['State','Weapon'])["CaseID"].count().reset_index()
# print(stateWeapon.head())
# stateWeapon = stateWeapon.sort_values(["CaseID"], ascending=False)
# stateWeapon_df = stateWeapon.drop_duplicates(["State"])
# print(stateWeapon_df.head())

# Merge all State data to create final csv
state_db = stateAbbr.merge(totalMurders_df,on="State")
state_db = state_db.merge(unsolved_df, on="State")
state_db = state_db.merge(percentUnsolved_df, on="State")
state_db = state_db.merge(solved_df, on="State")
state_db = state_db.merge(percentSolved_df, on="State")
print(state_db.head())
state_db.to_csv("state_db.csv", index=False)

        State Abbreviation  TotalMurders  TotalUnsolved  PercentUnsolved  \
0     Alabama           AL         15687           3350             21.4   
1      Alaska           AK          1952            362             18.5   
2     Arizona           AZ         14482           4065             28.1   
3    Arkansas           AR          8081           1217             15.1   
4  California           CA        114621          41459             36.2   

   TotalSolved  PercentSolved  
0        12337           78.6  
1         1590           81.5  
2        10417           71.9  
3         6864           84.9  
4        73162           63.8

Testing for Murder data by month¶

def month_string_to_number(string):
    m = {
        'jan': 1,
        'feb': 2,
        'mar': 3,
        'apr':4,
         'may':5,
         'jun':6,
         'jul':7,
         'aug':8,
         'sep':9,
         'oct':10,
         'nov':11,
         'dec':12
        }
    s = string.strip()[:3].lower()

    try:
        out = m[s]
        return out
    except:
        raise ValueError('Not a month')

stateMurdersByMonth = data.groupby(['State','Month'])['CaseID'].count().reset_index()
#print(stateMurdersByMonth.head())
stateMurdersByMonth['MonthNums']=stateMurdersByMonth['Month'].map(month_string_to_number)
stateMurdersByMonth = stateMurdersByMonth.rename(columns={'CaseID':'TotalMurders'})
stateMurdersByMonth = stateMurdersByMonth.sort_values(['State',"MonthNums"], ascending=True)

plotala=stateMurdersByMonth.loc[stateMurdersByMonth['State']=='North Carolina']
print(plotala)
plt.errorbar(plotala.MonthNums, plotala['TotalMurders'], 
                 linestyle='--', marker='.', markersize=8)

              State      Month  TotalMurders  MonthNums
400  North Carolina    January          2029          1
399  North Carolina   February          1757          2
403  North Carolina      March          1921          3
396  North Carolina      April          1962          4
404  North Carolina        May          2002          5
402  North Carolina       June          2019          6
401  North Carolina       July          2183          7
397  North Carolina     August          2164          8
407  North Carolina  September          1967          9
406  North Carolina    October          1994         10
405  North Carolina   November          1936         11
398  North Carolina   December          2024         12

<Container object of 3 artists>

pivot = stateMurdersByMonth.pivot(index='MonthNums', columns='State',values='TotalMurders')
print(pivot.head())
pivot = pivot.reset_index()
for state in stateAbbr['State']:
    plt.errorbar(pivot.MonthNums, pivot[state], fmt='2', 
                 linestyle='--', marker='.', markersize=8,capsize=4)
plt.show()

State      Alabama  Alaska  Arizona  Arkansas  California  Colorado  \
MonthNums                                                             
1             1314     154     1188       704        9513       629   
2             1120     148     1068       572        8233       545   
3             1205     193     1240       646        9005       589   
4             1243     135     1191       642        9105       622   
5             1245     173     1195       684        9623       595   

State      Connecticut  Delaware  District of Columbia  Florida   ...     \
MonthNums                                                         ...      
1                  444       121                   711     3722   ...      
2                  358       123                   604     3346   ...      
3                  383       122                   641     3665   ...      
4                  406       112                   571     3483   ...      
5                  443       117                   707     3732   ...      

State      South Dakota  Tennessee  Texas  Utah  Vermont  Virginia  \
MonthNums                                                            
1                    45       1362   5757   180       20      1581   
2                    45       1245   5291   176       33      1309   
3                    32       1437   5847   164       38      1463   
4                    51       1410   5883   195       37      1539   
5                    47       1467   6226   206       35      1543   

State      Washington  West Virginia  Wisconsin  Wyoming  
MonthNums                                                 
1                 717            290        561       55  
2                 682            289        511       44  
3                 792            293        525       61  
4                 701            281        590       61  
5                 669            314        629       60  

[5 rows x 51 columns]

relationship_df = data.groupby(['Relationship']).CaseID.count().reset_index()
relationship_df

weapon_df = data.groupby(['Weapon']).CaseID.count().reset_index()
weapon_df

circumstance_df = data.groupby(['Circumstance']).CaseID.count().reset_index()
circumstance_df

year_df = data.groupby(['Year']).CaseID.count().reset_index()
year_df

NC_df = data.loc[data["StateAbbr"]=="NC"]
agency_df = NC_df.groupby(['Agency']).CaseID.count().reset_index()
agency_df=agency_df.rename(columns={'CaseID':'Murders'})
agency_sort=agency_df.sort_values(['Murders'], ascending=False)
agency=agency_sort.head(10)
agency

agency.reset_index(drop=True,inplace=True)
labels=agency['Agency']
colors=['red','tan','darkred','grey']
agency.plot.pie('Murders',labels=labels,legend=False,autopct="%0.0f%%",colors=colors,fontsize=12)
plt.axis('equal')
plt.title("North Carolinas Most Murderous Cities",fontsize=15)
plt.ylabel('')
fig = plt.gcf()
fig.set_size_inches(10,10)
plt.tight_layout()
plt.savefig('ncmurder.png', bbox_inches='tight', transparent=True)
plt.show()

MSA_df = NC_df.groupby(['MSA']).CaseID.count().reset_index()
MSA_df.sort_values(['CaseID'], ascending=False)

rel_state = data.groupby(['State','Relationship'])["CaseID"].count().reset_index()
relationship2=rel_state.sort_values(["CaseID","Relationship"], ascending=False)
relbystate=relationship2.drop_duplicates(["State"])
relbystate.head(5)

off_state = data.groupby(['State','OffRace'])["CaseID"].count().reset_index()
off2=off_state.sort_values(["CaseID","OffRace"], ascending=False)
offracebystate=off2.drop_duplicates(["State"])
offracebystate.head(51)

vic_state = data.groupby(['State','VicRace'])["CaseID"].count().reset_index()
vic2=vic_state.sort_values(["CaseID","VicRace"], ascending=False)
vicracebystate=vic2.drop_duplicates(["State"])
vicracebystate.head(51)

unsolved=data.loc[data["Solved"]=='No']
age_state = data.groupby(['State','VicAge'])["CaseID"].count().reset_index()
age2=age_state.sort_values(["CaseID","VicAge"], ascending=False)
vicagebystate=age2.drop_duplicates(["State"])
vicagebystate=vicagebystate.rename(columns={'CaseID':'Murders','VicAge': 'VictimAge'})
vicagebystate.head(51)

weapon_counts = data["Weapon"].value_counts().reset_index()
weaponcounts2 = weapon_counts.rename(columns={'Weapon':'Murders'})
weapon=weaponcounts2.rename(columns={"index":"Weapon"})
weapon

weapons=weapon.set_index("Weapon")
weapons2=weapons.head(20)
weapons2.plot(kind='bar',y='Murders',figsize=(15,15),fontsize=15,legend=False,color="red",)
plt.xlabel("Weapon",fontsize=30)
plt.ylabel('Murders',fontsize=30)
plt.ylim(ymax=125000)
plt.title('Murder Weapons',fontsize=40)
plt.tight_layout()
plt.savefig('mweapons.png',transparent=True)
plt.show()

	Unnamed: 0	CaseID	CNTYFIPS	Ori	State	Agency	Agentype	Source	Solved	Year	...	Weapon	Relationship	Circumstance	Subcircum	OffCount	FileDate	fstate	MSA	StateAbbr
0	0	197601001AKASP00	Juneau, AK	AL00102	Alabama	Birmingham	Municipal police	FBI	Yes	1976	...	Shotgun	Acquaintance	Felon killed by private citizen	Felon killed in commission of a crime	0	30180	Alabama	Birmingham-Hoover, AL	AL
1	1	197601001AL00102	Jefferson, AL	AL00104	Alabama	Fairfield	Municipal police	FBI	Yes	1976	...	Shotgun	Wife	Other	NaN	0	30180	Alabama	Birmingham-Hoover, AL	AL
2	2	197601001AL00104	Jefferson, AL	AL00106	Alabama	Leeds	Municipal police	FBI	Yes	1976	...	Knife or cutting instrument	Brother	Other arguments	NaN	0	30180	Alabama	Birmingham-Hoover, AL	AL
3	3	197601001AL00106	Jefferson, AL	AL00201	Alabama	Mobile	Municipal police	FBI	Yes	1976	...	Strangulation - hanging	Acquaintance	Circumstances undetermined	NaN	0	30180	Alabama	Mobile, AL	AL
4	4	197601001AL00201	Mobile, AL	AL00202	Alabama	Prichard	Municipal police	FBI	Yes	1976	...	Rifle	Friend	Other arguments	NaN	2	30180	Alabama	Mobile, AL	AL

	Relationship	CaseID
0	Acquaintance	150341
1	Boyfriend	8687
2	Brother	6633
3	Common-law husband	2684
4	Common-law wife	3220
5	Daughter	8679
6	Employee	441
7	Employer	573
8	Ex-husband	865
9	Ex-wife	2330
10	Father	5380
11	Friend	26196
12	Girlfriend	18876
13	Homosexual relationship	1636
14	Husband	11797
15	In-law	4728
16	Mother	5044
17	Neighbor	7888
18	Other - known to victim	33464
19	Other family	11186
20	Relationship not determined	283106
21	Sister	1564
22	Son	11661
23	Stepdaughter	860
24	Stepfather	1686
25	Stepmother	261
26	Stepson	1438
27	Stranger	112668
28	Wife	27893

	Relationship	CaseID
0	Acquaintance	150341
1	Boyfriend	8687
2	Brother	6633
3	Common-law husband	2684
4	Common-law wife	3220
5	Daughter	8679
6	Employee	441
7	Employer	573
8	Ex-husband	865
9	Ex-wife	2330
10	Father	5380
11	Friend	26196
12	Girlfriend	18876
13	Homosexual relationship	1636
14	Husband	11797
15	In-law	4728
16	Mother	5044
17	Neighbor	7888
18	Other - known to victim	33464
19	Other family	11186
20	Relationship not determined	283106
21	Sister	1564
22	Son	11661
23	Stepdaughter	860
24	Stepfather	1686
25	Stepmother	261
26	Stepson	1438
27	Stranger	112668
28	Wife	27893

	Weapon	CaseID
0	Asphyxiation - includes death by gas	4597
1	Blunt object - hammer, club, etc	34176
2	Drowning	1461
3	Explosives	602
4	Fire	7453
5	Firearm, type not stated	57238
6	Handgun - pistol, revolver, etc	370146
7	Knife or cutting instrument	112692
8	Narcotics or drugs, sleeping pills	1979
9	Other gun	2680
10	Other or type unknown	37552
11	Personal weapons, includes beating	43819
12	Poison - does not include gas	521
13	Pushed or thrown out window	247
14	Rifle	28626
15	Shotgun	38196
16	Strangulation - hanging	9800

	Circumstance	CaseID
0	Abortion	10
1	All other manslaughter by negligence	4560
2	All suspected felony type	13108
3	Argument over money or property	15152
4	Arson	4547
5	Brawl due to influence of alcohol	16004
6	Brawl due to influence of narcotics	5014
7	Burglary	6306
8	Child killed by babysitter	1241
9	Children playing with gun	1372
10	Circumstances undetermined	194564
11	Felon killed by police	16037
12	Felon killed by private citizen	12251
13	Gambling	1065
14	Gangland killings	4451
15	Gun-cleaning death - other than self	144
16	Institutional killings	1064
17	Juvenile gang killings	23652
18	Larceny	738
19	Lovers triangle	11895
20	Motor vehicle theft	1253
21	Narcotic drug laws	28205
22	Other	97130
23	Other - not specified	24112
24	Other arguments	198205
25	Other negligent handling of gun	3550
26	Other sex offense	1441
27	Prostitution and commercialized vice	598
28	Rape	4173
29	Robbery	58986
30	Sniper attack	548
31	Victim shot in hunting accident	409

	Year	CaseID
0	1976	17619
1	1977	18844
2	1978	19523
3	1979	21698
4	1980	23092
5	1981	21208
6	1982	20544
7	1983	19653
8	1984	18093
9	1985	18386
10	1986	20125
11	1987	18783
12	1988	18779
13	1989	19868
14	1990	21246
15	1991	22657
16	1992	23793
17	1993	24336
18	1994	23246
19	1995	21179
20	1996	18996
21	1997	17830
22	1998	16089
23	1999	14644
24	2000	14692
25	2001	15821
26	2002	16287
27	2003	16532
28	2004	16246
29	2005	16852
30	2006	17294
31	2007	17327
32	2008	15610
33	2009	15857
34	2010	15133
35	2011	15119
36	2012	15390
37	2013	14841
38	2014	14702
39	2015	16064
40	2016	17787

	Agency	Murders
65	Charlotte-Mecklenburg	2869
145	Greensboro	959
103	Durham	950
397	Winston-Salem	823
289	Raleigh	705
121	Fayetteville	700
89	Cumberland County	588
304	Robeson County	527
161	High Point	363
390	Wilmington	351

	MSA	CaseID
12	Rural North Carolina	7543
2	Charlotte-Gastonia-Concord, NC-SC	4355
6	Greensboro-High Point, NC	1970
10	Raleigh-Cary, NC	1581
3	Durham, NC	1493
4	Fayetteville, NC	1469
15	Winston-Salem, NC	1127
8	Hickory-Lenoir-Morganton, NC	875
0	Asheville, NC	691
11	Rocky Mount, NC	652
14	Wilmington, NC	636
5	Goldsboro, NC	479
7	Greenville, NC	439
1	Burlington, NC	334
9	Jacksonville, NC	286
13	Virginia Beach-Norfolk-Newport News, VA-NC	28

	State	Relationship	CaseID
136	California	Relationship not determined	39349
931	New York	Relationship not determined	34271
275	Florida	Relationship not determined	28105
1237	Texas	Relationship not determined	22396
648	Michigan	Relationship not determined	14674

	State	OffRace	CaseID
24	California	White	46773
218	Texas	White	34012
163	New York	Unknown	27423
49	Florida	White	15951
112	Michigan	Black	15741
67	Illinois	Black	14029
191	Pennsylvania	Black	12644
52	Georgia	Black	12624
92	Louisiana	Black	12252
167	North Carolina	Black	11259
176	Ohio	Black	9887
211	Tennessee	Black	8695
230	Virginia	Black	8644
102	Maryland	Black	8439
14	Arizona	White	8408
2	Alabama	Black	8164
201	South Carolina	Black	7999
128	Missouri	Unknown	6452
152	New Jersey	Black	6086
183	Oklahoma	White	5428
43	District of Columbia	Unknown	5202
122	Mississippi	Black	5045
237	Washington	White	4979
89	Kentucky	White	4685
29	Colorado	White	4632
72	Indiana	Black	4632
17	Arkansas	Black	3850
188	Oregon	White	3270
159	New Mexico	White	3027
245	Wisconsin	Black	3003
144	Nevada	White	2967
242	West Virginia	White	2759
109	Massachusetts	White	2753
34	Connecticut	White	2004
119	Minnesota	White	1906
84	Kansas	White	1850
223	Utah	White	1658
79	Iowa	White	1361
64	Idaho	White	1204
139	Nebraska	White	909
99	Maine	White	858
9	Alaska	White	825
37	Delaware	Black	671
198	Rhode Island	White	644
252	Wyoming	White	632
149	New Hampshire	White	606
56	Hawaii	Asian or Pacific Islander	601
134	Montana	White	597
227	Vermont	White	381
208	South Dakota	White	337
173	North Dakota	White	265

	State	VicRace	CaseID
24	California	White	74483
217	Texas	White	47024
160	New York	Black	29621
48	Florida	White	23526
111	Michigan	Black	23125
66	Illinois	Black	21950
91	Louisiana	Black	17064
190	Pennsylvania	Black	16656
51	Georgia	Black	16614
101	Maryland	Black	14709
175	Ohio	Black	13354
165	North Carolina	Black	12992
14	Arizona	White	11925
126	Missouri	Black	10980
229	Virginia	Black	10546
2	Alabama	Black	10115
210	Tennessee	Black	9678
150	New Jersey	Black	9420
200	South Carolina	Black	8166
41	District of Columbia	Black	7509
71	Indiana	Black	7104
182	Oklahoma	White	6614
236	Washington	White	6480
29	Colorado	White	6080
88	Kentucky	White	5998
121	Mississippi	Black	5621
143	Nevada	White	4392
17	Arkansas	Black	4262
157	New Mexico	White	4072
187	Oregon	White	4012
108	Massachusetts	White	4009
246	Wisconsin	White	3609
241	West Virginia	White	3132
34	Connecticut	White	2940
118	Minnesota	White	2514
83	Kansas	White	2422
222	Utah	White	1983
78	Iowa	White	1644
63	Idaho	White	1316
138	Nebraska	White	1077
9	Alaska	White	1027
98	Maine	White	996
197	Rhode Island	White	966
36	Delaware	Black	804
55	Hawaii	Asian or Pacific Islander	742
147	New Hampshire	White	731
251	Wyoming	White	710
133	Montana	White	653
226	Vermont	White	435
207	South Dakota	White	381
172	North Dakota	White	304

	State	VictimAge	Murders
392	California	20	4749
3414	New York	25	2549
4812	Texas	22	1939
1425	Illinois	19	1479
1052	Florida	25	1356
2297	Michigan	21	1236
1912	Louisiana	22	951
1150	Georgia	23	866
3602	North Carolina	22	843
2101	Maryland	20	824
3773	Ohio	20	806
0	Alabama	0	708
5088	Virginia	22	693
2589	Missouri	21	676
4075	Pennsylvania	21	667
4619	Tennessee	23	627
1530	Indiana	23	523
199	Arizona	22	521
4441	South Carolina	25	480
869	District of Columbia	20	402
3877	Oklahoma	25	339
3209	New Jersey	21	321
480	Colorado	1	316
2199	Massachusetts	20	314
5187	Washington	22	297
5367	Wisconsin	1	292
2494	Mississippi	23	281
1820	Kentucky	25	279
298	Arkansas	23	256
2383	Minnesota	1	207
2932	Nevada	19	203
598	Connecticut	22	198
3958	Oregon	1	163
3308	New Mexico	23	161
1709	Kansas	1	146
4898	Utah	1	144
5294	West Virginia	29	115
1615	Iowa	1	110
100	Alaska	1	72
2826	Nebraska	1	72
1323	Idaho	1	69
4270	Rhode Island	20	68
1250	Hawaii	24	60
788	Delaware	29	54
5463	Wyoming	1	53
4522	South Dakota	1	38
1996	Maine	1	37
3010	New Hampshire	1	34
2675	Montana	1	29
4989	Vermont	1	23
3686	North Dakota	1	22