practice

practice#

age: defendant’s age
c_charge_degree: degree charged (Misdemeanor or Felony)
race: defendant’s race
age_cat: defendant’s age quantized in “less than 25”, “25-45”, or “over 45”
score_text: COMPAS score: ‘low’ (1 to 4), ‘medium’ (5 to 7), and ‘high’ (8 to 10).
sex: defendant’s gender
priors_count: number of prior charges
days_b_screening_arrest: number of days between charge date and arrest where defendant was screened for compas score
decile_score: COMPAS score from 1 to 10 (low risk to high risk)
is_recid: if the defendant recidivized
two_year_recid: if the defendant recidivized within two years
c_jail_in: date defendant was imprisoned
c_jail_out: date defendant was released from jail
length_of_stay: length of jail stay

import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from sklearn.metrics import roc_curve
import warnings
warnings.filterwarnings('ignore')

# Remote CSV holding the COMPAS data used below.
clean_data_url = 'https://raw.githubusercontent.com/ml4sts/outreach-compas/main/data/compas_c.csv'

# Read the CSV (first row is the header) and use the 'id' column as the index.
df = pd.read_csv(clean_data_url, header=0, index_col='id')

# Preview the first five rows.
df.head()

	age	c_charge_degree	race	age_cat	score_text	sex	priors_count	days_b_screening_arrest	decile_score	is_recid	two_year_recid	c_jail_in	c_jail_out	length_of_stay
id
3	34	F	African-American	25 - 45	Low	Male	0	-1.0	3	1	1	2013-01-26 03:45:27	2013-02-05 05:36:53	10
4	24	F	African-American	Less than 25	Low	Male	4	-1.0	4	1	1	2013-04-13 04:58:34	2013-04-14 07:02:04	1
8	41	F	Caucasian	25 - 45	Medium	Male	14	-1.0	6	1	1	2014-02-18 05:08:24	2014-02-24 12:18:30	6
10	39	M	Caucasian	25 - 45	Low	Female	0	-1.0	1	0	0	2014-03-15 05:35:34	2014-03-18 04:28:46	2
14	27	F	Caucasian	25 - 45	Low	Male	0	-1.0	4	0	0	2013-11-25 06:31:06	2013-11-26 08:26:57	1

# Preview the last five rows.
df.tail(n=5)

	age	c_charge_degree	race	age_cat	score_text	sex	priors_count	days_b_screening_arrest	decile_score	is_recid	two_year_recid	c_jail_in	c_jail_out	length_of_stay
id
10994	30	M	African-American	25 - 45	Low	Male	0	-1.0	2	1	1	2014-05-09 10:01:33	2014-05-10 08:28:12	0
10995	20	F	African-American	Less than 25	High	Male	0	-1.0	9	0	0	2013-10-19 11:17:15	2013-10-20 08:13:06	0
10996	23	F	African-American	Less than 25	Medium	Male	0	-1.0	7	0	0	2013-11-22 05:18:27	2013-11-24 02:59:20	1
10997	23	F	African-American	Less than 25	Low	Male	0	-1.0	3	0	0	2014-01-31 07:13:54	2014-02-02 04:03:52	1
11000	33	M	African-American	25 - 45	Low	Female	3	-1.0	2	0	0	2014-03-08 08:06:02	2014-03-09 12:18:04	1

# Count how many defendants fall under each value of the 'sex' column.
Sex = df['sex'].value_counts()

# Draw the counts as a bar chart, one bar per category.
Sex.plot.bar()

<Axes: xlabel='sex'>

_images/8bdbe990827f8b5b753fdda5552524593e1193bfb4f124beaabb324a8c9f519f.png

# Count how many defendants fall into each age category.
Age = df['age_cat'].value_counts()

# Draw the counts as a bar chart, one bar per age category.
Age.plot.bar()

<Axes: xlabel='age_cat'>

_images/e00d587fd0f1100b9ffc75e54e189cfd1a6b4bbbb5f7ee77fb565d92fac271fa.png

# Tally how many defendants share each priors_count value.
priorA = df['priors_count'].value_counts()

# One bar per distinct priors_count value.
priorA.plot.bar()
# Observation: 0 is the most common number of priors.

<Axes: xlabel='priors_count'>

_images/dba9d04fb762259533af28fcf96310dad91a725c023cd8ff51f6f75cafe77160.png

# Count defendants by two_year_recid value (0 or 1).
REa = df['two_year_recid'].value_counts()

# Draw the two counts as a bar chart.
REa.plot.bar()

<Axes: xlabel='two_year_recid'>

_images/626b0b636797dc46933c317b8e04e13d0ecbfcd0818474b3894b749c820c3dec.png

# Show the raw counts for each two_year_recid value.
REa

two_year_recid
0    2795
1    2483
Name: count, dtype: int64

# Percentage of defendants whose two_year_recid value is 1.
# Derived from the counts in REa rather than retyping the numbers by hand,
# so the figure stays correct if the underlying data changes.
# float() keeps the displayed result a plain Python float.
float(REa.loc[1] / REa.sum() * 100)

47.04433497536946

# About 47% of defendants recidivated (were re-arrested) within two years.

# Tally how many defendants received each COMPAS decile score.
compasS = df['decile_score'].value_counts()

# One bar per decile score.
compasS.plot.bar()
# Observation: 1 is the most common score.

<Axes: xlabel='decile_score'>

_images/fd836d6ea22ac4c7480476a2fc799f78223b2de445374bee3c7c58742c7fc87a.png