{ "cells": [ { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "# Replicating ProPublica's COMPAS Audit" ] }, { "cell_type": "markdown", "metadata": { "tags": [] }, "source": [ "## Why COMPAS?\n", "\n", "ProPublica started the COMPAS debate with the article [Machine Bias](https://www.propublica.org/article/machine-bias-risk-assessments-in-criminal-sentencing). With their article, they also released details of their methodology and their [data and code](https://github.com/propublica/compas-analysis). This provides a real data set for research on how data is used in a criminal justice setting, without researchers having to file their own requests for information, so it has been reused many times.\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import scipy\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import itertools\n", "from sklearn.metrics import roc_curve\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "tags": [] }, "outputs": [], "source": [ "propublica_data_url = 'https://github.com/propublica/compas-analysis/raw/master/compas-scores-two-years.csv'\n", "df_pp = pd.read_csv(propublica_data_url,\n", " header=0).set_index('id')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | name | \n", "first | \n", "last | \n", "compas_screening_date | \n", "sex | \n", "dob | \n", "age | \n", "age_cat | \n", "race | \n", "juv_fel_count | \n", "... | \n", "v_decile_score | \n", "v_score_text | \n", "v_screening_date | \n", "in_custody | \n", "out_custody | \n", "priors_count.1 | \n", "start | \n", "end | \n", "event | \n", "two_year_recid | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1 | \n", "miguel hernandez | \n", "miguel | \n", "hernandez | \n", "2013-08-14 | \n", "Male | \n", "1947-04-18 | \n", "69 | \n", "Greater than 45 | \n", "Other | \n", "0 | \n", "... | \n", "1 | \n", "Low | \n", "2013-08-14 | \n", "2014-07-07 | \n", "2014-07-14 | \n", "0 | \n", "0 | \n", "327 | \n", "0 | \n", "0 | \n", "
3 | \n", "kevon dixon | \n", "kevon | \n", "dixon | \n", "2013-01-27 | \n", "Male | \n", "1982-01-22 | \n", "34 | \n", "25 - 45 | \n", "African-American | \n", "0 | \n", "... | \n", "1 | \n", "Low | \n", "2013-01-27 | \n", "2013-01-26 | \n", "2013-02-05 | \n", "0 | \n", "9 | \n", "159 | \n", "1 | \n", "1 | \n", "
4 | \n", "ed philo | \n", "ed | \n", "philo | \n", "2013-04-14 | \n", "Male | \n", "1991-05-14 | \n", "24 | \n", "Less than 25 | \n", "African-American | \n", "0 | \n", "... | \n", "3 | \n", "Low | \n", "2013-04-14 | \n", "2013-06-16 | \n", "2013-06-16 | \n", "4 | \n", "0 | \n", "63 | \n", "0 | \n", "1 | \n", "
5 | \n", "marcu brown | \n", "marcu | \n", "brown | \n", "2013-01-13 | \n", "Male | \n", "1993-01-21 | \n", "23 | \n", "Less than 25 | \n", "African-American | \n", "0 | \n", "... | \n", "6 | \n", "Medium | \n", "2013-01-13 | \n", "NaN | \n", "NaN | \n", "1 | \n", "0 | \n", "1174 | \n", "0 | \n", "0 | \n", "
6 | \n", "bouthy pierrelouis | \n", "bouthy | \n", "pierrelouis | \n", "2013-03-26 | \n", "Male | \n", "1973-01-22 | \n", "43 | \n", "25 - 45 | \n", "Other | \n", "0 | \n", "... | \n", "1 | \n", "Low | \n", "2013-03-26 | \n", "NaN | \n", "NaN | \n", "2 | \n", "0 | \n", "1102 | \n", "0 | \n", "0 | \n", "
5 rows × 52 columns
\n", "\n", " | name | \n", "first | \n", "last | \n", "compas_screening_date | \n", "sex | \n", "dob | \n", "age | \n", "age_cat | \n", "race | \n", "juv_fel_count | \n", "... | \n", "v_decile_score | \n", "v_score_text | \n", "v_screening_date | \n", "in_custody | \n", "out_custody | \n", "priors_count.1 | \n", "start | \n", "end | \n", "event | \n", "two_year_recid | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
10996 | \n", "steven butler | \n", "steven | \n", "butler | \n", "2013-11-23 | \n", "Male | \n", "1992-07-17 | \n", "23 | \n", "Less than 25 | \n", "African-American | \n", "0 | \n", "... | \n", "5 | \n", "Medium | \n", "2013-11-23 | \n", "2013-11-22 | \n", "2013-11-24 | \n", "0 | \n", "1 | \n", "860 | \n", "0 | \n", "0 | \n", "
10997 | \n", "malcolm simmons | \n", "malcolm | \n", "simmons | \n", "2014-02-01 | \n", "Male | \n", "1993-03-25 | \n", "23 | \n", "Less than 25 | \n", "African-American | \n", "0 | \n", "... | \n", "5 | \n", "Medium | \n", "2014-02-01 | \n", "2014-01-31 | \n", "2014-02-02 | \n", "0 | \n", "1 | \n", "790 | \n", "0 | \n", "0 | \n", "
10999 | \n", "winston gregory | \n", "winston | \n", "gregory | \n", "2014-01-14 | \n", "Male | \n", "1958-10-01 | \n", "57 | \n", "Greater than 45 | \n", "Other | \n", "0 | \n", "... | \n", "1 | \n", "Low | \n", "2014-01-14 | \n", "2014-01-13 | \n", "2014-01-14 | \n", "0 | \n", "0 | \n", "808 | \n", "0 | \n", "0 | \n", "
11000 | \n", "farrah jean | \n", "farrah | \n", "jean | \n", "2014-03-09 | \n", "Female | \n", "1982-11-17 | \n", "33 | \n", "25 - 45 | \n", "African-American | \n", "0 | \n", "... | \n", "2 | \n", "Low | \n", "2014-03-09 | \n", "2014-03-08 | \n", "2014-03-09 | \n", "3 | \n", "0 | \n", "754 | \n", "0 | \n", "0 | \n", "
11001 | \n", "florencia sanmartin | \n", "florencia | \n", "sanmartin | \n", "2014-06-30 | \n", "Female | \n", "1992-12-18 | \n", "23 | \n", "Less than 25 | \n", "Hispanic | \n", "0 | \n", "... | \n", "4 | \n", "Low | \n", "2014-06-30 | \n", "2015-03-15 | \n", "2015-03-15 | \n", "2 | \n", "0 | \n", "258 | \n", "0 | \n", "1 | \n", "
5 rows × 52 columns
\n", "