# Root directory of the project on the original author's machine.
homepath = '/home/gswarrin/research/gerrymander/'

##################################################
# imports
import pystan
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# import seaborn.apionly as sns; sns.set_context('notebook')
from matplotlib.colors import LinearSegmentedColormap
import scipy.stats as stats

# scatter_matrix moved from pandas.tools.plotting to pandas.plotting in
# pandas 0.20 and the old location was later removed.  Try the current
# location first and fall back so older installations keep working.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix

##################################################
import util  # for making histograms and scatter plots
import metrics as mt  # functions for computing metrics (e.g., declination, EG)
# read_data requires the classes module
import read_data as rd  # functions for reading in all data
import comparison as cmp
import comparison_pics as cmppics

# This should only be needed if reimputing (change to import)
# load(homepath + 'model.py') # statistical model for imputing values

# Index of state in this list corresponds to the convention many of our data files use.
# '00' is added at the beginning to make this correspondence correct.
# State abbreviations; the leading '00' placeholder shifts every real state
# to a 1-based position.  No backslash continuations are needed inside the
# brackets.
stlist = ['00', 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY',
          'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH',
          'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY']

GLOBAL_MIN_YEAR = 1971

#############################
# Read in saved election data
#############################
ryrs, rstates, rcycstates, relecs = rd.read_elections('elec-data-dec30.csv')

#####################################################
# for working with pandas approach to comparison data
# everything should start here - 05/23/18
#####################################################

# Minimum number of districts an election must have to be kept
# (compared against the 'N' column below).
N = 7

# read in data and compute metrics
df = cmp.make_df(relecs)
df = cmp.populate_metrics(df, cmp.metric_dict, relecs)

alle_df = df[df['N'] >= N]           # all elections with at least minimum number of districts
nosw_df = alle_df[alle_df['nosw']]   # only elections that aren't sweeps
comp_df = alle_df[alle_df['comp']]   # competitive elections with Dem support between 45% and 55%

# get correlation matrices among various statistics
# The metric names are materialised as a real list so the column selection
# does not depend on .keys() returning a list (true only on Python 2).
corr_nosw_df = nosw_df[list(cmp.metric_dict)].corr()
corr_comp_df = comp_df[list(cmp.metric_dict)].corr()

# TODO: Add in rescaled valuations as columns?
alle_df = cmp.rescale_metrics(alle_df, cmp.metric_dict)
nosw_df = cmp.rescale_metrics(nosw_df, cmp.metric_dict)
comp_df = cmp.rescale_metrics(comp_df, cmp.metric_dict)

####################################################################
# generate tables and pictures for comparison paper
####################################################################

###### Illustration of how we plot elections
dvotes = [0.35, 0.40, 0.45, 0.6, 0.8]
cmppics.comp_std_example('risk-defex', dvotes)

###### Definition of declination
# TODO: Pick a different election to show not using median districts?
cmppics.comp_dec_def_create('risk-dec-def', '2014_NC_11', relecs)

###### Table of means and std deviations for measures
cmppics.distribution_table(nosw_df, cmp.metric_dict)

###### Overlay rescaled distributions (kde plots)
cmppics.distribution_grid('dist_grid', nosw_df)

###### Scatter matrix of correlations
cmppics.pic_scatter_matrix('comp-scatter-matrix', nosw_df)

###### make tables of worst evaluations
# print "competitive ones; worst is first"
w55df = cmppics.table_worst_elecs(comp_df, rn=True, prout=False)
#
# print "all ; worst is first"
walldf = cmppics.table_worst_elecs(alle_df, rn=True, prout=False)

# Stack the competitive-only table on top of the all-elections table and
# keep the metric columns in this fixed order.
result = pd.concat([w55df, walldf], ignore_index=True)
result = result[['EG', 'DG', 'LG', 'SG', 'VC1', 'VC2', 'Dec', 'BDec', 'MM', 'Bias', 'Lop', 'EVW']]

# print "Combined table"
# A single-argument print(...) produces the same output under the Python 2
# print statement and the Python 3 print function.
print(result.to_latex())

####### worst elections for each measure (as grid of pictures)
# Not very efficient to recompute, but doesn't seem worth fixing
# Not currently used because leads to too much redundancy in pictures
# w55df = cmppics.table_worst_elecs(comp_df,rn=False)
# walldf = cmppics.table_worst_elecs(alle_df,rn=False)
# cmppics.make_all_worst_triples('worst-trips',w55df.loc[:2,:],walldf.loc[:2,:],metric_dict.keys(),relecs)

####### worst elections for each measure
# these are hard-coded to minimize redundancy in figures in paper - need to check manually against tables.
cmppics.make_extremal_pics(alle_df, relecs, cmp.metric_dict)

####### hypothetical elections (as grid of pictures)
cmppics.comp_make_hypo_grid('hypo_grid', cmppics.hypo_elecs)

####### make table of evaluations on hypothetical elections
cmppics.table_hypo(nosw_df, cmppics.hypo_elecs, cmp.metric_dict)

####### make pictures of disagreements
# NOTE(review): `arr` from the comp_df call is immediately overwritten by the
# alle_df call.  Both calls are kept because make_pairwise presumably also
# modifies the frame it is given (e.g. adds pairwise columns) - confirm in
# comparison.py before removing either one.
arr = cmp.make_pairwise(comp_df, ['EG', 'BDec', 'MM', 'Bias'])
arr = cmp.make_pairwise(alle_df, ['EG', 'BDec', 'MM', 'Bias'])
pair_tdf = cmppics.table_worst_elecs(comp_df, arr, rn=False)
pair_udf = cmppics.table_worst_elecs(alle_df, arr, rn=False)
# .loc[:2, :] passes only the rows labelled up to 2 of each table.
cmppics.make_worst_disagreements('worst-pair-diffs', alle_df,
                                 pair_tdf.loc[:2, :], pair_udf.loc[:2, :],
                                 arr, relecs, cmp.metric_dict)

####### find_least_consensus(nosw_df)
cmppics.make_worst_rms('disagree', alle_df, comp_df, relecs, cmp.metric_dict, r=2, c=4)

####### pull out elections at various intervals - only comp used in paper (methinks)
# find_0123('nosw-samples',nosw_df,relecs,cmp.metric_dict,['EG','BDec','MM','Bias'])
cmppics.find_0123('comp-samples', comp_df, relecs, cmp.metric_dict, ['EG', 'BDec', 'MM', 'Bias'])

####### example talking about the efficiency principle
cmppics.make_ep_ex('EP-ex', nosw_df, relecs, cmp.metric_dict)

####################################################################
# generate data referenced in comparison paper
####################################################################

####### summarize number of elections of various types
cmppics.summarize_numbers(df, comp_df, nosw_df, alle_df)

####### Average of election in Figure 1.
# Formatted into one string so the line prints identically under the
# Python 2 print statement and the Python 3 print function.
print("average of E_0 election: %s" % (np.mean(dvotes),))

####### Metric values on E_0 election
cmppics.summarize_e0_metrics(dvotes, cmp.metric_dict)

####### Various correlations referenced in the paper
cmppics.summarize_correlations(nosw_df, comp_df)

####### Investigation of lopsided-mean
cmppics.summarize_lopsided(relecs)