import comparison as cmp
import metrics as mt
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import seaborn as sns
from scipy import stats

try:
    # scatter_matrix lives in pandas.plotting for pandas >= 0.20
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix

# marker colours: myr for the districts left of the 50% crossing, myb for the rest
myr = '#ff8080'
myb = '#8080ff'
scalef = 1
mydpi = 100

# for plotting the kernel densities
distn_a = ['EG','LG','DG','SG']
distn_b = ['Dec','BDec','MM','Lop']
distn_c = ['VC1','VC2','Bias']

homepath = '/home/gswarrin/research/gerrymander/'


####################################################
# private helpers shared by the plotting functions
####################################################
def _set_axes_background(axe, color='none'):
    """Set the axes background colour with whichever matplotlib API exists."""
    # set_axis_bgcolor was removed in matplotlib 2.2; set_facecolor replaces it.
    setter = getattr(axe, 'set_facecolor', None)
    if setter is None:
        setter = axe.set_axis_bgcolor
    setter(color)


def _set_text_fontsize(axe, fs):
    """Apply font size *fs* to the title, axis labels and tick labels of *axe*."""
    for item in ([axe.title, axe.xaxis.label, axe.yaxis.label] +
                 axe.get_xticklabels() + axe.get_yticklabels()):
        item.set_fontsize(fs)


def _district_xy(vals, m):
    """Return (x1, y1, x2, y2) for sorted vote shares *vals* split at index *m*.

    District j (1-based) is placed at the centre of the j-th of N equal-width
    slots on [0, 1]; the first m districts form group 1, the rest group 2.
    """
    N = len(vals)
    n = N - m
    x1 = [j*1.0/N - 1.0/(2*N) for j in range(1, m+1)]
    x2 = [m*1.0/N + j*1.0/N - 1.0/(2*N) for j in range(1, n+1)]
    return x1, vals[:m], x2, vals[m:]


def _new_white_figure():
    """Create the standard 8x4 single-axes figure with top/right spines hidden."""
    fig = plt.figure(figsize=(scalef*8, scalef*4), dpi=mydpi)
    fig.patch.set_visible(True)
    fig.patch.set_facecolor('white')
    ax1 = fig.gca()
    ax1.spines['top'].set_visible(False)
    ax1.spines['right'].set_visible(False)
    ax1.yaxis.set_ticks_position('left')
    return fig, ax1


def _save_pic(fnstr, **kwargs):
    """Save the current figure under homepath + 'pics/' and close it."""
    plt.savefig(homepath + 'pics/' + fnstr, **kwargs)
    plt.close()


def _plot_districts(axe, vals, m, title, fs, lrg, plotslopes, plotfullreg,
                    xaxislab, yaxislab, ylab):
    """Draw the part common to all single-election plots.

    Sets limits/title/labels, optionally draws the two declination segments
    (group centroid -> 50% crossing -> group centroid) and a full regression
    line, then scatters the districts.
    """
    N = len(vals)
    n = N - m
    x1, y1, x2, y2 = _district_xy(vals, m)
    # scatter marker area, centroid marker size, line width
    lrg_sz, lrg_marksz, lrg_lw = (60, 5, 3) if lrg else (420, 20, 9)

    axe.get_xaxis().set_ticks([])
    axe.set_ylim(0, 1)
    axe.set_xlim(0, 1)
    axe.set_title(title, fontsize=fs)
    if yaxislab:
        axe.set_ylabel(ylab)
    if xaxislab:
        axe.set_xlabel('District')

    if m > 0 and n > 0 and plotslopes:
        # plot angled lines
        ybar = np.mean(vals[:m])
        zbar = np.mean(vals[m:])
        med_marksz = lrg_marksz*2.0/3
        axe.plot([m*1.0/(2*N), m*1.0/N], [ybar, 0.5], 'k-', linewidth=lrg_lw)
        axe.plot([m*1.0/N, m*1.0/N + n*1.0/(2*N)], [0.5, zbar], 'k-', linewidth=lrg_lw)
        axe.plot([m*1.0/(2*N), m*1.0/N, m*1.0/N + n*1.0/(2*N)], [ybar, 0.5, zbar],
                 'ko', markersize=med_marksz)

    if plotfullreg:
        reg = stats.linregress(x1 + x2, vals)
        axe.plot([0, 1], [reg[1], reg[1] + reg[0]], 'k-', linewidth=1)
        print(" Full regression line: slope %.2f inter %.2f r %.3f p %.4f " %
              (reg[0], reg[1], reg[2], reg[3]))

    axe.scatter(x1, y1, color=myr, s=lrg_sz)
    axe.scatter(x2, y2, color=myb, s=lrg_sz)
    _set_axes_background(axe)


####################################################
# figure showing how elections are plotted
####################################################
# Fig. 1
def comp_std_example(fnstr,dvotes):
    """Save a bare plot (no slopes, no annotation) of the vote shares *dvotes*.

    The figure is written to homepath + 'pics/' + fnstr.
    """
    fig, ax1 = _new_white_figure()
    comp_plot_one_declination(ax1, dvotes, '', plotslopes=False, plotdec=False,
                              xaxislab=True, yaxislab=True)
    plt.tight_layout()
    _save_pic(fnstr, dpi=mydpi)


def comp_plot_one_declination(axe,arr,title,plotslopes=True,lrg=True,plotdec=True,xaxislab=True,yaxislab=True,
                              plotfullreg=False,ylab='Dem. vote'):
    """Plot a single declination picture on *axe*.

    arr         -- district vote shares (sorted internally; split at < 0.5)
    title       -- axes title
    plotslopes  -- draw the two declination segments when both groups are non-empty
    lrg         -- True: small markers / 14pt text; False: large markers / 50pt text
    plotdec     -- annotate the value returned by mt.get_declination
                   ('N/A' when its absolute value is >= 2)
    xaxislab, yaxislab -- whether to label the axes ('District' / ylab)
    plotfullreg -- also draw and print a linear regression through all districts
    """
    _set_axes_background(axe)
    axe.xaxis.set_ticks_position('bottom')
    axe.yaxis.set_ticks_position('left')

    vals = sorted(arr)
    m = sum(1 for v in vals if v < 0.5)

    # plot mid line
    axe.axhline(0.5, color='black', linestyle='dotted')

    fs = 14 if lrg else 50
    _set_text_fontsize(axe, fs)
    axe.spines['top'].set_visible(False)
    axe.spines['right'].set_visible(False)

    # plot values of metrics
    if plotdec:
        fa = mt.get_declination('', vals)
        if abs(fa) >= 2:
            tmpstr = '$\\delta = N/A$'
        elif fa >= 0:
            # the escaped spaces pad non-negative values in mathtext
            tmpstr = '$\\ \\ \\ \\ {\\delta} = ' + ("% .2f$" % (fa))
        else:
            tmpstr = '$\\delta = ' + ("% .2f$" % (fa))
        axe.annotate(tmpstr, (0.6 if lrg else 0.5, 0.10), fontsize=fs)

    _plot_districts(axe, vals, m, title, fs, lrg, plotslopes, plotfullreg,
                    xaxislab, yaxislab, ylab)


#########################################
# definition of declination
#########################################
# Fig 2.
def comp_dec_def_plot_angle(k,axes,fig,elections,seatsbool=False):
    """Draw the annotated figure defining the declination for elections[k].

    Plots the sorted elections[k].demfrac values, the 50% line, the segments
    F-G and G-H through the group centroids and, unless *seatsbool* is set,
    the T/U endpoints, the extension of F-G and the arc labelled
    'Declination'.  Label positions and arc angles are hard-coded, so the
    picture is only laid out correctly for the election they were tuned for.
    Both groups must be non-empty (the slope of F-G divides by m/(2N)).
    *fig* is accepted for interface compatibility and is not used.
    """
    axes.get_xaxis().set_ticks([])
    axes.set_xlabel('District')

    elec = elections[k]
    vals = sorted(elec.demfrac)
    N = len(vals)
    m = sum(1 for v in vals if v < 0.5)
    n = N - m
    ybar = np.mean(vals[:m])
    # was vals[m+1:], which dropped the first district of the second group;
    # every other plot in this module averages vals[m:]
    zbar = np.mean(vals[m:])

    _set_text_fontsize(axes, 14)

    # plot actual vote fractions
    x1, y1, x2, y2 = _district_xy(vals, m)
    axes.scatter(x1, y1, color=myr, s=60)
    axes.scatter(x2, y2, color=myb, s=60)

    # plot mid line
    axes.plot([0, 1], [0.5, 0.5], color='black')

    ptF = [m*1.0/(2*N), ybar]
    ptG = [m*1.0/N, 0.5]
    ptH = [m*1.0/N + n*1.0/(2*N), zbar]
    ptT = [0, 0.5]
    ptU = [1, 0.5]

    # plot angled lines
    axes.plot([ptF[0], ptG[0]], [ptF[1], ptG[1]], 'k-')
    axes.plot([ptG[0], ptH[0]], [ptG[1], ptH[1]], 'k-')

    axes.set_ylabel('Democratic vote')

    if not seatsbool:
        axes.annotate('T', (0, 0.45), fontsize=16)
        axes.annotate('U', (1, 0.45), fontsize=16)
        axes.plot([ptT[0], ptU[0]], [ptT[1], ptU[1]], 'ko', markersize=5)
        # extension of segment F-G to the right edge
        axes.plot([ptG[0], 1],
                  [ptG[1], ptG[1] + (1 - ptG[0])*(0.5 - ybar)/(ptG[0] - ptF[0])], 'k-.')
        axes.add_patch(matplotlib.patches.Arc(ptG, .4, .4, 0, 16.5, 65, color='green', lw=3))
        axes.annotate('Declination', (0.94, 0.63), fontsize=12)

    axes.annotate('F', (0.38, 0.33), fontsize=16)
    axes.annotate('G', (0.77, 0.45), fontsize=16)
    axes.annotate('H', (0.9, 0.71), fontsize=16)
    axes.plot([ptF[0], ptH[0]], [ptF[1], ptH[1]], 'ko', markersize=5)
    axes.plot([ptG[0]], [ptG[1]], 'ko', markersize=5)
    axes.axis([-0.1, 1.1, 0.25, 0.8])


def comp_dec_def_create(fnstr,elecstr,elections,seatsbool=False):
    """Create and save the declination-definition figure for elections[elecstr]."""
    fig, ax1 = _new_white_figure()
    comp_dec_def_plot_angle(elecstr, ax1, fig, elections, seatsbool)
    plt.tight_layout(w_pad=1, h_pad=1)
    _save_pic(fnstr, dpi=mydpi)


#############################################################
# list out mean and std of measures
#############################################################
def distribution_table(df,mdict,prtable=True):
    """Return a 2-row frame ('mean', 'std dev') of df[x] for every key x of *mdict*.

    Columns are returned in a fixed order, so *mdict* must contain all twelve
    measures listed below.  When *prtable* is true the table is also printed
    as LaTeX.
    """
    ndf = pd.DataFrame(index=['mean', 'std dev'])
    for x in mdict:
        ndf.loc['mean', x] = df[x].mean()
        ndf.loc['std dev', x] = df[x].std()
    ndf = ndf[['EG','DG','LG','SG','VC1','VC2','Dec','BDec','MM','Bias','Lop','EVW']]
    if prtable:
        print("% Tab A (goes into Slot A)")
        print(ndf.to_latex(float_format="%.2f"))
    return ndf


#############################################################
# make kernel density plots of various rescaled distributions
#############################################################
def distribution_grid(fnstr,df,r=1,c=3):
    """Save kernel-density plots of the rescaled measures, one panel per group.

    Panel i shows the columns '<name>' + cmp.rescale_suffix of *df* for the
    names in distn_a / distn_b / distn_c, together with a standard normal
    density for comparison.  Written to homepath + 'pics/' + fnstr.
    """
    fig, axes = plt.subplots(r, c, figsize=(6*c, 4*r), dpi=mydpi,
                             sharex=True, sharey=True, squeeze=False)
    axes = axes.ravel()

    names = distn_a + distn_b + distn_c
    tdf = df[[x + cmp.rescale_suffix for x in names]].copy()
    tdf.columns = names

    xvals = np.linspace(-2, 2, 100)
    # matplotlib.mlab.normpdf was removed in matplotlib 3.1
    normal = stats.norm.pdf(xvals, 0, 1)
    linestyles = ['-', '--', '-.', ':', '-', '--']

    for ax, group in zip(axes, (distn_a, distn_b, distn_c)):
        for i, x in enumerate(group):
            # the first panel used to read the un-rescaled df[x]
            sns.kdeplot(tdf[x].dropna(), ax=ax, linestyle=linestyles[i])
        ax.set_xlabel('Standard deviations')
        ax.plot(xvals, normal, linewidth=4, color='gray')
    axes[0].set_ylabel('Frequency')

    plt.xlim((-2, 2))
    plt.tight_layout()
    _save_pic(fnstr)


############################################################
# big scatter matrix
############################################################
def pic_scatter_matrix(fnstr,df,arr=None):
    """Save a scatter matrix (KDE on the diagonal) of the columns *arr* of *df*.

    *arr* defaults to ['EG','VC2','BDec','Bias','MM','Lop','DG'].
    """
    if arr is None:
        arr = ['EG','VC2','BDec','Bias','MM','Lop','DG']
    plt.close()
    sm = scatter_matrix(df[list(arr)], alpha=0.2, figsize=(18, 18), diagonal='kde')

    # hide all ticks and enlarge the labels
    for ax in sm.reshape(-1):
        ax.set_xticks(())
        ax.set_yticks(())
        ax.xaxis.label.set_size(20)
        ax.yaxis.label.set_size(20)

    _save_pic(fnstr)


#######################################################
# make a table with 10 worst elections for each measure
#######################################################
def rename_elecs(x):
    """Shorten an election key for display, e.g. '2006_NY_9' -> '06 NY*'.

    Keys ending in '9' (state election) get a trailing '*', all others a
    trailing space so that the columns line up.
    """
    marker = '*' if x[-1] == '9' else ' '
    return x[2:4] + ' ' + x[5:7] + marker


def table_worst_elecs(df,arr=cmp.metric_dict,rn=True,mynum=10,prout=False):
    """Return a table of the *mynum* most extreme elections for each metric in *arr*.

    For metric k the rows are ranked by column k + '-abs' (descending).  For
    'Lop' and 'EVW' the candidate rows are those selected by the mask column
    k + '-sig'; for the others, those where k + '-abs' is not null.  Column k
    of the result holds the election names (shortened by rename_elecs when
    *rn* is true).  When *prout* is true the table is printed as LaTeX.
    """
    worstdf = pd.DataFrame()
    for k in arr:
        abscol = k + '-abs'
        # get rows we want
        if k in ['Lop', 'EVW']:
            tmpf = df.loc[df[k + '-sig']]
        else:
            tmpf = df.loc[df[abscol].notnull()]
        ranked = tmpf[['elecs', abscol]].sort_values(abscol, ascending=False).reset_index(drop=True)

        # copy before modifying to avoid the pandas SettingWithCopy warning
        ntmpdf = ranked.head(mynum).copy()
        # rename the 'elecs' column heading since we want to keep track of which
        # metric has these elections as its most extreme examples.
        ntmpdf.columns = [k, abscol]
        if rn:
            ntmpdf.loc[:, k] = ntmpdf[k].apply(rename_elecs)

        if len(worstdf.columns) == 0:
            worstdf = pd.DataFrame(index=ntmpdf.index)
        worstdf = pd.merge(worstdf, ntmpdf[[k]], left_index=True, right_index=True)
    if prout:
        print(worstdf.to_latex())
    return worstdf


########################################################
# make grid consisting of particular races
########################################################
def make_worst_triple(fnstr,keys1,keys2,elecs,r=2,c=3):
    """Save an r x c grid plotting elecs[k].demfrac for every k in keys1 + keys2.

    Only the bottom row gets x labels and only the first column y labels;
    axes left over after the last election are switched off.
    """
    fig, axes = plt.subplots(r, c, figsize=(10*c, 5*r), dpi=mydpi,
                             sharex=True, sharey=True, squeeze=False)
    axes = axes.ravel()

    keys = list(keys1) + list(keys2)
    for i, k in enumerate(keys):
        comp_newfig_plot_extremes(axes[i], elecs[k].demfrac, k,
                                  plotslopes=False, lrg=False, plotdec=False,
                                  xaxislab=(i >= (r-1)*c), yaxislab=(i % c == 0),
                                  plotfullreg=False)

    # hide whatever was not used (this used to count an unrelated global dict)
    for i in range(len(keys), r*c):
        axes[i].set_axis_off()

    fig.subplots_adjust(wspace=0.2, hspace=0.2)
    _save_pic(fnstr, dpi=mydpi)


def make_all_worst_triples(fnstr,dfb55,dfall,measures,elecs,r=1,c=3):
    """Call make_worst_triple once per measure, saving to fnstr + '-' + measure.

    The elections come from columns dfb55[m] and dfall[m].  *r* and *c* are
    accepted but not forwarded; make_worst_triple uses its own 2x3 default.
    """
    for m in measures:
        make_worst_triple(fnstr + '-' + m, list(dfb55[m]), list(dfall[m]), elecs)


################################################
# hypothetical elections (as grid of pictures)
################################################
def comp_newfig_plot_extremes(axe,arr,title,plotslopes=True,lrg=True,plotdec=True,xaxislab=True,yaxislab=True,
                              plotfullreg=False,ylab='Dem. vote'):
    """Plot one election on *axe* in the style used for the grids in the paper.

    Differs from comp_plot_one_declination in that districts are split at
    <= 0.5, the 50% line is solid, a dotted line marks the mean vote share,
    text is 10pt (lrg) or 30pt, and titles containing '_11' / '_9' are
    rewritten as '<year> <state> Cong.' / '... Leg.'.  *plotdec* is accepted
    but no metric values are drawn.
    """
    _set_axes_background(axe)
    axe.xaxis.set_ticks_position('bottom')
    axe.yaxis.set_ticks_position('left')

    vals = sorted(arr)
    m = sum(1 for v in vals if v <= 0.5)

    # plot mid line and mean vote
    axe.axhline(0.5, color='black')
    axe.axhline(np.mean(vals), color='black', linestyle='dotted')

    fs = 10 if lrg else 30
    _set_text_fontsize(axe, fs)
    axe.spines['top'].set_visible(False)
    axe.spines['right'].set_visible(False)

    if '_11' in title:
        title = title[:4] + ' ' + title[5:7] + " Cong."
    if '_9' in title:
        title = title[:4] + ' ' + title[5:7] + " Leg."

    _plot_districts(axe, vals, m, title, fs, lrg, plotslopes, plotfullreg,
                    xaxislab, yaxislab, ylab)


# make grid consisting of particular races
def comp_make_hypo_grid(fnstr,hypo_elecs,r=3,c=4):
    """Save the lettered grid of hypothetical elections.

    Elections are drawn in the order of the module-level hypo_elec_list, with
    vote shares looked up in the *hypo_elecs* mapping.  Panel letters A-L are
    placed at figure coordinates tuned for the default 3x4 layout.
    """
    fig, axes = plt.subplots(r, c, figsize=(8*c, 6*r), dpi=mydpi,
                             sharex=True, sharey=True, squeeze=False)
    axes = axes.ravel()

    for i, k in enumerate(hypo_elec_list):
        comp_newfig_plot_extremes(axes[i], hypo_elecs[k], k,
                                  plotslopes=False, lrg=False, plotdec=False,
                                  xaxislab=(i >= (r-1)*c), yaxislab=(i % c == 0),
                                  plotfullreg=False)

    # panel letters, in figure coordinates
    letter_x = [0.05, 0.29, 0.53, 0.77]
    letter_y = [.94, .61, .29]
    for idx, letter in enumerate('ABCDEFGHIJKL'):
        if idx >= len(axes):
            break
        axes[idx].text(letter_x[idx % 4], letter_y[idx // 4], letter, fontsize=36,
                       transform=fig.transFigure, fontweight='bold')

    for i in range(len(hypo_elec_list), r*c):
        axes[i].set_axis_off()

    fig.subplots_adjust(wspace=0.2, hspace=0.2)
    plt.tight_layout()
    _save_pic(fnstr, dpi=mydpi)


################################################
# table of evaluations for hypothetical elections
################################################
def make_hypo_df(helecs,mdict,sigmas):
    """Build a frame with one row per name in the module-level hypo_elec_list.

    Columns: 'elecs' (name), 'N' (number of districts), 'Mean' (mean vote
    share) and, for each key of *mdict*, mdict[key](helecs[name]) divided by
    sigmas[key].
    """
    df = pd.DataFrame({'elecs': hypo_elec_list})
    df['N'] = df.elecs.apply(lambda x: len(helecs[x]))
    df['Mean'] = df.elecs.apply(lambda x: np.mean(helecs[x]))
    for fkey in mdict:
        df[fkey] = df.elecs.apply(lambda x, fkey=fkey: mdict[fkey](helecs[x])/sigmas[fkey])
    return df


def table_hypo(df,helecs,mlist):
    """Print a LaTeX table of the measures (in units of sigma) for the hypothetical elections.

    Sigmas come from cmp.get_sigma(df, mlist).  The 'Lop' entries are
    sequences; only their first element is reported.
    """
    metric_sigmas = cmp.get_sigma(df, mlist)
    hypodf = make_hypo_df(helecs, mlist, metric_sigmas)
    # .copy() so the assignment below does not write into a slice
    hypodf = hypodf[['N','Mean','EG','DG','LG','VC1','VC2','Dec','BDec','MM','Bias','Lop']].copy()
    hypodf['Lop'] = hypodf['Lop'].apply(lambda x: x[0])
    hypodf = hypodf.applymap(lambda x: '%.2f' % x)
    print(hypodf.to_latex(float_format="%.2f"))

#####################################################################################################
#####################################################################################################
#####################################################################################################
####################################################################################################3
# data for hypothetical elections

# looks like a classic gerrymander
# - SG will think it's dandy (it's not)
hypo_NC = [0.35,0.37,0.39,0.41,0.43,0.45,0.47,0.71,0.71,0.71]

# classic gerrymander by a minority party
# - LG will think it's dandy (it's not)
hypo_wd = [x - .20 for x in hypo_NC]

# sweep
# - looks kind of like Massachusetts
# - Dec is undefined
hypo_MA = [0.55,0.57,0.59,0.61,0.63,0.65,0.67,0.69,0.71,0.73]

# uncompetitive
# - bipartisan gerrymander - 4 + 6 seats
# - MM will hate it
hypo_un = [0.25,0.28,0.31,0.34,0.60,0.63,0.66,0.69,0.72,0.75]
hypo_wide = [0.1,0.13,0.16,0.19,0.70,0.73,0.76,0.79,0.82,0.85]

# uncompetitive but shifted closer to Republicans (so Dem gerrymander)
hypo_sh = [x - 0.08 for x in hypo_un]

# very competitive in some races
# - Bias, EG will hate this one
# - similar to 1994 WA 11
hypo_co = [0.43,0.45,0.47,0.51,0.51,0.51,0.51,0.57,0.61,0.63]

# At 50% already, so bias won't shift at all and will indicate the other way
hypo_ev = [0.41,0.43,0.45,0.51,0.51,0.51,0.51,0.56,0.60,0.61]

# Miscalibrated
hypo_mi = [x-0.03 for x in [0.35, 0.4, 0.45, 0.58, 0.60, 0.62, 0.64, 0.77, 0.82, 0.87]]

# evenly matched a-proportional distributions
prop1 = [0.2 for i in range(5)] + list(np.linspace(0.24,0.76,13)) + [0.8 for i in range(5)]
prop2 = np.linspace(0.25,0.75,25)
prop3 = np.linspace(0.34,0.66,25)

# proportionality
hypo_1 = [x + 0.1 for x in prop1]
hypo_2 = [x + 0.1 for x in prop2]
hypo_3 = [x + 0.1 for x in prop3]

hypo_elecs = {'Classic': hypo_NC, 'Inverted': hypo_wd, 'Sweep': hypo_MA, 'Uncompetitive': hypo_un,
              'Anti-majoritarian': hypo_sh, '1-proportionality': hypo_1, '2-proportionality': hypo_2,
              '3-proportionality': hypo_3, 'Competitive': hypo_co,
              'Competitive even': hypo_ev, 'Very uncompetitive': hypo_wide, 'Mixed': hypo_mi}

# in a particular order
hypo_elec_list = ['1-proportionality','2-proportionality','3-proportionality','Sweep',
                  'Competitive','Competitive even','Uncompetitive','Very uncompetitive',
                  'Anti-majoritarian','Classic','Inverted','Mixed']


#####################################################################3
# pictures/tables I'm not sure yet if I'll need in the paper
def _flex_clear_background(axe, color='none'):
    """Set the axes background colour with whichever matplotlib API exists."""
    # set_axis_bgcolor was removed in matplotlib 2.2; set_facecolor replaces it.
    setter = getattr(axe, 'set_facecolor', None)
    if setter is None:
        setter = axe.set_axis_bgcolor
    setter(color)


def pic_metric_distributions(fnstr,df,arr=None):
    """Save overlaid kernel-density plots of df[x + cmp.rescale_suffix] for x in *arr*.

    *arr* defaults to ['BDec','EG','LG','DG','SG','VC2','Bias','MM'].
    """
    if arr is None:
        arr = ['BDec','EG','LG','DG','SG','VC2','Bias','MM']
    for x in arr:
        sns.kdeplot(df[x + cmp.rescale_suffix])
    # seaborn no longer re-exports pyplot as sns.plt
    plt.xlim((-2, 2))
    plt.savefig(homepath + 'pics/' + fnstr)
    plt.close()


def comp_plot_flex(axe,arr,title,meas_arr,plotslopes=True,lrg=True,plotdec=True,xaxislab=True,yaxislab=True,
                   plotfullreg=False,ylab='Dem. vote'):
    """Plot an individual election and list measure values along the bottom.

    arr      -- district vote shares (sorted internally; split at <= 0.5)
    meas_arr -- sequence of [name, value] pairs; pairs with name '' are
                skipped.  At most six are shown, at fixed positions starting
                in the lower right.
    lrg      -- True: small markers / 10pt text; False: large markers / 24pt text
    plotdec  -- whether to draw the measure values
    Titles containing '_11' / '_9' are rewritten as '<year> <state> Cong.' /
    '... Leg.'.  The remaining flags behave as in the other single-election plots.
    """
    _flex_clear_background(axe)
    axe.xaxis.set_ticks_position('bottom')
    axe.yaxis.set_ticks_position('left')

    vals = sorted(arr)
    N = len(vals)
    m = sum(1 for v in vals if v <= 0.5)
    n = N - m

    # x positions: district j sits at the centre of the j-th of N equal slots
    x1 = [j*1.0/N - 1.0/(2*N) for j in range(1, m+1)]
    x2 = [m*1.0/N + j*1.0/N - 1.0/(2*N) for j in range(1, n+1)]
    y1 = vals[:m]
    y2 = vals[m:]

    # plot mid line and mean vote
    axe.axhline(0.5, color='black')
    axe.axhline(np.mean(vals), color='black', linestyle='dotted')

    # scatter marker area, centroid marker size, line width, font size
    if lrg:
        lrg_sz, lrg_marksz, lrg_lw, fs = 60, 5, 3, 10
    else:
        lrg_sz, lrg_marksz, lrg_lw, fs = 420, 20, 9, 24
    for item in ([axe.title, axe.xaxis.label, axe.yaxis.label] +
                 axe.get_xticklabels() + axe.get_yticklabels()):
        item.set_fontsize(fs)

    axe.spines['top'].set_visible(False)
    axe.spines['right'].set_visible(False)

    # plot values of metrics
    if plotdec:
        offsetsx = [0.7, 0.7, 0.4, 0.4, 0.1, 0.1]
        offsetsy = [0.05, 0.15, 0.05, 0.15, 0.05, 0.15]
        for i, entry in enumerate(meas_arr):
            if entry[0] == '':
                continue
            tmpstr = "%s = %.2f" % (entry[0], entry[1])
            axe.annotate(tmpstr, (offsetsx[i], offsetsy[i]), fontsize=fs)

    axe.get_xaxis().set_ticks([])
    axe.set_ylim(0, 1)
    axe.set_xlim(0, 1)
    if '_11' in title:
        title = title[:4] + ' ' + title[5:7] + " Cong."
    if '_9' in title:
        title = title[:4] + ' ' + title[5:7] + " Leg."
    axe.set_title(title, fontsize=fs)
    if yaxislab:
        axe.set_ylabel(ylab)
    if xaxislab:
        axe.set_xlabel('District')

    if m > 0 and n > 0 and plotslopes:
        # plot angled lines
        ybar = np.mean(vals[:m])
        zbar = np.mean(vals[m:])
        med_marksz = lrg_marksz*2.0/3
        axe.plot([m*1.0/(2*N), m*1.0/N], [ybar, 0.5], 'k-', linewidth=lrg_lw)
        axe.plot([m*1.0/N, m*1.0/N + n*1.0/(2*N)], [0.5, zbar], 'k-', linewidth=lrg_lw)
        axe.plot([m*1.0/(2*N), m*1.0/N, m*1.0/N + n*1.0/(2*N)], [ybar, 0.5, zbar],
                 'ko', markersize=med_marksz)

    if plotfullreg:
        # local import: the module never imported scipy.stats at the top
        from scipy import stats
        reg = stats.linregress(x1 + x2, vals)
        axe.plot([0, 1], [reg[1], reg[1] + reg[0]], 'k-', linewidth=1)
        print(" Full regression line: slope %.2f inter %.2f r %.3f p %.4f " %
              (reg[0], reg[1], reg[2], reg[3]))

    axe.scatter(x1, y1, color=myr, s=lrg_sz)
    axe.scatter(x2, y2, color=myb, s=lrg_sz)
    _flex_clear_background(axe)


# make grid consisting of particular races
def comp_flex_grid(fnstr,title_arr,val_arr,meas_arr,sigmas,mdict,r=4,c=4,plotslopes=False):
    """Save an r x c grid of elections drawn with comp_plot_flex.

    title_arr[i][j], val_arr[i][j] and meas_arr[i][j] give the title, the vote
    shares and the [name, value] measure pairs for row i, column j.  Rows may
    be shorter than *c*, and fewer than *r* rows may be given; unused axes are
    switched off.  *sigmas* and *mdict* are accepted but not used here.
    """
    fig, axes = plt.subplots(r, c, figsize=(8*c, 6*r), dpi=mydpi,
                             sharex=True, sharey=True, squeeze=False)
    axes = axes.ravel()

    for i in range(r):
        if i < len(val_arr):
            vals_row, meas_row, title_row = val_arr[i], meas_arr[i], title_arr[i]
        else:
            vals_row, meas_row, title_row = [], [], []
        plxlab = (i == r-1)
        for j in range(len(vals_row)):
            comp_plot_flex(axes[i*c+j], vals_row[j], title_row[j], meas_row[j],
                           plotslopes=plotslopes, lrg=False, plotdec=True,
                           xaxislab=plxlab, yaxislab=(j == 0),
                           plotfullreg=False, ylab='Dem. vote')
        for j in range(len(vals_row), c):
            axes[i*c+j].set_axis_off()

    fig.subplots_adjust(wspace=0.2, hspace=0.2)
    plt.tight_layout()
    plt.savefig(homepath + 'pics/' + fnstr, dpi=mydpi)
    plt.close()


def make_one_row_input(row,elecs,sigmas,mdict,uselist=False,tdf=None):
    """Turn one row specification into (titles, vals, meas) for comp_flex_grid.

    row     -- list of entries [key, measure, measure, ...]; '' as a measure
               leaves that slot blank
    elecs   -- mapping key -> object with a .demfrac attribute (used unless uselist)
    sigmas  -- mapping measure -> value each measure is divided by
    mdict   -- mapping measure -> function of the vote shares
    uselist -- if true, vote shares are read from tdf[key + '-Dem-frac'] instead

    Titles: keys ending in '9' become '<key[:4]> <key[5:7]> Leg.', keys ending
    in '1' become '... Cong.', anything else is used unchanged.  For 'Lop' and
    'EVW' only the first element of the function's result is used.
    """
    titles = []
    vals = []
    meas = []
    for r in row:
        key = r[0]
        votes = tdf[key + '-Dem-frac'] if uselist else elecs[key].demfrac
        vals.append(votes)

        if key[-1] == '9':
            titles.append(key[:4] + ' ' + key[5:7] + " Leg.")
        elif key[-1] == '1':
            titles.append(key[:4] + ' ' + key[5:7] + " Cong.")
        else:
            titles.append(key)

        tmp = []
        for name in r[1:]:
            if name == '':
                tmp.append(['', 0])
                continue
            ans = mdict[name](votes)
            if name in ['Lop', 'EVW']:
                ans = ans[0]
            tmp.append([name, ans/sigmas[name]])
        meas.append(tmp)
    return titles, vals, meas


def make_several_rows(rows,elecs,sigmas,mdict,uselist=False,tdf=None):
    """Apply make_one_row_input to every row; return lists of titles, vals and meas."""
    titles = []
    vals = []
    meas = []
    for x in rows:
        a, b, c = make_one_row_input(x, elecs, sigmas, mdict, uselist, tdf)
        titles.append(a)
        vals.append(b)
        meas.append(c)
    return titles, vals, meas


################################################################################
# For making extremal pictures
def make_extremal_pics(df,elecs,mdict):
    """Save the 'extremal-*' grids: hand-picked extreme elections for each measure.

    Sigmas are taken from cmp.get_sigma(df, mdict).  Each figure has one grid
    row per row of elections listed below.
    """
    sigmas = cmp.get_sigma(df, mdict)

    def single(measure, *keys):
        # a row in which every election shows the same single measure
        return [[k, measure] for k in keys]

    # (file name, rows of [key, measure, ...], number of columns)
    figures = [
        # competitive ones for the proportionality measures
        ('extremal-comp-DG',
         [[['1978_WA_11','DG','EG','LG','VC1','VC2'],
           ['1994_WA_11','DG','EG','LG','VC1','VC2'],
           ['2012_OH_11','','EG','LG','VC1','VC2'],
           ['2006_MI_11','DG','','','','']]], 4),
        # uncompetitive ones
        ('extremal-un-DG',
         [single('DG', '2006_NY_9', '2008_NY_9', '2006_NY_11'),
          [['2010_MA_11','LG','VC1',''],
           ['2014_MA_11','LG','VC1','VC2'],
           ['2002_MA_11','LG','VC1','']],
          [['2010_MA_11','EG','VC2'],
           ['1978_WA_11','EG','VC2'],
           ['1994_WA_11','EG','']]], 3),
        ('extremal-both-SG',
         [single('SG', '2010_FL_9', '1986_IL_11', '2010_IL_11'),
          single('SG', '1972_LA_9', '2000_MA_11', '1974_AL_9')], 3),
        ('extremal-both-Dec',
         [single('Dec', '1978_WA_11', '2012_OH_11', '2014_OH_11'),
          single('Dec', '1980_VA_11', '1990_MA_11', '1976_TX_11')], 3),
        ('extremal-both-BDec',
         [single('BDec', '2012_OH_11', '2012_PA_11', '2014_NC_11'),
          single('BDec', '1992_HI_9', '1988_MA_11', '1990_MA_11')], 3),
        ('extremal-both-EVW',
         [single('EVW', '1980_IL_11', '2004_MI_11', '1988_KY_11')], 3),
        ('extremal-both-MM',
         [single('MM', '2000_TN_11', '2006_GA_11', '2006_TN_11'),
          single('MM', '1974_AL_11', '1972_AL_11', '2000_TN_11')], 3),
        ('extremal-both-Bias',
         [single('Bias', '1996_WA_11', '2014_NC_11', '2012_NC_11'),
          single('Bias', '2012_AL_11', '2014_AL_11', '2016_SC_11')], 3),
        ('extremal-both-Lop',
         [single('Lop', '2006_MI_11', '2012_PA_11', '2010_IL_11'),
          single('Lop', '1972_LA_9', '2006_NY_11', '2006_NY_9')], 3),
    ]

    for name, rows, ncols in figures:
        titles, vals, meas = make_several_rows(rows, elecs, sigmas, mdict)
        comp_flex_grid(name, titles, vals, meas, sigmas, mdict, r=len(rows), c=ncols)


########################################################
# for worst disagreements
# - have it generate the data to send to comp_flex_grid
########################################################
def make_worst_disagree_pair(fnstr,m,keys1,keys2,elecs,r=2,c=3):
    """Save an r x c grid plotting elecs[k].demfrac for every k in keys1 + keys2.

    Only the bottom row gets x labels and only the first column y labels;
    axes left over after the last election are switched off.  *m* is accepted
    but not used.
    """
    fig, axes = plt.subplots(r, c, figsize=(10*c, 5*r), dpi=mydpi,
                             sharex=True, sharey=True, squeeze=False)
    axes = axes.ravel()

    keys = list(keys1) + list(keys2)
    for i, k in enumerate(keys):
        comp_newfig_plot_extremes(axes[i], elecs[k].demfrac, k,
                                  plotslopes=False, lrg=False, plotdec=False,
                                  xaxislab=(i >= (r-1)*c), yaxislab=(i % c == 0),
                                  plotfullreg=False)

    # hide whatever was not used (this used to count the hypo_elecs dict)
    for i in range(len(keys), r*c):
        axes[i].set_axis_off()

    fig.subplots_adjust(wspace=0.2, hspace=0.2)
    plt.savefig(homepath + 'pics/' + fnstr, dpi=mydpi)
    plt.close()


def make_worst_disagreements(fnstr,df,dfb55,dfall,measures,elecs,mdict,r=2,c=3):
    """Save one grid per measure pair showing the elections where the pair disagrees most.

    Each entry of *measures* is a string 'M1-M2'.  The elections in columns
    dfb55[entry] and dfall[entry] form the two grid rows, annotated with both
    measures in units of cmp.get_sigma(df, mdict).
    """
    sigmas = cmp.get_sigma(df, mdict)
    for m in measures:
        m1, m2 = m.split('-')
        onerow = [[x, m1, m2] for x in list(dfb55[m])]
        tworow = [[x, m1, m2] for x in list(dfall[m])]
        titles, vals, meas = make_several_rows([onerow, tworow], elecs, sigmas, mdict)
        comp_flex_grid(fnstr + '-' + m, titles, vals, meas, sigmas, mdict, r=r, c=c)


def make_worst_rms(fnstr,alledf,df,elecs,mdict,r=2,c=3):
    """Save a 2x4 grid of the first eight elections from cmp.find_least_consensus(df).

    Each panel is annotated with EG, BDec, MM and Bias in units of
    cmp.get_sigma(alledf, mdict).  The grid is always 2x4; *r* and *c* are
    accepted but not used.
    """
    tdf = cmp.find_least_consensus(df)[['elecs', 'RMS']]
    sigmas = cmp.get_sigma(alledf, mdict)
    shown = ['EG', 'BDec', 'MM', 'Bias']
    names = list(tdf['elecs'][:8])
    onerow = [[x] + shown for x in names[:4]]
    tworow = [[x] + shown for x in names[4:8]]
    titles, vals, meas = make_several_rows([onerow, tworow], elecs, sigmas, mdict)
    comp_flex_grid(fnstr + '-RMS', titles, vals, meas, sigmas, mdict, r=2, c=4)


def find_0123(fnstr,df,elecs,mdict,measures):
    """Save a grid with, per measure, the first election at or above |value| 0, 1, 2 and 3.

    For each measure m the rescaled column m + cmp.rescale_suffix is taken in
    absolute value and sorted ascending; for each threshold the smallest
    election reaching it is shown.  Thresholds no election reaches are skipped.
    Side effect (kept from the original): the absolute-value column
    m + cmp.rescale_suffix + cmp.absolute_value_suffix is written into *df*.
    """
    sigmas = cmp.get_sigma(df, mdict)
    rows = []
    for m in measures:
        mre = m + cmp.rescale_suffix
        mreabs = mre + cmp.absolute_value_suffix
        df.loc[:, mreabs] = df.loc[:, mre].apply(abs)
        tdf = df[['elecs', mreabs]].sort_values(mreabs)
        arr = []
        for i in range(4):
            hits = tdf[tdf[mreabs] >= i].head(1)
            if len(hits) == 0:
                # nothing reaches this threshold, so nothing reaches a higher one
                break
            arr.append([hits.iloc[0, 0], m])
        rows.append(arr)
    titles, vals, meas = make_several_rows(rows, elecs, sigmas, mdict)
    comp_flex_grid(fnstr, titles, vals, meas, sigmas, mdict, r=max(4, len(rows)), c=4)


############################################################################3
# check correlation between BDec and Dec for various thresholds
def check_dec_corr(elecs):
    """Print the Pearson correlation between Dec and BDec for several classes of elections.

    For each threshold M, only elections with at least 7 districts whose
    fraction of districts above 0.5 lies in [M, 1-M] are used.  Thresholds
    with fewer than two qualifying elections are skipped.
    """
    # local import: the module never imported scipy.stats at the top
    from scipy import stats
    # NOTE(review): get_dec is not defined in the visible part of this module;
    # presumably it is provided elsewhere in the file -- confirm.
    for M in [0.01, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40]:
        arr = []
        for k in elecs.keys():
            vals = elecs[k].demfrac
            seatfrac = sum(1 for x in vals if x > 0.5)*1.0/len(vals)
            if elecs[k].Ndists >= 7 and M <= seatfrac <= 1-M:
                arr.append([get_dec(vals), mt.get_bdec(vals)])
        if len(arr) < 2:
            continue
        a1, p1 = stats.pearsonr([x[0] for x in arr], [x[1] for x in arr])
        print("M:%.2f corr=%.2f" % (M, a1))


############################################################################3
# compare Dec against seat share for pairs of elections shifted to the same mean vote
def check_dec_ep(elecs):
    """Print pairs of elections in which Dec and seat share move in the same direction.

    Every ordered pair of distinct elections with the same number (>= 7) of
    districts is considered.  The second election is shifted uniformly to the
    first one's mean vote; pairs where either election is a sweep or either
    seat fraction lies outside [0.25, 0.75] are skipped.  For each remaining
    pair where Dec and seats are both lower, or both higher, in the first
    election, the seat-fraction gap, the Dec gap and the two keys are printed.
    Returns the list of [seat-fraction gap, Dec gap] pairs printed.
    """
    # NOTE(review): get_dec is not defined in the visible part of this module;
    # presumably it is provided elsewhere in the file -- confirm.
    arr = []
    keys = list(elecs.keys())
    for k1 in keys:
        e1 = elecs[k1]
        seats1 = sum(1 for x in e1.demfrac if x > 0.5)
        if e1.Ndists < 7 or seats1 == 0 or seats1 == e1.Ndists:
            continue
        dec1 = get_dec(e1.demfrac)
        frac1 = seats1*1.0/e1.Ndists
        mean1 = np.mean(e1.demfrac)
        for k2 in keys:
            e2 = elecs[k2]
            # equal district counts are required; the original's extra
            # "difference in districts" test was therefore always satisfied
            if k1 == k2 or e2.Ndists < 7 or e1.Ndists != e2.Ndists:
                continue
            # shift so uniformly the same vote
            myshift = mean1 - np.mean(e2.demfrac)
            vals2 = [x + myshift for x in e2.demfrac]
            seats2 = sum(1 for x in vals2 if x > 0.5)
            if seats2 == 0 or seats2 == e2.Ndists:
                continue
            frac2 = seats2*1.0/e2.Ndists
            if 0.25 > min(frac1, frac2) or 0.75 < max(frac1, frac2):
                continue
            dec2 = get_dec(vals2)
            if (dec1 < dec2 and seats1 < seats2) or (dec1 > dec2 and frac1 > frac2):
                print("%s %s %s %s" % (abs(frac1-frac2), abs(dec1-dec2), k1, k2))
                arr.append([abs(frac1-frac2), abs(dec1-dec2)])
    return arr


def make_ep_ex(fnstr,noswdf,elecs,mdict):
    """Save a 1x2 figure comparing 1986 NC with 1994 VA shifted to the same mean vote.

    The BDec value shown for each election is computed by make_several_rows
    from the unshifted vote shares; only the plotted VA points are shifted.
    Sigma for BDec comes from cmp.get_sigma(noswdf, {'BDec': mt.get_bdec}).
    The title text for VA is fixed ('shifted up by 0.10').
    """
    sigmas = cmp.get_sigma(noswdf, {'BDec': mt.get_bdec})
    onerow = [['1986_NC_11', 'BDec'], ['1994_VA_11', 'BDec']]
    titles, vals, meas = make_several_rows([onerow], elecs, sigmas, mdict)
    shift = np.mean(vals[0][0]) - np.mean(vals[0][1])
    vals[0][1] = [x + shift for x in vals[0][1]]
    titles[0][1] = '1994 VA (shifted up by 0.10)'
    print("tit:  %s" % (titles,))
    print("vals:  %s" % (vals,))
    print("meas:  %s" % (meas,))
    comp_flex_grid(fnstr, titles, vals, meas, sigmas, mdict, r=1, c=2, plotslopes=True)


###############################################################################################
###############################################################################################
###############################################################################################
# data we want to compute

########################################################
# print out some data regarding numbers of elecs
########################################################
def summarize_numbers(df,comp_df,nosw_df,alle_df):
    """Print counts of elections by number of seats and by sweep status.

    df must have columns 'N' (used as the seat count) and 'nosw' (boolean;
    rows where it is False are counted as sweeps).  alle_df, nosw_df and
    comp_df are only counted with len().
    """
    def report(label, count):
        # reproduces the Python 2 output of: print label, count
        print("%s %s" % (label, count))

    sweep = ~df['nosw']
    report("Number of elections: ", len(df))
    report("Number with at least one seat: ", len(df[df['N'] > 0]))
    report("Number with >= 7 seats: ", len(alle_df))
    report("Number with >= 7 seats that aren't sweeps: ", len(nosw_df))
    report("Number with >= 7 seats that are competitive: ", len(comp_df))
    report("Number with 1 seats: ", len(df[(df['N'] == 1)]))
    for seats in range(1, 7):
        report("Number with %d seats that are sweeps: " % seats,
               len(df[(df['N'] == seats) & sweep]))
    report("2..6 seats that are sweeps: ", len(df[(2 <= df['N']) & (df['N'] <= 6) & sweep]))
    report("2..6 seats total: ", len(df[(2 <= df['N']) & (df['N'] <= 6)]))
    # label used to read ">= seats"
    report(">= 7 seats that are sweeps: ", len(df[(7 <= df['N']) & sweep]))

########################################################
# values
# of metrics on sample election E0
########################################################
def summarize_e0_metrics(arr,mdict):
    """
    Print the value of every metric in mdict evaluated on arr.

    arr   - the single argument handed to each metric function
    mdict - dict from metric name to a function of one argument

    The result of 'Lop' is indexed at positions 0..3 and printed under the
    labels Diff, t, p and sig; the result of 'EVW' is indexed at positions 0..1
    and printed under Diff and Anti-maj.  Every other metric must return a
    single number, which is printed with two decimals after its name.
    """
    for name in mdict:
        res = mdict[name](arr)
        if name == 'Lop':
            print("Lop Diff: %.2f t:%.2f p:%.2f sig:%s" % (res[0],res[1],res[2],res[3]))
        elif name == 'EVW':
            print("EVW Diff: %.2f Anti-maj:%s" % (res[0],res[1]))
        else:
            print("%s %.2f" % (name,res))

########## list out correlations for some other metrics
def summarize_correlations(df,cdf):
    """
    Print the correlation matrices of a few fixed groups of metric columns.

    df  - data frame with columns Davg, SG, BDec, EG, VC2, Lop, DG, MM, Bias
    cdf - data frame with columns Lop and Dec
    """
    for cols in (['Davg','SG'],['BDec','EG','VC2'],['Lop','DG'],['EG','MM','Bias']):
        print(df[cols].corr())
    print(cdf[['Lop','Dec']].corr())

def summarize_lopsided(elecs):
    """
    For each even year 2006..2014 print t-test results and average winning
    margins for the election stored under '<year>_WI_9', first with the
    stored demfrac values and then with every race whose status equals 1
    replaced by 0.75 (if it was above 0.5) or 0.25 (otherwise).

    elecs - dict from key to an object with demfrac and status sequences of
            equal length
    """
    for yr in range(2006,2016,2):
        elec = elecs[str(yr) + '_WI_9']
        wavg = []
        lavg = []
        print("--------------------------")
        print("--------------------------")
        print("%s %s" % ("starting yr ",yr))
        print("from our imputations: ")
        print(mt.get_t_test(elec.demfrac))

        # work on a copy so the stored vote fractions are not modified
        arr = list(elec.demfrac)
        print("%s %s %s" % ("our winning margin: ",
                            np.mean([x for x in arr if x > 0.5]),
                            1-np.mean([x for x in arr if x <= 0.5])))
        print("%s %s" % ("number of races ",len(arr)))

        for i,frac in enumerate(arr):
            if elec.status[i] == 1:
                if frac > 0.5:
                    wavg.append(frac)
                    arr[i] = 0.75
                else:
                    lavg.append(1-frac)
                    arr[i] = 0.25
        print("")
        print("%s %s %s" % (yr,"uncontested: ",sum(1 for s in elec.status if s < 2)))
        print("using 75-25: ")
        print("%s %s %s" % ("their winning margin: ",
                            np.mean([x for x in arr if x > 0.5]),
                            1-np.mean([x for x in arr if x <= 0.5])))
        print(mt.get_t_test(arr))
        print("%s %s %s %s %s" % ("winn and losing imputed: ",
                                  len(wavg),np.mean(wavg),len(lavg),np.mean(lavg)))
        print("")