Visualisation of Mark Pack's Pollbase Dataset¶
I picked up the Excel sheet from Mark Pack's blog and processed it with a script in this notebook. That script creates this flat CSV output.
This notebook is primarily a proof of concept for a workflow that lets me publish Jupyter Notebooks with functional JavaScript widgets (i.e. please play with the buttons to the right of the charts!).
However, I will come back to this and try out some timeseries analysis techniques.
Toggle code visibility on/off with the button below
from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)
# Script emitted into the output: when the page body is not the live notebook
# app (i.e. the notebook has been exported as HTML), toggle the code inputs
# and prompts so that code is hidden by default.
hide_code_on_export = '<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>'
di.display_html(hide_code_on_export, raw=True)

# Button for the HTML export version that toggles the visibility of the code
# blocks (and their prompts) on each click.
toggle_code_button = '''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>'''
di.display_html(toggle_code_button, raw=True)
# %matplotlib inline
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pickle, os, gc, re
from utility import *
# Root folder holding the analysis data, expressed as a relative path.
BES_data_folder = '../BES_analysis_data/'
# Folder for the Pollbase files beneath the data root.
# NOTE(review): create_subdir is not defined in this file - presumably it comes
# from the `utility` star import, creates the folder if needed and returns its
# path with a trailing separator (the result is later concatenated directly
# with a file name) - TODO confirm against utility.
Pollbase_folder = create_subdir(BES_data_folder,"Pollbase")
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column, row
from bokeh.embed import components
# Configure Bokeh to render show() output inline in the notebook.
output_notebook()
from bokeh.models import HoverTool
from bokeh.models import Span, Label
from bokeh.models import BoxAnnotation
from datetime import timedelta
from scipy.signal import find_peaks
from datetime import datetime
import pytz
# Load the flattened Pollbase CSV. The three fieldwork date columns are parsed
# as datetimes and the "Unnamed: 0" column is used as the index.
pollbase_csv_path = Pollbase_folder+"MarkPackPollBase_flat_v7.csv"
fieldwork_date_columns = [
    'Fieldwork_Start_Date',
    'Fieldwork_Finish_Date',
    'Fieldwork_Midpoint_Date',
]
df = pd.read_csv(
    pollbase_csv_path,
    parse_dates=fieldwork_date_columns,
    index_col="Unnamed: 0",
)
Simple set of linked charts of Con/Lab/LD voteshare
Controls on the right let you pan, box zoom, box select, mouse wheel zoom, or reset the chart
Charts are all linked so that selection/movement is mirrored
# Toolbar configuration shared by every chart in this notebook.
tools='box_select,pan,xwheel_zoom,box_zoom,reset'
active_drag='pan'
active_scroll='xwheel_zoom'

# Hover tooltip shared by every chart. It only fires on glyphs named 'polls'.
# Each row is (label, template): '@col' / '@{col with spaces}' pulls a value
# from the glyph's data source and the trailing '{...}' is the format spec,
# interpreted by the matching entry in `formatters`.
hover_tool = HoverTool(
    names=['polls'],
    tooltips=[
        ('Type','@Poll_Type'),
        ('By','@Polling'),
        ('For','@Publisher'),
        ('Fieldwork Start','@Fieldwork_Start_Date{%d/%m/%y}'),
        ('Fieldwork Finish','@Fieldwork_Finish_Date{%d/%m/%y}'),
        # Con share (change) followed by the Con lead over Lab (change) in brackets.
        ('Con [lead]','@Con{%0.1f} (@{Con Change}{%0.1f}) [@{Con lead}{%0.1f} (@{Con lead Change}{%0.1f})]'),
        ('Lab','@Lab{%0.1f} (@{Lab Change}{%0.1f})'),
        # Fixed: this row previously formatted '@Lab', so the LD line of the
        # tooltip showed Labour's share next to the LD change.
        ('LD','@LD{%0.1f} (@{LD Change}{%0.1f})'),
    ],
    formatters={
        'Fieldwork_Start_Date': 'datetime',
        'Fieldwork_Finish_Date': 'datetime',
        'Con' : 'printf',
        'Con Change' : 'printf',
        'Con lead' : 'printf',
        'Con lead Change' : 'printf',
        'Lab' : 'printf',
        'Lab Change' : 'printf',
        'LD' : 'printf',
        'LD Change' : 'printf',
    },
    # display a tooltip whenever the cursor is vertically in line with a glyph
    # mode='vline'
)
# All rows of the poll dataframe feed the three main-party charts.
normal_poll_source = ColumnDataSource(df)
plot_width = 980
plot_height = 250

# Build one scatter chart per party: (chart title, dataframe column, colour).
party_charts = []
for chart_title, vote_column, marker_colour in [
    ("Conservative Voting Intention", 'Con', 'blue'),
    ("Labour Voting Intention", 'Lab', 'red'),
    ("LibDem Voting Intention", 'LD', 'orange'),
]:
    chart = figure(
        x_axis_type="datetime",
        x_axis_label="Fieldwork_Midpoint_Date",
        y_axis_label="%",
        plot_width=plot_width,
        plot_height=plot_height,
        title=chart_title,
        tools=tools,
        active_drag=active_drag,
        active_scroll=active_scroll,
    )
    chart.scatter(
        'Fieldwork_Midpoint_Date',
        vote_column,
        source=normal_poll_source,
        color=marker_colour,
        name='polls',
    )
    chart.add_tools(hover_tool)
    party_charts.append(chart)

p1, p2, p3 = party_charts

# Share p1's x and y ranges with the other two charts so that panning and
# zooming any one chart moves all three together.
for linked_chart in (p2, p3):
    linked_chart.x_range = p1.x_range
    linked_chart.y_range = p1.y_range

layout = column(p1, p2, p3)
show(layout)
Full set of linked charts for all parties ever appearing in these national polls (ordered by how many polls they participate in)
Controls on the right let you pan, box zoom, box select, mouse wheel zoom, or reset the chart
Charts are all linked so that selection/movement is mirrored, but only on the x-axis (zooming the y-axis of a major party's chart would not be helpful when you also want to look at minor parties)
Polls now separated into Normal Polls (circles), General Election Results (big diamonds) and Exit Polls (big X's)
# One data source per value of Poll_Type, so each kind of row can be drawn
# with its own glyph.
poll_type = df["Poll_Type"]
normal_poll_source = ColumnDataSource(df.loc[poll_type == "Normal Poll"])
result_source = ColumnDataSource(df.loc[poll_type == "Result"])
exit_poll_source = ColumnDataSource(df.loc[poll_type == "Exit Poll"])
def VI_time_series(title,col,colour):
    """Build a datetime-axis chart of one column against fieldwork midpoint.

    Three glyph layers are drawn from the module-level data sources, all in
    the same colour and all named 'polls' so the shared hover tool applies:
    scatter markers from ``normal_poll_source``, large diamonds from
    ``result_source`` and large X markers from ``exit_poll_source``.

    Args:
        title: Title shown above the chart.
        col: Name of the data-source column plotted on the y-axis.
        colour: Colour used for every glyph on the chart.

    Returns:
        The configured figure, with the shared hover tool attached.
    """
    chart = figure(
        x_axis_type="datetime",
        x_axis_label="Fieldwork_Midpoint_Date",
        y_axis_label="%",
        plot_width=plot_width,
        plot_height=plot_height,
        title=title,
        tools=tools,
        active_drag=active_drag,
        active_scroll=active_scroll,
    )

    x_field = 'Fieldwork_Midpoint_Date'
    # Keyword arguments common to every glyph layer.
    shared_style = dict(color=colour, name='polls')

    chart.scatter(x_field, col, source=normal_poll_source, **shared_style)
    chart.diamond(x_field, col, source=result_source,
                  size=40, fill_alpha=0.3, line_alpha=0.0, **shared_style)
    chart.x(x_field, col, source=exit_poll_source,
            size=40, fill_alpha=0.3, line_alpha=0.3, **shared_style)

    chart.add_tools(hover_tool)
    return chart
# One chart per series, in display order: (chart title, dataframe column, colour).
vi_series = [
    ("Conservative (Voting Intention)", "Con", 'blue'),
    ("Labour (Voting Intention)", "Lab", 'red'),
    # Con-Lab
    ("Conservative lead over Labour (Voting Intention)", "Con lead", 'pink'),
    ("LibDem (Voting Intention)", "LD", 'orange'),
    ("UKIP (Voting Intention)", "UKIP", 'purple'),
    ("Green (Voting Intention)", "Green", 'green'),
    ("BNP (Voting Intention)", "BNP", 'black'),
    ("SDP (Voting Intention)", "SDP", 'yellow'),
    ("BXP (Voting Intention)", "BXP", 'cyan'),
    ("TIG/CUK (Voting Intention)", "TIG/CUK", 'grey'),
    # Referendum (this entry was previously mislabelled "TIG/CUK")
    ("Referendum (Voting Intention)", "Referendum", 'purple'),
]

figs = [
    VI_time_series(title=series_title, col=series_col, colour=series_colour)
    for series_title, series_col, series_colour in vi_series
]
# Keep the individual chart names available for later cells.
p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11 = figs

# Link only the x ranges to the first chart. The y ranges are deliberately left
# independent: sharing the y range makes LD invisible.
for fig in figs[1:]:
    fig.x_range = p1.x_range

layout = column(figs)
show(layout)
TODO:
short period of Male/Female breakdown - 05-10, 84 rows
Leader versions
Govt good/bad/net
Best PM
Predicted Party share - just 25 rows! - compare to polling/results?
Note: Con-Lab lead can be filled in for all values automatically (e.g. exit poll/result entries as well as normal polls)
# df["Con Change"].hist(bins=223);
# df["Con Change"].value_counts()
# df.loc[(df["Con Change"]<=-11)]
Comments
comments powered by Disqus