In [44]:
import warnings
warnings.filterwarnings('ignore')

from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)

# This line will hide code by default when the notebook is exported as HTML
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)

# This line will add a button to toggle visibility of code blocks, for use with the HTML export version
di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''', raw=True)
In [2]:
dataset_name = "W16_comb"
df_list = [ "BES_Panel", ]
In [3]:
%matplotlib inline

import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pickle, os, gc, re

sns.set();
sns.set_palette("colorblind")

from IPython.display import display, display_html, HTML
from IPython.core.debugger import set_trace
# plt.rcParams["axes.grid"] = False

import Jupyter_module_loader
from utility import *
import gaussian_kde

import warnings
warnings.filterwarnings('ignore')

import holoviews as hv
from holoviews import opts

encoding = "ISO-8859-1"
In [4]:
# Clone this repo into a subdirectory called 'BES_analysis_code' (inside some parent directory - I call mine BES_analysis, but the name doesn't matter)
# %matplotlib inline
(BES_code_folder, BES_small_data_files, BES_data_folder,
 BES_output_folder, BES_file_manifest, BES_R_data_files) = setup_directories()

global BES_Panel, BES_numeric, BES_reduced, BES_reduced_with_na, BES_non_numeric
data_subfolder = BES_data_folder + dataset_name + os.sep

(manifest, dataset_filename, dataset_description, dataset_citation,
 dataset_start, dataset_stop, dataset_wave) = get_manifest(dataset_name, BES_file_manifest)

for df in df_list:
    if df=="BES_Panel":
#         globals()[df]  = pd.read_msgpack(data_subfolder + dataset_filename.replace('.dta','.msgpack'))
        globals()[df]  = pd.read_pickle(data_subfolder + dataset_filename.replace('.dta','.zip'),compression='zip')
    else:
        globals()[df]  = pd.read_msgpack(data_subfolder + df + '.msgpack' )
        globals()[df].replace(-1,np.nan,inplace=True)
  
# (var_type, cat_dictionary, new_old_col_names, old_new_col_names) = get_small_files(data_subfolder, encoding)

# get full set of inferred "cross wave" auth-lib/left-right values and ages
pan_dataset_allr_values = pd.read_csv(BES_small_data_files + "pan_dataset_allr_values"+".csv")
pan_dataset_ages = pd.read_csv( BES_small_data_files + "pan_dataset_ages"+".csv" )
In [5]:
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show,  output_notebook
from bokeh.layouts import column, row, layout
from bokeh.embed import components
from bokeh.models import HoverTool
from bokeh.models import Span, Label
output_notebook()
from datetime import timedelta
In [6]:
BES_Panel = pd.read_pickle(os.path.join("..", "BES_analysis_data", "W19_comb", "BES2019_W19_Panel_v0.1.zip"), compression='zip')

Step One: Locate the period of the Major Shift In Sentiment

  • Find all the directly-immigration-related sentiment variables (see the sketch below):

  • personal preferences for immigration levels (immigrationLevel|immigSelf|(types)More)

  • personal perception of the impact of immigration (immigEcon|immigCultural|immigContributeTake|immigrantsWelfareState|ukCoopMovement)
  • personal expectations about immigration levels/control (effectsEUImmigration|changeImmig|controlImmig)
  • perception of parties/(gov/opp): preference/ability to/responsibility for changing/reducing immigration, prioritisation of immigration (immig(parties)/responsibleImmig(parties)/achieveReduceImmig/(party)Priorities_immig)
  • preference/perception of immigration priority in EU negotiations (euPriorityBalance|dealPriority|negotiationSpecifics)

  • not sure where "benefitsToMigrants" fits in - it's not really an impact/level/expectation

  • but I would expect it to correlate with immigContributeTake/immigrantsWelfareState
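
A minimal first pass at that enumeration (illustrative regex only - the curated list used below is hand-built; assumes BES_Panel is loaded as above):

In [ ]:
# Sketch: pull out column names that look immigration-related
import re
immig_like = re.compile(r"immig|More($|W\d)|euPriorityBalance|benefitsToMigrants|ukCoopMovement", re.IGNORECASE)
immig_cols = [c for c in BES_Panel.columns if immig_like.search(c)]
print(len(immig_cols), sorted(immig_cols)[:10])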
In [7]:
# %%time
# Fix up all the datasets we'll be using (make sure all categories correctly/consistently ordered)
immig_var_stub = ["immigEcon","immigCultural","immigSelf","immigrationLevel","immigContributeTake",
                  "immigrantsWelfareState","controlImmig","effectsEUImmigration","euPriorityBalance",
                  "changeImmig","changeImmigLab","govtHandleImmig","labHandleImmig",
                  "asylumMore","euMore","noneuMore","studentsMore","familiesMore",
                  "responsibleImmig","achieveReduceImmig",
                  "conPriorities_immig","labPriorities_immig","ldPriorities_immig","ukipPriorities_immig",
                  "dealPriority","ukCoopMovement","negotiationSpecifics","benefitsToMigrants"]

fix_cat_dict = {}
# not sure about best way to line up "expectation" variables
fix_cat_dict['changeImmig']=['Getting a lot lower', 'Getting a little lower','Staying about the same',
                             'Getting a little higher', 'Getting a lot higher',  "Don't know" ]
fix_cat_dict['effectsEUImmigration'] = ['Much lower', 'Lower', 'About the same', 'Higher', 'Much higher', "Don't know"]
fix_cat_dict['controlImmig']=['No control at all', 'A little control', 'Some control', 'A lot of control', 'Complete control', "Don't know"]

# let's try to get every axis pointing in the same "sentiment" direction
fix_cat_dict['immigEcon']=['Bad for economy','2', '3', '4', '5', '6', 'Good for economy',  "Don't know"]
fix_cat_dict['immigCultural']=['Undermines cultural life', '2', '3', '4', '5', '6', 'Enriches cultural life', "Don't know" ]
fix_cat_dict["immig(Self|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD)W"] = ['Allow many fewer', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Allow many more', "Don't know"]
fix_cat_dict['(students|eu|asylum|families|noneu)More'] = ['Many fewer', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Many more', "Don't know"]
fix_cat_dict['immigrantsWelfareState'] = ['Strongly agree', 'Agree', 'Neither agree nor disagree', 'Disagree', 'Strongly disagree', "Don't know"]
fix_cat_dict['immigContributeTake'] = ['Get more than they pay', '2.0', '3.0', '4.0',
                                       '5.0', '6.0', 'Pay more than they get', "Don't know"]
fix_cat_dict['euPriorityBalance'] = ['Control immigration', '9', '8', '7', '6', '5', '4', '3', '2', '1', 'Access to the single market', "Don't know"]
fix_cat_dict['immigrationLevel'] = ['Decreased a lot', 'Decreased a little', 'Left the same as it is now',
                                    'Increased a little', 'Increased a lot',  "Don't know"]
fix_cat_dict['ukCoopMovement'] = ["Bad for Britain", "Neither good nor bad for Britain", "Good for Britain", "Don't know"]
fix_cat_dict['benefitsToMigrants'] = ['Immediately on arrival','After living in GB for a year',
                                      'After they have worked and paid taxes for at least four years',
                                      'After they become a British citizen', 'Never', "Don't know"]



# fix_cat_dict['immigrantsWelfareState'] = ['Strongly disagree', 'Disagree',  'Neither agree nor disagree','Agree', 'Strongly agree',"Don't know", ]
# mostly dealPrioritImmig, but might be worth setting alongside the other dealPriority variables
fix_cat_dict['dealPriority'] = ['Not important at all', 'Not very important', 'Somewhat important',
                                'Very important', 'Extremely important' , "Don't know"]

# fix_cat_dict['euPriorityBalance'] = ['Access to the single market', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Control immigration', "Don't know"]
fix_cat_dict['(lab|govt)HandleImmig'] = ['Very badly', 'Fairly badly', 'Neither well nor badly', 'Fairly well', 'Very well', "Don't know"]
fix_cat_dict['responsibleImmig(Conservatives|LibDems|Labour|ScottishGovt|WelshGovt|None|DontKnow)'] = ["No","Yes","Don't know"]
fix_cat_dict['achieveReduceImmig(Brexit|Con|Grn|LD|Lab|None|PC|SNP|TIG|UKIP)'] = ["No","Yes","Don't know"]

fix_cat_dict["(con|lab|ld|ukip)Priorities_immig"] = ["No","Yes","Don't know"]
# my time plotting function assumes variable names won't contain _ - quick hack to circumvent having to change that
BES_Panel[[x.replace("_i","I") for x in search(BES_Panel,"Priorities_immig").index]] = BES_Panel[[x for x in search(BES_Panel,"Priorities_immig").index]]

# my time plotting function assumes variable names won't contain _ - quick hack to circumvent having to change that
BES_Panel[[x.replace("_1","ContinuePayingMoney").replace("_2","AcceptingFoM").replace("_3","AcceptingEUReg").replace("_none","Nothing") for x in search(BES_Panel,"negotiationSpecifics").index]] = BES_Panel[[x for x in search(BES_Panel,"negotiationSpecifics").index]]

fix_cat_dict["negotiationSpecifics"] = ["No","Yes","Don't know"]

def cat_2_num_drop_dk(x):
    if x.dtype.name=='category':
        return x.replace("Don't know",np.nan).cat.remove_unused_categories().cat.codes.replace(-1,np.nan).astype('UInt16')
    else:
        return x
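
# Toy sanity check (illustrative, not part of the pipeline): with the full
# immigEcon category list, codes run 0 ("Bad for economy") to 6 ("Good for
# economy") and "Don't know" comes out as <NA>:
# cat_2_num_drop_dk(pd.Series(pd.Categorical(
#     ['Bad for economy','2','3','4','5','6','Good for economy',"Don't know"],
#     categories=fix_cat_dict['immigEcon'], ordered=True))).tolist()
# -> [0, 1, 2, 3, 4, 5, 6, <NA>]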

def fix_cats(fix_cat_dict):
    for key in fix_cat_dict.keys():
        BES_Panel[ match(BES_Panel, key ).index ] = BES_Panel[ match(BES_Panel,key ).index ]\
            .apply( lambda x: x.cat.set_categories( fix_cat_dict[key], ordered =True ) )
    
fix_cats(fix_cat_dict)

def weighted_mean(x, **kws):
    val, weight = map(np.asarray, zip(*x))
    val, weight = val[~np.isnan(val)],weight[~np.isnan(val)]
    return (val * weight).sum() / weight.sum()
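
# e.g. weighted_mean([(1.0, 2.0), (0.0, 1.0), (np.nan, 5.0)]) -> (1*2 + 0*1)/(2+1) = 2/3
# (NaN values, and their weights, are dropped before averaging)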

# max_wave = int(re.match("W(\d+)_",dataset_name).groups()[0])
max_wave = 19
num_to_wave = {x:"W"+str(x) for x in range(1,max_wave+1)}
wts_for_wave = { "W"+str(y):[x for x in BES_Panel.columns.sort_values(ascending=False) if re.match("wt_(new|full)_W"+str(y)+"(_result)?"+"$",x)][0] for y in range(1,max_wave+1) }
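# wts_for_wave maps each wave to its survey-weight column: the first column
# (sorting descending) that matches wt_new_W<n>/wt_full_W<n>(_result);
# assumes at least one such column exists for every wave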
wave_to_date = BES_file_manifest[BES_file_manifest["Only_or_Combined"]=="Only"][["Wave No","Date_Start"]].set_index("Wave No")["Date_Start"]
wave_to_date[17] = "Nov-19"
wave_to_date[18] = "Nov-19"
wave_to_date[19] = "Dec-19"

# fix endtimeW3 bug!
BES_Panel.loc[BES_Panel["endtimeW3"]=='1970-01-01 00:00:00',"endtimeW3"] = \
    BES_Panel[BES_Panel["endtimeW3"]=='1970-01-01 00:00:00']["starttimeW3"].values

# this is also suspect - date not unreasonable, but overlaps with a different wave! (3 days between start/end)
# 41222   2015-03-27 18:11:37.047
# Name: starttimeW5, dtype: datetime64[ns]
BES_Panel.loc[BES_Panel["starttimeW5"]=='2015-03-27 18:11:37.047000064',"starttimeW5"] = \
    BES_Panel[BES_Panel["starttimeW5"]=='2015-03-27 18:11:37.047000064']["endtimeW5"].values

# still some overlap between waves 4 and 5

midpoint_dict = {}
startpoint_dict = {}
endpoint_dict = {}
# create correct midpoints (technically we should weight these!)
n = 1
min_sample_size = 100
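# Each respondent's interview midpoint is starttime + (endtime - starttime)/2;
# pd.qcut with n=1 collapses these into a single interval per wave, and that
# interval's centre becomes the wave-level "midpoint" date (unweighted, as noted)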
for wave_no in range(1,max_wave+1):
    wave = "W"+str(wave_no)
#     print(wave)

    BES_Panel["midpoint"+wave] = pd.qcut(BES_Panel["endtime"+wave]+((BES_Panel["endtime"+wave]-BES_Panel["starttime"+wave])/2),n)
    date_cats_dict = {BES_Panel["midpoint"+wave].cat.categories[x]:(BES_Panel["midpoint"+wave].cat.categories[x].left+ (BES_Panel["midpoint"+wave].cat.categories[x].right - BES_Panel["midpoint"+wave].cat.categories[x].left)/2).strftime("%Y-%m-%d") for x in range(n)}
    BES_Panel["midpoint"+wave] = pd.to_datetime(BES_Panel["midpoint"+wave].replace(date_cats_dict))  
    BES_Panel["endpoint"+wave] = pd.to_datetime(BES_Panel["endtime"+wave]).max() 
    BES_Panel["startpoint"+wave] = pd.to_datetime(BES_Panel["starttime"+wave]).min() 
    
    BES_Panel["midtime"+wave] = (BES_Panel["starttime"+wave]+(BES_Panel["endtime"+wave]-BES_Panel["starttime"+wave])/2)
#     BES_Panel["midtime"+wave] = BES_Panel["midtime"+wave].apply(lambda x:x.replace(second=0, microsecond=0, nanosecond=0))


    startpoint_dict[wave] = BES_Panel["startpoint"+wave].dropna().values[0]
    endpoint_dict[wave] = BES_Panel["endpoint"+wave].dropna().values[0]
    midpoint_dict[wave] = BES_Panel["midpoint"+wave].dropna().values[0]
In [8]:
def time_series(var_name,title,subtract_var=False,retain_var=True,specific_dates=True, specific_suffix_set="([a-zA-Z]*)",
                use_midpoints=False,col_name="party",dk_str="Don't know",max_y_size=10.0,min_waves_included=2,max_y_size_dict=None,max_wave=19):

    whole_wave_dk_average = True
    df=pd.DataFrame()
    df2 =pd.DataFrame()
    df_wts =pd.DataFrame()
    df_dates = pd.DataFrame()
    
    if subtract_var:
        title=title+"\n(where respondents place parties relative to their own preference set at 0.5)"

    wave_list = []
    redist_vars = pd.Series([re.match(var_name+specific_suffix_set+"($|W\d+)",x).groups()[0] for x in BES_Panel.columns\
                             if re.match(var_name+specific_suffix_set+"($|W\d+)",x)]).value_counts()
    redist_vars = redist_vars[redist_vars>=min_waves_included].index

   
    
    for subj in redist_vars:
        for wave in ["W"+str(x) for x in range(1,max_wave+1)]:
            if var_name+subj+wave not in BES_Panel.columns:
                continue
            else:
                wave_list.append(wave)
            if max_y_size_dict:
                max_y_size = max_y_size_dict[subj]
#             df[var_name+"num_"+subj+"_"+wave] = zip(BES_Panel[var_name+subj+wave].replace(dk_str,np.nan).cat.codes.replace(-1,np.nan)/max_y_size,
#                                                  BES_Panel[var_name+subj+wave].apply(lambda x: x==dk_str if pd.notnull(x) else np.nan),
#                                                  BES_Panel[wts_for_wave[wave]]
#                                                    )
            
            df[var_name+"num_"+subj+"_"+wave] = BES_Panel[var_name+subj+wave].replace(dk_str,np.nan).cat.codes.replace(-1,np.nan)/max_y_size

            
            df2[var_name+"dk_"+subj+"_"+wave] = BES_Panel[var_name+subj+wave]==dk_str
            df2[var_name+"dk_"+subj+"_"+wave][BES_Panel[var_name+subj+wave].isnull()] = np.nan

           
            
    for wave in pd.unique(wave_list):        
        df_wts["wt_"+wave] = BES_Panel[wts_for_wave[wave]]
        
        if use_midpoints:
            df_dates["dt_"+wave] = BES_Panel["midpoint"+wave]
        else:
            df_dates["dt_"+wave] = (BES_Panel["starttime"+wave]+(BES_Panel["endtime"+wave]-BES_Panel["starttime"+wave])/2)
#             .apply(lambda x:x.date())

#     set_trace()
#     df_wts = df_wts[df.notnull().any(axis=1)]
#     df = df[df.notnull().any(axis=1)]

    df2.columns = df2.columns.str.split('_', expand=True)
    df2 = df2.stack(dropna=False)\
                .reset_index()\
                .rename(columns={'level_1':"wave"})\
                .rename(columns={'level_0':"id"}) 

    df_wts.columns = df_wts.columns.str.split('_', expand=True)
    df_wts = df_wts.stack(dropna=False)\
                .reset_index()\
                .rename(columns={'level_1':"wave"})\
                .rename(columns={'level_0':"id"}) 

    df_dates.columns = df_dates.columns.str.split('_', expand=True)
    df_dates = df_dates.stack(dropna=False)\
                .reset_index()\
                .rename(columns={'level_1':"wave"})\
                .rename(columns={'level_0':"id"}) 

    df.columns = df.columns.str.split('_', expand=True)
    df = df.stack(dropna=False)\
            .reset_index()\
            .rename(columns={'level_1':"wave"})\
            .rename(columns={'level_0':"id"}) 

    content_columns = [(var_name+"dk",x) for x in redist_vars]
    df["wt"] = df_wts["wt"]
    df[content_columns] = df2[content_columns]
    df["date"] = df_dates["dt"]
    content_columns = [(var_name+"num",x) for x in redist_vars]+[(var_name+"dk",x) for x in redist_vars]
    # only keep rows with content (variable values/dks)

    df = df[df[content_columns].notnull().any(axis=1)]

#     df = df.loc[ df[[x for x in df.columns if var_name+"num" in x]].notnull().any(axis=1) ]
    df.loc[:,"wt"] = df.loc[:,"wt"].fillna(1.0).values
    temp_ind_name = "temp_index"

#     if specific_dates:
#         df["date"] = df[["id","wave"]].merge(right=df_dates,
#                  how="left",left_on=["id","wave"],right_on=["id","wave"])["dt"].values

    df[temp_ind_name] = list(zip(df["wave"],df["wt"],df["date"],df["id"]))
    df = df.set_index(temp_ind_name).drop(["id","wave","wt","date"],axis=1)



    if subtract_var:
        if retain_var:
            focal_vars = [x for x in df.columns if (var_name+"num" in x) and (subtract_var not in x)]
            df[focal_vars] = df[focal_vars].apply(lambda x: x-df[(var_name+"num",  subtract_var)])+0.5
        else:
            focal_vars = [x for x in df.columns if var_name+"num" in x]
            df[focal_vars] = df[focal_vars].apply(lambda x: x-df[(var_name+"num",  subtract_var)])+0.5
            df.drop((var_name+"num",  subtract_var),axis=1,inplace=True)

#     df2["wt"] = df_wts["wt"]

#     df2 = df2.loc[ df2[[x for x in df2.columns if var_name+"dk" in x]].notnull().any(axis=1) ]
#     df2.loc[:,"wt"] = df2.loc[:,"wt"].fillna(1.0).values
#     if specific_dates:
#         df2["date"] = df2[["id","wave"]].merge(right=df_dates,
#                  how="left",left_on=["id","wave"],right_on=["id","wave"])["dt"].values

#     temp_ind_name = "temp_index"
#     df2[temp_ind_name] = list(zip(df2["wave"],df2["wt"],df2["date"]))
#     df2 = df2.set_index(temp_ind_name).drop(["id","wave","wt","date"],axis=1)


    flat_df_num = df.stack().reset_index().rename(columns={'level_1':col_name})

    if specific_dates:
        flat_df_num["wave"] = flat_df_num[temp_ind_name].apply(lambda x:x[0])    
    else:
        flat_df_num["wave"] = flat_df_num[temp_ind_name].apply(lambda x:int(x[0].split("W")[1]))

    flat_df_num["wt"]   = flat_df_num[temp_ind_name].apply(lambda x:x[1])
    flat_df_num["date"] = flat_df_num[temp_ind_name].apply(lambda x:x[2])
    flat_df_num["id"]   = flat_df_num[temp_ind_name].apply(lambda x:x[3])
    
    
    
    flat_df_num.drop(temp_ind_name,axis=1,inplace=True)
    flat_df_num[col_name] = flat_df_num[col_name].astype('category')

    flat_df_num[var_name+"_wts"] = list(zip(flat_df_num[var_name+"num"],flat_df_num["wt"]))
    flat_df_num[var_name+"_dk"] = list(zip(flat_df_num[var_name+"dk"],flat_df_num["wt"]))

    
#     flat_df_dk = df2.stack().reset_index().rename(columns={'level_1':col_name,0:"dk"})
#     if specific_dates:
#         flat_df_dk["wave"] = flat_df_dk[temp_ind_name].apply(lambda x:x[0])    
#     else:
#         flat_df_dk["wave"] = flat_df_dk[temp_ind_name].apply(lambda x:int(x[0].split("W")[1]))

#     flat_df_dk["wt"]   = flat_df_dk[temp_ind_name].apply(lambda x:x[1])
#     flat_df_dk["date"] = flat_df_dk[temp_ind_name].apply(lambda x:x[2])
    
#     flat_df_dk.drop(temp_ind_name,axis=1,inplace=True)
#     flat_df_dk[var_name+"dk"] = flat_df_dk[var_name+"dk"].astype('int')
    
#     if whole_wave_dk_average:
#     # calculating weighted total-wave dk average
#         flat_df_dk = flat_df_dk.groupby(["wave",col_name]).apply(lambda x: (x["wt"]*x[var_name+"dk"]).sum()/x["wt"].sum() ).reset_index().rename(columns={0:"dk"})
#         flat_df_num = flat_df_num.merge(right=flat_df_dk[[col_name,"wave","dk"]],how='left',left_on=[col_name,"wave"],right_on=[col_name,"wave"])        
#     else:
    
#         flat_df_dk["dk"+"_wts"] = list(zip(flat_df_dk[var_name+"dk"],flat_df_num["wt"]))
    

#     flat_df_num = flat_df_num.merge(right=flat_df_dk[[col_name,"wave","dk"]],how='left',left_on=[col_name,"wave"],right_on=[col_name,"wave"])
    if not specific_dates:
        flat_df_num["date"] = flat_df_num["wave"].apply(lambda x: wave_to_date[x])
        flat_df_num["date"] = pd.to_datetime(flat_df_num["wave"] , format="%b-%y")

#     raise Exception
        
    return flat_df_num, df, df2, 
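
The reshape above leans on a split/stack idiom that is easy to miss; here is a toy version (hypothetical column names) of the same move:

In [ ]:
# Wide columns like "vnum_Self_W1" become one row per (respondent, wave)
toy = pd.DataFrame({"vnum_Self_W1": [0.1, 0.2], "vnum_Self_W2": [0.3, 0.4]})
toy.columns = toy.columns.str.split('_', expand=True)  # 3-level MultiIndex
tidy = toy.stack(dropna=False).reset_index()\
          .rename(columns={'level_0': 'id', 'level_1': 'wave'})
print(tidy)  # columns: id, wave, (vnum, Self)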
In [9]:
def sample_size(x, **kws):
    global max_sample_size    
    return len(x)/max_sample_size

def between_wave_retention(*args, **kwargs):
    global position_in_var_list
    var_name = kwargs['var_name'][position_in_var_list]
    position_in_var_list = position_in_var_list+1
    color = kwargs['color']
    label = kwargs['label']
    alpha = kwargs['alpha']
    df = BES_Panel[match(BES_Panel,var_name+"($|W\d+)").index].apply(lambda x: x.cat.codes.replace(-1,np.nan)).diff(axis=1).notnull().sum()/match(BES_Panel,var_name+"($|W\d+)")
    waves_present = {"W"+x.split("W")[-1]:BES_Panel["midpointW"+x.split("W")[-1]].dropna().values[0] for x in df.index}    
    df.index = [BES_Panel["midpointW"+x.split("W")[-1]].dropna().values[0] for x in df.index]
    df.drop(df.index[0],inplace=True)
    sns.lineplot(data=df,color=color,label=label,alpha=alpha)

    for wave in waves_present.keys():
        plt.text(x=waves_present[wave],y=0,s=wave, rotation=90, fontsize=12)   
        
def plot_time_series(var_name,specific_suffix_set,title,col_name,col_wrap,treatment):
    dk_str="Don't know"
    lowest_alpha=0.3
    low_alpha=0.5    
    date_dict = {"22 May 2014":"EE","7 May 2015":"GE","23 June 2016":"EU","8 June 2017":"GE","23 May 2019":"EE"}
    date_col_dict = {"EE":'green',"GE":'red',"EU":'blue'}    
    
    max_y_size_dict = {x: len(BES_Panel[match(BES_Panel,var_name+x+"($|W\d+)").index[0]].cat.remove_categories(dk_str).cat.categories)-1 for x in specific_suffix_set.replace("(","").replace(")","").split("|") }
    # max_y_size=len(BES_Panel[ match(BES_Panel,var_name+specific_suffix_set+"($|W\d+)").index[0] ].cat.remove_categories(dk_str).cat.categories)-1,
    flat_df_num, df, df2, = time_series(var_name,title,specific_suffix_set=specific_suffix_set,col_name="party",
                                max_y_size_dict = max_y_size_dict,
                                dk_str=dk_str,min_waves_included=2,use_midpoints=False)
    
#     raise Exception
    flat_df_num["wave_midpoint"] = flat_df_num["wave"].apply( lambda x: midpoint_dict[x] )
    flat_df_num["wave_startpoint"] = flat_df_num["wave"].apply( lambda x: startpoint_dict[x] )
    flat_df_num["wave_endpoint"] = flat_df_num["wave"].apply( lambda x: endpoint_dict[x] )
    
    

    global max_sample_size
    global position_in_var_list
    position_in_var_list=0
#     max_sample_size = flat_df_num["dk"].value_counts().max()
    max_sample_size = flat_df_num.groupby(["party","wave"]).apply(lambda x: len(x)).max()


    g = sns.FacetGrid(data=flat_df_num, col=col_name, col_wrap=col_wrap, ylim=(0.0, 1.0), legend_out=False, height=height);
    
#     raise Exception
    
    g.map(sns.lineplot, "wave_midpoint",var_name+"_wts", estimator=weighted_mean,ci=None, label="Mean answer",
          err_style="bars", markers=True, dashes=False);
    g.map(sns.lineplot, "wave_midpoint",var_name+"_dk", color='r', estimator=weighted_mean,ci=None,
          err_style="bars", label="DK fraction",markers=True, dashes=False, alpha=low_alpha);
    g.map(sns.lineplot, "wave_midpoint","wave", color='g', estimator=sample_size,ci=None,
          label="Sample Size\n(% of max: "+str(max_sample_size)+")",markers=True, dashes=False, alpha=low_alpha);
    g.map(between_wave_retention, "wave_midpoint","wave", color='k', var_name=[var_name+x for x in g.col_names],
          label="Retention",markers=True, dashes=False, alpha=low_alpha);
    

    for date in date_dict.keys():
        if flat_df_num["wave_midpoint"].min()<pd.to_datetime(date) and flat_df_num["wave_midpoint"].max()>pd.to_datetime(date):
            g.map(plt.axvline, x=pd.to_datetime(date), ls='--', c=date_col_dict[date_dict[date]], linewidth=1,
                  alpha=lowest_alpha, label=date_dict[date] )
            
    g.map(plt.axhline, y=0.5, ls='-.', c='grey',linewidth=1, alpha=lowest_alpha)

    g.add_legend().set_ylabels("").set_titles(col_template="{col_name}")
    g.fig.suptitle(title, y=1.0+0.03*len(title.split("\n")));
    [plt.setp(ax.get_xticklabels(), rotation=45) for ax in g.axes.flat]

    output_subfolder = create_subdir(BES_output_folder, treatment)
    g.savefig(output_subfolder +clean_filename(var_name +specific_suffix_set)+ ".png", bbox_inches='tight')
    return flat_df_num
In [10]:
from scipy.signal import find_peaks
from bokeh.models import Span
from datetime import datetime
import pytz

# def rolling_av(party, var_name):

#     df_rolling = flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy().set_index('date')
# # df_rolling = df_rolling["immignum"].rolling(400,center=True).mean()
#     df_rolling = df_rolling[var_name].dropna().rolling(500,center=True,win_type='blackmanharris').mean()
# # win_type='gaussian').sum(std=3)
# # df_rolling = df_rolling["immignum"].rolling('1h',min_periods=10).mean()
#     return df_rolling

def weighted_mean_for_rolling(processed_dataframe_windowed):
    return np.average(a=processed_dataframe_windowed,weights=processed_dataframe_windowed.index)
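
# Note: the "BES" branch of rolling_av below indexes the frame by "wt", so each
# rolling window arrives here with the weights on its index - a hack to coax a
# *weighted* rolling mean out of pandas' rolling machinery.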

def rolling_av_experimental( party, var_name, win_type='blackmanharris',window_size=500 ):
    
    df_rolling = flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy()
    min_sample_size=200
    temp_dates = pd.qcut( df_rolling["date"], 
            int(np.floor(df_rolling["date"].notnull().sum()/(min_sample_size))) ).apply(lambda x: x.left+(x.right-x.left)/2)

    return df_rolling.groupby(temp_dates)[var_name].agg( lambda x: weighted_mean(x) )

def rolling_av( party, var_name, win_type='blackmanharris',window_size=250 ):

    df_rolling = flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy()
    fmt = "%Y-%m-%d:%H"

    # slow!
    if win_type=="BES":
        
        # this is a really dumb way to get the right date index!
        
        date_ind = df_rolling.set_index("date")[var_name].dropna().rolling(window_size,
            center=True).mean().index
#         print(date_ind.shape)
        #df_rolling["date"].copy().values
#         df_rolling.index = flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy()["date"].values
#         raise Exception
        df_rolling = df_rolling.set_index("wt")[[var_name,'date']].dropna().rolling(window_size,
            on='date',center=True)[var_name].apply(weighted_mean_for_rolling, raw=False)
#         print(df_rolling.shape)
        df_rolling.index = date_ind
#         flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy()["date"].values
#         return temp
    else:
        df_rolling = df_rolling.set_index("date")[var_name].dropna().rolling(window_size,center=True,win_type=win_type).mean()       


    df_rolling.index = [x.strftime(fmt) for x in df_rolling.index]
    df_rolling.reset_index().groupby('index').mean()
    df_rolling = df_rolling.reset_index().groupby('index').mean().dropna()
    df_rolling.index = [pd.to_datetime(x,format=fmt) for x in df_rolling.index]
    df_rolling.index.name='date'
    return df_rolling[var_name]
    

def date_fraction(x,df_temp):
    
    floor_x = int( np.floor(x) )
    return df_temp.index[floor_x]+(x-floor_x)*(df_temp.index[floor_x+1]-df_temp.index[floor_x])
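
# e.g. date_fraction(2.25, df_temp) -> the timestamp 25% of the way between
# df_temp.index[2] and df_temp.index[3]; find_peaks reports left_ips/right_ips
# as fractional sample positions, so this converts them back to dates.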


def detect_peaks(df_rolling,prominence,width):

    df_temp = df_rolling.copy().dropna()
    peaks, properties = find_peaks(df_temp, prominence=prominence, width=width)

    x= df_temp.index[peaks]
    y= df_temp[peaks].values

    outliers = pd.DataFrame(properties)
    outliers.index = x

    outliers["left_ips"]  = outliers["left_ips"].apply(lambda x: date_fraction(x,df_temp))
    outliers["right_ips"] = outliers["right_ips"].apply(lambda x: date_fraction(x,df_temp))

    aggregate_to_the_hour_fmt = "%Y-%m-%d:%H"
    aggregate_to_the_day_fmt = "%Y-%m-%d"
    fmt = aggregate_to_the_hour_fmt

    outliers["right_ips"] = outliers["right_ips"].apply(lambda x:pd.to_datetime(x.strftime(fmt),format=fmt) if pd.notnull(x) else np.nan)
    outliers["left_ips"] = outliers["left_ips"].apply(lambda x:pd.to_datetime(x.strftime(fmt),format=fmt) if pd.notnull(x) else np.nan)

    outliers.index = outliers.reset_index()["date"].apply(lambda x:pd.to_datetime(x.strftime(fmt),format=fmt) if pd.notnull(x) else np.nan)
    outliers = outliers.rename(columns = {"left_ips":"start","right_ips":"stop"})

    return x,y,outliers
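
# Minimal usage sketch (synthetic series; illustrative parameters only):
# idx = pd.date_range("2016-01-01", periods=200, freq="H", name="date")
# demo = pd.Series(np.sin(np.linspace(0, 6, 200))*0.2 + 0.5, index=idx)
# x_pk, y_pk, outl = detect_peaks(demo, prominence=0.15, width=20)
# -> outl lists each peak's prominence plus interpolated start/stop timestamps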

def get_bokeh_source(flat_df_num,min_sample_size=100,var_name=""):

    flat_df_bokeh = flat_df_num.copy()

    # aggregate dates to the level you wish
    aggregate_to_the_hour_fmt = "%Y-%m-%d:%H"
    aggregate_to_the_day_fmt = "%Y-%m-%d"
    fmt = aggregate_to_the_day_fmt
    flat_df_bokeh["date"] = pd.to_datetime(flat_df_bokeh["date"])
    flat_df_bokeh["shortdate"] = pd.to_datetime(flat_df_bokeh["date"].apply(lambda x:x.strftime(fmt) if pd.notnull(x) else np.nan),
                                        format=fmt)

    flat_df_bokeh["date"] = list(zip(flat_df_bokeh["date"].apply(lambda x: x.timestamp() if pd.notnull(x) else np.nan),
                                     flat_df_num["wt"])) # turn into numeric timestamp
    
    df = flat_df_bokeh.groupby(['shortdate','party'])[["date",var_name+"_wts",var_name+"_dk"]].agg( lambda x: weighted_mean(x) ).reset_index()
    df["date"] = df["date"].apply(lambda x: datetime.fromtimestamp(x,tz=pytz.timezone('GMT')) if pd.notnull(x) else np.nan  ) # turn back from timestamp
    
    df["N"] = flat_df_bokeh.groupby(['shortdate','party']).count().reset_index()[var_name+"num"]
    df["wt_mean"] = flat_df_bokeh.groupby(['shortdate','party']).mean().reset_index()["wt"]*100/2 # "natural" position at 50%
    df["wt_std"] = flat_df_bokeh.groupby(['shortdate','party']).std().reset_index()["wt"]*100/3
    df["dk"] = df[var_name+"_dk"]*100 # % less confusing than fraction
    df["stdev"] = flat_df_bokeh.groupby(['shortdate','party'])[var_name+"num"].std().reset_index()[var_name+"num"]
    df = df[ df["N"]>=min_sample_size ]
    # plot in bokeh so we can explore!
    df.index = df["date"]
    df = df.rename(columns={var_name+"_wts":var_name+"num"}) # ,"shortdate":"date"
    df = df.pivot(index = "date",columns="party", values=[var_name+'num', 'dk', 'N', 'stdev','wt_mean','wt_std'])
    df.columns = ["_".join(x).strip("_") for x in df.columns]
    df = df.reset_index()
    party_list = flat_df_bokeh["party"].unique()
    
    return df,party_list

from bokeh.models import BoxAnnotation

def bokeh_time(dtstr):
#     return pd.to_datetime(dtstr).value / 1e6
    return pd.to_datetime(dtstr).timestamp() * 1000
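
# e.g. bokeh_time("23 June 2016") -> milliseconds since the epoch, the unit
# Bokeh uses to position datetime Spans/annotations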

def bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict,var_name,x='date',min_sample_size=0,
                          plot_width=490,plot_height=300,dk=True,
                          tools='box_select,pan,xwheel_zoom,box_zoom,reset',active_drag='pan',active_scroll='xwheel_zoom',
                          prominence=.15,width=20,rolling_win_type = 'blackmanharris',window_size=250):


    y_axis_label_dk = "&tri=daily-mean-wt/squ=daily-std-wt"
    wave_gap_days_max = 2
    lowest_alpha=0.3
    low_alpha=0.5    
    date_dict = {"22 May 2014":"EE","7 May 2015":"GE","23 June 2016":"EU",
                 "8 June 2017":"GE","23 May 2019":"EE","21 April 2016":"OB",
                 "9 October 2014":"CL","12 December 2019":"GE"}
    date_col_dict = {"EE":'green',"GE":'red',"EU":'blue',"OB":'pink'}        
    
    df,party_list =  get_bokeh_source(flat_df_num, min_sample_size=min_sample_size, var_name=var_name)
    party_list = list( color_dict.keys() )    
    for party in party_list:
        df["scaled_N_"+party] = df["N_"+party].apply(lambda x: np.log2(x+1))/2
    source = ColumnDataSource( df )    


    p = {}
    p_dk = {}
    tool_dict = {}
    outlier_dict = {}
#     rolling_win_type = 'BES'

    
    

    for party in party_list:

        title = title_dict[party]
        color = color_dict[party]
        y = var_name+'num_'+party
        

        tool_dict[party]=\
            HoverTool(names=['daily_aggregates'],tooltips = [
                      ('Date','@date{%d/%m/%y}'),
            #                               ('Date','@wave'),
                      ('Mean','@'+var_name+'num_'+party),
                      ('N','@N_'+party),
                      ("SD",'@stdev_'+party),
                      ("DK%",'@dk_'+party),
                     ],
                formatters={'date': 'datetime'},
                # display a tooltip whenever the cursor is vertically in line with a glyph
#                 mode='vline'
                     )


        p[party]= figure(x_axis_type="datetime", x_axis_label = x_axis_label,
                    y_axis_label = y_axis_label,
                    plot_width = plot_width, plot_height=plot_height,
                    title = title, tools = tools, active_drag = active_drag, active_scroll=active_scroll)
        p[party].title.text_font_size = '8pt'

        df_rolling = rolling_av( party, var_name+"num" ,win_type=rolling_win_type,window_size=window_size )
        inferred_wave_gaps = (df_rolling.reset_index()["date"].diff()>timedelta(days=wave_gap_days_max)).astype('int').cumsum()

        for inf_wave in range(0,inferred_wave_gaps.max()+1):   
            rolling_source = ColumnDataSource(pd.DataFrame(df_rolling[df_rolling.index[inferred_wave_gaps==inf_wave]]))
            p[party].line(x,var_name+"num", source=rolling_source, color=color)


        x_out,y_out,outliers = detect_peaks(df_rolling,prominence,width)
        p[party].x(x_out,y_out,color = 'green',size=20, alpha=0.5)

#         y = var_name+"_wts"
        p[party].circle(x,y, source=source, color=color,name='daily_aggregates', size='scaled_N_'+party,
                        fill_alpha=0.0,line_width=1)
        p[party].add_tools(tool_dict[party])
        outliers.name = party
        outlier_dict[party] = outliers

         
                             

        if dk:
            df_rolling = rolling_av( party, var_name+"dk",win_type=rolling_win_type,window_size=window_size )*100
#             rolling_source = ColumnDataSource(pd.DataFrame(df_rolling))

            p_dk[party]= figure(x_axis_type="datetime",x_axis_label = x_axis_label,
                            y_axis_label=y_axis_label_dk,
                            plot_width = plot_width, plot_height=plot_height,
                            title="".join(title.split(" (")[:-1]+[" (% Don't know)"]),
                            tools=tools, active_drag=active_drag, active_scroll=active_scroll)
            p_dk[party].title.text_font_size = '8pt'
#             p_dk[party].scatter('date', var_name+"_dk", source=rolling_source, color=color, name='daily_aggregates')
            p_dk[party].circle('date', 'dk_'+party, source=source, color=color, name='daily_aggregates',
                                size='scaled_N_'+party, fill_alpha=0.0,line_width=1)
            p_dk[party].triangle('date', 'wt_mean_'+party, source=source, color=color, 
                                size='scaled_N_'+party, fill_alpha=0.0,line_width=1)    
            p_dk[party].square('date', 'wt_std_'+party, source=source, color=color, 
                                size='scaled_N_'+party, fill_alpha=0.0,line_width=1) 
            
            p_dk[party].add_tools( tool_dict[party] )

            inferred_wave_gaps = (df_rolling.reset_index()["date"].diff()>timedelta(days=wave_gap_days_max)).astype('int').cumsum()
            for inf_wave in range(0,inferred_wave_gaps.max()+1):   
                rolling_source = ColumnDataSource(pd.DataFrame(df_rolling[df_rolling.index[inferred_wave_gaps==inf_wave]]))
                p_dk[party].line(x,var_name+"dk", source=rolling_source, color=color)

            x_out,y_out,outliers = detect_peaks(df_rolling,prominence*100,width)
            p_dk[party].x(x_out,y_out,color = 'green',size=20, alpha=0.5)
   
        for date in date_dict.keys():       
            
            if flat_df_num["wave_startpoint"].min()<pd.to_datetime(date) and flat_df_num["wave_endpoint"].max()>pd.to_datetime(date):
                
                my_label = Label(x=pd.to_datetime(date), y=200, y_units='screen', text=date_dict[date])
                p[party].add_layout(my_label) 
            
                box_left = pd.to_datetime(date)
                box_right = pd.to_datetime(date)+timedelta(days=1)
                if date_dict[date]=="OB":
                    box_right = box_right+timedelta(days=1)
                box = BoxAnnotation(left=box_left, right=box_right,
                                    line_width=1, line_color='black', line_dash='dashed',
                                    fill_alpha=0.2, fill_color='orange')

                p[party].add_layout(box)   
                if dk:
                    p_dk[party].add_layout(my_label) 
                    p_dk[party].add_layout(box) 

    # Link every chart (and its DK twin) to a single shared x-range so that
    # panning/zooming one chart pans/zooms them all
    shared_x_range = p[party_list[0]].x_range
    for party in party_list:
        p[party].x_range = shared_x_range
        if dk:
            p_dk[party].x_range = shared_x_range

    if dk:
        lay = layout( [[y for y in x] for x in list(zip(p.values(),p_dk.values()))] )
    else:
        lay = layout( list(p.values()) )



    show(lay)

    for party in party_list:
        print(party)
        display(outlier_dict[party])
        
    return df,df_rolling
In [11]:
colour_list = ["blue","red","purple", "olive","orange","brown", "pink","cyan","grey","green"]

(1) immigEcon, immigCultural

Very low, consistent DKs; sample size is high (the big boost in the last wave is completely new people, so retention takes a dive). The two start in almost the same place, but immigEcon moves more (makes me wonder what happened before 2014 - were they always in lockstep?).

It's only a pity they didn't cover this in W9, right after the EU referendum. Still, it's notable that the W7->W8 (pre-campaign -> campaign) shift is completely in line with the W8->W10 shift. Again, kind of a shame we don't see W5/W6.

Sharpest shift: W7/W8 -> W10/W11.

In [12]:
%%time
var_name = "immig"
title= "\n".join(["Do you think immigration is good or bad for Britain’s economy/cultural life?",
                  "Bad for economy/Undermines cultural life(0)-Good for economy/Enriches cultural life(1) (7 step scale)"])
specific_suffix_set = "(Econ|Cultural)"
col_wrap = 2
height   = 6

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 4min 33s
In [13]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"


suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Do you think immigration is bad(0) or good(1) for Britain’s "+"("+x+")?" for x in suff_list]))

prominence=.15
width=20
                                 
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,
                    prominence=prominence,width=width)
#                                       ,rolling_win_type="BES")
#, rolling_win_type="BES")
Econ
prominences left_bases right_bases widths width_heights start stop
date
2014-06-23 18:00:00 0.197828 158 1674 70.104772 0.448665 2014-06-18 10:00:00 2014-06-24 00:00:00
2014-10-13 16:00:00 0.241922 1674 2285 162.373092 0.483020 2014-10-12 11:00:00 2015-03-09 16:00:00
2015-03-27 04:00:00 0.162197 2211 2285 24.879359 0.458001 2015-03-26 21:00:00 2015-03-27 22:00:00
2016-04-22 08:00:00 0.244154 1674 3684 43.543438 0.499778 2016-04-21 08:00:00 2016-04-23 09:00:00
2016-06-21 12:00:00 0.172447 3684 3940 22.333261 0.475694 2016-06-20 19:00:00 2016-06-21 18:00:00
2016-12-02 10:00:00 0.191437 1674 4218 28.855572 0.544315 2016-12-01 09:00:00 2016-12-02 14:00:00
2019-03-26 12:00:00 0.205456 1674 6340 43.011393 0.606291 2019-03-24 19:00:00 2019-03-26 15:00:00
Cultural
prominences left_bases right_bases widths width_heights start stop
date
2014-03-05 22:00:00 0.195328 159 567 54.776389 0.441493 2014-03-05 14:00:00 2014-03-07 20:00:00
2014-06-07 16:00:00 0.157039 567 925 43.763271 0.448976 2014-06-06 10:00:00 2014-06-08 12:00:00
2014-06-23 18:00:00 0.226755 159 1673 69.632059 0.428506 2014-06-18 11:00:00 2014-06-23 23:00:00
2014-10-04 08:00:00 0.172389 1330 1673 70.955799 0.437952 2014-10-03 15:00:00 2014-10-06 17:00:00
2014-10-13 15:00:00 0.282846 159 3188 160.855500 0.483045 2014-10-12 10:00:00 2015-03-09 14:00:00
2015-03-18 23:00:00 0.161570 1909 2099 21.315225 0.470873 2015-03-18 19:00:00 2015-03-19 17:00:00
2015-03-29 20:00:00 0.222114 2284 2492 30.863376 0.472214 2015-03-28 18:00:00 2015-03-30 01:00:00
2016-04-21 14:00:00 0.254030 2492 3188 43.406010 0.473962 2016-04-21 08:00:00 2016-04-23 09:00:00
2016-06-17 16:00:00 0.159920 3188 3939 26.188731 0.437263 2016-06-17 12:00:00 2016-06-18 14:00:00
2017-05-01 13:00:00 0.230598 3188 4606 42.556880 0.490538 2017-04-30 11:00:00 2017-05-02 05:00:00
2018-05-13 09:00:00 0.191158 3188 5147 32.755981 0.525044 2018-05-12 19:00:00 2018-05-14 04:00:00
2019-03-24 13:00:00 0.223871 159 6145 24.630744 0.548099 2019-03-23 18:00:00 2019-03-24 18:00:00
Wall time: 1min 3s

immigEcon

DEFINITE IMPACT

UNCLEAR

I don't buy any of the extra points that show up in immigCultural.


(2) immigSelf|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD

The chart is a bit ugly because W12/W13 only appear for a small/very small fraction of those cohorts (as top-up exercises, I think). A bit heart-breaking, but they probably need to be dropped for anything involving personal differences (or some painful model-based massive imputation to fill in all the gaps).

The scale of movement of immigSelf is ... sloooow - but steady (particularly once the W12/W13 fractions are dropped). Re: parties - another one where only the Con chart is really interesting.

In [14]:
%%time
var_name = "immig"

title= "\n".join(["Some people think that the UK should allow *many more* immigrants to come to the UK",
                  "to live andothers think that the UK should allow *many fewer* immigrants.",
                  "Where would you place yourself and the parties on this scale?",
                  "Many fewer (0) - Many more (10)"])
specific_suffix_set = "(Self|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD)"
col_name="party"
col_wrap=3
height = 4

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 9min 9s
In [15]:
BES_Panel.drop( match(BES_Panel,"immig(Self|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD)(W12|W13)").index, axis=1, inplace=True)
In [16]:
%%time
## now without those two waves


var_name = "immig"

title= "\n".join(["Some people think that the UK should allow *many more* immigrants to come to the UK",
                  "to live andothers think that the UK should allow *many fewer* immigrants.",
                  "Where would you place yourself and the parties on this scale?",
                  "Many fewer (0) - Many more (10)"])
specific_suffix_set = "(Self|Con|Lab|TIG|Green|UKIP|PC|SNP|LD)"
# drop Brexit because in only one wave
col_name="party"
col_wrap=3
height = 4

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 8min 27s
In [17]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"

suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Think the UK should allow many more/fewer immigrants?"+"("+x+")" for x in suff_list]))

df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name, rolling_win_type="BES")
Self
prominences left_bases right_bases widths width_heights start stop
date
2016-04-21 20:00:00 0.218459 24 1880 35.487238 0.320955 2016-04-21 09:00:00 2016-04-22 21:00:00
2016-05-20 11:00:00 0.156942 586 1257 20.480832 0.309937 2016-05-19 18:00:00 2016-05-20 14:00:00
Con
prominences left_bases right_bases widths width_heights start stop
date
Lab
prominences left_bases right_bases widths width_heights start stop
date
TIG
prominences left_bases right_bases widths width_heights start stop
date
Green
prominences left_bases right_bases widths width_heights start stop
date
2019-03-27 14:00:00 0.174589 414 3665 35.494537 0.673462 2019-03-26 09:00:00 2019-03-27 20:00:00
UKIP
prominences left_bases right_bases widths width_heights start stop
date
PC
prominences left_bases right_bases widths width_heights start stop
date
SNP
prominences left_bases right_bases widths width_heights start stop
date
LD
prominences left_bases right_bases widths width_heights start stop
date
2016-11-29 10:00:00 0.179545 416 2639 139.611242 0.623921 2016-07-04 10:00:00 2016-11-29 22:00:00
Wall time: 10min 57s

(3) studentsMore|euMore|asylumMore|familiesMore|noneuMore

- consistent increase W8->W13
- W7->W8 sharper but less consistent (flat for eu, a bump for noneu, negative for families)
In [18]:
%%time
var_name = ""

title= "\n".join(["Do you think that Britain should allow more or fewer of the",
                  "following kinds of people to come and live in Britain?",
                  "Many fewer (0) - Many more (10)"])
specific_suffix_set = "(studentsMore|euMore|asylumMore|familiesMore|noneuMore)"
col_name="party"
col_wrap = 3
height   = 5

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 2min 4s
In [19]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"


suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Britain should allow more/fewer of the following kinds of people?"+"("+x+")" for x in suff_list]))

prominence=.15
width=20
                                 
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,)
#, rolling_win_type="BES")
studentsMore
prominences left_bases right_bases widths width_heights start stop
date
2016-04-22 09:00:00 0.224791 70 1587 41.879951 0.466643 2016-04-21 09:00:00 2016-04-23 08:00:00
euMore
prominences left_bases right_bases widths width_heights start stop
date
2016-04-22 10:00:00 0.174153 29 978 37.301631 0.391545 2016-04-21 10:00:00 2016-04-23
asylumMore
prominences left_bases right_bases widths width_heights start stop
date
2016-04-21 18:00:00 0.260395 11 1330 35.447252 0.387838 2016-04-21 10:00:00 2016-04-22 22:00:00
2016-05-16 10:00:00 0.178187 567 1330 23.699943 0.339899 2016-05-15 12:00:00 2016-05-16 11:00:00
familiesMore
prominences left_bases right_bases widths width_heights start stop
date
2016-04-22 10:00:00 0.21188 24 760 40.236017 0.425152 2016-04-21 10:00:00 2016-04-23 07:00:00
noneuMore
prominences left_bases right_bases widths width_heights start stop
date
2016-04-22 05:00:00 0.199518 60 597 34.188204 0.369299 2016-04-21 10:00:00 2016-04-22 22:00:00
Wall time: 34.3 s

(4) (labHandleImmig|govtHandleImmig)

Supremely dull

In [20]:
# %%time
var_name = ""

title= "\n".join(["How well do you think The Present Government has handled",
                  "/A Labour Government would handle immigration?",
                  "Very Badly (0) - Very Well (1) (5pt scale)"])
specific_suffix_set = "(labHandleImmig|govtHandleImmig)"
col_name="party"
col_wrap=2
height = 6

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
In [21]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"

color_dict= {"govtHandleImmig":"blue","labHandleImmig":"red"}
title_dict= {"govtHandleImmig":"How well do you think The Present Government has handled immigration? (5pt scale)",
             "labHandleImmig":"How well do you think The Labour Party would handle immigration? (5pt scale)"}
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict,var_name,
                                      prominence=prominence,width=width,rolling_win_type="BES")
govtHandleImmig
prominences left_bases right_bases widths width_heights start stop
date
labHandleImmig
prominences left_bases right_bases widths width_heights start stop
date
2015-03-25 11:00:00 0.158404 874 1866 140.166103 0.338546 2015-03-23 10:00:00 2015-03-29 18:00:00
Wall time: 34.6 s

(5) (immigrationLevel|immigContributeTake)

immigrationLevel could in theory be used to extend immigSelf (it's a very similar question) - even though it's rather "meh".

immigContributeTake is a really important variable despite us only having it in two waves (and only for a fraction of the cohort!) - partly because they're the right two waves, and partly because it just crops up a lot as a driver.

In [22]:
%%time
var_name = ""

title= "\n".join(["Do you think the number of immigrants from foreign countries who are permitted to come to the United Kingdom to live should be increased, decreased, or left the same as it is now?",
                  "Do you think people who come to live in Britain from other EU countries get more in benefits and services than they pay in taxes?",
                  "Decreased a lot/Get More Than They Pay (0) - Increase a lot/Pay More Than They Get (1)"
                  ])
specific_suffix_set = "(immigrationLevel|immigContributeTake)"
col_name="party"
col_wrap=2
height = 6

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 25.3 s
In [23]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"

color_dict= {"immigrationLevel":"blue","immigContributeTake":"red"}
title_dict= {"immigrationLevel":"immigrationLevel (5pt scale)",
             "immigContributeTake":"immigContributeTake (7pt scale)"}
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict,var_name,
                                      prominence=prominence,width=width, rolling_win_type="BES")
immigrationLevel
prominences left_bases right_bases widths width_heights start stop
date
immigContributeTake
prominences left_bases right_bases widths width_heights start stop
date
2016-05-30 12:00:00 0.173569 22 1247 33.580120 0.226357 2016-05-29 23:00:00 2016-05-31 10:00:00
2016-06-18 03:00:00 0.183316 22 1457 33.414623 0.320638 2016-06-17 11:00:00 2016-06-18 22:00:00
Wall time: 37.6 s

(6) (changeImmig|changeImmigLab|controlImmig)

changeImmig is a very long panel with decent coverage - a sharp change around the EU referendum (shame it doesn't appear in W9). changeImmigLab - dull! controlImmig has an unfortunate problem: the W8 campaign sample would be perfect, but it's not a full sample and the question shifts quite a lot (there are three different versions; the big difference is probably between W8 and W9+ - how much control does the UK have/would the UK have after ...).

In [24]:
%%time
var_name = ""

title= "\n".join(["Do you think that the level of immigration is getting higher, getting lower or staying about the same?",
                  "If there were a *Labour* UK government today, do you think that the level of immigration would be getting higher, getting lower or staying about the same?",
                  "How much control do you think Britain has/will have over immigration to the UK?"
                  ])
specific_suffix_set = "(changeImmig|changeImmigLab|controlImmig)"
col_name="party"
col_wrap=3
height = 6

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 4min 36s
In [25]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"

color_dict= {"changeImmig":"blue","changeImmigLab":"red","controlImmig":"purple"}
title_dict= {"changeImmig":"changeImmig (5pt scale)",
             "changeImmigLab":"changeImmigLab (5pt scale)",
             "controlImmig":"controlImmig (5pt scale)",}
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict,var_name,
                                      prominence=prominence,width=width, rolling_win_type="BES")
changeImmig
prominences left_bases right_bases widths width_heights start stop
date
2014-03-01 06:00:00 0.162767 111 1092 103.321170 0.739169 2014-02-27 02:00:00 2014-03-03 12:00:00
2014-06-28 10:00:00 0.177232 111 1736 43.133125 0.740932 2014-06-25 16:00:00 2014-09-19 19:00:00
2015-03-13 10:00:00 0.157735 1736 2100 22.715402 0.764143 2015-03-13 04:00:00 2015-03-14 02:00:00
2016-04-16 13:00:00 0.222764 1736 4830 113.528181 0.756433 2015-03-30 13:00:00 2016-04-18 18:00:00
2016-05-29 17:00:00 0.164539 3125 4830 114.580724 0.784998 2016-05-25 15:00:00 2016-05-30 11:00:00
changeImmigLab
prominences left_bases right_bases widths width_heights start stop
date
controlImmig
prominences left_bases right_bases widths width_heights start stop
date
Wall time: 5min 23s
In [26]:
# I'll have to check that glitch in Retention out later

(7) (immigrantsWelfareState|effectsEUImmigration|euPriorityBalance)

immigrantsWelfareState - this variable is super important (long profile, good sample size, and it crops up in lots of regressions)

effectsEUImmigration - "Do you think the following would be higher, lower or about the same if the UK leaves the European Union?" Important, since this is an actual explicit cause and effect in relation to the UK leaving the EU (and not, say, a Con government being elected/a perception of UK politicians)

euPriorityBalance - reflects the slow but continuous positive shift in UK attitudes to Freedom of Movement

In [27]:
%%time
var_name = ""

title= "\n".join(["Immigrants are a burden on the welfare state (Strongly Agree 0 - Strongly Disagree 1)",
                  "Do you think the immigration to the UK would be higher, lower or about the same if the UK leaves the European Union? (Much Lower 0 - Much Higher 1)",
                  "When Britain negotiates to leave the EU is it more important for the UK governmentto protect Britain's access to the single market",
                  "or to gain full control of immigration? (Control Immigration 0 - Access to Single Market 1: 11pt scale)",
                  "Is it good/bad/neither/DK for Britain: Allowing the free movement of workers within Europe?"])
specific_suffix_set = "(immigrantsWelfareState|effectsEUImmigration|euPriorityBalance|ukCoopMovement)"
col_name="party"
col_wrap=2
height = 6

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 5min 9s
In [28]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"

color_dict= {"immigrantsWelfareState":"blue","effectsEUImmigration":"red","euPriorityBalance":"purple","ukCoopMovement":"orange"}
title_dict= {"immigrantsWelfareState":"Immigrants are a burden on the welfare state (5pt scale)",
             "effectsEUImmigration":"Would immigration be higher, lower or about the same if the UK leaves the EU? (5pt scale)",
             "euPriorityBalance":"Brexit priority: access to single market versus controlling immigration? (11pt scale)",
             "ukCoopMovement":"IS allowing the free movement of workers within Europe Good/Bad/Neither (3pt scale)",}
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict,var_name,
                                      prominence=prominence,width=width, rolling_win_type="BES")
immigrantsWelfareState
prominences left_bases right_bases widths width_heights start stop
date
2014-10-13 14:00:00 0.226811 1664 2355 156.032161 0.370942 2014-10-12 13:00:00 2015-03-10 08:00:00
2015-03-29 08:00:00 0.157662 2219 2355 37.553824 0.380031 2015-03-28 19:00:00 2015-03-30 11:00:00
2016-04-22 07:00:00 0.213845 1664 3643 40.751673 0.381239 2016-04-21 10:00:00 2016-04-23 07:00:00
2016-06-17 11:00:00 0.155551 3643 3914 49.897803 0.374465 2016-06-17 04:00:00 2016-06-19 06:00:00
effectsEUImmigration
prominences left_bases right_bases widths width_heights start stop
date
euPriorityBalance
prominences left_bases right_bases widths width_heights start stop
date
2016-12-04 00:00:00 0.203610 77 1405 35.853250 0.456127 2016-12-03 21:00:00 2016-12-05 09:00:00
2019-03-26 11:00:00 0.206577 77 2997 63.057096 0.532902 2019-03-23 21:00:00 2019-03-26 13:00:00
ukCoopMovement
prominences left_bases right_bases widths width_heights start stop
date
2016-04-21 23:00:00 0.287932 24 838 41.697208 0.456773 2016-04-21 10:00:00 2016-04-23 08:00:00
2016-05-19 11:00:00 0.150096 617 838 45.668867 0.405256 2016-05-18 16:00:00 2016-05-20 14:00:00
2016-12-04 18:00:00 0.156966 838 1868 35.303180 0.512320 2016-12-03 21:00:00 2016-12-05 08:00:00
2019-03-14 21:00:00 0.185314 24 2155 20.423779 0.618436 2019-03-14 12:00:00 2019-03-15 11:00:00
2019-03-24 10:00:00 0.181684 24 2345 59.077759 0.664294 2019-03-23 21:00:00 2019-03-26 09:00:00
Wall time: 6min 7s
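
(A sketch of the "circles = daily mean / line = moving average" construction these charts use. The variable name and the 21-day sample-size-weighted window below are my assumptions - rolling_win_type="BES" presumably selects some specific window that may differ.)

# `responses` is assumed to be a 0-1 scaled Series indexed by response datetime
daily_mean = responses.resample("D").mean()   # one circle per day
daily_n    = responses.resample("D").count()  # daily sample size

win = dict(window=21, center=True, min_periods=5)  # window length: an assumption
# weight each day by its sample size so sparse fieldwork days don't dominate
moving_avg = (daily_mean * daily_n).rolling(**win).sum() / daily_n.rolling(**win).sum()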

(6) responsibleImmig(Conservatives|LibDems|Labour|ScottishGovt|WelshGovt|None|DontKnow)

Interesting, because it directly follows on from the changeImmig questions... and yet the trends are deeply boring.

In [29]:
%%time
var_name = "responsibleImmig"

title= "\n".join(["Thinking about the changes in the levels of immigration you just described.",
                  "Who do you think these are the result of?",
                  "*Please tick all that apply for each issue.* No (0) - Yes (1)"])
specific_suffix_set = "(Conservatives|LibDems|Labour|ScottishGovt|WelshGovt|None|DontKnow)"
col_name="party"
col_wrap=4
height = 4

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 2min 22s
In [30]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"


suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Who is responsible for the changes in lvls of imm. you just described?"+"("+x+")" for x in suff_list]))

prominence=.15
width=20
                                 
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,
                                      rolling_win_type="BES")
Conservatives
prominences left_bases right_bases widths width_heights start stop
date
2014-03-02 22:00:00 0.173094 3 325 29.186120 0.558342 2014-03-02 14:00:00 2014-03-03 20:00:00
2014-03-07 18:00:00 0.171883 355 472 53.745515 0.537766 2014-03-06 20:00:00 2014-03-09 05:00:00
2014-06-12 15:00:00 0.200028 617 1077 172.232044 0.545628 2014-06-10 19:00:00 2014-06-21 09:00:00
2014-09-23 01:00:00 0.179098 1077 1207 22.183882 0.574276 2014-09-22 08:00:00 2014-09-23 06:00:00
2014-10-04 13:00:00 0.181721 1376 1662 62.280578 0.563591 2014-10-03 19:00:00 2014-10-06 14:00:00
2015-03-14 19:00:00 0.214889 1662 2013 32.073802 0.568246 2015-03-13 13:00:00 2015-03-14 21:00:00
LibDems
prominences left_bases right_bases widths width_heights start stop
date
2015-03-19 17:00:00 0.160901 1887 2150 23.334999 0.236207 2015-03-19 09:00:00 2015-03-20 09:00:00
Labour
prominences left_bases right_bases widths width_heights start stop
date
2014-03-01 05:00:00 0.158536 85 869 42.246292 0.369637 2014-02-27 19:00:00 2014-03-01 14:00:00
2014-09-20 08:00:00 0.218919 869 1853 43.080345 0.348890 2014-09-19 21:00:00 2014-09-22 09:00:00
2014-10-04 06:00:00 0.171675 1430 1786 20.001813 0.340804 2014-10-03 15:00:00 2014-10-04 14:00:00
2015-03-20 11:00:00 0.159236 1951 2246 24.799880 0.335152 2015-03-19 17:00:00 2015-03-20 18:00:00
2015-03-25 16:00:00 0.163634 2079 2246 53.143233 0.309294 2015-03-25 14:00:00 2015-03-27 19:00:00
ScottishGovt
prominences left_bases right_bases widths width_heights start stop
date
WelshGovt
prominences left_bases right_bases widths width_heights start stop
date
None
prominences left_bases right_bases widths width_heights start stop
date
2014-10-13 14:00:00 0.211954 1347 2124 39.725892 0.187046 2014-10-12 21:00:00 2014-10-14 16:00:00
DontKnow
prominences left_bases right_bases widths width_heights start stop
date
2014-03-05 21:00:00 0.169584 21 466 20.631810 0.130538 2014-03-05 18:00:00 2014-03-06 14:00:00
2014-10-15 08:00:00 0.200756 1221 1818 102.798351 0.153986 2014-10-13 12:00:00 2014-10-19 11:00:00
Wall time: 4min 5s

(7) achieveReduceImmig(Brexit|Con|Grn|LD|Lab|None|PC|SNP|TIG|UKIP)

Wow - the charts for LD/Green are not broken: there are some Yes's, just very few. The Con pattern is interesting - the way it ever-so-briefly kisses UKIP's level before collapsing back going into the GE. UKIP is trending down even before the EU referendum.

In [31]:
%%time
var_name = "achieveReduceImmig"

title= "\n".join(["Would any of the following political parties be successful",
                  " – if elected/if they were in government – ",
                  "in reducing the level of immigration? No (0) - Yes (1)"])
specific_suffix_set = "(Brexit|Con|Grn|LD|Lab|None|PC|SNP|TIG|UKIP)"
col_name="party"
col_wrap=5
height = 4

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 12min 19s
In [32]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"


suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Would this party be succ in reducing imm if elected/in gov?"+"("+x+")" for x in suff_list]))

prominence=.15
width=20
                                 
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,
                                      rolling_win_type="BES")
Brexit
prominences left_bases right_bases widths width_heights start stop
date
Con
prominences left_bases right_bases widths width_heights start stop
date
2016-04-21 15:00:00 0.209184 598 822 31.776587 0.181705 2016-04-21 07:00:00 2016-04-22 16:00:00
2016-11-29 16:00:00 0.205389 598 1378 20.510823 0.261882 2016-11-28 21:00:00 2016-11-29 17:00:00
2017-04-27 07:00:00 0.430267 598 3876 50.727947 0.294923 2017-04-25 20:00:00 2017-04-27 22:00:00
2019-11-04 19:00:00 0.185657 3988 4053 25.281295 0.390335 2019-11-04 05:00:00 2019-11-05 06:00:00
Grn
prominences left_bases right_bases widths width_heights start stop
date
LD
prominences left_bases right_bases widths width_heights start stop
date
Lab
prominences left_bases right_bases widths width_heights start stop
date
None
prominences left_bases right_bases widths width_heights start stop
date
2015-03-19 01:00:00 0.205897 132 455 21.560851 0.316700 2015-03-18 14:00:00 2015-03-19 12:00:00
2019-03-28 10:00:00 0.154471 3392 3612 36.058889 0.420713 2019-03-27 23:00:00 2019-05-24 08:00:00
2019-06-05 17:00:00 0.256391 3612 3921 22.369614 0.377062 2019-06-05 14:00:00 2019-06-06 12:00:00
2019-06-10 21:00:00 0.191768 3807 3921 27.269617 0.340330 2019-06-09 20:00:00 2019-06-10 23:00:00
2019-06-14 11:00:00 0.250957 3921 4149 24.880853 0.377377 2019-06-13 13:00:00 2019-11-01 14:00:00
2019-11-15 19:00:00 0.214728 3921 4546 20.224036 0.405374 2019-11-15 14:00:00 2019-11-16 10:00:00
2019-12-05 19:00:00 0.192982 4690 4759 20.568785 0.402222 2019-12-05 12:00:00 2019-12-06 08:00:00
PC
prominences left_bases right_bases widths width_heights start stop
date
SNP
prominences left_bases right_bases widths width_heights start stop
date
TIG
prominences left_bases right_bases widths width_heights start stop
date
UKIP
prominences left_bases right_bases widths width_heights start stop
date
2016-04-16 18:00:00 0.173138 522 629 22.735714 0.478115 2016-04-16 07:00:00 2016-04-17 06:00:00
2016-04-22 17:00:00 0.289909 654 748 43.806588 0.501444 2016-04-21 05:00:00 2016-04-23 07:00:00
2016-05-02 18:00:00 0.310596 748 1403 23.338862 0.499951 2016-05-02 11:00:00 2016-05-03 10:00:00
2016-11-28 15:00:00 0.212138 1253 1403 21.371855 0.431656 2016-11-27 20:00:00 2016-11-28 17:00:00
2018-05-13 09:00:00 0.291711 1861 3740 20.246971 0.376817 2018-05-12 22:00:00 2018-05-13 18:00:00
Wall time: 3min 4s

(8) (conPrioritiesImmig|labPrioritiesImmig|ldPrioritiesImmig|ukipPrioritiesImmig)

Low sample size, even at the peak bump around the 2015 GE.
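
(A quick way to eyeball how thin that sample is - count non-missing responses per wave-suffixed column; the startswith filter is just picking up the usual WN suffixes.)

# non-null responses per wave for the party-priorities items
for stem in ["conPrioritiesImmig", "labPrioritiesImmig",
             "ldPrioritiesImmig", "ukipPrioritiesImmig"]:
    for col in sorted(c for c in BES_Panel.columns if c.startswith(stem)):
        print(col, BES_Panel[col].notnull().sum())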

In [33]:
%%time
var_name = ""

title= "\n".join(["Is immigration one of the most important issues do you think are most important to each of the following political parties?",
                  "*Please tick all the issues that are important to each party*. No (0) - Yes (1)"])
specific_suffix_set = "(conPrioritiesImmig|labPrioritiesImmig|ldPrioritiesImmig|ukipPrioritiesImmig)"
col_name="party"
col_wrap=2
height = 6

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 1min 11s
In [34]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"


suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Is immigration one of the most important issues for this party?"+"("+x+")" for x in suff_list]))

prominence=.15
width=20
                                 
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,)
#, rolling_win_type="BES")
conPrioritiesImmig
prominences left_bases right_bases widths width_heights start stop
date
2014-02-23 08:00:00 0.164038 58 124 24.015383 0.365426 2014-02-22 13:00:00 2014-02-23 13:00:00
2014-03-05 12:00:00 0.218123 124 452 20.661767 0.382222 2014-03-04 20:00:00 2014-03-05 17:00:00
2014-06-24 08:00:00 0.256415 124 1700 55.298603 0.388589 2014-06-23 18:00:00 2014-09-19 15:00:00
2014-09-25 10:00:00 0.157249 871 928 20.413067 0.363968 2014-09-24 19:00:00 2014-09-25 17:00:00
2014-10-03 11:00:00 0.150705 928 1196 35.984171 0.371775 2014-10-02 09:00:00 2014-10-04 05:00:00
2014-10-14 11:00:00 0.173396 928 1368 107.205387 0.369923 2014-10-12 09:00:00 2015-03-09 15:00:00
2016-06-30 21:00:00 0.223491 2150 2290 41.235409 0.438604 2016-06-30 07:00:00 2016-07-02 09:00:00
2019-11-01 17:00:00 0.270753 1700 2383 30.198445 0.430493 2016-07-04 08:00:00 2019-11-02 08:00:00
labPrioritiesImmig
prominences left_bases right_bases widths width_heights start stop
date
2014-06-24 08:00:00 0.188955 445 1895 24.014004 0.220975 2014-06-24 05:00:00 2014-06-25 06:00:00
2016-06-29 11:00:00 0.202710 1895 2485 28.154937 0.212848 2016-06-28 12:00:00 2016-06-29 17:00:00
ldPrioritiesImmig
prominences left_bases right_bases widths width_heights start stop
date
2014-06-04 10:00:00 0.189975 452 1791 177.480153 0.192215 2014-06-01 10:00:00 2014-06-22 04:00:00
2014-09-26 15:00:00 0.160636 847 1522 40.336193 0.162988 2014-09-25 21:00:00 2014-09-27 18:00:00
ukipPrioritiesImmig
prominences left_bases right_bases widths width_heights start stop
date
Wall time: 18.2 s

(9) dealPriority(MarketAccess|NIUKBorder|NIEireBorder|TradeDeal|RedTape|Immig)

Okay - no significant changes W15->W16. The only thing of note is how low "control immigration" (or, indeed, RedTape) stands relative to the other priorities (I suspect this is largely because of the Leave/Remain split).

In [35]:
%%time
var_name = "dealPriority"

title= "\n".join(["How would you rate the importance of these different aspects of a deal to leave the European Union?",
                  "Not important at all (0) - Extremely important (1) (5pt scale)"])
specific_suffix_set = "(MarketAccess|NIUKBorder|NIEireBorder|TradeDeal|RedTape|Immig)"
col_name="party"
col_wrap=3
height = 6

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 1min 48s
In [36]:
# %%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"


suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["How would you rate the imp. of diff. aspects of a Deal?"+"("+x+")" for x in suff_list]))

prominence=.15
width=20
                                 
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,
                                      rolling_win_type="BES")
MarketAccess
prominences left_bases right_bases widths width_heights start stop
date
NIUKBorder
prominences left_bases right_bases widths width_heights start stop
date
NIEireBorder
prominences left_bases right_bases widths width_heights start stop
date
TradeDeal
prominences left_bases right_bases widths width_heights start stop
date
RedTape
prominences left_bases right_bases widths width_heights start stop
date
Immig
prominences left_bases right_bases widths width_heights start stop
date
2019-11-04 16:00:00 0.164373 331 1123 48.943061 0.656663 2019-11-04 12:00:00 2019-11-06 14:00:00
In [37]:
# date, dealPriority_wts, dealPriority_dk       
In [38]:
# %debug

(10) negotiationSpecifics(ContinuePayingMoney|AcceptingFoM|AcceptingEUReg|Nothing)

Feels like there's one transition W9 -> W10, where a chunk of people go from "no concessions necessary" to "some concessions necessary".

Then one more transition W14 -> W15, where another chunk of people reconsider some of those concessions (FoM/paying money). Notable that AcceptingEUReg doesn't waver. Something to do with the perceived weakness of May/strength of the ERG, as filtered through the tabloids?
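
(The direct check on that story would be a cross-wave crosstab on the same respondents - a sketch, assuming "Nothing" is the "no concessions necessary" option and the usual wave-suffix column naming.)

# row-normalised W9 -> W10 transition table for the "Nothing" option:
# did respondents who said "no concessions necessary" at W9 name at
# least one concession by W10?
moves = pd.crosstab(BES_Panel["negotiationSpecificsNothingW9"],
                    BES_Panel["negotiationSpecificsNothingW10"],
                    normalize="index")
print(moves)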

In [39]:
%%time
var_name = "negotiationSpecifics"

title= "\n".join(["When Britain negotiates leaving terms with the EU, what will it take for the",
                  "UK government to get unrestricted access to EU markets for British business?",
                  "Tick all that apply No (0) Yes (1)"])
specific_suffix_set = "(ContinuePayingMoney|AcceptingFoM|AcceptingEUReg|Nothing)"
col_name="party"
col_wrap=2
height = 6

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 2min 42s
In [40]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"


suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["What must the UK give to get Single Market access?"+"("+x+")" for x in suff_list]))

prominence=.15
width=20
                                 
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,
                                      rolling_win_type="BES")
ContinuePayingMoney
prominences left_bases right_bases widths width_heights start stop
date
2016-12-01 22:00:00 0.220345 42 510 24.271097 0.632896 2016-12-01 13:00:00 2016-12-02 13:00:00
2018-05-08 13:00:00 0.154172 1079 1367 23.234228 0.648062 2018-05-08 11:00:00 2018-05-09 10:00:00
2018-05-15 15:00:00 0.233658 1079 1723 54.848336 0.646121 2018-05-14 11:00:00 2018-05-16 17:00:00
AcceptingFoM
prominences left_bases right_bases widths width_heights start stop
date
2016-06-29 19:00:00 0.313684 4 1671 38.429302 0.634547 2016-06-29 15:00:00 2016-07-01 05:00:00
2017-06-15 07:00:00 0.160224 757 1021 25.540785 0.649847 2017-06-14 13:00:00 2017-06-15 14:00:00
2017-06-18 09:00:00 0.282839 508 1671 68.693528 0.626830 2017-06-17 19:00:00 2017-06-20 18:00:00
2017-06-23 12:00:00 0.169663 1112 1388 55.703045 0.640186 2017-06-21 20:00:00 2018-05-04 15:00:00
2018-05-14 13:00:00 0.223759 1388 1671 30.403371 0.644496 2018-05-13 16:00:00 2018-05-14 22:00:00
AcceptingEUReg
prominences left_bases right_bases widths width_heights start stop
date
2016-06-29 21:00:00 0.159383 118 181 21.106045 0.635074 2016-06-29 18:00:00 2016-06-30 15:00:00
2016-07-02 19:00:00 0.218467 118 511 30.082567 0.622116 2016-07-02 11:00:00 2016-07-03 18:00:00
2017-04-24 11:00:00 0.159284 511 648 22.406626 0.610675 2016-12-09 16:00:00 2017-04-24 11:00:00
2018-05-08 07:00:00 0.178472 1102 1274 20.679046 0.609319 2018-05-07 20:00:00 2018-05-08 17:00:00
2018-05-14 14:00:00 0.180619 1102 1680 26.718444 0.623872 2018-05-13 20:00:00 2018-05-14 23:00:00
Nothing
prominences left_bases right_bases widths width_heights start stop
date
Wall time: 4min 26s

(11) benefitsToMigrants

Thinking of people coming to live in Great Britain, when do you think they should obtain the same rights to social benefits and services as citizens already living here?

Wow - super flat. Does this period cover the time when Cameron was trying to negotiate exactly this? Even afterwards, it seems weird for it to be so very, very flat.

In [41]:
%%time
var_name = ""

title= "\n".join(["Thinking of people coming to live in Great Britain, when do you think they should",
                  "obtain the same rights to social benefits and services as citizens already living here?",
                  "Immediately (0) - Never (1)",
                  ])
specific_suffix_set = "(benefitsToMigrants)"
col_name="party"
col_wrap=1
height = 5

flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
                               col_wrap=col_wrap,treatment="immig_timeseries")
Wall time: 14.6 s
In [42]:
%%time

y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"

color_dict= {"benefitsToMigrants":"blue"}
title_dict= {"benefitsToMigrants":"When should immigrants get access to UK benefits?"}

df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name, rolling_win_type="BES")
benefitsToMigrants
prominences left_bases right_bases widths width_heights start stop
date
Wall time: 27.9 s
In [43]:
BES_Panel["benefitsToMigrantsW8"].value_counts()
Out[43]:
After they have worked and paid taxes for at least four years    15612
After they become a British citizen                               7810
Never                                                             3960
After living in GB for a year                                     2403
Don't know                                                        2293
Immediately on arrival                                            1424
Name: benefitsToMigrantsW8, dtype: int64
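
(The "Immediately (0) - Never (1)" scale in the chart implies an ordered recode of these categories onto 0-1. Something like the sketch below - the ordering, and sending "Don't know" to missing, are my assumptions about what the treatment does.)

# hypothetical recode onto the 0-1 scale; "Don't know" falls out as NaN
# because it has no entry in the mapping (the ordering is an assumption)
order = ["Immediately on arrival",
         "After living in GB for a year",
         "After they have worked and paid taxes for at least four years",
         "After they become a British citizen",
         "Never"]
mapping = {cat: i / (len(order) - 1) for i, cat in enumerate(order)}
numeric = BES_Panel["benefitsToMigrantsW8"].map(mapping)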
