import warnings
warnings.filterwarnings('ignore')
from IPython.display import display
from IPython.display import HTML
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)
# This line will hide code by default when the notebook is exported as HTML
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)
# This line will add a button to toggle visibility of code blocks, for use with the HTML export version
di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''', raw=True)
dataset_name = "W16_comb"
df_list = [ "BES_Panel", ]
%matplotlib inline
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import pickle, os, gc, re
sns.set();
sns.set_palette("colorblind")
from IPython.display import display, display_html, HTML
from IPython.core.debugger import set_trace
# plt.rcParams["axes.grid"] = False
import Jupyter_module_loader
from utility import *
import gaussian_kde
import warnings
warnings.filterwarnings('ignore')
import holoviews as hv
from holoviews import opts
encoding = "ISO-8859-1"
# clone this repo into a subdirectory called 'BES_analysis_code' (inside some parent directory - I call mine BES_analysis, but the name doesn't matter)
# %matplotlib inline
(BES_code_folder, BES_small_data_files, BES_data_folder,
BES_output_folder, BES_file_manifest, BES_R_data_files) = setup_directories()
global BES_Panel, BES_numeric, BES_reduced, BES_reduced_with_na, BES_non_numeric
data_subfolder = BES_data_folder + dataset_name + os.sep
(manifest, dataset_filename, dataset_description, dataset_citation,
dataset_start, dataset_stop, dataset_wave) = get_manifest(dataset_name, BES_file_manifest)
for df in df_list:
    if df=="BES_Panel":
        # globals()[df] = pd.read_msgpack(data_subfolder + dataset_filename.replace('.dta','.msgpack'))
        globals()[df] = pd.read_pickle(data_subfolder + dataset_filename.replace('.dta','.zip'), compression='zip')
    else:
        globals()[df] = pd.read_msgpack(data_subfolder + df + '.msgpack')
        globals()[df].replace(-1, np.nan, inplace=True)
# (var_type, cat_dictionary, new_old_col_names, old_new_col_names) = get_small_files(data_subfolder, encoding)
# get full set of inferred "cross wave" auth-lib/left-right values and ages
pan_dataset_allr_values = pd.read_csv(BES_small_data_files + "pan_dataset_allr_values"+".csv")
pan_dataset_ages = pd.read_csv( BES_small_data_files + "pan_dataset_ages"+".csv" )
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column, row, layout
from bokeh.embed import components
from bokeh.models import HoverTool
from bokeh.models import Span, Label
output_notebook()
from datetime import timedelta
BES_Panel = pd.read_pickle(os.path.join("..", "BES_analysis_data", "W19_comb", "BES2019_W19_Panel_v0.1.zip"), compression='zip')
Step One: Locate the period of the Major Shift In Sentiment
Find all the directly immigration-related sentiment variables (a quick enumeration sketch follows this list):
- personal preferences for immigration levels (immigrationLevel|immigSelf|(types)More)
- personal perception of the impact of immigration (immigEcon|immigCultural|immigContributeTake|immigrantsWelfareState|ukCoopMovement)
- personal expectations about immigration levels/control (effectsEUImmigration|changeImmig|controlImmig)
- perception of parties/(gov/opp): preference for/ability to/responsibility for changing/reducing immigration, and prioritisation of immigration (immig(parties)/responsibleImmig(parties)/achieveReduceImmig/(party)Priorities_immig)
- preference/perception of immigration's priority in the EU negotiations (euPriorityBalance|dealPriority|negotiationSpecifics)
Not sure where "benefitsToMigrants" fits in - it's not really an impact/level/expectation
- but I would expect it to correlate with immigContributeTake/immigrantsWelfareState
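As a sanity check, a minimal sketch to enumerate which of these variable families actually appear in the panel, and in how many waves (assumes the stub+suffix+W+wave column naming used throughout this notebook; the stub list below is illustrative, not exhaustive):
# Sketch: count waves per variable family (stub list is illustrative)
stubs = ["immigSelf", "immigEcon", "immigCultural", "immigrationLevel",
         "immigContributeTake", "effectsEUImmigration", "changeImmig",
         "controlImmig", "euPriorityBalance", "benefitsToMigrants"]
for stub in stubs:
    waves = sorted({m.group(1) for c in BES_Panel.columns
                    for m in [re.match(stub + r"\w*?(W\d+)$", c)] if m},
                   key=lambda w: int(w[1:]))
    print(stub, ":", len(waves), "waves", waves)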
# %%time
# Fix up all the datasets we'll be using (make sure all categories correctly/consistently ordered)
immig_var_stub = ["immigEcon","immigCultural","immigSelf","immigrationLevel","immigContributeTake",
"immigrantsWelfareState","controlImmig","effectsEUImmigration","euPriorityBalance",
"changeImmig","changeImmigLab","govtHandleImmig","labHandleImmig",
"asylumMore","euMore","noneuMore","studentsMore","familiesMore",
"responsibleImmig","achieveReduceImmig",
"conPriorities_immig","labPriorities_immig","ldPriorities_immig","ukipPriorities_immig",
"dealPriority","ukCoopMovement","negotiationSpecifics","benefitsToMigrants"]
fix_cat_dict = {}
# not sure about best way to line up "expectation" variables
fix_cat_dict['changeImmig']=['Getting a lot lower', 'Getting a little lower','Staying about the same',
'Getting a little higher', 'Getting a lot higher', "Don't know" ]
fix_cat_dict['effectsEUImmigration'] = ['Much lower', 'Lower', 'About the same', 'Higher', 'Much higher', "Don't know"]
fix_cat_dict['controlImmig']=['No control at all', 'A little control', 'Some control', 'A lot of control', 'Complete control', "Don't know"]
# let's try to get every axis pointing in the same "sentiment" direction
fix_cat_dict['immigEcon']=['Bad for economy','2', '3', '4', '5', '6', 'Good for economy', "Don't know"]
fix_cat_dict['immigCultural']=['Undermines cultural life', '2', '3', '4', '5', '6', 'Enriches cultural life', "Don't know" ]
fix_cat_dict["immig(Self|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD)W"] = ['Allow many fewer', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Allow many more', "Don't know"]
fix_cat_dict['(students|eu|asylum|families|noneu)More'] = ['Many fewer', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Many more', "Don't know"]
fix_cat_dict['immigrantsWelfareState'] = ['Strongly agree', 'Agree', 'Neither agree nor disagree', 'Disagree', 'Strongly disagree', "Don't know"]
fix_cat_dict['immigContributeTake'] = ['Get more than they pay', '2.0', '3.0', '4.0',
'5.0', '6.0', 'Pay more than they get', "Don't know"]
fix_cat_dict['euPriorityBalance'] = ['Control immigration', '9', '8', '7', '6', '5', '4', '3', '2', '1', 'Access to the single market', "Don't know"]
fix_cat_dict['immigrationLevel'] = ['Decreased a lot', 'Decreased a little', 'Left the same as it is now',
'Increased a little', 'Increased a lot', "Don't know"]
fix_cat_dict['ukCoopMovement'] = ["Bad for Britain", "Neither good nor bad for Britain", "Good for Britain", "Don't know"]
fix_cat_dict['benefitsToMigrants'] = ['Immediately on arrival','After living in GB for a year',
'After they have worked and paid taxes for at least four years',
'After they become a British citizen', 'Never', "Don't know"]
# fix_cat_dict['immigrantsWelfareState'] = ['Strongly disagree', 'Disagree', 'Neither agree nor disagree','Agree', 'Strongly agree',"Don't know", ]
# mostly dealPriorityImmig, but might be worth setting alongside the other dealPriority variables
fix_cat_dict['dealPriority'] = ['Not important at all', 'Not very important', 'Somewhat important',
'Very important', 'Extremely important' , "Don't know"]
# fix_cat_dict['euPriorityBalance'] = ['Access to the single market', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'Control immigration', "Don't know"]
fix_cat_dict['(lab|govt)HandleImmig'] = ['Very badly', 'Fairly badly', 'Neither well nor badly', 'Fairly well', 'Very well', "Don't know"]
fix_cat_dict['responsibleImmig(Conservatives|LibDems|Labour|ScottishGovt|WelshGovt|None|DontKnow)'] = ["No","Yes","Don't know"]
fix_cat_dict['achieveReduceImmig(Brexit|Con|Grn|LD|Lab|None|PC|SNP|TIG|UKIP)'] = ["No","Yes","Don't know"]
fix_cat_dict["(con|lab|ld|ukip)Priorities_immig"] = ["No","Yes","Don't know"]
# my time plotting function assumes variable names won't contain _ - quick hack to circumvent having to change that
BES_Panel[[x.replace("_i","I") for x in search(BES_Panel,"Priorities_immig").index]] = BES_Panel[[x for x in search(BES_Panel,"Priorities_immig").index]]
# my time plotting function assumes variable names won't contain _ - quick hack to circumvent having to change that
BES_Panel[[x.replace("_1","ContinuePayingMoney").replace("_2","AcceptingFoM").replace("_3","AcceptingEUReg").replace("_none","Nothing") for x in search(BES_Panel,"negotiationSpecifics").index]] = BES_Panel[[x for x in search(BES_Panel,"negotiationSpecifics").index]]
fix_cat_dict["negotiationSpecifics"] = ["No","Yes","Don't know"]
def cat_2_num_drop_dk(x):
    # ordered categorical -> numeric codes, with "Don't know" (and missing) set to <NA>
    if x.dtype.name=='category':
        return x.replace("Don't know",np.nan).cat.remove_unused_categories().cat.codes.replace(-1,np.nan).astype('UInt16')
    else:
        return x
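For illustration, what cat_2_num_drop_dk does to a toy ordered categorical (hypothetical values, not panel data):
toy = pd.Series(pd.Categorical(["Agree", "Don't know", "Disagree", None],
                               categories=["Disagree", "Agree", "Don't know"],
                               ordered=True))
# "Don't know" and missing both become <NA>; the rest map to ordered integer codes
cat_2_num_drop_dk(toy)  # -> [1, <NA>, 0, <NA>] as nullable UInt16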
def fix_cats(fix_cat_dict):
    # impose a consistent category order, so .cat.codes point the same direction in every wave
    for key in fix_cat_dict.keys():
        BES_Panel[match(BES_Panel, key).index] = BES_Panel[match(BES_Panel, key).index]\
            .apply(lambda x: x.cat.set_categories(fix_cat_dict[key], ordered=True))
fix_cats(fix_cat_dict)
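What fix_cats buys us, on a toy Series (hypothetical values): once every wave's categories are set in the same explicit order, the integer codes follow that order everywhere:
s = pd.Series(pd.Categorical(["Higher", "Much lower", "Don't know"]))
s = s.cat.set_categories(['Much lower', 'Lower', 'About the same',
                          'Higher', 'Much higher', "Don't know"], ordered=True)
s.cat.codes.tolist()  # -> [3, 0, 5]: codes now follow the intended order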
def weighted_mean(x, **kws):
    # x is an iterable of (value, weight) tuples; NaN values are dropped along with their weights
    val, weight = map(np.asarray, zip(*x))
    val, weight = val[~np.isnan(val)], weight[~np.isnan(val)]
    return (val * weight).sum() / weight.sum()
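weighted_mean expects the zipped (value, weight) tuples used to build the *_wts columns later on; a toy call (hypothetical numbers):
pairs = list(zip([0.2, 0.8, np.nan], [1.0, 3.0, 2.0]))  # (value, weight) pairs
weighted_mean(pairs)  # -> (0.2*1 + 0.8*3)/(1 + 3) = 0.65; the NaN value is dropped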
# max_wave = int(re.match("W(\d+)_",dataset_name).groups()[0])
max_wave = 19
num_to_wave = {x:"W"+str(x) for x in range(1,max_wave+1)}
wts_for_wave = { "W"+str(y):[x for x in BES_Panel.columns.sort_values(ascending=False) if re.match("wt_(new|full)_W"+str(y)+"(_result)?"+"$",x)][0] for y in range(1,max_wave+1) }
wave_to_date = BES_file_manifest[BES_file_manifest["Only_or_Combined"]=="Only"][["Wave No","Date_Start"]].set_index("Wave No")["Date_Start"]
wave_to_date[17] = "Nov-19"
wave_to_date[18] = "Nov-19"
wave_to_date[19] = "Dec-19"
# fix endtimeW3 bug!
BES_Panel.loc[BES_Panel["endtimeW3"]=='1970-01-01 00:00:00',"endtimeW3"] = \
    BES_Panel[BES_Panel["endtimeW3"]=='1970-01-01 00:00:00']["starttimeW3"].values
# this one is also suspect - the date isn't unreasonable, but it overlaps with a different wave! (3 days between start/end)
# 41222   2015-03-27 18:11:37.047
# Name: starttimeW5, dtype: datetime64[ns]
BES_Panel.loc[BES_Panel["starttimeW5"]=='2015-03-27 18:11:37.047000064',"starttimeW5"] = \
    BES_Panel[BES_Panel["starttimeW5"]=='2015-03-27 18:11:37.047000064']["endtimeW5"].values
# still some overlap between waves 4 and 5
midpoint_dict = {}
startpoint_dict = {}
endpoint_dict = {}
# create correct midpoints (technically we should weight these!)
n = 1
min_sample_size = 100
for wave_no in range(1,max_wave+1):
    wave = "W"+str(wave_no)
    # print(wave)
    # per-respondent interview midpoint = starttime + half the interview duration
    BES_Panel["midpoint"+wave] = pd.qcut(BES_Panel["starttime"+wave]+((BES_Panel["endtime"+wave]-BES_Panel["starttime"+wave])/2), n)
    date_cats_dict = {BES_Panel["midpoint"+wave].cat.categories[x]: (BES_Panel["midpoint"+wave].cat.categories[x].left
                      + (BES_Panel["midpoint"+wave].cat.categories[x].right
                         - BES_Panel["midpoint"+wave].cat.categories[x].left)/2).strftime("%Y-%m-%d") for x in range(n)}
    BES_Panel["midpoint"+wave] = pd.to_datetime(BES_Panel["midpoint"+wave].replace(date_cats_dict))
    BES_Panel["endpoint"+wave] = pd.to_datetime(BES_Panel["endtime"+wave]).max()
    BES_Panel["startpoint"+wave] = pd.to_datetime(BES_Panel["starttime"+wave]).min()
    BES_Panel["midtime"+wave] = (BES_Panel["starttime"+wave]+(BES_Panel["endtime"+wave]-BES_Panel["starttime"+wave])/2)
    # BES_Panel["midtime"+wave] = BES_Panel["midtime"+wave].apply(lambda x: x.replace(second=0, microsecond=0, nanosecond=0))
    startpoint_dict[wave] = BES_Panel["startpoint"+wave].dropna().values[0]
    endpoint_dict[wave] = BES_Panel["endpoint"+wave].dropna().values[0]
    midpoint_dict[wave] = BES_Panel["midpoint"+wave].dropna().values[0]
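As the comment above admits, these midpoints are unweighted; a sketch of a weighted alternative (assuming the per-wave weight columns in wts_for_wave), taking the weight-weighted mean of respondents' mid-interview times:
def weighted_wave_midpoint(wave):
    # weighted average of respondents' mid-interview timestamps for one wave
    mid = BES_Panel["starttime"+wave] + (BES_Panel["endtime"+wave] - BES_Panel["starttime"+wave])/2
    wt = BES_Panel[wts_for_wave[wave]]
    ok = mid.notnull() & wt.notnull()
    # average int64 nanosecond timestamps, then convert back to a datetime
    return pd.to_datetime(np.average(mid[ok].astype("int64"), weights=wt[ok]))
# e.g. weighted_wave_midpoint("W7")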
def time_series(var_name, title, subtract_var=False, retain_var=True, specific_dates=True, specific_suffix_set="([a-zA-Z]*)",
                use_midpoints=False, col_name="party", dk_str="Don't know", max_y_size=10.0, min_waves_included=2,
                max_y_size_dict=None, max_wave=19):
    # reshape BES_Panel columns named <var_name><suffix>W<wave> into one long dataframe
    # of 0-1-scaled answers, "Don't know" flags, weights and interview dates
    whole_wave_dk_average = True
    df = pd.DataFrame()
    df2 = pd.DataFrame()
    df_wts = pd.DataFrame()
    df_dates = pd.DataFrame()
    if subtract_var:
        title = title+"\n(where respondents place parties relative to their own preference set at 0.5)"
    wave_list = []
    redist_vars = pd.Series([re.match(var_name+specific_suffix_set+"($|W\d+)", x).groups()[0] for x in BES_Panel.columns
                             if re.match(var_name+specific_suffix_set+"($|W\d+)", x)]).value_counts()
    redist_vars = redist_vars[redist_vars>=min_waves_included].index
    for subj in redist_vars:
        for wave in ["W"+str(x) for x in range(1,max_wave+1)]:
            if var_name+subj+wave not in BES_Panel.columns:
                continue
            else:
                wave_list.append(wave)
                if max_y_size_dict:
                    max_y_size = max_y_size_dict[subj]
                # df[var_name+"num_"+subj+"_"+wave] = zip(BES_Panel[var_name+subj+wave].replace(dk_str,np.nan).cat.codes.replace(-1,np.nan)/max_y_size,
                #                                         BES_Panel[var_name+subj+wave].apply(lambda x: x==dk_str if pd.notnull(x) else np.nan),
                #                                         BES_Panel[wts_for_wave[wave]]
                #                                         )
                df[var_name+"num_"+subj+"_"+wave] = BES_Panel[var_name+subj+wave].replace(dk_str,np.nan).cat.codes.replace(-1,np.nan)/max_y_size
                df2[var_name+"dk_"+subj+"_"+wave] = BES_Panel[var_name+subj+wave]==dk_str
                df2[var_name+"dk_"+subj+"_"+wave][BES_Panel[var_name+subj+wave].isnull()] = np.nan
    for wave in pd.unique(wave_list):
        df_wts["wt_"+wave] = BES_Panel[wts_for_wave[wave]]
        if use_midpoints:
            df_dates["dt_"+wave] = BES_Panel["midpoint"+wave]
        else:
            df_dates["dt_"+wave] = (BES_Panel["starttime"+wave]+(BES_Panel["endtime"+wave]-BES_Panel["starttime"+wave])/2)
            # .apply(lambda x:x.date())
    # set_trace()
    # df_wts = df_wts[df.notnull().any(axis=1)]
    # df = df[df.notnull().any(axis=1)]
    df2.columns = df2.columns.str.split('_', expand=True)
    df2 = df2.stack(dropna=False)\
             .reset_index()\
             .rename(columns={'level_1':"wave"})\
             .rename(columns={'level_0':"id"})
    df_wts.columns = df_wts.columns.str.split('_', expand=True)
    df_wts = df_wts.stack(dropna=False)\
                   .reset_index()\
                   .rename(columns={'level_1':"wave"})\
                   .rename(columns={'level_0':"id"})
    df_dates.columns = df_dates.columns.str.split('_', expand=True)
    df_dates = df_dates.stack(dropna=False)\
                       .reset_index()\
                       .rename(columns={'level_1':"wave"})\
                       .rename(columns={'level_0':"id"})
    df.columns = df.columns.str.split('_', expand=True)
    df = df.stack(dropna=False)\
           .reset_index()\
           .rename(columns={'level_1':"wave"})\
           .rename(columns={'level_0':"id"})
    content_columns = [(var_name+"dk",x) for x in redist_vars]
    df["wt"] = df_wts["wt"]
    df[content_columns] = df2[content_columns]
    df["date"] = df_dates["dt"]
    content_columns = [(var_name+"num",x) for x in redist_vars]+[(var_name+"dk",x) for x in redist_vars]
    # only keep rows with content (variable values/dks)
    df = df[df[content_columns].notnull().any(axis=1)]
    # df = df.loc[ df[[x for x in df.columns if var_name+"num" in x]].notnull().any(axis=1) ]
    df.loc[:,"wt"] = df.loc[:,"wt"].fillna(1.0).values
    temp_ind_name = "temp_index"
    # if specific_dates:
    #     df["date"] = df[["id","wave"]].merge(right=df_dates,
    #                                          how="left",left_on=["id","wave"],right_on=["id","wave"])["dt"].values
    df[temp_ind_name] = list(zip(df["wave"],df["wt"],df["date"],df["id"]))
    df = df.set_index(temp_ind_name).drop(["id","wave","wt","date"],axis=1)
    if subtract_var:
        if retain_var:
            focal_vars = [x for x in df.columns if (var_name+"num" in x) and (subtract_var not in x)]
            df[focal_vars] = df[focal_vars].apply(lambda x: x-df[(var_name+"num", subtract_var)])+0.5
        else:
            focal_vars = [x for x in df.columns if var_name+"num" in x]
            df[focal_vars] = df[focal_vars].apply(lambda x: x-df[(var_name+"num", subtract_var)])+0.5
            df.drop((var_name+"num", subtract_var),axis=1,inplace=True)
    # df2["wt"] = df_wts["wt"]
    # df2 = df2.loc[ df2[[x for x in df2.columns if var_name+"dk" in x]].notnull().any(axis=1) ]
    # df2.loc[:,"wt"] = df2.loc[:,"wt"].fillna(1.0).values
    # if specific_dates:
    #     df2["date"] = df2[["id","wave"]].merge(right=df_dates,
    #                                            how="left",left_on=["id","wave"],right_on=["id","wave"])["dt"].values
    # temp_ind_name = "temp_index"
    # df2[temp_ind_name] = list(zip(df2["wave"],df2["wt"],df2["date"]))
    # df2 = df2.set_index(temp_ind_name).drop(["id","wave","wt","date"],axis=1)
    flat_df_num = df.stack().reset_index().rename(columns={'level_1':col_name})
    if specific_dates:
        flat_df_num["wave"] = flat_df_num[temp_ind_name].apply(lambda x: x[0])
    else:
        flat_df_num["wave"] = flat_df_num[temp_ind_name].apply(lambda x: int(x[0].split("W")[1]))
    flat_df_num["wt"] = flat_df_num[temp_ind_name].apply(lambda x: x[1])
    flat_df_num["date"] = flat_df_num[temp_ind_name].apply(lambda x: x[2])
    flat_df_num["id"] = flat_df_num[temp_ind_name].apply(lambda x: x[3])
    flat_df_num.drop(temp_ind_name,axis=1,inplace=True)
    flat_df_num[col_name] = flat_df_num[col_name].astype('category')
    flat_df_num[var_name+"_wts"] = list(zip(flat_df_num[var_name+"num"],flat_df_num["wt"]))
    flat_df_num[var_name+"_dk"] = list(zip(flat_df_num[var_name+"dk"],flat_df_num["wt"]))
    # flat_df_dk = df2.stack().reset_index().rename(columns={'level_1':col_name,0:"dk"})
    # if specific_dates:
    #     flat_df_dk["wave"] = flat_df_dk[temp_ind_name].apply(lambda x:x[0])
    # else:
    #     flat_df_dk["wave"] = flat_df_dk[temp_ind_name].apply(lambda x:int(x[0].split("W")[1]))
    # flat_df_dk["wt"] = flat_df_dk[temp_ind_name].apply(lambda x:x[1])
    # flat_df_dk["date"] = flat_df_dk[temp_ind_name].apply(lambda x:x[2])
    # flat_df_dk.drop(temp_ind_name,axis=1,inplace=True)
    # flat_df_dk[var_name+"dk"] = flat_df_dk[var_name+"dk"].astype('int')
    # if whole_wave_dk_average:
    #     # calculating weighted total-wave dk average
    #     flat_df_dk = flat_df_dk.groupby(["wave",col_name]).apply(lambda x: (x["wt"]*x[var_name+"dk"]).sum()/x["wt"].sum() ).reset_index().rename(columns={0:"dk"})
    #     flat_df_num = flat_df_num.merge(right=flat_df_dk[[col_name,"wave","dk"]],how='left',left_on=[col_name,"wave"],right_on=[col_name,"wave"])
    # else:
    #     flat_df_dk["dk"+"_wts"] = list(zip(flat_df_dk[var_name+"dk"],flat_df_num["wt"]))
    #     flat_df_num = flat_df_num.merge(right=flat_df_dk[[col_name,"wave","dk"]],how='left',left_on=[col_name,"wave"],right_on=[col_name,"wave"])
    if not specific_dates:
        flat_df_num["date"] = flat_df_num["wave"].apply(lambda x: wave_to_date[x])
        flat_df_num["date"] = pd.to_datetime(flat_df_num["date"], format="%b-%y")
    # raise Exception
    return flat_df_num, df, df2
def sample_size(x, **kws):
    global max_sample_size
    return len(x)/max_sample_size
def between_wave_retention(*args, **kwargs):
    # plots, at each wave's midpoint, the fraction of panellists with answers in consecutive waves of var_name
    global position_in_var_list
    var_name = kwargs['var_name'][position_in_var_list]
    position_in_var_list = position_in_var_list+1
    color = kwargs['color']
    label = kwargs['label']
    alpha = kwargs['alpha']
    df = BES_Panel[match(BES_Panel,var_name+"($|W\d+)").index].apply(lambda x: x.cat.codes.replace(-1,np.nan)).diff(axis=1).notnull().sum()/match(BES_Panel,var_name+"($|W\d+)")
    waves_present = {"W"+x.split("W")[-1]: BES_Panel["midpointW"+x.split("W")[-1]].dropna().values[0] for x in df.index}
    df.index = [BES_Panel["midpointW"+x.split("W")[-1]].dropna().values[0] for x in df.index]
    df.drop(df.index[0], inplace=True)
    sns.lineplot(data=df, color=color, label=label, alpha=alpha)
    for wave in waves_present.keys():
        plt.text(x=waves_present[wave], y=0, s=wave, rotation=90, fontsize=12)
def plot_time_series(var_name, specific_suffix_set, title, col_name, col_wrap, treatment):
    # facet-grid time series of weighted mean answer, DK fraction, sample size and retention
    # (uses the module-level `height` set in the calling cell)
    dk_str = "Don't know"
    lowest_alpha = 0.3
    low_alpha = 0.5
    date_dict = {"22 May 2014":"EE","7 May 2015":"GE","23 June 2016":"EU","8 June 2017":"GE","23 May 2019":"EE"}
    date_col_dict = {"EE":'green',"GE":'red',"EU":'blue'}
    max_y_size_dict = {x: len(BES_Panel[match(BES_Panel,var_name+x+"($|W\d+)").index[0]].cat.remove_categories(dk_str).cat.categories)-1
                       for x in specific_suffix_set.replace("(","").replace(")","").split("|")}
    # max_y_size=len(BES_Panel[ match(BES_Panel,var_name+specific_suffix_set+"($|W\d+)").index[0] ].cat.remove_categories(dk_str).cat.categories)-1,
    flat_df_num, df, df2 = time_series(var_name, title, specific_suffix_set=specific_suffix_set, col_name="party",
                                       max_y_size_dict=max_y_size_dict,
                                       dk_str=dk_str, min_waves_included=2, use_midpoints=False)
    # raise Exception
    flat_df_num["wave_midpoint"] = flat_df_num["wave"].apply(lambda x: midpoint_dict[x])
    flat_df_num["wave_startpoint"] = flat_df_num["wave"].apply(lambda x: startpoint_dict[x])
    flat_df_num["wave_endpoint"] = flat_df_num["wave"].apply(lambda x: endpoint_dict[x])
    global max_sample_size
    global position_in_var_list
    position_in_var_list = 0
    # max_sample_size = flat_df_num["dk"].value_counts().max()
    max_sample_size = flat_df_num.groupby(["party","wave"]).apply(lambda x: len(x)).max()
    g = sns.FacetGrid(data=flat_df_num, col=col_name, col_wrap=col_wrap, ylim=(0.0, 1.0), legend_out=False, height=height)
    # raise Exception
    g.map(sns.lineplot, "wave_midpoint", var_name+"_wts", estimator=weighted_mean, ci=None, label="Mean answer",
          err_style="bars", markers=True, dashes=False)
    g.map(sns.lineplot, "wave_midpoint", var_name+"_dk", color='r', estimator=weighted_mean, ci=None,
          err_style="bars", label="DK fraction", markers=True, dashes=False, alpha=low_alpha)
    g.map(sns.lineplot, "wave_midpoint", "wave", color='g', estimator=sample_size, ci=None,
          label="Sample Size\n(% of max: "+str(max_sample_size)+")", markers=True, dashes=False, alpha=low_alpha)
    g.map(between_wave_retention, "wave_midpoint", "wave", color='k', var_name=[var_name+x for x in g.col_names],
          label="Retention", markers=True, dashes=False, alpha=low_alpha)
    for date in date_dict.keys():
        if flat_df_num["wave_midpoint"].min()<pd.to_datetime(date) and flat_df_num["wave_midpoint"].max()>pd.to_datetime(date):
            g.map(plt.axvline, x=pd.to_datetime(date), ls='--', c=date_col_dict[date_dict[date]], linewidth=1,
                  alpha=lowest_alpha, label=date_dict[date])
    g.map(plt.axhline, y=0.5, ls='-.', c='grey', linewidth=1, alpha=lowest_alpha)
    g.add_legend().set_ylabels("").set_titles(col_template="{col_name}")
    g.fig.suptitle(title, y=1.0+0.03*len(title.split("\n")))
    [plt.setp(ax.get_xticklabels(), rotation=45) for ax in g.axes.flat]
    output_subfolder = create_subdir(BES_output_folder, treatment)
    g.savefig(output_subfolder + clean_filename(var_name+specific_suffix_set) + ".png", bbox_inches='tight')
    return flat_df_num
from scipy.signal import find_peaks
from bokeh.models import Span
from datetime import datetime
import pytz
# def rolling_av(party, var_name):
# df_rolling = flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy().set_index('date')
# # df_rolling = df_rolling["immignum"].rolling(400,center=True).mean()
# df_rolling = df_rolling[var_name].dropna().rolling(500,center=True,win_type='blackmanharris').mean()
# # win_type='gaussian').sum(std=3)
# # df_rolling = df_rolling["immignum"].rolling('1h',min_periods=10).mean()
# return df_rolling
def weighted_mean_for_rolling(processed_dataframe_windowed):
    # the caller sets the frame's index to the weight column ("wt"),
    # so each rolling window arrives with its weights as the index
    return np.average(a=processed_dataframe_windowed, weights=processed_dataframe_windowed.index)
def rolling_av_experimental(party, var_name, win_type='blackmanharris', window_size=500):
    df_rolling = flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy()
    min_sample_size = 200
    temp_dates = pd.qcut(df_rolling["date"],
                         int(np.floor(df_rolling["date"].notnull().sum()/min_sample_size))).apply(lambda x: x.left+(x.right-x.left)/2)
    return df_rolling.groupby(temp_dates)[var_name].agg(lambda x: weighted_mean(x))
def rolling_av(party, var_name, win_type='blackmanharris', window_size=250):
    df_rolling = flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy()
    fmt = "%Y-%m-%d:%H"
    # slow!
    if win_type=="BES":
        # this is a really dumb way to get the right date index!
        date_ind = df_rolling.set_index("date")[var_name].dropna().rolling(window_size, center=True).mean().index
        # print(date_ind.shape)
        # df_rolling["date"].copy().values
        # df_rolling.index = flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy()["date"].values
        # raise Exception
        df_rolling = df_rolling.set_index("wt")[[var_name,'date']].dropna().rolling(window_size,
                                                                                    on='date', center=True)[var_name].apply(weighted_mean_for_rolling, raw=False)
        # print(df_rolling.shape)
        df_rolling.index = date_ind
        # flat_df_num[flat_df_num["party"]==party].sort_values(by='date').copy()["date"].values
        # return temp
    else:
        df_rolling = df_rolling.set_index("date")[var_name].dropna().rolling(window_size, center=True, win_type=win_type).mean()
    # aggregate to the hour, then restore a datetime index
    df_rolling.index = [x.strftime(fmt) for x in df_rolling.index]
    df_rolling = df_rolling.reset_index().groupby('index').mean().dropna()
    df_rolling.index = [pd.to_datetime(x,format=fmt) for x in df_rolling.index]
    df_rolling.index.name = 'date'
    return df_rolling[var_name]
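The "BES" branch leans on a quirk: after set_index("wt"), each rolling window's index carries the weights, which weighted_mean_for_rolling feeds to np.average. A toy demonstration (hypothetical numbers):
toy = pd.DataFrame({"wt": [1.0, 3.0, 1.0, 1.0], "v": [0.0, 1.0, 0.0, 1.0]})
rolled = toy.set_index("wt")["v"].rolling(2).apply(
    lambda w: np.average(w, weights=w.index), raw=False)
rolled.values  # -> [nan, 0.75, 0.75, 0.5]: each window is a weighted mean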
def date_fraction(x, df_temp):
    # interpolate a fractional positional index (from find_peaks' *_ips) into an actual date
    floor_x = int(np.floor(x))
    return df_temp.index[floor_x]+(x-floor_x)*(df_temp.index[floor_x+1]-df_temp.index[floor_x])
def detect_peaks(df_rolling, prominence, width):
    df_temp = df_rolling.copy().dropna()
    peaks, properties = find_peaks(df_temp, prominence=prominence, width=width)
    x = df_temp.index[peaks]
    y = df_temp[peaks].values
    outliers = pd.DataFrame(properties)
    outliers.index = x
    # convert fractional index positions into dates
    outliers["left_ips"] = outliers["left_ips"].apply(lambda x: date_fraction(x, df_temp))
    outliers["right_ips"] = outliers["right_ips"].apply(lambda x: date_fraction(x, df_temp))
    aggregate_to_the_hour_fmt = "%Y-%m-%d:%H"
    aggregate_to_the_day_fmt = "%Y-%m-%d"
    fmt = aggregate_to_the_hour_fmt
    outliers["right_ips"] = outliers["right_ips"].apply(lambda x: pd.to_datetime(x.strftime(fmt),format=fmt) if pd.notnull(x) else np.nan)
    outliers["left_ips"] = outliers["left_ips"].apply(lambda x: pd.to_datetime(x.strftime(fmt),format=fmt) if pd.notnull(x) else np.nan)
    outliers.index = outliers.reset_index()["date"].apply(lambda x: pd.to_datetime(x.strftime(fmt),format=fmt) if pd.notnull(x) else np.nan)
    outliers = outliers.rename(columns={"left_ips":"start","right_ips":"stop"})
    return x, y, outliers
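For intuition about the prominence/width arguments used below (0.15 and 20), a minimal synthetic example (illustrative numbers only):
t = np.arange(200)
sig = 0.5 + 0.3*np.exp(-((t-100)/15.0)**2)  # one bump of height 0.3 at t=100
peaks, props = find_peaks(sig, prominence=0.15, width=20)
peaks                  # -> [100]
props["prominences"]   # -> ~[0.3]: rise above the surrounding baseline
props["widths"]        # -> ~[25]: width (in samples) at half-prominence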
def get_bokeh_source(flat_df_num, min_sample_size=100, var_name=""):
    flat_df_bokeh = flat_df_num.copy()
    # aggregate dates to the level you wish
    aggregate_to_the_hour_fmt = "%Y-%m-%d:%H"
    aggregate_to_the_day_fmt = "%Y-%m-%d"
    fmt = aggregate_to_the_day_fmt
    flat_df_bokeh["date"] = pd.to_datetime(flat_df_bokeh["date"])
    flat_df_bokeh["shortdate"] = pd.to_datetime(flat_df_bokeh["date"].apply(lambda x: x.strftime(fmt) if pd.notnull(x) else np.nan),
                                                format=fmt)
    flat_df_bokeh["date"] = list(zip(flat_df_bokeh["date"].apply(lambda x: x.timestamp() if pd.notnull(x) else np.nan),
                                     flat_df_num["wt"]))  # turn into numeric timestamp
    df = flat_df_bokeh.groupby(['shortdate','party'])[["date",var_name+"_wts",var_name+"_dk"]].agg(lambda x: weighted_mean(x)).reset_index()
    df["date"] = df["date"].apply(lambda x: datetime.fromtimestamp(x,tz=pytz.timezone('GMT')) if pd.notnull(x) else np.nan)  # turn back from timestamp
    df["N"] = flat_df_bokeh.groupby(['shortdate','party']).count().reset_index()[var_name+"num"]
    df["wt_mean"] = flat_df_bokeh.groupby(['shortdate','party']).mean().reset_index()["wt"]*100/2  # "natural" position at 50%
    df["wt_std"] = flat_df_bokeh.groupby(['shortdate','party']).std().reset_index()["wt"]*100/3
    df["dk"] = df[var_name+"_dk"]*100  # % less confusing than fraction
    df["stdev"] = flat_df_bokeh.groupby(['shortdate','party'])[var_name+"num"].std().reset_index()[var_name+"num"]
    df = df[df["N"]>=min_sample_size]
    # plot in bokeh so we can explore!
    df.index = df["date"]
    df = df.rename(columns={var_name+"_wts":var_name+"num"})  # ,"shortdate":"date"
    df = df.pivot(index="date", columns="party", values=[var_name+'num', 'dk', 'N', 'stdev', 'wt_mean', 'wt_std'])
    df.columns = ["_".join(x).strip("_") for x in df.columns]
    df = df.reset_index()
    party_list = flat_df_bokeh["party"].unique()
    return df, party_list
from bokeh.models import BoxAnnotation
def bokeh_time(dtstr):
    # return pd.to_datetime(dtstr).value / 1e6
    return pd.to_datetime(dtstr).timestamp() * 1000
def bokeh_explorer_charts(x_axis_label, y_axis_label, color_dict, title_dict, var_name, x='date', min_sample_size=0,
                          plot_width=490, plot_height=300, dk=True,
                          tools='box_select,pan,xwheel_zoom,box_zoom,reset', active_drag='pan', active_scroll='xwheel_zoom',
                          prominence=.15, width=20, rolling_win_type='blackmanharris', window_size=250):
    y_axis_label_dk = "&tri=daily-mean-wt/squ=daily-std-wt"
    wave_gap_days_max = 2
    lowest_alpha = 0.3
    low_alpha = 0.5
    date_dict = {"22 May 2014":"EE","7 May 2015":"GE","23 June 2016":"EU",
                 "8 June 2017":"GE","23 May 2019":"EE","21 April 2016":"OB",
                 "9 October 2014":"CL","12 December 2019":"GE"}
    date_col_dict = {"EE":'green',"GE":'red',"EU":'blue',"OB":'pink'}
    df, party_list = get_bokeh_source(flat_df_num, min_sample_size=min_sample_size, var_name=var_name)
    party_list = list(color_dict.keys())
    for party in party_list:
        df["scaled_N_"+party] = df["N_"+party].apply(lambda x: np.log2(x+1))/2
    source = ColumnDataSource(df)
    p = {}
    p_dk = {}
    tool_dict = {}
    outlier_dict = {}
    # rolling_win_type = 'BES'
    for party in party_list:
        title = title_dict[party]
        color = color_dict[party]
        y = var_name+'num_'+party
        tool_dict[party] = \
            HoverTool(names=['daily_aggregates'], tooltips=[
                ('Date','@date{%d/%m/%y}'),
                # ('Date','@wave'),
                ('Mean','@'+var_name+'num_'+party),
                ('N','@N_'+party),
                ("SD",'@stdev_'+party),
                ("DK%",'@dk_'+party),
            ],
                formatters={'date': 'datetime'},
                # display a tooltip whenever the cursor is vertically in line with a glyph
                # mode='vline'
            )
        p[party] = figure(x_axis_type="datetime", x_axis_label=x_axis_label,
                          y_axis_label=y_axis_label,
                          plot_width=plot_width, plot_height=plot_height,
                          title=title, tools=tools, active_drag=active_drag, active_scroll=active_scroll)
        p[party].title.text_font_size = '8pt'
        df_rolling = rolling_av(party, var_name+"num", win_type=rolling_win_type, window_size=window_size)
        inferred_wave_gaps = (df_rolling.reset_index()["date"].diff()>timedelta(days=wave_gap_days_max)).astype('int').cumsum()
        for inf_wave in range(0, inferred_wave_gaps.max()+1):
            rolling_source = ColumnDataSource(pd.DataFrame(df_rolling[df_rolling.index[inferred_wave_gaps==inf_wave]]))
            p[party].line(x, var_name+"num", source=rolling_source, color=color)
        x_out, y_out, outliers = detect_peaks(df_rolling, prominence, width)
        p[party].x(x_out, y_out, color='green', size=20, alpha=0.5)
        # y = var_name+"_wts"
        p[party].circle(x, y, source=source, color=color, name='daily_aggregates', size='scaled_N_'+party,
                        fill_alpha=0.0, line_width=1)
        p[party].add_tools(tool_dict[party])
        outliers.name = party
        outlier_dict[party] = outliers
        if dk:
            df_rolling = rolling_av(party, var_name+"dk", win_type=rolling_win_type, window_size=window_size)*100
            # rolling_source = ColumnDataSource(pd.DataFrame(df_rolling))
            p_dk[party] = figure(x_axis_type="datetime", x_axis_label=x_axis_label,
                                 y_axis_label=y_axis_label_dk,
                                 plot_width=plot_width, plot_height=plot_height,
                                 title="".join(title.split(" (")[:-1]+[" (% Don't know)"]),
                                 tools=tools, active_drag=active_drag, active_scroll=active_scroll)
            p_dk[party].title.text_font_size = '8pt'
            # p_dk[party].scatter('date', var_name+"_dk", source=rolling_source, color=color, name='daily_aggregates')
            p_dk[party].circle('date', 'dk_'+party, source=source, color=color, name='daily_aggregates',
                               size='scaled_N_'+party, fill_alpha=0.0, line_width=1)
            p_dk[party].triangle('date', 'wt_mean_'+party, source=source, color=color,
                                 size='scaled_N_'+party, fill_alpha=0.0, line_width=1)
            p_dk[party].square('date', 'wt_std_'+party, source=source, color=color,
                               size='scaled_N_'+party, fill_alpha=0.0, line_width=1)
            p_dk[party].add_tools(tool_dict[party])
            inferred_wave_gaps = (df_rolling.reset_index()["date"].diff()>timedelta(days=wave_gap_days_max)).astype('int').cumsum()
            for inf_wave in range(0, inferred_wave_gaps.max()+1):
                rolling_source = ColumnDataSource(pd.DataFrame(df_rolling[df_rolling.index[inferred_wave_gaps==inf_wave]]))
                p_dk[party].line(x, var_name+"dk", source=rolling_source, color=color)
            x_out, y_out, outliers = detect_peaks(df_rolling, prominence*100, width)
            p_dk[party].x(x_out, y_out, color='green', size=20, alpha=0.5)
        for date in date_dict.keys():
            if flat_df_num["wave_startpoint"].min()<pd.to_datetime(date) and flat_df_num["wave_endpoint"].max()>pd.to_datetime(date):
                my_label = Label(x=pd.to_datetime(date), y=200, y_units='screen', text=date_dict[date])
                p[party].add_layout(my_label)
                box_left = pd.to_datetime(date)
                box_right = pd.to_datetime(date)+timedelta(days=1)
                if date_dict[date]=="OB":
                    box_right = box_right+timedelta(days=1)
                box = BoxAnnotation(left=box_left, right=box_right,
                                    line_width=1, line_color='black', line_dash='dashed',
                                    fill_alpha=0.2, fill_color='orange')
                p[party].add_layout(box)
                if dk:
                    p_dk[party].add_layout(my_label)
                    p_dk[party].add_layout(box)
    for pno in range(0, len(party_list)):
        for pno2 in range(0, len(party_list)):
            if pno==pno2:
                continue
            p[party_list[pno]].x_range = p[party_list[pno2]].x_range
            p[party_list[pno2]].x_range = p[party_list[pno]].x_range
            if dk:
                p[party_list[pno2]].x_range = p_dk[party_list[pno2]].x_range
                p_dk[party_list[pno]].x_range = p_dk[party_list[pno2]].x_range
    if len(party_list)>1:
        p[party_list[1]].x_range = p[party_list[0]].x_range
        if dk:
            p[party_list[0]].x_range = p_dk[party_list[0]].x_range
        lay = layout([[y for y in x] for x in list(zip(p.values(), p_dk.values()))])
    else:
        lay = layout(list(p.values()))
    show(lay)
    for party in party_list:
        print(party)
        display(outlier_dict[party])
    return df, df_rolling
colour_list = ["blue","red","purple", "olive","orange","brown", "pink","cyan","grey","green"]
(1) immigEcon, immigCultural
Very low, consistent DKs; sample size is high (the big boost in the last wave is entirely new people, so retention takes a dive). The two start in almost the same place, but immigEcon moves more (makes me wonder what happened before 2014 - were they always in lockstep?).
Only a pity they didn't cover this in W9, right after the euref. Still, it's notable that the W7->W8 (pre-campaign -> campaign) shift is completely in line with the W8->W10 shift. Again, kind of a shame we don't see W5/W6.
Sharpest shift: W7/W8 -> W10/W11.
%%time
var_name = "immig"
title= "\n".join(["Do you think immigration is good or bad for Britain’s economy/cultural life?",
"Bad for economy/Undermines cultural life(0)-Good for economy/Enriches cultural life(1) (7 step scale)"])
specific_suffix_set = "(Econ|Cultural)"
col_wrap = 2
height = 6
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Do you think immigration is bad(0) or good(1) for Britain’s "+"("+x+")?" for x in suff_list]))
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,
prominence=prominence,width=width)
# ,rolling_win_type="BES")
#, rolling_win_type="BES")
immigEcon
DEFINITE IMPACT
- 2014-10-13: Clacton by-election on the 10th - Douglas Carswell elected as UKIP MP
- 2014-03-04: Something is definitely happening around 3/3/2014 ... the impact is pretty sustained. I just don't know what
- 2016-04-21 22:00:00: Obama Telegraph op-ed and response by Johnson
UNCLEAR
- 2017-05-03 15:00:00: https://en.wikipedia.org/wiki/Portal:Current_events/May_2017 (some 2017 GE campaign related stuff) - not convincing
- 2019-03-26 12:00:00: https://www.telegraph.co.uk/news/2019/03/24/pictures-day-24-march-2019/former-ukip-leader-nigel-farage-addresses-marchers-top-bus-start/ - March to Leave/People's Vote March
- 2015-03-28 11:00:00: Germanwings Flight 9525 -> UK GE announced??
- 2014-06-25: Eh - phone hacking? UKIP had a good showing in a by-election a week before?
I don't buy any of the extra points that show up in immigCultural
(2) immigSelf|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD
The chart is a bit ugly because W12/13 only appear for a small/very small fraction of those cohorts (I think as top-up exercises). Bit heart-breaking, but they probably need to be dropped for anything involving personal differences (or some painful model-based mass imputation to fill in all the gaps).
The scale of movement of immigSelf is ... sloooow - but steady (particularly once the W12/13 fractions are dropped). Re: parties - another one where only the Con chart is really interesting.
%%time
var_name = "immig"
title= "\n".join(["Some people think that the UK should allow *many more* immigrants to come to the UK",
"to live andothers think that the UK should allow *many fewer* immigrants.",
"Where would you place yourself and the parties on this scale?",
"Many fewer (0) - Many more (10)"])
specific_suffix_set = "(Self|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD)"
col_name="party"
col_wrap=3
height = 4
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
BES_Panel.drop( match(BES_Panel,"immig(Self|Con|Lab|TIG|Brexit|Green|UKIP|PC|SNP|LD)(W12|W13)").index, axis=1, inplace=True)
%%time
## now without those two waves
var_name = "immig"
title= "\n".join(["Some people think that the UK should allow *many more* immigrants to come to the UK",
"to live andothers think that the UK should allow *many fewer* immigrants.",
"Where would you place yourself and the parties on this scale?",
"Many fewer (0) - Many more (10)"])
specific_suffix_set = "(Self|Con|Lab|TIG|Green|UKIP|PC|SNP|LD)"
# drop Brexit because it appears in only one wave
col_name="party"
col_wrap=3
height = 4
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Think the UK should allow many more/fewer immigrants?"+"("+x+")" for x in suff_list]))
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name, rolling_win_type="BES")
(3) studentsMore|euMore|asylumMore|familiesMore|noneuMore
- consistent increase W8->W13
- W7->W8 sharper but less consistent (flat for eu, bump for noneu, negative for families)
%%time
var_name = ""
title= "\n".join(["Do you think that Britain should allow more or fewer of the",
"following kinds of people to come and live in Britain?",
"Many fewer (0) - Many more (10)"])
specific_suffix_set = "(studentsMore|euMore|asylumMore|familiesMore|noneuMore)"
col_name="party"
col_wrap = 3
height = 5
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Britain should allow more/fewer of the following kinds of people?"+"("+x+")" for x in suff_list]))
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,)
#, rolling_win_type="BES")
(4) (labHandleImmig|govtHandleImmig)
Supremely dull
# %%time
var_name = ""
title= "\n".join(["How well do you think The Present Government has handled",
"/A Labour Government would handle immigration?",
"Very Badly (0) - Very Well (1) (5pt scale)"])
specific_suffix_set = "(labHandleImmig|govtHandleImmig)"
col_name="party"
col_wrap=2
height = 6
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
color_dict= {"govtHandleImmig":"blue","labHandleImmig":"red"}
title_dict= {"govtHandleImmig":"How well do you think The Present Government has handled immigration? (5pt scale)",
"labHandleImmig":"How well do you think The Labour Party would handle immigration? (5pt scale)"}
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict,var_name,
prominence=prominence,width=width,rolling_win_type="BES")
(5) (immigrationLevel|immigContributeTake)
immigrationLevel could in theory be used to extend immigSelf (a very similar question) - even though it's rather "meh".
immigContributeTake is a really important variable despite our only having it in two waves (and only for a fraction of the cohort!) - partly because it's the right two waves, and partly because it just crops up a lot as a driver.
%%time
var_name = ""
title= "\n".join(["Do you think the number of immigrants from foreign countries who are permitted to come to the United Kingdom to live should be increased, decreased, or left the same as it is now?",
"Do you think people who come to live in Britain from other EU countries get more in benefits and services than they pay in taxes?",
"Decreased a lot/Get More Than They Pay (0) - Increase a lot/Pay More Than They Get (1)"
])
specific_suffix_set = "(immigrationLevel|immigContributeTake)"
col_name="party"
col_wrap=2
height = 6
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
color_dict= {"immigrationLevel":"blue","immigContributeTake":"red"}
title_dict= {"immigrationLevel":"immigrationLevel (5pt scale)",
"immigContributeTake":"immigContributeTake (7pt scale)"}
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict,var_name,
prominence=prominence,width=width, rolling_win_type="BES")
(6) (changeImmig|changeImmigLab|controlImmig)
changeImmig is a very long panel with decent coverage - a sharp change around the euref (shame it doesn't appear in W9). changeImmigLab - dull! controlImmig has an unfortunate problem: the W8 campaign sample would be perfect, but it's not a full sample, and the question shifts quite a lot (there are three different versions; the big difference is probably between W8 and W9+ - how much control does the UK have/would the UK have after ...).
%%time
var_name = ""
title= "\n".join(["Do you think that the level of immigration is getting higher, getting lower or staying about the same?",
"If there were a *Labour* UK government today, do you think that the level of immigration would be getting higher, getting lower or staying about the same?",
"How much control do you think Britain has/will have over immigration to the UK?"
])
specific_suffix_set = "(changeImmig|changeImmigLab|controlImmig)"
col_name="party"
col_wrap=3
height = 6
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
color_dict= {"changeImmig":"blue","changeImmigLab":"red","controlImmig":"purple"}
title_dict= {"changeImmig":"changeImmig (5pt scale)",
"changeImmigLab":"changeImmigLab (5pt scale)",
"controlImmig":"controlImmig (5pt scale)",}
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict,var_name,
prominence=prominence,width=width, rolling_win_type="BES")
# I'll have to check that glitch in Retention out later
(7) (immigrantsWelfareState|effectsEUImmigration|euPriorityBalance|ukCoopMovement)
immigrantsWelfareState - this variable is super important (long profile, good sample size, and it crops up in lots of regressions)
effectsEUImmigration - "Do you think the following would be higher, lower or about the same if the UK leaves the European Union?" - important, since this is an actual explicit cause and effect in relation to the UK leaving the EU (and not, say, a Con government being elected/perceptions of UK politicians)
euPriorityBalance - reflects the slow but continuous positive shift in UK attitudes to Freedom of Movement
%%time
var_name = ""
title= "\n".join(["Immigrants are a burden on the welfare state (Strongly Agree 0 - Strongly Disagree 1)",
"Do you think the immigration to the UK would be higher, lower or about the same if the UK leaves the European Union? (Much Lower 0 - Much Higher 1)",
"When Britain negotiates to leave the EU is it more important for the UK governmentto protect Britain's access to the single market",
"or to gain full control of immigration? (Control Immigration 0 - Access to Single Market 1: 11pt scale)",
"Is it good/bad/neither/DK for Britain: Allowing the free movement of workers within Europe?"])
specific_suffix_set = "(immigrantsWelfareState|effectsEUImmigration|euPriorityBalance|ukCoopMovement)"
col_name="party"
col_wrap=2
height = 6
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
color_dict= {"immigrantsWelfareState":"blue","effectsEUImmigration":"red","euPriorityBalance":"purple","ukCoopMovement":"orange"}
title_dict= {"immigrantsWelfareState":"Immigrants are a burden on the welfare state (5pt scale)",
"effectsEUImmigration":"Would immigration be higher, lower or about the same if the UK leaves the EU? (5pt scale)",
"euPriorityBalance":"Brexit priority: access to single market versus controlling immigration? (11pt scale)",
"ukCoopMovement":"IS allowing the free movement of workers within Europe Good/Bad/Neither (3pt scale)",}
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict,var_name,
prominence=prominence,width=width, rolling_win_type="BES")
(8) responsibleImmig(Conservatives|LibDems|Labour|ScottishGovt|WelshGovt|None|DontKnow)
Interesting, because it directly follows from the changeImmig questions ... and yet the trends are also deeply boring.
%%time
var_name = "responsibleImmig"
title= "\n".join(["Thinking about the changes in the levels of immigration you just described.",
"Who do you think these are the result of?",
"*Please tick all that apply for each issue.* No (0) - Yes (1)"])
specific_suffix_set = "(Conservatives|LibDems|Labour|ScottishGovt|WelshGovt|None|DontKnow)"
col_name="party"
col_wrap=4
height = 4
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Who is responsible for the changes in lvls of imm. you just described?"+"("+x+")" for x in suff_list]))
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,
rolling_win_type="BES")
(9) achieveReduceImmig(Brexit|Con|Grn|LD|Lab|None|PC|SNP|TIG|UKIP)
Wow - the charts for LD/Green are not broken - there are some Yeses, just very few. The Con pattern is interesting - the way it ever-so-briefly kisses UKIP before collapsing back at the GE. UKIP is trending down even before the euref.
%%time
var_name = "achieveReduceImmig"
title= "\n".join(["Would any of the following political parties be successful",
" – if elected/if they were in government – ",
"in reducing the level of immigration? No (0) - Yes (1)"])
specific_suffix_set = "(Brexit|Con|Grn|LD|Lab|None|PC|SNP|TIG|UKIP)"
col_name="party"
col_wrap=5
height = 4
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Would this party be succ in reducing imm if elected/in gov?"+"("+x+")" for x in suff_list]))
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,
rolling_win_type="BES")
(10) (conPriorities_immig|labPriorities_immig|ldPriorities_immig|ukipPriorities_immig)
Low sample size even at peak; a bump around the 2015 GE.
%%time
var_name = ""
title= "\n".join(["Is immigration one of the most important issues do you think are most important to each of the following political parties?",
"*Please tick all the issues that are important to each party*. No (0) - Yes (1)"])
specific_suffix_set = "(conPrioritiesImmig|labPrioritiesImmig|ldPrioritiesImmig|ukipPrioritiesImmig)"
col_name="party"
col_wrap=2
height = 6
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["Is immigration one of the most important issues for this party?"+"("+x+")" for x in suff_list]))
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,)
#, rolling_win_type="BES")
(11) dealPriority(MarketAccess|NIUKBorder|NIEireBorder|TradeDeal|RedTape|Immig)
Okay - no significant changes W15->W16. The only thing of note is how low "control immigration" (or, indeed, RedTape) stands relative to the other priorities (I suspect this is largely because of the Leave/Remain split).
%%time
var_name = "dealPriority"
title= "\n".join(["How would you rate the importance of these different aspects of a deal to leave the European Union?",
"Not important at all (0) - Extremely important (1) (5pt scale)"])
specific_suffix_set = "(MarketAccess|NIUKBorder|NIEireBorder|TradeDeal|RedTape|Immig)"
col_name="party"
col_wrap=3
height = 6
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
# %%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["How would you rate the imp. of diff. aspects of a Deal?"+"("+x+")" for x in suff_list]))
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,
rolling_win_type="BES")
# date, dealPriority_wts, dealPriority_dk
# %debug
(12) negotiationSpecifics(ContinuePayingMoney|AcceptingFoM|AcceptingEUReg|Nothing)
Feels like there's one transition W9 -> W10 where a chunk of people go from "no concessions necessary" -> "some concessions necessary".
Then one more transition W14 -> W15 where another chunk of people reconsider some of those concessions (FoM/paying money). Notable that AcceptingEUReg doesn't waver. Something to do with the perceived weakness of May/strength of the ERG, as seen by people reading tabloids?
%%time
var_name = "negotiationSpecifics"
title= "\n".join(["When Britain negotiates leaving terms with the EU, what will it take for the",
"UK government to get unrestricted access to EU markets for British business?",
"Tick all that apply No (0) Yes (1)"])
specific_suffix_set = "(ContinuePayingMoney|AcceptingFoM|AcceptingEUReg|Nothing)"
col_name="party"
col_wrap=2
height = 6
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
suff_list = specific_suffix_set.replace(")","").replace("(","").split("|")
color_dict = dict(zip(suff_list,colour_list))
title_dict = dict(zip(suff_list,["What must the UK give to get Single Market access?"+"("+x+")" for x in suff_list]))
prominence=.15
width=20
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name,prominence=prominence,width=width,
rolling_win_type="BES")
(13) benefitsToMigrants
"Thinking of people coming to live in Great Britain, when do you think they should obtain the same rights to social benefits and services as citizens already living here?"
Wow - super-flat. Does this period cover the time when Cameron was trying to negotiate exactly this? Even afterwards, it seems weird for it to be so very, very flat.
%%time
var_name = ""
title= "\n".join(["Thinking of people coming to live in Great Britain, when do you think they should",
"obtain the same rights to social benefits and services as citizens already living here?",
"Immediately (0) - Never (1)",
])
specific_suffix_set = "(benefitsToMigrants)"
col_name="party"
col_wrap=1
height = 5
flat_df_num = plot_time_series(var_name=var_name,specific_suffix_set=specific_suffix_set,title=title,col_name="party",
col_wrap=col_wrap,treatment="immig_timeseries")
%%time
y_axis_label = "circles=daily-mean/line=moving average"
x_axis_label = "Response Date"
color_dict= {"benefitsToMigrants":"blue"}
title_dict= {"benefitsToMigrants":"When should immigrants get access to UK benefits?"}
df,df_rolling = bokeh_explorer_charts(x_axis_label,y_axis_label,color_dict,title_dict, var_name, rolling_win_type="BES")
BES_Panel["benefitsToMigrantsW8"].value_counts()