#install.packages("survey")
library(survey)
#install.packages("srvyr")
library(srvyr)
#install.packages("haven")
library(haven)
Exercise 2: European Election Study 2019
Session 4
This is a demo of (slightly complex) data management, also showing how to use regular expressions and apply survey weights.
Download datasets on your computer
Load data and install useful packages
library(tidyverse) # {dplyr}, {ggplot2}, {readxl}, {stringr}, {tidyr}, etc.

# Folder that holds the downloaded data files
repository <- "data"

# Load Survey: Stata file read with {haven}, which keeps the value labels
# (printed as <dbl+lbl> columns)
dta <- haven::read_dta(file.path(repository, "ZA7581_v1-0-0.dta"))
head(dta)
# A tibble: 6 × 129
za_nr version doi respid serial hCountry countrycode region region_NUTS1
<dbl> <chr> <chr> <dbl> <dbl> <dbl+lb> <dbl+lbl> <dbl+lbl> <chr>
1 7581 1.0.0 (… doi:… 7762 1.00e8 1 [Aust… 1040 [Aust… 108 [Nie… AT1
2 7581 1.0.0 (… doi:… 7801 1.00e8 1 [Aust… 1040 [Aust… 108 [Nie… AT1
3 7581 1.0.0 (… doi:… 7904 1.00e8 1 [Aust… 1040 [Aust… 102 [Tir… AT3
4 7581 1.0.0 (… doi:… 9223 1.00e8 1 [Aust… 1040 [Aust… 109 [Wie… AT1
5 7581 1.0.0 (… doi:… 9338 1.00e8 1 [Aust… 1040 [Aust… 102 [Tir… AT3
6 7581 1.0.0 (… doi:… 99 1.00e8 1 [Aust… 1040 [Aust… 108 [Nie… AT1
# ℹ 120 more variables: region_NUTS2 <chr>, region_NUTS3 <chr>, Q1_1 <chr>,
# noanswerQ1 <dbl+lbl>, Q2 <dbl+lbl>, Q2n <dbl+lbl>, Q2_EES <dbl+lbl>,
# Q3 <dbl+lbl>, Q4 <dbl+lbl>, Q5 <dbl+lbl>, Q6 <dbl+lbl>, Q7 <dbl+lbl>,
# Q7n <dbl+lbl>, Q7_ees <dbl+lbl>, Q8 <dbl+lbl>, Q9 <dbl+lbl>, Q9n <dbl+lbl>,
# Q9_ees <dbl+lbl>, q10_1 <dbl+lbl>, q10_2 <dbl+lbl>, q10_3 <dbl+lbl>,
# q10_4 <dbl+lbl>, q10_5 <dbl+lbl>, q10_6 <dbl+lbl>, q10_7 <dbl+lbl>,
# q10_8 <dbl+lbl>, q10_9 <dbl+lbl>, q10_10 <dbl+lbl>, Q11 <dbl+lbl>, …
# Load Party List dataset
p <- readr::read_csv(file.path(repository, "ZA7581_cp.csv"))
Rows: 426 Columns: 20
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (9): Coutnry_short, Region, Q10_PTV, Q13_left_right, Q24_EU, Party_name...
dbl (11): countrycode, Unifed_party_code, Q7, Q7n, Q2, Q2_EES, Q2n, Q9_Q25_E...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# preview the first rows of the party list
head(p)
# A tibble: 6 × 20
Coutnry_short countrycode Region Unifed_party_code Q10_PTV Q13_left_right
<chr> <dbl> <chr> <dbl> <chr> <chr>
1 AT 1040 <NA> 1 Q10_1 Q13_1
2 AT 1040 <NA> 2 Q10_2 Q13_2
3 AT 1040 <NA> 3 Q10_3 Q13_3
4 AT 1040 <NA> 4 Q10_4 Q13_4
5 AT 1040 <NA> 5 Q10_5 Q13_5
6 AT 1040 <NA> 6 Q10_6 Q13_6
# ℹ 14 more variables: Q24_EU <chr>, Party_name_questionnaire <chr>,
# `English name` <chr>, Q7_EES <chr>, Q7 <dbl>, Q7n <dbl>, Q2 <dbl>,
# Q2_EES <dbl>, Q2n <dbl>, Q9_Q25_EES <dbl>, Q9 <dbl>, Q9n <dbl>, q25 <chr>,
# Q25n <dbl>
Explore the datasets
See dplyr::group_by and dplyr::count, which works like dplyr::summarise but without having to specify the summary function (sum, etc.).
# country sample sizes: one row per country with its number of respondents
dta %>%
  group_by(countrycode) %>%
  count()
# A tibble: 28 × 2
# Groups: countrycode [28]
countrycode n
<dbl+lbl> <int>
1 1040 [Austria] 1000
2 1056 [Belgium] 1000
3 1110 [Bulgaria] 1016
4 1191 [Croatia] 1008
5 1196 [Cyprus] 501
6 1203 [Czech Rep.] 1000
7 1208 [Denmark] 1000
8 1233 [Estonia] 1000
9 1246 [Finland] 1000
10 1250 [France] 1000
# ℹ 18 more rows
Use:
dplyr::select to select variables starting with q10 (see also dplyr::starts_with)
haven::print_labels on the q10_7 variable
# Select data columns: every variable whose name starts with "q10"
dta %>%
  select(starts_with("q10")) %>%
  head()
# A tibble: 6 × 10
q10_1 q10_2 q10_3 q10_4 q10_5 q10_6 q10_7 q10_8 q10_9
<dbl+lbl> <dbl+lb> <dbl> <dbl> <dbl+l> <dbl+l> <dbl+lb> <dbl+lb> <dbl+lb>
1 6 [6] 6 [6] 6 [6] 6 [6] 0 [0 n… 6 [6] 96 [Ite… 96 [Ite… 96 [Ite…
2 2 [2] 1 [1] 8 [8] 6 [6] 0 [0 n… 7 [7] 96 [Ite… 96 [Ite… 96 [Ite…
3 4 [4] 5 [5] 5 [5] 8 [8] 0 [0 n… 3 [3] 96 [Ite… 96 [Ite… 96 [Ite…
4 6 [6] 8 [8] 3 [3] 7 [7] 0 [0 n… 0 [0 n… 96 [Ite… 96 [Ite… 96 [Ite…
5 0 [0 not at a… 10 [10 … 5 [5] 3 [3] 0 [0 n… 0 [0 n… 96 [Ite… 96 [Ite… 96 [Ite…
6 4 [4] 3 [3] 7 [7] 6 [6] 4 [4] 2 [2] 96 [Ite… 96 [Ite… 96 [Ite…
# ℹ 1 more variable: q10_10 <dbl+lbl>
# inspect coding: list the value labels attached to q10_7
haven::print_labels(dta$q10_7)
Labels:
value label
0 0 not at all probable
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10 very probable
96 Item not asked in country
97 Refusal
98 Dont know the party
Use tidyr::pivot_longer(everything(), ...) to reshape the selected columns from wide to long format.
# stack the Q10_* columns into (party, q10) pairs, one row per response
# (starts_with() ignores case by default, so "Q10_" matches q10_1 ... q10_10)
dta %>%
  select(starts_with("Q10_")) %>%
  tidyr::pivot_longer(everything(), names_to = "party", values_to = "q10") %>%
  head()
We apply the following transformation
d <- dta %>%
  # select respondent and country identifiers, plus Q10_* responses
  # (starts_with() ignores case by default, so this matches q10_1 ... q10_10)
  select(respid:countrycode, starts_with("Q10_")) %>%
  # reshape from 'wide' to 'long' format
  pivot_longer(starts_with("Q10"), names_to = "party", values_to = "q10") %>%
  # recode response items above 10 as missing
  mutate(q10 = if_else(q10 > 10, NA_real_, as.numeric(q10)))
head(d)
# A tibble: 6 × 6
respid serial hCountry countrycode party q10
<dbl> <dbl> <dbl+lbl> <dbl+lbl> <chr> <dbl>
1 7762 100007762 1 [Austria] 1040 [Austria] q10_1 6
2 7762 100007762 1 [Austria] 1040 [Austria] q10_2 6
3 7762 100007762 1 [Austria] 1040 [Austria] q10_3 6
4 7762 100007762 1 [Austria] 1040 [Austria] q10_4 6
5 7762 100007762 1 [Austria] 1040 [Austria] q10_5 0
6 7762 100007762 1 [Austria] 1040 [Austria] q10_6 6
See:
dplyr::group_by
dplyr::arrange
dplyr::summarise
# percentages of non-missing Q10_* responses, per country and Q10 item
d %>%
  group_by(countrycode, party) %>%
  arrange(party) %>%
  summarise(
    n_total = n(),
    p_nonmissing = 100 * sum(!is.na(q10)) / n_total
  )
`summarise()` has grouped output by 'countrycode'. You can override using the
`.groups` argument.
# A tibble: 280 × 4
# Groups: countrycode [28]
countrycode party n_total p_nonmissing
<dbl+lbl> <chr> <int> <dbl>
1 1040 [Austria] q10_1 1000 97.8
2 1040 [Austria] q10_10 1000 0
3 1040 [Austria] q10_2 1000 98.2
4 1040 [Austria] q10_3 1000 97.4
5 1040 [Austria] q10_4 1000 98.5
6 1040 [Austria] q10_5 1000 98
7 1040 [Austria] q10_6 1000 96.7
8 1040 [Austria] q10_7 1000 0
9 1040 [Austria] q10_8 1000 0
10 1040 [Austria] q10_9 1000 0
# ℹ 270 more rows
# print(d, n = Inf)
Finally, we tidy the dataset p to keep only a few columns, renamed along the way.
p <- p %>%
  # select and rename variables (columns)
  # ("Coutnry_short" is spelled this way in the CSV header)
  select(
    country = Coutnry_short,
    countrycode,
    partyname = "English name", # Party_name_questionnaire
    party = Q10_PTV,
    partycode = Q9_Q25_EES,
    ends_with("_EES")
  ) %>%
  # subset to rows with non-missing countries
  filter(!is.na(countrycode)) %>%
  # keep only unique rows
  distinct()
# drop parties that have no Q10_* item
p <- p %>% filter(!is.na(party))
head(p)
# A tibble: 6 × 7
country countrycode partyname party partycode Q7_EES Q2_EES
<chr> <dbl> <chr> <chr> <dbl> <chr> <dbl>
1 AT 1040 Austrian People's Party Q10_1 1040520 10405… 1.04e6
2 AT 1040 Austrian Social Democratic … Q10_2 1040320 10403… 1.04e6
3 AT 1040 NEOS - The New Austria and … Q10_3 1040423 10404… 1.04e6
4 AT 1040 The Greens Q10_4 1040110 10401… 1.04e6
5 AT 1040 Austrian Freedom Party Q10_5 1040420 10404… 1.04e6
6 AT 1040 Alliance for the Future of … Q10_6 1040600 10406… 1.04e6
REGular EXpressions (regex)
Here are quick demos of matching on regular expressions
# parties whose English name contains "green"/"Green" or "ecolog"/"Ecolog"
p %>%
  filter(stringr::str_detect(partyname, "[Gg]reen|[Ee]colog"))
# A tibble: 16 × 7
country countrycode partyname party partycode Q7_EES Q2_EES
<chr> <dbl> <chr> <chr> <dbl> <chr> <dbl>
1 AT 1040 The Greens Q10_4 1040110 10401… 1.04e6
2 BE 1056 Green Q10_6 1056112 10561… 1.06e6
3 BE 1056 Ecologists Q10_4 1056111 10561… 1.06e6
4 CY 1196 Ecological and Environment… Q10_5 1196110 <NA> 1.20e6
5 DE 1276 Alliance 90 / The Greens Q10_4 1276113 12761… 1.28e6
6 DK 1208 Red-Green Unity List Q10_6 1208220 12082… 1.21e6
7 EE 1233 Estonian Greens Q10_7 1233100 12331… 1.23e6
8 FI 1246 Green Union Q10_5 1246110 12461… 1.25e6
9 FR 1250 Europe Ecology - The Greens Q10_4 1250110 12501… 1.25e6
10 IE 1372 Green Party Q10_4 1372110 13721… 1.37e6
11 LV 1428 Green and Farmers' Union Q10_6 1428110 14281… 1.43e6
12 LT 1440 Lithuanian Peasant and Gre… Q10_7 1440524 14405… 1.44e6
13 LU 1442 The Greens Q10_4 1442113 14421… 1.44e6
14 NL 1528 Green Left Q10_5 1528110 15281… 1.53e6
15 SE 1752 Green Ecology Party Q10_3 1752110 17521… 1.75e6
16 UK 1826 Green Party Q10_4 1826110 18261… 1.83e6
p %>%
  filter(stringr::str_detect(partyname, "[Gg]reen")) %>%
  select(country, partyname) %>%
  # extract "Green", one whitespace character and the word that follows
  mutate(what = stringr::str_extract(partyname, "Green\\s\\w+")) %>%
  # str_extract() yields NA where the pattern does not match
  filter(!is.na(what))
# A tibble: 8 × 3
country partyname what
<chr> <chr> <chr>
1 CY Ecological and Environmental Movement (Cyprus Green Party) Green Party
2 DK Red-Green Unity List Green Unity
3 FI Green Union Green Union
4 IE Green Party Green Party
5 LV Green and Farmers' Union Green and
6 NL Green Left Green Left
7 SE Green Ecology Party Green Ecol…
8 UK Green Party Green Party
Merging
We want to harmonize party codes.
We want to merge this…
# respondent-level answers, keyed by country code and Q10 item
d %>% select(countrycode, party, q10)
# A tibble: 265,380 × 3
countrycode party q10
<dbl+lbl> <chr> <dbl>
1 1040 [Austria] q10_1 6
2 1040 [Austria] q10_2 6
3 1040 [Austria] q10_3 6
4 1040 [Austria] q10_4 6
5 1040 [Austria] q10_5 0
6 1040 [Austria] q10_6 6
7 1040 [Austria] q10_7 NA
8 1040 [Austria] q10_8 NA
9 1040 [Austria] q10_9 NA
10 1040 [Austria] q10_10 NA
# ℹ 265,370 more rows
… with that
# party-level names and codes, keyed by country code and Q10 item
p %>% select(countrycode, party, partyname, partycode)
# A tibble: 208 × 4
countrycode party partyname partycode
<dbl> <chr> <chr> <dbl>
1 1040 Q10_1 Austrian People's Party 1040520
2 1040 Q10_2 Austrian Social Democratic Party 1040320
3 1040 Q10_3 NEOS - The New Austria and Liberal Forum 1040423
4 1040 Q10_4 The Greens 1040110
5 1040 Q10_5 Austrian Freedom Party 1040420
6 1040 Q10_6 Alliance for the Future of Austria 1040600
7 1056 Q10_1 Workers Party of Belgium 1056325
8 1056 Q10_2 Christian Democratic and Flemish Party 1056521
9 1056 Q10_3 Socialist Party Different 1056327
10 1056 Q10_4 Open Flemish Liberals and Democrats 1056421
# ℹ 198 more rows
You can use unique(dataset$variable) to explore the distinct values (modalities) of a variable.
Then, you can take a look at stringr::str_to_upper.
We have to transform d$party to upper case.
# inspect country codes found in the survey data (labelled values)
unique(d$countrycode)
<labelled<double>[28]>: ISO country codes
[1] 1040 1056 1110 1191 1196 1203 1208 1276 1233 1246 1250 1300 1348 1372 1380
[16] 1428 1440 1442 1470 1528 1616 1620 1642 1705 1703 1724 1752 1826
Labels:
value label
1040 Austria
1056 Belgium
1110 Bulgaria
1191 Croatia
1196 Cyprus
1203 Czech Rep.
1208 Denmark
1233 Estonia
1246 Finland
1250 France
1276 Germany
1300 Greece
1348 Hungary
1372 Ireland
1380 Italy
1428 Latvia
1440 Lithuania
1442 Luxembourg
1470 Malta
1528 Netherlands
1616 Poland
1620 Portugal
1642 Romania
1703 Slovakia
1705 Slovenia
1724 Spain
1752 Sweden
1826 United Kingdom
# country codes found in the party list; compare with those of the survey data
# NOTE(review): 1119 appears here where the survey data has 1191 (Croatia)
unique(p$countrycode)
[1] 1040 1056 1110 1119 1196 1203 1276 1208 1233 1246 1250 1300 1348 1372 1380
[16] 1428 1440 1442 1470 1528 1616 1620 1642 1705 1703 1724 1752 1826
# parties: Q10 item identifiers used in the party list
# (entries such as "mean (Q10_2, Q10_8)" have no counterpart in d$party)
unique(p$party) # some of those will be dropped when merging
[1] "Q10_1" "Q10_2"
[3] "Q10_3" "Q10_4"
[5] "Q10_5" "Q10_6"
[7] "Q10_7" "Q10_8"
[9] "Q10_9" "mean (Q10_1, Q10_2, Q10_3)"
[11] "mean (Q10_2, Q10_8)"
unique(d$party) # some of those have only missing values and will also go
[1] "q10_1" "q10_2" "q10_3" "q10_4" "q10_5" "q10_6" "q10_7" "q10_8"
[9] "q10_9" "q10_10"
# solution to problem: upper-case d$party ("q10_1" -> "Q10_1") so that it
# matches the spelling used in p$party
d$party <- stringr::str_to_upper(d$party)
We have to remove Belgium: because of Wallonia and Flanders, Belgian party names are not attached to a unique Q10_* identifier, so merging would cause duplicated rows in the result.
# drop the Belgian rows from the party list
p <- p %>%
  filter(!country %in% "BE")
See dplyr::inner_join.
# left-hand table of the merge: respondent answers in long format
head(d)
# A tibble: 6 × 6
respid serial hCountry countrycode party q10
<dbl> <dbl> <dbl+lbl> <dbl+lbl> <chr> <dbl>
1 7762 100007762 1 [Austria] 1040 [Austria] Q10_1 6
2 7762 100007762 1 [Austria] 1040 [Austria] Q10_2 6
3 7762 100007762 1 [Austria] 1040 [Austria] Q10_3 6
4 7762 100007762 1 [Austria] 1040 [Austria] Q10_4 6
5 7762 100007762 1 [Austria] 1040 [Austria] Q10_5 0
6 7762 100007762 1 [Austria] 1040 [Austria] Q10_6 6
# right-hand table of the merge: tidied party list
head(p)
# A tibble: 6 × 7
country countrycode partyname party partycode Q7_EES Q2_EES
<chr> <dbl> <chr> <chr> <dbl> <chr> <dbl>
1 AT 1040 Austrian People's Party Q10_1 1040520 10405… 1.04e6
2 AT 1040 Austrian Social Democratic … Q10_2 1040320 10403… 1.04e6
3 AT 1040 NEOS - The New Austria and … Q10_3 1040423 10404… 1.04e6
4 AT 1040 The Greens Q10_4 1040110 10401… 1.04e6
5 AT 1040 Austrian Freedom Party Q10_5 1040420 10404… 1.04e6
6 AT 1040 Alliance for the Future of … Q10_6 1040600 10406… 1.04e6
# The party list codes one country as 1119 where the survey data uses 1191
# (Croatia). Left as is, inner_join() would find no match on countrycode and
# silently drop every respondent of that country, so align the key first.
p <- p %>%
  mutate(countrycode = if_else(countrycode == 1119, 1191, countrycode))
# inner merge party names with Q10 answers
ptv <- inner_join(d, p, by = c("countrycode", "party"))
head(ptv)
# A tibble: 6 × 11
respid serial hCountry countrycode party q10 country partyname partycode
<dbl> <dbl> <dbl+lbl> <dbl+lbl> <chr> <dbl> <chr> <chr> <dbl>
1 7762 100007762 1 [Austr… 1040 [Aust… Q10_1 6 AT Austrian… 1040520
2 7762 100007762 1 [Austr… 1040 [Aust… Q10_2 6 AT Austrian… 1040320
3 7762 100007762 1 [Austr… 1040 [Aust… Q10_3 6 AT NEOS - T… 1040423
4 7762 100007762 1 [Austr… 1040 [Aust… Q10_4 6 AT The Gree… 1040110
5 7762 100007762 1 [Austr… 1040 [Aust… Q10_5 0 AT Austrian… 1040420
6 7762 100007762 1 [Austr… 1040 [Aust… Q10_6 6 AT Alliance… 1040600
# ℹ 2 more variables: Q7_EES <chr>, Q2_EES <dbl>
Tidy a little bit more:
ptv <- ptv %>%
  # group by country-party dyad
  group_by(country, countrycode, party, partyname, partycode) %>%
  # compute mean probability to vote, with sd
  # (summarise() drops only the last grouping level, so the result stays grouped)
  summarise(
    mu_ptv = mean(q10, na.rm = TRUE),
    sd_ptv = sd(q10, na.rm = TRUE)
  ) %>%
  # sort by country and then PTV by descending order
  arrange(country, desc(mu_ptv))
`summarise()` has grouped output by 'country', 'countrycode', 'party',
'partyname'. You can override using the `.groups` argument.
# data from respondent answers (hCountry 11 is the French sample)
d2 <- d %>% filter(hCountry == 11)
# cross-tabulate Q10 item by answer; exclude = NULL keeps the <NA> column
table(d2$party, d2$q10, exclude = NULL)
0 1 2 3 4 5 6 7 8 9 10 <NA>
Q10_1 318 65 74 83 67 123 54 65 46 27 48 30
Q10_10 0 0 0 0 0 0 0 0 0 0 0 1000
Q10_2 347 57 67 82 76 105 63 71 56 21 24 31
Q10_3 446 38 44 37 39 67 59 33 46 35 124 32
Q10_4 238 43 46 68 65 140 76 106 85 33 73 27
Q10_5 439 59 61 59 52 92 49 54 35 17 40 43
Q10_6 412 68 67 49 61 94 43 30 22 14 16 124
Q10_7 372 58 47 45 45 106 56 45 69 43 81 33
Q10_8 0 0 0 0 0 0 0 0 0 0 0 1000
Q10_9 0 0 0 0 0 0 0 0 0 0 0 1000
# data from party dataset
p %>% filter(country == "FR")
# A tibble: 7 × 7
country countrycode partyname party partycode Q7_EES Q2_EES
<chr> <dbl> <chr> <chr> <dbl> <chr> <dbl>
1 FR 1250 The Republicans Q10_1 1250626 1250626 1.25e6
2 FR 1250 Socialist Party Q10_2 1250320 1250320 1.25e6
3 FR 1250 National Rally Q10_3 1250720 1250720 1.25e6
4 FR 1250 Europe Ecology - The Greens Q10_4 1250110 1250110 1.25e6
5 FR 1250 Unbowed France Q10_5 1250225 1250225 1.25e6
6 FR 1250 Generation.s, the movement Q10_6 1250321 1250321 1.25e6
7 FR 1250 The Republic Onwards! Q10_7 1250438 1250438 1.25e6
# merged results for French parties
ptv %>% filter(country == "FR")
# A tibble: 7 × 7
# Groups: country, countrycode, party, partyname [7]
country countrycode party partyname partycode mu_ptv sd_ptv
<chr> <dbl+lbl> <chr> <chr> <dbl> <dbl> <dbl>
1 FR 1250 [France] Q10_4 Europe Ecology - The Gree… 1250110 4.32 3.30
2 FR 1250 [France] Q10_7 The Republic Onwards! 1250438 3.51 3.57
3 FR 1250 [France] Q10_3 National Rally 1250720 3.34 3.80
4 FR 1250 [France] Q10_1 The Republicans 1250626 3.31 3.16
5 FR 1250 [France] Q10_2 Socialist Party 1250320 3.11 3.04
6 FR 1250 [France] Q10_5 Unbowed France 1250225 2.64 3.12
7 FR 1250 [France] Q10_6 Generation.s, the movement 1250321 2.28 2.77
Survey weighting [OPTIONAL]
A small demo
# French respondents, voted in last (EU) election
dta %>%
  filter(hCountry == 11) %>%
  count(Q6)
# A tibble: 3 × 2
Q6 n
<dbl+lbl> <int>
1 1 [Yes, voted] 700
2 2 [No, did not vote] 276
3 98 [Don't know] 24
# French respondents, party list voted for
dta %>%
  filter(hCountry == 11) %>%
  count(Q7)
# A tibble: 18 × 2
Q7 n
<dbl+lbl> <int>
1 0 [did not vote] 300
2 90 [other party] 29
3 96 [did vote blanc or nil] 34
4 98 [do not remember] 23
5 1101 [La liste de La France insoumise de Jean-Luc Mélanchon, menée par… 50
6 1102 [La liste de la République en Marche et du MoDem, menée par Natha… 141
7 1103 [La liste Patriote et Gilets jaunes, menée par Florian Philippot] 9
8 1104 [La liste Urgence écologie, menée par Dominique Bourg] 12
9 1105 [La liste du Parti socialiste et de Place publique, menée par Rap… 39
10 1106 [La liste Debout La France, menée par Nicolas Dupont-Aignan] 25
11 1107 [La liste Lutte Ouvrirère, menée par Nathalie Arthaud] 4
12 1108 [La Liste du Parti communiste français, menée par Ian Brossat] 10
13 1109 [La liste pour le Frexit, menée par François Asselineau] 4
14 1110 [La liste Génération.s et Dème-Diem 25, menée par Benoît Hamon] 23
15 1111 [La liste du Rassemblement National de Marine Le Pen, menée par J… 141
16 1112 [La Liste Alliance jaune, menée par Francis Lalanne] 9
17 1113 [La liste des Républicains, menée par François Xavier Bellamy] 64
18 1114 [La liste Europe Ecologie, menée par Yannick Jadot] 83
# survey weights (EES documentation Appendix 3)
dta %>% select(starts_with("WGT"))
# A tibble: 26,538 × 5
WGT1 WGT2 WGT3 WGT4 WGT5
<dbl> <dbl> <dbl> <dbl> <dbl>
1 1.07 0.466 0.915 1.58 1.45
2 1.05 0.455 0.909 0.768 0.696
3 0.867 0.455 0.854 1.58 1.09
4 1.07 0.553 0.988 1.58 1.52
5 0.817 0.455 0.748 0.768 0.576
6 1.05 0.455 0.909 1.16 1.02
7 1.02 0.463 0.835 1.16 0.884
8 0.928 0.463 1.56 0.639 0.855
9 0.917 0.455 0.836 0.631 0.496
10 1.08 0.443 0.868 1.16 1.10
# ℹ 26,528 more rows
# WGT5 adjusts weights to age/sex, urbanity, region, education
# + turnout (Q6) + recall (Q7)
# create a survey design object for French respondents
fra <- dta %>%
  filter(hCountry == 11) %>%
  # replace party codes with party names (labels)
  mutate(Q7 = droplevels(haven::as_factor(Q7))) %>%
  # weight using WGT5 (ids ~ 1 means that there is no PSU or strata)
  survey::svydesign(ids = ~ 1, weights = ~ WGT5, data = .)
# weighted vote responses, using the {survey} package
survey::svytotal(~ Q7, fra)
total
Q7did not vote 422.8775
Q7other party 17.9710
Q7did vote blanc or nil 40.8829
Q7do not remember 35.0657
Q7La liste de La France insoumise de Jean-Luc Mélanchon, menée par Manon Aubry 31.6115
Q7La liste de la République en Marche et du MoDem, menée par Nathalie Loiseau 112.2196
Q7La liste Patriote et Gilets jaunes, menée par Florian Philippot 10.3498
Q7La liste Urgence écologie, menée par Dominique Bourg 10.7089
Q7La liste du Parti socialiste et de Place publique, menée par Raphaël Glucksman 31.0348
Q7La liste Debout La France, menée par Nicolas Dupont-Aignan 17.6310
Q7La liste Lutte Ouvrirère, menée par Nathalie Arthaud 3.3237
Q7La Liste du Parti communiste français, menée par Ian Brossat 12.4710
Q7La liste pour le Frexit, menée par François Asselineau 4.0409
Q7La liste Génération.s et Dème-Diem 25, menée par Benoît Hamon 16.3582
Q7La liste du Rassemblement National de Marine Le Pen, menée par Jordan Bardella 116.8896
Q7La Liste Alliance jaune, menée par Francis Lalanne 6.5118
Q7La liste des Républicains, menée par François Xavier Bellamy 42.5182
Q7La liste Europe Ecologie, menée par Yannick Jadot 67.5339
SE
Q7did not vote 67.2959
Q7other party 8.5136
Q7did vote blanc or nil 11.9653
Q7do not remember 11.6320
Q7La liste de La France insoumise de Jean-Luc Mélanchon, menée par Manon Aubry 8.0567
Q7La liste de la République en Marche et du MoDem, menée par Nathalie Loiseau 16.4559
Q7La liste Patriote et Gilets jaunes, menée par Florian Philippot 7.2598
Q7La liste Urgence écologie, menée par Dominique Bourg 4.6131
Q7La liste du Parti socialiste et de Place publique, menée par Raphaël Glucksman 7.9570
Q7La liste Debout La France, menée par Nicolas Dupont-Aignan 5.7106
Q7La liste Lutte Ouvrirère, menée par Nathalie Arthaud 2.6141
Q7La Liste du Parti communiste français, menée par Ian Brossat 4.6142
Q7La liste pour le Frexit, menée par François Asselineau 2.1652
Q7La liste Génération.s et Dème-Diem 25, menée par Benoît Hamon 4.5655
Q7La liste du Rassemblement National de Marine Le Pen, menée par Jordan Bardella 26.1978
Q7La Liste Alliance jaune, menée par Francis Lalanne 3.1291
Q7La liste des Républicains, menée par François Xavier Bellamy 15.6058
Q7La liste Europe Ecologie, menée par Yannick Jadot 15.0317
# weighted vote responses, using the {srvyr} package
srvyr::as_survey(fra) %>%
  srvyr::survey_count(Q7)
# A tibble: 18 × 3
Q7 n n_se
<fct> <dbl> <dbl>
1 did not vote 423. 67.3
2 other party 18.0 8.51
3 did vote blanc or nil 40.9 12.0
4 do not remember 35.1 11.6
5 La liste de La France insoumise de Jean-Luc Mélanchon, menée pa… 31.6 8.06
6 La liste de la République en Marche et du MoDem, menée par Nath… 112. 16.5
7 La liste Patriote et Gilets jaunes, menée par Florian Philippot 10.3 7.26
8 La liste Urgence écologie, menée par Dominique Bourg 10.7 4.61
9 La liste du Parti socialiste et de Place publique, menée par Ra… 31.0 7.96
10 La liste Debout La France, menée par Nicolas Dupont-Aignan 17.6 5.71
11 La liste Lutte Ouvrirère, menée par Nathalie Arthaud 3.32 2.61
12 La Liste du Parti communiste français, menée par Ian Brossat 12.5 4.61
13 La liste pour le Frexit, menée par François Asselineau 4.04 2.17
14 La liste Génération.s et Dème-Diem 25, menée par Benoît Hamon 16.4 4.57
15 La liste du Rassemblement National de Marine Le Pen, menée par … 117. 26.2
16 La Liste Alliance jaune, menée par Francis Lalanne 6.51 3.13
17 La liste des Républicains, menée par François Xavier Bellamy 42.5 15.6
18 La liste Europe Ecologie, menée par Yannick Jadot 67.5 15.0
Source
Data sources
Schmitt, Hermann, Hobolt, Sara B., Brug, Wouter van der, & Popa, Sebastian A. (2022). European Parliament Election Study 2019, Voter Study. GESIS, Cologne. ZA7581 Data file Version 2.0.1, https://doi.org/10.4232/1.13846.