knitr::opts_chunk$set(comment = "#>",
echo = T,
results = 'show',
message = FALSE,
warning = FALSE,
cache = F)
library(readxl)
library(tidyverse)
library(wbstats)
library(gghighlight)
library(lubridate)
library(ggridges)
library(sjPlot)
library(webshot)
library(DT)
library(tidytext)
library(plotly)
library(stringr)
library(stringdist)
library(fuzzyjoin)
library(wbstats)
library(countrycode)
library(maps)
library(Rilostat)
library(geomtextpath)
library(kableExtra)
library(flextable)
library(officer)
library(htmltools)
library(scales)
library(forcats)
library(stringr)
library(tools)
library(countrycode)
library(wbstats)
library(GGally)
library(WDI)
library(ggrepel)
library(flextable)
library(ggpubr)
# Read the main results file with exposure levels (no computer data)
data <- read_excel("./Results/Results_AI_LAC.xlsx", skip = 3, sheet = "AI_LAC") %>%
select(-1) %>%
rename(
code = 1,
`AI Categories` = 2,
Total = 3
) %>%
fill(code, .direction = "down") %>%
rename(potential = "AI Categories") %>%
mutate(potential = ifelse(potential == "missing", "Other", potential))
# Read list of country codes used by WB
wb_codes <- read_excel("./bases/ILO/Country_codes_wb.xlsx")
# Attach WB codes to project data
data <- left_join(data, wb_codes) %>%
select(country, code, everything())
# Create a long version of project data
data1 <- data %>%
select(country, 1:6) %>%
pivot_longer(cols = c(Total:Male),
names_to = "sex",
values_to = "value") %>%
mutate(potential = factor(potential, levels = c("Automation Potential",
"Augmentation Potential",
"The Big Uknown",
"Other")))
# Create a list of all countries (from WB and ILO results)
countries_wb <- data %>% distinct(code) %>% drop_na()
countries_ilo <- read_csv("./bases/ILO/Microdata_ILO/LATAM_FINAL_ILO_totals_small.csv") %>% distinct(code)
all_countries <- rbind(countries_ilo, countries_wb) %>%
unique() %>%
left_join(., wb_codes)
# Create a df with LAC names and codes
LAC_countries <- all_countries %>%
select(country, code) %>%
distinct()
# Create a list of LAC codes
LAC_codes <- LAC_countries %>%
select(code) %>%
pull()
# Get ISCO 1-digit data from ILO modeled estimates for year 2022 through API:
Z <- get_ilostat("EMP_2EMP_SEX_OCU_NB_A", cache = F)
Z <- Z %>%
filter(time == 2022,
ref_area %in% LAC_codes)
Z <- Z %>% label_ilostat()
toc <- get_ilostat_toc(segment = "ref_area") %>%
select(ref_area,
ref_area.label,
wb_income_group.label,
ilo_region.label,
ilo_subregion_broad.label,
ilo_subregion_detailed.label) %>%
distinct() %>%
drop_na() %>%
# Create income-subregion categories for each country
mutate(region_income = paste0(ilo_region.label, ": ", gsub("World: ", "", wb_income_group.label)),
subregion_income = paste0(ilo_subregion_broad.label, ": ", gsub("World: ", "", wb_income_group.label)))
# Merge with labels
Z1 <- left_join(Z, toc) %>%
# Create easier labels
rename(country = ref_area.label,
countrycode = ref_area,
region = ilo_region.label,
income = wb_income_group.label,
subregion = ilo_subregion_broad.label,
subregion2 = ilo_subregion_detailed.label,
year = time,
sex = sex.label)
# Format
lac1d <- Z1 %>%
rename(code = countrycode) %>%
select(country, code, year, sex, classif1.label, obs_value, income) %>%
# Copy the totals by gender from the main column into a new column
mutate(total_by_gender = ifelse(classif1.label == "Occupation (Detailed): Total", obs_value, NA)) %>%
# Fill in totals by country, year, sex, source in the new column
fill(total_by_gender, .direction = "down") %>%
rename(job = classif1.label,
value = obs_value,
total = total_by_gender) %>%
mutate(share = round(value/total, 2)) %>%
group_by(country, code, year, sex) %>%
mutate(check = sum(share)) %>%
filter(job != "Occupation (Detailed): Total") %>%
mutate(check = sum(share)) %>%
#Adjust gender labels
mutate(sex = case_when(sex == "SEX_M" ~ "Men",
sex == "SEX_F" ~ "Women",
sex == "SEX_T" ~ "Total",
TRUE ~ sex)) %>%
# Create smaller ISCO labels
mutate(job = str_replace(job, "Occupation \\(Detailed\\): ", "")) %>%
mutate(isco1 = substr(job, start = 1, stop = 1)) %>%
mutate(isco1 = ifelse(isco1 == "9", "96", isco1)) %>%
mutate(sex = str_replace(sex, "Sex: ", "")) %>%
mutate(income = str_replace(income, "World: ", "")) %>%
ungroup() %>%
mutate(job = factor(job, levels = c(
"1. Managers",
"2. Professionals",
"3. Technicians and associate professionals",
"4. Clerical support workers",
"5. Service and sales workers",
"7. Craft and related trades workers",
"8. Plant and machine operators, and assemblers",
"96. Elementary occupations and skilled agricultural, forestry and fishery workers"
))) %>%
mutate(income = factor(income, levels = c(
"Lower-middle income",
"Upper-middle income",
"High income"
)))
# Create list of LAC countries with income status from ILO API download
income_ILO1d <- lac1d %>%
select(country, income) %>%
distinct()
# Create the final df with 22 country names, codes and income status, adding manually the missing ones
income2 <- left_join(all_countries, income_ILO1d) %>%
select(country, code, income) %>%
mutate(income = as.character(income))%>%
mutate(income = ifelse(code %in% c("GRD","MSR"), "Upper-middle income", income),
income = ifelse(code %in% c("BOL"), "Lower-middle income", income )) %>%
mutate(income = factor(income, levels = c(
"Lower-middle income",
"Upper-middle income",
"High income"
))) %>%
arrange(income, country)
# Get ILO Employment by sex and ISCO2: R download
X <- get_ilostat("EMP_2EMP_SEX_AGE_NB_A", cache = F)
modelled_totals_2023 <- X %>% filter(sex == "SEX_T") %>%
mutate(obs_value = obs_value * 1000) %>%
filter(classif1 == "AGE_YTHADULT_YGE15") %>%
rename(code = ref_area) %>%
rename(Total_EMPL_modelled_2022_R = obs_value) %>%
select(code, time, Total_EMPL_modelled_2022_R) %>%
rename(year = time) %>%
filter(year == "2023")
modelled_totals_2023_by_sex <- X %>%
mutate(sex = case_when(sex == "SEX_M" ~ "Male",
sex == "SEX_F" ~ "Female",
sex == "SEX_T" ~ "Total",
TRUE ~ sex)) %>%
filter(sex != "Total") %>%
mutate(obs_value = obs_value * 1000) %>%
filter(classif1 == "AGE_YTHADULT_YGE15") %>%
rename(code = ref_area) %>%
rename(Total_EMPL_modelled_2022_R_by_sex = obs_value) %>%
filter(time == "2023") %>%
select(code, sex, Total_EMPL_modelled_2022_R_by_sex) %>%
mutate(sex = as.character(sex))
# GET POPULATION AND GDP FROM THE WORLD BANK for 2022
variables <- c("SP.POP.TOTL", "NY.GDP.PCAP.CD")
popul_gdp_data <- WDI(country = LAC_codes, indicator = variables, start = 2022, end = 2022) %>%
rename(population = "SP.POP.TOTL",
gdp_percap = "NY.GDP.PCAP.CD") %>%
# Merge to df with income status
left_join(., income2)
# Generate Figure 1
ggplot(popul_gdp_data, aes(x = population / 1000000, y = gdp_percap, label = country)) +
geom_point(aes(color = factor(income)), size = 3) +
geom_text_repel(size = 4, angle = 0, max.overlaps = Inf) +
scale_color_brewer(palette = "Set1") +
theme_minimal() +
labs(x = "Population (millions)", y = "GDP per Capita (USD)",
color = "") +
theme(legend.position = "bottom",
legend.text = element_text(size = 11))
# Save
ggsave(filename = "Figure1.jpeg",
path = "Figures",
width = 8, height = 6,
units = "in",
dpi = 300)
gbb_data <- read_csv("./bases/ILO/GBB_subregion_sex.csv") %>%
filter(sex == "Total") %>%
select(region, Distribution_Percentage, impact) %>%
rename(total = Distribution_Percentage) %>%
mutate(lac = ifelse(region == "Latin America and the Caribbean", "LAC", "nonLAC"))
a <- ggplot(gbb_data %>% filter(impact == "Automation Potential"),
aes(x = reorder(region, total), y = total, fill = lac)) +
geom_col(alpha = 1) +
coord_flip() +
facet_wrap(~impact) +
scale_fill_manual(values = c("orange3", "skyblue3"), guide = guide_legend(reverse = TRUE)) +
theme(legend.position = "none",
axis.text.x = element_text(size =11),
axis.text.y = element_text(size =13),
axis.title = element_text(size = 10),
strip.text = element_text(size = 12)) +
labs(x = "", y = "Share of total employment (%)")+
ylim(0, 35) +
geom_text(aes(label = paste(total, "%")), hjust = -0.2, size = 4)
b <- ggplot(gbb_data %>% filter(impact == "Augmentation Potential"),
aes(x = reorder(region, total), y = total, fill = lac)) +
geom_col(alpha = 1) +
coord_flip() +
facet_wrap(~impact) +
scale_fill_manual(values = c("orange3", "skyblue3"), guide = guide_legend(reverse = TRUE)) +
theme(legend.position = "none",
axis.text = element_text(size =11),
axis.text.y = element_text(size =13),
axis.title = element_text(size = 10),
strip.text = element_text(size = 12)) +
labs(x = "", y = "Share of total employment (%)") +
ylim(0, 35) +
geom_text(aes(label = paste(total, "%")), hjust = -0.2, size = 4)
ggarrange(a, b)
# Save
ggsave(filename = "Figure2.jpeg",
path = "Figures",
width = 11, height = 4,
units = "in",
dpi = 230)
# WB data
indicators <- c("NY.GDP.PCAP.PP.KD", "IT.NET.USER.ZS")
data <- WDI(country = "all", indicator = indicators, start = 2019, end = 2021, extra = TRUE)
data1 <- data %>%
# Drop rows with missing values in the selected indicators
filter(!is.na(NY.GDP.PCAP.PP.KD) & !is.na(IT.NET.USER.ZS)) %>%
# Drop aggregates and regional NAs
filter(!is.na(region),
region != "Aggregates") %>%
# Select the latest year for each country
group_by(country) %>%
filter(year == max(year)) %>%
ungroup() %>%
# Drop rows with missing year
filter(!is.na(year)) %>%
# Filter data for the years 2019 or later
filter(year >= 2019) %>%
# Create new variable lgdp (log of GDP per capita) and LAC
mutate(lgdp = log(NY.GDP.PCAP.PP.KD)) %>%
# Create LAC identifier
mutate(lac = ifelse(region =="Latin America & Caribbean", "LAC", "non-LAC"))
# Plot
a <- ggplot(data1, aes(x = lgdp, y = IT.NET.USER.ZS, color = lac)) +
geom_point(size = 1) +
geom_smooth(method = "loess", formula = y ~ log(x), se = FALSE, aes(color = "Fitted line (loess)"), linewidth = 1) +
scale_color_manual(values = c("Fitted line (loess)" = "green", "LAC" = "red", "non-LAC" = "blue"),
labels = c("Fitted line (loess)", "LAC", "non-LAC"),
guide = guide_legend(reverse = TRUE)) +
labs(x = "GDP per capita, USD 2017 PPP (log) (WDI, circa 2021)",
y = "Internet Users per 100 people (ITU, circa 2021)",
color = "") +
theme_minimal() +
theme(legend.position = "bottom",
legend.text = element_text(size = 10),
axis.title = element_text(size = 11))
# Plot LAC only
data_lac_net <- data1 %>%
filter(lac == "LAC") %>%
mutate(
country = ifelse(country == "Bahamas, The", "The Bahamas", country),
income = factor(income, levels = c("High income", "Upper middle income", "Lower middle income"))
)
b <- ggplot(data_lac_net,
aes(x = lgdp, y = IT.NET.USER.ZS, color = income)) +
geom_point(size = 1) +
geom_smooth(method = "glm", formula = y ~ log(x), se = FALSE, color = "green2") +
scale_y_continuous(limits = c(0, 100)) +
theme_minimal() +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 10),
axis.title = element_text(size = 11)) +
labs(x = "Per capita income in USD (log) (WDI, circa 2021)",
y = "% population with internet access (ITU, 2021)") +
geom_text_repel(aes(label = country), size = 3, angle = 0, max.overlaps = Inf, color = "black") +
scale_color_manual(
values = c(
"High income" = "blue",
"Upper middle income" = "orange2",
"Lower middle income" = "red"
),
labels = c("High income", "Upper middle income", "Lower middle income"),
guide = guide_legend(reverse = TRUE)
)
# Save as one figure
ggarrange(a, b, ncol =2)
ggsave(filename = "Figure3.jpeg",
path = "Figures",
width = 12, height = 5,
units = "in",
dpi = 300)
# Calculate mean shares based on ILO modelled data at 1-digit
means <- lac1d %>%
group_by(job, sex, income) %>%
summarise(mean = mean(share)) %>%
ungroup()
# Plot
ggplot(means %>% filter(sex != "Total"),
aes(x = fct_rev(job), y = mean, fill = sex)) +
geom_col(position = "dodge") +
facet_wrap(~income) +
coord_flip() +
theme(legend.position = "right",
legend.title =element_blank(),
axis.text.y = element_text()) +
labs(x = "",
y = "Mean share of employment within each income group") +
geom_text(aes(label = paste(round(mean *100, 1), "%")),
position = position_dodge(width = 0.9),
hjust = -0.1,
size = 3) +
scale_fill_manual(values = c("#ff7f0e", "#1f77b4"), guide = guide_legend(reverse = TRUE)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) +
ylim(0,0.56)
# Save
ggsave(filename = "Figure4.jpeg",
path = "Figures",
width = 9, height = 4.5,
units = "in",
dpi = 300)
# ILO dataset based on micro data extraction
data_5_ilo <- read_csv("./bases/ILO/Microdata_ILO/LATAM_FINAL_ILO_totals_small.csv") %>%
rename(ilo_total = distribution_percentage) %>%
select(-source, -country) %>%
mutate(potential = case_when(potential == "Automation" ~ "Automation Potential",
potential == "Augmentation" ~"Augmentation Potential",
potential == "Big Unknown" ~ "The Big Unknown"))
# WB main results
data_5_wb <- read_excel("./Results/Results_AI_LAC.xlsx", skip = 3, sheet = "AI_LAC") %>%
select(-1) %>%
rename(code = 1, `AI Categories` = 2, Total = 3) %>%
select(1:3) %>%
fill(code, .direction = "down") %>%
rename(potential = "AI Categories") %>%
mutate(potential = ifelse(potential == "missing", "Other", potential)) %>%
filter(potential != "Other") %>%
rename(wb_total = Total) %>%
mutate(potential = ifelse(potential == "The Big Uknown", "The Big Unknown", potential))
data_5_compare <- full_join(data_5_ilo, data_5_wb)%>% # check 2d vs 4d data labels, then remove
left_join(., wb_codes) %>%
filter(!is.na(country)) %>%
select(1:5) %>%
rename(WB = wb_total,
ILO = ilo_total) %>%
pivot_longer(cols = ILO:WB,
names_to = "source",
values_to = "value") %>%
drop_na() %>%
mutate(country = factor(country),
potential = case_when(potential == "Automation Potential" ~ "Automation",
potential == "Augmentation Potential" ~ "Augmentation",
TRUE ~potential),
potential = factor(potential, levels = c("Automation", "Augmentation", "The Big Unknown"))) %>%
filter(country != "Argentina")
# Plot
ggplot(data_5_compare, aes(x = fct_rev(country), y = value, fill = source)) +
geom_col(position = "dodge") +
coord_flip() +
facet_wrap(~ potential, ncol = 3) +
geom_text(aes(label = paste0(round(value, 2), "%", " (", source, ")")),
position = position_dodge(width = 1),
vjust = 0.4,
hjust = -0.1,
size = 2.5) +
labs(x = "",
y = "Share of total employment (%)",
title = "") +
scale_fill_manual(values = c("skyblue3", "orange2"), guide = guide_legend(reverse = TRUE)) +
theme(legend.title = element_blank(),
axis.title = element_text(size = 10),
strip.text = element_text(size = 10),
axis.text.y = element_text(size = 10)) +
ylim(0, 50)
# Save
ggsave(filename = "Figure5.jpeg",
path = "Figures",
width = 9, height = 6,
units = "in",
dpi = 300)
# ILO dataset based on micro data extraction
data_7_ilo <- read_csv("./bases/ILO/Microdata_ILO/LATAM_FINAL_ILO_totals_small.csv") %>%
rename(ilo_total = distribution_percentage) %>%
select(-source, -country) %>%
mutate(potential = case_when(potential == "Automation" ~ "Automation Potential",
potential == "Augmentation" ~"Augmentation Potential",
potential == "Big Unknown" ~ "The Big Unknown"))
data_7_wb <- read_excel("./Results/Results_AI_LAC.xlsx", skip = 3, sheet = "AI_LAC") %>%
select(-1) %>%
rename(code = 1, `AI Categories` = 2, Total = 3) %>%
select(1:3) %>%
fill(code, .direction = "down") %>%
rename(potential = "AI Categories") %>%
mutate(potential = ifelse(potential == "missing", "Other", potential)) %>%
filter(potential != "Other") %>%
rename(wb_total = Total) %>%
mutate(potential = ifelse(potential == "The Big Uknown", "The Big Unknown", potential))
data_7_combined <- full_join(data_7_ilo, data_7_wb)%>%
mutate(source = ifelse(is.na(wb_total), "ILO", "WB"),
exposure = ifelse(is.na(wb_total), ilo_total, wb_total)) %>%
mutate(exposure = round(exposure, 2))%>%
left_join(., wb_codes) %>%
left_join(., modelled_totals_2023) %>% # Add ILO estimates
# Add manually employment in Grenada (33800, 2020) and Montserrat (2489, 2020)
mutate(Total_EMPL_modelled_2022_R = case_when(country == "Montserrat" ~ 2489,
country == "Grenada" ~ 33800,
TRUE ~ Total_EMPL_modelled_2022_R)) %>%
mutate(exposed_jobs_country = round(exposure/100 * Total_EMPL_modelled_2022_R, 0)) %>%
# Calculate total exposed jobs in 3 categories for each country
group_by(code) %>%
mutate(total_jobs_exposed_country = sum(exposed_jobs_country),
exposure_total = sum(exposure)) %>%
ungroup() %>%
select(country, potential, exposure, exposure_total, total_jobs_exposed_country, Total_EMPL_modelled_2022_R)
# Add results for HIC from Gmyrek, Berg and Bescond 2023 and format to append to the main df
hic_ilo <- read_csv("./bases/ILO/GBB_2023_HIC_results_formatted.csv") %>%
rename(exposure = value,
total_jobs_exposed_country = exposed_total) %>%
select(country, potential, exposure, exposure_total, total_jobs_exposed_country, Total_EMPL_modelled_2022_R)
final_exposures_LAC <- rbind(data_7_combined, hic_ilo) %>%
filter(country != "Argentina") %>%
mutate(potential = factor(potential, levels = c("Automation Potential", "Augmentation Potential", "The Big Unknown")))
# Plot totals by country with both datasets
ggplot(final_exposures_LAC,
aes(x = reorder(country, exposure_total), y = exposure, fill = fct_rev(potential))) +
geom_col() +
coord_flip() +
scale_fill_manual(values = c("Augmentation Potential" = "orange",
"Automation Potential" = "red3",
"The Big Unknown" = "skyblue"),
guide = guide_legend(reverse = TRUE)) +
theme(legend.title = element_blank(),
axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 1),
legend.position = "bottom") +
labs(x = "",
y = "Share of total employment (%)") +
geom_text(aes(label = paste(round(exposure, 0), "%")), position = position_stack(vjust = 0.6), size = 3) +
ylim(0,60) +
geom_text(aes(label = paste(round(exposure_total, 0), "%", "(", round(total_jobs_exposed_country / 1000000, 2), "M jobs", ")"), y = exposure_total), size = 3, hjust = -0.1)
# Save
ggsave(filename = "Figure7.jpeg",
path = "Figures",
width = 9, height = 5,
units = "in",
dpi = 300)
# Box plots by category
category = "Augmentation Potential"
box <- data1 %>%
filter(potential == category) %>%
select(-potential) %>%
pivot_longer(cols = c(2:last_col()),
values_to = "value",
names_to = "var")
varnames <- box %>% distinct(var) %>% pull()
box <- box %>%
mutate(var = factor(var, levels = varnames)) %>%
mutate(class = case_when(var %in% c("Female", "Male") ~ "Gender",
var %in% c("Rural", "Urban") ~ "Location",
var %in% c("15-24", "25-34", "35-44", "45-54", "55-64") ~ "Age",
var %in% c("Low education", "Medium education", "High education") ~ "Education",
var %in% c("Poor", "Non-Poor") ~ "Poverty",
var %in% c("Q1", "Q2", "Q3", "Q4", "Q5") ~ "Household Income (Q)",
var %in% c("Formal (legal)", "Informal (legal)") ~ "Informality (legal)",
var %in% c("Formal (prod)", "Informal (prod)") ~ "Informality (prod)",
var %in% c("Employer", "Salaried employee",
"Self-employed", "Family worker (without salary)") ~ "Employment",
var %in% c("Primary Activities", "Low Technology Industries", "Other Industries",
"Construction", "Retail, Wholesale Trade, Restaurants, Hotels, etc.",
"Electricity, Gas, Water, Transportation, Communication",
"Banks, Finance, Insurance, Professional Services",
"Public Administration, Defense", "Education, Health, Personal Services",
"Domestic Services") ~ "Sector"
)) %>%
filter(var != "Total") %>%
mutate(class = factor(class, levels = c("Gender",
"Location",
"Age",
"Education",
"Poverty",
"Household Income (Q)",
"Informality (legal)",
"Informality (prod)",
"Employment",
"Sector")))
# Plot
ggplot(box, aes(x = var, y = value)) +
geom_boxplot() +
geom_point(alpha = 0.5, shape = "diamond", size = 2) +
stat_summary(fun = mean, geom = "point", shape = 18, size = 3, color = "red") +
stat_summary(fun = mean, geom = "line", color = "red", group = 1) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0, size = 11),
legend.title = element_blank(),
strip.text = element_text(size = 11)) +
labs(
# title = category,
# subtitle = "Distribution of country-level correlates",
y = "Share of employment exposed to augmentation (%)",
x = "") +
facet_wrap(~class, scales = "free", nrow = 2)
# Save
ggsave(filename = "Figure9.jpeg",
path = "Figures",
width = 10, height = 8,
units = "in",
dpi = 300)
oecd_countries <- c("AUS", "AUT", "BEL", "CAN", "CHL", "COL", "CZE", "DNK", "EST", "FIN", "FRA", "DEU", "GRC",
"HUN", "ISL", "IRL", "ISR", "ITA", "JPN", "KOR", "LVA", "LTU", "LUX", "MEX", "NLD", "NZL",
"NOR", "POL", "PRT", "SVK", "SVN", "ESP", "SWE", "CHE", "TUR", "GBR", "USA")
# Selection of countries from 2015 and 2017 PIAAC
piaac_2015 <- c("Chile", "New Zealand", "Slovenia")
piaac_2017 <- c("Ecuador", "Mexico", "Peru")
# Country names df
country_names <- c("Chile", "Ecuador", "Mexico", "New Zealand", "Peru", "Slovenia")
country_codes <- c("CHL", "ECU", "MEX", "NZL", "PER", "SVN")
country_tibble <- tibble(code = country_codes, country = country_names)
# PIACC dataset
data_piaac <- read_excel("./Results/Results_AI_LAC.xlsx", skip = 3, sheet = "AI_PIAAC_ONLY_CPU") %>%
select(-1, -4) %>%
rename(code = 1,
potential = 2,
exposed = 3) %>%
fill(code, .direction = "down") %>%
filter(code %in% c("SVN", "NZL", "CHL", "ECU", "MEX", "PER")) %>%
mutate(computer = str_split_fixed(potential, ", ", 2)[,2],
potential = str_split_fixed(potential, ", ", 2)[,1]) %>%
select(code, potential, computer, everything()) %>%
mutate(potential = str_replace_all(potential, c("^Pr\\(" = "", "\\)$" = "")))%>%
mutate(potential = str_replace_all(potential, "=1", "")) %>%
mutate(potential = ifelse(potential == "Missing", "Other", potential)) %>%
mutate(computer = str_replace_all(computer, "Computer at work=", "")) %>%
mutate(computer = str_replace_all(computer, "\\)$", "")) %>%
mutate(computer = ifelse(computer == 1, "Computer at work", "No computer at work")) %>%
left_join(., wb_codes) %>%
filter(potential == "Augmentation") %>%
select(code, potential, computer, exposed) %>%
group_by(code) %>%
mutate(total_augmentation = sum(exposed)) %>%
ungroup() %>%
# ADD LAC and OECD labels
mutate(OECD = if_else(code %in% oecd_countries, "OECD", "Non-OECD")) %>%
left_join(., LAC_countries) %>%
rename(LAC = country) %>%
mutate(LAC = ifelse(is.na(LAC), "Other", "LAC")) %>%
left_join(., country_tibble) %>%
select(country, code, everything()) %>%
# Add a dummy variable for legend purpose
mutate(dummy_legend = ifelse(LAC == "LAC", "LAC countries", NA)) %>%
# Create a sorting variable
mutate(comp = ifelse(computer == "Computer at work", exposed, NA)) %>%
fill(comp, .direction = "down")
# Plot
ggplot(data_piaac,
aes(x = reorder(country, comp), y = exposed, fill = fct_rev(computer))) +
geom_col(alpha = 0.8) +
scale_fill_manual(values = c("skyblue", "orange2"),
guide = guide_legend(reverse = T, override.aes = list(color = NA))) +
theme(legend.position = "top",
axis.text.x = element_text(angle = 0, size = 12, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 12),
axis.title = element_text(size=10),
legend.title = element_blank(),
legend.text = element_text(size = 9)) +
labs(x = "",
y = "Share of total employment (%)"
) +
coord_flip() +
ylim(0,23) +
geom_text(aes(label = paste(round(exposed, 1), "%")), position = position_stack(vjust = 0.5), angle = 0, size = 3.5) +
geom_text(aes(label = paste(round(total_augmentation, 1), "%"), y = total_augmentation + 1.5), angle = 0, size = 3.5) +
# Add LAC markers
geom_point(data = subset(data_piaac, !is.na(dummy_legend)), aes(x = country, y = 30, color = dummy_legend), size = 3, show.legend = TRUE) +
geom_col(data = subset(data_piaac, LAC == "LAC"), aes(x = reorder(country, comp), y = total_augmentation),
position = "identity", color = "brown", size = 0.7, alpha = 0.1, show.legend = FALSE) +
scale_color_manual(values = c("LAC countries" = "brown"), guide = guide_legend(override.aes = list(shape = 16)))
# Save
ggsave(filename = "Figure10.jpeg",
path = "Figures",
width = 10, height = 3,
units = "in",
dpi = 300)
Figure 11. Exposure by country, exposure type and access to digital infrastructure
data_11 <- read_excel("./Results/Results_AI_LAC.xlsx", skip = 3, sheet = "AI_SEDLAC_PIAAC_CPU") %>%
select(-1, -4) %>%
rename(code = 1,
potential = 2,
Total = 3) %>%
fill(code, .direction = "down") %>%
mutate(potential = ifelse(potential == "missing", "Other", potential)) %>%
mutate(computer = str_split_fixed(potential, ", ", 2)[,2],
potential = str_split_fixed(potential, ", ", 2)[,1]) %>%
select(code, potential, computer, everything()) %>%
mutate(potential = str_replace_all(potential, c("^Pr\\(" = "", "\\)$" = ""))) %>%
mutate(potential = str_replace_all(potential, "=1", "")) %>%
mutate(computer = str_replace_all(computer, "Computer at work=", "")) %>%
mutate(computer = str_replace_all(computer, "\\)$", "")) %>%
mutate(computer = ifelse(computer == 1, "Computer", "No computer")) %>%
left_join(., wb_codes) %>%
select(country, 1:6) %>%
select(-Total) %>%
pivot_longer(cols = c(Female:Male),
names_to = "sex",
values_to = "value") %>%
group_by(country, potential, sex) %>%
mutate(total = sum(value)) %>%
ungroup() %>%
mutate(computer = factor(computer, levels= c("No computer", "Computer"))) %>%
# Create twoway data
mutate(male = ifelse(sex == "Male", -value, NA),
female = ifelse(sex == "Female", value, NA)) %>%
mutate(total_men = -total,
total_women = total) %>%
mutate(total_men = round(total_men, 1),
total_women = round(total_women, 1)) %>%
mutate(total_men = ifelse(sex == "Male", total_men, NA_character_)) %>%
mutate(total_women = ifelse(sex == "Female", total_women, NA_character_)) %>%
filter(potential %in% c("Automation", "Augmentation", "Big Unknown")) %>%
mutate(potential = factor(potential, levels =c("Automation", "Augmentation", "Big Unknown")))
# Plot
ggplot(data_11,
aes(x = country, y = female, fill = computer, label = paste0(round(female, 1)))) +
geom_col(position = "stack") +
coord_flip() +
facet_wrap(~potential, ncol = 1) +
geom_text(position = position_stack(vjust = 0.5), size = 2) +
labs(x = "",
y = "Share of employment (%)") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "bottom",
legend.title = element_blank()) +
scale_fill_manual(values = c("skyblue", "orange2"), guide = guide_legend(reverse = TRUE)) +
geom_hline(yintercept = 0, linetype = "solid", color = "black") +
geom_col(aes(x = country, y = male, fill = computer, position = "stack")) +
geom_text(aes(x = reorder(country, value), y = male, label = paste0(round(-male, 1))), position = position_stack(vjust = 0.5), size = 2) +
annotate(geom = "label", x = max(data_11$male, na.rm = TRUE) * 1.05,
y = length(unique(data_11$country)) + 1, label = "Men ",
hjust = 9.5, vjust = -0.5, size = 4, color = "red", fill = "grey", label.size = 0.4) +
annotate(geom = "label", x = min(data_11$female, na.rm = TRUE) * 1.05,
y = length(unique(data_11$country)) + 1, label = "Women", hjust = -1, vjust = 0, size = 4, color = "blue", fill = "grey", label.size = 0.4)
# Save
ggsave(filename = "Figure11.jpeg",
path = "Figures",
width = 9, height = 8,
units = "in",
dpi = 300)
data_11 %>% write_csv("Figure11_data.csv")
### Take data on exposure: select only no computer and total augmentation, by sex (total is total % of empl in augmentation by sex, value is % of empl with no computer)
jobs_data <- data_11 %>%
filter(computer == "No computer",
potential == "Augmentation") %>%
select(country, code, potential, computer, sex, value, total)
# Get ILO Employment by sex for 2023 and ISCO2 to create country weights for LAC countries: R download
X <- get_ilostat("EMP_2EMP_SEX_AGE_NB_A", cache = F)
modelled_totals_2023_by_sex <- X %>%
mutate(sex = case_when(sex == "SEX_M" ~ "Male",
sex == "SEX_F" ~ "Female",
sex == "SEX_T" ~ "Total",
TRUE ~ sex)) %>%
filter(sex != "Total") %>%
mutate(obs_value = obs_value * 1000) %>%
filter(classif1 == "AGE_YTHADULT_YGE15") %>% # Take all workers 15+
rename(code = ref_area) %>%
rename(EMPL_modelled_2023_R_by_sex = obs_value) %>%
filter(time == "2023") %>%
select(code, sex, EMPL_modelled_2023_R_by_sex) %>%
mutate(sex = as.character(sex)) %>%
filter(code %in% LAC_codes)
# Create a final dataset with empl totals and empl weights by sex
jobs_data2 <- left_join(jobs_data, modelled_totals_2023_by_sex) %>%
arrange(sex, country) %>%
group_by(sex) %>%
mutate(total_LAC_empl_by_sex = sum(EMPL_modelled_2023_R_by_sex),
country_weight_by_sex = EMPL_modelled_2023_R_by_sex / total_LAC_empl_by_sex,
check =sum(country_weight_by_sex)) %>% # check that weights grouped by sex add to 1
ungroup() %>%
# Calculate the nr of jobs in augmentation, based on ILO 2023 estimates
mutate(country_augment_jobs_no_comp_sex = round(value/100 * EMPL_modelled_2023_R_by_sex, 0),
total_jobs_augmentation_country_sex = round(total/100 * EMPL_modelled_2023_R_by_sex, 0),
share_no_comp_in_augmentation_country_sex = round(country_augment_jobs_no_comp_sex/total_jobs_augmentation_country_sex,2)) %>%
group_by(sex) %>%
mutate(total_LAC_jobs_aug_no_comp_sex = sum(country_augment_jobs_no_comp_sex),
total_LAC_jobs_augmentation = sum(total_jobs_augmentation_country_sex)) %>%
ungroup() %>%
mutate(LAC_share_no_comp_augmentation_in_total_empl_sex = total_LAC_jobs_aug_no_comp_sex / total_LAC_empl_by_sex * 100,
LAC_share_no_comp_in_augmentation_sex = total_LAC_jobs_aug_no_comp_sex / total_LAC_jobs_augmentation * 100) %>%
# Alternative calculation: mean of country "value" wieghted by employment
group_by(sex) %>%
mutate(LAC_share_no_comp_augmentation_in_total_empl_sex2 = sum(value * country_weight_by_sex)) %>% # Same as going through totals
ungroup()
data_12 <- read_excel("./Results/Results_AI_LAC.xlsx", skip = 3, sheet = "AI_SEDLAC_PIAAC_CPU") %>%
select(-1, -4) %>%
rename(code = 1,
potential = 2,
Total = 3) %>%
rename("Formal (legal)" = "Formal...25",
"Informal (legal)" = "Informal...26",
"Formal (prod)" ="Formal...27",
"Informal (prod)" = "Informal...28") %>%
fill(code, .direction = "down") %>%
mutate(computer = str_split_fixed(potential, ", ", 2)[,2],
potential = str_split_fixed(potential, ", ", 2)[,1]) %>%
select(code, potential, computer, everything()) %>%
mutate(potential = str_replace_all(potential, c("^Pr\\(" = "", "\\)$" = ""))) %>%
mutate(potential = str_replace_all(potential, "=1", "")) %>%
mutate(computer = str_replace_all(computer, "Computer at work=", "")) %>%
mutate(computer = str_replace_all(computer, "\\)$", "")) %>%
mutate(computer = ifelse(computer == 1, "Computer", "No computer")) %>%
mutate(potential = ifelse(potential == "Missing", "Other", potential)) %>%
left_join(., wb_codes) %>%
select(country, code, everything()) %>%
select(-code, -country_label)
all_comp <- data_12 %>%
pivot_longer(cols = Total:last_col(),
names_to = "variable",
values_to = "value") %>%
mutate(potential = factor(potential, levels = c("Automation",
"Augmentation",
"Big Unknown",
"Other"))) %>%
group_by(country, potential, variable) %>%
mutate(total = sum(value)) %>%
ungroup() %>%
mutate(variable = factor(variable, levels = c(
"Total", "Female", "Male",
"Rural", "Urban",
"15-24", "25-34", "35-44", "45-54", "55-64",
"Low education", "Medium education", "High education",
"Non-Poor", "Poor",
"Q1", "Q2", "Q3", "Q4", "Q5",
"Formal (legal)", "Informal (legal)", "Formal (prod)", "Informal (prod)",
"Employer", "Salaried employee", "Self-employed", "Family worker (without salary)"))) %>%
drop_na()
computer <- all_comp %>%
filter(computer == "Computer",
potential != "Other",
country %in% c("Colombia", "Costa Rica"))
no_computer <- all_comp %>%
filter(computer == "No computer",
potential != "Other",
country %in% c("Colombia", "Costa Rica"))
ggplot(computer,
aes(x = fct_rev(variable), y = value, fill = fct_rev(potential))) +
geom_col() +
geom_area() +
facet_wrap(country~potential, ncol = 3) +
coord_flip() +
theme(legend.position = "none",
axis.title = element_blank()) +
geom_col(data = no_computer, aes(x = variable, y = total, fill = fct_rev(potential)), alpha = 0.5) +
scale_fill_manual(values = c("#1f77b4", "#2ca02c", "#d62728"), guide = guide_legend(reverse = TRUE)) +
geom_text(aes(label = paste(round(total, 1), "%"), y = total), hjust = -0.1, size = 2.5)
# Save
ggsave(filename = "Figure12.jpeg",
path = "Figures",
width = 9, height = 8,
units = "in",
dpi = 300)
# Data from ILO STATS with employment related income for LAC countries at 2 digit level, local currency
data <- read_excel("./bases/ILO/Microdata_ILO/EAR_OCU2Digits_LAC.xlsx") %>%
filter(sample_ees > 5,# Filter out too small samples
sample_slf > 5) %>%
rename(country = ref_area,
isco = classif1,
year = time) %>%
select(country, ACRO, year, isco, median2d_SLF, median2d_EES, sample_ees, sample_slf) %>%
group_by(country, ACRO) %>%
# Determine the latest year available for country and survey
mutate(latest_year_for_total = ifelse(all(is.na(ifelse(isco == "OC2_ISCO08_TOTAL" & !is.na(median2d_EES), year, NA))),
NA,
max(ifelse(isco == "OC2_ISCO08_TOTAL" & !is.na(median2d_EES), year, NA), na.rm = TRUE))) %>%
filter(year == latest_year_for_total) %>%
ungroup() %>%
mutate(small_sample_ees = ifelse(sample_ees < 15, "U", NA),
small_sample_slf = ifelse(sample_slf < 15, "U", NA))
overview <- data %>% distinct(country, ACRO, year, latest_year_for_total) # In several cases more than 1 survey per country but always in different years
data1 <- data %>%
select(-latest_year_for_total) %>%
group_by(country) %>%
# Determine the latest survey available for country and filter only these obs
mutate(latest_year_for_total = max(ifelse(isco == "OC2_ISCO08_TOTAL" & !is.na(median2d_EES), year, NA), na.rm = TRUE)) %>%
filter(year == latest_year_for_total) %>%
select(-latest_year_for_total)
overview2 <- data1 %>% distinct(country, ACRO, year) # 1 survey per country, latest year available
data2 <- data1 %>%
# Move the overall median to a column for employees and self-employed
mutate(median2d_SLF_total = ifelse(isco =="OC2_ISCO08_TOTAL", median2d_SLF, NA)) %>%
fill(median2d_SLF_total, .direction = "up") %>%
mutate(median2d_EES_total = ifelse(isco =="OC2_ISCO08_TOTAL", median2d_EES, NA)) %>%
fill(median2d_EES_total, .direction = "up") %>%
filter(isco != "OC2_ISCO08_TOTAL") %>%
ungroup() %>%
select(country, year, isco, median2d_SLF, median2d_SLF_total, median2d_EES, median2d_EES_total, sample_ees, sample_slf, small_sample_ees, small_sample_slf) %>%
# Calculate the distance of individual ISCO 2-digts occupations from the median of Employees
mutate(difference_from_overall_median_EES_perc = round((median2d_EES - median2d_EES_total) / median2d_EES_total * 100, 1)) %>%
mutate(difference_from_overall_median_SLF_perc = round((median2d_SLF - median2d_SLF_total) / median2d_EES_total * 100, 1)) %>%
filter(difference_from_overall_median_SLF_perc < 500) %>%
# Extract last two characters from 'isco'
mutate(isco2d = substr(isco, nchar(isco)-1, nchar(isco))) %>%
filter(!isco %in% c("OC2_ISCO08_01", # Take out the army occupations and non-classified
"OC2_ISCO08_02",
"OC2_ISCO08_03",
"OC2_ISCO08_96")) %>%
rename(code = country)
# Data from the final results - sheet with exposure type and computer use by ISCO 2-digit and 2-digit shares of employment by country
heatmap_data <- read_excel("./Results/Results_AI_LAC.xlsx", sheet = "AI_CPU_ISCO2d", skip = 2) %>%
fill(Country, .direction = "down") %>%
select(-1, -TOTAL) %>%
rename(country_name = Country,
isco2d = "ISCO 2d") %>%
mutate(country_name = toTitleCase(tolower(country_name)))
# File with country codes and labels (from basic data on top)
wb_codes1 <- wb_codes %>%
rename(country_name = country)
# Merge datasets
heatmap_data2 <- left_join(heatmap_data, wb_codes1) %>%
select(country_name, code, everything())
data3 <- left_join(data2, heatmap_data2) %>%
rename(share_empl = 'Share of Total Employment') %>%
mutate(across(`Augmentation & Computer`:`Missing & No Computer`, as.numeric)) %>%
pivot_longer(cols = `Augmentation & Computer`: `Missing & No Computer`,
names_to = "variable",
values_to = "Share_of_exposure") %>%
mutate(isco2d = as.character(isco2d)) %>%
arrange(isco2d) %>%
# Select only exposed with computer the not affected occupations
filter(variable %in% c("Augmentation & Computer",
"Automation & Computer",
"Big Unknown & Computer"))
# Read isco labels from a file with official mapping of ISCO levels
isco_labels <- read_csv("./bases/ILO/mapping.csv") %>%
select(isco08_2d, description_2d) %>%
distinct() %>%
rename(isco2d_code = isco08_2d) %>%
arrange(isco2d_code) %>%
rename(isco2d = isco2d_code)
# Add labels to the main dataset
data4 <- left_join(data3, isco_labels) %>%
mutate(isco_labels = paste(isco2d, "-", description_2d)) %>%
mutate(isco_labels = factor(isco_labels)) %>%
# Pivot long to plot
pivot_longer(cols= difference_from_overall_median_EES_perc: difference_from_overall_median_SLF_perc,
names_to = "difference",
values_to = "difference_value") %>%
mutate(income_difference_from_EES_median_filtered = ifelse(Share_of_exposure > 25, difference_value, NA)) %>%
select(code, year, isco_labels, median2d_SLF, median2d_EES, difference, difference_value, income_difference_from_EES_median_filtered,
share_empl, variable, Share_of_exposure, sample_ees, sample_slf, small_sample_ees, small_sample_slf) %>%
mutate(currency = "local currency") %>%
rename(potential = variable) %>%
mutate(small_sample_ees_marker = ifelse(small_sample_ees == "U", difference_value, NA))
data4 %>% write_csv("./bases/ILO/Microdata_ILO/EAR_OCU2Digits_LAC_Processed.csv")
data4 <- read_csv("./bases/ILO/Microdata_ILO/EAR_OCU2Digits_LAC_Processed.csv")
# Define custom labels for x-axis
custom_labels <- setNames(
c("Self-employed", "Employees"),
c("difference_from_overall_median_SLF_perc", "difference_from_overall_median_EES_perc"))
# Define custom legend colors
custom_Legend_colors <- setNames(
c("red", "green", "blue3"),
c("Automation & Computer", "Augmentation & Computer", "Big Unknown & Computer")
)
# Plot
ggplot(data4, aes(x = fct_rev(isco_labels), y = difference_value)) +
geom_jitter(aes(size = share_empl), alpha = 0.3, color = "grey") + # Plot all
geom_jitter(aes(x = fct_rev(isco_labels), y = income_difference_from_EES_median_filtered, color = potential, size = share_empl, alpha = Share_of_exposure)) + # Ovelay exposed jobs
geom_smooth(method = "loess", se = FALSE, color = "black", linewidth = 0.3, linetype ="dashed", aes(group = potential)) +
geom_hline(yintercept = 0, color = "brown", size = 0.7) +
facet_wrap(~difference, ncol = 1, labeller = labeller(difference = custom_labels)) +
scale_color_manual(values = custom_Legend_colors) + # Apply custom color scale
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
legend.position = "top",
legend.box = "horizontal",
legend.box.just = "left",
legend.direction = "horizontal",
legend.title = element_text(size = 10),
axis.title.y = element_text(size = 10)) +
labs(x = "",
y = "Income: distance from the group's median income as % of the median income of employees",
size = "Size: share of total employment",
alpha = "Shading: Exposed share of ISCO-08 2-digit occupations") +
guides(
color = guide_legend(order = 3, title.position = "top", nrow = 3, byrow = TRUE, title = NULL),
size = guide_legend(order = 1, title.position = "top", nrow = 1, byrow = TRUE),
alpha = "none")
# Save
ggsave(filename = "Figure14.jpeg",
path = "Figures",
width = 10, height = 10,
units = "in",
dpi = 300)
data_tech <- read_csv("./bases/ILO/techXposure_isco4d_by_tech.csv") %>%
rename(ISCO_08 = isco4d) %>%
mutate(ISCO_08 = as.character(ISCO_08)) %>%
group_by(ISCO_08, isco4d_label) %>%
summarise(x_mean = mean(xposure4d, na.rm = T)) # calculate the mean score for each 4-d isco08
data_gbb <- read_csv("./bases/ILO/Gmyrek_Berg_Bescond_scores_2023.csv") %>%
rename(mean_score_ilo = mean_score)
comparison_gbb_techxposure <- left_join(data_tech, data_gbb)
# Standardize the data
comparison_gbb_techxposure$mean_score_ilo_centered <- scale(comparison_gbb_techxposure$mean_score_ilo)[, 1]
comparison_gbb_techxposure$xposure4d_centered <- scale(comparison_gbb_techxposure$x_mean)[, 1]
# Plot - all values
ggplot(comparison_gbb_techxposure,
aes(x = mean_score_ilo_centered, y = xposure4d_centered, colour = description_1d)) +
geom_point(alpha = .5) +
facet_wrap(~description_1d, ncol = 3) +
theme(legend.position = "none",
strip.text = element_text(size = 10)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "blue") +
labs(y = "TechXposure (mean by occupation), z-scores",
x = "ILO scores (mean by occupation), z-scores") +
ylim(-3,3) +
xlim(-3,3)
ggsave(filename = "FigureA1.jpeg",
path = "Figures",
width = 10, height = 6,
units = "in",
dpi = 300)
data_imf <- read_excel("./bases/ILO/IMF_AIOE_CAIOE_theta_for_sharing.xlsx", sheet = "data_ISCO08") %>%
mutate(exposure = ifelse(aioe_all > median_aioe_all, "High Exposure", "Low exposure"),
complementarity = ifelse(complementarity_theta > median_theta, "High Complementarity", "Low Complementarity"),
imf = paste(exposure, "-", complementarity)) %>%
rename(ISCO_08 = isco08)
data_gbb <- read_csv("./bases/ILO/Gmyrek_Berg_Bescond_scores_2023.csv") %>%
rename(mean_score_ilo = mean_score)
gbb_imf_compare <- left_join(data_imf, data_gbb) %>%
mutate(potential = ifelse(potential =="missing", "Other", potential)) %>%
filter(!is.na(Title), # IMF data contains also ISCO 3d jobs, so these titles appear as NA after merger
description_1d != "Armed forces occupations")
# Standardize the ilo and imf scores
gbb_imf_compare$mean_score_ilo_centered <- scale(gbb_imf_compare$mean_score_ilo)[, 1]
gbb_imf_compare$aioe_all_centered <- scale(gbb_imf_compare$aioe_lm)[, 1]
# Plot the standardized scores with a 45 degree line
ggplot(gbb_imf_compare,
aes(x = mean_score_ilo_centered, y = aioe_all_centered, colour = description_1d)) +
geom_point(alpha = .5) +
facet_wrap(~description_1d, ncol = 3) +
theme(legend.position = "none",
strip.text = element_text(size = 10)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "blue") + # 45-degree line
scale_x_continuous(name = "Standardized Mean ILO Score") +
scale_y_continuous(name = "Standardized AIOE (LM Scores)")
# Save
ggsave(filename = "FigureA2.jpeg",
path = "Figures",
width = 10, height = 6,
units = "in",
dpi = 300)
# PLOT 2: 2-digit employment
data_2d <- read_csv("./bases/ILO/Microdata_ILO/LATAM_FINAL_ILO_data.csv") %>%
select(country, acro, sex, year, isco2d, isco_share_in_empl) %>%
group_by(country) %>%
filter(year == max(year)) %>%
ungroup() %>%
drop_na() %>%
filter(isco2d != "Total",
country != "Nicaragua") %>% # 2014 survey in ISCO-88 format
arrange(country, isco2d) %>%
mutate(isco2d = factor(isco2d))
ggplot(data_2d %>% filter(sex != "Total"), aes(x = country, y = fct_rev(isco2d), fill = isco_share_in_empl)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "red", labels = percent_format(accuracy = 1)) +
labs(x = "", y = "", fill = "Employment Share") +
theme(axis.text.x = element_text(angle = 90, size = 8, hjust = 1, vjust = 0.5),
axis.text.y = element_text(size = 9),
legend.position = "top",
legend.title = element_text(size = 9),
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
geom_text(aes(label = paste(round(isco_share_in_empl * 100, 1), "%")), size = 2) +
facet_wrap(~sex, ncol = 1)
# Save
ggsave(filename = "FigureA3.jpeg",
path = "Figures",
width = 10, height = 14,
units = "in",
dpi = 300)
ggplot(final_exposures_LAC,
aes(x = reorder_within(country, exposure, potential), y = exposure, fill = fct_rev(potential))) +
geom_col() +
theme(legend.title = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
legend.position = "none") +
labs(x = "",
y = "Percentage of employment") +
facet_wrap(~potential, scales = "free_x", ncol = 3) +
scale_fill_manual(values = c("Augmentation Potential" = "orange",
"Automation Potential" = "red3",
"The Big Unknown" = "skyblue")) +
scale_x_reordered() +
geom_text(aes(label = paste(round(exposure, 0), "%"), y = exposure + 3), size = 2.5, angle = 90) +
ylim(0,31)
# Save
ggsave(filename = "FigureA4.jpeg",
path = "Figures",
width = 10, height = 4,
units = "in",
dpi = 300)
# SEDLAC with imputed computer at work
sedlac_imputed_computer_at_work <- read_excel("./Results/Results_AI_LAC.xlsx", skip = 3, sheet = "AI_SEDLAC_PIAAC_CPU") %>%
select(-1, -4) %>%
rename(code = 1,
potential = 2,
Total = 3) %>%
fill(code, .direction = "down") %>%
mutate(potential = ifelse(potential == "missing", "Other", potential)) %>%
mutate(computer = str_split_fixed(potential, ", ", 2)[,2],
potential = str_split_fixed(potential, ", ", 2)[,1]) %>%
select(code, potential, computer, everything()) %>%
mutate(potential = str_replace_all(potential, c("^Pr\\(" = "", "\\)$" = ""))) %>%
mutate(potential = str_replace_all(potential, "=1", "")) %>%
mutate(computer = str_replace_all(computer, "Computer at work=", "")) %>%
mutate(computer = str_replace_all(computer, "\\)$", "")) %>%
mutate(computer = ifelse(computer == 1, "Computer", "No computer")) %>%
left_join(., wb_codes) %>%
select(country, code, everything()) %>%
filter(potential == "Augmentation",
computer == "Computer") %>%
select(-country_label) %>%
rename(SEDLAC_computer_at_work_imputed = Total) %>%
select(1:5)
# SEDLAC with imputed computer and internet at work
sedlac_imputed_computer_internet_at_work <- read_excel("./Results/Results_AI_LAC.xlsx", skip = 3, sheet = "AI_SEDLAC_PIAAC_CPUINT") %>%
select(-1, -4) %>%
rename(code = 1,
potential = 2,
Total = 3) %>%
fill(code, .direction = "down") %>%
mutate(computer = str_split_fixed(potential, ", ", 2)[,2],
potential = str_split_fixed(potential, ", ", 2)[,1]) %>%
select(code, potential, computer, everything()) %>%
mutate(potential = str_replace_all(potential, c("^Pr\\(" = "", "\\)$" = ""))) %>%
mutate(potential = str_replace_all(potential, "=1", "")) %>%
mutate(potential = ifelse(potential == "Missing", "Other", potential)) %>%
mutate(computer = str_replace_all(computer, "Computer & Internet at work=", "")) %>%
mutate(computer = str_replace_all(computer, "\\)$", "")) %>%
mutate(computer = ifelse(computer == 1, "Computer", "No computer")) %>%
left_join(., wb_codes) %>%
select(country, code, everything()) %>%
filter(potential == "Augmentation",
computer == "Computer") %>%
select(-country_label) %>%
rename(SEDLAC_computer_internet_at_work_imputed = Total) %>%
select(1:5)
# Read final results sheet for PIAAC
sedlac_computer_at_home <- read_excel("./Results/Results_AI_LAC.xlsx", skip = 3, sheet = "AI_COMPUHH") %>%
select(-1, -4) %>%
rename(code = 1,
potential = 2,
Total = 3) %>%
fill(code, .direction = "down") %>%
mutate(potential = ifelse(potential == "missing", "Other", potential)) %>%
mutate(computer = str_split_fixed(potential, ", ", 2)[,2],
potential = str_split_fixed(potential, ", ", 2)[,1]) %>%
select(code, potential, computer, everything()) %>%
mutate(potential = str_replace_all(potential, c("^Pr\\(" = "", "\\)$" = ""))) %>%
mutate(potential = str_replace_all(potential, "=1", "")) %>%
mutate(computer = str_replace_all(computer, "Computer at home=", "")) %>%
mutate(computer = str_replace_all(computer, "\\)$", "")) %>%
mutate(computer = ifelse(computer == 1, "Computer", "No computer")) %>%
#Append sedlac codes
left_join(., wb_codes) %>%
select(country, code, everything()) %>%
filter(potential == "Augmentation",
computer == "Computer") %>%
select(-country_label) %>%
rename(SEDLAC_reported_computer_at_home = Total) %>%
select(1:5)
# Merge the three datasets
compare <- left_join(sedlac_imputed_computer_at_work, sedlac_imputed_computer_internet_at_work) %>%
left_join(.,sedlac_computer_at_home) %>%
pivot_longer(cols = SEDLAC_computer_at_work_imputed: SEDLAC_reported_computer_at_home,
names_to = "source",
values_to = "value") %>%
filter(value != 0) %>%
mutate(country = factor(country))
ggplot(compare,
aes(x = country, y = value, fill = source)) +
geom_col(position = position_dodge(width = 0.8), alpha = 0.9) +
geom_text(aes(label = paste(round(value, 1), "%")),
position = position_dodge(width = 1),
size = 3.5,
angle = 90,
vjust = 0.5,
hjust = -0.2) +
facet_wrap(~potential, ncol = 1) +
theme(legend.title = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, size = 11, vjust = 1),
legend.position = "top") +
scale_fill_manual(values = c("SEDLAC_computer_at_work_imputed" = "skyblue3",
"SEDLAC_computer_internet_at_work_imputed" = "brown",
"SEDLAC_reported_computer_at_home" = "yellow3"),
labels = c("SEDLAC_computer_at_work_imputed" = "Computer at work (imputed from PIACC)",
"SEDLAC_computer_internet_at_work_imputed" = "Computer & Internet at work (imputed from PIACC)",
"SEDLAC_reported_computer_at_home" = "Computer at home (reported in SEDLAC)"
)) +
ylim(0,13) +
guides(fill = guide_legend(reverse = F)) +
labs(x = "",
y = "Share of employment (%)")
# Save
ggsave(filename = "FigureA5.jpeg",
path = "Figures",
width = 10, height = 4,
units = "in",
dpi = 300)
# Data from calculations in line 980
total_jobs_aug_no_computer <- jobs_data2 %>%
select(1:5, country_augment_jobs_no_comp_sex) %>%
mutate(plot = ifelse(sex == "Male", -round(country_augment_jobs_no_comp_sex / 1000000, 2), round(country_augment_jobs_no_comp_sex/ 1000000, 2))) %>%
mutate(label_men = ifelse(sex == "Male", plot, NA),
label_women = ifelse(sex == "Female", plot, NA))
ggplot(total_jobs_aug_no_computer,
aes(x = reorder(country, country_augment_jobs_no_comp_sex), y = plot, fill = sex)) +
geom_bar(stat = "identity", position = "identity") +
coord_flip() +
scale_y_continuous(labels = abs, limits = c(-3.7,3.7)) +
theme(legend.position = "bottom",
legend.title = element_blank()) +
labs(x = "", y = "Millions of jobs") +
geom_text(aes(x = reorder(country, country_augment_jobs_no_comp_sex), y = plot,
label = ifelse(!is.na(label_men), paste(abs(label_men), "M"), "")),
size = 3,
hjust = 1.1) +
geom_text(aes(x = reorder(country, country_augment_jobs_no_comp_sex), y = plot,
label = ifelse(!is.na(label_women), paste(abs(label_women), "M"), "")),
size = 3,
hjust = -0.2) +
scale_fill_manual(values = c("#ff7f0e", "#1f77b4"), guide = guide_legend(reverse = TRUE))
# Save
ggsave(filename = "FigureA6.jpeg",
path = "Figures",
width = 10, height = 4,
units = "in",
dpi = 300)
version