Data exploration report

A whiteboard to understand the data, test ideas, and shape our deliverables

Author

Carlos Daboin Contreras

Code

# libraries
library(arrow)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(stringr)

# source file with recurrent functions
source("etl_and_viz_functions.R")

# Set colors for plots
Grey<-"#9c938e"     #18.7%

Yellow<-"#fdb517"   #8.9 %
Blue<-"#005476"     #7.9 %
Blue_2<-"#0386b5"
Yellow_2<-"#fcecb3" #1.4 %
Yellow_3<-"#f1c259" #1.4 %
Blue_3<-"#b0cbd2"   #1.2 %
Blue_4<-"#45788c"   #1.2 %

#country colors
country_colors<-c("ARG"=Blue_2,"CHL"=Blue,"URY"=Yellow)

binary_colors<-c(Blue_4,Yellow_3)

sector_vars<-c('accommodation_and_food_services',
 'administrative_and_support_services', 'agriculture_forestry_fishing_and_hunting','arts_entertainment_and_recreation', 'construction', 'educational_services','finance_and_insurance',
 'government' , 'health_care_and_social_assistance', 'information', 'management_of_companies_and_enterprises', 'manufacturing',
 'mining_quarrying_and_oil_and_gas_extraction', 'other_services_except_public_administration',
 'professional_scientific_and_technical_services','real_estate_and_rental_and_leasing', 
 'transportation_and_warehousing', 'utilities','wholesale_trade', 'retail_trade')
work_vars<-c('occupation','onet_job','schedule', 'zones', 'remote','green_job','area')
origin_vars<-c('doc_id','date_posted','country_code','firm','source','rm','city','city_name')
ability_vars<-c('Cognitive Abilities','Sensory Abilities','Physical Abilities','Psychomotor Abilities')
subability_vars<-c('Arm-Hand_Steadiness', 'Auditory_Attention', 'Category_Flexibility',
 'Control_Precision', 'Deductive_Reasoning', 'Depth_Perception', 
 #'Dynamic_Flexibility',
  'Dynamic_Strength', 'Explosive_Strength', 'Extent_Flexibility', 'Far_Vision', 'Finger_Dexterity', 'Flexibility_of_Closure', 'Fluency_of_Ideas', 'Gross_Body_Coordination',
 'Gross_Body_Equilibrium', 'Hearing_Sensitivity','Inductive_Reasoning', 'Information_Ordering',
 'Manual_Dexterity', 'Mathematical_Reasoning','Memorization', 'Multilimb_Coordination',
 'Near_Vision', 'Night_Vision','Number_Facility',
 'Oral_Comprehension', 'Oral_Expression','Originality', 'Perceptual_Speed',
 'Peripheral_Vision',  'Problem_Sensitivity' ,'Rate_Control', 'Reaction_Time',
 'Response_Orientation', 'Selective_Attention', 
 'Sound_Localization', 'Spatial_Orientation','Speech_Clarity','Speech_Recognition',
 'Speed_of_Closure', 'Speed_of_Limb_Movement','Stamina', 'Static_Strength',
 'Time_Sharing', 'Trunk_Strength','Visual_Color_Discrimination',
 'Visualization','Wrist-Finger_Speed','Written_Comprehension', 'Written_Expression')

sectors_focus<-c("Professional Scientific And Technical Services",
"Health Care And Social Assistance",
"Transportation And Warehousing",
"Construction",
"Information",
"Agriculture Forestry Fishing And Hunting",
"Mining Quarrying And Oil And Gas Extraction")

south_cone_df<-rbind(
    read_parquet("raw/arg_new_dict.parquet")|>
select(origin_vars,work_vars,sector_vars,ability_vars,subability_vars),
    read_parquet("raw/chl_new_dict.parquet")|>
select(origin_vars,work_vars,sector_vars,ability_vars,subability_vars),
    read_parquet("raw/ury_new_dict.parquet")|>
select(origin_vars,work_vars,sector_vars,ability_vars,subability_vars)
)
south_cone_df %>%
  summarise_at(sector_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=sector_vars)%>% 
  mutate(total=sum(value))


south_cone_df %>%
  summarise_at(ability_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=ability_vars)%>% 
  mutate(total=sum(value))


south_cone_df %>%
  summarise_at(subability_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=subability_vars)%>% 
  mutate(total=sum(value))


total_sector<-south_cone_df %>%
            select(doc_id,sector_vars)%>%
            pivot_longer(cols=sector_vars)%>% 
            group_by(doc_id)%>%
            summarise(total_sector=sum(value,na.rm = TRUE))%>%
  mutate(total_sector=ifelse(is.nan(total_sector),1,total_sector))
total_ability<-south_cone_df %>%
            select(doc_id,ability_vars)%>%
            pivot_longer(cols=ability_vars)%>% 
            group_by(doc_id)%>%
            summarise(total_ability=sum(value,na.rm = TRUE))%>%
  mutate(total_ability=ifelse(is.nan(total_ability),1,total_ability))
total_subability<-south_cone_df %>%
            select(doc_id,subability_vars)%>%
            pivot_longer(cols=subability_vars)%>% 
            group_by(doc_id)%>%
            summarise(total_subability=sum(value,na.rm = TRUE))%>%
  mutate(total_subability=ifelse(is.nan(total_subability),1,total_subability))



# Modificacion de datos inicial
south_cone_df<-south_cone_df%>% 
  ## Modificacion de variables binarias
  mutate(area_bin=ifelse(area=="Conocimiento",TRUE,FALSE),
         green_job_bin=ifelse(is.na(green_job),FALSE,TRUE)) %>% 
  ## Modificacion de variables categoricas
  mutate(zones_label=case_when(zones==1 ~ '(1) Poca o ninguna preparación',
             zones==2 ~ '(2) Algo de preparación',
             zones==3 ~ '(3) Preparación media',
             zones==4 ~ '(4) Preparación considerable', 
             zones==5 ~ '(5) Mucha o extensa preparación')) %>% 
  ## Variable alternativa de sector: Sector de mas peso o Main Sector
  left_join(south_cone_df %>% 
              select(doc_id,sector_vars) %>% 
              pivot_longer(cols = sector_vars,
                           names_to = "sector",
                           values_to = "wt") %>% 
              # keep max chances sector
              group_by(doc_id) %>% 
              filter(wt==max(wt)) %>% 
              # remove duplicates in case there is a tie. Keep the first coming up
              distinct(doc_id,.keep_all = T) %>% 
              ungroup() %>% 
              mutate(sector=str_replace_all(sector,"_"," "),
                     sector=str_to_title(sector)) %>% 
              rename(main_sector=sector,
                     main_sector_wt=wt), by="doc_id")%>%
  ## Normalizar sectores
  left_join(total_sector, by="doc_id")%>%
  mutate(across(sector_vars,~./total_sector))%>% 
  ## Normalizar habilidades
  left_join(total_ability, by="doc_id")%>%
  mutate(across(ability_vars,~./total_ability))%>% 
  ## Normalizar subabilidades
  left_join(total_subability, by="doc_id")%>%
  mutate(across(subability_vars,~./total_subability))

south_cone_df %>%
  summarise_at(sector_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=sector_vars)%>% 
  mutate(total=sum(value))

south_cone_df %>%
  summarise_at(ability_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=ability_vars)%>% 
  mutate(total=sum(value))

south_cone_df %>%
  summarise_at(subability_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=subability_vars)%>% 
  mutate(total=sum(value))

Overview

This document can be considered a whiteboard or a catalog gathering all the insights and ideas we can think of as reflecting on Labor Demand in the South Cone via Online Job postings. Half the effort falls on understanding the data set nuances, while the other half is focused on collecting insights around the Energy Transition, Remote Work, Knowledge Sectors, Gender Inclusion, The Silver Economy, Immigrant labor assimilation, and Regional Economies.

It’s a work in progress.

Use the table of contents on the right to travel between document sections: TBD stands for To Be Done, while WIP stands for Work in Progress.

I’ve examined occupational, sector, abilities, sub-abilities, job zones, work schedules, regions, cities, and firm distributions of online job vacancies. I’ve also looked at the distribution of binary categories like green jobs, remote jobs, and knowledge jobs.

Overall, I can say that:

The figures presented here are based on a month of data. It goes from September 25 to October 26.
That amounts to 60.000 job postings. Chile accounts for about 58%, Argentina for 38% and Uruguay for 4%. Remember that Chile, Argentina, and Uruguay account for 39%, 54%, and 7% of formal employment in the South Cone, respectively. See Table 1
We compared occupations shares in total employment with their share in online job vacancies. The following major occupational groups are over-represented in the latter: “Sales and Related Occupations”, “Healthcare Practitioners and Technical Occupations”, “Architecture and Engineering Occupations”, “Office and Administrative Support Occupations”, “Protective Service Occupations”, “Computer and Mathematical Occupations” , “Production Occupations”, “Management Occupations”, “Business and Financial Operations Occupations,” in that order. See Table 12.
We obtained similar conclusions at the sector level. The following sectors are over-represented in online job postings: “Real Estate And Rental And Leasing”, “Professional Scientific And Technical Services”, “Finance And Insurance”, Manufacturing, “Retail Trade”, “Educational Services”, “Health Care And Social Assistance”, and “Accommodation And Food Services”. See Table 13. Note that over represented means more common that would be expected by their share of employment.
Regions like Santiago (13000), Buenos Aires (9300), Valparaiso (4900), Concepcion (3774), Rosario (2316), and Gran Temuco (2125) have more online vacancies than Uruguay (2060). See Table 10

Differences between countries:

Argentina’s online job demand aims towards more educated, trained, and technical workers than Chile. This is evident in the job zones, and abilities distributions (see Figure 23 , Figure 12), but also foreseeable from the sector and occupational distributions alone (see Figure 9 , ?@fig-occupation_country). Uruguay mimics that in many ways, but we’re always less sure due to its narrow sample size.
Despite being 1.5x outsized by its’ andean neighbor, Argentina has more vacancies in “Financial Operation Occupations” and a similar number of vacancies in “Architecture and Engineering,” “Computer and Mathematical,” “Educational,” and “Construction and extraction” occupations (see ?@fig-occupation_country). It’s also remarkably close to Chile in the number of vacancies with “Considerable preparation” (Job Zone 4.)
When compared with it’s region neighbor, Chile has an extraordinary high number of openings asking for “Some Preparation” (Job Zone 2.) Indeed, these account for about 46% of all the sampled vacancies, 10 points more than URY and 11 points more than ARG.

Abilities and sub-abilities

“Cognitive” and “Sensory” abilities are the most prevalent in online job postings. “Physical” and “Psychomotor” abilities are almost half as required. See Figure 11.
The most “Cognitive” abilities-intensive sectors are “Professional, Scientific and Technical Services,” “Utilities,” and “Management of Companies and Enterprises.” See Figure 18.
The most “Sensory” sectors are “Wholesale Trade,” “Management of Companies and Enterprises,” and “Educational Services.” These are sectors demanding many “Sensory” subabilities to a high degree. See Figure 18.
The “Construction” sector demands “Cognitive,” “Physical,” “Psychomotor,” and “Sensory” abilities similarly. The demand for abilities in the “Accommodation and Food Services” sector is similarly even. See Figure 18.
Sub-abilities’ intensity in Google Jobs data aligns well with O*NET data on Sub-abilities’ importance by occupation. Figure 70. There are methods to identify occupations exposed to AI by looking at subabilities (See Felten et. Al. 2018).

Work from Home arrangements (WFH):

The fraction of remote/hybrid vacancies detected is too low. Chile’s share of remote vacancies is 1.3%, while New Zealand’s is 10% (see Table 6 and ?@fig-bloom3). However, global surveys say Chileans work from home 0.9 days a week, while New Zealanders do so 1.0 days per week (see ?@fig-bloom3).
In the Argentinean files reviewed, 85% of vacancies with the word “hybrid” on the description were classified as not remote/hybrid (see Table 9). This evidence suggests a high prevalence of Type II errors. This likely happens in Chile and Uruguay files.
Type II errors aside, Santiago and Buenos Aires account for 24% and 20% of all remote vacancies, respectively. Regions like Córdoba, Mendoza, Corrientes, and Metropolitana account for 4%, 4%, 2%, and 4% of remote job postings, respectively. These last regions and Buenos Aires account for a more significant share of remote vacancies than expected by chance (See Table 7).

Energy transition:

Green jobs represent 15% of all online job postings. Argentina has the highest share of green jobs in its’ job postings (17%), while Chile has the lowest (13%). See Table 3.
“Green Increased Demand” accounts for 45% of Green jobs, while “Green Enhanced Skills” and “Green New & Emerging” account for 40% and 14%, respectively. See Table 4.
Argentina is intensive in “Green Increased Demand” (48%), while Chile is relatively intensive in “Green Enhanced Skills” (43.4%). See Table 4
Santiago and Buenos Aires account for 18% and 18% of all green online vacancies, while regions like Rosario, Mendoza, and Antofagasta account for 6%, 4%, and 4%, respectively. Buenos Aires, Rosario, Mendoza, and Antofagasta account for a more significant share of Green vacancies than expected by chance. See Table 5.

Job Zones:

41% of job vacancies in the South Cone require “(2) Some preparation”, while 25% require “(4) Considerable preparation”, and another 25% require “(3) Middle preparation”. Only 8% of the demand falls on the “(1) no preparation” and “(5) a lot of preparation” extremes. See Figure 19.
46% of job postings in Chile demand “(2) Some preparation”. It’s the country most concentrated in that area of demand by a considerable margin. See Figure 23.
30.5% of job postings in Argentina demand “(4) Some preparation”. It’s the country most concentrated in that area of demand by a moderate margin. See Figure 23.
We show sectors’ composition of online vacancies by Job Zone. One of the surprises we found is that Transportation and Warehousing asks for [(4) Preparación Considerable] en around 15% of online vacancies, at least in Argentina. See Figure 24.
The breakdown of job zones aligns with our understanding of training and preparation requirements by sector. We’re able to spot minor variations within countries.

Firms:

Chile and Argentina have around 800 firms (see Table 11 ). The 100 most prominent firms in both countries account for around 40% of all job vacancies. We present a plot to help policymakers spot the hottest demand firms across different periods (see Figure 51).

Ideas moving forward:

Automating this report: We could work on automating updates to this or other similar on a monthly basis by establishing an API connection.
Creating a dashboard with dates and country filters: We could work on a dashboard that allows the user reproduce all these plots and statistics and includes date/country filters. The tool would be connected to the API.
Follow developments on Argentina Labor markets: Data could be used to track the effects of the incoming labor markets de-regulation in Argentina.
Discuss the effect of AI on labor demand: Acemoglu, Autor, et al 2022 have used online job vacancies and AI exposure measures to discuss heterogeneous effects of AI on labor demand. Our data is very similar, only short.
Measure labor market tightness: Geographical granularity offers valuable insights for policy makers. This could allow researchers create estimates of labor market tightness in large regions by calculating the ratio of vacancies to unemployment. On the other end, firm granularity could allow policy makers to reach out to firms leading job demand.
Assist green transition efforts: We could explore other dimensions of “green labor demand.” What sectors and firms are behind it? What abilities are they more reliant on? How does it change following COP28 resolutions?
Evidence found here suggest there is a possibility of reducing type II error in remote work classification at a low cost. First step would consist on building a simple NLP model for WFH detection and compare it with “human-in-the-loop” classifications of a sub-sample of postings to measure improvements. Algorithms could grow more complex if needed. If that’s the case I suggest using Taska, Bloom, et. al. 2023) work as guidance.

The IDB online job postings database

Variables

We identify four groups of variables:

Sector weights: will tell us the sector distribution of firms searching for workers. Each column is named after one of the 20 NAICS 2-digits sectors.
Names in the database
- accommodation_and_food_services, administrative_and_support_services, agriculture_forestry_fishing_and_hunting, arts_entertainment_and_recreation, construction, educational_services, finance_and_insurance, government, health_care_and_social_assistance, information, management_of_companies_and_enterprises, manufacturing, mining_quarrying_and_oil_and_gas_extraction, other_services_except_public_administration, professional_scientific_and_technical_services, real_estate_and_rental_and_leasing, transportation_and_warehousing, utilities, wholesale_trade, retail_trade
Abilities and sub abilities weights: works similar to the sector ones. Each column shows a score associated with that (sub)ability. The raw score apparently lacks any interpretation, but it can be used to either rank items from most to least important, or weight each observations to calculate the aggregate importance of each item. (Sub)Abilities are defined in the ONET Content Model Ability.
Names in the database
- Abilities: Cognitive Abilities, Sensory Abilities, Physical Abilities, Psychomotor Abilities
- Sunbilities: Arm-Hand_Steadiness, Auditory_Attention, Category_Flexibility, Control_Precision, Deductive_Reasoning, Depth_Perception, Dynamic_Strength, Explosive_Strength, Extent_Flexibility, Far_Vision, Finger_Dexterity, Flexibility_of_Closure, Fluency_of_Ideas, Gross_Body_Coordination, Gross_Body_Equilibrium, Hearing_Sensitivity, Inductive_Reasoning, Information_Ordering, Manual_Dexterity, Mathematical_Reasoning, Memorization, Multilimb_Coordination, Near_Vision, Night_Vision, Number_Facility, Oral_Comprehension, Oral_Expression, Originality, Perceptual_Speed, Peripheral_Vision, Problem_Sensitivity, Rate_Control, Reaction_Time, Response_Orientation, Selective_Attention, Sound_Localization, Spatial_Orientation, Speech_Clarity, Speech_Recognition, Speed_of_Closure, Speed_of_Limb_Movement, Stamina, Static_Strength, Time_Sharing, Trunk_Strength, Visual_Color_Discrimination, Visualization, Wrist-Finger_Speed, Written_Comprehension, Written_Expression
Work related variables: Including the occupation title, the work schedule, training and education requirements (zones), whether remote or not, whether green or not, and whether knowledge activity or not.
Names in the database
- occupation: Contains occupation titles according to the ONETSOC19 system. The actual codes aren’t available in the table, but titles can be joined to official crosswalks to recover them. Its’ spanish version can be found in onet_job. Problem to report: 2.5% of occupation records are empty, 0% of onet_job are empty.
- remote: Binary indicator on whether a possition offers any kind of work from home (WFH) arrangement. Namely remote or hybrid work.
- area: Binary indicator on whether a the employer is likely be a knowledge-intensive services provider, as defined by the Ley de Economía del Conocimiento Argentina: **software; nanotecnología; biotecnología; las industrias audiovisual, aeroespacial y satelital; la ingeniería para la industria nuclear y la robótica, entre otras actividades.*
- green_job: Variable showing the ONET green occupation category a vacancy falls into (Green New & Emerging, Green Enhanced Skills, and Green Increased Demand.)
- job_zone: Variable showing the ONET category of preparation requirements an vacancy falls into. Here, preparation stands for a mix of education, experience, and training.
- schedule: Variable showing the contractual arrangement offered in the vacancy. It can take “Intership”, “Contractor”, “Part-time”, “Full-time”, and “other” as categories.
Origin variables: Including the id of the vacancy, the date, the country code, firm name, platform, and region.
Names in the database
- country_code: The name of the country.
- date_posted: The date the vacancy was posted in yyyy-mm-dd format.
- firm: The name of the firm publishing the post.
- rm: Region Metropolitana. It has 24 unique values for Argentina (equal to Provincia in when the count of vacancies is small, otherwise accounting for important metropolitan areas). Similarly, “rm” has 18 unique values for Chile (two more of what’s supposed to be if the intention is showing Regiones), and 6 unique values for Uruguay (way below the 18 Departamentos).
- city_name: City. Good providing more geographic granularity. A high-level analysis shows that cities like Vicente Lopez and Quilmes have a combined number of vacancies similar to that of Santa Fe and Rosario combined, Córdoba Capital, and Mendoza Capital.
- job_name: The name the employer gave to the vacancy in the posting.
- descrip: The raw text description of the job.

There is an statistical summary of these and other relevant variables in table Table 2.

Database statistics

Here we present the dimension and summary statistics of our dataset:

[1] “There are 60689 postings in our data. Job postings count by country:”

Code

latest_country_PEA<-read_csv("data/latest_country_pea.csv") %>% 
  select(country_code=ref_area, PEA=obs_value) %>% 
  mutate(PEA=PEA,
         PEA_share=PEA/sum(PEA))

country_code_df %>%
  left_join(latest_country_PEA) %>% 
  gt() %>% 
  tab_header(title = "Overall Statistics",
             subtitle = paste0("Between ", min(south_cone_df$date_posted), " and ",max(south_cone_df$date_posted), ". Population data comes from ILOSTAT")
  ) %>% 
  fmt_percent(ends_with("share")) %>% 
  fmt_integer(columns = vars(country_vacancies,PEA)) %>% 
  cols_label(
    country_vacancies = "Online vacancies",
    country_share = "Online vacancies (%)",
    PEA = "Working Age Pop (Thousands)",
    PEA_share = "Working Age Pop (%)"
  )

Table 1:
Summary
country_code	Online vacancies	Online vacancies (%)	Working Age Pop (Thousands)	Working Age Pop (%)
Overall Statistics
Between 2023-09-25 and 2023-10-29. Population data comes from ILOSTAT
CHL	35,194	57.99%	15,706	38.69%
ARG	23,435	38.61%	22,049	54.32%
URY	2,060	3.39%	2,837	6.99%

Code

south_cone_df %>% 
  group_by(country_code) %>% 
  mutate(date_posted=lubridate::as_date(date_posted)) %>% 
  select(date_posted,'firm','source','rm','city','city_name',work_vars,area_bin,green_job_bin) %>% 
  skimr::skim()

Table 2: Detailed summary

(a) Data summary
Name	Piped data
Number of rows	60689
Number of columns	16
_______________________
Column type frequency:
character	10
Date	1
logical	3
numeric	1
________________________
Group variables	country_code

Variable type: character

skim_variable	country_code	n_missing	complete_rate	min	max	empty	n_unique
firm	ARG	0	1.00	0	93	11	6037
firm	CHL	0	1.00	2	212	0	9596
firm	URY	0	1.00	3	123	0	798
source	ARG	0	1.00	15	64	0	200
source	CHL	0	1.00	15	120	0	199
source	URY	0	1.00	16	53	0	48
rm	ARG	0	1.00	5	35	0	24
rm	CHL	0	1.00	5	16	0	18
rm	URY	0	1.00	3	13	0	6
city	ARG	0	1.00	0	89	6	806
city	CHL	0	1.00	4	25	0	240
city	URY	0	1.00	4	25	0	70
city_name	ARG	0	1.00	4	35	0	216
city_name	CHL	0	1.00	4	20	0	201
city_name	URY	0	1.00	4	22	0	48
occupation	ARG	0	1.00	0	97	366	619
occupation	CHL	0	1.00	0	94	1025	680
occupation	URY	0	1.00	0	94	32	284
onet_job	ARG	0	1.00	7	113	0	637
onet_job	CHL	0	1.00	7	114	0	703
onet_job	URY	0	1.00	7	107	0	292
schedule	ARG	0	1.00	5	10	0	5
schedule	CHL	0	1.00	5	10	0	5
schedule	URY	0	1.00	5	10	0	5
green_job	ARG	19330	0.18	20	22	0	3
green_job	CHL	30639	0.13	20	22	0	3
green_job	URY	1738	0.16	20	22	0	3
area	ARG	0	1.00	12	15	0	2
area	CHL	0	1.00	12	15	0	2
area	URY	0	1.00	12	15	0	2

Variable type: Date

skim_variable	country_code	complete_rate	min	max	median	n_unique
date_posted	ARG	1	2023-09-29	2023-10-29	2023-10-26	31
date_posted	CHL	1	2023-09-28	2023-10-28	2023-10-26	31
date_posted	URY	1	2023-09-25	2023-10-24	2023-10-22	30

Variable type: logical

skim_variable	country_code	complete_rate	mean	count
remote	ARG	1	0.03	FAL: 22837, TRU: 598
remote	CHL	1	0.01	FAL: 34729, TRU: 465
remote	URY	1	0.02	FAL: 2023, TRU: 37
area_bin	ARG	1	0.38	FAL: 14452, TRU: 8983
area_bin	CHL	1	0.31	FAL: 24248, TRU: 10946
area_bin	URY	1	0.36	FAL: 1310, TRU: 750
green_job_bin	ARG	1	0.18	FAL: 19330, TRU: 4105
green_job_bin	CHL	1	0.13	FAL: 30639, TRU: 4555
green_job_bin	URY	1	0.16	FAL: 1738, TRU: 322

Variable type: numeric

skim_variable	country_code	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
zones	ARG	1	3.04	0.99	1	2	3	4	5	▁▇▆▇▂
zones	CHL	1	2.83	1.00	1	2	3	4	5	▁▇▅▃▁
zones	URY	1	2.97	0.98	1	2	3	4	5	▁▇▆▆▂

Characterizing labor market demand (Work in progress)

This section consists in highlighting vacancy distributions of each country across different indicators (occupational groups, sectors, skills, sub-skils, job zones, work schedule, green jobs, remote jobs, knowledge jobs, regions, and firms). Each indicator will have its own section.

Unless otherwise specified, each section will start with the overall distribution of vacancies across that variable, followed by the same distribution within each country. Each section is then finalized a relative concentration analysis, showing where is each country more specialized vis a vis its’ peers.

In other words, analysis will answer the following questions:

Whats more common?
What’s the most common in each country?
Which country has more of each group?
Which country has a higher than average concentration on each group?

Across occupations

We prepare the South Cone data for aggregation at the Major SOC group (2018) level. We first get the ONET SOC 19 code of each occupational title in the data, and then use ONET crosswalk to SOC18.

Summary

“Sales and Related”, “Office and Administrative Support”, “Production”, “Business and Financial”, “Management”, “Architecture and Engineering”, and “Computer and Mathematical” occupations are the most prevalent occupational groups across all countries. Together they account for about 70% of all vacancies.
Argentina’s demand is remarkably strong in “Business and Financial Operations”, “Arts, Design, Entertainment, and Media” and “Installation, Maintenance, and Repair” occupations, as well as remarkably in “Transportation and Material Moving”,“Protective services” and “Community and Social Service” occupations. It’s above average in “Computer and Mathematical” and “Architecture and Engineering Occupations”.
Chile accounts for almost 60% of the sample, so it swings much less from the average than Argentina and Uruguay. Chile’s demand is remarkably strong in “Transportation and Material Moving Occupations”, “Building and Ground Cleaning and Maintenance”, and “Community and Social service” occupations. It’s remarkably in “Business and Financial Operations Occupations” and below average in “Computer and Mathematical occupations”.
Uruguay’s demand is remarkably strong in “Computer and Mathematical Occupations”, “Educational Instruction and Library Occupations”, “Personal Care and Service”, “Construction and Extraction”, and “Legal” occupations. It’s remarkably in “Management”, “Healthcare Support”, and “Healthcare Practitioners and Technical” Occupations, which was unexpected.

What’s more common?

We calculate the frequency of each Major SOC group in the South Cone as a whole.

Code

## frecuency table of occupations
major_group_df<-south_cone_df %>%
    group_by(major_group,major_group_title)%>%
    summarise(group_vacancies=n())%>%
    ungroup()%>%
    mutate(group_share=group_vacancies/sum(group_vacancies))%>%
    arrange(desc(group_vacancies)) 

## frequency table of occupations, by country
major_group_by_cty<-south_cone_df %>%
  group_by(country_code,major_group,major_group_title)%>%
  summarise(count=n() ) %>%
  ungroup() %>% 
  left_join(major_group_df %>%
              select(major_group,group_vacancies))  %>% 
  left_join(country_code_df %>%
              select(country_code,country_vacancies))  %>%
  mutate(group_in_country_share=count/country_vacancies,
         country_in_group_share=count/group_vacancies) %>% 
  ungroup()

Code

cat_var_chart(data=mutate(south_cone_df,
                          major_group_title=str_remove_all(major_group_title,"Occupations")),
              category = "major_group_title")+
    geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by SOC major group",
       x=NULL)

What’s more common in each country?

We calculate the frequency of each Major SOC group in each country. For each row, we calculate the share of that group in the country and the share of the country in the group.

Code

country_major_soc_df<-country_var_count(
  data=mutate(south_cone_df,major_group_title=str_remove_all(major_group_title," Occupations")),
  country = "country_code",
  category = "major_group_title")

country_var_chart(agg_data=country_major_soc_df,
                  country = "country_code",
                  category = "major_group_title")[[1]]+
  scale_fill_manual(values=country_colors)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by SOC major group",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Figure 2: Major SOC distribution, by country

What’s the country most specialized in each group?

Code

# 10 largest occupational groups
top_10_soc<-var_count(data=mutate(south_cone_df,major_group_title=str_remove_all(major_group_title," Occupations")),
                      category="major_group_title") %>% 
  top_n(10,group_share) %>% 
  pull(major_group_title)

country_var_chart(agg_data = filter(country_major_soc_df, major_group_title%in% top_10_soc),
                  category = "major_group_title",
                  country="country_code")[[2]] +
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten most prevalent occupational groups in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)


# 12 smallest occupational groups
bottom_12_soc<-var_count(data=mutate(south_cone_df,major_group_title=str_remove_all(major_group_title," Occupations")),
                      category="major_group_title")  %>% 
  top_n(12,-group_share) %>% 
  pull(major_group_title)

country_var_chart(agg_data = filter(country_major_soc_df, major_group_title%in% bottom_12_soc),
                  category = "major_group_title",
                  country="country_code")[[2]] +
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Twelve less prevalent occupational groups in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Figure 3: Major SOC distribution by country, side by side

Figure 4: Major SOC distribution by country, side by side

Which country has the largest number of vacancies in each group?

Code

# 10 largest
country_var_chart(agg_data = filter(country_major_soc_df, major_group_title%in% top_10_soc),
                  category = "major_group_title",
                  country="country_code")[[3]] +
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten most prevalent occupational groups in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)


# 12 smallest
country_var_chart(agg_data = filter(country_major_soc_df, major_group_title%in% bottom_12_soc),
                  category = "major_group_title",
                  country="country_code")[[3]] +
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Twelve less prevalent occupational groups in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Figure 5: Major SOC distribution, by country

Figure 6: Major SOC distribution, by country

Table View

Code

country_var_table(data_agg=country_major_soc_df,category="major_group_title", country="country_code",interactive=FALSE)

?(caption)

	major_group_title	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	Sales and Related	9448	15.94%	16.36%	15.63%	16.57%
	Office and Administrative Support	8988	15.17%	14.77%	15.36%	16.42%
	Production	5213	8.80%	8.11%	9.39%	6.66%
	Business and Financial Operations	4702	7.94%	11.00%	5.82%	8.73%
	Transportation and Material Moving	4298	7.25%	4.14%	9.46%	5.52%
	Management	3902	6.59%	6.70%	6.64%	4.34%
	Architecture and Engineering	3088	5.21%	6.23%	4.55%	4.64%
	Computer and Mathematical	2498	4.22%	5.06%	3.42%	7.89%
	Installation, Maintenance, and Repair	2227	3.76%	4.99%	2.91%	4.09%
	Healthcare Practitioners and Technical	2222	3.75%	3.24%	4.23%	1.48%
	Educational Instruction and Library	1863	3.14%	3.52%	2.75%	5.47%
	Protective Service	1810	3.05%	2.10%	3.74%	2.42%
	Food Preparation and Serving Related	1601	2.70%	2.14%	3.08%	2.76%
	Construction and Extraction	1513	2.55%	2.99%	2.20%	3.55%
	Life, Physical, and Social Science	1380	2.33%	2.22%	2.44%	1.73%
	Building and Grounds Cleaning and Maintenance	1297	2.19%	1.16%	2.91%	1.73%
	Arts, Design, Entertainment, Sports, and Media	928	1.57%	2.15%	1.14%	2.02%
	Healthcare Support	834	1.41%	1.02%	1.71%	0.69%
	Personal Care and Service	810	1.37%	1.35%	1.33%	2.27%
	Community and Social Service	296	0.50%	0.17%	0.73%	0.30%
	Farming, Fishing, and Forestry	209	0.35%	0.30%	0.39%	0.35%
	Legal	128	0.22%	0.27%	0.17%	0.39%
sum	—	59,255.00	1.00	1.00	1.00	1.00

Major SOC distribution, by country

Across Sectors (Ramas)

Code

rama_df<-south_cone_df %>% 
  select(country_code,sector_vars) %>% 
  group_by(country_code) %>% 
  summarise(across(sector_vars,~sum(.))) %>% 
  pivot_longer(cols = sector_vars,
               names_to = "sector",
               values_to = "weigths_sum") %>% 
  mutate(sector=str_replace_all(sector,"_"," "),
         sector=str_to_title(sector)) 

rama_df<-rama_df %>% 
  group_by(country_code) %>% 
  mutate(group_in_country_share=weigths_sum/sum(weigths_sum)) %>%
  group_by(sector) %>% 
  mutate(country_in_group_share=weigths_sum/sum(weigths_sum)) %>% 
  ungroup() %>% 
  left_join(
    rama_df %>%
      mutate(total=sum(weigths_sum)) %>% 
      group_by(sector) %>% 
      summarise(group_vacancies=sum(weigths_sum),
                group_share=sum(weigths_sum)/mean(total))
  )

The sectors demanding more jobs online are “Retail Trade”, “Manufacturing”, “Professional scientific and Technical Services”, “Educational Services”, “Government”,and “Finance and Insurance”. They accout for about 83% of all postings.
Argentina’s demand is strong in “Finance and Insurance”, “Professional Scientific and technical services”, and “Construction”. It’s particularly weak in “Retail”, “Accommodation and food services”, “Administrative and support services”, and “transportation and warehousing”
Chile’s demand is strong in “Retail trade”, “Manufacturing”, “Health Care and Social Assistance”, “Administrative and Support Services”, “Retail trade”, “Transportation and Warehousing”, and “Wholesale Trade”. It’s particularly weak in “Professional Scientific and Technical Services”, “Finance and Insurance”, “Construction”, “Other Services Except Public Administration”, and “Information.”
Uruguay is super strong in “Professional Scientific and Technical Services”, “Educational Services”,“Construction”, and “Information”. It’s strong in “Construction”. It’s particularly weak in “Health care and social assistance” and “Manufacturing”.

What’s more common?

Code

rama_df %>% 
  distinct(sector,group_share) %>% 
  ggplot(aes(x=reorder(sector,
                       -group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Sector",
       x=NULL,
       y=NULL)

Figure 7: Sector distribution, with number of vacancies wegithed by the weight of the sector

What’s more common in each country?

Code

country_var_chart(agg_data=rename(rama_df,count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[1]]+
  scale_fill_manual(values=country_colors)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Sector",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

What are countries specialized in?

Code

top_10_naics<-rama_df %>%
  top_n(30,group_vacancies) %>% 
  pull(sector)

# 10 largest
country_var_chart(agg_data=rename(filter(rama_df,sector %in% top_10_naics),count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten most prevalent sectors in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Figure 9: Sector distribution, by country

Code

# 10 smallest
bottom_10_naics<-rama_df %>%
  top_n(30,-group_vacancies) %>% 
  pull(sector)
country_var_chart(agg_data=rename(filter(rama_df,sector %in% bottom_10_naics),count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten least prevalent sectors in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Code

# Summary
rama_df %>% 
  ggplot(aes(x=reorder(str_to_title(sector),-group_share),y=country_code, 
             fill=group_in_country_share/group_share))+
  geom_tile(color="black")+
  theme(axis.text.x = element_text(angle = 65, hjust=1, size=13))+
  scale_fill_fermenter(palette = "RdBu",direction = 1, 
                       breaks= c(0.35,0.7,1,1.3),
                       labels = function(x) paste0( x, 'x'))+
  labs(title = "Summary: Demand hotspots by country and sector",
       subtitle = "sorted from more to less total vacancies",
       fill="Location\nquotient",
       y=NULL,
       x=NULL)

Which country accounts for the largest number of vacancies?

Code

# 10 largest
country_var_chart(agg_data=rename(filter(rama_df,sector %in% top_10_naics),count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten most prevalent sectors in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Code

# 10 smallest
country_var_chart(agg_data=rename(filter(rama_df,sector %in% bottom_10_naics),count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten least prevalent sectors in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Table View

Code

country_var_table(data_agg=rename(rama_df,count=weigths_sum),
                  category="sector",
                  country="country_code",interactive=FALSE)

?(caption)

	sector	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	Retail Trade	9648.4056	16.28%	14.80%	17.31%	15.76%
	Professional Scientific And Technical Services	8593.0141	14.50%	16.80%	12.73%	18.24%
	Manufacturing	8204.4577	13.85%	14.25%	13.80%	9.96%
	Health Care And Social Assistance	4556.7503	7.69%	6.56%	8.56%	5.80%
	Educational Services	4339.2046	7.32%	7.93%	6.78%	9.54%
	Government	4052.9903	6.84%	7.62%	6.27%	7.52%
	Finance And Insurance	3833.9949	6.47%	7.96%	5.41%	7.37%
	Administrative And Support Services	3697.3056	6.24%	4.67%	7.39%	4.68%
	Accommodation And Food Services	2380.7913	4.02%	3.14%	4.56%	4.81%
	Construction	1827.4932	3.08%	3.48%	2.77%	3.80%
	Transportation And Warehousing	1656.7556	2.80%	2.14%	3.28%	2.15%
	Other Services Except Public Administration	1631.0383	2.75%	3.21%	2.44%	2.83%
	Wholesale Trade	1517.1876	2.56%	2.33%	2.75%	1.93%
	Information	1140.0591	1.92%	2.09%	1.76%	2.73%
	Real Estate And Rental And Leasing	841.0378	1.42%	0.92%	1.77%	1.23%
	Management Of Companies And Enterprises	394.4781	0.67%	0.77%	0.60%	0.55%
	Arts Entertainment And Recreation	321.2924	0.54%	0.57%	0.54%	0.33%
	Agriculture Forestry Fishing And Hunting	219.8413	0.37%	0.35%	0.39%	0.37%
	Utilities	218.2831	0.37%	0.18%	0.50%	0.32%
	Mining Quarrying And Oil And Gas Extraction	180.6191	0.30%	0.22%	0.38%	0.08%
sum	—	59,255.00	1.00	1.00	1.00	1.00

Abilities

The most in-demand abilities online are Cognitive (33%) and Sensory (33%).Demand of Physical and Psychomotor activities is almost half of that.

Code

abilities=c('Cognitive Abilities','Sensory Abilities','Psychomotor Abilities','Physical Abilities')


abilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = NULL,
                         variable_names=abilities, 
                         name_of_categories="abilities") 

country_abilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = 'country_code',
                         variable_names=abilities, 
                         name_of_categories="abilities")

Code

abilities_df %>% 
  ggplot(aes(x=reorder(abilities,
                       -group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(subtitle = "Share of vacancies requiring each Ability to some extent",
       x=NULL,
       y=NULL)

Charts

Code

country_var_chart(agg_data=country_abilities_df,
                  country = "country_code",
                  category = "abilities")[[1]]+
  scale_fill_manual(values=country_colors)+
  labs(title = "Share of vacancies requiring each Ability to some extent",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Code

country_var_chart(agg_data=rename(country_abilities_df),
                  country = "country_code",
                  category = "abilities")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(x=NULL,
       y=NULL,
       fill=NULL)

Code

country_var_chart(agg_data=country_abilities_df,
                  country = "country_code",
                  category = "abilities")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(title = "Number of vacancies requiring an Ability, by Country",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Table View

Code

country_var_table(country_abilities_df,
                  category  = "abilities",country = "country_code",
                  interactive = FALSE)

?(caption)

	abilities	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	Cognitive Abilities	58971	99.52%	99.61%	99.46%	99.61%
	Sensory Abilities	58971	99.52%	99.61%	99.46%	99.61%
	Psychomotor Abilities	43369	73.19%	69.36%	75.73%	74.06%
	Physical Abilities	31618	53.36%	46.18%	58.52%	48.03%
sum	—	192,929.00	3.26	3.15	3.33	3.21

Sub-abilities

There are 51 Sub Skills but Dynamic_Flexibility is missing from Uruguay. This report shows the remaining 50 until the original data is fixed.
Oral Comprehension and Oral Expression are the most in-demand sub abilities, followed by Near Vision, Written Comprehension, and Deductive Reasoning. Argentina and Uruguay demand this skills with a higher intensity than Chile.
Number facility and Mathematical Reasoning rank 21th and 22th in the ranking of most demanded sub abilities. Argentina and Uruguay demand this skills with a higher intensity than Chile.
We compared the prevalence of sub-abilites job postings contrasted it what O*NET experts think are the typical importance and mastery levels of each skill within an occupation. We got a strong positive correlation, which suggests our text mining algorithms were able to capture some of the knowledge occupational experts have.

Code

subabilities = c('Arm-Hand_Steadiness', 'Auditory_Attention', 'Category_Flexibility',
                'Control_Precision', 'Deductive_Reasoning', 'Depth_Perception',
                # 'Dynamic_Flexibility',
                'Dynamic_Strength','Explosive_Strength',
                'Extent_Flexibility', 'Far_Vision', 'Finger_Dexterity',
                'Flexibility_of_Closure','Fluency_of_Ideas', 'Gross_Body_Coordination',
                'Gross_Body_Equilibrium', 'Hearing_Sensitivity','Inductive_Reasoning',
                'Information_Ordering', 'Manual_Dexterity', 'Mathematical_Reasoning',
                'Memorization', 'Multilimb_Coordination', 'Near_Vision',
                'Night_Vision', 'Number_Facility','Oral_Comprehension',
                'Oral_Expression', 'Originality', 'Perceptual_Speed',
                'Peripheral_Vision','Problem_Sensitivity','Rate_Control',
                'Reaction_Time', 'Response_Orientation', 'Selective_Attention',
                'Sound_Localization', 'Spatial_Orientation', 'Speech_Clarity',
                'Speech_Recognition', 'Speed_of_Closure','Speed_of_Limb_Movement',
                'Stamina', 'Static_Strength', 'Time_Sharing', 'Trunk_Strength', 
                'Visual_Color_Discrimination', 'Visualization', 'Wrist-Finger_Speed',
                'Written_Comprehension', 'Written_Expression')

abilities_taxonomy<-read_delim("raw/ONET_28_0/Abilities.txt",delim = "\t" ) %>%
  janitor::clean_names() %>%
  distinct(subabilities_id=element_id,subabilities=element_name) %>% 
  mutate(ability_id=substr(subabilities_id,1,5)) %>% 
  mutate(ability=case_when(ability_id=="1.A.1"~"Cognitive Abilities",
                           ability_id=="1.A.2"~"Psychomotor Abilities",
                           ability_id=="1.A.3"~"Physical Abilities",
                           ability_id=="1.A.4"~"Sensory Abilities"))

subabilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = NULL,
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(abilities_taxonomy)

country_subabilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = 'country_code',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(abilities_taxonomy)

Charts

Code

subabilities_df %>% 
  ggplot(aes(x=reorder(subabilities,
                       -group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  facet_wrap(vars(ability),scales = "free_x")+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies requiring each Subability to some extent",
       x=NULL,
       y=NULL)

Code

purrr::map(c("ARG","CHL","URY"),
       skills_barchart,
       data_agg=country_subabilities_df)

[[1]]


[[2]]


[[3]]

Code

country_subabilities_df %>% 
  ggplot(aes(x=reorder(subabilities,-group_share ),
             y=country_code, fill=group_in_country_share /group_share ))+
  geom_tile(color="black")+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  scale_fill_fermenter(palette = "RdBu",direction = 1, 
                       breaks= c(0.35,0.7,1,1.3),
                       labels = function(x) paste0( x, 'x'))+
   facet_wrap(vars(ability),scales = "free_x")+
  labs(title = "Demand hotspots by country and subabilities",
       subtitle = "sorted from more to less total vacancies",
       fill="Location\nquotient",
       y=NULL,
       x=NULL)

Code

top_subabilities<-subabilities_df %>% 
  top_n(15, group_share) %>% 
  pull(subabilities)

country_var_chart(agg_data=filter(country_subabilities_df,subabilities %in% top_subabilities),
                  country = "country_code",
                  category = "subabilities")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(title = "Number of vacancies requiring an Ability, by Country",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Table View

Code

country_var_table(country_subabilities_df,
                  category  = "subabilities",country = "country_code",
                  interactive = FALSE)

?(caption)

	subabilities	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	Information Ordering	58971	99.52%	99.61%	99.46%	99.61%
	Oral Comprehension	58971	99.52%	99.61%	99.46%	99.61%
	Oral Expression	58971	99.52%	99.61%	99.46%	99.61%
	Problem Sensitivity	58971	99.52%	99.61%	99.46%	99.61%
	Selective Attention	58971	99.52%	99.61%	99.46%	99.61%
	Speech Recognition	58971	99.52%	99.61%	99.46%	99.61%
	Speech Clarity	58967	99.51%	99.60%	99.45%	99.61%
	Deductive Reasoning	58930	99.45%	99.60%	99.34%	99.61%
	Inductive Reasoning	58930	99.45%	99.60%	99.34%	99.61%
	Near Vision	58801	99.23%	99.20%	99.24%	99.41%
	Category Flexibility	58380	98.52%	98.79%	98.34%	98.57%
	Far Vision	57258	96.63%	95.88%	97.11%	97.19%
	Written Comprehension	57224	96.57%	97.86%	95.64%	97.68%
	Written Expression	56106	94.69%	96.24%	93.74%	92.90%
	Flexibility of Closure	55782	94.14%	95.85%	92.90%	95.51%
	Perceptual Speed	53588	90.44%	91.42%	89.78%	90.38%
	Time Sharing	53324	89.99%	90.31%	89.77%	90.09%
	Fluency of Ideas	47708	80.51%	85.89%	76.55%	86.14%
	Originality	46125	77.84%	83.35%	73.78%	83.63%
	Number Facility	46016	77.66%	81.81%	74.93%	76.33%
	Visualization	45519	76.82%	77.85%	75.93%	79.98%
	Mathematical Reasoning	44783	75.58%	81.26%	71.70%	76.23%
	Finger Dexterity	41803	70.55%	68.11%	72.13%	71.55%
	Speed of Closure	39592	66.82%	69.88%	64.62%	68.98%
	Memorization	38493	64.96%	70.53%	60.85%	70.96%
	Visual Color Discrimination	32468	54.79%	50.96%	57.53%	52.32%
	Trunk Strength	31361	52.93%	45.68%	58.11%	47.93%
	Arm-Hand Steadiness	29918	50.49%	46.44%	53.53%	45.32%
	Auditory Attention	27724	46.79%	43.45%	49.02%	47.19%
	Manual Dexterity	25114	42.38%	37.66%	45.90%	36.88%
	Multilimb Coordination	23929	40.38%	34.17%	44.84%	36.00%
	Static Strength	23104	38.99%	32.46%	43.74%	33.23%
	Hearing Sensitivity	19745	33.32%	31.66%	34.47%	32.99%
	Control Precision	19688	33.23%	31.80%	34.41%	29.54%
	Depth Perception	16336	27.57%	24.24%	29.93%	25.64%
	Extent Flexibility	16130	27.22%	23.01%	30.19%	25.05%
	Stamina	15674	26.45%	20.20%	30.81%	24.11%
	Reaction Time	13876	23.42%	22.41%	24.18%	22.09%
	Gross Body Coordination	10846	18.30%	14.94%	20.62%	17.50%
	Rate Control	10240	17.28%	16.47%	17.87%	16.62%
	Response Orientation	9749	16.45%	16.17%	16.66%	16.17%
	Dynamic Strength	8427	14.22%	13.07%	14.95%	15.04%
	Gross Body Equilibrium	5167	8.72%	8.85%	8.45%	11.79%
	Wrist-Finger Speed	4841	8.17%	8.26%	8.01%	9.86%
	Spatial Orientation	4324	7.30%	5.26%	8.65%	7.69%
	Speed of Limb Movement	3363	5.68%	5.90%	5.46%	6.71%
	Sound Localization	1791	3.02%	3.34%	2.85%	2.27%
	Peripheral Vision	1788	3.02%	2.24%	3.51%	3.55%
	Explosive Strength	1654	2.79%	1.54%	3.62%	2.96%
	Night Vision	321	0.54%	0.46%	0.57%	0.99%
sum	—	1,688,733.00	28.50	28.21	28.70	28.41

Extension: Sub-Skills by sector

As we said before, sectors and sub-skills aren’t discretely assign to each online vacancies. Instead, each sector and skill has a weight on each job vacancy associated with the chances the firm belongs to that sector (or demands that skill).

To offer a tractable measure of the skills demand by sector we’re simply going to assign a 1 to the sector with the maximum chances of being the vacancy’s sector. Then we either count the number of times an ability (or subability) is required by a vacancy in the chosen sectors, or their average importance within the latter.

Another way is just counting the percentage of all postings within a country in which both the skill and the sector had a higher than average weight. We’ll try different specifications and use the most satisfactory one in the final deliverable.

This is the frequency in which each sectors is a vacancy’s most-likely sector:

Code

main_sector_df<-country_var_count(south_cone_df,
                                  country = NULL,
                                  category = 'main_sector')

main_sector_df%>% 
  ggplot(aes(x=reorder(main_sector,-group_share),y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_y_continuous(labels=scales::percent_format())+
  theme(axis.text.x = element_text(angle = 65, hjust=1))

This is the frequency in which each skill has positive changes of being demanded in a vacancy:

Code

subskills_df<-south_cone_df %>% 
  select(doc_id,subabilities) %>% 
  mutate(across(c(subabilities),~ifelse(.>0,TRUE,FALSE))) 

subskills_df %>% 
  select(-1) %>% 
  skimr::skim() %>% 
  as_tibble() %>% 
  arrange(desc(logical.mean)) %>% 
  kableExtra::kable()

?(caption)

skim_type	skim_variable	n_missing	complete_rate	logical.mean	logical.count
logical	Information_Ordering	284	0.9952072	1.0000000	TRU: 58971
logical	Oral_Comprehension	284	0.9952072	1.0000000	TRU: 58971
logical	Oral_Expression	284	0.9952072	1.0000000	TRU: 58971
logical	Problem_Sensitivity	284	0.9952072	1.0000000	TRU: 58971
logical	Selective_Attention	284	0.9952072	1.0000000	TRU: 58971
logical	Speech_Recognition	284	0.9952072	1.0000000	TRU: 58971
logical	Speech_Clarity	284	0.9952072	0.9999322	TRU: 58967, FAL: 4
logical	Deductive_Reasoning	284	0.9952072	0.9993047	TRU: 58930, FAL: 41
logical	Inductive_Reasoning	284	0.9952072	0.9993047	TRU: 58930, FAL: 41
logical	Near_Vision	284	0.9952072	0.9971172	TRU: 58801, FAL: 170
logical	Category_Flexibility	284	0.9952072	0.9899781	TRU: 58380, FAL: 591
logical	Far_Vision	284	0.9952072	0.9709518	TRU: 57258, FAL: 1713
logical	Written_Comprehension	284	0.9952072	0.9703753	TRU: 57224, FAL: 1747
logical	Written_Expression	284	0.9952072	0.9514168	TRU: 56106, FAL: 2865
logical	Flexibility_of_Closure	284	0.9952072	0.9459226	TRU: 55782, FAL: 3189
logical	Perceptual_Speed	284	0.9952072	0.9087178	TRU: 53588, FAL: 5383
logical	Time_Sharing	284	0.9952072	0.9042411	TRU: 53324, FAL: 5647
logical	Fluency_of_Ideas	284	0.9952072	0.8090078	TRU: 47708, FAL: 11263
logical	Originality	284	0.9952072	0.7821641	TRU: 46125, FAL: 12846
logical	Number_Facility	284	0.9952072	0.7803157	TRU: 46016, FAL: 12955
logical	Visualization	284	0.9952072	0.7718879	TRU: 45519, FAL: 13452
logical	Mathematical_Reasoning	284	0.9952072	0.7594072	TRU: 44783, FAL: 14188
logical	Finger_Dexterity	284	0.9952072	0.7088739	TRU: 41803, FAL: 17168
logical	Speed_of_Closure	284	0.9952072	0.6713808	TRU: 39592, FAL: 19379
logical	Memorization	284	0.9952072	0.6527446	TRU: 38493, FAL: 20478
logical	Visual_Color_Discrimination	284	0.9952072	0.5505757	TRU: 32468, FAL: 26503
logical	Trunk_Strength	284	0.9952072	0.5318038	TRU: 31361, FAL: 27610
logical	Arm-Hand_Steadiness	284	0.9952072	0.5073341	TRU: 29918, FAL: 29053
logical	Auditory_Attention	284	0.9952072	0.4701294	FAL: 31247, TRU: 27724
logical	Manual_Dexterity	284	0.9952072	0.4258703	FAL: 33857, TRU: 25114
logical	Multilimb_Coordination	284	0.9952072	0.4057757	FAL: 35042, TRU: 23929
logical	Static_Strength	284	0.9952072	0.3917858	FAL: 35867, TRU: 23104
logical	Hearing_Sensitivity	284	0.9952072	0.3348256	FAL: 39226, TRU: 19745
logical	Control_Precision	284	0.9952072	0.3338590	FAL: 39283, TRU: 19688
logical	Depth_Perception	284	0.9952072	0.2770175	FAL: 42635, TRU: 16336
logical	Extent_Flexibility	284	0.9952072	0.2735243	FAL: 42841, TRU: 16130
logical	Stamina	284	0.9952072	0.2657917	FAL: 43297, TRU: 15674
logical	Reaction_Time	284	0.9952072	0.2353021	FAL: 45095, TRU: 13876
logical	Gross_Body_Coordination	284	0.9952072	0.1839209	FAL: 48125, TRU: 10846
logical	Rate_Control	284	0.9952072	0.1736447	FAL: 48731, TRU: 10240
logical	Response_Orientation	284	0.9952072	0.1653185	FAL: 49222, TRU: 9749
logical	Dynamic_Strength	284	0.9952072	0.1429007	FAL: 50544, TRU: 8427
logical	Gross_Body_Equilibrium	284	0.9952072	0.0876193	FAL: 53804, TRU: 5167
logical	Wrist-Finger_Speed	284	0.9952072	0.0820912	FAL: 54130, TRU: 4841
logical	Spatial_Orientation	284	0.9952072	0.0733242	FAL: 54647, TRU: 4324
logical	Speed_of_Limb_Movement	284	0.9952072	0.0570280	FAL: 55608, TRU: 3363
logical	Sound_Localization	284	0.9952072	0.0303709	FAL: 57180, TRU: 1791
logical	Peripheral_Vision	284	0.9952072	0.0303200	FAL: 57183, TRU: 1788
logical	Explosive_Strength	284	0.9952072	0.0280477	FAL: 57317, TRU: 1654
logical	Night_Vision	284	0.9952072	0.0054434	FAL: 58650, TRU: 321

Code

mainsector_abilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = 'main_sector',
                         variable_names=abilities, 
                         name_of_categories="abilities") %>% 
   mutate(abilities=str_replace_all(abilities,"_"," "),
         abilities=str_to_title(abilities),
         abilities=str_replace_all(abilities," Of "," of ")) 

sector_skills_matrix(data_agg = mainsector_abilities_df,
                     ability_val = NULL,metric = "mean")

Code

mainsector_subabilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = 'main_sector',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(abilities_taxonomy)


purrr::map(c("Cognitive Abilities","Psychomotor Abilities","Physical Abilities","Sensory Abilities"),
       sector_skills_matrix,
       data_agg=mainsector_subabilities_df,
       metric="mean")

[[1]]


[[2]]


[[3]]


[[4]]

Job zones

41% of job vacancies in the South Cone require “(2) Some preparation”, while 25% require “(4) Considerable preparation”, and another 25% requires “(3) Middle preparation”. Only 8% of the demand is focused on the “(1) no preparation” and “(5) a lot of preparation” extremes.
46% of job postings in Chile demand “(2) Some preparation”. It’s the country most concentrated in that area of demand by a considerable margin.
30.5% of job postings in Argentina demand “(4) Considerable preparation”. It’s the country most concentrated in that area of demand by a moderate margin.
29.6% of job postings in Uruguay demand “(3) Middle preparation”. It’s the country most concentrated in that area of demand by a moderate margin.

Code

zones_df<-country_var_count(data = south_cone_df,
                  category = "zones_label",
                  country=NULL)

zones_country_df<-country_var_count(data = south_cone_df,
                  category = "zones_label",
                  country="country_code")

zones_df %>% 
  ggplot(aes(x=zones_label,
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Zone",
       x=NULL,
       y=NULL)

Code

zones_country_df %>% 
  filter(country_code=="ARG") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=zones_label,
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=13,
    angle = 65, hjust=1
   ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")


zones_country_df %>% 
  filter(country_code=="CHL") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=zones_label,
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=13,
    angle = 65, hjust=1
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

zones_country_df %>% 
  filter(country_code=="URY") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=zones_label,
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=13,
    angle = 65, hjust=1
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Code

country_var_chart(zones_country_df,
                  country = "country_code",
                  category = "zones_label")[[1]]+
  scale_fill_manual(values=country_colors)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by zone",
       subtitle = "Only showing 20 most common",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Code

country_var_chart(zones_country_df,
                  country = "country_code",
                  category = "zones_label")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Job zones distribution in online vacancies data",
       x=NULL,
       fill=NULL,
       fill=NULL)

Code

country_var_chart(zones_country_df,
                  country = "country_code",
                  category = "zones_label")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Job zones distribution in online vacancies data",
       x=NULL,
       fill=NULL,
       fill=NULL)

Code

country_var_table(data_agg = zones_country_df,
                  category =  "zones_label",
                  country="country_code",
                  interactive = FALSE)

?(caption)

	zones_label	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	(2) Algo de preparación	24478	41.31%	34.95%	45.92%	36.05%
	(4) Preparación considerable	15175	25.61%	30.48%	22.33%	25.49%
	(3) Preparación media	14530	24.52%	26.35%	22.98%	29.64%
	(5) Mucha o extensa preparación	3612	6.10%	6.24%	5.99%	6.26%
	(1) Poca o ninguna preparación	1460	2.46%	1.98%	2.78%	2.56%
sum	—	59,255.00	1.00	1.00	1.00	1.00

Job zones across sectors

Code

zones_sector_df<-country_var_count(south_cone_df,
                                  category = "zones_label",country="main_sector")

zones_sector_df %>% 
  ggplot(aes(x=main_sector,
             y=group_in_country_share))+
  geom_col(aes(fill=zones_label),position = "fill", color="black")+
  geom_label(data=group_by(zones_sector_df,main_sector) %>% 
              mutate(label_y=ifelse(group_in_country_share==max(group_in_country_share),
                             paste(round(group_in_country_share,2)*100,"%"),NA)),
            aes(label=label_y, color=zones_label),
            alpha=.9, size=2, position = position_fill(vjust = 0.5), show.legend = FALSE)+
  coord_flip()+
  scale_y_continuous(labels = scales::percent_format())+
  scale_fill_manual(values=RColorBrewer::brewer.pal(9,"YlGnBu")[c(3,4,5,7,9)])+
  scale_color_manual(values=RColorBrewer::brewer.pal(9,"YlGnBu")[c(3,4,5,7,9)])+
  labs(title = "Share of Zones in job postings by Main Sector",
       fill=NULL,
       color=NULL,
       y=NULL,
       x=NULL)

Code

purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df,
       category = "zones_label",country="main_sector")

[[1]]


[[2]]


[[3]]

Code

library(readxl)
sector_names<-readxl::read_excel("data/traducciones.xlsx", sheet=1)%>%
  janitor::clean_names()%>%
  rename(sector=1, sector_es=2)

charts_sectors<-purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df%>%
         filter(main_sector %in% sectors_focus)%>%
         left_join(sector_names,by=c("main_sector"="sector"))%>%
         mutate(main_sector=sector_es),
       category = "zones_label",country="main_sector")

charts_areas<-purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df,
       category = "zones_label",country="area")  
library(patchwork)
(charts_sectors[[1]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[1]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

(charts_sectors[[2]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[2]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

(charts_sectors[[3]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[3]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Job zones across occupational groups

Code

zones_soc_df<-country_var_count(south_cone_df %>% 
                                  mutate(major_group_title = str_remove_all(major_group_title," Occupations")),
                                  category = "zones_label",
                                country="major_group_title")

zones_sector_df %>% 
  ggplot(aes(x=main_sector,
             y=group_in_country_share))+
  geom_col(aes(fill=zones_label),position = "fill", color="black")+
  geom_label(data=group_by(zones_sector_df,main_sector) %>% 
              mutate(label_y=ifelse(group_in_country_share==max(group_in_country_share),
                             paste(round(group_in_country_share,2)*100,"%"),NA)),
            aes(label=label_y, color=zones_label),
            alpha=.9, size=2, position = position_fill(vjust = 0.5), show.legend = FALSE)+
  coord_flip()+
  scale_y_continuous(labels = scales::percent_format())+
  scale_fill_manual(values=RColorBrewer::brewer.pal(9,"YlGnBu")[c(3,4,5,7,9)])+
  scale_color_manual(values=RColorBrewer::brewer.pal(9,"YlGnBu")[c(3,4,5,7,9)])+
  labs(title = "Share of Zones in job postings by Major SOC",
       fill=NULL,
       color=NULL,
       y=NULL,
       x=NULL)

Code

purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df %>% mutate(major_group_title = str_remove_all(major_group_title," Occupations")),
       category = "zones_label",country="major_group_title")

[[1]]


[[2]]


[[3]]

Work Schedule

83% of postings seek to fill full-time positions.
14% of postings in Chile correspond to “Other”. This needs clarification. This is associated with the lower than average share of full-time positions in Chile.
5.7% of Chile job postings correspond to “Part-time” roles (about 1 percent point above average).
The contractor mode is more prevalent in Uruguay job postings (almost twice the average). This would be consistent with the rumors about many Uruguay firms outsourcing Argentinean workers. This should be corroborated with remote work data.

Code

schedule_df<-country_var_count(data = south_cone_df,
                                       category =  "schedule",country=NULL)
schedule_country_df<-country_var_count(data = south_cone_df,
                                       category =  "schedule",country="country_code")

country_var_table(data=schedule_country_df, 
                  category = "schedule",country="country_code",
                  interactive = FALSE)

?(caption)

	schedule	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	Full-time	49201	83.03%	88.97%	78.58%	90.53%
	Other	6278	10.59%	5.54%	14.36%	4.68%
	Part-time	2822	4.76%	3.66%	5.65%	2.27%
	Contractor	705	1.19%	1.26%	1.09%	2.07%
	Internship	249	0.42%	0.56%	0.32%	0.44%
sum	—	59,255.00	1.00	1.00	1.00	1.00

Code

schedule_df %>% 
  ggplot(aes(x=reorder(schedule,
                       -group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Schedule",
       x=NULL,
       y=NULL)

Code

country_var_chart(schedule_country_df, 
                  category = "schedule",country="country_code")[[1]]+
  scale_fill_manual(values=country_colors)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Schedule",
       subtitle = "Only showing 20 most common",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Code

country_var_chart(schedule_country_df, 
                  category = "schedule",country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Schedule distribution in online vacancies data",
       x=NULL,
       fill=NULL)

Code

country_var_chart(schedule_country_df, 
                  category = "schedule",country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Schedule distribution in online vacancies data",
       x=NULL,
       fill=NULL)

Green jobs

Green jobs represent 15% of all online job postings.
Argentina has the highest share of green jobs in its’ job postings (17%).
Within green jobs, the most demanded are classified as “Green Increased Demand” (45%).
Green job postings in Argentina are 48% “Green Increased Demand”, 37% are “Green enhanced skills”, and 14% are “Green New & Emerging”.
Greener regions in terms of online vacancies are Santiago, Buenos Ares, Valparaíso, Concepción, Rosario, Córdoba y Antofagasta. Buenos aires, Concepción, Rosario, Córdoba, and Antofagasta are overrepresented in the sample of green online vacancies.

Code

green_country_df_1<-country_var_count(data = south_cone_df,
                                      category = "green_job_bin",country="country_code")

Code

country_var_table(data=green_country_df_1, category = "green_job_bin",country="country_code",
                  interactive = FALSE)

Table 3: Green jobs distribution
	green_job_bin	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	FALSE	50273	84.84%	82.20%	86.67%	84.12%
	TRUE	8982	15.16%	17.80%	13.33%	15.88%
sum	—	59,255.00	1.00	1.00	1.00	1.00

Code

country_var_chart(green_country_df_1, 
                  category = "green_job_bin",country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Green jobs distribution in online vacancies data",
       x=NULL, 
       fill=NULL)

Figure 31: Green jobs distribution, by country

Code

country_var_chart(green_country_df_1, 
                  category = "green_job_bin",country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Green jobs distribution in online vacancies data",
       x=NULL, 
       fill=NULL)

Code

## Decomposition of green jobs
green_country_df_2<-country_var_count(data = filter(south_cone_df,
                                                    green_job_bin==TRUE),
                                      category = "green_job",
                                      country="country_code")

country_var_table(data=green_country_df_2, 
                  category = "green_job",country="country_code",
                  interactive = FALSE)

Table 4: Green job types distribution
	green_job	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	Green Increased Demand	4042	45.00%	48.43%	42.06%	42.86%
	Green Enhanced Skills	3654	40.68%	37.59%	43.36%	42.24%
	Green New & Emerging	1286	14.32%	13.98%	14.58%	14.91%
sum	—	8,982.00	1.00	1.00	1.00	1.00

Code

country_var_chart(green_country_df_2, 
                  category = "green_job",country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Green jobs types in online vacancies data",
       x=NULL,
       y=NULL,
       fill=NULL)

Figure 32: Green job types distribution by country

Code

country_var_chart(green_country_df_2, 
                  category = "green_job",country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Green jobs types in online vacancies data",
       x=NULL,
       y=NULL,
       fill=NULL)

Code

library(patchwork)

# Hole size
hsize <- 3

p1<-green_country_df_1 %>% 
  filter(country_code=="ARG") %>% 
  mutate(green_job_bin=ifelse(green_job_bin==TRUE,"Verde","Otros"))%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=green_job_bin, y=group_in_country_share, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=c("gray70",RColorBrewer::brewer.pal(3,"Greens")[[2]]))+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill="Tipo de\nTrabajo")

p2<-green_country_df_2 %>% 
  filter(country_code=="ARG")%>%
  ggplot(aes(x=1,y=group_in_country_share,fill=green_job))+
  geom_col(color="grey30")+
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  theme_void()+
  scale_fill_manual(  values=RColorBrewer::brewer.pal(3,"Greens"))+
  labs(fill="Tipo de\nTrabajo\nVerde")
p1+p2

p1<-green_country_df_1 %>% 
  filter(country_code=="CHL") %>% 
  mutate(green_job_bin=ifelse(green_job_bin==TRUE,"Verde","Otros"))%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=green_job_bin, y=group_in_country_share, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=c("gray70",RColorBrewer::brewer.pal(3,"Greens")[[2]]))+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill="Tipo de\nTrabajo")

p2<-green_country_df_2 %>% 
  filter(country_code=="CHL")%>%
  ggplot(aes(x=1,y=group_in_country_share,fill=green_job))+
  geom_col(color="grey30")+
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  theme_void()+
  scale_fill_manual(  values=RColorBrewer::brewer.pal(3,"Greens"))+
  labs(fill="Tipo de\nTrabajo\nVerde")
p1+p2

p1<-green_country_df_1 %>% 
  filter(country_code=="URY") %>% 
  mutate(green_job_bin=ifelse(green_job_bin==TRUE,"Verde","Otros"))%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=green_job_bin, y=group_in_country_share, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=c("gray70",RColorBrewer::brewer.pal(3,"Greens")[[2]]))+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill="Tipo de\nTrabajo")

p2<-green_country_df_2 %>% 
  filter(country_code=="URY")%>%
  ggplot(aes(x=1,y=group_in_country_share,fill=green_job))+
  geom_col(color="grey30")+
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  theme_void()+
  scale_fill_manual(  values=RColorBrewer::brewer.pal(3,"Greens"))+
  labs(fill="Tipo de\nTrabajo\nVerde")
p1+p2

Location of green jobs across regions

Code

country_var_count(south_cone_df, 
                  category = "rm",
                  country="green_job_bin") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(green_job_bin==1) %>% 
  mutate(ratio=round(group_in_country_share/group_share,2)) %>% 
  mutate(share=scales::percent(group_in_country_share,accuracy=1),
         group_share=scales::percent(group_share, accuracy=2)) %>% 
  arrange(desc(group_vacancies)) %>%
  filter(group_vacancies>500) %>% 
  ungroup() %>% 
  select(region=rm,`Job postings`=group_vacancies,`share in all data`=group_share,`share in green`=group_in_country_share,ratio) %>% 
  head(15) %>% 
  kableExtra::kable()

Table 5: Green jobs distribution, by region
region	Job postings	share in all data	share in green	ratio
Santiago	13725	24%	0.1871521	0.81
Buenos Aires (GZM)	9313	16%	0.1845914	1.17
Valparaíso	4932	8%	0.0676909	0.81
Concepción	3774	6%	0.0659096	1.03
Rosario	2316	4%	0.0503229	1.29
Gran Temuco	2125	4%	0.0210421	0.59
Coquimbo	1913	4%	0.0292808	0.91
Córdoba	1810	4%	0.0360721	1.18
Mendoza	1734	2%	0.0338455	1.16
Antofagasta	1477	2%	0.0351815	1.41
Puerto Montt	1251	2%	0.0227121	1.08
Metropolitana	1220	2%	0.0218214	1.06
Tarapacá	1168	2%	0.0237141	1.20
Corrientes	1074	2%	0.0191494	1.06
Región Metropolitana Confluencia	889	2%	0.0180361	1.20

Code

country_var_count(south_cone_df, 
                  category = "rm",
                  country="green_job_bin")%>% 
  arrange(desc(group_vacancies)) %>% 
  filter(green_job_bin==TRUE) %>% 
  mutate(green_all_ratio=group_in_country_share/group_share) %>% 
  arrange(desc(green_all_ratio)) %>%
  filter(group_vacancies>500) %>% 
  ggplot(aes(x=group_share,y=group_in_country_share))+
  geom_point()+
  geom_label_repel(aes(label=rm), size=2)+
  geom_abline(slope = 1,intercept = 0)+
  coord_fixed()+
  labs(subtitle = "Those above the line are more intensive in green vacancies",
       caption = "Cities with more than 1000 job postings",
       y="Share of green jobs postings",
       x="Share of all job postings")

Green jobs across sectors

Code

charts_sectors<-purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df %>%
         mutate(green_job=ifelse(is.na(green_job),"Not Green",green_job))%>%
         filter(main_sector %in% sectors_focus)%>%
         left_join(sector_names,by=c("main_sector"="sector"))%>%
         mutate(main_sector=sector_es),
       category = "green_job",country="main_sector")  

charts_areas<-purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df %>%
         mutate(green_job=ifelse(is.na(green_job),"Not Green",green_job)),
       category = "green_job",country="area")  

(charts_sectors[[1]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[1]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

(charts_sectors[[2]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[2]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

(charts_sectors[[3]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[3]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Remote jobs

Only 1.8% of jobs postings are classified remote. This contrast with 10% and 12% for countries like New Zeland and Australia, which aren’t much different than Chile according to remote work surveys at the firm level. This gap is puzzling.
The occupational major groups with the highest shares of remote postings are “Legal”, “Business and Financial Operations”, “Personal Care and Service” (weird), “Management”, and “Computer and Mathematical” occupations, in that order. This is different from the ranking of remote online vacancies found by Lightcast in English speaking countries: “Computer and Mathematical”, “Business and Financial Operations”, “Legal”, “Management”, and “Architecture and Engineering.”

Code

remote_country_df<-country_var_count(data = mutate(south_cone_df,
                                           remote=
                                             ifelse(remote==1,"Remote/Hybrid","In-Person")),
                             category =  "remote",country="country_code")

country_var_table(data_agg =remote_country_df, 
                  category = "remote",country="country_code", interactive=FALSE)

Table 6: Remote work distribution
	remote	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	In-Person	58174	98.18%	97.45%	98.67%	98.18%
	Remote/Hybrid	1081	1.82%	2.55%	1.33%	1.82%
sum	—	59,255.00	1.00	1.00	1.00	1.00

Code

country_var_chart(remote_country_df, 
                  category = "remote",country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Remote/Hybrid jobs distribution in online vacancies data",
       x=NULL,
       y=NULL,
       fill=NULL)

Figure 39: Remote work distribution by country

Code

country_var_chart(remote_country_df, 
                  category = "remote",country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Remote/Hybrid jobs distribution in online vacancies data",
       x=NULL,
       y=NULL,
       fill=NULL)

Code

remote_df_2<-country_var_count(data = mutate(south_cone_df,
                                             remote=
                                               ifelse(remote==1,"Remote/Hybrid","In-Person")),
                             category = "remote",country="major_group_title")

remote_df_3<-rbind(
  country_var_count(data = mutate(
    filter(south_cone_df,country_code=="ARG"),
    remote=ifelse(remote==1,"Remote/Hybrid","In-Person")),
    category = "remote",country="major_group_title") %>% 
    mutate(country_code="ARG"),
  country_var_count(data = mutate(
    filter(south_cone_df,country_code=="CHL"),
    remote=ifelse(remote==1,"Remote/Hybrid","In-Person")),
    category = "remote",country="major_group_title") %>% 
    mutate(country_code="CHL"),
  country_var_count(data = mutate(
    filter(south_cone_df,country_code=="URY"),                            
    remote=ifelse(remote==1,"Remote/Hybrid","In-Person")),
    category = "remote",country="major_group_title") %>% 
    mutate(country_code="URY")
  )%>% 
  # I don't need the share of remote jobs in each country.
  select(-group_vacancies,-group_share) %>% 
  # I want only remote shares of major groups in each country
  filter(remote=="Remote/Hybrid") 

(chart_rmw<-remote_df_2 %>% 
  filter(remote=="Remote/Hybrid") %>% 
  ggplot(aes(
    x=reorder(str_remove(major_group_title,"Occupations"),-group_in_country_share),
    y=group_in_country_share))+
  geom_col(fill="gray80")+
  geom_line(aes(y=group_share,group=remote))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of remote/hybrid job postings by SOC major group",
       x=NULL))

Code

remote_df_3 %>% 
  # attach the share of remote vacancies in each  group 
  left_join(remote_df_2 %>% 
              filter(remote=="Remote/Hybrid") %>%
              select(major_group_title,share_of_remote_in_group=group_in_country_share,
                     group_share)) %>% 
  # plot the share of remote vacancies in each group, by country
  ggplot(aes(x=reorder(str_remove(major_group_title,"Occupations"),-share_of_remote_in_group),
             y=group_in_country_share)) +
  geom_col(aes(fill=country_code))+
  geom_line(aes(y=group_share,group=remote))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  scale_fill_manual(values=country_colors)+
  facet_wrap(vars(country_code),ncol=1) + 
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of remote/hybrid job postings by SOC major group and country",
       fill=NULL,
       x=NULL)

There are much fewer job vacancies classified as remote than there should be. These charts show it’s not consistent with Ligthcast estimates US, UK, Australia and New Zeland markets (Remote Work across Jobs, Companies, and Space by Stephen Hansen, Peter John Lambert, Nick Bloom, Steven J. Davis, Raffaella Sadun, Bledi Taska :: SSRN).

Code

chart_rmw+
  geom_hline(aes(yintercept=0.02,color="NZ in 2020"))+
  geom_hline(aes(yintercept=0.04,color="NZ in 2021"))+
  geom_hline(aes(yintercept=0.1,color="NZ in 2023"))+
  labs(color="Benchmark")

bloom3 bloom4

The Global Survey of Work (GSWA) arrangements looked at 34 countries in April-May 2023 and reports that Latin American workers work from home (WFH) 0.9 days a week on average, (which coincides with the global average) while workers in New Zeland and Australia work from home an average of 1 and 1.3 days, respectively. According to this, we should expect job vacancies to show WFH rates in Chile (1.8%) to be at least close to New Zeland (10%).

The GSWA review also shows Chileans work from home more than Argentineans, which refuses our results. Of course, this could be due to the difference in sector compositions between the GSWA and online job vacancies.

How does it allign with Dingle & Neiman (2020)?

Does a 1.8% share of remote job vacancies make sense when we take into account the occupations these vacancies are concentrated in? We’ll use Dingle and Neiman (2020) definition of teleworkable occupations and calculate the percentage of online job postings that fall in that category and see whether it’s lower than the observed in English Speaking countries. Dingle and Neiman (2020) define teleworkable occupations as those not involving evidently ‘in-place’ actvities and can be perfomed remotely.

Take a look at the classification of a few occupations:

Code

onet_teleworkable<-read_csv("raw/ONET_28_0/onet_teleworkable_r.csv")

print(paste(c(
  "Not-teleworkable occupations in ONET 28.0:",
  "Teleworkable occupations in ONET 28.0:"),
  table(onet_teleworkable$teleworkable),"(",
  round(table(onet_teleworkable$teleworkable)/nrow(onet_teleworkable),2),")"
  )
  )

[1] “Not-teleworkable occupations in ONET 28.0: 565 ( 0.65 )” [2] “Teleworkable occupations in ONET 28.0: 308 ( 0.35 )”

Code

kableExtra::kable(head(select(onet_teleworkable,1:5)))

o_net_soc_code	title	n	teleworkable
11-1011.00	Chief Executives	29.50	1
11-1011.03	Chief Sustainability Officers	27.00	1
11-1021.00	General and Operations Managers	31.75	1
11-2011.00	Advertising and Promotions Managers	20.50	1
11-2021.00	Marketing Managers	39.75	1
11-2022.00	Sales Managers	23.00	1

The table below show the share of online vacancies in occupations that could be performed remotely. 42% of all job vacancies could be feasibly performed from home according the the average work context and activities of the occupations they were assigned on. It’s below the 50% share I spotted on US job postings between 2020-2021, but it’s large considering they only account for 35% of all occupational codes and around 35% of employment in the US at the onset of the pandemic.

Code

teleworkable_by_country<-south_cone_df %>%
  select(country_code,o_net_soc_2019_code) %>% 
  left_join(onet_teleworkable %>% 
              select(o_net_soc_code,teleworkable),
            by=c("o_net_soc_2019_code"="o_net_soc_code"))

teleworkable_by_country<- country_var_count(
  data = teleworkable_by_country,
  category = "teleworkable",country="country_code")

country_var_table(data=teleworkable_by_country,
                      category = "teleworkable",country="country_code",
                      interactive=FALSE)

?(caption)

	teleworkable	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	0	34097	57.54%	53.56%	60.41%	54.59%
	1	24874	41.98%	46.05%	39.05%	45.02%
	NA	284	0.48%	0.39%	0.54%	0.39%
sum	1.00	59,255.00	1.00	1.00	1.00	1.00

Code

# gap_by_group<-
#   bind_rows(
#   south_cone_df %>%
#   select(country_code,o_net_soc_2019_code,remote,major_group,major_group_title) %>% 
#   left_join(onet_teleworkable %>% 
#               select(o_net_soc_code,teleworkable),
#             by=c("o_net_soc_2019_code"="o_net_soc_code"))  %>% 
#   summarise(n=n(),remote=mean(remote),
#             teleworkable=mean(teleworkable,na.rm = T)) %>% 
#   mutate(major_group_title="Overall"),
#   south_cone_df %>%
#   select(country_code,o_net_soc_2019_code,remote,major_group,major_group_title) %>% 
#   left_join(onet_teleworkable %>% 
#               select(o_net_soc_code,teleworkable),
#             by=c("o_net_soc_2019_code"="o_net_soc_code"))  %>% 
#   group_by(major_group_title) %>% 
#   summarise(n=n(),remote=mean(remote),
#             teleworkable=mean(teleworkable,na.rm = T))
# ) %>% 
#   ungroup() %>% 
#   arrange(desc(n))
# 
# gap_by_group %>% 
#   pivot_longer(cols = c("teleworkable", "remote")) %>% 
#   ggplot(aes(x=reorder(str_remove(major_group_title," Occupations"),n),y=value))+
#   geom_line()+
#   geom_point(aes(color=name),size=3)+
#   coord_flip()+
#   labs(x=NULL)
#   
# south_cone_df %>%
#   select(country_code,o_net_soc_2019_code,occupation,remote,major_group,major_group_title) %>% 
#   left_join(onet_teleworkable %>% 
#               select(o_net_soc_code,teleworkable),
#             by=c("o_net_soc_2019_code"="o_net_soc_code"))  %>% 
#   filter(major_group_title=="Transportation and Material Moving Occupations" & teleworkable==TRUE) %>% 
#   distinct(o_net_soc_2019_code,occupation)
#

Is there any spatial concentration pattern in remote postings?

Do we see more remote postings in large or small cities? No at a glance. It’d be worth controlling for sectorial composition of employment to test this hypothesis.

Code

country_var_count(data = south_cone_df, category = "rm",country="remote") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(remote==1) %>% 
  mutate(ratio=round(group_in_country_share/group_share,2)) %>% 
  mutate(share=scales::percent(group_in_country_share,accuracy=2),
         group_share=scales::percent(group_share, accuracy=2)) %>% 
  arrange(desc(group_vacancies)) %>%
  filter(group_vacancies>500) %>% 
  ungroup() %>% 
  select(region=rm,`Job postings`=group_vacancies,`share in all data`=group_share,`share in remote`=share,ratio) %>% 
  head(15) %>% 
  kableExtra::kable()

Table 7: Remote work by Metropolitan Region
region	Job postings	share in all data	share in remote	ratio
Santiago	13725	24%	24%	1.02
Buenos Aires (GZM)	9313	16%	20%	1.22
Valparaíso	4932	8%	4%	0.47
Concepción	3774	6%	4%	0.49
Rosario	2316	4%	4%	0.95
Gran Temuco	2125	4%	0%	0.13
Coquimbo	1913	4%	2%	0.83
Córdoba	1810	4%	4%	1.54
Mendoza	1734	2%	4%	1.52
Antofagasta	1477	2%	2%	0.78
Puerto Montt	1251	2%	2%	0.53
Metropolitana	1220	2%	2%	1.39
Tarapacá	1168	2%	0%	0.47
Corrientes	1074	2%	4%	2.35
Región Metropolitana Confluencia	889	2%	2%	0.74

Code

country_var_count(data=south_cone_df,category = "rm",country="remote") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(remote==TRUE) %>% 
  mutate(remote_all_ratio=group_in_country_share/group_share) %>% 
  arrange(desc(remote_all_ratio)) %>% 
  filter(group_vacancies>1000) %>% 
  ggplot(aes(x=group_share,y=group_in_country_share))+
  geom_point()+
  geom_label_repel(aes(label=rm), size=2)+
  geom_abline(slope = 1,intercept = 0)+
  coord_fixed()+
  labs(subtitle = "Those above the line are more intensive in remote postings",
       caption = "Cities with more than 1000 job postings",
       y="Share of remote job postings",
       x="Share of all job postings")

Which firms are hiring remotely?

Code

country_var_count(data = south_cone_df, category ="remote",country= "firm") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(remote==1) %>% 
  mutate(ratio=round(group_in_country_share/group_share,2)) %>% 
  mutate(share=scales::percent(group_in_country_share,accuracy=2),
         group_share=scales::percent(group_share, accuracy=2)) %>% 
  arrange(desc(count)) %>%
  filter(country_vacancies>100) %>% 
  ungroup() %>% 
  select(Firm=firm,`Firm postings`=country_vacancies,`Remote Postings`=count,`% Remote`=share) %>% 
  head(20) %>% 
  kableExtra::kable()

Table 8: Firms hiring remote ot hybrid
Firm	Firm postings	Remote Postings	% Remote
Confidencial	2890	71	2%
Emprego	2447	67	2%
Wurth Argentina S.a	132	24	18%
Mendoza, Capital, Mendoza, Argentina	327	13	4%
Buenos Aires, CABA, Argentina	225	8	4%
Entel Empresa de Contact Center	118	7	6%
ACTIVOS CHILE	448	6	2%
ADN - Recursos Humanos	125	6	4%
Babysits	217	6	2%
Tawa	257	6	2%
Manpower Chile	677	5	0%
ManpowerGroup	308	5	2%
Adecco Chile	678	4	0%
ECRGROUP® Chile	118	3	2%
Emprego CL C2	122	3	2%
Neuquén, Argentina	105	3	2%
Progestion Chile	890	3	0%
Randstad AR	121	3	2%
Cygnus	569	2	0%
Grupo Gestión	259	2	0%

Type II Errors: Classified as non-remote nor hybrid when they are

There are plenty of cases like this. The table below shows the number of type 2 errors we found in Argentinean data:

Code

pattern<-"\\b[hH]\\w*brid"

review<-read_parquet("raw/arg_new_dict.parquet") %>%  
  distinct(firm,descrip,job_name,occupation,remote) %>% 
  filter(str_detect(descrip,pattern))

review %>% 
  mutate(error=!remote) %>% 
  group_by(`Type II Error`=error) %>% 
  summarise(Count=n(),
            Share=n()/nrow(review))%>% 
  mutate(Share=scales::percent(Share, accuracy=2)) %>% 
  kableExtra::kable()

Table 9: Postings mentioning remote or hybrid format but not classified as WFH
Type II Error	Count	Share
FALSE	95	14%
TRUE	553	86%

Below we show the list of some examples:

Code

review %>% 
  head(10) %>% 
  mutate(match_position_1=str_locate(descrip,pattern)[,"start"],
         match_position_2=str_locate(descrip,pattern)[,"end"]) %>% 
  mutate(description_extract=paste0("...",substr(descrip,match_position_1-20,match_position_2+20),"...")) %>% 
  select(Firma=firm,Position=job_name,`WFH piece`=description_extract,`Work from Home`=remote)%>% 
  kableExtra::kable()

Firma	Position	WFH piece	Work from Home
Umbral Capital Humano	Operador de flota propia \| (SJ095)	...itivo. - Modalidad híbrida. - Reales oportun...	FALSE
Umbral Capital Humano	Supervisor de Limpieza - Mendoza, Luján de Cuyo	...odalidad de trabajo híbrida...	FALSE
Umbral Capital Humano	técnicos electromecánicos y electrónicos	...itivo. - Modalidad híbrida. - Reales oportun...	FALSE
Umbral Capital Humano	Ingeniero de procesos mendoza	...odalidad de trabajo híbrida...	FALSE
Umbral Capital Humano	MZ534 Mendoza Operario de Mantenimiento Industrial	...itivo. - Modalidad híbrida. - Reales oportun...	FALSE
Umbral Capital Humano	Promotora - Activacio n en Punto de venta	...itivo. - Modalidad híbrida. - Reales oportun...	FALSE
Camera di Commercio Italiana nella Repubblica Argentina	CDC Personal de Depósito - Zona Oeste	... central. Modalidad Híbrida (2 días en las ofi...	TRUE
Adlatina Group	EMPLEADO DE MOSTRADOR, LOCAL DE SANITARIOS	...cios. El trabajo es híbrido en la zona del cen...	FALSE
Grupo Myth	Administrativo Contable	... 18hs en un esquema hibrido de 3 dias en la of...	TRUE
Camera di Commercio Italiana nella Repubblica Argentina	pasante - ingenieria de produccion	... central. Modalidad Híbrida (2 días en las ofi...	TRUE

Knowledge Jobs

33% of all online vacancies were classified as belonging to knowledge sectors.
Argentina, with a 38% is the country with the highest intensity in these job postings.
The occupational groups these knowledge vacancies belong to sound like occupational groups a knowledge firm will require to function.

Code

area_country_df_1<-country_var_count(data = south_cone_df,
                             category = "area",country="country_code")


country_var_table(area_country_df_1, category = "area",country="country_code",
                  interactive = FALSE)

?(caption)

	area	Vacancies	% of Vacancies	% of ARG	% of CHL	% of URY
	No conocimiento	39266	66.27%	61.67%	69.51%	63.86%
	Conocimiento	19989	33.73%	38.33%	30.49%	36.14%
sum	—	59,255.00	1.00	1.00	1.00	1.00

Code

country_var_chart(area_country_df_1,
                  category = "area",
                  country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Knowledge sector distribution in online vacancies data",
       x=NULL,
       fill=NULL)


country_var_chart(area_country_df_1,
                  category = "area",
                  country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Knowledge sector distribution in online vacancies data",
       x=NULL,
       fill=NULL)

Knowledge jobs across occupations

Code

area_df_2<-country_var_count(data = south_cone_df,
                             category = "area",country="major_group_title")

area_df_3<-rbind(
  country_var_count(data = filter(south_cone_df,country_code=="ARG"),
                             category = "area",country="major_group_title") %>% 
        mutate(country_code="ARG"),
  country_var_count(data = filter(south_cone_df,country_code=="CHL"),
                             category = "area",country="major_group_title")%>% 
        mutate(country_code="CHL"),
  country_var_count(data = filter(south_cone_df,country_code=="URY"),
                             category = "area",country="major_group_title")%>% 
        mutate(country_code="URY")
  )%>% 
  # I don't need the share of area jobs in each country.
  select(-group_vacancies,-group_share) %>% 
  # I want only area shares of the knowledge groups in each country
  filter(area=="Conocimiento") 

area_df_2 %>% 
  filter(area=="Conocimiento") %>% 
  ggplot(aes(x=reorder(str_remove(major_group_title,"Occupations"),-group_in_country_share),
             y=group_in_country_share))+
  geom_col(fill="gray80")+
  geom_line(aes(y=group_share,group=area))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of Knowledge job postings by SOC major group",
       x=NULL)

Code

area_df_3 %>% 
  # attach the share of remote vacancies in each  group 
  left_join(area_df_2 %>% 
              filter(area=="Conocimiento") %>%
              select(major_group_title,share_of_remote_in_group=group_in_country_share,
                     group_share)) %>% 
  # plot the share of remote vacancies in each group, by country
  ggplot(aes(x=reorder(str_remove(major_group_title,"Occupations"),-share_of_remote_in_group),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_line(aes(y=group_share,group=area))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  scale_fill_manual(values=country_colors)+
  facet_wrap(vars(country_code),ncol=1) + 
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of knowledge job postings by SOC major group and country",
       x=NULL,
       y=NULL,
       fill=NULL)

Knowledge jobs across industries

Code

area_sector_df<-country_var_count(data = south_cone_df ,
                  category = "area",country="main_sector")

area_sector_df %>% 
  filter(area=="Conocimiento") %>% 
  ggplot(aes(x=reorder(main_sector,-group_in_country_share),
             y=group_in_country_share))+
  geom_col(fill="gray80")+
  geom_line(aes(y=group_share,group=area))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of Knowledge job postings by Main Sector",
       x=NULL)

Code

area_sector_country_df<-rbind(
  country_var_count(data = filter(south_cone_df ,country_code=="ARG"),
                             category = "area",country="main_sector") %>% 
        mutate(country_code="ARG"),
  country_var_count(data = filter(south_cone_df ,country_code=="CHL"),
                             category = "area",country="main_sector")%>% 
        mutate(country_code="CHL"),
  country_var_count(data = filter(south_cone_df ,country_code=="URY"),
                             category = "area",country="main_sector")%>% 
        mutate(country_code="URY")
  )%>% 
  # I don't need the share of area jobs in each country.
  select(-group_vacancies,-group_share) %>% 
  # I want only area shares of the knowledge groups in each country
  filter(area=="Conocimiento") 

area_sector_country_df %>% 
  # attach the share of remote vacancies in each  group 
  left_join(area_sector_df %>% 
              filter(area=="Conocimiento") %>%
              select(main_sector,share_of_remote_in_group=group_in_country_share,
                     group_share)) %>% 
  # plot the share of remote vacancies in each group, by country
  ggplot(aes(x=reorder(main_sector,-share_of_remote_in_group),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_line(aes(y=group_share,group=area))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  scale_fill_manual(values=country_colors)+
  facet_wrap(vars(country_code),ncol=1) + 
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of knowledge job postings by Main Sector and country",
       x=NULL,
       y=NULL,
       fill=NULL)

Location of Knowledge jobs across regions

Code

country_var_count(data=south_cone_df,category = "rm",country="area") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(area=="Conocimiento") %>% 
  mutate(ratio=round(group_in_country_share/group_share,2)) %>% 
  mutate(share=scales::percent(group_in_country_share,accuracy=2),
         group_share=scales::percent(group_share, accuracy=2)) %>% 
  arrange(desc(group_vacancies)) %>%
  filter(group_vacancies>500) %>% 
  ungroup() %>% 
  select(Region=rm,`Job postings`=group_vacancies,
         `% in all data`=group_share,
         `% in knowledge jobs`=share,Ratio=ratio) %>% 
  head(15) %>% 
  kableExtra::kable()

?(caption)

Region	Job postings	% in all data	% in knowledge jobs	Ratio
Santiago	13725	24%	20%	0.86
Buenos Aires (GZM)	9313	16%	16%	1.07
Valparaíso	4932	8%	8%	0.88
Concepción	3774	6%	6%	0.93
Rosario	2316	4%	4%	1.14
Gran Temuco	2125	4%	4%	0.91
Coquimbo	1913	4%	2%	0.88
Córdoba	1810	4%	4%	1.17
Mendoza	1734	2%	4%	1.18
Antofagasta	1477	2%	4%	1.27
Puerto Montt	1251	2%	2%	0.97
Metropolitana	1220	2%	2%	1.10
Tarapacá	1168	2%	2%	1.16
Corrientes	1074	2%	2%	1.21
Región Metropolitana Confluencia	889	2%	2%	1.29

Regions

There are three geographic aggregation variables in the data. This is the count of unique, missing, and empty values each of these indicator has.

Code

south_cone_df %>% 
  group_by(country_code) %>% 
  select(rm, city,city_name) %>% 
  skimr::skim() %>% as_tibble() %>% 
  select(-character.min,-character.max) %>% 
  kableExtra::kable()

?(caption)

skim_type	skim_variable	country_code	complete_rate	character.empty	character.n_unique
character	rm	ARG	1	0	24
character	rm	CHL	1	0	18
character	rm	URY	1	0	6
character	city	ARG	1	6	800
character	city	CHL	1	0	239
character	city	URY	1	0	70
character	city_name	ARG	1	0	216
character	city_name	CHL	1	0	201
character	city_name	URY	1	0	48

And this is a list of the most frequent regions of each country

Code

country_var_count(data = south_cone_df, 
                  country = "country_code",
                  category = "rm")%>%
  country_region_table(country = "country_code",
                     category = "rm",top_number = 20)

Table 10: Main regions, by country
rank	ARG	CHL	URY
1	Buenos Aires (GZM), 9313 (40.4%)	Santiago, 13725 (40.2%)	Metropolitana, 1220 (60.2%)
2	Rosario, 2316 (10%)	Valparaíso, 4932 (14.4%)	Este, 347 (17.1%)
3	Córdoba, 1810 (7.8%)	Concepción, 3774 (11%)	Centro, 127 (6.3%)
4	Mendoza, 1734 (7.5%)	Gran Temuco, 2125 (6.2%)	Sur, 117 (5.8%)
5	Corrientes, 1074 (4.7%)	Coquimbo, 1913 (5.6%)	Norte, 111 (5.5%)
6	Región Metropolitana Confluencia, 889 (3.9%)	Antofagasta, 1477 (4.3%)	Noreste, 106 (5.2%)
7	Entre Ríos, 796 (3.5%)	Puerto Montt, 1251 (3.7%)	NA
8	Tucumán, 624 (2.7%)	Tarapacá, 1168 (3.4%)	NA
9	Chaco, 622 (2.7%)	Rancagua, 521 (1.5%)	NA
10	Santiago del Estero, 544 (2.4%)	Copiapó, 399 (1.2%)	NA
11	Valle de Lerma (AMVL), 435 (1.9%)	Valdivia, 396 (1.2%)	NA
12	Chubut, 414 (1.8%)	Calama, 381 (1.1%)	NA
13	San Luis, 376 (1.6%)	Arica-Paranicota, 367 (1.1%)	NA
14	Mar del Plata, 343 (1.5%)	Osorno, 361 (1.1%)	NA
15	San Fernando del Valle de Catamarca, 328 (1.4%)	Punta Arenas, 348 (1%)	NA
16	Posadas-Garupá-Candelaria (AMPGC), 265 (1.1%)	Talca, 347 (1%)	NA
17	San Salvador de Jujuy, 259 (1.1%)	Chillán, 339 (1%)	NA
18	Bahía Blanca, 250 (1.1%)	Curicó, 339 (1%)	NA
19	VIRCH-Valdés, 193 (0.8%)	NA	NA
20	La Rioja, 183 (0.8%)	NA	NA

Cities in Argentina

This is a look at the most frequent values of ‘city’ and ‘city_name’ in Argentina:

city captures lots of company names and shows low levels of detail within Buenos Aires and other regions.
I haven’t seen any company names within ‘city_name’ values. It looks like the best variable to use.

Code

## ya vimos que tiene problemas
# region_count(data = south_cone_df %>% 
#   filter(country_code=="ARG") %>% 
#   inner_join(south_cone_df %>% 
#                group_by(country_code,rm) %>% 
#                summarise(count=n()) %>% 
#                top_n(5,count)),
#   country = "rm",
#   region="city",
#   top_number = 20)

south_cone_df %>%
               filter(country_code=="ARG") %>%
               group_by(country_code,rm) %>%
               summarise(count_vacancies=n()) %>%
               top_n(10,count_vacancies) %>%
  arrange(desc(count_vacancies)) %>%
  inner_join(south_cone_df %>%
  distinct(rm,city_name) %>%
  group_by(rm) %>%
  summarise(count_cities=n()) ) %>%
  ungroup() %>% 
  select(`Región Metropolitana`=rm,
         `Vacantes`=count_vacancies,
         Ciudaddes=count_cities) %>% 
  kableExtra::kable()
country_var_count(data = south_cone_df %>% 
                    inner_join(south_cone_df %>%
                    filter(country_code=="ARG") %>% 
                    group_by(country_code,rm) %>% 
                    summarise(count=n()) %>% 
                    top_n(5,count)), 
                  country = "rm",
                  category = "city_name") %>%
  country_region_table(country = "rm",
                  category = "city_name",
                  top_number = 20)

true

Región Metropolitana	Vacantes	Ciudaddes
Buenos Aires (GZM)	9313	42
Rosario	2316	25
Córdoba	1810	50
Mendoza	1734	14
Corrientes	1074	9
Región Metropolitana Confluencia	889	11
Entre Ríos	796	14
Tucumán	624	10
Chaco	622	3
Santiago del Estero	544	5

rank	Buenos Aires (GZM)	Corrientes	Córdoba	Mendoza	Rosario
1	Buenos Aires, 521 (5.6%)	Corrientes, 306 (28.5%)	Capital, 450 (24.9%)	Mendoza, 345 (19.9%)	Santa Fe, 394 (17%)
2	Vicente López, 417 (4.5%)	Ituzaingó, 276 (25.7%)	Córdoba, 209 (11.5%)	Mendoza Capital, 298 (17.2%)	Rosario, 362 (15.6%)
3	Quilmes, 390 (4.2%)	Bella Vista, 189 (17.6%)	Río Cuarto, 175 (9.7%)	Luján de Cuyo, 217 (12.5%)	San Justo, 257 (11.1%)
4	San Isidro, 349 (3.7%)	Mercedes, 176 (16.4%)	Alta Gracia, 151 (8.3%)	Godoy Cruz, 205 (11.8%)	Granadero Baigorria, 159 (6.9%)
5	La Matanza, 338 (3.6%)	Santo Tomé, 53 (4.9%)	Monte Cristo, 89 (4.9%)	Maipú, 162 (9.3%)	Villa Gobernador Gálvez, 156 (6.7%)
6	Zárate, 336 (3.6%)	Goya, 45 (4.2%)	Malagueño, 83 (4.6%)	San Rafael, 137 (7.9%)	Arroyo Seco, 153 (6.6%)
7	Morón, 332 (3.6%)	Paso de los Libres, 14 (1.3%)	Colón, 73 (4%)	San Martín, 111 (6.4%)	Alvear, 144 (6.2%)
8	La Plata, 331 (3.6%)	Curuzú Cuatiá, 12 (1.1%)	Oncativo, 67 (3.7%)	Las Heras, 68 (3.9%)	Pérez, 140 (6%)
9	Avellaneda, 324 (3.5%)	Monte Caseros, 3 (0.3%)	Villa Allende, 67 (3.7%)	Guaymallén, 60 (3.5%)	Álvarez, 136 (5.9%)
10	General San Martín, 322 (3.5%)	NA	Villa Carlos Paz, 55 (3%)	Lavalle, 60 (3.5%)	Funes, 103 (4.4%)
11	Ezeiza, 296 (3.2%)	NA	Jesús María, 47 (2.6%)	Tupungato, 29 (1.7%)	La Capital, 77 (3.3%)
12	Campana, 294 (3.2%)	NA	La Calera, 46 (2.5%)	Tunuyán, 23 (1.3%)	Villa Constitución, 53 (2.3%)
13	Lanús, 279 (3%)	NA	Cosquín, 25 (1.4%)	General Alvear, 10 (0.6%)	General Lagos, 46 (2%)
14	Lomas de Zamora, 275 (3%)	NA	Sinsacate, 23 (1.3%)	Malargüe, 9 (0.5%)	San Lorenzo, 42 (1.8%)
15	Almirante Brown, 270 (2.9%)	NA	Mendiolaza, 21 (1.2%)	NA	Capitán Bermúdez, 29 (1.3%)
16	Isidro Casanova, 267 (2.9%)	NA	Estación Juárez Celman, 20 (1.1%)	NA	Pueblo Esther, 17 (0.7%)
17	Tigre, 251 (2.7%)	NA	Juárez Celman, 18 (1%)	NA	Fray Luis Beltrán, 16 (0.7%)
18	Esteban Echeverría, 246 (2.6%)	NA	Unquillo, 16 (0.9%)	NA	Castellanos, 11 (0.5%)
19	Luján, 245 (2.6%)	NA	Cruz del Eje, 15 (0.8%)	NA	Soldini, 6 (0.3%)
20	Escobar, 244 (2.6%)	NA	Calamuchita, 14 (0.8%)	NA	Pavón, 4 (0.2%)

Cities in Chile

This is a look at the most frequent values of ‘city’ and ‘city_name’ in Chile:

city evidently has low levels of details within Santiago and Valparaiso Regions.
city_name has more granularity, but there are lots of cases where it defaults to region name (when it doesn’t guess a city name, imputes the Region name)

Code

## Vimos que city_name es mejor
# region_count(data = south_cone_df %>% 
#   filter(country_code=="CHL") %>% 
#   inner_join(south_cone_df %>% 
#                group_by(country_code,rm) %>% 
#                summarise(count=n()) %>% 
#                top_n(5,count)),
#   country = "rm",
#   region="city",
#   top_number = 20)

south_cone_df %>%
               filter(country_code=="CHL") %>%
               group_by(country_code,rm) %>%
               summarise(count_vacancies=n()) %>%
               top_n(10,count_vacancies) %>%
  arrange(desc(count_vacancies)) %>%
  inner_join(south_cone_df %>%
  distinct(rm,city_name) %>%
  group_by(rm) %>%
  summarise(count_cities=n()) ) %>%
  ungroup() %>% 
  select(`Región Metropolitana`=rm,
         `Vacantes`=count_vacancies,
         Ciudaddes=count_cities) %>% 
  kableExtra::kable()

Región Metropolitana	Vacantes	Ciudaddes
Santiago	13725	52
Valparaíso	4932	38
Concepción	3774	33
Gran Temuco	2125	32
Coquimbo	1913	15
Antofagasta	1477	8
Puerto Montt	1251	4
Tarapacá	1168	7
Rancagua	521	2
Copiapó	399	1

Code

country_var_count(data = south_cone_df %>% 
                    inner_join(south_cone_df %>%
                    filter(country_code=="CHL") %>% 
                    group_by(country_code,rm) %>% 
                    summarise(count=n()) %>% 
                    top_n(5,count)), 
                  country = "rm",
                  category = "city_name") %>%
  country_region_table(country = "rm",
                  category = "city_name",
                  top_number = 20)

rank	Concepción	Coquimbo	Gran Temuco	Santiago	Valparaíso
1	Concepción, 470 (12.5%)	Coquimbo, 362 (18.9%)	Temuco, 404 (19%)	Ñuñoa, 497 (3.6%)	Viña del Mar, 322 (6.5%)
2	Chiguayante, 435 (11.5%)	Ovalle, 293 (15.3%)	Angol, 229 (10.8%)	Huechuraba, 469 (3.4%)	Valparaíso, 297 (6%)
3	Los Ángeles, 330 (8.7%)	La Serena, 281 (14.7%)	Villarrica, 197 (9.3%)	Renca, 441 (3.2%)	Los Andes, 290 (5.9%)
4	Talcahuano, 302 (8%)	Salamanca, 221 (11.6%)	Pucón, 184 (8.7%)	Pudahuel, 419 (3.1%)	Quilpué, 278 (5.6%)
5	Coronel, 280 (7.4%)	Illapel, 197 (10.3%)	Victoria, 179 (8.4%)	Santiago, 416 (3%)	San Antonio, 277 (5.6%)
6	Hualpén, 240 (6.4%)	Los Vilos, 137 (7.2%)	Lautaro, 150 (7.1%)	Quilicura, 405 (3%)	La Calera, 275 (5.6%)
7	San Pedro de la Paz, 211 (5.6%)	Monte Patria, 107 (5.6%)	Nueva Imperial, 98 (4.6%)	Lampa, 379 (2.8%)	Quillota, 257 (5.2%)
8	Penco, 200 (5.3%)	Canela, 103 (5.4%)	Pitrufquén, 94 (4.4%)	San Bernardo, 377 (2.7%)	San Felipe, 250 (5.1%)
9	Tomé, 175 (4.6%)	Vicuña, 63 (3.3%)	Gorbea, 56 (2.6%)	San Joaquín, 364 (2.7%)	Concón, 234 (4.7%)
10	Lota, 159 (4.2%)	Río Hurtado, 54 (2.8%)	Loncoche, 52 (2.4%)	Cerrillos, 361 (2.6%)	Casablanca, 223 (4.5%)
11	Curanilahue, 132 (3.5%)	Andacollo, 35 (1.8%)	Padre Las Casas, 52 (2.4%)	Colina, 350 (2.6%)	Limache, 221 (4.5%)
12	Lebu, 119 (3.2%)	Punitaqui, 24 (1.3%)	Collipulli, 49 (2.3%)	La Florida, 349 (2.5%)	Quintero, 219 (4.4%)
13	Cabrero, 103 (2.7%)	Combarbalá, 16 (0.8%)	Freire, 46 (2.2%)	La Reina, 341 (2.5%)	Puchuncaví, 187 (3.8%)
14	Cañete, 99 (2.6%)	La Higuera, 12 (0.6%)	Traiguén, 42 (2%)	Recoleta, 341 (2.5%)	Cartagena, 158 (3.2%)
15	Arauco, 91 (2.4%)	Paihuano, 8 (0.4%)	Carahue, 41 (1.9%)	Las Condes, 334 (2.4%)	Villa Alemana, 155 (3.1%)
16	Nacimiento, 76 (2%)	NA	Cholchol, 33 (1.6%)	Macul, 324 (2.4%)	El Quisco, 129 (2.6%)
17	Mulchén, 50 (1.3%)	NA	Curacautín, 28 (1.3%)	Vitacura, 316 (2.3%)	Olmué, 124 (2.5%)
18	Santa Juana, 40 (1.1%)	NA	Renaico, 27 (1.3%)	San Miguel, 311 (2.3%)	Llay-Llay, 121 (2.5%)
19	Laja, 37 (1%)	NA	Cunco, 24 (1.1%)	Maipú, 304 (2.2%)	Algarrobo, 108 (2.2%)
20	Florida, 33 (0.9%)	NA	Vilcún, 23 (1.1%)	Melipilla, 293 (2.1%)	Nogales, 84 (1.7%)

Cities in Uruguay

This is a look at the most frequent values of ‘city’ and ‘city_name’ in Uruguay:

city doens’t look as bad as in Argentina and Chile.
city_name offers more granularity within region “Metropolitana”

Code

## Vimos que city name es mejor
# region_count(data = south_cone_df %>% 
#   filter(country_code=="URY") %>% 
#   inner_join(south_cone_df %>% 
#                group_by(country_code,rm) %>% 
#                summarise(count=n()) %>% 
#                top_n(5,count)),
#   country = "rm",
#   region="city",
#   top_number = 20)

south_cone_df %>%
               filter(country_code=="URY") %>%
               group_by(country_code,rm) %>%
               summarise(count_vacancies=n()) %>%
               top_n(10,count_vacancies) %>%
  arrange(desc(count_vacancies)) %>%
  inner_join(south_cone_df %>%
  distinct(rm,city_name) %>%
  group_by(rm) %>%
  summarise(count_cities=n()) ) %>%
  ungroup() %>% 
  select(`Región Metropolitana`=rm,
         `Vacantes`=count_vacancies,
         Ciudaddes=count_cities) %>% 
  kableExtra::kable()

Región Metropolitana	Vacantes	Ciudaddes
Metropolitana	1220	23
Este	347	7
Centro	127	4
Sur	117	7
Norte	111	3
Noreste	106	4

Code

country_var_count(data = south_cone_df %>% 
                    inner_join(south_cone_df %>%
                    filter(country_code=="URY") %>% 
                    group_by(country_code,rm) %>% 
                    summarise(count=n()) %>% 
                    top_n(5,count)), 
                  country = "rm",
                  category = "city_name") %>%
  country_region_table(country = "rm",
                  category = "city_name",
                  top_number = 20)

rank	Centro	Este	Metropolitana	Norte	Sur
1	Florida, 77 (60.6%)	Maldonado, 177 (51%)	Montevideo, 388 (31.8%)	Salto, 54 (48.6%)	Colonia, 60 (51.3%)
2	Durazno, 24 (18.9%)	Punta del Este, 83 (23.9%)	Ciudad de la Costa, 151 (12.4%)	Paysandú, 52 (46.8%)	Dolores, 13 (11.1%)
3	Flores, 19 (15%)	Minas, 22 (6.3%)	Canelones, 115 (9.4%)	Artigas, 5 (4.5%)	Río Negro, 13 (11.1%)
4	Trinidad, 7 (5.5%)	San Carlos, 22 (6.3%)	Las Piedras, 114 (9.3%)	NA	Soriano, 11 (9.4%)
5	NA	Rocha, 16 (4.6%)	Progreso, 98 (8%)	NA	Colonia del Sacramento, 8 (6.8%)
6	NA	Treinta y Tres, 16 (4.6%)	18 de Mayo, 66 (5.4%)	NA	Fray Bentos, 7 (6%)
7	NA	Lavalleja, 11 (3.2%)	Paso Carrasco, 59 (4.8%)	NA	Mercedes, 5 (4.3%)
8	NA	NA	Barros Blancos, 55 (4.5%)	NA	NA
9	NA	NA	Santa Lucía, 34 (2.8%)	NA	NA
10	NA	NA	La Paz, 33 (2.7%)	NA	NA
11	NA	NA	Pando, 19 (1.6%)	NA	NA
12	NA	NA	San José, 16 (1.3%)	NA	NA
13	NA	NA	Toledo, 16 (1.3%)	NA	NA
14	NA	NA	Joaquín Suárez, 12 (1%)	NA	NA
15	NA	NA	Ciudad del Plata, 11 (0.9%)	NA	NA
16	NA	NA	Atlántida, 7 (0.6%)	NA	NA
17	NA	NA	Salinas, 7 (0.6%)	NA	NA
18	NA	NA	Libertad, 6 (0.5%)	NA	NA
19	NA	NA	General Líber Seregni, 4 (0.3%)	NA	NA
20	NA	NA	Parque del Plata, 3 (0.2%)	NA	NA
21	NA	NA	Tala, 3 (0.2%)	NA	NA

Firms

How many firms are in each country?5900 in Argentina, 9400 in Chile, and 789 in Uruguay.

Code

south_cone_df %>% 
  group_by(country_code) %>% 
  select(firm) %>% 
  skimr::skim() %>% 
  as_tibble() %>% 
  select(-character.min,-character.max,-skim_type) %>% 
  kableExtra::kable()

Table 11: Firms by country
skim_variable	country_code	complete_rate	character.empty	character.n_unique
firm	ARG	1	10	5983
firm	CHL	1	0	9431
firm	URY	1	0	789

Which are the most important firms across countries and regions?

Emprego en Argentina, Confidencial en Chile, Gallito Trabajo en Uruguay.
HR agencies seem to represent most of the postings (at least this month).
There are many cases where they list the place of the vacancy instead of the company. Mostly in Argentina.

Code

country_var_count(data = south_cone_df, 
                  country = "country_code",
                  category = "firm") %>%
  country_region_table(country = "country_code",
                  category = "firm",
                  top_number = 10)

?(caption)

rank	ARG	CHL	URY
1	Emprego, 2447 (10.6%)	Confidencial, 2130 (6.2%)	Gallito Trabajo, 88 (4.3%)
2	Confidencial, 748 (3.2%)	Progestion Chile, 890 (2.6%)	ManpowerGroup, 61 (3%)
3	Mendoza, Capital, Mendoza, Argentina, 327 (1.4%)	Adecco Chile, 678 (2%)	Inclusion Cloud, 48 (2.4%)
4	Adecco Argentina S.A., 261 (1.1%)	Manpower Chile, 677 (2%)	Superprof, 33 (1.6%)
5	Grupo Gestión, 259 (1.1%)	Fundación Integra, 575 (1.7%)	Aldeas Infantiles SOS Uruguay, 30 (1.5%)
6	ManpowerGroup, 230 (1%)	Cygnus, 569 (1.7%)	Advice, 26 (1.3%)
7	Buenos Aires, CABA, Argentina, 225 (1%)	ACTIVOS CHILE, 448 (1.3%)	Adecco, 25 (1.2%)
8	Tusclases, 214 (0.9%)	Eurofirms Chile, 432 (1.3%)	confidential, 24 (1.2%)
9	LatinHire, 198 (0.9%)	XinerLink, 416 (1.2%)	Randstad Uruguay, 22 (1.1%)
10	Wurth Argentina S.a, 132 (0.6%)	Walmart Chile, 296 (0.9%)	Securitas Uruguay, 22 (1.1%)

Which are the most important firms in the most demanded roles:

Code

top_occupations<-c(
  # "Architecture and Engineering Occupations",
  "Sales and Related Occupations",
  # "Healthcare Practitioners and Technical Occupations",
  "Computer and Mathematical Occupations",
  "Business and Financial Operations Occupations",
  "Office and Administrative Support Occupations"
  )


country_var_count(data = south_cone_df %>% 
                    filter(major_group_title %in% top_occupations) %>% 
                    mutate(major_group_title=str_remove(major_group_title," Occupations")), 
                  country = "major_group_title",
                  category = "firm") %>%
  country_region_table(country = "major_group_title",
                  category = "firm",
                  top_number = 10)

rank	Business and Financial Operations	Computer and Mathematical	Office and Administrative Support	Sales and Related
1	Emprego, 251 (5.3%)	Emprego, 116 (4.6%)	Confidencial, 438 (4.9%)	Confidencial, 453 (4.8%)
2	Confidencial, 212 (4.5%)	Confidencial, 96 (3.8%)	Emprego, 417 (4.6%)	Emprego, 440 (4.7%)
3	Progestion Chile, 51 (1.1%)	Recruiting from Scratch, 37 (1.5%)	Progestion Chile, 124 (1.4%)	Progestion Chile, 176 (1.9%)
4	Adecco Chile, 47 (1%)	Buenos Aires, CABA, Argentina, 36 (1.4%)	Adecco Chile, 112 (1.2%)	Adecco Chile, 113 (1.2%)
5	Mendoza, Capital, Mendoza, Argentina, 45 (1%)	Mendoza, Capital, Mendoza, Argentina, 34 (1.4%)	Fundación Integra, 107 (1.2%)	Manpower Chile, 108 (1.1%)
6	Buenos Aires, CABA, Argentina, 40 (0.9%)	Fundación Integra, 25 (1%)	Cygnus, 101 (1.1%)	Cygnus, 89 (0.9%)
7	Manpower Chile, 37 (0.8%)	Progestion Chile, 24 (1%)	Manpower Chile, 100 (1.1%)	Fundación Integra, 83 (0.9%)
8	ACTIVOS CHILE, 34 (0.7%)	Manpower Chile, 23 (0.9%)	ACTIVOS CHILE, 67 (0.7%)	XinerLink, 70 (0.7%)
9	Cygnus, 33 (0.7%)	Adecco Chile, 20 (0.8%)	Eurofirms Chile, 63 (0.7%)	Eurofirms Chile, 56 (0.6%)
10	Adecco Argentina S.A., 28 (0.6%)	Eurofirms Chile, 18 (0.7%)	XinerLink, 59 (0.7%)	ManpowerGroup, 55 (0.6%)
11	Fundación Integra, 28 (0.6%)	Inclusion Cloud, 18 (0.7%)	NA	NA

Which are the most important firms in the most active sectors:

Code

top_sectors<-c("Professional Scientific And Technical Services",
"Finance And Insurance","Retail Trade","Manufacturing")

country_var_count(data = south_cone_df %>% 
                    filter(main_sector %in% top_sectors), 
                  country = "main_sector",
                  category = "firm") %>%
  country_region_table(country = "main_sector",
                  category = "firm",
                  top_number = 10)

rank	Finance And Insurance	Manufacturing	Professional Scientific And Technical Services	Retail Trade
1	Emprego, 157 (5.2%)	Confidencial, 539 (4.9%)	Emprego, 402 (4.7%)	Confidencial, 485 (5.3%)
2	Confidencial, 128 (4.3%)	Emprego, 483 (4.4%)	Confidencial, 367 (4.3%)	Emprego, 340 (3.7%)
3	Mendoza, Capital, Mendoza, Argentina, 35 (1.2%)	Adecco Chile, 168 (1.5%)	Progestion Chile, 97 (1.1%)	Progestion Chile, 197 (2.1%)
4	Progestion Chile, 33 (1.1%)	Manpower Chile, 162 (1.5%)	Buenos Aires, CABA, Argentina, 95 (1.1%)	Adecco Chile, 113 (1.2%)
5	Adecco Chile, 31 (1%)	Progestion Chile, 149 (1.4%)	Adecco Chile, 90 (1.1%)	Manpower Chile, 108 (1.2%)
6	Cygnus, 23 (0.8%)	ACTIVOS CHILE, 121 (1.1%)	Manpower Chile, 83 (1%)	Cygnus, 106 (1.2%)
7	Buenos Aires, CABA, Argentina, 22 (0.7%)	Cygnus, 110 (1%)	Mendoza, Capital, Mendoza, Argentina, 81 (1%)	Fundación Integra, 106 (1.2%)
8	Manpower Chile, 22 (0.7%)	Fundación Integra, 85 (0.8%)	Fundación Integra, 65 (0.8%)	Eurofirms Chile, 74 (0.8%)
9	Fundación Integra, 21 (0.7%)	XinerLink, 81 (0.7%)	Eurofirms Chile, 56 (0.7%)	ACTIVOS CHILE, 71 (0.8%)
10	ManpowerGroup, 21 (0.7%)	Eurofirms Chile, 77 (0.7%)	Recruiting from Scratch, 56 (0.7%)	XinerLink, 71 (0.8%)

How concentrated are online vacancies within firms across different regions?

Code

south_cone_df %>% 
  group_by(country_code,firm) %>% 
  summarise(count=n()) %>% 
  group_by(country_code) %>%
  mutate(share=count/sum(count)) %>% 
  arrange(country_code,desc(count)) %>% 
  mutate(cum_share=cumsum(share),
         rank=row_number()) %>% 
  top_n(100,-rank) %>% 
  ggplot(aes(x=rank,y=cum_share,group=country_code,,color=country_code))+
  geom_point()+
  geom_line()+
  scale_color_manual(values=country_colors)+
  geom_label_repel(aes(label=ifelse(rank %in% c(1,2,20, 50, 70, 100),substr(firm,1,20),NA)),size=2)+
  labs(title="Cummulative share of vacancies by in 100 largest firms",
       subtitle="Vacancies in Chile and Argentina are similarly\nconcentrated within the first 100 firms",
       color=NULL,
       y="Share of country vacancies",
       x="Firm ranking  (from largest to smallest)")

Representativity Assessment (Work in progress)

Which occupations and sectors are over(under)represented in each country? We’ll compare vacancies data to Employment estimates in employment or household surveys to figure it out.

Comparing against ILOSTAT data by occupation

ILOSTAT data contains tables of employment at the ISCO 08 2-digits level. Samples for Chile and Uruguay managed to classify all occupations from the original surveys, while the Argentina’s failed to assign an ISCO 08 code to 16% of employment in the original sample.

Code

## Load ilostat data
ilostat_isco08<-read_csv("data/latest_country_isco08_2d_2022.csv") %>% 
  filter(isco08_2d!="TOTAL")

## load crosswalk
isco08_soc10_crosswalk<-read_csv("raw/catalogs_and_crosswalks/isco_soc.csv") %>% 
  janitor::clean_names()

The table below shows the correlation between employment and online job vacancies distributions.

Code

correlations() %>% 
  gt() %>% 
  fmt_percent('estimate') %>% 
  tab_header(title="Correlation between employment and online vacancies distributions",
             subtitle="Estimates correspond to Pearson's correlation coefficietns")

estimate	statistic	group
Correlation between employment and online vacancies distributions
Estimates correspond to Pearson's correlation coefficietns
41.28%	3.625998	Total
45.13%	2.261776	ARG
42.50%	2.099956	CHL
41.26%	2.025881	URY

These tables show the detailed distributions behind these correlations.

Regular comparisson

Code

table_comparisson(data = ,country = NULL)

Table 12:
Comparisson of employment and postigns distribution
major_group_title	Share of employment	Share of online vacancies	%Vacancies-%Employment Ratio
Comparing employment and online vacancies distributions
All countries
Architecture and Engineering Occupations	0.94%	5.21%	5.5567533
Sales and Related Occupations	3.79%	15.94%	4.2102031
Healthcare Practitioners and Technical Occupations	1.02%	3.75%	3.6663749
Computer and Mathematical Occupations	1.39%	4.22%	3.0369834
Business and Financial Operations Occupations	4.49%	7.94%	1.7667467
Office and Administrative Support Occupations	8.64%	15.17%	1.7549611
Protective Service Occupations	1.80%	3.05%	1.6953629
Production Occupations	5.81%	8.80%	1.5144336
Installation, Maintenance, and Repair Occupations	2.90%	3.76%	1.2938168
Management Occupations	5.68%	6.59%	1.1601441
Life, Physical, and Social Science Occupations	2.25%	2.33%	1.0335882
Educational Instruction and Library Occupations	3.37%	3.14%	0.9324198
Healthcare Support Occupations	1.76%	1.41%	0.8015756
Transportation and Material Moving Occupations	9.45%	7.25%	0.7678933
Food Preparation and Serving Related Occupations	4.36%	2.70%	0.6198655
Legal Occupations	0.39%	0.22%	0.5593416
Arts, Design, Entertainment, Sports, and Media Occupations	3.12%	1.57%	0.5012117
Personal Care and Service Occupations	2.80%	1.37%	0.4873648
Building and Grounds Cleaning and Maintenance Occupations	5.18%	2.19%	0.4225617
Construction and Extraction Occupations	6.80%	2.55%	0.3756950
Community and Social Service Occupations	3.12%	0.50%	0.1600554
Farming, Fishing, and Forestry Occupations	3.90%	0.35%	0.0904473
Military Specific Occupations	0.10%	NA	NA
Not ISCO classified	16.94%	NA	NA

Code

table_comparisson(data = major_occupation_country_emp,country = "ARG")

major_group_title	Share of employment	Share of online vacancies	%Vacancies-%Employment Ratio
Comparing employment and online vacancies distributions
For ARG
Architecture and Engineering Occupations	0.60%	6.23%	10.35614233
Computer and Mathematical Occupations	0.79%	5.06%	6.39520094
Sales and Related Occupations	3.18%	16.36%	5.13873248
Healthcare Practitioners and Technical Occupations	0.82%	3.24%	3.95113856
Business and Financial Operations Occupations	3.64%	11.00%	3.02030193
Installation, Maintenance, and Repair Occupations	2.19%	4.99%	2.27373478
Legal Occupations	0.14%	0.27%	1.91762454
Office and Administrative Support Occupations	9.11%	14.77%	1.62119261
Life, Physical, and Social Science Occupations	1.41%	2.22%	1.56959156
Production Occupations	5.32%	8.11%	1.52348341
Protective Service Occupations	1.49%	2.10%	1.40983336
Educational Instruction and Library Occupations	2.62%	3.52%	1.33973535
Management Occupations	5.34%	6.70%	1.25503176
Arts, Design, Entertainment, Sports, and Media Occupations	2.03%	2.15%	1.06075969
Healthcare Support Occupations	1.53%	1.02%	0.66777677
Personal Care and Service Occupations	2.16%	1.35%	0.62514740
Construction and Extraction Occupations	5.85%	2.99%	0.51043367
Food Preparation and Serving Related Occupations	4.23%	2.14%	0.50510655
Transportation and Material Moving Occupations	8.62%	4.14%	0.47980490
Building and Grounds Cleaning and Maintenance Occupations	3.83%	1.16%	0.30259148
Farming, Fishing, and Forestry Occupations	2.08%	0.30%	0.14349291
Community and Social Service Occupations	2.40%	0.17%	0.07215078
Not ISCO classified	30.58%	NA	NA

Code

table_comparisson(data = major_occupation_country_emp, country = "CHL")

major_group_title	Share of employment	Share of online vacancies	%Vacancies-%Employment Ratio
Comparing employment and online vacancies distributions
For CHL
Sales and Related Occupations	4.64%	15.63%	3.36787179
Healthcare Practitioners and Technical Occupations	1.29%	4.23%	3.26505462
Architecture and Engineering Occupations	1.40%	4.55%	3.25307830
Office and Administrative Support Occupations	7.22%	15.36%	2.12709996
Protective Service Occupations	2.19%	3.74%	1.70306387
Computer and Mathematical Occupations	2.22%	3.42%	1.54277627
Production Occupations	6.37%	9.39%	1.47418350
Management Occupations	6.27%	6.64%	1.05882171
Business and Financial Operations Occupations	5.67%	5.82%	1.02631875
Transportation and Material Moving Occupations	10.24%	9.46%	0.92350870
Healthcare Support Occupations	2.01%	1.71%	0.85187665
Installation, Maintenance, and Repair Occupations	3.80%	2.91%	0.76539088
Life, Physical, and Social Science Occupations	3.42%	2.44%	0.71363564
Food Preparation and Serving Related Occupations	4.61%	3.08%	0.66863742
Educational Instruction and Library Occupations	4.25%	2.75%	0.64809475
Building and Grounds Cleaning and Maintenance Occupations	6.58%	2.91%	0.44253335
Personal Care and Service Occupations	3.58%	1.33%	0.37077225
Construction and Extraction Occupations	7.95%	2.20%	0.27700706
Arts, Design, Entertainment, Sports, and Media Occupations	4.58%	1.14%	0.24942855
Legal Occupations	0.68%	0.17%	0.24581679
Community and Social Service Occupations	4.02%	0.73%	0.18198460
Farming, Fishing, and Forestry Occupations	6.02%	0.39%	0.06469844
Military Specific Occupations	0.19%	NA	NA
Not ISCO classified	0.81%	NA	NA

Code

table_comparisson(data = major_occupation_country_emp,country = "URY")

major_group_title	Share of employment	Share of online vacancies	%Vacancies-%Employment Ratio
Comparing employment and online vacancies distributions
For URY
Computer and Mathematical Occupations	1.41%	7.89%	5.59560888
Architecture and Engineering Occupations	0.98%	4.64%	4.70884645
Sales and Related Occupations	3.75%	16.57%	4.42004178
Business and Financial Operations Occupations	4.56%	8.73%	1.91473729
Healthcare Practitioners and Technical Occupations	1.09%	1.48%	1.36309776
Office and Administrative Support Occupations	12.87%	16.42%	1.27614057
Educational Instruction and Library Occupations	4.31%	5.47%	1.27073174
Installation, Maintenance, and Repair Occupations	3.47%	4.09%	1.17921679
Protective Service Occupations	2.06%	2.42%	1.17537027
Production Occupations	6.47%	6.66%	1.02831731
Management Occupations	4.99%	4.34%	0.86954934
Life, Physical, and Social Science Occupations	2.32%	1.73%	0.74346507
Food Preparation and Serving Related Occupations	3.98%	2.76%	0.69387131
Personal Care and Service Occupations	3.56%	2.27%	0.63723797
Legal Occupations	0.66%	0.39%	0.59995729
Arts, Design, Entertainment, Sports, and Media Occupations	3.57%	2.02%	0.56672538
Transportation and Material Moving Occupations	11.42%	5.52%	0.48340585
Construction and Extraction Occupations	7.75%	3.55%	0.45799856
Healthcare Support Occupations	2.13%	0.69%	0.32365365
Building and Grounds Cleaning and Maintenance Occupations	7.92%	1.73%	0.21780797
Community and Social Service Occupations	3.70%	0.30%	0.07990463
Farming, Fishing, and Forestry Occupations	6.25%	0.35%	0.05519691
Military Specific Occupations	0.42%	NA	NA
Not ISCO classified	0.36%	NA	NA

These charts give you a straigthforward view:

Code

major_occupation_emp%>% 
  ggplot(aes(x=employment_share,y=group_in_country_share))+
  geom_point()+
  ggrepel::geom_label_repel(aes(label=ifelse(rank(gap)>16 | rank(gap)<4, str_remove(major_group_title," Occupations"),NA)),
                            size=3)+
  coord_fixed()+
  geom_smooth(method="lm",se = FALSE, aes(linetype="OLS"))+
  geom_abline(aes(slope=1,intercept=0),alpha=0.2)+
  labs(x = "Share of employment (ILOSTATS)",
       y = "Share of Online Vacancies (IDB)",
       linetype=NULL)

Code

major_occupation_country_emp%>% 
  ggplot(aes(x=employment_share,y=group_in_country_share))+
  geom_abline(aes(slope=1,intercept=0),alpha=0.2)+
  geom_point(aes(color=country_code))+
  facet_wrap(vars(country_code))+
  ggrepel::geom_label_repel(aes(label=ifelse(rank(gap)>55 | rank(gap)<8, str_remove(major_group_title," Occupations"),NA)),
                            size=3)+
  scale_color_manual(values=country_colors)+
  labs(x = "Share of employment (ILOSTATS)",
       y = "Share of Online Vacancies (IDB)",
       color="Country",
       linetype=NULL)

These charts shows the change in rankings from one database to the other:

Rank comparisson

Code

rank_comparisson(data=major_occupation_emp, 
                             category="major_group_title", 
                             country=NULL)

Code

rank_comparisson(data=major_occupation_country_emp, 
                             category="major_group_title", 
                             country="ARG")

Code

rank_comparisson(data=major_occupation_country_emp, 
                             category="major_group_title", 
                             country="CHL")

Code

rank_comparisson(data=major_occupation_country_emp, 
                             category="major_group_title", 
                             country="URY")

Comparing against ILOSTAT data by sector

Code

sector_emp<-latest_country_EC2d  %>% 
  left_join(naics_isic_2d_fixed ,by=c("EC2d"="isic_code")) %>% 
  # there are many naics for the same isic. we avoid double counting by splitting employment
  group_by(country_code,EC2d) %>% 
    mutate(n_naics_outputs=n()) %>% 
  # we calculate employment by country and naics.
  group_by(naics_2d, naics_2d_desc) %>% 
  summarise(employment=sum(obs_value/n_naics_outputs,na.rm = T)) %>%
  # we calculate employment share
  ungroup() %>% 
    mutate(employment_share=employment/sum(employment)) %>% 
  # format variable in a specific way in order to match bid data
  mutate(main_sector=str_remove_all(naics_2d_desc,","),
         main_sector=str_remove_all(main_sector,"[()]"),
         main_sector=str_to_title(main_sector)) 

sector_country_emp<-latest_country_EC2d  %>% 
  left_join(naics_isic_2d_fixed ,by=c("EC2d"="isic_code")) %>% 
  # there are many naics for the same isic. we avoid double counting by splitting employment
  group_by(country_code,EC2d) %>% 
    mutate(n_naics_outputs=n()) %>% 
  # we calculate employment by country and naics.
  group_by(country_code,naics_2d, naics_2d_desc) %>% 
    summarise(employment=sum(obs_value/n_naics_outputs,na.rm = T)) %>%
  # we calculate employment share
  group_by(country_code) %>% 
    mutate(employment_share=employment/sum(employment)) %>% 
    ungroup() %>% 
  # format variable in a specific way in order to match bid data
  mutate(main_sector=str_remove_all(naics_2d_desc,","),
         main_sector=str_remove_all(main_sector,"[()]"),
         main_sector=str_to_title(main_sector))

c('total in ILOSTAT'=sum(latest_country_EC2d$obs_value,na.rm = T),
  'total after crosswalk'=sum(sector_emp$employment),
  'total after crosswalk (country)'=sum(sector_country_emp$employment))


sector_country_emp<-sector_country_emp %>% 
  left_join(country_var_count(data = south_cone_df,
                  country = 'country_code',
                  category = 'main_sector'), by=c("country_code","main_sector")) %>% 
  mutate(gap=(group_in_country_share/employment_share))

sector_emp<-sector_country_emp%>% 
    group_by(naics_2d,main_sector) %>% 
    summarise(employment=sum(employment,na.rm = T),
              count=sum(count)) %>% 
    ungroup() %>% 
    mutate(employment_share=employment/sum(employment,na.rm = T),
           group_in_country_share=count/sum(count,na.rm = T),
           gap=(group_in_country_share/employment_share))

The table below shows the correlation between employment and online job vacancies distributions.

Code

correlations(data = sector_country_emp) %>% 
  gt() %>% 
  fmt_percent('estimate') %>% 
  tab_header(title="Correlation between employment and online vacancies distributions",
             subtitle="Estimates correspond to Pearson's correlation coefficietns")

estimate	statistic	group
Correlation between employment and online vacancies distributions
Estimates correspond to Pearson's correlation coefficietns
48.01%	4.131854	Total
33.71%	1.519098	ARG
65.31%	3.659099	CHL
43.44%	1.988377	URY

Regular comparisson

Code

table_comparisson(data = sector_emp,category = "main_sector",country = NULL)

Table 13:
Comparisson of employment and postigns distribution
main_sector	Share of employment	Share of online vacancies	%Vacancies-%Employment Ratio
Comparing employment and online vacancies distributions
All countries
Real Estate And Rental And Leasing	0.44%	1.55%	3.52315105
Professional Scientific And Technical Services	4.09%	14.38%	3.51429835
Finance And Insurance	1.77%	5.07%	2.86879512
Manufacturing	8.18%	18.48%	2.25771694
Retail Trade	10.38%	15.54%	1.49668407
Educational Services	8.31%	11.41%	1.37212519
Health Care And Social Assistance	6.86%	8.16%	1.18872261
Accommodation And Food Services	3.83%	4.42%	1.15255035
Government	3.80%	3.87%	1.01800309
Administrative And Support Services	7.06%	5.89%	0.83453484
Transportation And Warehousing	3.12%	2.26%	0.72347901
Information	1.56%	1.02%	0.65294551
Construction	7.50%	3.04%	0.40564957
Utilities	1.68%	0.43%	0.25485583
Mining Quarrying And Oil And Gas Extraction	1.33%	0.31%	0.23651480
Other Services Except Public Administration	10.96%	2.11%	0.19240156
Arts Entertainment And Recreation	2.29%	0.42%	0.18460085
Wholesale Trade	7.03%	1.26%	0.17944930
Agriculture Forestry Fishing And Hunting	4.82%	0.38%	0.07952526
Management Of Companies And Enterprises	0.69%	NA	NA
NA	4.29%	NA	NA

Code

table_comparisson(data = sector_country_emp,category = "main_sector",country = "ARG")

main_sector	Share of employment	Share of online vacancies	%Vacancies-%Employment Ratio
Comparing employment and online vacancies distributions
For ARG
Professional Scientific And Technical Services	3.62%	16.84%	4.65414729
Finance And Insurance	2.00%	6.52%	3.25211316
Real Estate And Rental And Leasing	0.38%	1.15%	3.00939420
Retail Trade	5.16%	13.64%	2.64016873
Manufacturing	9.44%	18.44%	1.95395607
Educational Services	7.97%	12.43%	1.55954184
Health Care And Social Assistance	6.78%	6.79%	1.00171185
Government	4.70%	4.30%	0.91476051
Accommodation And Food Services	3.49%	3.17%	0.91001921
Transportation And Warehousing	3.00%	2.30%	0.76785696
Information	2.11%	1.25%	0.59321785
Mining Quarrying And Oil And Gas Extraction	0.46%	0.26%	0.56951984
Administrative And Support Services	7.76%	4.36%	0.56148507
Construction	8.59%	3.17%	0.36856659
Other Services Except Public Administration	12.43%	2.85%	0.22950854
Agriculture Forestry Fishing And Hunting	2.41%	0.36%	0.15140259
Arts Entertainment And Recreation	2.42%	0.34%	0.14181843
Utilities	1.55%	0.21%	0.13673441
Wholesale Trade	11.96%	1.61%	0.13452701
Management Of Companies And Enterprises	0.68%	0.01%	0.01277311
NA	3.10%	NA	NA

Code

slides_cat_var_chart(agg_data = sector_country_emp %>%
                       filter( !is.na(main_sector) & country_code=="ARG"),
                     category = "main_sector",
                     country="country_code")+
  scale_fill_manual(values = country_colors,guide='none')+ 
  coord_flip()+
  labs( y="Porcentaje de vacantes",
        fill=NULL,
        x=NULL)

Code

table_comparisson(data = sector_country_emp,category = "main_sector", country = "CHL")

main_sector	Share of employment	Share of online vacancies	%Vacancies-%Employment Ratio
Comparing employment and online vacancies distributions
For CHL
Real Estate And Rental And Leasing	0.52%	1.83%	3.499289010
Manufacturing	6.38%	18.76%	2.939475187
Professional Scientific And Technical Services	4.57%	12.45%	2.723058494
Finance And Insurance	1.52%	4.09%	2.689493710
Wholesale Trade	0.51%	1.05%	2.050729898
Government	2.50%	3.57%	1.427876157
Health Care And Social Assistance	6.64%	9.18%	1.383287005
Accommodation And Food Services	4.38%	5.17%	1.181238044
Administrative And Support Services	6.00%	7.03%	1.172018295
Educational Services	9.06%	10.54%	1.163625581
Retail Trade	17.47%	16.90%	0.966934217
Information	0.90%	0.85%	0.935105262
Transportation And Warehousing	3.24%	2.22%	0.686318979
Construction	6.34%	2.93%	0.461564532
Utilities	1.85%	0.58%	0.312499935
Arts Entertainment And Recreation	2.08%	0.49%	0.235997633
Other Services Except Public Administration	8.83%	1.60%	0.181272953
Mining Quarrying And Oil And Gas Extraction	2.71%	0.36%	0.134022207
Agriculture Forestry Fishing And Hunting	7.26%	0.40%	0.054859893
Management Of Companies And Enterprises	0.67%	0.01%	0.008738326
NA	6.56%	NA	NA

Code

slides_cat_var_chart(agg_data = sector_country_emp %>%
                       filter( !is.na(main_sector) & country_code=="CHL"),
                     category = "main_sector",
                     country="country_code")+
  scale_fill_manual(values = country_colors,guide='none')+ 
  coord_flip()+
  labs( y="Porcentaje de vacantes",
        fill=NULL,
        x=NULL)

Code

table_comparisson(data = sector_country_emp,category="main_sector",country = "URY")

main_sector	Share of employment	Share of online vacancies	%Vacancies-%Employment Ratio
Comparing employment and online vacancies distributions
For URY
Finance And Insurance	1.31%	5.03%	3.85118969
Professional Scientific And Technical Services	5.07%	18.98%	3.74763739
Real Estate And Rental And Leasing	0.44%	1.43%	3.21842519
Educational Services	7.08%	14.40%	2.03433042
Manufacturing	8.24%	14.05%	1.70642774
Accommodation And Food Services	3.56%	5.82%	1.63607161
Information	0.96%	1.33%	1.39199897
Retail Trade	12.14%	14.35%	1.18195545
Government	3.94%	4.14%	1.05027185
Health Care And Social Assistance	8.54%	6.41%	0.75054988
Transportation And Warehousing	3.41%	2.37%	0.69317417
Construction	5.53%	3.55%	0.64209629
Administrative And Support Services	7.38%	4.14%	0.56155648
Utilities	1.78%	0.39%	0.22162829
Other Services Except Public Administration	11.05%	2.17%	0.19633118
Wholesale Trade	4.44%	0.79%	0.17769403
Mining Quarrying And Oil And Gas Extraction	0.64%	0.10%	0.15290897
Arts Entertainment And Recreation	2.49%	0.20%	0.07929429
Agriculture Forestry Fishing And Hunting	9.75%	0.35%	0.03540332
Management Of Companies And Enterprises	0.87%	NA	NA
NA	1.39%	NA	NA

Code

slides_cat_var_chart(agg_data = sector_country_emp %>%
                       filter( !is.na(main_sector) & country_code=="URY"),
                     category = "main_sector",
                     country="country_code")+
  scale_fill_manual(values = country_colors,guide='none')+ 
  coord_flip()+
  labs( y="Porcentaje de vacantes",
        fill=NULL,
        x=NULL)

Code

sector_emp%>% 
  ggplot(aes(x=employment_share,y=group_in_country_share))+
  geom_point()+
  ggrepel::geom_label_repel(aes(label=ifelse(rank(gap)>16 | rank(gap)<4, main_sector,NA)),
                            size=3)+
  geom_smooth(method="lm",se = FALSE, aes(linetype="OLS"))+
  geom_abline(aes(slope=1,intercept=0),alpha=0.2)+
  labs(x = "Share of employment (ILOSTATS)",
       y = "Share of Online Vacancies (IDB)",
       linetype=NULL)

Code

sector_country_emp%>% 
  ggplot(aes(x=employment_share,y=group_in_country_share))+
  geom_abline(aes(slope=1,intercept=0),alpha=0.2)+
  geom_point(aes(color=country_code))+
  facet_wrap(vars(country_code))+
  ggrepel::geom_label_repel(aes(label=ifelse(rank(gap)>55 | rank(gap)<8, main_sector,NA)),
                            size=3)+
  scale_color_manual(values=country_colors)+
  labs(x = "Share of employment (ILOSTATS)",
       y = "Share of Online Vacancies (IDB)",
       color="Country",
       linetype=NULL)

These charts shows the change in rankings from one database to the other:

Rank comparisson

Code

rank_comparisson(data=sector_emp, 
                             category="main_sector", 
                             country=NULL)

Code

rank_comparisson(data=sector_country_emp, 
                             category="main_sector", 
                             country="ARG")

Code

rank_comparisson(data=sector_country_emp, 
                             category="main_sector", 
                             country="CHL")

Code

rank_comparisson(data=sector_country_emp, 
                             category="main_sector", 
                             country="URY")

Appendix

Slides

Code

## Pongamos labels en español.
library(readxl)
sector_names<-readxl::read_excel("data/traducciones.xlsx", sheet=1)%>%
  janitor::clean_names()%>%
  rename(sector=1, sector_es=2)
abilities_names<-readxl::read_excel("data/traducciones.xlsx", sheet=2)%>%
  janitor::clean_names()%>%
  rename(abilities=1, habilidades=2)
subabilities_names<-readxl::read_excel("data/traducciones.xlsx", sheet=3)%>%
  janitor::clean_names()%>%
  rename(subabilities=1, subhabilidades=2)
major_group_names<-readxl::read_excel("data/traducciones.xlsx", sheet=4)%>%
  janitor::clean_names()%>%
  rename(major_group_title=1, major_group_title_es=2)
sector_rubro<-readxl::read_excel("data/traducciones.xlsx", sheet=5)%>%
  janitor::clean_names()

Sectores

Número de vacantes ponderado por la importancia de cada sector

Code

rama_df %>% 
  distinct(sector,group_share,group_vacancies) %>% 
  filter(sector %in% sectors_focus)%>%
  left_join(sector_names)%>%
  mutate(sector=sector_es)%>%
  ggplot(aes(x=reorder(sector,
                       group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste0(round(group_vacancies)," (",round(group_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  coord_flip()+
  scale_y_continuous(labels=scales::percent_format(),limits = c(0,0.2))+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

Code

rama_df %>% 
  filter(country_code=="ARG") %>% 
  left_join(sector_names)%>%
  mutate(sector=sector_es)%>%
  ggplot(aes(x=reorder(sector,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

rama_df %>% 
  filter(country_code=="CHL") %>% 
  left_join(sector_names)%>%
  mutate(sector=sector_es)%>%
  ggplot(aes(x=reorder(sector,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

rama_df %>% 
  filter(country_code=="URY") %>% 
  left_join(sector_names)%>%
  mutate(sector=sector_es)%>%
  ggplot(aes(x=reorder(sector,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Porcentaje de vacantes donde cada sector es la más importante.

Code

main_sector_df%>% 
  filter(main_sector%in%sectors_focus)%>%
  left_join(sector_names, by=c("main_sector"="sector"))%>%
  mutate(main_sector=sector_es)%>%
  ggplot(aes(x=reorder(main_sector,group_share),y=group_share))+
  geom_col(fill="gray50")+
geom_text(aes(label=paste0(round(group_vacancies)," (",round(group_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  scale_y_continuous(labels=scales::percent_format(), limits = c(0,.2))+
  coord_flip()+
  labs(y="Porcentaje de Vacantes",
       x=NULL)

Ocupaciones

Top Occupational groups

Code

major_group_by_cty %>% 
  filter(country_code=="ARG") %>% 
  left_join(major_group_names)%>%
  mutate(major_group_title=major_group_title_es)%>%
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(str_remove(major_group_title,"Ocupaciones de"),
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")


major_group_by_cty %>% 
  filter(country_code=="CHL") %>% 
  left_join(major_group_names)%>%
  mutate(major_group_title=major_group_title_es)%>%
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(str_remove(major_group_title,"Ocupaciones de"),
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

major_group_by_cty %>% 
  filter(country_code=="URY") %>% 
  left_join(major_group_names)%>%
  mutate(major_group_title=major_group_title_es)%>%
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(str_remove(major_group_title,"Ocupaciones de"),
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Top occupations

Code

## frecuency table of occupations
onet_job_df<-south_cone_df %>%
    group_by(onet_job)%>%
    summarise(group_vacancies=n())%>%
    ungroup()%>%
    mutate(group_share=group_vacancies/sum(group_vacancies))%>%
    arrange(desc(group_vacancies)) 

## frequency table of occupations, by country
onet_job_by_cty<-south_cone_df %>%
  group_by(country_code,onet_job)%>%
  summarise(count=n() ) %>%
  ungroup() %>% 
  left_join(onet_job_df %>%
              select(onet_job,group_vacancies))  %>% 
  left_join(country_code_df %>%
              select(country_code,country_vacancies))  %>%
  mutate(group_in_country_share=count/country_vacancies,
         country_in_group_share=count/group_vacancies) %>% 
  ungroup()


onet_job_by_cty %>% 
  filter(country_code=="ARG") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(onet_job,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(count<50~paste0(round(group_in_country_share,2)*100,"%"),
                                TRUE~paste0(count," (",round(group_in_country_share,2)*100,"%)")),
                y=group_in_country_share/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Code

onet_job_by_cty %>% 
  filter(country_code=="CHL") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(onet_job,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(count<50~paste0(round(group_in_country_share,2)*100,"%"),
                                TRUE~paste0(count," (",round(group_in_country_share,2)*100,"%)")),
                y=group_in_country_share/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Code

onet_job_by_cty %>% 
  filter(country_code=="URY") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(onet_job,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(count<50~paste0(round(group_in_country_share,2)*100,"%"),
                                TRUE~paste0(count," (",round(group_in_country_share,2)*100,"%)")),
                y=group_in_country_share/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Habilidades

Número de vacantes ponderado por la importancia de cada habilidad

Code

Blue_sensorial="#64a3c9"
Orange_cognitivo="#fe9b2d"
Blue_psicomotor="#5c7f97"
Yellow_fisico="#ffb615"

abilities_colors=c("Habilidades Cognitivas"=Orange_cognitivo,
                   "Habilidades Físicas"=Yellow_fisico,
                   "Habilidades Psicomotoras"=Blue_psicomotor,
                   "Habilidades Sensoriales"=Blue_sensorial)
# Hole size
hsize <- 1

country_abilities_df %>% 
  filter(country_code=="ARG") %>%
  left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=abilities, y=group_in_country_share_sum, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share_sum,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=abilities_colors)+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill=NULL)

country_abilities_df %>% 
  filter(country_code=="CHL") %>%
  left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=abilities, y=group_in_country_share_sum, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share_sum,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=abilities_colors)+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill=NULL)

country_abilities_df %>% 
  filter(country_code=="URY") %>%
  left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=abilities, y=group_in_country_share_sum, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share_sum,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=abilities_colors)+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill=NULL)

Porcentaje de vacantes que requiere cada habilidad con probabilidad mayor a cero.

Code

country_abilities_df %>% 
  filter(country_code=="ARG") %>% 
  left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  ggplot(aes(x=reorder(abilities,
                       -group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=10
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")


country_abilities_df %>% 
  filter(country_code=="CHL") %>% 
   left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  ggplot(aes(x=reorder(abilities,
                       -group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=10
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

country_abilities_df %>% 
  filter(country_code=="URY") %>% 
   left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  ggplot(aes(x=reorder(abilities,
                       -group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=10
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Subhabilidades

Top 5 subhabilidades dentro de cada pais

Code

country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="ARG"),
                                       country = NULL,
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
  mutate(country_code="ARG")%>%
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(subabilities_names)%>%
  mutate(subabilities=subhabilidades)%>%
  top_n(10,group_share_sum) %>% 
  ggplot(aes(x=reorder(subabilities,
                       group_share_sum),
             y=group_share_sum))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(group_vacancies_sum <50~
                                  paste0(round(group_share_sum,
                                                      2)*100,"%"),
                                TRUE~paste0(round(group_vacancies_sum )," (",
                                      round(group_share_sum,2)*100,"%)")),
                y=group_share_sum/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Code

country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="CHL"),
                                       country = NULL,
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
  mutate(country_code="CHL")%>%
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(subabilities_names)%>%
  mutate(subabilities=subhabilidades)%>%
  top_n(10,group_share_sum) %>% 
  ggplot(aes(x=reorder(subabilities,
                       group_share_sum),
             y=group_share_sum))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(group_vacancies_sum <50~
                                  paste0(round(group_share_sum,
                                                      2)*100,"%"),
                                TRUE~paste0(round(group_vacancies_sum )," (",
                                      round(group_share_sum,2)*100,"%)")),
                y=group_share_sum/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Code

country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="URY"),
                                       country = NULL,
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
  mutate(country_code="URY")%>%
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(subabilities_names)%>%
  mutate(subabilities=subhabilidades)%>%
  top_n(10,group_share_sum) %>% 
  ggplot(aes(x=reorder(subabilities,
                       group_share_sum),
             y=group_share_sum))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(group_vacancies_sum <50~
                                  paste0(round(group_share_sum,
                                                      2)*100,"%"),
                                TRUE~paste0(round(group_vacancies_sum )," (",
                                      round(group_share_sum,2)*100,"%)")),
                y=group_share_sum/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Top 5 Subhabilidades dentro de cada grupo de habilidades

Code

purrr::map(c("ARG","CHL","URY"),
           slides=TRUE,
       skills_barchart,
       data_agg=country_subabilities_df%>%
        left_join(subabilities_names, by="subabilities")%>%
         mutate(subabilities=subhabilidades)%>%
        left_join(abilities_names, by=c("ability"="abilities"))%>%
        mutate(ability=habilidades)%>%
          group_by(country_code,ability) %>% 
          top_n(5,count)
       )

[[1]]


[[2]]


[[3]]

Habilidades demandadas por sector por pais

Code

p1<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="ARG"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades)  %>%
    ggplot(aes(x=main_sector,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

p2<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="ARG"),
    country = 'area',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(abilities=habilidades)  %>%
    ggplot(aes(x=area,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

(p1+ theme(axis.text.x = element_blank(),
           axis.ticks.x = element_blank()))+
  (p2)+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Code

p1<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="CHL"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades)  %>%
    ggplot(aes(x=main_sector,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

p2<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="CHL"),
    country = 'area',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(abilities=habilidades)  %>%
    ggplot(aes(x=area,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

(p1+ theme(axis.text.x = element_blank(),
           axis.ticks.x = element_blank()))+
  (p2)+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Code

p1<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="URY"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades)  %>%
    ggplot(aes(x=main_sector,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

p2<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="URY"),
    country = 'area',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(abilities=habilidades)  %>%
    ggplot(aes(x=area,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

(p1+ theme(axis.text.x = element_blank(),
           axis.ticks.x = element_blank()))+
  (p2)+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Code

sector_skills_matrix(
  data_agg = country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="ARG"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades),
  ability_val = NULL,
  metric = "mean")+
  labs(fill="Importancia \nPromedio",
       title=NULL,
       x=NULL,
       y=NULL)

Code

sector_skills_matrix(
  data_agg = country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="CHL"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades),
  ability_val = NULL,
  metric = "mean")+
  labs(fill="Importancia \nPromedio",
       title=NULL,
       x=NULL,
       y=NULL)

Code

sector_skills_matrix(
  data_agg = country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="URY"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades),
  ability_val = NULL,
  metric = "mean")+
  labs(fill="Importancia \nPromedio",
       title=NULL,
       x=NULL,
       y=NULL)

Top 5 Sub habilidades por sector por pais

Code

country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="ARG"),
                                       country = 'main_sector',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
  mutate(subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of ")) %>%
  filter(main_sector %in% sectors_focus)%>%
  left_join(sector_names,by=c("main_sector"="sector"))%>%
  mutate(main_sector=sector_es)%>%
  bind_rows(
    country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="ARG"),
                                       country = 'area',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
      filter(area=="Conocimiento")%>%
    mutate(main_sector=area)%>%
      mutate(
       subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of "))
  )%>%
  left_join(subabilities_names, by="subabilities")%>%
  mutate(subabilities=subhabilidades)%>%
  mutate(country_code="ARG")%>%
  group_by(main_sector)%>% 
  top_n(5,promedio)%>%
  ungroup()%>%
  ggplot(aes(x=reorder(subabilities,promedio),y=promedio))+
  geom_col(aes(fill=country_code))+
  scale_fill_manual(values=country_colors, guide="none")+
  coord_flip()+
  facet_wrap(vars(str_wrap(main_sector,30)),ncol = 2, scales = "free")+
  scale_y_continuous(labels = scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de vacantes")

Code

country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="CHL"),
                                       country = 'main_sector',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
  mutate(subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of ")) %>%
  filter(main_sector %in% sectors_focus)%>%
  left_join(sector_names,by=c("main_sector"="sector"))%>%
  mutate(main_sector=sector_es)%>%
  bind_rows(
    country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="CHL"),
                                       country = 'area',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
      filter(area=="Conocimiento")%>%
    mutate(main_sector=area)%>%
      mutate(
       subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of "))
  )%>%
  left_join(subabilities_names, by="subabilities")%>%
  mutate(subabilities=subhabilidades)%>%
  mutate(country_code="CHL")%>%
  group_by(main_sector)%>% 
  top_n(5,promedio)%>%
  ungroup()%>%
  ggplot(aes(x=reorder(subabilities,promedio),y=promedio))+
  geom_col(aes(fill=country_code))+
  scale_fill_manual(values=country_colors, guide="none")+
  coord_flip()+
  facet_wrap(vars(str_wrap(main_sector,30)),ncol = 2, scales = "free")+
  scale_y_continuous(labels = scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de vacantes")

Code

country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="URY"),
                                       country = 'main_sector',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
  mutate(subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of ")) %>%
  filter(main_sector %in% sectors_focus)%>%
  left_join(sector_names,by=c("main_sector"="sector"))%>%
  mutate(main_sector=sector_es)%>%
  bind_rows(
    country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="URY"),
                                       country = 'area',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
      filter(area=="Conocimiento")%>%
    mutate(main_sector=area)%>%
      mutate(
       subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of "))
  )%>%
  left_join(subabilities_names, by="subabilities")%>%
  mutate(subabilities=subhabilidades)%>%
  mutate(country_code="URY")%>%
  group_by(main_sector)%>% 
  top_n(5,promedio)%>%
  ungroup()%>%
  ggplot(aes(x=reorder(subabilities,promedio),y=promedio))+
  geom_col(aes(fill=country_code))+
  scale_fill_manual(values=country_colors, guide="none")+
  coord_flip()+
  facet_wrap(vars(str_wrap(main_sector,30)),ncol = 2, scales = "free")+
  scale_y_continuous(labels = scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de vacantes")

Top occupations of most focus sectors

Code

country_var_count(data = south_cone_df %>% 
                    inner_join(
                      south_cone_df %>%
                        filter(main_sector %in% sectors_focus)%>%
                      group_by(main_sector,onet_job) %>% 
                      summarise(count=n()) %>% 
                      top_n(5,count)), 
                  country = "main_sector",
                  category = "onet_job") %>%
  country_region_table(country = "main_sector",
                  category = "onet_job",
                  top_number = 20)

?(caption)

rank	Agriculture Forestry Fishing And Hunting	Construction	Health Care And Social Assistance	Information	Mining Quarrying And Oil And Gas Extraction	Professional Scientific And Technical Services	Transportation And Warehousing
1	Trabajadores de Pesca y Caza, 46 (26.6%)	Operadores de Grúas y Torres, 238 (26.8%)	Recepcionistas y Oficinistas de Información, 392 (24.2%)	Directores / Gerentes técnicos de Medios de Comunicación, 122 (30%)	Ingenieros Petroleros, 59 (40.7%)	Gerentes Generales y de Operaciones, 1341 (38.7%)	Gerentes de Transporte, Almacenamiento, y Distribución, 251 (37.1%)
2	Supervisores Directos de Trabajadores de Ocupaciones Relacionadas con la Agricultura, la Pesca, y la Silvicultura, 45 (26%)	Electricistas, 226 (25.5%)	Supervisores Directos de Empleados de Oficina y de Apoyo Administrativo, 367 (22.6%)	Agentes de Venta de Publicidad, 114 (28%)	Operadores de Perforadoras Giratorias, Petróleo y Gas, 29 (20%)	Contadores y Auditores, 963 (27.8%)	Cargadores de Vagones, Camiones y Barcos Tanque, 119 (17.6%)
3	Operadores de Equipo de Tala Forestal, 41 (23.7%)	Ayudantes de Trabajadores de Ocupaciones Relacionadas con la Instalación, Mantenimiento y Reparación, 189 (21.3%)	Enfermeros Graduados, 342 (21.1%)	Asistentes de Vestuario, 76 (18.7%)	Operadores de Torres de Perforación, Petróleo y Gas, 20 (13.8%)	Especialistas en Apoyo Técnico para Usuarios de Computadoras, 409 (11.8%)	Agentes de Carga y Flete, 113 (16.7%)
4	Operadores de Equipo Agrícola, 23 (13.3%)	Obreros de la Construcción, 136 (15.3%)	Niñeras, 267 (16.5%)	Instaladores y Reparadores de Equipo de Telecomunicaciones, 61 (15%)	Operadores de Máquinas de Minería de Operación Continua, 19 (13.1%)	Analistas de Gestión, 382 (11%)	Operadores de Camiones y Tractores Industriales, 105 (15.5%)
5	Trabajadores y Jornaleros Agrícolas, de Cultivos, de Viveros y de Invernaderos, 18 (10.4%)	Supervisores Directos de Trabajadores de Oficios de Construcción y Extracción, 98 (11%)	Psicólogos Clínicos y de Consejería, 255 (15.7%)	Instaladores y Reparadores de Equipos de Radio, Telefonía Celular y Torres, 34 (8.4%)	Ayudantes de Trabajadores de Ocupaciones Relacionadas con la Extracción, 18 (12.4%)	Analistas de Investigación de Mercado y Especialistas en Mercadeo, 369 (10.7%)	Supervisores de Transporte Aéreo de Carga, 88 (13%)

Actividades del conocimiento

Code

var_count(data = south_cone_df,
                             category = "area")%>%
  ggplot(aes(x=reorder(area,
                       group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste0(round(group_vacancies)," (",round(group_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  # coord_flip()+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

Code

# c("ARG","CHL","URY"),

country_var_count(data = south_cone_df,
                  country = "country_code",
                             category = "area")%>%
  filter(country_code=="ARG")%>%
  ggplot(aes(x=reorder(area,
                       group_share),
             y=group_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste0(round(count)," (",round(group_in_country_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

Code

country_var_count(data = south_cone_df,
                  country = "country_code",
                             category = "area")%>%
  filter(country_code=="CHL")%>%
  ggplot(aes(x=reorder(area,
                       group_share),
             y=group_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste0(round(count)," (",round(group_in_country_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

Code

country_var_count(data = south_cone_df,
                  country = "country_code",
                             category = "area")%>%
  filter(country_code=="URY")%>%
  ggplot(aes(x=reorder(area,
                       group_share),
             y=group_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste0(round(count)," (",round(group_in_country_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

PIB y Empleo por Sector

PIB

Code

library(readxl)
chile_raw <- read_excel("raw/chl_macro/Cuadro_18122023133746.xlsx", 
    sheet = "Cuadro", skip = 2)%>%
  janitor::clean_names()

# CEPAL: Producto interno bruto (PIB) anual por actividad económica a precios corrientes en dólares (Millones de dólares)
# https://statistics.cepal.org/portal/cepalstat/dashboard.html?theme=2&lang=es
# CEPAL / Comisión Económica para América Latina y el Caribe / Estimaciones basadas en fuentes oficiales

cepal_raw <- read_excel("raw/cepal/data_1703074551.xlsx")%>%
  janitor::clean_names()%>%
  mutate(country_code=case_when(pais_estandar=="Argentina"~"ARG",
                                pais_estandar=="Chile"~"CHL",
                                pais_estandar=="Uruguay"~"URY"))%>%
  left_join(sector_rubro%>%
              distinct(rubro,rubro_resumen),
            c("rubro_sector_cuentas_nacionales_anuales"="rubro"))

print("Uruguay tiene un sector menos: explotacion de minas y canteras (incluye extraccion de petroleo crudo y gas natural")

[1] "Uruguay tiene un sector menos: explotacion de minas y canteras (incluye extraccion de petroleo crudo y gas natural"

Code

cepal_raw %>%
  distinct(pais_estandar,rubro_resumen)%>%
  group_by(pais_estandar) %>%
  count()

# A tibble: 3 × 2
# Groups:   pais_estandar [3]
  pais_estandar     n
  <chr>         <int>
1 Argentina        10
2 Chile            10
3 Uruguay           9

Code

cepal_pib<-cepal_raw %>%
  filter(rubro_sector_cuentas_nacionales_anuales!="Producto interno bruto (PIB)") %>%
  group_by(country_code)%>%
  mutate(share=value/sum(value))

Code

cepal_pib%>%
  filter(country_code=="ARG")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),share),
             y=share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(share <1~
                                paste0(round(share,
                                                    2)*100,"%"),
                              TRUE~paste0(scales::number(round(value/1000 ),
                                                          prefix = "USD",
                                                         big.mark = ".") ,
                                          " MM (",
                                    round(share,2)*100,"%)")),
              y=share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de PIB")

Code

cepal_pib%>%
  filter(country_code=="CHL")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),share),
             y=share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(share <1~
                                paste0(round(share,
                                                    2)*100,"%"),
                              TRUE~paste0(scales::number(round(value/1000 ),
                                                          prefix = "USD",
                                                         big.mark = ".") ,
                                          " MM (",
                                    round(share,2)*100,"%)")),
              y=share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de PIB")

Code

cepal_pib%>%
  filter(country_code=="URY")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),share),
             y=share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(share <1~
                                paste0(round(share,
                                                    2)*100,"%"),
                              TRUE~paste0(scales::number(round(value/1000 ),
                                                          prefix = "USD",
                                                         big.mark = ".") ,
                                          " MM (",
                                    round(share,2)*100,"%)")),
              y=share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de PIB")

EMPLEO

Code

rubro_country_emp<-sector_country_emp%>%
  left_join(sector_names, by=c("main_sector"="sector"))%>%
  left_join(sector_rubro, by=c("sector_es"="sector"))%>%
  mutate(rubro_resumen=ifelse(is.na(rubro_resumen),"Sin Clasfificar",
                              rubro_resumen))%>%
  group_by(country_code, rubro_resumen)%>%
  summarise(employment=sum(employment),
            group_vacancies=sum(group_vacancies,na.rm = T))%>%
  group_by(country_code)%>%
  mutate(employment_share=employment/sum(employment),
         group_share=group_vacancies/sum(group_vacancies))


rubro_country_emp%>%
    filter(country_code=="ARG")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),employment_share),
             y=employment_share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(employment_share <1~
                                paste0(round(employment_share,
                                                    2)*100,"%"),
                              TRUE~paste0(round(employment ) ,
                                          " (",
                                    round(employment_share,2)*100,"%)")),
              y=employment_share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de empleo")

Code

rubro_country_emp%>%
    filter(country_code=="CHL")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),employment_share),
             y=employment_share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(employment_share <1~
                                paste0(round(employment_share,
                                                    2)*100,"%"),
                              TRUE~paste0(round(employment ) ,
                                          " (",
                                    round(employment_share,2)*100,"%)")),
              y=employment_share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de empleo")

Code

rubro_country_emp%>%
    filter(country_code=="URY")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),employment_share),
             y=employment_share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(employment_share <1~
                                paste0(round(employment_share,
                                                    2)*100,"%"),
                              TRUE~paste0(round(employment ) ,
                                          " (",
                                    round(employment_share,2)*100,"%)")),
              y=employment_share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de empleo")

PIB y Empleo

Code

rubro_country_emp%>%
  ungroup()%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="ARG")%>%
  select(rubro_resumen,employment_share,share)%>%
  arrange(desc(share))%>%
  gt::gt()%>%
  gt::fmt_percent(columns = c("share","employment_share"))%>%
  gt::cols_label( .list = list(
    "rubro_resumen"="Rubro",
    "employment_share"="Porcentaje de Empleo",
    "share"="Porcentaje de PIB"
  ))

Rubro	Porcentaje de Empleo	Porcentaje de PIB
Comercio, Reparación y Hostelería	17.12%	21.75%
Servicios Públicos, Bienestar Social y Apoyo Comunitario	37.78%	21.63%
Manufactura	9.44%	19.14%
Servicios Empresariales, Financieros e Inmobiliarios	16.56%	14.38%
Actividades Agropecuarias	2.41%	7.92%
Transporte, Almacenamiento y Comunicaciones	3.00%	5.51%
Construcción	8.59%	4.44%
Hidrocarburos y Mineria	0.46%	4.25%
Suministro de Electricidad, Gas y Agua	1.55%	0.98%
Sin Clasfificar	3.10%	NA

Code

rubro_country_emp%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="ARG")%>%
  ggplot(aes(x=employment_share,
             y=share))+
  ggrepel::geom_text_repel(aes(label=rubro_resumen),size=3)+
  geom_point(aes(fill=country_code),shape=21,size=3)+
  scale_fill_manual(values = country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  scale_x_continuous(labels=scales::percent_format())+
  labs(y="Porcentaje de PIB (2022)",
       x="Porcentaje de Empleo (2022)")

Code

rubro_country_emp%>%
  ungroup()%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="CHL")%>%
  select(rubro_resumen,employment_share,share)%>%
  arrange(desc(share))%>%
  gt::gt()%>%
  gt::fmt_percent(columns = c("share","employment_share"))%>%
  gt::cols_label( .list = list(
    "rubro_resumen"="Rubro",
    "employment_share"="Porcentaje de Empleo",
    "share"="Porcentaje de PIB"
  ))

Rubro	Porcentaje de Empleo	Porcentaje de PIB
Servicios Empresariales, Financieros e Inmobiliarios	14.19%	22.44%
Servicios Públicos, Bienestar Social y Apoyo Comunitario	33.49%	18.27%
Hidrocarburos y Mineria	2.71%	15.86%
Comercio, Reparación y Hostelería	17.99%	11.85%
Manufactura	6.38%	10.85%
Transporte, Almacenamiento y Comunicaciones	3.24%	7.88%
Construcción	6.34%	6.55%
Actividades Agropecuarias	7.26%	3.95%
Suministro de Electricidad, Gas y Agua	1.85%	2.36%
Sin Clasfificar	6.56%	NA

Code

rubro_country_emp%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="CHL")%>%
  ggplot(aes(x=employment_share,
             y=share))+
  ggrepel::geom_text_repel(aes(label=rubro_resumen),size=3)+
  geom_point(aes(fill=country_code),shape=21,size=3)+
  scale_fill_manual(values = country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  scale_x_continuous(labels=scales::percent_format())+
  labs(y="Porcentaje de PIB (2022)",
       x="Porcentaje de Empleo (2022)")

Code

rubro_country_emp%>%
  ungroup()%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="URY")%>%
  select(rubro_resumen,employment_share,share)%>%
  arrange(desc(share))%>%
  gt::gt()%>%
  gt::fmt_percent(columns = c("share","employment_share"))%>%
  gt::cols_label( .list = list(
    "rubro_resumen"="Rubro",
    "employment_share"="Porcentaje de Empleo",
    "share"="Porcentaje de PIB"
  ))

Rubro	Porcentaje de Empleo	Porcentaje de PIB
Servicios Públicos, Bienestar Social y Apoyo Comunitario	36.66%	32.60%
Comercio, Reparación y Hostelería	16.58%	16.37%
Servicios Empresariales, Financieros e Inmobiliarios	16.02%	13.59%
Manufactura	8.24%	11.77%
Transporte, Almacenamiento y Comunicaciones	3.41%	9.20%
Actividades Agropecuarias	9.75%	8.46%
Construcción	5.53%	5.41%
Suministro de Electricidad, Gas y Agua	1.78%	2.60%
Hidrocarburos y Mineria	0.64%	NA
Sin Clasfificar	1.39%	NA

Code

rubro_country_emp%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="URY")%>%
  ggplot(aes(x=employment_share,
             y=share))+
  ggrepel::geom_text_repel(aes(label=rubro_resumen),size=3)+
  geom_point(aes(fill=country_code),shape=21,size=3)+
  scale_fill_manual(values = country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  scale_x_continuous(labels=scales::percent_format())+
  labs(y="Porcentaje de PIB (2022)",
       x="Porcentaje de Empleo (2022)")

Discussing occupational codes in online job postings data [DONE]

The occupation classification system is O*NET SOC 19.
O*NET SOC 19 is compatible with SOC 18.
SOC 18 allows us to classify jobs into occupational major and minor groups, as well as to use wage estimates of the US to categorize them into high, medium, and low wage occupations.
More importantly, SOC 18 groups are compatible with SOC 10 groups, and SOC 10 groups are compatible with IDB occupational groups
There are 9 occupations without the proper occupational title in English. That must be due to an error in the ETL process. Will notify Eric.
The Uruguay file doesn’t have the Dynamic Flexibility sub ability.
There are vacancies with null sector weights (don’t belong to any sector.) Will notify Eric.

**How we made sure O*NET SOC 19 was used to name the occupations?**

We load the list of occupational titles in Argentina, Urugay and Chile vacancies’ samples and compare it with the official O*NET SOC 19 catalog.

We find a perfect match.

Code

# load three files and keep occupations data. store in csv for later
idb_occupations<-rbind(
   read_parquet("raw/arg_new_dict.parquet")|>
   distinct(occupation,onet_job),
   read_parquet("raw/chl_new_dict.parquet")|>
   distinct(occupation,onet_job)   ,
   read_parquet("raw/ury_new_dict.parquet")|>
    distinct(occupation,onet_job)
)|>
distinct()
write_csv(idb_occupations,"data/idb_occupations_in_data.csv")

Code

# load onet catelog
onet_19<-read_csv("raw/catalogs_and_crosswalks/onet_2019_occupations.csv")
# load the afore created list of occupations in vacancy data
idb_occupations<-read_csv("data/idb_occupations_in_data.csv")

Code

# isolate english titles (the official ones)
ibd_occupations_en<-idb_occupations%>%
    distinct(occupation)

# count how many occupations are in the vacancies data
ar_occs<-nrow(ibd_occupations_en)

# check if they match the onetsoc19 crosswalk
occs_in_onet_19<-ibd_occupations_en|>
    select(occupation)|>
    inner_join(onet_19, by=c("occupation"="onet_soc_title_19"))|>
    nrow()

print(paste("There are ",ar_occs,"occupations in the arg, ury and chl data"))

[1] "There are  758 occupations in the arg, ury and chl data"

Code

print(paste(occs_in_onet_19,"of these occupations were found in SOC O*NET 19 catalog"))

[1] "757 of these occupations were found in SOC O*NET 19 catalog"

Code

ibd_occupations_en|>
    select(occupation)|>
    anti_join(onet_19, by=c("occupation"="onet_soc_title_19"))|>
    select(occupation)

# A tibble: 1 × 1
  occupation
  <chr>     
1 <NA>

Some examples of the ONET SOC 19 codes found are

Code

ibd_occupations_en|>
    select(occupation)|>
    inner_join(onet_19, by=c("occupation"="onet_soc_title_19"))|>
    head(15)

# A tibble: 15 × 3
   occupation                                  onet_soc_code_19 onet_soc_desc_19
   <chr>                                       <chr>            <chr>           
 1 Labor Relations Specialists                 13-1075.00       Resolve dispute…
 2 Foreign Language and Literature Teachers, … 25-1124.00       Teach languages…
 3 Construction and Building Inspectors        47-4011.00       Inspect structu…
 4 Accountants and Auditors                    13-2011.00       Examine, analyz…
 5 Retail Salespersons                         41-2031.00       Sell merchandis…
 6 Cooks, Restaurant                           35-2014.00       Prepare, season…
 7 Cashiers                                    41-2011.00       Receive and dis…
 8 Chefs and Head Cooks                        35-1011.00       Direct and may …
 9 Executive Secretaries and Executive Admini… 43-6011.00       Provide high-le…
10 Industrial Engineering Technologists and T… 17-3026.00       Apply engineeri…
11 First-Line Supervisors of Production and O… 51-1011.00       Directly superv…
12 Waiters and Waitresses                      35-3031.00       Take orders and…
13 Potters, Manufacturing                      51-9195.05       Operate product…
14 Aircraft Mechanics and Service Technicians  49-3011.00       Diagnose, adjus…
15 Multiple Machine Tool Setters, Operators, … 51-4081.00       Set up, operate…

Every occupation in spanish should have its’ english counterpart. Some doesn’t

Code

# There are occupation titles in spanish (onet_job) with no occupation title in english (occupation)
print('There are occupation titles in spanish (onet_job) with no occupation title in english (occupation)')

[1] "There are occupation titles in spanish (onet_job) with no occupation title in english (occupation)"

Code

idb_occupations%>%
    filter(is.na(occupation))  %>%
    head(30)

# A tibble: 27 × 2
   occupation onet_job                                                      
   <chr>      <chr>                                                         
 1 <NA>       Conductores de Vehículos de Servicios de Transporte y Choferes
 2 <NA>       Analistas Financieros y de Inversiones                        
 3 <NA>       Diseñadores de Programas Software                             
 4 <NA>       Auxiliares Docentes de Educación Especial                     
 5 <NA>       Técnicos de Emergencias Médicas                               
 6 <NA>       Científico de Datos                                           
 7 <NA>       Maestros de Educación Especial de Jardín de Infantes          
 8 <NA>       Gerentes de Instalaciones                                     
 9 <NA>       Analistas Forenses Digitales                                  
10 <NA>       Administradores de Seguridad                                  
# ℹ 17 more rows

We are able to map these O*NET SOC 19 to SOC 18 detailed, and major occupations

Code

# read onet_soc19 to soc18 crosswalk
onetsoc19_soc18_crosswalk<-read_csv("raw/catalogs_and_crosswalks/onet_2019_to_soc_18_crosswalk.csv")|>
    janitor::clean_names()

# attach soc18 broad, minor groups data
soc18_groups<-read_csv("raw/catalogs_and_crosswalks/soc_structure_2018_clean.csv")|>
    janitor::clean_names()%>%
    rename( x2018_soc_code=1 ,x2018_soc_title=2)

# there is an almost perfect match ( no duplicates, only one missing)
onetsoc19_soc18_full_crosswalk<-onetsoc19_soc18_crosswalk %>%
    inner_join(soc18_groups)

# this is the result
head(onetsoc19_soc18_full_crosswalk)

# A tibble: 6 × 10
  o_net_soc_2019_code o_net_soc_2019_title        x2018_soc_code x2018_soc_title
  <chr>               <chr>                       <chr>          <chr>          
1 11-1011.00          Chief Executives            11-1011        Chief Executiv…
2 11-1011.03          Chief Sustainability Offic… 11-1011        Chief Executiv…
3 11-1021.00          General and Operations Man… 11-1021        General and Op…
4 11-1031.00          Legislators                 11-1031        Legislators    
5 11-2011.00          Advertising and Promotions… 11-2011        Advertising an…
6 11-2021.00          Marketing Managers          11-2021        Marketing Mana…
# ℹ 6 more variables: broad_group <chr>, broad_group_title <chr>,
#   minor_group <chr>, minor_group_title <chr>, major_group <chr>,
#   major_group_title <chr>

Code

# this is the occupation without soc 18 information
onetsoc19_soc18_crosswalk %>%
    anti_join(soc18_groups)

# A tibble: 2 × 4
  o_net_soc_2019_code o_net_soc_2019_title        x2018_soc_code x2018_soc_title
  <chr>               <chr>                       <chr>          <chr>          
1 33-3051.00          Police and Sheriff's Patro… 33-3051        Police and She…
2 33-3051.04          Customs and Border Protect… 33-3051        Police and She…

Understanding Uruguay demand by occupation

There is demand for personal service, but not so much for healthcare highly technical services. However, sample size is so small one needs to be cautions when drawing conclusions about these sectors. Specially for Uruguay, it’s best to focus on larger sample occupational groups like “Sales and Related”, “Office and Administrative Support” etc.

Code

contratictory_major_soc<- c("Personal Care and Service Occupations","Healthcare Support Occupations",
                                  "Healthcare Practitioners and Technical Occupations")

south_cone_df %>% 
  filter(country_code=="URY") %>% 
  filter(major_group_title %in% contratictory_major_soc) %>% 
  group_by(major_group_title, occupation) %>% 
  summarise(postings=n()) %>% 
  group_by(major_group_title) %>%
  top_n(5,postings) %>% 
  left_join(south_cone_df %>% 
            filter(country_code=="URY") %>% 
            group_by(occupation) %>% 
            summarize(mean_zones=mean(zones))) %>% 
  ungroup() %>% 
  head(15) %>% 
  gt()

major_group_title	occupation	postings	mean_zones
Healthcare Practitioners and Technical Occupations	Acupuncturists	3	5
Healthcare Practitioners and Technical Occupations	Dentists, General	3	5
Healthcare Practitioners and Technical Occupations	Ophthalmologists, Except Pediatric	6	5
Healthcare Practitioners and Technical Occupations	Orthodontists	3	5
Healthcare Practitioners and Technical Occupations	Pharmacists	5	5
Healthcare Practitioners and Technical Occupations	Registered Nurses	3	4
Healthcare Support Occupations	Home Health Aides	1	2
Healthcare Support Occupations	Nursing Assistants	4	3
Healthcare Support Occupations	Personal Care Aides	7	2
Healthcare Support Occupations	Pharmacy Aides	2	2
Personal Care and Service Occupations	Childcare Workers	6	2
Personal Care and Service Occupations	Costume Attendants	3	2
Personal Care and Service Occupations	First-Line Supervisors of Personal Service Workers	15	3
Personal Care and Service Occupations	Manicurists and Pedicurists	3	2
Personal Care and Service Occupations	Nannies	13	2

Google jobs abilities compared to O*NET’s

The prevalence of subabilities in online job vacancies can by grouped by occupation and contrasted with the level and importance scores O*NET Analysts assigned to each subability in each occupation profile.

If we find that online vacancies in the South Cone require different skills than what O*NET experts said is important to perform at a job we’ll have an interesting discussion about what the Vacancies Minning algorithm is doing and how different are the same occupations across different countries.

Code

# how important is that they have acceptable proficiency in this ability
abilties_importance<-read_delim("raw/ONET_28_0/Abilities.txt") %>% 
  janitor::clean_names() %>% 
  filter(scale_id=="IM")

# How good people must be at this ability
abilties_level<-read_delim("raw/ONET_28_0/Abilities.txt") %>% 
  janitor::clean_names() %>% 
  filter(scale_id=="LV")


# How many abilities in our vacancies data are in ONET
table(str_replace_all(subabilities,"_"," ") %in%
        unique(abilties_importance$element_name))

# How many onet abilities are in our vacancies data?
table(unique(abilties_importance$element_name) %in%
        str_replace_all(subabilities,"_"," "))

Code

# I average subabilities weights in job vacancies data by occupation (ONETsoc19)
onet_soc_19_df<-south_cone_df %>% 
  select(doc_id, occupation,o_net_soc_2019_code, subabilities) %>% 
  group_by(o_net_soc_2019_code) %>% 
  summarise(across(subabilities, mean)) %>% 
  # traspose the data
  pivot_longer(cols = subabilities,
               names_to = "element_name",
               values_to = "idb_value") %>% 
  # remove _ in element_name
  mutate(element_name=str_replace_all(element_name,"_"," ")) %>% 
  # I join the importance and level scores in onet
  left_join(abilties_importance %>% 
              select(o_net_soc_code,element_name,
                     importance_value=data_value,
                     importance_sd=standard_error,
                     importance_n=n),
            by= c("o_net_soc_2019_code"="o_net_soc_code",
                                       "element_name"="element_name")) %>% 
  left_join(abilties_level %>% 
              select(o_net_soc_code,element_name,
                     level_value=data_value,
                     level_sd=standard_error,
                     level_n=n),
            by= c("o_net_soc_2019_code"="o_net_soc_code",
                                       "element_name"="element_name")) 
  
library(GGally)
onet_soc_19_df %>% 
  select(ends_with("value")) %>% 
  ggpairs()+
  labs(title = "There is a strong correlation between IDB abilities scores and O*NET importance and level indicators",
       subtitle = "Each dot is an occupation-ability combination")

Discussing the sector (rama) information available in online job postings data (DONE)

The names of the 20 presented ramas coincide with NAICS 2-digits classifications. Most LAC data sources show employment estimates by industry in ISIC or ISIC-related codes.

Interestingly, the sector or “Rama” is across multiple columns and each doc_id can have multiple values. There isn’t a categorical classification of the rama each firm belongs to, but rather a continuous one, where there are wegiths representing the chances a firm belongs to each sector.

They don’t assing a category, but rather a 20 positions vector that gives probabilities from 0 to 1 to each vacancy.

I found a couple of puzzling things in the data.:

There are doc_ids with no prediction. They don’t belong to any sector.
There are (ties) doc_ids with the same positive prediciton. This turns makes any attempt to assign only one sector to each posting a little polemic.

Code

library(arrow)
library(dplyr)
library(tidyr)

df_ar<-read_parquet("raw/arg_new_dict.parquet")

sectors<-c("accommodation_and_food_services", "administrative_and_support_services",
"agriculture_forestry_fishing_and_hunting",
"arts_entertainment_and_recreation",
 "construction", "educational_services","finance_and_insurance",
 "government" , "health_care_and_social_assistance",
 "information", "management_of_companies_and_enterprises",
 "manufacturing", "mining_quarrying_and_oil_and_gas_extraction",
 "other_services_except_public_administration",
 "professional_scientific_and_technical_services",
 "real_estate_and_rental_and_leasing", 
 "transportation_and_warehousing",
 "utilities", "wholesale_trade","retail_trade")

Code

test1<-df_ar |>  
    select(doc_id,sectors)|>
    pivot_longer(cols=2:21,
             names_to="sector",
             values_to="value")|>
    # Assume zero means not in this sector.
    # filter(value!=0)        
    mutate(is_zero= ifelse(value==0,"Is zero","Not zero"))%>%
    group_by(doc_id,is_zero) %>%
    count()%>%
    ungroup()

# lo guardo como referencia
write_csv(test1,"data/pregunta_ramas_per_doc_id_arg.csv")


# The representative document has a value different than 0 in 2.24 industries  
test1%>%
    group_by(is_zero)%>%
    summarise(mean_casos=mean(n))

# A tibble: 2 × 2
  is_zero  mean_casos
  <chr>         <dbl>
1 Is zero       17.8 
2 Not zero       2.24

Code

# If I keep the max value for each document I see that 
max_total_rama<-df_ar |>  
    select(doc_id,sectors)|>
    pivot_longer(cols=2:21,
             names_to="sector",
             values_to="value")|>
    group_by(doc_id)|>
    mutate(total=sum(value))%>%
    filter(value==max(value))%>%
    mutate(n=n())%>%
    ungroup()

head(max_total_rama)

# A tibble: 6 × 5
  doc_id                   sector                              value total     n
  <chr>                    <chr>                               <dbl> <dbl> <int>
1 IcBBOjuXZT8BiD2XAAAAAA== other_services_except_public_admin…    79    79     1
2 ct1JrwQ54xivPQpxAAAAAA== educational_services                   99    99     1
3 bo7X77th8vwAAAAAAAAAAA   government                             43    78     1
4 72DNO-KNkrEAAAAAAAAAAA   government                             43    78     1
5 kqPOGYOTJTYAAAAAAAAAAA   professional_scientific_and_techni…    31    31     1
6 If6uaYoqh6CHO4dpAAAAAA== other_services_except_public_admin…    79    79     1

Code

### Facts:
print(paste("Total documents:", nrow(df_ar)))

[1] "Total documents: 23435"

Code

print(paste("Documents with a prediction",max_total_rama%>%
    filter(value>0)%>%
    distinct(doc_id)%>%
    nrow()))

[1] "Documents with a prediction 23069"

Code

print(paste("Documents without a prediction",max_total_rama%>%
    filter(value==0)%>%
    distinct(doc_id)%>%
    nrow()))

[1] "Documents without a prediction 366"

Code

print(paste("Documents with more than 1 prediction",
            "(excluding those with no prediction)",
    max_total_rama%>%
    filter(value>0)%>%
    filter(n>1)%>%
    nrow()))

[1] "Documents with more than 1 prediction (excluding those with no prediction) 2260"

Code

# examples of these cases:
max_total_rama%>%
    filter(value>0)%>%
    filter(n>1)

# A tibble: 2,260 × 5
   doc_id                   sector                             value total     n
   <chr>                    <chr>                              <dbl> <dbl> <int>
 1 vubaCpc3QxIAAAAAAAAAAA   other_services_except_public_admi…    21    66     2
 2 vubaCpc3QxIAAAAAAAAAAA   professional_scientific_and_techn…    21    66     2
 3 FAVNbJ4eY_wLOcg4AAAAAA== professional_scientific_and_techn…    12    24     2
 4 FAVNbJ4eY_wLOcg4AAAAAA== retail_trade                          12    24     2
 5 EzJVBKu6nSupL4lkAAAAAA== professional_scientific_and_techn…    12    24     2
 6 EzJVBKu6nSupL4lkAAAAAA== retail_trade                          12    24     2
 7 Af-RP22V5xoAAAAAAAAAAA   professional_scientific_and_techn…    12    24     2
 8 Af-RP22V5xoAAAAAAAAAAA   retail_trade                          12    24     2
 9 SXIxuzy1_lUAAAAAAAAAAA   professional_scientific_and_techn…    12    24     2
10 SXIxuzy1_lUAAAAAAAAAAA   retail_trade                          12    24     2
# ℹ 2,250 more rows

Code

print(paste("Documents with 100% certainty on 1 prediction",
    max_total_rama%>%
    filter(total==100)%>%
    nrow()))

[1] "Documents with 100% certainty on 1 prediction 167"