Data exploration report

A whiteboard to understand the data, test ideas, and shape our deliverables

Author

Carlos Daboin Contreras

Code
# libraries
library(arrow)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(stringr)

# source file with recurrent functions
source("etl_and_viz_functions.R")

# Set colors for plots
Grey<-"#9c938e"     #18.7%

Yellow<-"#fdb517"   #8.9 %
Blue<-"#005476"     #7.9 %
Blue_2<-"#0386b5"
Yellow_2<-"#fcecb3" #1.4 %
Yellow_3<-"#f1c259" #1.4 %
Blue_3<-"#b0cbd2"   #1.2 %
Blue_4<-"#45788c"   #1.2 %

#country colors
country_colors<-c("ARG"=Blue_2,"CHL"=Blue,"URY"=Yellow)

binary_colors<-c(Blue_4,Yellow_3)

sector_vars<-c('accommodation_and_food_services',
 'administrative_and_support_services', 'agriculture_forestry_fishing_and_hunting','arts_entertainment_and_recreation', 'construction', 'educational_services','finance_and_insurance',
 'government' , 'health_care_and_social_assistance', 'information', 'management_of_companies_and_enterprises', 'manufacturing',
 'mining_quarrying_and_oil_and_gas_extraction', 'other_services_except_public_administration',
 'professional_scientific_and_technical_services','real_estate_and_rental_and_leasing', 
 'transportation_and_warehousing', 'utilities','wholesale_trade', 'retail_trade')
work_vars<-c('occupation','onet_job','schedule', 'zones', 'remote','green_job','area')
origin_vars<-c('doc_id','date_posted','country_code','firm','source','rm','city','city_name')
ability_vars<-c('Cognitive Abilities','Sensory Abilities','Physical Abilities','Psychomotor Abilities')
subability_vars<-c('Arm-Hand_Steadiness', 'Auditory_Attention', 'Category_Flexibility',
 'Control_Precision', 'Deductive_Reasoning', 'Depth_Perception', 
 #'Dynamic_Flexibility',
  'Dynamic_Strength', 'Explosive_Strength', 'Extent_Flexibility', 'Far_Vision', 'Finger_Dexterity', 'Flexibility_of_Closure', 'Fluency_of_Ideas', 'Gross_Body_Coordination',
 'Gross_Body_Equilibrium', 'Hearing_Sensitivity','Inductive_Reasoning', 'Information_Ordering',
 'Manual_Dexterity', 'Mathematical_Reasoning','Memorization', 'Multilimb_Coordination',
 'Near_Vision', 'Night_Vision','Number_Facility',
 'Oral_Comprehension', 'Oral_Expression','Originality', 'Perceptual_Speed',
 'Peripheral_Vision',  'Problem_Sensitivity' ,'Rate_Control', 'Reaction_Time',
 'Response_Orientation', 'Selective_Attention', 
 'Sound_Localization', 'Spatial_Orientation','Speech_Clarity','Speech_Recognition',
 'Speed_of_Closure', 'Speed_of_Limb_Movement','Stamina', 'Static_Strength',
 'Time_Sharing', 'Trunk_Strength','Visual_Color_Discrimination',
 'Visualization','Wrist-Finger_Speed','Written_Comprehension', 'Written_Expression')

sectors_focus<-c("Professional Scientific And Technical Services",
"Health Care And Social Assistance",
"Transportation And Warehousing",
"Construction",
"Information",
"Agriculture Forestry Fishing And Hunting",
"Mining Quarrying And Oil And Gas Extraction")

south_cone_df<-rbind(
    read_parquet("raw/arg_new_dict.parquet")|>
select(origin_vars,work_vars,sector_vars,ability_vars,subability_vars),
    read_parquet("raw/chl_new_dict.parquet")|>
select(origin_vars,work_vars,sector_vars,ability_vars,subability_vars),
    read_parquet("raw/ury_new_dict.parquet")|>
select(origin_vars,work_vars,sector_vars,ability_vars,subability_vars)
)
south_cone_df %>%
  summarise_at(sector_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=sector_vars)%>% 
  mutate(total=sum(value))


south_cone_df %>%
  summarise_at(ability_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=ability_vars)%>% 
  mutate(total=sum(value))


south_cone_df %>%
  summarise_at(subability_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=subability_vars)%>% 
  mutate(total=sum(value))


total_sector<-south_cone_df %>%
            select(doc_id,sector_vars)%>%
            pivot_longer(cols=sector_vars)%>% 
            group_by(doc_id)%>%
            summarise(total_sector=sum(value,na.rm = TRUE))%>%
  mutate(total_sector=ifelse(is.nan(total_sector),1,total_sector))
total_ability<-south_cone_df %>%
            select(doc_id,ability_vars)%>%
            pivot_longer(cols=ability_vars)%>% 
            group_by(doc_id)%>%
            summarise(total_ability=sum(value,na.rm = TRUE))%>%
  mutate(total_ability=ifelse(is.nan(total_ability),1,total_ability))
total_subability<-south_cone_df %>%
            select(doc_id,subability_vars)%>%
            pivot_longer(cols=subability_vars)%>% 
            group_by(doc_id)%>%
            summarise(total_subability=sum(value,na.rm = TRUE))%>%
  mutate(total_subability=ifelse(is.nan(total_subability),1,total_subability))



# Modificacion de datos inicial
south_cone_df<-south_cone_df%>% 
  ## Modificacion de variables binarias
  mutate(area_bin=ifelse(area=="Conocimiento",TRUE,FALSE),
         green_job_bin=ifelse(is.na(green_job),FALSE,TRUE)) %>% 
  ## Modificacion de variables categoricas
  mutate(zones_label=case_when(zones==1 ~ '(1) Poca o ninguna preparación',
             zones==2 ~ '(2) Algo de preparación',
             zones==3 ~ '(3) Preparación media',
             zones==4 ~ '(4) Preparación considerable', 
             zones==5 ~ '(5) Mucha o extensa preparación')) %>% 
  ## Variable alternativa de sector: Sector de mas peso o Main Sector
  left_join(south_cone_df %>% 
              select(doc_id,sector_vars) %>% 
              pivot_longer(cols = sector_vars,
                           names_to = "sector",
                           values_to = "wt") %>% 
              # keep max chances sector
              group_by(doc_id) %>% 
              filter(wt==max(wt)) %>% 
              # remove duplicates in case there is a tie. Keep the first coming up
              distinct(doc_id,.keep_all = T) %>% 
              ungroup() %>% 
              mutate(sector=str_replace_all(sector,"_"," "),
                     sector=str_to_title(sector)) %>% 
              rename(main_sector=sector,
                     main_sector_wt=wt), by="doc_id")%>%
  ## Normalizar sectores
  left_join(total_sector, by="doc_id")%>%
  mutate(across(sector_vars,~./total_sector))%>% 
  ## Normalizar habilidades
  left_join(total_ability, by="doc_id")%>%
  mutate(across(ability_vars,~./total_ability))%>% 
  ## Normalizar subabilidades
  left_join(total_subability, by="doc_id")%>%
  mutate(across(subability_vars,~./total_subability))

south_cone_df %>%
  summarise_at(sector_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=sector_vars)%>% 
  mutate(total=sum(value))

south_cone_df %>%
  summarise_at(ability_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=ability_vars)%>% 
  mutate(total=sum(value))

south_cone_df %>%
  summarise_at(subability_vars, ~mean(.,na.rm = T))%>%
  pivot_longer(cols=subability_vars)%>% 
  mutate(total=sum(value))

Overview

This document can be considered a whiteboard or a catalog gathering all the insights and ideas we can think of as reflecting on Labor Demand in the South Cone via Online Job postings. Half the effort falls on understanding the data set nuances, while the other half is focused on collecting insights around the Energy Transition, Remote Work, Knowledge Sectors, Gender Inclusion, The Silver Economy, Immigrant labor assimilation, and Regional Economies.

It’s a work in progress.

Use the table of contents on the right to travel between document sections: TBD stands for To Be Done, while WIP stands for Work in Progress.

I’ve examined occupational, sector, abilities, sub-abilities, job zones, work schedules, regions, cities, and firm distributions of online job vacancies. I’ve also looked at the distribution of binary categories like green jobs, remote jobs, and knowledge jobs.

Overall, I can say that:

  • The figures presented here are based on a month of data. It goes from September 25 to October 26.

  • That amounts to 60.000 job postings. Chile accounts for about 58%, Argentina for 38% and Uruguay for 4%. Remember that Chile, Argentina, and Uruguay account for 39%, 54%, and 7% of formal employment in the South Cone, respectively. See Table 1

  • We compared occupations shares in total employment with their share in online job vacancies. The following major occupational groups are over-represented in the latter: “Sales and Related Occupations”, “Healthcare Practitioners and Technical Occupations”, “Architecture and Engineering Occupations”, “Office and Administrative Support Occupations”, “Protective Service Occupations”, “Computer and Mathematical Occupations” , “Production Occupations”, “Management Occupations”, “Business and Financial Operations Occupations,” in that order. See Table 12.

  • We obtained similar conclusions at the sector level. The following sectors are over-represented in online job postings: “Real Estate And Rental And Leasing”, “Professional Scientific And Technical Services”, “Finance And Insurance”, Manufacturing, “Retail Trade”, “Educational Services”, “Health Care And Social Assistance”, and “Accommodation And Food Services”. See Table 13. Note that over represented means more common that would be expected by their share of employment.

  • Regions like Santiago (13000), Buenos Aires (9300), Valparaiso (4900), Concepcion (3774), Rosario (2316), and Gran Temuco (2125) have more online vacancies than Uruguay (2060). See Table 10

Differences between countries:

  • Argentina’s online job demand aims towards more educated, trained, and technical workers than Chile. This is evident in the job zones, and abilities distributions (see Figure 23 , Figure 12), but also foreseeable from the sector and occupational distributions alone (see Figure 9 , ?@fig-occupation_country). Uruguay mimics that in many ways, but we’re always less sure due to its narrow sample size.

  • Despite being 1.5x outsized by its’ andean neighbor, Argentina has more vacancies in “Financial Operation Occupations” and a similar number of vacancies in “Architecture and Engineering,” “Computer and Mathematical,” “Educational,” and “Construction and extraction” occupations (see ?@fig-occupation_country). It’s also remarkably close to Chile in the number of vacancies with “Considerable preparation” (Job Zone 4.)

  • When compared with it’s region neighbor, Chile has an extraordinary high number of openings asking for “Some Preparation” (Job Zone 2.) Indeed, these account for about 46% of all the sampled vacancies, 10 points more than URY and 11 points more than ARG.

Abilities and sub-abilities

  • “Cognitive” and “Sensory” abilities are the most prevalent in online job postings. “Physical” and “Psychomotor” abilities are almost half as required. See Figure 11.

  • The most “Cognitive” abilities-intensive sectors are “Professional, Scientific and Technical Services,” “Utilities,” and “Management of Companies and Enterprises.” See Figure 18.

  • The most “Sensory” sectors are “Wholesale Trade,” “Management of Companies and Enterprises,” and “Educational Services.” These are sectors demanding many “Sensory” subabilities to a high degree. See Figure 18.

  • The “Construction” sector demands “Cognitive,” “Physical,” “Psychomotor,” and “Sensory” abilities similarly. The demand for abilities in the “Accommodation and Food Services” sector is similarly even. See Figure 18.

  • Sub-abilities’ intensity in Google Jobs data aligns well with O*NET data on Sub-abilities’ importance by occupation. Figure 70. There are methods to identify occupations exposed to AI by looking at subabilities (See Felten et. Al. 2018).

Work from Home arrangements (WFH):

  • The fraction of remote/hybrid vacancies detected is too low. Chile’s share of remote vacancies is 1.3%, while New Zealand’s is 10% (see Table 6 and ?@fig-bloom3). However, global surveys say Chileans work from home 0.9 days a week, while New Zealanders do so 1.0 days per week (see ?@fig-bloom3).

  • In the Argentinean files reviewed, 85% of vacancies with the word “hybrid” on the description were classified as not remote/hybrid (see Table 9). This evidence suggests a high prevalence of Type II errors. This likely happens in Chile and Uruguay files.

  • Type II errors aside, Santiago and Buenos Aires account for 24% and 20% of all remote vacancies, respectively. Regions like Córdoba, Mendoza, Corrientes, and Metropolitana account for 4%, 4%, 2%, and 4% of remote job postings, respectively. These last regions and Buenos Aires account for a more significant share of remote vacancies than expected by chance (See Table 7).

Energy transition:

  • Green jobs represent 15% of all online job postings. Argentina has the highest share of green jobs in its’ job postings (17%), while Chile has the lowest (13%). See Table 3.

  • “Green Increased Demand” accounts for 45% of Green jobs, while “Green Enhanced Skills” and “Green New & Emerging” account for 40% and 14%, respectively. See Table 4.

  • Argentina is intensive in “Green Increased Demand” (48%), while Chile is relatively intensive in “Green Enhanced Skills” (43.4%). See Table 4

  • Santiago and Buenos Aires account for 18% and 18% of all green online vacancies, while regions like Rosario, Mendoza, and Antofagasta account for 6%, 4%, and 4%, respectively. Buenos Aires, Rosario, Mendoza, and Antofagasta account for a more significant share of Green vacancies than expected by chance. See Table 5.

Job Zones:

  • 41% of job vacancies in the South Cone require “(2) Some preparation”, while 25% require “(4) Considerable preparation”, and another 25% require “(3) Middle preparation”. Only 8% of the demand falls on the “(1) no preparation” and “(5) a lot of preparation” extremes. See Figure 19.

  • 46% of job postings in Chile demand “(2) Some preparation”. It’s the country most concentrated in that area of demand by a considerable margin. See Figure 23.

  • 30.5% of job postings in Argentina demand “(4) Some preparation”. It’s the country most concentrated in that area of demand by a moderate margin. See Figure 23.

  • We show sectors’ composition of online vacancies by Job Zone. One of the surprises we found is that Transportation and Warehousing asks for [(4) Preparación Considerable] en around 15% of online vacancies, at least in Argentina. See Figure 24.

  • The breakdown of job zones aligns with our understanding of training and preparation requirements by sector. We’re able to spot minor variations within countries.

Firms:

  • Chile and Argentina have around 800 firms (see Table 11 ). The 100 most prominent firms in both countries account for around 40% of all job vacancies. We present a plot to help policymakers spot the hottest demand firms across different periods (see Figure 51).

Ideas moving forward:

  • Automating this report: We could work on automating updates to this or other similar on a monthly basis by establishing an API connection.

  • Creating a dashboard with dates and country filters: We could work on a dashboard that allows the user reproduce all these plots and statistics and includes date/country filters. The tool would be connected to the API.

  • Follow developments on Argentina Labor markets: Data could be used to track the effects of the incoming labor markets de-regulation in Argentina.

  • Discuss the effect of AI on labor demand: Acemoglu, Autor, et al 2022 have used online job vacancies and AI exposure measures to discuss heterogeneous effects of AI on labor demand. Our data is very similar, only short.

  • Measure labor market tightness: Geographical granularity offers valuable insights for policy makers. This could allow researchers create estimates of labor market tightness in large regions by calculating the ratio of vacancies to unemployment. On the other end, firm granularity could allow policy makers to reach out to firms leading job demand.

  • Assist green transition efforts: We could explore other dimensions of “green labor demand.” What sectors and firms are behind it? What abilities are they more reliant on? How does it change following COP28 resolutions?

  • Evidence found here suggest there is a possibility of reducing type II error in remote work classification at a low cost. First step would consist on building a simple NLP model for WFH detection and compare it with “human-in-the-loop” classifications of a sub-sample of postings to measure improvements. Algorithms could grow more complex if needed. If that’s the case I suggest using Taska, Bloom, et. al. 2023) work as guidance.

The IDB online job postings database

Variables

We identify four groups of variables:

  • Sector weights: will tell us the sector distribution of firms searching for workers. Each column is named after one of the 20 NAICS 2-digits sectors.

    Names in the database
    • accommodation_and_food_services, administrative_and_support_services, agriculture_forestry_fishing_and_hunting, arts_entertainment_and_recreation, construction, educational_services, finance_and_insurance, government, health_care_and_social_assistance, information, management_of_companies_and_enterprises, manufacturing, mining_quarrying_and_oil_and_gas_extraction, other_services_except_public_administration, professional_scientific_and_technical_services, real_estate_and_rental_and_leasing, transportation_and_warehousing, utilities, wholesale_trade, retail_trade
  • Abilities and sub abilities weights: works similar to the sector ones. Each column shows a score associated with that (sub)ability. The raw score apparently lacks any interpretation, but it can be used to either rank items from most to least important, or weight each observations to calculate the aggregate importance of each item. (Sub)Abilities are defined in the ONET Content Model Ability.

    Names in the database
    • Abilities: Cognitive Abilities, Sensory Abilities, Physical Abilities, Psychomotor Abilities

    • Sunbilities: Arm-Hand_Steadiness, Auditory_Attention, Category_Flexibility, Control_Precision, Deductive_Reasoning, Depth_Perception, Dynamic_Strength, Explosive_Strength, Extent_Flexibility, Far_Vision, Finger_Dexterity, Flexibility_of_Closure, Fluency_of_Ideas, Gross_Body_Coordination, Gross_Body_Equilibrium, Hearing_Sensitivity, Inductive_Reasoning, Information_Ordering, Manual_Dexterity, Mathematical_Reasoning, Memorization, Multilimb_Coordination, Near_Vision, Night_Vision, Number_Facility, Oral_Comprehension, Oral_Expression, Originality, Perceptual_Speed, Peripheral_Vision, Problem_Sensitivity, Rate_Control, Reaction_Time, Response_Orientation, Selective_Attention, Sound_Localization, Spatial_Orientation, Speech_Clarity, Speech_Recognition, Speed_of_Closure, Speed_of_Limb_Movement, Stamina, Static_Strength, Time_Sharing, Trunk_Strength, Visual_Color_Discrimination, Visualization, Wrist-Finger_Speed, Written_Comprehension, Written_Expression

  • Work related variables: Including the occupation title, the work schedule, training and education requirements (zones), whether remote or not, whether green or not, and whether knowledge activity or not.

    Names in the database
    • occupation: Contains occupation titles according to the ONETSOC19 system. The actual codes aren’t available in the table, but titles can be joined to official crosswalks to recover them. Its’ spanish version can be found in onet_job. Problem to report: 2.5% of occupation records are empty, 0% of onet_job are empty.

    • remote: Binary indicator on whether a possition offers any kind of work from home (WFH) arrangement. Namely remote or hybrid work.

    • area: Binary indicator on whether a the employer is likely be a knowledge-intensive services provider, as defined by the Ley de Economía del Conocimiento Argentina: **software; nanotecnología; biotecnología; las industrias audiovisual, aeroespacial y satelital; la ingeniería para la industria nuclear y la robótica, entre otras actividades.*

    • green_job: Variable showing the ONET green occupation category a vacancy falls into (Green New & Emerging, Green Enhanced Skills, and Green Increased Demand.)

    • job_zone: Variable showing the ONET category of preparation requirements an vacancy falls into. Here, preparation stands for a mix of education, experience, and training.

    • schedule: Variable showing the contractual arrangement offered in the vacancy. It can take “Intership”, “Contractor”, “Part-time”, “Full-time”, and “other” as categories.

  • Origin variables: Including the id of the vacancy, the date, the country code, firm name, platform, and region.

    Names in the database
    • country_code: The name of the country.

    • date_posted: The date the vacancy was posted in yyyy-mm-dd format.

    • firm: The name of the firm publishing the post.

    • rm: Region Metropolitana. It has 24 unique values for Argentina (equal to Provincia in when the count of vacancies is small, otherwise accounting for important metropolitan areas). Similarly, “rm” has 18 unique values for Chile (two more of what’s supposed to be if the intention is showing Regiones), and 6 unique values for Uruguay (way below the 18 Departamentos).

    • city_name: City. Good providing more geographic granularity. A high-level analysis shows that cities like Vicente Lopez and Quilmes have a combined number of vacancies similar to that of Santa Fe and Rosario combined, Córdoba Capital, and Mendoza Capital.

    • job_name: The name the employer gave to the vacancy in the posting.

    • descrip: The raw text description of the job.

There is an statistical summary of these and other relevant variables in table Table 2.

Database statistics

Here we present the dimension and summary statistics of our dataset:

[1] “There are 60689 postings in our data. Job postings count by country:”

Code
latest_country_PEA<-read_csv("data/latest_country_pea.csv") %>% 
  select(country_code=ref_area, PEA=obs_value) %>% 
  mutate(PEA=PEA,
         PEA_share=PEA/sum(PEA))

country_code_df %>%
  left_join(latest_country_PEA) %>% 
  gt() %>% 
  tab_header(title = "Overall Statistics",
             subtitle = paste0("Between ", min(south_cone_df$date_posted), " and ",max(south_cone_df$date_posted), ". Population data comes from ILOSTAT")
  ) %>% 
  fmt_percent(ends_with("share")) %>% 
  fmt_integer(columns = vars(country_vacancies,PEA)) %>% 
  cols_label(
    country_vacancies = "Online vacancies",
    country_share = "Online vacancies (%)",
    PEA = "Working Age Pop (Thousands)",
    PEA_share = "Working Age Pop (%)"
  )
Table 1:

Summary

Overall Statistics
Between 2023-09-25 and 2023-10-29. Population data comes from ILOSTAT
country_code Online vacancies Online vacancies (%) Working Age Pop (Thousands) Working Age Pop (%)
CHL 35,194 57.99% 15,706 38.69%
ARG 23,435 38.61% 22,049 54.32%
URY 2,060 3.39% 2,837 6.99%
Code
south_cone_df %>% 
  group_by(country_code) %>% 
  mutate(date_posted=lubridate::as_date(date_posted)) %>% 
  select(date_posted,'firm','source','rm','city','city_name',work_vars,area_bin,green_job_bin) %>% 
  skimr::skim()

Table 2: Detailed summary

(a) Data summary
Name Piped data
Number of rows 60689
Number of columns 16
_______________________
Column type frequency:
character 10
Date 1
logical 3
numeric 1
________________________
Group variables country_code

Variable type: character

skim_variable country_code n_missing complete_rate min max empty n_unique whitespace
firm ARG 0 1.00 0 93 11 6037 0
firm CHL 0 1.00 2 212 0 9596 0
firm URY 0 1.00 3 123 0 798 0
source ARG 0 1.00 15 64 0 200 0
source CHL 0 1.00 15 120 0 199 0
source URY 0 1.00 16 53 0 48 0
rm ARG 0 1.00 5 35 0 24 0
rm CHL 0 1.00 5 16 0 18 0
rm URY 0 1.00 3 13 0 6 0
city ARG 0 1.00 0 89 6 806 0
city CHL 0 1.00 4 25 0 240 0
city URY 0 1.00 4 25 0 70 0
city_name ARG 0 1.00 4 35 0 216 0
city_name CHL 0 1.00 4 20 0 201 0
city_name URY 0 1.00 4 22 0 48 0
occupation ARG 0 1.00 0 97 366 619 0
occupation CHL 0 1.00 0 94 1025 680 0
occupation URY 0 1.00 0 94 32 284 0
onet_job ARG 0 1.00 7 113 0 637 0
onet_job CHL 0 1.00 7 114 0 703 0
onet_job URY 0 1.00 7 107 0 292 0
schedule ARG 0 1.00 5 10 0 5 0
schedule CHL 0 1.00 5 10 0 5 0
schedule URY 0 1.00 5 10 0 5 0
green_job ARG 19330 0.18 20 22 0 3 0
green_job CHL 30639 0.13 20 22 0 3 0
green_job URY 1738 0.16 20 22 0 3 0
area ARG 0 1.00 12 15 0 2 0
area CHL 0 1.00 12 15 0 2 0
area URY 0 1.00 12 15 0 2 0

Variable type: Date

skim_variable country_code n_missing complete_rate min max median n_unique
date_posted ARG 0 1 2023-09-29 2023-10-29 2023-10-26 31
date_posted CHL 0 1 2023-09-28 2023-10-28 2023-10-26 31
date_posted URY 0 1 2023-09-25 2023-10-24 2023-10-22 30

Variable type: logical

skim_variable country_code n_missing complete_rate mean count
remote ARG 0 1 0.03 FAL: 22837, TRU: 598
remote CHL 0 1 0.01 FAL: 34729, TRU: 465
remote URY 0 1 0.02 FAL: 2023, TRU: 37
area_bin ARG 0 1 0.38 FAL: 14452, TRU: 8983
area_bin CHL 0 1 0.31 FAL: 24248, TRU: 10946
area_bin URY 0 1 0.36 FAL: 1310, TRU: 750
green_job_bin ARG 0 1 0.18 FAL: 19330, TRU: 4105
green_job_bin CHL 0 1 0.13 FAL: 30639, TRU: 4555
green_job_bin URY 0 1 0.16 FAL: 1738, TRU: 322

Variable type: numeric

skim_variable country_code n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
zones ARG 0 1 3.04 0.99 1 2 3 4 5 ▁▇▆▇▂
zones CHL 0 1 2.83 1.00 1 2 3 4 5 ▁▇▅▃▁
zones URY 0 1 2.97 0.98 1 2 3 4 5 ▁▇▆▆▂

Characterizing labor market demand (Work in progress)

This section consists in highlighting vacancy distributions of each country across different indicators (occupational groups, sectors, skills, sub-skils, job zones, work schedule, green jobs, remote jobs, knowledge jobs, regions, and firms). Each indicator will have its own section.

Unless otherwise specified, each section will start with the overall distribution of vacancies across that variable, followed by the same distribution within each country. Each section is then finalized a relative concentration analysis, showing where is each country more specialized vis a vis its’ peers.

In other words, analysis will answer the following questions:

  • Whats more common?

  • What’s the most common in each country?

  • Which country has more of each group?

  • Which country has a higher than average concentration on each group?

Across occupations

We prepare the South Cone data for aggregation at the Major SOC group (2018) level. We first get the ONET SOC 19 code of each occupational title in the data, and then use ONET crosswalk to SOC18.

Summary

  • “Sales and Related”, “Office and Administrative Support”, “Production”, “Business and Financial”, “Management”, “Architecture and Engineering”, and “Computer and Mathematical” occupations are the most prevalent occupational groups across all countries. Together they account for about 70% of all vacancies.

  • Argentina’s demand is remarkably strong in “Business and Financial Operations”, “Arts, Design, Entertainment, and Media” and “Installation, Maintenance, and Repair” occupations, as well as remarkably in “Transportation and Material Moving”,“Protective services” and “Community and Social Service” occupations. It’s above average in “Computer and Mathematical” and “Architecture and Engineering Occupations”.

  • Chile accounts for almost 60% of the sample, so it swings much less from the average than Argentina and Uruguay. Chile’s demand is remarkably strong in “Transportation and Material Moving Occupations”, “Building and Ground Cleaning and Maintenance”, and “Community and Social service” occupations. It’s remarkably in “Business and Financial Operations Occupations” and below average in “Computer and Mathematical occupations”.

  • Uruguay’s demand is remarkably strong in “Computer and Mathematical Occupations”, “Educational Instruction and Library Occupations”, “Personal Care and Service”, “Construction and Extraction”, and “Legal” occupations. It’s remarkably in “Management”, “Healthcare Support”, and “Healthcare Practitioners and Technical” Occupations, which was unexpected.

What’s more common?

We calculate the frequency of each Major SOC group in the South Cone as a whole.

Code
## frecuency table of occupations
major_group_df<-south_cone_df %>%
    group_by(major_group,major_group_title)%>%
    summarise(group_vacancies=n())%>%
    ungroup()%>%
    mutate(group_share=group_vacancies/sum(group_vacancies))%>%
    arrange(desc(group_vacancies)) 

## frequency table of occupations, by country
major_group_by_cty<-south_cone_df %>%
  group_by(country_code,major_group,major_group_title)%>%
  summarise(count=n() ) %>%
  ungroup() %>% 
  left_join(major_group_df %>%
              select(major_group,group_vacancies))  %>% 
  left_join(country_code_df %>%
              select(country_code,country_vacancies))  %>%
  mutate(group_in_country_share=count/country_vacancies,
         country_in_group_share=count/group_vacancies) %>% 
  ungroup()
Code
cat_var_chart(data=mutate(south_cone_df,
                          major_group_title=str_remove_all(major_group_title,"Occupations")),
              category = "major_group_title")+
    geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by SOC major group",
       x=NULL)

Figure 1: Major SOC group distribution

What’s more common in each country?

We calculate the frequency of each Major SOC group in each country. For each row, we calculate the share of that group in the country and the share of the country in the group.

Code
country_major_soc_df<-country_var_count(
  data=mutate(south_cone_df,major_group_title=str_remove_all(major_group_title," Occupations")),
  country = "country_code",
  category = "major_group_title")

country_var_chart(agg_data=country_major_soc_df,
                  country = "country_code",
                  category = "major_group_title")[[1]]+
  scale_fill_manual(values=country_colors)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by SOC major group",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Figure 2: Major SOC distribution, by country

What’s the country most specialized in each group?

Code
# 10 largest occupational groups
top_10_soc<-var_count(data=mutate(south_cone_df,major_group_title=str_remove_all(major_group_title," Occupations")),
                      category="major_group_title") %>% 
  top_n(10,group_share) %>% 
  pull(major_group_title)

country_var_chart(agg_data = filter(country_major_soc_df, major_group_title%in% top_10_soc),
                  category = "major_group_title",
                  country="country_code")[[2]] +
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten most prevalent occupational groups in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)


# 12 smallest occupational groups
bottom_12_soc<-var_count(data=mutate(south_cone_df,major_group_title=str_remove_all(major_group_title," Occupations")),
                      category="major_group_title")  %>% 
  top_n(12,-group_share) %>% 
  pull(major_group_title)

country_var_chart(agg_data = filter(country_major_soc_df, major_group_title%in% bottom_12_soc),
                  category = "major_group_title",
                  country="country_code")[[2]] +
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Twelve less prevalent occupational groups in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Figure 3: Major SOC distribution by country, side by side

Figure 4: Major SOC distribution by country, side by side

Which country has the largest number of vacancies in each group?

Code
# 10 largest
country_var_chart(agg_data = filter(country_major_soc_df, major_group_title%in% top_10_soc),
                  category = "major_group_title",
                  country="country_code")[[3]] +
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten most prevalent occupational groups in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)


# 12 smallest
country_var_chart(agg_data = filter(country_major_soc_df, major_group_title%in% bottom_12_soc),
                  category = "major_group_title",
                  country="country_code")[[3]] +
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Twelve less prevalent occupational groups in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Figure 5: Major SOC distribution, by country

Figure 6: Major SOC distribution, by country

Table View

Code
country_var_table(data_agg=country_major_soc_df,category="major_group_title", country="country_code",interactive=FALSE)

?(caption)

major_group_title Vacancies % of Vacancies % of ARG % of CHL % of URY
Sales and Related 9448 15.94% 16.36% 15.63% 16.57%
Office and Administrative Support 8988 15.17% 14.77% 15.36% 16.42%
Production 5213 8.80% 8.11% 9.39% 6.66%
Business and Financial Operations 4702 7.94% 11.00% 5.82% 8.73%
Transportation and Material Moving 4298 7.25% 4.14% 9.46% 5.52%
Management 3902 6.59% 6.70% 6.64% 4.34%
Architecture and Engineering 3088 5.21% 6.23% 4.55% 4.64%
Computer and Mathematical 2498 4.22% 5.06% 3.42% 7.89%
Installation, Maintenance, and Repair 2227 3.76% 4.99% 2.91% 4.09%
Healthcare Practitioners and Technical 2222 3.75% 3.24% 4.23% 1.48%
Educational Instruction and Library 1863 3.14% 3.52% 2.75% 5.47%
Protective Service 1810 3.05% 2.10% 3.74% 2.42%
Food Preparation and Serving Related 1601 2.70% 2.14% 3.08% 2.76%
Construction and Extraction 1513 2.55% 2.99% 2.20% 3.55%
Life, Physical, and Social Science 1380 2.33% 2.22% 2.44% 1.73%
Building and Grounds Cleaning and Maintenance 1297 2.19% 1.16% 2.91% 1.73%
Arts, Design, Entertainment, Sports, and Media 928 1.57% 2.15% 1.14% 2.02%
Healthcare Support 834 1.41% 1.02% 1.71% 0.69%
Personal Care and Service 810 1.37% 1.35% 1.33% 2.27%
Community and Social Service 296 0.50% 0.17% 0.73% 0.30%
Farming, Fishing, and Forestry 209 0.35% 0.30% 0.39% 0.35%
Legal 128 0.22% 0.27% 0.17% 0.39%
sum 59,255.00 1.00 1.00 1.00 1.00

Major SOC distribution, by country

Across Sectors (Ramas)

Code
rama_df<-south_cone_df %>% 
  select(country_code,sector_vars) %>% 
  group_by(country_code) %>% 
  summarise(across(sector_vars,~sum(.))) %>% 
  pivot_longer(cols = sector_vars,
               names_to = "sector",
               values_to = "weigths_sum") %>% 
  mutate(sector=str_replace_all(sector,"_"," "),
         sector=str_to_title(sector)) 

rama_df<-rama_df %>% 
  group_by(country_code) %>% 
  mutate(group_in_country_share=weigths_sum/sum(weigths_sum)) %>%
  group_by(sector) %>% 
  mutate(country_in_group_share=weigths_sum/sum(weigths_sum)) %>% 
  ungroup() %>% 
  left_join(
    rama_df %>%
      mutate(total=sum(weigths_sum)) %>% 
      group_by(sector) %>% 
      summarise(group_vacancies=sum(weigths_sum),
                group_share=sum(weigths_sum)/mean(total))
  )
  • The sectors demanding more jobs online are “Retail Trade”, “Manufacturing”, “Professional scientific and Technical Services”, “Educational Services”, “Government”,and “Finance and Insurance”. They accout for about 83% of all postings.

  • Argentina’s demand is strong in “Finance and Insurance”, “Professional Scientific and technical services”, and “Construction”. It’s particularly weak in “Retail”, “Accommodation and food services”, “Administrative and support services”, and “transportation and warehousing”

  • Chile’s demand is strong in “Retail trade”, “Manufacturing”, “Health Care and Social Assistance”, “Administrative and Support Services”, “Retail trade”, “Transportation and Warehousing”, and “Wholesale Trade”. It’s particularly weak in “Professional Scientific and Technical Services”, “Finance and Insurance”, “Construction”, “Other Services Except Public Administration”, and “Information.”

  • Uruguay is super strong in “Professional Scientific and Technical Services”, “Educational Services”,“Construction”, and “Information”. It’s strong in “Construction”. It’s particularly weak in “Health care and social assistance” and “Manufacturing”.

What’s more common?

Code
rama_df %>% 
  distinct(sector,group_share) %>% 
  ggplot(aes(x=reorder(sector,
                       -group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Sector",
       x=NULL,
       y=NULL)

Figure 7: Sector distribution, with number of vacancies wegithed by the weight of the sector

What’s more common in each country?

Code
country_var_chart(agg_data=rename(rama_df,count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[1]]+
  scale_fill_manual(values=country_colors)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Sector",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Figure 8: ?(caption)

What are countries specialized in?

Code
top_10_naics<-rama_df %>%
  top_n(30,group_vacancies) %>% 
  pull(sector)

# 10 largest
country_var_chart(agg_data=rename(filter(rama_df,sector %in% top_10_naics),count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten most prevalent sectors in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Figure 9: Sector distribution, by country
Code
# 10 smallest
bottom_10_naics<-rama_df %>%
  top_n(30,-group_vacancies) %>% 
  pull(sector)
country_var_chart(agg_data=rename(filter(rama_df,sector %in% bottom_10_naics),count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten least prevalent sectors in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Code
# Summary
rama_df %>% 
  ggplot(aes(x=reorder(str_to_title(sector),-group_share),y=country_code, 
             fill=group_in_country_share/group_share))+
  geom_tile(color="black")+
  theme(axis.text.x = element_text(angle = 65, hjust=1, size=13))+
  scale_fill_fermenter(palette = "RdBu",direction = 1, 
                       breaks= c(0.35,0.7,1,1.3),
                       labels = function(x) paste0( x, 'x'))+
  labs(title = "Summary: Demand hotspots by country and sector",
       subtitle = "sorted from more to less total vacancies",
       fill="Location\nquotient",
       y=NULL,
       x=NULL)

Which country accounts for the largest number of vacancies?

Code
# 10 largest
country_var_chart(agg_data=rename(filter(rama_df,sector %in% top_10_naics),count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten most prevalent sectors in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Figure 10: ?(caption)
Code
# 10 smallest
country_var_chart(agg_data=rename(filter(rama_df,sector %in% bottom_10_naics),count=weigths_sum),
                  country = "country_code",
                  category = "sector")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Ten least prevalent sectors in vacancies data",
       fill=NULL,
       x=NULL,
       y=NULL)

Table View

Code
country_var_table(data_agg=rename(rama_df,count=weigths_sum),
                  category="sector",
                  country="country_code",interactive=FALSE)

?(caption)

sector Vacancies % of Vacancies % of ARG % of CHL % of URY
Retail Trade 9648.4056 16.28% 14.80% 17.31% 15.76%
Professional Scientific And Technical Services 8593.0141 14.50% 16.80% 12.73% 18.24%
Manufacturing 8204.4577 13.85% 14.25% 13.80% 9.96%
Health Care And Social Assistance 4556.7503 7.69% 6.56% 8.56% 5.80%
Educational Services 4339.2046 7.32% 7.93% 6.78% 9.54%
Government 4052.9903 6.84% 7.62% 6.27% 7.52%
Finance And Insurance 3833.9949 6.47% 7.96% 5.41% 7.37%
Administrative And Support Services 3697.3056 6.24% 4.67% 7.39% 4.68%
Accommodation And Food Services 2380.7913 4.02% 3.14% 4.56% 4.81%
Construction 1827.4932 3.08% 3.48% 2.77% 3.80%
Transportation And Warehousing 1656.7556 2.80% 2.14% 3.28% 2.15%
Other Services Except Public Administration 1631.0383 2.75% 3.21% 2.44% 2.83%
Wholesale Trade 1517.1876 2.56% 2.33% 2.75% 1.93%
Information 1140.0591 1.92% 2.09% 1.76% 2.73%
Real Estate And Rental And Leasing 841.0378 1.42% 0.92% 1.77% 1.23%
Management Of Companies And Enterprises 394.4781 0.67% 0.77% 0.60% 0.55%
Arts Entertainment And Recreation 321.2924 0.54% 0.57% 0.54% 0.33%
Agriculture Forestry Fishing And Hunting 219.8413 0.37% 0.35% 0.39% 0.37%
Utilities 218.2831 0.37% 0.18% 0.50% 0.32%
Mining Quarrying And Oil And Gas Extraction 180.6191 0.30% 0.22% 0.38% 0.08%
sum 59,255.00 1.00 1.00 1.00 1.00

Abilities

  • The most in-demand abilities online are Cognitive (33%) and Sensory (33%).Demand of Physical and Psychomotor activities is almost half of that.
Code
abilities=c('Cognitive Abilities','Sensory Abilities','Psychomotor Abilities','Physical Abilities')


abilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = NULL,
                         variable_names=abilities, 
                         name_of_categories="abilities") 

country_abilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = 'country_code',
                         variable_names=abilities, 
                         name_of_categories="abilities") 
Code
abilities_df %>% 
  ggplot(aes(x=reorder(abilities,
                       -group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(subtitle = "Share of vacancies requiring each Ability to some extent",
       x=NULL,
       y=NULL)

Figure 11: ?(caption)

Charts

Code
country_var_chart(agg_data=country_abilities_df,
                  country = "country_code",
                  category = "abilities")[[1]]+
  scale_fill_manual(values=country_colors)+
  labs(title = "Share of vacancies requiring each Ability to some extent",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Figure 12: Subabilities by country
Code
country_var_chart(agg_data=rename(country_abilities_df),
                  country = "country_code",
                  category = "abilities")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(x=NULL,
       y=NULL,
       fill=NULL)

Code
country_var_chart(agg_data=country_abilities_df,
                  country = "country_code",
                  category = "abilities")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(title = "Number of vacancies requiring an Ability, by Country",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Table View

Code
country_var_table(country_abilities_df,
                  category  = "abilities",country = "country_code",
                  interactive = FALSE)

?(caption)

abilities Vacancies % of Vacancies % of ARG % of CHL % of URY
Cognitive Abilities 58971 99.52% 99.61% 99.46% 99.61%
Sensory Abilities 58971 99.52% 99.61% 99.46% 99.61%
Psychomotor Abilities 43369 73.19% 69.36% 75.73% 74.06%
Physical Abilities 31618 53.36% 46.18% 58.52% 48.03%
sum 192,929.00 3.26 3.15 3.33 3.21

Sub-abilities

  • There are 51 Sub Skills but Dynamic_Flexibility is missing from Uruguay. This report shows the remaining 50 until the original data is fixed.

  • Oral Comprehension and Oral Expression are the most in-demand sub abilities, followed by Near Vision, Written Comprehension, and Deductive Reasoning. Argentina and Uruguay demand this skills with a higher intensity than Chile.

  • Number facility and Mathematical Reasoning rank 21th and 22th in the ranking of most demanded sub abilities. Argentina and Uruguay demand this skills with a higher intensity than Chile.

  • We compared the prevalence of sub-abilites job postings contrasted it what O*NET experts think are the typical importance and mastery levels of each skill within an occupation. We got a strong positive correlation, which suggests our text mining algorithms were able to capture some of the knowledge occupational experts have.

Code
subabilities = c('Arm-Hand_Steadiness', 'Auditory_Attention', 'Category_Flexibility',
                'Control_Precision', 'Deductive_Reasoning', 'Depth_Perception',
                # 'Dynamic_Flexibility',
                'Dynamic_Strength','Explosive_Strength',
                'Extent_Flexibility', 'Far_Vision', 'Finger_Dexterity',
                'Flexibility_of_Closure','Fluency_of_Ideas', 'Gross_Body_Coordination',
                'Gross_Body_Equilibrium', 'Hearing_Sensitivity','Inductive_Reasoning',
                'Information_Ordering', 'Manual_Dexterity', 'Mathematical_Reasoning',
                'Memorization', 'Multilimb_Coordination', 'Near_Vision',
                'Night_Vision', 'Number_Facility','Oral_Comprehension',
                'Oral_Expression', 'Originality', 'Perceptual_Speed',
                'Peripheral_Vision','Problem_Sensitivity','Rate_Control',
                'Reaction_Time', 'Response_Orientation', 'Selective_Attention',
                'Sound_Localization', 'Spatial_Orientation', 'Speech_Clarity',
                'Speech_Recognition', 'Speed_of_Closure','Speed_of_Limb_Movement',
                'Stamina', 'Static_Strength', 'Time_Sharing', 'Trunk_Strength', 
                'Visual_Color_Discrimination', 'Visualization', 'Wrist-Finger_Speed',
                'Written_Comprehension', 'Written_Expression')

abilities_taxonomy<-read_delim("raw/ONET_28_0/Abilities.txt",delim = "\t" ) %>%
  janitor::clean_names() %>%
  distinct(subabilities_id=element_id,subabilities=element_name) %>% 
  mutate(ability_id=substr(subabilities_id,1,5)) %>% 
  mutate(ability=case_when(ability_id=="1.A.1"~"Cognitive Abilities",
                           ability_id=="1.A.2"~"Psychomotor Abilities",
                           ability_id=="1.A.3"~"Physical Abilities",
                           ability_id=="1.A.4"~"Sensory Abilities"))

subabilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = NULL,
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(abilities_taxonomy)

country_subabilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = 'country_code',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(abilities_taxonomy)

Charts

Code
subabilities_df %>% 
  ggplot(aes(x=reorder(subabilities,
                       -group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  facet_wrap(vars(ability),scales = "free_x")+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies requiring each Subability to some extent",
       x=NULL,
       y=NULL)

Figure 13: ?(caption)
Code
purrr::map(c("ARG","CHL","URY"),
       skills_barchart,
       data_agg=country_subabilities_df)
[[1]]

[[2]]

[[3]]

Figure 14: ?(caption)

Figure 15: ?(caption)

Figure 16: ?(caption)
Code
country_subabilities_df %>% 
  ggplot(aes(x=reorder(subabilities,-group_share ),
             y=country_code, fill=group_in_country_share /group_share ))+
  geom_tile(color="black")+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  scale_fill_fermenter(palette = "RdBu",direction = 1, 
                       breaks= c(0.35,0.7,1,1.3),
                       labels = function(x) paste0( x, 'x'))+
   facet_wrap(vars(ability),scales = "free_x")+
  labs(title = "Demand hotspots by country and subabilities",
       subtitle = "sorted from more to less total vacancies",
       fill="Location\nquotient",
       y=NULL,
       x=NULL)

Code
top_subabilities<-subabilities_df %>% 
  top_n(15, group_share) %>% 
  pull(subabilities)

country_var_chart(agg_data=filter(country_subabilities_df,subabilities %in% top_subabilities),
                  country = "country_code",
                  category = "subabilities")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(title = "Number of vacancies requiring an Ability, by Country",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Table View

Code
country_var_table(country_subabilities_df,
                  category  = "subabilities",country = "country_code",
                  interactive = FALSE)

?(caption)

subabilities Vacancies % of Vacancies % of ARG % of CHL % of URY
Information Ordering 58971 99.52% 99.61% 99.46% 99.61%
Oral Comprehension 58971 99.52% 99.61% 99.46% 99.61%
Oral Expression 58971 99.52% 99.61% 99.46% 99.61%
Problem Sensitivity 58971 99.52% 99.61% 99.46% 99.61%
Selective Attention 58971 99.52% 99.61% 99.46% 99.61%
Speech Recognition 58971 99.52% 99.61% 99.46% 99.61%
Speech Clarity 58967 99.51% 99.60% 99.45% 99.61%
Deductive Reasoning 58930 99.45% 99.60% 99.34% 99.61%
Inductive Reasoning 58930 99.45% 99.60% 99.34% 99.61%
Near Vision 58801 99.23% 99.20% 99.24% 99.41%
Category Flexibility 58380 98.52% 98.79% 98.34% 98.57%
Far Vision 57258 96.63% 95.88% 97.11% 97.19%
Written Comprehension 57224 96.57% 97.86% 95.64% 97.68%
Written Expression 56106 94.69% 96.24% 93.74% 92.90%
Flexibility of Closure 55782 94.14% 95.85% 92.90% 95.51%
Perceptual Speed 53588 90.44% 91.42% 89.78% 90.38%
Time Sharing 53324 89.99% 90.31% 89.77% 90.09%
Fluency of Ideas 47708 80.51% 85.89% 76.55% 86.14%
Originality 46125 77.84% 83.35% 73.78% 83.63%
Number Facility 46016 77.66% 81.81% 74.93% 76.33%
Visualization 45519 76.82% 77.85% 75.93% 79.98%
Mathematical Reasoning 44783 75.58% 81.26% 71.70% 76.23%
Finger Dexterity 41803 70.55% 68.11% 72.13% 71.55%
Speed of Closure 39592 66.82% 69.88% 64.62% 68.98%
Memorization 38493 64.96% 70.53% 60.85% 70.96%
Visual Color Discrimination 32468 54.79% 50.96% 57.53% 52.32%
Trunk Strength 31361 52.93% 45.68% 58.11% 47.93%
Arm-Hand Steadiness 29918 50.49% 46.44% 53.53% 45.32%
Auditory Attention 27724 46.79% 43.45% 49.02% 47.19%
Manual Dexterity 25114 42.38% 37.66% 45.90% 36.88%
Multilimb Coordination 23929 40.38% 34.17% 44.84% 36.00%
Static Strength 23104 38.99% 32.46% 43.74% 33.23%
Hearing Sensitivity 19745 33.32% 31.66% 34.47% 32.99%
Control Precision 19688 33.23% 31.80% 34.41% 29.54%
Depth Perception 16336 27.57% 24.24% 29.93% 25.64%
Extent Flexibility 16130 27.22% 23.01% 30.19% 25.05%
Stamina 15674 26.45% 20.20% 30.81% 24.11%
Reaction Time 13876 23.42% 22.41% 24.18% 22.09%
Gross Body Coordination 10846 18.30% 14.94% 20.62% 17.50%
Rate Control 10240 17.28% 16.47% 17.87% 16.62%
Response Orientation 9749 16.45% 16.17% 16.66% 16.17%
Dynamic Strength 8427 14.22% 13.07% 14.95% 15.04%
Gross Body Equilibrium 5167 8.72% 8.85% 8.45% 11.79%
Wrist-Finger Speed 4841 8.17% 8.26% 8.01% 9.86%
Spatial Orientation 4324 7.30% 5.26% 8.65% 7.69%
Speed of Limb Movement 3363 5.68% 5.90% 5.46% 6.71%
Sound Localization 1791 3.02% 3.34% 2.85% 2.27%
Peripheral Vision 1788 3.02% 2.24% 3.51% 3.55%
Explosive Strength 1654 2.79% 1.54% 3.62% 2.96%
Night Vision 321 0.54% 0.46% 0.57% 0.99%
sum 1,688,733.00 28.50 28.21 28.70 28.41

Extension: Sub-Skills by sector

As we said before, sectors and sub-skills aren’t discretely assign to each online vacancies. Instead, each sector and skill has a weight on each job vacancy associated with the chances the firm belongs to that sector (or demands that skill).

To offer a tractable measure of the skills demand by sector we’re simply going to assign a 1 to the sector with the maximum chances of being the vacancy’s sector. Then we either count the number of times an ability (or subability) is required by a vacancy in the chosen sectors, or their average importance within the latter.

Another way is just counting the percentage of all postings within a country in which both the skill and the sector had a higher than average weight. We’ll try different specifications and use the most satisfactory one in the final deliverable.

This is the frequency in which each sectors is a vacancy’s most-likely sector:

Code
main_sector_df<-country_var_count(south_cone_df,
                                  country = NULL,
                                  category = 'main_sector')

main_sector_df%>% 
  ggplot(aes(x=reorder(main_sector,-group_share),y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_y_continuous(labels=scales::percent_format())+
  theme(axis.text.x = element_text(angle = 65, hjust=1))

Figure 17: ?(caption)

This is the frequency in which each skill has positive changes of being demanded in a vacancy:

Code
subskills_df<-south_cone_df %>% 
  select(doc_id,subabilities) %>% 
  mutate(across(c(subabilities),~ifelse(.>0,TRUE,FALSE))) 

subskills_df %>% 
  select(-1) %>% 
  skimr::skim() %>% 
  as_tibble() %>% 
  arrange(desc(logical.mean)) %>% 
  kableExtra::kable()

?(caption)

skim_type skim_variable n_missing complete_rate logical.mean logical.count
logical Information_Ordering 284 0.9952072 1.0000000 TRU: 58971
logical Oral_Comprehension 284 0.9952072 1.0000000 TRU: 58971
logical Oral_Expression 284 0.9952072 1.0000000 TRU: 58971
logical Problem_Sensitivity 284 0.9952072 1.0000000 TRU: 58971
logical Selective_Attention 284 0.9952072 1.0000000 TRU: 58971
logical Speech_Recognition 284 0.9952072 1.0000000 TRU: 58971
logical Speech_Clarity 284 0.9952072 0.9999322 TRU: 58967, FAL: 4
logical Deductive_Reasoning 284 0.9952072 0.9993047 TRU: 58930, FAL: 41
logical Inductive_Reasoning 284 0.9952072 0.9993047 TRU: 58930, FAL: 41
logical Near_Vision 284 0.9952072 0.9971172 TRU: 58801, FAL: 170
logical Category_Flexibility 284 0.9952072 0.9899781 TRU: 58380, FAL: 591
logical Far_Vision 284 0.9952072 0.9709518 TRU: 57258, FAL: 1713
logical Written_Comprehension 284 0.9952072 0.9703753 TRU: 57224, FAL: 1747
logical Written_Expression 284 0.9952072 0.9514168 TRU: 56106, FAL: 2865
logical Flexibility_of_Closure 284 0.9952072 0.9459226 TRU: 55782, FAL: 3189
logical Perceptual_Speed 284 0.9952072 0.9087178 TRU: 53588, FAL: 5383
logical Time_Sharing 284 0.9952072 0.9042411 TRU: 53324, FAL: 5647
logical Fluency_of_Ideas 284 0.9952072 0.8090078 TRU: 47708, FAL: 11263
logical Originality 284 0.9952072 0.7821641 TRU: 46125, FAL: 12846
logical Number_Facility 284 0.9952072 0.7803157 TRU: 46016, FAL: 12955
logical Visualization 284 0.9952072 0.7718879 TRU: 45519, FAL: 13452
logical Mathematical_Reasoning 284 0.9952072 0.7594072 TRU: 44783, FAL: 14188
logical Finger_Dexterity 284 0.9952072 0.7088739 TRU: 41803, FAL: 17168
logical Speed_of_Closure 284 0.9952072 0.6713808 TRU: 39592, FAL: 19379
logical Memorization 284 0.9952072 0.6527446 TRU: 38493, FAL: 20478
logical Visual_Color_Discrimination 284 0.9952072 0.5505757 TRU: 32468, FAL: 26503
logical Trunk_Strength 284 0.9952072 0.5318038 TRU: 31361, FAL: 27610
logical Arm-Hand_Steadiness 284 0.9952072 0.5073341 TRU: 29918, FAL: 29053
logical Auditory_Attention 284 0.9952072 0.4701294 FAL: 31247, TRU: 27724
logical Manual_Dexterity 284 0.9952072 0.4258703 FAL: 33857, TRU: 25114
logical Multilimb_Coordination 284 0.9952072 0.4057757 FAL: 35042, TRU: 23929
logical Static_Strength 284 0.9952072 0.3917858 FAL: 35867, TRU: 23104
logical Hearing_Sensitivity 284 0.9952072 0.3348256 FAL: 39226, TRU: 19745
logical Control_Precision 284 0.9952072 0.3338590 FAL: 39283, TRU: 19688
logical Depth_Perception 284 0.9952072 0.2770175 FAL: 42635, TRU: 16336
logical Extent_Flexibility 284 0.9952072 0.2735243 FAL: 42841, TRU: 16130
logical Stamina 284 0.9952072 0.2657917 FAL: 43297, TRU: 15674
logical Reaction_Time 284 0.9952072 0.2353021 FAL: 45095, TRU: 13876
logical Gross_Body_Coordination 284 0.9952072 0.1839209 FAL: 48125, TRU: 10846
logical Rate_Control 284 0.9952072 0.1736447 FAL: 48731, TRU: 10240
logical Response_Orientation 284 0.9952072 0.1653185 FAL: 49222, TRU: 9749
logical Dynamic_Strength 284 0.9952072 0.1429007 FAL: 50544, TRU: 8427
logical Gross_Body_Equilibrium 284 0.9952072 0.0876193 FAL: 53804, TRU: 5167
logical Wrist-Finger_Speed 284 0.9952072 0.0820912 FAL: 54130, TRU: 4841
logical Spatial_Orientation 284 0.9952072 0.0733242 FAL: 54647, TRU: 4324
logical Speed_of_Limb_Movement 284 0.9952072 0.0570280 FAL: 55608, TRU: 3363
logical Sound_Localization 284 0.9952072 0.0303709 FAL: 57180, TRU: 1791
logical Peripheral_Vision 284 0.9952072 0.0303200 FAL: 57183, TRU: 1788
logical Explosive_Strength 284 0.9952072 0.0280477 FAL: 57317, TRU: 1654
logical Night_Vision 284 0.9952072 0.0054434 FAL: 58650, TRU: 321
Code
mainsector_abilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = 'main_sector',
                         variable_names=abilities, 
                         name_of_categories="abilities") %>% 
   mutate(abilities=str_replace_all(abilities,"_"," "),
         abilities=str_to_title(abilities),
         abilities=str_replace_all(abilities," Of "," of ")) 

sector_skills_matrix(data_agg = mainsector_abilities_df,
                     ability_val = NULL,metric = "mean")

Figure 18: Subabilities by sector
Code
mainsector_subabilities_df<-country_var_count_groups(data=south_cone_df,
                                       country = 'main_sector',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(abilities_taxonomy)


purrr::map(c("Cognitive Abilities","Psychomotor Abilities","Physical Abilities","Sensory Abilities"),
       sector_skills_matrix,
       data_agg=mainsector_subabilities_df,
       metric="mean")
[[1]]


[[2]]


[[3]]


[[4]]

Job zones

  • 41% of job vacancies in the South Cone require “(2) Some preparation”, while 25% require “(4) Considerable preparation”, and another 25% requires “(3) Middle preparation”. Only 8% of the demand is focused on the “(1) no preparation” and “(5) a lot of preparation” extremes.

  • 46% of job postings in Chile demand “(2) Some preparation”. It’s the country most concentrated in that area of demand by a considerable margin.

  • 30.5% of job postings in Argentina demand “(4) Considerable preparation”. It’s the country most concentrated in that area of demand by a moderate margin.

  • 29.6% of job postings in Uruguay demand “(3) Middle preparation”. It’s the country most concentrated in that area of demand by a moderate margin.

Code
zones_df<-country_var_count(data = south_cone_df,
                  category = "zones_label",
                  country=NULL)

zones_country_df<-country_var_count(data = south_cone_df,
                  category = "zones_label",
                  country="country_code")

zones_df %>% 
  ggplot(aes(x=zones_label,
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Zone",
       x=NULL,
       y=NULL)

Figure 19: ?(caption)
Code
zones_country_df %>% 
  filter(country_code=="ARG") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=zones_label,
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=13,
    angle = 65, hjust=1
   ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")


zones_country_df %>% 
  filter(country_code=="CHL") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=zones_label,
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=13,
    angle = 65, hjust=1
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

zones_country_df %>% 
  filter(country_code=="URY") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=zones_label,
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=13,
    angle = 65, hjust=1
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Figure 20: ?(caption)

Figure 21: ?(caption)

Figure 22: ?(caption)
Code
country_var_chart(zones_country_df,
                  country = "country_code",
                  category = "zones_label")[[1]]+
  scale_fill_manual(values=country_colors)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by zone",
       subtitle = "Only showing 20 most common",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Figure 23: Jobzones
Code
country_var_chart(zones_country_df,
                  country = "country_code",
                  category = "zones_label")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Job zones distribution in online vacancies data",
       x=NULL,
       fill=NULL,
       fill=NULL)

Code
country_var_chart(zones_country_df,
                  country = "country_code",
                  category = "zones_label")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Job zones distribution in online vacancies data",
       x=NULL,
       fill=NULL,
       fill=NULL)

Code
country_var_table(data_agg = zones_country_df,
                  category =  "zones_label",
                  country="country_code",
                  interactive = FALSE)

?(caption)

zones_label Vacancies % of Vacancies % of ARG % of CHL % of URY
(2) Algo de preparación 24478 41.31% 34.95% 45.92% 36.05%
(4) Preparación considerable 15175 25.61% 30.48% 22.33% 25.49%
(3) Preparación media 14530 24.52% 26.35% 22.98% 29.64%
(5) Mucha o extensa preparación 3612 6.10% 6.24% 5.99% 6.26%
(1) Poca o ninguna preparación 1460 2.46% 1.98% 2.78% 2.56%
sum 59,255.00 1.00 1.00 1.00 1.00

Job zones across sectors

Code
zones_sector_df<-country_var_count(south_cone_df,
                                  category = "zones_label",country="main_sector")

zones_sector_df %>% 
  ggplot(aes(x=main_sector,
             y=group_in_country_share))+
  geom_col(aes(fill=zones_label),position = "fill", color="black")+
  geom_label(data=group_by(zones_sector_df,main_sector) %>% 
              mutate(label_y=ifelse(group_in_country_share==max(group_in_country_share),
                             paste(round(group_in_country_share,2)*100,"%"),NA)),
            aes(label=label_y, color=zones_label),
            alpha=.9, size=2, position = position_fill(vjust = 0.5), show.legend = FALSE)+
  coord_flip()+
  scale_y_continuous(labels = scales::percent_format())+
  scale_fill_manual(values=RColorBrewer::brewer.pal(9,"YlGnBu")[c(3,4,5,7,9)])+
  scale_color_manual(values=RColorBrewer::brewer.pal(9,"YlGnBu")[c(3,4,5,7,9)])+
  labs(title = "Share of Zones in job postings by Main Sector",
       fill=NULL,
       color=NULL,
       y=NULL,
       x=NULL)

Figure 24: Jobzones by sector
Code
purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df,
       category = "zones_label",country="main_sector")  
[[1]]


[[2]]


[[3]]

Code
library(readxl)
sector_names<-readxl::read_excel("data/traducciones.xlsx", sheet=1)%>%
  janitor::clean_names()%>%
  rename(sector=1, sector_es=2)

charts_sectors<-purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df%>%
         filter(main_sector %in% sectors_focus)%>%
         left_join(sector_names,by=c("main_sector"="sector"))%>%
         mutate(main_sector=sector_es),
       category = "zones_label",country="main_sector")

charts_areas<-purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df,
       category = "zones_label",country="area")  
library(patchwork)
(charts_sectors[[1]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[1]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

(charts_sectors[[2]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[2]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

(charts_sectors[[3]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[3]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Figure 25: ?(caption)

Figure 26: ?(caption)

Figure 27: ?(caption)

Job zones across occupational groups

Code
zones_soc_df<-country_var_count(south_cone_df %>% 
                                  mutate(major_group_title = str_remove_all(major_group_title," Occupations")),
                                  category = "zones_label",
                                country="major_group_title")

zones_sector_df %>% 
  ggplot(aes(x=main_sector,
             y=group_in_country_share))+
  geom_col(aes(fill=zones_label),position = "fill", color="black")+
  geom_label(data=group_by(zones_sector_df,main_sector) %>% 
              mutate(label_y=ifelse(group_in_country_share==max(group_in_country_share),
                             paste(round(group_in_country_share,2)*100,"%"),NA)),
            aes(label=label_y, color=zones_label),
            alpha=.9, size=2, position = position_fill(vjust = 0.5), show.legend = FALSE)+
  coord_flip()+
  scale_y_continuous(labels = scales::percent_format())+
  scale_fill_manual(values=RColorBrewer::brewer.pal(9,"YlGnBu")[c(3,4,5,7,9)])+
  scale_color_manual(values=RColorBrewer::brewer.pal(9,"YlGnBu")[c(3,4,5,7,9)])+
  labs(title = "Share of Zones in job postings by Major SOC",
       fill=NULL,
       color=NULL,
       y=NULL,
       x=NULL)

Figure 28: Jobzones by soc
Code
purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df %>% mutate(major_group_title = str_remove_all(major_group_title," Occupations")),
       category = "zones_label",country="major_group_title")  
[[1]]


[[2]]


[[3]]

Work Schedule

  • 83% of postings seek to fill full-time positions.

  • 14% of postings in Chile correspond to “Other”. This needs clarification. This is associated with the lower than average share of full-time positions in Chile.

  • 5.7% of Chile job postings correspond to “Part-time” roles (about 1 percent point above average).

  • The contractor mode is more prevalent in Uruguay job postings (almost twice the average). This would be consistent with the rumors about many Uruguay firms outsourcing Argentinean workers. This should be corroborated with remote work data.

Code
schedule_df<-country_var_count(data = south_cone_df,
                                       category =  "schedule",country=NULL)
schedule_country_df<-country_var_count(data = south_cone_df,
                                       category =  "schedule",country="country_code")

country_var_table(data=schedule_country_df, 
                  category = "schedule",country="country_code",
                  interactive = FALSE)

?(caption)

schedule Vacancies % of Vacancies % of ARG % of CHL % of URY
Full-time 49201 83.03% 88.97% 78.58% 90.53%
Other 6278 10.59% 5.54% 14.36% 4.68%
Part-time 2822 4.76% 3.66% 5.65% 2.27%
Contractor 705 1.19% 1.26% 1.09% 2.07%
Internship 249 0.42% 0.56% 0.32% 0.44%
sum 59,255.00 1.00 1.00 1.00 1.00
Code
schedule_df %>% 
  ggplot(aes(x=reorder(schedule,
                       -group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste(round(group_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Schedule",
       x=NULL,
       y=NULL)

Figure 29: ?(caption)
Code
country_var_chart(schedule_country_df, 
                  category = "schedule",country="country_code")[[1]]+
  scale_fill_manual(values=country_colors)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of vacancies postings by Schedule",
       subtitle = "Only showing 20 most common",
       fill="Country",
       shape=NULL,
       x=NULL,
       y=NULL)

Figure 30: ?(caption)
Code
country_var_chart(schedule_country_df, 
                  category = "schedule",country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Schedule distribution in online vacancies data",
       x=NULL,
       fill=NULL)

Code
country_var_chart(schedule_country_df, 
                  category = "schedule",country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Schedule distribution in online vacancies data",
       x=NULL,
       fill=NULL)

Green jobs

  • Green jobs represent 15% of all online job postings.

  • Argentina has the highest share of green jobs in its’ job postings (17%).

  • Within green jobs, the most demanded are classified as “Green Increased Demand” (45%).

  • Green job postings in Argentina are 48% “Green Increased Demand”, 37% are “Green enhanced skills”, and 14% are “Green New & Emerging”.

  • Greener regions in terms of online vacancies are Santiago, Buenos Ares, Valparaíso, Concepción, Rosario, Córdoba y Antofagasta. Buenos aires, Concepción, Rosario, Córdoba, and Antofagasta are overrepresented in the sample of green online vacancies.

Code
green_country_df_1<-country_var_count(data = south_cone_df,
                                      category = "green_job_bin",country="country_code")
Code
country_var_table(data=green_country_df_1, category = "green_job_bin",country="country_code",
                  interactive = FALSE)
Table 3: Green jobs distribution
green_job_bin Vacancies % of Vacancies % of ARG % of CHL % of URY
FALSE 50273 84.84% 82.20% 86.67% 84.12%
TRUE 8982 15.16% 17.80% 13.33% 15.88%
sum 59,255.00 1.00 1.00 1.00 1.00
Code
country_var_chart(green_country_df_1, 
                  category = "green_job_bin",country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Green jobs distribution in online vacancies data",
       x=NULL, 
       fill=NULL)

Figure 31: Green jobs distribution, by country
Code
country_var_chart(green_country_df_1, 
                  category = "green_job_bin",country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Green jobs distribution in online vacancies data",
       x=NULL, 
       fill=NULL)

Code
## Decomposition of green jobs
green_country_df_2<-country_var_count(data = filter(south_cone_df,
                                                    green_job_bin==TRUE),
                                      category = "green_job",
                                      country="country_code")

country_var_table(data=green_country_df_2, 
                  category = "green_job",country="country_code",
                  interactive = FALSE)
Table 4: Green job types distribution
green_job Vacancies % of Vacancies % of ARG % of CHL % of URY
Green Increased Demand 4042 45.00% 48.43% 42.06% 42.86%
Green Enhanced Skills 3654 40.68% 37.59% 43.36% 42.24%
Green New & Emerging 1286 14.32% 13.98% 14.58% 14.91%
sum 8,982.00 1.00 1.00 1.00 1.00
Code
country_var_chart(green_country_df_2, 
                  category = "green_job",country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Green jobs types in online vacancies data",
       x=NULL,
       y=NULL,
       fill=NULL)

Figure 32: Green job types distribution by country
Code
country_var_chart(green_country_df_2, 
                  category = "green_job",country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Green jobs types in online vacancies data",
       x=NULL,
       y=NULL,
       fill=NULL)

Code
library(patchwork)

# Hole size
hsize <- 3

p1<-green_country_df_1 %>% 
  filter(country_code=="ARG") %>% 
  mutate(green_job_bin=ifelse(green_job_bin==TRUE,"Verde","Otros"))%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=green_job_bin, y=group_in_country_share, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=c("gray70",RColorBrewer::brewer.pal(3,"Greens")[[2]]))+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill="Tipo de\nTrabajo")

p2<-green_country_df_2 %>% 
  filter(country_code=="ARG")%>%
  ggplot(aes(x=1,y=group_in_country_share,fill=green_job))+
  geom_col(color="grey30")+
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  theme_void()+
  scale_fill_manual(  values=RColorBrewer::brewer.pal(3,"Greens"))+
  labs(fill="Tipo de\nTrabajo\nVerde")
p1+p2

p1<-green_country_df_1 %>% 
  filter(country_code=="CHL") %>% 
  mutate(green_job_bin=ifelse(green_job_bin==TRUE,"Verde","Otros"))%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=green_job_bin, y=group_in_country_share, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=c("gray70",RColorBrewer::brewer.pal(3,"Greens")[[2]]))+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill="Tipo de\nTrabajo")

p2<-green_country_df_2 %>% 
  filter(country_code=="CHL")%>%
  ggplot(aes(x=1,y=group_in_country_share,fill=green_job))+
  geom_col(color="grey30")+
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  theme_void()+
  scale_fill_manual(  values=RColorBrewer::brewer.pal(3,"Greens"))+
  labs(fill="Tipo de\nTrabajo\nVerde")
p1+p2

p1<-green_country_df_1 %>% 
  filter(country_code=="URY") %>% 
  mutate(green_job_bin=ifelse(green_job_bin==TRUE,"Verde","Otros"))%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=green_job_bin, y=group_in_country_share, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=c("gray70",RColorBrewer::brewer.pal(3,"Greens")[[2]]))+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill="Tipo de\nTrabajo")

p2<-green_country_df_2 %>% 
  filter(country_code=="URY")%>%
  ggplot(aes(x=1,y=group_in_country_share,fill=green_job))+
  geom_col(color="grey30")+
  geom_text(color="black",aes(label = paste(round(group_in_country_share,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  theme_void()+
  scale_fill_manual(  values=RColorBrewer::brewer.pal(3,"Greens"))+
  labs(fill="Tipo de\nTrabajo\nVerde")
p1+p2

Figure 33: ?(caption)

Figure 34: ?(caption)

Figure 35: ?(caption)

Location of green jobs across regions

Code
country_var_count(south_cone_df, 
                  category = "rm",
                  country="green_job_bin") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(green_job_bin==1) %>% 
  mutate(ratio=round(group_in_country_share/group_share,2)) %>% 
  mutate(share=scales::percent(group_in_country_share,accuracy=1),
         group_share=scales::percent(group_share, accuracy=2)) %>% 
  arrange(desc(group_vacancies)) %>%
  filter(group_vacancies>500) %>% 
  ungroup() %>% 
  select(region=rm,`Job postings`=group_vacancies,`share in all data`=group_share,`share in green`=group_in_country_share,ratio) %>% 
  head(15) %>% 
  kableExtra::kable()
Table 5: Green jobs distribution, by region
region Job postings share in all data share in green ratio
Santiago 13725 24% 0.1871521 0.81
Buenos Aires (GZM) 9313 16% 0.1845914 1.17
Valparaíso 4932 8% 0.0676909 0.81
Concepción 3774 6% 0.0659096 1.03
Rosario 2316 4% 0.0503229 1.29
Gran Temuco 2125 4% 0.0210421 0.59
Coquimbo 1913 4% 0.0292808 0.91
Córdoba 1810 4% 0.0360721 1.18
Mendoza 1734 2% 0.0338455 1.16
Antofagasta 1477 2% 0.0351815 1.41
Puerto Montt 1251 2% 0.0227121 1.08
Metropolitana 1220 2% 0.0218214 1.06
Tarapacá 1168 2% 0.0237141 1.20
Corrientes 1074 2% 0.0191494 1.06
Región Metropolitana Confluencia 889 2% 0.0180361 1.20
Code
country_var_count(south_cone_df, 
                  category = "rm",
                  country="green_job_bin")%>% 
  arrange(desc(group_vacancies)) %>% 
  filter(green_job_bin==TRUE) %>% 
  mutate(green_all_ratio=group_in_country_share/group_share) %>% 
  arrange(desc(green_all_ratio)) %>%
  filter(group_vacancies>500) %>% 
  ggplot(aes(x=group_share,y=group_in_country_share))+
  geom_point()+
  geom_label_repel(aes(label=rm), size=2)+
  geom_abline(slope = 1,intercept = 0)+
  coord_fixed()+
  labs(subtitle = "Those above the line are more intensive in green vacancies",
       caption = "Cities with more than 1000 job postings",
       y="Share of green jobs postings",
       x="Share of all job postings")

Green jobs across sectors

Code
charts_sectors<-purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df %>%
         mutate(green_job=ifelse(is.na(green_job),"Not Green",green_job))%>%
         filter(main_sector %in% sectors_focus)%>%
         left_join(sector_names,by=c("main_sector"="sector"))%>%
         mutate(main_sector=sector_es),
       category = "green_job",country="main_sector")  

charts_areas<-purrr::map(c("ARG","CHL","URY"),
       categories_barplot,
       data=south_cone_df %>%
         mutate(green_job=ifelse(is.na(green_job),"Not Green",green_job)),
       category = "green_job",country="area")  

(charts_sectors[[1]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[1]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

(charts_sectors[[2]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[2]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

(charts_sectors[[3]]+ labs(title = NULL)+
    theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())) +
  (charts_areas[[3]]+ labs(title = NULL))+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Figure 36: ?(caption)

Figure 37: ?(caption)

Figure 38: ?(caption)

Remote jobs

  • Only 1.8% of jobs postings are classified remote. This contrast with 10% and 12% for countries like New Zeland and Australia, which aren’t much different than Chile according to remote work surveys at the firm level. This gap is puzzling.

  • The occupational major groups with the highest shares of remote postings are “Legal”, “Business and Financial Operations”, “Personal Care and Service” (weird), “Management”, and “Computer and Mathematical” occupations, in that order. This is different from the ranking of remote online vacancies found by Lightcast in English speaking countries: “Computer and Mathematical”, “Business and Financial Operations”, “Legal”, “Management”, and “Architecture and Engineering.”

Code
remote_country_df<-country_var_count(data = mutate(south_cone_df,
                                           remote=
                                             ifelse(remote==1,"Remote/Hybrid","In-Person")),
                             category =  "remote",country="country_code")

country_var_table(data_agg =remote_country_df, 
                  category = "remote",country="country_code", interactive=FALSE)
Table 6: Remote work distribution
remote Vacancies % of Vacancies % of ARG % of CHL % of URY
In-Person 58174 98.18% 97.45% 98.67% 98.18%
Remote/Hybrid 1081 1.82% 2.55% 1.33% 1.82%
sum 59,255.00 1.00 1.00 1.00 1.00
Code
country_var_chart(remote_country_df, 
                  category = "remote",country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Remote/Hybrid jobs distribution in online vacancies data",
       x=NULL,
       y=NULL,
       fill=NULL)

Figure 39: Remote work distribution by country
Code
country_var_chart(remote_country_df, 
                  category = "remote",country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Remote/Hybrid jobs distribution in online vacancies data",
       x=NULL,
       y=NULL,
       fill=NULL)

Code
remote_df_2<-country_var_count(data = mutate(south_cone_df,
                                             remote=
                                               ifelse(remote==1,"Remote/Hybrid","In-Person")),
                             category = "remote",country="major_group_title")

remote_df_3<-rbind(
  country_var_count(data = mutate(
    filter(south_cone_df,country_code=="ARG"),
    remote=ifelse(remote==1,"Remote/Hybrid","In-Person")),
    category = "remote",country="major_group_title") %>% 
    mutate(country_code="ARG"),
  country_var_count(data = mutate(
    filter(south_cone_df,country_code=="CHL"),
    remote=ifelse(remote==1,"Remote/Hybrid","In-Person")),
    category = "remote",country="major_group_title") %>% 
    mutate(country_code="CHL"),
  country_var_count(data = mutate(
    filter(south_cone_df,country_code=="URY"),                            
    remote=ifelse(remote==1,"Remote/Hybrid","In-Person")),
    category = "remote",country="major_group_title") %>% 
    mutate(country_code="URY")
  )%>% 
  # I don't need the share of remote jobs in each country.
  select(-group_vacancies,-group_share) %>% 
  # I want only remote shares of major groups in each country
  filter(remote=="Remote/Hybrid") 

(chart_rmw<-remote_df_2 %>% 
  filter(remote=="Remote/Hybrid") %>% 
  ggplot(aes(
    x=reorder(str_remove(major_group_title,"Occupations"),-group_in_country_share),
    y=group_in_country_share))+
  geom_col(fill="gray80")+
  geom_line(aes(y=group_share,group=remote))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of remote/hybrid job postings by SOC major group",
       x=NULL))

Figure 40: ?(caption)
Code
remote_df_3 %>% 
  # attach the share of remote vacancies in each  group 
  left_join(remote_df_2 %>% 
              filter(remote=="Remote/Hybrid") %>%
              select(major_group_title,share_of_remote_in_group=group_in_country_share,
                     group_share)) %>% 
  # plot the share of remote vacancies in each group, by country
  ggplot(aes(x=reorder(str_remove(major_group_title,"Occupations"),-share_of_remote_in_group),
             y=group_in_country_share)) +
  geom_col(aes(fill=country_code))+
  geom_line(aes(y=group_share,group=remote))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  scale_fill_manual(values=country_colors)+
  facet_wrap(vars(country_code),ncol=1) + 
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of remote/hybrid job postings by SOC major group and country",
       fill=NULL,
       x=NULL)

Figure 41: ?(caption)

There are much fewer job vacancies classified as remote than there should be. These charts show it’s not consistent with Ligthcast estimates US, UK, Australia and New Zeland markets (Remote Work across Jobs, Companies, and Space by Stephen Hansen, Peter John Lambert, Nick Bloom, Steven J. Davis, Raffaella Sadun, Bledi Taska :: SSRN).

Code
chart_rmw+
  geom_hline(aes(yintercept=0.02,color="NZ in 2020"))+
  geom_hline(aes(yintercept=0.04,color="NZ in 2021"))+
  geom_hline(aes(yintercept=0.1,color="NZ in 2023"))+
  labs(color="Benchmark")

Figure 42: ?(caption)

bloom3 bloom4

The Global Survey of Work (GSWA) arrangements looked at 34 countries in April-May 2023 and reports that Latin American workers work from home (WFH) 0.9 days a week on average, (which coincides with the global average) while workers in New Zeland and Australia work from home an average of 1 and 1.3 days, respectively. According to this, we should expect job vacancies to show WFH rates in Chile (1.8%) to be at least close to New Zeland (10%).

The GSWA review also shows Chileans work from home more than Argentineans, which refuses our results. Of course, this could be due to the difference in sector compositions between the GSWA and online job vacancies.

Figure 43: bloom1

How does it allign with Dingle & Neiman (2020)?

Does a 1.8% share of remote job vacancies make sense when we take into account the occupations these vacancies are concentrated in? We’ll use Dingle and Neiman (2020) definition of teleworkable occupations and calculate the percentage of online job postings that fall in that category and see whether it’s lower than the observed in English Speaking countries. Dingle and Neiman (2020) define teleworkable occupations as those not involving evidently ‘in-place’ actvities and can be perfomed remotely.

Take a look at the classification of a few occupations:

Code
onet_teleworkable<-read_csv("raw/ONET_28_0/onet_teleworkable_r.csv")

print(paste(c(
  "Not-teleworkable occupations in ONET 28.0:",
  "Teleworkable occupations in ONET 28.0:"),
  table(onet_teleworkable$teleworkable),"(",
  round(table(onet_teleworkable$teleworkable)/nrow(onet_teleworkable),2),")"
  )
  )

[1] “Not-teleworkable occupations in ONET 28.0: 565 ( 0.65 )” [2] “Teleworkable occupations in ONET 28.0: 308 ( 0.35 )”

Code
kableExtra::kable(head(select(onet_teleworkable,1:5)))
o_net_soc_code title n teleworkable physical_activities
11-1011.00 Chief Executives 29.50 1 0
11-1011.03 Chief Sustainability Officers 27.00 1 0
11-1021.00 General and Operations Managers 31.75 1 0
11-2011.00 Advertising and Promotions Managers 20.50 1 0
11-2021.00 Marketing Managers 39.75 1 0
11-2022.00 Sales Managers 23.00 1 0

The table below show the share of online vacancies in occupations that could be performed remotely. 42% of all job vacancies could be feasibly performed from home according the the average work context and activities of the occupations they were assigned on. It’s below the 50% share I spotted on US job postings between 2020-2021, but it’s large considering they only account for 35% of all occupational codes and around 35% of employment in the US at the onset of the pandemic.

Code
teleworkable_by_country<-south_cone_df %>%
  select(country_code,o_net_soc_2019_code) %>% 
  left_join(onet_teleworkable %>% 
              select(o_net_soc_code,teleworkable),
            by=c("o_net_soc_2019_code"="o_net_soc_code"))

teleworkable_by_country<- country_var_count(
  data = teleworkable_by_country,
  category = "teleworkable",country="country_code")

country_var_table(data=teleworkable_by_country,
                      category = "teleworkable",country="country_code",
                      interactive=FALSE)

?(caption)

teleworkable Vacancies % of Vacancies % of ARG % of CHL % of URY
0 34097 57.54% 53.56% 60.41% 54.59%
1 24874 41.98% 46.05% 39.05% 45.02%
NA 284 0.48% 0.39% 0.54% 0.39%
sum 1.00 59,255.00 1.00 1.00 1.00 1.00
Code
# gap_by_group<-
#   bind_rows(
#   south_cone_df %>%
#   select(country_code,o_net_soc_2019_code,remote,major_group,major_group_title) %>% 
#   left_join(onet_teleworkable %>% 
#               select(o_net_soc_code,teleworkable),
#             by=c("o_net_soc_2019_code"="o_net_soc_code"))  %>% 
#   summarise(n=n(),remote=mean(remote),
#             teleworkable=mean(teleworkable,na.rm = T)) %>% 
#   mutate(major_group_title="Overall"),
#   south_cone_df %>%
#   select(country_code,o_net_soc_2019_code,remote,major_group,major_group_title) %>% 
#   left_join(onet_teleworkable %>% 
#               select(o_net_soc_code,teleworkable),
#             by=c("o_net_soc_2019_code"="o_net_soc_code"))  %>% 
#   group_by(major_group_title) %>% 
#   summarise(n=n(),remote=mean(remote),
#             teleworkable=mean(teleworkable,na.rm = T))
# ) %>% 
#   ungroup() %>% 
#   arrange(desc(n))
# 
# gap_by_group %>% 
#   pivot_longer(cols = c("teleworkable", "remote")) %>% 
#   ggplot(aes(x=reorder(str_remove(major_group_title," Occupations"),n),y=value))+
#   geom_line()+
#   geom_point(aes(color=name),size=3)+
#   coord_flip()+
#   labs(x=NULL)
#   
# south_cone_df %>%
#   select(country_code,o_net_soc_2019_code,occupation,remote,major_group,major_group_title) %>% 
#   left_join(onet_teleworkable %>% 
#               select(o_net_soc_code,teleworkable),
#             by=c("o_net_soc_2019_code"="o_net_soc_code"))  %>% 
#   filter(major_group_title=="Transportation and Material Moving Occupations" & teleworkable==TRUE) %>% 
#   distinct(o_net_soc_2019_code,occupation)
#   

Is there any spatial concentration pattern in remote postings?

  • Do we see more remote postings in large or small cities? No at a glance. It’d be worth controlling for sectorial composition of employment to test this hypothesis.
Code
country_var_count(data = south_cone_df, category = "rm",country="remote") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(remote==1) %>% 
  mutate(ratio=round(group_in_country_share/group_share,2)) %>% 
  mutate(share=scales::percent(group_in_country_share,accuracy=2),
         group_share=scales::percent(group_share, accuracy=2)) %>% 
  arrange(desc(group_vacancies)) %>%
  filter(group_vacancies>500) %>% 
  ungroup() %>% 
  select(region=rm,`Job postings`=group_vacancies,`share in all data`=group_share,`share in remote`=share,ratio) %>% 
  head(15) %>% 
  kableExtra::kable()
Table 7: Remote work by Metropolitan Region
region Job postings share in all data share in remote ratio
Santiago 13725 24% 24% 1.02
Buenos Aires (GZM) 9313 16% 20% 1.22
Valparaíso 4932 8% 4% 0.47
Concepción 3774 6% 4% 0.49
Rosario 2316 4% 4% 0.95
Gran Temuco 2125 4% 0% 0.13
Coquimbo 1913 4% 2% 0.83
Córdoba 1810 4% 4% 1.54
Mendoza 1734 2% 4% 1.52
Antofagasta 1477 2% 2% 0.78
Puerto Montt 1251 2% 2% 0.53
Metropolitana 1220 2% 2% 1.39
Tarapacá 1168 2% 0% 0.47
Corrientes 1074 2% 4% 2.35
Región Metropolitana Confluencia 889 2% 2% 0.74
Code
country_var_count(data=south_cone_df,category = "rm",country="remote") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(remote==TRUE) %>% 
  mutate(remote_all_ratio=group_in_country_share/group_share) %>% 
  arrange(desc(remote_all_ratio)) %>% 
  filter(group_vacancies>1000) %>% 
  ggplot(aes(x=group_share,y=group_in_country_share))+
  geom_point()+
  geom_label_repel(aes(label=rm), size=2)+
  geom_abline(slope = 1,intercept = 0)+
  coord_fixed()+
  labs(subtitle = "Those above the line are more intensive in remote postings",
       caption = "Cities with more than 1000 job postings",
       y="Share of remote job postings",
       x="Share of all job postings")

Figure 44: ?(caption)

Which firms are hiring remotely?

Code
country_var_count(data = south_cone_df, category ="remote",country= "firm") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(remote==1) %>% 
  mutate(ratio=round(group_in_country_share/group_share,2)) %>% 
  mutate(share=scales::percent(group_in_country_share,accuracy=2),
         group_share=scales::percent(group_share, accuracy=2)) %>% 
  arrange(desc(count)) %>%
  filter(country_vacancies>100) %>% 
  ungroup() %>% 
  select(Firm=firm,`Firm postings`=country_vacancies,`Remote Postings`=count,`% Remote`=share) %>% 
  head(20) %>% 
  kableExtra::kable()
Table 8: Firms hiring remote ot hybrid
Firm Firm postings Remote Postings % Remote
Confidencial 2890 71 2%
Emprego 2447 67 2%
Wurth Argentina S.a 132 24 18%
Mendoza, Capital, Mendoza, Argentina 327 13 4%
Buenos Aires, CABA, Argentina 225 8 4%
Entel Empresa de Contact Center 118 7 6%
ACTIVOS CHILE 448 6 2%
ADN - Recursos Humanos 125 6 4%
Babysits 217 6 2%
Tawa 257 6 2%
Manpower Chile 677 5 0%
ManpowerGroup 308 5 2%
Adecco Chile 678 4 0%
ECRGROUP® Chile 118 3 2%
Emprego CL C2 122 3 2%
Neuquén, Argentina 105 3 2%
Progestion Chile 890 3 0%
Randstad AR 121 3 2%
Cygnus 569 2 0%
Grupo Gestión 259 2 0%

Type II Errors: Classified as non-remote nor hybrid when they are

There are plenty of cases like this. The table below shows the number of type 2 errors we found in Argentinean data:

Code
pattern<-"\\b[hH]\\w*brid"

review<-read_parquet("raw/arg_new_dict.parquet") %>%  
  distinct(firm,descrip,job_name,occupation,remote) %>% 
  filter(str_detect(descrip,pattern))

review %>% 
  mutate(error=!remote) %>% 
  group_by(`Type II Error`=error) %>% 
  summarise(Count=n(),
            Share=n()/nrow(review))%>% 
  mutate(Share=scales::percent(Share, accuracy=2)) %>% 
  kableExtra::kable()
Table 9: Postings mentioning remote or hybrid format but not classified as WFH
Type II Error Count Share
FALSE 95 14%
TRUE 553 86%

Below we show the list of some examples:

Code
review %>% 
  head(10) %>% 
  mutate(match_position_1=str_locate(descrip,pattern)[,"start"],
         match_position_2=str_locate(descrip,pattern)[,"end"]) %>% 
  mutate(description_extract=paste0("...",substr(descrip,match_position_1-20,match_position_2+20),"...")) %>% 
  select(Firma=firm,Position=job_name,`WFH piece`=description_extract,`Work from Home`=remote)%>% 
  kableExtra::kable()
Firma Position WFH piece Work from Home
Umbral Capital Humano Operador de flota propia | (SJ095) ...itivo. - Modalidad híbrida. - Reales oportun... FALSE
Umbral Capital Humano Supervisor de Limpieza - Mendoza, Luján de Cuyo ...odalidad de trabajo híbrida... FALSE
Umbral Capital Humano técnicos electromecánicos y electrónicos ...itivo. - Modalidad híbrida. - Reales oportun... FALSE
Umbral Capital Humano Ingeniero de procesos mendoza ...odalidad de trabajo híbrida... FALSE
Umbral Capital Humano MZ534 Mendoza Operario de Mantenimiento Industrial ...itivo. - Modalidad híbrida. - Reales oportun... FALSE
Umbral Capital Humano Promotora - Activacio n en Punto de venta ...itivo. - Modalidad híbrida. - Reales oportun... FALSE
Camera di Commercio Italiana nella Repubblica Argentina CDC Personal de Depósito - Zona Oeste ... central. Modalidad Híbrida (2 días en las ofi... TRUE
Adlatina Group EMPLEADO DE MOSTRADOR, LOCAL DE SANITARIOS ...cios. El trabajo es híbrido en la zona del cen... FALSE
Grupo Myth Administrativo Contable ... 18hs en un esquema hibrido de 3 dias en la of... TRUE
Camera di Commercio Italiana nella Repubblica Argentina pasante - ingenieria de produccion ... central. Modalidad Híbrida (2 días en las ofi... TRUE

Knowledge Jobs

  • 33% of all online vacancies were classified as belonging to knowledge sectors.

  • Argentina, with a 38% is the country with the highest intensity in these job postings.

  • The occupational groups these knowledge vacancies belong to sound like occupational groups a knowledge firm will require to function.

Code
area_country_df_1<-country_var_count(data = south_cone_df,
                             category = "area",country="country_code")


country_var_table(area_country_df_1, category = "area",country="country_code",
                  interactive = FALSE)

?(caption)

area Vacancies % of Vacancies % of ARG % of CHL % of URY
No conocimiento 39266 66.27% 61.67% 69.51% 63.86%
Conocimiento 19989 33.73% 38.33% 30.49% 36.14%
sum 59,255.00 1.00 1.00 1.00 1.00
Code
country_var_chart(area_country_df_1,
                  category = "area",
                  country="country_code")[[2]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Knowledge sector distribution in online vacancies data",
       x=NULL,
       fill=NULL)


country_var_chart(area_country_df_1,
                  category = "area",
                  country="country_code")[[3]]+
  scale_fill_manual(values=country_colors)+
  labs(subtitle="Knowledge sector distribution in online vacancies data",
       x=NULL,
       fill=NULL)

Figure 45: TRUE

Figure 46: TRUE

Knowledge jobs across occupations

Code
area_df_2<-country_var_count(data = south_cone_df,
                             category = "area",country="major_group_title")

area_df_3<-rbind(
  country_var_count(data = filter(south_cone_df,country_code=="ARG"),
                             category = "area",country="major_group_title") %>% 
        mutate(country_code="ARG"),
  country_var_count(data = filter(south_cone_df,country_code=="CHL"),
                             category = "area",country="major_group_title")%>% 
        mutate(country_code="CHL"),
  country_var_count(data = filter(south_cone_df,country_code=="URY"),
                             category = "area",country="major_group_title")%>% 
        mutate(country_code="URY")
  )%>% 
  # I don't need the share of area jobs in each country.
  select(-group_vacancies,-group_share) %>% 
  # I want only area shares of the knowledge groups in each country
  filter(area=="Conocimiento") 

area_df_2 %>% 
  filter(area=="Conocimiento") %>% 
  ggplot(aes(x=reorder(str_remove(major_group_title,"Occupations"),-group_in_country_share),
             y=group_in_country_share))+
  geom_col(fill="gray80")+
  geom_line(aes(y=group_share,group=area))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of Knowledge job postings by SOC major group",
       x=NULL)

Figure 47: ?(caption)
Code
area_df_3 %>% 
  # attach the share of remote vacancies in each  group 
  left_join(area_df_2 %>% 
              filter(area=="Conocimiento") %>%
              select(major_group_title,share_of_remote_in_group=group_in_country_share,
                     group_share)) %>% 
  # plot the share of remote vacancies in each group, by country
  ggplot(aes(x=reorder(str_remove(major_group_title,"Occupations"),-share_of_remote_in_group),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_line(aes(y=group_share,group=area))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  scale_fill_manual(values=country_colors)+
  facet_wrap(vars(country_code),ncol=1) + 
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of knowledge job postings by SOC major group and country",
       x=NULL,
       y=NULL,
       fill=NULL)

Figure 48: ?(caption)

Knowledge jobs across industries

Code
area_sector_df<-country_var_count(data = south_cone_df ,
                  category = "area",country="main_sector")

area_sector_df %>% 
  filter(area=="Conocimiento") %>% 
  ggplot(aes(x=reorder(main_sector,-group_in_country_share),
             y=group_in_country_share))+
  geom_col(fill="gray80")+
  geom_line(aes(y=group_share,group=area))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of Knowledge job postings by Main Sector",
       x=NULL)

Figure 49: ?(caption)
Code
area_sector_country_df<-rbind(
  country_var_count(data = filter(south_cone_df ,country_code=="ARG"),
                             category = "area",country="main_sector") %>% 
        mutate(country_code="ARG"),
  country_var_count(data = filter(south_cone_df ,country_code=="CHL"),
                             category = "area",country="main_sector")%>% 
        mutate(country_code="CHL"),
  country_var_count(data = filter(south_cone_df ,country_code=="URY"),
                             category = "area",country="main_sector")%>% 
        mutate(country_code="URY")
  )%>% 
  # I don't need the share of area jobs in each country.
  select(-group_vacancies,-group_share) %>% 
  # I want only area shares of the knowledge groups in each country
  filter(area=="Conocimiento") 

area_sector_country_df %>% 
  # attach the share of remote vacancies in each  group 
  left_join(area_sector_df %>% 
              filter(area=="Conocimiento") %>%
              select(main_sector,share_of_remote_in_group=group_in_country_share,
                     group_share)) %>% 
  # plot the share of remote vacancies in each group, by country
  ggplot(aes(x=reorder(main_sector,-share_of_remote_in_group),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_line(aes(y=group_share,group=area))+
  geom_text(aes(label=paste(round(group_in_country_share,3)*100,"%")), size=3)+
  scale_fill_manual(values=country_colors)+
  facet_wrap(vars(country_code),ncol=1) + 
  theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = "Share of knowledge job postings by Main Sector and country",
       x=NULL,
       y=NULL,
       fill=NULL)

Figure 50: ?(caption)

Location of Knowledge jobs across regions

Code
country_var_count(data=south_cone_df,category = "rm",country="area") %>% 
  arrange(desc(group_vacancies)) %>% 
  filter(area=="Conocimiento") %>% 
  mutate(ratio=round(group_in_country_share/group_share,2)) %>% 
  mutate(share=scales::percent(group_in_country_share,accuracy=2),
         group_share=scales::percent(group_share, accuracy=2)) %>% 
  arrange(desc(group_vacancies)) %>%
  filter(group_vacancies>500) %>% 
  ungroup() %>% 
  select(Region=rm,`Job postings`=group_vacancies,
         `% in all data`=group_share,
         `% in knowledge jobs`=share,Ratio=ratio) %>% 
  head(15) %>% 
  kableExtra::kable()

?(caption)

Region Job postings % in all data % in knowledge jobs Ratio
Santiago 13725 24% 20% 0.86
Buenos Aires (GZM) 9313 16% 16% 1.07
Valparaíso 4932 8% 8% 0.88
Concepción 3774 6% 6% 0.93
Rosario 2316 4% 4% 1.14
Gran Temuco 2125 4% 4% 0.91
Coquimbo 1913 4% 2% 0.88
Córdoba 1810 4% 4% 1.17
Mendoza 1734 2% 4% 1.18
Antofagasta 1477 2% 4% 1.27
Puerto Montt 1251 2% 2% 0.97
Metropolitana 1220 2% 2% 1.10
Tarapacá 1168 2% 2% 1.16
Corrientes 1074 2% 2% 1.21
Región Metropolitana Confluencia 889 2% 2% 1.29

Regions

There are three geographic aggregation variables in the data. This is the count of unique, missing, and empty values each of these indicator has.

Code
south_cone_df %>% 
  group_by(country_code) %>% 
  select(rm, city,city_name) %>% 
  skimr::skim() %>% as_tibble() %>% 
  select(-character.min,-character.max) %>% 
  kableExtra::kable()

?(caption)

skim_type skim_variable country_code n_missing complete_rate character.empty character.n_unique character.whitespace
character rm ARG 0 1 0 24 0
character rm CHL 0 1 0 18 0
character rm URY 0 1 0 6 0
character city ARG 0 1 6 800 0
character city CHL 0 1 0 239 0
character city URY 0 1 0 70 0
character city_name ARG 0 1 0 216 0
character city_name CHL 0 1 0 201 0
character city_name URY 0 1 0 48 0

And this is a list of the most frequent regions of each country

Code
country_var_count(data = south_cone_df, 
                  country = "country_code",
                  category = "rm")%>%
  country_region_table(country = "country_code",
                     category = "rm",top_number = 20)
Table 10: Main regions, by country
rank ARG CHL URY
1 Buenos Aires (GZM), 9313 (40.4%) Santiago, 13725 (40.2%) Metropolitana, 1220 (60.2%)
2 Rosario, 2316 (10%) Valparaíso, 4932 (14.4%) Este, 347 (17.1%)
3 Córdoba, 1810 (7.8%) Concepción, 3774 (11%) Centro, 127 (6.3%)
4 Mendoza, 1734 (7.5%) Gran Temuco, 2125 (6.2%) Sur, 117 (5.8%)
5 Corrientes, 1074 (4.7%) Coquimbo, 1913 (5.6%) Norte, 111 (5.5%)
6 Región Metropolitana Confluencia, 889 (3.9%) Antofagasta, 1477 (4.3%) Noreste, 106 (5.2%)
7 Entre Ríos, 796 (3.5%) Puerto Montt, 1251 (3.7%) NA
8 Tucumán, 624 (2.7%) Tarapacá, 1168 (3.4%) NA
9 Chaco, 622 (2.7%) Rancagua, 521 (1.5%) NA
10 Santiago del Estero, 544 (2.4%) Copiapó, 399 (1.2%) NA
11 Valle de Lerma (AMVL), 435 (1.9%) Valdivia, 396 (1.2%) NA
12 Chubut, 414 (1.8%) Calama, 381 (1.1%) NA
13 San Luis, 376 (1.6%) Arica-Paranicota, 367 (1.1%) NA
14 Mar del Plata, 343 (1.5%) Osorno, 361 (1.1%) NA
15 San Fernando del Valle de Catamarca, 328 (1.4%) Punta Arenas, 348 (1%) NA
16 Posadas-Garupá-Candelaria (AMPGC), 265 (1.1%) Talca, 347 (1%) NA
17 San Salvador de Jujuy, 259 (1.1%) Chillán, 339 (1%) NA
18 Bahía Blanca, 250 (1.1%) Curicó, 339 (1%) NA
19 VIRCH-Valdés, 193 (0.8%) NA NA
20 La Rioja, 183 (0.8%) NA NA

Cities in Argentina

This is a look at the most frequent values of ‘city’ and ‘city_name’ in Argentina:

  • city captures lots of company names and shows low levels of detail within Buenos Aires and other regions.

  • I haven’t seen any company names within ‘city_name’ values. It looks like the best variable to use.

Code
## ya vimos que tiene problemas
# region_count(data = south_cone_df %>% 
#   filter(country_code=="ARG") %>% 
#   inner_join(south_cone_df %>% 
#                group_by(country_code,rm) %>% 
#                summarise(count=n()) %>% 
#                top_n(5,count)),
#   country = "rm",
#   region="city",
#   top_number = 20)

south_cone_df %>%
               filter(country_code=="ARG") %>%
               group_by(country_code,rm) %>%
               summarise(count_vacancies=n()) %>%
               top_n(10,count_vacancies) %>%
  arrange(desc(count_vacancies)) %>%
  inner_join(south_cone_df %>%
  distinct(rm,city_name) %>%
  group_by(rm) %>%
  summarise(count_cities=n()) ) %>%
  ungroup() %>% 
  select(`Región Metropolitana`=rm,
         `Vacantes`=count_vacancies,
         Ciudaddes=count_cities) %>% 
  kableExtra::kable()
country_var_count(data = south_cone_df %>% 
                    inner_join(south_cone_df %>%
                    filter(country_code=="ARG") %>% 
                    group_by(country_code,rm) %>% 
                    summarise(count=n()) %>% 
                    top_n(5,count)), 
                  country = "rm",
                  category = "city_name") %>%
  country_region_table(country = "rm",
                  category = "city_name",
                  top_number = 20)

true

Región Metropolitana Vacantes Ciudaddes
Buenos Aires (GZM) 9313 42
Rosario 2316 25
Córdoba 1810 50
Mendoza 1734 14
Corrientes 1074 9
Región Metropolitana Confluencia 889 11
Entre Ríos 796 14
Tucumán 624 10
Chaco 622 3
Santiago del Estero 544 5
rank Buenos Aires (GZM) Corrientes Córdoba Mendoza Rosario
1 Buenos Aires, 521 (5.6%) Corrientes, 306 (28.5%) Capital, 450 (24.9%) Mendoza, 345 (19.9%) Santa Fe, 394 (17%)
2 Vicente López, 417 (4.5%) Ituzaingó, 276 (25.7%) Córdoba, 209 (11.5%) Mendoza Capital, 298 (17.2%) Rosario, 362 (15.6%)
3 Quilmes, 390 (4.2%) Bella Vista, 189 (17.6%) Río Cuarto, 175 (9.7%) Luján de Cuyo, 217 (12.5%) San Justo, 257 (11.1%)
4 San Isidro, 349 (3.7%) Mercedes, 176 (16.4%) Alta Gracia, 151 (8.3%) Godoy Cruz, 205 (11.8%) Granadero Baigorria, 159 (6.9%)
5 La Matanza, 338 (3.6%) Santo Tomé, 53 (4.9%) Monte Cristo, 89 (4.9%) Maipú, 162 (9.3%) Villa Gobernador Gálvez, 156 (6.7%)
6 Zárate, 336 (3.6%) Goya, 45 (4.2%) Malagueño, 83 (4.6%) San Rafael, 137 (7.9%) Arroyo Seco, 153 (6.6%)
7 Morón, 332 (3.6%) Paso de los Libres, 14 (1.3%) Colón, 73 (4%) San Martín, 111 (6.4%) Alvear, 144 (6.2%)
8 La Plata, 331 (3.6%) Curuzú Cuatiá, 12 (1.1%) Oncativo, 67 (3.7%) Las Heras, 68 (3.9%) Pérez, 140 (6%)
9 Avellaneda, 324 (3.5%) Monte Caseros, 3 (0.3%) Villa Allende, 67 (3.7%) Guaymallén, 60 (3.5%) Álvarez, 136 (5.9%)
10 General San Martín, 322 (3.5%) NA Villa Carlos Paz, 55 (3%) Lavalle, 60 (3.5%) Funes, 103 (4.4%)
11 Ezeiza, 296 (3.2%) NA Jesús María, 47 (2.6%) Tupungato, 29 (1.7%) La Capital, 77 (3.3%)
12 Campana, 294 (3.2%) NA La Calera, 46 (2.5%) Tunuyán, 23 (1.3%) Villa Constitución, 53 (2.3%)
13 Lanús, 279 (3%) NA Cosquín, 25 (1.4%) General Alvear, 10 (0.6%) General Lagos, 46 (2%)
14 Lomas de Zamora, 275 (3%) NA Sinsacate, 23 (1.3%) Malargüe, 9 (0.5%) San Lorenzo, 42 (1.8%)
15 Almirante Brown, 270 (2.9%) NA Mendiolaza, 21 (1.2%) NA Capitán Bermúdez, 29 (1.3%)
16 Isidro Casanova, 267 (2.9%) NA Estación Juárez Celman, 20 (1.1%) NA Pueblo Esther, 17 (0.7%)
17 Tigre, 251 (2.7%) NA Juárez Celman, 18 (1%) NA Fray Luis Beltrán, 16 (0.7%)
18 Esteban Echeverría, 246 (2.6%) NA Unquillo, 16 (0.9%) NA Castellanos, 11 (0.5%)
19 Luján, 245 (2.6%) NA Cruz del Eje, 15 (0.8%) NA Soldini, 6 (0.3%)
20 Escobar, 244 (2.6%) NA Calamuchita, 14 (0.8%) NA Pavón, 4 (0.2%)

Cities in Chile

This is a look at the most frequent values of ‘city’ and ‘city_name’ in Chile:

  • city evidently has low levels of details within Santiago and Valparaiso Regions.

  • city_name has more granularity, but there are lots of cases where it defaults to region name (when it doesn’t guess a city name, imputes the Region name)

Code
## Vimos que city_name es mejor
# region_count(data = south_cone_df %>% 
#   filter(country_code=="CHL") %>% 
#   inner_join(south_cone_df %>% 
#                group_by(country_code,rm) %>% 
#                summarise(count=n()) %>% 
#                top_n(5,count)),
#   country = "rm",
#   region="city",
#   top_number = 20)

south_cone_df %>%
               filter(country_code=="CHL") %>%
               group_by(country_code,rm) %>%
               summarise(count_vacancies=n()) %>%
               top_n(10,count_vacancies) %>%
  arrange(desc(count_vacancies)) %>%
  inner_join(south_cone_df %>%
  distinct(rm,city_name) %>%
  group_by(rm) %>%
  summarise(count_cities=n()) ) %>%
  ungroup() %>% 
  select(`Región Metropolitana`=rm,
         `Vacantes`=count_vacancies,
         Ciudaddes=count_cities) %>% 
  kableExtra::kable()
Región Metropolitana Vacantes Ciudaddes
Santiago 13725 52
Valparaíso 4932 38
Concepción 3774 33
Gran Temuco 2125 32
Coquimbo 1913 15
Antofagasta 1477 8
Puerto Montt 1251 4
Tarapacá 1168 7
Rancagua 521 2
Copiapó 399 1
Code
country_var_count(data = south_cone_df %>% 
                    inner_join(south_cone_df %>%
                    filter(country_code=="CHL") %>% 
                    group_by(country_code,rm) %>% 
                    summarise(count=n()) %>% 
                    top_n(5,count)), 
                  country = "rm",
                  category = "city_name") %>%
  country_region_table(country = "rm",
                  category = "city_name",
                  top_number = 20)
rank Concepción Coquimbo Gran Temuco Santiago Valparaíso
1 Concepción, 470 (12.5%) Coquimbo, 362 (18.9%) Temuco, 404 (19%) Ñuñoa, 497 (3.6%) Viña del Mar, 322 (6.5%)
2 Chiguayante, 435 (11.5%) Ovalle, 293 (15.3%) Angol, 229 (10.8%) Huechuraba, 469 (3.4%) Valparaíso, 297 (6%)
3 Los Ángeles, 330 (8.7%) La Serena, 281 (14.7%) Villarrica, 197 (9.3%) Renca, 441 (3.2%) Los Andes, 290 (5.9%)
4 Talcahuano, 302 (8%) Salamanca, 221 (11.6%) Pucón, 184 (8.7%) Pudahuel, 419 (3.1%) Quilpué, 278 (5.6%)
5 Coronel, 280 (7.4%) Illapel, 197 (10.3%) Victoria, 179 (8.4%) Santiago, 416 (3%) San Antonio, 277 (5.6%)
6 Hualpén, 240 (6.4%) Los Vilos, 137 (7.2%) Lautaro, 150 (7.1%) Quilicura, 405 (3%) La Calera, 275 (5.6%)
7 San Pedro de la Paz, 211 (5.6%) Monte Patria, 107 (5.6%) Nueva Imperial, 98 (4.6%) Lampa, 379 (2.8%) Quillota, 257 (5.2%)
8 Penco, 200 (5.3%) Canela, 103 (5.4%) Pitrufquén, 94 (4.4%) San Bernardo, 377 (2.7%) San Felipe, 250 (5.1%)
9 Tomé, 175 (4.6%) Vicuña, 63 (3.3%) Gorbea, 56 (2.6%) San Joaquín, 364 (2.7%) Concón, 234 (4.7%)
10 Lota, 159 (4.2%) Río Hurtado, 54 (2.8%) Loncoche, 52 (2.4%) Cerrillos, 361 (2.6%) Casablanca, 223 (4.5%)
11 Curanilahue, 132 (3.5%) Andacollo, 35 (1.8%) Padre Las Casas, 52 (2.4%) Colina, 350 (2.6%) Limache, 221 (4.5%)
12 Lebu, 119 (3.2%) Punitaqui, 24 (1.3%) Collipulli, 49 (2.3%) La Florida, 349 (2.5%) Quintero, 219 (4.4%)
13 Cabrero, 103 (2.7%) Combarbalá, 16 (0.8%) Freire, 46 (2.2%) La Reina, 341 (2.5%) Puchuncaví, 187 (3.8%)
14 Cañete, 99 (2.6%) La Higuera, 12 (0.6%) Traiguén, 42 (2%) Recoleta, 341 (2.5%) Cartagena, 158 (3.2%)
15 Arauco, 91 (2.4%) Paihuano, 8 (0.4%) Carahue, 41 (1.9%) Las Condes, 334 (2.4%) Villa Alemana, 155 (3.1%)
16 Nacimiento, 76 (2%) NA Cholchol, 33 (1.6%) Macul, 324 (2.4%) El Quisco, 129 (2.6%)
17 Mulchén, 50 (1.3%) NA Curacautín, 28 (1.3%) Vitacura, 316 (2.3%) Olmué, 124 (2.5%)
18 Santa Juana, 40 (1.1%) NA Renaico, 27 (1.3%) San Miguel, 311 (2.3%) Llay-Llay, 121 (2.5%)
19 Laja, 37 (1%) NA Cunco, 24 (1.1%) Maipú, 304 (2.2%) Algarrobo, 108 (2.2%)
20 Florida, 33 (0.9%) NA Vilcún, 23 (1.1%) Melipilla, 293 (2.1%) Nogales, 84 (1.7%)

Cities in Uruguay

This is a look at the most frequent values of ‘city’ and ‘city_name’ in Uruguay:

  • city doens’t look as bad as in Argentina and Chile.

  • city_name offers more granularity within region “Metropolitana”

Code
## Vimos que city name es mejor
# region_count(data = south_cone_df %>% 
#   filter(country_code=="URY") %>% 
#   inner_join(south_cone_df %>% 
#                group_by(country_code,rm) %>% 
#                summarise(count=n()) %>% 
#                top_n(5,count)),
#   country = "rm",
#   region="city",
#   top_number = 20)

south_cone_df %>%
               filter(country_code=="URY") %>%
               group_by(country_code,rm) %>%
               summarise(count_vacancies=n()) %>%
               top_n(10,count_vacancies) %>%
  arrange(desc(count_vacancies)) %>%
  inner_join(south_cone_df %>%
  distinct(rm,city_name) %>%
  group_by(rm) %>%
  summarise(count_cities=n()) ) %>%
  ungroup() %>% 
  select(`Región Metropolitana`=rm,
         `Vacantes`=count_vacancies,
         Ciudaddes=count_cities) %>% 
  kableExtra::kable()
Región Metropolitana Vacantes Ciudaddes
Metropolitana 1220 23
Este 347 7
Centro 127 4
Sur 117 7
Norte 111 3
Noreste 106 4
Code
country_var_count(data = south_cone_df %>% 
                    inner_join(south_cone_df %>%
                    filter(country_code=="URY") %>% 
                    group_by(country_code,rm) %>% 
                    summarise(count=n()) %>% 
                    top_n(5,count)), 
                  country = "rm",
                  category = "city_name") %>%
  country_region_table(country = "rm",
                  category = "city_name",
                  top_number = 20)
rank Centro Este Metropolitana Norte Sur
1 Florida, 77 (60.6%) Maldonado, 177 (51%) Montevideo, 388 (31.8%) Salto, 54 (48.6%) Colonia, 60 (51.3%)
2 Durazno, 24 (18.9%) Punta del Este, 83 (23.9%) Ciudad de la Costa, 151 (12.4%) Paysandú, 52 (46.8%) Dolores, 13 (11.1%)
3 Flores, 19 (15%) Minas, 22 (6.3%) Canelones, 115 (9.4%) Artigas, 5 (4.5%) Río Negro, 13 (11.1%)
4 Trinidad, 7 (5.5%) San Carlos, 22 (6.3%) Las Piedras, 114 (9.3%) NA Soriano, 11 (9.4%)
5 NA Rocha, 16 (4.6%) Progreso, 98 (8%) NA Colonia del Sacramento, 8 (6.8%)
6 NA Treinta y Tres, 16 (4.6%) 18 de Mayo, 66 (5.4%) NA Fray Bentos, 7 (6%)
7 NA Lavalleja, 11 (3.2%) Paso Carrasco, 59 (4.8%) NA Mercedes, 5 (4.3%)
8 NA NA Barros Blancos, 55 (4.5%) NA NA
9 NA NA Santa Lucía, 34 (2.8%) NA NA
10 NA NA La Paz, 33 (2.7%) NA NA
11 NA NA Pando, 19 (1.6%) NA NA
12 NA NA San José, 16 (1.3%) NA NA
13 NA NA Toledo, 16 (1.3%) NA NA
14 NA NA Joaquín Suárez, 12 (1%) NA NA
15 NA NA Ciudad del Plata, 11 (0.9%) NA NA
16 NA NA Atlántida, 7 (0.6%) NA NA
17 NA NA Salinas, 7 (0.6%) NA NA
18 NA NA Libertad, 6 (0.5%) NA NA
19 NA NA General Líber Seregni, 4 (0.3%) NA NA
20 NA NA Parque del Plata, 3 (0.2%) NA NA
21 NA NA Tala, 3 (0.2%) NA NA

Firms

How many firms are in each country?5900 in Argentina, 9400 in Chile, and 789 in Uruguay.

Code
south_cone_df %>% 
  group_by(country_code) %>% 
  select(firm) %>% 
  skimr::skim() %>% 
  as_tibble() %>% 
  select(-character.min,-character.max,-skim_type) %>% 
  kableExtra::kable()
Table 11: Firms by country
skim_variable country_code n_missing complete_rate character.empty character.n_unique character.whitespace
firm ARG 0 1 10 5983 0
firm CHL 0 1 0 9431 0
firm URY 0 1 0 789 0

Which are the most important firms across countries and regions?

  • Emprego en Argentina, Confidencial en Chile, Gallito Trabajo en Uruguay.

  • HR agencies seem to represent most of the postings (at least this month).

  • There are many cases where they list the place of the vacancy instead of the company. Mostly in Argentina.

Code
country_var_count(data = south_cone_df, 
                  country = "country_code",
                  category = "firm") %>%
  country_region_table(country = "country_code",
                  category = "firm",
                  top_number = 10)

?(caption)

rank ARG CHL URY
1 Emprego, 2447 (10.6%) Confidencial, 2130 (6.2%) Gallito Trabajo, 88 (4.3%)
2 Confidencial, 748 (3.2%) Progestion Chile, 890 (2.6%) ManpowerGroup, 61 (3%)
3 Mendoza, Capital, Mendoza, Argentina, 327 (1.4%) Adecco Chile, 678 (2%) Inclusion Cloud, 48 (2.4%)
4 Adecco Argentina S.A., 261 (1.1%) Manpower Chile, 677 (2%) Superprof, 33 (1.6%)
5 Grupo Gestión, 259 (1.1%) Fundación Integra, 575 (1.7%) Aldeas Infantiles SOS Uruguay, 30 (1.5%)
6 ManpowerGroup, 230 (1%) Cygnus, 569 (1.7%) Advice, 26 (1.3%)
7 Buenos Aires, CABA, Argentina, 225 (1%) ACTIVOS CHILE, 448 (1.3%) Adecco, 25 (1.2%)
8 Tusclases, 214 (0.9%) Eurofirms Chile, 432 (1.3%) confidential, 24 (1.2%)
9 LatinHire, 198 (0.9%) XinerLink, 416 (1.2%) Randstad Uruguay, 22 (1.1%)
10 Wurth Argentina S.a, 132 (0.6%) Walmart Chile, 296 (0.9%) Securitas Uruguay, 22 (1.1%)

Which are the most important firms in the most demanded roles:

Code
top_occupations<-c(
  # "Architecture and Engineering Occupations",
  "Sales and Related Occupations",
  # "Healthcare Practitioners and Technical Occupations",
  "Computer and Mathematical Occupations",
  "Business and Financial Operations Occupations",
  "Office and Administrative Support Occupations"
  )


country_var_count(data = south_cone_df %>% 
                    filter(major_group_title %in% top_occupations) %>% 
                    mutate(major_group_title=str_remove(major_group_title," Occupations")), 
                  country = "major_group_title",
                  category = "firm") %>%
  country_region_table(country = "major_group_title",
                  category = "firm",
                  top_number = 10)
rank Business and Financial Operations Computer and Mathematical Office and Administrative Support Sales and Related
1 Emprego, 251 (5.3%) Emprego, 116 (4.6%) Confidencial, 438 (4.9%) Confidencial, 453 (4.8%)
2 Confidencial, 212 (4.5%) Confidencial, 96 (3.8%) Emprego, 417 (4.6%) Emprego, 440 (4.7%)
3 Progestion Chile, 51 (1.1%) Recruiting from Scratch, 37 (1.5%) Progestion Chile, 124 (1.4%) Progestion Chile, 176 (1.9%)
4 Adecco Chile, 47 (1%) Buenos Aires, CABA, Argentina, 36 (1.4%) Adecco Chile, 112 (1.2%) Adecco Chile, 113 (1.2%)
5 Mendoza, Capital, Mendoza, Argentina, 45 (1%) Mendoza, Capital, Mendoza, Argentina, 34 (1.4%) Fundación Integra, 107 (1.2%) Manpower Chile, 108 (1.1%)
6 Buenos Aires, CABA, Argentina, 40 (0.9%) Fundación Integra, 25 (1%) Cygnus, 101 (1.1%) Cygnus, 89 (0.9%)
7 Manpower Chile, 37 (0.8%) Progestion Chile, 24 (1%) Manpower Chile, 100 (1.1%) Fundación Integra, 83 (0.9%)
8 ACTIVOS CHILE, 34 (0.7%) Manpower Chile, 23 (0.9%) ACTIVOS CHILE, 67 (0.7%) XinerLink, 70 (0.7%)
9 Cygnus, 33 (0.7%) Adecco Chile, 20 (0.8%) Eurofirms Chile, 63 (0.7%) Eurofirms Chile, 56 (0.6%)
10 Adecco Argentina S.A., 28 (0.6%) Eurofirms Chile, 18 (0.7%) XinerLink, 59 (0.7%) ManpowerGroup, 55 (0.6%)
11 Fundación Integra, 28 (0.6%) Inclusion Cloud, 18 (0.7%) NA NA

Which are the most important firms in the most active sectors:

Code
top_sectors<-c("Professional Scientific And Technical Services",
"Finance And Insurance","Retail Trade","Manufacturing")

country_var_count(data = south_cone_df %>% 
                    filter(main_sector %in% top_sectors), 
                  country = "main_sector",
                  category = "firm") %>%
  country_region_table(country = "main_sector",
                  category = "firm",
                  top_number = 10)
rank Finance And Insurance Manufacturing Professional Scientific And Technical Services Retail Trade
1 Emprego, 157 (5.2%) Confidencial, 539 (4.9%) Emprego, 402 (4.7%) Confidencial, 485 (5.3%)
2 Confidencial, 128 (4.3%) Emprego, 483 (4.4%) Confidencial, 367 (4.3%) Emprego, 340 (3.7%)
3 Mendoza, Capital, Mendoza, Argentina, 35 (1.2%) Adecco Chile, 168 (1.5%) Progestion Chile, 97 (1.1%) Progestion Chile, 197 (2.1%)
4 Progestion Chile, 33 (1.1%) Manpower Chile, 162 (1.5%) Buenos Aires, CABA, Argentina, 95 (1.1%) Adecco Chile, 113 (1.2%)
5 Adecco Chile, 31 (1%) Progestion Chile, 149 (1.4%) Adecco Chile, 90 (1.1%) Manpower Chile, 108 (1.2%)
6 Cygnus, 23 (0.8%) ACTIVOS CHILE, 121 (1.1%) Manpower Chile, 83 (1%) Cygnus, 106 (1.2%)
7 Buenos Aires, CABA, Argentina, 22 (0.7%) Cygnus, 110 (1%) Mendoza, Capital, Mendoza, Argentina, 81 (1%) Fundación Integra, 106 (1.2%)
8 Manpower Chile, 22 (0.7%) Fundación Integra, 85 (0.8%) Fundación Integra, 65 (0.8%) Eurofirms Chile, 74 (0.8%)
9 Fundación Integra, 21 (0.7%) XinerLink, 81 (0.7%) Eurofirms Chile, 56 (0.7%) ACTIVOS CHILE, 71 (0.8%)
10 ManpowerGroup, 21 (0.7%) Eurofirms Chile, 77 (0.7%) Recruiting from Scratch, 56 (0.7%) XinerLink, 71 (0.8%)

How concentrated are online vacancies within firms across different regions?

Code
south_cone_df %>% 
  group_by(country_code,firm) %>% 
  summarise(count=n()) %>% 
  group_by(country_code) %>%
  mutate(share=count/sum(count)) %>% 
  arrange(country_code,desc(count)) %>% 
  mutate(cum_share=cumsum(share),
         rank=row_number()) %>% 
  top_n(100,-rank) %>% 
  ggplot(aes(x=rank,y=cum_share,group=country_code,,color=country_code))+
  geom_point()+
  geom_line()+
  scale_color_manual(values=country_colors)+
  geom_label_repel(aes(label=ifelse(rank %in% c(1,2,20, 50, 70, 100),substr(firm,1,20),NA)),size=2)+
  labs(title="Cummulative share of vacancies by in 100 largest firms",
       subtitle="Vacancies in Chile and Argentina are similarly\nconcentrated within the first 100 firms",
       color=NULL,
       y="Share of country vacancies",
       x="Firm ranking  (from largest to smallest)")

Figure 51: ?(caption)

Representativity Assessment (Work in progress)

Which occupations and sectors are over(under)represented in each country? We’ll compare vacancies data to Employment estimates in employment or household surveys to figure it out.

Comparing against ILOSTAT data by occupation

ILOSTAT data contains tables of employment at the ISCO 08 2-digits level. Samples for Chile and Uruguay managed to classify all occupations from the original surveys, while the Argentina’s failed to assign an ISCO 08 code to 16% of employment in the original sample.

Code
## Load ilostat data
ilostat_isco08<-read_csv("data/latest_country_isco08_2d_2022.csv") %>% 
  filter(isco08_2d!="TOTAL")

## load crosswalk
isco08_soc10_crosswalk<-read_csv("raw/catalogs_and_crosswalks/isco_soc.csv") %>% 
  janitor::clean_names()

The table below shows the correlation between employment and online job vacancies distributions.

Code
correlations() %>% 
  gt() %>% 
  fmt_percent('estimate') %>% 
  tab_header(title="Correlation between employment and online vacancies distributions",
             subtitle="Estimates correspond to Pearson's correlation coefficietns")
Correlation between employment and online vacancies distributions
Estimates correspond to Pearson's correlation coefficietns
estimate statistic group
41.28% 3.625998 Total
45.13% 2.261776 ARG
42.50% 2.099956 CHL
41.26% 2.025881 URY

These tables show the detailed distributions behind these correlations.

Regular comparisson

Code
table_comparisson(data = ,country = NULL)
Table 12:

Comparisson of employment and postigns distribution

Comparing employment and online vacancies distributions
All countries
major_group_title Share of employment Share of online vacancies %Vacancies-%Employment Ratio
Architecture and Engineering Occupations 0.94% 5.21% 5.5567533
Sales and Related Occupations 3.79% 15.94% 4.2102031
Healthcare Practitioners and Technical Occupations 1.02% 3.75% 3.6663749
Computer and Mathematical Occupations 1.39% 4.22% 3.0369834
Business and Financial Operations Occupations 4.49% 7.94% 1.7667467
Office and Administrative Support Occupations 8.64% 15.17% 1.7549611
Protective Service Occupations 1.80% 3.05% 1.6953629
Production Occupations 5.81% 8.80% 1.5144336
Installation, Maintenance, and Repair Occupations 2.90% 3.76% 1.2938168
Management Occupations 5.68% 6.59% 1.1601441
Life, Physical, and Social Science Occupations 2.25% 2.33% 1.0335882
Educational Instruction and Library Occupations 3.37% 3.14% 0.9324198
Healthcare Support Occupations 1.76% 1.41% 0.8015756
Transportation and Material Moving Occupations 9.45% 7.25% 0.7678933
Food Preparation and Serving Related Occupations 4.36% 2.70% 0.6198655
Legal Occupations 0.39% 0.22% 0.5593416
Arts, Design, Entertainment, Sports, and Media Occupations 3.12% 1.57% 0.5012117
Personal Care and Service Occupations 2.80% 1.37% 0.4873648
Building and Grounds Cleaning and Maintenance Occupations 5.18% 2.19% 0.4225617
Construction and Extraction Occupations 6.80% 2.55% 0.3756950
Community and Social Service Occupations 3.12% 0.50% 0.1600554
Farming, Fishing, and Forestry Occupations 3.90% 0.35% 0.0904473
Military Specific Occupations 0.10% NA NA
Not ISCO classified 16.94% NA NA
Code
table_comparisson(data = major_occupation_country_emp,country = "ARG")
Comparing employment and online vacancies distributions
For ARG
major_group_title Share of employment Share of online vacancies %Vacancies-%Employment Ratio
Architecture and Engineering Occupations 0.60% 6.23% 10.35614233
Computer and Mathematical Occupations 0.79% 5.06% 6.39520094
Sales and Related Occupations 3.18% 16.36% 5.13873248
Healthcare Practitioners and Technical Occupations 0.82% 3.24% 3.95113856
Business and Financial Operations Occupations 3.64% 11.00% 3.02030193
Installation, Maintenance, and Repair Occupations 2.19% 4.99% 2.27373478
Legal Occupations 0.14% 0.27% 1.91762454
Office and Administrative Support Occupations 9.11% 14.77% 1.62119261
Life, Physical, and Social Science Occupations 1.41% 2.22% 1.56959156
Production Occupations 5.32% 8.11% 1.52348341
Protective Service Occupations 1.49% 2.10% 1.40983336
Educational Instruction and Library Occupations 2.62% 3.52% 1.33973535
Management Occupations 5.34% 6.70% 1.25503176
Arts, Design, Entertainment, Sports, and Media Occupations 2.03% 2.15% 1.06075969
Healthcare Support Occupations 1.53% 1.02% 0.66777677
Personal Care and Service Occupations 2.16% 1.35% 0.62514740
Construction and Extraction Occupations 5.85% 2.99% 0.51043367
Food Preparation and Serving Related Occupations 4.23% 2.14% 0.50510655
Transportation and Material Moving Occupations 8.62% 4.14% 0.47980490
Building and Grounds Cleaning and Maintenance Occupations 3.83% 1.16% 0.30259148
Farming, Fishing, and Forestry Occupations 2.08% 0.30% 0.14349291
Community and Social Service Occupations 2.40% 0.17% 0.07215078
Not ISCO classified 30.58% NA NA
Code
table_comparisson(data = major_occupation_country_emp, country = "CHL")
Comparing employment and online vacancies distributions
For CHL
major_group_title Share of employment Share of online vacancies %Vacancies-%Employment Ratio
Sales and Related Occupations 4.64% 15.63% 3.36787179
Healthcare Practitioners and Technical Occupations 1.29% 4.23% 3.26505462
Architecture and Engineering Occupations 1.40% 4.55% 3.25307830
Office and Administrative Support Occupations 7.22% 15.36% 2.12709996
Protective Service Occupations 2.19% 3.74% 1.70306387
Computer and Mathematical Occupations 2.22% 3.42% 1.54277627
Production Occupations 6.37% 9.39% 1.47418350
Management Occupations 6.27% 6.64% 1.05882171
Business and Financial Operations Occupations 5.67% 5.82% 1.02631875
Transportation and Material Moving Occupations 10.24% 9.46% 0.92350870
Healthcare Support Occupations 2.01% 1.71% 0.85187665
Installation, Maintenance, and Repair Occupations 3.80% 2.91% 0.76539088
Life, Physical, and Social Science Occupations 3.42% 2.44% 0.71363564
Food Preparation and Serving Related Occupations 4.61% 3.08% 0.66863742
Educational Instruction and Library Occupations 4.25% 2.75% 0.64809475
Building and Grounds Cleaning and Maintenance Occupations 6.58% 2.91% 0.44253335
Personal Care and Service Occupations 3.58% 1.33% 0.37077225
Construction and Extraction Occupations 7.95% 2.20% 0.27700706
Arts, Design, Entertainment, Sports, and Media Occupations 4.58% 1.14% 0.24942855
Legal Occupations 0.68% 0.17% 0.24581679
Community and Social Service Occupations 4.02% 0.73% 0.18198460
Farming, Fishing, and Forestry Occupations 6.02% 0.39% 0.06469844
Military Specific Occupations 0.19% NA NA
Not ISCO classified 0.81% NA NA
Code
table_comparisson(data = major_occupation_country_emp,country = "URY")
Comparing employment and online vacancies distributions
For URY
major_group_title Share of employment Share of online vacancies %Vacancies-%Employment Ratio
Computer and Mathematical Occupations 1.41% 7.89% 5.59560888
Architecture and Engineering Occupations 0.98% 4.64% 4.70884645
Sales and Related Occupations 3.75% 16.57% 4.42004178
Business and Financial Operations Occupations 4.56% 8.73% 1.91473729
Healthcare Practitioners and Technical Occupations 1.09% 1.48% 1.36309776
Office and Administrative Support Occupations 12.87% 16.42% 1.27614057
Educational Instruction and Library Occupations 4.31% 5.47% 1.27073174
Installation, Maintenance, and Repair Occupations 3.47% 4.09% 1.17921679
Protective Service Occupations 2.06% 2.42% 1.17537027
Production Occupations 6.47% 6.66% 1.02831731
Management Occupations 4.99% 4.34% 0.86954934
Life, Physical, and Social Science Occupations 2.32% 1.73% 0.74346507
Food Preparation and Serving Related Occupations 3.98% 2.76% 0.69387131
Personal Care and Service Occupations 3.56% 2.27% 0.63723797
Legal Occupations 0.66% 0.39% 0.59995729
Arts, Design, Entertainment, Sports, and Media Occupations 3.57% 2.02% 0.56672538
Transportation and Material Moving Occupations 11.42% 5.52% 0.48340585
Construction and Extraction Occupations 7.75% 3.55% 0.45799856
Healthcare Support Occupations 2.13% 0.69% 0.32365365
Building and Grounds Cleaning and Maintenance Occupations 7.92% 1.73% 0.21780797
Community and Social Service Occupations 3.70% 0.30% 0.07990463
Farming, Fishing, and Forestry Occupations 6.25% 0.35% 0.05519691
Military Specific Occupations 0.42% NA NA
Not ISCO classified 0.36% NA NA

These charts give you a straigthforward view:

Code
major_occupation_emp%>% 
  ggplot(aes(x=employment_share,y=group_in_country_share))+
  geom_point()+
  ggrepel::geom_label_repel(aes(label=ifelse(rank(gap)>16 | rank(gap)<4, str_remove(major_group_title," Occupations"),NA)),
                            size=3)+
  coord_fixed()+
  geom_smooth(method="lm",se = FALSE, aes(linetype="OLS"))+
  geom_abline(aes(slope=1,intercept=0),alpha=0.2)+
  labs(x = "Share of employment (ILOSTATS)",
       y = "Share of Online Vacancies (IDB)",
       linetype=NULL)

Code
major_occupation_country_emp%>% 
  ggplot(aes(x=employment_share,y=group_in_country_share))+
  geom_abline(aes(slope=1,intercept=0),alpha=0.2)+
  geom_point(aes(color=country_code))+
  facet_wrap(vars(country_code))+
  ggrepel::geom_label_repel(aes(label=ifelse(rank(gap)>55 | rank(gap)<8, str_remove(major_group_title," Occupations"),NA)),
                            size=3)+
  scale_color_manual(values=country_colors)+
  labs(x = "Share of employment (ILOSTATS)",
       y = "Share of Online Vacancies (IDB)",
       color="Country",
       linetype=NULL)

These charts shows the change in rankings from one database to the other:

Rank comparisson

Code
rank_comparisson(data=major_occupation_emp, 
                             category="major_group_title", 
                             country=NULL)

Code
rank_comparisson(data=major_occupation_country_emp, 
                             category="major_group_title", 
                             country="ARG")

Code
rank_comparisson(data=major_occupation_country_emp, 
                             category="major_group_title", 
                             country="CHL")

Code
rank_comparisson(data=major_occupation_country_emp, 
                             category="major_group_title", 
                             country="URY")

Comparing against ILOSTAT data by sector

Code
sector_emp<-latest_country_EC2d  %>% 
  left_join(naics_isic_2d_fixed ,by=c("EC2d"="isic_code")) %>% 
  # there are many naics for the same isic. we avoid double counting by splitting employment
  group_by(country_code,EC2d) %>% 
    mutate(n_naics_outputs=n()) %>% 
  # we calculate employment by country and naics.
  group_by(naics_2d, naics_2d_desc) %>% 
  summarise(employment=sum(obs_value/n_naics_outputs,na.rm = T)) %>%
  # we calculate employment share
  ungroup() %>% 
    mutate(employment_share=employment/sum(employment)) %>% 
  # format variable in a specific way in order to match bid data
  mutate(main_sector=str_remove_all(naics_2d_desc,","),
         main_sector=str_remove_all(main_sector,"[()]"),
         main_sector=str_to_title(main_sector)) 

sector_country_emp<-latest_country_EC2d  %>% 
  left_join(naics_isic_2d_fixed ,by=c("EC2d"="isic_code")) %>% 
  # there are many naics for the same isic. we avoid double counting by splitting employment
  group_by(country_code,EC2d) %>% 
    mutate(n_naics_outputs=n()) %>% 
  # we calculate employment by country and naics.
  group_by(country_code,naics_2d, naics_2d_desc) %>% 
    summarise(employment=sum(obs_value/n_naics_outputs,na.rm = T)) %>%
  # we calculate employment share
  group_by(country_code) %>% 
    mutate(employment_share=employment/sum(employment)) %>% 
    ungroup() %>% 
  # format variable in a specific way in order to match bid data
  mutate(main_sector=str_remove_all(naics_2d_desc,","),
         main_sector=str_remove_all(main_sector,"[()]"),
         main_sector=str_to_title(main_sector))

c('total in ILOSTAT'=sum(latest_country_EC2d$obs_value,na.rm = T),
  'total after crosswalk'=sum(sector_emp$employment),
  'total after crosswalk (country)'=sum(sector_country_emp$employment))


sector_country_emp<-sector_country_emp %>% 
  left_join(country_var_count(data = south_cone_df,
                  country = 'country_code',
                  category = 'main_sector'), by=c("country_code","main_sector")) %>% 
  mutate(gap=(group_in_country_share/employment_share))

sector_emp<-sector_country_emp%>% 
    group_by(naics_2d,main_sector) %>% 
    summarise(employment=sum(employment,na.rm = T),
              count=sum(count)) %>% 
    ungroup() %>% 
    mutate(employment_share=employment/sum(employment,na.rm = T),
           group_in_country_share=count/sum(count,na.rm = T),
           gap=(group_in_country_share/employment_share))

The table below shows the correlation between employment and online job vacancies distributions.

Code
correlations(data = sector_country_emp) %>% 
  gt() %>% 
  fmt_percent('estimate') %>% 
  tab_header(title="Correlation between employment and online vacancies distributions",
             subtitle="Estimates correspond to Pearson's correlation coefficietns")
Correlation between employment and online vacancies distributions
Estimates correspond to Pearson's correlation coefficietns
estimate statistic group
48.01% 4.131854 Total
33.71% 1.519098 ARG
65.31% 3.659099 CHL
43.44% 1.988377 URY

Regular comparisson

Code
table_comparisson(data = sector_emp,category = "main_sector",country = NULL)
Table 13:

Comparisson of employment and postigns distribution

Comparing employment and online vacancies distributions
All countries
main_sector Share of employment Share of online vacancies %Vacancies-%Employment Ratio
Real Estate And Rental And Leasing 0.44% 1.55% 3.52315105
Professional Scientific And Technical Services 4.09% 14.38% 3.51429835
Finance And Insurance 1.77% 5.07% 2.86879512
Manufacturing 8.18% 18.48% 2.25771694
Retail Trade 10.38% 15.54% 1.49668407
Educational Services 8.31% 11.41% 1.37212519
Health Care And Social Assistance 6.86% 8.16% 1.18872261
Accommodation And Food Services 3.83% 4.42% 1.15255035
Government 3.80% 3.87% 1.01800309
Administrative And Support Services 7.06% 5.89% 0.83453484
Transportation And Warehousing 3.12% 2.26% 0.72347901
Information 1.56% 1.02% 0.65294551
Construction 7.50% 3.04% 0.40564957
Utilities 1.68% 0.43% 0.25485583
Mining Quarrying And Oil And Gas Extraction 1.33% 0.31% 0.23651480
Other Services Except Public Administration 10.96% 2.11% 0.19240156
Arts Entertainment And Recreation 2.29% 0.42% 0.18460085
Wholesale Trade 7.03% 1.26% 0.17944930
Agriculture Forestry Fishing And Hunting 4.82% 0.38% 0.07952526
Management Of Companies And Enterprises 0.69% NA NA
NA 4.29% NA NA
Code
table_comparisson(data = sector_country_emp,category = "main_sector",country = "ARG")
Comparing employment and online vacancies distributions
For ARG
main_sector Share of employment Share of online vacancies %Vacancies-%Employment Ratio
Professional Scientific And Technical Services 3.62% 16.84% 4.65414729
Finance And Insurance 2.00% 6.52% 3.25211316
Real Estate And Rental And Leasing 0.38% 1.15% 3.00939420
Retail Trade 5.16% 13.64% 2.64016873
Manufacturing 9.44% 18.44% 1.95395607
Educational Services 7.97% 12.43% 1.55954184
Health Care And Social Assistance 6.78% 6.79% 1.00171185
Government 4.70% 4.30% 0.91476051
Accommodation And Food Services 3.49% 3.17% 0.91001921
Transportation And Warehousing 3.00% 2.30% 0.76785696
Information 2.11% 1.25% 0.59321785
Mining Quarrying And Oil And Gas Extraction 0.46% 0.26% 0.56951984
Administrative And Support Services 7.76% 4.36% 0.56148507
Construction 8.59% 3.17% 0.36856659
Other Services Except Public Administration 12.43% 2.85% 0.22950854
Agriculture Forestry Fishing And Hunting 2.41% 0.36% 0.15140259
Arts Entertainment And Recreation 2.42% 0.34% 0.14181843
Utilities 1.55% 0.21% 0.13673441
Wholesale Trade 11.96% 1.61% 0.13452701
Management Of Companies And Enterprises 0.68% 0.01% 0.01277311
NA 3.10% NA NA
Code
slides_cat_var_chart(agg_data = sector_country_emp %>%
                       filter( !is.na(main_sector) & country_code=="ARG"),
                     category = "main_sector",
                     country="country_code")+
  scale_fill_manual(values = country_colors,guide='none')+ 
  coord_flip()+
  labs( y="Porcentaje de vacantes",
        fill=NULL,
        x=NULL)

Figure 52: ?(caption)
Code
table_comparisson(data = sector_country_emp,category = "main_sector", country = "CHL")
Comparing employment and online vacancies distributions
For CHL
main_sector Share of employment Share of online vacancies %Vacancies-%Employment Ratio
Real Estate And Rental And Leasing 0.52% 1.83% 3.499289010
Manufacturing 6.38% 18.76% 2.939475187
Professional Scientific And Technical Services 4.57% 12.45% 2.723058494
Finance And Insurance 1.52% 4.09% 2.689493710
Wholesale Trade 0.51% 1.05% 2.050729898
Government 2.50% 3.57% 1.427876157
Health Care And Social Assistance 6.64% 9.18% 1.383287005
Accommodation And Food Services 4.38% 5.17% 1.181238044
Administrative And Support Services 6.00% 7.03% 1.172018295
Educational Services 9.06% 10.54% 1.163625581
Retail Trade 17.47% 16.90% 0.966934217
Information 0.90% 0.85% 0.935105262
Transportation And Warehousing 3.24% 2.22% 0.686318979
Construction 6.34% 2.93% 0.461564532
Utilities 1.85% 0.58% 0.312499935
Arts Entertainment And Recreation 2.08% 0.49% 0.235997633
Other Services Except Public Administration 8.83% 1.60% 0.181272953
Mining Quarrying And Oil And Gas Extraction 2.71% 0.36% 0.134022207
Agriculture Forestry Fishing And Hunting 7.26% 0.40% 0.054859893
Management Of Companies And Enterprises 0.67% 0.01% 0.008738326
NA 6.56% NA NA
Code
slides_cat_var_chart(agg_data = sector_country_emp %>%
                       filter( !is.na(main_sector) & country_code=="CHL"),
                     category = "main_sector",
                     country="country_code")+
  scale_fill_manual(values = country_colors,guide='none')+ 
  coord_flip()+
  labs( y="Porcentaje de vacantes",
        fill=NULL,
        x=NULL)

Figure 53: ?(caption)
Code
table_comparisson(data = sector_country_emp,category="main_sector",country = "URY")
Comparing employment and online vacancies distributions
For URY
main_sector Share of employment Share of online vacancies %Vacancies-%Employment Ratio
Finance And Insurance 1.31% 5.03% 3.85118969
Professional Scientific And Technical Services 5.07% 18.98% 3.74763739
Real Estate And Rental And Leasing 0.44% 1.43% 3.21842519
Educational Services 7.08% 14.40% 2.03433042
Manufacturing 8.24% 14.05% 1.70642774
Accommodation And Food Services 3.56% 5.82% 1.63607161
Information 0.96% 1.33% 1.39199897
Retail Trade 12.14% 14.35% 1.18195545
Government 3.94% 4.14% 1.05027185
Health Care And Social Assistance 8.54% 6.41% 0.75054988
Transportation And Warehousing 3.41% 2.37% 0.69317417
Construction 5.53% 3.55% 0.64209629
Administrative And Support Services 7.38% 4.14% 0.56155648
Utilities 1.78% 0.39% 0.22162829
Other Services Except Public Administration 11.05% 2.17% 0.19633118
Wholesale Trade 4.44% 0.79% 0.17769403
Mining Quarrying And Oil And Gas Extraction 0.64% 0.10% 0.15290897
Arts Entertainment And Recreation 2.49% 0.20% 0.07929429
Agriculture Forestry Fishing And Hunting 9.75% 0.35% 0.03540332
Management Of Companies And Enterprises 0.87% NA NA
NA 1.39% NA NA
Code
slides_cat_var_chart(agg_data = sector_country_emp %>%
                       filter( !is.na(main_sector) & country_code=="URY"),
                     category = "main_sector",
                     country="country_code")+
  scale_fill_manual(values = country_colors,guide='none')+ 
  coord_flip()+
  labs( y="Porcentaje de vacantes",
        fill=NULL,
        x=NULL)

Figure 54: ?(caption)
Code
sector_emp%>% 
  ggplot(aes(x=employment_share,y=group_in_country_share))+
  geom_point()+
  ggrepel::geom_label_repel(aes(label=ifelse(rank(gap)>16 | rank(gap)<4, main_sector,NA)),
                            size=3)+
  geom_smooth(method="lm",se = FALSE, aes(linetype="OLS"))+
  geom_abline(aes(slope=1,intercept=0),alpha=0.2)+
  labs(x = "Share of employment (ILOSTATS)",
       y = "Share of Online Vacancies (IDB)",
       linetype=NULL)

Code
sector_country_emp%>% 
  ggplot(aes(x=employment_share,y=group_in_country_share))+
  geom_abline(aes(slope=1,intercept=0),alpha=0.2)+
  geom_point(aes(color=country_code))+
  facet_wrap(vars(country_code))+
  ggrepel::geom_label_repel(aes(label=ifelse(rank(gap)>55 | rank(gap)<8, main_sector,NA)),
                            size=3)+
  scale_color_manual(values=country_colors)+
  labs(x = "Share of employment (ILOSTATS)",
       y = "Share of Online Vacancies (IDB)",
       color="Country",
       linetype=NULL)

These charts shows the change in rankings from one database to the other:

Rank comparisson

Code
rank_comparisson(data=sector_emp, 
                             category="main_sector", 
                             country=NULL)

Code
rank_comparisson(data=sector_country_emp, 
                             category="main_sector", 
                             country="ARG")

Code
rank_comparisson(data=sector_country_emp, 
                             category="main_sector", 
                             country="CHL")

Code
rank_comparisson(data=sector_country_emp, 
                             category="main_sector", 
                             country="URY")

Appendix

Slides

Code
## Pongamos labels en español.
library(readxl)
sector_names<-readxl::read_excel("data/traducciones.xlsx", sheet=1)%>%
  janitor::clean_names()%>%
  rename(sector=1, sector_es=2)
abilities_names<-readxl::read_excel("data/traducciones.xlsx", sheet=2)%>%
  janitor::clean_names()%>%
  rename(abilities=1, habilidades=2)
subabilities_names<-readxl::read_excel("data/traducciones.xlsx", sheet=3)%>%
  janitor::clean_names()%>%
  rename(subabilities=1, subhabilidades=2)
major_group_names<-readxl::read_excel("data/traducciones.xlsx", sheet=4)%>%
  janitor::clean_names()%>%
  rename(major_group_title=1, major_group_title_es=2)
sector_rubro<-readxl::read_excel("data/traducciones.xlsx", sheet=5)%>%
  janitor::clean_names()

Sectores

Número de vacantes ponderado por la importancia de cada sector

Code
rama_df %>% 
  distinct(sector,group_share,group_vacancies) %>% 
  filter(sector %in% sectors_focus)%>%
  left_join(sector_names)%>%
  mutate(sector=sector_es)%>%
  ggplot(aes(x=reorder(sector,
                       group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste0(round(group_vacancies)," (",round(group_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  coord_flip()+
  scale_y_continuous(labels=scales::percent_format(),limits = c(0,0.2))+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

Code
rama_df %>% 
  filter(country_code=="ARG") %>% 
  left_join(sector_names)%>%
  mutate(sector=sector_es)%>%
  ggplot(aes(x=reorder(sector,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

rama_df %>% 
  filter(country_code=="CHL") %>% 
  left_join(sector_names)%>%
  mutate(sector=sector_es)%>%
  ggplot(aes(x=reorder(sector,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

rama_df %>% 
  filter(country_code=="URY") %>% 
  left_join(sector_names)%>%
  mutate(sector=sector_es)%>%
  ggplot(aes(x=reorder(sector,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Figure 55: ?(caption)

Figure 56: ?(caption)

Figure 57: ?(caption)

Porcentaje de vacantes donde cada sector es la más importante.

Code
main_sector_df%>% 
  filter(main_sector%in%sectors_focus)%>%
  left_join(sector_names, by=c("main_sector"="sector"))%>%
  mutate(main_sector=sector_es)%>%
  ggplot(aes(x=reorder(main_sector,group_share),y=group_share))+
  geom_col(fill="gray50")+
geom_text(aes(label=paste0(round(group_vacancies)," (",round(group_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  scale_y_continuous(labels=scales::percent_format(), limits = c(0,.2))+
  coord_flip()+
  labs(y="Porcentaje de Vacantes",
       x=NULL)

Ocupaciones

Top Occupational groups

Code
major_group_by_cty %>% 
  filter(country_code=="ARG") %>% 
  left_join(major_group_names)%>%
  mutate(major_group_title=major_group_title_es)%>%
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(str_remove(major_group_title,"Ocupaciones de"),
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")


major_group_by_cty %>% 
  filter(country_code=="CHL") %>% 
  left_join(major_group_names)%>%
  mutate(major_group_title=major_group_title_es)%>%
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(str_remove(major_group_title,"Ocupaciones de"),
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

major_group_by_cty %>% 
  filter(country_code=="URY") %>% 
  left_join(major_group_names)%>%
  mutate(major_group_title=major_group_title_es)%>%
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(str_remove(major_group_title,"Ocupaciones de"),
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Figure 58: ?(caption)

Figure 59: ?(caption)

Figure 60: ?(caption)

Top occupations

Code
## frecuency table of occupations
onet_job_df<-south_cone_df %>%
    group_by(onet_job)%>%
    summarise(group_vacancies=n())%>%
    ungroup()%>%
    mutate(group_share=group_vacancies/sum(group_vacancies))%>%
    arrange(desc(group_vacancies)) 

## frequency table of occupations, by country
onet_job_by_cty<-south_cone_df %>%
  group_by(country_code,onet_job)%>%
  summarise(count=n() ) %>%
  ungroup() %>% 
  left_join(onet_job_df %>%
              select(onet_job,group_vacancies))  %>% 
  left_join(country_code_df %>%
              select(country_code,country_vacancies))  %>%
  mutate(group_in_country_share=count/country_vacancies,
         country_in_group_share=count/group_vacancies) %>% 
  ungroup()


onet_job_by_cty %>% 
  filter(country_code=="ARG") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(onet_job,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(count<50~paste0(round(group_in_country_share,2)*100,"%"),
                                TRUE~paste0(count," (",round(group_in_country_share,2)*100,"%)")),
                y=group_in_country_share/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Code
onet_job_by_cty %>% 
  filter(country_code=="CHL") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(onet_job,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(count<50~paste0(round(group_in_country_share,2)*100,"%"),
                                TRUE~paste0(count," (",round(group_in_country_share,2)*100,"%)")),
                y=group_in_country_share/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Code
onet_job_by_cty %>% 
  filter(country_code=="URY") %>% 
  top_n(10,group_in_country_share) %>% 
  ggplot(aes(x=reorder(onet_job,
                       group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(count<50~paste0(round(group_in_country_share,2)*100,"%"),
                                TRUE~paste0(count," (",round(group_in_country_share,2)*100,"%)")),
                y=group_in_country_share/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  # theme(axis.text.x = element_text(angle = 65, hjust=1))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Habilidades

Número de vacantes ponderado por la importancia de cada habilidad

Code
Blue_sensorial="#64a3c9"
Orange_cognitivo="#fe9b2d"
Blue_psicomotor="#5c7f97"
Yellow_fisico="#ffb615"

abilities_colors=c("Habilidades Cognitivas"=Orange_cognitivo,
                   "Habilidades Físicas"=Yellow_fisico,
                   "Habilidades Psicomotoras"=Blue_psicomotor,
                   "Habilidades Sensoriales"=Blue_sensorial)
# Hole size
hsize <- 1

country_abilities_df %>% 
  filter(country_code=="ARG") %>%
  left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=abilities, y=group_in_country_share_sum, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share_sum,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=abilities_colors)+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill=NULL)

country_abilities_df %>% 
  filter(country_code=="CHL") %>%
  left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=abilities, y=group_in_country_share_sum, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share_sum,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=abilities_colors)+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill=NULL)

country_abilities_df %>% 
  filter(country_code=="URY") %>%
  left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  mutate(x=hsize) %>% 
  ggplot(aes(fill=abilities, y=group_in_country_share_sum, x=hsize)) +
  geom_col(colour="grey30") +
  geom_text(color="black",aes(label = paste(round(group_in_country_share_sum,2)*100,"%")),
              position = position_stack(vjust = 0.5)) +
  coord_polar(theta="y") +
  scale_fill_manual(values=abilities_colors)+
     xlim(c(0.2, hsize + 0.5))+
     theme_void()+
     labs(fill=NULL)

Figure 61: ?(caption)

Figure 62: ?(caption)

Figure 63: ?(caption)

Porcentaje de vacantes que requiere cada habilidad con probabilidad mayor a cero.

Code
country_abilities_df %>% 
  filter(country_code=="ARG") %>% 
  left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  ggplot(aes(x=reorder(abilities,
                       -group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=10
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")


country_abilities_df %>% 
  filter(country_code=="CHL") %>% 
   left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  ggplot(aes(x=reorder(abilities,
                       -group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=10
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

country_abilities_df %>% 
  filter(country_code=="URY") %>% 
   left_join(abilities_names)%>%
  mutate(abilities=habilidades)%>%
  ggplot(aes(x=reorder(abilities,
                       -group_in_country_share),
             y=group_in_country_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste(round(group_in_country_share,2)*100,"%")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  theme(axis.text.x = element_text(
    size=10
    ))+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Figure 64: ?(caption)

Figure 65: ?(caption)

Figure 66: ?(caption)

Subhabilidades

Top 5 subhabilidades dentro de cada pais

Code
country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="ARG"),
                                       country = NULL,
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
  mutate(country_code="ARG")%>%
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(subabilities_names)%>%
  mutate(subabilities=subhabilidades)%>%
  top_n(10,group_share_sum) %>% 
  ggplot(aes(x=reorder(subabilities,
                       group_share_sum),
             y=group_share_sum))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(group_vacancies_sum <50~
                                  paste0(round(group_share_sum,
                                                      2)*100,"%"),
                                TRUE~paste0(round(group_vacancies_sum )," (",
                                      round(group_share_sum,2)*100,"%)")),
                y=group_share_sum/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Code
country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="CHL"),
                                       country = NULL,
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
  mutate(country_code="CHL")%>%
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(subabilities_names)%>%
  mutate(subabilities=subhabilidades)%>%
  top_n(10,group_share_sum) %>% 
  ggplot(aes(x=reorder(subabilities,
                       group_share_sum),
             y=group_share_sum))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(group_vacancies_sum <50~
                                  paste0(round(group_share_sum,
                                                      2)*100,"%"),
                                TRUE~paste0(round(group_vacancies_sum )," (",
                                      round(group_share_sum,2)*100,"%)")),
                y=group_share_sum/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Code
country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="URY"),
                                       country = NULL,
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>% 
  mutate(country_code="URY")%>%
   mutate(subabilities=str_replace_all(subabilities,"_"," "),
         subabilities=str_to_title(subabilities),
         subabilities=str_replace_all(subabilities," Of "," of ")) %>% 
  left_join(subabilities_names)%>%
  mutate(subabilities=subhabilidades)%>%
  top_n(10,group_share_sum) %>% 
  ggplot(aes(x=reorder(subabilities,
                       group_share_sum),
             y=group_share_sum))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=case_when(group_vacancies_sum <50~
                                  paste0(round(group_share_sum,
                                                      2)*100,"%"),
                                TRUE~paste0(round(group_vacancies_sum )," (",
                                      round(group_share_sum,2)*100,"%)")),
                y=group_share_sum/2),
            color="white",
            fontface="bold",
            size=3)+
  scale_fill_manual(values=country_colors, guide='none')+
  scale_y_continuous(labels = scales::percent_format())+
  coord_flip()+
  labs(title = NULL,
       fill=NULL,
       shape=NULL,
       x=NULL,
       y="Porcentaje de vacantes")

Top 5 Subhabilidades dentro de cada grupo de habilidades

Code
purrr::map(c("ARG","CHL","URY"),
           slides=TRUE,
       skills_barchart,
       data_agg=country_subabilities_df%>%
        left_join(subabilities_names, by="subabilities")%>%
         mutate(subabilities=subhabilidades)%>%
        left_join(abilities_names, by=c("ability"="abilities"))%>%
        mutate(ability=habilidades)%>%
          group_by(country_code,ability) %>% 
          top_n(5,count)
       )
[[1]]

[[2]]

[[3]]

Figure 67: ?(caption)

Figure 68: ?(caption)

Figure 69: ?(caption)

Habilidades demandadas por sector por pais

Code
p1<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="ARG"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades)  %>%
    ggplot(aes(x=main_sector,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

p2<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="ARG"),
    country = 'area',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(abilities=habilidades)  %>%
    ggplot(aes(x=area,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

(p1+ theme(axis.text.x = element_blank(),
           axis.ticks.x = element_blank()))+
  (p2)+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Code
p1<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="CHL"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades)  %>%
    ggplot(aes(x=main_sector,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

p2<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="CHL"),
    country = 'area',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(abilities=habilidades)  %>%
    ggplot(aes(x=area,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

(p1+ theme(axis.text.x = element_blank(),
           axis.ticks.x = element_blank()))+
  (p2)+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Code
p1<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="URY"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades)  %>%
    ggplot(aes(x=main_sector,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

p2<-country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="URY"),
    country = 'area',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(abilities=habilidades)  %>%
    ggplot(aes(x=area,
             y=promedio))+
    geom_col(aes(fill=abilities),position = "fill", color="black")+
    geom_label(aes(label=paste0(round(promedio*100),"%"), color=abilities),
              alpha=.9, size=2, position = position_fill(vjust = 0.5), 
              show.legend = FALSE)+
    coord_flip()+
    scale_y_continuous(labels = scales::percent_format())+
    scale_fill_manual(values=abilities_colors)+
    scale_color_manual(values=abilities_colors)+
    labs(fill=NULL,
         color=NULL,
         y=NULL,
         x=NULL)

(p1+ theme(axis.text.x = element_blank(),
           axis.ticks.x = element_blank()))+
  (p2)+
  plot_layout( ncol = 1,heights =c(8,2),guides = "collect")

Code
sector_skills_matrix(
  data_agg = country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="ARG"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades),
  ability_val = NULL,
  metric = "mean")+
  labs(fill="Importancia \nPromedio",
       title=NULL,
       x=NULL,
       y=NULL)

Code
sector_skills_matrix(
  data_agg = country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="CHL"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades),
  ability_val = NULL,
  metric = "mean")+
  labs(fill="Importancia \nPromedio",
       title=NULL,
       x=NULL,
       y=NULL)

Code
sector_skills_matrix(
  data_agg = country_var_count_groups(
    data=south_cone_df%>%
      filter(country_code=="URY"),
    country = 'main_sector',
    variable_names=abilities, 
    name_of_categories="abilities"
    ) %>% 
    mutate(abilities=str_replace_all(abilities,"_"," "),
           abilities=str_to_title(abilities),
           abilities=str_replace_all(abilities," Of "," of ")) %>%
    filter(main_sector %in% sectors_focus )%>%
    left_join(sector_names, by =c("main_sector"="sector"))%>%
    left_join(abilities_names, by =c("abilities"))%>%
    mutate(main_sector=sector_es,
           abilities=habilidades),
  ability_val = NULL,
  metric = "mean")+
  labs(fill="Importancia \nPromedio",
       title=NULL,
       x=NULL,
       y=NULL)

Top 5 Sub habilidades por sector por pais

Code
country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="ARG"),
                                       country = 'main_sector',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
  mutate(subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of ")) %>%
  filter(main_sector %in% sectors_focus)%>%
  left_join(sector_names,by=c("main_sector"="sector"))%>%
  mutate(main_sector=sector_es)%>%
  bind_rows(
    country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="ARG"),
                                       country = 'area',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
      filter(area=="Conocimiento")%>%
    mutate(main_sector=area)%>%
      mutate(
       subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of "))
  )%>%
  left_join(subabilities_names, by="subabilities")%>%
  mutate(subabilities=subhabilidades)%>%
  mutate(country_code="ARG")%>%
  group_by(main_sector)%>% 
  top_n(5,promedio)%>%
  ungroup()%>%
  ggplot(aes(x=reorder(subabilities,promedio),y=promedio))+
  geom_col(aes(fill=country_code))+
  scale_fill_manual(values=country_colors, guide="none")+
  coord_flip()+
  facet_wrap(vars(str_wrap(main_sector,30)),ncol = 2, scales = "free")+
  scale_y_continuous(labels = scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de vacantes")

Code
country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="CHL"),
                                       country = 'main_sector',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
  mutate(subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of ")) %>%
  filter(main_sector %in% sectors_focus)%>%
  left_join(sector_names,by=c("main_sector"="sector"))%>%
  mutate(main_sector=sector_es)%>%
  bind_rows(
    country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="CHL"),
                                       country = 'area',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
      filter(area=="Conocimiento")%>%
    mutate(main_sector=area)%>%
      mutate(
       subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of "))
  )%>%
  left_join(subabilities_names, by="subabilities")%>%
  mutate(subabilities=subhabilidades)%>%
  mutate(country_code="CHL")%>%
  group_by(main_sector)%>% 
  top_n(5,promedio)%>%
  ungroup()%>%
  ggplot(aes(x=reorder(subabilities,promedio),y=promedio))+
  geom_col(aes(fill=country_code))+
  scale_fill_manual(values=country_colors, guide="none")+
  coord_flip()+
  facet_wrap(vars(str_wrap(main_sector,30)),ncol = 2, scales = "free")+
  scale_y_continuous(labels = scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de vacantes")

Code
country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="URY"),
                                       country = 'main_sector',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
  mutate(subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of ")) %>%
  filter(main_sector %in% sectors_focus)%>%
  left_join(sector_names,by=c("main_sector"="sector"))%>%
  mutate(main_sector=sector_es)%>%
  bind_rows(
    country_var_count_groups(data=south_cone_df%>%
                           filter(country_code=="URY"),
                                       country = 'area',
                         variable_names=subabilities, 
                         name_of_categories="subabilities") %>%
      filter(area=="Conocimiento")%>%
    mutate(main_sector=area)%>%
      mutate(
       subabilities=str_replace_all(subabilities,"_"," "),
       subabilities=str_to_title(subabilities),
       subabilities=str_replace_all(subabilities," Of "," of "))
  )%>%
  left_join(subabilities_names, by="subabilities")%>%
  mutate(subabilities=subhabilidades)%>%
  mutate(country_code="URY")%>%
  group_by(main_sector)%>% 
  top_n(5,promedio)%>%
  ungroup()%>%
  ggplot(aes(x=reorder(subabilities,promedio),y=promedio))+
  geom_col(aes(fill=country_code))+
  scale_fill_manual(values=country_colors, guide="none")+
  coord_flip()+
  facet_wrap(vars(str_wrap(main_sector,30)),ncol = 2, scales = "free")+
  scale_y_continuous(labels = scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de vacantes")

Top occupations of most focus sectors

Code
country_var_count(data = south_cone_df %>% 
                    inner_join(
                      south_cone_df %>%
                        filter(main_sector %in% sectors_focus)%>%
                      group_by(main_sector,onet_job) %>% 
                      summarise(count=n()) %>% 
                      top_n(5,count)), 
                  country = "main_sector",
                  category = "onet_job") %>%
  country_region_table(country = "main_sector",
                  category = "onet_job",
                  top_number = 20)

?(caption)

rank Agriculture Forestry Fishing And Hunting Construction Health Care And Social Assistance Information Mining Quarrying And Oil And Gas Extraction Professional Scientific And Technical Services Transportation And Warehousing
1 Trabajadores de Pesca y Caza, 46 (26.6%) Operadores de Grúas y Torres, 238 (26.8%) Recepcionistas y Oficinistas de Información, 392 (24.2%) Directores / Gerentes técnicos de Medios de Comunicación, 122 (30%) Ingenieros Petroleros, 59 (40.7%) Gerentes Generales y de Operaciones, 1341 (38.7%) Gerentes de Transporte, Almacenamiento, y Distribución, 251 (37.1%)
2 Supervisores Directos de Trabajadores de Ocupaciones Relacionadas con la Agricultura, la Pesca, y la Silvicultura, 45 (26%) Electricistas, 226 (25.5%) Supervisores Directos de Empleados de Oficina y de Apoyo Administrativo, 367 (22.6%) Agentes de Venta de Publicidad, 114 (28%) Operadores de Perforadoras Giratorias, Petróleo y Gas, 29 (20%) Contadores y Auditores, 963 (27.8%) Cargadores de Vagones, Camiones y Barcos Tanque, 119 (17.6%)
3 Operadores de Equipo de Tala Forestal, 41 (23.7%) Ayudantes de Trabajadores de Ocupaciones Relacionadas con la Instalación, Mantenimiento y Reparación, 189 (21.3%) Enfermeros Graduados, 342 (21.1%) Asistentes de Vestuario, 76 (18.7%) Operadores de Torres de Perforación, Petróleo y Gas, 20 (13.8%) Especialistas en Apoyo Técnico para Usuarios de Computadoras, 409 (11.8%) Agentes de Carga y Flete, 113 (16.7%)
4 Operadores de Equipo Agrícola, 23 (13.3%) Obreros de la Construcción, 136 (15.3%) Niñeras, 267 (16.5%) Instaladores y Reparadores de Equipo de Telecomunicaciones, 61 (15%) Operadores de Máquinas de Minería de Operación Continua, 19 (13.1%) Analistas de Gestión, 382 (11%) Operadores de Camiones y Tractores Industriales, 105 (15.5%)
5 Trabajadores y Jornaleros Agrícolas, de Cultivos, de Viveros y de Invernaderos, 18 (10.4%) Supervisores Directos de Trabajadores de Oficios de Construcción y Extracción, 98 (11%) Psicólogos Clínicos y de Consejería, 255 (15.7%) Instaladores y Reparadores de Equipos de Radio, Telefonía Celular y Torres, 34 (8.4%) Ayudantes de Trabajadores de Ocupaciones Relacionadas con la Extracción, 18 (12.4%) Analistas de Investigación de Mercado y Especialistas en Mercadeo, 369 (10.7%) Supervisores de Transporte Aéreo de Carga, 88 (13%)

Actividades del conocimiento

Code
var_count(data = south_cone_df,
                             category = "area")%>%
  ggplot(aes(x=reorder(area,
                       group_share),
             y=group_share))+
  geom_col(fill="gray50")+
  geom_text(aes(label=paste0(round(group_vacancies)," (",round(group_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  # coord_flip()+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

Code
# c("ARG","CHL","URY"),

country_var_count(data = south_cone_df,
                  country = "country_code",
                             category = "area")%>%
  filter(country_code=="ARG")%>%
  ggplot(aes(x=reorder(area,
                       group_share),
             y=group_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste0(round(count)," (",round(group_in_country_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

Code
country_var_count(data = south_cone_df,
                  country = "country_code",
                             category = "area")%>%
  filter(country_code=="CHL")%>%
  ggplot(aes(x=reorder(area,
                       group_share),
             y=group_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste0(round(count)," (",round(group_in_country_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

Code
country_var_count(data = south_cone_df,
                  country = "country_code",
                             category = "area")%>%
  filter(country_code=="URY")%>%
  ggplot(aes(x=reorder(area,
                       group_share),
             y=group_share))+
  geom_col(aes(fill=country_code))+
  geom_text(aes(label=paste0(round(count)," (",round(group_in_country_share,2)*100,"%)")), 
            size=3,nudge_y = 0.02)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de Vacantes")

PIB y Empleo por Sector

PIB

Code
library(readxl)
chile_raw <- read_excel("raw/chl_macro/Cuadro_18122023133746.xlsx", 
    sheet = "Cuadro", skip = 2)%>%
  janitor::clean_names()

# CEPAL: Producto interno bruto (PIB) anual por actividad económica a precios corrientes en dólares (Millones de dólares)
# https://statistics.cepal.org/portal/cepalstat/dashboard.html?theme=2&lang=es
# CEPAL / Comisión Económica para América Latina y el Caribe / Estimaciones basadas en fuentes oficiales

cepal_raw <- read_excel("raw/cepal/data_1703074551.xlsx")%>%
  janitor::clean_names()%>%
  mutate(country_code=case_when(pais_estandar=="Argentina"~"ARG",
                                pais_estandar=="Chile"~"CHL",
                                pais_estandar=="Uruguay"~"URY"))%>%
  left_join(sector_rubro%>%
              distinct(rubro,rubro_resumen),
            c("rubro_sector_cuentas_nacionales_anuales"="rubro"))

print("Uruguay tiene un sector menos: explotacion de minas y canteras (incluye extraccion de petroleo crudo y gas natural")
[1] "Uruguay tiene un sector menos: explotacion de minas y canteras (incluye extraccion de petroleo crudo y gas natural"
Code
cepal_raw %>%
  distinct(pais_estandar,rubro_resumen)%>%
  group_by(pais_estandar) %>%
  count()
# A tibble: 3 × 2
# Groups:   pais_estandar [3]
  pais_estandar     n
  <chr>         <int>
1 Argentina        10
2 Chile            10
3 Uruguay           9
Code
cepal_pib<-cepal_raw %>%
  filter(rubro_sector_cuentas_nacionales_anuales!="Producto interno bruto (PIB)") %>%
  group_by(country_code)%>%
  mutate(share=value/sum(value))
Code
cepal_pib%>%
  filter(country_code=="ARG")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),share),
             y=share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(share <1~
                                paste0(round(share,
                                                    2)*100,"%"),
                              TRUE~paste0(scales::number(round(value/1000 ),
                                                          prefix = "USD",
                                                         big.mark = ".") ,
                                          " MM (",
                                    round(share,2)*100,"%)")),
              y=share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de PIB")

Code
cepal_pib%>%
  filter(country_code=="CHL")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),share),
             y=share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(share <1~
                                paste0(round(share,
                                                    2)*100,"%"),
                              TRUE~paste0(scales::number(round(value/1000 ),
                                                          prefix = "USD",
                                                         big.mark = ".") ,
                                          " MM (",
                                    round(share,2)*100,"%)")),
              y=share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de PIB")

Code
cepal_pib%>%
  filter(country_code=="URY")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),share),
             y=share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(share <1~
                                paste0(round(share,
                                                    2)*100,"%"),
                              TRUE~paste0(scales::number(round(value/1000 ),
                                                          prefix = "USD",
                                                         big.mark = ".") ,
                                          " MM (",
                                    round(share,2)*100,"%)")),
              y=share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de PIB")

EMPLEO

Code
rubro_country_emp<-sector_country_emp%>%
  left_join(sector_names, by=c("main_sector"="sector"))%>%
  left_join(sector_rubro, by=c("sector_es"="sector"))%>%
  mutate(rubro_resumen=ifelse(is.na(rubro_resumen),"Sin Clasfificar",
                              rubro_resumen))%>%
  group_by(country_code, rubro_resumen)%>%
  summarise(employment=sum(employment),
            group_vacancies=sum(group_vacancies,na.rm = T))%>%
  group_by(country_code)%>%
  mutate(employment_share=employment/sum(employment),
         group_share=group_vacancies/sum(group_vacancies))


rubro_country_emp%>%
    filter(country_code=="ARG")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),employment_share),
             y=employment_share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(employment_share <1~
                                paste0(round(employment_share,
                                                    2)*100,"%"),
                              TRUE~paste0(round(employment ) ,
                                          " (",
                                    round(employment_share,2)*100,"%)")),
              y=employment_share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de empleo")

Code
rubro_country_emp%>%
    filter(country_code=="CHL")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),employment_share),
             y=employment_share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(employment_share <1~
                                paste0(round(employment_share,
                                                    2)*100,"%"),
                              TRUE~paste0(round(employment ) ,
                                          " (",
                                    round(employment_share,2)*100,"%)")),
              y=employment_share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de empleo")

Code
rubro_country_emp%>%
    filter(country_code=="URY")%>%
  ggplot(aes(x=reorder(str_wrap(rubro_resumen,30),employment_share),
             y=employment_share))+
  geom_col(aes(fill=country_code))+
  coord_flip()+
  geom_text(aes(label=case_when(employment_share <1~
                                paste0(round(employment_share,
                                                    2)*100,"%"),
                              TRUE~paste0(round(employment ) ,
                                          " (",
                                    round(employment_share,2)*100,"%)")),
              y=employment_share/2),
          color="white",
          fontface="bold",
          size=3)+
  scale_fill_manual(values=country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  labs(x=NULL,
       y="Porcentaje de empleo")

PIB y Empleo

Code
rubro_country_emp%>%
  ungroup()%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="ARG")%>%
  select(rubro_resumen,employment_share,share)%>%
  arrange(desc(share))%>%
  gt::gt()%>%
  gt::fmt_percent(columns = c("share","employment_share"))%>%
  gt::cols_label( .list = list(
    "rubro_resumen"="Rubro",
    "employment_share"="Porcentaje de Empleo",
    "share"="Porcentaje de PIB"
  ))
Rubro Porcentaje de Empleo Porcentaje de PIB
Comercio, Reparación y Hostelería 17.12% 21.75%
Servicios Públicos, Bienestar Social y Apoyo Comunitario 37.78% 21.63%
Manufactura 9.44% 19.14%
Servicios Empresariales, Financieros e Inmobiliarios 16.56% 14.38%
Actividades Agropecuarias 2.41% 7.92%
Transporte, Almacenamiento y Comunicaciones 3.00% 5.51%
Construcción 8.59% 4.44%
Hidrocarburos y Mineria 0.46% 4.25%
Suministro de Electricidad, Gas y Agua 1.55% 0.98%
Sin Clasfificar 3.10% NA
Code
rubro_country_emp%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="ARG")%>%
  ggplot(aes(x=employment_share,
             y=share))+
  ggrepel::geom_text_repel(aes(label=rubro_resumen),size=3)+
  geom_point(aes(fill=country_code),shape=21,size=3)+
  scale_fill_manual(values = country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  scale_x_continuous(labels=scales::percent_format())+
  labs(y="Porcentaje de PIB (2022)",
       x="Porcentaje de Empleo (2022)")

Code
rubro_country_emp%>%
  ungroup()%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="CHL")%>%
  select(rubro_resumen,employment_share,share)%>%
  arrange(desc(share))%>%
  gt::gt()%>%
  gt::fmt_percent(columns = c("share","employment_share"))%>%
  gt::cols_label( .list = list(
    "rubro_resumen"="Rubro",
    "employment_share"="Porcentaje de Empleo",
    "share"="Porcentaje de PIB"
  ))
Rubro Porcentaje de Empleo Porcentaje de PIB
Servicios Empresariales, Financieros e Inmobiliarios 14.19% 22.44%
Servicios Públicos, Bienestar Social y Apoyo Comunitario 33.49% 18.27%
Hidrocarburos y Mineria 2.71% 15.86%
Comercio, Reparación y Hostelería 17.99% 11.85%
Manufactura 6.38% 10.85%
Transporte, Almacenamiento y Comunicaciones 3.24% 7.88%
Construcción 6.34% 6.55%
Actividades Agropecuarias 7.26% 3.95%
Suministro de Electricidad, Gas y Agua 1.85% 2.36%
Sin Clasfificar 6.56% NA
Code
rubro_country_emp%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="CHL")%>%
  ggplot(aes(x=employment_share,
             y=share))+
  ggrepel::geom_text_repel(aes(label=rubro_resumen),size=3)+
  geom_point(aes(fill=country_code),shape=21,size=3)+
  scale_fill_manual(values = country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  scale_x_continuous(labels=scales::percent_format())+
  labs(y="Porcentaje de PIB (2022)",
       x="Porcentaje de Empleo (2022)")

Code
rubro_country_emp%>%
  ungroup()%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="URY")%>%
  select(rubro_resumen,employment_share,share)%>%
  arrange(desc(share))%>%
  gt::gt()%>%
  gt::fmt_percent(columns = c("share","employment_share"))%>%
  gt::cols_label( .list = list(
    "rubro_resumen"="Rubro",
    "employment_share"="Porcentaje de Empleo",
    "share"="Porcentaje de PIB"
  ))
Rubro Porcentaje de Empleo Porcentaje de PIB
Servicios Públicos, Bienestar Social y Apoyo Comunitario 36.66% 32.60%
Comercio, Reparación y Hostelería 16.58% 16.37%
Servicios Empresariales, Financieros e Inmobiliarios 16.02% 13.59%
Manufactura 8.24% 11.77%
Transporte, Almacenamiento y Comunicaciones 3.41% 9.20%
Actividades Agropecuarias 9.75% 8.46%
Construcción 5.53% 5.41%
Suministro de Electricidad, Gas y Agua 1.78% 2.60%
Hidrocarburos y Mineria 0.64% NA
Sin Clasfificar 1.39% NA
Code
rubro_country_emp%>%
  left_join(cepal_pib,by=c("country_code","rubro_resumen"))%>%
  filter(country_code=="URY")%>%
  ggplot(aes(x=employment_share,
             y=share))+
  ggrepel::geom_text_repel(aes(label=rubro_resumen),size=3)+
  geom_point(aes(fill=country_code),shape=21,size=3)+
  scale_fill_manual(values = country_colors, guide="none")+
  scale_y_continuous(labels=scales::percent_format())+
  scale_x_continuous(labels=scales::percent_format())+
  labs(y="Porcentaje de PIB (2022)",
       x="Porcentaje de Empleo (2022)")

Discussing occupational codes in online job postings data [DONE]

  • The occupation classification system is O*NET SOC 19.

  • O*NET SOC 19 is compatible with SOC 18.

  • SOC 18 allows us to classify jobs into occupational major and minor groups, as well as to use wage estimates of the US to categorize them into high, medium, and low wage occupations.

  • More importantly, SOC 18 groups are compatible with SOC 10 groups, and SOC 10 groups are compatible with IDB occupational groups

  • There are 9 occupations without the proper occupational title in English. That must be due to an error in the ETL process. Will notify Eric.

  • The Uruguay file doesn’t have the Dynamic Flexibility sub ability.

  • There are vacancies with null sector weights (don’t belong to any sector.) Will notify Eric.

**How we made sure O*NET SOC 19 was used to name the occupations?**

We load the list of occupational titles in Argentina, Urugay and Chile vacancies’ samples and compare it with the official O*NET SOC 19 catalog.

We find a perfect match.

Code
# load three files and keep occupations data. store in csv for later
idb_occupations<-rbind(
   read_parquet("raw/arg_new_dict.parquet")|>
   distinct(occupation,onet_job),
   read_parquet("raw/chl_new_dict.parquet")|>
   distinct(occupation,onet_job)   ,
   read_parquet("raw/ury_new_dict.parquet")|>
    distinct(occupation,onet_job)
)|>
distinct()
write_csv(idb_occupations,"data/idb_occupations_in_data.csv")
Code
# load onet catelog
onet_19<-read_csv("raw/catalogs_and_crosswalks/onet_2019_occupations.csv")
# load the afore created list of occupations in vacancy data
idb_occupations<-read_csv("data/idb_occupations_in_data.csv")
Code
# isolate english titles (the official ones)
ibd_occupations_en<-idb_occupations%>%
    distinct(occupation)

# count how many occupations are in the vacancies data
ar_occs<-nrow(ibd_occupations_en)

# check if they match the onetsoc19 crosswalk
occs_in_onet_19<-ibd_occupations_en|>
    select(occupation)|>
    inner_join(onet_19, by=c("occupation"="onet_soc_title_19"))|>
    nrow()

print(paste("There are ",ar_occs,"occupations in the arg, ury and chl data"))
[1] "There are  758 occupations in the arg, ury and chl data"
Code
print(paste(occs_in_onet_19,"of these occupations were found in SOC O*NET 19 catalog"))
[1] "757 of these occupations were found in SOC O*NET 19 catalog"
Code
ibd_occupations_en|>
    select(occupation)|>
    anti_join(onet_19, by=c("occupation"="onet_soc_title_19"))|>
    select(occupation)
# A tibble: 1 × 1
  occupation
  <chr>     
1 <NA>      

Some examples of the ONET SOC 19 codes found are

Code
ibd_occupations_en|>
    select(occupation)|>
    inner_join(onet_19, by=c("occupation"="onet_soc_title_19"))|>
    head(15)
# A tibble: 15 × 3
   occupation                                  onet_soc_code_19 onet_soc_desc_19
   <chr>                                       <chr>            <chr>           
 1 Labor Relations Specialists                 13-1075.00       Resolve dispute…
 2 Foreign Language and Literature Teachers, … 25-1124.00       Teach languages…
 3 Construction and Building Inspectors        47-4011.00       Inspect structu…
 4 Accountants and Auditors                    13-2011.00       Examine, analyz…
 5 Retail Salespersons                         41-2031.00       Sell merchandis…
 6 Cooks, Restaurant                           35-2014.00       Prepare, season…
 7 Cashiers                                    41-2011.00       Receive and dis…
 8 Chefs and Head Cooks                        35-1011.00       Direct and may …
 9 Executive Secretaries and Executive Admini… 43-6011.00       Provide high-le…
10 Industrial Engineering Technologists and T… 17-3026.00       Apply engineeri…
11 First-Line Supervisors of Production and O… 51-1011.00       Directly superv…
12 Waiters and Waitresses                      35-3031.00       Take orders and…
13 Potters, Manufacturing                      51-9195.05       Operate product…
14 Aircraft Mechanics and Service Technicians  49-3011.00       Diagnose, adjus…
15 Multiple Machine Tool Setters, Operators, … 51-4081.00       Set up, operate…

Every occupation in spanish should have its’ english counterpart. Some doesn’t

Code
# There are occupation titles in spanish (onet_job) with no occupation title in english (occupation)
print('There are occupation titles in spanish (onet_job) with no occupation title in english (occupation)')
[1] "There are occupation titles in spanish (onet_job) with no occupation title in english (occupation)"
Code
idb_occupations%>%
    filter(is.na(occupation))  %>%
    head(30)  
# A tibble: 27 × 2
   occupation onet_job                                                      
   <chr>      <chr>                                                         
 1 <NA>       Conductores de Vehículos de Servicios de Transporte y Choferes
 2 <NA>       Analistas Financieros y de Inversiones                        
 3 <NA>       Diseñadores de Programas Software                             
 4 <NA>       Auxiliares Docentes de Educación Especial                     
 5 <NA>       Técnicos de Emergencias Médicas                               
 6 <NA>       Científico de Datos                                           
 7 <NA>       Maestros de Educación Especial de Jardín de Infantes          
 8 <NA>       Gerentes de Instalaciones                                     
 9 <NA>       Analistas Forenses Digitales                                  
10 <NA>       Administradores de Seguridad                                  
# ℹ 17 more rows

We are able to map these O*NET SOC 19 to SOC 18 detailed, and major occupations

Code
# read onet_soc19 to soc18 crosswalk
onetsoc19_soc18_crosswalk<-read_csv("raw/catalogs_and_crosswalks/onet_2019_to_soc_18_crosswalk.csv")|>
    janitor::clean_names()

# attach soc18 broad, minor groups data
soc18_groups<-read_csv("raw/catalogs_and_crosswalks/soc_structure_2018_clean.csv")|>
    janitor::clean_names()%>%
    rename( x2018_soc_code=1 ,x2018_soc_title=2)

# there is an almost perfect match ( no duplicates, only one missing)
onetsoc19_soc18_full_crosswalk<-onetsoc19_soc18_crosswalk %>%
    inner_join(soc18_groups)

# this is the result
head(onetsoc19_soc18_full_crosswalk)
# A tibble: 6 × 10
  o_net_soc_2019_code o_net_soc_2019_title        x2018_soc_code x2018_soc_title
  <chr>               <chr>                       <chr>          <chr>          
1 11-1011.00          Chief Executives            11-1011        Chief Executiv…
2 11-1011.03          Chief Sustainability Offic… 11-1011        Chief Executiv…
3 11-1021.00          General and Operations Man… 11-1021        General and Op…
4 11-1031.00          Legislators                 11-1031        Legislators    
5 11-2011.00          Advertising and Promotions… 11-2011        Advertising an…
6 11-2021.00          Marketing Managers          11-2021        Marketing Mana…
# ℹ 6 more variables: broad_group <chr>, broad_group_title <chr>,
#   minor_group <chr>, minor_group_title <chr>, major_group <chr>,
#   major_group_title <chr>
Code
# this is the occupation without soc 18 information
onetsoc19_soc18_crosswalk %>%
    anti_join(soc18_groups)
# A tibble: 2 × 4
  o_net_soc_2019_code o_net_soc_2019_title        x2018_soc_code x2018_soc_title
  <chr>               <chr>                       <chr>          <chr>          
1 33-3051.00          Police and Sheriff's Patro… 33-3051        Police and She…
2 33-3051.04          Customs and Border Protect… 33-3051        Police and She…

Understanding Uruguay demand by occupation

There is demand for personal service, but not so much for healthcare highly technical services. However, sample size is so small one needs to be cautions when drawing conclusions about these sectors. Specially for Uruguay, it’s best to focus on larger sample occupational groups like “Sales and Related”, “Office and Administrative Support” etc.

Code
contratictory_major_soc<- c("Personal Care and Service Occupations","Healthcare Support Occupations",
                                  "Healthcare Practitioners and Technical Occupations")

south_cone_df %>% 
  filter(country_code=="URY") %>% 
  filter(major_group_title %in% contratictory_major_soc) %>% 
  group_by(major_group_title, occupation) %>% 
  summarise(postings=n()) %>% 
  group_by(major_group_title) %>%
  top_n(5,postings) %>% 
  left_join(south_cone_df %>% 
            filter(country_code=="URY") %>% 
            group_by(occupation) %>% 
            summarize(mean_zones=mean(zones))) %>% 
  ungroup() %>% 
  head(15) %>% 
  gt()
major_group_title occupation postings mean_zones
Healthcare Practitioners and Technical Occupations Acupuncturists 3 5
Healthcare Practitioners and Technical Occupations Dentists, General 3 5
Healthcare Practitioners and Technical Occupations Ophthalmologists, Except Pediatric 6 5
Healthcare Practitioners and Technical Occupations Orthodontists 3 5
Healthcare Practitioners and Technical Occupations Pharmacists 5 5
Healthcare Practitioners and Technical Occupations Registered Nurses 3 4
Healthcare Support Occupations Home Health Aides 1 2
Healthcare Support Occupations Nursing Assistants 4 3
Healthcare Support Occupations Personal Care Aides 7 2
Healthcare Support Occupations Pharmacy Aides 2 2
Personal Care and Service Occupations Childcare Workers 6 2
Personal Care and Service Occupations Costume Attendants 3 2
Personal Care and Service Occupations First-Line Supervisors of Personal Service Workers 15 3
Personal Care and Service Occupations Manicurists and Pedicurists 3 2
Personal Care and Service Occupations Nannies 13 2

Google jobs abilities compared to O*NET’s

The prevalence of subabilities in online job vacancies can by grouped by occupation and contrasted with the level and importance scores O*NET Analysts assigned to each subability in each occupation profile.

If we find that online vacancies in the South Cone require different skills than what O*NET experts said is important to perform at a job we’ll have an interesting discussion about what the Vacancies Minning algorithm is doing and how different are the same occupations across different countries.

Code
# how important is that they have acceptable proficiency in this ability
abilties_importance<-read_delim("raw/ONET_28_0/Abilities.txt") %>% 
  janitor::clean_names() %>% 
  filter(scale_id=="IM")

# How good people must be at this ability
abilties_level<-read_delim("raw/ONET_28_0/Abilities.txt") %>% 
  janitor::clean_names() %>% 
  filter(scale_id=="LV")


# How many abilities in our vacancies data are in ONET
table(str_replace_all(subabilities,"_"," ") %in%
        unique(abilties_importance$element_name))

# How many onet abilities are in our vacancies data?
table(unique(abilties_importance$element_name) %in%
        str_replace_all(subabilities,"_"," "))
Code
# I average subabilities weights in job vacancies data by occupation (ONETsoc19)
onet_soc_19_df<-south_cone_df %>% 
  select(doc_id, occupation,o_net_soc_2019_code, subabilities) %>% 
  group_by(o_net_soc_2019_code) %>% 
  summarise(across(subabilities, mean)) %>% 
  # traspose the data
  pivot_longer(cols = subabilities,
               names_to = "element_name",
               values_to = "idb_value") %>% 
  # remove _ in element_name
  mutate(element_name=str_replace_all(element_name,"_"," ")) %>% 
  # I join the importance and level scores in onet
  left_join(abilties_importance %>% 
              select(o_net_soc_code,element_name,
                     importance_value=data_value,
                     importance_sd=standard_error,
                     importance_n=n),
            by= c("o_net_soc_2019_code"="o_net_soc_code",
                                       "element_name"="element_name")) %>% 
  left_join(abilties_level %>% 
              select(o_net_soc_code,element_name,
                     level_value=data_value,
                     level_sd=standard_error,
                     level_n=n),
            by= c("o_net_soc_2019_code"="o_net_soc_code",
                                       "element_name"="element_name")) 
  
library(GGally)
onet_soc_19_df %>% 
  select(ends_with("value")) %>% 
  ggpairs()+
  labs(title = "There is a strong correlation between IDB abilities scores and O*NET importance and level indicators",
       subtitle = "Each dot is an occupation-ability combination")

Figure 70: ?(caption)

Discussing the sector (rama) information available in online job postings data (DONE)

The names of the 20 presented ramas coincide with NAICS 2-digits classifications. Most LAC data sources show employment estimates by industry in ISIC or ISIC-related codes.

Interestingly, the sector or “Rama” is across multiple columns and each doc_id can have multiple values. There isn’t a categorical classification of the rama each firm belongs to, but rather a continuous one, where there are wegiths representing the chances a firm belongs to each sector.

They don’t assing a category, but rather a 20 positions vector that gives probabilities from 0 to 1 to each vacancy.

I found a couple of puzzling things in the data.:

  • There are doc_ids with no prediction. They don’t belong to any sector.
  • There are (ties) doc_ids with the same positive prediciton. This turns makes any attempt to assign only one sector to each posting a little polemic.
Code
library(arrow)
library(dplyr)
library(tidyr)

df_ar<-read_parquet("raw/arg_new_dict.parquet")

sectors<-c("accommodation_and_food_services", "administrative_and_support_services",
"agriculture_forestry_fishing_and_hunting",
"arts_entertainment_and_recreation",
 "construction", "educational_services","finance_and_insurance",
 "government" , "health_care_and_social_assistance",
 "information", "management_of_companies_and_enterprises",
 "manufacturing", "mining_quarrying_and_oil_and_gas_extraction",
 "other_services_except_public_administration",
 "professional_scientific_and_technical_services",
 "real_estate_and_rental_and_leasing", 
 "transportation_and_warehousing",
 "utilities", "wholesale_trade","retail_trade")
Code
test1<-df_ar |>  
    select(doc_id,sectors)|>
    pivot_longer(cols=2:21,
             names_to="sector",
             values_to="value")|>
    # Assume zero means not in this sector.
    # filter(value!=0)        
    mutate(is_zero= ifelse(value==0,"Is zero","Not zero"))%>%
    group_by(doc_id,is_zero) %>%
    count()%>%
    ungroup()

# lo guardo como referencia
write_csv(test1,"data/pregunta_ramas_per_doc_id_arg.csv")


# The representative document has a value different than 0 in 2.24 industries  
test1%>%
    group_by(is_zero)%>%
    summarise(mean_casos=mean(n))
# A tibble: 2 × 2
  is_zero  mean_casos
  <chr>         <dbl>
1 Is zero       17.8 
2 Not zero       2.24
Code
# If I keep the max value for each document I see that 
max_total_rama<-df_ar |>  
    select(doc_id,sectors)|>
    pivot_longer(cols=2:21,
             names_to="sector",
             values_to="value")|>
    group_by(doc_id)|>
    mutate(total=sum(value))%>%
    filter(value==max(value))%>%
    mutate(n=n())%>%
    ungroup()

head(max_total_rama)
# A tibble: 6 × 5
  doc_id                   sector                              value total     n
  <chr>                    <chr>                               <dbl> <dbl> <int>
1 IcBBOjuXZT8BiD2XAAAAAA== other_services_except_public_admin…    79    79     1
2 ct1JrwQ54xivPQpxAAAAAA== educational_services                   99    99     1
3 bo7X77th8vwAAAAAAAAAAA   government                             43    78     1
4 72DNO-KNkrEAAAAAAAAAAA   government                             43    78     1
5 kqPOGYOTJTYAAAAAAAAAAA   professional_scientific_and_techni…    31    31     1
6 If6uaYoqh6CHO4dpAAAAAA== other_services_except_public_admin…    79    79     1
Code
### Facts:
print(paste("Total documents:", nrow(df_ar)))
[1] "Total documents: 23435"
Code
print(paste("Documents with a prediction",max_total_rama%>%
    filter(value>0)%>%
    distinct(doc_id)%>%
    nrow()))
[1] "Documents with a prediction 23069"
Code
print(paste("Documents without a prediction",max_total_rama%>%
    filter(value==0)%>%
    distinct(doc_id)%>%
    nrow()))
[1] "Documents without a prediction 366"
Code
print(paste("Documents with more than 1 prediction",
            "(excluding those with no prediction)",
    max_total_rama%>%
    filter(value>0)%>%
    filter(n>1)%>%
    nrow()))
[1] "Documents with more than 1 prediction (excluding those with no prediction) 2260"
Code
# examples of these cases:
max_total_rama%>%
    filter(value>0)%>%
    filter(n>1)
# A tibble: 2,260 × 5
   doc_id                   sector                             value total     n
   <chr>                    <chr>                              <dbl> <dbl> <int>
 1 vubaCpc3QxIAAAAAAAAAAA   other_services_except_public_admi…    21    66     2
 2 vubaCpc3QxIAAAAAAAAAAA   professional_scientific_and_techn…    21    66     2
 3 FAVNbJ4eY_wLOcg4AAAAAA== professional_scientific_and_techn…    12    24     2
 4 FAVNbJ4eY_wLOcg4AAAAAA== retail_trade                          12    24     2
 5 EzJVBKu6nSupL4lkAAAAAA== professional_scientific_and_techn…    12    24     2
 6 EzJVBKu6nSupL4lkAAAAAA== retail_trade                          12    24     2
 7 Af-RP22V5xoAAAAAAAAAAA   professional_scientific_and_techn…    12    24     2
 8 Af-RP22V5xoAAAAAAAAAAA   retail_trade                          12    24     2
 9 SXIxuzy1_lUAAAAAAAAAAA   professional_scientific_and_techn…    12    24     2
10 SXIxuzy1_lUAAAAAAAAAAA   retail_trade                          12    24     2
# ℹ 2,250 more rows
Code
print(paste("Documents with 100% certainty on 1 prediction",
    max_total_rama%>%
    filter(total==100)%>%
    nrow()))
[1] "Documents with 100% certainty on 1 prediction 167"