Importing, Wrangling & Analyzing Football Wages Data Using the tidyverse in R

This page contains the code that corresponds to the following YouTube video:

 

 

The video was published on the StatistikinDD YouTube channel by Wolf Riepl and explains how to read, wrangle, analyze, and visualize a football wages data set.

Please find the code below:

# install.packages("tidyverse")                   # Install tidyverse packages
library("tidyverse")                              # Load tidyverse packages
 
# Directory that holds the data file
my_path <- "D:/Dropbox/Jock/Data Sets/"

# Append the file name to the directory and read the CSV into a tibble
tib_sp <- my_path %>%
  str_c("SalaryPrediction.csv") %>%
  read_csv()

# Show the imported tibble
tib_sp
 
# Report the class vector of the imported data set
class(tib_sp)

# Open the entire data set in the spreadsheet-style data viewer.
# View() is only useful in an interactive session, so the call is skipped
# when the script runs non-interactively (e.g. via Rscript). Calling View()
# directly instead of through %>% lets the default viewer title be derived
# from the object name rather than from the pipe placeholder ".".
if (interactive()) {
  View(tib_sp)
}
 
# Keep only the Wage, League, Position and Age columns
tib_sp_sel <- select(tib_sp, Wage, League, Position, Age)
tib_sp_sel

# Fit a linear regression of Wage on all remaining columns of tib_sp_sel
# and print the model summary
summary(lm(Wage ~ ., data = tib_sp_sel))
 
# Average Wage for each distinct value of Age
tib_sp_mean <- dplyr::summarise(
  group_by(tib_sp, Age),
  mean_Wage = mean(Wage)
)
tib_sp_mean

# Line chart with Age on the x-axis and the average Wage on the y-axis
ggplot(tib_sp_mean, aes(Age, mean_Wage)) +
  geom_line()
 
# Aggregate by Age and draw the line chart in a single pipeline,
# without storing the intermediate tibble
tib_sp %>%
  group_by(Age) %>%
  dplyr::summarise(
    mean_Wage = mean(Wage),
    .groups = "drop"
  ) %>%
  ggplot(mapping = aes(Age, mean_Wage)) +
  geom_line()
 
# Average Wage per combination of Age and League, drawn as one line per League
tib_sp %>%
  group_by(Age, League) %>%
  dplyr::summarize(mean_Wage = mean(Wage),
                   .groups = "drop") %>%
  ggplot(aes(x = Age,
             y = mean_Wage,
             col = League)) +
  # Line thickness is set via `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and triggers a deprecation warning
  geom_line(linewidth = 2)
 
# Bar chart of the average Wage for each League
tib_sp %>%
  group_by(League) %>%
  dplyr::summarise(mean_Wage = mean(Wage), .groups = "drop") %>%
  ggplot(mapping = aes(League, mean_Wage)) +
  geom_col()
 
# Bar chart of the average Wage for each Club, with bars filled by League
tib_sp %>%
  group_by(Club, League) %>%
  dplyr::summarise(mean_Wage = mean(Wage), .groups = "drop") %>%
  # reorder() sorts levels by increasing score; negating mean_Wage therefore
  # places the clubs with the highest average Wage first
  mutate(Club = reorder(Club, -mean_Wage, FUN = mean)) %>%
  ggplot(mapping = aes(Club, mean_Wage, fill = League)) +
  geom_col() +
  # Rotate the x-axis labels by 90 degrees
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )

 

Subscribe to the Statistics Globe Newsletter

Get regular updates on the latest tutorials, offers & news at Statistics Globe.
I hate spam & you may opt out anytime: Privacy Policy.


Leave a Reply

Your email address will not be published. Required fields are marked *

Fill out this field
Fill out this field
Please enter a valid email address.

Top