# Importing, Wrangling & Analyzing Football Wages Data Using the tidyverse in R

The accompanying video, published on the StatistikinDD YouTube channel by Wolf Riepl, explains how to read, wrangle, analyze, and visualize a football wages data set.

```r
# Setup and data import ----
# install.packages("tidyverse")                   # Install tidyverse packages (once)
library(tidyverse)                                # Attach tidyverse (read_csv, str_c, %>%, ggplot)

# The file name is appended without a separator, so the path must end in "/"
my_path <- "D:/Dropbox/Jock/Data Sets/"           # Specify directory path

tib_sp <- read_csv(str_c(my_path,                 # Import CSV file
                         "SalaryPrediction.csv"))
tib_sp                                            # Print tibble

# Inspect the imported data ----
class(tib_sp)                                     # Class vector of the data set

tib_sp %>%
  View()                                          # Open the full data set in the viewer

# Keep only the outcome (Wage) and the three columns used as predictors
tib_sp_sel <- select(tib_sp, Wage, League, Position, Age)
tib_sp_sel                                        # Print reduced tibble

# Regress Wage on every other column of tib_sp_sel and print the model summary
summary(lm(Wage ~ ., data = tib_sp_sel))

# Mean wage per age, stored as its own tibble (one row per Age value)
tib_sp_mean <- dplyr::summarize(
  group_by(tib_sp, Age),
  mean_Wage = mean(Wage)
)
tib_sp_mean                                       # Print aggregated tibble

# Line chart of mean wage against age
ggplot(tib_sp_mean, aes(x = Age, y = mean_Wage)) +
  geom_line()

# Mean wage by age as a line chart, aggregated and plotted in a single pipeline
# (no intermediate tibble is stored)
tib_sp %>%
  group_by(Age) %>%
  dplyr::summarize(mean_Wage = mean(Wage), .groups = "drop") %>%
  ggplot(mapping = aes(Age, mean_Wage)) +
  geom_line()

# Mean wage by age within each league, drawn as one coloured line per league
tib_sp %>%
  group_by(Age, League) %>%
  dplyr::summarize(mean_Wage = mean(Wage),
                   .groups = "drop") %>%
  ggplot(aes(x = Age,
             y = mean_Wage,
             col = League)) +
  # `linewidth` replaces `size` for lines; `size` is deprecated since ggplot2 3.4.0
  geom_line(linewidth = 2)

# Bar chart of the mean wage in each league
tib_sp %>%
  group_by(League) %>%
  dplyr::summarize(mean_Wage = mean(Wage), .groups = "drop") %>%
  ggplot(mapping = aes(League, mean_Wage)) +
  geom_col()

# Bar chart of the mean wage per club, with bars filled by league
tib_sp %>%
  group_by(Club, League) %>%
  dplyr::summarize(mean_Wage = mean(Wage),
                   .groups = "drop") %>%
  # Reorder the Club levels by negated mean wage, i.e. highest-paying club first
  mutate(Club = reorder(Club, -mean_Wage, FUN = mean)) %>%
  ggplot(aes(x = Club,
             y = mean_Wage,
             fill = League)) +
  geom_col() +
  # Rotate the x-axis labels by 90 degrees
  theme(axis.text.x = element_text(angle = 90,
                                   hjust = 1,
                                   vjust = 0.5))
```

Subscribe to the Statistics Globe Newsletter