##### Data Manipulation in R #####

# Introductory walkthrough: basic data structures, data frame manipulation,
# CSV import/export, Base R and ggplot2 graphics, descriptive statistics,
# linear models, loops, and user-defined functions.

vec_1 <- c(1, 1, 5, 3, 1, 5)                      # Create vector object
vec_1                                             # Print vector object

data_1 <- data.frame(x1 = c(7, 2, 8, 3, 3, 7),    # Create data frame
                     x2 = c("x", "y", "x", "x", "x", "y"),
                     x3 = 11:16)
data_1                                            # Print data frame

list_1 <- list(1:5,                               # Create list
               vec_1,
               data_1)
list_1                                            # Print list

class(vec_1)                                      # Check class of vector elements

vec_2 <- c("a", "b", "a", "c")                    # Create character vector
vec_2                                             # Print character vector

class(vec_2)                                      # Check class of vector elements

vec_3 <- factor(c("gr1", "gr1", "gr2", "gr3", "gr2"))  # Create factor vector
vec_3                                             # Print factor vector

class(vec_3)                                      # Check class of vector elements

vec_4 <- as.character(vec_3)                      # Convert factor to character
vec_4                                             # Print updated vector

class(vec_4)                                      # Check class of updated vector elements

data_2 <- data_1                                  # Create duplicate of data frame
data_2$x4 <- vec_1                                # Add new column to data frame
data_2                                            # Print updated data frame

data_3 <- data_2[ , colnames(data_2) != "x2"]     # Remove column from data frame
data_3                                            # Print updated data frame

data_4 <- data_3                                  # Create duplicate of data frame
colnames(data_4) <- c("col_A", "col_B", "col_C")  # Change column names
data_4                                            # Print updated data frame

data_5 <- rbind(data_3, 101:103)                  # Add new row to data frame
data_5                                            # Print updated data frame

data_6 <- data_5[data_5$x1 > 3, ]                 # Keep only rows with x1 > 3
data_6                                            # Print updated data frame

data_7 <- data.frame(ID = 101:106,                # Create first data frame
                     x1 = letters[1:6],
                     x2 = letters[6:1])
data_7                                            # Print first data frame

data_8 <- data.frame(ID = 104:108,                # Create second data frame
                     y1 = 1:5,
                     y2 = 5:1,
                     y3 = 5)
data_8                                            # Print second data frame

data_9 <- merge(x = data_7,                       # Merge two data frames
                y = data_8,
                by = "ID",
                all = TRUE)                       # all = TRUE keeps unmatched IDs of both inputs
data_9                                            # Print merged data frame

vec_5 <- vec_1                                    # Create duplicate of vector
vec_5[vec_5 == 1] <- 99                           # Replace certain value in vector
vec_5                                             # Print updated vector

data_10 <- data_1                                 # Create duplicate of data frame
data_10$x2[data_10$x2 == "y"] <- "new"            # Replace values in column
data_10                                           # Print updated data frame

getwd()                                           # Get current working directory

# The example directory is specific to one machine. Only switch to it when it
# actually exists, so the script does not abort with an error elsewhere.
my_dir <- "C:/Users/Joach/Desktop/my directory"
if (dir.exists(my_dir)) {
  setwd(my_dir)                                   # Set new working directory
}

getwd()                                           # Get current working directory

write.csv(data_10,                                # Export data frame to CSV file
          "data_10.csv",
          row.names = FALSE)

data_11 <- read.csv("data_10.csv")                # Import data frame from CSV file
data_11                                           # Print imported data frame


##### Creating Graphics in R #####

data(iris)                                        # Load iris data set
head(iris)                                        # Print head of iris data set

plot(x = iris$Sepal.Length,                       # Draw Base R scatterplot
     y = iris$Sepal.Width,
     col = iris$Species)

plot(density(x = iris$Sepal.Length))              # Draw Base R density plot

hist(x = iris$Sepal.Length)                       # Draw Base R histogram

boxplot(iris$Sepal.Length ~ iris$Species)         # Draw Base R boxplot

# Install ggplot2 only when it is missing, instead of re-installing (and
# requiring network access) on every run of the script.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")                     # Install ggplot2 package
}
library("ggplot2")                                # Load ggplot2

ggplot(iris,                                      # Draw ggplot2 scatterplot
       aes(x = Sepal.Length,
           y = Sepal.Width,
           col = Species)) +
  geom_point()

ggplot(iris,                                      # Draw ggplot2 density plot
       aes(x = Sepal.Length)) +
  geom_density()

ggplot(iris,                                      # Draw multiple ggplot2 density plots
       aes(x = Sepal.Length,
           col = Species)) +
  geom_density()

ggplot(iris,                                      # Fill ggplot2 density plots
       aes(x = Sepal.Length,
           col = Species,
           fill = Species)) +
  geom_density()

ggplot(iris,                                      # Opacity of ggplot2 density plots
       aes(x = Sepal.Length,
           col = Species,
           fill = Species)) +
  geom_density(alpha = 0.3)

ggplot(iris,                                      # Draw ggplot2 histogram
       aes(x = Sepal.Length)) +
  geom_histogram()

ggplot(iris,                                      # Draw ggplot2 boxplot
       aes(x = Species,
           y = Sepal.Length)) +
  geom_boxplot()

ggplot(iris,                                      # Add colors to ggplot2 boxplot
       aes(x = Species,
           y = Sepal.Length,
           fill = Species)) +
  geom_boxplot()

iris_groups <- iris                               # Create duplicate of iris data set
iris_groups$Sub <- letters[1:3]                   # Add subgroups to data (values are recycled)

# The formula is passed positionally: the first argument of the formula method
# of aggregate() was renamed from `formula` to `x` in R 4.2.0, so naming it
# `formula = ` fails on current R versions. Positional matching works on both.
iris_groups <- aggregate(Sepal.Length ~ Species + Sub,  # Mean by subgroup
                         data = iris_groups,
                         FUN = mean)
iris_groups                                       # Print aggregated iris data set

ggplot(iris_groups,                               # Draw ggplot2 barplot
       aes(x = Species,
           y = Sepal.Length)) +
  geom_bar(stat = "identity")

ggplot(iris_groups,                               # Draw stacked ggplot2 barplot
       aes(x = Species,
           y = Sepal.Length,
           fill = Sub)) +
  geom_bar(stat = "identity")

ggplot(iris_groups,                               # Draw grouped ggplot2 barplot
       aes(x = Species,
           y = Sepal.Length,
           fill = Sub)) +
  geom_bar(stat = "identity",
           position = "dodge")


##### Data Analysis & Descriptive Statistics in R #####

mean(vec_1)                                       # Calculate mean
median(vec_1)                                     # Calculate median
min(vec_1)                                        # Calculate minimum
max(vec_1)                                        # Calculate maximum
sum(vec_1)                                        # Calculate sum
var(vec_1)                                        # Calculate variance
sd(vec_1)                                         # Calculate standard deviation

summary(vec_1)                                    # Calculate multiple descriptive statistics

table(vec_1)                                      # Create frequency table

table(data_1[ , c("x1", "x2")])                   # Create contingency table

mod_1 <- lm(formula = Sepal.Width ~ Sepal.Length, # Estimate linear regression model
            data = iris)
summary(mod_1)                                    # Summary statistics of model

ggplot(iris,                                      # Draw scatterplot with regression line
       aes(x = Sepal.Length,
           y = Sepal.Width)) +
  geom_point() +
  geom_smooth(method = "lm")

mod_2 <- lm(formula = Sepal.Width ~ Sepal.Length + Species,  # Model with multiple predictors
            data = iris)
summary(mod_2)                                    # Summary statistics of model

ggplot(iris,                                      # Draw multiple regression lines
       aes(x = Sepal.Length,
           y = Sepal.Width,
           col = Species)) +
  geom_point() +
  geom_smooth(method = "lm")


##### Advanced Techniques in R #####

vec_6 <- numeric()                                # Create empty numeric vector
vec_6                                             # Print empty numeric vector

# seq_along() is used instead of 1:length() so the loop body is skipped
# (rather than run with i = 1 and i = 0) if the vector is ever empty.
for (i in seq_along(vec_1)) {                     # Apply for loop to vector
  vec_6[i] <- vec_1[i] + i
}

vec_1                                             # Print vec_1 for comparison
vec_6                                             # Print new vector

data_12 <- data_1                                 # Create duplicate of data frame
data_12$x4 <- NA                                  # Add new column containing only NA
data_12                                           # Print new data frame

for (i in seq_len(nrow(data_12))) {               # Loop over rows of data frame
  data_12$x4[i] <- data_12$x1[i] + i * data_12$x3[i]
}
data_12                                           # Print updated data frame

vec_7 <- character()                              # Create empty character vector
vec_7                                             # Print empty character vector

for (i in seq_along(vec_1)) {                     # for loop & nested if else statement
  if (vec_1[i] > 3) {
    vec_7[i] <- "high"
  } else {
    vec_7[i] <- "low"
  }
}
vec_7                                             # Print updated vector

vec_8 <- ifelse(test = vec_1 > 3,                 # Vectorized equivalent of the loop above
                yes = "high",
                no = "low")
vec_8                                             # Print new vector

# fun_1: returns x^2 + 5 * x, computed element-wise for a numeric vector x.
fun_1 <- function(x) {                            # Create simple user-defined function
  out <- x^2 + 5 * x
  out
}

fun_1(x = vec_1)                                  # Apply simple user-defined function

# fun_2: returns (x^2 + 5 * x) / y when y > 3, otherwise (x^2 + 5 * x) / (10 * y).
# y is used as an if() condition, so it must be a single value.
fun_2 <- function(x, y) {                         # Create complex user-defined function
  if (y > 3) {
    out <- (x^2 + 5 * x) / y
  } else {
    out <- (x^2 + 5 * x) / (10 * y)
  }
  out
}

for (i in 1:5) {                                  # Complex user-defined function in for loop
  print(paste0("This is the result of iteration ", i, ": ", fun_2(x = 5, y = i)))
}