R Basics

Mathematical functions

log(X) # natural logarithm (base e) of X
log10(X) # base-10 logarithm of X
sqrt(X) # square root of X
abs(X) # absolute value of X

Vector functions

VECTOR <- c(X1, X2, X3, ...) # combine individual values into a single vector
VECTOR <- seq(from = X1, to = XN, by = INCREMENT) # build a vector of numbers running from X1 to XN in steps of INCREMENT

# In each call below, replace TRUE/FALSE with either TRUE or FALSE.
# Always spell the values out: the shortcuts T and F are ordinary variables that can be overwritten.
sum(VECTOR, na.rm = TRUE/FALSE) # sum of a set of numbers; set na.rm to TRUE to remove missing values
mean(VECTOR, na.rm = TRUE/FALSE) # mean of a set of numbers; set na.rm to TRUE to remove missing values
sd(VECTOR, na.rm = TRUE/FALSE) # standard deviation of a set of numbers; set na.rm to TRUE to remove missing values

# Positions are counted from 1. Negative positions return a copy of the vector without
# those elements; VECTOR itself only changes if you assign the result back to it.
VECTOR[X] # extract the element at position X
VECTOR[X1:X2] # extract the elements at positions X1 through X2
VECTOR[c(X1,X2)] # extract the elements at positions X1 and X2
VECTOR[-X] # everything except the element at position X
VECTOR[-(X1:X2)] # everything except the elements at positions X1 through X2
VECTOR[-c(X1,X2)] # everything except the elements at positions X1 and X2

Data frame basics

DATA <- data.frame(VARIABLE1 = VALUES1, VARIABLE2 = VALUES2, ...) # create a data frame by inputting the values for each variable (column)

DATA$VARIABLE # Pull a single variable (column) out of a data frame
DATA[[X, Y]] # Pull out the single value stored in row X, column Y

Data frames

Creating and viewing data frames

DATA <- read.csv("FILENAME.csv") # create a data frame by reading in a csv file

head(DATA) # view the first few rows of your data frame
View(DATA) # view the entire data frame in a new tab (note the capital V)
names(DATA) # view the variable (column) names in your data frame
str(DATA) # view the structure of your data frame, including variable names and types

Changing variable types

# Each line overwrites the existing variable with its converted version.
# Caution: applying as.integer or as.numeric directly to a factor returns the internal level codes,
# not the labels; convert the factor with as.character first if the labels are numbers.
DATA$VARIABLE <- as.factor(DATA$VARIABLE) # convert a variable to a factor variable
DATA$VARIABLE <- as.character(DATA$VARIABLE) # convert a variable to a character variable
DATA$VARIABLE <- as.integer(DATA$VARIABLE) # convert a variable to an integer variable
DATA$VARIABLE <- as.numeric(DATA$VARIABLE) # convert a variable to a numeric variable

Manipulating variables with the tidyverse package

library(tidyverse) # load tidyverse

DATA_FILTER <- filter(DATA, VARIABLE == CRITERIA) # Keep only the rows whose value of VARIABLE meets a certain CRITERIA. If the variable is a text variable, the CRITERIA must go inside quotes. The comparison can also be <, >, or != (not equal to) in place of ==.
DATA_SELECT <- select(DATA, VARIABLE1, VARIABLE2, ...) # Keep only particular variables (columns) from a data frame.
DATA_MUTATE <- mutate(DATA, NEWVARIABLE = FUNCTION(VARIABLE)) # Add a new variable to the data frame by applying a function to existing variable(s). Functions can include, but are not limited to: log, sum, min, max, mean.
GROUP <- group_by(DATA, VARIABLE) # Group a data frame by one or more variables in preparation for summarizing it. The grouped data frame will not look different from the original, but above the data frame you will be able to see the number of groups for the variable you grouped by.
# Summarize variables in a data frame. The first argument is the grouped data frame
# created with group_by; more than one variable can be summarized in a single call.
DATA_SUMMARY <- summarise(
  GROUP,
  SUMMARYVARIABLE1 = FUNCTION(VARIABLE1),
  SUMMARYVARIABLE2 = FUNCTION(VARIABLE2),
  ...
)

Graphing with the ggplot2 package

Note: ggplot2 is part of the tidyverse set of packages

General structure

# Initialize a ggplot graph by identifying the data frame, independent variable, and dependent variable.
# The aesthetic names x and y must be lowercase: ggplot2 does not recognise X or Y.
# Note that some graphs (e.g., histograms) might only require an independent variable.
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DEPENDENTVARIABLE))

# Graph components are added after the ggplot function, using a plus sign (+) to add each additional component, as follows
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DEPENDENTVARIABLE)) +
  geom_GRAPHTYPE() +
  labs(x = "X AXIS LABEL", y = "Y AXIS LABEL") +
  theme_classic()

Specific graph types

Note: multiple graph types can be added as components to the same plot

# Histogram: shows how frequently each range of values occurs for a single variable
ggplot(DATA, aes(x = INDEPENDENTVARIABLE)) +
  geom_histogram() +
  labs(
    x = "X AXIS LABEL",
    y = "Y AXIS LABEL"
  ) +
  theme_classic()

# Density: shows the probability of each value for a single variable.
# Because stat = "identity" is used, the DENSITY values are plotted exactly as supplied,
# so DENSITY must already be a variable in the data frame.
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DENSITY)) +
  geom_density(stat = "identity", linewidth = 1) +
  labs(
    x = "X AXIS LABEL",
    y = "Y AXIS LABEL"
  ) +
  theme_classic()

# Vertical line: draws a vertical line on a graph where x equals INTERCEPT
ggplot(DATA, aes(x = INDEPENDENTVARIABLE)) +
  geom_vline(
    aes(xintercept = INTERCEPT),
    color = "COLOR",
    linewidth = 1
  ) +
  labs(
    x = "X AXIS LABEL",
    y = "Y AXIS LABEL"
  ) +
  theme_classic()

# Boxplot: for a categorical independent variable and a continuous dependent variable
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DEPENDENTVARIABLE)) +
  geom_boxplot() +
  labs(
    x = "X AXIS LABEL",
    y = "Y AXIS LABEL"
  ) +
  theme_classic()

# Scatterplot with best fit line: for a continuous independent variable and a continuous dependent variable
# (method = "lm" draws a straight line fitted by a linear model)
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DEPENDENTVARIABLE)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "X AXIS LABEL",
    y = "Y AXIS LABEL"
  ) +
  theme_classic()

Building general linear models

# In a model formula the dependent (response) variable always goes on the left of ~
# and the independent (predictor) variable(s) on the right.

# Null model (intercept only: the dependent variable is described by its overall mean)
null <- lm(DEPENDENTVARIABLE ~ 1, data = DATA)

# Alternative model
alternative <- lm(DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE, data = DATA)

# Paired t-test as a linear model
# lm() cannot fit random effects: inside an lm() formula, (1|SAMPLENUMBER) is not read as a
# random intercept, so the pairing would be silently ignored. Here the pairing is accounted for
# by adding the sample identifier as a blocking factor instead. When every sample is measured
# once under each of two categories, the test of INDEPENDENTVARIABLE matches the paired t-test.
# A true random-intercept model needs a mixed-model function from an additional package.
paired <- lm(DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE + factor(SAMPLENUMBER), data = DATA)

# Coefficient table (estimates, standard errors, test statistics, p-values) of a fitted model
COEFFICIENTS <- summary(MODELNAME)[["coefficients"]]

# Model standard deviation (the residual standard error reported by summary)
SIGMA <- summary(MODELNAME)[["sigma"]]

Running statistical tests

Maximum likelihood approach (code has the same structure for all tests)

# First use the lm function (see previous section) to build your models
# Calculate AIC (the names passed in must match the model objects you created)
AIC(null, alternative) # You can compare more than two models at a time, so if you have more than one alternative model, you can add them to the list.

Classical frequentist

T-tests

Categorical independent variable (only 2 categories), continuous dependent variable

# Two-sample t-test assuming the two groups have equal variances
t.test(
  DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE,
  data = DATA,
  var.equal = TRUE
)

# Two-sample t-test without assuming equal variances (Welch's t-test)
t.test(
  DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE,
  data = DATA,
  var.equal = FALSE
)

# One-sample t-test comparing the sample mean against a hypothesised TRUEMEAN
t.test(
  x = DATA$DEPENDENTVARIABLE,
  mu = TRUEMEAN
)

# Paired t-test
# The two sets of measurements are passed as separate vectors because the formula interface
# (DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE) rejects paired = TRUE in R 4.4.0 and later.
# Replace CATEGORY1 and CATEGORY2 with the two values of the independent variable.
# The rows must be in the same sample order within each category so that the pairs line up.
# The reported difference is CATEGORY1 minus CATEGORY2.
paired <- t.test(
  x = DATA$DEPENDENTVARIABLE[DATA$INDEPENDENTVARIABLE == "CATEGORY1"],
  y = DATA$DEPENDENTVARIABLE[DATA$INDEPENDENTVARIABLE == "CATEGORY2"],
  paired = TRUE
)