# Basic math functions applied to a number X
log(X) # natural logarithm (base e) of X
log10(X) # logarithm base 10 of X
sqrt(X) # square root of X
abs(X) # absolute value of X
# Combine individual values into one vector with the concatenate function c()
VECTOR <- c(X1, X2, X3, ...)
# Build a vector holding the sequence of numbers that runs from X1 to XN,
# stepping by INCREMENT each time
VECTOR <- seq(from = X1, to = XN, by = INCREMENT)
# Summary statistics for a set of numbers.
# na.rm = TRUE removes missing values (NA) before calculating; change it to
# na.rm = FALSE to keep them. Always spell out TRUE/FALSE: T and F are ordinary
# variables that can be overwritten.
sum(VECTOR, na.rm = TRUE) # sum of a set of numbers
mean(VECTOR, na.rm = TRUE) # mean of a set of numbers
sd(VECTOR, na.rm = TRUE) # standard deviation of a set of numbers
# Indexing a vector with square brackets; X, X1, and X2 are positions in the vector.
# Each line returns a result and leaves VECTOR itself unchanged unless you assign it.
VECTOR[X] # extract the value at position X from a vector
VECTOR[X1:X2] # extract the values at positions X1 through X2 from a vector
VECTOR[c(X1,X2)] # extract the values at positions X1 and X2 from a vector
VECTOR[-X] # return the vector without the value at position X
VECTOR[-(X1:X2)] # return the vector without the values at positions X1 through X2
VECTOR[-c(X1,X2)] # return the vector without the values at positions X1 and X2
# Create a data frame by inputting values for each variable (column)
DATA <- data.frame(VARIABLE1 = VALUES1, VARIABLE2 = VALUES2, ...)
DATA$VARIABLE # extract a variable (column) from a data frame
DATA[[X, Y]] # extract the single value in row X and column Y
# Read a data frame from a file, then inspect it
DATA <- read.csv("FILENAME.csv") # create a data frame by reading in a csv file
head(DATA) # view the top few rows of your data frame
View(DATA) # view the entire data frame in a new tab (note the capital V)
names(DATA) # view the variable (column) names in your data frame
str(DATA) # view the structure of your data frame, including variable names and types
# Convert a variable (column) to a different type.
# Each line overwrites DATA$VARIABLE with the converted values.
DATA$VARIABLE <- as.factor(DATA$VARIABLE) # convert a variable to a factor variable
DATA$VARIABLE <- as.character(DATA$VARIABLE) # convert a variable to a character variable
DATA$VARIABLE <- as.integer(DATA$VARIABLE) # convert a variable to an integer variable
DATA$VARIABLE <- as.numeric(DATA$VARIABLE) # convert a variable to a numeric variable
library(tidyverse) # load tidyverse

# Filter data so the new data frame includes only the rows whose values for a
# variable meet a certain CRITERIA. If the variable is a text variable, the
# CRITERIA need to go inside quotes. CRITERIA can also use <, >, and
# != (not equal to) in place of ==.
DATA_FILTER <- filter(DATA, VARIABLE == CRITERIA)

# Select particular variables (columns) from a data frame.
DATA_SELECT <- select(DATA, VARIABLE1, VARIABLE2, ...)

# Create a new variable in the data frame by applying a function to existing
# variable(s). Functions can include, but are not limited to: log, sum, min,
# max, mean.
DATA_MUTATE <- mutate(DATA, NEWVARIABLE = FUNCTION(VARIABLE))

# Group a data frame by particular variable(s) in preparation for summarizing
# it. The grouped data frame will not look different from the original, but
# above the data frame you will be able to see the number of groups for the
# variable you grouped by.
GROUP <- group_by(DATA, VARIABLE)

# Summarize variables in a data frame. Note that the first argument is a
# grouped data frame created using the group_by function. You can summarize
# more than one variable at a time with this function.
DATA_SUMMARY <- summarise(
  GROUP,
  SUMMARYVARIABLE1 = FUNCTION(VARIABLE1),
  SUMMARYVARIABLE2 = FUNCTION(VARIABLE2),
  ...
)
# Note: ggplot2 is part of the tidyverse set of packages
# Initialize a ggplot graph by identifying the data frame, independent, and
# dependent variable. Note that some graphs (e.g., histograms) might only
# require an independent variable. Aesthetic names are case-sensitive: use
# lowercase x and y.
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DEPENDENTVARIABLE))
# Graph components are added after the ggplot function, using a plus sign (+)
# to add each additional component, as follows
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DEPENDENTVARIABLE)) +
  geom_GRAPHTYPE() +
  labs(x = "X AXIS LABEL", y = "Y AXIS LABEL") +
  theme_classic()
# Note: multiple graph types can be added as components to the same plot
# Histogram: graphs the frequency of values for a single variable
ggplot(DATA, aes(x = INDEPENDENTVARIABLE)) +
  geom_histogram() +
  labs(x = "X AXIS LABEL", y = "Y AXIS LABEL") +
  theme_classic()
# Density: graphs the probability density of values for a single variable.
# stat = "identity" plots the DENSITY values supplied in the data as they are.
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DENSITY)) +
  geom_density(stat = "identity", linewidth = 1) +
  labs(x = "X AXIS LABEL", y = "Y AXIS LABEL") +
  theme_classic()
# Vertical line: adds a vertical line to a graph at x = INTERCEPT
ggplot(DATA, aes(x = INDEPENDENTVARIABLE)) +
  geom_vline(aes(xintercept = INTERCEPT), color = "COLOR", linewidth = 1) +
  labs(x = "X AXIS LABEL", y = "Y AXIS LABEL") +
  theme_classic()
# Boxplot: for a categorical independent variable and a continuous dependent
# variable
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DEPENDENTVARIABLE)) +
  geom_boxplot() +
  labs(x = "X AXIS LABEL", y = "Y AXIS LABEL") +
  theme_classic()
# Scatterplot with best fit line: for a continuous independent variable and a
# continuous dependent variable. method = "lm" draws a linear-model fit.
ggplot(DATA, aes(x = INDEPENDENTVARIABLE, y = DEPENDENTVARIABLE)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "X AXIS LABEL", y = "Y AXIS LABEL") +
  theme_classic()
# Linear models. A model formula reads RESPONSE ~ PREDICTORS, so the dependent
# variable goes on the left of ~ and the independent variable on the right.
# Null model (intercept only: no effect of the independent variable)
null <- lm(DEPENDENTVARIABLE ~ 1, data = DATA)
# Alternative model (the dependent variable is explained by the independent variable)
alternative <- lm(DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE, data = DATA)
# Paired t-test as a linear model.
# lm() cannot fit random effects: the (1 | SAMPLENUMBER) syntax is only
# understood by mixed-model functions such as lme4::lmer(). With lm(), include
# the pairing ID as a blocking factor instead; with two observations per
# SAMPLENUMBER the test of INDEPENDENTVARIABLE matches the paired t-test.
# If lme4 is installed, the random-intercept version is:
#   lme4::lmer(DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE + (1 | SAMPLENUMBER), data = DATA)
paired <- lm(DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE + factor(SAMPLENUMBER), data = DATA)
# Pull the coefficient table out of the model summary
COEFFICIENTS <- summary(MODELNAME)[["coefficients"]]
# Pull the model standard deviation (sigma) out of the model summary
SIGMA <- summary(MODELNAME)[["sigma"]]
# First use the lm function (see previous section) to build your models
# Calculate AIC. You can compare more than two models at a time, so if you have
# more than one alternative model, you can add them to the list.
AIC(null, alternative)
# Categorical independent variable (only 2 categories), continuous dependent variable
# In the formula versions below, the dependent variable goes on the left of ~
# and the two-category independent variable on the right.
# Two sample t-test, equal variances
t.test(DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE, data = DATA, var.equal = TRUE)
# Two sample t-test, unequal variances
t.test(DEPENDENTVARIABLE ~ INDEPENDENTVARIABLE, data = DATA, var.equal = FALSE)
# One sample t-test (compares the mean of the dependent variable to TRUEMEAN)
t.test(x = DATA$DEPENDENTVARIABLE, mu = TRUEMEAN)
# Paired t-test
# The formula version of t.test() does not accept paired = TRUE in current
# versions of R, so pass the two sets of measurements as x and y instead.
# Replace CATEGORY1 and CATEGORY2 with the two categories of the independent
# variable. The rows must be in the same order within each category so that the
# first value of x is paired with the first value of y, and so on.
paired <- with(
  DATA,
  t.test(
    x = DEPENDENTVARIABLE[INDEPENDENTVARIABLE == "CATEGORY1"],
    y = DEPENDENTVARIABLE[INDEPENDENTVARIABLE == "CATEGORY2"],
    paired = TRUE
  )
)