(1) LINEAR REGRESSION

Linear Regression: Output

# Function: fitLinRegOutput
# Fits a simple linear regression, provides the output only (without plot)
# inputs: numeric vector of predictor variable 'x' and response variable 'y'
# outputs: slope, p-value, standard error, and t-value
fitLinRegOutput <- function(x=runif(37),y=runif(37)){
  myModel <- lm(y~x)
  myOutput <-c(slope=summary(myModel)$coefficients[2,1],
            pValue=summary(myModel)$coefficients[2,4],
            stdErr=summary(myModel)$coefficients[2,2],
            tValue=summary(myModel)$coefficients[2,3])
  return(myOutput)
}            

# Running function with default settings
fitLinRegOutput()
##      slope     pValue     stdErr     tValue 
## 0.30395909 0.08101533 0.16917467 1.79671751

Linear Regression: Plot

# Function: fitLinRegPlot
# Plots linear regression
# inputs: numeric vector of predictor variable 'x' and response variable 'y'
# output: linear regression plot
fitLinRegPlot <- function(x=runif(20),y=runif(20)){
  myModel <- lm(y~x)
  myOutput <- plot(x=x,y=y, pch=21, bg="turquoise", main = "Linear Regression Example", xlab = "Hours", ylab = "Height (m)")
  abline(myModel)
  return(myOutput)
}

# Running function with default settings
fitLinRegPlot()

## NULL

Testing Linear Regression Functions

# Making matrix for testing function
data <- matrix(c(1:20), ncol = 2, nrow = 10)
x<-data[,1]
y<-data[,2]

# Testing LinRegOutput Function
fitLinRegOutput(x=x,y=y)
##         slope        pValue        stdErr        tValue 
##  1.000000e+00 1.095702e-123  1.773411e-16  5.638852e+15
# Testing LinRegPlot Function
fitLinRegPlot(x=x,y=y)

## NULL

(2) ANOVA

ANOVA: Output

# Function: fitANOVA
# Runs an analysis of variance test on the data, provides the output only (without plot)
# inputs: numeric vectors xVar (predictor) and yVar (response)
# outputs: test statistic, df, p-value


#xVar=as.factor(rep(c("pH_8.1","pH_7.5","pH_7.0"),each=50))
#rand<-seq(from=100,to=350,by=0.5)
#sample(rand,150,replace=TRUE)

fitANOVA <- function(xVar=as.factor(rep(c("pH_8.1","pH_7.5","pH_7.0"),each=50)), yVar= sample(seq(from=100,to=350,by=0.1), 150, replace=TRUE)){
  dataFrame <- data.frame(xVar,yVar)
  myAOVModel <- aov(yVar~xVar,data = dataFrame)
  myAOVOutput<-summary(myAOVModel)
  return(myAOVOutput)
}

# Running function with default settings
fitANOVA()
##              Df Sum Sq Mean Sq F value Pr(>F)
## xVar          2   3077    1539   0.308  0.736
## Residuals   147 734944    5000

ANOVA: Plot

# Function: fitANOVAPlot
# Plots boxplot from analysis of variance
# inputs: numeric vectors xVar (predictor) and yVar (response)
# outputs: boxplot

fitANOVAPlot <- function(xVar=as.factor(rep(c("8.1","7.5","7.0"),each=50)), yVar= sample(seq(from=100,to=350,by=0.1), 150, replace=TRUE)){
  dataFrame <- data.frame(xVar,yVar)
  
  myAOVPlot <- boxplot(yVar~xVar, data=dataFrame, col=c("red", "green", "blue"), main = "ANOVA Plot Example of pH Treatment Impact \n on Larval Urchin Body Size", xlab = "pH treatment", ylab = "body size (micrometers)")
  return(myAOVPlot)
}

# Running function with default settings
fitANOVAPlot()

## $stats
##        [,1]  [,2]  [,3]
## [1,] 100.80 107.8 101.0
## [2,] 157.40 167.8 177.4
## [3,] 202.65 225.9 234.7
## [4,] 271.30 282.9 299.1
## [5,] 335.70 349.9 349.8
## 
## $n
## [1] 50 50 50
## 
## $conf
##          [,1]     [,2]     [,3]
## [1,] 177.1995 200.1814 207.5067
## [2,] 228.1005 251.6186 261.8933
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "7.0" "7.5" "8.1"

Testing ANOVA Functions

# Making dataframe for testing functions
xVar <- as.factor(rep(c("8.1","7.5","7.0"),each=50))
yVar <- sample(seq(from=100,to=350,by=0.25), 150, replace=TRUE)
dat <- data.frame(xVar,yVar)

# Testing fitANOVA function
fitANOVA(dat)
##              Df Sum Sq Mean Sq F value Pr(>F)
## xVar          2   2688    1344   0.262   0.77
## Residuals   147 755484    5139
# Testing fitANOVAPlot function
fitANOVAPlot(dat)

## $stats
##         [,1]   [,2]   [,3]
## [1,] 106.250 100.50 112.75
## [2,] 164.500 153.25 166.00
## [3,] 233.875 221.25 219.75
## [4,] 268.500 311.25 267.75
## [5,] 343.000 345.25 347.75
## 
## $n
## [1] 50 50 50
## 
## $conf
##          [,1]     [,2]     [,3]
## [1,] 210.6366 185.9456 197.0144
## [2,] 257.1134 256.5544 242.4856
## 
## $out
## numeric(0)
## 
## $group
## numeric(0)
## 
## $names
## [1] "7.0" "7.5" "8.1"

(3) Contingency Table

Contingency Table: Output

# Function: ContTable
# Computes a table from chi-square analysis
# inputs: xVar & yVar (both discrete variables)
# outputs: chi-squared value, df, and p-value

ContTable <- function(xVar=sample(1:100, 3), yVar=sample(1:100, 3)){
  dat <- rbind(xVar,yVar)
  rownames(dat) <- c("Control pH", "Low pH")
  colnames(dat) <- c("Pluteus", "Metamorph", "Juvenile")
CSMod <- chisq.test(dat)
return(print(CSMod))
}

# Running function with default settings
ContTable()
## Warning in chisq.test(dat): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  dat
## X-squared = 11.754, df = 2, p-value = 0.002803

Contingency Table: Plot

# Function: ContTablePlot
# Plots results from contingency table
# inputs: xVar & yVar (both discrete variables)
# outputs: contingency table plot (based on chi-squared analysis)

ContTablePlot <- function(xVar=sample(1:100, 3), yVar=sample(1:100, 3)){
  dat <- rbind(xVar,yVar)
  rownames(dat) <- c("Control pH", "Low pH")
  colnames(dat) <- c("Pluteus", "Metamorph", "Juvenile")
  MyPlot <- mosaicplot(x=dat, col = c("plum","pink","lightgreen"), main = "Contingency Table Plot Example")
  return(MyPlot)
}

# Running function with default settings
ContTablePlot()

## NULL

Testing Contingency Table Functions

# Making 'fake' dataframe for analysis
xVar2 <- c(200,173,168)
yVar2 <- c(103,78,67)
dat2 <- rbind(xVar2,yVar2)

# Testing ContTable function
ContTable(xVar=dat2[1,], yVar=dat2[2,])
## 
##  Pearson's Chi-squared test
## 
## data:  dat
## X-squared = 1.8677, df = 2, p-value = 0.393
# Testing ContTablePlot function
ContTablePlot(xVar=dat2[1,], yVar=dat2[2,])

## NULL

(4) Logistic Regression

Logistic Regression: Output

# Function: LogReg
# Describes the relationship between a dependent binary variable (y) with an independent variable (x)
# inputs: xVar - continuous variable and yVar - discrete variable
# outputs: slope, z-value, p-value, and deviance residuals

LogReg <- function(xVar=rgamma(n=100,shape=5,scale=5), yVar=rbinom(n=100,size=1,p=0.5)) {
  dat <- data.frame(xVar,yVar)
  myModel <- glm(yVar~xVar, data=dat, family=binomial(link="logit"))
return(summary(myModel))
}

# Running function with default settings
LogReg()
## 
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"), 
##     data = dat)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.238  -1.202   1.113   1.155   1.283  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.209532   0.438185   0.478    0.633
## xVar        -0.006107   0.014047  -0.435    0.664
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 138.59  on 99  degrees of freedom
## Residual deviance: 138.40  on 98  degrees of freedom
## AIC: 142.4
## 
## Number of Fisher Scoring iterations: 3

Logistic Regression: Plot

# Function: LogRegPlot
# Plots the relationship between a dependent binary variable (y) with an independent variable (x)
# inputs: xVar - continuous variable and yVar - discrete variable
# outputs: logistic regression plot

LogRegPlot <- function(xVar=rgamma(n=100,shape=5,scale=5), yVar=rbinom(n=100,size=1,p=0.5)) {
  dat <- data.frame(xVar,yVar)
  MyPlot <- plot(x=dat$xVar, y=dat$yVar, main = "Example of Logistic Regression", pch=21, bg="blue2", cex=1.5)
  return(MyPlot)
}

# Running function with default settings
LogRegPlot()

## NULL

Testing Logistic Regression Functions

# Making fake variables xVar2 and yVar2 as input
xVar2 <- rgamma(n=500, shape=3, scale=5)
yVar2 <- rbinom(n=500, size=1, p=0.5)

# Testing LogReg function
LogReg(x=xVar2,y=yVar2)
## 
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"), 
##     data = dat)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.203  -1.189   1.152   1.165   1.214  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.065009   0.178727   0.364    0.716
## xVar        -0.002663   0.010047  -0.265    0.791
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 693.08  on 499  degrees of freedom
## Residual deviance: 693.00  on 498  degrees of freedom
## AIC: 697
## 
## Number of Fisher Scoring iterations: 3
# Testing LogRegPlot function
LogRegPlot(x=xVar2,y=yVar2)

## NULL