- Re-organize code from Homework 8 to follow structured programming principles:
# Description -------------------------------
# Script used to simulate data I may expect to collect based on my research hypothesis and previous work conducted in my field.
# 23 March 2022
# HRS
# Initialize --------------------------------
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load Functions ----------------------------
####################################
# FUNCTION: d_frame
# purpose: create simulated data frame
# input: sample ID sequence, treatments, and days to flowering
# output: data frame of simulated data
#---------------------------------------------
d_frame <- function(ID = 1:120,
Treatment = rep(c("Control","Hot"),each=60),
DaysToFlower = c(control, hot)) {
data_frame <- data.frame(ID, Treatment, DaysToFlower)
return(data_frame)
}
####################################
# FUNCTION: p_val
# purpose: calculate p-value for t-test ran on simulated data
# input: x & y, the two treatment groups
# output: p-value for t-test
#--------------------------------------------
p_val <- function(x = control,
y = hot) {
p_value <- t.test(x,y)$p.value
return(p_value)
}
####################################
# FUNCTION: graph
# purpose: graph simulated data
# input: data frame of simulated data
# output: histogram of simulated data colored by treatment group
#--------------------------------------------
graph <- function() {
histogram <- data %>%
ggplot(aes(x=DaysToFlower, fill=Treatment)) +
geom_histogram() +
scale_fill_manual(values=c("blue", "red"))
return(histogram)
}
# Set Global Variables ----------------------
control <- round(rnorm(n=60, mean=50, sd=5))
hot <- round(rnorm(n=60, mean=65, sd=5))
#Program Body -------------------------------
data <- d_frame()
p_val()
## [1] 6.722988e-31
graph()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
- Modify above code to record a new summary variable. Copy a function above, rename it, and modify it. Here, modify p-val() function to record another summary statistic, the treatment group means. Then add bars to the graph() output to indicate means on the histogram.
####################################
# FUNCTION: summary
# purpose: calculate summary statistics (p-value and group means) for t-test ran on simulated data
# input: x & y, the two treatment groups
# output: p-value for t-test and group means
#--------------------------------------------
summary <- function(x = control,
y = hot) {
p_value <- t.test(x,y)$p.value
means <- c(mean(control), mean(hot))
return(list(p_value, means))
}
####################################
# FUNCTION: graph
# purpose: graph simulated data, add a bar for each group mean
# input: data frame of simulated data
# output: histogram of simulated data colored by treatment group and with means indicated
#--------------------------------------------
graph <- function() {
histogram <- data %>%
ggplot(aes(x=DaysToFlower, fill=Treatment)) +
geom_histogram() +
scale_fill_manual(values=c("blue", "red")) +
geom_vline(aes(xintercept=mean(control)), col="blue") +
geom_vline(aes(xintercept=mean(hot)), col="red")
return(histogram)
}
# Running the updated code:
data <- d_frame()
summary()
## [[1]]
## [1] 6.722988e-31
##
## [[2]]
## [1] 50.20000 64.71667
graph()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.