1. Re-organize code from Homework 8 to follow structured programming principles:
# Description -------------------------------
# Script used to simulate data I may expect to collect based on my research hypothesis and previous work conducted in my field.
# 23 March 2022
# HRS
# Initialize --------------------------------
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load Functions ----------------------------
####################################
# FUNCTION: d_frame
# purpose: create simulated data frame
# input: sample ID sequence, treatments, and days to flowering
# output: data frame of simulated data
#---------------------------------------------
d_frame <- function(ID = 1:120,
                   Treatment = rep(c("Control","Hot"),each=60),
                   DaysToFlower = c(control, hot)) {
  data_frame <- data.frame(ID, Treatment, DaysToFlower)
  return(data_frame)
}

####################################
# FUNCTION: p_val
# purpose: calculate p-value for t-test ran on simulated data
# input: x & y, the two treatment groups
# output: p-value for t-test
#--------------------------------------------
p_val <- function(x = control,
                  y = hot) {
  p_value <- t.test(x,y)$p.value
  return(p_value)
}

####################################
# FUNCTION: graph
# purpose: graph simulated data
# input: data frame of simulated data
# output: histogram of simulated data colored by treatment group
#--------------------------------------------
graph <- function() {
  histogram <- data %>%
  ggplot(aes(x=DaysToFlower, fill=Treatment)) +
    geom_histogram() +
  scale_fill_manual(values=c("blue", "red"))
  return(histogram)
}
# Set Global Variables ----------------------
control <- round(rnorm(n=60, mean=50, sd=5))
hot <- round(rnorm(n=60, mean=65, sd=5))
#Program Body -------------------------------
data <- d_frame()
p_val()
## [1] 6.722988e-31
graph()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

  1. Modify above code to record a new summary variable. Copy a function above, rename it, and modify it. Here, modify p-val() function to record another summary statistic, the treatment group means. Then add bars to the graph() output to indicate means on the histogram.
####################################
# FUNCTION: summary
# purpose: calculate summary statistics (p-value and group means) for t-test ran on simulated data
# input: x & y, the two treatment groups
# output: p-value for t-test and group means
#--------------------------------------------
summary <- function(x = control,
                  y = hot) {
  p_value <- t.test(x,y)$p.value
  means <- c(mean(control), mean(hot))
  return(list(p_value, means))
}

####################################
# FUNCTION: graph
# purpose: graph simulated data, add a bar for each group mean
# input: data frame of simulated data
# output: histogram of simulated data colored by treatment group and with means indicated
#--------------------------------------------
graph <- function() {
  histogram <- data %>%
  ggplot(aes(x=DaysToFlower, fill=Treatment)) +
    geom_histogram() +
  scale_fill_manual(values=c("blue", "red")) +
    geom_vline(aes(xintercept=mean(control)), col="blue") +
    geom_vline(aes(xintercept=mean(hot)), col="red")
  return(histogram)
}
# Running the updated code:
data <- d_frame()
summary()
## [[1]]
## [1] 6.722988e-31
## 
## [[2]]
## [1] 50.20000 64.71667
graph()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.