####################################
# FUNCTION: zeros
# purpose: calculate the number of zeroes in a numeric vector
# input: x = numeric vector
# output: counter = number of 0s in vector
#----------------------------------
counter <- 0
vector <- c(0,1,2,3,0,4,5,6,0)
zeros <- function(x = vector) {
  # Count how many elements of `x` are equal to 0.
  #
  # x: numeric vector; the default is evaluated lazily and refers to the
  #    object named `vector` in the calling environment chain.
  # Returns: a single integer, the number of zeros (0 for empty input).
  #
  # The tally is computed locally from `x` alone, so the result cannot be
  # offset by the value of any variable outside the function. Missing values
  # are skipped rather than aborting the count.
  sum(x == 0, na.rm = TRUE)
}
zeros(vector)
## [1] 3
# Same count without an explicit loop: keep only the elements of `vector`
# that compare equal to 0, then take the length of that subset.
count <- length(vector[vector==0])
print(count)
## [1] 3
####################################
# FUNCTION: my_matrix
# purpose: create matrix populated with products of (row number) * (column number)
# input: i= rows; j= columns
# output: matrix
#----------------------------------
my_matrix <- function(i = 10,
                      j = 10) {
  # Build an i-by-j multiplication table: cell [r, c] holds r * c.
  #
  # i: number of rows (non-negative whole number)
  # j: number of columns (non-negative whole number)
  # Returns: an integer matrix with i rows and j columns.
  #
  # row() and col() give each cell's own row / column index, so their
  # element-wise product fills the whole table at once. Unlike looping over
  # 1:nrow(mat), this also works when a dimension is 0 (1:0 counts down and
  # would index outside the matrix).
  mat <- matrix(nrow = i, ncol = j)
  row(mat) * col(mat)
}
my_matrix()
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 1 2 3 4 5 6 7 8 9 10
## [2,] 2 4 6 8 10 12 14 16 18 20
## [3,] 3 6 9 12 15 18 21 24 27 30
## [4,] 4 8 12 16 20 24 28 32 36 40
## [5,] 5 10 15 20 25 30 35 40 45 50
## [6,] 6 12 18 24 30 36 42 48 54 60
## [7,] 7 14 21 28 35 42 49 56 63 70
## [8,] 8 16 24 32 40 48 56 64 72 80
## [9,] 9 18 27 36 45 54 63 72 81 90
## [10,] 10 20 30 40 50 60 70 80 90 100
# 4.a. Simulate three groups of data with different means and combine them into one data frame
# Draw 10 normally distributed observations per group (shared sd of 5,
# means of 100 / 200 / 300) and round each draw to a whole number.
group1 <- round(rnorm(n = 10, mean = 100, sd = 5))
group2 <- round(rnorm(n = 10, mean = 200, sd = 5))
group3 <- round(rnorm(n = 10, mean = 300, sd = 5))

# Long format: one row per observation, labelled with its group of origin.
response <- c(group1, group2, group3)
group <- rep(c("group1", "group2", "group3"), each = 10)
d_frame <- data.frame(group = group, response = response)
print(d_frame)
## group response
## 1 group1 99
## 2 group1 84
## 3 group1 100
## 4 group1 105
## 5 group1 101
## 6 group1 96
## 7 group1 102
## 8 group1 111
## 9 group1 97
## 10 group1 99
## 11 group2 198
## 12 group2 207
## 13 group2 192
## 14 group2 194
## 15 group2 197
## 16 group2 202
## 17 group2 196
## 18 group2 198
## 19 group2 198
## 20 group2 197
## 21 group3 302
## 22 group3 299
## 23 group3 303
## 24 group3 299
## 25 group3 302
## 26 group3 298
## 27 group3 302
## 28 group3 301
## 29 group3 301
## 30 group3 302
# 4.b. Custom function to shuffle the response variable and recalculate the group means
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
####################################
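# FUNCTION: shuffle_data
# purpose: shuffle the response variable across rows, then calculate the new group means
# input: data frame with columns `group` and `response`
# output: list of (1) the shuffled data frame and (2) the vector of group means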
#----------------------------------
shuffle_data <- function(z = d_frame) {
  # Randomly reassign the responses in `z` to rows, then recompute the mean
  # response of "group1", "group2" and "group3".
  #
  # z: data frame with a `group` column and a `response` column; the default
  #    is evaluated lazily and refers to the object named `d_frame`.
  # Returns: an unnamed list of two elements:
  #   [[1]] data frame with columns `group` (unchanged) and `response`
  #         (a random permutation of z$response)
  #   [[2]] unnamed numeric vector of the three group means, in the order
  #         group1, group2, group3 (NaN for a group with no rows)

  # Permute by index rather than sample(z$response): with a single numeric
  # response x, sample(x) would draw from 1:x instead of returning x.
  shuffled_response <- z$response[sample.int(length(z$response))]
  shuffled <- data.frame(group = z$group, response = shuffled_response)

  group_names <- c("group1", "group2", "group3")
  means <- vapply(
    group_names,
    function(g) mean(shuffled$response[shuffled$group == g]),
    numeric(1),
    USE.NAMES = FALSE
  )

  list(shuffled, means)
}
shuffle_data()
## [[1]]
## group respose
## 1 group1 298
## 2 group1 301
## 3 group1 299
## 4 group1 97
## 5 group1 101
## 6 group1 99
## 7 group1 302
## 8 group1 198
## 9 group1 111
## 10 group1 197
## 11 group2 194
## 12 group2 302
## 13 group2 198
## 14 group2 197
## 15 group2 303
## 16 group2 100
## 17 group2 207
## 18 group2 96
## 19 group2 102
## 20 group2 301
## 21 group3 84
## 22 group3 99
## 23 group3 299
## 24 group3 105
## 25 group3 198
## 26 group3 302
## 27 group3 202
## 28 group3 192
## 29 group3 196
## 30 group3 302
##
## [[2]]
## [1] 200.3 200.0 197.9
# 4.c. Repeat the function from 4.b. 100 times and store the resulting means in a data frame
# Run the shuffle n_iter times and record the three reshuffled group means
# from each run, one row per iteration.
n_iter <- 100

# Allocate every row up front so the data frame is not grown inside the loop.
d.frame <- data.frame(
  iter = seq_len(n_iter),
  mean1 = NA_real_,
  mean2 = NA_real_,
  mean3 = NA_real_
)

for (i in seq_len(n_iter)) {
  # shuffle_data() returns list(shuffled data frame, vector of group means);
  # only the means (element 2) are kept.
  d.frame[i, c("mean1", "mean2", "mean3")] <- shuffle_data(d_frame)[[2]]
}
print(d.frame)
## iter mean1 mean2 mean3
## 1 1 199.4 189.3 209.5
## 2 2 208.6 189.9 199.7
## 3 3 199.9 189.3 209.0
## 4 4 239.9 188.4 169.9
## 5 5 220.1 220.0 158.1
## 6 6 180.1 228.7 189.4
## 7 7 200.8 188.9 208.5
## 8 8 200.5 187.3 210.4
## 9 9 211.9 188.0 198.3
## 10 10 168.8 230.4 199.0
## 11 11 208.2 181.2 208.8
## 12 12 239.7 182.1 176.4
## 13 13 248.7 190.8 158.7
## 14 14 179.7 210.2 208.3
## 15 15 220.0 159.1 219.1
## 16 16 180.7 230.5 187.0
## 17 17 187.4 199.7 211.1
## 18 18 187.4 170.9 239.9
## 19 19 219.3 181.7 197.2
## 20 20 189.4 210.9 197.9
## 21 21 181.5 187.1 229.6
## 22 22 218.5 200.2 179.5
## 23 23 198.7 219.7 179.8
## 24 24 209.9 228.9 159.4
## 25 25 209.4 177.6 211.2
## 26 26 232.1 196.9 169.2
## 27 27 229.7 170.7 197.8
## 28 28 159.1 198.8 240.3
## 29 29 218.2 180.4 199.6
## 30 30 199.4 209.3 189.5
## 31 31 182.5 187.5 228.2
## 32 32 199.3 190.4 208.5
## 33 33 170.8 209.9 217.5
## 34 34 191.0 207.0 200.2
## 35 35 249.7 191.8 156.7
## 36 36 211.1 167.3 219.8
## 37 37 219.9 160.4 217.9
## 38 38 157.0 229.6 211.6
## 39 39 179.9 167.9 250.4
## 40 40 198.8 220.9 178.5
## 41 41 209.3 168.6 220.3
## 42 42 179.5 190.9 227.8
## 43 43 187.7 190.4 220.1
## 44 44 270.8 209.3 118.1
## 45 45 176.2 181.6 240.4
## 46 46 209.9 179.5 208.8
## 47 47 187.8 210.0 200.4
## 48 48 212.1 198.8 187.3
## 49 49 210.3 177.7 210.2
## 50 50 231.7 219.0 147.5
## 51 51 198.9 199.2 200.1
## 52 52 229.2 201.2 167.8
## 53 53 206.9 210.9 180.4
## 54 54 209.4 198.0 190.8
## 55 55 168.7 198.9 230.6
## 56 56 240.2 218.4 139.6
## 57 57 220.0 170.6 207.6
## 58 58 217.5 178.9 201.8
## 59 59 179.4 239.0 179.8
## 60 60 209.6 171.5 217.1
## 61 61 157.6 250.4 190.2
## 62 62 179.7 236.8 181.7
## 63 63 226.7 160.6 210.9
## 64 64 199.0 180.9 218.3
## 65 65 160.4 208.9 228.9
## 66 66 199.7 197.6 200.9
## 67 67 201.1 217.7 179.4
## 68 68 191.0 206.8 200.4
## 69 69 200.2 198.2 199.8
## 70 70 197.5 161.6 239.1
## 71 71 186.9 200.5 210.8
## 72 72 219.5 188.0 190.7
## 73 73 180.0 176.9 241.3
## 74 74 189.9 227.5 180.8
## 75 75 188.3 209.0 200.9
## 76 76 190.3 201.2 206.7
## 77 77 209.9 200.5 187.8
## 78 78 218.1 168.9 211.2
## 79 79 217.9 209.1 171.2
## 80 80 159.0 229.8 209.4
## 81 81 228.7 200.8 168.7
## 82 82 218.5 180.7 199.0
## 83 83 167.9 191.0 239.3
## 84 84 178.6 209.0 210.6
## 85 85 230.2 178.5 189.5
## 86 86 207.9 218.7 171.6
## 87 87 220.4 198.7 179.1
## 88 88 209.1 200.2 188.9
## 89 89 207.8 189.7 200.7
## 90 90 197.6 189.1 211.5
## 91 91 230.1 161.3 206.8
## 92 92 208.5 191.4 198.3
## 93 93 209.8 191.6 196.8
## 94 94 217.4 170.3 210.5
## 95 95 198.2 189.2 210.8
## 96 96 222.0 208.8 167.4
## 97 97 141.2 238.4 218.6
## 98 98 217.0 190.9 190.3
## 99 99 158.0 180.2 260.0
## 100 100 239.8 169.4 189.0
# 4.d. Create histograms of the shuffled means from 4.c.
library(ggplot2)
# One histogram per column of reshuffled group means in d.frame.
# ggplot() + geom_histogram() is used instead of qplot(), which is deprecated
# as of ggplot2 3.4.0. bins = 30 states the previous default explicitly, so
# stat_bin() no longer emits its "Pick better value" message.
ggplot(d.frame, aes(x = mean1)) +
  geom_histogram(bins = 30, color = "black", fill = "goldenrod")
ggplot(d.frame, aes(x = mean2)) +
  geom_histogram(bins = 30, color = "black", fill = "goldenrod")
ggplot(d.frame, aes(x = mean3)) +
  geom_histogram(bins = 30, color = "black", fill = "goldenrod")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Comparison to original means: the histograms show that the shuffled means of
# all three groups are centered around 200. That matches the original mean of
# group2 but is far from the original means of group1 (100) and group3 (300).
# This is expected: because the responses were randomly reassigned across the
# groups, each shuffled group mean estimates the grand mean of the pooled data
# ((100 + 200 + 300) / 3 = 200), albeit with a much larger spread than the
# original within-group standard deviation.