Homework 10

For loop within a function to find number of zeros in numeric vector

####################################
# FUNCTION: zeros
# purpose: count the zeros in a numeric vector using a for loop
# input: x = numeric vector
# output: counter = number of 0s in x (NA elements are not counted)
#----------------------------------
vector <- c(0,1,2,3,0,4,5,6,0)

zeros <- function(x = vector) {
  # Keep the tally local so every call starts from 0 and the result neither
  # depends on nor requires a global `counter` variable.
  counter <- 0
  for (i in seq_along(x)) {
    # isTRUE() treats an NA comparison as "not a zero" instead of making
    # `if` fail with a missing-value error.
    if (isTRUE(x[i] == 0)) {
      counter <- counter + 1
    }
  }
  return(counter)
}
zeros(vector)

## [1] 3

Repeat problem 1 using subsetting instead of a function/loop

# Keep only the elements equal to 0, then count how many remain.
zero_elements <- vector[vector == 0]
count <- length(zero_elements)
print(count)

## [1] 3

Function for matrix creation

####################################
# FUNCTION: my_matrix
# purpose: create matrix populated with products of (row number) * (column number)
# input: i = number of rows; j = number of columns
# output: i x j matrix whose [r, c] entry is r * c
#----------------------------------
my_matrix <- function(i = 10,
                      j = 10) {
  mat <- matrix(NA_integer_, nrow = i, ncol = j)
  # seq_len() yields an empty sequence for 0 rows/columns, whereas 1:0 would
  # count down (1, 0) and index outside the matrix. Separate loop names also
  # avoid overwriting the i/j arguments.
  for (row_idx in seq_len(nrow(mat))) {
    for (col_idx in seq_len(ncol(mat))) {
      mat[row_idx, col_idx] <- row_idx * col_idx
    }
  }
  return(mat)
}
my_matrix()

##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    1    2    3    4    5    6    7    8    9    10
##  [2,]    2    4    6    8   10   12   14   16   18    20
##  [3,]    3    6    9   12   15   18   21   24   27    30
##  [4,]    4    8   12   16   20   24   28   32   36    40
##  [5,]    5   10   15   20   25   30   35   40   45    50
##  [6,]    6   12   18   24   30   36   42   48   54    60
##  [7,]    7   14   21   28   35   42   49   56   63    70
##  [8,]    8   16   24   32   40   48   56   64   72    80
##  [9,]    9   18   27   36   45   54   63   72   81    90
## [10,]   10   20   30   40   50   60   70   80   90   100

4.a. Simulate 3 groups of data with different means, place all data into a data frame

# Labels first: 10 consecutive rows for each of the three groups.
group <- rep(c("group1", "group2", "group3"), each = 10)

# Draw 10 normally distributed values per group (sd 5; means 100, 200, 300)
# and round them to whole numbers.
group1 <- round(rnorm(n = 10, mean = 100, sd = 5))
group2 <- round(rnorm(n = 10, mean = 200, sd = 5))
group3 <- round(rnorm(n = 10, mean = 300, sd = 5))

# Stack the draws in group order so they line up with the labels.
response <- c(group1, group2, group3)
d_frame <- data.frame(group, response)
print(d_frame)

##     group response
## 1  group1       99
## 2  group1       84
## 3  group1      100
## 4  group1      105
## 5  group1      101
## 6  group1       96
## 7  group1      102
## 8  group1      111
## 9  group1       97
## 10 group1       99
## 11 group2      198
## 12 group2      207
## 13 group2      192
## 14 group2      194
## 15 group2      197
## 16 group2      202
## 17 group2      196
## 18 group2      198
## 19 group2      198
## 20 group2      197
## 21 group3      302
## 22 group3      299
## 23 group3      303
## 24 group3      299
## 25 group3      302
## 26 group3      298
## 27 group3      302
## 28 group3      301
## 29 group3      301
## 30 group3      302

4.b. Custom function to shuffle response variables and recalculate group means

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

####################################
# FUNCTION: shuffle_data
# purpose: shuffle the response values across rows, then recalculate each group's mean
# input: z = data frame with columns `group` and `response`
# output: list of (1) the shuffled data frame and (2) a vector of group means,
#         one per group in order of first appearance
#----------------------------------
shuffle_data <- function(z = d_frame) {
  # Group labels stay in place; only the responses are permuted. The column
  # is named `response` so the result has the same layout as the input and
  # can itself be passed back into this function.
  # Indexing with sample.int() avoids sample()'s special case for a single
  # number, which would draw from 1:x instead of returning the value itself.
  shuffled <- data.frame(
    group = z$group,
    response = z$response[sample.int(length(z$response))]
  )
  # One mean per distinct group label rather than three hard-coded names.
  group_labels <- unique(as.character(shuffled$group))
  means <- vapply(
    group_labels,
    function(g) mean(shuffled$response[shuffled$group == g]),
    numeric(1),
    USE.NAMES = FALSE
  )
  return(list(shuffled, means))
}
# Call with the default argument (the global d_frame); the returned list auto-prints.
shuffle_data()

## [[1]]
##     group respose
## 1  group1     298
## 2  group1     301
## 3  group1     299
## 4  group1      97
## 5  group1     101
## 6  group1      99
## 7  group1     302
## 8  group1     198
## 9  group1     111
## 10 group1     197
## 11 group2     194
## 12 group2     302
## 13 group2     198
## 14 group2     197
## 15 group2     303
## 16 group2     100
## 17 group2     207
## 18 group2      96
## 19 group2     102
## 20 group2     301
## 21 group3      84
## 22 group3      99
## 23 group3     299
## 24 group3     105
## 25 group3     198
## 26 group3     302
## 27 group3     202
## 28 group3     192
## 29 group3     196
## 30 group3     302
## 
## [[2]]
## [1] 200.3 200.0 197.9

4.c. Repeat function in 4.b. 100 times and store new results in dataframe

# Run the shuffle 100 times and keep the three group means from each run.
n_iter <- 100

# vapply() returns a 3 x n_iter matrix (one column per run) and checks that
# every run yields exactly three numeric means. This replaces growing the
# data frame one row at a time inside a loop.
shuffled_means <- vapply(
  seq_len(n_iter),
  function(k) shuffle_data(d_frame)[[2]],
  numeric(3)
)

# One row per iteration; `iter` is stored as numeric, as in the printed table.
d.frame <- data.frame(
  iter = as.numeric(seq_len(n_iter)),
  mean1 = shuffled_means[1, ],
  mean2 = shuffled_means[2, ],
  mean3 = shuffled_means[3, ]
)
print(d.frame)

##     iter mean1 mean2 mean3
## 1      1 199.4 189.3 209.5
## 2      2 208.6 189.9 199.7
## 3      3 199.9 189.3 209.0
## 4      4 239.9 188.4 169.9
## 5      5 220.1 220.0 158.1
## 6      6 180.1 228.7 189.4
## 7      7 200.8 188.9 208.5
## 8      8 200.5 187.3 210.4
## 9      9 211.9 188.0 198.3
## 10    10 168.8 230.4 199.0
## 11    11 208.2 181.2 208.8
## 12    12 239.7 182.1 176.4
## 13    13 248.7 190.8 158.7
## 14    14 179.7 210.2 208.3
## 15    15 220.0 159.1 219.1
## 16    16 180.7 230.5 187.0
## 17    17 187.4 199.7 211.1
## 18    18 187.4 170.9 239.9
## 19    19 219.3 181.7 197.2
## 20    20 189.4 210.9 197.9
## 21    21 181.5 187.1 229.6
## 22    22 218.5 200.2 179.5
## 23    23 198.7 219.7 179.8
## 24    24 209.9 228.9 159.4
## 25    25 209.4 177.6 211.2
## 26    26 232.1 196.9 169.2
## 27    27 229.7 170.7 197.8
## 28    28 159.1 198.8 240.3
## 29    29 218.2 180.4 199.6
## 30    30 199.4 209.3 189.5
## 31    31 182.5 187.5 228.2
## 32    32 199.3 190.4 208.5
## 33    33 170.8 209.9 217.5
## 34    34 191.0 207.0 200.2
## 35    35 249.7 191.8 156.7
## 36    36 211.1 167.3 219.8
## 37    37 219.9 160.4 217.9
## 38    38 157.0 229.6 211.6
## 39    39 179.9 167.9 250.4
## 40    40 198.8 220.9 178.5
## 41    41 209.3 168.6 220.3
## 42    42 179.5 190.9 227.8
## 43    43 187.7 190.4 220.1
## 44    44 270.8 209.3 118.1
## 45    45 176.2 181.6 240.4
## 46    46 209.9 179.5 208.8
## 47    47 187.8 210.0 200.4
## 48    48 212.1 198.8 187.3
## 49    49 210.3 177.7 210.2
## 50    50 231.7 219.0 147.5
## 51    51 198.9 199.2 200.1
## 52    52 229.2 201.2 167.8
## 53    53 206.9 210.9 180.4
## 54    54 209.4 198.0 190.8
## 55    55 168.7 198.9 230.6
## 56    56 240.2 218.4 139.6
## 57    57 220.0 170.6 207.6
## 58    58 217.5 178.9 201.8
## 59    59 179.4 239.0 179.8
## 60    60 209.6 171.5 217.1
## 61    61 157.6 250.4 190.2
## 62    62 179.7 236.8 181.7
## 63    63 226.7 160.6 210.9
## 64    64 199.0 180.9 218.3
## 65    65 160.4 208.9 228.9
## 66    66 199.7 197.6 200.9
## 67    67 201.1 217.7 179.4
## 68    68 191.0 206.8 200.4
## 69    69 200.2 198.2 199.8
## 70    70 197.5 161.6 239.1
## 71    71 186.9 200.5 210.8
## 72    72 219.5 188.0 190.7
## 73    73 180.0 176.9 241.3
## 74    74 189.9 227.5 180.8
## 75    75 188.3 209.0 200.9
## 76    76 190.3 201.2 206.7
## 77    77 209.9 200.5 187.8
## 78    78 218.1 168.9 211.2
## 79    79 217.9 209.1 171.2
## 80    80 159.0 229.8 209.4
## 81    81 228.7 200.8 168.7
## 82    82 218.5 180.7 199.0
## 83    83 167.9 191.0 239.3
## 84    84 178.6 209.0 210.6
## 85    85 230.2 178.5 189.5
## 86    86 207.9 218.7 171.6
## 87    87 220.4 198.7 179.1
## 88    88 209.1 200.2 188.9
## 89    89 207.8 189.7 200.7
## 90    90 197.6 189.1 211.5
## 91    91 230.1 161.3 206.8
## 92    92 208.5 191.4 198.3
## 93    93 209.8 191.6 196.8
## 94    94 217.4 170.3 210.5
## 95    95 198.2 189.2 210.8
## 96    96 222.0 208.8 167.4
## 97    97 141.2 238.4 218.6
## 98    98 217.0 190.9 190.3
## 99    99 158.0 180.2 260.0
## 100  100 239.8 169.4 189.0

4.d. Create histograms of the shuffled group means from 4.c

library(ggplot2)
# qplot() is deprecated as of ggplot2 3.4.0, so each histogram is built with
# ggplot() + geom_histogram(); the outline and fill colours are unchanged.
ggplot(d.frame, aes(x = mean1)) +
  geom_histogram(color = "black", fill = "goldenrod")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(d.frame, aes(x = mean2)) +
  geom_histogram(color = "black", fill = "goldenrod")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(d.frame, aes(x = mean3)) +
  geom_histogram(color = "black", fill = "goldenrod")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Comparison to original means: the histograms show that the shuffled means are all centered around 200. This is the same as the mean of group2, but far from the means of group1 (100) and group3 (300). It makes sense that they center on the average of all the groups combined ((100 + 200 + 300) / 3 = 200) - albeit with a larger standard deviation - since all the responses were randomly shuffled across groups.

Homework 10

Hannah Shafer

3/30/2022