library(ggplot2) # for graphics
## Warning: package 'ggplot2' was built under R version 4.1.2
library(MASS) # for maximum likelihood estimation
starfish <- read.table("C:/Users/andre/Desktop/Comp-Bio_381/TableS3_FigS3_data.csv",header=TRUE,sep=",")
str(starfish)
## 'data.frame': 124 obs. of 9 variables:
## $ experiment : chr "one" "one" "one" "one" ...
## $ tank : chr "one" "one" "one" "one" ...
## $ species : chr "Evasterias" "Evasterias" "Evasterias" "Evasterias" ...
## $ treatment : chr "4 Eva" "4 Eva" "4 Eva" "4 Eva" ...
## $ arm_length : int 39 41 49 56 33 39 48 57 40 45 ...
## $ start_weight : num 9.23 8.18 6.66 21.32 4.55 ...
## $ end_weight : num 10.28 9.54 7.1 24.58 3.98 ...
## $ change_weight : num 1.05 1.366 0.44 3.268 -0.576 ...
## $ percent_change: num 11.4 16.7 6.6 15.3 -12.7 ...
summary(starfish)
## experiment tank species treatment
## Length:124 Length:124 Length:124 Length:124
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## arm_length start_weight end_weight change_weight
## Min. :29.00 Min. : 4.553 Min. : 3.977 Min. :-9.600
## 1st Qu.:45.00 1st Qu.: 16.338 1st Qu.: 17.745 1st Qu.: 0.600
## Median :54.00 Median : 25.637 Median : 27.392 Median : 1.863
## Mean :53.68 Mean : 33.562 Mean : 36.507 Mean : 2.945
## 3rd Qu.:62.00 3rd Qu.: 46.340 3rd Qu.: 50.475 3rd Qu.: 4.622
## Max. :78.00 Max. :128.400 Max. :127.000 Max. :24.400
## percent_change
## Min. :-15.812
## 1st Qu.: 1.914
## Median : 8.384
## Mean : 11.690
## 3rd Qu.: 17.220
## Max. :100.000
p1 <- ggplot(data=starfish, aes(x=arm_length, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p1 <- p1 + geom_density(linetype="dotted",size=0.75)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
normPars <- fitdistr(starfish$arm_length,"normal")
print(normPars)
## mean sd
## 53.6774194 11.5643699
## ( 1.0385111) ( 0.7343382)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 53.7 11.6
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 1.039 0.734
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 1.079 0 0 0.539
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 124
## $ loglik : num -479
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
## mean
## 53.67742
normPars$estimate["sd"]
## sd
## 11.56437
normal
probability densitymeanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(starfish$arm_length),len=length(starfish$arm_length))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(starfish$arm_length), args = list(mean = meanML, sd = sdML))
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
exponential
probability densityexpoPars <- fitdistr(starfish$arm_length,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(starfish$arm_length), args = list(rate=rateML))
p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
uniform
probability densitystat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(starfish$arm_length), args = list(min=min(starfish$arm_length), max=max(starfish$arm_length)))
p1 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
gamma
probability densitygammaPars <- fitdistr(starfish$arm_length,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(starfish$arm_length), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
beta
probability densitypSpecial <- ggplot(data=starfish, aes(x=arm_length/(max(arm_length + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=starfish$arm_length/max(starfish$arm_length + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(starfish$arm_length), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
The best fitting distribution for this dataset is the gamma distribution, although the normal distribution is close as well.
newdata <- rgamma(n=(length(starfish$arm_length)),shape=shapeML, rate=rateML)
newdata <- data.frame(1:(length(starfish$arm_length)),newdata)
names(newdata) <- list("ID","myVar")
newdata <- newdata[newdata$myVar>0,]
str(newdata)
## 'data.frame': 124 obs. of 2 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ myVar: num 39.7 42.7 62.8 88.2 46.2 ...
summary(newdata$myVar)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 27.99 44.70 50.15 51.62 58.19 88.18
p2 <- ggplot(data=newdata, aes(x=myVar, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.