read in required libraries
library(ggplot2) # for graphics
library(MASS) # for maximum likelihood estimation
read in data from desktop
z <- read.csv("~/Desktop/tair850-2010.csv")
sd(complete.cases(z))
## [1] 0.4670539
z <- z[,2:3]
z$number <- c(1:2400)
str(z)
## 'data.frame': 2400 obs. of 3 variables:
## $ Var2 : chr "A" "A" "A" "A" ...
## $ Freq : num 277 277 277 277 277 ...
## $ number: int 1 2 3 4 5 6 7 8 9 10 ...
head(z)
## Var2 Freq number
## 1 A 276.9623 1
## 2 A 276.7063 2
## 3 A 276.5017 3
## 4 A 277.0096 4
## 5 A 276.5850 5
## 6 A 276.6914 6
z <- na.omit(z)
summary(z)
## Var2 Freq number
## Length:1629 Min. :247.2 Min. : 1
## Class :character 1st Qu.:255.2 1st Qu.: 666
## Mode :character Median :262.3 Median :1298
## Mean :262.0 Mean :1260
## 3rd Qu.:268.8 3rd Qu.:1865
## Max. :277.7 Max. :2394
plot histogram
p1 <- ggplot(data=z, aes(x=Freq, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
add kernel density plot
p1 <- p1 + geom_density(linetype="dotted",size=0.75)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Normal distribution
fit normal distribution
normPars <- fitdistr(z$Freq,"normal")
print(normPars)
## mean sd
## 261.9704068 8.3076762
## ( 0.2058349) ( 0.1455473)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 261.97 8.31
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 0.206 0.146
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 0.0424 0 0 0.0212
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 1629
## $ loglik : num -5760
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
## mean
## 261.9704
probability density for normal dist
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$Freq),len=length(z$Freq))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$Freq), args = list(mean = meanML, sd = sdML))
length(stat)
## [1] 11
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Exponential distribution
expoPars <- fitdistr(z$Freq,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$Freq), args = list(rate=rateML))
p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Gamma distribution
gammaPars <- fitdistr(z$Freq,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$Freq), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Beta distribution
pSpecial <- ggplot(data=z, aes(x=Freq/(max(Freq + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=z$Freq/max(z$Freq +
0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta,
colour="orchid", n = length(z$Freq), args =
list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
normal distribution fit the data the best
new normal distribution
r <- rnorm(100,262,0.4670539)
df_r <- as.data.frame(r)
p7 <- ggplot(data=df_r, aes(x=r, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
p7
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.