read in required libraries

library(ggplot2) # for graphics
library(MASS) # for maximum likelihood estimation

read in data from desktop

z <- read.csv("~/Desktop/tair850-2010.csv")
sd(complete.cases(z))
## [1] 0.4670539
z <- z[,2:3]
z$number <- c(1:2400)
str(z)
## 'data.frame':    2400 obs. of  3 variables:
##  $ Var2  : chr  "A" "A" "A" "A" ...
##  $ Freq  : num  277 277 277 277 277 ...
##  $ number: int  1 2 3 4 5 6 7 8 9 10 ...
head(z)
##   Var2     Freq number
## 1    A 276.9623      1
## 2    A 276.7063      2
## 3    A 276.5017      3
## 4    A 277.0096      4
## 5    A 276.5850      5
## 6    A 276.6914      6
z <- na.omit(z)
summary(z)
##      Var2                Freq           number    
##  Length:1629        Min.   :247.2   Min.   :   1  
##  Class :character   1st Qu.:255.2   1st Qu.: 666  
##  Mode  :character   Median :262.3   Median :1298  
##                     Mean   :262.0   Mean   :1260  
##                     3rd Qu.:268.8   3rd Qu.:1865  
##                     Max.   :277.7   Max.   :2394

plot histogram

p1 <- ggplot(data=z, aes(x=Freq, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

add kernel density plot

p1 <-  p1 +  geom_density(linetype="dotted",size=0.75)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Normal distribution

fit normal distribution

normPars <- fitdistr(z$Freq,"normal")
print(normPars)
##       mean           sd     
##   261.9704068     8.3076762 
##  (  0.2058349) (  0.1455473)
str(normPars)
## List of 5
##  $ estimate: Named num [1:2] 261.97 8.31
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ sd      : Named num [1:2] 0.206 0.146
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ vcov    : num [1:2, 1:2] 0.0424 0 0 0.0212
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "mean" "sd"
##   .. ..$ : chr [1:2] "mean" "sd"
##  $ n       : int 1629
##  $ loglik  : num -5760
##  - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
##     mean 
## 261.9704

probability density for normal dist

meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]

xval <- seq(0,max(z$Freq),len=length(z$Freq))

stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$Freq), args = list(mean = meanML, sd = sdML))

length(stat)
## [1] 11
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Exponential distribution

expoPars <- fitdistr(z$Freq,"exponential")
rateML <- expoPars$estimate["rate"]

stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$Freq), args = list(rate=rateML))
 p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Uniform distribution

stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$Freq), args = list(min=min(z$Freq), max=max(z$Freq)))
 p1 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Gamma distribution

gammaPars <- fitdistr(z$Freq,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]

stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$Freq), args = list(shape=shapeML, rate=rateML))
 p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Beta distribution

pSpecial <- ggplot(data=z, aes(x=Freq/(max(Freq + 0.1)), y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) + 
  xlim(c(0,1)) +
  geom_density(size=0.75,linetype="dotted")

betaPars <- fitdistr(x=z$Freq/max(z$Freq +      
  0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
  shape1ML <- betaPars$estimate["shape1"]
  shape2ML <- betaPars$estimate["shape2"]

statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta,   
  colour="orchid", n = length(z$Freq), args =
  list(shape1=shape1ML,shape2=shape2ML))

pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).

normal distribution fit the data the best

new normal distribution

r <- rnorm(100,262,0.4670539)
df_r <- as.data.frame(r)
p7 <- ggplot(data=df_r, aes(x=r, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2)

p7
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.