# -------------------------------------------------
# Build a batch of simulated CSV files for the batch-processing exercise.
#
# Each file starts with a short "#"-prefixed metadata header (title,
# timestamp, author tag, rule, blank line) followed by a comma-separated
# table with the columns var_x (normal, mean 0.7, sd 0.5) and var_y
# (uniform on [0, 1]).
#
# Arguments:
#   file_n      number of files to write
#   file_folder destination folder; it is pasted directly in front of the
#               file name, so it must carry its own trailing "/"
#   file_size   c(first, last); the row count of each file is drawn at
#               random from first:last
#   file_na     accepted for interface compatibility only -- the body does
#               not use it, so no NA values are written
#
# Returns (invisibly) the character vector of file paths that were written.
file_builder <- function(file_n = 10,
                         file_folder = "Homework11Files/",
                         file_size = c(15, 100),
                         file_na = 3) {
  # Make sure the destination exists; silent when it is already there.
  dir.create(file_folder, showWarnings = FALSE, recursive = TRUE)

  row_choices <- file_size[1]:file_size[2]
  written <- character(file_n)

  for (i in seq_len(file_n)) {
    # Index explicitly instead of sample(row_choices, 1): sample() treats a
    # length-one numeric x as 1:x, which would ignore the lower bound when
    # both bounds are equal.
    file_length <- row_choices[sample.int(length(row_choices), size = 1)]

    var_x <- rnorm(n = file_length, mean = 0.7, sd = 0.5) # random x
    var_y <- runif(file_length)                           # random y
    df <- data.frame(var_x, var_y)

    file_label <- paste0(file_folder,
                         "ranFile",
                         formatC(i, width = 3, format = "d", flag = "0"),
                         ".csv")

    # Write header and table through one open connection: this avoids the
    # "appending column names to file" warning that write.table() raises
    # with append = TRUE, and guarantees the file is closed on error.
    con <- file(file_label, open = "w")
    tryCatch({
      cat("# Simulated random data file for batch processing homework 11", "\n",
          "# timestamp: ", as.character(Sys.time()), "\n",
          "# GLS", "\n",
          "# --------------------", "\n",
          "\n",
          file = con,
          sep = "")
      write.table(x = df,
                  file = con,
                  sep = ",",
                  row.names = FALSE)
    }, finally = close(con))

    written[i] <- file_label
  }

  invisible(written)
}
# -------------------------------------------------
# Fit a simple linear regression of the second column of `d` on its first
# column and extract three summary numbers.
#
# Arguments:
#   d  data frame whose column 1 is the predictor and column 2 the response;
#      when NULL, a 10-row frame of uniform random numbers is used instead
#
# Returns a list with
#   slope  estimated slope (row 2, column 1 of the coefficient table)
#   p_val  p-value for the slope (row 2, column 4 of the coefficient table)
#   r2     R-squared of the fit
reg_stats <- function(d = NULL) {
  # No data supplied: fall back to a small random data set
  if (is.null(d)) {
    d <- data.frame(x_var = runif(10), y_var = runif(10))
  }

  fit_summary <- summary(lm(d[, 2] ~ d[, 1], data = d))
  coef_table <- fit_summary$coefficients

  list(slope = coef_table[2, 1],
       p_val = coef_table[2, 4],
       r2 = fit_summary$r.squared)
}
# -------------------------------------------------
# Global variables
# Driver: build the simulated files, fit a regression to each one, and write
# a summary table of slope, p-value and R-squared.

file_folder <- "Homework11Files/" # keep the trailing "/": paths are pasted
n_files <- 100
file_out <- "StatsSummary_Homework11.csv"

# Silent when the folder is left over from an earlier run.
dir.create(file_folder, showWarnings = FALSE)

# Pass the folder explicitly so the builder and the reader below cannot
# drift apart if file_folder is changed here.
file_builder(file_n = n_files, file_folder = file_folder)

# Only pick up CSV files; anything else in the folder is ignored.
file_names <- list.files(path = file_folder, pattern = "\\.csv$")
head(file_names)

# Size the result columns from what is actually on disk rather than from
# n_files, so a folder holding a different number of files still works.
n_found <- length(file_names)
stats_out <- data.frame(ID = seq_along(file_names),
                        file_name = file_names,
                        slope = rep(NA_real_, n_found),
                        p_val = rep(NA_real_, n_found),
                        r2 = rep(NA_real_, n_found))

stat_cols <- c("slope", "p_val", "r2")
for (i in seq_along(file_names)) {
  # read.table() skips the "#" header lines and the blank line by default
  raw <- read.table(file = paste0(file_folder, file_names[i]),
                    sep = ",",
                    header = TRUE)
  d_clean <- raw[complete.cases(raw), ] # drop rows containing NA
  fit_stats <- reg_stats(d_clean)       # regression stats from clean data
  stats_out[i, stat_cols] <- unlist(fit_stats[stat_cols])
}

# Write the metadata header and the table through one connection; this
# avoids the "appending column names to file" warning from
# write.table(append = TRUE).
out_con <- file(file_out, open = "w")
cat("# Summary stats for ",
    "batch processing of regression models homework 11", "\n",
    "# timestamp: ", as.character(Sys.time()), "\n",
    "# GLS", "\n",
    "# ------------------------", "\n",
    "\n",
    file = out_con,
    sep = "")
write.table(x = stats_out,
            file = out_con,
            row.names = FALSE,
            col.names = TRUE,
            sep = ",")
close(out_con)

hist(stats_out$p_val)