library(data.table)
Sys.setenv(TZ = "UTC")
options(digits=2)
library(randomForest)

slist <- c("14155848","547446","2398386","14722068","2519718",
           "5682602","12776226","547238","550119","14595225",
           "5682100","660436","14462360",
           "14573303","5630365")
#slist <- c("14155848")
#slist <- c("547446")
slist <- c("2398386")
t1 <- as.POSIXct("2022-05-17 00:00:00 CET")
t2 <- as.POSIXct("2022-07-25 23:59:59 CET")

i=1
for (i in seq_along(slist)) {

  pdfname <- paste0("oklab_",slist[i],"_calibration_quality.pdf")
  pdf(file=pdfname, pointsize=10)

  fname <- paste0("oklab_",slist[i],"_oklab.Rdata")
  message(fname)
  load(fname)

  pm10org <- aggpm10[,i+1]
  pm10ref <- aggpm10$'idOPC004'

  size <- c(0,max(pm10ref[!is.na(pm10ref)]))
  plot(pm10ref,pm10org,xlab="PM10 reference EDM164 OPC004",ylab=paste0("PM10 SDS011 ",slist[i]),xlim=size,ylim=size)
  abline(0,1)

  # fitting by random forests using sds011 and humidity as predictor
  set.seed(2)
  rhum <- aggrhum[,i+1]
  hum <- rhum   [ !is.na(pm10ref) & !is.na(pm10org) & !is.na(rhum)]
  ref <- pm10ref[ !is.na(pm10ref) & !is.na(pm10org) & !is.na(rhum)]
  obs <- pm10org[ !is.na(pm10ref) & !is.na(pm10org) & !is.na(rhum)]
  pm10rftime <- as.POSIXct(aggpm10$time[ !is.na(pm10ref) & !is.na(pm10org) & !is.na(rhum)])

  mask <- seq(1:length(ref))<=length(ref) # all true
  tc1 <- as.POSIXct("2022-07-10 15:45:00",format="%Y-%m-%d %H:%M:%S",tz="UTC")
  tc2 <- as.POSIXct("2022-07-12 15:45:00",format="%Y-%m-%d %H:%M:%S",tz="UTC")
  mask[as.numeric(pm10rftime) >= as.numeric(tc1) & as.numeric(pm10rftime) <= as.numeric(tc2)] <- FALSE
  length(which(mask))

  ref_cal <- ref[mask]
  ref_val <- ref[!mask]
  obs_cal <- obs[mask]
  obs_val <- obs[!mask]
  hum_cal <- hum[mask]
  hum_val <- hum[!mask]
  pm10time_cal <- pm10rftime[mask]
  pm10time_val <- pm10rftime[!mask]

  rfdata <- data.frame(ref=ref_cal,obs=obs_cal,hum=hum_cal)
  message("random forests ...")
  rf <-randomForest(ref~.,data=rfdata, ntree=1000) 
  print(rf)
  rfdata <- data.frame(obs=obs_val,hum=hum_val)
  message("calibration ...")
  pm10rf <- predict(rf, rfdata)
  points(ref_val,pm10rf,col="red")

  # error estimation
  bias <- mean(pm10rf) - mean(ref_val)
  biaspercent <- ( bias / mean(ref_val) ) *100
  mae <- mean(abs(pm10rf - ref_val),na.rm=T)
  cor2 <- cor(pm10rf,ref_val,use="complete.obs")**2
  prec <- sqrt( sum((pm10rf - ref_val)**2)/length(ref_val) )
  precpercent <- ( prec / mean(ref_val) ) *100
  accuracy <- sqrt(prec**2 - bias**2) # Foken

  message("bias  = ",bias)
  message("bias% = ",biaspercent)
  message("mae   = ",mae)
  message("cor²  = ",cor2)
  message("prec  = ",prec)
  message("prec% = ",precpercent)
  
  bias <- as.character( format(round(bias, 2), nsmall = 2) )
  biaspercent <- as.character( format(round(biaspercent, 2), nsmall = 2) )
  mae <- as.character( format(round(mae, 2), nsmall = 2) )
  cor2 <- as.character( format(round(cor2, 2), nsmall = 2) )
  prec <- as.character( format(round(prec, 2), nsmall = 2) )
  precpercent <- as.character( format(round(precpercent, 2), nsmall = 2) )
  
  #  mtext(paste("bias =",bias," MAE =",mae," r² =",cor2,side=3))
  mtext(paste0("Bias=",bias," (",biaspercent,"%)"," MAE=",mae," r²=",cor2," Precision=",prec," (",precpercent,"%)"),side=3)

  dev.off()

  # application
  mask <- seq(1:length(ref))<=length(ref) # all true
  tc1 <- as.POSIXct("2022-07-12 16:00:00",format="%Y-%m-%d %H:%M:%S",tz="UTC")
  tc2 <- as.POSIXct("2022-07-13 18:00:00",format="%Y-%m-%d %H:%M:%S",tz="UTC")
  mask[as.numeric(pm10rftime) >= as.numeric(tc1) & as.numeric(pm10rftime) <= as.numeric(tc2)] <- FALSE
  length(which(mask))

  ref_cal <- ref[mask]
  ref_val <- ref[!mask]
  obs_cal <- obs[mask]
  obs_val <- obs[!mask]
  hum_cal <- hum[mask]
  hum_val <- hum[!mask]
  pm10time_cal <- pm10rftime[mask]
  pm10time_val <- pm10rftime[!mask]

  #rfdata <- data.frame(ref=ref_cal,obs=obs_cal,hum=hum_cal)
  #message("random forests ...")
  #rf <-randomForest(ref~.,data=rfdata, ntree=1000) 
  #print(rf)
  rfdata <- data.frame(obs=obs_val,hum=hum_val)
  message("recalibration ...")
  pm10rf <- predict(rf, rfdata)

  rfpm10 <- rf
  rfpm10data <- data.frame(time=pm10time_val,ref=ref_val,obs=obs_val,cal=pm10rf)
  fname <- paste0("oklab_",slist[1],"_calib_pm10.Rdata")
  #save(rfpm10,rfpm10data,file=fname,compress=T,compression_level=9)
  save(rfpm10data,file=fname,compress=T,compression_level=9)

}
