library(data.table)
Sys.setenv(TZ = "UTC")
options(digits=2)
library(randomForest)

#slist <- c("14155848","547446","2398386","14722068","2519718",
#           "5682602","12776226","547238","550119","14595225",
#           "5682100","660436","14462360",
#           "14573303","5630365")
#slist <- c("14155848")
# SELECT ID
args <- commandArgs(trailingOnly = TRUE)
message("ARGUMENTS = ",length(args))
slist <- c(as.character(args[1]))

#slist <- c("14155848")

t1 <- as.POSIXct("2022-05-17 00:00:00 CET")
t2 <- as.POSIXct("2022-07-25 23:59:59 CET")

i=1
for (i in seq_along(slist)) {

  fname <- paste0("oklab_",slist[i],"_oklab.Rdata")
  message(fname)
  load(fname)
  
  str(aggpm10)
  
  # ------------------
  pdfname <- paste0("oklab_",slist[i],"_calibration.pdf")
  pdf(file=pdfname, pointsize=10)

  #pm10org <- aggpm10[,2]
  #pm10org <- get(paste0("aggpm10$id",slist[i]))
  pm10org <- aggpm10[[paste0("id",slist[i])]]
  message(as.character(mean(pm10org,na.rm=T)))
  
  pm10ref <- aggpm10$'idOPC004'

  # ------------------
  size <- c(0,max(pm10ref[!is.na(pm10ref)]))
  plot(pm10ref,pm10org,xlab="PM10 reference EDM164 OPC004",ylab=paste0("PM10 SDS011 ",slist[i]),xlim=size,ylim=size)
  abline(0,1) 

  # fitting by random forests using sds011 and humidity as predictor
  set.seed(2)
  #rhum <- aggrhum[,2]
  #rhum <- get(paste0("aggrhum$id",slist[i]))
  rhum <- aggrhum[[paste0("id",slist[i])]]
  hum <- rhum   [ !is.na(pm10ref) & !is.na(pm10org) & !is.na(rhum)]
  ref <- pm10ref[ !is.na(pm10ref) & !is.na(pm10org) & !is.na(rhum)]
  obs <- pm10org[ !is.na(pm10ref) & !is.na(pm10org) & !is.na(rhum)]
  pm10rftime <- as.POSIXct(aggpm10$time[ !is.na(pm10ref) & !is.na(pm10org) & !is.na(rhum)])

  mask <- seq(1:length(ref))<=length(ref) # all true
  tc1 <- as.POSIXct("2022-07-01 15:45:00",format="%Y-%m-%d %H:%M:%S",tz="UTC")
  tc2 <- as.POSIXct("2022-07-12 15:45:00",format="%Y-%m-%d %H:%M:%S",tz="UTC")
  mask[as.numeric(pm10rftime) >= as.numeric(tc1) & as.numeric(pm10rftime) <= as.numeric(tc2)] <- FALSE
  length(which(mask))

  ref_cal <- ref[mask]
  ref_val <- ref[!mask]
  obs_cal <- obs[mask]
  obs_val <- obs[!mask]
  hum_cal <- hum[mask]
  hum_val <- hum[!mask]
  pm10time_cal <- pm10rftime[mask]
  pm10time_val <- pm10rftime[!mask]

  rfdata <- data.frame(ref=ref_cal,obs=obs_cal,hum=hum_cal)
  message("random forests ...")
  rf <-randomForest(ref~.,data=rfdata, ntree=1000) 
  print(rf)
  rfdata <- data.frame(obs=obs_val,hum=hum_val)
  message("calibration ...")
  pm10rf <- predict(rf, rfdata)

  # ------------------
  points(ref_val,pm10rf,col="red")

  # error estimation
  bias <- mean(pm10rf) - mean(ref_val)
  biaspercent <- ( bias / mean(ref_val) ) *100
  mae <- mean(abs(pm10rf - ref_val),na.rm=T)
  cor2 <- cor(pm10rf,ref_val,use="complete.obs")**2
  prec <- sqrt( sum((pm10rf - ref_val)**2)/length(ref_val) )
  precpercent <- ( prec / mean(ref_val) ) *100
  accuracy <- sqrt(prec**2 - bias**2) # Foken

  message("bias  = ",bias)
  message("bias% = ",biaspercent)
  message("mae   = ",mae)
  message("cor²  = ",cor2)
  message("prec  = ",prec)
  message("prec% = ",precpercent)
  bias <- as.character( format(round(bias, 2), nsmall = 2) )
  biaspercent <- as.character( format(round(biaspercent, 2), nsmall = 2) )
  mae <- as.character( format(round(mae, 2), nsmall = 2) )
  cor2 <- as.character( format(round(cor2, 2), nsmall = 2) )
  prec <- as.character( format(round(prec, 2), nsmall = 2) )
  precpercent <- as.character( format(round(precpercent, 2), nsmall = 2) )

  # ------------------
  #  mtext(paste("bias =",bias," MAE =",mae," r² =",cor2,side=3))
  mtext(paste0("Bias=",bias," (",biaspercent,"%)"," MAE=",mae," r²=",cor2," Precision=",prec," (",precpercent,"%)"),side=3)

  dev.off()
  message("calibration done!")
  
  # ------------------
  pdfname <- paste0("oklab_",slist[i],"_caltimeseries.pdf")
  message(pdfname)
  pdf(file=pdfname, pointsize=10, width=15, height=4)
  plot( pm10time_cal, obs_cal,t="l",xlim=c(t1,t2), main=paste0("SDS011 ",slist[i]," PM10 calibration(blue)/validation(red) data" ),
  xlab="time (UTC)",ylab="PM10 (µg/m³)")
  lines(pm10time_cal,ref_cal,t="l",col="blue")
  lines(pm10time_val,pm10rf,t="l",col="red")
  lines(pm10time_val,ref_val,t="l",col="cyan")
  dev.off()
  message("done!")
  

  # application
  mask <- seq(1:length(ref))<=length(ref) # all true
  tc1 <- as.POSIXct("2022-07-12 14:00:00",format="%Y-%m-%d %H:%M:%S",tz="UTC")
  tc2 <- as.POSIXct("2022-07-13 16:00:00",format="%Y-%m-%d %H:%M:%S",tz="UTC")
  mask[as.numeric(pm10rftime) >= as.numeric(tc1) & as.numeric(pm10rftime) <= as.numeric(tc2)] <- FALSE
  length(which(mask))

  ref_cal <- ref[mask]
  ref_val <- ref[!mask]
  obs_cal <- obs[mask]
  obs_val <- obs[!mask]
  hum_cal <- hum[mask]
  hum_val <- hum[!mask]
  pm10time_cal <- pm10rftime[mask]
  pm10time_val <- pm10rftime[!mask]
  

  #rfdata <- data.frame(ref=ref_cal,obs=obs_cal,hum=hum_cal)
  #message("random forests ...")
  #rf <-randomForest(ref~.,data=rfdata, ntree=1000) 
  #print(rf)
  rfdata <- data.frame(obs=obs_val,hum=hum_val)
  message("recalibration ...")
  pm10rf <- predict(rf, rfdata)


  # ------------------
  pdfname <- paste0("oklab_",slist[i],"_apptimeseries.pdf")
  pdf(file=pdfname, pointsize=10,width=15, height=4)
  plot( pm10time_cal,obs_cal,t="l",xlim=c(t1,t2),ylim=c(0,30),main=paste0("SDS011 ",slist[i]," recalibrated PM10 data" ),
  xlab="time (UTC)",ylab="PM10 (µg/m³)")
  lines(pm10time_cal,ref_cal,t="l",col="blue")
  lines(pm10time_val,pm10rf,t="l",col="red")
  lines(pm10time_val,ref_val,t="l",col="cyan")
  dev.off()

  # ------------------
  pdfname <- paste0("oklab_",slist[i],"_campaigntimeseries.pdf")
  pdf(file=pdfname, pointsize=10,width=15, height=4)
  plot( pm10time_val,pm10rf,t="l",xlim=c(tc1,tc2),ylim=c(0,30),main=paste0("SDS011 ",slist[i]," recalibrated PM10 data (black) 2022-07-12 14:00 to 2022-07-13 18:00 UTC" ),
  xlab="time (UTC)",ylab="PM10 (µg/m³)")
  lines(pm10time_val,obs_val,t="l",col="cyan")
  lines(pm10time_val,ref_val,t="l",col="blue")
  dev.off()

  rfpm10 <- rf
  rfpm10data <- data.frame(time=pm10time_val,ref=ref_val,obs=obs_val,cal=pm10rf)
  fname <- paste0("oklab_",slist[1],"_calib_pm10.Rdata")
  #save(rfpm10,rfpm10data,file=fname,compress=T,compression_level=9)
  save(rfpm10data,file=fname,compress=T,compression_level=9)

}
