# SELECT SENSOR-ID
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
id <- "id12776226"
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

args <- commandArgs(trailingOnly = TRUE)
message("ARGUMENTS = ",length(args))
if (length(args)==0) {
  id <- as.POSIXlt(args[1])
}

#library("readxl")
#url <- "https://uniaugsburg-my.sharepoint.com/:x:/g/personal/andreas_philipp_uni-a_de/ETgvm1h53WlHowKks387oqgBRptiupQzXp4Pbu2e1rY-4A?e=7GxXEg&download=1"
#x <- read_excel("periods.xlsx",na="NA")


# READ DATA
load("oklab.Rdata")
load("edm.Rdata")
load("asopc.Rdata")
message("reading done!")

# COPY DATA TO STANDARD VARIABLE NAMES
pm10 <- eval(parse(text=paste0("oklabaggpm10$",id)))
pm25 <- eval(parse(text=paste0("oklabaggpm25$",id)))
temp <- eval(parse(text=paste0("oklabaggtemp$",id)))
temp[temp < -999] <- NA
rhum <- eval(parse(text=paste0("oklabaggrhum$",id)))
rhum[rhum < -999] <- NA
time <- as.POSIXlt(oklabaggpm10$time,tz="UTC")
t1 <- min(time) # 2023-05-23 16:00:00 UTC
t2 <- max(time) # present

# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# DEFINE TRAINING PERIOD FOR TEMP & RHUM:
t1train_th <- as.POSIXct("2023-05-23 16:00:00 UTC")
t2train_th <- as.POSIXct("2023-06-26 23:59:59 UTC")
# DEFINE TRAINING PERIOD FOR PM10 & PM25:
t1train_pm <- as.POSIXct("2023-05-23 16:00:00 UTC")
t2train_pm <- as.POSIXct("2023-06-26 23:59:59 UTC")
# DEFINE WHICH PERIOD SHOULD BE RECALIBRATED (OBSERVATION PERIOD OF FIELD EXPERIMENT)
t1exp <- as.POSIXct("2023-06-27 00:00:00 UTC")
t2exp <- as.POSIXct("2023-07-03 06:00:00 UTC")
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

#### OPTIONAL: exclude periods for calibration (repeat for more than one exclusion period):
#t1exclude <- as.POSIXct("2023-05-23 16:00:00 UTC")
#t2exclude <- as.POSIXct("2023-05-23 17:00:00 UTC")
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
#temp [ time >= t1exclude & time <= t2exclude ] <- NA
#rhum [ time >= t1exclude & time <= t2exclude ] <- NA
#pm10 [ time >= t1exclude & time <= t2exclude ] <- NA
#pm25 [ time >= t1exclude & time <= t2exclude ] <- NA
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<



# SELECT REFERENCE DATA
tempref <-asopcaggtemp$id177410813
tempreftime <-  as.POSIXlt(asopcaggtemp$time + 2*60*60,tz="UTC")
rhumref <- asopcaggrhum$id177410813
rhumreftime <-  as.POSIXlt(asopcaggrhum$time + 2*60*60,tz="UTC")
pm10ref <- edmaggpm10$idOPC004
pm10reftime <- as.POSIXlt(edmaggpm10$time,tz="UTC")
pm25ref <- edmaggpm25$idOPC004
pm25reftime <- as.POSIXlt(edmaggpm25$time,tz="UTC")

# PREPARE PLOTS
tmin <- min(time)
tmax <- max(time)
message("tmin =",tmin)
message("tmax =",tmax)
tstart <- min(seq(tmin, tmax, by = "hour"))
tstop <- max(seq(tmin, tmax, by = "hour"))

# PLOT PM10
message("plotting time series for pm10 ...")
png(paste0(id,"_pm10_timeser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
pm10max <- min( max(c(pm10,pm10ref),na.rm=TRUE) , 60 )
plot(time,pm10,xlab=paste("time UTC"),ylab="pm10 (µg/m³)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(0,pm10max),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("PM10 oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(0,pm10max,by=10),lwd=0.2,col="gray")
lines(pm10reftime,pm10ref,col="black",t="l",lwd=1.5)
lines(time,pm10,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("OPC004"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# PLOT PM25
message("plotting time series for pm25 ...")
png(paste0(id,"_pm25_timeser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
pm25max <- min( max(c(pm25,pm25ref),na.rm=TRUE) , 50 )
plot(time,pm25,xlab=paste("time UTC"),ylab="pm25 (µg/m³)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(0,pm25max),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("PM25 oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(0,pm25max,by=10),lwd=0.2,col="gray")
lines(pm25reftime,pm25ref,col="black",t="l",lwd=1.5)
lines(time,pm25,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("OPC004"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# PLOT TEMP
message("plotting time series for temp ...")
png(paste0(id,"_temp_timeser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
tempmin <- min(c(temp,tempref),na.rm=TRUE)
tempmax <- max(c(temp,tempref),na.rm=TRUE)
plot(time,temp,xlab=paste("time UTC"),ylab="Temperature (°C)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(tempmin,tempmax),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("Temp oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(tempmin,tempmax,by=5),lwd=0.2,col="gray")
lines(tempreftime,tempref,col="black",t="l",lwd=1.5)
lines(time,temp,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("SHT85"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# PLOT RHUM
message("plotting time series for rhum ...")
png(paste0(id,"_rhum_timeser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
rhummin <- min(c(rhum,rhumref),na.rm=TRUE)
rhummax <- max(c(rhum,rhumref),na.rm=TRUE)
plot(time,rhum,xlab=paste("time UTC"),ylab="Relative Humidity (%)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(rhummin,rhummax),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("Rhum oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(rhummin,rhummax,by=5),lwd=0.2,col="gray")
lines(rhumreftime,rhumref,col="black",t="l",lwd=1.5)
lines(time,rhum,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("SHT85"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# CREATE MERGED DATA FRAME "okm" OF SAME LENGTH (including NAs)
ok <- data.frame(time=time,pm10=pm10,pm25=pm25,temp=temp,rhum=rhum)
as <- data.frame(time=tempreftime,tempref=tempref,rhumref=rhumref)
edm <- data.frame(time=pm10reftime,pm10ref=pm10ref,pm25ref=pm25ref)
okm <- merge(ok,edm,by="time",all.x=TRUE)
okm <- merge(okm,as,by="time",all.x=TRUE)


# CREATE RAW DATA SCATTER PLOTS
message("plotting scatter plot for pm10 ...")
png(paste0(id,"_pm10_scatter_raw.png"),width=800, height=800, units="px", res=300, pointsize=3)
plot(okm$pm10ref,okm$pm10,xlim=c(0,pm10max),ylim=c(0,pm10max),ylab="PM10 SDS011 (µg/m³)",xlab="PM10 EDM164 (µg/m³)",main=paste("PM10 raw data ",id," versus reference"))
abline(0,1)
dev.off()

message("plotting scatter plot for pm25 ...")
png(paste0(id,"_pm25_scatter_raw.png"),width=800, height=800, units="px", res=300, pointsize=3)
plot(okm$pm25ref,okm$pm25,xlim=c(0,pm25max),ylim=c(0,pm25max),ylab="PM25 SDS011 (µg/m³)",xlab="PM25 EDM164 (µg/m³)",main=paste("PM25 raw data ",id," versus reference"))
abline(0,1)
dev.off()

message("plotting scatter plot for temp ...")
png(paste0(id,"_temp_scatter_raw.png"),width=800, height=800, units="px", res=300, pointsize=3)
plot(okm$tempref,okm$temp,xlim=c(tempmin,tempmax),ylim=c(tempmin,tempmax),ylab="TEMP BME128 (°C)",xlab="TEMP SHT85 (°C)",main=paste("Temp raw data ",id," versus reference"))
abline(0,1)
dev.off()

message("plotting scatter plot for rhum ...")
png(paste0(id,"_rhum_scatter_raw.png"),width=800, height=800, units="px", res=300, pointsize=3)
plot(okm$rhumref,okm$rhum,xlim=c(rhummin,rhummax),ylim=c(rhummin,rhummax),ylab="RHUM BME128 (°C)",xlab="RHUM SHT85 (°C)",main=paste("Rhum raw data ",id," versus reference"))
abline(0,1)
dev.off()


#######################################################################################
# BIAS CORRECTION (ONLY TESTWISE FOR TEMP, IS REPLACED BY ANN BELOW)
# REMOVE NAs
temp <- okm$temp[ ! is.na(okm$temp) & ! is.na(okm$tempref) ]
rhum <- okm$rhum[ ! is.na(okm$rhum) & ! is.na(okm$rhumref) ]
pm10 <- okm$pm10[ ! is.na(okm$pm10) & ! is.na(okm$pm10ref) ]
pm25 <- okm$pm25[ ! is.na(okm$pm25) & ! is.na(okm$pm25ref) ]
tempref <- okm$tempref[ ! is.na(okm$temp) & ! is.na(okm$tempref) ]
rhumref <- okm$rhumref[ ! is.na(okm$rhum) & ! is.na(okm$rhumref) ]
pm10ref <- okm$pm10ref[ ! is.na(okm$pm10) & ! is.na(okm$pm10ref) ]
pm25ref <- okm$pm25ref[ ! is.na(okm$pm25) & ! is.na(okm$pm25ref) ]

# BIAS 
tempbias <- mean(temp) - mean(tempref)
tempcorbias <- temp - tempbias
message("plotting scatter plot for temp ...")
png(paste0(id,"_tempcor_scatter.png"),width=800, height=800, units="px", res=300, pointsize=3)
plot(tempcorbias,tempref,xlim=c(tempmin,tempmax),ylim=c(tempmin,tempmax),xlab="TEMP BME128 (°C)",ylab="TEMP SHT85 (°C)",main=paste("OKlab sensor ",id," versus reference"))
abline(0,1)
dev.off()


#######################################################################################
geccolibrary <- require("gecco")

if ( ! geccolibrary ){
message("library gecco not available! Get it from:")
message("https://saqn.geo.uni-augsburg.de/download/gecco_0.3.tar.gz")
message("and install it by:")
message("R CMD INSTALL gecco_0.3.tar.gz")
}

if ( geccolibrary ){

message("running ann for temp and rhum ...")

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Train Neural Network for fitting both, oklab-temp (input1) and oklab-rhum (input2) at the same time to reference measurements (SHT85, output1 and output2)
# Note: all values have to be scaled to range between 0 and 1, results are therefore also scaled and have to be rescaled to get accurate units!


# PREPARE DATA FOR TEMP AND RHUM FITTING
n <- 1
i <- 1
scalerange_th <- data.frame(mintempraw=rep(as.double(NA),n),maxtempraw=rep(as.double(NA),n),scaletempraw=rep(as.double(NA),n),
			 mintempref=rep(as.double(NA),n),maxtempref=rep(as.double(NA),n),scaletempref=rep(as.double(NA),n),
                         minrhumraw=rep(as.double(NA),n),maxrhumraw=rep(as.double(NA),n),scalerhumraw=rep(as.double(NA),n),
                         minrhumref=rep(as.double(NA),n),maxrhumref=rep(as.double(NA),n),scalerhumref=rep(as.double(NA),n))


timeann_th <- okm$time[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]

index_th <- rep(0,length(timeann_th))
index_th [ timeann_th >= t1train_th & timeann_th <= t2train_th ] <- 1
index_th [ timeann_th >= t1exp & timeann_th <= t2exp ] <- 4


temp <- okm$temp[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]
 input1 <- temp
 scalerange_th$mintempraw[i] <- min(input1)
 scalerange_th$maxtempraw[i] <- max(input1)
 scalerange_th$scaletempraw[i] <- max(input1-min(input1))
 input1s <- (input1-min(input1)) / max(input1-min(input1)) # scale to 0 - 1

rhum <- okm$rhum[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]
 input2 <- rhum
 scalerange_th$minrhumraw[i] <- min(input2)
 scalerange_th$maxrhumraw[i] <- max(input2)
 scalerange_th$scalerhumraw[i] <- max(input2-min(input2))
 input2s <- (input2-min(input2)) / max(input2-min(input2)) # scale to 0 - 1

tempref <- okm$tempref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]
 output1 <- tempref
 scalerange_th$mintempref[i] <- min(output1)
 scalerange_th$maxtempref[i] <- max(output1)
 scalerange_th$scaletempref[i] <- max(output1-min(output1))
 output1s <- (output1-min(output1)) / max(output1-min(output1)) # scale to 0 - 1

rhumref <- okm$rhumref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]
 output2 <- rhumref
 scalerange_th$minrhumref[i] <- min(output2)
 scalerange_th$maxrhumref[i] <- max(output2)
 scalerange_th$scalerhumref[i] <- max(output2-min(output2))
 output2s <- (output2-min(output2)) / max(output2-min(output2)) # scale to 0 - 1

 # CONVERT TO MATRIX
 intab <- data.frame(input1=input1s,input2=input2s)
 inmat <- as.matrix(intab)
 outtab <- data.frame(output1=output1s,output2=output2s)
 outmat <- as.matrix(outtab)

 # define index for separation into 1:training, 2:evaluation and 3:testing data subset
 # you can modify the vector "index" to select which data should be used for training and testing
 len <- length(input1[index_th != 4])
 index_th[floor(len*0.6):floor(len*0.8)] <- 2
 index_th[floor(len*0.8):len] <- 3

 # INIT AND TRAIN NEURAL NETWORK
 hl <- c(2,3,3,2)
 anni_th <- initANN(x=inmat,y=outmat,hl=hl,bias=1,transferFun="logsig")
 ann_th <- train(object=anni_th, x=inmat, y=outmat, spIndex=index_th, shuffleDiv=FALSE)
 print("training finished for temp/rhum!")
 summary(ann_th)

 # APPLY MODEL
 prediction <- predict(ann_th,newdata=inmat)
 pre <- as.data.frame(prediction)

 tempmod <- pre$V1 * scalerange_th$scaletempref[i] + scalerange_th$mintempref[i]
 temprecal <- tempmod[ index_th == 4 ]
 tempmod <- tempmod[ index_th == 3 ] # select only last 20% = testing data subset
 tempref <- tempref[ index_th == 3 ] # select only last 20% = testing data subset

 rhummod <- pre$V2 * scalerange_th$scalerhumref[i] + scalerange_th$minrhumref[i]
 rhumrecal <- rhummod[ index_th == 4 ]
 rhummod <- rhummod[ index_th == 3 ] # select only last 20% = testing data subset
 rhumref <- rhumref[ index_th == 3 ] # select only last 20% = testing data subset
 
 timerecal_th <- timeann_th[ index_th == 4 ]

 # ACCURACY METRICS & SCATTER PLOT TEMP
 mae <- mean(abs(tempmod-tempref),na.rm=T)
 bias <- mean(tempmod-tempref,na.rm=T)
 rmse <- sqrt(sum( (tempmod-tempref)**2,na.rm=T)/length(tempmod))
 r2 <- cor(tempmod,tempref,method="spearman")
 message(paste0("temp: mae =",mae," bias =",bias," rmse =",rmse))
 png(paste0(id,"_temp_scatter_ann.png"),width=800, height=800, units="px", res=300, pointsize=3)
 plot(tempref,tempmod,xlim=c(tempmin,tempmax),ylim=c(tempmin,tempmax),ylab="TEMP BME238 (°C)",xlab="TEMP SHT85 (°C)",
      main=paste0("Temp fitted data ",id," mae=",format(round(mae,3),nsmall=3)," bias=",format(round(bias,3),nsmall=3)," rmse=",format(round(rmse,3),nsmall=3)," r²=",format(round(r2,3),nsmall=3)))
 abline(0,1)
 dev.off()

 # ACCURACY METRICS & SCATTER PLOT RHUM
 mae <- mean(abs(rhummod-rhumref),na.rm=T)
 bias <- mean(rhummod-rhumref,na.rm=T)
 rmse <- sqrt(sum( (rhummod-rhumref)**2,na.rm=T)/length(rhummod))
 r2 <- cor(rhummod,rhumref,method="spearman")
 message(paste0("rhum: mae =",mae," bias =",bias," rmse =",rmse))
 png(paste0(id,"_rhum_scatter_ann.png"),width=800, height=800, units="px", res=300, pointsize=3)
 plot(rhumref,rhummod,xlim=c(rhummin,rhummax),ylim=c(rhummin,rhummax),ylab="RHUM BME238 (%)",xlab="RHUM SHT85 (%)",
      main=paste0("Rhum fitted data ",id," mae=",format(round(mae,3),nsmall=3)," bias=",format(round(bias,3),nsmall=3)," rmse=",format(round(rmse,3),nsmall=3)," r²=",format(round(r2,3),nsmall=3)))
 abline(0,1)
 dev.off()

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Train Neural Network for fitting both, oklab-pm10 (input3) and oklab-pm25 (input4) at the same time to reference measurements (EDM164, output1 and output2)
# ADDITIONAL INPUT IS RHUM AND TEMP IN ORDER TO REDUCE THE HUMIDITY ERROR!
# Note: all values have to be scaled to range between 0 and 1, results are therefore also scaled and have to be rescaled to get accurate units!
# PREPARE DATA FOR PM10 AND PM25 FITTING

message("")
message("running ann for pm10 and pm25 ...")

scalerange_pm <- data.frame(mintempraw=rep(as.double(NA),n),maxtempraw=rep(as.double(NA),n),scaletempraw=rep(as.double(NA),n),
			 mintempref=rep(as.double(NA),n),maxtempref=rep(as.double(NA),n),scaletempref=rep(as.double(NA),n),
                         minrhumraw=rep(as.double(NA),n),maxrhumraw=rep(as.double(NA),n),scalerhumraw=rep(as.double(NA),n),
                         minrhumref=rep(as.double(NA),n),maxrhumref=rep(as.double(NA),n),scalerhumref=rep(as.double(NA),n),
                         minpm25raw=rep(as.double(NA),n),maxpm25raw=rep(as.double(NA),n),scalepm25raw=rep(as.double(NA),n),
                         minpm25ref=rep(as.double(NA),n),maxpm25ref=rep(as.double(NA),n),scalepm25ref=rep(as.double(NA),n),
                         minpm10raw=rep(as.double(NA),n),maxpm10raw=rep(as.double(NA),n),scalepm10raw=rep(as.double(NA),n),
                         minpm10ref=rep(as.double(NA),n),maxpm10ref=rep(as.double(NA),n),scalepm10ref=rep(as.double(NA),n))

timeann_pm <- okm$time [ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref) ]

index_pm <- rep(0,length(timeann_pm))
index_pm [ timeann_pm >= t1train_pm & timeann_pm <= t2train_pm ] <- 1
index_pm [ timeann_pm >= t1exp & timeann_pm <= t2exp ] <- 4


temp <- okm$temp[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 input1 <- temp
 scalerange_pm$mintempraw[i] <- min(input1)
 scalerange_pm$maxtempraw[i] <- max(input1)
 scalerange_pm$scaletempraw[i] <- max(input1-min(input1))
 input1s <- (input1-min(input1)) / max(input1-min(input1)) # scale to 0 - 1

rhum <- okm$rhum[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 input2 <- rhum
 scalerange_pm$minrhumraw[i] <- min(input2)
 scalerange_pm$maxrhumraw[i] <- max(input2)
 scalerange_pm$scalerhumraw[i] <- max(input2-min(input2))
 input2s <- (input2-min(input2)) / max(input2-min(input2)) # scale to 0 - 1

pm10 <- okm$pm10[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 input3 <- pm10
 scalerange_pm$minpm10raw[i] <- min(input3)
 scalerange_pm$maxpm10raw[i] <- max(input3)
 scalerange_pm$scalepm10raw[i] <- max(input3-min(input3))
 input3s <- (input3-min(input3)) / max(input3-min(input3)) # scale to 0 - 1

pm25 <- okm$pm25[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 input4 <- pm25
 scalerange_pm$minpm25raw[i] <- min(input4)
 scalerange_pm$maxpm25raw[i] <- max(input4)
 scalerange_pm$scalepm25raw[i] <- max(input4-min(input4))
 input4s <- (input4-min(input4)) / max(input4-min(input4)) # scale to 0 - 1


pm10ref <- okm$pm10ref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 output1 <- pm10ref
 scalerange_pm$minpm10ref[i] <- min(output1)
 scalerange_pm$maxpm10ref[i] <- max(output1)
 scalerange_pm$scalepm10ref[i] <- max(output1-min(output1))
 output1s <- (output1-min(output1)) / max(output1-min(output1)) # scale to 0 - 1

pm25ref <- okm$pm25ref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 output2 <- pm25ref
 scalerange_pm$minpm25ref[i] <- min(output2)
 scalerange_pm$maxpm25ref[i] <- max(output2)
 scalerange_pm$scalepm25ref[i] <- max(output2-min(output2))
 output2s <- (output2-min(output2)) / max(output2-min(output2)) # scale to 0 - 1

 # CONVERT TO MATRIX
 intab <- data.frame(input1=input1s,input2=input2s,input3=input3s,input4=input4s)
 inmat <- as.matrix(intab)
 outtab <- data.frame(output1=output1s,output2=output2s)
 outmat <- as.matrix(outtab)
 
 # define index for separation into 1:training, 2:evaluation and 3:testing data subset
 # you can modify the vector "index" to select which data should be used for training and testing
 len <- length(input1[index_pm != 4])
 index_pm[floor(len*0.6):floor(len*0.8)] <- 2
 index_pm[floor(len*0.8):len] <- 3

 # INIT AND TRAIN NEURAL NETWORK
 hl <- c(4,4,3,2)
 anni_pm <- initANN(x=inmat,y=outmat,hl=hl,bias=1,transferFun="logsig")
 ann_pm <- train(object=anni_pm, x=inmat, y=outmat, spIndex=index_pm, shuffleDiv=FALSE)
 print("training finished for pm10/pm25!")
 summary(ann_pm)

 # APPLY MODEL
 prediction <- predict(ann_pm,newdata=inmat)
 pre <- as.data.frame(prediction)
 
 pm10mod <- pre$V1 * scalerange_pm$scalepm10ref[i] + scalerange_pm$minpm10ref[i]
 pm10recal <- pm10mod[ index_pm == 4 ]
 pm10mod <- pm10mod[ index_pm == 3 ] # select only last 20% = testing data subset
 pm10ref <- pm10ref[ index_pm == 3 ] # select only last 20% = testing data subset
 
 pm25mod <- pre$V2 * scalerange_pm$scalepm25ref[i] + scalerange_pm$minpm25ref[i]
 pm25recal <- pm25mod[ index_pm == 4 ]
 pm25mod <- pm25mod[ index_pm == 3 ] # select only last 20% = testing data subset
 pm25ref <- pm25ref[ index_pm == 3 ] # select only last 20% = testing data subset
 
 timerecal_pm <- timeann_pm[ index_pm == 4 ]


 # ACCURACY METRICS & SCATTER PLOT PM10
 mae <- mean(abs(pm10mod-pm10ref),na.rm=T)
 bias <- mean(pm10mod-pm10ref,na.rm=T)
 rmse <- sqrt(sum( (pm10mod-pm10ref)**2,na.rm=T)/length(pm10mod))
 r2 <- cor(pm10mod,pm10ref,method="spearman")
 png(paste0(id,"_pm10_scatter_ann.png"),width=800, height=800, units="px", res=300, pointsize=3)
 plot(pm10ref,pm10mod,xlim=c(tempmin,tempmax),ylim=c(tempmin,tempmax),ylab="PM10 SDS011 (µg/m³)",xlab="PM10 EDM164 (µg/m³)",
      main=paste0("PM10 fitted data ",id," mae=",format(round(mae,3),nsmall=3)," bias=",format(round(bias,3),nsmall=3)," rmse=",format(round(rmse,3),nsmall=3)," r²=",format(round(r2,3),nsmall=3)))
 abline(0,1)
 dev.off()

 # ACCURACY METRICS & SCATTER PLOT PM25
 mae <- mean(abs(pm25mod-pm25ref),na.rm=T)
 bias <- mean(pm25mod-pm25ref,na.rm=T)
 rmse <- sqrt(sum( (pm25mod-pm25ref)**2,na.rm=T)/length(pm25mod))
 r2 <- cor(pm25mod,pm25ref,method="spearman")
 png(paste0(id,"_pm25_scatter_ann.png"),width=800, height=800, units="px", res=300, pointsize=3)
 plot(pm10ref,pm10mod,xlim=c(tempmin,tempmax),ylim=c(tempmin,tempmax),ylab="PM10 SDS011 (µg/m³)",xlab="PM10 EDM164 (µg/m³)",
      main=paste0("PM25 fitted data",id," mae=",format(round(mae,3),nsmall=3)," bias=",format(round(bias,3),nsmall=3)," rmse=",format(round(rmse,3),nsmall=3)," r²=",format(round(r2,3),nsmall=3)))
 abline(0,1)
 dev.off()

save(scalerange_th,ann_th,scalerange_pm,ann_pm,file=paste0(id,"_recalibration_ann.Rdata"))


# timeann and fitted values ... 
save(timerecal_th,temprecal,rhumrecal, timerecal_pm,pm10recal,pm25recal ,file=paste0(id,"_recalibrated.Rdata"))

# PREPARE PLOTS FOR RECALIBRATED DATA ONLY
tmin <- min(timerecal_th)
tmax <- max(timerecal_th)
message("tmin th =",tmin)
message("tmax th =",tmax)

 if( length(timerecal_th) > 0 ){

tstart <- min(seq(tmin, tmax, by = "hour"))
tstop <- max(seq(tmin, tmax, by = "hour"))


# PLOT TEMP
message("plotting recalibrated time series for temp ...")
png(paste0(id,"_temp_recalser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
tempmin <- min(c(temprecal,tempref),na.rm=TRUE)
tempmax <- max(c(temprecal,tempref),na.rm=TRUE)
plot(timerecal_th,temprecal,xlab=paste("time UTC"),ylab="Temperature (°C)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(tempmin,tempmax),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("recalibrated Temp oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(tempmin,tempmax,by=5),lwd=0.2,col="gray")
#lines(tempreftime,tempref,col="black",t="l",lwd=1.5)
lines(timerecal_th,temprecal,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("SHT85"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# PLOT RHUM
message("plotting recalibrated time series for rhum ...")
png(paste0(id,"_rhum_recalser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
rhummin <- min(c(rhum,rhumref),na.rm=TRUE)
rhummax <- max(c(rhum,rhumref),na.rm=TRUE)
plot(timerecal_th,rhumrecal,xlab=paste("time UTC"),ylab="Relative Humidity (%)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(rhummin,rhummax),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("recalibrated Rhum oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(rhummin,rhummax,by=5),lwd=0.2,col="gray")
#lines(rhumreftime,rhumref,col="black",t="l",lwd=1.5)
lines(timerecal_th,rhumrecal,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("SHT85"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

}

# PREPARE PLOTS FOR RECALIBRATED DATA ONLY
tmin <- min(timerecal_pm)
tmax <- max(timerecal_pm)
message("tmin pm =",tmin)
message("tmax pm =",tmax)


if(length(timerecal_pm)>0){
  tstart <- min(seq(tmin, tmax, by = "hour"))
  tstop <- max(seq(tmin, tmax, by = "hour"))

# PLOT PM10
message("plotting recalibrated time series for recalibrated pm10 ...")
png(paste0(id,"_pm10_recalser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
pm10max <- min( max(c(pm10recal,pm10ref),na.rm=TRUE) , 60 )
plot(timerecal_pm,pm10recal,xlab=paste("time UTC"),ylab="pm10 (µg/m³)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(0,pm10max),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("recalibrated PM10 oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(0,pm10max,by=10),lwd=0.2,col="gray")
#lines(pm10reftime,pm10ref,col="black",t="l",lwd=1.5)
lines(timerecal_pm,pm10recal,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("OPC004"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# PLOT PM25
message("plotting recalibrated time series for pm25 ...")
png(paste0(id,"_pm25_recalser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
pm25max <- min( max(c(pm25recal,pm25ref),na.rm=TRUE) , 50 )
plot(timerecal_pm,pm25recal,xlab=paste("time UTC"),ylab="pm25 (µg/m³)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(0,pm25max),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("recalibrated PM25 oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(0,pm25max,by=10),lwd=0.2,col="gray")
#lines(pm25reftime,pm25ref,col="black",t="l",lwd=1.5)
lines(timerecal_pm,pm25recal,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("OPC004"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

}

}

