message("")
message("#########################################")
message("#########################################")
message("#########################################")


# SELECT SENSOR-ID
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
id <- "id12776226"
id <- "id2398386"
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

args <- commandArgs(trailingOnly = TRUE)
message("ARGUMENTS = ",length(args))
if (length(args)==1) {
  id <- args[1]
}
message("ID = ",id)

# READ DATA
load("oklab.Rdata")
load("edm.Rdata")
load("asopc.Rdata")
message("reading done!")

# COPY DATA TO STANDARD VARIABLE NAMES
pm10 <- eval(parse(text=paste0("oklabaggpm10$",id)))
pm25 <- eval(parse(text=paste0("oklabaggpm25$",id)))
temp <- eval(parse(text=paste0("oklabaggtemp$",id)))
rhum <- eval(parse(text=paste0("oklabaggrhum$",id)))
time <- as.POSIXlt(oklabaggpm10$time,tz="UTC")

# FILTER
pm10[pm10<1|pm10>85] <- NA
pm25[pm25<1|pm25>60] <- NA
temp[temp < -99 | temp > 60] <- NA
rhum[rhum < 1 | rhum > 99] <- NA


t1 <- min(time) # 2023-05-23 16:00:00 UTC
t2 <- max(time) # present

#########################################################

message("#########################################")
message("reading periods.xlsx ...")
library("readxl")
url <- "https://uniaugsburg-my.sharepoint.com/:x:/g/personal/andreas_philipp_uni-a_de/ETgvm1h53WlHowKks387oqgBRptiupQzXp4Pbu2e1rY-4A?e=7GxXEg&download=1"
x <- read_excel("periods.xlsx",na="NA")
#str(x)
x$SensorID <- as.character(x$SensorID)
x$t1excl1 <- as.character(x$t1excl1)
x$t2excl1 <- as.character(x$t2excl1)
x$t1excl2 <- as.character(x$t1excl2)
x$t2excl2 <- as.character(x$t2excl2)
x$t1excl3 <- as.character(x$t1excl3)
x$t2excl3 <- as.character(x$t2excl3)
#str(x)

message("reading period dates ...")
#seq_along(x$SensorID)
for( i in seq_along(x$SensorID) ) {
 # message("i = ",i)
 # message("id = ",x$SensorID[i]) 
 sid <- paste0("id",x$SensorID[i])
 if( sid == id ){
  message("checking ", sid )
  t1train_th <- as.POSIXct(x$t1train_th[i])
  t2train_th <- as.POSIXct(x$t2train_th[i])
  t1train_pm <- as.POSIXct(x$t1train_pm[i])
  t2train_pm <- as.POSIXct(x$t2train_pm[i])
  t1exp <- as.POSIXct(x$t1exp[i])
  t2exp <- as.POSIXct(x$t2exp[i])

  if( ! is.na(x$t1excl1[i]) & ! is.na(x$t2excl1[i]) ){
    t1exclude <- as.POSIXct(x$t1ecl1[i])
    t2exclude <- as.POSIXct(x$t2ecl1[i])
    temp [ time >= t1exclude & time <= t2exclude ] <- NA
    rhum [ time >= t1exclude & time <= t2exclude ] <- NA
    pm10 [ time >= t1exclude & time <= t2exclude ] <- NA
    pm25 [ time >= t1exclude & time <= t2exclude ] <- NA
  }

  if( ! is.na(x$t1excl2[i]) & ! is.na(x$t2excl2[i]) ){
    t1exclude <- as.POSIXct(x$t1ecl2[i])
    t2exclude <- as.POSIXct(x$t2ecl2[i])
    temp [ time >= t1exclude & time <= t2exclude ] <- NA
    rhum [ time >= t1exclude & time <= t2exclude ] <- NA
    pm10 [ time >= t1exclude & time <= t2exclude ] <- NA
    pm25 [ time >= t1exclude & time <= t2exclude ] <- NA
  }

  if( ! is.na(x$t1excl3[i]) & ! is.na(x$t2excl3[i]) ){
    t1exclude <- as.POSIXct(x$t1ecl3[i])
    t2exclude <- as.POSIXct(x$t2ecl3[i])
    temp [ time >= t1exclude & time <= t2exclude ] <- NA
    rhum [ time >= t1exclude & time <= t2exclude ] <- NA
    pm10 [ time >= t1exclude & time <= t2exclude ] <- NA
    pm25 [ time >= t1exclude & time <= t2exclude ] <- NA
  }
} # if sid
} # for i

message("t1train_th = ",t1train_th)
message("t2train_th = ",t2train_th)
message("t1train_pm = ",t1train_pm)
message("t2train_pm = ",t2train_pm)
message("t1exp      = ",t1exp)
message("t2exp      = ",t2exp)

## <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
## DEFINE TRAINING PERIOD FOR TEMP & RHUM:
#t1train_th <- as.POSIXct("2023-05-23 16:00:00 UTC")
#t2train_th <- as.POSIXct("2023-06-26 23:59:59 UTC")
## DEFINE TRAINING PERIOD FOR PM10 & PM25:
#t1train_pm <- as.POSIXct("2023-05-23 16:00:00 UTC")
#t2train_pm <- as.POSIXct("2023-06-26 23:59:59 UTC")
## DEFINE WHICH PERIOD SHOULD BE RECALIBRATED (OBSERVATION PERIOD OF FIELD EXPERIMENT)
#t1exp <- as.POSIXct("2023-06-27 00:00:00 UTC")
#t2exp <- as.POSIXct("2023-07-03 06:00:00 UTC")
## <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

#### OPTIONAL: exclude periods for calibration (repeat for more than one exclusion period):
#t1exclude <- as.POSIXct("2023-05-23 16:00:00 UTC")
#t2exclude <- as.POSIXct("2023-05-23 17:00:00 UTC")
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
#temp [ time >= t1exclude & time <= t2exclude ] <- NA
#rhum [ time >= t1exclude & time <= t2exclude ] <- NA
#pm10 [ time >= t1exclude & time <= t2exclude ] <- NA
#pm25 [ time >= t1exclude & time <= t2exclude ] <- NA
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<



# SELECT REFERENCE DATA
#tempref <-asopcaggtemp$id177410813
#tempreftime <-  as.POSIXlt(asopcaggtemp$time + 2*60*60,tz="UTC")
#rhumref <- asopcaggrhum$id177410813
#rhumreftime <-  as.POSIXlt(asopcaggrhum$time + 2*60*60,tz="UTC")

tempref <- edmaggtemp$idOPC004
tempreftime <- as.POSIXlt(edmaggtemp$time,tz="UTC")
rhumref <- edmaggrhum$idOPC004
rhumreftime <- as.POSIXlt(edmaggrhum$time,tz="UTC")


pm10ref <- edmaggpm10$idOPC004
pm10reftime <- as.POSIXlt(edmaggpm10$time,tz="UTC")
pm25ref <- edmaggpm25$idOPC004
pm25reftime <- as.POSIXlt(edmaggpm25$time,tz="UTC")

# FILTER
pm10ref[pm10ref<1|pm10ref>85] <- NA
pm25ref[pm25ref<1|pm25ref>60] <- NA
tempref[tempref < -99 | tempref > 60] <- NA
rhumref[rhumref < 1 | rhumref > 99] <- NA


# PREPARE PLOTS
#tmin <- min(time)
#tmax <- max(time)
tmin <- t1train_pm
tmax <- max(time)

message("tmin =",tmin)
message("tmax =",tmax)
tstart <- min(seq(tmin, tmax, by = "hour"))
tstop <- max(seq(tmin, tmax, by = "hour"))

col1 <- "lightblue"
col2 <- "lavender"


pm10max <- max(c(pm10,pm10ref),na.rm=TRUE)
pm10min <- 0
pm25max <- max(c(pm25,pm25ref),na.rm=TRUE) 
pm25min <- 0


# PLOT PM10
message("plotting raw time series for pm10 ...")
png(paste0(id,"_pm10_timeser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
plot(time,pm10,xlab=paste("time UTC"),ylab="pm10 (µg/m³)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(0,pm10max),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("raw data PM10 oklab 5min-mean (",tmin," - ",tmax," UTC)" ))

rect(xleft=t1train_pm,ybottom=0,xright=t2train_pm,ytop=pm10max,col=col1)
rect(xleft=t1exp,ybottom=0,xright=t2exp,ytop=pm10max,col=col2)

ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600*24) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(0,pm10max,by=10),lwd=0.2,col="gray")
lines(pm10reftime,pm10ref,col="black",t="l",lwd=1.5)
lines(time,pm10,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("OPC004"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# PLOT PM25
message("plotting raw time series for pm25 ...")
png(paste0(id,"_pm25_timeser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
plot(time,pm25,xlab=paste("time UTC"),ylab="pm25 (µg/m³)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(0,pm25max),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("raw data PM25 oklab 5min-mean (",tmin," - ",tmax," UTC)" ))

rect(xleft=t1train_pm,ybottom=0,xright=t2train_pm,ytop=pm25max,col=col1)
rect(xleft=t1exp,ybottom=0,xright=t2exp,ytop=pm25max,col=col2)

ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600*24) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(0,pm25max,by=10),lwd=0.2,col="gray")
lines(pm25reftime,pm25ref,col="black",t="l",lwd=1.5)
lines(time,pm25,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("OPC004"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()


tmin <- t1train_th
tmax <- max(time)

# PLOT TEMP
message("plotting raw time series for temp ...")
png(paste0(id,"_temp_timeser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
tempmin <- min(c(temp,tempref),na.rm=TRUE)
tempmax <- max(c(temp,tempref),na.rm=TRUE)
plot(time,temp,xlab=paste("time UTC"),ylab="Temperature (°C)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(tempmin,tempmax),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("raw data Temp oklab 5min-mean (",tmin," - ",tmax," UTC)" ))

rect(xleft=t1train_th,ybottom=tempmin,xright=t2train_th,ytop=tempmax,col=col1)
rect(xleft=t1exp,ybottom=tempmin,xright=t2exp,ytop=tempmax,col=col2)

ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600*24) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(tempmin,tempmax,by=5),lwd=0.2,col="gray")
lines(tempreftime,tempref,col="black",t="l",lwd=1.5)
lines(time,temp,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("SHT85"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# PLOT RHUM
message("plotting raw time series for rhum ...")
png(paste0(id,"_rhum_timeser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
rhummin <- min(c(rhum,rhumref),na.rm=TRUE)
rhummax <- max(c(rhum,rhumref),na.rm=TRUE)
plot(time,rhum,xlab=paste("time UTC"),ylab="Relative Humidity (%)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(rhummin,rhummax),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("raw data Rhum oklab 5min-mean (",tmin," - ",tmax," UTC)" ))

rect(xleft=t1train_th,ybottom=rhummin,xright=t2train_th,ytop=rhummax,col=col1)
rect(xleft=t1exp,ybottom=rhummin,xright=t2exp,ytop=rhummax,col=col2)

ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600*24) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(rhummin,rhummax,by=5),lwd=0.2,col="gray")
lines(rhumreftime,rhumref,col="black",t="l",lwd=1.5)
lines(time,rhum,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("SHT85"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# CREATE MERGED DATA FRAME "okm" OF SAME LENGTH (including NAs)
ok <- data.frame(time=time,pm10=pm10,pm25=pm25,temp=temp,rhum=rhum)
as <- data.frame(time=tempreftime,tempref=tempref,rhumref=rhumref)
edm <- data.frame(time=pm10reftime,pm10ref=pm10ref,pm25ref=pm25ref)
okm <- merge(ok,edm,by="time",all.x=TRUE)
okm <- merge(okm,as,by="time",all.x=TRUE)


# CREATE RAW DATA SCATTER PLOTS
 # ACCURACY METRICS & SCATTER PLOT PM25
 mae <- mean(abs(okm$pm10-okm$pm10ref),na.rm=T)
 bias <- mean(okm$pm10-okm$pm10ref,na.rm=T)
 rmse <- sqrt(sum( (okm$pm10-okm$pm10ref)**2,na.rm=T)/length(okm$pm10[!is.na(okm$pm10)]))
 r2 <- cor(okm$pm10,okm$pm10ref,method="spearman")
message("plotting raw scatter plot for pm10 ...")
png(paste0(id,"_pm10_scatter_raw.png"),width=800, height=800, units="px", res=300, pointsize=3)
plot(okm$pm10ref,okm$pm10,xlim=c(0,pm10max),ylim=c(0,pm10max),ylab="PM10 SDS011 (µg/m³)",xlab="PM10 EDM164 (µg/m³)",main=paste("PM10 raw data ",id," versus reference"))
abline(0,1)
dev.off()

message("plotting raw scatter plot for pm25 ...")
png(paste0(id,"_pm25_scatter_raw.png"),width=800, height=800, units="px", res=300, pointsize=3)
plot(okm$pm25ref,okm$pm25,xlim=c(0,pm25max),ylim=c(0,pm25max),ylab="PM25 SDS011 (µg/m³)",xlab="PM25 EDM164 (µg/m³)",main=paste("PM25 raw data ",id," versus reference"))
abline(0,1)
dev.off()


message("plotting raw scatter plot for temp ...")
png(paste0(id,"_temp_scatter_raw.png"),width=800, height=800, units="px", res=300, pointsize=3)
plot(okm$tempref,okm$temp,xlim=c(tempmin,tempmax),ylim=c(tempmin,tempmax),ylab="TEMP BME128 (°C)",xlab="TEMP SHT85 (°C)",main=paste("Temp raw data ",id," versus reference"))
abline(0,1)
dev.off()

message("plotting raw scatter plot for rhum ...")
png(paste0(id,"_rhum_scatter_raw.png"),width=800, height=800, units="px", res=300, pointsize=3)
plot(okm$rhumref,okm$rhum,xlim=c(rhummin,rhummax),ylim=c(rhummin,rhummax),ylab="RHUM BME128 (°C)",xlab="RHUM SHT85 (°C)",main=paste("Rhum raw data ",id," versus reference"))
abline(0,1)
dev.off()


#######################################################################################
# BIAS CORRECTION (ONLY TESTWISE FOR TEMP, IS REPLACED BY ANN BELOW)
# REMOVE NAs
#temp <- okm$temp[ ! is.na(okm$temp) & ! is.na(okm$tempref) ]
#rhum <- okm$rhum[ ! is.na(okm$rhum) & ! is.na(okm$rhumref) ]
#pm10 <- okm$pm10[ ! is.na(okm$pm10) & ! is.na(okm$pm10ref) ]
#pm25 <- okm$pm25[ ! is.na(okm$pm25) & ! is.na(okm$pm25ref) ]
#tempref <- okm$tempref[ ! is.na(okm$temp) & ! is.na(okm$tempref) ]
#rhumref <- okm$rhumref[ ! is.na(okm$rhum) & ! is.na(okm$rhumref) ]
#pm10ref <- okm$pm10ref[ ! is.na(okm$pm10) & ! is.na(okm$pm10ref) ]
#pm25ref <- okm$pm25ref[ ! is.na(okm$pm25) & ! is.na(okm$pm25ref) ]

## BIAS 
#tempbias <- mean(temp) - mean(tempref)
#tempcorbias <- temp - tempbias
#message("plotting scatter plot for temp ...")
#png(paste0(id,"_tempcor_scatter.png"),width=800, height=800, units="px", res=300, pointsize=3)
#plot(tempcorbias,tempref,xlim=c(tempmin,tempmax),ylim=c(tempmin,tempmax),xlab="TEMP BME128 (°C)",ylab="TEMP SHT85 (°C)",main=paste("OKlab sensor ",id," versus reference"))
#abline(0,1)
#dev.off()


#######################################################################################
geccolibrary <- require("gecco")

if ( ! geccolibrary ){
message("library gecco not available! Get it from:")
message("https://saqn.geo.uni-augsburg.de/download/gecco_0.3.tar.gz")
message("and install it by:")
message("R CMD INSTALL gecco_0.3.tar.gz")
}

if ( geccolibrary ){

message("")
message("##########################################################################")
message("##########################################################################")
message("running ann for temp and rhum ...")

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Train Neural Network for fitting both, oklab-temp (input1) and oklab-rhum (input2) at the same time to reference measurements (SHT85, output1 and output2)
# Note: all values have to be scaled to range between 0 and 1, results are therefore also scaled and have to be rescaled to get accurate units!


# PREPARE DATA FOR TEMP AND RHUM FITTING
n <- 1
i <- 1
scalerange_th <- data.frame(mintempraw=rep(as.double(NA),n),maxtempraw=rep(as.double(NA),n),scaletempraw=rep(as.double(NA),n),
			 mintempref=rep(as.double(NA),n),maxtempref=rep(as.double(NA),n),scaletempref=rep(as.double(NA),n),
                         minrhumraw=rep(as.double(NA),n),maxrhumraw=rep(as.double(NA),n),scalerhumraw=rep(as.double(NA),n),
                         minrhumref=rep(as.double(NA),n),maxrhumref=rep(as.double(NA),n),scalerhumref=rep(as.double(NA),n))


timeann_th <- okm$time[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]

 # define index for separation into 1:training, 2:evaluation and 3:testing data subset
 message("index for separation into: 1=training, 2=stopping and 3=testing data subset ...")

 index_th <- rep(0,length(timeann_th))
 index_th [ timeann_th >= t1train_th & timeann_th <= t2train_th ] <- 1
 len <- length(timeann_th[index_th == 1] )
 begin <- match(1,index_th)
 message("length of ann input data = ",len,"  starting at index ",begin)
 index_th[ (begin+floor(len*0.6)) : (begin+floor(len*0.8)) ] <- 2
 index_th[ (begin+floor(len*0.8)) : (begin+(len-1)) ] <- 3
 index_th [ timeann_th >= t1exp & timeann_th <= t2exp ] <- 4

 message("index_th == 1: ",length(index_th[index_th == 1]))
 message("index_th == 2: ",length(index_th[index_th == 2]))
 message("index_th == 3: ",length(index_th[index_th == 3]))
 message("index_th == 4: ",length(index_th[index_th == 4]))


temp <- okm$temp[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]
 input1 <- temp
 scalerange_th$mintempraw[i] <- min(input1)
 scalerange_th$maxtempraw[i] <- max(input1)
 scalerange_th$scaletempraw[i] <- max(input1-min(input1))
 input1s <- (input1-min(input1)) / max(input1-min(input1)) # scale to 0 - 1

rhum <- okm$rhum[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]
 input2 <- rhum
 scalerange_th$minrhumraw[i] <- min(input2)
 scalerange_th$maxrhumraw[i] <- max(input2)
 scalerange_th$scalerhumraw[i] <- max(input2-min(input2))
 input2s <- (input2-min(input2)) / max(input2-min(input2)) # scale to 0 - 1

tempref <- okm$tempref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]
 output1 <- tempref
 scalerange_th$mintempref[i] <- min(output1)
 scalerange_th$maxtempref[i] <- max(output1)
 scalerange_th$scaletempref[i] <- max(output1-min(output1))
 output1s <- (output1-min(output1)) / max(output1-min(output1)) # scale to 0 - 1

rhumref <- okm$rhumref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref)]
 output2 <- rhumref
 scalerange_th$minrhumref[i] <- min(output2)
 scalerange_th$maxrhumref[i] <- max(output2)
 scalerange_th$scalerhumref[i] <- max(output2-min(output2))
 output2s <- (output2-min(output2)) / max(output2-min(output2)) # scale to 0 - 1

 # CONVERT TO MATRIX
 intab <- data.frame(input1=input1s,input2=input2s)
 inmat <- as.matrix(intab)
 outtab <- data.frame(output1=output1s,output2=output2s)
 outmat <- as.matrix(outtab)


 # INIT AND TRAIN NEURAL NETWORK
 hl <- c(2,3,3,2)
 anni_th <- initANN(x=inmat,y=outmat,hl=hl,bias=1,transferFun="logsig")
 ann_th <- train(object=anni_th, x=inmat, y=outmat, spIndex=index_th, shuffleDiv=FALSE)
 print("training finished for temp/rhum!")
 summary(ann_th)

 # APPLY MODEL
 prediction <- predict(ann_th,newdata=inmat)
 pre <- as.data.frame(prediction)

 tempmod <- pre$V1 * scalerange_th$scaletempref[i] + scalerange_th$mintempref[i]
 temprecal <- tempmod[ index_th == 4 ]
 tempmod <- tempmod[ index_th == 3 ] # select only last 20% = testing data subset
 tempref <- tempref[ index_th == 3 ] # select only last 20% = testing data subset

 rhummod <- pre$V2 * scalerange_th$scalerhumref[i] + scalerange_th$minrhumref[i]
 rhumrecal <- rhummod[ index_th == 4 ]
 rhummod <- rhummod[ index_th == 3 ] # select only last 20% = testing data subset
 rhumref <- rhumref[ index_th == 3 ] # select only last 20% = testing data subset
 
 timerecal_th <- timeann_th[ index_th == 4 ]

 # ACCURACY METRICS & SCATTER PLOT TEMP
 mae <- mean(abs(tempmod-tempref),na.rm=T)
 bias <- mean(tempmod-tempref,na.rm=T)
 rmse <- sqrt(sum( (tempmod-tempref)**2,na.rm=T)/length(tempmod))
 r2 <- cor(tempmod,tempref,method="spearman")
 message(paste0("temp: mae =",mae," bias =",bias," rmse =",rmse))
 message("plotting recalibrated temp scatter plot: ",paste0(id,"_temp_scatter_ann.png"))
 png(paste0(id,"_temp_scatter_ann.png"),width=800, height=800, units="px", res=300, pointsize=3)
 plot(tempref,tempmod,xlim=c(tempmin,tempmax),ylim=c(tempmin,tempmax),ylab="TEMP BME238 (°C)",xlab="TEMP SHT85 (°C)",
      main=paste0("Temp fitted data ",id," mae=",format(round(mae,3),nsmall=3)," bias=",format(round(bias,3),nsmall=3)," rmse=",format(round(rmse,3),nsmall=3)," r²=",format(round(r2,3),nsmall=3)))
 abline(0,1)
 dev.off()

 # ACCURACY METRICS & SCATTER PLOT RHUM
 mae <- mean(abs(rhummod-rhumref),na.rm=T)
 bias <- mean(rhummod-rhumref,na.rm=T)
 rmse <- sqrt(sum( (rhummod-rhumref)**2,na.rm=T)/length(rhummod))
 r2 <- cor(rhummod,rhumref,method="spearman")
 message(paste0("rhum: mae =",mae," bias =",bias," rmse =",rmse))
 message("plotting recalibrated rhum scatter plot: ",paste0(id,"_rhum_scatter_ann.png"))
 png(paste0(id,"_rhum_scatter_ann.png"),width=800, height=800, units="px", res=300, pointsize=3)
 plot(rhumref,rhummod,xlim=c(rhummin,rhummax),ylim=c(rhummin,rhummax),ylab="RHUM BME238 (%)",xlab="RHUM SHT85 (%)",
      main=paste0("Rhum fitted data ",id," mae=",format(round(mae,3),nsmall=3)," bias=",format(round(bias,3),nsmall=3)," rmse=",format(round(rmse,3),nsmall=3)," r²=",format(round(r2,3),nsmall=3)))
 abline(0,1)
 dev.off()


# save model
message("")
message("saving trained ann model to ",paste0(id,"_th_recalibration_ann.Rdata"," ..."))
save(scalerange_th,ann_th,file=paste0(id,"_th_recalibration_ann.Rdata"))

# save timeann and fitted values ... 
message("")
message("saving recalibrated th data to ",paste0(id,"_th_recalibrated.Rdata"," ..."))
save(timerecal_th,temprecal,rhumrecal ,file=paste0(id,"_th_recalibrated.Rdata"))

message("")
message("plotting recalibrated th ...")

# PREPARE PLOTS FOR RECALIBRATED DATA ONLY
tmin <- min(timerecal_th)
tmax <- max(timerecal_th)
message("tmin th =",tmin)
message("tmax th =",tmax)

 if( length(timerecal_th) > 0 ){

tstart <- min(seq(tmin, tmax, by = "hour"))
tstop <- max(seq(tmin, tmax, by = "hour"))


# PLOT TEMP
message("plotting recalibrated time series for temp: ",paste0(id,"_temp_recalser.png"))
png(paste0(id,"_temp_recalser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
tempmin <- min(c(temprecal,tempref),na.rm=TRUE)
tempmax <- max(c(temprecal,tempref),na.rm=TRUE)
plot(timerecal_th,temprecal,xlab=paste("time UTC"),ylab="Temperature (°C)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(tempmin,tempmax),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("recalibrated Temp oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(tempmin,tempmax,by=5),lwd=0.2,col="gray")
#lines(tempreftime,tempref,col="black",t="l",lwd=1.5)
lines(timerecal_th,temprecal,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("SHT85"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# PLOT RHUM
message("plotting recalibrated time series for rhum: ",paste0(id,"_rhum_recalser.png"))
png(paste0(id,"_rhum_recalser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
rhummin <- min(c(rhum,rhumref),na.rm=TRUE)
rhummax <- max(c(rhum,rhumref),na.rm=TRUE)
plot(timerecal_th,rhumrecal,xlab=paste("time UTC"),ylab="Relative Humidity (%)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(rhummin,rhummax),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("recalibrated Rhum oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(rhummin,rhummax,by=5),lwd=0.2,col="gray")
#lines(rhumreftime,rhumref,col="black",t="l",lwd=1.5)
lines(timerecal_th,rhumrecal,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("SHT85"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

}

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Train Neural Network for fitting both, oklab-pm10 (input3) and oklab-pm25 (input4) at the same time to reference measurements (EDM164, output1 and output2)
# ADDITIONAL INPUT IS RHUM AND TEMP IN ORDER TO REDUCE THE HUMIDITY ERROR!
# Note: all values have to be scaled to range between 0 and 1, results are therefore also scaled and have to be rescaled to get accurate units!
# PREPARE DATA FOR PM10 AND PM25 FITTING

message("")
message("##########################################################################")
message("##########################################################################")
message("running ann for pm10 and pm25 ...")

scalerange_pm <- data.frame(mintempraw=rep(as.double(NA),n),maxtempraw=rep(as.double(NA),n),scaletempraw=rep(as.double(NA),n),
			 mintempref=rep(as.double(NA),n),maxtempref=rep(as.double(NA),n),scaletempref=rep(as.double(NA),n),
                         minrhumraw=rep(as.double(NA),n),maxrhumraw=rep(as.double(NA),n),scalerhumraw=rep(as.double(NA),n),
                         minrhumref=rep(as.double(NA),n),maxrhumref=rep(as.double(NA),n),scalerhumref=rep(as.double(NA),n),
                         minpm25raw=rep(as.double(NA),n),maxpm25raw=rep(as.double(NA),n),scalepm25raw=rep(as.double(NA),n),
                         minpm25ref=rep(as.double(NA),n),maxpm25ref=rep(as.double(NA),n),scalepm25ref=rep(as.double(NA),n),
                         minpm10raw=rep(as.double(NA),n),maxpm10raw=rep(as.double(NA),n),scalepm10raw=rep(as.double(NA),n),
                         minpm10ref=rep(as.double(NA),n),maxpm10ref=rep(as.double(NA),n),scalepm10ref=rep(as.double(NA),n))

timeann_pm <- okm$time [ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref) ]

 message("index for separation into: 1=training, 2=stopping and 3=testing data subset ...")
 # you can modify the vector "index" to select which data should be used for training and testing
 #index_pm <- rep(0,length(timeann_pm))
 #index_pm [ timeann_pm >= t1train_pm & timeann_pm <= t2train_pm ] <- 1
 #index_pm [ timeann_pm >= t1exp & timeann_pm <= t2exp ] <- 4
 #len <- length(timeann_th[index_pm == 1])
 #begin <- match(1,index_th)
 #message("length of ann input data =",len,"  staring at index ",begin)
 #index_pm[begin+floor(len*0.6) : begin+floor(len*0.8)] <- 2
 #index_pm[begin+floor(len*0.8):len] <- 3


 index_pm <- rep(0,length(timeann_pm))
 index_pm [ timeann_pm >= t1train_pm & timeann_pm <= t2train_pm ] <- 1
 len <- length(timeann_pm[index_pm == 1] )
 begin <- match(1,index_pm)
 message("length of ann input data = ",len,"  starting at index ",begin)
 index_pm[ (begin+floor(len*0.6)) : (begin+floor(len*0.8)) ] <- 2
 index_pm[ (begin+floor(len*0.8)) : (begin+(len-1)) ] <- 3
 index_pm [ timeann_pm >= t1exp & timeann_pm <= t2exp ] <- 4


 message("index_pm == 1: ",length(index_pm[index_pm == 1]))
 message("index_pm == 2: ",length(index_pm[index_pm == 2]))
 message("index_pm == 3: ",length(index_pm[index_pm == 3]))
 message("index_pm == 4: ",length(index_pm[index_pm == 4]))


#temp <- okm$temp[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
# input1 <- temp
# scalerange_pm$mintempraw[i] <- min(input1)
# scalerange_pm$maxtempraw[i] <- max(input1)
# scalerange_pm$scaletempraw[i] <- max(input1-min(input1))
# input1s <- (input1-min(input1)) / max(input1-min(input1)) # scale to 0 - 1

temp <- okm$tempref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 input1 <- temp
 scalerange_pm$mintempraw[i] <- min(input1)
 scalerange_pm$maxtempraw[i] <- max(input1)
 scalerange_pm$scaletempraw[i] <- max(input1-min(input1))
 input1s <- (input1-min(input1)) / max(input1-min(input1)) # scale to 0 - 1

#rhum <- okm$rhum[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
# input2 <- rhum
# scalerange_pm$minrhumraw[i] <- min(input2)
# scalerange_pm$maxrhumraw[i] <- max(input2)
# scalerange_pm$scalerhumraw[i] <- max(input2-min(input2))
# input2s <- (input2-min(input2)) / max(input2-min(input2)) # scale to 0 - 1

rhum <- okm$rhumref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 input2 <- rhum
 scalerange_pm$minrhumraw[i] <- min(input2)
 scalerange_pm$maxrhumraw[i] <- max(input2)
 scalerange_pm$scalerhumraw[i] <- max(input2-min(input2))
 input2s <- (input2-min(input2)) / max(input2-min(input2)) # scale to 0 - 1

pm10 <- okm$pm10[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 input3 <- pm10
 scalerange_pm$minpm10raw[i] <- min(input3)
 scalerange_pm$maxpm10raw[i] <- max(input3)
 scalerange_pm$scalepm10raw[i] <- max(input3-min(input3))
 input3s <- (input3-min(input3)) / max(input3-min(input3)) # scale to 0 - 1

pm25 <- okm$pm25[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 input4 <- pm25
 scalerange_pm$minpm25raw[i] <- min(input4)
 scalerange_pm$maxpm25raw[i] <- max(input4)
 scalerange_pm$scalepm25raw[i] <- max(input4-min(input4))
 input4s <- (input4-min(input4)) / max(input4-min(input4)) # scale to 0 - 1


pm10ref <- okm$pm10ref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 output1 <- pm10ref
 scalerange_pm$minpm10ref[i] <- min(output1)
 scalerange_pm$maxpm10ref[i] <- max(output1)
 scalerange_pm$scalepm10ref[i] <- max(output1-min(output1))
 output1s <- (output1-min(output1)) / max(output1-min(output1)) # scale to 0 - 1

pm25ref <- okm$pm25ref[ ! is.na(okm$temp) & ! is.na(okm$tempref) & ! is.na(okm$rhum) & ! is.na(okm$rhumref) & ! is.na(okm$pm10) & ! is.na(okm$pm10ref) & ! is.na(okm$pm25) & ! is.na(okm$pm25ref)]
 output2 <- pm25ref
 scalerange_pm$minpm25ref[i] <- min(output2)
 scalerange_pm$maxpm25ref[i] <- max(output2)
 scalerange_pm$scalepm25ref[i] <- max(output2-min(output2))
 output2s <- (output2-min(output2)) / max(output2-min(output2)) # scale to 0 - 1

 # CONVERT TO MATRIX
 intab <- data.frame(input1=input1s,input2=input2s,input3=input3s,input4=input4s)
 inmat <- as.matrix(intab)
 outtab <- data.frame(output1=output1s,output2=output2s)
 outmat <- as.matrix(outtab)
 


 # INIT AND TRAIN NEURAL NETWORK
 hl <- c(4,4,3,2)
 anni_pm <- initANN(x=inmat,y=outmat,hl=hl,bias=1,transferFun="logsig")
 ann_pm <- train(object=anni_pm, x=inmat, y=outmat, spIndex=index_pm, shuffleDiv=FALSE)
 print("training finished for pm10/pm25!")
 summary(ann_pm)

 # APPLY MODEL
 prediction <- predict(ann_pm,newdata=inmat)
 pre <- as.data.frame(prediction)
 
 pm10mod <- pre$V1 * scalerange_pm$scalepm10ref[i] + scalerange_pm$minpm10ref[i]
 pm10recal <- pm10mod[ index_pm == 4 ]
 pm10mod <- pm10mod[ index_pm == 3 ] # select only last 20% = testing data subset
 pm10ref <- pm10ref[ index_pm == 3 ] # select only last 20% = testing data subset
 
 pm25mod <- pre$V2 * scalerange_pm$scalepm25ref[i] + scalerange_pm$minpm25ref[i]
 pm25recal <- pm25mod[ index_pm == 4 ]
 pm25mod <- pm25mod[ index_pm == 3 ] # select only last 20% = testing data subset
 pm25ref <- pm25ref[ index_pm == 3 ] # select only last 20% = testing data subset
 
 timerecal_pm <- timeann_pm[ index_pm == 4 ]


 # ACCURACY METRICS & SCATTER PLOT PM10
 mae <- mean(abs(pm10mod-pm10ref),na.rm=T)
 bias <- mean(pm10mod-pm10ref,na.rm=T)
 rmse <- sqrt(sum( (pm10mod-pm10ref)**2,na.rm=T)/length(pm10mod))
 r2 <- cor(pm10mod,pm10ref,method="spearman")
 message("plotting recalibrated pm10 scatter plot: ",paste0(id,"_pm10_scatter_ann.png"))
 png(paste0(id,"_pm10_scatter_ann.png"),width=800, height=800, units="px", res=300, pointsize=3)
 plot(pm10ref,pm10mod,xlim=c(pm10min,pm10max),ylim=c(pm10min,pm10max),ylab="PM10 SDS011 (µg/m³)",xlab="PM10 EDM164 (µg/m³)",
      main=paste0("PM10 fitted data ",id," mae=",format(round(mae,3),nsmall=3)," bias=",format(round(bias,3),nsmall=3)," rmse=",format(round(rmse,3),nsmall=3)," r²=",format(round(r2,3),nsmall=3)))
 abline(0,1)
 dev.off()

 # ACCURACY METRICS & SCATTER PLOT PM25
 mae <- mean(abs(pm25mod-pm25ref),na.rm=T)
 bias <- mean(pm25mod-pm25ref,na.rm=T)
 rmse <- sqrt(sum( (pm25mod-pm25ref)**2,na.rm=T)/length(pm25mod))
 r2 <- cor(pm25mod,pm25ref,method="spearman")
 message("plotting recalibrated pm25 scatter plot: ",paste0(id,"_pm25_scatter_ann.png"))
 png(paste0(id,"_pm25_scatter_ann.png"),width=800, height=800, units="px", res=300, pointsize=3)
 plot(pm10ref,pm10mod,xlim=c(pm25min,pm25max),ylim=c(pm25min,pm25max),ylab="PM10 SDS011 (µg/m³)",xlab="PM10 EDM164 (µg/m³)",
      main=paste0("PM25 fitted data",id," mae=",format(round(mae,3),nsmall=3)," bias=",format(round(bias,3),nsmall=3)," rmse=",format(round(rmse,3),nsmall=3)," r²=",format(round(r2,3),nsmall=3)))
 abline(0,1)
 dev.off()

# save model
message("")
message("saving trained ann model to ",paste0(id,"_pm_recalibration_ann.Rdata"," ..."))
save(scalerange_pm,ann_pm,file=paste0(id,"_pm_recalibration_ann.Rdata"))

# save timeann and fitted values ... 
message("")
message("saving recalibrated pm data to ",paste0(id,"_pm_recalibrated.Rdata"," ..."))
save(timerecal_pm,pm10recal,pm25recal ,file=paste0(id,"_pm_recalibrated.Rdata"))


# PREPARE PLOTS FOR RECALIBRATED DATA ONLY
message("")
message("plotting recalibrated pm ...")

tmin <- min(timerecal_pm)
tmax <- max(timerecal_pm)
message("tmin pm =",tmin)
message("tmax pm =",tmax)


if(length(timerecal_pm)>0){
  tstart <- min(seq(tmin, tmax, by = "hour"))
  tstop <- max(seq(tmin, tmax, by = "hour"))

# PLOT PM10
message("plotting recalibrated time series for recalibrated pm10: ",paste0(id,"_pm10_recalser.png"))
png(paste0(id,"_pm10_recalser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
pm10max <- min( max(c(pm10recal,pm10ref),na.rm=TRUE) , 60 )
plot(timerecal_pm,pm10recal,xlab=paste("time UTC"),ylab="pm10 (µg/m³)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(0,pm10max),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("recalibrated PM10 oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(0,pm10max,by=10),lwd=0.2,col="gray")
#lines(pm10reftime,pm10ref,col="black",t="l",lwd=1.5)
lines(timerecal_pm,pm10recal,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("OPC004"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

# PLOT PM25
message("plotting recalibrated time series for pm25: ",paste0(id,"_pm25_recalser.png"))
png(paste0(id,"_pm25_recalser.png"),width=1900, height=600, units="px", res=300, pointsize=3)
pm25max <- min( max(c(pm25recal,pm25ref),na.rm=TRUE) , 50 )
plot(timerecal_pm,pm25recal,xlab=paste("time UTC"),ylab="pm25 (µg/m³)",
xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),ylim=c(0,pm25max),type="n",xaxs="i",yaxs="i",xaxt="n",
main=paste("recalibrated PM25 oklab 5min-mean (",tmin," - ",tmax," UTC)" ))
ihours <- round( seq( as.POSIXct(tstart), as.POSIXct(tstop),by=3600) , units="hours")
axis.POSIXct(1, x=time, at = ihours, format = "%a %m-%d %H:%M")
abline(v= as.POSIXct(ihours) ,lwd=0.2,col="gray")
abline(h=seq(0,pm25max,by=10),lwd=0.2,col="gray")
#lines(pm25reftime,pm25ref,col="black",t="l",lwd=1.5)
lines(timerecal_pm,pm25recal,xlim=c(as.POSIXct(tstart),as.POSIXct(tstop)),col="red",t="l",lwd=0.3,lty=1)
legend("topright",c("OPC004"),col=c("black"),lty=1,lwd=0.5)
legend("topleft",id,col="red",lty=1,lwd=0.3)
dev.off()

}

}
message("")
message("finished!")
