library(lubridate)

###################################################################
# SELECT TIME PERIOD
# Length of the default look-back window in seconds: one day, times seven.
daystep <- 24 * 60 * 60
step <- 7 * daystep

# Trailing command-line arguments, if any, are read as start and end time.
args <- commandArgs(trailingOnly = TRUE)
message("ARGUMENTS = ", length(args))
if (length(args) > 0) {
  t1 <- as.POSIXct(args[1])
  t2 <- as.POSIXct(args[2])
} else {
  # No arguments: a window of `step` seconds ending at the current time,
  # rounded to 5 minutes.
  t2 <- round_date(as.POSIXct(Sys.time()), "5 minutes")
  t1 <- t2 - step
}

# SET TIME PERIOD HERE:
# NOTE(review): the two active assignments below run unconditionally, so they
# replace the period selected above (arguments or default window). Comment
# them out to make the selection above effective.
# NOTE(review): no tz argument is passed to as.POSIXct(), so the "UTC" suffix
# in these strings is presumably not interpreted as a time zone -- confirm.
# Earlier settings, kept for reference:
#t1 <- as.POSIXct("2022-11-22 00:00:00 CET")
#t2 <- as.POSIXct("2021-11-13 00:00:00 CET")
#t2 <- as.POSIXlt(Sys.time(),tz="UTC")
t1 <- as.POSIXct("2023-05-23 14:00:00 UTC")
t2 <- round_date(as.POSIXct("2023-08-22 06:00:00 UTC"), "5 minutes")

# Zero the seconds by formatting to minute resolution and parsing again.
t1 <- as.POSIXct(format(t1, "%Y-%m-%d %H:%M:00 UTC"))
t2 <- as.POSIXct(format(t2, "%Y-%m-%d %H:%M:00 UTC"))

message("PERIOD = ", t1, " ", t2)

# One entry per calendar day from the day of t1 to the day of t2.
runningdate <- seq(as.Date(t1), as.Date(t2), by = "day")

# Width of the aggregation interval (alternatives used before: "60 sec",
# "1 min").
aggregation <- "5 min"



###################################################################
# GET MATCHING FILE NAMES FOR SELECTED DAYS
# For every day in `runningdate` the asopc.cgi listing is requested with that
# day as name1 plus the fixed name2/name3 filters. The replies are stacked
# into `flist`, whose column V1 is used as the file name further down.
# `flist` stays NULL when no request succeeds.
name2 <- "177410813"  # presumably a device id; earlier runs used "OPC" -- verify
name3 <- ""

flist_parts <- lapply(as.character(runningdate), function(name1) {
  url <- paste0(
    "https://saqn.geo.uni-augsburg.de/asopc.cgi?name1=", name1,
    "&name2=", name2,
    "&name3=", name3
  )
  message("URL = ", url)
  # A failed request (for example an empty reply) must not abort the run;
  # try() reports the error and the day is skipped.
  flist_tmp <- try(read.table(url, stringsAsFactors = FALSE))
  if (inherits(flist_tmp, "try-error")) {
    message("no files")
    return(NULL)
  }
  flist_tmp
})

# Bind once instead of growing the table inside the loop; NULL entries from
# skipped days are ignored by rbind().
flist <- do.call(rbind, flist_parts)

###################################################################
# READ MATCHING FILES DATA IN OBJECT asopc

# Bring an OPC-N2 table to the column layout the rest of the script expects:
# columns the OPC-N2 files do not provide are added as NA, PM1/PM10/PM2.5 are
# copied to PM_1/PM_10/PM_2.5, and the original PM columns plus Pressure and
# SFR are removed. Literal "nan" entries are replaced by NA.
normalise_opcn2 <- function(dat) {
  old_pm <- c("PM1", "PM10", "PM2.5")
  new_pm <- c("PM_1", "PM_10", "PM_2.5")
  dropped <- c(old_pm, "Pressure", "SFR")

  absent <- setdiff(dropped, names(dat))
  if (length(absent) > 0) {
    stop("OPC-N2 file lacks expected column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  for (nm in c(paste0("Bin_", 16:23), "Fan_rev_count", "Laser_status")) {
    dat[[nm]] <- NA
  }
  for (k in seq_along(old_pm)) {
    dat[[new_pm[k]]] <- dat[[old_pm[k]]]
  }
  for (nm in c("Reject_count_Glitch", "Reject_count_LongTOF",
               "Reject_Count_OutOfRange", "Reject_count_Ratio",
               "Relative_humidity", "Sample_Flow_Rate", "Sampling_Period")) {
    dat[[nm]] <- NA
  }
  dat <- dat[setdiff(names(dat), dropped)]

  # Previously applied to every OPC-N2 file except the first one read; now
  # applied to all of them.
  dat[dat == "nan"] <- NA
  dat
}

# Download one listed file and return its table with the metadata columns
# opctype, opcid, shttype and shtid taken from the "_"-separated fields 3 to 6
# of the file name.
read_asopc_file <- function(entry) {
  parts <- strsplit(entry, "_", fixed = TRUE)[[1]]
  opcid <- parts[4]
  fname <- paste0("https://saqn.geo.uni-augsburg.de/asopc/", entry)
  message(opcid, " FILE ADRESS = ", fname)

  dat <- read.table(fname, header = TRUE, stringsAsFactors = FALSE)
  dat$opctype <- parts[3]
  dat$opcid <- opcid
  dat$shttype <- parts[5]
  # Strip only a literal trailing ".txt" (the former pattern ".txt" matched
  # any character followed by "txt", anywhere in the id).
  dat$shtid <- sub("\\.txt$", "", parts[6])

  if (grepl("OPC-N2", fname, fixed = TRUE)) {
    dat <- normalise_opcn2(dat)
  }
  dat
}

# Without any listed file there is nothing to read; stop here instead of
# failing later on a missing (or stale, left-over) `asopc` object.
if (length(flist$V1) == 0) {
  stop("no input files listed for the selected period", call. = FALSE)
}

# Read every file, then bind once instead of growing `asopc` inside a loop.
asopc <- do.call(rbind, lapply(flist$V1, read_asopc_file))

# NOTE(review): no tz argument is given, so the time_UTC strings are
# presumably parsed in the session time zone -- confirm this matches how
# t1/t2 are built.
asopc$time <- as.POSIXct(asopc$time_UTC, format = "%Y-%m-%dT%H:%M:%S")
message("reading done!")

###################################################################
# CREATE DATA FRAME OF aggregation INTERVALL TIME STEPS
# Result: aggpm10, aggpm25, aggtemp and aggrhum, each with a `time` column
# (regular grid from t1 to t2 in steps of `aggregation`) and one column
# "id<opcid>" per device holding the interval means.

# Mean of `values` per time bin.
#   values  : observations of one device (may be NULL or all NA)
#   bins    : integer bin code of every observation (NA = outside the grid)
#   n_steps : number of rows of the time grid
# Returns a numeric vector of length n_steps; bins without any non-missing
# observation stay NA. Missing observations are dropped before averaging.
bin_means <- function(values, bins, n_steps) {
  out <- rep(NA_real_, n_steps)
  if (length(values) == 0) {
    return(out)
  }
  keep <- !is.na(values) & !is.na(bins)
  if (any(keep)) {
    means <- tapply(values[keep], bins[keep], mean)
    # tapply() names its result by bin code, which is the grid row index.
    out[as.integer(names(means))] <- as.numeric(means)
  }
  out
}

secs <- data.frame(
  time = seq(from = as.POSIXct(t1), to = as.POSIXct(t2), by = aggregation)
)
n_steps <- nrow(secs)
breaks <- as.numeric(secs$time)

# One result table per variable, all starting from the bare time grid.
agg <- list(pm10 = secs, pm25 = secs, temp = secs, rhum = secs)

ids <- sort(unique(asopc$opcid))
for (id in ids) {
  message(" ")
  message("id =", id)

  rows <- asopc$opcid == id
  series <- list(
    pm10 = asopc$PM_10[rows],
    pm25 = asopc$PM_2.5[rows],
    temp = asopc$SHT_Temp[rows],
    rhum = asopc$SHT_Rhum[rows]
  )
  if (length(series[["pm10"]]) == 0) {
    message("skip!")
    next
  }
  col_name <- paste0("id", id)

  # Bin code k means the observation lies in (grid[k], grid[k + 1]] (cut()
  # default, right-closed), so every mean is stored in the row of its
  # interval start and the last grid row never receives a value.
  idtime <- as.numeric(as.POSIXct(asopc$time[rows]))
  bins <- cut(idtime, breaks = breaks, labels = FALSE)

  if (all(is.na(bins))) {
    # No observation of this device falls into the selected period.
    message("skipping!")
    for (nm in names(agg)) {
      agg[[nm]][[col_name]] <- NA_real_
    }
    next
  }

  for (nm in names(series)) {
    message(nm)
    agg[[nm]][[col_name]] <- bin_means(series[[nm]], bins, n_steps)
  }
}
message("aggregation done!")

aggpm10 <- agg$pm10
aggpm25 <- agg$pm25
aggtemp <- agg$temp
aggrhum <- agg$rhum

# Store the four aggregated tables under their asopcagg* names in asopc.Rdata
# (written to the current working directory).
asopcaggrhum <- aggrhum
asopcaggtemp <- aggtemp
asopcaggpm25 <- aggpm25
asopcaggpm10 <- aggpm10
save(
  list = c("asopcaggpm10", "asopcaggpm25", "asopcaggtemp", "asopcaggrhum"),
  file = "asopc.Rdata"
)

