########################################################################
########################################################################
##                                                                    ##        
## Spatially shifting temporal points: estimating pooled              ##
## within-time-series variograms for scarce hydrological data         ##
##                                                                    ##
## Avit Kumar Bhowmik                                                 ##
## Quantitative Landscape Ecology,                                    ## 
## Institute for Environmental Sciences                               ##
## University of Koblenz-Landau                                       ##
## Fortstra?e 7, 76829 Landau in der Pfalz, Germany                   ##
##                                                                    ##
## Pedro Cabral                                                       ##
## NOVA IMS, Universidade Nova de Lisboa                              ## 
## 1070-312 Lisboa, Portugal                                          ## 
##                                                                    ##
##                                                                    ##
## Corresponding Author:                                              ##
## Avit Kumar Bhowmik                                                 ##
## E-mail: bhowmik@uni-landau.de                                      ##
## Telephone: +49 6341 280-31331                                      ##
## Fax: +49 6341 280-31326                                            ##
##                                                                    ##
########################################################################
########################################################################
##                  Supplementary material (SM) 2                     ##
########################################################################
########################################################################
##                                                                    ##
##                               SSTP                                 ##
##                                                                    ##
##        Estimation of pooled within-time series variograms          ##
##                               with                                 ##
##                 spatially shifting temporal points                 ##
##                                                                    ##
########################################################################
########################################################################

## The free open source software package “R” is required with three packages “spacetime”,
## “intamap” and “gstat” to perform SSTP (see references in the paper for documentations).
## R software package can be downloaded from http://www.r-project.org/. Once the software is
## downloaded and installed (please follow the installation guide provided on the website) the
## mentioned packages can be installed by executing the following codes in the command console:

install.packages("spacetime")
install.packages("intamap")
install.packages("gstat")

# The users may need to select the geographic region of interest to install the packages.
# The installed packages need to be initiated in R environment by the following commands:

library(spacetime)
library(intamap)
library(gstat)

# A directory needs to be set as the working directory of R. The data “SM3_Bhowmik_Cabral.Rdata”,
# which is provided as a supplimentary material and is also available from the online repository:
# https://github.com/AvitBhowmik/SSTP, needs to be copied in the directory and loaded in the R
# environment. For example, if a folder called “Sample_Data” in the directory “C:\” (Windows OS)
# contains the data, the working directory can be set and the data can be loaded in R and checked
# for details by executing the following codes:

setwd("C:/Sample_Data")

# Note for Windows OS users: if you copy the code on a Windows OS, please change the slash “\” to
# back slash “/” while setting the working directory. 

# Linux example
# setwd("/media/storage/projects/SSTP/Data/")
# Mac OS X example
# setwd("/Users/<username>/projects/SSTP/Data/")

# Loading data into the R environment
load("Sample_data.Rdata")

# Check data componenets
str(Data_SSTP)

# Data_SSTP is the “spacetimedataframe” object where the spatial, temporal and attribute information
# are stored. “spPoints” are the spatial coordinates with WGS84 reference, “timepoints” are the
# temporal steps (years) between 1993-2007 and the “dataObj” is the attribute (PRCPTOT) at each
# spPoint and each timePoint. Two separation distances need to be defined for spatial shifting of the
# temporal steps. As described in the paper, the separations distance between two shifted temporal
# points is set to 1111km ≈ 10 decimal degree as following:
  
sepDist <- 10

# To calculate the smallest- and largest-spatial-lags for a year or within a time series, see the
# "spDists" function. Now the arbitrary spatial coordinates need to be created to shift the temporal
# data. Since 15 years of PRCPTOT data (1993-2007) need to be shifted and assigned to different spatial
# coordinate clusters, 15 spatial coordinate clusters will be created. The spatial coordinates can
# be obtained from Data_SSTP and new coordinates clusters can be created by executing the following codes:

allCoords <- Data_SSTP@sp@coords
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(sepDist,32),rep(0,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(0,32),rep(sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(-sepDist,32),rep(0,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(0,32),rep(-sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(sepDist,32),rep(sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(-sepDist,32),rep(sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(sepDist,32),rep(-sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(-sepDist,32),rep(-sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(0,32),rep(2*sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(0,32),rep(-2*sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(sepDist,32),rep(2*sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(-sepDist,32),rep(2*sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(sepDist,32),rep(-2*sepDist,32)))
allCoords <- rbind(allCoords,Data_SSTP@sp@coords+cbind(rep(-sepDist,32),rep(-2*sepDist,32)))

# Now the PRCPTOT data from 1993-2007 need to be assigned to the 15 spatial coordinates clusters
# and finally a single “SpatialPointsDataFrame” object can be created with the shifted temporal
# data using the following codes. To give the arbitrary coordinates an unique id, their rownames are
# defined as the rownames of the shifted PRCPTOT data. The year (time step) information will also be
# stored. 

allData <- Data_SSTP[,1:15]@data
allData$Years <- rep(1993:2007, each=32)
rownames(allCoords) <- rownames(allData)
allSpPoints <- SpatialPointsDataFrame(allCoords,allData,proj4string=CRS(proj4string(Data_SSTP)))

# The output object can be plotted to check the temporally observed PRCPTOT values at the shifted
# spatial points, which now corresponds to the temporal points.

spplot(allSpPoints["PRCPTOTWet"], col.regions=bpy.colors(), scales=list(draw=T), colorkey=T)

# For the further steps, the “NA” values need to be removed from the object:

allSpPoints <- allSpPoints[!is.na(allSpPoints@data[[1]]),]

# The next step is to check for anisotropy within the spatially shifted period. This can be done
# by executing the "estimateAnisotropy" function from the “intamap” package in R. Please consult the
# package vignettes for the methodology of anisotropy estimation.

Set  <- 1:15
spacetimedata <- lapply(Set, function(i) {x = Data_SSTP[,i]; x$years = i+1992; x})
spacedata <- do.call(rbind, spacetimedata)
params=NULL
estimateAnisotropy(spacedata[!is.na(spacedata$PRCPTOTWet),], PRCPTOTWet, PRCPTOTWet~1)

# The output “ratio” value is the ratio of the minor and major axis of the ellipse (2.133343) and
# direction is the angle of the anisotropic axis from the normal east to the clockwise directions
# (41.90442). This ratio of the minor and major axis of the ellipse requires transformation before
# input in “gstat” functions as it takes ratio between major and minor axes (A:B) = 1 / 2.133343
# = 0.47 as input. The anisotropy angle remains the same because alpha = anisotropy angle (Ø) from
# normal North to anti-clockwise direction = alternate interior angle of the angle of the anisotropic
# axis from the normal east to the clockwise directions = 41.90. See package vignettes for details.

# The next step is to compute the pooled empirical variogram (semivariances) applying the "variogram"
# function of the “gstat” package. While computing the semivariances, the smallest and largest
# spatial-lags are controlled with the "width" and "cutoff" arguments, respectively as described in
# the paper. The anisotropy parameters are also implemeneted. Finally, the computed empirical
# variogram can be plotted and the initial variogram model parameters can be guessed (see the paper
# for details). In a final step, the variogram will be estimated by fitting the variogram model using
# "fit.variogram" function to the empirical variogram. Users may try fitting other variogram models
# and parameters.

PRCPTOTWet.dir.SSTP = variogram(PRCPTOTWet ~ 1, allSpPoints, alpha=41.90442, width=27.51, cutoff=550)
plot(PRCPTOTWet.dir.SSTP)
PRCPTOTWetdir.model.SSTP = vgm(psill=5, model="Pow", range=1.93, nugget=185800,
                               anis=c(41.90442, 0.4687479))
PRCPTOTWetdir.fit.SSTP = fit.variogram(PRCPTOTWet.dir.SSTP, PRCPTOTWetdir.model.SSTP, fit.sills = TRUE,
                                       fit.ranges = TRUE, fit.method = 7)
plot(PRCPTOTWet.dir.SSTP, PRCPTOTWetdir.fit.SSTP)

## To enable comparison of the computed semivariances and estimated variogram by our SSTP method with
## the avaiable method, we provide codes for computing semivariances and estimating variograms by
## "averaging empirical variograms (AEV)" and weighted AEV method below. For details on the AEV and
## weighted AEV variogram estimation methods, please refer to the paper, cited literature and
## package vignettes.

########################################################################
########################################################################
##                                                                    ##
##                                AEV                                 ##
##                                                                    ##
##        Estimation of pooled within-time series variograms          ##
##                               with                                 ##
##                   Averaging empirical variograms                   ##
##                                                                    ##
########################################################################
########################################################################

PRCPTOTWet.dir.AEV = variogram(object=PRCPTOTWet ~ years, data=spacedata[!is.na(spacedata$PRCPTOTWet),],
                               dX=0, alpha=41.90442, width=27.51, cutoff=550)
plot(PRCPTOTWet.dir.AEV)
PRCPTOTWetdir.model.AEV = vgm(psill=3.398, model="Pow", range=1.86, nugget=202100,
                              anis=c(41.90442, 0.4687479))
PRCPTOTWetdir.fit.AEV = fit.variogram(PRCPTOTWet.dir.AEV, PRCPTOTWetdir.model.AEV, fit.sills = TRUE,
                                      fit.ranges = TRUE, fit.method = 7)
plot(PRCPTOTWet.dir.AEV, PRCPTOTWetdir.fit.AEV)

########################################################################
########################################################################
##                                                                    ##
##                           Weighted AEV                             ##
##                                                                    ##
##        Estimation of pooled within-time series variograms          ##
##                               with                                 ##
##            Weighted averaging of empirical variograms              ##
##                                                                    ##
########################################################################
########################################################################

## Since no function is avaiable for direct estimation of pooled variograms by weighted AEV, we will
## follow the steps below as per the method described in the paper and cited literature.

## First semivariances for individual time steps will be computed.

PRCPTOTWet.dir.WAEV <- list()
for(i in 1993:2007){
  PRCPTOTWet.dir.WAEV[[as.character(i)]] = variogram(object=PRCPTOTWet ~ 1,
                                                     data=spacedata[which(spacedata$years==i &
                                                                            spacedata$PRCPTOTWet!="NA"),],
                                                     alpha=41.90442, width=27.51, cutoff=550)
}

## Computed semivariances from individual time steps will be pooled and binned for different
# spatial-lags avaiable across the time-steps.

PRCPTOT.WAEV<- do.call(rbind, PRCPTOTWet.dir.WAEV)
PRCPTOT.WAEV$bin <- as.numeric(cut(PRCPTOT.WAEV$dist, breaks=c(0, PRCPTOTWet.dir.AEV$dist, 550),
                                   labels=1:21))

## For each spatial lag, a weighted mean of the binned semivariances will be computed. The number of
## total point pairs from all time steps used for comparisons is also computed.

PRCPTOT.wt.WAEV <- data.frame()
for(i in sort(unique(PRCPTOT.WAEV$bin))){
  PRCPTOT.wt.WAEV[i,"np"] <- sum(PRCPTOT.WAEV[which(PRCPTOT.WAEV$bin==i),"np"])
  PRCPTOT.wt.WAEV[i,"dist"] <- max(PRCPTOT.WAEV[which(PRCPTOT.WAEV$bin==i),"dist"])
  PRCPTOT.wt.WAEV[i,"gamma"] <- weighted.mean(PRCPTOT.WAEV[which(PRCPTOT.WAEV$bin==i),"gamma"],
                                              PRCPTOT.WAEV[which(PRCPTOT.WAEV$bin==i),"np"])
  PRCPTOT.wt.WAEV[i,"dir.hor"] <- mean(PRCPTOT.WAEV[which(PRCPTOT.WAEV$bin==i),"dir.hor"])
  PRCPTOT.wt.WAEV[i,"dir.ver"] <- mean(PRCPTOT.WAEV[which(PRCPTOT.WAEV$bin==i),"dir.ver"])
  PRCPTOT.wt.WAEV[i,"id"] <- factor("var1")
}

## To avoid problems with creating a variogram object, we will replace the parameter values of an
## existing variogram object with the computed parameter values above, e.g. mean semivariances will
# be replaced by weighted mean semivariances.

PRCPTOT.wgt.WAEV <- PRCPTOTWet.dir.AEV
PRCPTOT.wgt.WAEV$np <- PRCPTOT.wt.WAEV$np
PRCPTOT.wgt.WAEV$dist <- PRCPTOT.wt.WAEV$dist
PRCPTOT.wgt.WAEV$gamma <- PRCPTOT.wt.WAEV$gamma
PRCPTOT.wgt.WAEV$dir.hor <- PRCPTOT.wt.WAEV$dir.hor
PRCPTOT.wgt.WAEV$dir.ver <- PRCPTOT.wt.WAEV$dir.ver
PRCPTOT.wgt.WAEV$id <- PRCPTOT.wt.WAEV$id

## The computed weighted empirical variogram can be plotted and a variogram model can be fitted.

plot(PRCPTOT.wgt.WAEV)
PRCPTOT.wgt.WAEV.model =  vgm(psill=40, model="Pow", range=1.6, nugget=202100,
                              anis=c(41.90442, 0.4687479))
PRCPTOTWetdir.fit.WAEV = fit.variogram(PRCPTOTWet.dir.AEV, PRCPTOTWetdir.model.AEV, fit.sills = TRUE,
                                      fit.ranges = TRUE, fit.method = 7)
plot(PRCPTOT.wgt.WAEV, PRCPTOTWetdir.fit.WAEV)


########################################################################
##                               End                                  ##
########################################################################