#####################################################################################################
#Main function
#####################################################################################################



TopREML=function(Vtrain,VNew=NULL,frm=as.formula(val~trend),SpaMod='exp',optmeth='BFGS',Aweight=T){
  out=VNew
  lhscol=all.vars(frm)[1]

  library(corpcor)# to calculate matrix inverse
  library(rgeos)
  Vtrain$Acum=sapply(Vtrain@polygons, function(x) x@area)
  if(!is.null(VNew)) VNew$Acum=sapply(VNew@polygons, function(x) x@area)
  
  
  #Initial Checks  
  if(is.null(VNew)) warning('New locations not provided. I will only return the estimated parameters: no predictions!')
  if(class(Vtrain)[1]!="SpatialPolygonsDataFrame") stop('TopREML requires SpatialPolygonDataFrames inputs. Shapefiles can be imported using rgdal:readOGR. Use the REML function to regionalize non topological data.')
  if(!(lhscol %in% names(Vtrain))) stop('Column name of the variable to regionalized not found. Provide correct lhscol as argument')
  if(!is.null(VNew)){
    VNew$tmp=1
    x=try(model.matrix(update(frm, tmp~.),as.data.frame(VNew)))
    if(class(x)=='try-error'){
      warning('Some of the trend column(s) not found. No trends are considered.')
      frm=as.formula(~1)
      x=model.matrix(frm,as.data.frame(VNew))
    }
    r=summary(lm(as.formula(paste(lhscol,'~Acum')),Vtrain))$r.squared
    if(r>0.85) warning(paste(round(100*r,digits=1)),' % of lhs variance explained by watershed area. Check that the lhs parameters was correctly normalized by basin area')
}
  
  #Get area weights and topology from the spatialpolygondataframe
  Uo=getU(Vtrain)
  if(!is.null(VNew)) {
    Uup=getUup(Vtrain,VNew)
    Ud=getUd(Vtrain,VNew)
    Uext0=rbind(cbind(Uo,Ud),cbind(Uup,diag(nrow(VNew))))
  }
  DFtrain=try(getLocBasDF(Vtrain,Uo))
  if(class(DFtrain)=='try-error') DFtrain=getLocBasDFRobust(Vtrain,Uo)
  if(!is.null(VNew)) {
    DFNew=try(getLocBasDFNew(Vtrain, VNew,Uup))
    if(class(DFNew)=='try-error') DFNew=getLocBasDFNewRobust(Vtrain, VNew,Uup)
  }
  if(!Aweight){
    DFNew$A<-DFNew$Acum<-1
    DFtrain$A<-DFtrain$Acum<-1
  }
  
  
  # ESTIMATE PARAMETERS
  parReml=REML.est(DFtrain,Uo,SpaMod,frm,lhscol,optmeth)
  if(is.null(VNew)){
    names(parReml)=c('tau','u','G','Sigma2','Range','Xi','K.inv')
    
    return(parReml) #returns estimated parameters list(tau,u,G,sigma2,rge,C)
  }
  
  # PREDICT
  tau=parReml[[1]]
  u=parReml[[2]]
  G=parReml[[3]]
  sigma2=parReml[[4]]
  rge=parReml[[5]]
  mu=parReml[[6]]
  Cinv=parReml[[7]]
  invG=pseudoinverse(G)
  Uext=Uext0/matrix(c(DFtrain$Acum,DFNew$Acum),nrow=nrow(DFtrain)+nrow(DFNew),ncol=nrow(DFtrain)+nrow(DFNew))
  Abas=c(DFtrain$A,DFNew$A)
  Aext=Abas%*%t(Abas)
  Dext=getD(rbind(DFtrain[,c('x','y')],DFNew[,c('x','y')]))
  Corext=getCor(Dext,SpaMod,rge)

  g=(Uext%*%(Aext*Corext)%*%t(Uext))
  g=matrix((g[(nrow(DFtrain)+1):(nrow(DFtrain)+nrow(DFNew)),1:nrow(DFtrain)]),nrow=nrow(DFNew))
  
  Cinv22=Cinv[((length(tau)+1):nrow(Cinv)),((length(tau)+1):ncol(Cinv))]
  Cinv12=Cinv[1:length(tau),((length(tau)+1):ncol(Cinv))]
  Cinv11=Cinv[1:length(tau),1:length(tau)]
  var<-val<-NULL
  x=matrix(as.vector(x),nrow=nrow(DFNew))
  for(i in 1:nrow(VNew)){
    xi=as.vector(x[i,])
    gi=as.vector(g[i,])
    val=c(val,as.vector(xi%*%as.matrix(tau)+(gi)%*%invG%*%u))
    var=c(var,as.vector(sigma2*t(xi)%*%Cinv11%*%(xi)+sigma2*t(gi)%*%t(invG)%*%Cinv22%*%(invG)%*%gi+2*sigma2*t(xi)%*%Cinv12%*%t(invG)%*%gi))
  }
out$pred.val=val
out$pred.var=var

  return(out)
}


    

#####################################################################################################
#Estimation of model parameters
#####################################################################################################
REML.est=function(DF,Uo,SpaMod='exp',trendcols=as.formula(~trend),lhscol='val',optmeth=NULL){
  
  ################################
  #SpaMod=c('exp','sph')
  #################################

  library(rgeos) #to do vector polygon operation
  library(corpcor)# to calculate pseudo inverse
  X=(model.matrix(trendcols,DF))
  D=getD(DF)
  z=as.matrix(DF[,lhscol])   
  U=Uo/matrix(DF$Acum,nrow = nrow(Uo),ncol = ncol(Uo))
  A=DF$A%*%t(DF$A)
  par0=c(var(lm(trendcols,DF)$resid)/2,mean(D),1)
parsc=c(1,100,0.1)
  i=0
  repeat{
    i=i+1
    optm=try(optim(par0,RL,X=X,z=z,D=D,U=U,A=A,SpaMod=SpaMod,control=list(fnscale=-1,trace=0,parscale=parsc),method=optmeth))
    if(class(optm)!='try-error'|i==5) break
  }
  pars=(abs(optm$par))
  rge=pars[2]
  sigma2=pars[1]  
  mu=pars[3]
  Cor=getCor(D,SpaMod,rge)
  G=U%*%(A*Cor)%*%t(U)
  C=rbind(cbind(t(X)%*%X,t(X)),cbind(X,diag(nrow(X))+1/mu*pseudoinverse(G)))
  C=as.matrix(forceSymmetric(C)) #Remove numerical inaccuracies
  Cinv=pseudoinverse(C)
  estim=Cinv%*%c(t(X)%*%z,diag(length(z))%*%z)
  tau=estim[1:ncol(X)]
  u=estim[(ncol(X)+1):length(estim)]
  return(list(tau,u,G,sigma2,rge,mu,Cinv))
}

#####################################################################################################
#Restricted likelihood function
#####################################################################################################
RL=function(pars,X,z,D,U,A,SpaMod){
  #Corbeil 1976: RL form
  # Lark 2005: N random effects groups
  #sigma2,psy,a: optimization params. If topREML psy0 must be large enough
  #D distance matrix
  #U topological matrix (upstream basins)
  #A area matrix
  #X  fixed effects matrix
  #z  lhs observation vectors
  #Z=identidy if only one random effects... can think of applying two random effects: topo and distance. But this will require applying #see eqn 13 Corbeil and Searle. Try deriving analytically by a, sigma2 and psy for the exponential case...
  ###########################################
  
  library(Matrix)
  sigma2=abs(pars[1])
  rge=abs(pars[2])
  mu=abs(pars[3])
  Z=diag(nrow(X))
  Cor=getCor(D,SpaMod,rge)
  
  G=U%*%(A*Cor)%*%t(U)
  G=as.matrix(forceSymmetric(G))
  H=mu*G+diag(nrow(Z))
  H=as.matrix(forceSymmetric(H)) 
  
  C=rbind(cbind(t(X)%*%X,t(X)%*%Z),cbind(t(Z)%*%X,t(Z)%*%Z+1/mu*pseudoinverse(G)))

  
  W=cbind(X,Z)
  n=nrow(X)
  p=ncol(X)
  C=as.matrix(forceSymmetric(C)) 
  invC=pseudoinverse(C)
  
  P=diag(nrow(W))-W%*%pseudoinverse(C)%*%t(W)
  l=-0.5*(log(det(H))+log(det(t(X)%*%pseudoinverse(H)%*%X))+log(sigma2)*(n-p)+1/sigma2*t(z)%*%P%*%z)  
  return(l)
}


#####################################################################################################
#Spatial Correlation functions
#####################################################################################################



getCor=function(D,SpaMod,rge){
  Exp=function(D,rge) return(exp(-D/rge))
  Sph=function(D,rge) return(ifelse(D<rge,1-3*D/(2*rge)+0.5*(D/rge)^3,0))
  if(SpaMod=='exp') return(Exp(D,rge))
  if(SpaMod=='sph') return(Sph(D,rge))
  stop('unknown variogram model')
}


getD=function(DF){
  library(flexclust)
  return(dist2(DF[,c('x','y')],DF[,c('x','y')]))
}



#####################################################################################################
#Auxilliary functions to extract network topology from nested catchment polygons.
#####################################################################################################

getU=function(vct){
  vct$Acum=sapply(vct@polygons, function(x) x@area)
  pts=as.data.frame(vct)
  coordinates(pts)=coordinates(vct)
  pts@proj4string=(vct@proj4string)
  U=matrix(NA,nrow=nrow(vct),ncol=nrow(vct))
  for(i in 1:nrow(vct)) U[i,]=vct$Acum<vct$Acum[i]&!is.na(over(pts,as(vct[i,], "SpatialPolygons"))) 
  U=U+diag(rep(1,nrow(U)))
  rownames(U) <- vct$SID
  colnames(U) <- vct$SID
  return(U)
}






getLocBasDFRobust=function(vct,U,npt=10000){ #npt is the number of point on the average sized polygon.
  print('Bad shapefile topology. Impementing alternate robust method: A,x and y are approximated.')
  DF=vct@data
  c=coordinates(vct)
  DF$x=c[,1]
  DF$y=c[,2]
  DF$A=DF$Acum
  for(i in 1:nrow(vct)){
    #Use U to improve performance
    if(length(which(U[i,]==1))==1) next #This is himself
    upstr=vct[which(U[i,]==1),]
    n=npt*vct$Acum[i]/mean(vct$Acum)
    pts=spsample(vct[i,],n,type='regular')
    for(j in 1:nrow(upstr)){
      if(vct$Acum[i]==upstr$Acum[j]) next #This is himself
      pts=pts[is.na(over(pts,as(upstr[j,], "SpatialPolygons")))] #1 if point of col was found in row polygon
    }
    
    #coords
    coor=colMeans(pts@coords) 
    DF$x[i]=coor[1]
    DF$y[i]=coor[2]
    #area
    DF$A[i]=DF$Acum[i]*length(pts)/n
  }  
  if(median(DF$A)>10000 | median(DF$Acum)>100000){
    warning('Original coordinate system is in meters. Switching to km.')
    DF$A=DF$A/1e6
    DF$Acum=DF$Acum/1e6
    DF$x=DF$x/1e3
    DF$y=DF$y/1e3
  }
  if(median(DF$A)<1 | median(DF$Acum)<5){
    warning('Original coordinate system is in latlong Switching to km.')
    DF$A=DF$A*1e4
    DF$Acum=DF$Acum*1e4
    DF$x=DF$x*1e2
    DF$y=DF$y*1e2
  }
  return(DF)
}




getUup=function(vctT,vctN){
  vctT$Acum=sapply(vctT@polygons, function(x) x@area)
  vctN$Acum=sapply(vctN@polygons, function(x) x@area)
  ptsT=as.data.frame(vctT)
  coordinates(ptsT)=coordinates(vctT)
  ptsT@proj4string=(vctT@proj4string)
  Uup=matrix(NA,nrow=nrow(vctN),ncol=nrow(vctT))
  for(i in 1:nrow(vctN)) Uup[i,]=vctT$Acum<vctN$Acum[i]&!is.na(over(ptsT,as(vctN[i,], "SpatialPolygons"))) 
  rownames(Uup) <- vctN$SID
  colnames(Uup) <- vctT$SID
  return(1*Uup)
}

getUd=function(vctT,vctN){
  vctT$Acum=sapply(vctT@polygons, function(x) x@area)
  vctN$Acum=sapply(vctN@polygons, function(x) x@area)
  ptsN=as.data.frame(vctN)
  coordinates(ptsN)=coordinates(vctN)
  ptsN@proj4string=(vctN@proj4string)  
  Ud=matrix(NA,nrow=nrow(vctT),ncol=nrow(vctN))
  for(i in 1:nrow(vctT)) Ud[i,]=vctN$Acum<vctT$Acum[i]&!is.na(over(ptsN,as(vctT[i,], "SpatialPolygons"))) 
  rownames(Ud) <- vctT$SID
  colnames(Ud) <- vctN$SID
  return(1*Ud)
}



getLocBasDF=function(vct,U,getvct=F){
  library(rgeos)
  vctN=vct
  for(i in 1:nrow(vct)){
    #Use U to improve performance
    if(length(which(U[i,]==1))==1) next #headbasin.
    upstr=vct[which((U-diag(rep(1,nrow(U))))[i,]==1),] #all basins upstream (excluding the considered basin)
    unio=upstr[1,]
    if(nrow(upstr)>1) for(j in 2:nrow(upstr)) unio=gUnion(unio,upstr[j,]) #merge all basins upstream
    dif=gDifference(vct[i,],unio)
    ar=sapply(dif@polygons[[1]]@Polygons,function(x) x@area) #remove the rubbish of small areas...
    dif@polygons[[1]]@Polygons=list(dif@polygons[[1]]@Polygons[[which(ar==max(ar))]])
    vctN@polygons[i]=dif@polygons[1]
  }
  vctN$xc=coordinates(vctN)[,1]
  vctN$yc=coordinates(vctN)[,2]
  vctN$A=sapply(vctN@polygons, function(x) x@area)
  if(getvct) return(vctN)
  DF=vctN@data
  if(median(DF$A)>10000 | median(DF$Acum)>100000){
    warning('Original coordinate system is in meters. Switching to km.')
    DF$A=DF$A/1e6
    DF$Acum=DF$Acum/1e6
    DF$xc=DF$xc/1e3
    DF$yc=DF$yc/1e3
  }
  
  if(median(DF$A)<1 | median(DF$Acum)<5){
    warning('Original coordinate system is in latlong Switching to km.')
    DF$A=DF$A*1e4
    DF$Acum=DF$Acum*1e4
    DF$xc=DF$xc*1e2
    DF$yc=DF$yc*1e2
  }
  names(DF)[which(names(DF) %in% c('xc','yc'))]=c('x','y')
  return(DF)
}


getLocBasDFNew=function(vctT,vctN,Uup){
  vctNN=vctN
  for(i in 1:nrow(vctN)){
    if(length(which(Uup[i,]==1))==0) next#headbasin.
    upstr=vctT[which(Uup[i,]==1),]
    unio=upstr[1,]
    if(nrow(upstr)>1) for(j in 2:nrow(upstr)) unio=gUnion(unio,upstr[j,]) #merge all basins upstream
    dif=gDifference(vctN[i,],unio)
    ar=sapply(dif@polygons[[1]]@Polygons,function(x) x@area) #remove the rubbish of small areas...
    dif@polygons[[1]]@Polygons=list(dif@polygons[[1]]@Polygons[[which(ar==max(ar))]])
    vctNN@polygons[i]=dif@polygons[1]
  }
  vctNN$xc=coordinates(vctNN)[,1]
  vctNN$yc=coordinates(vctNN)[,2]
  vctNN$A=sapply(vctNN@polygons, function(x) x@area)
  DF=vctNN@data
  if(median(DF$A)>10000 | median(DF$Acum)>100000){
    warning('Original coordinate system is in meters. Switching to km.')
    DF$A=DF$A/1e6
    DF$Acum=DF$Acum/1e6
    DF$xc=DF$xc/1e3
    DF$yc=DF$yc/1e3
  }
  if(median(DF$A)<1 | median(DF$Acum)<5){# | diff(range(DF$xc))<1){the distance criteria does not work if there is only one basin!!
    warning('Original coordinate system is in latlong Switching to km.')
    DF$A=DF$A*1e4
    DF$Acum=DF$Acum*1e4
    DF$xc=DF$xc*1e2
    DF$yc=DF$yc*1e2
  }
  names(DF)[which(names(DF) %in% c('xc','yc'))]=c('x','y')
  return(DF)
}



getLocBasDFNewRobust=function(vctT,vctN,Uup,npt=10000){ #npt is the number of point on the average sized polygon.
  print('Bad shapefile topology. Impementing alternate robust method: A,x and y are approximated.')
  DF=vctN@data
  c=coordinates(vctN)
  DF$x=c[1]
  DF$y=c[2]
  DF$A=DF$Acum
  for(i in 1:nrow(vctN)){
    #Use U to improve performance
    if(length(which(Uup[i,]==1))==0) next
    upstr=vctT[which(Uup[i,]==1),]
    n=npt*vctN$Acum[i]/mean(vctN$Acum)
    pts=spsample(vctN[i,],n,type='regular')
    
    #removes all points in upstr
    for(j in 1:nrow(upstr)){
      if(vctN$Acum[i]==upstr$Acum[j]) next #This is himself
      pts=pts[is.na(over(pts,as(upstr[j,], "SpatialPolygons")))] #1 if point of col was found in row polygon
    }
    #coords
    coor=colMeans(pts@coords) 
    DF$x[i]=coor[1]
    DF$y[i]=coor[2]
    #area
    DF$A[i]=DF$Acum[i]*length(pts)/n
  }  
  if(median(DF$A)>10000 | median(DF$Acum)>100000){
    warning('Original coordinate system is in meters. Switching to km.')
    DF$A=DF$A/1e6
    DF$Acum=DF$Acum/1e6
    DF$x=DF$x/1e3
    DF$y=DF$y/1e3
  }
  if(median(DF$A)<1 | median(DF$Acum)<5){
    warning('Original coordinate system is in latlong Switching to km.')
    DF$A=DF$A*1e4
    DF$Acum=DF$Acum*1e4
    DF$x=DF$x*1e2
    DF$y=DF$y*1e2
  }
  return(DF)
}


