
##############################################
#iteratively reweighted least squares (IRLS) with optional point weights
#  (in addition to residual weights that are adjusted to downweight data with unusually large residuals,
#     as in conventional IRLS)
#
#Author: James Kirchner, ETH Zurich
#Public use allowed under GNU Public License v. 3 (see http://www.gnu.org/licenses/gpl-3.0.en.html)
#
##############################################
IRLS <- function(Y, X, ww=rep(1,length(Y)), wt=rep(1,length(Y)), type="Cauchy") {
  
  # Y is a numeric vector representing a response variable
  # X is a numeric vector or array representing one or more explanatory variables
  # ww is an optional numeric vector of point weights (such as masses for mass-weighted regressions)
  #    the default weight vector gives all ww's the value of 1
  # wt is an optional numeric vector of initial values for the residual weights for the IRLS
  #    the default weight vector gives all initial wt's the value of 1
  # type is an optional string constant indicating the weight function to use.  
  #    If type is spedified, it must be "bisquare", "Welsch", or "Cauchy". Anything else generates an error.
  # the default weight function is Cauchy
  
  # Y, ww and X (or each vector comprising X, if X is two-dimensional) must be of the same length, otherwise an error results.
  # Y and X must not be exactly collinear (that is, there must be some nonzero residuals).  This is not checked.
  
  # IRLS returns an object of class "lm", generated by a call lm(Y ~ X, weights=wt, na.action="na.omit") with the iteratively determined weights. 
  # This object can then be handled just like any other return from lm.  
  # However, the variable names in this "lm" object will be Y and X (as defined internally here) regardless of what names were passed to this function!
  
  
  ##############################################
  #define bisquare weight function
  ##############################################
  bisquare <- function(x, MAR) {
    if (MAR==0) w <- ifelse(x==0, 1, 0) #if MAR is zero, only 0 or 1 weights are possible
    else {
      w <- (1-(x/(6*MAR))^2)^2  #bisquare function
      w[x>6*MAR] <- 0  #replace with zero whenever X is more than 6 times the median absolute residual
    }
    return(w)
  }
  
  
  ##############################################
  #define Welsch weight function
  ##############################################
  Welsch <- function(x, MAR) {
    if (MAR==0) w <- ifelse(x==0, 1, 0) #if MAR is zero, only 0 or 1 weights are possible
    else  w <- exp(-(x/(4.4255*MAR))^2)  #Welsch weight function    
    return(w)
  }
  
  
  ##############################################
  #define Cauchy weight function
  ##############################################
  Cauchy <- function(x,MAR) {
    if (MAR==0) w <- ifelse(x==0, 1, 0) #if MAR is zero, only 0 or 1 weights are possible
    else w <- 1/(1+(x/(3.536*MAR))^2)  #Cauchy weight function
    return(w)
  }
  
  
  if (type!="bisquare") {
    if (type!="Welsch") {
      if (type!="Cauchy") stop("IRLS stopped: no valid weight type specified.  Valid functions are 'bisquare', 'Welsch', and 'Cauchy'")
    }
  }
  
  
  if (length(Y) != length(ww)) stop("IRLS stopped: supplied weight vector must be same length as Y")
  
  
  wwwt <- ww*wt
  
  fit <- lm(Y ~ X, weights=wwwt, na.action="na.omit") #initial least-squares fit
  
  wt_chg <- 999.0 #initialize weight change
  iter <- 0 #initialize the iteration counter
  
  ##############################################
  #Here's the iteration loop, which runs until the largest weight change for any point is less than 0.01, or iteration limit is exceeded
  ##############################################
  while ( (max(wt_chg,na.rm=TRUE) > 0.01) & !all(iter>10, summary(fit)$r.squared>0.999) ){
    if (iter>1000) stop("IRLS stopped: more than 1000 interations, sorry!")
    # This error can arise when Y is perfectly collinear with X for more than half the points,
    # and thus the MAR fluctuates near zero, with the weights never stabilizing.
    # That should normally be handled by the r-squared criterion for loop exiting as defined above.
    
    iter <- iter+1 #increment the iteration counter
    old_wt <- wt #save the old vector of weights for comparison with the next one
    
    const <- fit$coefficients[1]   #this is the intercept
    slope <- fit$coefficients[2:length(fit$coefficients)] #this is the vector of regression coefficients
    
    #explicit calculation of residuals
    if (length(slope)==1) resid <- Y - const - X*slope 
    else resid <- as.vector(Y - const - X %*% slope) 
    #can't use residuals(fit) because missing values will mess up the assignment of weights in the steps that follow
    #note %*% is matrix multiplication in R
    
    abs_resid <- abs(resid)
    abs_resid_nonzero <- ifelse(Y==0, NA, abs_resid) #exclude residuals corresponding to exact zeroes from median, to avoid blowup when there are many repeated zeroes in any flow decile
    MAR <- median(abs_resid_nonzero, na.rm=TRUE)
    
    if (MAR==0.0) stop("IRLS stopped. Solution has collapsed: median absolute residual is zero!")
    
    if (type=="bisquare") wt <- bisquare(abs_resid,MAR) #use bisquare weights
    else if (type=="Welsch") wt <- Welsch(abs_resid,MAR) #use Welsch weights
    else if (type=="Cauchy") wt <- Cauchy(abs_resid,MAR) #use Cauchy weights
  
    wwwt <- ww*wt
    
    fit <- lm(Y ~ X, weights=wwwt, na.action="na.omit") #run multiple regression
    
    wt_chg <- abs(wt-old_wt) #calculate change in weights from previous iteration
    
  } #end while
  
  qq <- list("fit"=fit, "wt"=wt) 
  
  return(qq)
  
}
##############################################
#END OF iteratively reweighted least squares
##############################################






# here is a quick demo

# construct a vector of x values with two wild outliers on each end
x <- 1:1000
x <- x/100
x[1] <- -100
x[2] <- -100
x[999] <- 100
x[1000] <- 100

# construct a vector of y values based on an x-y slope of 1, with normally distributed residuals, 
# but again with two wild outliers on each end (that give a slope near 10 instead of 1)

resid <- rnorm(100)
y <- x+resid
y[1] <- -1000
y[2] <- -1000
y[999] <- 1000
y[1000] <- 1000

# first do ordinary least squares.  This will give a highly corrupted slope near 8.5 or so.
fit <- lm(y~x, na.action="na.omit")
summary(fit)

# now do IRLS.  This will give a slope close to the value of 1.0 that characterizes almost all of the data.
fit <- IRLS(y, x, type="Cauchy")
summary(fit$fit)

# now do IRLS with weights.  This will give a slope close to the value of 1.0 that characterizes almost all of the data.
ww <- runif(length(y), 1.0, 2.0)
fit <- IRLS(y, x, ww, type="Cauchy")
summary(fit$fit)






# here is a quick demo in multiple dimensions

# x is a three-dimensional disk (ball) of normally distributed values
x <- matrix(10.0*rnorm(3000), ncol=3)

# true dependence of y on x is slope of 1 in all dimensions, plus random noise
y <- x[,1]+x[,2]+x[,3]+resid

# now corrupt this, as before, with wild outliers
x[1,1] <- -1000
x[2,1] <- -1000
x[999,1] <- 1000
x[1000,1] <- 1000

y[1] <- -10000
y[2] <- -10000
y[999] <-10000
y[1000] <-10000

# first do ordinary least squares.  This will give a highly corrupted slope near 10 or so in the x[1] dimension.
fit <- lm(y~x, na.action="na.omit")
summary(fit)

# now do IRLS.  This will give a slope close to the value of 1.0 that characterizes almost all of the data.
fit <- IRLS(y, x, type="Cauchy")
summary(fit$fit)

# now do IRLS with weights.  This will give a slope close to the value of 1.0 that characterizes almost all of the data.
ww <- runif(length(y), 0.0, 10.0)
fit <- IRLS(y, x, ww, type="Cauchy")
summary(fit$fit)










# Third demo, this time with NA's in data set (this just tests whether NA's cause problems)
x[10:30,1] <- NA

# first do ordinary least squares.  This will give a highly corrupted slope near 10 or so in the x[1] dimension.
fit <- lm(y~x, na.action="na.omit")
summary(fit)

# now do IRLS.  This will give a slope close to the value of 1.0 that characterizes almost all of the data.
fit <- IRLS(y, x, type="Cauchy")
summary(fit$fit)

# now do IRLS with weights.  This will give a slope close to the value of 1.0 that characterizes almost all of the data.
ww <- runif(length(y), 0.0, 1.0)
fit <- IRLS(y, x, ww, type="Cauchy")
summary(fit$fit)





