/** This helps find the geometric median of a set of n-dimensional points.
    The geometric median is also called the spatial median, the
    Fermat-Torricelli-Weber point, the multivariate L_1 median, etc.  The
    geometric median is the point minimizing the *sum* of the distances to the
    given points.

    As an example, if you wanted several people to meet by flying to a place
    that minimizes the total miles flown by everyone put together, you want the
    geometric median.

    This also allows the points to be "weighted", meaning that the cost for
    one person to travel can be made higher.

    This follows the algorithm described in:

    Vardi, Y, and C H Zhang. “The multivariate L1-median and associated data
    depth.” Proceedings of the National Academy of Sciences of the United
    States of America vol. 97,4 (2000): 1423-6. doi:10.1073/pnas.97.4.1423

    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC26449/

    which is a minor modification of the Weiszfeld algorithm.

    Also see corrections (specifically eq. 12) in
    "A comparison of algorithms for the multivariate L1-median"
      Heinrich Fritz, Peter Filzmoser, Christophe Croux 
    https://feb.kuleuven.be/public/u0017833/PDF-FILES/l1medianR2.pdf

    You generally just want to call GeometricMedian.solve[points].

    See GeometricMedianTest.frink for an example of its usage.

    TODO:  If you specify weights, certain weights may take a long
    time to converge.
*/

class GeometricMedian
{
   /** Solves for the geometric median of an array of points and their
       optional weights. 
   
       params:
         points:  An array of n-dimensional points.

         weights:  An optional array of weights for each point.  The number of
                  weights should be the same as the number of points.  If
                  weights is undef, all points are weighted equally.

       returns
          a point containing the GeometricMedian
   */
   class solve[points, weights=undef, relativeError = 1e-15] :=
   {
      y = average[points]
      if length[points] == 1
         return points@0

      if length[points] == 2
      {
         [y, r] = nextPointVardi[points, y, weights]
         return y
      }

      if weights != undef and length[points] != length[weights]
      {
         println["GeometricMedian.solve:  points and weights do not have the same length."]
         return undef
      }
         
//      y = points@0
      r = undef
      lastr = undef
      lasty = undef
      do
      {
         lastr = r
         lasty = y
         [y, r] = nextPointVardi[points, y, weights]
//         println["y is $y, r is $r"]
      } until ((lastr != undef and r >= lastr) or r < relativeError)

      if (lastr != undef and (r > lastr))
         return lasty
      
      return y
   }
   
   /** Perform one round of several calculations.  This takes an array of
       n-dimensional points and an array of weights with the same length.
       (If the weights are undef, they will all be weighted equally.)
       This is actually Weizsfeld's algorithm if you iterate it (and if you
       never have a case where the workingPoint is the same one of the points.

       params:
         points:  An array of n-dimensional points.

         workingPoint:  The current guess for the geometric median.  This is y
                  in the paper.

         weights:  An optional array of weights for each point.  The number of
                  weights should be the same as the number of points.  If
                  weights is undef, all points are weighted equally.

       returns: [T, r, etaY]
         T:  The value of T from eq. 2.4 in the paper above.  This is a point
             approximating the next point.
         r:  A dimensionless term approximately representing the remaining error
             This should monotonically decrease to zero if iterated using the
             Vardi algorithm.
         etaY:  The "multiplicity" at y 
   */
   class calcOnceWeiszfeld[points, workingPoint, weights=undef] :=
   {
      // We do this to maintain units of measure in the result.
      vecsum = undef   // Left part of eq. 2.4
      sum = undef
      rsum = undef     
      etaY = etaYVardi[workingPoint]
      
      for i = 0 to length[points]-1
      {
         point = points@i
         if weights == undef
            weight = 1
         else
            weight = weights@i
         
         distance = distance[point, workingPoint]  // ||y-x_i||
         // println["Distance is $distance"]
         
         // TODO:  Check if distance is zero magnitude.  This may mean that
         // the points are colinear.
         if point != workingPoint
         {
            //println["Vecsum is $vecsum, point is $point, distance is $distance"]
            vecterm = multiplyScalar[point, weight/distance]
            vecsum = addVectors[vecsum, vecterm]

            // println["sum is $sum, weight is $weight, distance is $distance"]

            sumterm = weight / distance
            if sum == undef
               sum = sumterm
            else
               sum = sum + sumterm

            // Eq. 2.7
            rsumterm = multiplyScalar[subtractVectors[point, workingPoint],
                                      weight/distance]
            rsum = addVectors[rsum, rsumterm]
         } else
            etaY = weight
      }
      
      T = divideScalar[vecsum, sum]
      r = vectorLength[rsum]
      return [T, r, etaY]
   }

   /** Perform one round of several calculations.  This takes an array of
       n-dimensional points and an array of weights with the same length.
       (If the weights are undef, they will all be weighted equally.)

       params:
         points:  An array of n-dimensional points.

         workingPoint:  The current guess for the geometric median.  This is y
                  in the paper.

         weights:  An optional array of weights for each point.  The number of
                  weights should be the same as the number of points.  If
                  weights is undef, all points are weighted equally.

       returns: T
         T:  The value of T from eq. 2.4 in the paper above.  This is a point
             approximating the next point.
   */
   class calcOnceVardi[points, workingPoint, weights=undef] :=
   {
      // We do this to maintain units of measure in the result.
      vecsum = undef   // Vector, Right part of eq. 2.4
      sum = undef      // Scalar, Left part of eq. 2.4
      rsum = undef 
      etaY = 0
      
      for i = 0 to length[points]-1
      {
         point = points@i
         if weights == undef
            weight = 1
         else
            weight = weights@i
         
         distance = distance[workingPoint, point]  // ||y-x_i||
         // println["Distance is $distance"]
         
         // TODO:  Check if distance is zero magnitude.  This may mean that
         // the points are colinear.
         if point != workingPoint
         {
            //println["Vecsum is $vecsum, point is $point, distance is $distance"]
            vecterm = multiplyScalar[point, weight/distance] // Right frac 2.4
            vecsum = addVectors[vecsum, vecterm]  // Right summation 2.4

            // println["sum is $sum, weight is $weight, distance is $distance"]

            sumterm = weight / distance
            if sum == undef
               sum = sumterm
            else
               sum = sum + sumterm
         }
      }
      
      T = divideScalar[vecsum, sum]
      return T
   }
   
   /** This returns the next point in the iteration using the Vardi-Zhang
       algorithm.  That is, eq. 2.6 in the paper.

       It returns a point that is the next point in the iteration and its error.
   
       returns [T, r]
          T: The next point
          r: The relative error.  This should monotonically decrease toward 0.
   */
   class nextPointVardi[points, workingPoint, weights=undef] :=
   {
      T = calcOnceVardi[points, workingPoint, weights]
      [r, etaY] = rEtaVardi[points, workingPoint, weights]
      ratio = ( r==0 || etaY == 0 ? 0 : etaY/r )
      // println["r is $r, etaY is $etaY, ratio is $ratio"]
      T = addVectors[multiplyScalar[T,(1-ratio)],
                     multiplyScalar[workingPoint, min[1,ratio]]]

      return [T, r]
   }

   /** This returns the next point in the iteration using the Weiszfeld
       algorithm.  This will get stuck if the working point is exactly on
       one of the points.

       returns [T, r]
          T: The next point
          r: The relative error.
   */
   class nextPointWeiszfeld[points, workingPoint, weights=undef] :=
   {
      [T, r, etaY] = calcOnceWeiszfeld[points, workingPoint, weights]
      return [T, r]
   }

   /** Return the distance between two n-dimensional points, represented as
       arrays. */
   class distance[x, y] :=
   {
      d2 = 0 (x@0)^2   // Preserve dimensions
      for i = 0 to length[x]-1
         d2 = d2 + (x@i - y@i)^2
      
      return sqrt[d2]
   }

   /** Return the magnitude of a vector. */
   class vectorMagnitude[x] :=
   {
      d2 = 0 (x@0)^2   // Preserve dimensions
      for i = 0 to length[x]-1
         d2 = d2 + (x@i)^2
      
      return sqrt[d2]
   }

   /** Return the distances between an array of points and the working point
       as an array of distances.
   */
   class distances[points, workingPoint] :=
   {
      result = new array
      for p = points
         result.push[distance[p, workingPoint]]

      return result
   }

   /** Return the length of a single n-dimensional vector. */
   class vectorLength[x] :=
   {
      d2 = 0 (x@0)^2   // Preserve dimensions
      for i = 0 to length[x]-1
         d2 = d2 + (x@i)^2
      
      return sqrt[d2]
   }
   
   /** Multiply a vector (point) by a scalar.  It will be nice when Frink does
       vector multiplication automatically.
   */
   class multiplyScalar[vec, scalar] :=
   {
      result = new array[length[vec]]
      for i = rangeOf[vec]
         result@i = vec@i * scalar

       return result
   }

   /** Divide a vector (point) by a scalar.  It will be nice when Frink does
       vector multiplication automatically.
   */
   class divideScalar[vec, scalar] :=
   {
      result = new array[length[vec]]
      for i = rangeOf[vec]
         result@i = vec@i / scalar

      return result
   }
   
   /** Add vectors v1+v2 and returnt the result as a vector. */
   class addVectors[v1, v2] :=
   {
      if v1 == undef
         return v2
      
      result = new array[length[v1]]
      for i = rangeOf[v1]
         result@i = v1@i + v2@i

      return result
   }
   
   /** Subtract vectors v1-v2 and return the result as a vector. */
   class subtractVectors[v1, v2] :=
   {
      result = new array[length[v1]]
      for i = rangeOf[v1]
         result@i = v1@i - v2@i

      return result
   }

   /** Find the average of a bunch of points.  This gives a good first guess
       to the solver.  Input is an array of points. */
   class average[points] :=
   {
      items = length[points]
      result = undef
      for p = points
         result = addVectors[result, p]
      
      return divideScalar[result, items]
   }

   /** Returns the magnitude r[y] in the Vardi algoirhtm.  This is equation
       2.7 for r[y] and equation 2.5 for eta[y]

       Returns [r[y], eta[y]]
   */
   class rEtaVardi[points, workingPoint, weights] :=
   {
      zerounits = 0 workingPoint@0   // Make units of measure come out right
      etaY = 0
      
      R = undef
      for i = 0 to length[points]-1
      {
         xi = points@i
         etai = (weights == undef ? 1 : weights@i)
         dist = distance[xi, workingPoint]  // Bottom of eq 2.7
//         println["dist is $dist, zerounits is $zerounits"]
         if dist != zerounits
         {
//            println["xi is $xi, workingPoint is $workingPoint"]
            vec = subtractVectors[xi, workingPoint] // Top of eq. 2.7
            term = multiplyScalar[vec, etai / dist]
            R = addVectors[R, term]
         } else
            etaY = etaY + etai  // See correction (eq. 12) in Fritz
      }

//      println["R is $R"]
      return [vectorMagnitude[R], etaY]
   }
}