/** This library contains routines to perform a best linear or other type of
    fit to a set of data points.  This is often referred to as "regression"
    because of some historical accident.

    These functions are designed to preserve units of measure, to provide
    coefficients with correct units of measure, and to work for purely
    symbolic values!

    To use this file, see the examples in curveFitTest.frink
    Those examples are powerful and let you, say, derive the gravitic equation.

    Also see LeastSquares.frink and the test program LeastSquaresTest.frink
    which allow you to fit arbitrary linear systems to arbitrary basis
    functions. 

    Also see the leastSquares function in Matrix.frink and the test program
    MatrixQRTest.frink which demonstrate linear least-squares fitting using QR
    decomposition which is general for a wide variety of linear systems with
    non-exact measurements.

    See:
    https://mathworld.wolfram.com/LeastSquaresFitting.html

    See also for special curve fits:
    TODO: Implement these.
    https://mathworld.wolfram.com/LeastSquaresFittingExponential.html
    https://mathworld.wolfram.com/LeastSquaresFittingLogarithmic.html
    https://mathworld.wolfram.com/LeastSquaresFittingPowerLaw.html
*/

/** Performs a best linear fit of the specified data points.  In other words,
    this finds the coefficients a and b of a line (in slope-intercept form)
    with the equation:

    y = a x + b

    params:
      data:   an array or set of [x,y] pairs.

    returns:
      [a, b, r]

    where
      r is the correlation coefficient
*/
linearFit[data] :=
{
   array = toArray[data]
   N = length[array]
   if N < 2
      return [undef, undef, undef]

   // We do it this way to preserve units of measure by initializing the sum
   // with the first element.
   [x,y] = array@0
   sxy = x y        // Sum of x*y
   sx = x           // Sum of x
   sy = y           // Sum of y
   sx2 = x^2        // Sum of x^2
   sy2 = y^2        // Sum of y^2

   for i = 1 to N-1
   {
      [x,y] = array@i
      sxy = sxy + x y
      sx = sx + x
      sy = sy + y
      sx2 = sx2 + x^2
      sy2 = sy2 + y^2
   }

   denom = N sx2 - sx^2
   a = (N sxy - sx sy) / denom
   b = (sy sx2 - sx sxy) / denom
//   println["got here, denom=$denom"]
//   println["N=$N, sxy=$sxy, sx=$sx, sy=$sy"]

   // TODO:  make these arbitrary-precision sqrt?  That means we have to
   // fix arbitrary-precision sqrt so it takes units of measure.
   r = (N sxy - sx sy) / (sqrt[denom] sqrt[N sy2 - sy^2])
//   println["Got past r"]
   
   return [a, b, r]
}

/** This performs a linear fit of the specified data as above, but instead
    of returning the coefficients, it returns a function representing the
    line, or, in other words, an anonymous function f that represents
    y = f[x] of the best-fit line, or in more Frink-like notation, the
    anonymous function looks like:

    {|x|  a x + b }
*/
linearFitFunction[data] :=
{
   [a, b, r] = linearFit[data]
   return parseToExpression["{|x| (" + inputForm[a] + ") x + (" + inputForm[b] + ")}"]
}

/** This performs a linear fit of the specified data as above, but instead
    of returning the coefficients, it returns an expression representing the
    line with x as the unknown variable.  For example,

    a x + b
*/
linearFitExpression[data] :=
{
   [a, b, r] = linearFit[data]
   return parseToExpression["(" + inputForm[a] + ") x + (" + inputForm[b] + ")"]
}

/** Performs a best quadratic fit of the specified data points.  In other words,
    this finds the coefficients a,b,c of a quadratic equation, that is,
    the equation:

    y = a x^2 + b x + c

    params:
      data:   an array or set of [x,y] pairs.

    returns:
      [a, b, c, r]

    where
      r is the correlation coefficient
*/
quadraticFit[data] :=
{
   array = toArray[data]
   N = length[array]
   if N < 3
      return [undef, undef, undef, undef]

   // We do it this way to preserve units of measure by initializing the sum
   // with the first element.
   [x,y] = array@0
   sx = x           // Sum of x
   sy = y           // Sum of y
   sxy = x y        // Sum of x*y
   sx2 = x^2        // Sum of x^2
   sx3 = x^3        // Sum of x^3
   sx4 = x^4        // Sum of x^4
   sy2 = y^2        // Sum of y^2
   sx2y = x^2 y     // Sum of x^2 * y
   
   for i = 1 to N-1
   {
      [x,y] = array@i
      sxy = sxy + x y
      sx = sx + x
      sy = sy + y
      sx2 = sx2 + x^2
      sx3 = sx3 + x^3
      sx4 = sx4 + x^4
      sy2 = sy2 + y^2
      sx2y = sx2y + x^2 y
   }

   // These symbol changes make it more concise and match the symbols
   // in Jean Meeus, Astronomical Algorithms, 4.5 and 4.6
   P = sx
   Q = sx2
   R = sx3
   S = sx4
   T = sy
   U = sxy
   V = sx2y

   D = N Q S + 2 P Q R - Q^3 - P^2 S - N R^2

   a = (N Q V + P R T + P Q U - Q^2 T - P^2 V - N R U) / D

   b = (N S U + P Q V + Q R T - Q^2 U - P S T - N R V) / D

   c = (Q S T + Q R U + P R V - Q^2 V - P S U - R^2 T) / D

   meany = sy / N
   
   [x,y] = array@0
   SSE = (y - a x^2 - b x - c)^2
   SST = (y - meany)^2
   for i = 1 to N-1
   {
      [x,y] = array@i
      SSE = SSE + (y - a x^2 - b x - c)^2
      SST = SST + (y - meany)^2
   }

   r = sqrt[1 - SSE/SST]
   
   return [a, b, c, r]
}

/** This performs a quadratic fit of the specified data as above, but instead
    of returning the coefficients, it returns an expression representing the
    fit with x as the unknown.  For example:

    a x^2 + b x + c
*/
quadraticFitExpression[data] :=
{
   [a, b, c, r] = quadraticFit[data]
   return parseToExpression["(" + inputForm[a] + ") x^2 + (" + inputForm[b] + ") x + (" + inputForm[c] + ")"]
}

/** This performs a quadratic fit of the specified data as above, but instead
    of returning the coefficients, it returns a function representing the
    function, or, in other words, an anonymous function f that represents
    y = f[x] of the best-fit line, or in more Frink-like notation, the
    anonymous function looks like:

    {|x|  a x^2 + b x + c }
*/
quadraticFitFunction[data] :=
{
   [a, b, c, r] = quadraticFit[data]
   return parseToExpression["{|x| (" + inputForm[a] + ") x^2 + (" + inputForm[b] + ") x + (" + inputForm[c] + ")}"]
}


/** Fit an exponential function of the form y = A e^(B x).

    See:
     https://mathworld.wolfram.com/LeastSquaresFittingExponential.html

    This uses the former fit on the page.  It gives more weight to small y
    values.

    It was not originally clear how to make these equations have units of
    measure.  Taking, say, the logarithm or the exp of anything but a
    dimensionless term makes no sense.  The product (B x) must be
    dimensionless (to be able to take the exponential function) so the
    dimensions of B must be the dimensions of (1/x).  Similarly, A and y must
    have the same dimensions.  But to take ln[y] means that y *must* be
    dimensionless (which is unnecessary.  It could have the same dimensions as
    A.)

    returns:
      [A, B]
*/
exponentialFit[data] :=
{
   [xa, ya] = unzip[data]
   len = length[xa]
   [x, xdim] = factorUnits[xa@0]
   [y, ydim] = factorUnits[ya@0]
   lny = ln[y]
   a1n = 0 y
   a2n = 0 x^2
   a3n = 0 x
   a4n = 0 x lny
   for i=0 to len-1
   {
      x = xa@i / xdim
      y = ya@i / ydim
      lny = ln[y]
      a1n = a1n + lny
      a2n = a2n + x^2
      a3n = a3n + x
      a4n = a4n + x lny
   }

   denom = len a2n - a3n^2
   a = (a1n a2n - a3n a4n) / denom 
   b = (len a4n - a3n a1n) / denom
   return [exp[a] ydim, b / xdim]
}

/** This performs an exponential fit of the specified data as above, but
    instead of returning the coefficients, it returns an expression
    representing the fit with x as the unknown.

    A e^(B x)
*/
exponentialFitExpression[data] :=
{
   [A, B] = exponentialFit[data]
   return parseToExpression["(" + inputForm[A] + ") e^((" + inputForm[B] + ")*(" + inputForm[x] + "))"]
}

/** This performs an exponential fit of the specified data as above, but
    instead of returning the coefficients, it returns a function representing
    the function, or, in other words, an anonymous function f that represents
    y = f[x] of the best-fit line, or in more Frink-like notation, the
    anonymous function looks like:

    {|x|  A e^(B x)}
*/
exponentialFitFunction[data] :=
{
   [A, B] = exponentialFit[data]
   return parseToExpression["{|x| (" + inputForm[A] + ") e^((" + inputForm[B] + ")*(" + inputForm[x] + "))}"]
}

/** Fit an exponential function of the form y = A e^(B x).

    See:
     https://mathworld.wolfram.com/LeastSquaresFittingExponential.html

    This uses the latter fit on the page.  It gives more weight to higher y
    values.

    It was not originally clear how to make these equations have units of
    measure.  Taking, say, the logarithm or the exp of anything but a
    dimensionless term makes no sense.  The product (B x) must be
    dimensionless (to be able to take the exponential function) so the
    dimensions of B must be the dimensions of (1/x).  Similarly, A and y must
    have the same dimensions.  But to take ln[y] means that y *must* be
    dimensionless (which is unnecessary.  It could have the same dimensions as
    A.)

    returns:
      [A, B]
*/
exponentialFit1[data] :=
{
   [xa, ya] = unzip[data]
   len = length[xa]
   [x, xdim] = factorUnits[xa@0]
   [y, ydim] = factorUnits[ya@0]
   lny = ln[y]
   a1n = 0 x^2y
   a2n = 0 y
   a3n = 0 x y
   a4n = 0 x y lny
   a1d = 0 y
   for i=0 to len-1
   {
      x = xa@i / xdim
      y = ya@i / ydim
      lny = ln[y]
      a1n = a1n + x^2 y
      a2n = a2n + y lny
      a3n = a3n + x y
      a4n = a4n + x y lny
      a1d = a1d + y
   }

   denom = (a1d a1n - a3n^2)
   a = (a1n a2n - a3n a4n) / denom 
   b = (a1d a4n - a3n a2n) / denom
   return [exp[a] ydim, b / xdim]
}

/** This performs an exponential fit of the specified data as above, but
    instead of returning the coefficients, it returns an expression
    representing the fit with x as the unknown.

    A e^(B x)
*/
exponentialFitExpression1[data] :=
{
   [A, B] = exponentialFit1[data]
   return parseToExpression["(" + inputForm[A] + ") e^((" + inputForm[B] + ")*(" + inputForm[x] + "))"]
}

/** This performs an exponential fit of the specified data as above, but
    instead of returning the coefficients, it returns a function representing
    the function, or, in other words, an anonymous function f that represents
    y = f[x] of the best-fit line, or in more Frink-like notation, the
    anonymous function looks like:

    {|x|  A e^(B x)}
*/
exponentialFitFunction1[data] :=
{
   [A, B] = exponentialFit1[data]
   return parseToExpression["{|x| (" + inputForm[A] + ") e^((" + inputForm[B] + ")*(" + inputForm[x] + "))}"]
}


/** Calculates the r-value of the correlation between the yvalues and the fit
    function's predicted yvalues.  See
      https://stats.libretexts.org/Bookshelves/Introductory_Statistics/OpenIntro_Statistics_(Diez_et_al)./07%3A_Introduction_to_Linear_Regression/7.02%3A_Line_Fitting_Residuals_and_Correlation */
rValue[function, data] :=
{
   [xvalues, yvalues] = unzip[data]
   ycalc = map[function, xvalues]
   len = length[yvalues]

   [meanx, sdx] = meanAndSD[yvalues, true]
   [meany, sdy] = meanAndSD[ycalc,   true]

   //println["mean x: $meanx  sdx: $sdx"]
   //println["mean y: $meany  sdy: $sdy"]
   sum = undef  // Make units come out right
   for i=rangeOf[yvalues]
   {
      term = ((yvalues@i - meanx) / sdx) * ((ycalc@i - meany) / sdy)
      if sum == undef
         sum = term
      else
         sum = sum + term
   }

   return sum / (len-1)
}

/** Attempts to optimize an exponential fit by breaking it into 2 piecewise
    segments each with different coefficients.

    returns [function, rValue]
*/
exponentialFitFunctionPiecewise[data, debug=false] :=
{
   sorted = sort[deepCopy[data], byColumn[0]]
   len = length[sorted]
   bestR = -1000
   bestF = undef
   for cutBefore = 3 to len-3
   {
      cutx = sorted@cutBefore@0           // x value after cut point
      data1 = first[sorted, cutBefore]
      data2 = rest[sorted, cutBefore]
      e1 = exponentialFitExpression[data1]
      e2 = exponentialFitExpression[data2]
      fs = "{|x| \n" +
           "   if x < " + inputForm[cutx] + "\n" +
           "      return " + inputForm[e1] + "\n" +
           "   else\n" +
           "      return " + inputForm[e2] + "\n" +
           "}"
      if debug
         println[fs]
      f = parseToExpression[fs]
      r = rValue[f, data]
      if debug
         print["r = $r  "]
      if r > bestR
      {
         bestR = r
         bestF = f
         if debug
            println["*"]
      }

      if debug
         println["\n"]
   }

   return [bestF, bestR]
}


/** Attempts to optimize a linear fit by breaking it into 2 piecewise
    segments each with different coefficients.

    returns [function, rValue]
*/
linearFitFunctionPiecewise[data, debug=false] :=
{
   sorted = sort[deepCopy[data], byColumn[0]]
   len = length[sorted]
   bestR = -1000
   bestF = undef
   for cutBefore = 2 to len-2
   {
      cutx = sorted@cutBefore@0           // x value after cut point
      data1 = first[sorted, cutBefore]
      data2 = rest[sorted, cutBefore]
      e1 = linearFitExpression[data1]
      e2 = linearFitExpression[data2]
      fs = "{|x| \n" +
           "   if x < " + inputForm[cutx] + "\n" +
           "      return " + inputForm[e1] + "\n" +
           "   else\n" +
           "      return " + inputForm[e2] + "\n" +
           "}"
      if debug
         println[fs]
      f = parseToExpression[fs]
      r = rValue[f, data]
      if debug
         print["r = $r  "]
      if r > bestR
      {
         bestR = r
         bestF = f
         if debug
            println["*"]
      }

      if debug
         println["\n"]
   }

   return [bestF, bestR]
}


/** Attempts to optimize a linear fit by breaking it into 3 piecewise
    segments each with different coefficients.

    returns [function, rValue]
*/
linearFitFunctionPiecewise3[data, debug=false] :=
{
   sorted = sort[deepCopy[data], byColumn[0]]
   len = length[sorted]
   bestR = -1000
   bestF = undef
   for cutBefore1 = 2 to len-4
   {
      cutx1 = sorted@cutBefore1@0           // x value after cut point1
      data1 = slice[sorted, 0, cutBefore1]
      e1 = linearFitExpression[data1]
      
      for cutBefore2 = cutBefore1 + 2 to len-2
      {
         data2 = slice[sorted, cutBefore1, cutBefore2]
         data3 = slice[sorted, cutBefore2, len]
         cutx2 = data3@0@0           // x value after cut point 2
         e2 = linearFitExpression[data2]
         e3 = linearFitExpression[data3]
         fs = "{|x| \n" +
              "   if x < " + inputForm[cutx1] + "\n" +
              "      return " + inputForm[e1] + "\n" +
              "   else\n" +
              "      if x < " + inputForm[cutx2] + "\n" +
              "         return " + inputForm[e2] + "\n" +
              "      else\n" +
              "         return " + inputForm[e3] + "\n" +
              "}"

         if debug
            println[fs]
         
         f = parseToExpression[fs]
         r = rValue[f, data]
         if debug
            print["r = $r  "]
         if r > bestR
         {
            bestR = r
            bestF = f
            if debug
               println["*"]
         }

         if debug
            println["\n"]
       }
   }

   return [bestF, bestR]
}


/** Attempts to optimize a quadratic fit by breaking it into 2 piecewise
    segments each with different coefficients.

    returns [function, rValue]
*/
quadraticFitFunctionPiecewise[data, debug=false] :=
{
   sorted = sort[deepCopy[data], byColumn[0]]
   len = length[sorted]
   bestR = -1000
   bestF = undef
   for cutBefore = 3 to len-3
   {
      cutx = sorted@cutBefore@0           // x value after cut point
      data1 = first[sorted, cutBefore]
      data2 = rest[sorted, cutBefore]
      e1 = quadraticFitExpression[data1]
      e2 = quadraticFitExpression[data2]
      fs = "{|x| \n" +
           "   if x < " + inputForm[cutx] + "\n" +
           "      return " + inputForm[e1] + "\n" +
           "   else\n" +
           "      return " + inputForm[e2] + "\n" +
           "}"
      if debug
         println[fs]
      f = parseToExpression[fs]
      r = rValue[f, data]
      if debug
         print["r = $r  "]
      if r > bestR
      {
         bestR = r
         bestF = f
         if debug
            println["*"]
      }

      if debug
         println["\n"]
   }

   return [bestF, bestR]
}


/** Attempts to optimize a quadratic fit by breaking it into 3 piecewise
    segments each with different coefficients.

    returns [function, rValue]
*/
quadraticFitFunctionPiecewise3[data, debug=false] :=
{
   sorted = sort[deepCopy[data], byColumn[0]]
   len = length[sorted]
   bestR = -1000
   bestF = undef
   for cutBefore1 = 3 to len-6
   {
      cutx1 = sorted@cutBefore1@0           // x value after cut point1
      data1 = slice[sorted, 0, cutBefore1]
      e1 = quadraticFitExpression[data1]
      
      for cutBefore2 = cutBefore1 + 3 to len-3
      {
         data2 = slice[sorted, cutBefore1, cutBefore2]
         data3 = slice[sorted, cutBefore2, len]
         cutx2 = data3@0@0           // x value after cut point 2
         e2 = quadraticFitExpression[data2]
         e3 = quadraticFitExpression[data3]
         fs = "{|x| \n" +
              "   if x < " + inputForm[cutx1] + "\n" +
              "      return " + inputForm[e1] + "\n" +
              "   else\n" +
              "      if x < " + inputForm[cutx2] + "\n" +
              "         return " + inputForm[e2] + "\n" +
              "      else\n" +
              "         return " + inputForm[e3] + "\n" +
              "}"

         if debug
            println[fs]
         
         f = parseToExpression[fs]
         r = rValue[f, data]
         if debug
            print["r = $r  "]
         if r > bestR
         {
            bestR = r
            bestF = f
            if debug
               println["*"]
         }

         if debug
            println["\n"]
       }
   }

   return [bestF, bestR]
}