/** This library calculates nearest square roots of integers using the
    Karatsuba square root recursive algorithm.  It also contains
    functions that build upon the integer routines to calculate floating-point
    (and rational) square roots of integers and floating-point numbers to an
    arbitrary number of digits. 

    For reference to the Karatsuba square root algorithm see:
       https://hal.inria.fr/inria-00072854/PDF/RR-3805.pdf
*/


/** Perform Karatsuba square root, returning a floating-point square root of an
    integer or calculating the floating-point square root of a floating-point
    number (by first converting it to a giant integer.)

    This works using a rather simple trick.  If we need a lot of
    digits in the result, we "shift left" the number by the number of bits
    we need (by multiplying by 2^m bits, (see how m is calculated below))
    and round it to an integer.  We then do an integer square root of this big
    integer, and divide the result by 2^(m/2) bits which turns the result into
    the correct floating-point (or actually rational) square root of the
    number.

    (Note that this trick also elegantly works to *reduce* the number of digits
     we have to calculate by "shifting right" when the requested number of
     digits is small.  For example, karatsubaFloat[10^1000, 3] would
     normally take a 1000-digit number and use the full force of Frink to
     calculate 3 digits of its square root.)
   
    This algorithm is able to calculate a limited number of digits, which
    makes it suitable for providing a good initial guess (with a large number
    of correct digits) for an operation like Newton iteration for larger
    numbers.  Newton iteration approximately doubles the number of correct
    digits per iteration if given a good guess, so an optimal algorithm *may*
    use this algorithm and Newton iteration.  Note that as of Java 1.8,
    the integer operations from Frink were incorporated into Java, making
    integer operations much faster than floating-point for large numbers.
   
    If rational==false, (the default,) this returns a floating-point number.
    If rational==true,  this returns a rational number (which may be faster)
*/
karatsubaSqrt[n, digits=getPrecision[], rational=false] :=
{
   oldPrec = getPrecision[]

   try
   {
      m = ceil[digits log[10]/log[2] - approxLog2[n]]
      setPrecision[digits+3]

      // Make the number of bits even
      if m mod 2 == 1
         m = m + 1

      shift = 2^m
      
      [num, remainder] = sqrtRem[round[n * shift]]
      denom = 2^(m/2)

      // println["$num / $denom, remainder = $remainder"]
      
      result = num/denom
      
      /* This adjusts the result using the remainder and a Taylor series
         approximation:

         The Karatsuba square root algorithm only returns integers, so it
         estimates the nearest integer to the result (called s in the paper)
         as:
         result = floor[sqrt[n]]

         And the remainder (called r in the paper) is:
         remainder = n - result^2
      
         thus, the correct value of sqrt[n] is:
         sqrt[n] = sqrt[result^2 + remainder]

         However, this is not easily turned into a number without doing
         another whole square root algorithm, so we correct it by calculating
         a Taylor series for the corrections in the remainder.  That is, the
         Taylor series of the above:  sqrt[result^2 + remainder]

         Due to the choice of bits that we chose in the initial calculation,
         1 term in the Taylor series seems to be just right at enough.
         Without using the remainder, we have to calculate *twice* as many
         digits in the recursive algorithm above to get correct answers.  By
         using the remainder, we can get all digits correct by calculating a
         couple more digits than requested, rather than doubling the number of
         digits.

         The following terms are the first terms of the Taylor series.  

         First term:  r / (2 s)
      */
      radj = remainder / (2 result shift)
      result = result + radj
      //println["Remainder adjustment is $radj"]

      /* Second-order Taylor series remainder adjustment.  This is generally
         not necessary and may only affect the last digit or so.
      
         Second term:  -r^2 (8 s^3)
      */
      
      /* radj2 = - remainder^2 / (8 (result shift)^3)
      //  println["Remainder adjustment (second order) is $radj2"]
      result = result + radj2 */
      
      setPrecision[digits]
      
      if rational == false
         return 1. * result   // Return floating-point number
      else
         return result        // Return rational number
   }
   finally
   {
      setPrecision[oldPrec]
   }
}

/* Karatsuba recursive square root.

   For explication of algorithm, see:
     https://hal.inria.fr/inria-00072854/PDF/RR-3805.pdf

   Thanks to Jeremy Roach for much hard work at turning my broken
   implementation into a working implementation.
*/
sqrtRem[n] :=
{
   // Fallback case when numbers are smallish (TODO: tune this threshold)
   if n < 2^256
   {
      s = intSqrt[n]
      return [s, n - s^2]
   }

   [w, z] = divideAndRemainder[bitLength[n], 4]
   w = ((4 w + z) + ((4-z) mod 4)) / 4
   b = 2^w

   if z == 1 || z == 2
   {
      fixup = true
      n = 4 n
   } else
   {
      fixup = false
   }

   a0 = bitAnd[n, b-1]
   n = shiftRight[n, w]
   a1 = bitAnd[n, b-1]
   n = shiftRight[n, w]
   a2 = bitAnd[n, b-1]
   n = shiftRight[n, w]
   a3 = bitAnd[n, b-1]

   [s1, r1] = sqrtRem[a3*b + a2]
   [q, u] = divideAndRemainder[r1*b + a1, 2*s1]
   s = s1*b + q
   r = u*b + a0 - q^2
   if r < 0
   {
      r = r + 2*s - 1
      s = s - 1
   }
   if fixup
      return [s/2, r/4]
   
   return [s, r]
}

/* Integer square root: returns the greatest integer less than or equal to the 
   square root of n.  Only call this with integers.

   This is Exercise 5.7 in Bressoud with my own modifications for better
   initial guess.
*/
intSqrt[n] :=
{
   a = 2^((bitLength[n]+1) div 2)
   b = a - 1

   while b<a
   {
      a = b
      b = (a^2 + n) div (2*a)
   }

   return a
}