/* Copyright (C) 1996-2000, 2007-2009 Gerard Jungman, Brian Gough
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

/* Derived from GNU Scientific Library, version 1.13, files as noted below */

#include "svd.h"
#include "gsl.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* From gsl_minmax.h */

#define GSL_MAX(a,b) ((a) > (b) ? (a) : (b))

/* From gsl_machine.h */

#define GSL_DBL_EPSILON        2.2204460492503131e-16

/* From vector/gsl_vector_double.h */

typedef struct
{
  gsl_vector vector;
} _gsl_vector_view;

typedef _gsl_vector_view gsl_vector_view;

void gsl_vector_set_zero (gsl_vector * v);
int gsl_vector_scale (gsl_vector * a, const double x);

#define gsl_vector_get(v, i) (v->data[(i) * (v)->stride])
#define gsl_vector_set(v, i, x) (v->data[(i) * (v)->stride] = (x))

/* From vector/view.h */

#define NULL_VECTOR {0, 0, 0, 0, 0}
#define NULL_VECTOR_VIEW {{0, 0, 0, 0, 0}}

/* From block/gsl_block_double.h */

gsl_block *gsl_block_alloc (const size_t n);

void gsl_block_free (gsl_block * b);

/* From matrix/gsl_matrix_double.h */

void gsl_matrix_set_identity (gsl_matrix * m);
_gsl_vector_view 
gsl_matrix_column (gsl_matrix * m, const size_t j);

/* From blas/gsl_blas.h */

int gsl_blas_ddot (const gsl_vector * X,
                   const gsl_vector * Y,
                   double * result
                   );
double gsl_blas_dnrm2  (const gsl_vector * X);

/* From config.h.in */

#define GSL_COERCE_DBL(x) (gsl_coerce_double(x))

/* From cblas/gsl_cblas.h */

double cblas_ddot(const int N, const double *X, const int incX,
                  const double *Y, const int incY);
double cblas_dnrm2(const int N, const double *X, const int incX);

/* From cblas/cblas.h */

#define INDEX int
#define OFFSET(N, incX) ((incX) > 0 ?  0 : ((N) - 1) * (-(incX)))

/* From sys/coerce.c */

double 
gsl_coerce_double (const double x)
{
  volatile double y;
  y = x;
  return y;
}

/* From linalg/svd.c */

/* Factorise a general M x N matrix A into,
 *
 *   A = U D V^T
 *
 * where U is a column-orthogonal M x N matrix (U^T U = I), 
 * D is a diagonal N x N matrix, 
 * and V is an N x N orthogonal matrix (V^T V = V V^T = I)
 *
 * U is stored in the original matrix A, which has the same size
 *
 * V is stored as a separate matrix (not V^T). You must take the
 * transpose to form the product above.
 *
 * The diagonal matrix D is stored in the vector S,  D_ii = S_i
 */

/* This is a the jacobi version */
/* Author:  G. Jungman */

/*
 * Algorithm due to J.C. Nash, Compact Numerical Methods for
 * Computers (New York: Wiley and Sons, 1979), chapter 3.
 * See also Algorithm 4.1 in
 * James Demmel, Kresimir Veselic, "Jacobi's Method is more
 * accurate than QR", Lapack Working Note 15 (LAWN15), October 1989.
 * Available from netlib.
 *
 * Based on code by Arthur Kosowsky, Rutgers University
 *                  kosowsky@physics.rutgers.edu  
 *
 * Another relevant paper is, P.P.M. De Rijk, "A One-Sided Jacobi
 * Algorithm for computing the singular value decomposition on a
 * vector computer", SIAM Journal of Scientific and Statistical
 * Computing, Vol 10, No 2, pp 359-371, March 1989.
 * 
 */

int
gsl_linalg_SV_decomp_jacobi (gsl_matrix * A, gsl_matrix * Q, gsl_vector * S)
{
  if (A->size1 < A->size2)
    {
      /* FIXME: only implemented  M>=N case so far */

      GSL_ERROR ("svd of MxN matrix, M<N, is not implemented", GSL_EUNIMPL);
    }
  else if (Q->size1 != A->size2)
    {
      GSL_ERROR ("square matrix Q must match second dimension of matrix A",
                 GSL_EBADLEN);
    }
  else if (Q->size1 != Q->size2)
    {
      GSL_ERROR ("matrix Q must be square", GSL_ENOTSQR);
    }
  else if (S->size != A->size2)
    {
      GSL_ERROR ("length of vector S must match second dimension of matrix A",
                 GSL_EBADLEN);
    }
  else
    {
      const size_t M = A->size1;
      const size_t N = A->size2;
      size_t i, j, k;

      /* Initialize the rotation counter and the sweep counter. */
      int count = 1;
      int sweep = 0;
      int sweepmax = 5*N;

      double tolerance = 10 * M * GSL_DBL_EPSILON;

      /* Always do at least 12 sweeps. */
      sweepmax = GSL_MAX (sweepmax, 12);

      /* Set Q to the identity matrix. */
      gsl_matrix_set_identity (Q);

      /* Store the column error estimates in S, for use during the
         orthogonalization */

      for (j = 0; j < N; j++)
        {
          gsl_vector_view cj = gsl_matrix_column (A, j);
          double sj = gsl_blas_dnrm2 (&cj.vector);
          gsl_vector_set(S, j, GSL_DBL_EPSILON * sj);
        }
    
      /* Orthogonalize A by plane rotations. */

      while (count > 0 && sweep <= sweepmax)
        {
          /* Initialize rotation counter. */
          count = N * (N - 1) / 2;

          for (j = 0; j < N - 1; j++)
            {
              for (k = j + 1; k < N; k++)
                {
                  double a = 0.0;
                  double b = 0.0;
                  double p = 0.0;
                  double q = 0.0;
                  double cosine, sine;
                  double v;
                  double abserr_a, abserr_b;
                  int sorted, orthog, noisya, noisyb;

                  gsl_vector_view cj = gsl_matrix_column (A, j);
                  gsl_vector_view ck = gsl_matrix_column (A, k);

                  gsl_blas_ddot (&cj.vector, &ck.vector, &p);
                  p *= 2.0 ;  /* equation 9a:  p = 2 x.y */

                  a = gsl_blas_dnrm2 (&cj.vector);
                  b = gsl_blas_dnrm2 (&ck.vector);

                  q = a * a - b * b;
                  v = hypot(p, q);

                  /* test for columns j,k orthogonal, or dominant errors */

                  abserr_a = gsl_vector_get(S,j);
                  abserr_b = gsl_vector_get(S,k);

                  sorted = (GSL_COERCE_DBL(a) >= GSL_COERCE_DBL(b));
                  orthog = (fabs (p) <= tolerance * GSL_COERCE_DBL(a * b));
                  noisya = (a < abserr_a);
                  noisyb = (b < abserr_b);

                  if (sorted && (orthog || noisya || noisyb))
                    {
                      count--;
                      continue;
                    }

                  /* calculate rotation angles */
                  if (v == 0 || !sorted)
                    {
                      cosine = 0.0;
                      sine = 1.0;
                    }
                  else
                    {
                      cosine = sqrt((v + q) / (2.0 * v));
                      sine = p / (2.0 * v * cosine);
                    }

                  /* apply rotation to A */
                  for (i = 0; i < M; i++)
                    {
                      const double Aik = gsl_matrix_get (A, i, k);
                      const double Aij = gsl_matrix_get (A, i, j);
                      gsl_matrix_set (A, i, j, Aij * cosine + Aik * sine);
                      gsl_matrix_set (A, i, k, -Aij * sine + Aik * cosine);
                    }

                  gsl_vector_set(S, j, fabs(cosine) * abserr_a + fabs(sine) * abserr_b);
                  gsl_vector_set(S, k, fabs(sine) * abserr_a + fabs(cosine) * abserr_b);

                  /* apply rotation to Q */
                  for (i = 0; i < N; i++)
                    {
                      const double Qij = gsl_matrix_get (Q, i, j);
                      const double Qik = gsl_matrix_get (Q, i, k);
                      gsl_matrix_set (Q, i, j, Qij * cosine + Qik * sine);
                      gsl_matrix_set (Q, i, k, -Qij * sine + Qik * cosine);
                    }
                }
            }

          /* Sweep completed. */
          sweep++;
        }

      /* 
       * Orthogonalization complete. Compute singular values.
       */

      {
        double prev_norm = -1.0;

        for (j = 0; j < N; j++)
          {
            gsl_vector_view column = gsl_matrix_column (A, j);
            double norm = gsl_blas_dnrm2 (&column.vector);

            /* Determine if singular value is zero, according to the
               criteria used in the main loop above (i.e. comparison
               with norm of previous column). */

            if (norm == 0.0 || prev_norm == 0.0 
                || (j > 0 && norm <= tolerance * prev_norm))
              {
                gsl_vector_set (S, j, 0.0);     /* singular */
                gsl_vector_set_zero (&column.vector);   /* annihilate column */

                prev_norm = 0.0;
              }
            else
              {
                gsl_vector_set (S, j, norm);    /* non-singular */
                gsl_vector_scale (&column.vector, 1.0 / norm);  /* normalize column */

                prev_norm = norm;
              }
          }
      }

      if (count > 0)
        {
          /* reached sweep limit */
          GSL_ERROR ("Jacobi iterations did not reach desired tolerance",
                     GSL_ETOL);
        }

      return GSL_SUCCESS;
    }
}

/* Modified from err/error.c and err/stream.c */

void
gsl_error (const char * reason, const char * file, int line, int gsl_errno)
{
#if 0
  if (gsl_error_handler) 
    {
      (*gsl_error_handler) (reason, file, line, gsl_errno);
      return ;
    }
#endif

  fprintf (stderr, "gsl: %s:%d: %s: %s\n", file, line, "ERROR", reason);

  fflush (stdout);
  fprintf (stderr, "Default GSL error handler invoked.\n");
  fflush (stderr);

  abort ();
}

/* From blas/blas.c */

#define INT(X) ((int)(X))

int
gsl_blas_ddot (const gsl_vector * X, const gsl_vector * Y, double *result)
{
  if (X->size == Y->size)
    {
      *result =
        cblas_ddot (INT (X->size), X->data, INT (X->stride), Y->data,
                    INT (Y->stride));
      return GSL_SUCCESS;
    }
  else
    {
      GSL_ERROR ("invalid length", GSL_EBADLEN);
    }
}

double
gsl_blas_dnrm2 (const gsl_vector * X)
{
  return cblas_dnrm2 (INT (X->size), X->data, INT (X->stride));
}

/* From cblas/dnrm2.c and cblas/source_nrm2_r.h */

double
cblas_dnrm2 (const int N, const double *X, const int incX)
{
#define BASE double
  BASE scale = 0.0;
  BASE ssq = 1.0;
  INDEX i;
  INDEX ix = 0;

  if (N <= 0 || incX <= 0) {
    return 0;
  } else if (N == 1) {
    return fabs(X[0]);
  }

  for (i = 0; i < N; i++) {
    const BASE x = X[ix];

    if (x != 0.0) {
      const BASE ax = fabs(x);

      if (scale < ax) {
        ssq = 1.0 + ssq * (scale / ax) * (scale / ax);
        scale = ax;
      } else {
        ssq += (ax / scale) * (ax / scale);
      }
    }

    ix += incX;
  }

  return scale * sqrt(ssq);
#undef BASE
}

/* From cblas/ddot.c and cblas/source_dot_r.h */

double
cblas_ddot (const int N, const double *X, const int incX, const double *Y,
            const int incY)
{
#define INIT_VAL  0.0
#define ACC_TYPE  double
#define BASE double
  ACC_TYPE r = INIT_VAL;
  INDEX i;
  INDEX ix = OFFSET(N, incX);
  INDEX iy = OFFSET(N, incY);

  for (i = 0; i < N; i++) {
    r += X[ix] * Y[iy];
    ix += incX;
    iy += incY;
  }

  return r;
#undef ACC_TYPE
#undef BASE
#undef INIT_VAL
}

/* From templates_on.h */

#define BASE double
#define SHORT
#define ATOMIC double
#define MULTIPLICITY 1
#define FP 1
#define IN_FORMAT "%lg"
#define OUT_FORMAT "%g"
#define ATOMIC_IO ATOMIC
#define ZERO 0.0
#define ONE 1.0
#define BASE_EPSILON GSL_DBL_EPSILON

#define CONCAT2x(a,b) a ## _ ## b 
#define CONCAT2(a,b) CONCAT2x(a,b)

#define FUNCTION(dir,name) CONCAT2(dir,name)
#define TYPE(dir) dir
#define VIEW(dir,name) CONCAT2(dir,name)
#define QUALIFIED_TYPE(dir) TYPE(dir)
#define QUALIFIED_VIEW(dir,name) CONCAT2(dir,name)

/* From block/init_source.c */

TYPE (gsl_block) *
FUNCTION (gsl_block, alloc) (const size_t n)
{
  TYPE (gsl_block) * b;

  if (n == 0)
    {
      GSL_ERROR_VAL ("block length n must be positive integer",
                        GSL_EINVAL, 0);
    }

  b = (TYPE (gsl_block) *) malloc (sizeof (TYPE (gsl_block)));

  if (b == 0)
    {
      GSL_ERROR_VAL ("failed to allocate space for block struct",
                        GSL_ENOMEM, 0);
    }

  b->data = (ATOMIC *) malloc (MULTIPLICITY * n * sizeof (ATOMIC));

  if (b->data == 0)
    {
      free (b);         /* exception in constructor, avoid memory leak */

      GSL_ERROR_VAL ("failed to allocate space for block data",
                        GSL_ENOMEM, 0);
    }

  b->size = n;

  return b;
}

void
FUNCTION (gsl_block, free) (TYPE (gsl_block) * b)
{
  RETURN_IF_NULL (b);
  free (b->data);
  free (b);
}

/* From vector/init_source.c */

TYPE (gsl_vector) *
FUNCTION (gsl_vector, alloc) (const size_t n)
{
  TYPE (gsl_block) * block;
  TYPE (gsl_vector) * v;

  if (n == 0)
    {
      GSL_ERROR_VAL ("vector length n must be positive integer",
                        GSL_EINVAL, 0);
    }

  v = (TYPE (gsl_vector) *) malloc (sizeof (TYPE (gsl_vector)));

  if (v == 0)
    {
      GSL_ERROR_VAL ("failed to allocate space for vector struct",
                        GSL_ENOMEM, 0);
    }

  block = FUNCTION (gsl_block,alloc) (n);

  if (block == 0)
    {
      free (v) ;

      GSL_ERROR_VAL ("failed to allocate space for block",
                        GSL_ENOMEM, 0);
    }
      
  v->data = block->data ;
  v->size = n;
  v->stride = 1;
  v->block = block;
  v->owner = 1;

  return v;
}

void
FUNCTION (gsl_vector, free) (TYPE (gsl_vector) * v)
{
  RETURN_IF_NULL (v);

  if (v->owner)
    {
      FUNCTION(gsl_block, free) (v->block) ;
    }
  free (v);
}

void
FUNCTION(gsl_vector, set_zero) (TYPE (gsl_vector) * v)
{
  ATOMIC * const data = v->data;
  const size_t n = v->size;
  const size_t stride = v->stride;
  const BASE zero = ZERO ;

  size_t i;

  for (i = 0; i < n; i++)
    {
      *(BASE *) (data + MULTIPLICITY * i * stride) = zero;
    }
}


/* From vector/oper_source.c */

int 
FUNCTION(gsl_vector, scale) (TYPE(gsl_vector) * a, const double x)
{
  const size_t N = a->size;
  const size_t stride = a->stride;
  
  size_t i;
  
  for (i = 0; i < N; i++)
    {
      a->data[i * stride] *= x;
    }
  
  return GSL_SUCCESS;
}

/* From matrix/init_source.c */

TYPE (gsl_matrix) *
FUNCTION (gsl_matrix, alloc) (const size_t n1, const size_t n2)
{
  TYPE (gsl_block) * block;
  TYPE (gsl_matrix) * m;

  if (n1 == 0)
    {
      GSL_ERROR_VAL ("matrix dimension n1 must be positive integer",
                        GSL_EINVAL, 0);
    }
  else if (n2 == 0)
    {
      GSL_ERROR_VAL ("matrix dimension n2 must be positive integer",
                        GSL_EINVAL, 0);
    }

  m = (TYPE (gsl_matrix) *) malloc (sizeof (TYPE (gsl_matrix)));

  if (m == 0)
    {
      GSL_ERROR_VAL ("failed to allocate space for matrix struct",
                        GSL_ENOMEM, 0);
    }

  /* FIXME: n1*n2 could overflow for large dimensions */

  block = FUNCTION(gsl_block, alloc) (n1 * n2) ;

  if (block == 0)
    {
      GSL_ERROR_VAL ("failed to allocate space for block",
                        GSL_ENOMEM, 0);
    }

  m->data = block->data;
  m->size1 = n1;
  m->size2 = n2;
  m->tda = n2; 
  m->block = block;
  m->owner = 1;

  return m;
}

void
FUNCTION (gsl_matrix, free) (TYPE (gsl_matrix) * m)
{
  RETURN_IF_NULL (m);

  if (m->owner)
    {
      FUNCTION(gsl_block, free) (m->block);
    }

  free (m);
}

void
FUNCTION (gsl_matrix, set_identity) (TYPE (gsl_matrix) * m)
{
  size_t i, j;
  ATOMIC * const data = m->data;
  const size_t p = m->size1 ;
  const size_t q = m->size2 ;
  const size_t tda = m->tda ;

  const BASE zero = ZERO;
  const BASE one = ONE;

  for (i = 0; i < p; i++)
    {
      for (j = 0; j < q; j++)
        {
          *(BASE *) (data + MULTIPLICITY * (i * tda + j)) = ((i == j) ? one : zero);
        }
    }
}

/* From matrix/rowcol_source.c */

QUALIFIED_VIEW(_gsl_vector,view)
FUNCTION (gsl_matrix, column) (QUALIFIED_TYPE(gsl_matrix) * m, const size_t j)
{
  QUALIFIED_VIEW(_gsl_vector,view) view = NULL_VECTOR_VIEW;
  
  if (j >= m->size2)
    {
      GSL_ERROR_VAL ("column index is out of range", GSL_EINVAL, view);
    }

  {
    TYPE(gsl_vector) v = NULL_VECTOR;
    
    v.data = m->data + j * MULTIPLICITY;
    v.size = m->size1;
    v.stride = m->tda;
    v.block = m->block;
    v.owner = 0;

    view.vector = v;
    return view;
  }
}
