linear_solver.cc
Go to the documentation of this file.
1 //LIC// ====================================================================
2 //LIC// This file forms part of oomph-lib, the object-oriented,
3 //LIC// multi-physics finite-element library, available
4 //LIC// at http://www.oomph-lib.org.
5 //LIC//
6 //LIC// Version 1.0; svn revision $LastChangedRevision: 1097 $
7 //LIC//
8 //LIC// $LastChangedDate: 2015-12-17 11:53:17 +0000 (Thu, 17 Dec 2015) $
9 //LIC//
10 //LIC// Copyright (C) 2006-2016 Matthias Heil and Andrew Hazel
11 //LIC//
12 //LIC// This library is free software; you can redistribute it and/or
13 //LIC// modify it under the terms of the GNU Lesser General Public
14 //LIC// License as published by the Free Software Foundation; either
15 //LIC// version 2.1 of the License, or (at your option) any later version.
16 //LIC//
17 //LIC// This library is distributed in the hope that it will be useful,
18 //LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
19 //LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 //LIC// Lesser General Public License for more details.
21 //LIC//
22 //LIC// You should have received a copy of the GNU Lesser General Public
23 //LIC// License along with this library; if not, write to the Free Software
24 //LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 //LIC// 02110-1301 USA.
26 //LIC//
27 //LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
28 //LIC//
29 //LIC//====================================================================
30 //The actual solve functions for dense LU linear solvers.
31 
32 // Config header generated by autoconfig
33 #ifdef HAVE_CONFIG_H
34  #include <oomph-lib-config.h>
35 #endif
36 
37 #ifdef OOMPH_HAS_MPI
38 #include "mpi.h"
39 #endif
40 
41 //oomph-lib includes
42 #include "Vector.h"
43 #include "linear_solver.h"
44 #include "matrices.h"
45 #include "problem.h"
46 
47 
48 namespace oomph
49 {
50 
51 
52 //=============================================================================
53 /// Solver: Takes pointer to problem and returns the results Vector
54 /// which contains the solution of the linear system defined by
55 /// the problem's fully assembled Jacobian and residual Vector.
56 //=============================================================================
57  void DenseLU::solve(Problem* const &problem_pt, DoubleVector &result)
58  {
59  //Initialise timer
60  double t_start = TimingHelpers::timer();
61 
62  //Find # of degrees of freedom (variables)
63  const unsigned n_dof = problem_pt->ndof();
64 
65  //Allocate storage for the residuals vector and the jacobian matrix
66  DoubleVector residuals;
67  DenseDoubleMatrix jacobian(n_dof);
68 
69  // initialise timer
70  double t_start_jacobian = TimingHelpers::timer();
71 
72  //Get the full jacobian and residuals of the problem
73  problem_pt->get_jacobian(residuals,jacobian);
74 
75  // compute jacobian setup time
76  double t_end_jacobian = TimingHelpers::timer();
77  Jacobian_setup_time = t_end_jacobian - t_start_jacobian;
78 
79  //Report the time
80  if(Doc_time)
81  {
82  oomph_info << std::endl << "CPU for setup of Dense Jacobian [sec]: "
83  << Jacobian_setup_time << std::endl;
84  }
85 
86  //Solve by dense LU decomposition VERY INEFFICIENT!
87  solve(&jacobian,residuals,result);
88 
89  //Set the sign of the determinant of the jacobian
91 
92  // Finalise/doc timings
93  double t_end = TimingHelpers::timer();
94  double total_time=t_end-t_start;
95  if(Doc_time)
96  {
97  oomph_info << "CPU for DenseLU LinearSolver [sec]: "
98  << total_time << std::endl << std::endl;
99  }
100  }
101 
102 
103 //=============================================================================
104 /// Delete the storage that has been allocated for the LU factors, if
105 /// the matrix data is not itself being overwritten.
106 //=============================================================================
108 {
109  // delete the Distribution_pt
110  this->clear_distribution();
111 
112  //Clean up the LU factor storage, if it has been allocated
113  //N.B. we don't need to check the index storage as well.
114  if(LU_factors!=0)
115  {
116  //Delete the pointer to the LU factors
117  delete[] LU_factors;
118  //Null out the vector
119  LU_factors = 0;
120  //Delete the pointer to the Index
121  delete[] Index;
122  //Null out
123  Index=0;
124  }
125 }
126 
127 //=============================================================================
128 /// LU decompose the matrix.
129 /// WARNING: this class does not perform any PARANOID checks on the vectors -
130 /// these are all performed in the solve(...) method.
131 //=============================================================================
132 void DenseLU::factorise(DoubleMatrixBase* const &matrix_pt)
133 {
134  //Set the number of unknowns
135  const unsigned long n = matrix_pt->nrow();
136 
137  //Small constant
138  const double small_number=1.0e-20;
139 
140  //Vector scaling stores the implicit scaling of each row
141  Vector<double> scaling(n);
142 
143  //Integer to store the sign that must multiply the determinant as
144  //a consequence of the row/column interchanges
145  int signature = 1;
146 
147  //Loop over rows to get implicit scaling information
148  for(unsigned long i=0;i<n;i++)
149  {
150  double largest_entry=0.0;
151  for(unsigned long j=0;j<n;j++)
152  {
153  double tmp = std::fabs((*matrix_pt)(i,j));
154  if(tmp > largest_entry) largest_entry = tmp;
155  }
156  if(largest_entry==0.0)
157  {
158  throw OomphLibError("Singular Matrix",
159  OOMPH_CURRENT_FUNCTION,
160  OOMPH_EXCEPTION_LOCATION);
161  }
162  //Save the scaling
163  scaling[i] = 1.0/largest_entry;
164  }
165 
166  //Firsly, we shall delete any previous LU storage.
167  //If the user calls this function twice without changing the matrix
168  //then it is their own inefficiency, not ours (this time).
169  clean_up_memory();
170 
171  //Allocate storage for the LU factors, the index and store
172  //the number of unknowns
173  LU_factors = new double[n*n];
174  Index = new long[n];
175 
176  //Now we know that memory has been allocated, copy over
177  //the matrix values
178  unsigned count=0;
179  for(unsigned long i=0;i<n;i++)
180  {
181  for(unsigned long j=0;j<n;j++)
182  {
183  LU_factors[count] = (*matrix_pt)(i,j);
184  ++count;
185  }
186  }
187 
188  //Loop over columns
189  for(unsigned long j=0;j<n;j++)
190  {
191  //Initialise imax
192  unsigned long imax=0;
193 
194  for(unsigned long i=0;i<j;i++)
195  {
196  double sum = LU_factors[n*i+j];
197  for(unsigned long k=0;k<i;k++)
198  {
199  sum -= LU_factors[n*i+k]*LU_factors[n*k+j];
200  }
201  LU_factors[n*i+j] = sum;
202  }
203 
204  //Initialise search for largest pivot element
205  double largest_entry=0.0;
206  for(unsigned long i=j;i<n;i++)
207  {
208  double sum = LU_factors[n*i+j];
209  for(unsigned long k=0;k<j;k++)
210  {
211  sum -= LU_factors[n*i+k]*LU_factors[n*k+j];
212  }
213  LU_factors[n*i+j] = sum;
214  //Set temporary
215  double tmp = scaling[i]*std::fabs(sum);
216  if(tmp >= largest_entry)
217  {
218  largest_entry = tmp;
219  imax = i;
220  }
221  }
222 
223  //Test to see if we need to interchange rows
224  if(j != imax)
225  {
226  for(unsigned long k=0;k<n;k++)
227  {
228  double tmp = LU_factors[n*imax+k];
229  LU_factors[n*imax+k] = LU_factors[n*j+k];
230  LU_factors[n*j+k] = tmp;
231  }
232  //Change the parity of signature
233  signature = -signature;
234 
235  //Interchange scale factor
236  scaling[imax] = scaling[j];
237  }
238 
239  //Set the index
240  Index[j] = imax;
241  if(LU_factors[n*j+j] == 0.0)
242  {
243  LU_factors[n*j+j] = small_number;
244  }
245  //Divide by pivot element
246  if(j != n-1)
247  {
248  double tmp = 1.0/LU_factors[n*j+j];
249  for(unsigned long i=j+1;i<n;i++)
250  {
251  LU_factors[n*i+j] *= tmp;
252  }
253  }
254 
255  } //End of loop over columns
256 
257 
258  //Now multiply all the diagonal terms together to get the determinant
259  //Note that we need to use the mantissa, exponent formulation to
260  //avoid underflow errors
261  double determinant_mantissa=1.0;
262  int determinant_exponent = 0, iexp;
263  for(unsigned i=0; i<n; i++)
264  {
265  //Multiply by the next diagonal entry's mantissa
266  //and return the exponent
267  determinant_mantissa *= frexp(LU_factors[n*i+i], &iexp);
268 
269  //Add the new exponent to the current exponent
270  determinant_exponent += iexp;
271 
272  // normalise
273  determinant_mantissa = frexp(determinant_mantissa,&iexp);
274  determinant_exponent += iexp;
275  }
276 
277  //If paranoid issue a warning that the matrix is near singular
278 // #ifdef PARANOID
279 // int tiny_exponent = -60;
280 // if(determinant_exponent < tiny_exponent)
281 // {
282 // std::ostringstream warning_stream;
283 // warning_stream << "The determinant of the matrix is very close to zero.\n"
284 // << "It is " << determinant_mantissa << " x 2^"
285 // << determinant_exponent << "\n";
286 // warning_stream << "The results will depend on the exact details of the\n"
287 // << "floating point implementation ... just to let you know\n";
288 // OomphLibWarning(warning_stream.str(),
289 // "DenseLU::factorise()",
290 // OOMPH_EXCEPTION_LOCATION);
291 // }
292 // #endif
293 
294  //Integer to store the sign of the determinant
295  int sign = 0;
296 
297  //Find the sign of the determinant
298  if(determinant_mantissa > 0.0) {sign = 1;}
299  if(determinant_mantissa < 0.0) {sign = -1;}
300 
301  //Multiply the sign by the signature
302  sign *= signature;
303 
304  //Return the sign of the determinant
306  }
307 
308 //=============================================================================
309 /// Do the backsubstitution for the DenseLU solver.
310 /// WARNING: this class does not perform any PARANOID checks on the vectors -
311 /// these are all performed in the solve(...) method.
312 //=============================================================================
314  DoubleVector &result)
315 {
316  // Get pointers to first entries
317  const double* rhs_pt = rhs.values_pt();
318  double* result_pt = result.values_pt();
319 
320  //Copy the rhs vector into the result vector
321  const unsigned long n = rhs.nrow();
322  for(unsigned long i=0;i<n;++i)
323  {
324  result_pt[i] = rhs_pt[i];
325  }
326 
327  // Loop over all rows for forward substition
328  unsigned long k=0;
329  for(unsigned long i=0;i<n;i++)
330  {
331  unsigned long ip = Index[i];
332  double sum = result_pt[ip];
333  result_pt[ip] = result_pt[i];
334  if(k != 0)
335  {
336  for(unsigned long j=k-1;j<i;j++)
337  {
338  sum -= LU_factors[n*i+j]*result_pt[j];
339  }
340  }
341  else if(sum != 0.0)
342  {
343  k = i+1;
344  }
345  result_pt[i] = sum;
346  }
347 
348  //Now do the back substitution
349  for (long i=long(n)-1;i>=0;i--)
350  {
351  double sum = result_pt[i];
352  for(long j=i+1;j<long(n);j++)
353  {
354  sum -= LU_factors[n*i+j]*result_pt[j];
355  }
356  result_pt[i] = sum/LU_factors[n*i+i];
357  }
358 }
359 
360 //=============================================================================
361 /// Do the backsubstitution for the DenseLU solver.
362 /// WARNING: this class does not perform any PARANOID checks on the vectors -
363 /// these are all performed in the solve(...) method. So, if you call backsub
364 /// directly, you have been warned...
365 //=============================================================================
367  Vector<double> &result)
368 {
369  //Copy the rhs vector into the result vector
370  const unsigned long n = rhs.size();
371  for(unsigned long i=0;i<n;++i)
372  {
373  result[i] = rhs[i];
374  }
375 
376  // Loop over all rows for forward substition
377  unsigned long k=0;
378  for(unsigned long i=0;i<n;i++)
379  {
380  unsigned long ip = Index[i];
381  double sum = result[ip];
382  result[ip] = result[i];
383  if(k != 0)
384  {
385  for(unsigned long j=k-1;j<i;j++)
386  {
387  sum -= LU_factors[n*i+j]*result[j];
388  }
389  }
390  else if(sum != 0.0)
391  {
392  k = i+1;
393  }
394  result[i] = sum;
395  }
396 
397  //Now do the back substitution
398  for (long i=long(n)-1;i>=0;i--)
399  {
400  double sum = result[i];
401  for(long j=i+1;j<long(n);j++)
402  {
403  sum -= LU_factors[n*i+j]*result[j];
404  }
405  result[i] = sum/LU_factors[n*i+i];
406  }
407 }
408 
409 
410 //=============================================================================
411  /// \short Linear-algebra-type solver: Takes pointer to a matrix and rhs
412  /// vector and returns the solution of the linear system.
413 //============================================================================
414  void DenseLU::solve(DoubleMatrixBase* const &matrix_pt,
415  const DoubleVector &rhs,
416  DoubleVector &result)
417 {
418 #ifdef PARANOID
419  // check that the rhs vector is not distributed
420  if (rhs.distribution_pt()->distributed())
421  {
422  std::ostringstream error_message_stream;
423  error_message_stream
424  << "The vectors rhs and result must not be distributed";
425  throw OomphLibError(error_message_stream.str(),
426  OOMPH_CURRENT_FUNCTION,
427  OOMPH_EXCEPTION_LOCATION);
428  }
429 
430  // check that the matrix is square
431  if (matrix_pt->nrow() != matrix_pt->ncol())
432  {
433  std::ostringstream error_message_stream;
434  error_message_stream
435  << "The matrix at matrix_pt must be square.";
436  throw OomphLibError(error_message_stream.str(),
437  OOMPH_CURRENT_FUNCTION,
438  OOMPH_EXCEPTION_LOCATION);
439  }
440  // check that the matrix and the rhs vector have the same nrow()
441  if (matrix_pt->nrow() != rhs.nrow())
442  {
443  std::ostringstream error_message_stream;
444  error_message_stream
445  << "The matrix and the rhs vector must have the same number of rows.";
446  throw OomphLibError(error_message_stream.str(),
447  OOMPH_CURRENT_FUNCTION,
448  OOMPH_EXCEPTION_LOCATION);
449  }
450 
451  // if the matrix is distributable then it too should have the same
452  // communicator as the rhs vector and should not be distributed
453  DistributableLinearAlgebraObject* dist_matrix_pt =
454  dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
455  if (dist_matrix_pt != 0)
456  {
457  if (dist_matrix_pt->distribution_pt()->communicator_pt()->nproc() > 1 &&
458  dist_matrix_pt->distribution_pt()->distributed() == true)
459  {
460  throw OomphLibError(
461  "Matrix must not be distributed or only one processor",
462  OOMPH_CURRENT_FUNCTION,
463  OOMPH_EXCEPTION_LOCATION);
464  }
466  if (!(temp_comm == *dist_matrix_pt->distribution_pt()->communicator_pt()))
467  {
468  std::ostringstream error_message_stream;
469  error_message_stream
470  << "The matrix matrix_pt must have the same communicator as the vectors"
471  << " rhs and result must have the same communicator";
472  throw OomphLibError(error_message_stream.str(),
473  OOMPH_CURRENT_FUNCTION,
474  OOMPH_EXCEPTION_LOCATION);
475  }
476  }
477  // if the result vector is setup then check it is not distributed and has
478  // the same communicator as the rhs vector
479  if (result.distribution_built())
480  {
481  if (!(*result.distribution_pt() == *rhs.distribution_pt()))
482  {
483  std::ostringstream error_message_stream;
484  error_message_stream
485  << "The result vector distribution has been setup; it must have the "
486  << "same distribution as the rhs vector.";
487  throw OomphLibError(error_message_stream.str(),
488  OOMPH_CURRENT_FUNCTION,
489  OOMPH_EXCEPTION_LOCATION);
490  }
491  }
492 #endif
493 
494  if (!result.distribution_built())
495  {
496  result.build(rhs.distribution_pt(),0.0);
497  }
498 
499  // set the distribution
500  this->build_distribution(rhs.distribution_pt());
501 
502  // Time the solver
503  double t_start = TimingHelpers::timer();
504 
505  // factorise
506  factorise(matrix_pt);
507 
508  // backsubstitute
509  backsub(rhs,result);
510 
511  //Doc time for solver
512  double t_end = TimingHelpers::timer();
513 
514  Solution_time = t_end-t_start;
515  if(Doc_time)
516  {
517  oomph_info << std::endl << "CPU for solve with DenseLU [sec]: "
518  << Solution_time << std::endl << std::endl;
519  }
520 
521  //If we are not resolving then delete storage
523 }
524 
525 //=============================================================================
526 /// \short Linear-algebra-type solver: Takes pointer to a matrix and rhs
527 /// vector and returns the solution of the linear system.
528 //=============================================================================
529 void DenseLU::solve(DoubleMatrixBase* const &matrix_pt,
530  const Vector<double> &rhs,
531  Vector<double> &result)
532 {
533  // Time the solver
534  clock_t t_start = clock();
535 
536  factorise(matrix_pt);
537  backsub(rhs,result);
538 
539  //Doc time for solver
540  clock_t t_end = clock();
541 
542  Solution_time = double(t_end-t_start)/CLOCKS_PER_SEC;
543  if(Doc_time)
544  {
545  oomph_info << "CPU for solve with DenseLU [sec]: "
546  << Solution_time << std::endl;
547  }
548 
549  //If we are not resolving then delete storage
551 }
552 
553 //==================================================================
554 /// Solver: Takes pointer to problem and returns the results Vector
555 /// which contains the solution of the linear system defined by
556 /// the problem's residual Vector. (Jacobian assembled by FD).
557 //==================================================================
558 void FD_LU::solve(Problem* const &problem_pt, DoubleVector &result)
559 {
560  //Initialise timer
561  clock_t t_start = clock();
562 
563 #ifdef PARANOID
564  // if the result vector is setup then check it is not distributed and has
565  // the same communicator as the rhs vector
566  if (result.built())
567  {
568  if (result.distributed())
569  {
570  std::ostringstream error_message_stream;
571  error_message_stream
572  << "The result vector must not be distributed";
573  throw OomphLibError(error_message_stream.str(),
574  OOMPH_CURRENT_FUNCTION,
575  OOMPH_EXCEPTION_LOCATION);
576  }
577  }
578 #endif
579 
580  //Find # of degrees of freedom
581  unsigned long n_dof = problem_pt->ndof();
582 
583  //Allocate storage for the residuals vector and the jacobian matrix
584  DoubleVector residuals;
585  DenseDoubleMatrix jacobian(n_dof);
586 
587  {
588  // initialise timer
589  clock_t t_start = clock();
590 
591  //Get the full jacobian by finite differencing) VERY INEFFICIENT!
592  problem_pt->get_fd_jacobian(residuals,jacobian);
593 
594  // compute jacobian setup time
595  clock_t t_end = clock();
596  Jacobian_setup_time = double(t_end-t_start)/CLOCKS_PER_SEC;
597 
598  //Report the time
599  if(Doc_time)
600  {
601  oomph_info << std::endl << "CPU for setup of Dense Jacobian [sec]: "
602  << Jacobian_setup_time << std::endl << std::endl;
603  }
604  }
605 
606  //Solve by dense LU decomposition (not efficient)
607  solve(&jacobian,residuals,result);
608 
609  //Set the sign of the determinant of the jacobian
611 
612  // Finalise/doc timings
613  clock_t t_end = clock();
614  double total_time=double(t_end-t_start)/CLOCKS_PER_SEC;
615  if(Doc_time)
616  {
617  oomph_info << "CPU for FD DenseLU LinearSolver [sec]: "
618  << total_time << std::endl << std::endl;
619  }
620 }
621 
622 
623 //===================================================================
624 // Interface to SuperLU wrapper
625 //===================================================================
626 extern "C"
627 {
628  int superlu(int *, int *, int *, int *,
629  double *, int *, int *,
630  double *, int *, int *, int *,
631  void*, int *);
632 }
633 
634 
635 #ifdef OOMPH_HAS_MPI
636 
637 //===================================================================
638 // Interface to SuperLU_DIST wrapper
639 //===================================================================
640 extern "C"
641 {
642 
643  // Interface to distributed SuperLU solver where each processor
644  // holds the entire matrix
645  void superlu_dist_global_matrix(int opt_flag, int allow_permutations,
646  int n, int nnz, double *values,
647  int *row_index, int *col_start,
648  double *b, int nprow, int npcol,
649  int doc, void **data, int *info,
650  MPI_Comm comm);
651 
652  // Interface to distributed SuperLU solver where each processor
653  // holds part of the matrix
654  void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations,
655  int n, int nnz_local,
656  int nrow_local, int first_row,
657  double *values, int *col_index,
658  int *row_start, double *b,
659  int nprow, int npcol,
660  int doc, void **data, int *info,
661  MPI_Comm comm);
662 
663 // helper method - just calls the superlu method dCompRow_to_CompCol to convert
664 // the c-style vectors of a cr matrix to a cc matrix
665  void superlu_cr_to_cc(int nrow, int ncol, int nnz, double* cr_values,
666  int* cr_index, int* cr_start, double** cc_values,
667  int** cc_index, int** cc_start);
668 
669 }
670 #endif
671 
672 
673 //=============================================================================
674 /// Solver: Takes pointer to problem and returns the results Vector
675 /// which contains the solution of the linear system defined by
676 /// the problem's fully assembled Jacobian and residual Vector.
677 //=============================================================================
678 void SuperLUSolver::solve(Problem* const &problem_pt, DoubleVector &result)
679 {
680  // wipe memory
681  this->clean_up_memory();
682 
683 #ifdef OOMPH_HAS_MPI
684  // USING SUPERLU DIST
685  /////////////////////
686  if (Solver_type == Distributed ||
687  (Solver_type == Default && problem_pt->communicator_pt()->nproc() > 1))
688  {
689  // init the timers
690  double t_start = TimingHelpers::timer();
691 
692  // number of dofs
693  unsigned n_dof = problem_pt->ndof();
694 
695  // set the distribution
696  LinearAlgebraDistribution dist(problem_pt->communicator_pt(),n_dof,
698  this->build_distribution(dist);
699 
700  // Take a copy of Delete_matrix_data
701  bool copy_of_Delete_matrix_data = Dist_delete_matrix_data;
702 
703  // Set Delete_matrix to true
705 
706  // Use the distributed version of SuperLU_DIST?
708  {
709  // Initialise timer
710  double t_start = TimingHelpers::timer();
711 
712  // Storage for the residuals vector
713  DoubleVector residuals(this->distribution_pt(),0.0);
714 
715  // Get the sparse jacobian and residuals of the problem
716  CRDoubleMatrix jacobian(this->distribution_pt());
717  problem_pt->get_jacobian(residuals, jacobian);
718 
719  // Doc time for setup
720  double t_end = TimingHelpers::timer();
721  Jacobian_setup_time = t_end-t_start;
722  if (Doc_time)
723  {
724  oomph_info << "Time to set up CRDoubleMatrix Jacobian [sec] : "
725  << Jacobian_setup_time << std::endl;
726  }
727 
728  //Now call the linear algebra solve, if desired
729  if(!Suppress_solve)
730  {
731  //If the distribution of the result has been build and
732  //does not match that of
733  //the solver then redistribute before the solve and return
734  //to the incoming distribution afterwards.
735  if((result.built()) &&
736  (!(*result.distribution_pt() == *this->distribution_pt())))
737  {
739  temp_global_dist(result.distribution_pt());
740  result.build(this->distribution_pt(),0.0);
741  solve(&jacobian,residuals,result);
742  result.redistribute(&temp_global_dist);
743  }
744  else
745  {
746  solve(&jacobian,residuals,result);
747  }
748  }
749  }
750  //Otherwise its the global solve version
751  else
752  {
753  // Storage for the residuals vector
754  // A non-distriubted residuals vector
755  LinearAlgebraDistribution dist(problem_pt->communicator_pt(),
756  problem_pt->ndof(),
757  false);
758  DoubleVector residuals(&dist,0.0);
759  CRDoubleMatrix jacobian(&dist);
760 
761  //Get the sparse jacobian and residuals of the problem
762  problem_pt->get_jacobian(residuals, jacobian);
763 
764  // Doc time for setup
765  double t_end = TimingHelpers::timer();
766  Jacobian_setup_time = t_end-t_start;
767  if (Doc_time)
768  {
769  oomph_info << "Time to set up CR Jacobian [sec] : "
770  << Jacobian_setup_time << std::endl;
771  }
772 
773  //Now call the linear algebra solve, if desired
774  if(!Suppress_solve)
775  {
776  //If the result distribution has been built and
777  //does not match the global distribution
778  //the redistribute before the solve and then return to the
779  //distributed version afterwards
780  if((result.built()) && (!(*result.distribution_pt() == dist)))
781  {
783  temp_global_dist(result.distribution_pt());
784  result.build(&dist,0.0);
785  solve(&jacobian,residuals,result);
786  result.redistribute(&temp_global_dist);
787  }
788  else
789  {
790  solve(&jacobian,residuals,result);
791  }
792  }
793  }
794  // Set Delete_matrix back to original value
795  Dist_delete_matrix_data = copy_of_Delete_matrix_data;
796  }
797 
798  // OTHERWISE WE ARE USING SUPERLU (SERIAL)
799  //////////////////////////////////////////
800  else
801 #endif
802  {
803 
804  // set the solver distribution
805  LinearAlgebraDistribution dist(problem_pt->communicator_pt(),
806  problem_pt->ndof(),false);
807  this->build_distribution(dist);
808 
809  //Allocate storage for the residuals vector
810  DoubleVector residuals(dist,0.0);
811 
812  // Use the compressed row version?
814  {
815 
816  // Initialise timer
817  double t_start = TimingHelpers::timer();
818 
819  //Get the sparse jacobian and residuals of the problem
820  CRDoubleMatrix CR_jacobian(this->distribution_pt());
821  problem_pt->get_jacobian(residuals,CR_jacobian);
822 
823  // If we want to compute the gradient for the globally convergent
824  // Newton method, then do it here
825  if(Compute_gradient)
826  {
827  // Compute it
828  CR_jacobian.multiply_transpose(residuals,
830  // Set the flag
832  }
833 
834  // Doc time for setup
835  double t_end = TimingHelpers::timer();
836  Jacobian_setup_time = t_end-t_start;
837  if(Doc_time)
838  {
839  oomph_info << std::endl
840  << "Time to set up CRDoubleMatrix Jacobian [sec]: "
841  << Jacobian_setup_time << std::endl;
842  }
843 
844  //Now call the linear algebra solve, if desired
845  if(!Suppress_solve)
846  {
847  //If the result vector is built and distributed
848  //then need to redistribute into the same form as the
849  //RHS (non-distributed)
850  if((result.built()) &&
851  (!(*result.distribution_pt() == *this->distribution_pt())))
852  {
854  temp_global_dist(result.distribution_pt());
855  result.build(this->distribution_pt(),0.0);
856  solve(&CR_jacobian,residuals,result);
857  result.redistribute(&temp_global_dist);
858  }
859  //Otherwise just solve
860  else
861  {
862  solve(&CR_jacobian,residuals,result);
863  }
864  }
865  }
866  //Otherwise its the compressed column version
867  else
868  {
869  // Initialise timer
870  double t_start = TimingHelpers::timer();
871 
872  //Get the sparse jacobian and residuals of the problem
873  CCDoubleMatrix CC_jacobian;
874  problem_pt->get_jacobian(residuals,CC_jacobian);
875 
876  // If we want to compute the gradient for the globally convergent
877  // Newton method, then do it here
878  if(Compute_gradient)
879  {
880  // Compute it
881  CC_jacobian.multiply_transpose(residuals,
883  // Set the flag
885  }
886 
887  // Doc time for setup
888  double t_end = TimingHelpers::timer();
889  Jacobian_setup_time=t_end-t_start;
890  if(Doc_time)
891  {
892  oomph_info << "\nTime to set up CCDoubleMatrix Jacobian [sec]: "
893  << Jacobian_setup_time << std::endl;
894  }
895 
896  //Now call the linear algebra solve, if desired
897  if(!Suppress_solve)
898  {
899  //If the result vector is built and distributed
900  //then need to redistribute into the same form as the
901  //RHS
902  if((result.built()) &&
903  (!(*result.distribution_pt() == *this->distribution_pt())))
904  {
906  temp_global_dist(result.distribution_pt());
907  result.build(this->distribution_pt(),0.0);
908  solve(&CC_jacobian,residuals,result);
909  result.redistribute(&temp_global_dist);
910  }
911  //Otherwise just solve
912  else
913  {
914  solve(&CC_jacobian,residuals,result);
915  }
916  }
917  }
918 
919  //Set the sign of the jacobian
920  //(this is computed in the LU decomposition phase)
922  }
923 }
924 
925 //=========================================================================
926 /// Linear-algebra-type solver: Takes pointer to a matrix and rhs
927 /// vector and returns the solution of the linear system. Problem pointer
928 /// defaults to NULL and can be omitted. The function returns the global
929 /// result Vector.
930 /// Note: if Delete_matrix_data is true the function
931 /// matrix_pt->clean_up_memory() will be used to wipe the matrix data.
932 //=========================================================================
933 void SuperLUSolver::solve(DoubleMatrixBase* const &matrix_pt,
934  const DoubleVector &rhs,
935  DoubleVector &result)
936 {
937  // Initialise timer
938  double t_start = TimingHelpers::timer();
939 
940 #ifdef PARANOID
941  // check that the rhs vector is setup
942  if (!rhs.built())
943  {
944  std::ostringstream error_message_stream;
945  error_message_stream
946  << "The vectors rhs must be setup";
947  throw OomphLibError(error_message_stream.str(),
948  OOMPH_CURRENT_FUNCTION,
949  OOMPH_EXCEPTION_LOCATION);
950  }
951 
952  // check that the matrix is square
953  if (matrix_pt->nrow() != matrix_pt->ncol())
954  {
955  std::ostringstream error_message_stream;
956  error_message_stream
957  << "The matrix at matrix_pt must be square.";
958  throw OomphLibError(error_message_stream.str(),
959  OOMPH_CURRENT_FUNCTION,
960  OOMPH_EXCEPTION_LOCATION);
961  }
962 
963  // check that the matrix has some entries, and so has a values_pt that
964  // makes sense (only for CR because CC is never used I think dense
965  // matrices will be safe since they don't use a values pointer).
966  CRDoubleMatrix* cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
967  if (cr_pt != 0)
968  {
969  if (cr_pt->nnz() == 0)
970  {
971  std::ostringstream error_message_stream;
972  error_message_stream
973  << "Attempted to call SuperLu on a CRDoubleMatrix with no entries, "
974  << "SuperLU would segfault (because the values array pt is "
975  << "uninitialised or null).";
976  throw OomphLibError(error_message_stream.str(),
977  OOMPH_CURRENT_FUNCTION,
978  OOMPH_EXCEPTION_LOCATION);
979  }
980  }
981 
982  // check that the matrix and the rhs vector have the same nrow()
983  if (matrix_pt->nrow() != rhs.nrow())
984  {
985  std::ostringstream error_message_stream;
986  error_message_stream
987  << "The matrix and the rhs vector must have the same number of rows.";
988  throw OomphLibError(error_message_stream.str(),
989  OOMPH_CURRENT_FUNCTION,
990  OOMPH_EXCEPTION_LOCATION);
991  }
992 
993  // if the matrix is distributable then should have the same distribution
994  // as the rhs vector
995  DistributableLinearAlgebraObject* dist_matrix_pt =
996  dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
997  if (dist_matrix_pt != 0)
998  {
999  if (!(*dist_matrix_pt->distribution_pt() == *rhs.distribution_pt()))
1000  {
1001  std::ostringstream error_message_stream;
1002  error_message_stream
1003  << "The matrix matrix_pt must have the same distribution as the "
1004  << "rhs vector.";
1005  throw OomphLibError(error_message_stream.str(),
1006  OOMPH_CURRENT_FUNCTION,
1007  OOMPH_EXCEPTION_LOCATION);
1008  }
1009  }
1010  // if the matrix is not distributable then it the rhs vector should not be
1011  // distributed
1012  else
1013  {
1014  if (rhs.distribution_pt()->distributed())
1015  {
1016  std::ostringstream error_message_stream;
1017  error_message_stream
1018  << "The matrix (matrix_pt) is not distributable and therefore the rhs"
1019  << " vector must not be distributed";
1020  throw OomphLibError(error_message_stream.str(),
1021  OOMPH_CURRENT_FUNCTION,
1022  OOMPH_EXCEPTION_LOCATION);
1023  }
1024  }
1025  // if the result vector is setup then check it has the same distribution
1026  // as the rhs
1027  if (result.built())
1028  {
1029  if (!(*result.distribution_pt() == *rhs.distribution_pt()))
1030  {
1031  std::ostringstream error_message_stream;
1032  error_message_stream
1033  << "The result vector distribution has been setup; it must have the "
1034  << "same distribution as the rhs vector.";
1035  throw OomphLibError(error_message_stream.str(),
1036  OOMPH_CURRENT_FUNCTION,
1037  OOMPH_EXCEPTION_LOCATION);
1038  }
1039  }
1040 #endif
1041 
1042  // set the distribution
1043  if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt))
1044  {
1045  // the solver has the same distribution as the matrix if possible
1046  this->build_distribution(dynamic_cast<DistributableLinearAlgebraObject*>
1047  (matrix_pt)->distribution_pt());
1048  }
1049  else
1050  {
1051  // the solver has the same distribution as the RHS
1052  this->build_distribution(rhs.distribution_pt());
1053  }
1054 
1055  //Factorise the matrix
1056  factorise(matrix_pt);
1057 
1058  //Now do the back solve
1059  backsub(rhs,result);
1060 
1061  // Doc time for solve
1062  double t_end = TimingHelpers::timer();
1063  Solution_time = t_end-t_start;
1064  if (Doc_time)
1065  {
1066  oomph_info << "Time for SuperLUSolver solve [sec] : "
1067  << t_end-t_start << std::endl;
1068  }
1069 
1070  // If we are not storing the solver data for resolves, delete it
1071  if (!Enable_resolve)
1072  {
1073  clean_up_memory();
1074  }
1075 }
1076 
1077 //===============================================================
1078 /// Resolve the system for a given RHS
1079 //===============================================================
1081  DoubleVector &result)
1082 {
1083  // Store starting time for solve
1084  double t_start = TimingHelpers::timer();
1085 
1086  // backsub
1087  backsub(rhs,result);
1088 
1089  // Doc time for solve
1090  double t_end = TimingHelpers::timer();
1091  Solution_time = t_end-t_start;
1092  if (Doc_time)
1093  {
1094  oomph_info << "Time for SuperLUSolver solve [sec]: "
1095  << t_end-t_start << std::endl;
1096  }
1097 }
1098 
1099 //===================================================================
1100 ///\short LU decompose the matrix addressed by matrix_pt by using
1101 /// the SuperLU solver. The resulting matrix factors are stored
1102 /// internally.
1103 //===================================================================
1105 {
1106  // wipe memory
1107  this->clean_up_memory();
1108 
1109  // if we have mpi and the solver is distributed or default and nproc
1110  // gt 1
1111 #ifdef OOMPH_HAS_MPI
1112  DistributableLinearAlgebraObject* dist_matrix_pt =
1113  dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
1114  unsigned nproc = 1;
1115  if (dist_matrix_pt != 0)
1116  {
1117  nproc = dist_matrix_pt->distribution_pt()->communicator_pt()->nproc();
1118  }
1119  if (Solver_type == Distributed ||
1120  (Solver_type == Default && nproc > 1 &&
1122  {
1123 
1124  // if the matrix is a distributed linear algebra object then use SuperLU
1125  // dist
1126  if (dist_matrix_pt != 0)
1127  {
1128  factorise_distributed(matrix_pt);
1129  Using_dist = true;
1130  }
1131  else
1132  {
1133  factorise_serial(matrix_pt);
1134  Using_dist = false;
1135  }
1136  }
1137  else
1138 #endif
1139  {
1140  factorise_serial(matrix_pt);
1141  Using_dist = false;
1142  }
1143 }
1144 
1145 #ifdef OOMPH_HAS_MPI
1146 //=============================================================================
1147 /// LU decompose the matrix addressed by matrix_pt using
1148 /// the SuperLU_DIST solver. The resulting matrix factors are stored
1149 /// internally.
1150 //=============================================================================
1152 {
1153  //Check that we have a square matrix
1154 #ifdef PARANOID
1155  int m = matrix_pt->ncol();
1156  int n = matrix_pt->nrow();
1157  if(n != m)
1158  {
1159  std::ostringstream error_message_stream;
1160  error_message_stream << "Can only solve for square matrices\n"
1161  << "N, M " << n << " " << m << std::endl;
1162 
1163  throw OomphLibError(error_message_stream.str(),
1164  OOMPH_CURRENT_FUNCTION,
1165  OOMPH_EXCEPTION_LOCATION);
1166  }
1167 #endif
1168 
1169  // number of processors
1170  unsigned nproc = MPI_Helpers::communicator_pt()->nproc();
1171  if(dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt) != 0)
1172  {
1173  nproc = dynamic_cast<DistributableLinearAlgebraObject*>
1174  (matrix_pt)->distribution_pt()->communicator_pt()->nproc();
1175  }
1176 
1177  // Find number of rows and columns for the process grid
1178  // First guess at number of rows:
1179  int nprow=int(sqrt(double(nproc)));
1180 
1181  // Does this evenly divide the processor grid?
1182  while (nprow>1)
1183  {
1184  if (nproc%nprow==0) break;
1185  nprow-=1;
1186  }
1187 
1188  // Store Number of rows/columns for process grid
1189  Dist_nprow=nprow;
1190  Dist_npcol=nproc/Dist_nprow;
1191 
1192  // Make sure any existing factors are deleted
1193  clean_up_memory();
1194 
1195  // Doc (0/1) = (true/false)
1196  int doc = !Doc_stats;
1197 
1198  // Rset Info
1199  Dist_info=0;
1200 
1201  // Flag for row and column permutations
1202  int allow_permutations = Dist_allow_row_and_col_permutations;
1203 
1204  // Is it a DistributedCRDoubleMatrix?
1205  if(dynamic_cast<CRDoubleMatrix*>(matrix_pt) != 0)
1206  {
1207  // Get a cast pointer to the matrix
1208  CRDoubleMatrix* cr_matrix_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
1209 
1210  //Get the distribution from the matrix
1211  this->build_distribution(cr_matrix_pt->distribution_pt());
1212 
1213 #ifdef PARANOID
1214  // paranoid check that the matrix has been setup
1215  if (!cr_matrix_pt->built())
1216  {
1217  throw OomphLibError
1218  ("To apply SuperLUSolver to a CRDoubleMatrix - it must be built",
1219  OOMPH_CURRENT_FUNCTION,OOMPH_EXCEPTION_LOCATION);
1220  }
1221 #endif
1222 
1223  // if the matrix is distributed then setup setup superlu dist distributed
1224  if (cr_matrix_pt->distributed())
1225  {
1226  // Find the number of non-zero entries in the matrix
1227  const int nnz_local = int(cr_matrix_pt->nnz());
1228 
1229  // Set up the pointers to the matrix.
1230  // NOTE: these arrays (accessed via value_pt, index_pt and
1231  // start_pt) may be modified by the SuperLU_DIST routines, and so
1232  // a copy must be taken if the matrix is to be preserved.
1233 
1234  // Copy values
1235  Dist_value_pt = new double[nnz_local];
1236  double* matrix_value_pt = cr_matrix_pt->value();
1237  for(int i=0;i<nnz_local;i++)
1238  {
1239  Dist_value_pt[i] = matrix_value_pt[i];
1240  }
1241 
1242  // Copy column indices
1243  Dist_index_pt = new int[nnz_local];
1244  int* matrix_index_pt = cr_matrix_pt->column_index();
1245  for (int i=0; i<nnz_local; i++)
1246  {
1247  Dist_index_pt[i] = matrix_index_pt[i];
1248  }
1249 
1250  // Copy row starts
1251  int nrow_local = cr_matrix_pt->nrow_local();
1252  Dist_start_pt = new int[nrow_local+1];
1253  int* matrix_start_pt = cr_matrix_pt->row_start();
1254  for (int i=0; i<=nrow_local; i++)
1255  {
1256  Dist_start_pt[i] = matrix_start_pt[i];
1257  }
1258 
1259  // cache
1260  int ndof = cr_matrix_pt->distribution_pt()->nrow();
1261  int first_row = cr_matrix_pt->first_row();
1262 
1263  // Now delete the matrix if we are allowed
1264  if (Dist_delete_matrix_data==true)
1265  {
1266  cr_matrix_pt->clear();
1267  }
1268 
1269  // Factorize
1270  superlu_dist_distributed_matrix(1, allow_permutations,
1271  ndof, nnz_local, nrow_local,
1272  first_row, Dist_value_pt, Dist_index_pt,
1275  this->distribution_pt()->
1276  communicator_pt()->mpi_comm());
1277 
1278  // Record that data is stored
1280  }
1281  // else the CRDoubleMatrix is not distributed
1282  else
1283  {
1284  // Find the number of non-zero entries in the matrix
1285  const int nnz = int(cr_matrix_pt->nnz());
1286 
1287  // cache the number of rows
1288  int nrow = cr_matrix_pt->nrow();
1289 
1290  // Set up the pointers to the matrix.
1291  // NOTE: these arrays (accessed via value_pt, index_pt and
1292  // start_pt) may be modified by the SuperLU_DIST routines, and so
1293  // a copy must be taken if the matrix is to be preserved.
1294 
1295  // create the corresponing cc matrix
1296  superlu_cr_to_cc(nrow,nrow,nnz,cr_matrix_pt->value(),
1297  cr_matrix_pt->column_index(),
1298  cr_matrix_pt->row_start(),
1300 
1301  // Delete the matrix if we are allowed
1302  if (Dist_delete_matrix_data==true)
1303  {
1304  cr_matrix_pt->clear();
1305  }
1306 
1307  // do the factorization
1308  superlu_dist_global_matrix(1, allow_permutations,
1309  nrow, nnz, Dist_value_pt, Dist_index_pt,
1310  Dist_start_pt,
1311  0, Dist_nprow, Dist_npcol, doc,
1313  this->distribution_pt()
1314  ->communicator_pt()->mpi_comm());
1315 
1316  // Record that data is stored
1318  }
1319  }
1320 
1321  // Or is it a CCDoubleMatrix?
1322 else if(dynamic_cast<CCDoubleMatrix*>(matrix_pt))
1323  {
1324  // Get a cast pointer to the matrix
1325  CCDoubleMatrix* serial_matrix_pt = dynamic_cast<CCDoubleMatrix*>(matrix_pt);
1326 
1327  // Find the number of non-zero entries in the matrix
1328  const int nnz = int(serial_matrix_pt->nnz());
1329 
1330  // Find # of degrees of freedom (variables)
1331  int ndof = int(serial_matrix_pt->nrow());
1332 
1333  // Find the local number of degrees of freedom in the linear system
1334  int ndof_local = ndof;
1335 
1336  // Set up the pointers to the matrix.
1337  // NOTE: these arrays (accessed via value_pt, index_pt and
1338  // start_pt) may be modified by the SuperLU_DIST routines, and so
1339  // a copy must be taken if the matrix is to be preserved.
1340 
1341  // Copy values
1342  Dist_value_pt = new double[nnz];
1343  double* matrix_value_pt = serial_matrix_pt->value();
1344  for(int i=0;i<nnz;i++)
1345  {
1346  Dist_value_pt[i] = matrix_value_pt[i];
1347  }
1348 
1349  // copy row indices
1350  Dist_index_pt = new int[nnz];
1351  int* matrix_index_pt = serial_matrix_pt->row_index();
1352  for (int i=0; i<nnz; i++)
1353  {
1354  Dist_index_pt[i] = matrix_index_pt[i];
1355  }
1356 
1357  // copy column starts
1358  Dist_start_pt = new int[ndof_local+1];
1359  int* matrix_start_pt = serial_matrix_pt->column_start();
1360  for (int i=0; i<=ndof_local; i++)
1361  {
1362  Dist_start_pt[i] = matrix_start_pt[i];
1363  }
1364 
1365  // Delete the matrix if we are allowed
1366  if (Dist_delete_matrix_data==true)
1367  {
1368  serial_matrix_pt->clean_up_memory();
1369  }
1370 
1371  // do the factorization
1372  superlu_dist_global_matrix(1, allow_permutations,
1373  ndof, nnz, Dist_value_pt, Dist_index_pt,
1374  Dist_start_pt, 0, Dist_nprow, Dist_npcol, doc,
1376  this->distribution_pt()
1377  ->communicator_pt()->mpi_comm());
1378 
1379  // Record that data is stored
1381  }
1382  // Otherwise throw an error
1383  else
1384  {
1385  std::ostringstream error_message_stream;
1386  error_message_stream << "SuperLUSolver implemented only for "
1387  << " CCDoubleMatrix, CRDoubleMatrix\n"
1388  << "and DistributedCRDoubleMatrix matrices\n";
1389  throw OomphLibError(error_message_stream.str(),
1390  OOMPH_CURRENT_FUNCTION,
1391  OOMPH_EXCEPTION_LOCATION);
1392  }
1393 
1394  // Throw an error if superLU returned an error status in info.
1395  if(Dist_info != 0)
1396  {
1397  std::ostringstream error_msg;
1398  error_msg << "SuperLU returned the error status code "
1399  << Dist_info
1400  << " . See the SuperLU documentation for what this means.";
1401  throw OomphLibError(error_msg.str(),
1402  OOMPH_CURRENT_FUNCTION,
1403  OOMPH_EXCEPTION_LOCATION);
1404  }
1405 }
1406 #endif
1407 
1408 //===================================================================
1409 ///\short LU decompose the matrix addressed by matrix_pt by using
1410 /// the SuperLU solver. The resulting matrix factors are stored
1411 /// internally.
1412 //===================================================================
1414 {
1415 #ifdef PARANOID
1416  // PARANOID check that if the matrix is distributable then it should not be
1417  // then it should not be distributed
1418  if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt) != 0)
1419  {
1420  if (dynamic_cast<DistributableLinearAlgebraObject*>
1421  (matrix_pt)->distributed())
1422  {
1423  std::ostringstream error_message_stream;
1424  error_message_stream
1425  << "The matrix must not be distributed.";
1426  throw OomphLibError(error_message_stream.str(),
1427  OOMPH_CURRENT_FUNCTION,
1428  OOMPH_EXCEPTION_LOCATION);
1429  }
1430  }
1431 #endif
1432 
1433  //Find # of degrees of freedom (variables)
1434  int n = matrix_pt->nrow();
1435 
1436  //Check that we have a square matrix
1437 #ifdef PARANOID
1438  int m = matrix_pt->ncol();
1439  if(n != m)
1440  {
1441  std::ostringstream error_message_stream;
1442  error_message_stream << "Can only solve for square matrices\n"
1443  << "N, M " << n << " " << m << std::endl;
1444 
1445  throw OomphLibError(error_message_stream.str(),
1446  OOMPH_CURRENT_FUNCTION,
1447  OOMPH_EXCEPTION_LOCATION);
1448  }
1449 #endif
1450 
1451  //Storage for the values, rows and column indices
1452  //required by SuplerLU
1453  double *value = 0;
1454  int *index=0, *start=0;
1455 
1456  //Integer used to represent compressed row or column format
1457  //Default compressed row
1458  int transpose = 0;
1459 
1460  //Number of non-zero entries in the matrix
1461  int nnz = 0;
1462 
1463  // Doc flag (convert to int for SuperLU)
1464  int doc = Doc_stats;
1465 
1466  //Is it a CR matrix
1467  if(dynamic_cast<CRDoubleMatrix*>(matrix_pt))
1468  {
1469  //Set the appropriate row flags
1471  transpose = 1;
1472  //Get a cast pointer to the matrix
1473  CRDoubleMatrix* CR_matrix_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
1474 
1475  //Now set the pointers to the interanally stored values
1476  //and indices
1477  nnz = CR_matrix_pt->nnz();
1478  value = CR_matrix_pt->value();
1479  index = CR_matrix_pt->column_index();
1480  start = CR_matrix_pt->row_start();
1481  }
1482  //Otherwise is it the compressed column version?
1483  else if(dynamic_cast<CCDoubleMatrix*>(matrix_pt))
1484  {
1485  //Set the compressed row flag to false
1487  //Get a cast pointer to the matrix
1488  CCDoubleMatrix* CC_matrix_pt = dynamic_cast<CCDoubleMatrix*>(matrix_pt);
1489 
1490  //Now set the pointers to the interanally stored values
1491  //and indices
1492  nnz = CC_matrix_pt->nnz();
1493  value = CC_matrix_pt->value();
1494  index = CC_matrix_pt->row_index();
1495  start = CC_matrix_pt->column_start();
1496  }
1497  //Otherwise throw and error
1498  else
1499  {
1500  throw OomphLibError("SuperLU only works with CR or CC Double matrices",
1501  OOMPH_CURRENT_FUNCTION,
1502  OOMPH_EXCEPTION_LOCATION);
1503  }
1504 
1505  // Clean up any previous storage so that if this is called twice with
1506  // the same matrix, we don't get a memory leak
1507  clean_up_memory();
1508 
1509  //Perform the lu decompose phase (i=1)
1510  int i=1;
1512  value, index, start,
1513  0, &n, &transpose, &doc,
1514  &Serial_f_factors,
1515  &Serial_info);
1516 
1517  // Throw an error if superLU returned an error status in info.
1518  if(Serial_info != 0)
1519  {
1520  std::ostringstream error_msg;
1521  error_msg << "SuperLU returned the error status code "
1522  << Serial_info
1523  << " . See the SuperLU documentation for what this means.";
1524  throw OomphLibError(error_msg.str(),
1525  OOMPH_CURRENT_FUNCTION,
1526  OOMPH_EXCEPTION_LOCATION);
1527  }
1528 
1529 
1530  //Set the number of degrees of freedom in the linear system
1531  Serial_n_dof = n;
1532 }
1533 
1534 //=============================================================================
1535 /// Do the backsubstitution for SuperLUSolver.
1536 /// Note - this method performs no paranoid checks - these are all performed in
1537 /// solve(...) and resolve(...)
1538 //=============================================================================
1540  DoubleVector &result)
1541 {
1542 #ifdef OOMPH_HAS_MPI
1543  if (Using_dist)
1544  {
1545  backsub_distributed(rhs,result);
1546  }
1547  else
1548 #endif
1549  {
1550  backsub_serial(rhs,result);
1551  }
1552 }
1553 
1554 #ifdef OOMPH_HAS_MPI
1555 //=========================================================================
1556 ///Static warning to suppress warnings about incorrect distribution of
1557 ///RHS vector. Default is false
1558 //=========================================================================
1560  =false;
1561 
1562 //=============================================================================
1563 /// Do the backsubstitution for SuperLU solver.
1564 /// Note - this method performs no paranoid checks - these are all performed in
1565 /// solve(...) and resolve(...)
1566 //=============================================================================
1568  DoubleVector &result)
1569 {
1570 #ifdef PARANOID
1571  // check that the rhs vector is setup
1572  if (!rhs.distribution_pt()->built())
1573  {
1574  std::ostringstream error_message_stream;
1575  error_message_stream
1576  << "The vectors rhs must be setup";
1577  throw OomphLibError(error_message_stream.str(),
1578  OOMPH_CURRENT_FUNCTION,
1579  OOMPH_EXCEPTION_LOCATION);
1580  }
1581 #endif
1582  // check that the rhs distribution is the same as the distribution as this
1583  // solver. If not redistribute and issue a warning
1584  LinearAlgebraDistribution rhs_distribution(rhs.distribution_pt());
1585  if (!(*rhs.distribution_pt() == *this->distribution_pt()))
1586  {
1588  {
1589  std::ostringstream warning_stream;
1590  warning_stream
1591  << "The distribution of rhs vector does not match that ofthe solver.\n";
1592  warning_stream
1593  << "The rhs will be redistributed, which is likely to be inefficient\n";
1594  warning_stream
1595  << "To remove this warning you can either:\n"
1596  << " i) Ensure that the rhs vector has the correct distribution\n"
1597  << " before calling the resolve() function\n"
1598  << "or ii) Set the flag \n"
1599  << " SuperLUSolver::Suppress_incorrect_rhs_distribution_warning_in_resolve\n"
1600  << " to be true\n\n";
1601 
1602  OomphLibWarning(warning_stream.str(),
1603  "SuperLUSolver::resolve()",
1604  OOMPH_EXCEPTION_LOCATION);
1605  }
1606 
1607  //Have to cast away const-ness (which tells us that we shouldn't really
1608  //be doing this!)
1609  const_cast<DoubleVector&>(rhs).redistribute(this->distribution_pt());
1610  }
1611 
1612 #ifdef PARANOID
1613  // if the result vector is setup then check it has the same distribution
1614  // as the rhs
1615  if (result.distribution_built())
1616  {
1617  if (!(*result.distribution_pt() == *rhs.distribution_pt()))
1618  {
1619  std::ostringstream error_message_stream;
1620  error_message_stream
1621  << "The result vector distribution has been setup; it must have the "
1622  << "same distribution as the rhs vector.";
1623  throw OomphLibError(error_message_stream.str(),
1624  OOMPH_CURRENT_FUNCTION,
1625  OOMPH_EXCEPTION_LOCATION);
1626  }
1627  }
1628 #endif
1629  // Doc (0/1) = (true/false)
1630  int doc = !Doc_stats;
1631 
1632  // Reset Info
1633  Dist_info=0;
1634 
1635  // number of DOFs
1636  int ndof = this->distribution_pt()->nrow();
1637 
1638  // Copy the rhs values to result
1639  result = rhs;
1640 
1641  // Do the backsubsitition phase
1643  {
1644  // Call distributed solver
1645  superlu_dist_distributed_matrix(2, -1, ndof, 0, 0, 0, 0, 0, 0,
1646  result.values_pt(), Dist_nprow,
1647  Dist_npcol, doc,
1649  this->distribution_pt()->
1650  communicator_pt()->mpi_comm());
1651  }
1653  {
1654  // Call global solver
1655  superlu_dist_global_matrix(2, -1, ndof, 0, 0, 0, 0, result.values_pt(),
1658  this->distribution_pt()->communicator_pt()->mpi_comm());
1659  }
1660  else
1661  {
1662  throw OomphLibError("The matrix factors have not been stored",
1663  OOMPH_CURRENT_FUNCTION,
1664  OOMPH_EXCEPTION_LOCATION);
1665  }
1666 
1667  // Throw an error if superLU returned an error status in info.
1668  if(Dist_info != 0)
1669  {
1670  std::ostringstream error_msg;
1671  error_msg << "SuperLU returned the error status code "
1672  << Dist_info << " . See the SuperLU documentation for what this means.";
1673  throw OomphLibError(error_msg.str(),
1674  OOMPH_CURRENT_FUNCTION,
1675  OOMPH_EXCEPTION_LOCATION);
1676  }
1677 
1678  //Redistribute to original distribution
1679  //Have to cast away const-ness (which tells us that we shouldn't really
1680  //be doing this!)
1681  const_cast<DoubleVector&>(rhs).redistribute(&rhs_distribution);
1682 }
1683 #endif
1684 
1685 //================================================================
1686 /// Do the backsubstitution for SuperLU
1687 //================================================================
1689  DoubleVector &result)
1690 {
1691  //Find the number of unknowns
1692  int n = rhs.nrow();
1693 
1694 #ifdef PARANOID
1695  // PARANOID check that this rhs distribution is setup
1696  if (!rhs.built())
1697  {
1698  std::ostringstream error_message_stream;
1699  error_message_stream
1700  << "The rhs vector distribution must be setup.";
1701  throw OomphLibError(error_message_stream.str(),
1702  OOMPH_CURRENT_FUNCTION,
1703  OOMPH_EXCEPTION_LOCATION);
1704  }
1705  // PARANOID check that the rhs has the right number of global rows
1706  if(static_cast<int>(Serial_n_dof) != n)
1707  {
1708  throw OomphLibError(
1709  "RHS does not have the same dimension as the linear system",
1710  OOMPH_CURRENT_FUNCTION,
1711  OOMPH_EXCEPTION_LOCATION);
1712  }
1713  // PARANOID check that the rhs is not distributed
1714  if (rhs.distribution_pt()->distributed())
1715  {
1716  std::ostringstream error_message_stream;
1717  error_message_stream
1718  << "The rhs vector must not be distributed.";
1719  throw OomphLibError(error_message_stream.str(),
1720  OOMPH_CURRENT_FUNCTION,
1721  OOMPH_EXCEPTION_LOCATION);
1722  }
1723  // PARANOID check that if the result is setup it matches the distribution
1724  // of the rhs
1725  if (result.built())
1726  {
1727  if (!(*rhs.distribution_pt() == *result.distribution_pt()))
1728  {
1729  std::ostringstream error_message_stream;
1730  error_message_stream
1731  << "If the result distribution is setup then it must be the same as the "
1732  << "rhs distribution";
1733  throw OomphLibError(error_message_stream.str(),
1734  OOMPH_CURRENT_FUNCTION,
1735  OOMPH_EXCEPTION_LOCATION);
1736  }
1737  }
1738 #endif
1739 
1740  // copy result to rhs
1741  result=rhs;
1742 
1743  //Number of RHSs
1744  int nrhs=1;
1745 
1746  //Cast the boolean flags to ints for SuperLU
1747  int transpose = Serial_compressed_row_flag;
1748  int doc = Doc_stats;
1749 
1750  //Do the backsubsitition phase
1751  int i=2;
1752  superlu(&i, &n, 0, &nrhs,
1753  0, 0, 0,
1754  result.values_pt(), &n, &transpose, &doc,
1756 
1757  // Throw an error if superLU returned an error status in info.
1758  if(Serial_info != 0)
1759  {
1760  std::ostringstream error_msg;
1761  error_msg << "SuperLU returned the error status code "
1762  << Serial_info
1763  << " . See the SuperLU documentation for what this means.";
1764  throw OomphLibError(error_msg.str(),
1765  OOMPH_CURRENT_FUNCTION,
1766  OOMPH_EXCEPTION_LOCATION);
1767  }
1768 }
1769 
1770 
1771 //=============================================================================
1772 /// Clean up the memory
1773 //=============================================================================
1775 {
1776  //If we have non-zero LU factors stored
1777  if(Serial_f_factors!=0)
1778  {
1779  //Clean up those factors
1780  int i=3;
1781  int transpose = Serial_compressed_row_flag;
1782  superlu(&i, 0, 0, 0, 0, 0, 0,
1783  0, 0, &transpose, 0,
1785 
1786  //Set the F_factors to zero
1787  Serial_f_factors=0;
1788  Serial_n_dof=0;
1789  }
1790 
1791 #ifdef OOMPH_HAS_MPI
1792  //If we have non-zero LU factors stored
1793  if(Dist_solver_data_pt!=0)
1794  {
1795  //Clean up any stored solver data
1796 
1797  // Doc (0/1) = (true/false)
1798  int doc = !Doc_stats;
1799 
1800  // Reset Info flag
1801  Dist_info=0;
1802 
1803  // number of DOFs
1804  int ndof = this->distribution_pt()->nrow();
1805 
1807  {
1808  superlu_dist_distributed_matrix(3, -1, ndof, 0, 0, 0, 0, 0, 0, 0,
1809  Dist_nprow, Dist_npcol, doc,
1811  &Dist_info,
1812  this->distribution_pt()->communicator_pt()
1813  ->mpi_comm());
1815  }
1817  {
1818  superlu_dist_global_matrix(3, -1, ndof, 0, 0, 0, 0, 0,
1819  Dist_nprow, Dist_npcol, doc,
1821  this->distribution_pt()->communicator_pt()
1822  ->mpi_comm());
1824  }
1825 
1827 
1828  // Delete internal copy of the matrix
1829  delete[] Dist_value_pt;
1830  delete[] Dist_index_pt;
1831  delete[] Dist_start_pt;
1832  Dist_value_pt=0;
1833  Dist_index_pt=0;
1834  Dist_start_pt=0;
1835 
1836  // and the distribution
1837  this->clear_distribution();
1838  }
1839 #endif
1840 }
1841 
1842 } //end of oomph namespace
int * column_index()
Access to C-style column index array.
Definition: matrices.h:1056
Class of matrices containing doubles, and stored as a DenseMatrix<double>, but with solving functiona...
Definition: matrices.h:1234
void clean_up_memory()
Clean up the memory allocated by the solver.
bool built() const
unsigned nrow_local() const
access function for the num of local rows on this processor.
void solve(Problem *const &problem_pt, DoubleVector &result)
Solver: Takes pointer to problem and returns the results Vector which contains the solution of the li...
void multiply_transpose(const DoubleVector &x, DoubleVector &soln) const
Multiply the transposed matrix by the vector x: soln=A^T x.
Definition: matrices.cc:729
void * Dist_solver_data_pt
Storage for the LU factors and other data required by SuperLU.
unsigned long nrow() const
Return the number of rows of the matrix.
Definition: matrices.h:2610
void solve(Problem *const &problem_pt, DoubleVector &result)
Solver: Takes pointer to problem and returns the results Vector which contains the solution of the li...
void factorise_distributed(DoubleMatrixBase *const &matrix_pt)
factorise method for SuperLU Dist
int Sign_of_determinant_of_matrix
Sign of the determinant of the matrix (obtained during the LU decomposition)
bool Using_dist
boolean flag indicating whether superlu dist is being used
unsigned first_row() const
access function for the first row on this processor
void redistribute(const LinearAlgebraDistribution *const &dist_pt)
The contents of the vector are redistributed to match the new distribution. In a non-MPI rebuild this...
bool Dist_global_solve_data_allocated
Flag is true if solve data has been generated for a global matrix.
double * values_pt()
access function to the underlying values
int Serial_info
Info flag for the SuperLU solver.
cstr elem_len * i
Definition: cfortran.h:607
void backsub_distributed(const DoubleVector &rhs, DoubleVector &result)
backsub method for SuperLU Dist
bool Dist_delete_matrix_data
Delete_matrix_data flag. SuperLU_dist needs its own copy of the input matrix, therefore a copy must b...
unsigned nrow() const
access function to the number of global rows.
int * row_index()
Access to C-style row index array.
Definition: matrices.h:2492
bool Enable_resolve
Boolean that indicates whether the matrix (or its factors, in the case of direct solver) should be st...
Definition: linear_solver.h:80
The Problem class.
Definition: problem.h:152
int * Dist_start_pt
Pointers for storage of matrix column or row starts.
int Serial_sign_of_determinant_of_matrix
Sign of the determinant of the matrix.
bool Gradient_has_been_computed
flag that indicates whether the gradient was computed or not
Definition: linear_solver.h:91
bool Dist_distributed_solve_data_allocated
Flag is true if solve data has been generated for distributed matrix.
bool distributed() const
distribution is serial or distributed
static OomphCommunicator * communicator_pt()
access to the global oomph-lib communicator
OomphInfo oomph_info
static bool mpi_has_been_initialised()
return true if MPI has been initialised
OomphCommunicator * communicator_pt()
access function to the oomph-lib communicator
Definition: problem.h:1221
double * LU_factors
Pointer to storage for the LU decomposition.
bool distributed() const
access function to the distributed - indicates whether the distribution is serial or distributed ...
double * value()
Access to C-style value array.
Definition: matrices.h:1062
int * Dist_index_pt
Pointer for storage of matrix rows or column indices required by SuperLU_DIST.
LinearAlgebraDistribution * distribution_pt() const
access to the LinearAlgebraDistribution
bool Doc_stats
Set to true to output statistics (false by default).
void backsub_serial(const DoubleVector &rhs, DoubleVector &result)
backsub method for SuperLU (serial)
void backsub(const DoubleVector &rhs, DoubleVector &result)
Do the backsubstitution step to solve the system LU result = rhs.
unsigned long nnz() const
Return the number of nonzero entries (the local nnz)
Definition: matrices.h:1068
Describes the distribution of a distributable linear algebra type object. Typically this is a contain...
void clear()
clear
Definition: matrices.cc:1677
void build(const DoubleVector &old_vector)
Just copys the argument DoubleVector.
int Dist_nprow
Number of rows for the process grid.
Base class for any linear algebra object that is distributable. Just contains storage for the LinearA...
bool Doc_time
Boolean flag that indicates whether the time taken.
Definition: linear_solver.h:84
void factorise(DoubleMatrixBase *const &matrix_pt)
Perform the LU decomposition of the matrix.
DoubleVector Gradient_for_glob_conv_newton_solve
DoubleVector storing the gradient for the globally convergent Newton method.
Definition: linear_solver.h:95
void start(const unsigned &i)
(Re-)start i-th timer
int & sign_of_jacobian()
Access function for the sign of the global jacobian matrix. This will be set by the linear solver...
Definition: problem.h:2301
unsigned long nrow() const
Return the number of rows of the matrix.
Definition: matrices.h:992
void superlu_dist_global_matrix(int opt_flag, int allow_permutations, int n, int nnz, double *values, int *row_index, int *col_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
unsigned long ndof() const
Return the number of dofs.
Definition: problem.h:1574
unsigned nrow() const
access function to the number of global rows.
int superlu(int *, int *, int *, int *, double *, int *, int *, double *, int *, int *, int *, void *, int *)
double Jacobian_setup_time
Jacobian setup time.
void multiply_transpose(const DoubleVector &x, DoubleVector &soln) const
Multiply the transposed matrix by the vector x: soln=A^T x.
Definition: matrices.cc:1907
bool Dist_allow_row_and_col_permutations
If true then SuperLU_DIST is allowed to permute matrix rows and columns during factorisation. This is the default for SuperLU_DIST, and can lead to significantly faster solves, but has been known to fail, hence the default value is 0.
int Dist_info
Info flag for the SuperLU solver.
void get_fd_jacobian(DoubleVector &residuals, DenseMatrix< double > &jacobian)
Return the fully-assembled Jacobian and residuals, generated by finite differences.
Definition: problem.cc:7804
void resolve(const DoubleVector &rhs, DoubleVector &result)
Resolve the system defined by the last assembled jacobian and the specified rhs vector if resolve has...
double timer()
returns the time in seconds after some point in past
Type Solver_type
the solver type. see SuperLU_solver_type for details.
void clear_distribution()
clear the distribution of this distributable linear algebra object
double Solution_time
Solution time.
bool built() const
access function to the Built flag - indicates whether the matrix has been build - i...
Definition: matrices.h:1177
void superlu_cr_to_cc(int nrow, int ncol, int nnz, double *cr_values, int *cr_index, int *cr_start, double **cc_values, int **cc_index, int **cc_start)
Definition: superlu_dist.c:65
double * Dist_value_pt
Pointer for storage of the matrix values required by SuperLU_DIST.
int Dist_npcol
Number of columns for the process grid.
void clean_up_memory()
Clean up the stored LU factors.
A class for compressed column matrices that store doubles.
Definition: matrices.h:2573
virtual void get_jacobian(DoubleVector &residuals, DenseDoubleMatrix &jacobian)
Return the fully-assembled Jacobian and residuals for the problem Interface for the case when the Jac...
Definition: problem.cc:3851
virtual unsigned long nrow() const =0
Return the number of rows of the matrix.
unsigned long Serial_n_dof
The number of unknowns in the linear system.
bool Suppress_solve
Suppress solve?
void solve(Problem *const &problem_pt, DoubleVector &result)
Solver: Takes pointer to problem and returns the results Vector which contains the solution of the li...
void build_distribution(const LinearAlgebraDistribution *const dist_pt)
setup the distribution of this distributable linear algebra object
void backsub(const DoubleVector &rhs, DoubleVector &result)
Do the backsubstitution for SuperLU solver Note: returns the global result Vector.
void factorise(DoubleMatrixBase *const &matrix_pt)
Do the factorisation stage Note: if Delete_matrix_data is true the function matrix_pt->clean_up_memor...
double Jacobian_setup_time
Jacobian setup time.
static bool Suppress_incorrect_rhs_distribution_warning_in_resolve
Static flag that determines whether the warning about incorrect distribution of RHSs will be printed ...
int * column_start()
Access to C-style column_start array.
Definition: matrices.h:2486
OomphCommunicator * communicator_pt() const
const access to the communicator pointer
unsigned long nnz() const
Return the number of nonzero entries.
Definition: matrices.h:630
void * Serial_f_factors
Storage for the LU factors as required by SuperLU.
A vector in the mathematical sense, initially developed for linear algebra type applications. If MPI then this vector can be distributed - its distribution is described by the LinearAlgebraDistribution object at Distribution_pt. Data is stored in a C-style pointer vector (double*)
Definition: double_vector.h:61
bool Compute_gradient
flag that indicates whether the gradient required for the globally convergent Newton method should be...
Definition: linear_solver.h:88
int * row_start()
Access to C-style row_start array.
Definition: matrices.h:1050
double Solution_time
Solution time.
bool Dist_use_global_solver
Flag that determines whether the MPIProblem based solve function uses the global or distributed versi...
Abstract base class for matrices of doubles – adds abstract interfaces for solving, LU decomposition and multiplication by vectors.
Definition: matrices.h:275
void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations, int n, int nnz_local, int nrow_local, int first_row, double *values, int *col_index, int *row_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
Definition: superlu_dist.c:106
void clean_up_memory()
Wipe matrix data and set all values to 0.
Definition: matrices.h:2950
A class for compressed row matrices. This is a distributable object.
Definition: matrices.h:872
bool Serial_compressed_row_flag
Use compressed row version?
virtual unsigned long ncol() const =0
Return the number of columns of the matrix.
long * Index
Pointer to storage for the index of permutations in the LU solve.
T * value()
Access to C-style value array.
Definition: matrices.h:618
void factorise_serial(DoubleMatrixBase *const &matrix_pt)
factorise method for SuperLU (serial)
An oomph-lib wrapper to the MPI_Comm communicator object. Just contains an MPI_Comm object (which is ...
Definition: communicator.h:57