SRC/superlu_defs.h File Reference

Definitions which are precision-neutral. More...

#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include "superlu_enum_consts.h"
#include "Cnames.h"
#include "supermatrix.h"
#include "util_dist.h"
#include "psymbfact.h"

Go to the source code of this file.

Data Types

type  superlu_scope_t
type  gridinfo_t
type  Glu_persist_t
type  Glu_freeable_t
type  ScalePermstruct_t
type  superlu_options_t
type  mem_usage_t

Defines

#define mpi_int_t   MPI_INT
#define BC_HEADER   2
#define LB_DESCRIPTOR   2
#define BR_HEADER   3
#define UB_DESCRIPTOR   2
#define NBUFFERS   5
#define NTAGS   10000
#define UjROW   10
#define UkSUB   11
#define UkVAL   12
#define LkSUB   13
#define LkVAL   14
#define LkkDIAG   15
#define XK_H   2
#define LSUM_H   2
#define GSUM   20
#define Xk   21
#define Yk   22
#define LSUM   23
#define COMM_ALL   100
#define COMM_COLUMN   101
#define COMM_ROW   102
#define SUPER_LINEAR   11
#define SUPER_BLOCK   12
#define NO_MARKER   3
#define IAM(comm)   { int rank; MPI_Comm_rank ( comm, &rank ); rank};
#define MYROW(iam, grid)   ( (iam) / grid->npcol )
#define MYCOL(iam, grid)   ( (iam) % grid->npcol )
#define BlockNum(i)   ( supno[i] )
#define FstBlockC(bnum)   ( xsup[bnum] )
#define SuperSize(bnum)   ( xsup[bnum+1]-xsup[bnum] )
#define LBi(bnum, grid)   ( (bnum)/grid->nprow )
#define LBj(bnum, grid)   ( (bnum)/grid->npcol )
#define PROW(bnum, grid)   ( (bnum) % grid->nprow )
#define PCOL(bnum, grid)   ( (bnum) % grid->npcol )
#define PNUM(i, j, grid)   ( (i)*grid->npcol + j )
#define CEILING(a, b)   ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )
#define RHS_ITERATE(i)   for (i = 0; i < nrhs; ++i)
#define X_BLK(i)   ilsum[i] * nrhs + (i+1) * XK_H
#define LSUM_BLK(i)   ilsum[i] * nrhs + (i+1) * LSUM_H
#define SuperLU_timer_   SuperLU_timer_dist_
#define LOG2(x)   (log10((double) x) / log10(2.0))
#define VT_TRACEON
#define VT_TRACEOFF

Typedefs

typedef int int_t

Functions/Subroutines

void set_default_options_dist (superlu_options_t *)
 Set the default values for the options argument.
void print_options_dist (superlu_options_t *)
 Print the options setting.
void Destroy_CompCol_Matrix_dist (SuperMatrix *)
void Destroy_SuperNode_Matrix_dist (SuperMatrix *)
void Destroy_SuperMatrix_Store_dist (SuperMatrix *)
 Deallocate the structure pointing to the actual storage of the matrix.
void Destroy_CompCol_Permuted_dist (SuperMatrix *)
 A is of type Stype==NCP.
void Destroy_CompRowLoc_Matrix_dist (SuperMatrix *)
void Destroy_CompRow_Matrix_dist (SuperMatrix *)
void sp_colorder (superlu_options_t *, SuperMatrix *, int_t *, int_t *, SuperMatrix *)
int_t sp_coletree_dist (int_t *, int_t *, int_t *, int_t, int_t, int_t *)
 Nonsymmetric elimination tree.
void countnz_dist (const int_t, int_t *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
int_t fixupL_dist (const int_t, const int_t *, Glu_persist_t *, Glu_freeable_t *)
int_tTreePostorder_dist (int_t, int_t *)
float slamch_ (char *)
double dlamch_ (char *)
void * superlu_malloc_dist (size_t)
void superlu_free_dist (void *)
int_tintMalloc_dist (int_t)
int_tintCalloc_dist (int_t)
double SuperLU_timer_ ()
void superlu_abort_and_exit_dist (char *)
int_t sp_ienv_dist (int_t)
 Purpose =======
int lsame_ (char *, char *)
int xerbla_ (char *, int *)
void ifill_dist (int_t *, int_t, int_t)
 Fills an integer array with a given value.
void super_stats_dist (int_t, int_t *)
void ScalePermstructInit (const int_t, const int_t, ScalePermstruct_t *)
 Allocate storage in ScalePermstruct.
void ScalePermstructFree (ScalePermstruct_t *)
 Deallocate ScalePermstruct.
void superlu_gridinit (MPI_Comm, int_t, int_t, gridinfo_t *)
 All processes in the MPI communicator must call this routine.
void superlu_gridmap (MPI_Comm, int_t, int_t, int_t[], int_t, gridinfo_t *)
 All processes in the MPI communicator must call this routine.
void superlu_gridexit (gridinfo_t *)
void get_perm_c_dist (int_t, int_t, SuperMatrix *, int_t *)
void a_plus_at_dist (const int_t, const int_t, int_t *, int_t *, int_t *, int_t **, int_t **)
void bcast_tree (void *, int, MPI_Datatype, int, int, gridinfo_t *, int, int *)
int_t symbfact (superlu_options_t *, int, SuperMatrix *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
int_t symbfact_SubInit (fact_t, void *, int_t, int_t, int_t, int_t, Glu_persist_t *, Glu_freeable_t *)
int_t symbfact_SubXpand (int_t, int_t, int_t, MemType, int_t *, Glu_freeable_t *)
int_t symbfact_SubFree (Glu_freeable_t *)
void get_diag_procs (int_t, Glu_persist_t *, gridinfo_t *, int_t *, int_t **, int_t **)
int_t QuerySpace_dist (int_t, int_t, Glu_freeable_t *, mem_usage_t *)
void pxerbla (char *, gridinfo_t *, int_t)
void PStatInit (SuperLUStat_t *)
void PStatFree (SuperLUStat_t *)
void PStatPrint (superlu_options_t *, SuperLUStat_t *, gridinfo_t *)
float symbfact_dist (int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *, Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, mem_usage_t *)
float get_perm_c_parmetis (SuperMatrix *, int_t *, int_t *, int, int, int_t **, int_t **, gridinfo_t *, MPI_Comm *)
int_t psymbfact_LUXpandMem (int_t, int_t, int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
int_t psymbfact_LUXpand (int_t, int_t, int_t, int_t, int_t *, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
int_t psymbfact_LUXpand_RL (int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *)
int_t psymbfact_prLUXpand (int_t, int_t, MemType, Llu_symbfact_t *, psymbfact_stat_t *)
void print_panel_seg_dist (int_t, int_t, int_t, int_t, int_t *, int_t *)
 Diagnostic print of segment info after panel_dfs().
void check_repfnz_dist (int_t, int_t, int_t, int_t *)
 Check whether repfnz[] == EMPTY after reset.
int_t CheckZeroDiagonal (int_t, int_t *, int_t *, int_t *)
void PrintDouble5 (char *, int_t, double *)
void PrintInt10 (char *, int_t, int_t *)
int file_PrintInt10 (FILE *, char *, int_t, int_t *)


Detailed Description

 -- Distributed SuperLU routine (version 2.2) --
 Lawrence Berkeley National Lab, Univ. of California Berkeley.
 November 1, 2007
 Feburary 20, 2008
 

Define Documentation

#define BC_HEADER   2

#define BlockNum (  )     ( supno[i] )

#define BR_HEADER   3

#define CEILING ( a,
 )     ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )

#define COMM_ALL   100

#define COMM_COLUMN   101

#define COMM_ROW   102

#define FstBlockC ( bnum   )     ( xsup[bnum] )

#define GSUM   20

#define IAM ( comm   )     { int rank; MPI_Comm_rank ( comm, &rank ); rank};

#define LB_DESCRIPTOR   2

#define LBi ( bnum,
grid   )     ( (bnum)/grid->nprow )

#define LBj ( bnum,
grid   )     ( (bnum)/grid->npcol )

#define LkkDIAG   15

#define LkSUB   13

#define LkVAL   14

#define LOG2 (  )     (log10((double) x) / log10(2.0))

#define LSUM   23

#define LSUM_BLK (  )     ilsum[i] * nrhs + (i+1) * LSUM_H

#define LSUM_H   2

#define mpi_int_t   MPI_INT

#define MYCOL ( iam,
grid   )     ( (iam) % grid->npcol )

#define MYROW ( iam,
grid   )     ( (iam) / grid->npcol )

#define NBUFFERS   5

#define NO_MARKER   3

#define NTAGS   10000

#define PCOL ( bnum,
grid   )     ( (bnum) % grid->npcol )

#define PNUM ( i,
j,
grid   )     ( (i)*grid->npcol + j )

#define PROW ( bnum,
grid   )     ( (bnum) % grid->nprow )

#define RHS_ITERATE (  )     for (i = 0; i < nrhs; ++i)

#define SUPER_BLOCK   12

#define SUPER_LINEAR   11

#define SuperLU_timer_   SuperLU_timer_dist_

#define SuperSize ( bnum   )     ( xsup[bnum+1]-xsup[bnum] )

#define UB_DESCRIPTOR   2

#define UjROW   10

#define UkSUB   11

#define UkVAL   12

#define VT_TRACEOFF

#define VT_TRACEON

#define X_BLK (  )     ilsum[i] * nrhs + (i+1) * XK_H

#define Xk   21

#define XK_H   2

#define Yk   22


Typedef Documentation

typedef int int_t


Function Documentation

void a_plus_at_dist ( const   int_t,
const   int_t,
int_t ,
int_t ,
int_t ,
int_t **  ,
int_t **   
)

void bcast_tree ( void *  buf,
int  count,
MPI_Datatype  dtype,
int  root,
int  tag,
gridinfo_t grid,
int  scope,
int *  recvcnt 
)

 Purpose
 =======
   Broadcast an array of *dtype* numbers. The communication pattern
   is a tree with number of branches equal to NBRANCHES.
   The process ranks are between 0 and Np-1.

   The following two pairs of graphs give different ways of viewing the same
   algorithm.  The first pair shows the trees as they should be visualized
   when examining the algorithm.  The second pair are isomorphic graphs of
   of the first, which show the actual pattern of data movement.
   Note that a tree broadcast with NBRANCHES = 2 is isomorphic with a
   hypercube broadcast (however, it does not require the nodes be a
   power of two to work).

    TREE BROADCAST, NBRANCHES = 2     *    TREE BROADCAST, NBRANCHES = 3

     root=2
 i=4   &______________                *
       |              \               *       root=2
 i=2   &______         &______        * i=3     &______________________
       |      \        |      \       *         |          \           \
 i=1   &__     &__     &__     &__    * i=1     &______     &______     &__
       |  \    |  \    |  \    |  \   *         |  \   \    |  \   \    |  \
       2   3   4   5   6   7   0   1  *         2   3   4   5   6   7   0   1

          ISOMORPHIC GRAPHS OF ABOVE, SHOWN IN MORE FAMILIAR TERMS:

                2                                           2
       _________|_________                       ___________|____________
      /         |         \                     /           |      |     \
     6          4          3                   5            0      3      4
    / \         |                             / \           |
   0   7        5                            6   7          1
   |
   1

 Arguments
 =========

 scope
 

void check_repfnz_dist ( int_t  ,
int_t  ,
int_t  ,
int_t  
)

int_t CheckZeroDiagonal ( int_t  ,
int_t ,
int_t ,
int_t  
)

void countnz_dist ( const int_t  n,
int_t xprune,
int_t nnzL,
int_t nnzU,
Glu_persist_t Glu_persist,
Glu_freeable_t Glu_freeable 
)

 Count the total number of nonzeros in factors L and U,  and in the 
 symmetrically reduced L. 
 

void Destroy_CompCol_Matrix_dist ( SuperMatrix  ) 

void Destroy_CompCol_Permuted_dist ( SuperMatrix  ) 

void Destroy_CompRow_Matrix_dist ( SuperMatrix  ) 

void Destroy_CompRowLoc_Matrix_dist ( SuperMatrix  ) 

void Destroy_SuperMatrix_Store_dist ( SuperMatrix  ) 

void Destroy_SuperNode_Matrix_dist ( SuperMatrix  ) 

double dlamch_ ( char *  cmach  ) 

    Purpose   
    =======

    DLAMCH determines double precision machine parameters.

    Arguments   
    =========

    CMACH   (input) CHARACTER*1   
            Specifies the value to be returned by DLAMCH:   
            = 'E' or 'e',   DLAMCH := eps   
            = 'S' or 's ,   DLAMCH := sfmin   
            = 'B' or 'b',   DLAMCH := base   
            = 'P' or 'p',   DLAMCH := eps*base   
            = 'N' or 'n',   DLAMCH := t   
            = 'R' or 'r',   DLAMCH := rnd   
            = 'M' or 'm',   DLAMCH := emin   
            = 'U' or 'u',   DLAMCH := rmin   
            = 'L' or 'l',   DLAMCH := emax   
            = 'O' or 'o',   DLAMCH := rmax

            where

            eps   = relative machine precision   
            sfmin = safe minimum, such that 1/sfmin does not overflow   
            base  = base of the machine   
            prec  = eps*base   
            t     = number of (base) digits in the mantissa   
            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise   
            emin  = minimum exponent before (gradual) underflow   
            rmin  = underflow threshold - base**(emin-1)   
            emax  = largest exponent before overflow   
            rmax  = overflow threshold  - (base**emax)*(1-eps)

   ===================================================================== 

int file_PrintInt10 ( FILE *  ,
char *  ,
int_t  ,
int_t  
)

int_t fixupL_dist ( const int_t  n,
const int_t perm_r,
Glu_persist_t Glu_persist,
Glu_freeable_t Glu_freeable 
)

 Fix up the data storage lsub for L-subscripts. It removes the subscript
 sets for structural pruning,	and applies permuation to the remaining
 subscripts.
 

void get_diag_procs ( int_t  ,
Glu_persist_t ,
gridinfo_t ,
int_t ,
int_t **  ,
int_t **   
)

void get_perm_c_dist ( int_t  pnum,
int_t  ispec,
SuperMatrix A,
int_t perm_c 
)

 Purpose
 =======

 GET_PERM_C_DIST obtains a permutation matrix Pc, by applying the multiple
 minimum degree ordering code by Joseph Liu to matrix A'*A or A+A',
 or using approximate minimum degree column ordering by Davis et. al.
 The LU factorization of A*Pc tends to have less fill than the LU 
 factorization of A.

 Arguments
 =========

 ispec   (input) colperm_t
         Specifies what type of column permutation to use to reduce fill.
         = NATURAL: natural ordering (i.e., Pc = I)
         = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A
         = MMD_ATA: minimum degree ordering on structure of A'*A
         = METIS_AT_PLUS_A: MeTis on A'+A

 A       (input) SuperMatrix*
         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
         of the linear equations is A->nrow. Currently, the type of A 
         can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
         In the future, more general A can be handled.

 perm_c  (output) int*
	   Column permutation vector of size A->ncol, which defines the 
         permutation matrix Pc; perm_c[i] = j means column i of A is 
         in position j in A*Pc.
 

float get_perm_c_parmetis ( SuperMatrix A,
int_t perm_r,
int_t perm_c,
int  nprocs_i,
int  noDomains,
int_t **  sizes,
int_t **  fstVtxSep,
gridinfo_t grid,
MPI_Comm *  metis_comm 
)

 Purpose
 =======

 GET_PERM_C_PARMETIS obtains a permutation matrix Pc, by applying a
 graph partitioning algorithm to the symmetrized graph A+A'.  The
 multilevel graph partitioning algorithm used is the
 ParMETIS_V3_NodeND routine available in the parallel graph
 partitioning package parMETIS.

 The number of independent sub-domains noDomains computed by this
 algorithm has to be a power of 2.  Hence noDomains is the larger
 number power of 2 that is smaller than nprocs_i, where nprocs_i = nprow
 * npcol is the number of processors used in SuperLU_DIST.

 Arguments
 =========

 A       (input) SuperMatrix*
         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
         of the linear equations is A->nrow.  Matrix A is distributed
         in NRformat_loc format.

 perm_r  (input) int_t*
         Row permutation vector of size A->nrow, which defines the 
         permutation matrix Pr; perm_r[i] = j means row i of A is in 
         position j in Pr*A.

 perm_c  (output) int_t*
	   Column permutation vector of size A->ncol, which defines the 
         permutation matrix Pc; perm_c[i] = j means column i of A is 
         in position j in A*Pc.

 nprocs_i (input) int*
         Number of processors the input matrix is distributed on in a block
         row format.  It corresponds to number of processors used in
         SuperLU_DIST.

 noDomains (input) int*, must be power of 2
         Number of independent domains to be computed by the graph
         partitioning algorithm.  ( noDomains <= nprocs_i )

 sizes   (output) int_t**, of size 2 * noDomains
         Returns pointer to an array containing the number of nodes
         for each sub-domain and each separator.  Separators are stored 
         from left to right.
         Memory for the array is allocated in this routine.

 fstVtxSep (output) int_t**, of size 2 * noDomains
         Returns pointer to an array containing first node for each
         sub-domain and each separator.
         Memory for the array is allocated in this routine.

 Return value
 ============
   < 0, number of bytes allocated on return from the symbolic factorization.
   > 0, number of bytes allocated when out of memory.
 

void ifill_dist ( int_t ,
int_t  ,
int_t   
)

int_t* intCalloc_dist ( int_t   ) 

int_t* intMalloc_dist ( int_t   ) 

int lsame_ ( char *  ca,
char *  cb 
)

    Purpose   
    =======

    LSAME returns .TRUE. if CA is the same letter as CB regardless of case.

    Arguments   
    =========

    CA      (input) CHARACTER*1   
    CB      (input) CHARACTER*1   
            CA and CB specify the single characters to be compared.

   ===================================================================== 

void print_options_dist ( superlu_options_t  ) 

void print_panel_seg_dist ( int_t  ,
int_t  ,
int_t  ,
int_t  ,
int_t ,
int_t  
)

void PrintDouble5 ( char *  ,
int_t  ,
double *   
)

void PrintInt10 ( char *  ,
int_t  ,
int_t  
)

void PStatFree ( SuperLUStat_t  ) 

void PStatInit ( SuperLUStat_t  ) 

void PStatPrint ( superlu_options_t ,
SuperLUStat_t ,
gridinfo_t  
)

int_t psymbfact_LUXpand ( int_t  iam,
int_t  n,
int_t  fstVtxLvl_loc,
int_t  vtxXp,
int_t p_next,
int_t  min_new_len,
int_t  mem_type,
int_t  rout_type,
int_t  free_prev_mem,
Pslu_freeable_t Pslu_freeable,
Llu_symbfact_t Llu_symbfact,
vtcsInfo_symbfact_t VInfo,
psymbfact_stat_t PS 
)

 Expand the data structures for L and U during the factorization.
 Return value: SUCCES_RET - successful return
               ERROR_RET - error due to a memory alocation failure
 

int_t psymbfact_LUXpand_RL ( int_t  iam,
int_t  n,
int_t  vtxXp,
int_t  next,
int_t  len_texp,
int_t  mem_type,
Pslu_freeable_t Pslu_freeable,
Llu_symbfact_t Llu_symbfact,
vtcsInfo_symbfact_t VInfo,
psymbfact_stat_t PS 
)

 Expand the data structures for L and U during the factorization.
 Return value:   0 - successful return
               > 0 - number of bytes allocated when run out of space
 

int_t psymbfact_LUXpandMem ( int_t  iam,
int_t  n,
int_t  vtxXp,
int_t  next,
int_t  min_new_len,
int_t  mem_type,
int_t  rout_type,
int_t  free_prev_mem,
Pslu_freeable_t Pslu_freeable,
Llu_symbfact_t Llu_symbfact,
vtcsInfo_symbfact_t VInfo,
psymbfact_stat_t PS 
)

 Expand the data structures for L and U during the factorization.
 Return value:   0 - successful return
               > 0 - number of bytes allocated when run out of space
 

int_t psymbfact_prLUXpand ( int_t  iam,
int_t  min_new_len,
MemType  mem_type,
Llu_symbfact_t Llu_symbfact,
psymbfact_stat_t PS 
)

 Expand the data structures for L and U pruned during the factorization.
 Return value: SUCCES_RET - successful return
               ERROR_RET - error when run out of space
 

void pxerbla ( char *  ,
gridinfo_t ,
int_t   
)

int_t QuerySpace_dist ( int_t  n,
int_t  lsub_size,
Glu_freeable_t Glu_freeable,
mem_usage_t mem_usage 
)

 mem_usage consists of the following fields:
  • for_lu (float) The amount of space used in bytes for the L data structures.
  • total (float) The amount of space needed in bytes to perform factorization.
  • expansions (int) Number of memory expansions during the LU factorization.

void ScalePermstructFree ( ScalePermstruct_t  ) 

void ScalePermstructInit ( const   int_t,
const   int_t,
ScalePermstruct_t  
)

void set_default_options_dist ( superlu_options_t  ) 

float slamch_ ( char *  cmach  ) 

    Purpose   
    =======

    SLAMCH determines single precision machine parameters.

    Arguments   
    =========

    CMACH   (input) CHARACTER*1   
            Specifies the value to be returned by SLAMCH:   
            = 'E' or 'e',   SLAMCH := eps   
            = 'S' or 's ,   SLAMCH := sfmin   
            = 'B' or 'b',   SLAMCH := base   
            = 'P' or 'p',   SLAMCH := eps*base   
            = 'N' or 'n',   SLAMCH := t   
            = 'R' or 'r',   SLAMCH := rnd   
            = 'M' or 'm',   SLAMCH := emin   
            = 'U' or 'u',   SLAMCH := rmin   
            = 'L' or 'l',   SLAMCH := emax   
            = 'O' or 'o',   SLAMCH := rmax

            where

            eps   = relative machine precision   
            sfmin = safe minimum, such that 1/sfmin does not overflow   
            base  = base of the machine   
            prec  = eps*base   
            t     = number of (base) digits in the mantissa   
            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise   
            emin  = minimum exponent before (gradual) underflow   
            rmin  = underflow threshold - base**(emin-1)   
            emax  = largest exponent before overflow   
            rmax  = overflow threshold  - (base**emax)*(1-eps)

   ===================================================================== 

int_t sp_coletree_dist ( int_t acolst,
int_t acolend,
int_t arow,
int_t  nr,
int_t  nc,
int_t parent 
)

      Find the elimination tree for A'*A.
      This uses something similar to Liu's algorithm. 
      It runs in time O(nz(A)*log n) and does not form A'*A.

      Input:
        Sparse matrix A.  Numeric values are ignored, so any
        explicit zeros are treated as nonzero.
      Output:
        Integer array of parents representing the elimination
        tree of the symbolic product A'*A.  Each vertex is a
        column of A, and nc means a root of the elimination forest.

      John R. Gilbert, Xerox, 10 Dec 1990
      Based on code by JRG dated 1987, 1988, and 1990.
 

void sp_colorder ( superlu_options_t options,
SuperMatrix A,
int_t perm_c,
int_t etree,
SuperMatrix AC 
)

 Purpose
 =======

 sp_colorder() permutes the columns of the original matrix. It performs
 the following steps:

    1. Apply column permutation perm_c[] to A's column pointers to form AC;

    2. If options->Fact = DOFACT, then
       (1) Compute column elimination tree etree[] of AC'AC;
       (2) Post order etree[] to get a postordered elimination tree etree[],
           and a postorder permutation post[];
       (3) Apply post[] permutation to columns of AC;
       (4) Overwrite perm_c[] with the product perm_c * post.

 Arguments
 =========

 options (input) superlu_options_t*
         Specifies whether or not the elimination tree will be re-used.
         If options->Fact == DOFACT, this means first time factor A, 
         etree is computed and output.
         Otherwise, re-factor A, etree is input, unchanged on exit.

 A       (input) SuperMatrix*
         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
         of the linear equations is A->nrow. Currently, the type of A can be:
         Stype = SLU_NC or SLU_NCP; Dtype = SLU__D; Mtype = SLU_GE.
         In the future, more general A can be handled.

 perm_c  (input/output) int*
	   Column permutation vector of size A->ncol, which defines the 
         permutation matrix Pc; perm_c[i] = j means column i of A is 
         in position j in A*Pc.
         If options->Fact == DOFACT, perm_c is both input and output.
         On output, it is changed according to a postorder of etree.
         Otherwise, perm_c is input.

 etree   (input/output) int*
         Elimination tree of Pc*(A'+A)*Pc', dimension A->ncol.
         If options->Fact == DOFACT, etree is an output argument,
         otherwise it is an input argument.
         Note: etree is a vector of parent pointers for a forest whose
         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.

 AC      (output) SuperMatrix*
         The resulting matrix after applied the column permutation
         perm_c[] to matrix A. The type of AC can be:
         Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
 

int_t sp_ienv_dist ( int_t  ispec  ) 

    Purpose   
    =======

    sp_ienv_dist() is inquired to choose machine-dependent parameters for the
    local environment. See ISPEC for a description of the parameters.

    This version provides a set of parameters which should give good,   
    but not optimal, performance on many of the currently available   
    computers.  Users are encouraged to modify this subroutine to set   
    the tuning parameters for their particular machine using the option   
    and problem size information in the arguments.

    Arguments   
    =========

    ISPEC   (input) int
            Specifies the parameter to be returned as the value of SP_IENV_DIST.
            = 1: the panel size w; a panel consists of w consecutive
	         columns of matrix A in the process of Gaussian elimination.
		 The best value depends on machine's cache characters.
            = 2: the relaxation parameter relax; if the number of
	         nodes (columns) in a subtree of the elimination tree is less
		 than relax, this subtree is considered as one supernode,
		 regardless of the their row structures.
            = 3: the maximum size for a supernode, which must be greater
                 than or equal to relaxation parameter (see case 2);
	    = 4: the minimum row dimension for 2-D blocking to be used;
	    = 5: the minimum column dimension for 2-D blocking to be used;
	    = 6: the estimated fills factor for the adjacency structures 
	         of L and U, compared with A;

   (SP_IENV_DIST) (output) int
            >= 0: the value of the parameter specified by ISPEC   
            < 0:  if SP_IENV_DIST = -k, the k-th argument had an illegal value.

    ===================================================================== 

sp_ienv_dist() is inquired to choose machine-dependent parameters for the local environment. See ISPEC for a description of the parameters.

This version provides a set of parameters which should give good, but not optimal, performance on many of the currently available computers. Users are encouraged to modify this subroutine to set the tuning parameters for their particular machine using the option and problem size information in the arguments.

Arguments =========

ISPEC (input) int Specifies the parameter to be returned as the value of SP_IENV_DIST. = 1: the panel size w; a panel consists of w consecutive columns of matrix A in the process of Gaussian elimination. The best value depends on machine's cache characters. = 2: the relaxation parameter relax; if the number of nodes (columns) in a subtree of the elimination tree is less than relax, this subtree is considered as one supernode, regardless of the their row structures. = 3: the maximum size for a supernode, which must be greater than or equal to relaxation parameter (see case 2); = 4: the minimum row dimension for 2-D blocking to be used; = 5: the minimum column dimension for 2-D blocking to be used; = 6: the estimated fills factor for the adjacency structures of L and U, compared with A;

(SP_IENV_DIST) (output) int >= 0: the value of the parameter specified by ISPEC < 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value.

=====================================================================

void super_stats_dist ( int_t  ,
int_t  
)

void superlu_abort_and_exit_dist ( char *   ) 

void superlu_free_dist ( void *   ) 

void superlu_gridexit ( gridinfo_t  ) 

void superlu_gridinit ( MPI_Comm  ,
int_t  ,
int_t  ,
gridinfo_t  
)

void superlu_gridmap ( MPI_Comm  ,
int_t  ,
int_t  ,
int_t  [],
int_t  ,
gridinfo_t  
)

void* superlu_malloc_dist ( size_t   ) 

double SuperLU_timer_ (  ) 

int_t symbfact ( superlu_options_t options,
int  pnum,
SuperMatrix A,
int_t perm_c,
int_t etree,
Glu_persist_t Glu_persist,
Glu_freeable_t Glu_freeable 
)

 Purpose
 =======
   symbfact() performs a symbolic factorization on matrix A and sets up 
   the nonzero data structures which are suitable for supernodal Gaussian
   elimination with no pivoting (GENP). This routine features:
        o depth-first search (DFS)
        o supernodes
        o symmetric structure pruning

 Return value
 ============
   < 0, number of bytes needed for LSUB.
   = 0, matrix dimension is 1.
   > 0, number of bytes allocated when out of memory.
 

float symbfact_dist ( int  ,
int  ,
SuperMatrix ,
int_t ,
int_t ,
int_t ,
int_t ,
Pslu_freeable_t ,
MPI_Comm *  ,
MPI_Comm *  ,
mem_usage_t  
)

 
 Purpose
 =======
   symbfact_dist() performs symbolic factorization of matrix A suitable
   for performing the supernodal Gaussian elimination with no pivoting (GEPP). 
   This routine computes the structure of one column of L and one row of U 
   at a time.  It uses:
        o distributed input matrix
        o supernodes
        o symmetric structure pruning

 Arguments
 =========

 nprocs_num (input) int
         Number of processors SuperLU_DIST is executed on, and the input 
         matrix is distributed on.

 nprocs_symb (input) int
         Number of processors on which the symbolic factorization is
         performed.  It is equal to the number of independent domains
         idenfied in the graph partitioning algorithm executed
         previously and has to be a power of 2.  It corresponds to
         number of leaves in the separator tree.

 A       (input) SuperMatrix*
         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The
         number of the linear equations is A->nrow.  Matrix A is
         distributed in NRformat_loc format.
         Matrix A is not yet permuted by perm_c.

 perm_c  (input) int_t*
	   Column permutation vector of size A->ncol, which defines the 
         permutation matrix Pc; perm_c[i] = j means column i of A is 
         in position j in A*Pc.

 perm_r  (input) int_t*
	   Row permutation vector of size A->nrow, which defines the 
         permutation matrix Pr; perm_r[i] = j means column i of A is 
         in position j in Pr*A.

 sizes   (input) int_t*
         Contains the number of vertices in each separator.

 fstVtxSep (input) int_t*
         Contains first vertex for each separator.

 Pslu_freeable (output) Pslu_freeable_t*
         Returns the local L and U structure, and global to local
         information on the indexing of the vertices.  Contains all
         the information necessary for performing the data
         distribution towards the numeric factorization.

 num_comm (input) MPI_Comm*
         Communicator for numerical factorization

 symb_comm (input) MPI_Comm*
         Communicator for symbolic factorization

 symb_mem_usage (input) mem_usage_t *
         Statistics on memory usage.

 Return value
 ============
   < 0, number of bytes allocated on return from the symbolic factorization.
   > 0, number of bytes allocated when out of memory.

 Sketch of the algorithm
 =======================

  Distrbute the vertices on the processors using a subtree to
  subcube algorithm.

  Redistribute the structure of the input matrix A according to the
  subtree to subcube computed previously for the symbolic
  factorization routine.  This implies in particular a distribution
  from nprocs_num processors to nprocs_symb processors.

  Perform symbolic factorization guided by the separator tree provided by
  a graph partitioning algorithm.  The symbolic factorization uses a 
  combined left-looking, right-looking approach. 
 

int_t symbfact_SubFree ( Glu_freeable_t Glu_freeable  ) 

 Deallocate storage of the data structures common to symbolic
 factorization routines.
 

int_t symbfact_SubInit ( fact_t  fact,
void *  work,
int_t  lwork,
int_t  m,
int_t  n,
int_t  annz,
Glu_persist_t Glu_persist,
Glu_freeable_t Glu_freeable 
)

 Allocate storage for the data structures common to symbolic factorization
 routines. For those unpredictable size, make a guess as FILL * nnz(A).
 Return value:
     If lwork = -1, return the estimated amount of space required, plus n;
     otherwise, return the amount of space actually allocated when
     memory allocation failure occurred.
 

int_t symbfact_SubXpand ( int_t  n,
int_t  jcol,
int_t  next,
MemType  mem_type,
int_t maxlen,
Glu_freeable_t Glu_freeable 
)

 Expand the data structures for L and U during the factorization.
 Return value:   0 - successful return
               > 0 - number of bytes allocated when run out of space
 

int_t* TreePostorder_dist ( int_t  ,
int_t  
)

int xerbla_ ( char *  srname,
int *  info 
)

    Purpose   
    =======

    XERBLA  is an error handler for the LAPACK routines.   
    It is called by an LAPACK routine if an input parameter has an   
    invalid value.  A message is printed and execution stops.

    Installers may consider modifying the STOP statement in order to   
    call system-specific exception-handling facilities.

    Arguments   
    =========

    SRNAME  (input) CHARACTER*6   
            The name of the routine which called XERBLA.

    INFO    (input) INT   
            The position of the invalid parameter in the parameter list

            of the calling routine.

   ===================================================================== 


Generated on Tue Aug 30 14:36:33 2011 for SuperLUDistributed by  doxygen 1.5.5