PROGRAM DPLUBT * * Timing program for block LU factorization. * * -- Written on 23-March-1987. * Jeremy Du Croz, Nag Central Office. * * .. Parameters .. INTEGER NOUT PARAMETER ( NOUT = 6 ) INTEGER NMAX, NMIN, NINC PARAMETER ( NMAX = 32, NMIN = 16, NINC = 16 ) INTEGER LDA1, LDA2 PARAMETER ( LDA1 = NMAX + 1, LDA2 = NMAX ) INTEGER LA PARAMETER ( LA = NMAX*LDA1 ) INTEGER NBMAX, NBMIN, NBINC PARAMETER ( NBMAX = 7, NBMIN = 1, NBINC = 1 ) * .. Local Scalars .. INTEGER N, NB * .. Local Arrays .. DOUBLE PRECISION A( LA ), AB( LA ) INTEGER IPIV( NMAX ) * .. External Subroutines .. EXTERNAL TIMSUB * .. Executable Statements .. * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ 'Speed of DPLU and DPLUB in megaflops: M = N' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ ' 1st row of each pair obtained with LDA = ', LDA1 WRITE( NOUT, FMT = * ) $ ' 2nd row of each pair obtained with LDA = ', LDA2 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * )' DPLU DPLUB' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = '('' N '', 14('' NB='',I2,:))' ) $ ( NB, NB = NBMIN, NBMAX, NBINC ) * DO 10 N = NMIN, NMAX, NINC * * Time routines for one value of N and two values of LDA * WRITE( NOUT, FMT = * ) CALL TIMSUB( N, A, AB, LDA1, IPIV, NBMIN, NBMAX, NBINC, NOUT ) CALL TIMSUB( N, A, AB, LDA2, IPIV, NBMIN, NBMAX, NBINC, NOUT ) 10 CONTINUE STOP END SUBROUTINE TIMSUB( N, A, AB, LDA, IPIV, NBMIN, NBMAX, NBINC, $ NOUT ) * .. Parameters .. INTEGER IBMAX PARAMETER ( IBMAX = 14 ) * .. Scalar Arguments .. INTEGER LDA, N, NBINC, NBMAX, NBMIN, NOUT * .. Array Arguments .. DOUBLE PRECISION A( LDA, N ), AB( LDA, N ) INTEGER IPIV( N ) * .. Local Scalars .. DOUBLE PRECISION EMAX REAL S0, S1, S2, SM1 INTEGER I, IB, IBLIM, INFO, J, NB, NOP * .. Local Arrays .. REAL E( IBMAX ), S( 0: IBMAX ) * .. External Functions .. REAL SECOND EXTERNAL SECOND * .. External Subroutines .. EXTERNAL DPLU, DPLUB * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, REAL, SQRT * .. Executable Statements .. IB = 0 SM1 = SECOND( ) S0 = SECOND( ) * * Generate data * DO 20 J = 1, N DO 10 I = 1, N A( I, J ) = 1.0D0 - SQRT( DBLE( ABS( I - J ) ) )/DBLE( N ) 10 CONTINUE 20 CONTINUE * * Time unblocked version of the algorithm * S1 = SECOND( ) CALL DPLU( N, N, A, LDA, IPIV, INFO ) S2 = SECOND( ) S( IB ) = ( S2 - S1 ) - ( S0 - SM1 ) IF( INFO.NE.0 )THEN WRITE( NOUT, FMT = * )'DPLU returns INFO = ', INFO STOP END IF * * Time blocked version of the algorithm with different values of NB * DO 70 NB = NBMIN, NBMAX, NBINC IB = IB + 1 IF( IB.GT.IBMAX ) $ STOP * * Generate data * DO 40 J = 1, N DO 30 I = 1, N AB( I, J ) = 1.0D0 - SQRT( DBLE( ABS( I - J ) ) )/ $ DBLE( N ) 30 CONTINUE 40 CONTINUE * * Time one call of the routine * S1 = SECOND( ) CALL DPLUB( N, N, AB, LDA, IPIV, INFO, NB ) S2 = SECOND( ) S( IB ) = ( S2 - S1 ) - ( S0 - SM1 ) IF( INFO.NE.0 )THEN WRITE( NOUT, FMT = * )'DPLUB returns INFO = ', INFO STOP END IF * * Check results for consistency * EMAX = 0.0 DO 60 J = 1, N DO 50 I = 1, N EMAX = MAX( EMAX, ABS( A( I, J ) - AB( I, J ) ) ) 50 CONTINUE 60 CONTINUE E( IB ) = EMAX 70 CONTINUE * * Report results * IBLIM = IB NOP = MAX( 1, ( 2*N*N*N )/3 ) DO 80 IB = 0, IBLIM IF( S( IB ).LE.0.0 )THEN S( IB ) = 0.0 ELSE S( IB ) = REAL( NOP )/( 1.0E6*S( IB ) ) END IF 80 CONTINUE WRITE( NOUT, FMT = '(1X,I5,15F8.2)' )N, ( S( IB ), IB = 0, IBLIM ) WRITE( NOUT, FMT = '(14X,1P,14E8.0)' )( E( IB ), IB = 1, IBLIM ) RETURN END SUBROUTINE DPLUB( M, N, A, LDA, IPIV, INFO, NB ) * * Computes an LU factorization of an m-by-n matrix A, using partial * pivoting with row interchanges. * Blocked algorithm. * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. Scalar Arguments .. INTEGER INFO, LDA, M, N, NB * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) INTEGER IPIV( N ) * .. Local Scalars .. INTEGER I, IP, J, JB, LDA1 * .. External Subroutines .. EXTERNAL DGEMM, DPLU, DSWAP, DTRSM * .. Intrinsic Functions .. INTRINSIC MIN * .. Executable Statements .. * INFO = 0 LDA1 = LDA DO 40 J = 1, N, NB JB = MIN( N - J + 1, NB ) IF( J.EQ.N ) $ LDA1 = M - N + 1 * * Apply previous interchanges to current block. * DO 10 I = 1, J - 1 IP = IPIV( I ) IF( IP.NE.I ) $ CALL DSWAP( JB, A( I, J ), LDA, A( IP, J ), LDA ) 10 CONTINUE * * Compute superdiagonal block of U. * CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', J - 1, JB, $ A( 1, 1 ), LDA, A( 1, J ), LDA ) * * Update diagonal and subdiagonal blocks. * CALL DGEMM( 'No transpose', 'No transpose', M - J + 1, JB, $ J - 1, -ONE, A( J, 1 ), LDA, A( 1, J ), LDA, ONE, $ A( J, J ), LDA1 ) * * Factorize diagonal and subdiagonal blocks and test for * singularity. * CALL DPLU( M - J + 1, JB, A( J, J ), LDA1, IPIV( J ), INFO ) DO 20 I = J, J + JB - 1 IPIV( I ) = J - 1 + IPIV( I ) 20 CONTINUE IF( INFO.NE.0 ) $ GO TO 50 * * Apply interchanges to previous blocks. * DO 30 I = J, J + JB - 1 IP = IPIV( I ) IF( IP.NE.I ) $ CALL DSWAP( J - 1, A( I, 1 ), LDA, A( IP, 1 ), LDA ) 30 CONTINUE 40 CONTINUE RETURN * 50 INFO = INFO + J - 1 RETURN END SUBROUTINE DPLU( M, N, A, LDA, IPIV, INFO ) * * Computes an LU factorization of an m-by-n matrix A, using partial * pivoting with row interchanges. * Unblocked algorithm. * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. Scalar Arguments .. INTEGER INFO, LDA, M, N * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) INTEGER IPIV( N ) * .. Local Scalars .. DOUBLE PRECISION T INTEGER I, IP, J, JP * .. External Functions .. INTEGER IDAMAX EXTERNAL IDAMAX * .. External Subroutines .. EXTERNAL DGEMV, DSCAL, DSWAP, DTRSV * .. Executable Statements .. * INFO = 0 DO 20 J = 1, N * * Apply previous interchanges to jth column. * DO 10 I = 1, J - 1 IP = IPIV( I ) T = A( I, J ) A( I, J ) = A( IP, J ) A( IP, J ) = T 10 CONTINUE * * Compute elements 1:j-1 of j-th column. * CALL DTRSV( 'Lower', 'No transpose', 'Unit', J - 1, A( 1, 1 ), $ LDA, A( 1, J ), 1 ) * * Update elements j:n of j-th column. * CALL DGEMV( 'No transpose', M - J + 1, J - 1, -ONE, A( J, 1 ), $ LDA, A( 1, J ), 1, ONE, A( J, J ), 1 ) * * Find pivot and test for singularity. * JP = J - 1 + IDAMAX( M - J + 1, A( J, J ), 1 ) IPIV( J ) = JP IF( A( JP, J ).EQ.ZERO ) $ GO TO 30 * * Apply interchange to columns 1:j. * IF( JP.NE.J ) $ CALL DSWAP( J, A( J, 1 ), LDA, A( JP, 1 ), LDA ) * * Compute elements j+1:m of j-th column. * IF( J.LT.M ) $ CALL DSCAL( M - J, ONE/A( J, J ), A( J + 1, J ), 1 ) 20 CONTINUE RETURN * 30 INFO = J RETURN END .