PROGRAM DLLTBT * * Timing program for block L*L**T factorization. * * -- Written on 23-March-1987. * Jeremy Du Croz, Nag Central Office. * * .. Parameters .. INTEGER NOUT PARAMETER ( NOUT = 6 ) INTEGER NMAX, NMIN, NINC PARAMETER ( NMAX = 32, NMIN = 16, NINC = 16 ) INTEGER LDA1, LDA2 PARAMETER ( LDA1 = NMAX + 1, LDA2 = NMAX ) INTEGER LA PARAMETER ( LA = NMAX*LDA1 ) INTEGER NBMAX, NBMIN, NBINC PARAMETER ( NBMAX = 7, NBMIN = 1, NBINC = 1 ) * .. Local Scalars .. INTEGER N, NB * .. Local Arrays .. DOUBLE PRECISION A( LA ), AB( LA ) * .. External Subroutines .. EXTERNAL TIMSUB * .. Executable Statements .. * WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * )'Speed of DLLT and DLLTB in megaflops:' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * ) $ ' 1st row of each pair obtained with LDA = ', LDA1 WRITE( NOUT, FMT = * ) $ ' 2nd row of each pair obtained with LDA = ', LDA2 WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = * )' DLLT DLLTB' WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = '('' N '', 14('' NB='',I2,:))' ) $ ( NB, NB = NBMIN, NBMAX, NBINC ) * DO 10 N = NMIN, NMAX, NINC * * Time routines for one value of N and two values of LDA * WRITE( NOUT, FMT = * ) CALL TIMSUB( N, A, AB, LDA1, NBMIN, NBMAX, NBINC, NOUT ) CALL TIMSUB( N, A, AB, LDA2, NBMIN, NBMAX, NBINC, NOUT ) 10 CONTINUE STOP END SUBROUTINE TIMSUB( N, A, AB, LDA, NBMIN, NBMAX, NBINC, NOUT ) * .. Parameters .. INTEGER IBMAX PARAMETER ( IBMAX = 14 ) * .. Scalar Arguments .. INTEGER LDA, N, NBINC, NBMAX, NBMIN, NOUT * .. Array Arguments .. DOUBLE PRECISION A( LDA, N ), AB( LDA, N ) * .. Local Scalars .. DOUBLE PRECISION EMAX REAL S0, S1, S2, SM1 INTEGER I, IB, IBLIM, INFO, J, NB, NOP * .. Local Arrays .. REAL E( IBMAX ), S( 0: IBMAX ) * .. External Functions .. REAL SECOND EXTERNAL SECOND * .. External Subroutines .. EXTERNAL DLLT, DLLTB * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, MAX, REAL, SQRT * .. Executable Statements .. IB = 0 SM1 = SECOND( ) S0 = SECOND( ) * * Generate data * DO 20 J = 1, N DO 10 I = J, N A( I, J ) = 1.0D0 - SQRT( DBLE( ABS( I - J ) ) )/DBLE( N ) 10 CONTINUE 20 CONTINUE * * Time unblocked version of the algorithm * S1 = SECOND( ) CALL DLLT( N, A, LDA, INFO ) S2 = SECOND( ) S( IB ) = ( S2 - S1 ) - ( S0 - SM1 ) IF( INFO.NE.0 )THEN WRITE( NOUT, FMT = * )'DLLT returns INFO = ', INFO STOP END IF * * Time blocked version of the algorithm with different values of NB * DO 70 NB = NBMIN, NBMAX, NBINC IB = IB + 1 IF( IB.GT.IBMAX ) $ STOP * * Generate data * DO 40 J = 1, N DO 30 I = J, N AB( I, J ) = 1.0D0 - SQRT( DBLE( ABS( I - J ) ) )/ $ DBLE( N ) 30 CONTINUE 40 CONTINUE * * Time one call of the routine * S1 = SECOND( ) CALL DLLTB( N, AB, LDA, INFO, NB ) S2 = SECOND( ) S( IB ) = ( S2 - S1 ) - ( S0 - SM1 ) IF( INFO.NE.0 )THEN WRITE( NOUT, FMT = * )'DLLTB returns INFO = ', INFO STOP END IF * * Check results for consistency * EMAX = 0.0 DO 60 J = 1, N DO 50 I = J, N EMAX = MAX( EMAX, ABS( A( I, J ) - AB( I, J ) ) ) 50 CONTINUE 60 CONTINUE E( IB ) = EMAX 70 CONTINUE * * Report results * IBLIM = IB NOP = MAX( 1, ( N*N*N )/3 ) DO 80 IB = 0, IBLIM IF( S( IB ).LE.0.0 )THEN S( IB ) = 0.0 ELSE S( IB ) = REAL( NOP )/( 1.0E6*S( IB ) ) END IF 80 CONTINUE WRITE( NOUT, FMT = '(1X,I5,15F8.2)' )N, ( S( IB ), IB = 0, IBLIM ) WRITE( NOUT, FMT = '(14X,1P,14E8.0)' )( E( IB ), IB = 1, IBLIM ) RETURN END SUBROUTINE DLLTB( N, A, LDA, INFO, NB ) * * Computes an L*L**T factorization of a symmetric positive-definite * matrix A. * Blocked algorithm. * * .. Parameters .. DOUBLE PRECISION ONE PARAMETER ( ONE = 1.0D+0 ) * .. Scalar Arguments .. INTEGER INFO, LDA, N, NB * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. Local Scalars .. INTEGER J, JB, LDA1 * .. External Subroutines .. EXTERNAL DGEMM, DLLT, DSYRK, DTRSM * .. Intrinsic Functions .. INTRINSIC MIN * .. Executable Statements .. * INFO = 0 LDA1 = LDA DO 10 J = 1, N, NB JB = MIN( NB, N - J + 1 ) IF( J.EQ.N ) $ LDA1 = 1 * * Update diagonal block. * CALL DSYRK( 'Right', 'Lower', JB, J - 1, -ONE, A( J, 1 ), LDA, $ ONE, A( J, J ), LDA1 ) * * Factorize diagonal block and test for * non-positive-definiteness. * CALL DLLT( JB, A( J, J ), LDA1, INFO ) IF( INFO.NE.0 ) $ GO TO 20 IF( J + JB.LE.N )THEN * * Update subdiagonal block. * CALL DGEMM( 'No transpose', 'Transpose', N - J - JB + 1, JB, $ J - 1, -ONE, A( J + JB, 1 ), LDA, A( J, 1 ), $ LDA, ONE, A( J + JB, J ), LDA ) * * Compute subdiagonal block of L. * CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', $ N - J - JB + 1, JB, A( J, J ), LDA, $ A( J + JB, J ), LDA ) END IF 10 CONTINUE RETURN * 20 INFO = INFO + J - 1 RETURN END SUBROUTINE DLLT( N, A, LDA, INFO ) * * Computes an L*L**T factorization of a symmetric positive-definite * matrix A. * Unblocked algorithm. * * .. Parameters .. DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) * .. Scalar Arguments .. INTEGER INFO, LDA, N * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ) * .. Local Scalars .. INTEGER J * .. External Functions .. DOUBLE PRECISION DDOT EXTERNAL DDOT * .. External Subroutines .. EXTERNAL DGEMV, DSCAL * .. Intrinsic Functions .. INTRINSIC SQRT * .. Executable Statements .. * INFO = 0 DO 10 J = 1, N * * Update a(j,j). * A( J, J ) = A( J, J ) - DDOT( J - 1, A( J, 1 ), LDA, A( J, 1 ), $ LDA ) * * Compute l(j,j) and test for non-positive-definiteness. * IF( A( J, J ).LE.ZERO ) $ GO TO 20 A( J, J ) = SQRT( A( J, J ) ) IF( J.LT.N )THEN * * Update elements j+1:n of j-th column. * CALL DGEMV( 'No transpose', N - J, J - 1, -ONE, $ A( J + 1, 1 ), LDA, A( J, 1 ), LDA, ONE, $ A( J + 1, J ), 1 ) * * Compute elements j+1:n of j-th column of L. * CALL DSCAL( N - J, ONE/A( J, J ), A( J + 1, J ), 1 ) END IF 10 CONTINUE RETURN * 20 INFO = J RETURN END .