require(copula)
doExtras <- FALSE

Intro

This vignette visualizes (log) likelihood functions of Archimedean copulas, some of which are numerically challenging to compute. Because of this computational challenge, we also check for equivalence of some of the several computational methods, testing for numerical near-equality using all.equal(L1, L2).

Auxiliary functions

We start by defining the following auxiliary functions.

##' @title [m]inus Log-Likelihood for Archimedean Copulas ("fast version")
##' @param theta parameter (length 1 for our current families)
##' @param acop Archimedean copula (of class "acopula")
##' @param u data matrix n x d
##' @param n.MC if > 0 MC is applied with sample size equal to n.MC; otherwise,
##'        the exact formula is used
##' @param ... potential further arguments, passed to <acop> @dacopula()
##' @return negative log-likelihood
##' @author Martin Maechler (Marius originally)
mLogL <- function(theta, acop, u, n.MC=0, ...) { # -(log-likelihood)
    -sum([email protected](u, theta, n.MC=n.MC, log=TRUE, ...))
}
##' @title Plotting the Negative Log-Likelihood for Archimedean Copulas
##' @param cop an outer_nacopula (currently with no children)
##' @param u n x d  data matrix
##' @param xlim x-range for curve() plotting
##' @param main title for curve()
##' @param XtrArgs a list of further arguments for mLogL()
##' @param ... further arguments for curve()
##' @return invisible()
##' @author Martin Maechler
curveLogL <- function(cop, u, xlim, main, XtrArgs=list(), ...) {
    unam <- deparse(substitute(u))
    stopifnot(is(cop, "outer_nacopula"), is.list(XtrArgs),
              (d <- ncol(u)) >= 2, d == dim(cop),
              length([email protected]) == 0# not yet *nested* A.copulas
              )
    acop <- [email protected]
    th. <- [email protected] # the true theta
    acop <- setTheta(acop, NA) # so it's clear, the true theta is not used below
    if(missing(main)) {
        tau. <- [email protected]@tau(th.)
        main <- substitute("Neg. Log Lik."~ -italic(l)(theta, UU) ~ TXT ~~
               FUN(theta['*'] == Th) %=>% tau['*'] == Tau,
               list(UU = unam,
                TXT= sprintf("; n=%d, d=%d;  A.cop",
                         nrow(u), d),
                FUN = [email protected],
                Th = format(th.,digits=3),
                Tau = format(tau., digits=3)))
    }
    r <- curve(do.call(Vectorize(mLogL, "theta"), c(list(x, acop, u), XtrArgs)),
               xlim=xlim, main=main,
               xlab = expression(theta),
               ylab = substitute(- log(L(theta, u, ~~ COP)), list([email protected])),
               ...)
    if(is.finite(th.))
        axis(1, at = th., labels=expression(theta["*"]),
             lwd=2, col="dark gray", tck = -1/30)
    else warning("non-finite [email protected]@theta = ", th.)
    axis(1, at = initOpt([email protected]),
         labels = FALSE, lwd = 2, col = 2, tck = 1/20)
    invisible(r)
}

Ensure that we are told about it, if the numerical algorithms choose methods using Rmpfr (R package interfacing to multi precision arithmetic MPFR):

op <- options("copula:verboseUsingRmpfr"=TRUE) # see when "Rmpfr" methods are chosen automatically

Joe's family

Easy case (\(\tau=0.2\))

n <- 200
d <- 100
tau <- 0.2
theta <- [email protected](tau)
cop <- onacopulaL("Joe", list(theta,1:d))
theta
## [1] 1.443824

Here, the three different methods work “the same”:

set.seed(1)
U1 <- rnacopula(n,cop)
enacopula(U1, cop, "mle") # 1.432885 --  fine
## [1] 1.432898
th4 <- 1 + (1:4)/4
mL.tr <- c(-3558.5, -3734.4, -3299.5, -2505.)
mLt1 <- sapply(th4, function(th) mLogL(th, [email protected], U1, method="log.poly")) # default
mLt2 <- sapply(th4, function(th) mLogL(th, [email protected], U1, method="log1p"))
mLt3 <- sapply(th4, function(th) mLogL(th, [email protected], U1, method="poly"))
stopifnot(all.equal(mLt1, mL.tr, tolerance=5e-5),
          all.equal(mLt2, mL.tr, tolerance=5e-5),
          all.equal(mLt3, mL.tr, tolerance=5e-5))
system.time(r1l  <- curveLogL(cop, U1, c(1, 2.5), X=list(method="log.poly")))
##    user  system elapsed 
##   0.791   0.001   0.790
mtext("all three polyJ() methods on top of each other")
system.time({
    r1J <- curveLogL(cop, U1, c(1, 2.5), X=list(method="poly"),
                     add=TRUE, col=adjustcolor("red", .4))
    r1m  <- curveLogL(cop, U1, c(1, 2.5), X=list(method="log1p"),
                      add=TRUE, col=adjustcolor("blue",.5))
})

plot of chunk unnamed-chunk-6

##    user  system elapsed 
##   1.459   0.000   1.459
U2 <- rnacopula(n,cop)
summary(dCopula(U2, cop)) # => density for the *correct* parameter looks okay
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
##  0.000e+00  4.900e+01  6.430e+02 2.777e+175  1.932e+04 5.553e+177
## hmm: max = 5.5e177
if(doExtras)
    system.time(r2 <- curveLogL(cop, U2, c(1, 2.5)))
stopifnot(all.equal(enacopula(U2, cop, "mle"), 1.43992755, tolerance=1e-5),
          all.equal(mLogL(1.8, [email protected], U2), -4070.1953,tolerance=1e-5)) # (was -Inf)
U3 <- rnacopula(n,cop)
(th. <- enacopula(U3, cop, "mle")) # 1.4495
## [1] 1.449569
system.time(r3 <- curveLogL(cop, U3, c(1, 2.5)))
##    user  system elapsed 
##   0.713   0.000   0.713
axis(1, at = th., label = quote(hat(theta)))

plot of chunk unnamed-chunk-8

U4 <- rnacopula(n,cop)
enacopula(U4, cop, "mle") # 1.4519  (prev. was 2.351 : "completely wrong")
## [1] 1.451916
summary(dCopula(U4, cop)) # ok (had one Inf)
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
##  0.000e+00  7.500e+01  9.080e+02 1.981e+259  1.434e+04 3.961e+261
if(doExtras)
    system.time(r4 <- curveLogL(cop, U4, c(1, 2.5)))
mLogL(2.2351, [email protected], U4)
## [1] -1789.59
mLogL(1.5,    [email protected], U4)
## [1] -3882.819
mLogL(1.2,    [email protected], U4)
## [1] -3517.366
if(doExtras) # each curve takes almost 2 sec
    system.time({
        curveLogL(cop, U4, c(1, 1.01))
        curveLogL(cop, U4, c(1, 1.0001))
        curveLogL(cop, U4, c(1, 1.000001))
    })
## --> limit goes *VERY* steeply up to  0
## --> theta 1.164 is about the boundary:
stopifnot(identical(setTheta(cop, 1.164), onacopula([email protected], C(1.164, 1:100))),
      all.equal(600.59577,
            [email protected]@dacopula(U4[118,,drop=FALSE],
                    theta=1.164, log = TRUE), tolerance=1e-5)) # was "Inf"

Harder case (\(d=150\), \(\tau=0.3\))

n <- 200
d <- 150
tau <- 0.3
(theta <- [email protected](tau))
## [1] 1.772108
cop <- onacopulaL("Joe",list(theta,1:d))
set.seed(47)
U. <- rnacopula(n,cop)
enacopula(U., cop, "mle") # 1.784578
## [1] 1.78459
system.time(r. <- curveLogL(cop, U., c(1.1, 3)))

plot of chunk unnamed-chunk-11

##    user  system elapsed 
##   0.996   0.000   0.996
## => still looks very good

Even harder case (\(d=180\), \(\tau=0.4\))

d <- 180
tau <- 0.4
(theta <- [email protected](tau))
## [1] 2.219066
cop <- onacopulaL("Joe",list(theta,1:d))
U. <- rnacopula(n,cop)
enacopula(U., cop, "mle") # 2.217582
## [1] 2.217593
if(doExtras)
system.time(r. <- curveLogL(cop, U., c(1.1, 4)))
## => still looks very good

Gumbel's family

Easy case (\(\tau=0.2\))

n <- 200
d <- 50 # smaller 'd' -- so as to not need 'Rmpfr' here
tau <- 0.2
(theta <- [email protected](tau))
## [1] 1.25
cop <- onacopulaL("Gumbel",list(theta,1:d))
set.seed(1)
U1 <- rnacopula(n,cop)
if(doExtras) {
    U2 <- rnacopula(n,cop)
    U3 <- rnacopula(n,cop)
}
enacopula(U1, cop, "mle") # 1.227659 (was 1.241927)
## [1] 1.227659
##--> Plots with "many" likelihood evaluations
system.time(r1 <- curveLogL(cop, U1, c(1, 2.1)))

plot of chunk unnamed-chunk-15

##    user  system elapsed 
##   0.868   0.000   0.868
if(doExtras) system.time({
    mtext("and two other generated samples")
    r2 <- curveLogL(cop, U2, c(1, 2.1), add=TRUE)
    r3 <- curveLogL(cop, U3, c(1, 2.1), add=TRUE)
})

Harder case (\(d=150\), \(\tau=0.6\))

d <- 150
tau <- 0.6
(theta <- [email protected](tau))
## [1] 2.5
cG.5 <- onacopulaL("Gumbel",list(theta,1:d))
set.seed(17)
U4 <- rnacopula(n,cG.5)
U5 <- rnacopula(n,cG.5)
U6 <- rnacopula(n,cG.5)
if(doExtras) { ## "Rmpfr" is used {2012-06-21}: -- therefore about 18 seconds!
 tol <- if(interactive()) 1e-12 else 1e-8
 print(system.time(
 ee. <- c(enacopula(U4, cG.5, "mle", tol=tol),
          enacopula(U5, cG.5, "mle", tol=tol),
          enacopula(U6, cG.5, "mle", tol=tol))))
dput(ee.)# in case the following fails
## tol=1e-12 Linux nb-mm3 3.2.0-25-generic x86_64 (2012-06-23):
##   c(2.47567251789004, 2.48424484287686, 2.50410767129408)
##   c(2.475672518,      2.484244763,      2.504107671),
stopifnot(all.equal(ee., c(2.475672518, 2.484244763, 2.504107671),
            tolerance= max(1e-7, 16*tol)))
}
## --> Plots with "many" likelihood evaluations
th. <- seq(1, 3, by= 1/4)
if(doExtras) # "default2012" (polyG default) partly uses Rmpfr here:
system.time(r4   <- sapply(th., mLogL, [email protected], u=U4))## 25.6 sec
## whereas this (polyG method) is very fast {and still ok}:
system.time(r4.p <- sapply(th., mLogL, [email protected], u=U4, method="pois"))
##    user  system elapsed 
##    0.18    0.00    0.18
r4. <- c(0, -18375.33, -21948.033, -24294.995, -25775.502,
         -26562.609, -26772.767, -26490.809, -25781.224)
stopifnot(!doExtras ||
          all.equal(r4,   r4., tolerance = 8e-8),
          all.equal(r4.p, r4., tolerance = 8e-8))
## --> use fast method here as well:
system.time(r5.p <- sapply(th., mLogL, [email protected], u=U5, method="pois"))
##    user  system elapsed 
##   0.179   0.000   0.179
system.time(r6.p <- sapply(th., mLogL, [email protected], u=U6, method="pois"))
##    user  system elapsed 
##   0.179   0.000   0.179
if(doExtras) {
    if(FALSE) # for speed analysis, etc
        debug(copula:::polyG)
    mLogL(1.65, [email protected], U4) # -23472.96
}
dd <- dCopula(U4, setTheta(cG.5, 1.64), log = TRUE,
              method = if(doExtras)"default" else "pois")
summary(dd)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   41.59   53.30   81.09  116.91  137.54  707.13
stopifnot(!is.na(dd), # no NaN's anymore
      40 < range(dd), range(dd) < 710)

Frank's family (an already hard case)

n <- 64
d <- 5
tau <- 0.8
(theta <- [email protected](tau))
## [1] 18.19154
cop <- onacopulaL("Frank", list(theta,1:d))
set.seed(11) # these seeds give no problems: 101, 41, 21
U. <- rnacopula(n,cop)
[email protected] <- setTheta([email protected], NA) # forget the true theta
system.time(f.ML <- emle(U., cop)); f.ML # --> fine: theta = 18.033, Log-lik = 314.01
##    user  system elapsed 
##   0.027   0.000   0.028
## 
## Call:
## bbmle::mle2(minuslogl = nLL, start = start, optimizer = "optimize", 
##     lower = interval[1], upper = interval[2])
## 
## Coefficients:
##   theta 
## 18.0333 
## 
## Log-likelihood: 314.01
if(doExtras)
    system.time(f.mlMC <- emle(U., cop, n.MC = 1e4)) # with MC
stopifnot(all.equal(unname(coef(f.ML)), 18.03331, tolerance= 1e-6),
      all.equal([email protected], -314.0143, tolerance=1e-6),
      !doExtras || ## Simulate MLE (= SMLE) is "extra" random,  hmm...
      all.equal(unname(coef(f.mlMC)), 17.8, tolerance= 0.01)
      ##           64-bit ubuntu: 17.817523
      ##         ? 64-bit Mac:    17.741
     )

[email protected] <- setTheta([email protected], theta)
r. <- curveLogL(cop, U., c(1, 200)) # => now looks fine

plot of chunk unnamed-chunk-19

tail(as.data.frame(r.), 15)
##          x        y
## 87  172.14 2105.690
## 88  174.13 2143.642
## 89  176.12 2181.637
## 90  178.11 2219.675
## 91  180.10 2257.754
## 92  182.09 2295.874
## 93  184.08 2334.034
## 94  186.07 2372.232
## 95  188.06 2410.468
## 96  190.05 2448.742
## 97  192.04 2487.051
## 98  194.03 2525.396
## 99  196.02 2563.776
## 100 198.01 2602.189
## 101 200.00 2640.636
stopifnot( is.finite( r.$y ),
      ## and is convex (everywhere):
      diff(r.$y, d=2) > 0)
options(op) # revert to previous state

Session information

## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux bullseye/sid
## 
## Matrix products: default
## BLAS:   /srv/R/R-patched/build.20-04-27/lib/libRblas.so
## LAPACK: /srv/R/R-patched/build.20-04-27/lib/libRlapack.so
## 
## attached base packages:
##  [1] parallel  grid      stats4    tools     stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
## [1] rugarch_1.4-2   gsl_2.1-6       mev_1.13.1      lattice_0.20-41
## [5] bbmle_1.0.23.1  copula_1.0-0   
## 
## loaded via a namespace (and not attached):
##  [1] zoo_1.8-8                   xfun_0.14                  
##  [3] ks_1.11.7                   partitions_1.9-22          
##  [5] pcaPP_1.9-73                expm_0.999-4               
##  [7] SkewHyperbolic_0.4-0        gmp_0.5-14                 
##  [9] nloptr_1.2.2.1              stringr_1.4.0              
## [11] pspline_1.0-18              bdsmatrix_1.3-4            
## [13] mvtnorm_1.1-0               evaluate_0.14              
## [15] knitr_1.28                  ADGofTest_0.3              
## [17] markdown_1.1                evd_2.3-3                  
## [19] highr_0.8                   xts_0.12-0                 
## [21] Rcpp_1.0.4.6                KernSmooth_2.23-17         
## [23] polynom_1.4-0               DistributionUtils_0.6-0    
## [25] truncnorm_1.0-8             alabama_2015.3-1           
## [27] mime_0.9                    spd_2.0-1                  
## [29] stringi_1.4.6               TruncatedNormal_2.2        
## [31] numDeriv_2016.8-1.1         Runuran_0.30               
## [33] stabledist_0.7-1            magrittr_1.5               
## [35] nleqslv_3.3.2               Rsolnp_1.16                
## [37] MASS_7.3-51.6               GeneralizedHyperbolic_0.8-4
## [39] Matrix_1.2-18               randtoolbox_1.30.1         
## [41] boot_1.3-25                 mclust_5.4.6               
## [43] rngWELL_0.10-6              compiler_3.6.3