experiments with the variance

2026-02-11 19:00:56 +01:00
parent c2c759bb04
commit 9db48a9a33
4 changed files with 364 additions and 24 deletions
@@ -59,6 +59,7 @@ expr_to_label <- function(expr) {
 #' @param curve_col Colour of the reference curve (default = `"red"`).
 #' @param curve_lwd Line width of the reference curve (default = 2).
 #' @param log_plot If True, then the y-axis is on a log scale.
+#' @param main_title Main title for the plot
 #' @return A list with the following components  
 #'   \item{K}{Integer vector `1:maxK`.}  
 #'   \item{sv}{Numeric vector of the smallest singular values for each `K`.}  
@@ -98,7 +99,8 @@ smallest_sv_sequence <- function(
    curve_to = NULL,
    curve_col = "red",
    curve_lwd = 2, 
-    log_plot = FALSE
+    log_plot = FALSE,
+    main_title = "Smallest singular value vs. K"
 ) {
  ## 1. Input validation =======================================================
  if (!is.numeric(a) || length(a) == 0) {
@@ -129,6 +131,9 @@ smallest_sv_sequence <- function(
  if (!inherits(curve_expr, "call") && !is.character(curve_expr)) {
    stop("`curve_expr` must be a call (e.g., quote(20/sqrt(x))) or a character string.")
  }
+  if (!is.character(main_title)){
+    stop("`main_title` must be a character vector.")
+  }
  
  ## 2. Prepare storage ========================================================
  K_vec <- seq_len(maxK)
@@ -147,6 +152,8 @@ smallest_sv_sequence <- function(
      guard = guard
    )
    
+    Q <- 1 /sqrt(n) * Q
+    
    sv_res <- compute_minmax_sv(Q)
    if (!is.list(sv_res) || is.null(sv_res$smallest_singular_value)) {
      stop("`compute_minmax_sv()` must return a list containing `$smallest_singular_value`.")
@@ -157,6 +164,7 @@ smallest_sv_sequence <- function(
  ## 4. Plotting (optional) ====================================================
  if (plot) {
    ## Basic scatter/line plot of the singular values
+    par(mar = c(5, 4, 4, 8))  # extra space on the right for the legend
    plot_args <- list(
      x = K_vec,
      y = smallest_sv,
@@ -165,17 +173,21 @@ smallest_sv_sequence <- function(
      col = "steelblue",
      xlab = "K subdivisions",
      ylab = "Smallest singular value of Q",
-      main = "Smallest singular value vs. K"
+      main = main_title
    )
    if (log_plot) plot_args$log <- "y"
    
    do.call(graphics::plot, plot_args)
-    # graphics::plot(
-    #   K_vec, smallest_sv,
-    #   type = "b", pch = 19, col = "steelblue",
-    #   xlab = "K subdivisions", ylab = "Smallest singular value of Q",
-    #   main = "Smallest singular value vs. K"
-    # )
+    # add legend. The par(xpd = ...) allows drawing outside of the plot region.
+    par(xpd = TRUE)
+    legend("topright",
+           inset=c(-0.2,0), 
+           legend=c("SV of Q"), 
+           col="steelblue", 
+           title="Legend",
+           pch = 16,
+           bty = "n")
+    par(xpd = FALSE)
    ## Add the reference curve if requested
    if (add_curve) {
      ## Determine sensible defaults for the curve limits
@@ -197,8 +209,8 @@ smallest_sv_sequence <- function(
      
      # add label with the curve expression
      label_txt <- expr_to_label(curve_expr)
-      x_pos <- curve_from + 0.9 * (curve_to - curve_from)
-      y_pos <- 0.9 * max(smallest_sv)
+      x_pos <- curve_from + 0.8 * (curve_to - curve_from)
+      y_pos <- 0.85 * max(smallest_sv)
      graphics::text(
        x = x_pos, y = y_pos,
        labels = label_txt,
@@ -192,6 +192,9 @@ compute_matrix <- function(
 #' @export
 compute_minmax_sv <- function(M) {
  s <- svd(M, nu=0, nv=0)$d
+  
+  # just a check if we compute the right thing
+  # s <- sqrt(eigen(M %*% t(M), symmetric = TRUE, only.value=TRUE)$values)

  list(
    largest_singular_value  = max(s),
@@ -50,14 +50,14 @@ title(
 )

 ## 3.3 Plot one dimensional X_i ================================================
-p3 <- create_cond_density(a1, dnorm, pnorm, X1)
-givenX <- c(-0.5)
+p3 <- create_cond_density(2, dnorm, pnorm, X1)
+givenX <- c(-3.0)
 p3_plot <- \(x) p3(x, givenX)
 plot.function(p3_plot, xlab="u", ylab="p(u |x) ")
 title(main=expression("Conditional density for" ~ v %~% N(0, 1)), line=2)
 title(
  main = bquote(
-    a == (.(paste(a1, collapse = ", "))) ~ "," ~
+    a == (.(paste(c(2), collapse = ", "))) ~ "," ~
      N == .(N) ~ "," ~
      x == (.(paste(givenX, collapse = ", ")))
  ),
@@ -3,20 +3,104 @@ source(here::here("R", "singular_values.R"))
 source(here::here("R", "graphon_distribution.R"))
 source(here::here("R", "singular_value_plot.R"))

+# https://stackoverflow.com/a/5790430
+resetPar <- function() {
+  dev.new()
+  op <- par(no.readonly = TRUE)
+  dev.off()
+  op
+}
+
+calc_conv_rate <- function(x,y) {
+  if (!is.numeric(x) || length(x) == 0) {
+    stop("`x` must be a non‑empty numeric vector.")
+  }
+  if (!is.numeric(y) || length(y) == 0) {
+    stop("`y` must be a non‑empty numeric vector.")
+  }
+  if (length(x) != length(y)) {
+    stop("`x` and `y` must have the same length.")
+  }
+  
+  df <- data.frame("x" = x, "y" = y)
+  lm_model <- lm(log(y) ~ log(x), data=df)
+  C <- exp(coefficients(lm_model)[[1]])
+  alpha <- coefficients(lm_model)[[2]]
+  
+  df[, "y_pred"] <- C * df[, "x"]^alpha
+  df[, "residual"] <- df[, "y"] - df[, "y_pred"]
+  
+  out <- list(
+    "C" = C,
+    "alpha" = alpha,
+    "obs" = df
+  )
+  out
+}
+
+calc_exp_conv_rate <- function(x,y) {
+  if (!is.numeric(x) || length(x) == 0) {
+    stop("`x` must be a non‑empty numeric vector.")
+  }
+  if (!is.numeric(y) || length(y) == 0) {
+    stop("`y` must be a non‑empty numeric vector.")
+  }
+  if (length(x) != length(y)) {
+    stop("`x` and `y` must have the same length.")
+  }
+  
+  df <- data.frame("x" = x, "y" = y)
+  fit <- nls(y ~ C * exp(r * x^m),
+             start = list(C = min(y), r= -0.5, m = 1))
+}
+
 # Nearly match with sample function --------------------------------------------
 # v ~ N(0,1) and X ~ discrete Uniform on [1:n]
-smallest_sv_sequence(
+out <- smallest_sv_sequence(
  a = c(0.5),
-  n = 400,
-  maxK= 20,
+  n = 9,
+  maxK= 3,
  sampler_fn = sample,
  guard=1e-12,
  plot=TRUE,
-  curve_expr = quote(20 / sqrt(x))
+  log_plot = TRUE,
+  curve_expr = quote(1 / x^0.545)
 )
+conv_rate <- calc_conv_rate(out$K, out$sv)

 # Normally distributed X ~ N(0,1) and v ~ N(0,1) -------------------------------
-smallest_sv_sequence(
+out <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 1200,
+  maxK = 20,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=TRUE,
+  log_plot = TRUE,
+  curve_expr = quote(1.5 * exp(-0.95 * x^1.34))
+  #curve_expr = quote( 1/exp(x^1.32))
+)
+# convergence rate does not work here, probably because the underlying model
+# does not work well
+conv_rate <- calc_conv_rate(out$K[1:20], out$sv[1:20])
+
+# Uniform distributed X ~ U[0,1] and v ~ N(0,1) --------------------------------
+out <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 20,
+  sampler_fn =function(n) matrix(runif(n), ncol = 1L),
+  guard=1e-12,
+  plot=TRUE,
+  log_plot = TRUE,
+  curve_expr = quote(1* exp(-1.1 * x^1.5))
+)
+# here the optimal fit does not work too, probably other model
+calc_conv_rate(out$K[1:9], out$sv[1:9])
+
+# Compare of parameters of Normal distribution ----------------------------------
+#
+out_sd0_5 <- smallest_sv_sequence(
  a = c(0.5),
  n = 400,
  maxK = 20,
@@ -24,16 +108,257 @@ smallest_sv_sequence(
  guard=1e-12,
  plot=TRUE,
  log_plot = TRUE,
-  curve_expr = quote(20 / exp(x^1.32))
+  fv = function(x) {dnorm(x, mean=0, sd=0.5)},
+  Fv = function(x) {pnorm(x, mean=0, sd=0.5)},
+  main_title="Smallest SV of v~ N(0,0.5^2) distribution"
 )

-# Uniform distributed X ~ U[0,1] and v ~ N(0,1) --------------------------------
-smallest_sv_sequence(
+out_sd1 <- smallest_sv_sequence(
  a = c(0.5),
  n = 400,
  maxK = 20,
-  sampler_fn =function(n) matrix(runif(n), ncol = 1L),
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
  guard=1e-12,
  plot=TRUE,
-  curve_expr = quote(20 / x^2)
-)
+  log_plot = TRUE,
+  fv = function(x) {dnorm(x, mean=0, sd=1)},
+  Fv = function(x) {pnorm(x, mean=0, sd=1)},
+  main_title="Smallest SV of v~ N(0,1) distribution"
+)
+
+out_sd2 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 20,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=TRUE,
+  log_plot = TRUE,
+  fv = function(x) {dnorm(x, mean=0, sd=2)},
+  Fv = function(x) {pnorm(x, mean=0, sd=2)},
+  main_title="Smallest SV of v~ N(0,2^2) distribution"
+)
+
+out_sd4 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 20,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=TRUE,
+  log_plot = TRUE,
+  fv = function(x) {dnorm(x, mean=0, sd=4)},
+  Fv = function(x) {pnorm(x, mean=0, sd=4)},
+  main_title="Smallest SV of v~ N(0,4^2) distribution"
+)
+
+par(mar = c(5, 4, 4, 8)) 
+plot(out_sd0_5$K, out_sd0_5$sv,
+     type = "b",
+     pch = 19,
+     col = "#D3BA68FF",
+     xlab = "K subdivisions",
+     ylab = "Smallest singular value of Q",
+     main="smallest SV for different variances of a normal distribution",
+     sub = "n = 400, a = 0.5",
+     log="y")
+lines(out_sd1$K, out_sd1$sv,
+     type="b", pch=19, col="#D5695DFF", add=TRUE)
+lines(out_sd2$K, out_sd2$sv,
+      type = "b", pch=19, col="#5D8CA8FF", add=TRUE)
+lines(out_sd4$K, out_sd4$sv,
+      type = "b", pch=19, col="#65A479FF", add=TRUE)
+par(xpd = TRUE)
+legend("topright",
+       inset=c(-0.2,0), 
+       legend=c("sd=0.5", "sd=1", "sd=2", "sd=4"), 
+       col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ), 
+       title="Legend",
+       pch = 16,
+       bty = "n")
+
+# Break phenomena of Exp-distribution ------------------------------------------
+out_exp <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 20,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=TRUE,
+  log_plot = TRUE,
+  fv = function(x) {dexp(x, rate=1)},
+  Fv = function(x) {pexp(x, rate=1)},
+  main_title="Smallest SV of v~ Exp(1) distribution"
+)
+
+par(resetPar)
+plot(out_exp$K, out_exp$sv,
+     log="y",
+     xlab="K subdivsions",
+     ylab="Smallest singular value of Q",
+     col="steelblue",
+     type="b",
+     main="Smallest singular value for v ~ Exp(1)",
+     sub="a = 0.5, n = 400")
+arrows(8, 1e-5, 6.5, 1e-7, angle=20, lty = 1, lwd=2)
+text(8.5, 1e-5, "Break only seen for exp-distribution", pos=4)
+
+## Observations of the break point depending the rate --------------------------
+#
+# Note: this also depends on the the parameter n of samples
+out_exp_1 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 80,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=FALSE,
+  fv = function(x) {dexp(x, rate=1)},
+  Fv = function(x) {pexp(x, rate=1)}
+)
+
+out_exp_2 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 80,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=FALSE,
+  fv = function(x) {dexp(x, rate=2)},
+  Fv = function(x) {pexp(x, rate=2)}
+)
+
+out_exp_3 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 80,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=FALSE,
+  fv = function(x) {dexp(x, rate=3)},
+  Fv = function(x) {pexp(x, rate=3)}
+)
+
+out_exp_4 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 80,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=FALSE,
+  fv = function(x) {dexp(x, rate=4)},
+  Fv = function(x) {pexp(x, rate=4)}
+)
+
+par(mar = c(5, 4, 4, 8)) 
+plot(out_exp_1$K, out_exp_1$sv,
+     type="b", col="#D3BA68FF", log="y",
+     main="Smallest SV of Q for different rates of Exp-distribution",
+     ylab="Smallest singular value of Q",
+     xlab="K subdivisions",
+     sub="a = 0.5, n = 400, depending also on n")
+lines(out_exp_2$K, out_exp_2$sv, type="b", col="#D5695DFF")
+lines(out_exp_3$K, out_exp_3$sv, type="b", col="#5D8CA8FF")
+lines(out_exp_4$K, out_exp_4$sv, type="b", col="#65A479FF")
+par(xpd=TRUE)
+legend("topright",
+       inset=c(-0.2,0), 
+       legend=c(expression(lambda == 1), expression(lambda == 2), expression(lambda == 3), expression(lambda == 4)),
+       col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ), 
+       title="Rate",
+       pch = 16,
+       bty = "n")
+
+
+# Use of the gamma distribution ------------------------------------------------
+# Fix the parameters, such that the mean stays the same and the variance is
+# changing.
+# From the documentation
+# Note that for smallish values of shape (and moderate scale) a large parts of 
+# the mass of the Gamma distribution is on values of x
+# so near zero that they will be represented as zero in computer arithmetic.
+# So rgamma may well return values which will be represented as zero. 
+# (This will also happen for very large values of scale since the actual generation is done for scale = 1.)
+#
+# Take E(X) = 5, so sigma = 5 / alpha, and with this we have
+# Var(X) = sigma^2 * alpha = 25 / alpha. 
+# -> Increasing alpha yields lower variance
+
+# alpha = 1
+alpha <- 1.0
+out_gamma_1 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 20,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=TRUE,
+  log_plot = TRUE,
+  fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
+  Fv = function(x) {pexp(x, rate=1)},
+  main_title="Smallest SV of v~ Gamma(1, 5) distribution"
+)
+
+# alpha = 2
+alpha <- 2.0
+out_gamma_2 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 20,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=TRUE,
+  log_plot = TRUE,
+  fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
+  Fv = function(x) {pexp(x, rate=1)},
+  main_title="Smallest SV of v~ Gamma(2, 2.5) distribution"
+)
+
+# alpha = 3
+alpha <- 3.0
+out_gamma_3 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 20,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=TRUE,
+  log_plot = TRUE,
+  fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
+  Fv = function(x) {pexp(x, rate=1)},
+  main_title="Smallest SV of v~ Gamma(3, 5/3) distribution"
+)
+
+# alpha = 4
+alpha <- 4.0
+out_gamma_4 <- smallest_sv_sequence(
+  a = c(0.5),
+  n = 400,
+  maxK = 20,
+  sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
+  guard=1e-12,
+  plot=TRUE,
+  log_plot = TRUE,
+  fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
+  Fv = function(x) {pexp(x, rate=1)},
+  main_title="Smallest SV of v~ Gamma(3, 5/3) distribution"
+)
+
+par(mar = c(5, 4, 4, 8)) 
+plot(out_gamma_1$K, out_gamma_1$sv,
+     type="b", col="#D3BA68FF", log="y",
+     main="Smallest SV of Q for variance of the Gamma distribution",
+     ylab="Smallest singular value of Q",
+     xlab="K subdivisions",
+     sub="a = 0.5, n = 400")
+lines(out_gamma_2$K, out_gamma_2$sv, type="b", col="#D5695DFF")
+lines(out_gamma_3$K, out_gamma_3$sv, type="b", col="#5D8CA8FF")
+lines(out_gamma_4$K, out_gamma_4$sv, type="b", col="#65A479FF")
+par(xpd=TRUE)
+legend("topright",
+       inset=c(-0.2,0), 
+       legend=c(expression(alpha == 1), expression(alpha == 2), expression(alpha == 3), expression(alpha == 4)),
+       col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ), 
+       title="Rate",
+       pch = 16,
+       bty = "n")