From 9db48a9a33e78541f6212b6b0eee483df30ffb0c Mon Sep 17 00:00:00 2001 From: Niclas Date: Wed, 11 Feb 2026 19:00:56 +0100 Subject: [PATCH] experiments with the variance --- R/singular_value_plot.R | 32 ++-- R/singular_values.R | 3 + scripts/plot_densities.R | 6 +- scripts/plot_sv.R | 347 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 364 insertions(+), 24 deletions(-) diff --git a/R/singular_value_plot.R b/R/singular_value_plot.R index ae26838..706ceae 100644 --- a/R/singular_value_plot.R +++ b/R/singular_value_plot.R @@ -59,6 +59,7 @@ expr_to_label <- function(expr) { #' @param curve_col Colour of the reference curve (default = `"red"`). #' @param curve_lwd Line width of the reference curve (default = 2). #' @param log_plot If True, then the y-axis is on a log scale. +#' @param main_title Main title for the plot #' @return A list with the following components #' \item{K}{Integer vector `1:maxK`.} #' \item{sv}{Numeric vector of the smallest singular values for each `K`.} @@ -98,7 +99,8 @@ smallest_sv_sequence <- function( curve_to = NULL, curve_col = "red", curve_lwd = 2, - log_plot = FALSE + log_plot = FALSE, + main_title = "Smallest singular value vs. K" ) { ## 1. Input validation ======================================================= if (!is.numeric(a) || length(a) == 0) { @@ -129,6 +131,9 @@ smallest_sv_sequence <- function( if (!inherits(curve_expr, "call") && !is.character(curve_expr)) { stop("`curve_expr` must be a call (e.g., quote(20/sqrt(x))) or a character string.") } + if (!is.character(main_title)){ + stop("`main_title` must be a character vector.") + } ## 2. Prepare storage ======================================================== K_vec <- seq_len(maxK) @@ -147,6 +152,8 @@ smallest_sv_sequence <- function( guard = guard ) + Q <- 1 /sqrt(n) * Q + sv_res <- compute_minmax_sv(Q) if (!is.list(sv_res) || is.null(sv_res$smallest_singular_value)) { stop("`compute_minmax_sv()` must return a list containing `$smallest_singular_value`.") @@ -157,6 +164,7 @@ smallest_sv_sequence <- function( ## 4. Plotting (optional) ==================================================== if (plot) { ## Basic scatter/line plot of the singular values + par(mar = c(5, 4, 4, 8)) # extra space on the right for the legend plot_args <- list( x = K_vec, y = smallest_sv, @@ -165,17 +173,21 @@ smallest_sv_sequence <- function( col = "steelblue", xlab = "K subdivisions", ylab = "Smallest singular value of Q", - main = "Smallest singular value vs. K" + main = main_title ) if (log_plot) plot_args$log <- "y" do.call(graphics::plot, plot_args) - # graphics::plot( - # K_vec, smallest_sv, - # type = "b", pch = 19, col = "steelblue", - # xlab = "K subdivisions", ylab = "Smallest singular value of Q", - # main = "Smallest singular value vs. K" - # ) + # add legend. The par(xpd = ...) allows drawing outside of the plot region. + par(xpd = TRUE) + legend("topright", + inset=c(-0.2,0), + legend=c("SV of Q"), + col="steelblue", + title="Legend", + pch = 16, + bty = "n") + par(xpd = FALSE) ## Add the reference curve if requested if (add_curve) { ## Determine sensible defaults for the curve limits @@ -197,8 +209,8 @@ smallest_sv_sequence <- function( # add label with the curve expression label_txt <- expr_to_label(curve_expr) - x_pos <- curve_from + 0.9 * (curve_to - curve_from) - y_pos <- 0.9 * max(smallest_sv) + x_pos <- curve_from + 0.8 * (curve_to - curve_from) + y_pos <- 0.85 * max(smallest_sv) graphics::text( x = x_pos, y = y_pos, labels = label_txt, diff --git a/R/singular_values.R b/R/singular_values.R index 8bf3ec3..9e38ab9 100644 --- a/R/singular_values.R +++ b/R/singular_values.R @@ -192,6 +192,9 @@ compute_matrix <- function( #' @export compute_minmax_sv <- function(M) { s <- svd(M, nu=0, nv=0)$d + + # just a check if we compute the right thing + # s <- sqrt(eigen(M %*% t(M), symmetric = TRUE, only.value=TRUE)$values) list( largest_singular_value = max(s), diff --git a/scripts/plot_densities.R b/scripts/plot_densities.R index 8607982..bd7b461 100644 --- a/scripts/plot_densities.R +++ b/scripts/plot_densities.R @@ -50,14 +50,14 @@ title( ) ## 3.3 Plot one dimensional X_i ================================================ -p3 <- create_cond_density(a1, dnorm, pnorm, X1) -givenX <- c(-0.5) +p3 <- create_cond_density(2, dnorm, pnorm, X1) +givenX <- c(-3.0) p3_plot <- \(x) p3(x, givenX) plot.function(p3_plot, xlab="u", ylab="p(u |x) ") title(main=expression("Conditional density for" ~ v %~% N(0, 1)), line=2) title( main = bquote( - a == (.(paste(a1, collapse = ", "))) ~ "," ~ + a == (.(paste(c(2), collapse = ", "))) ~ "," ~ N == .(N) ~ "," ~ x == (.(paste(givenX, collapse = ", "))) ), diff --git a/scripts/plot_sv.R b/scripts/plot_sv.R index 741ae8f..526d7c6 100644 --- a/scripts/plot_sv.R +++ b/scripts/plot_sv.R @@ -3,20 +3,104 @@ source(here::here("R", "singular_values.R")) source(here::here("R", "graphon_distribution.R")) source(here::here("R", "singular_value_plot.R")) +# https://stackoverflow.com/a/5790430 +resetPar <- function() { + dev.new() + op <- par(no.readonly = TRUE) + dev.off() + op +} + +calc_conv_rate <- function(x,y) { + if (!is.numeric(x) || length(x) == 0) { + stop("`x` must be a non‑empty numeric vector.") + } + if (!is.numeric(y) || length(y) == 0) { + stop("`y` must be a non‑empty numeric vector.") + } + if (length(x) != length(y)) { + stop("`x` and `y` must have the same length.") + } + + df <- data.frame("x" = x, "y" = y) + lm_model <- lm(log(y) ~ log(x), data=df) + C <- exp(coefficients(lm_model)[[1]]) + alpha <- coefficients(lm_model)[[2]] + + df[, "y_pred"] <- C * df[, "x"]^alpha + df[, "residual"] <- df[, "y"] - df[, "y_pred"] + + out <- list( + "C" = C, + "alpha" = alpha, + "obs" = df + ) + out +} + +calc_exp_conv_rate <- function(x,y) { + if (!is.numeric(x) || length(x) == 0) { + stop("`x` must be a non‑empty numeric vector.") + } + if (!is.numeric(y) || length(y) == 0) { + stop("`y` must be a non‑empty numeric vector.") + } + if (length(x) != length(y)) { + stop("`x` and `y` must have the same length.") + } + + df <- data.frame("x" = x, "y" = y) + fit <- nls(y ~ C * exp(r * x^m), + start = list(C = min(y), r= -0.5, m = 1)) +} + # Nearly match with sample function -------------------------------------------- # v ~ N(0,1) and X ~ discrete Uniform on [1:n] -smallest_sv_sequence( +out <- smallest_sv_sequence( a = c(0.5), - n = 400, - maxK= 20, + n = 9, + maxK= 3, sampler_fn = sample, guard=1e-12, plot=TRUE, - curve_expr = quote(20 / sqrt(x)) + log_plot = TRUE, + curve_expr = quote(1 / x^0.545) ) +conv_rate <- calc_conv_rate(out$K, out$sv) # Normally distributed X ~ N(0,1) and v ~ N(0,1) ------------------------------- -smallest_sv_sequence( +out <- smallest_sv_sequence( + a = c(0.5), + n = 1200, + maxK = 20, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=TRUE, + log_plot = TRUE, + curve_expr = quote(1.5 * exp(-0.95 * x^1.34)) + #curve_expr = quote( 1/exp(x^1.32)) +) +# convergence rate does not work here, probably because the underlying model +# does not work well +conv_rate <- calc_conv_rate(out$K[1:20], out$sv[1:20]) + +# Uniform distributed X ~ U[0,1] and v ~ N(0,1) -------------------------------- +out <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 20, + sampler_fn =function(n) matrix(runif(n), ncol = 1L), + guard=1e-12, + plot=TRUE, + log_plot = TRUE, + curve_expr = quote(1* exp(-1.1 * x^1.5)) +) +# here the optimal fit does not work too, probably other model +calc_conv_rate(out$K[1:9], out$sv[1:9]) + +# Compare of parameters of Normal distribution ---------------------------------- +# +out_sd0_5 <- smallest_sv_sequence( a = c(0.5), n = 400, maxK = 20, @@ -24,16 +108,257 @@ smallest_sv_sequence( guard=1e-12, plot=TRUE, log_plot = TRUE, - curve_expr = quote(20 / exp(x^1.32)) + fv = function(x) {dnorm(x, mean=0, sd=0.5)}, + Fv = function(x) {pnorm(x, mean=0, sd=0.5)}, + main_title="Smallest SV of v~ N(0,0.5^2) distribution" ) -# Uniform distributed X ~ U[0,1] and v ~ N(0,1) -------------------------------- -smallest_sv_sequence( +out_sd1 <- smallest_sv_sequence( a = c(0.5), n = 400, maxK = 20, - sampler_fn =function(n) matrix(runif(n), ncol = 1L), + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), guard=1e-12, plot=TRUE, - curve_expr = quote(20 / x^2) -) \ No newline at end of file + log_plot = TRUE, + fv = function(x) {dnorm(x, mean=0, sd=1)}, + Fv = function(x) {pnorm(x, mean=0, sd=1)}, + main_title="Smallest SV of v~ N(0,1) distribution" +) + +out_sd2 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 20, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=TRUE, + log_plot = TRUE, + fv = function(x) {dnorm(x, mean=0, sd=2)}, + Fv = function(x) {pnorm(x, mean=0, sd=2)}, + main_title="Smallest SV of v~ N(0,2^2) distribution" +) + +out_sd4 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 20, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=TRUE, + log_plot = TRUE, + fv = function(x) {dnorm(x, mean=0, sd=4)}, + Fv = function(x) {pnorm(x, mean=0, sd=4)}, + main_title="Smallest SV of v~ N(0,4^2) distribution" +) + +par(mar = c(5, 4, 4, 8)) +plot(out_sd0_5$K, out_sd0_5$sv, + type = "b", + pch = 19, + col = "#D3BA68FF", + xlab = "K subdivisions", + ylab = "Smallest singular value of Q", + main="smallest SV for different variances of a normal distribution", + sub = "n = 400, a = 0.5", + log="y") +lines(out_sd1$K, out_sd1$sv, + type="b", pch=19, col="#D5695DFF", add=TRUE) +lines(out_sd2$K, out_sd2$sv, + type = "b", pch=19, col="#5D8CA8FF", add=TRUE) +lines(out_sd4$K, out_sd4$sv, + type = "b", pch=19, col="#65A479FF", add=TRUE) +par(xpd = TRUE) +legend("topright", + inset=c(-0.2,0), + legend=c("sd=0.5", "sd=1", "sd=2", "sd=4"), + col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ), + title="Legend", + pch = 16, + bty = "n") + +# Break phenomena of Exp-distribution ------------------------------------------ +out_exp <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 20, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=TRUE, + log_plot = TRUE, + fv = function(x) {dexp(x, rate=1)}, + Fv = function(x) {pexp(x, rate=1)}, + main_title="Smallest SV of v~ Exp(1) distribution" +) + +par(resetPar) +plot(out_exp$K, out_exp$sv, + log="y", + xlab="K subdivsions", + ylab="Smallest singular value of Q", + col="steelblue", + type="b", + main="Smallest singular value for v ~ Exp(1)", + sub="a = 0.5, n = 400") +arrows(8, 1e-5, 6.5, 1e-7, angle=20, lty = 1, lwd=2) +text(8.5, 1e-5, "Break only seen for exp-distribution", pos=4) + +## Observations of the break point depending the rate -------------------------- +# +# Note: this also depends on the the parameter n of samples +out_exp_1 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 80, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=FALSE, + fv = function(x) {dexp(x, rate=1)}, + Fv = function(x) {pexp(x, rate=1)} +) + +out_exp_2 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 80, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=FALSE, + fv = function(x) {dexp(x, rate=2)}, + Fv = function(x) {pexp(x, rate=2)} +) + +out_exp_3 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 80, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=FALSE, + fv = function(x) {dexp(x, rate=3)}, + Fv = function(x) {pexp(x, rate=3)} +) + +out_exp_4 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 80, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=FALSE, + fv = function(x) {dexp(x, rate=4)}, + Fv = function(x) {pexp(x, rate=4)} +) + +par(mar = c(5, 4, 4, 8)) +plot(out_exp_1$K, out_exp_1$sv, + type="b", col="#D3BA68FF", log="y", + main="Smallest SV of Q for different rates of Exp-distribution", + ylab="Smallest singular value of Q", + xlab="K subdivisions", + sub="a = 0.5, n = 400, depending also on n") +lines(out_exp_2$K, out_exp_2$sv, type="b", col="#D5695DFF") +lines(out_exp_3$K, out_exp_3$sv, type="b", col="#5D8CA8FF") +lines(out_exp_4$K, out_exp_4$sv, type="b", col="#65A479FF") +par(xpd=TRUE) +legend("topright", + inset=c(-0.2,0), + legend=c(expression(lambda == 1), expression(lambda == 2), expression(lambda == 3), expression(lambda == 4)), + col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ), + title="Rate", + pch = 16, + bty = "n") + + +# Use of the gamma distribution ------------------------------------------------ +# Fix the parameters, such that the mean stays the same and the variance is +# changing. +# From the documentation +# Note that for smallish values of shape (and moderate scale) a large parts of +# the mass of the Gamma distribution is on values of x +# so near zero that they will be represented as zero in computer arithmetic. +# So rgamma may well return values which will be represented as zero. +# (This will also happen for very large values of scale since the actual generation is done for scale = 1.) +# +# Take E(X) = 5, so sigma = 5 / alpha, and with this we have +# Var(X) = sigma^2 * alpha = 25 / alpha. +# -> Increasing alpha yields lower variance + +# alpha = 1 +alpha <- 1.0 +out_gamma_1 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 20, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=TRUE, + log_plot = TRUE, + fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)}, + Fv = function(x) {pexp(x, rate=1)}, + main_title="Smallest SV of v~ Gamma(1, 5) distribution" +) + +# alpha = 2 +alpha <- 2.0 +out_gamma_2 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 20, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=TRUE, + log_plot = TRUE, + fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)}, + Fv = function(x) {pexp(x, rate=1)}, + main_title="Smallest SV of v~ Gamma(2, 2.5) distribution" +) + +# alpha = 3 +alpha <- 3.0 +out_gamma_3 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 20, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=TRUE, + log_plot = TRUE, + fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)}, + Fv = function(x) {pexp(x, rate=1)}, + main_title="Smallest SV of v~ Gamma(3, 5/3) distribution" +) + +# alpha = 4 +alpha <- 4.0 +out_gamma_4 <- smallest_sv_sequence( + a = c(0.5), + n = 400, + maxK = 20, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=TRUE, + log_plot = TRUE, + fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)}, + Fv = function(x) {pexp(x, rate=1)}, + main_title="Smallest SV of v~ Gamma(3, 5/3) distribution" +) + +par(mar = c(5, 4, 4, 8)) +plot(out_gamma_1$K, out_gamma_1$sv, + type="b", col="#D3BA68FF", log="y", + main="Smallest SV of Q for variance of the Gamma distribution", + ylab="Smallest singular value of Q", + xlab="K subdivisions", + sub="a = 0.5, n = 400") +lines(out_gamma_2$K, out_gamma_2$sv, type="b", col="#D5695DFF") +lines(out_gamma_3$K, out_gamma_3$sv, type="b", col="#5D8CA8FF") +lines(out_gamma_4$K, out_gamma_4$sv, type="b", col="#65A479FF") +par(xpd=TRUE) +legend("topright", + inset=c(-0.2,0), + legend=c(expression(alpha == 1), expression(alpha == 2), expression(alpha == 3), expression(alpha == 4)), + col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ), + title="Rate", + pch = 16, + bty = "n")