From a8d7924e82cfd4fd4cc5f93ed577b63738af4308 Mon Sep 17 00:00:00 2001 From: Niclas Date: Tue, 31 Mar 2026 15:32:06 +0200 Subject: [PATCH] add plots for nonnormal X --- scripts/plot_non_normal_X.qmd | 350 ++++++++++++++++++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 scripts/plot_non_normal_X.qmd diff --git a/scripts/plot_non_normal_X.qmd b/scripts/plot_non_normal_X.qmd new file mode 100644 index 0000000..2303636 --- /dev/null +++ b/scripts/plot_non_normal_X.qmd @@ -0,0 +1,350 @@ +--- +title: "plots of a dependence" +author: "Niclas" +format: html +editor: visual +execute: + echo: true + working-directory: ../ +--- + +```{r loading libraries} +#| cache: true +#| echo: false +#| collapse: true +# load local files +source(here::here("R", "singular_values.R")) +source(here::here("R", "graphon_distribution.R")) +source(here::here("R","singular_value_plot.R")) + +# load libaries for data handling +library(ggplot2) +library(dplyr) +library(latex2exp) + +``` + +## Setup +We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the +smallest non-zero singular value of $Q$. + +The matrix $Q$ is given by +$$ +Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du +$$ +with +$$ +p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))} +$$ +In this document we plot different the smallest eigenvalue in dependence of the +parameter $a$ with different "ratios" of the parameters $n$ and +$$ +k = \lfloor n^\alpha \rfloor +$$ + +with $\alpha = 0.1, 0.2, \dots 0.5$. The data matrix $X$ is a random matrix with +i.i.d. distributed entries. We consider $x_{ij} \sim U[0,1]$ and $x_{ij} \sim Exp(\lambda)$. + +## Exponential distribution +```{r k = n^alpha data generation, rate = 1} +#| cache: true +#| echo: false +#| collapse: true +ns <- seq(100, 1000, 100) +as <- seq(0, 20, 2) +alphas <- seq(0.1, 0.5, 0.1) + +set.seed(100) +results <- data.frame(dim_n = integer(), + dim_k = integer(), + param_a = double(), + param_alpha = double(), + ssv = double()) +for (a in as) { + for (i in 1:length(ns)) { + for (j in 1:length(alphas)) { + n <- ns[i] + K <- floor(n^alphas[j]) + if (!K > 0) next # skip if K is equal to zero + # use the default seed 1L + Q <- compute_matrix(seed=1L, + a= a, + n = n, + K = K, + sample_X_fn = function(n) {matrix(rexp(n), ncol = 1L)}, + fv = function(x) {dnorm(x, mean=0, sd=1)}, + Fv = function(x) {pnorm(x, mean=0, sd=1)}, + guard = 1e-12) + + ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]] + + current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv) + results <- rbind(results, current_res) + } + } +} +``` + +```{r k = n^alpha plotting, rate = 1} +results |> + filter(param_a %in% c(0, 10, 20)) |> + mutate(param_a = as.factor(param_a), + param_alpha = as.factor(param_alpha)) |> + group_by(param_a, param_alpha) |> + ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) + + geom_point(size=1.5) + + geom_line() + + geom_function(fun = function(x) {sqrt(x)}, colour="black") + + #scale_y_log10() + + theme_bw() + + labs(x=latex2exp::TeX("$n$"), + y=latex2exp::TeX("Smallest singular value of $Q$"), + title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."), + subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim Exp(1)$")), + colour=latex2exp::TeX("$a$"), + shape=latex2exp::TeX("$\\alpha$")) +``` + + +```{r k = n^alpha data generation, rate = 3} +#| cache: true +#| echo: false +#| collapse: true +ns <- seq(100, 1000, 100) +as <- seq(0, 20, 2) +alphas <- seq(0.1, 0.5, 0.1) + +set.seed(100) +results <- data.frame(dim_n = integer(), + dim_k = integer(), + param_a = double(), + param_alpha = double(), + ssv = double()) +for (a in as) { + for (i in 1:length(ns)) { + for (j in 1:length(alphas)) { + n <- ns[i] + K <- floor(n^alphas[j]) + if (!K > 0) next # skip if K is equal to zero + # use the default seed 1L + Q <- compute_matrix(seed=1L, + a= a, + n = n, + K = K, + sample_X_fn = function(n) {matrix(rexp(n, rate=3), ncol = 1L)}, + fv = function(x) {dnorm(x, mean=0, sd=1)}, + Fv = function(x) {pnorm(x, mean=0, sd=1)}, + guard = 1e-12) + + ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]] + + current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv) + results <- rbind(results, current_res) + } + } +} +``` + +```{r k = n^alpha plotting, rate = 3} +results |> + filter(param_a %in% c(0, 10, 20)) |> + mutate(param_a = as.factor(param_a), + param_alpha = as.factor(param_alpha)) |> + group_by(param_a, param_alpha) |> + ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) + + geom_point(size=1.5) + + geom_line() + + geom_function(fun = function(x) {sqrt(x)}, colour="black") + + #scale_y_log10() + + theme_bw() + + labs(x=latex2exp::TeX("$n$"), + y=latex2exp::TeX("Smallest singular value of $Q$"), + title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."), + subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim Exp(3))$")), + colour=latex2exp::TeX("$a$"), + shape=latex2exp::TeX("$\\alpha$")) +``` +For $a = 0$ the smallest singular value is very close to zero. + +```{r k = n^alpha data generation, rate = 5} +#| cache: true +#| echo: false +#| collapse: true +ns <- seq(100, 1000, 100) +as <- seq(0, 20, 2) +alphas <- seq(0.1, 0.5, 0.1) + +set.seed(100) +results <- data.frame(dim_n = integer(), + dim_k = integer(), + param_a = double(), + param_alpha = double(), + ssv = double()) +for (a in as) { + for (i in 1:length(ns)) { + for (j in 1:length(alphas)) { + n <- ns[i] + K <- floor(n^alphas[j]) + if (!K > 0) next # skip if K is equal to zero + # use the default seed 1L + Q <- compute_matrix(seed=1L, + a= a, + n = n, + K = K, + sample_X_fn = function(n) {matrix(rexp(n, rate=5), ncol = 1L)}, + fv = function(x) {dnorm(x, mean=0, sd=1)}, + Fv = function(x) {pnorm(x, mean=0, sd=1)}, + guard = 1e-12) + + ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]] + + current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv) + results <- rbind(results, current_res) + } + } +} +``` + +```{r k = n^alpha plotting, rate = 5} +results |> + filter(param_a %in% c(0, 10, 20)) |> + mutate(param_a = as.factor(param_a), + param_alpha = as.factor(param_alpha)) |> + group_by(param_a, param_alpha) |> + ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) + + geom_point(size=1.5) + + geom_line() + + geom_function(fun = function(x) {sqrt(x)}, colour="black") + + #scale_y_log10() + + theme_bw() + + labs(x=latex2exp::TeX("$n$"), + y=latex2exp::TeX("Smallest singular value of $Q$"), + title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."), + subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim Exp(5))$")), + colour=latex2exp::TeX("$a$"), + shape=latex2exp::TeX("$\\alpha$")) +``` +Why is here a perfect match for $\alpha = 0.1$ and $a = 20$ to the square function? +The difference is of the order of $10^{-11}$! + + +## Uniform distribution +```{r k = n^alpha data generation, U[0,1]} +#| cache: true +#| echo: false +#| collapse: true +ns <- seq(100, 1000, 100) +as <- seq(0, 20, 2) +alphas <- seq(0.1, 0.5, 0.1) + +set.seed(100) +results <- data.frame(dim_n = integer(), + dim_k = integer(), + param_a = double(), + param_alpha = double(), + ssv = double()) +for (a in as) { + for (i in 1:length(ns)) { + for (j in 1:length(alphas)) { + n <- ns[i] + K <- floor(n^alphas[j]) + if (!K > 0) next # skip if K is equal to zero + # use the default seed 1L + Q <- compute_matrix(seed=1L, + a= a, + n = n, + K = K, + sample_X_fn = function(n) {matrix(runif(n), ncol = 1L)}, + fv = function(x) {dnorm(x, mean=0, sd=1)}, + Fv = function(x) {pnorm(x, mean=0, sd=1)}, + guard = 1e-12) + + ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]] + + current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv) + results <- rbind(results, current_res) + } + } +} +``` + +```{r k = n^alpha plotting, U[0,1]} +results |> + filter(param_a %in% c(0, 10, 20)) |> + mutate(param_a = as.factor(param_a), + param_alpha = as.factor(param_alpha)) |> + group_by(param_a, param_alpha) |> + ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) + + geom_point(size=1.5) + + geom_line() + + geom_function(fun = function(x) {sqrt(x)}, colour="black") + + #scale_y_log10() + + theme_bw() + + labs(x=latex2exp::TeX("$n$"), + y=latex2exp::TeX("Smallest singular value of $Q$"), + title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."), + subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim U[0,1] $")), + colour=latex2exp::TeX("$a$"), + shape=latex2exp::TeX("$\\alpha$")) +``` +Here we have the same effect for $\alpha = 0.1$ and $a = 20$. + + +```{r k = n^alpha data generation, U[0,2]} +#| cache: true +#| echo: false +#| collapse: true +ns <- seq(100, 1000, 100) +as <- seq(0, 20, 2) +alphas <- seq(0.1, 0.5, 0.1) + +set.seed(100) +results <- data.frame(dim_n = integer(), + dim_k = integer(), + param_a = double(), + param_alpha = double(), + ssv = double()) +for (a in as) { + for (i in 1:length(ns)) { + for (j in 1:length(alphas)) { + n <- ns[i] + K <- floor(n^alphas[j]) + if (!K > 0) next # skip if K is equal to zero + # use the default seed 1L + Q <- compute_matrix(seed=1L, + a= a, + n = n, + K = K, + sample_X_fn = function(n) {matrix(runif(n, min = 0, max=2), ncol = 1L)}, + fv = function(x) {dnorm(x, mean=0, sd=1)}, + Fv = function(x) {pnorm(x, mean=0, sd=1)}, + guard = 1e-12) + + ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]] + + current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv) + results <- rbind(results, current_res) + } + } +} +``` + +```{r k = n^alpha plotting, U[0,2]} +results |> + filter(param_a %in% c(0, 10, 20)) |> + mutate(param_a = as.factor(param_a), + param_alpha = as.factor(param_alpha)) |> + group_by(param_a, param_alpha) |> + ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) + + geom_point(size=1.5) + + geom_line() + + geom_function(fun = function(x) {sqrt(x)}, colour="black") + + #scale_y_log10() + + theme_bw() + + labs(x=latex2exp::TeX("$n$"), + y=latex2exp::TeX("Smallest singular value of $Q$"), + title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."), + subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim U[0,2] $")), + colour=latex2exp::TeX("$a$"), + shape=latex2exp::TeX("$\\alpha$")) +```