create plots with a dependence

2026-03-11 14:14:10 +01:00
parent 14b4425570
commit 76f982069e
3 changed files with 269 additions and 1 deletions
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -1,2 +1,3 @@
 *.html
 *plots_dimensions_files/
+*plots_a_dependence_files/
--- a/scripts/plots_a_dependence.qmd
+++ b/scripts/plots_a_dependence.qmd
@@ -0,0 +1,244 @@
+---
+title: "plots of a dependence"
+author: "Niclas"
+format: html
+editor: visual
+execute:
+  echo: true
+  working-directory: ../
+---
+
+# Plots of the dependence on $a$
+
+## Setup 
+We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the
+smallest non-zero singular value of $Q$.
+
+The matrix $Q$ is given by
+$$
+Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du
+$$
+with
+$$
+p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))}
+$$
+In this document we plot the smallest singular value of $Q$ as a function of the
+parameter $a$ for different "ratios" of the parameters $n$ and $k$.
+
+```{r loading libraries}
+#| cache: true
+#| echo: false
+#| collapse: true
+# load local files
+source(here::here("R", "singular_values.R"))
+source(here::here("R", "graphon_distribution.R"))
+source(here::here("R","singular_value_plot.R"))
+
+# load libraries for data handling
+library(ggplot2)
+library(dplyr)
+library(latex2exp)
+
+```
+
+
+## Hyperparameter with n vs. k = log(n)
+```{r n / log(n) hyperparameters data generation}
+#| cache: true
+#| echo: false
+#| collapse: true
+# Simulate the smallest singular values of Q on a grid of sample sizes n
+# (with maxK = floor(log(n))) and scalar parameters a. The chunk produces the
+# long-format data frame `results` with the columns dim_n, dim_k, param_a and
+# ssv.
+ns <- seq(100, 1000, 100)
+Ks <- floor(log(ns))
+as <- seq(0, 20, 2)
+
+set.seed(100)
+# Collect one data frame per (a, n) pair and bind them once at the end;
+# calling rbind() inside the loop copies all previously stored rows each time.
+res_list <- vector("list", length(as) * length(ns))
+idx <- 0L
+for (a in as) {
+  for (i in seq_along(ns)) {
+    n <- ns[i]
+    K <- Ks[i]
+    # No seed argument is passed, so smallest_sv_sequence() uses its own
+    # default (1L according to the original note; the definition lives in R/
+    # and is not visible from this document).
+    out <- smallest_sv_sequence(
+      a = a,
+      n = n,
+      maxK = K,
+      sampler_fn = function(n) matrix(rnorm(n), ncol = 1L),
+      guard = 1e-12,
+      plot = FALSE,
+      fv = function(x) dnorm(x, mean = 0, sd = 1),
+      Fv = function(x) pnorm(x, mean = 0, sd = 1)
+    )
+
+    # NOTE(review): assumes out$K and out$sv each hold exactly K entries.
+    idx <- idx + 1L
+    res_list[[idx]] <- data.frame(
+      dim_n = rep(n, K),
+      dim_k = out$K,
+      param_a = rep(a, K),
+      ssv = out$sv
+    )
+  }
+}
+results <- do.call(rbind, res_list)
+```
+
+```{r hyperparameter n vs log(n) plotting}
+# Smallest singular value against a, one coloured line per sample size n.
+# Within each n only the rows at the largest k are kept, and a = 0 is dropped.
+results |>
+  mutate(dim_n = as.factor(dim_n)) |>
+  group_by(dim_n) |>
+  filter(dim_k == max(dim_k), param_a > 0) |>
+  ggplot(mapping = aes(x = param_a, y = ssv, colour = dim_n)) +
+  geom_point(size = 1.5) +
+  geom_line() +
+  # scale_y_log10() +
+  theme_bw() +
+  labs(
+    x = latex2exp::TeX("$a$"),
+    y = latex2exp::TeX("Smallest singular value of $Q$"),
+    title = latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
+    subtitle = latex2exp::TeX("Hyperparameter $k = \\lfloor\\log(n) \\rfloor$"),
+    colour = latex2exp::TeX("$n$"),
+    shape = latex2exp::TeX("$a$")
+  )
+```
+Here we have relatively large values of the smallest singular value (ssv) of $Q$ since
+the values for $k$ are relatively small. We have already observed that with
+larger $k$ the ssv shrinks rapidly towards zero. Here, however, the value of $k$
+is at most $k = `r floor(log(1000))`$ for $n = 1000$.
+We omitted the value $a = 0$, as this results in an ssv of $10^{-61}$.
+Note that this is only one sample and it could produce slightly different results
+for other seeds.
+
+
+## Hyperparameter $n$ vs. $k = n^\alpha$
+
+```{r n vs n^alpha hyperparameters data generation}
+#| cache: true
+#| echo: false
+#| collapse: true
+# Smallest singular value of Q on a grid of a, n and alpha with
+# K = floor(n^alpha). The chunk produces the data frame `results` with the
+# columns dim_n, dim_k, param_a, param_alpha and ssv.
+# The chunk label differs from the log(n) chunk above: knitr refuses to
+# render a document that contains two chunks with the same label.
+ns <- seq(100, 1000, 100)
+as <- seq(0, 20, 2)
+alphas <- seq(0.1, 0.5, 0.1)
+
+set.seed(100)
+# One slot per grid point; filled slots are bound together after the loops
+# instead of growing `results` with rbind() in every iteration.
+res_list <- vector("list", length(as) * length(ns) * length(alphas))
+idx <- 0L
+for (a in as) {
+  for (n in ns) {
+    for (alpha in alphas) {
+      K <- floor(n^alpha)
+      if (K < 1) next # skip if K is equal to zero
+      # The same explicit seed (1L) is passed for every grid point.
+      Q <- compute_matrix(
+        seed = 1L,
+        a = a,
+        n = n,
+        K = K,
+        sample_X_fn = function(n) matrix(rnorm(n), ncol = 1L),
+        fv = function(x) dnorm(x, mean = 0, sd = 1),
+        Fv = function(x) pnorm(x, mean = 0, sd = 1),
+        guard = 1e-12
+      )
+
+      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
+
+      idx <- idx + 1L
+      res_list[[idx]] <- data.frame(
+        dim_n = n,
+        dim_k = K,
+        param_a = a,
+        param_alpha = alpha,
+        ssv = ssv
+      )
+    }
+  }
+}
+# Only the first `idx` slots are filled when grid points were skipped.
+results <- do.call(rbind, res_list[seq_len(idx)])
+```
+
+```{r hyperparameter n vs n^alpha plotting}
+# Smallest singular value against a for n in {100, 500, 1000}; colour encodes
+# n and point shape encodes alpha.
+# The chunk label must not contain "=": knitr would try to parse the header as
+# a `name = value` chunk option instead of a plain label.
+results |>
+  filter(dim_n %in% c(100, 500, 1000)) |>
+  mutate(dim_n = as.factor(dim_n),
+         param_alpha = as.factor(param_alpha)) |>
+  ggplot(aes(param_a, ssv, col = dim_n, shape = param_alpha)) +
+  geom_point(size = 1.5) +
+  geom_line() +
+  # scale_y_log10() +
+  theme_bw() +
+  labs(x = latex2exp::TeX("$a$"),
+       y = latex2exp::TeX("Smallest singular value of $Q$"),
+       title = latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
+       subtitle = latex2exp::TeX("Hyperparameter $k = n^{\\alpha}$"),
+       colour = latex2exp::TeX("$n$"),
+       shape = latex2exp::TeX("$\\alpha$"))
+```
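+Here we use $K = \lfloor n^\alpha\rfloor$ as hyperparameter. Why don't we see
+any change w.r.t. $a$ for $\alpha = 0.1$? For all $n \le 1000$ we have
+$\lfloor n^{0.1} \rfloor = 1$, so $Q$ consists of the single column
+$Q_{i0} = \int_0^1 p_a(u|X_i) \, du$. Substituting $y = F_a^{-1}(u)$ shows that
+this integral equals $\int f_v(y - a^\top X_i) \, dy = 1$ for every $a$, so (up
+to numerical error) $Q$ is the all-ones vector and its singular value does not
+depend on $a$.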
+
+
+## Two dimensional example
+Here we consider $p = 2$ covariate variables for each node. In order to make the
+parameter $a$ invariant to the direction, we sample 4 different direction
+ vectors and rescale them to norm one. We use $K = 5$ as hyperparameter.
+ 
+```{r data generation for two d example}
+#| cache: true
+#| echo: false
+#| collapse: true
+
+# Smallest singular value of Q for two-dimensional covariates. For every norm
+# in `as_norm`, every sample size in `ns` and every unit direction (column of
+# `as`), the parameter vector a = norm * direction is passed to
+# compute_matrix(). The chunk produces the data frame `results` with the
+# columns dim_n, dim_k, param_a (index of the direction), param_a_norm and ssv.
+set.seed(10)
+ns <- seq(100, 1000, 100)
+K <- 5 # fixed number of bins; alternative: floor(sqrt(n))
+
+# Columns of `as` are the sampled directions, rescaled to Euclidean norm one.
+as <- matrix(rnorm(8), ncol = 4)
+as_norm <- seq(1, 20, 2)
+for (j in seq_len(ncol(as))) {
+  as[, j] <- as[, j] / sqrt(sum(as[, j]^2))
+}
+
+# One slot per grid point, bound together once after the loops instead of
+# growing `results` with rbind() in every iteration.
+res_list <- vector("list", length(as_norm) * length(ns) * ncol(as))
+idx <- 0L
+for (a_norm in as_norm) {
+  for (n in ns) {
+    for (j in seq_len(ncol(as))) {
+      # Scale the unit direction to the requested norm. Passing the unit
+      # vector itself would make the result independent of a_norm.
+      # The same explicit seed (1L) is passed for every grid point.
+      Q <- compute_matrix(
+        seed = 1L,
+        a = a_norm * as[, j],
+        n = n,
+        K = K,
+        sample_X_fn = function(n) matrix(rnorm(2 * n), ncol = 2L),
+        fv = function(x) dnorm(x, mean = 0, sd = 1),
+        Fv = function(x) pnorm(x, mean = 0, sd = 1),
+        guard = 1e-12
+      )
+
+      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
+
+      idx <- idx + 1L
+      res_list[[idx]] <- data.frame(
+        dim_n = n,
+        dim_k = K,
+        param_a = j,
+        param_a_norm = a_norm,
+        ssv = ssv
+      )
+    }
+  }
+}
+results <- do.call(rbind, res_list)
+```
+
+```{r 2d a parameter plotting}
+# Smallest singular value against the norm of a for n in {100, 500, 1000}.
+# Colour encodes n; point shape encodes param_a, the index of the direction
+# vector. Lines connect the points of one (n, direction) combination.
+results |>
+  filter(dim_n %in% c(100, 500, 1000)) |>
+  mutate(dim_n = as.factor(dim_n),
+         param_a = as.factor(param_a)) |>
+  # The interaction has to be named `group`; as an unnamed extra argument to
+  # aes() it is not bound to any aesthetic.
+  ggplot(aes(param_a_norm, ssv, col = dim_n, shape = param_a,
+             group = interaction(dim_n, param_a))) +
+  geom_point(size = 1.5) +
+  geom_line() +
+  # scale_y_log10() +
+  theme_bw() +
+  labs(x = latex2exp::TeX("$\\|a\\|$"),
+       y = latex2exp::TeX("Smallest singular value of $Q$"),
+       title = latex2exp::TeX("Smallest singular value of $Q$ with respect to the norm of $a$."),
+       subtitle = latex2exp::TeX("Hyperparameter $k = 5$"),
+       colour = latex2exp::TeX("$n$"),
+       # shape shows the direction index, not a norm of alpha
+       shape = latex2exp::TeX("Direction of $a$"))
+```
+```{r plot the vectors}
+# Draw every column of `as` as an arrow starting at the origin.
+plot(0, 0, xlim = c(-1.5, 1.5), ylim = c(-1.5, 1.5), type = "n",
+     xlab = "x", ylab = "y", asp = 1)
+# One colour per column instead of a hard-coded 1:4, so the colours stay in
+# step with the number of vectors stored in `as`.
+arrows(0, 0, as[1, ], as[2, ], col = seq_len(ncol(as)), lwd = 2)
+title(main = "Vectors for a rescaled to norm one.")
+```
+
+ It seems that the direction of the parameter $a$ has a small influence on the
+ smallest singular value of the matrix $Q$, but its norm does not??
+ 
--- a/scripts/plots_dimensions.qmd
+++ b/scripts/plots_dimensions.qmd
@@ -44,7 +44,7 @@ library(latex2exp)
 #| cache: true
 #| echo: false
 #| collapse: true
-ns <- c(100, 200, 300, 400, 500)
+ns <- c(100, 200, 300, 400, 500, 600, 700, 800)
 Ks <- floor(sqrt(ns))
 as <- c(0.5, 1.0, 1.5, 2.0)

@@ -83,6 +83,7 @@ for (a in as) {
 #| collapse: true
 #| fig-cap: "Simulation of the smallest singular values w.r.t. a, n and k"
 results |>
+  filter(dim_n <= 400) |>
  mutate(param_a =  as.factor(param_a),
         dim_n = as.factor(dim_n)) |>
  group_by(param_a, dim_n) |>
@@ -119,4 +120,26 @@ summary(model1)
 plot(model1)
  
 ```
+## Plot of n vs. ssv
+
+```{r plot n vs ssv}
+# Smallest singular value against n on a log-scaled y axis for k in
+# {2, 6, 10}. Colour encodes k and point shape encodes a; lines connect the
+# points of one (k, a) combination.
+results |>
+  filter(dim_k %in% c(2, 6, 10)) |>
+  mutate(param_a = as.factor(param_a),
+         dim_k = as.factor(dim_k)) |>
+  # The interaction has to be named `group`; as an unnamed extra argument to
+  # aes() it is not bound to any aesthetic.
+  ggplot(aes(dim_n, ssv, col = dim_k, shape = param_a,
+             group = interaction(dim_k, param_a))) +
+  geom_point(size = 1.5) +
+  geom_line() +
+  scale_y_log10() +
+  theme_bw() +
+  labs(x = latex2exp::TeX("$n$"),
+       y = latex2exp::TeX("Smallest singular value of $Q$"),
+       title = latex2exp::TeX("Smallest singular value of $Q$ with respect to $n$, $k$, and $a$."),
+       colour = latex2exp::TeX("$k$"),
+       shape = latex2exp::TeX("$a$"))
+```
+```{r}
+# Builds one small example matrix Q (a = 0.5, n = 10, K = 3) with the
+# standard normal density/CDF as fv/Fv and keeps it in the session.
+# NOTE(review): the first argument and the sampler function are matched by
+# position; presumably they are `seed` and `sample_X_fn` -- confirm against
+# the signature of compute_matrix().
+Q <- compute_matrix(1, a=0.5, n=10, K = 3, function(n) matrix(rnorm(n), ncol = 1L), fv=dnorm, Fv=pnorm)
+```