Add dimensions plot

2026-03-03 15:45:09 +01:00
parent 5cd52f0c5f
commit 8517c5534d
3 changed files with 129 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -52,4 +52,4 @@ rsconnect/
 /.quarto/
 **/*.quarto_ipynb
-./_freeze/
+_freeze/
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -0,0 +1,6 @@
 project:
  type: default
 execute:
  freeze: auto
  cache: false
--- a/scripts/plots_dimensions.qmd
+++ b/scripts/plots_dimensions.qmd
@@ -0,0 +1,122 @@
 ---
 title: "Plots of n vs. k"
 author: "Niclas"
 format: html
 editor: visual
 execute:
  echo: true
  working-directory: ../
 ---
 # Plots of the dimensions
 ## Setup 
 We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the
 smallest non-zero singular value of $Q$.
 The matrix $Q$ is given by
 $$
 Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du
 $$
 with
 $$
 p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))}
 $$
 ## Plots of n vs. k
 - The $v$'s are normally distributed with $v \sim \mathcal N(0,1)$
 - Plot $n = 100, 200, 300, 400$ and $k = 1, \dots, K$ with $K = \sqrt n$.
 ```{r Load Libraries}
 # load local files
 source(here::here("R", "singular_values.R"))
 source(here::here("R", "graphon_distribution.R"))
 source(here::here("R","singular_value_plot.R"))
 # load libaries for data handling
 library(ggplot2)
 library(dplyr)
 library(latex2exp)
 ```
 ```{r Compute the data}
 #| cache: true
 #| echo: false
 #| collapse: true
 ns <- c(100, 200, 300, 400, 500)
 Ks <- floor(sqrt(ns))
 as <- c(0.5, 1.0, 1.5, 2.0)
 # set a global seed
 set.seed(42)
 results <- data.frame(dim_n = integer(),
                      dim_k = integer(),
                      param_a = double(),
                      ssv = double())
 for (a in as) {
  for (i in 1:length(ns)) {
    n <- ns[i]
    K <- Ks[i]
    # use the default seed 1L
    out <- smallest_sv_sequence(
      a = a,
      n = n,
      maxK = K,
      sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
      guard=1e-12,
      plot=FALSE,
      fv = function(x) {dnorm(x, mean=0, sd=1)},
      Fv = function(x) {pnorm(x, mean=0, sd=1)}
      )
    current_res <- data.frame(dim_n = rep(n, K), dim_k = out$K, param_a = rep(a, K), ssv = out$sv)
    results <- rbind(results, current_res)
  }
 }
 ```
 ```{r plot the results}
 #| cache: true
 #| echo: false
 #| collapse: true
 #| fig-cap: "Simulation of the smallest singular values w.r.t. a, n and k"
 results |>
  mutate(param_a =  as.factor(param_a),
         dim_n = as.factor(dim_n)) |>
  group_by(param_a, dim_n) |>
  ggplot(aes(dim_k, ssv, col=dim_n, shape=param_a, interaction(dim_n, param_a))) +
  geom_point(size=1.5) +
  geom_line() +
  scale_y_log10() +
  theme_bw() +
  labs(x=latex2exp::TeX("$k$"),
       y=latex2exp::TeX("Smallest singular value of $Q$"),
       title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $n$, $k$, and $a$."),
       colour=latex2exp::TeX("$n$"),
       shape=latex2exp::TeX("$a$"))
 ```
 The data for $n = 100$ is covered by the data for $n = 200$.
 ## Analysis of the convergence
 We assume that the smallest singular value $\sigma$ can be approximated by:
 $$
 \sigma = C \cdot n^\eta \cdot k^\kappa \cdot a^\alpha
 $$
 to estimate the coefficients we make a log-transform and perform a linear regression, i.e.
 $$
 \log(\sigma) = \log (C) + \eta \log(n) + \kappa\log(k) + \alpha \log(a).
 $$
 ```{r estimate coeffs}
 model1 <- results |>
  filter(ssv > 1e-15) |> # exclude to small values
  lm(formula = log(ssv) ~ log(dim_n) + log(dim_k) + log(param_a))
 summary(model1)
 plot(model1)
 ```