diff --git a/.gitignore b/.gitignore index 6167a6b..1f88218 100644 --- a/.gitignore +++ b/.gitignore @@ -52,4 +52,4 @@ rsconnect/ /.quarto/ **/*.quarto_ipynb -./_freeze/ +_freeze/ diff --git a/_quarto.yml b/_quarto.yml new file mode 100644 index 0000000..1af813c --- /dev/null +++ b/_quarto.yml @@ -0,0 +1,6 @@ +project: + type: default + +execute: + freeze: auto + cache: false diff --git a/scripts/plots_dimensions.qmd b/scripts/plots_dimensions.qmd new file mode 100644 index 0000000..c81e790 --- /dev/null +++ b/scripts/plots_dimensions.qmd @@ -0,0 +1,122 @@ +--- +title: "Plots of n vs. k" +author: "Niclas" +format: html +editor: visual +execute: + echo: true + working-directory: ../ +--- + +# Plots of the dimensions + +## Setup +We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the +smallest non-zero singular value of $Q$. + +The matrix $Q$ is given by +$$ +Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du +$$ +with +$$ +p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))} +$$ + +## Plots of n vs. k + +- The $v$'s are normally distributed with $v \sim \mathcal N(0,1)$ +- Plot $n = 100, 200, 300, 400$ and $k = 1, \dots, K$ with $K = \sqrt n$. + +```{r Load Libraries} +# load local files +source(here::here("R", "singular_values.R")) +source(here::here("R", "graphon_distribution.R")) +source(here::here("R","singular_value_plot.R")) + +# load libaries for data handling +library(ggplot2) +library(dplyr) +library(latex2exp) +``` + +```{r Compute the data} +#| cache: true +#| echo: false +#| collapse: true +ns <- c(100, 200, 300, 400, 500) +Ks <- floor(sqrt(ns)) +as <- c(0.5, 1.0, 1.5, 2.0) + +# set a global seed +set.seed(42) +results <- data.frame(dim_n = integer(), + dim_k = integer(), + param_a = double(), + ssv = double()) +for (a in as) { + for (i in 1:length(ns)) { + n <- ns[i] + K <- Ks[i] + # use the default seed 1L + out <- smallest_sv_sequence( + a = a, + n = n, + maxK = K, + sampler_fn =function(n) matrix(rnorm(n), ncol = 1L), + guard=1e-12, + plot=FALSE, + fv = function(x) {dnorm(x, mean=0, sd=1)}, + Fv = function(x) {pnorm(x, mean=0, sd=1)} + ) + + current_res <- data.frame(dim_n = rep(n, K), dim_k = out$K, param_a = rep(a, K), ssv = out$sv) + results <- rbind(results, current_res) + } +} + +``` + +```{r plot the results} +#| cache: true +#| echo: false +#| collapse: true +#| fig-cap: "Simulation of the smallest singular values w.r.t. a, n and k" +results |> + mutate(param_a = as.factor(param_a), + dim_n = as.factor(dim_n)) |> + group_by(param_a, dim_n) |> + ggplot(aes(dim_k, ssv, col=dim_n, shape=param_a, interaction(dim_n, param_a))) + + geom_point(size=1.5) + + geom_line() + + scale_y_log10() + + theme_bw() + + labs(x=latex2exp::TeX("$k$"), + y=latex2exp::TeX("Smallest singular value of $Q$"), + title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $n$, $k$, and $a$."), + colour=latex2exp::TeX("$n$"), + shape=latex2exp::TeX("$a$")) +``` +The data for $n = 100$ is covered by the data for $n = 200$. + +## Analysis of the convergence + +We assume that the smallest singular value $\sigma$ can be approximated by: +$$ +\sigma = C \cdot n^\eta \cdot k^\kappa \cdot a^\alpha +$$ +to estimate the coefficients we make a log-transform and perform a linear regression, i.e. +$$ +\log(\sigma) = \log (C) + \eta \log(n) + \kappa\log(k) + \alpha \log(a). +$$ + +```{r estimate coeffs} +model1 <- results |> + filter(ssv > 1e-15) |> # exclude to small values + lm(formula = log(ssv) ~ log(dim_n) + log(dim_k) + log(param_a)) + +summary(model1) +plot(model1) + +``` +