diff --git a/scripts/.gitignore b/scripts/.gitignore index 8788cdf..0707e73 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -1,2 +1,3 @@ *.html *plots_dimensions_files/ +*plots_a_dependence_files/ diff --git a/scripts/plots_a_dependence.qmd b/scripts/plots_a_dependence.qmd new file mode 100644 index 0000000..f9a79ca --- /dev/null +++ b/scripts/plots_a_dependence.qmd @@ -0,0 +1,244 @@ +--- +title: "plots of a dependence" +author: "Niclas" +format: html +editor: visual +execute: + echo: true + working-directory: ../ +--- + +# Plots of the dependence on $a$ + +## Setup +We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the +smallest non-zero singular value of $Q$. + +The matrix $Q$ is given by +$$ +Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du +$$ +with +$$ +p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))} +$$ +In this document we plot the smallest eigenvalue as a function of the +parameter $a$ for different "ratios" of the parameters $n$ and $k$. + +```{r loading libraries} +#| cache: true +#| echo: false +#| collapse: true +# load local files +source(here::here("R", "singular_values.R")) +source(here::here("R", "graphon_distribution.R")) +source(here::here("R","singular_value_plot.R")) + +# load libraries for data handling +library(ggplot2) +library(dplyr) +library(latex2exp) + +``` + + +## Hyperparameter with n vs. 
k = log(n)
+```{r n / log(n) hyperparameters data generation}
+#| cache: true
+#| echo: false
+#| collapse: true
+ns <- seq(100, 1000, 100)
+Ks <- floor(log(ns))
+as <- seq(0, 20, 2)
+
+set.seed(100)
+results <- data.frame(dim_n = integer(),
+                      dim_k = integer(),
+                      param_a = double(),
+                      ssv = double())
+for (a in as) {
+  for (i in seq_along(ns)) {
+    n <- ns[i]
+    K <- Ks[i]
+    # no seed argument is passed, so the function's default applies
+    # (1L per the original note; TODO confirm against the helper's signature)
+    out <- smallest_sv_sequence(
+      a = a,
+      n = n,
+      maxK = K,
+      sampler_fn = function(n) matrix(rnorm(n), ncol = 1L),
+      guard = 1e-12,
+      plot = FALSE,
+      fv = function(x) {dnorm(x, mean=0, sd=1)},
+      Fv = function(x) {pnorm(x, mean=0, sd=1)}
+    )
+
+    current_res <- data.frame(dim_n = rep(n, K), dim_k = out$K, param_a = rep(a, K), ssv = out$sv)
+    results <- rbind(results, current_res)
+  }
+}
+```
+
+```{r hyperparameter n vs log(n) plotting}
+results |>
+  mutate(dim_n = as.factor(dim_n)) |>
+  group_by(dim_n) |>
+  filter(dim_k == max(dim_k) & param_a > 0) |>
+  ggplot(aes(param_a, ssv, col=dim_n)) +
+  geom_point(size=1.5) +
+  geom_line() +
+  #scale_y_log10() +
+  theme_bw() +
+  labs(x=latex2exp::TeX("$a$"),
+       y=latex2exp::TeX("Smallest singular value of $Q$"),
+       title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
+       subtitle = latex2exp::TeX(("Hyperparameter $k = \\lfloor\\log(n) \\rfloor$")),
+       colour=latex2exp::TeX("$n$"))
+```
+Here we have relatively large values of the smallest singular value (ssv) of $Q$ since
+the values for $k$ are relatively small. We have already observed that with
+larger $k$ the ssv shrinks rapidly towards zero. However, the largest value for $k$
+is at most $k = `r floor(log(1000))`$ for $n = 1000$.
+We omitted the value $a = 0$, as this results in an ssv of $10^{-61}$.
+Note that this is only one sample and it could produce slightly different results
+for other seeds.
+
+## Hyperparameter $n$ vs. $k = n^\alpha$
+
+```{r n vs n^alpha hyperparameters data generation}
+#| cache: true
+#| echo: false
+#| collapse: true
+ns <- seq(100, 1000, 100)
+as <- seq(0, 20, 2)
+alphas <- seq(0.1, 0.5, 0.1)
+
+set.seed(100)
+results <- data.frame(dim_n = integer(),
+                      dim_k = integer(),
+                      param_a = double(),
+                      param_alpha = double(),
+                      ssv = double())
+for (a in as) {
+  for (i in seq_along(ns)) {
+    for (j in seq_along(alphas)) {
+      n <- ns[i]
+      K <- floor(n^alphas[j])
+      if (K < 1) next # skip if K is equal to zero
+      # seed is fixed to 1L for every configuration (presumably so X is the same draw; TODO confirm)
+      Q <- compute_matrix(seed = 1L,
+                          a = a,
+                          n = n,
+                          K = K,
+                          sample_X_fn = function(n) {matrix(rnorm(n), ncol = 1L)},
+                          fv = function(x) {dnorm(x, mean=0, sd=1)},
+                          Fv = function(x) {pnorm(x, mean=0, sd=1)},
+                          guard = 1e-12)
+
+      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
+
+      current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha = alphas[j], ssv = ssv)
+      results <- rbind(results, current_res)
+    }
+  }
+}
+```
+
+```{r hyperparameter n / k^alpha = const plotting}
+results |>
+  filter(dim_n %in% c(100, 500, 1000)) |>
+  mutate(dim_n = as.factor(dim_n),
+         param_alpha = as.factor(param_alpha)) |>
+  group_by(dim_n, param_alpha) |>
+  ggplot(aes(param_a, ssv, col=dim_n, shape=param_alpha)) +
+  geom_point(size=1.5) +
+  geom_line() +
+  #scale_y_log10() +
+  theme_bw() +
+  labs(x=latex2exp::TeX("$a$"),
+       y=latex2exp::TeX("Smallest singular value of $Q$"),
+       title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
+       subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$")),
+       colour=latex2exp::TeX("$n$"),
+       shape=latex2exp::TeX("$\\alpha$"))
+```
+Here we use $K = \lfloor n^\alpha\rfloor$ as hyperparameter. Why don't we see
+any change w.r.t. $a$ for $\alpha = 0.1$? (Note that $\lfloor n^{0.1} \rfloor = 1$ for all $n \le 1000$, so $Q$ has a single column in that case.)
+
+
+## Two dimensional example
+Here we consider $p = 2$ covariate variables for each node. 
In order to make the
+results robust to the direction of $a$, we sample 4 different unit vectors and
+ rescale each of them to every norm $\|a\|$. We use $K = 5$ as hyperparameter.
+
+```{r data generation for two d example}
+#| cache: true
+#| echo: false
+#| collapse: true
+
+set.seed(10)
+ns <- seq(100, 1000, 100)
+as <- matrix(rnorm(8), ncol=4) # one random direction per column
+as_norm <- seq(1, 20, 2)
+for (i in seq_len(ncol(as))) {
+  as[, i] <- as[, i] / sqrt(sum(as[, i]^2)) # normalise each column to Euclidean norm one
+}
+
+results <- data.frame(dim_n = integer(),
+                      dim_k = integer(),
+                      param_a = integer(),
+                      param_a_norm = double(),
+                      ssv = double())
+for (a_norm in as_norm) {
+  for (i in seq_along(ns)) {
+    for (j in seq_len(ncol(as))) {
+      n <- ns[i]
+      K <- 5 # floor(sqrt(n))
+      if (K < 1) next # skip if K is equal to zero
+      # seed is fixed to 1L for every configuration (presumably so X is the same draw; TODO confirm)
+      Q <- compute_matrix(seed = 1L,
+                          a = a_norm * as[, j], # scale the unit direction j to norm a_norm (previously the norm was never applied)
+                          n = n,
+                          K = K,
+                          sample_X_fn = function(n) {matrix(rnorm(2 * n), ncol = 2L)},
+                          fv = function(x) {dnorm(x, mean=0, sd=1)},
+                          Fv = function(x) {pnorm(x, mean=0, sd=1)},
+                          guard = 1e-12)
+
+      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
+
+      current_res <- data.frame(dim_n = n, dim_k = K, param_a = j, param_a_norm = a_norm, ssv = ssv)
+      results <- rbind(results, current_res)
+    }
+  }
+}
+```
+
+```{r 2d a parameter plotting}
+results |>
+  filter(dim_n %in% c(100, 500, 1000)) |>
+  mutate(dim_n = as.factor(dim_n),
+         param_a = as.factor(param_a)) |>
+  group_by(dim_n, param_a) |>
+  ggplot(aes(param_a_norm, ssv, col=dim_n, shape=param_a, group=interaction(dim_n, param_a))) +
+  geom_point(size=1.5) +
+  geom_line() +
+  #scale_y_log10() +
+  theme_bw() +
+  labs(x=latex2exp::TeX("$\\|a\\|$"),
+       y=latex2exp::TeX("Smallest singular value of $Q$"),
+       title=latex2exp::TeX("Smallest singular value of $Q$ with respect to the norm of $a$."),
+       subtitle = latex2exp::TeX(("Hyperparameter $k = 5$")),
+       colour=latex2exp::TeX("$n$"),
+       shape=latex2exp::TeX("direction of $a$"))
+```
+```{r plot the vectors}
+plot(0, 0, xlim=c(-1.5, 1.5), ylim=c(-1.5, 1.5), type="n", xlab="x", ylab="y", 
asp=1) +arrows(0, 0, as[1, ], as[2, ], col=1:4, lwd=2) +title(main="Vectors for a rescaled to norm one.") +``` + + It seems that the direction of the parameter $a$ has a small influence on the + smallest singular value of the matrix $Q$, but the norm of $a$ does not?? + \ No newline at end of file diff --git a/scripts/plots_dimensions.qmd b/scripts/plots_dimensions.qmd index c81e790..402186f 100644 --- a/scripts/plots_dimensions.qmd +++ b/scripts/plots_dimensions.qmd @@ -44,7 +44,7 @@ library(latex2exp) #| cache: true #| echo: false #| collapse: true -ns <- c(100, 200, 300, 400, 500) +ns <- c(100, 200, 300, 400, 500, 600, 700, 800) Ks <- floor(sqrt(ns)) as <- c(0.5, 1.0, 1.5, 2.0) @@ -83,6 +83,7 @@ for (a in as) { #| collapse: true #| fig-cap: "Simulation of the smallest singular values w.r.t. a, n and k" results |> + filter(dim_n <= 400) |> mutate(param_a = as.factor(param_a), dim_n = as.factor(dim_n)) |> group_by(param_a, dim_n) |> @@ -119,4 +120,26 @@ summary(model1) plot(model1) ``` +## Plot of n vs. ssv + +```{r plot n vs ssv} +results |> + filter(dim_k %in% c(2, 6, 10)) |> + mutate(param_a = as.factor(param_a), + dim_k = as.factor(dim_k)) |> + group_by(param_a, dim_k) |> + ggplot(aes(dim_n, ssv, col=dim_k, shape=param_a, interaction(dim_k, param_a))) + + geom_point(size=1.5) + + geom_line() + + scale_y_log10() + + theme_bw() + + labs(x=latex2exp::TeX("$n$"), + y=latex2exp::TeX("Smallest singular value of $Q$"), + title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $n$, $k$, and $a$."), + colour=latex2exp::TeX("$k$"), + shape=latex2exp::TeX("$a$")) +``` +```{r} +Q <- compute_matrix(1, a=0.5, n=10, K = 3, function(n) matrix(rnorm(n), ncol = 1L), fv=dnorm, Fv=pnorm) +```