experiments with rho_n

add first sketch for estimators
add example for calculation
2026-06-17 10:43:33 +02:00 · 2026-05-20 17:03:45 +02:00 · 2026-05-20 17:02:54 +02:00 · 2026-05-20 17:02:33 +02:00 · 2026-05-20 17:00:33 +02:00 · 2026-05-06 17:33:46 +02:00
8 changed files with 728 additions and 57 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -53,3 +53,4 @@ rsconnect/
 /.quarto/
 **/*.quarto_ipynb
 _freeze/
+.positai
--- a/R/build_network.R
+++ b/R/build_network.R
@@ -0,0 +1,137 @@
+# Load required helper (the file must define `pgraphon`)
+if (requireNamespace("here", quietly = TRUE)) {
+  source(here::here("R", "graphon_distribution.R"))
+} else {
+  stop("Package `here` is required to locate the helper file.")
+}
+
+# TODO: Check the naming consistency across the documentation
+
+#' Compute an adjacency matrix from a graphon model
+#'
+#' @title Generate a the adjacency matrix using a graphon specification
+#' @description
+#' Given a covariate matrix `X_matrix`, a vector of intercepts `v`, a
+#' coefficient vector `a`, a symmetric graphon kernel `phi`, and a scaling
+#' factor `rho_n`, this function draws a random undirected adjacency matrix.
+#' The construction follows the steps:
+#'   1. Compute latent variables \eqn{\xi_i = F_v(a^\top X_i + v_i)}.
+#'   2. Generate a symmetric matrix of i.i.d. Uniform$[0,1]$ random numbers
+#'      with ones on the diagonal.
+#'   3. Evaluate the graphon probabilities \eqn{p_{ij}= \rho_n \, \phi(\xi_i,\xi_j)}.
+#'   4. Set \eqn{A_{ij}=1} if the uniform draw is smaller than the probability,
+#'      otherwise \eqn{0}.  The resulting matrix is symmetric with zeros on the
+#'      diagonal.
+#'
+#' @param X_matrix A numeric matrix of dimension *n \times d* (covariates).  
+#'   Must not contain `NA` or `NaN` values.
+#' @param v A numeric vector of length *n* (intercepts).  
+#'   Must be the same length as the number of rows of `X_matrix`.
+#' @param a A numeric vector of length *d* (coefficients).  
+#'   Must be the same length as the number of columns of `X_matrix`.
+#' @param phi A binary function `phi(x, y)` returning a numeric value.  
+#'   It should be symmetric (`phi(x, y) == phi(y, x)`) and bounded in $[0,1]$.
+#' @param rho_n A numeric scalar in $[0,1]$ that scales the graphon.
+#' @param Fv A cumulative distribution function (CDF) used to transform the
+#'   linear predictor.  Defaults to the standard normal CDF `pnorm`.  
+#'   Must accept a numeric vector and return a numeric vector of the same length.
+#'
+#' @return An *n* × *n* integer matrix (`0L`/`1L`) representing the adjacency
+#'   matrix of an undirected graph.
+#'
+#' @examples
+#' ## Simple example with a multiplicative graphon
+#' set.seed(1)
+#' X  <- matrix(seq(-1, 1, length.out = 5), ncol = 1)
+#' a  <- 2
+#' v  <- seq(0, 0.8, length.out = 5)
+#' phi_fun <- function(x, y) x * y          # multiplicative kernel
+#' adj <- compute_adj_matrix(
+#'   X_matrix = X,
+#'   v        = v,
+#'   a        = a,
+#'   phi      = phi_fun,
+#'   rho_n    = 0.5,
+#'   Fv       = punif
+#' )
+#' adj
+#'
+#' @export
+compute_adj_matrix <- function(
+    X_matrix,
+    v,
+    a,
+    phi,
+    rho_n,
+    Fv = pnorm
+) {
+  ## -------------------------- Guard rails -------------------------- ##
+  # Missing / NULL checks
+  if (missing(X_matrix) || is.null(X_matrix)) {
+    stop("`X_matrix` is missing or NULL.")
+  }
+  if (missing(v) || is.null(v)) {
+    stop("`v` is missing or NULL.")
+  }
+  if (missing(a) || is.null(a)) {
+    stop("`a` is missing or NULL.")
+  }
+  if (missing(phi) || is.null(phi)) {
+    stop("`phi` is missing or NULL.")
+  }
+  if (missing(rho_n) || is.null(rho_n)) {
+    stop("`rho_n` is missing or NULL.")
+  }
+  if (missing(Fv) || is.null(Fv)) {
+    stop("`Fv` is missing or NULL.")
+  }
+  
+  # Type / dimension checks
+  if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
+    stop("`X_matrix` must be a numeric matrix.")
+  }
+  if (any(is.na(X_matrix) | is.nan(X_matrix))) {
+    stop("`X_matrix` must not contain NA or NaN values.")
+  }
+  n <- nrow(X_matrix)
+  d <- ncol(X_matrix)
+  
+  if (!is.numeric(v) || length(v) != n) {
+    stop("`v` must be a numeric vector of length equal to nrow(`X_matrix`).")
+  }
+  if (!is.numeric(a) || length(a) != d) {
+    stop("`a` must be a numeric vector of length equal to ncol(`X_matrix`).")
+  }
+  if (!is.function(phi)) {
+    stop("`phi` must be a function of two numeric arguments.")
+  }
+  if (!is.numeric(rho_n) || length(rho_n) != 1 || rho_n < 0 || rho_n > 1) {
+    stop("`rho_n` must be a numeric scalar in [0, 1].")
+  }
+  if (!is.function(Fv)) {
+    stop("`Fv` must be a function (CDF) that accepts a numeric vector.")
+  }
+  
+  ## -------------------------- Core computation -------------------------- ##
+  # 1. Latent variables ξ_i = Fv(aᵀ X_i + v_i)
+  lin_pred <- as.vector(X_matrix %*% a) + v
+  xi <- pgraphon(y = lin_pred, a = a, Fv = Fv, X_matrix = X_matrix)
+  
+  # 2. Symmetric matrix of Uniform[0,1] draws with 1 on the diagonal
+  #    (only the upper triangle is sampled, then mirrored)
+  rand_mat <- matrix(0, n, n)
+  upper_idx <- which(upper.tri(rand_mat, diag = FALSE), arr.ind = TRUE)
+  rand_mat[upper_idx] <- runif(n * (n - 1) / 2)
+  rand_mat <- rand_mat + t(rand_mat) + diag(1, n)
+  
+  # 3. Graphon probabilities p_ij = rho_n * phi(ξ_i, ξ_j)
+  graphon_fun <- function(x, y) rho_n * phi(x, y)
+  probabilities <- outer(xi, xi, FUN = graphon_fun)
+  
+  # 4. Adjacency matrix: A_ij = 1 if U_ij < p_ij, else 0
+  adj_mat <- (rand_mat < probabilities) * 1L
+  
+  ## Return the adjacency matrix
+  adj_mat
+}
+
--- a/R/estimators.R
+++ b/R/estimators.R
@@ -0,0 +1,246 @@
+# load local files
+source(here::here("R", "singular_values.R"))
+source(here::here("R", "graphon_distribution.R"))
+source(here::here("R","singular_value_plot.R"))
+source(here::here("R", "build_network.R"))
+
+# Helper functions -------------------------------------------------------------
+# helper function for wrapping the parameters of the Q_a creation function
+# TODO rename this function
+make_matrix_creation <- function(seed, n, K, matrix_X, fv, Fv, guard) {
+  function(a) {
+    compute_matrix(seed=seed, a, n=n, K=K, matrix_X = matrix_X, fv=fv, Fv=Fv, guard=guard)
+  }
+}
+
+# estimators -------------------------------------------------------------------
+
+## Moore-Penrose Inverse -------------------------------------------------------
+#' Moore‑Penrose pseudoinverse of a matrix
+#'
+#' Computes the Moore‑Penrose generalized inverse of a numeric matrix using a
+#' singular‑value decomposition (SVD). Singular values smaller than the
+#' tolerance are treated as zero to improve numerical stability.
+#'
+#' @param A A numeric matrix (or an object coercible to a matrix) whose
+#'   pseudoinverse is required.
+#' @param tol Tolerance for treating singular values as zero.  By default it
+#'   is set to `max(dim(A)) * max(svd(A)$d) * .Machine$double.eps`, which
+#'   scales with the size of the matrix and machine precision.
+#' @return A matrix representing the Moore‑Penrose inverse of `A`.  The
+#'   dimensions of the result are `ncol(A) × nrow(A)`.
+#' @examples
+#' set.seed(123)
+#' A <- matrix(rnorm(12), nrow = 3)
+#' A_pinv <- pinv(A)
+#' # Verify the Moore‑Penrose properties (A %*% A_pinv %*% A ≈ A)
+#' all.equal(A %*% A_pinv %*% A, A)
+#' @importFrom stats svd
+#' @export
+pinv <- function(A, tol = NULL) {
+  # Coerce to matrix and check type
+  if (!is.matrix(A)) {
+    A <- as.matrix(A)
+  }
+  if (!is.numeric(A)) {
+    stop("`A` must be a numeric matrix.", call. = FALSE)
+  }
+  
+  # Singular value decomposition
+  s <- svd(A)
+  
+  # Determine tolerance if not supplied
+  if (is.null(tol)) {
+    tol <- max(dim(A)) * max(s$d) * .Machine$double.eps
+  }
+  
+  # Invert non‑zero singular values
+  d_inv <- ifelse(s$d > tol, 1 / s$d, 0)
+  
+  # Construct diagonal matrix of inverted singular values
+  D_plus <- diag(d_inv, nrow = length(d_inv))
+  
+  # Moore‑Penrose inverse: V %*% D⁺ %*% t(U)
+  s$v %*% D_plus %*% t(s$u)
+}
+
+
+
+## Estimate Matrix B -----------------------------------------------------------
+#' Estimate the matrix \$B\$ for a graphon model
+#'
+#' For a given graphon scaling parameter `rho_n`, a square matrix `Q_a`, and an
+#' adjacency matrix `A`, this function computes  
+
+#' $ B = \\rho_n \, Q_a^{+\\,T} \, A \, Q_a^{+} $
+
+#' where `Q_a^{+}` denotes the Moore‑Penrose pseudoinverse of `Q_a`. 
+#'
+#' @param rho_n Numeric scalar.  The graphon scaling parameter.
+#' @param Q_a   Square numeric matrix. TODO: write description of the Matrix
+#' @param A     Square numeric adjacency matrix (same dimension as `Q_a`).
+#' @return A numeric matrix of the same dimension as `Q_a` representing the
+#'   estimated \$B\$.
+#' @examples
+#' set.seed(42)
+#' n <- 5
+#' Q_a <- diag(n)               # simple identity basis
+#' A   <- matrix(rbinom(n^2, 1, 0.3), n, n)
+#' rho_n <- 0.5
+#' B_est <- estimate_B_matrix(rho_n, Q_a, A)
+#' str(B_est)
+#' @importFrom stats svd
+#' @export
+estimate_B_matrix <- function(rho_n, Q_a, A) {
+  # ---- Input checks ---------------------------------------------------------
+  if (!is.numeric(rho_n) || length(rho_n) != 1) {
+    stop("`rho_n` must be a single numeric value.", call. = FALSE)
+  }
+  if (!is.matrix(Q_a) || !is.numeric(Q_a)) {
+    stop("`Q_a` must be a numeric matrix.", call. = FALSE)
+  }
+  if (!is.matrix(A) || !is.numeric(A)) {
+    stop("`A` must be a numeric matrix.", call. = FALSE)
+  }
+  if (nrow(A) != ncol(A)) {
+    stop("`A` must be square.", call. = FALSE)
+  }
+  if (ncol(Q_a) != nrow(A)) {
+    stop("Dimensions of `Q_a` and `A` must agree for matrix multiplication.", call. = FALSE)
+  }
+  
+  # ---- Compute pseudoinverse ------------------------------------------------
+  pinv_Qa <- pinv(Q_a)   # assumes `pinv()` is available in the namespace
+  
+  # ---- Estimate B -----------------------------------------------------------
+  B <- rho_n * (t(pinv_Qa) %*% A %*% pinv_Qa)
+  
+  B
+}
+
+# TODO rename this function
+# TODO test the convergence of the function estimate_B
+# with given graphon (block function, Hölder continuous functions)
+# and given K with growing N
+# and other options
+# plot the loss function with respect to a
+estimate_a <- function(A, # adjacency matrix
+                       a0, # start value
+                       n,
+                       K,
+                       sample_X_fn,
+                       fv,
+                       Fv, 
+                       guard
+) {
+  calc_Q_a <- make_matrix_creation(seed, n, K, sample_X_fn, fv, Fv, guard)
+  
+  loss_func <- function(a) {
+    Q_a <- calc_Q_a(a)
+    pinv_Qa <- pinv(Q_a)
+    
+    norm(pinv_Qa %*% Q_a %*% A %*% pinv_Qa %*% Q_a - A, type="F")^2
+  }
+  
+  return(loss_func)
+}
+
+#' Calculate the edge density of an undirected graph
+#'
+#' @title Edge density from an adjacency matrix
+#' @description Computes the proportion of possible edges that are present in an
+#'   undirected, unweighted graph represented by a square adjacency matrix.  The
+#'   density is defined as \eqn{2E / (V(V-1))} where \eqn{E} is the number of edges
+#'   and \eqn{V} is the number of vertices.  This corresponds to the parameter
+#'   \eqn{rho_n}.
+#'   The function checks that the input is a square matrix and treats any
+#'   non‑numeric or missing entries as absent edges.
+#'
+#' @param adj_matrix A square numeric matrix representing the adjacency matrix
+#'   of an undirected graph.  Entries should be 0/1 (or any truthy numeric) with
+#'   `adj_matrix[i, j] == adj_matrix[j, i]`.  Missing values are ignored.
+#'
+#' @return A single numeric value giving the edge density \eqn{rho}.
+#'
+#' @examples
+#' # Simple triangle graph (3 nodes, 3 edges)
+#' A_tri <- matrix(c(0,1,1,
+#'                   1,0,1,
+#'                   1,1,0), nrow = 3, byrow = TRUE)
+#' calculate_edge_density(A_tri)
+#'
+#' # Empty graph (no edges)
+#' A_empty <- matrix(0, nrow = 4, ncol = 4)
+#' calculate_edge_density(A_empty)
+#'
+#' @export
+calculate_edge_density <- function(adj_matrix) {
+  # -------------------------------------------------------------------------
+  # Validate input
+  # -------------------------------------------------------------------------
+  if (!is.matrix(adj_matrix)) {
+    stop("`adj_matrix` must be a matrix.")
+  }
+  if (nrow(adj_matrix) != ncol(adj_matrix)) {
+    stop("`adj_matrix` must be square (same number of rows and columns).")
+  }
+  
+  # -------------------------------------------------------------------------
+  # Count edges and nodes
+  # -------------------------------------------------------------------------
+  edge_idx   <- which(upper.tri(adj_matrix, diag = FALSE), arr.ind = TRUE)
+  edge_count <- sum(adj_matrix[edge_idx], na.rm = TRUE)
+  node_count <- nrow(adj_matrix)
+  
+  # -------------------------------------------------------------------------
+  # Compute density
+  # -------------------------------------------------------------------------
+  rho <- (2 * edge_count) / (node_count * (node_count-1))
+  
+  return(rho)
+}
+# test the estimator routines
+seed <- 17L #  121L this seed works exceptionally well
+set.seed(seed)
+#X  <- matrix(seq(-1, 1, length.out = 5), ncol = 1)
+a  <- 20
+n <- 400
+K <- 4
+rho_n <- log(n) / n
+sample_X_fn <- function(n) {matrix(rnorm(n), ncol = 1L)}
+X <- sample_X_fn(n)
+fv <- function(x) {dnorm(x, mean=0, sd=1)}
+Fv <- function(x) {pnorm(x, mean=0, sd=1)}
+guard <- 1e-12
+v  <- rnorm(n) #seq(0, 0.8, length.out = n)
+phi_fun <-Vectorize(function(x, y) ifelse(((x > 0.5 && y <= 0.5) || (x <= 0.5 && y > 0.5)), 1.6, 0.4))        # multiplicative kernel
+adj <- compute_adj_matrix(
+   X_matrix = X,
+   v        = v,
+   a        = a,
+   phi      = phi_fun,
+   rho_n    = rho_n,
+   Fv       = Fv
+ )
+adj
+
+# Q_a matrix
+Qa <- compute_matrix(seed, a=a, n=n, K=K, fv=fv, Fv=Fv, guard=guard, matrix_X=X)
+
+calc_Q_a <- make_matrix_creation(seed, n=n, K=K, matrix_X = X, fv=fv, Fv=Fv, guard=guard)
+
+loss_func <- function(a) {
+  Q_a <- calc_Q_a(a)
+  pinv_Qa <- pinv(Q_a)
+  
+  norm(pinv_Qa %*% Q_a %*% adj %*% pinv_Qa %*% Q_a - adj, type="F")^2
+}
+
+
+plot_as <- seq(-10, 100, length.out=500)
+loss_vals <- sapply(plot_as, loss_func)
+plot(plot_as , loss_vals, type="b")
+abline(v=a)
+title("Test plot of the loss function")
+
+optim(25, loss_func, lower=10, upper=100, method="Brent", control=list(fnscale=1))
--- a/R/singular_values.R
+++ b/R/singular_values.R
@@ -33,10 +33,6 @@ source(here::here("R", "graphon_distribution.R"))
 #'               generated.
 #' @param K      Positive integer.  Number of divisions of the unit interval;
 #'               the resulting grid has length `K+1`.
-#' @param sample_X_fn
-#'               Function with a single argument `n`.  It must return an
-#'               \eqn{n \times p} matrix (or an object coercible to a matrix) of
-#'               covariate samples.
 #' @param fv     Density function of the latent variable \eqn{v}.  Must be
 #'               vectorised (i.e. accept a numeric vector and return a numeric
 #'               vector of the same length).  Typical examples are
@@ -44,8 +40,15 @@ source(here::here("R", "graphon_distribution.R"))
 #' @param Fv     Cumulative distribution function of the latent variable
 #'               \eqn{v}.  Also has to be vectorised.  Typical examples are
 #'               `pnorm`, `pexp`, ….
+#' @param sample_X_fn
+#'               Function with a single argument `n`.  It must return an
+#'               \eqn{n \times p} matrix (or an object coercible to a matrix) of
+#'               covariate samples. It can be NULL, but then `matrix_X` must be
+#'               given.
 #' @param matrix_X matrix with the covariates at each node. Each row corresponds
-#' to a single node with p attributes.
+#' to a single node with p attributes. The default value is `NULL`. If it
+#' is `NULL` then `sample_X_fun` must be given. If both parameters are provided,
+#' then `matrix_X` is used.
 #' @param guard Positive numeric guard value.  Default is `sqrt(.Machine$double.eps)`,
 #'   which is about `1.5e‑8` on most platforms – small enough to be negligible
 #'   for most computations. If it is null, then it is not used. 
@@ -106,9 +109,9 @@ compute_matrix <- function(
    a,
    n,
    K,
-    sample_X_fn,
    fv,
    Fv,
+    sample_X_fn=NULL,
    matrix_X = NULL,
    guard = sqrt(.Machine$double.eps),
    scaled = FALSE
@@ -118,10 +121,12 @@ compute_matrix <- function(
  if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector")
  if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer")
  if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer")
-  if (!is.function(sample_X_fn)) stop("'sample_X_fn' must be a function")
+  if (!(is.function(sample_X_fn) || is.null(sample_X_fn))) stop("'sample_X_fn' must be a function or Null and matrix_X must be given")
  if (!is.function(fv)) stop("'f_v' must be a function")
  if (!is.function(Fv)) stop("'F_v' must be a function")
  if (!is.null(matrix_X) && !is.matrix(matrix_X)) stop("matrix_X must be either null or a matrix")
+  if (is.null(matrix_X) && is.null(sample_X_fn)) stop("Either 'matrix_X' or 'sample_X_fn' must be supplied!")
+  if (!is.null(matrix_X) && !is.null(sample_X_fn)) warning("Both arguments 'matrix_X' and `sample_X_fn` is given. Priority is given by to the first!")
  
  ## 1.2 Generate the Matrix X of covariates ===================================
  # If the argument matrix_X is present, use this matrix, otherwise generate one
--- a/notebooks/two-dimensional-calculation-example.qmd
+++ b/notebooks/two-dimensional-calculation-example.qmd
@@ -0,0 +1,48 @@
+---
+title: "Two-dimensional-example"
+format: html
+editor: visual
+---
+
+# Zwei dimensionales Rechenbeispiel für die Matrix $Q_a$.
+
+Mit den Eingabedaten:
+
+- $a = 2.0$
+- $X = (-0.6264, 0.1836)^\top$
+- $n = K = 2$
+und $v \sim \mathcal{N}(0, 1)$
+
+Daraus ergibt sich eine Matrix $Q_a$ mit
+
+$$
+Q_a = \begin{pmatrix} 0.7911 & 0.2089 \\
+ 0.2089 & 0.7911 \\
+ \end{pmatrix}
+$$
+
+
+Anscheinend ist es so, dass für verschiedene Eingabewerte der Matrix $X$, es immer
+wieder zu verschiedenen, aber immer noch diagonaldominanten Einträgen kommt. 
+Wieso passiert dies?
+
+Falls allerdings $X = (0.2167549 -0.5424926)^\top$, dann ist 
+$$
+Q_a = \begin{pmatrix}
+0.2239 & 0.7761 \\
+0.7761 & 0.2239 \\
+\end{pmatrix}
+$$
+wo jetzt die Nebendiagonale dominant ist. Wenn man verschiedene Seeds ausprobiert,
+so sieht man ein Muster. Doch es gibt auch das Beispiel mit $X = (0.7667960 -0.8164583)^\top$
+und dann erhalten wir:
+$$
+Q_a = \begin{pmatrix}
+0.4802 & 0.5198 \\
+0.5198 & 0.4802 \\
+\end{pmatrix}
+$$
+
+Diese Matrix hat immer noch eine dominante Nebendiagonale, aber nicht so stark wie
+alle anderen bekannten Beispiele. Bei den anderen Berechnungen zeigte sich meistens
+ein Unterschied von $| a_{11} - a_{12}| > 0.5$.
--- a/scripts/plot_Qa_wrt_a.R
+++ b/scripts/plot_Qa_wrt_a.R
@@ -0,0 +1,83 @@
+# load local files
+source(here::here("R", "singular_values.R"))
+source(here::here("R", "graphon_distribution.R"))
+source(here::here("R","singular_value_plot.R"))
+
+# load libaries for data handling
+library(ggplot2)
+library(tidyr)
+library(dplyr)
+
+# Line plots -------------------------------------------------------------------
+# Create a grid of a‑values
+a_grid <- seq(-20, 20, length.out = 200)
+
+# function which takes only a to compute Q_c
+make_matrix <- function(a) { compute_matrix(seed=11513215L,
+                              a= a,
+                              n = 2,
+                              K = 2,
+                              sample_X_fn = function(n) {matrix(rnorm(n), ncol = 1L)},
+                              fv = function(x) {dnorm(x, mean=0, sd=1)},
+                              Fv = function(x) {pnorm(x, mean=0, sd=1)},
+                              guard = 1e-12)}
+
+# Compute the matrices and reshape to long format
+df_entries <- tibble(a = a_grid) %>%
+  mutate(
+    M = purrr::map(a, make_matrix),                     # list‑column of matrices
+    m11 = purrr::map_dbl(M, ~ .x[1, 1]),
+    m12 = purrr::map_dbl(M, ~ .x[1, 2]),
+    m21 = purrr::map_dbl(M, ~ .x[2, 1]),
+    m22 = purrr::map_dbl(M, ~ .x[2, 2])
+  ) %>%
+  select(a, m11, m12, m21, m22) %>%
+  pivot_longer(-a, names_to = "entry", values_to = "value")
+
+# Plot
+ggplot(df_entries, aes(x = a, y = value, colour = entry, linetype = entry)) +
+  geom_line(linewidth = 1) +
+  labs(
+    title = "Matrix entries as a function of the parameter `a`",
+    x = "a",
+    y = "Matrix entry value",
+    colour = "Entry"
+  ) +
+  theme_minimal()
+
+# Heat map for a single larger matrix ------------------------------------------
+# TODO Daten für 2x2 und 3x3 an Michael schicken
+# Choose a value of a
+a0 <- 2
+M0 <-  compute_matrix(seed=9L,
+                      a= a0,
+                      n = 2,
+                      K = 2,
+                      sample_X_fn = function(n) {matrix(rnorm(n), ncol = 1L)},
+                      fv = function(x) {dnorm(x, mean=0, sd=1)},
+                      Fv = function(x) {pnorm(x, mean=0, sd=1)},
+                      guard = 1e-12)
+M0
+
+# Convert to a tidy data frame for ggplot
+df_heat <- as.data.frame(M0) %>%
+  mutate(row = row_number()) %>%
+  pivot_longer(-row, names_to = "col", values_to = "value") %>%
+  mutate(
+    col = as.integer(gsub("V", "", col)),   # turn "V1","V2" into 1,2
+    row = factor(row, levels = rev(unique(row)))   # reverse y‑axis for proper orientation
+  )
+
+ggplot(df_heat, aes(x = col, y = row, fill = value)) +
+  geom_tile(colour = "grey30", size = 0.5) +
+  scale_fill_gradient2(
+    low = "steelblue", mid = "white", high = "tomato",
+    midpoint = 0.5, limits = c(min(df_heat$value), max(df_heat$value))
+  ) +
+  labs(
+    title = sprintf("Heat‑map of the matrix at a = %.2f", a0),
+    x = "Column", y = "Row", fill = "Value"
+  ) +
+  theme_minimal() +
+  theme(axis.text = element_blank(),
+        axis.ticks = element_blank())
--- a/scripts/plot_non_normal_X.qmd
+++ b/scripts/plot_non_normal_X.qmd
@@ -25,37 +25,25 @@ library(latex2exp)
 ```

 ## Setup
-We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the
-smallest non-zero singular value of $Q$.

-The matrix $Q$ is given by
-$$
-Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du
-$$
-with
-$$
-p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))}
-$$
-In this document we plot different the smallest eigenvalue in dependence of the
-parameter $a$ with different "ratios" of the parameters $n$ and
-$$
-k = \lfloor n^\alpha \rfloor
-$$
+We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the smallest non-zero singular value of $Q$.

-with $\alpha = 0.1, 0.2, \dots 0.5$. The data matrix $X$ is a random matrix with
-i.i.d. distributed entries. We consider $x_{ij} \sim U[0,1]$ and $x_{ij} \sim Exp(\lambda)$.
+The matrix $Q$ is given by $$Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du$$ with $$p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))}$$ In this document we plot different the smallest eigenvalue in dependence of the parameter $a$ with different "ratios" of the parameters $n$ and $$k = \lfloor n^\alpha \rfloor$$
+
+with $\alpha = 0.1, 0.2, \dots 0.5$. The data matrix $X$ is a random matrix with i.i.d. distributed entries. We consider $x_{ij} \sim U[0,1]$ and $x_{ij} \sim Exp(\lambda)$.

 ## Exponential distribution
+
 ```{r k = n^alpha data generation, rate = 1}
 #| cache: true
 #| echo: false
 #| collapse: true
-ns <- seq(100, 1000, 100)
+ns <- seq(100, 10000, 100)
 as <- seq(0, 20, 2)
 alphas <-  seq(0.1, 0.5, 0.1)

 set.seed(100)
-results <- data.frame(dim_n = integer(),
+results01 <- data.frame(dim_n = integer(),
                      dim_k = integer(),
                      param_a = double(),
                      param_alpha = double(),
@@ -79,22 +67,25 @@ for (a in as) {
      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
      
      current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
-      results <- rbind(results, current_res)
+      results01 <- rbind(results01, current_res)
    }
  }
 }
 ```

 ```{r k = n^alpha plotting, rate = 1}
-results |>
-  filter(param_a %in% c(0, 10, 20)) |>
+# plot the results
+results01 |>
+  filter(param_a %in% c(2, 6, 12) & param_alpha <= 0.12) |>
  mutate(param_a = as.factor(param_a),
         param_alpha = as.factor(param_alpha)) |>
  group_by(param_a, param_alpha) |>
  ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
  geom_point(size=1.5) +
  geom_line() +
-  geom_function(fun = function(x) {sqrt(x)}, colour="black") +
+  geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
+  geom_point(size=1.5) +
+  #geom_function(fun = function(x) {sqrt(x)}, colour="black") +
  #scale_y_log10() +
  theme_bw() +
  labs(x=latex2exp::TeX("$n$"),
@@ -105,17 +96,16 @@ results |>
       shape=latex2exp::TeX("$\\alpha$"))
 ```

-
 ```{r k = n^alpha data generation, rate = 3}
 #| cache: true
 #| echo: false
 #| collapse: true
-ns <- seq(100, 1000, 100)
+ns <- seq(100, 10000, 100)
 as <- seq(0, 20, 2)
 alphas <-  seq(0.1, 0.5, 0.1)

 set.seed(100)
-results <- data.frame(dim_n = integer(),
+results02 <- data.frame(dim_n = integer(),
                      dim_k = integer(),
                      param_a = double(),
                      param_alpha = double(),
@@ -139,22 +129,24 @@ for (a in as) {
      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
      
      current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
-      results <- rbind(results, current_res)
+      results02 <- rbind(results02, current_res)
    }
  }
 }
 ```

 ```{r k = n^alpha plotting, rate = 3}
-results |>
-  filter(param_a %in% c(0, 10, 20)) |>
+results02 |>
+  filter(param_a %in% c(0, 10, 20) & param_alpha < 0.12) |>
  mutate(param_a = as.factor(param_a),
         param_alpha = as.factor(param_alpha)) |>
  group_by(param_a, param_alpha) |>
  ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
  geom_point(size=1.5) +
  geom_line() +
-  geom_function(fun = function(x) {sqrt(x)}, colour="black") +
+  geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
+  geom_point(size=1.5) +
+  #geom_function(fun = function(x) {sqrt(x)}, colour="black") +
  #scale_y_log10() +
  theme_bw() +
  labs(x=latex2exp::TeX("$n$"),
@@ -164,18 +156,19 @@ results |>
       colour=latex2exp::TeX("$a$"),
       shape=latex2exp::TeX("$\\alpha$"))
 ```
+
 For $a = 0$ the smallest singular value is very close to zero.

 ```{r k = n^alpha data generation, rate = 5}
 #| cache: true
 #| echo: false
 #| collapse: true
-ns <- seq(100, 1000, 100)
+ns <- seq(100, 10000, 100)
 as <- seq(0, 20, 2)
 alphas <-  seq(0.1, 0.5, 0.1)

 set.seed(100)
-results <- data.frame(dim_n = integer(),
+results03 <- data.frame(dim_n = integer(),
                      dim_k = integer(),
                      param_a = double(),
                      param_alpha = double(),
@@ -199,14 +192,14 @@ for (a in as) {
      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
      
      current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
-      results <- rbind(results, current_res)
+      results03 <- rbind(results03, current_res)
    }
  }
 }
 ```

 ```{r k = n^alpha plotting, rate = 5}
-results |>
+results03 |>
  filter(param_a %in% c(0, 10, 20)) |>
  mutate(param_a = as.factor(param_a),
         param_alpha = as.factor(param_alpha)) |>
@@ -214,7 +207,9 @@ results |>
  ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
  geom_point(size=1.5) +
  geom_line() +
-  geom_function(fun = function(x) {sqrt(x)}, colour="black") +
+  geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
+  geom_point(size=1.5) +
+  #geom_function(fun = function(x) {sqrt(x)}, colour="black") +
  #scale_y_log10() +
  theme_bw() +
  labs(x=latex2exp::TeX("$n$"),
@@ -224,21 +219,21 @@ results |>
       colour=latex2exp::TeX("$a$"),
       shape=latex2exp::TeX("$\\alpha$"))
 ```
-Why is here a perfect match for $\alpha = 0.1$ and $a = 20$ to the square function?
-The difference is of the order of $10^{-11}$!

+Why is here a perfect match for $\alpha = 0.1$ and $a = 20$ to the square function? The difference is of the order of $10^{-11}$!

 ## Uniform distribution
+
 ```{r k = n^alpha data generation, U[0,1]}
 #| cache: true
 #| echo: false
 #| collapse: true
-ns <- seq(100, 1000, 100)
+ns <- seq(100, 10000, 100)
 as <- seq(0, 20, 2)
 alphas <-  seq(0.1, 0.5, 0.1)

 set.seed(100)
-results <- data.frame(dim_n = integer(),
+results04 <- data.frame(dim_n = integer(),
                      dim_k = integer(),
                      param_a = double(),
                      param_alpha = double(),
@@ -262,22 +257,24 @@ for (a in as) {
      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
      
      current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
-      results <- rbind(results, current_res)
+      results04 <- rbind(results04, current_res)
    }
  }
 }
 ```

 ```{r k = n^alpha plotting, U[0,1]}
-results |>
-  filter(param_a %in% c(0, 10, 20)) |>
+results04 |>
+  filter(param_a %in% c(2, 10, 20) & param_alpha < 0.22 & param_alpha > 0.12) |>
  mutate(param_a = as.factor(param_a),
         param_alpha = as.factor(param_alpha)) |>
  group_by(param_a, param_alpha) |>
  ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
  geom_point(size=1.5) +
  geom_line() +
-  geom_function(fun = function(x) {sqrt(x)}, colour="black") +
+  geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
+  geom_point(size=1.5) +
+  #geom_function(fun = function(x) {sqrt(x)}, colour="black") +
  #scale_y_log10() +
  theme_bw() +
  labs(x=latex2exp::TeX("$n$"),
@@ -287,19 +284,19 @@ results |>
       colour=latex2exp::TeX("$a$"),
       shape=latex2exp::TeX("$\\alpha$"))
 ```
-Here we have the same effect for $\alpha = 0.1$ and $a = 20$.

+Here we have the same effect for $\alpha = 0.1$ and $a = 20$.

 ```{r k = n^alpha data generation, U[0,2]}
 #| cache: true
 #| echo: false
 #| collapse: true
-ns <- seq(100, 1000, 100)
+ns <- seq(100, 10000, 100)
 as <- seq(0, 20, 2)
 alphas <-  seq(0.1, 0.5, 0.1)

 set.seed(100)
-results <- data.frame(dim_n = integer(),
+results05 <- data.frame(dim_n = integer(),
                      dim_k = integer(),
                      param_a = double(),
                      param_alpha = double(),
@@ -323,14 +320,14 @@ for (a in as) {
      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
      
      current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
-      results <- rbind(results, current_res)
+      results05 <- rbind(results05, current_res)
    }
  }
 }
 ```

 ```{r k = n^alpha plotting, U[0,2]}
-results |>
+results05 |>
  filter(param_a %in% c(0, 10, 20)) |>
  mutate(param_a = as.factor(param_a),
         param_alpha = as.factor(param_alpha)) |>
@@ -338,7 +335,9 @@ results |>
  ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
  geom_point(size=1.5) +
  geom_line() +
-  geom_function(fun = function(x) {sqrt(x)}, colour="black") +
+  geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
+  geom_point(size=1.5) +
+  #geom_function(fun = function(x) {sqrt(x)}, colour="black") +
  #scale_y_log10() +
  theme_bw() +
  labs(x=latex2exp::TeX("$n$"),
@@ -348,3 +347,155 @@ results |>
       colour=latex2exp::TeX("$a$"),
       shape=latex2exp::TeX("$\\alpha$"))
 ```
+
+```{r k = n^alpha data generation, N(0,1)}
+#| cache: true
+#| echo: false
+#| collapse: true
+ns <- seq(100, 10000, 100)
+as <- seq(0, 20, 2)
+alphas <-  seq(0.1, 0.5, 0.1)
+
+set.seed(100)
+results06 <- data.frame(dim_n = integer(),
+                      dim_k = integer(),
+                      param_a = double(),
+                      param_alpha = double(),
+                      ssv = double())
+for (a in as) {
+  for (i in 1:length(ns)) {
+    for (j in 1:length(alphas)) {
+      n <- ns[i]
+      # HERE WE USE THE CEILING AND NOT FLOOR!
+      K <- ceiling(n^alphas[j])
+      if (!K > 0) next # skip if K is equal to zero
+      # use the default seed 1L
+      Q <- compute_matrix(seed=1L,
+                     a= a,
+                     n = n,
+                     K = K,
+                     sample_X_fn = function(n) {matrix(rnorm(n), ncol = 1L)},
+                     fv = function(x) {dnorm(x, mean=0, sd=1)},
+                     Fv = function(x) {pnorm(x, mean=0, sd=1)},
+                     guard = 1e-12)
+      
+      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
+      
+      current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
+      results06 <- rbind(results06, current_res)
+    }
+  }
+}
+```
+
+```{r k = n^alpha plotting, U[0,2]}
+results06 |>
+  filter(param_a %in% c(0, 10, 20)) |>
+  mutate(param_a = as.factor(param_a),
+         param_alpha = as.factor(param_alpha)) |>
+  group_by(param_a, param_alpha) |>
+  ggplot(aes(dim_n, ssv * dim_k, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
+  geom_point(size=1.5) +
+  geom_line() +
+  geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
+  geom_point(size=1.5) +
+  #geom_function(fun = function(x) {x^(0.5)}, colour="black") +
+  #scale_y_log10() +
+  theme_bw() +
+  labs(x=latex2exp::TeX("$n$"),
+       y=latex2exp::TeX("Smallest singular value of $Q$"),
+       title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
+       subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim N(0,1) $, use ceil function instead of floor for rounding.")),
+       colour=latex2exp::TeX("$a$"),
+       shape=latex2exp::TeX("$\\alpha$"))
+```
+
+```{r k = n^alpha plotting, U[0,2]}
+results06 |>
+  filter(param_a %in% c(0, 10, 20)) |>
+  mutate(param_a = as.factor(param_a),
+         param_alpha = as.factor(param_alpha)) |>
+  group_by(param_a, param_alpha) |>
+  ggplot(aes(dim_n, ssv / sqrt(dim_n) * dim_k, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
+  geom_point(size=1.5) +
+  geom_line() +
+ # geom_function(fun = function(x) {x^(0.5)}, colour="black") +
+  #scale_y_log10() +
+  theme_bw() +
+  labs(x=latex2exp::TeX("$n$"),
+       y=latex2exp::TeX("Smallest singular value of $Q$ / sqrt(n)"),
+       title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
+       subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim N(0,1) $")),
+       colour=latex2exp::TeX("$a$"),
+       shape=latex2exp::TeX("$\\alpha$"))
+```
+
+```{r}
+results <- list(results01, results02, results03, results04, results05, results06)
+save(results, file="results.RData")
+```
+
+## Two dimensional example
+
+```{r k = n^alpha data generation with two dimensions, rate = 1}
+#| cache: true
+#| echo: false
+#| collapse: true
+ns <- seq(100, 1000, 100)
+a <- c(1, 1) / sqrt(2)
+a_norms <- seq(0, 20, 2)
+alphas <-  seq(0.1, 0.5, 0.1)
+
+set.seed(100)
+results07 <- data.frame(dim_n = integer(),
+                      dim_k = integer(),
+                      param_a = double(),
+                      param_alpha = double(),
+                      ssv = double())
+for (a_norm in a_norms) {
+  for (i in 1:length(ns)) {
+    for (j in 1:length(alphas)) {
+      n <- ns[i]
+      K <- floor(n^alphas[j])
+      if (!K > 0) next # skip if K is equal to zero
+      # use the default seed 1L
+      Q <- compute_matrix(seed=1L,
+                     a= a_norm * a,
+                     n = n,
+                     K = K,
+                     sample_X_fn = function(n) {matrix(rexp(2 * n), ncol = 2L)},
+                     fv = function(x) {dnorm(x, mean=0, sd=1)},
+                     Fv = function(x) {pnorm(x, mean=0, sd=1)},
+                     guard = 1e-12)
+      
+      ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
+      
+      current_res <- data.frame(dim_n = n, dim_k = K, param_a = a_norm, param_alpha=alphas[j], ssv =ssv)
+      results07 <- rbind(results07, current_res)
+    }
+  }
+}
+```
+
+```{r k = n^alpha plotting, rate = 1}
+# plot the results
+results07 |>
+  filter(param_a %in% c(10, 20) & param_alpha < 0.12) |>
+  mutate(param_a = as.factor(param_a),
+         param_alpha = as.factor(param_alpha)) |>
+  group_by(param_a, param_alpha) |>
+  ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
+  geom_point(size=1.5) +
+  geom_line() +
+  geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
+  geom_point(size=1.5) +
+  #geom_function(fun = function(x) {sqrt(x)}, colour="black") +
+  #scale_y_log10() +
+  theme_bw() +
+  labs(x=latex2exp::TeX("$n$"),
+       y=latex2exp::TeX("Smallest singular value of $Q$"),
+       title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
+       subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim Exp(1)$")),
+       colour=latex2exp::TeX("$a$"),
+       shape=latex2exp::TeX("$\\alpha$"))
+```
--- a/scripts/results.RData
+++ b/scripts/results.RData
Author	SHA1	Message	Date
Niclas	9d9d274487	experiments with rho_n	2026-06-17 10:43:33 +02:00
Niclas	d0e0c03428	add first sketch for estimators	2026-05-20 17:03:45 +02:00
Niclas	37f235915c	add example for calculation	2026-05-20 17:02:54 +02:00
Niclas	25fe0903be	adjust plots	2026-05-20 17:02:33 +02:00
Niclas	dcb1468381	add documentation and parameter checks to the build network function	2026-05-20 17:00:33 +02:00
Niclas	106c37a1b6	add heatmap for visualization	2026-05-06 17:33:46 +02:00
Niclas	d13eee49ea	add visualization of the matrix Q wrt parameter a	2026-05-06 17:18:35 +02:00
Niclas	5acc3f4259	add function for adjacency matrix	2026-05-06 16:52:32 +02:00
Niclas	0921f7eb04	plots with n = 10k	2026-04-28 13:19:46 +02:00
Niclas	9b702d154a	first version of building the network	2026-04-01 14:35:51 +02:00
Niclas	fe36738ea1	adjusted non normal plots	2026-04-01 14:35:29 +02:00