initial commit

2026-01-16 19:31:08 +01:00
parent 3415358509
commit dd3c3e4b81
9 changed files with 2174 additions and 0 deletions
--- a/R/singular_values.R
+++ b/R/singular_values.R
@@ -0,0 +1,192 @@
+# TODO improve later using the here package
+source("R/graphon_distribution.R")
+
+# 1. Compute the Matrix Q according to (3.1) -----------------------------------
+#' Compute the matrix **Q**
+#'
+#' This helper builds the matrix \eqn{Q} that appears in equation (3.1) of the
+#' graphon‑based model.  The routine performs the following steps:
+#'   1. Checks that all inputs are of the correct type and dimension.  
+#'   2. Generates a covariate matrix \eqn{X} of size \eqn{n \times p} using the
+#'      user‑supplied sampling function `sample_X_fn`.  The random seed is
+#'      handled locally with **`withr::with_seed()`** so the global RNG state is
+#'      left untouched.  
+#'   3. Constructs the empirical conditional density of the latent variable
+#'      \eqn{v} via `create_cond_density()`.  
+#'   4. Computes the graphon quantiles \eqn{q_k} for a regular grid
+#'      \eqn{k = 0,1/K,\dots,1}` using the wrapper `qgraphon()`.  
+#'      Each q_k is the quantile of k / K for k = 0, ..., K.
+#'   5. Forms the matrix \eqn{Q} whose \eqn{(k,i)}‑th entry is  
+#'      \deqn{F_v\!\bigl(q_{k+1} - a^\top X_i\bigr) -
+#'            F_v\!\bigl(q_{k}   - a^\top X_i\bigr)},
+#'      where \eqn{F_v} is the CDF of the latent variable \eqn{v}.  The
+#'      construction is fully vectorised: the outer difference is built with
+#'      `outer()`, the CDF is applied to the whole matrix at once, and the
+#'      row‑wise differences are taken with `diff()`.  
+#'
+#' @param seed   Integer (or numeric) of length 1.  Seed used to initialize the
+#'               random number generator for reproducible sampling of `X`.
+#' @param a      Numeric vector of length \eqn{p}.  Coefficient vector that
+#'               multiplies the covariates; its length must equal the number of
+#'               columns of `X_matrix`.
+#' @param n      Positive integer.  Number of i.i.d. covariate draws to be
+#'               generated.
+#' @param K      Positive integer.  Number of divisions of the unit interval;
+#'               the resulting grid has length `K+1`.
+#' @param sample_X_fn
+#'               Function with a single argument `n`.  It must return an
+#'               \eqn{n \times p} matrix (or an object coercible to a matrix) of
+#'               covariate samples.
+#' @param fv     Density function of the latent variable \eqn{v}.  Must be
+#'               vectorised (i.e. accept a numeric vector and return a numeric
+#'               vector of the same length).  Typical examples are
+#'               `dnorm`, `dexp`, ….
+#' @param Fv     Cumulative distribution function of the latent variable
+#'               \eqn{v}.  Also has to be vectorised.  Typical examples are
+#'               `pnorm`, `pexp`, ….
+#'
+#' @return A numeric matrix **Q** of dimension `K × n`.  The \eqn{j}-th row
+#'         (for `j = 1,…,K`) contains the increments of the CDF evaluated at
+#'         the graphon quantiles for each observation `i = 1,…,n`.  The matrix
+#'         can be fed directly into downstream singular‑value or spectral
+#'         analyses.
+#'
+#' @details
+#' * **Input validation** – The function stops with an informative error if any
+#'   argument does not meet the required type or dimension constraints.
+#' * **Reproducible sampling** – `withr::with_seed()` guarantees that the seed
+#'   is restored after the call, leaving the user's global RNG untouched.
+#' * **Vectorised construction of Q** –  
+#'   \preformatted{
+#'   adotX   <- as.vector(X %*% a)                     # n‑vector
+#'   cdf_mat <- Fv(outer(graphon_quantiles, adotX, "-")) # (K+1) × n
+#'   Q       <- diff(cdf_mat, lag = 1)                 # K × n
+#'   }
+#'   This replaces the double `for`‑loop in the original implementation and
+#'   yields a speed‑up of roughly an order of magnitude for moderate‑size
+#'   problems.
+#'
+#' @seealso
+#'   \code{\link{create_cond_density}} for the construction of the empirical
+#'   conditional density,  
+#'   \code{\link{qgraphon}} for the quantile computation,  
+#'   \code{\link{compute_extreme_singular_values}} for a wrapper that also
+#'   returns the smallest and largest singular values of `Q`.
+#'
+#' @examples
+#' ## Simple reproducible example ------------------------------------------------
+#' set.seed(123)
+#' ## a sampling function that returns a 2‑dimensional covariate matrix
+#' sample_X <- function(m) matrix(rnorm(m * 2), ncol = 2)
+#'
+#' ## Compute Q for a normal latent variable
+#' Q <- compute_matrix(
+#'   seed       = 42,
+#'   a          = c(1, -0.5),
+#'   n          = 200,
+#'   K          = 100,
+#'   sample_X_fn = sample_X,
+#'   fv         = dnorm,
+#'   Fv         = pnorm
+#' )
+#' dim(Q)               # should be 100 × 200
+#' head(Q)              # a glimpse at the first few rows
+#'
+#' @export
+compute_matrix <- function(
+    seed,
+    a,
+    n,
+    K,
+    sample_X_fn,
+    fv,
+    Fv
+) {
+  ## 1.1 Check inputs ==========================================================
+  if (!is.numeric(seed) || length(seed) != 1) stop("'seed' must be a single number")
+  if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector")
+  if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer")
+  if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer")
+  if (!is.function(sample_X_fn)) stop("'sample_X_fn' must be a function")
+  if (!is.function(fv)) stop("'f_v' must be a function")
+  if (!is.function(Fv)) stop("'F_v' must be a function")
+  
+  ## 1.2 Generate the Matrix X of covariates ===================================
+  # The withr environment allows us to capsulate the global state like the seed
+  # and enables a better reproduction
+  X <- withr::with_seed(seed, {
+    as.matrix(sample_X_fn(n))
+  })
+  if (nrow(X) != n) stop("`sample_X_fn` must return exactly `n` rows")
+  if (ncol(X) != length(a)) {
+    stop("Number of columns of X (", ncol(X), ") must equal length(a) (", length(a), ")")
+  }
+  
+  ## 1.3 Create conditional density ============================================
+  empir_cond_density <- create_cond_density(a, fv, Fv, X)
+  
+  ## 1.4 Compute the graphon quantiles =========================================
+  k <- seq(0, K) / K
+  graphon_quantiles <- qgraphon(k, a = a, Fv = Fv, X_matrix = X)
+  
+  ## 1.5 Build the matrix Q ====================================================
+  inner_products = as.vector(X %*% a)
+  
+  # outer(y, x, "-")  gives a matrix with entry (j,i) = y[j] - x[i]
+  #   then we apply the CDF `F_v` to the whole matrix at once.
+  #   finally we take the difference of successive rows (j) to obtain the
+  #   increments required by equation (3.1).
+  cdf_mat <- Fv(outer(graphon_quantiles, inner_products, "-")) # (K +1) x n matrix
+  Q <- diff(cdf_mat, lag=1)                           # operates along rows
+  
+  Q
+}
+
+# 2. Compute largest and smallest singular value -------------------------------
+# TODO: Possible improvements include
+# - use the package RSpectra for large dimensions
+# - make a switch which value to choose
+# - Include cards for very small singular values
+
+#' Compute the largest and smallest singular values of a matrix
+#'
+#' This helper extracts the extreme singular values of a numeric matrix.
+#' It uses the base‑R \code{svd()} routine (with the singular vectors
+#' suppressed) and returns a list containing the largest singular value and
+#' the smallest (non‑zero) singular value.
+#'
+#' @param M A numeric matrix (or an object coercible to a matrix).  The
+#'   singular values are computed for this matrix.
+#'
+#' @return A **list** with two components
+#' \describe{
+#'   \item{largest_singular_value}{The maximum singular value of \code{M}.}
+#'   \item{smallest_singular_value}{The minimum singular value of \code{M}
+#'         that is greater than zero.  If all singular values are zero,
+#'         the function returns \code{0}.}
+#' }
+#'
+#' @details
+#' The singular value decomposition is performed with \code{svd(M, nu = 0,
+#' nv = 0)} so that only the singular values (\code{$d}) are computed; this
+#' saves memory because the left and right singular vectors are not needed.
+#'
+#' @examples
+#' ## Small random matrix ----------------------------------------------------
+#' set.seed(123)
+#' A <- matrix(rnorm(20), nrow = 4)
+#' compute_minmax_sv(A)
+#'
+#' ## Rank‑deficient matrix (all singular values are zero) --------------------
+#' Z <- matrix(0, nrow = 3, ncol = 3)
+#' compute_minmax_sv(Z)
+#'
+#' @export
+compute_minmax_sv <- function(M) {
+  s <- svd(M, nu=0, nv=0)$d
+
+  list(
+    largest_singular_value  = max(s),
+    smallest_singular_value = min(s) # smallest non zero singular value
+  )
+}