Files
GraphonSimulation/R/singular_values.R
2026-01-26 11:56:37 +01:00

201 lines
8.8 KiB
R
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# TODO improve later using the here package
source("R/graphon_distribution.R")
# 1. Compute the Matrix Q according to (3.1) -----------------------------------
#' Compute the matrix **Q**
#'
#' This helper builds the matrix \eqn{Q} that appears in equation(3.1) of the
#' graphonbased model. The routine performs the following steps:
#' 1. Checks that all inputs are of the correct type and dimension.
#' 2. Generates a covariate matrix \eqn{X} of size \eqn{n \times p} using the
#' usersupplied sampling function `sample_X_fn`. The random seed is
#' handled locally with **`withr::with_seed()`** so the global RNG state is
#' left untouched.
#' 3. Constructs the empirical conditional density of the latent variable
#' \eqn{v} via `create_cond_density()`.
#' 4. Computes the graphon quantiles \eqn{q_k} for a regular grid
#' \eqn{k = 0,1/K,\dots,1}` using the wrapper `qgraphon()`.
#' Each q_k is the quantile of k / K for k = 0, ..., K.
#' 5. Forms the matrix \eqn{Q} whose \eqn{(k,i)}th entry is
#' \deqn{F_v\!\bigl(q_{k+1} - a^\top X_i\bigr) -
#' F_v\!\bigl(q_{k} - a^\top X_i\bigr)},
#' where \eqn{F_v} is the CDF of the latent variable \eqn{v}. The
#' construction is fully vectorised: the outer difference is built with
#' `outer()`, the CDF is applied to the whole matrix at once, and the
#' rowwise differences are taken with `diff()`.
#'
#' @param seed Integer (or numeric) of length 1. Seed used to initialize the
#' random number generator for reproducible sampling of `X`.
#' @param a Numeric vector of length \eqn{p}. Coefficient vector that
#' multiplies the covariates; its length must equal the number of
#' columns of `X_matrix`.
#' @param n Positive integer. Number of i.i.d. covariate draws to be
#' generated.
#' @param K Positive integer. Number of divisions of the unit interval;
#' the resulting grid has length `K+1`.
#' @param sample_X_fn
#' Function with a single argument `n`. It must return an
#' \eqn{n \times p} matrix (or an object coercible to a matrix) of
#' covariate samples.
#' @param fv Density function of the latent variable \eqn{v}. Must be
#' vectorised (i.e. accept a numeric vector and return a numeric
#' vector of the same length). Typical examples are
#' `dnorm`, `dexp`, ….
#' @param Fv Cumulative distribution function of the latent variable
#' \eqn{v}. Also has to be vectorised. Typical examples are
#' `pnorm`, `pexp`, ….
#' @param guard Positive numeric guard value. Default is `sqrt(.Machine$double.eps)`,
#' which is about `1.5e8` on most platforms small enough to be negligible
#' for most computations. If it is null, then it is not used.
#' The guard is used for the value k = 0, which can cause arithmetic errors.
#'
#' @return A numeric matrix **Q** of dimension `K × n`. The \eqn{j}-th row
#' (for `j = 1,…,K`) contains the increments of the CDF evaluated at
#' the graphon quantiles for each observation `i = 1,…,n`. The matrix
#' can be fed directly into downstream singularvalue or spectral
#' analyses.
#'
#' @details
#' * **Input validation** The function stops with an informative error if any
#' argument does not meet the required type or dimension constraints.
#' * **Reproducible sampling** `withr::with_seed()` guarantees that the seed
#' is restored after the call, leaving the user's global RNG untouched.
#' * **Vectorised construction of Q**
#' \preformatted{
#' adotX <- as.vector(X %*% a) # nvector
#' cdf_mat <- Fv(outer(graphon_quantiles, adotX, "-")) # (K+1) × n
#' Q <- diff(cdf_mat, lag = 1) # K × n
#' }
#' This replaces the double `for`loop in the original implementation and
#' yields a speedup of roughly an order of magnitude for moderatesize
#' problems.
#'
#' @seealso
#' \code{\link{create_cond_density}} for the construction of the empirical
#' conditional density,
#' \code{\link{qgraphon}} for the quantile computation,
#' \code{\link{compute_extreme_singular_values}} for a wrapper that also
#' returns the smallest and largest singular values of `Q`.
#'
#' @examples
#' ## Simple reproducible example ------------------------------------------------
#' set.seed(123)
#' ## a sampling function that returns a 2dimensional covariate matrix
#' sample_X <- function(m) matrix(rnorm(m * 2), ncol = 2)
#'
#' ## Compute Q for a normal latent variable
#' Q <- compute_matrix(
#' seed = 42,
#' a = c(1, -0.5),
#' n = 200,
#' K = 100,
#' sample_X_fn = sample_X,
#' fv = dnorm,
#' Fv = pnorm
#' )
#' dim(Q) # should be 100 × 200
#' head(Q) # a glimpse at the first few rows
#'
#' @export
compute_matrix <- function(
seed,
a,
n,
K,
sample_X_fn,
fv,
Fv,
guard = sqrt(.Machine$double.eps)
) {
## 1.1 Check inputs ==========================================================
if (!is.numeric(seed) || length(seed) != 1) stop("'seed' must be a single number")
if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector")
if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer")
if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer")
if (!is.function(sample_X_fn)) stop("'sample_X_fn' must be a function")
if (!is.function(fv)) stop("'f_v' must be a function")
if (!is.function(Fv)) stop("'F_v' must be a function")
## 1.2 Generate the Matrix X of covariates ===================================
# The withr environment allows us to capsulate the global state like the seed
# and enables a better reproduction
X <- withr::with_seed(seed, {
as.matrix(sample_X_fn(n))
})
if (nrow(X) != n) stop("`sample_X_fn` must return exactly `n` rows")
if (ncol(X) != length(a)) {
stop("Number of columns of X (", ncol(X), ") must equal length(a) (", length(a), ")")
}
## 1.3 Create conditional density ============================================
empir_cond_density <- create_cond_density(a, fv, Fv, X)
## 1.4 Compute the graphon quantiles =========================================
k <- seq(0, K) / K
if (!is.null(guard)) {
k[1] <- guard
}
graphon_quantiles <- qgraphon(k, a = a, Fv = Fv, X_matrix = X)
## 1.5 Build the matrix Q ====================================================
inner_products = as.vector(X %*% a)
# outer(y, x, "-") gives a matrix with entry (j,i) = y[j] - x[i]
# then we apply the CDF `F_v` to the whole matrix at once.
# finally we take the difference of successive rows (j) to obtain the
# increments required by equation (3.1).
cdf_mat <- Fv(outer(graphon_quantiles, inner_products, "-")) # (K +1) x n matrix
Q <- diff(cdf_mat, lag=1) # operates along rows
Q
}
# 2. Compute largest and smallest singular value -------------------------------
# TODO: Possible improvements include
# - use the package RSpectra for large dimensions
# - make a switch which value to choose
# - Include cards for very small singular values
#' Compute the largest and smallest singular values of a matrix
#'
#' This helper extracts the extreme singular values of a numeric matrix.
#' It uses the baseR \code{svd()} routine (with the singular vectors
#' suppressed) and returns a list containing the largest singular value and
#' the smallest (nonzero) singular value.
#'
#' @param M A numeric matrix (or an object coercible to a matrix). The
#' singular values are computed for this matrix.
#'
#' @return A **list** with two components
#' \describe{
#' \item{largest_singular_value}{The maximum singular value of \code{M}.}
#' \item{smallest_singular_value}{The minimum singular value of \code{M}
#' that is greater than zero. If all singular values are zero,
#' the function returns \code{0}.}
#' }
#'
#' @details
#' The singular value decomposition is performed with \code{svd(M, nu = 0,
#' nv = 0)} so that only the singular values (\code{$d}) are computed; this
#' saves memory because the left and right singular vectors are not needed.
#'
#' @examples
#' ## Small random matrix ----------------------------------------------------
#' set.seed(123)
#' A <- matrix(rnorm(20), nrow = 4)
#' compute_minmax_sv(A)
#'
#' ## Rankdeficient matrix (all singular values are zero) --------------------
#' Z <- matrix(0, nrow = 3, ncol = 3)
#' compute_minmax_sv(Z)
#'
#' @export
compute_minmax_sv <- function(M) {
s <- svd(M, nu=0, nv=0)$d
list(
largest_singular_value = max(s),
smallest_singular_value = min(s) # smallest non zero singular value
)
}