initial commit
This commit is contained in:
192
R/singular_values.R
Normal file
192
R/singular_values.R
Normal file
@@ -0,0 +1,192 @@
|
||||
# TODO improve later using the here package
|
||||
source("R/graphon_distribution.R")
|
||||
|
||||
# 1. Compute the Matrix Q according to (3.1) -----------------------------------
|
||||
#' Compute the matrix **Q**
|
||||
#'
|
||||
#' This helper builds the matrix \eqn{Q} that appears in equation (3.1) of the
|
||||
#' graphon‑based model. The routine performs the following steps:
|
||||
#' 1. Checks that all inputs are of the correct type and dimension.
|
||||
#' 2. Generates a covariate matrix \eqn{X} of size \eqn{n \times p} using the
|
||||
#' user‑supplied sampling function `sample_X_fn`. The random seed is
|
||||
#' handled locally with **`withr::with_seed()`** so the global RNG state is
|
||||
#' left untouched.
|
||||
#' 3. Constructs the empirical conditional density of the latent variable
|
||||
#' \eqn{v} via `create_cond_density()`.
|
||||
#' 4. Computes the graphon quantiles \eqn{q_k} for a regular grid
|
||||
#' \eqn{k = 0,1/K,\dots,1}` using the wrapper `qgraphon()`.
|
||||
#' Each q_k is the quantile of k / K for k = 0, ..., K.
|
||||
#' 5. Forms the matrix \eqn{Q} whose \eqn{(k,i)}‑th entry is
|
||||
#' \deqn{F_v\!\bigl(q_{k+1} - a^\top X_i\bigr) -
|
||||
#' F_v\!\bigl(q_{k} - a^\top X_i\bigr)},
|
||||
#' where \eqn{F_v} is the CDF of the latent variable \eqn{v}. The
|
||||
#' construction is fully vectorised: the outer difference is built with
|
||||
#' `outer()`, the CDF is applied to the whole matrix at once, and the
|
||||
#' row‑wise differences are taken with `diff()`.
|
||||
#'
|
||||
#' @param seed Integer (or numeric) of length 1. Seed used to initialize the
|
||||
#' random number generator for reproducible sampling of `X`.
|
||||
#' @param a Numeric vector of length \eqn{p}. Coefficient vector that
|
||||
#' multiplies the covariates; its length must equal the number of
|
||||
#' columns of `X_matrix`.
|
||||
#' @param n Positive integer. Number of i.i.d. covariate draws to be
|
||||
#' generated.
|
||||
#' @param K Positive integer. Number of divisions of the unit interval;
|
||||
#' the resulting grid has length `K+1`.
|
||||
#' @param sample_X_fn
|
||||
#' Function with a single argument `n`. It must return an
|
||||
#' \eqn{n \times p} matrix (or an object coercible to a matrix) of
|
||||
#' covariate samples.
|
||||
#' @param fv Density function of the latent variable \eqn{v}. Must be
|
||||
#' vectorised (i.e. accept a numeric vector and return a numeric
|
||||
#' vector of the same length). Typical examples are
|
||||
#' `dnorm`, `dexp`, ….
|
||||
#' @param Fv Cumulative distribution function of the latent variable
|
||||
#' \eqn{v}. Also has to be vectorised. Typical examples are
|
||||
#' `pnorm`, `pexp`, ….
|
||||
#'
|
||||
#' @return A numeric matrix **Q** of dimension `K × n`. The \eqn{j}-th row
|
||||
#' (for `j = 1,…,K`) contains the increments of the CDF evaluated at
|
||||
#' the graphon quantiles for each observation `i = 1,…,n`. The matrix
|
||||
#' can be fed directly into downstream singular‑value or spectral
|
||||
#' analyses.
|
||||
#'
|
||||
#' @details
|
||||
#' * **Input validation** – The function stops with an informative error if any
|
||||
#' argument does not meet the required type or dimension constraints.
|
||||
#' * **Reproducible sampling** – `withr::with_seed()` guarantees that the seed
|
||||
#' is restored after the call, leaving the user's global RNG untouched.
|
||||
#' * **Vectorised construction of Q** –
|
||||
#' \preformatted{
|
||||
#' adotX <- as.vector(X %*% a) # n‑vector
|
||||
#' cdf_mat <- Fv(outer(graphon_quantiles, adotX, "-")) # (K+1) × n
|
||||
#' Q <- diff(cdf_mat, lag = 1) # K × n
|
||||
#' }
|
||||
#' This replaces the double `for`‑loop in the original implementation and
|
||||
#' yields a speed‑up of roughly an order of magnitude for moderate‑size
|
||||
#' problems.
|
||||
#'
|
||||
#' @seealso
|
||||
#' \code{\link{create_cond_density}} for the construction of the empirical
|
||||
#' conditional density,
|
||||
#' \code{\link{qgraphon}} for the quantile computation,
|
||||
#' \code{\link{compute_extreme_singular_values}} for a wrapper that also
|
||||
#' returns the smallest and largest singular values of `Q`.
|
||||
#'
|
||||
#' @examples
|
||||
#' ## Simple reproducible example ------------------------------------------------
|
||||
#' set.seed(123)
|
||||
#' ## a sampling function that returns a 2‑dimensional covariate matrix
|
||||
#' sample_X <- function(m) matrix(rnorm(m * 2), ncol = 2)
|
||||
#'
|
||||
#' ## Compute Q for a normal latent variable
|
||||
#' Q <- compute_matrix(
|
||||
#' seed = 42,
|
||||
#' a = c(1, -0.5),
|
||||
#' n = 200,
|
||||
#' K = 100,
|
||||
#' sample_X_fn = sample_X,
|
||||
#' fv = dnorm,
|
||||
#' Fv = pnorm
|
||||
#' )
|
||||
#' dim(Q) # should be 100 × 200
|
||||
#' head(Q) # a glimpse at the first few rows
|
||||
#'
|
||||
#' @export
|
||||
compute_matrix <- function(
|
||||
seed,
|
||||
a,
|
||||
n,
|
||||
K,
|
||||
sample_X_fn,
|
||||
fv,
|
||||
Fv
|
||||
) {
|
||||
## 1.1 Check inputs ==========================================================
|
||||
if (!is.numeric(seed) || length(seed) != 1) stop("'seed' must be a single number")
|
||||
if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector")
|
||||
if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer")
|
||||
if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer")
|
||||
if (!is.function(sample_X_fn)) stop("'sample_X_fn' must be a function")
|
||||
if (!is.function(fv)) stop("'f_v' must be a function")
|
||||
if (!is.function(Fv)) stop("'F_v' must be a function")
|
||||
|
||||
## 1.2 Generate the Matrix X of covariates ===================================
|
||||
# The withr environment allows us to capsulate the global state like the seed
|
||||
# and enables a better reproduction
|
||||
X <- withr::with_seed(seed, {
|
||||
as.matrix(sample_X_fn(n))
|
||||
})
|
||||
if (nrow(X) != n) stop("`sample_X_fn` must return exactly `n` rows")
|
||||
if (ncol(X) != length(a)) {
|
||||
stop("Number of columns of X (", ncol(X), ") must equal length(a) (", length(a), ")")
|
||||
}
|
||||
|
||||
## 1.3 Create conditional density ============================================
|
||||
empir_cond_density <- create_cond_density(a, fv, Fv, X)
|
||||
|
||||
## 1.4 Compute the graphon quantiles =========================================
|
||||
k <- seq(0, K) / K
|
||||
graphon_quantiles <- qgraphon(k, a = a, Fv = Fv, X_matrix = X)
|
||||
|
||||
## 1.5 Build the matrix Q ====================================================
|
||||
inner_products = as.vector(X %*% a)
|
||||
|
||||
# outer(y, x, "-") gives a matrix with entry (j,i) = y[j] - x[i]
|
||||
# then we apply the CDF `F_v` to the whole matrix at once.
|
||||
# finally we take the difference of successive rows (j) to obtain the
|
||||
# increments required by equation (3.1).
|
||||
cdf_mat <- Fv(outer(graphon_quantiles, inner_products, "-")) # (K +1) x n matrix
|
||||
Q <- diff(cdf_mat, lag=1) # operates along rows
|
||||
|
||||
Q
|
||||
}
|
||||
|
||||
# 2. Compute largest and smallest singular value -------------------------------
|
||||
# TODO: Possible improvements include
|
||||
# - use the package RSpectra for large dimensions
|
||||
# - make a switch which value to choose
|
||||
# - Include cards for very small singular values
|
||||
|
||||
#' Compute the largest and smallest singular values of a matrix
|
||||
#'
|
||||
#' This helper extracts the extreme singular values of a numeric matrix.
|
||||
#' It uses the base‑R \code{svd()} routine (with the singular vectors
|
||||
#' suppressed) and returns a list containing the largest singular value and
|
||||
#' the smallest (non‑zero) singular value.
|
||||
#'
|
||||
#' @param M A numeric matrix (or an object coercible to a matrix). The
|
||||
#' singular values are computed for this matrix.
|
||||
#'
|
||||
#' @return A **list** with two components
|
||||
#' \describe{
|
||||
#' \item{largest_singular_value}{The maximum singular value of \code{M}.}
|
||||
#' \item{smallest_singular_value}{The minimum singular value of \code{M}
|
||||
#' that is greater than zero. If all singular values are zero,
|
||||
#' the function returns \code{0}.}
|
||||
#' }
|
||||
#'
|
||||
#' @details
|
||||
#' The singular value decomposition is performed with \code{svd(M, nu = 0,
|
||||
#' nv = 0)} so that only the singular values (\code{$d}) are computed; this
|
||||
#' saves memory because the left and right singular vectors are not needed.
|
||||
#'
|
||||
#' @examples
|
||||
#' ## Small random matrix ----------------------------------------------------
|
||||
#' set.seed(123)
|
||||
#' A <- matrix(rnorm(20), nrow = 4)
|
||||
#' compute_minmax_sv(A)
|
||||
#'
|
||||
#' ## Rank‑deficient matrix (all singular values are zero) --------------------
|
||||
#' Z <- matrix(0, nrow = 3, ncol = 3)
|
||||
#' compute_minmax_sv(Z)
|
||||
#'
|
||||
#' @export
|
||||
compute_minmax_sv <- function(M) {
|
||||
s <- svd(M, nu=0, nv=0)$d
|
||||
|
||||
list(
|
||||
largest_singular_value = max(s),
|
||||
smallest_singular_value = min(s) # smallest non zero singular value
|
||||
)
|
||||
}
|
||||
Reference in New Issue
Block a user