# TODO improve later using the here package source("R/graphon_distribution.R") # 1. Compute the Matrix Q according to (3.1) ----------------------------------- #' Compute the matrix **Q** #' #' This helper builds the matrix \eqn{Q} that appears in equation (3.1) of the #' graphon‑based model. The routine performs the following steps: #' 1. Checks that all inputs are of the correct type and dimension. #' 2. Generates a covariate matrix \eqn{X} of size \eqn{n \times p} using the #' user‑supplied sampling function `sample_X_fn`. The random seed is #' handled locally with **`withr::with_seed()`** so the global RNG state is #' left untouched. #' 3. Constructs the empirical conditional density of the latent variable #' \eqn{v} via `create_cond_density()`. #' 4. Computes the graphon quantiles \eqn{q_k} for a regular grid #' \eqn{k = 0,1/K,\dots,1}` using the wrapper `qgraphon()`. #' Each q_k is the quantile of k / K for k = 0, ..., K. #' 5. Forms the matrix \eqn{Q} whose \eqn{(k,i)}‑th entry is #' \deqn{F_v\!\bigl(q_{k+1} - a^\top X_i\bigr) - #' F_v\!\bigl(q_{k} - a^\top X_i\bigr)}, #' where \eqn{F_v} is the CDF of the latent variable \eqn{v}. The #' construction is fully vectorised: the outer difference is built with #' `outer()`, the CDF is applied to the whole matrix at once, and the #' row‑wise differences are taken with `diff()`. #' #' @param seed Integer (or numeric) of length 1. Seed used to initialize the #' random number generator for reproducible sampling of `X`. #' @param a Numeric vector of length \eqn{p}. Coefficient vector that #' multiplies the covariates; its length must equal the number of #' columns of `X_matrix`. #' @param n Positive integer. Number of i.i.d. covariate draws to be #' generated. #' @param K Positive integer. Number of divisions of the unit interval; #' the resulting grid has length `K+1`. #' @param sample_X_fn #' Function with a single argument `n`. It must return an #' \eqn{n \times p} matrix (or an object coercible to a matrix) of #' covariate samples. #' @param fv Density function of the latent variable \eqn{v}. Must be #' vectorised (i.e. accept a numeric vector and return a numeric #' vector of the same length). Typical examples are #' `dnorm`, `dexp`, …. #' @param Fv Cumulative distribution function of the latent variable #' \eqn{v}. Also has to be vectorised. Typical examples are #' `pnorm`, `pexp`, …. #' @param guard Positive numeric guard value. Default is `sqrt(.Machine$double.eps)`, #' which is about `1.5e‑8` on most platforms – small enough to be negligible #' for most computations. If it is null, then it is not used. #' The guard is used for the value k = 0, which can cause arithmetic errors. #' #' @return A numeric matrix **Q** of dimension `K × n`. The \eqn{j}-th row #' (for `j = 1,…,K`) contains the increments of the CDF evaluated at #' the graphon quantiles for each observation `i = 1,…,n`. The matrix #' can be fed directly into downstream singular‑value or spectral #' analyses. #' #' @details #' * **Input validation** – The function stops with an informative error if any #' argument does not meet the required type or dimension constraints. #' * **Reproducible sampling** – `withr::with_seed()` guarantees that the seed #' is restored after the call, leaving the user's global RNG untouched. #' * **Vectorised construction of Q** – #' \preformatted{ #' adotX <- as.vector(X %*% a) # n‑vector #' cdf_mat <- Fv(outer(graphon_quantiles, adotX, "-")) # (K+1) × n #' Q <- diff(cdf_mat, lag = 1) # K × n #' } #' This replaces the double `for`‑loop in the original implementation and #' yields a speed‑up of roughly an order of magnitude for moderate‑size #' problems. #' #' @seealso #' \code{\link{create_cond_density}} for the construction of the empirical #' conditional density, #' \code{\link{qgraphon}} for the quantile computation, #' \code{\link{compute_extreme_singular_values}} for a wrapper that also #' returns the smallest and largest singular values of `Q`. #' #' @examples #' ## Simple reproducible example ------------------------------------------------ #' set.seed(123) #' ## a sampling function that returns a 2‑dimensional covariate matrix #' sample_X <- function(m) matrix(rnorm(m * 2), ncol = 2) #' #' ## Compute Q for a normal latent variable #' Q <- compute_matrix( #' seed = 42, #' a = c(1, -0.5), #' n = 200, #' K = 100, #' sample_X_fn = sample_X, #' fv = dnorm, #' Fv = pnorm #' ) #' dim(Q) # should be 100 × 200 #' head(Q) # a glimpse at the first few rows #' #' @export compute_matrix <- function( seed, a, n, K, sample_X_fn, fv, Fv, guard = sqrt(.Machine$double.eps) ) { ## 1.1 Check inputs ========================================================== if (!is.numeric(seed) || length(seed) != 1) stop("'seed' must be a single number") if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector") if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer") if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer") if (!is.function(sample_X_fn)) stop("'sample_X_fn' must be a function") if (!is.function(fv)) stop("'f_v' must be a function") if (!is.function(Fv)) stop("'F_v' must be a function") ## 1.2 Generate the Matrix X of covariates =================================== # The withr environment allows us to capsulate the global state like the seed # and enables a better reproduction X <- withr::with_seed(seed, { as.matrix(sample_X_fn(n)) }) if (nrow(X) != n) stop("`sample_X_fn` must return exactly `n` rows") if (ncol(X) != length(a)) { stop("Number of columns of X (", ncol(X), ") must equal length(a) (", length(a), ")") } ## 1.3 Create conditional density ============================================ empir_cond_density <- create_cond_density(a, fv, Fv, X) ## 1.4 Compute the graphon quantiles ========================================= k <- seq(0, K) / K if (!is.null(guard)) { k[1] <- guard } graphon_quantiles <- qgraphon(k, a = a, Fv = Fv, X_matrix = X) ## 1.5 Build the matrix Q ==================================================== inner_products = as.vector(X %*% a) # outer(y, x, "-") gives a matrix with entry (j,i) = y[j] - x[i] # then we apply the CDF `F_v` to the whole matrix at once. # finally we take the difference of successive rows (j) to obtain the # increments required by equation (3.1). cdf_mat <- Fv(outer(graphon_quantiles, inner_products, "-")) # (K +1) x n matrix Q <- diff(cdf_mat, lag=1) # operates along rows Q } # 2. Compute largest and smallest singular value ------------------------------- # TODO: Possible improvements include # - use the package RSpectra for large dimensions # - make a switch which value to choose # - Include cards for very small singular values #' Compute the largest and smallest singular values of a matrix #' #' This helper extracts the extreme singular values of a numeric matrix. #' It uses the base‑R \code{svd()} routine (with the singular vectors #' suppressed) and returns a list containing the largest singular value and #' the smallest (non‑zero) singular value. #' #' @param M A numeric matrix (or an object coercible to a matrix). The #' singular values are computed for this matrix. #' #' @return A **list** with two components #' \describe{ #' \item{largest_singular_value}{The maximum singular value of \code{M}.} #' \item{smallest_singular_value}{The minimum singular value of \code{M} #' that is greater than zero. If all singular values are zero, #' the function returns \code{0}.} #' } #' #' @details #' The singular value decomposition is performed with \code{svd(M, nu = 0, #' nv = 0)} so that only the singular values (\code{$d}) are computed; this #' saves memory because the left and right singular vectors are not needed. #' #' @examples #' ## Small random matrix ---------------------------------------------------- #' set.seed(123) #' A <- matrix(rnorm(20), nrow = 4) #' compute_minmax_sv(A) #' #' ## Rank‑deficient matrix (all singular values are zero) -------------------- #' Z <- matrix(0, nrow = 3, ncol = 3) #' compute_minmax_sv(Z) #' #' @export compute_minmax_sv <- function(M) { s <- svd(M, nu=0, nv=0)$d list( largest_singular_value = max(s), smallest_singular_value = min(s) # smallest non zero singular value ) }