initial commit

2026-01-16 19:31:08 +01:00
parent 3415358509
commit dd3c3e4b81
9 changed files with 2174 additions and 0 deletions
--- a/.Rprofile
+++ b/.Rprofile
@@ -0,0 +1 @@
+source("renv/activate.R")
--- a/GraphonSimulation.Rproj
+++ b/GraphonSimulation.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
--- a/R/graphon_distribution.R
+++ b/R/graphon_distribution.R
@@ -0,0 +1,324 @@
+# Load the general quantile function, the loading can be improved later
+# using the here package.
+source("R/qinf.R")
+
+
+# 1. Distribution Function -----------------------------------------------------
+
+#' Empirical Distribution function for the graphon model.
+#'
+#' Computes the empirical expectation
+#' \deqn{E[F_v(y - a^\top X_i)]}
+#' using observed covariate samples \eqn{X_i}. This is done by evaluating the
+#' supplied CDF \code{Fv} at the shifted values \eqn{y - a^\top X_i} and taking
+#' their empirical average. See also Remark 2.1
+#'
+#' @param y Numeric scalar or vector of evaluation points.
+#' @param a Numeric vector. Coefficient vector used to compute the inner products
+#'   \eqn{a^\top X_i}. Its length must match the number of columns in
+#'   \code{X_matrix}.
+#' @param Fv Function. Cumulative distribution function of the noise variable
+#'   \eqn{v} (e.g., \code{pnorm}, \code{pexp}, etc.). Must accept a numeric vector
+#'   and return a numeric vector of the same length.
+#' @param X_matrix Numeric matrix of dimension \eqn{n \times p}, where each row
+#'   corresponds to an observed covariate vector \eqn{X_i}.
+#'
+#' @return A numeric scalar giving the empirical expectation
+#'   \eqn{(1/n) \sum_{i=1}^n F_v(y - a^\top X_i)}.
+#'
+#' @examples
+#' set.seed(1)
+#' X <- matrix(rnorm(100), ncol = 2)
+#' a <- c(1, -0.5)
+#' y <- 0
+#' pgraphon(y, a, pnorm, X)
+#'
+#' @export
+pgraphon <- function(
+    y,           # scalar
+    a,           # vector (coefficient vector, can be one-dimensional)
+    Fv,          # CDF function of the v's (e.g., pnorm, pexp, etc.)
+    X_matrix     # n x p matrix: each row is X_i
+) {
+  ## 1.1 Check inputs ==========================================================
+  if (ncol(X_matrix) != length(a)) {
+    stop("Number of columns in X_matrix must match length of a")
+  }
+  if (!is.numeric(y)) {
+    stop("'y' must be numeric (scalar or vector)")
+  }
+  if (!is.numeric(a) || !is.vector(a)) {
+    stop("'a' must be a numeric vector")
+  }
+  if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
+    stop("'X_matrix' must be a numeric matrix")
+  }
+  if (!is.function(Fv)) {
+    stop("'Fv' must be a function (a CDF)")
+  }
+  ## 1.2 Compute the expected value ============================================
+  
+  # Compute a^T X_i as in the paper. Here we switched to the R convention that each
+  # observation is a row. The paper assumes that each observation is a vector
+  inner_products <- as.vector(X_matrix %*% a)  # vector of length n
+  
+  # outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i)
+  # equals y[j] - inner_prod[i].
+  dev_mat <- outer(y, inner_products, "-")
+  
+  # Apply CDF Fv to each deviation
+  # With sapply we do not have to assume that Fv is vectorized
+  cdf_values <- sapply(dev_mat, Fv)
+  
+  # Row means give the empirical expectation for each y_j
+  out <- rowMeans(cdf_vals)
+  out
+}
+
+# 2. Density Function ----------------------------------------------------------
+
+#' Empirical Graphon Density Estimate
+#'
+#' Computes the empirical expectation \eqn{E[f_v(y - a^\top X_i)]} using observed
+#' covariate samples \eqn{X_i}. This serves as an empirical approximation of the
+#' graphon density evaluated at \eqn{y}, where \eqn{f_v} is the density of the
+#' latent variable \eqn{v}. See also Remark 2.1
+#'
+#' @param y Numeric scalar or vector of points at which the density should be 
+#'   evaluated.
+#' @param a Numeric vector. Coefficient vector used to compute the linear
+#'   projection \eqn{a^\top X_i}. Its length must match the number of columns in
+#'   \code{X_matrix}.
+#' @param fv Function. Density function of the latent variable \eqn{v} (e.g.,
+#'   \code{dnorm}, \code{dexp}). Must accept a numeric vector and return a
+#'   numeric vector of the same length.
+#' @param X_matrix Numeric matrix of size \eqn{n \times p}. Each row represents
+#'   an observed covariate vector \eqn{X_i}. The number of columns must match the
+#'   length of \code{a}.
+#'
+#' @return A numeric scalar giving the empirical average
+#'   \eqn{\frac{1}{n} \sum_{i=1}^n f_v(y - a^\top X_i)}.
+#'
+#' @examples
+#' set.seed(1)
+#' X <- matrix(rnorm(50), ncol = 2)
+#' a <- c(1, -0.5)
+#' dgraphon(y = 0.2, a = a, fv = dnorm, X_matrix = X)
+#'
+#' @export
+dgraphon <- function(
+    y,           # scalar
+    a,           # vector (coefficient vector, can be )
+    fv,          # density function of the v's (e.g., dnorm, dexp, etc.)
+    X_matrix     # n x p matrix: each row is X_i
+) {
+  ## 2.1 Check inputs ----------------------------------------------------------
+  if (!is.numeric(y)) {
+    stop("'y' must be numeric (scalar or vector)")
+  }
+  if (!is.numeric(a) || !is.vector(a)) {
+    stop("'a' must be a numeric vector")
+  }
+  if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
+    stop("'X_matrix' must be a numeric matrix")
+  }
+  if (ncol(X_matrix) != length(a)) {
+    stop("Number of columns in X_matrix must match length of a")
+  }
+  if (!is.function(fv)) {
+    stop("'fv' must be a function (a density)")
+  }
+  
+  ## 1.2 Compute the expected value ============================================
+  
+  # Compute a^T X_i as in the paper. Here we switched to the R convention that each
+  # observation is a row. The paper assumes that each observation is a vector
+  inner_products <- as.vector(X_matrix %*% a)  # vector of length n
+  
+  # outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i)
+  # equals y[j] - inner_prod[i].
+  dev_mat <- outer(y, inner_products, "-")
+  
+  # Apply the density function to each y[j]
+  # With sapply we do not have to assume that fv is vectorized
+  dens_vals <- sapply(dev_mat, fv)
+  
+  # Row means give the empirical expectation for each y_j
+  out <- rowMeans(dens_vals)
+  out
+}
+
+# 3. Quantile Function ---------------------------------------------------------
+
+#' Quantile of the empirical graphon distribution
+#'
+#' This is a thin wrapper around the generic infimum‑type quantile routine
+#' \code{qinf()}.  It builds the CDF of the graphon,
+#' \eqn{p_{\text{graphon}}(x) = \frac{1}{n}\sum_{i=1}^{n}F_v\bigl(x-a^\top X_i\bigr)},
+#' and then finds the quantile(s) for the supplied probability(ies) \code{p}.
+#'
+#' @param p Numeric vector of probabilities in \eqn{[0,1]}.  Values outside this
+#'   interval are rejected.
+#' @param a Numeric coefficient vector.  Its length must equal the number of
+#'   columns of \code{X_matrix}.
+#' @param Fv Function.  The CDF of the latent variable $v$.  It must be
+#'   vectorised (i.e. accept a numeric vector and return a numeric vector of
+#'   the same length).  Typical examples are \code{pnorm}, \code{pexp}, etc.
+#' @param X_matrix Numeric matrix of dimension \eqn{n\times p}.  Each row
+#'   corresponds to an observation $X_i$.
+#' @param lower,upper Numeric scalars giving the search interval for the root
+#'   finder.  By default they are set to \code{-Inf} and \code{Inf}.
+#' @param tol Numeric tolerance for the bisection algorithm used inside
+#'   \code{qinf()}.  The default is the square‑root of machine epsilon.
+#' @param max.iter Maximum number of bisection iterations (safety guard).
+#' @param ... Additional arguments that are passed **directly** to the
+#'   internal CDF \code{pgraphon}.  This makes the wrapper flexible – you do not
+#'   have to list every argument (e.g. \code{a}, \code{X_matrix}, \code{Fv})
+#'   explicitly.
+#'
+#' @return A numeric vector of the same length as \code{p} containing the
+#'   quantiles.  The vector is named by the probabilities.
+#'
+#' @examples
+#' ## ---- simple normal example ------------------------------------------------
+#' set.seed(123)
+#' X <- matrix(rnorm(200), ncol = 2)          # n = 100, p = 2
+#' a <- c(0.7, -0.3)
+#' qgraphon(p = c(0.25, 0.5, 0.75),
+#'          a = a,
+#'          Fv = pnorm,
+#'          X_matrix = X)
+#'
+#' ## ---- mixture example ------------------------------------------------------
+#' mix_cdf <- function(x, w = 0.4, mu = 0, sigma = 1) {
+#'   w * (x >= 0) + (1 - w) * pnorm(x, mean = mu, sd = sigma)
+#' }
+#' qgraphon(p = seq(0, 1, 0.2),
+#'          a = a,
+#'          Fv = mix_cdf,
+#'          X_matrix = X)
+#'
+#' @export
+qgraphon <- function(
+    p,                               # probabilities
+    a,                               # coefficient vector
+    Fv,                              # CDF of the v's
+    X_matrix,                         # X_i samples, matrix
+    lower = -Inf,                     # lower bound roots
+    upper =  Inf,                     # upper bound roots
+    tol   = .Machine$double.eps^0.5,  # parameter root finding
+    max.iter = 100) {
+  
+  ## 3.1 Check inputs ----------------------------------------------------------
+  if (!is.numeric(p) || any(is.na(p))) {
+    stop("'p' must be a numeric vector without NA values")
+  }
+  if (any(p < 0 | p > 1)) {
+    stop("All probabilities in 'p' must lie in [0, 1]")
+  }
+  
+  if (!is.numeric(a) || !is.vector(a)) {
+    stop("'a' must be a numeric vector")
+  }
+  
+  if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
+    stop("'X_matrix' must be a numeric matrix")
+  }
+  
+  if (ncol(X_matrix) != length(a)) {
+    stop("Number of columns of 'X_matrix' (", ncol(X_matrix),
+         ") must equal length of 'a' (", length(a), ")")
+  }
+  
+  if (!is.function(Fv)) {
+    stop("'Fv' must be a function (the CDF of the latent variable)")
+  }
+  
+  ## 3.2 Call the generic quantile function ------------------------------------
+  out <- qinf(F = pgraphon,
+            p = p,
+            lower = lower,
+            upper = upper,
+            tol = tol,
+            max.iter = max.iter,
+            a = a,
+            X_matrix = X_matrix,
+            Fv = Fv)
+  
+  # Name the result by the probabilities – this mirrors the behaviour of
+  # base‑R quantile functions (e.g. qnorm, qbeta).
+  names(out) <- as.character(p)
+  out
+}
+
+
+# 4. Create conditional density ------------------------------------------------
+
+#' Create a Conditional Density Function for the Graphon Model.
+#'
+#' This function constructs and returns another function that computes the
+#' conditional density of the graphon model at specified values.  
+#' The returned function takes inputs \code{u} (probabilities) and \code{x}
+#' (a scalar covariate value) and evaluates the graphon-based conditional
+#' density using the supplied coefficient vector \code{a}, the density
+#' \code{fv}, the distribution function \code{Fv}, and the empirical sample
+#' \code{X_matrix}.
+#'
+#' @param a Numeric vector. Coefficient vector used in the graphon model; must
+#'   have length equal to the number of columns in \code{X_matrix}.
+#' @param fv Function. Density function of the latent variable \eqn{v}
+#'   (e.g., \code{dnorm}, \code{dexp}).
+#' @param Fv Function. Distribution function (CDF) corresponding to \code{fv}
+#'   (e.g., \code{pnorm}, \code{pexp}).
+#' @param X_matrix Numeric matrix. An \eqn{n \times p} matrix of covariates
+#'   \eqn{X_i}, where each row corresponds to an observation.
+#'
+#' @return A function of two arguments \code{u} and \code{x}.  
+#'   The returned function is vectorized in \code{u} and returns the conditional
+#'   density:
+#'   \deqn{ f_{U \mid X}(u \mid x) = \frac{f_v(q(u) - a^\top x)}{
+#'           \mathbb{E}[f_v(q(u) - a^\top X_i)] },
+#'   }
+#'   where \eqn{q(u)} is computed via \code{qgraphon()}.
+#'
+#' @details
+#' Internally, the function constructs a scalar evaluator
+#' \code{scalar_conditional_density()} that computes the conditional density for a
+#' single value of \code{u}.  
+#' A wrapper function \code{conditional_density()} then applies this scalar
+#' function to each element of the input vector \code{u}.
+#'
+#' Note: the function relies on the existence of \code{qgraphon()},
+#' \code{pgraphon()}, and \code{dgraphon()} in the user's environment.
+#'
+#' @examples
+#' \dontrun{
+#' a <- c(1, -0.5)
+#' fv <- dnorm
+#' Fv <- pnorm
+#' X_matrix <- matrix(rnorm(200), ncol = 2)
+#'
+#' cond_dens <- create_cond_density(a, fv, Fv, X_matrix)
+#' cond_dens(c(0.2, 0.5, 0.8), x = 1.0)
+#' }
+#'
+#' @export
+create_cond_density <- function(
+    a,
+    fv,
+    Fv,
+    X_matrix){
+  
+  scalar_conditional_density <-function(u, x) {
+    q_a  <- qgraphon(u, a, Fv, X_matrix)
+    num  <- fv(q_a - x %*% a)
+    den  <- dgraphon(q_a, a, fv, X_matrix)
+    num / den
+  }
+  
+  conditional_density <- function(u, x) {
+    vapply(u, scalar_conditional_density, numeric(1), x = x)
+  }
+  
+  return(conditional_density)
+}
--- a/R/qinf.R
+++ b/R/qinf.R
@@ -0,0 +1,146 @@
+#' Generic infimum‑type quantile function
+#'
+#' @param F   A function that evaluates the distribution function
+#'            (CDF).  It must be monotone non‑decreasing in its first
+#'            argument and return values in $0,1$.  Additional
+#'            arguments for the CDF can be supplied via `...`.
+#' @param p   Numeric vector of probabilities.  Values must lie in
+#'            $0,1$; values outside this range give `NaN`.
+#' @param lower  Left end of the search interval.  By default
+#'            `-Inf` is used; if the CDF is known to be zero below a
+#'            finite value you can give that value to speed up the
+#'            search.
+#' @param upper  Right end of the search interval.  By default
+#'            `Inf` is used; if the CDF is known to be one above a
+#'            finite value you can give that value.
+#' @param tol   Desired absolute tolerance for the quantile.  The
+#'            algorithm stops when the interval width is ≤ `tol`.
+#' @param max.iter Maximum number of bisection iterations (safety
+#'            guard).  The default is `1000`.
+#' @param ...   Additional arguments passed to `F`.
+#'
+#' @return A numeric vector of the same length as `p` containing the
+#'         infimum‑type quantiles.
+#' @examples
+#' ## 1. Continuous normal distribution (compare with qnorm)
+#' qinf(pnorm, p = c(0.025, 0.5, 0.975), lower = -10, upper = 10)
+#' qnorm(c(0.025, 0.5, 0.975))
+#'
+#' ## 2. Discrete distribution: Binomial(10, 0.3)
+#' Fbinom <- function(x, size, prob) pbinom(floor(x), size, prob)
+#' qinf(Fbinom, p = seq(0, 1, 0.1), size = 10, prob = 0.3)
+#' qbinom(seq(0, 1, 0.1), size = 10, prob = 0.3)   # built‑in quantile
+#'
+#' ## 3. Mixed distribution (continuous + point mass at 0)
+#' Fmix <- function(x, sigma = 1) {
+#'   0.3 * (x >= 0) + 0.7 * pnorm(x, sd = sigma)
+#' }
+#' qinf(Fmix, p = c(0.1, 0.3, 0.5, 0.9), lower = -5, upper = 5)
+#' @export
+qinf <- function(F,
+                 p,
+                 lower = -Inf,
+                 upper =  Inf,
+                 tol = .Machine$double.eps^0.5,
+                 max.iter = 1000L,
+                 ...) {
+  
+  # 1.  Input checks -----------------------------------------------------------
+  if (!is.function(F))
+    stop("'F' must be a function that evaluates a CDF")
+  if (!is.numeric(p))
+    stop("'p' must be numeric")
+  if (any(is.na(p)))               # keep NA positions in the output
+    warning("NA probabilities supplied – corresponding quantiles will be NA")
+  if (any(p < 0 | p > 1, na.rm = TRUE))
+    stop("probabilities must lie in [0,1]")
+  
+  # 2.  Helper function for a single probability ---------------------------------
+  
+  one_quantile <- function(p_i) {
+    ## Edge cases: 0 → leftmost point where F(x) >= 0,
+    ##               1 → rightmost point where F(x) >= 1
+    if (is.na(p_i)) return(NA_real_)
+    if (p_i == 0) {
+      ## The infimum of the set {x : F(x) >= 0} is the lower bound of the
+      ## support.  If the user gave a finite lower bound we return it,
+      ## otherwise we try to locate it by moving left until F changes.
+      if (is.finite(lower)) return(lower)
+      ## Search leftwards until we see a change in the CDF (or hit -Inf)
+      x <- 0
+      step <- 1
+      while (is.finite(x) && F(x, ...) == 0) {
+        x <- x - step
+        step <- step * 2
+        if (x < -1e12) break   # give up – treat as -Inf
+      }
+      return(x)
+    }
+    if (p_i == 1) {
+      if (is.finite(upper)) return(upper)
+      ## Search rightwards until F reaches 1
+      x <- 0
+      step <- 1
+      while (is.finite(x) && F(x, ...) < 1) {
+        x <- x + step
+        step <- step * 2
+        if (x > 1e12) break
+      }
+      return(x)
+    }
+    
+    ## 2.1  Initialise the bracketing interval =================================
+    lo <- lower
+    hi <- upper
+    
+    ## If the interval is infinite we first try to find a finite bracket.
+    ## This is done by exponential expansion from 0 (or from the sign of p)
+    ## until the CDF straddles p.
+    if (!is.finite(lo) || !is.finite(hi)) {
+      # start from 0 (or any convenient point)
+      centre <- 0
+      # expand left if needed
+      if (!is.finite(lo)) {
+        step <- 1
+        while (F(centre - step, ...) >= p_i) step <- step * 2
+        lo <- centre - step
+      }
+      # expand right if needed
+      if (!is.finite(hi)) {
+        step <- 1
+        while (F(centre + step, ...) < p_i) step <- step * 2
+        hi <- centre + step
+      }
+    }
+    
+    ## 2.2  Bisection loop =====================================================
+    ## we keep the leftmost point where F(q) >= p
+    iter <- 0L
+    while ((hi - lo) > tol && iter < max.iter) {
+      mid <- (lo + hi) / 2
+      fmid <- F(mid, ...)
+      if (is.na(fmid)) {
+        ## If the CDF returns NA (e.g. because mid is outside the domain)
+        ## we shrink the interval towards the side that is known to be
+        ## finite.
+        if (is.finite(lo)) hi <- mid else lo <- mid
+      } else if (fmid >= p_i) {
+        ## We have reached (or passed) the target probability – move the
+        ## upper bound leftwards to keep the *infimum*.
+        hi <- mid
+      } else {
+        ## Still below the target – move the lower bound rightwards.
+        lo <- mid
+      }
+      iter <- iter + 1L
+    }
+    
+    ## 2.3  Return the upper bound (the smallest x with F(x) >= p) =============
+    hi
+  }
+  
+  # 3.  Vectorised call – preserve names / attributes of p ---------------------
+  out <- vapply(p, one_quantile, numeric(1), USE.NAMES = FALSE)
+  if (!is.null(names(p))) names(out) <- names(p)
+  out
+}
--- a/R/singular_values.R
+++ b/R/singular_values.R
@@ -0,0 +1,192 @@
+# TODO improve later using the here package
+source("R/graphon_distribution.R")
+
+# 1. Compute the Matrix Q according to (3.1) -----------------------------------
+#' Compute the matrix **Q**
+#'
+#' This helper builds the matrix \eqn{Q} that appears in equation (3.1) of the
+#' graphon‑based model.  The routine performs the following steps:
+#'   1. Checks that all inputs are of the correct type and dimension.  
+#'   2. Generates a covariate matrix \eqn{X} of size \eqn{n \times p} using the
+#'      user‑supplied sampling function `sample_X_fn`.  The random seed is
+#'      handled locally with **`withr::with_seed()`** so the global RNG state is
+#'      left untouched.  
+#'   3. Constructs the empirical conditional density of the latent variable
+#'      \eqn{v} via `create_cond_density()`.  
+#'   4. Computes the graphon quantiles \eqn{q_k} for a regular grid
+#'      \eqn{k = 0,1/K,\dots,1}` using the wrapper `qgraphon()`.  
+#'      Each q_k is the quantile of k / K for k = 0, ..., K.
+#'   5. Forms the matrix \eqn{Q} whose \eqn{(k,i)}‑th entry is  
+#'      \deqn{F_v\!\bigl(q_{k+1} - a^\top X_i\bigr) -
+#'            F_v\!\bigl(q_{k}   - a^\top X_i\bigr)},
+#'      where \eqn{F_v} is the CDF of the latent variable \eqn{v}.  The
+#'      construction is fully vectorised: the outer difference is built with
+#'      `outer()`, the CDF is applied to the whole matrix at once, and the
+#'      row‑wise differences are taken with `diff()`.  
+#'
+#' @param seed   Integer (or numeric) of length 1.  Seed used to initialize the
+#'               random number generator for reproducible sampling of `X`.
+#' @param a      Numeric vector of length \eqn{p}.  Coefficient vector that
+#'               multiplies the covariates; its length must equal the number of
+#'               columns of `X_matrix`.
+#' @param n      Positive integer.  Number of i.i.d. covariate draws to be
+#'               generated.
+#' @param K      Positive integer.  Number of divisions of the unit interval;
+#'               the resulting grid has length `K+1`.
+#' @param sample_X_fn
+#'               Function with a single argument `n`.  It must return an
+#'               \eqn{n \times p} matrix (or an object coercible to a matrix) of
+#'               covariate samples.
+#' @param fv     Density function of the latent variable \eqn{v}.  Must be
+#'               vectorised (i.e. accept a numeric vector and return a numeric
+#'               vector of the same length).  Typical examples are
+#'               `dnorm`, `dexp`, ….
+#' @param Fv     Cumulative distribution function of the latent variable
+#'               \eqn{v}.  Also has to be vectorised.  Typical examples are
+#'               `pnorm`, `pexp`, ….
+#'
+#' @return A numeric matrix **Q** of dimension `K × n`.  The \eqn{j}-th row
+#'         (for `j = 1,…,K`) contains the increments of the CDF evaluated at
+#'         the graphon quantiles for each observation `i = 1,…,n`.  The matrix
+#'         can be fed directly into downstream singular‑value or spectral
+#'         analyses.
+#'
+#' @details
+#' * **Input validation** – The function stops with an informative error if any
+#'   argument does not meet the required type or dimension constraints.
+#' * **Reproducible sampling** – `withr::with_seed()` guarantees that the seed
+#'   is restored after the call, leaving the user's global RNG untouched.
+#' * **Vectorised construction of Q** –  
+#'   \preformatted{
+#'   adotX   <- as.vector(X %*% a)                     # n‑vector
+#'   cdf_mat <- Fv(outer(graphon_quantiles, adotX, "-")) # (K+1) × n
+#'   Q       <- diff(cdf_mat, lag = 1)                 # K × n
+#'   }
+#'   This replaces the double `for`‑loop in the original implementation and
+#'   yields a speed‑up of roughly an order of magnitude for moderate‑size
+#'   problems.
+#'
+#' @seealso
+#'   \code{\link{create_cond_density}} for the construction of the empirical
+#'   conditional density,  
+#'   \code{\link{qgraphon}} for the quantile computation,  
+#'   \code{\link{compute_extreme_singular_values}} for a wrapper that also
+#'   returns the smallest and largest singular values of `Q`.
+#'
+#' @examples
+#' ## Simple reproducible example ------------------------------------------------
+#' set.seed(123)
+#' ## a sampling function that returns a 2‑dimensional covariate matrix
+#' sample_X <- function(m) matrix(rnorm(m * 2), ncol = 2)
+#'
+#' ## Compute Q for a normal latent variable
+#' Q <- compute_matrix(
+#'   seed       = 42,
+#'   a          = c(1, -0.5),
+#'   n          = 200,
+#'   K          = 100,
+#'   sample_X_fn = sample_X,
+#'   fv         = dnorm,
+#'   Fv         = pnorm
+#' )
+#' dim(Q)               # should be 100 × 200
+#' head(Q)              # a glimpse at the first few rows
+#'
+#' @export
+compute_matrix <- function(
+    seed,
+    a,
+    n,
+    K,
+    sample_X_fn,
+    fv,
+    Fv
+) {
+  ## 1.1 Check inputs ==========================================================
+  if (!is.numeric(seed) || length(seed) != 1) stop("'seed' must be a single number")
+  if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector")
+  if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer")
+  if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer")
+  if (!is.function(sample_X_fn)) stop("'sample_X_fn' must be a function")
+  if (!is.function(fv)) stop("'f_v' must be a function")
+  if (!is.function(Fv)) stop("'F_v' must be a function")
+  
+  ## 1.2 Generate the Matrix X of covariates ===================================
+  # The withr environment allows us to capsulate the global state like the seed
+  # and enables a better reproduction
+  X <- withr::with_seed(seed, {
+    as.matrix(sample_X_fn(n))
+  })
+  if (nrow(X) != n) stop("`sample_X_fn` must return exactly `n` rows")
+  if (ncol(X) != length(a)) {
+    stop("Number of columns of X (", ncol(X), ") must equal length(a) (", length(a), ")")
+  }
+  
+  ## 1.3 Create conditional density ============================================
+  empir_cond_density <- create_cond_density(a, fv, Fv, X)
+  
+  ## 1.4 Compute the graphon quantiles =========================================
+  k <- seq(0, K) / K
+  graphon_quantiles <- qgraphon(k, a = a, Fv = Fv, X_matrix = X)
+  
+  ## 1.5 Build the matrix Q ====================================================
+  inner_products = as.vector(X %*% a)
+  
+  # outer(y, x, "-")  gives a matrix with entry (j,i) = y[j] - x[i]
+  #   then we apply the CDF `F_v` to the whole matrix at once.
+  #   finally we take the difference of successive rows (j) to obtain the
+  #   increments required by equation (3.1).
+  cdf_mat <- Fv(outer(graphon_quantiles, inner_products, "-")) # (K +1) x n matrix
+  Q <- diff(cdf_mat, lag=1)                           # operates along rows
+  
+  Q
+}
+
+# 2. Compute largest and smallest singular value -------------------------------
+# TODO: Possible improvements include
+# - use the package RSpectra for large dimensions
+# - make a switch which value to choose
+# - Include cards for very small singular values
+
+#' Compute the largest and smallest singular values of a matrix
+#'
+#' This helper extracts the extreme singular values of a numeric matrix.
+#' It uses the base‑R \code{svd()} routine (with the singular vectors
+#' suppressed) and returns a list containing the largest singular value and
+#' the smallest (non‑zero) singular value.
+#'
+#' @param M A numeric matrix (or an object coercible to a matrix).  The
+#'   singular values are computed for this matrix.
+#'
+#' @return A **list** with two components
+#' \describe{
+#'   \item{largest_singular_value}{The maximum singular value of \code{M}.}
+#'   \item{smallest_singular_value}{The minimum singular value of \code{M}
+#'         that is greater than zero.  If all singular values are zero,
+#'         the function returns \code{0}.}
+#' }
+#'
+#' @details
+#' The singular value decomposition is performed with \code{svd(M, nu = 0,
+#' nv = 0)} so that only the singular values (\code{$d}) are computed; this
+#' saves memory because the left and right singular vectors are not needed.
+#'
+#' @examples
+#' ## Small random matrix ----------------------------------------------------
+#' set.seed(123)
+#' A <- matrix(rnorm(20), nrow = 4)
+#' compute_minmax_sv(A)
+#'
+#' ## Rank‑deficient matrix (all singular values are zero) --------------------
+#' Z <- matrix(0, nrow = 3, ncol = 3)
+#' compute_minmax_sv(Z)
+#'
+#' @export
+compute_minmax_sv <- function(M) {
+  s <- svd(M, nu=0, nv=0)$d
+
+  list(
+    largest_singular_value  = max(s),
+    smallest_singular_value = min(s) # smallest non zero singular value
+  )
+}
--- a/renv.lock
+++ b/renv.lock
@@ -0,0 +1,68 @@
+{
+  "R": {
+    "Version": "4.5.1",
+    "Repositories": [
+      {
+        "Name": "CRAN",
+        "URL": "https://cloud.r-project.org"
+      }
+    ]
+  },
+  "Packages": {
+    "renv": {
+      "Package": "renv",
+      "Version": "1.1.6",
+      "Source": "Repository",
+      "Type": "Package",
+      "Title": "Project Environments",
+      "Authors@R": "c( person(\"Kevin\", \"Ushey\", role = c(\"aut\", \"cre\"), email = \"kevin@rstudio.com\", comment = c(ORCID = \"0000-0003-2880-7407\")), person(\"Hadley\", \"Wickham\", role = c(\"aut\"), email = \"hadley@rstudio.com\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
+      "Description": "A dependency management toolkit for R. Using 'renv', you can create and manage project-local R libraries, save the state of these libraries to a 'lockfile', and later restore your library as required. Together, these tools can help make your projects more isolated, portable, and reproducible.",
+      "License": "MIT + file LICENSE",
+      "URL": "https://rstudio.github.io/renv/, https://github.com/rstudio/renv",
+      "BugReports": "https://github.com/rstudio/renv/issues",
+      "Imports": [
+        "utils"
+      ],
+      "Suggests": [
+        "BiocManager",
+        "cli",
+        "compiler",
+        "covr",
+        "cpp11",
+        "curl",
+        "devtools",
+        "generics",
+        "gitcreds",
+        "jsonlite",
+        "jsonvalidate",
+        "knitr",
+        "miniUI",
+        "modules",
+        "packrat",
+        "pak",
+        "R6",
+        "remotes",
+        "reticulate",
+        "rmarkdown",
+        "rstudioapi",
+        "shiny",
+        "testthat",
+        "uuid",
+        "waldo",
+        "yaml",
+        "webfakes"
+      ],
+      "Encoding": "UTF-8",
+      "RoxygenNote": "7.3.3",
+      "VignetteBuilder": "knitr",
+      "Config/Needs/website": "tidyverse/tidytemplate",
+      "Config/testthat/edition": "3",
+      "Config/testthat/parallel": "true",
+      "Config/testthat/start-first": "bioconductor,python,install,restore,snapshot,retrieve,remotes",
+      "NeedsCompilation": "no",
+      "Author": "Kevin Ushey [aut, cre] (ORCID: <https://orcid.org/0000-0003-2880-7407>), Hadley Wickham [aut] (ORCID: <https://orcid.org/0000-0003-4757-117X>), Posit Software, PBC [cph, fnd]",
+      "Maintainer": "Kevin Ushey <kevin@rstudio.com>",
+      "Repository": "CRAN"
+    }
+  }
+}
--- a/renv/.gitignore
+++ b/renv/.gitignore
@@ -0,0 +1,7 @@
+library/
+local/
+cellar/
+lock/
+python/
+sandbox/
+staging/
--- a/renv/activate.R
+++ b/renv/activate.R
--- a/renv/settings.json
+++ b/renv/settings.json
@@ -0,0 +1,20 @@
+{
+  "bioconductor.version": null,
+  "external.libraries": [],
+  "ignored.packages": [],
+  "package.dependency.fields": [
+    "Imports",
+    "Depends",
+    "LinkingTo"
+  ],
+  "ppm.enabled": null,
+  "ppm.ignored.urls": [],
+  "r.version": null,
+  "snapshot.dev": false,
+  "snapshot.type": "implicit",
+  "use.cache": true,
+  "vcs.ignore.cellar": true,
+  "vcs.ignore.library": true,
+  "vcs.ignore.local": true,
+  "vcs.manage.ignores": true
+}