initial commit
This commit is contained in:
13
GraphonSimulation.Rproj
Normal file
13
GraphonSimulation.Rproj
Normal file
@@ -0,0 +1,13 @@
|
||||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Default
|
||||
SaveWorkspace: Default
|
||||
AlwaysSaveHistory: Default
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: Yes
|
||||
NumSpacesForTab: 2
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
||||
324
R/graphon_distribution.R
Normal file
324
R/graphon_distribution.R
Normal file
@@ -0,0 +1,324 @@
|
||||
# Load the general quantile function, the loading can be improved later
|
||||
# using the here package.
|
||||
source("R/qinf.R")
|
||||
|
||||
|
||||
# 1. Distribution Function -----------------------------------------------------
|
||||
|
||||
#' Empirical Distribution function for the graphon model.
|
||||
#'
|
||||
#' Computes the empirical expectation
|
||||
#' \deqn{E[F_v(y - a^\top X_i)]}
|
||||
#' using observed covariate samples \eqn{X_i}. This is done by evaluating the
|
||||
#' supplied CDF \code{Fv} at the shifted values \eqn{y - a^\top X_i} and taking
|
||||
#' their empirical average. See also Remark 2.1
|
||||
#'
|
||||
#' @param y Numeric scalar or vector of evaluation points.
|
||||
#' @param a Numeric vector. Coefficient vector used to compute the inner products
|
||||
#' \eqn{a^\top X_i}. Its length must match the number of columns in
|
||||
#' \code{X_matrix}.
|
||||
#' @param Fv Function. Cumulative distribution function of the noise variable
|
||||
#' \eqn{v} (e.g., \code{pnorm}, \code{pexp}, etc.). Must accept a numeric vector
|
||||
#' and return a numeric vector of the same length.
|
||||
#' @param X_matrix Numeric matrix of dimension \eqn{n \times p}, where each row
|
||||
#' corresponds to an observed covariate vector \eqn{X_i}.
|
||||
#'
|
||||
#' @return A numeric scalar giving the empirical expectation
|
||||
#' \eqn{(1/n) \sum_{i=1}^n F_v(y - a^\top X_i)}.
|
||||
#'
|
||||
#' @examples
|
||||
#' set.seed(1)
|
||||
#' X <- matrix(rnorm(100), ncol = 2)
|
||||
#' a <- c(1, -0.5)
|
||||
#' y <- 0
|
||||
#' pgraphon(y, a, pnorm, X)
|
||||
#'
|
||||
#' @export
|
||||
pgraphon <- function(
|
||||
y, # scalar
|
||||
a, # vector (coefficient vector, can be one-dimensional)
|
||||
Fv, # CDF function of the v's (e.g., pnorm, pexp, etc.)
|
||||
X_matrix # n x p matrix: each row is X_i
|
||||
) {
|
||||
## 1.1 Check inputs ==========================================================
|
||||
if (ncol(X_matrix) != length(a)) {
|
||||
stop("Number of columns in X_matrix must match length of a")
|
||||
}
|
||||
if (!is.numeric(y)) {
|
||||
stop("'y' must be numeric (scalar or vector)")
|
||||
}
|
||||
if (!is.numeric(a) || !is.vector(a)) {
|
||||
stop("'a' must be a numeric vector")
|
||||
}
|
||||
if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
|
||||
stop("'X_matrix' must be a numeric matrix")
|
||||
}
|
||||
if (!is.function(Fv)) {
|
||||
stop("'Fv' must be a function (a CDF)")
|
||||
}
|
||||
## 1.2 Compute the expected value ============================================
|
||||
|
||||
# Compute a^T X_i as in the paper. Here we switched to the R convention that each
|
||||
# observation is a row. The paper assumes that each observation is a vector
|
||||
inner_products <- as.vector(X_matrix %*% a) # vector of length n
|
||||
|
||||
# outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i)
|
||||
# equals y[j] - inner_prod[i].
|
||||
dev_mat <- outer(y, inner_products, "-")
|
||||
|
||||
# Apply CDF Fv to each deviation
|
||||
# With sapply we do not have to assume that Fv is vectorized
|
||||
cdf_values <- sapply(dev_mat, Fv)
|
||||
|
||||
# Row means give the empirical expectation for each y_j
|
||||
out <- rowMeans(cdf_vals)
|
||||
out
|
||||
}
|
||||
|
||||
# 2. Density Function ----------------------------------------------------------
|
||||
|
||||
#' Empirical Graphon Density Estimate
|
||||
#'
|
||||
#' Computes the empirical expectation \eqn{E[f_v(y - a^\top X_i)]} using observed
|
||||
#' covariate samples \eqn{X_i}. This serves as an empirical approximation of the
|
||||
#' graphon density evaluated at \eqn{y}, where \eqn{f_v} is the density of the
|
||||
#' latent variable \eqn{v}. See also Remark 2.1
|
||||
#'
|
||||
#' @param y Numeric scalar or vector of points at which the density should be
|
||||
#' evaluated.
|
||||
#' @param a Numeric vector. Coefficient vector used to compute the linear
|
||||
#' projection \eqn{a^\top X_i}. Its length must match the number of columns in
|
||||
#' \code{X_matrix}.
|
||||
#' @param fv Function. Density function of the latent variable \eqn{v} (e.g.,
|
||||
#' \code{dnorm}, \code{dexp}). Must accept a numeric vector and return a
|
||||
#' numeric vector of the same length.
|
||||
#' @param X_matrix Numeric matrix of size \eqn{n \times p}. Each row represents
|
||||
#' an observed covariate vector \eqn{X_i}. The number of columns must match the
|
||||
#' length of \code{a}.
|
||||
#'
|
||||
#' @return A numeric scalar giving the empirical average
|
||||
#' \eqn{\frac{1}{n} \sum_{i=1}^n f_v(y - a^\top X_i)}.
|
||||
#'
|
||||
#' @examples
|
||||
#' set.seed(1)
|
||||
#' X <- matrix(rnorm(50), ncol = 2)
|
||||
#' a <- c(1, -0.5)
|
||||
#' dgraphon(y = 0.2, a = a, fv = dnorm, X_matrix = X)
|
||||
#'
|
||||
#' @export
|
||||
dgraphon <- function(
|
||||
y, # scalar
|
||||
a, # vector (coefficient vector, can be )
|
||||
fv, # density function of the v's (e.g., dnorm, dexp, etc.)
|
||||
X_matrix # n x p matrix: each row is X_i
|
||||
) {
|
||||
## 2.1 Check inputs ----------------------------------------------------------
|
||||
if (!is.numeric(y)) {
|
||||
stop("'y' must be numeric (scalar or vector)")
|
||||
}
|
||||
if (!is.numeric(a) || !is.vector(a)) {
|
||||
stop("'a' must be a numeric vector")
|
||||
}
|
||||
if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
|
||||
stop("'X_matrix' must be a numeric matrix")
|
||||
}
|
||||
if (ncol(X_matrix) != length(a)) {
|
||||
stop("Number of columns in X_matrix must match length of a")
|
||||
}
|
||||
if (!is.function(fv)) {
|
||||
stop("'fv' must be a function (a density)")
|
||||
}
|
||||
|
||||
## 1.2 Compute the expected value ============================================
|
||||
|
||||
# Compute a^T X_i as in the paper. Here we switched to the R convention that each
|
||||
# observation is a row. The paper assumes that each observation is a vector
|
||||
inner_products <- as.vector(X_matrix %*% a) # vector of length n
|
||||
|
||||
# outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i)
|
||||
# equals y[j] - inner_prod[i].
|
||||
dev_mat <- outer(y, inner_products, "-")
|
||||
|
||||
# Apply the density function to each y[j]
|
||||
# With sapply we do not have to assume that fv is vectorized
|
||||
dens_vals <- sapply(dev_mat, fv)
|
||||
|
||||
# Row means give the empirical expectation for each y_j
|
||||
out <- rowMeans(dens_vals)
|
||||
out
|
||||
}
|
||||
|
||||
# 3. Quantile Function ---------------------------------------------------------
|
||||
|
||||
#' Quantile of the empirical graphon distribution
|
||||
#'
|
||||
#' This is a thin wrapper around the generic infimum‑type quantile routine
|
||||
#' \code{qinf()}. It builds the CDF of the graphon,
|
||||
#' \eqn{p_{\text{graphon}}(x) = \frac{1}{n}\sum_{i=1}^{n}F_v\bigl(x-a^\top X_i\bigr)},
|
||||
#' and then finds the quantile(s) for the supplied probability(ies) \code{p}.
|
||||
#'
|
||||
#' @param p Numeric vector of probabilities in \eqn{[0,1]}. Values outside this
|
||||
#' interval are rejected.
|
||||
#' @param a Numeric coefficient vector. Its length must equal the number of
|
||||
#' columns of \code{X_matrix}.
|
||||
#' @param Fv Function. The CDF of the latent variable $v$. It must be
|
||||
#' vectorised (i.e. accept a numeric vector and return a numeric vector of
|
||||
#' the same length). Typical examples are \code{pnorm}, \code{pexp}, etc.
|
||||
#' @param X_matrix Numeric matrix of dimension \eqn{n\times p}. Each row
|
||||
#' corresponds to an observation $X_i$.
|
||||
#' @param lower,upper Numeric scalars giving the search interval for the root
|
||||
#' finder. By default they are set to \code{-Inf} and \code{Inf}.
|
||||
#' @param tol Numeric tolerance for the bisection algorithm used inside
|
||||
#' \code{qinf()}. The default is the square‑root of machine epsilon.
|
||||
#' @param max.iter Maximum number of bisection iterations (safety guard).
|
||||
#' @param ... Additional arguments that are passed **directly** to the
|
||||
#' internal CDF \code{pgraphon}. This makes the wrapper flexible – you do not
|
||||
#' have to list every argument (e.g. \code{a}, \code{X_matrix}, \code{Fv})
|
||||
#' explicitly.
|
||||
#'
|
||||
#' @return A numeric vector of the same length as \code{p} containing the
|
||||
#' quantiles. The vector is named by the probabilities.
|
||||
#'
|
||||
#' @examples
|
||||
#' ## ---- simple normal example ------------------------------------------------
|
||||
#' set.seed(123)
|
||||
#' X <- matrix(rnorm(200), ncol = 2) # n = 100, p = 2
|
||||
#' a <- c(0.7, -0.3)
|
||||
#' qgraphon(p = c(0.25, 0.5, 0.75),
|
||||
#' a = a,
|
||||
#' Fv = pnorm,
|
||||
#' X_matrix = X)
|
||||
#'
|
||||
#' ## ---- mixture example ------------------------------------------------------
|
||||
#' mix_cdf <- function(x, w = 0.4, mu = 0, sigma = 1) {
|
||||
#' w * (x >= 0) + (1 - w) * pnorm(x, mean = mu, sd = sigma)
|
||||
#' }
|
||||
#' qgraphon(p = seq(0, 1, 0.2),
|
||||
#' a = a,
|
||||
#' Fv = mix_cdf,
|
||||
#' X_matrix = X)
|
||||
#'
|
||||
#' @export
|
||||
qgraphon <- function(
|
||||
p, # probabilities
|
||||
a, # coefficient vector
|
||||
Fv, # CDF of the v's
|
||||
X_matrix, # X_i samples, matrix
|
||||
lower = -Inf, # lower bound roots
|
||||
upper = Inf, # upper bound roots
|
||||
tol = .Machine$double.eps^0.5, # parameter root finding
|
||||
max.iter = 100) {
|
||||
|
||||
## 3.1 Check inputs ----------------------------------------------------------
|
||||
if (!is.numeric(p) || any(is.na(p))) {
|
||||
stop("'p' must be a numeric vector without NA values")
|
||||
}
|
||||
if (any(p < 0 | p > 1)) {
|
||||
stop("All probabilities in 'p' must lie in [0, 1]")
|
||||
}
|
||||
|
||||
if (!is.numeric(a) || !is.vector(a)) {
|
||||
stop("'a' must be a numeric vector")
|
||||
}
|
||||
|
||||
if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
|
||||
stop("'X_matrix' must be a numeric matrix")
|
||||
}
|
||||
|
||||
if (ncol(X_matrix) != length(a)) {
|
||||
stop("Number of columns of 'X_matrix' (", ncol(X_matrix),
|
||||
") must equal length of 'a' (", length(a), ")")
|
||||
}
|
||||
|
||||
if (!is.function(Fv)) {
|
||||
stop("'Fv' must be a function (the CDF of the latent variable)")
|
||||
}
|
||||
|
||||
## 3.2 Call the generic quantile function ------------------------------------
|
||||
out <- qinf(F = pgraphon,
|
||||
p = p,
|
||||
lower = lower,
|
||||
upper = upper,
|
||||
tol = tol,
|
||||
max.iter = max.iter,
|
||||
a = a,
|
||||
X_matrix = X_matrix,
|
||||
Fv = Fv)
|
||||
|
||||
# Name the result by the probabilities – this mirrors the behaviour of
|
||||
# base‑R quantile functions (e.g. qnorm, qbeta).
|
||||
names(out) <- as.character(p)
|
||||
out
|
||||
}
|
||||
|
||||
|
||||
# 4. Create conditional density ------------------------------------------------
|
||||
|
||||
#' Create a Conditional Density Function for the Graphon Model.
|
||||
#'
|
||||
#' This function constructs and returns another function that computes the
|
||||
#' conditional density of the graphon model at specified values.
|
||||
#' The returned function takes inputs \code{u} (probabilities) and \code{x}
|
||||
#' (a scalar covariate value) and evaluates the graphon-based conditional
|
||||
#' density using the supplied coefficient vector \code{a}, the density
|
||||
#' \code{fv}, the distribution function \code{Fv}, and the empirical sample
|
||||
#' \code{X_matrix}.
|
||||
#'
|
||||
#' @param a Numeric vector. Coefficient vector used in the graphon model; must
|
||||
#' have length equal to the number of columns in \code{X_matrix}.
|
||||
#' @param fv Function. Density function of the latent variable \eqn{v}
|
||||
#' (e.g., \code{dnorm}, \code{dexp}).
|
||||
#' @param Fv Function. Distribution function (CDF) corresponding to \code{fv}
|
||||
#' (e.g., \code{pnorm}, \code{pexp}).
|
||||
#' @param X_matrix Numeric matrix. An \eqn{n \times p} matrix of covariates
|
||||
#' \eqn{X_i}, where each row corresponds to an observation.
|
||||
#'
|
||||
#' @return A function of two arguments \code{u} and \code{x}.
|
||||
#' The returned function is vectorized in \code{u} and returns the conditional
|
||||
#' density:
|
||||
#' \deqn{ f_{U \mid X}(u \mid x) = \frac{f_v(q(u) - a^\top x)}{
|
||||
#' \mathbb{E}[f_v(q(u) - a^\top X_i)] },
|
||||
#' }
|
||||
#' where \eqn{q(u)} is computed via \code{qgraphon()}.
|
||||
#'
|
||||
#' @details
|
||||
#' Internally, the function constructs a scalar evaluator
|
||||
#' \code{scalar_conditional_density()} that computes the conditional density for a
|
||||
#' single value of \code{u}.
|
||||
#' A wrapper function \code{conditional_density()} then applies this scalar
|
||||
#' function to each element of the input vector \code{u}.
|
||||
#'
|
||||
#' Note: the function relies on the existence of \code{qgraphon()},
|
||||
#' \code{pgraphon()}, and \code{dgraphon()} in the user's environment.
|
||||
#'
|
||||
#' @examples
|
||||
#' \dontrun{
|
||||
#' a <- c(1, -0.5)
|
||||
#' fv <- dnorm
|
||||
#' Fv <- pnorm
|
||||
#' X_matrix <- matrix(rnorm(200), ncol = 2)
|
||||
#'
|
||||
#' cond_dens <- create_cond_density(a, fv, Fv, X_matrix)
|
||||
#' cond_dens(c(0.2, 0.5, 0.8), x = 1.0)
|
||||
#' }
|
||||
#'
|
||||
#' @export
|
||||
create_cond_density <- function(
|
||||
a,
|
||||
fv,
|
||||
Fv,
|
||||
X_matrix){
|
||||
|
||||
scalar_conditional_density <-function(u, x) {
|
||||
q_a <- qgraphon(u, a, Fv, X_matrix)
|
||||
num <- fv(q_a - x %*% a)
|
||||
den <- dgraphon(q_a, a, fv, X_matrix)
|
||||
num / den
|
||||
}
|
||||
|
||||
conditional_density <- function(u, x) {
|
||||
vapply(u, scalar_conditional_density, numeric(1), x = x)
|
||||
}
|
||||
|
||||
return(conditional_density)
|
||||
}
|
||||
146
R/qinf.R
Normal file
146
R/qinf.R
Normal file
@@ -0,0 +1,146 @@
|
||||
#' Generic infimum‑type quantile function
|
||||
#'
|
||||
#' @param F A function that evaluates the distribution function
|
||||
#' (CDF). It must be monotone non‑decreasing in its first
|
||||
#' argument and return values in $0,1$. Additional
|
||||
#' arguments for the CDF can be supplied via `...`.
|
||||
#' @param p Numeric vector of probabilities. Values must lie in
|
||||
#' $0,1$; values outside this range give `NaN`.
|
||||
#' @param lower Left end of the search interval. By default
|
||||
#' `-Inf` is used; if the CDF is known to be zero below a
|
||||
#' finite value you can give that value to speed up the
|
||||
#' search.
|
||||
#' @param upper Right end of the search interval. By default
|
||||
#' `Inf` is used; if the CDF is known to be one above a
|
||||
#' finite value you can give that value.
|
||||
#' @param tol Desired absolute tolerance for the quantile. The
|
||||
#' algorithm stops when the interval width is ≤ `tol`.
|
||||
#' @param max.iter Maximum number of bisection iterations (safety
|
||||
#' guard). The default is `1000`.
|
||||
#' @param ... Additional arguments passed to `F`.
|
||||
#'
|
||||
#' @return A numeric vector of the same length as `p` containing the
|
||||
#' infimum‑type quantiles.
|
||||
#' @examples
|
||||
#' ## 1. Continuous normal distribution (compare with qnorm)
|
||||
#' qinf(pnorm, p = c(0.025, 0.5, 0.975), lower = -10, upper = 10)
|
||||
#' qnorm(c(0.025, 0.5, 0.975))
|
||||
#'
|
||||
#' ## 2. Discrete distribution: Binomial(10, 0.3)
|
||||
#' Fbinom <- function(x, size, prob) pbinom(floor(x), size, prob)
|
||||
#' qinf(Fbinom, p = seq(0, 1, 0.1), size = 10, prob = 0.3)
|
||||
#' qbinom(seq(0, 1, 0.1), size = 10, prob = 0.3) # built‑in quantile
|
||||
#'
|
||||
#' ## 3. Mixed distribution (continuous + point mass at 0)
|
||||
#' Fmix <- function(x, sigma = 1) {
|
||||
#' 0.3 * (x >= 0) + 0.7 * pnorm(x, sd = sigma)
|
||||
#' }
|
||||
#' qinf(Fmix, p = c(0.1, 0.3, 0.5, 0.9), lower = -5, upper = 5)
|
||||
#' @export
|
||||
qinf <- function(F,
|
||||
p,
|
||||
lower = -Inf,
|
||||
upper = Inf,
|
||||
tol = .Machine$double.eps^0.5,
|
||||
max.iter = 1000L,
|
||||
...) {
|
||||
|
||||
# 1. Input checks -----------------------------------------------------------
|
||||
if (!is.function(F))
|
||||
stop("'F' must be a function that evaluates a CDF")
|
||||
if (!is.numeric(p))
|
||||
stop("'p' must be numeric")
|
||||
if (any(is.na(p))) # keep NA positions in the output
|
||||
warning("NA probabilities supplied – corresponding quantiles will be NA")
|
||||
if (any(p < 0 | p > 1, na.rm = TRUE))
|
||||
stop("probabilities must lie in [0,1]")
|
||||
|
||||
# 2. Helper function for a single probability ---------------------------------
|
||||
|
||||
one_quantile <- function(p_i) {
|
||||
## Edge cases: 0 → leftmost point where F(x) >= 0,
|
||||
## 1 → rightmost point where F(x) >= 1
|
||||
if (is.na(p_i)) return(NA_real_)
|
||||
if (p_i == 0) {
|
||||
## The infimum of the set {x : F(x) >= 0} is the lower bound of the
|
||||
## support. If the user gave a finite lower bound we return it,
|
||||
## otherwise we try to locate it by moving left until F changes.
|
||||
if (is.finite(lower)) return(lower)
|
||||
## Search leftwards until we see a change in the CDF (or hit -Inf)
|
||||
x <- 0
|
||||
step <- 1
|
||||
while (is.finite(x) && F(x, ...) == 0) {
|
||||
x <- x - step
|
||||
step <- step * 2
|
||||
if (x < -1e12) break # give up – treat as -Inf
|
||||
}
|
||||
return(x)
|
||||
}
|
||||
if (p_i == 1) {
|
||||
if (is.finite(upper)) return(upper)
|
||||
## Search rightwards until F reaches 1
|
||||
x <- 0
|
||||
step <- 1
|
||||
while (is.finite(x) && F(x, ...) < 1) {
|
||||
x <- x + step
|
||||
step <- step * 2
|
||||
if (x > 1e12) break
|
||||
}
|
||||
return(x)
|
||||
}
|
||||
|
||||
## 2.1 Initialise the bracketing interval =================================
|
||||
lo <- lower
|
||||
hi <- upper
|
||||
|
||||
## If the interval is infinite we first try to find a finite bracket.
|
||||
## This is done by exponential expansion from 0 (or from the sign of p)
|
||||
## until the CDF straddles p.
|
||||
if (!is.finite(lo) || !is.finite(hi)) {
|
||||
# start from 0 (or any convenient point)
|
||||
centre <- 0
|
||||
# expand left if needed
|
||||
if (!is.finite(lo)) {
|
||||
step <- 1
|
||||
while (F(centre - step, ...) >= p_i) step <- step * 2
|
||||
lo <- centre - step
|
||||
}
|
||||
# expand right if needed
|
||||
if (!is.finite(hi)) {
|
||||
step <- 1
|
||||
while (F(centre + step, ...) < p_i) step <- step * 2
|
||||
hi <- centre + step
|
||||
}
|
||||
}
|
||||
|
||||
## 2.2 Bisection loop =====================================================
|
||||
## we keep the leftmost point where F(q) >= p
|
||||
iter <- 0L
|
||||
while ((hi - lo) > tol && iter < max.iter) {
|
||||
mid <- (lo + hi) / 2
|
||||
fmid <- F(mid, ...)
|
||||
if (is.na(fmid)) {
|
||||
## If the CDF returns NA (e.g. because mid is outside the domain)
|
||||
## we shrink the interval towards the side that is known to be
|
||||
## finite.
|
||||
if (is.finite(lo)) hi <- mid else lo <- mid
|
||||
} else if (fmid >= p_i) {
|
||||
## We have reached (or passed) the target probability – move the
|
||||
## upper bound leftwards to keep the *infimum*.
|
||||
hi <- mid
|
||||
} else {
|
||||
## Still below the target – move the lower bound rightwards.
|
||||
lo <- mid
|
||||
}
|
||||
iter <- iter + 1L
|
||||
}
|
||||
|
||||
## 2.3 Return the upper bound (the smallest x with F(x) >= p) =============
|
||||
hi
|
||||
}
|
||||
|
||||
# 3. Vectorised call – preserve names / attributes of p ---------------------
|
||||
out <- vapply(p, one_quantile, numeric(1), USE.NAMES = FALSE)
|
||||
if (!is.null(names(p))) names(out) <- names(p)
|
||||
out
|
||||
}
|
||||
192
R/singular_values.R
Normal file
192
R/singular_values.R
Normal file
@@ -0,0 +1,192 @@
|
||||
# TODO improve later using the here package
|
||||
source("R/graphon_distribution.R")
|
||||
|
||||
# 1. Compute the Matrix Q according to (3.1) -----------------------------------
|
||||
#' Compute the matrix **Q**
|
||||
#'
|
||||
#' This helper builds the matrix \eqn{Q} that appears in equation (3.1) of the
|
||||
#' graphon‑based model. The routine performs the following steps:
|
||||
#' 1. Checks that all inputs are of the correct type and dimension.
|
||||
#' 2. Generates a covariate matrix \eqn{X} of size \eqn{n \times p} using the
|
||||
#' user‑supplied sampling function `sample_X_fn`. The random seed is
|
||||
#' handled locally with **`withr::with_seed()`** so the global RNG state is
|
||||
#' left untouched.
|
||||
#' 3. Constructs the empirical conditional density of the latent variable
|
||||
#' \eqn{v} via `create_cond_density()`.
|
||||
#' 4. Computes the graphon quantiles \eqn{q_k} for a regular grid
|
||||
#' \eqn{k = 0,1/K,\dots,1}` using the wrapper `qgraphon()`.
|
||||
#' Each q_k is the quantile of k / K for k = 0, ..., K.
|
||||
#' 5. Forms the matrix \eqn{Q} whose \eqn{(k,i)}‑th entry is
|
||||
#' \deqn{F_v\!\bigl(q_{k+1} - a^\top X_i\bigr) -
|
||||
#' F_v\!\bigl(q_{k} - a^\top X_i\bigr)},
|
||||
#' where \eqn{F_v} is the CDF of the latent variable \eqn{v}. The
|
||||
#' construction is fully vectorised: the outer difference is built with
|
||||
#' `outer()`, the CDF is applied to the whole matrix at once, and the
|
||||
#' row‑wise differences are taken with `diff()`.
|
||||
#'
|
||||
#' @param seed Integer (or numeric) of length 1. Seed used to initialize the
|
||||
#' random number generator for reproducible sampling of `X`.
|
||||
#' @param a Numeric vector of length \eqn{p}. Coefficient vector that
|
||||
#' multiplies the covariates; its length must equal the number of
|
||||
#' columns of `X_matrix`.
|
||||
#' @param n Positive integer. Number of i.i.d. covariate draws to be
|
||||
#' generated.
|
||||
#' @param K Positive integer. Number of divisions of the unit interval;
|
||||
#' the resulting grid has length `K+1`.
|
||||
#' @param sample_X_fn
|
||||
#' Function with a single argument `n`. It must return an
|
||||
#' \eqn{n \times p} matrix (or an object coercible to a matrix) of
|
||||
#' covariate samples.
|
||||
#' @param fv Density function of the latent variable \eqn{v}. Must be
|
||||
#' vectorised (i.e. accept a numeric vector and return a numeric
|
||||
#' vector of the same length). Typical examples are
|
||||
#' `dnorm`, `dexp`, ….
|
||||
#' @param Fv Cumulative distribution function of the latent variable
|
||||
#' \eqn{v}. Also has to be vectorised. Typical examples are
|
||||
#' `pnorm`, `pexp`, ….
|
||||
#'
|
||||
#' @return A numeric matrix **Q** of dimension `K × n`. The \eqn{j}-th row
|
||||
#' (for `j = 1,…,K`) contains the increments of the CDF evaluated at
|
||||
#' the graphon quantiles for each observation `i = 1,…,n`. The matrix
|
||||
#' can be fed directly into downstream singular‑value or spectral
|
||||
#' analyses.
|
||||
#'
|
||||
#' @details
|
||||
#' * **Input validation** – The function stops with an informative error if any
|
||||
#' argument does not meet the required type or dimension constraints.
|
||||
#' * **Reproducible sampling** – `withr::with_seed()` guarantees that the seed
|
||||
#' is restored after the call, leaving the user's global RNG untouched.
|
||||
#' * **Vectorised construction of Q** –
|
||||
#' \preformatted{
|
||||
#' adotX <- as.vector(X %*% a) # n‑vector
|
||||
#' cdf_mat <- Fv(outer(graphon_quantiles, adotX, "-")) # (K+1) × n
|
||||
#' Q <- diff(cdf_mat, lag = 1) # K × n
|
||||
#' }
|
||||
#' This replaces the double `for`‑loop in the original implementation and
|
||||
#' yields a speed‑up of roughly an order of magnitude for moderate‑size
|
||||
#' problems.
|
||||
#'
|
||||
#' @seealso
|
||||
#' \code{\link{create_cond_density}} for the construction of the empirical
|
||||
#' conditional density,
|
||||
#' \code{\link{qgraphon}} for the quantile computation,
|
||||
#' \code{\link{compute_extreme_singular_values}} for a wrapper that also
|
||||
#' returns the smallest and largest singular values of `Q`.
|
||||
#'
|
||||
#' @examples
|
||||
#' ## Simple reproducible example ------------------------------------------------
|
||||
#' set.seed(123)
|
||||
#' ## a sampling function that returns a 2‑dimensional covariate matrix
|
||||
#' sample_X <- function(m) matrix(rnorm(m * 2), ncol = 2)
|
||||
#'
|
||||
#' ## Compute Q for a normal latent variable
|
||||
#' Q <- compute_matrix(
|
||||
#' seed = 42,
|
||||
#' a = c(1, -0.5),
|
||||
#' n = 200,
|
||||
#' K = 100,
|
||||
#' sample_X_fn = sample_X,
|
||||
#' fv = dnorm,
|
||||
#' Fv = pnorm
|
||||
#' )
|
||||
#' dim(Q) # should be 100 × 200
|
||||
#' head(Q) # a glimpse at the first few rows
|
||||
#'
|
||||
#' @export
|
||||
compute_matrix <- function(
|
||||
seed,
|
||||
a,
|
||||
n,
|
||||
K,
|
||||
sample_X_fn,
|
||||
fv,
|
||||
Fv
|
||||
) {
|
||||
## 1.1 Check inputs ==========================================================
|
||||
if (!is.numeric(seed) || length(seed) != 1) stop("'seed' must be a single number")
|
||||
if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector")
|
||||
if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer")
|
||||
if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer")
|
||||
if (!is.function(sample_X_fn)) stop("'sample_X_fn' must be a function")
|
||||
if (!is.function(fv)) stop("'f_v' must be a function")
|
||||
if (!is.function(Fv)) stop("'F_v' must be a function")
|
||||
|
||||
## 1.2 Generate the Matrix X of covariates ===================================
|
||||
# The withr environment allows us to capsulate the global state like the seed
|
||||
# and enables a better reproduction
|
||||
X <- withr::with_seed(seed, {
|
||||
as.matrix(sample_X_fn(n))
|
||||
})
|
||||
if (nrow(X) != n) stop("`sample_X_fn` must return exactly `n` rows")
|
||||
if (ncol(X) != length(a)) {
|
||||
stop("Number of columns of X (", ncol(X), ") must equal length(a) (", length(a), ")")
|
||||
}
|
||||
|
||||
## 1.3 Create conditional density ============================================
|
||||
empir_cond_density <- create_cond_density(a, fv, Fv, X)
|
||||
|
||||
## 1.4 Compute the graphon quantiles =========================================
|
||||
k <- seq(0, K) / K
|
||||
graphon_quantiles <- qgraphon(k, a = a, Fv = Fv, X_matrix = X)
|
||||
|
||||
## 1.5 Build the matrix Q ====================================================
|
||||
inner_products = as.vector(X %*% a)
|
||||
|
||||
# outer(y, x, "-") gives a matrix with entry (j,i) = y[j] - x[i]
|
||||
# then we apply the CDF `F_v` to the whole matrix at once.
|
||||
# finally we take the difference of successive rows (j) to obtain the
|
||||
# increments required by equation (3.1).
|
||||
cdf_mat <- Fv(outer(graphon_quantiles, inner_products, "-")) # (K +1) x n matrix
|
||||
Q <- diff(cdf_mat, lag=1) # operates along rows
|
||||
|
||||
Q
|
||||
}
|
||||
|
||||
# 2. Compute largest and smallest singular value -------------------------------
|
||||
# TODO: Possible improvements include
|
||||
# - use the package RSpectra for large dimensions
|
||||
# - make a switch which value to choose
|
||||
# - Include cards for very small singular values
|
||||
|
||||
#' Compute the largest and smallest singular values of a matrix
|
||||
#'
|
||||
#' This helper extracts the extreme singular values of a numeric matrix.
|
||||
#' It uses the base‑R \code{svd()} routine (with the singular vectors
|
||||
#' suppressed) and returns a list containing the largest singular value and
|
||||
#' the smallest (non‑zero) singular value.
|
||||
#'
|
||||
#' @param M A numeric matrix (or an object coercible to a matrix). The
|
||||
#' singular values are computed for this matrix.
|
||||
#'
|
||||
#' @return A **list** with two components
|
||||
#' \describe{
|
||||
#' \item{largest_singular_value}{The maximum singular value of \code{M}.}
|
||||
#' \item{smallest_singular_value}{The minimum singular value of \code{M}
|
||||
#' that is greater than zero. If all singular values are zero,
|
||||
#' the function returns \code{0}.}
|
||||
#' }
|
||||
#'
|
||||
#' @details
|
||||
#' The singular value decomposition is performed with \code{svd(M, nu = 0,
|
||||
#' nv = 0)} so that only the singular values (\code{$d}) are computed; this
|
||||
#' saves memory because the left and right singular vectors are not needed.
|
||||
#'
|
||||
#' @examples
|
||||
#' ## Small random matrix ----------------------------------------------------
|
||||
#' set.seed(123)
|
||||
#' A <- matrix(rnorm(20), nrow = 4)
|
||||
#' compute_minmax_sv(A)
|
||||
#'
|
||||
#' ## Rank‑deficient matrix (all singular values are zero) --------------------
|
||||
#' Z <- matrix(0, nrow = 3, ncol = 3)
|
||||
#' compute_minmax_sv(Z)
|
||||
#'
|
||||
#' @export
|
||||
compute_minmax_sv <- function(M) {
|
||||
s <- svd(M, nu=0, nv=0)$d
|
||||
|
||||
list(
|
||||
largest_singular_value = max(s),
|
||||
smallest_singular_value = min(s) # smallest non zero singular value
|
||||
)
|
||||
}
|
||||
68
renv.lock
Normal file
68
renv.lock
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"R": {
|
||||
"Version": "4.5.1",
|
||||
"Repositories": [
|
||||
{
|
||||
"Name": "CRAN",
|
||||
"URL": "https://cloud.r-project.org"
|
||||
}
|
||||
]
|
||||
},
|
||||
"Packages": {
|
||||
"renv": {
|
||||
"Package": "renv",
|
||||
"Version": "1.1.6",
|
||||
"Source": "Repository",
|
||||
"Type": "Package",
|
||||
"Title": "Project Environments",
|
||||
"Authors@R": "c( person(\"Kevin\", \"Ushey\", role = c(\"aut\", \"cre\"), email = \"kevin@rstudio.com\", comment = c(ORCID = \"0000-0003-2880-7407\")), person(\"Hadley\", \"Wickham\", role = c(\"aut\"), email = \"hadley@rstudio.com\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )",
|
||||
"Description": "A dependency management toolkit for R. Using 'renv', you can create and manage project-local R libraries, save the state of these libraries to a 'lockfile', and later restore your library as required. Together, these tools can help make your projects more isolated, portable, and reproducible.",
|
||||
"License": "MIT + file LICENSE",
|
||||
"URL": "https://rstudio.github.io/renv/, https://github.com/rstudio/renv",
|
||||
"BugReports": "https://github.com/rstudio/renv/issues",
|
||||
"Imports": [
|
||||
"utils"
|
||||
],
|
||||
"Suggests": [
|
||||
"BiocManager",
|
||||
"cli",
|
||||
"compiler",
|
||||
"covr",
|
||||
"cpp11",
|
||||
"curl",
|
||||
"devtools",
|
||||
"generics",
|
||||
"gitcreds",
|
||||
"jsonlite",
|
||||
"jsonvalidate",
|
||||
"knitr",
|
||||
"miniUI",
|
||||
"modules",
|
||||
"packrat",
|
||||
"pak",
|
||||
"R6",
|
||||
"remotes",
|
||||
"reticulate",
|
||||
"rmarkdown",
|
||||
"rstudioapi",
|
||||
"shiny",
|
||||
"testthat",
|
||||
"uuid",
|
||||
"waldo",
|
||||
"yaml",
|
||||
"webfakes"
|
||||
],
|
||||
"Encoding": "UTF-8",
|
||||
"RoxygenNote": "7.3.3",
|
||||
"VignetteBuilder": "knitr",
|
||||
"Config/Needs/website": "tidyverse/tidytemplate",
|
||||
"Config/testthat/edition": "3",
|
||||
"Config/testthat/parallel": "true",
|
||||
"Config/testthat/start-first": "bioconductor,python,install,restore,snapshot,retrieve,remotes",
|
||||
"NeedsCompilation": "no",
|
||||
"Author": "Kevin Ushey [aut, cre] (ORCID: <https://orcid.org/0000-0003-2880-7407>), Hadley Wickham [aut] (ORCID: <https://orcid.org/0000-0003-4757-117X>), Posit Software, PBC [cph, fnd]",
|
||||
"Maintainer": "Kevin Ushey <kevin@rstudio.com>",
|
||||
"Repository": "CRAN"
|
||||
}
|
||||
}
|
||||
}
|
||||
7
renv/.gitignore
vendored
Normal file
7
renv/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
library/
|
||||
local/
|
||||
cellar/
|
||||
lock/
|
||||
python/
|
||||
sandbox/
|
||||
staging/
|
||||
1403
renv/activate.R
Normal file
1403
renv/activate.R
Normal file
File diff suppressed because it is too large
Load Diff
20
renv/settings.json
Normal file
20
renv/settings.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"bioconductor.version": null,
|
||||
"external.libraries": [],
|
||||
"ignored.packages": [],
|
||||
"package.dependency.fields": [
|
||||
"Imports",
|
||||
"Depends",
|
||||
"LinkingTo"
|
||||
],
|
||||
"ppm.enabled": null,
|
||||
"ppm.ignored.urls": [],
|
||||
"r.version": null,
|
||||
"snapshot.dev": false,
|
||||
"snapshot.type": "implicit",
|
||||
"use.cache": true,
|
||||
"vcs.ignore.cellar": true,
|
||||
"vcs.ignore.library": true,
|
||||
"vcs.ignore.local": true,
|
||||
"vcs.manage.ignores": true
|
||||
}
|
||||
Reference in New Issue
Block a user