322 lines
12 KiB
R
322 lines
12 KiB
R
# Load the general quantile function, the loading can be improved later
|
||
# using the here package.
|
||
source("R/qinf.R")
|
||
|
||
|
||
# 1. Distribution Function -----------------------------------------------------
|
||
|
||
#' Empirical Distribution function for the graphon model.
|
||
#'
|
||
#' Computes the empirical expectation
|
||
#' \deqn{E[F_v(y - a^\top X_i)]}
|
||
#' using observed covariate samples \eqn{X_i}. This is done by evaluating the
|
||
#' supplied CDF \code{Fv} at the shifted values \eqn{y - a^\top X_i} and taking
|
||
#' their empirical average. See also Remark 2.1
|
||
#'
|
||
#' @param y Numeric scalar or vector of evaluation points.
|
||
#' @param a Numeric vector. Coefficient vector used to compute the inner products
|
||
#' \eqn{a^\top X_i}. Its length must match the number of columns in
|
||
#' \code{X_matrix}.
|
||
#' @param Fv Function. Cumulative distribution function of the noise variable
|
||
#' \eqn{v} (e.g., \code{pnorm}, \code{pexp}, etc.). Must accept a numeric vector
|
||
#' and return a numeric vector of the same length.
|
||
#' @param X_matrix Numeric matrix of dimension \eqn{n \times p}, where each row
|
||
#' corresponds to an observed covariate vector \eqn{X_i}.
|
||
#'
|
||
#' @return A numeric scalar giving the empirical expectation
|
||
#' \eqn{(1/n) \sum_{i=1}^n F_v(y - a^\top X_i)}.
|
||
#'
|
||
#' @examples
|
||
#' set.seed(1)
|
||
#' X <- matrix(rnorm(100), ncol = 2)
|
||
#' a <- c(1, -0.5)
|
||
#' y <- 0
|
||
#' pgraphon(y, a, pnorm, X)
|
||
#'
|
||
#' @export
|
||
pgraphon <- function(
|
||
y, # scalar
|
||
a, # vector (coefficient vector, can be one-dimensional)
|
||
Fv, # CDF function of the v's (e.g., pnorm, pexp, etc.)
|
||
X_matrix # n x p matrix: each row is X_i
|
||
) {
|
||
## 1.1 Check inputs ==========================================================
|
||
if (ncol(X_matrix) != length(a)) {
|
||
stop("Number of columns in X_matrix must match length of a")
|
||
}
|
||
if (!is.numeric(y)) {
|
||
stop("'y' must be numeric (scalar or vector)")
|
||
}
|
||
if (!is.numeric(a) || !is.vector(a)) {
|
||
stop("'a' must be a numeric vector")
|
||
}
|
||
if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
|
||
stop("'X_matrix' must be a numeric matrix")
|
||
}
|
||
if (!is.function(Fv)) {
|
||
stop("'Fv' must be a function (a CDF)")
|
||
}
|
||
## 1.2 Compute the expected value ============================================
|
||
|
||
# Compute a^T X_i as in the paper. Here we switched to the R convention that each
|
||
# observation is a row. The paper assumes that each observation is a vector
|
||
inner_products <- as.vector(X_matrix %*% a) # vector of length n
|
||
|
||
# outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i)
|
||
# equals y[j] - inner_prod[i].
|
||
dev_mat <- outer(y, inner_products, "-")
|
||
|
||
# Apply CDF Fv to each deviation
|
||
cdf_vals <- Fv(dev_mat)
|
||
|
||
# Row means give the empirical expectation for each y_j
|
||
out <- rowMeans(cdf_vals)
|
||
out
|
||
}
|
||
|
||
# 2. Density Function ----------------------------------------------------------
|
||
|
||
#' Empirical Graphon Density Estimate
|
||
#'
|
||
#' Computes the empirical expectation \eqn{E[f_v(y - a^\top X_i)]} using observed
|
||
#' covariate samples \eqn{X_i}. This serves as an empirical approximation of the
|
||
#' graphon density evaluated at \eqn{y}, where \eqn{f_v} is the density of the
|
||
#' latent variable \eqn{v}. See also Remark 2.1
|
||
#'
|
||
#' @param y Numeric scalar or vector of points at which the density should be
|
||
#' evaluated.
|
||
#' @param a Numeric vector. Coefficient vector used to compute the linear
|
||
#' projection \eqn{a^\top X_i}. Its length must match the number of columns in
|
||
#' \code{X_matrix}.
|
||
#' @param fv Function. Density function of the latent variable \eqn{v} (e.g.,
|
||
#' \code{dnorm}, \code{dexp}). Must accept a numeric vector and return a
|
||
#' numeric vector of the same length.
|
||
#' @param X_matrix Numeric matrix of size \eqn{n \times p}. Each row represents
|
||
#' an observed covariate vector \eqn{X_i}. The number of columns must match the
|
||
#' length of \code{a}.
|
||
#'
|
||
#' @return A numeric scalar giving the empirical average
|
||
#' \eqn{\frac{1}{n} \sum_{i=1}^n f_v(y - a^\top X_i)}.
|
||
#'
|
||
#' @examples
|
||
#' set.seed(1)
|
||
#' X <- matrix(rnorm(50), ncol = 2)
|
||
#' a <- c(1, -0.5)
|
||
#' dgraphon(y = 0.2, a = a, fv = dnorm, X_matrix = X)
|
||
#'
|
||
#' @export
|
||
dgraphon <- function(
|
||
y, # scalar
|
||
a, # vector (coefficient vector, can be )
|
||
fv, # density function of the v's (e.g., dnorm, dexp, etc.)
|
||
X_matrix # n x p matrix: each row is X_i
|
||
) {
|
||
## 2.1 Check inputs ----------------------------------------------------------
|
||
if (!is.numeric(y)) {
|
||
stop("'y' must be numeric (scalar or vector)")
|
||
}
|
||
if (!is.numeric(a) || !is.vector(a)) {
|
||
stop("'a' must be a numeric vector")
|
||
}
|
||
if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
|
||
stop("'X_matrix' must be a numeric matrix")
|
||
}
|
||
if (ncol(X_matrix) != length(a)) {
|
||
stop("Number of columns in X_matrix must match length of a")
|
||
}
|
||
if (!is.function(fv)) {
|
||
stop("'fv' must be a function (a density)")
|
||
}
|
||
|
||
## 1.2 Compute the expected value ============================================
|
||
|
||
# Compute a^T X_i as in the paper. Here we switched to the R convention that each
|
||
# observation is a row. The paper assumes that each observation is a vector
|
||
inner_products <- as.vector(X_matrix %*% a) # vector of length n
|
||
|
||
# outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i)
|
||
# equals y[j] - inner_prod[i].
|
||
dev_mat <- outer(y, inner_products, "-")
|
||
|
||
# Apply the density function to each y[j]
|
||
dens_vals <- fv(dev_mat)
|
||
|
||
# Row means give the empirical expectation for each y_j
|
||
out <- rowMeans(dens_vals)
|
||
out
|
||
}
|
||
|
||
# 3. Quantile Function ---------------------------------------------------------
|
||
|
||
#' Quantile of the empirical graphon distribution
|
||
#'
|
||
#' This is a thin wrapper around the generic infimum‑type quantile routine
|
||
#' \code{qinf()}. It builds the CDF of the graphon,
|
||
#' \eqn{p_{\text{graphon}}(x) = \frac{1}{n}\sum_{i=1}^{n}F_v\bigl(x-a^\top X_i\bigr)},
|
||
#' and then finds the quantile(s) for the supplied probability(ies) \code{p}.
|
||
#'
|
||
#' @param p Numeric vector of probabilities in \eqn{[0,1]}. Values outside this
|
||
#' interval are rejected.
|
||
#' @param a Numeric coefficient vector. Its length must equal the number of
|
||
#' columns of \code{X_matrix}.
|
||
#' @param Fv Function. The CDF of the latent variable $v$. It must be
|
||
#' vectorised (i.e. accept a numeric vector and return a numeric vector of
|
||
#' the same length). Typical examples are \code{pnorm}, \code{pexp}, etc.
|
||
#' @param X_matrix Numeric matrix of dimension \eqn{n\times p}. Each row
|
||
#' corresponds to an observation $X_i$.
|
||
#' @param lower,upper Numeric scalars giving the search interval for the root
|
||
#' finder. By default they are set to \code{-Inf} and \code{Inf}.
|
||
#' @param tol Numeric tolerance for the bisection algorithm used inside
|
||
#' \code{qinf()}. The default is the square‑root of machine epsilon.
|
||
#' @param max.iter Maximum number of bisection iterations (safety guard).
|
||
#' @param ... Additional arguments that are passed **directly** to the
|
||
#' internal CDF \code{pgraphon}. This makes the wrapper flexible – you do not
|
||
#' have to list every argument (e.g. \code{a}, \code{X_matrix}, \code{Fv})
|
||
#' explicitly.
|
||
#'
|
||
#' @return A numeric vector of the same length as \code{p} containing the
|
||
#' quantiles. The vector is named by the probabilities.
|
||
#'
|
||
#' @examples
|
||
#' ## ---- simple normal example ------------------------------------------------
|
||
#' set.seed(123)
|
||
#' X <- matrix(rnorm(200), ncol = 2) # n = 100, p = 2
|
||
#' a <- c(0.7, -0.3)
|
||
#' qgraphon(p = c(0.25, 0.5, 0.75),
|
||
#' a = a,
|
||
#' Fv = pnorm,
|
||
#' X_matrix = X)
|
||
#'
|
||
#' ## ---- mixture example ------------------------------------------------------
|
||
#' mix_cdf <- function(x, w = 0.4, mu = 0, sigma = 1) {
|
||
#' w * (x >= 0) + (1 - w) * pnorm(x, mean = mu, sd = sigma)
|
||
#' }
|
||
#' qgraphon(p = seq(0, 1, 0.2),
|
||
#' a = a,
|
||
#' Fv = mix_cdf,
|
||
#' X_matrix = X)
|
||
#'
|
||
#' @export
|
||
qgraphon <- function(
|
||
p, # probabilities
|
||
a, # coefficient vector
|
||
Fv, # CDF of the v's
|
||
X_matrix, # X_i samples, matrix
|
||
lower = -Inf, # lower bound roots
|
||
upper = Inf, # upper bound roots
|
||
tol = .Machine$double.eps^0.5, # parameter root finding
|
||
max.iter = 100) {
|
||
|
||
## 3.1 Check inputs ----------------------------------------------------------
|
||
if (!is.numeric(p) || any(is.na(p))) {
|
||
stop("'p' must be a numeric vector without NA values")
|
||
}
|
||
if (any(p < 0 | p > 1)) {
|
||
stop("All probabilities in 'p' must lie in [0, 1]")
|
||
}
|
||
|
||
if (!is.numeric(a) || !is.vector(a)) {
|
||
stop("'a' must be a numeric vector")
|
||
}
|
||
|
||
if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
|
||
stop("'X_matrix' must be a numeric matrix")
|
||
}
|
||
|
||
if (ncol(X_matrix) != length(a)) {
|
||
stop("Number of columns of 'X_matrix' (", ncol(X_matrix),
|
||
") must equal length of 'a' (", length(a), ")")
|
||
}
|
||
|
||
if (!is.function(Fv)) {
|
||
stop("'Fv' must be a function (the CDF of the latent variable)")
|
||
}
|
||
|
||
## 3.2 Call the generic quantile function ------------------------------------
|
||
out <- qinf(F = pgraphon,
|
||
p = p,
|
||
lower = lower,
|
||
upper = upper,
|
||
tol = tol,
|
||
max.iter = max.iter,
|
||
a = a,
|
||
X_matrix = X_matrix,
|
||
Fv = Fv)
|
||
|
||
# Name the result by the probabilities – this mirrors the behaviour of
|
||
# base‑R quantile functions (e.g. qnorm, qbeta).
|
||
names(out) <- as.character(p)
|
||
out
|
||
}
|
||
|
||
|
||
# 4. Create conditional density ------------------------------------------------
|
||
|
||
#' Create a Conditional Density Function for the Graphon Model.
|
||
#'
|
||
#' This function constructs and returns another function that computes the
|
||
#' conditional density of the graphon model at specified values.
|
||
#' The returned function takes inputs \code{u} (probabilities) and \code{x}
|
||
#' (a scalar covariate value) and evaluates the graphon-based conditional
|
||
#' density using the supplied coefficient vector \code{a}, the density
|
||
#' \code{fv}, the distribution function \code{Fv}, and the empirical sample
|
||
#' \code{X_matrix}.
|
||
#'
|
||
#' @param a Numeric vector. Coefficient vector used in the graphon model; must
|
||
#' have length equal to the number of columns in \code{X_matrix}.
|
||
#' @param fv Function. Density function of the latent variable \eqn{v}
|
||
#' (e.g., \code{dnorm}, \code{dexp}).
|
||
#' @param Fv Function. Distribution function (CDF) corresponding to \code{fv}
|
||
#' (e.g., \code{pnorm}, \code{pexp}).
|
||
#' @param X_matrix Numeric matrix. An \eqn{n \times p} matrix of covariates
|
||
#' \eqn{X_i}, where each row corresponds to an observation.
|
||
#'
|
||
#' @return A function of two arguments \code{u} and \code{x}.
|
||
#' The returned function is vectorized in \code{u} and returns the conditional
|
||
#' density:
|
||
#' \deqn{ f_{U \mid X}(u \mid x) = \frac{f_v(q(u) - a^\top x)}{
|
||
#' \mathbb{E}[f_v(q(u) - a^\top X_i)] },
|
||
#' }
|
||
#' where \eqn{q(u)} is computed via \code{qgraphon()}.
|
||
#'
|
||
#' @details
|
||
#' Internally, the function constructs a scalar evaluator
|
||
#' \code{scalar_conditional_density()} that computes the conditional density for a
|
||
#' single value of \code{u}.
|
||
#' A wrapper function \code{conditional_density()} then applies this scalar
|
||
#' function to each element of the input vector \code{u}.
|
||
#'
|
||
#' Note: the function relies on the existence of \code{qgraphon()},
|
||
#' \code{pgraphon()}, and \code{dgraphon()} in the user's environment.
|
||
#'
|
||
#' @examples
|
||
#' \dontrun{
|
||
#' a <- c(1, -0.5)
|
||
#' fv <- dnorm
|
||
#' Fv <- pnorm
|
||
#' X_matrix <- matrix(rnorm(200), ncol = 2)
|
||
#'
|
||
#' cond_dens <- create_cond_density(a, fv, Fv, X_matrix)
|
||
#' cond_dens(c(0.2, 0.5, 0.8), x = 1.0)
|
||
#' }
|
||
#'
|
||
#' @export
|
||
create_cond_density <- function(
|
||
a,
|
||
fv,
|
||
Fv,
|
||
X_matrix){
|
||
|
||
scalar_conditional_density <-function(u, x) {
|
||
q_a <- qgraphon(u, a, Fv, X_matrix)
|
||
num <- fv(q_a - x %*% a)
|
||
den <- dgraphon(q_a, a, fv, X_matrix)
|
||
num / den
|
||
}
|
||
|
||
conditional_density <- function(u, x) {
|
||
vapply(u, scalar_conditional_density, numeric(1), x = x)
|
||
}
|
||
|
||
return(conditional_density)
|
||
} |