initial commit

2026-01-16 19:31:08 +01:00
parent 3415358509
commit dd3c3e4b81
9 changed files with 2174 additions and 0 deletions
--- a/R/graphon_distribution.R
+++ b/R/graphon_distribution.R
@@ -0,0 +1,324 @@
+# Load the general quantile function, the loading can be improved later
+# using the here package.
+source("R/qinf.R")
+
+
+# 1. Distribution Function -----------------------------------------------------
+
+#' Empirical Distribution function for the graphon model.
+#'
+#' Computes the empirical expectation
+#' \deqn{E[F_v(y - a^\top X_i)]}
+#' using observed covariate samples \eqn{X_i}. This is done by evaluating the
+#' supplied CDF \code{Fv} at the shifted values \eqn{y - a^\top X_i} and taking
+#' their empirical average. See also Remark 2.1
+#'
+#' @param y Numeric scalar or vector of evaluation points.
+#' @param a Numeric vector. Coefficient vector used to compute the inner products
+#'   \eqn{a^\top X_i}. Its length must match the number of columns in
+#'   \code{X_matrix}.
+#' @param Fv Function. Cumulative distribution function of the noise variable
+#'   \eqn{v} (e.g., \code{pnorm}, \code{pexp}, etc.). Must accept a numeric vector
+#'   and return a numeric vector of the same length.
+#' @param X_matrix Numeric matrix of dimension \eqn{n \times p}, where each row
+#'   corresponds to an observed covariate vector \eqn{X_i}.
+#'
+#' @return A numeric scalar giving the empirical expectation
+#'   \eqn{(1/n) \sum_{i=1}^n F_v(y - a^\top X_i)}.
+#'
+#' @examples
+#' set.seed(1)
+#' X <- matrix(rnorm(100), ncol = 2)
+#' a <- c(1, -0.5)
+#' y <- 0
+#' pgraphon(y, a, pnorm, X)
+#'
+#' @export
+pgraphon <- function(
+    y,           # scalar
+    a,           # vector (coefficient vector, can be one-dimensional)
+    Fv,          # CDF function of the v's (e.g., pnorm, pexp, etc.)
+    X_matrix     # n x p matrix: each row is X_i
+) {
+  ## 1.1 Check inputs ==========================================================
+  if (ncol(X_matrix) != length(a)) {
+    stop("Number of columns in X_matrix must match length of a")
+  }
+  if (!is.numeric(y)) {
+    stop("'y' must be numeric (scalar or vector)")
+  }
+  if (!is.numeric(a) || !is.vector(a)) {
+    stop("'a' must be a numeric vector")
+  }
+  if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
+    stop("'X_matrix' must be a numeric matrix")
+  }
+  if (!is.function(Fv)) {
+    stop("'Fv' must be a function (a CDF)")
+  }
+  ## 1.2 Compute the expected value ============================================
+  
+  # Compute a^T X_i as in the paper. Here we switched to the R convention that each
+  # observation is a row. The paper assumes that each observation is a vector
+  inner_products <- as.vector(X_matrix %*% a)  # vector of length n
+  
+  # outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i)
+  # equals y[j] - inner_prod[i].
+  dev_mat <- outer(y, inner_products, "-")
+  
+  # Apply CDF Fv to each deviation
+  # With sapply we do not have to assume that Fv is vectorized
+  cdf_values <- sapply(dev_mat, Fv)
+  
+  # Row means give the empirical expectation for each y_j
+  out <- rowMeans(cdf_vals)
+  out
+}
+
+# 2. Density Function ----------------------------------------------------------
+
+#' Empirical Graphon Density Estimate
+#'
+#' Computes the empirical expectation \eqn{E[f_v(y - a^\top X_i)]} using observed
+#' covariate samples \eqn{X_i}. This serves as an empirical approximation of the
+#' graphon density evaluated at \eqn{y}, where \eqn{f_v} is the density of the
+#' latent variable \eqn{v}. See also Remark 2.1
+#'
+#' @param y Numeric scalar or vector of points at which the density should be 
+#'   evaluated.
+#' @param a Numeric vector. Coefficient vector used to compute the linear
+#'   projection \eqn{a^\top X_i}. Its length must match the number of columns in
+#'   \code{X_matrix}.
+#' @param fv Function. Density function of the latent variable \eqn{v} (e.g.,
+#'   \code{dnorm}, \code{dexp}). Must accept a numeric vector and return a
+#'   numeric vector of the same length.
+#' @param X_matrix Numeric matrix of size \eqn{n \times p}. Each row represents
+#'   an observed covariate vector \eqn{X_i}. The number of columns must match the
+#'   length of \code{a}.
+#'
+#' @return A numeric scalar giving the empirical average
+#'   \eqn{\frac{1}{n} \sum_{i=1}^n f_v(y - a^\top X_i)}.
+#'
+#' @examples
+#' set.seed(1)
+#' X <- matrix(rnorm(50), ncol = 2)
+#' a <- c(1, -0.5)
+#' dgraphon(y = 0.2, a = a, fv = dnorm, X_matrix = X)
+#'
+#' @export
+dgraphon <- function(
+    y,           # scalar
+    a,           # vector (coefficient vector, can be )
+    fv,          # density function of the v's (e.g., dnorm, dexp, etc.)
+    X_matrix     # n x p matrix: each row is X_i
+) {
+  ## 2.1 Check inputs ----------------------------------------------------------
+  if (!is.numeric(y)) {
+    stop("'y' must be numeric (scalar or vector)")
+  }
+  if (!is.numeric(a) || !is.vector(a)) {
+    stop("'a' must be a numeric vector")
+  }
+  if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
+    stop("'X_matrix' must be a numeric matrix")
+  }
+  if (ncol(X_matrix) != length(a)) {
+    stop("Number of columns in X_matrix must match length of a")
+  }
+  if (!is.function(fv)) {
+    stop("'fv' must be a function (a density)")
+  }
+  
+  ## 1.2 Compute the expected value ============================================
+  
+  # Compute a^T X_i as in the paper. Here we switched to the R convention that each
+  # observation is a row. The paper assumes that each observation is a vector
+  inner_products <- as.vector(X_matrix %*% a)  # vector of length n
+  
+  # outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i)
+  # equals y[j] - inner_prod[i].
+  dev_mat <- outer(y, inner_products, "-")
+  
+  # Apply the density function to each y[j]
+  # With sapply we do not have to assume that fv is vectorized
+  dens_vals <- sapply(dev_mat, fv)
+  
+  # Row means give the empirical expectation for each y_j
+  out <- rowMeans(dens_vals)
+  out
+}
+
+# 3. Quantile Function ---------------------------------------------------------
+
+#' Quantile of the empirical graphon distribution
+#'
+#' This is a thin wrapper around the generic infimum‑type quantile routine
+#' \code{qinf()}.  It builds the CDF of the graphon,
+#' \eqn{p_{\text{graphon}}(x) = \frac{1}{n}\sum_{i=1}^{n}F_v\bigl(x-a^\top X_i\bigr)},
+#' and then finds the quantile(s) for the supplied probability(ies) \code{p}.
+#'
+#' @param p Numeric vector of probabilities in \eqn{[0,1]}.  Values outside this
+#'   interval are rejected.
+#' @param a Numeric coefficient vector.  Its length must equal the number of
+#'   columns of \code{X_matrix}.
+#' @param Fv Function.  The CDF of the latent variable $v$.  It must be
+#'   vectorised (i.e. accept a numeric vector and return a numeric vector of
+#'   the same length).  Typical examples are \code{pnorm}, \code{pexp}, etc.
+#' @param X_matrix Numeric matrix of dimension \eqn{n\times p}.  Each row
+#'   corresponds to an observation $X_i$.
+#' @param lower,upper Numeric scalars giving the search interval for the root
+#'   finder.  By default they are set to \code{-Inf} and \code{Inf}.
+#' @param tol Numeric tolerance for the bisection algorithm used inside
+#'   \code{qinf()}.  The default is the square‑root of machine epsilon.
+#' @param max.iter Maximum number of bisection iterations (safety guard).
+#' @param ... Additional arguments that are passed **directly** to the
+#'   internal CDF \code{pgraphon}.  This makes the wrapper flexible – you do not
+#'   have to list every argument (e.g. \code{a}, \code{X_matrix}, \code{Fv})
+#'   explicitly.
+#'
+#' @return A numeric vector of the same length as \code{p} containing the
+#'   quantiles.  The vector is named by the probabilities.
+#'
+#' @examples
+#' ## ---- simple normal example ------------------------------------------------
+#' set.seed(123)
+#' X <- matrix(rnorm(200), ncol = 2)          # n = 100, p = 2
+#' a <- c(0.7, -0.3)
+#' qgraphon(p = c(0.25, 0.5, 0.75),
+#'          a = a,
+#'          Fv = pnorm,
+#'          X_matrix = X)
+#'
+#' ## ---- mixture example ------------------------------------------------------
+#' mix_cdf <- function(x, w = 0.4, mu = 0, sigma = 1) {
+#'   w * (x >= 0) + (1 - w) * pnorm(x, mean = mu, sd = sigma)
+#' }
+#' qgraphon(p = seq(0, 1, 0.2),
+#'          a = a,
+#'          Fv = mix_cdf,
+#'          X_matrix = X)
+#'
+#' @export
+qgraphon <- function(
+    p,                               # probabilities
+    a,                               # coefficient vector
+    Fv,                              # CDF of the v's
+    X_matrix,                         # X_i samples, matrix
+    lower = -Inf,                     # lower bound roots
+    upper =  Inf,                     # upper bound roots
+    tol   = .Machine$double.eps^0.5,  # parameter root finding
+    max.iter = 100) {
+  
+  ## 3.1 Check inputs ----------------------------------------------------------
+  if (!is.numeric(p) || any(is.na(p))) {
+    stop("'p' must be a numeric vector without NA values")
+  }
+  if (any(p < 0 | p > 1)) {
+    stop("All probabilities in 'p' must lie in [0, 1]")
+  }
+  
+  if (!is.numeric(a) || !is.vector(a)) {
+    stop("'a' must be a numeric vector")
+  }
+  
+  if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
+    stop("'X_matrix' must be a numeric matrix")
+  }
+  
+  if (ncol(X_matrix) != length(a)) {
+    stop("Number of columns of 'X_matrix' (", ncol(X_matrix),
+         ") must equal length of 'a' (", length(a), ")")
+  }
+  
+  if (!is.function(Fv)) {
+    stop("'Fv' must be a function (the CDF of the latent variable)")
+  }
+  
+  ## 3.2 Call the generic quantile function ------------------------------------
+  out <- qinf(F = pgraphon,
+            p = p,
+            lower = lower,
+            upper = upper,
+            tol = tol,
+            max.iter = max.iter,
+            a = a,
+            X_matrix = X_matrix,
+            Fv = Fv)
+  
+  # Name the result by the probabilities – this mirrors the behaviour of
+  # base‑R quantile functions (e.g. qnorm, qbeta).
+  names(out) <- as.character(p)
+  out
+}
+
+
+# 4. Create conditional density ------------------------------------------------
+
+#' Create a Conditional Density Function for the Graphon Model.
+#'
+#' This function constructs and returns another function that computes the
+#' conditional density of the graphon model at specified values.  
+#' The returned function takes inputs \code{u} (probabilities) and \code{x}
+#' (a scalar covariate value) and evaluates the graphon-based conditional
+#' density using the supplied coefficient vector \code{a}, the density
+#' \code{fv}, the distribution function \code{Fv}, and the empirical sample
+#' \code{X_matrix}.
+#'
+#' @param a Numeric vector. Coefficient vector used in the graphon model; must
+#'   have length equal to the number of columns in \code{X_matrix}.
+#' @param fv Function. Density function of the latent variable \eqn{v}
+#'   (e.g., \code{dnorm}, \code{dexp}).
+#' @param Fv Function. Distribution function (CDF) corresponding to \code{fv}
+#'   (e.g., \code{pnorm}, \code{pexp}).
+#' @param X_matrix Numeric matrix. An \eqn{n \times p} matrix of covariates
+#'   \eqn{X_i}, where each row corresponds to an observation.
+#'
+#' @return A function of two arguments \code{u} and \code{x}.  
+#'   The returned function is vectorized in \code{u} and returns the conditional
+#'   density:
+#'   \deqn{ f_{U \mid X}(u \mid x) = \frac{f_v(q(u) - a^\top x)}{
+#'           \mathbb{E}[f_v(q(u) - a^\top X_i)] },
+#'   }
+#'   where \eqn{q(u)} is computed via \code{qgraphon()}.
+#'
+#' @details
+#' Internally, the function constructs a scalar evaluator
+#' \code{scalar_conditional_density()} that computes the conditional density for a
+#' single value of \code{u}.  
+#' A wrapper function \code{conditional_density()} then applies this scalar
+#' function to each element of the input vector \code{u}.
+#'
+#' Note: the function relies on the existence of \code{qgraphon()},
+#' \code{pgraphon()}, and \code{dgraphon()} in the user's environment.
+#'
+#' @examples
+#' \dontrun{
+#' a <- c(1, -0.5)
+#' fv <- dnorm
+#' Fv <- pnorm
+#' X_matrix <- matrix(rnorm(200), ncol = 2)
+#'
+#' cond_dens <- create_cond_density(a, fv, Fv, X_matrix)
+#' cond_dens(c(0.2, 0.5, 0.8), x = 1.0)
+#' }
+#'
+#' @export
+create_cond_density <- function(
+    a,
+    fv,
+    Fv,
+    X_matrix){
+  
+  scalar_conditional_density <-function(u, x) {
+    q_a  <- qgraphon(u, a, Fv, X_matrix)
+    num  <- fv(q_a - x %*% a)
+    den  <- dgraphon(q_a, a, fv, X_matrix)
+    num / den
+  }
+  
+  conditional_density <- function(u, x) {
+    vapply(u, scalar_conditional_density, numeric(1), x = x)
+  }
+  
+  return(conditional_density)
+}