# Load the general quantile function, the loading can be improved later # using the here package. source("R/qinf.R") # 1. Distribution Function ----------------------------------------------------- #' Empirical Distribution function for the graphon model. #' #' Computes the empirical expectation #' \deqn{E[F_v(y - a^\top X_i)]} #' using observed covariate samples \eqn{X_i}. This is done by evaluating the #' supplied CDF \code{Fv} at the shifted values \eqn{y - a^\top X_i} and taking #' their empirical average. See also Remark 2.1 #' #' @param y Numeric scalar or vector of evaluation points. #' @param a Numeric vector. Coefficient vector used to compute the inner products #' \eqn{a^\top X_i}. Its length must match the number of columns in #' \code{X_matrix}. #' @param Fv Function. Cumulative distribution function of the noise variable #' \eqn{v} (e.g., \code{pnorm}, \code{pexp}, etc.). Must accept a numeric vector #' and return a numeric vector of the same length. #' @param X_matrix Numeric matrix of dimension \eqn{n \times p}, where each row #' corresponds to an observed covariate vector \eqn{X_i}. #' #' @return A numeric scalar giving the empirical expectation #' \eqn{(1/n) \sum_{i=1}^n F_v(y - a^\top X_i)}. #' #' @examples #' set.seed(1) #' X <- matrix(rnorm(100), ncol = 2) #' a <- c(1, -0.5) #' y <- 0 #' pgraphon(y, a, pnorm, X) #' #' @export pgraphon <- function( y, # scalar a, # vector (coefficient vector, can be one-dimensional) Fv, # CDF function of the v's (e.g., pnorm, pexp, etc.) X_matrix # n x p matrix: each row is X_i ) { ## 1.1 Check inputs ========================================================== if (ncol(X_matrix) != length(a)) { stop("Number of columns in X_matrix must match length of a") } if (!is.numeric(y)) { stop("'y' must be numeric (scalar or vector)") } if (!is.numeric(a) || !is.vector(a)) { stop("'a' must be a numeric vector") } if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) { stop("'X_matrix' must be a numeric matrix") } if (!is.function(Fv)) { stop("'Fv' must be a function (a CDF)") } ## 1.2 Compute the expected value ============================================ # Compute a^T X_i as in the paper. Here we switched to the R convention that each # observation is a row. The paper assumes that each observation is a vector inner_products <- as.vector(X_matrix %*% a) # vector of length n # outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i) # equals y[j] - inner_prod[i]. dev_mat <- outer(y, inner_products, "-") # Apply CDF Fv to each deviation cdf_vals <- Fv(dev_mat) # Row means give the empirical expectation for each y_j out <- rowMeans(cdf_vals) out } # 2. Density Function ---------------------------------------------------------- #' Empirical Graphon Density Estimate #' #' Computes the empirical expectation \eqn{E[f_v(y - a^\top X_i)]} using observed #' covariate samples \eqn{X_i}. This serves as an empirical approximation of the #' graphon density evaluated at \eqn{y}, where \eqn{f_v} is the density of the #' latent variable \eqn{v}. See also Remark 2.1 #' #' @param y Numeric scalar or vector of points at which the density should be #' evaluated. #' @param a Numeric vector. Coefficient vector used to compute the linear #' projection \eqn{a^\top X_i}. Its length must match the number of columns in #' \code{X_matrix}. #' @param fv Function. Density function of the latent variable \eqn{v} (e.g., #' \code{dnorm}, \code{dexp}). Must accept a numeric vector and return a #' numeric vector of the same length. #' @param X_matrix Numeric matrix of size \eqn{n \times p}. Each row represents #' an observed covariate vector \eqn{X_i}. The number of columns must match the #' length of \code{a}. #' #' @return A numeric scalar giving the empirical average #' \eqn{\frac{1}{n} \sum_{i=1}^n f_v(y - a^\top X_i)}. #' #' @examples #' set.seed(1) #' X <- matrix(rnorm(50), ncol = 2) #' a <- c(1, -0.5) #' dgraphon(y = 0.2, a = a, fv = dnorm, X_matrix = X) #' #' @export dgraphon <- function( y, # scalar a, # vector (coefficient vector, can be ) fv, # density function of the v's (e.g., dnorm, dexp, etc.) X_matrix # n x p matrix: each row is X_i ) { ## 2.1 Check inputs ---------------------------------------------------------- if (!is.numeric(y)) { stop("'y' must be numeric (scalar or vector)") } if (!is.numeric(a) || !is.vector(a)) { stop("'a' must be a numeric vector") } if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) { stop("'X_matrix' must be a numeric matrix") } if (ncol(X_matrix) != length(a)) { stop("Number of columns in X_matrix must match length of a") } if (!is.function(fv)) { stop("'fv' must be a function (a density)") } ## 1.2 Compute the expected value ============================================ # Compute a^T X_i as in the paper. Here we switched to the R convention that each # observation is a row. The paper assumes that each observation is a vector inner_products <- as.vector(X_matrix %*% a) # vector of length n # outer(y, inner_prod, "-") creates an |y| × n matrix where element (j,i) # equals y[j] - inner_prod[i]. dev_mat <- outer(y, inner_products, "-") # Apply the density function to each y[j] dens_vals <- fv(dev_mat) # Row means give the empirical expectation for each y_j out <- rowMeans(dens_vals) out } # 3. Quantile Function --------------------------------------------------------- #' Quantile of the empirical graphon distribution #' #' This is a thin wrapper around the generic infimum‑type quantile routine #' \code{qinf()}. It builds the CDF of the graphon, #' \eqn{p_{\text{graphon}}(x) = \frac{1}{n}\sum_{i=1}^{n}F_v\bigl(x-a^\top X_i\bigr)}, #' and then finds the quantile(s) for the supplied probability(ies) \code{p}. #' #' @param p Numeric vector of probabilities in \eqn{[0,1]}. Values outside this #' interval are rejected. #' @param a Numeric coefficient vector. Its length must equal the number of #' columns of \code{X_matrix}. #' @param Fv Function. The CDF of the latent variable $v$. It must be #' vectorised (i.e. accept a numeric vector and return a numeric vector of #' the same length). Typical examples are \code{pnorm}, \code{pexp}, etc. #' @param X_matrix Numeric matrix of dimension \eqn{n\times p}. Each row #' corresponds to an observation $X_i$. #' @param lower,upper Numeric scalars giving the search interval for the root #' finder. By default they are set to \code{-Inf} and \code{Inf}. #' @param tol Numeric tolerance for the bisection algorithm used inside #' \code{qinf()}. The default is the square‑root of machine epsilon. #' @param max.iter Maximum number of bisection iterations (safety guard). #' @param ... Additional arguments that are passed **directly** to the #' internal CDF \code{pgraphon}. This makes the wrapper flexible – you do not #' have to list every argument (e.g. \code{a}, \code{X_matrix}, \code{Fv}) #' explicitly. #' #' @return A numeric vector of the same length as \code{p} containing the #' quantiles. The vector is named by the probabilities. #' #' @examples #' ## ---- simple normal example ------------------------------------------------ #' set.seed(123) #' X <- matrix(rnorm(200), ncol = 2) # n = 100, p = 2 #' a <- c(0.7, -0.3) #' qgraphon(p = c(0.25, 0.5, 0.75), #' a = a, #' Fv = pnorm, #' X_matrix = X) #' #' ## ---- mixture example ------------------------------------------------------ #' mix_cdf <- function(x, w = 0.4, mu = 0, sigma = 1) { #' w * (x >= 0) + (1 - w) * pnorm(x, mean = mu, sd = sigma) #' } #' qgraphon(p = seq(0, 1, 0.2), #' a = a, #' Fv = mix_cdf, #' X_matrix = X) #' #' @export qgraphon <- function( p, # probabilities a, # coefficient vector Fv, # CDF of the v's X_matrix, # X_i samples, matrix lower = -Inf, # lower bound roots upper = Inf, # upper bound roots tol = .Machine$double.eps^0.5, # parameter root finding max.iter = 100) { ## 3.1 Check inputs ---------------------------------------------------------- if (!is.numeric(p) || any(is.na(p))) { stop("'p' must be a numeric vector without NA values") } if (any(p < 0 | p > 1)) { stop("All probabilities in 'p' must lie in [0, 1]") } if (!is.numeric(a) || !is.vector(a)) { stop("'a' must be a numeric vector") } if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) { stop("'X_matrix' must be a numeric matrix") } if (ncol(X_matrix) != length(a)) { stop("Number of columns of 'X_matrix' (", ncol(X_matrix), ") must equal length of 'a' (", length(a), ")") } if (!is.function(Fv)) { stop("'Fv' must be a function (the CDF of the latent variable)") } ## 3.2 Call the generic quantile function ------------------------------------ out <- qinf(F = pgraphon, p = p, lower = lower, upper = upper, tol = tol, max.iter = max.iter, a = a, X_matrix = X_matrix, Fv = Fv) # Name the result by the probabilities – this mirrors the behaviour of # base‑R quantile functions (e.g. qnorm, qbeta). names(out) <- as.character(p) out } # 4. Create conditional density ------------------------------------------------ #' Create a Conditional Density Function for the Graphon Model. #' #' This function constructs and returns another function that computes the #' conditional density of the graphon model at specified values. #' The returned function takes inputs \code{u} (probabilities) and \code{x} #' (a scalar covariate value) and evaluates the graphon-based conditional #' density using the supplied coefficient vector \code{a}, the density #' \code{fv}, the distribution function \code{Fv}, and the empirical sample #' \code{X_matrix}. #' #' @param a Numeric vector. Coefficient vector used in the graphon model; must #' have length equal to the number of columns in \code{X_matrix}. #' @param fv Function. Density function of the latent variable \eqn{v} #' (e.g., \code{dnorm}, \code{dexp}). #' @param Fv Function. Distribution function (CDF) corresponding to \code{fv} #' (e.g., \code{pnorm}, \code{pexp}). #' @param X_matrix Numeric matrix. An \eqn{n \times p} matrix of covariates #' \eqn{X_i}, where each row corresponds to an observation. #' #' @return A function of two arguments \code{u} and \code{x}. #' The returned function is vectorized in \code{u} and returns the conditional #' density: #' \deqn{ f_{U \mid X}(u \mid x) = \frac{f_v(q(u) - a^\top x)}{ #' \mathbb{E}[f_v(q(u) - a^\top X_i)] }, #' } #' where \eqn{q(u)} is computed via \code{qgraphon()}. #' #' @details #' Internally, the function constructs a scalar evaluator #' \code{scalar_conditional_density()} that computes the conditional density for a #' single value of \code{u}. #' A wrapper function \code{conditional_density()} then applies this scalar #' function to each element of the input vector \code{u}. #' #' Note: the function relies on the existence of \code{qgraphon()}, #' \code{pgraphon()}, and \code{dgraphon()} in the user's environment. #' #' @examples #' \dontrun{ #' a <- c(1, -0.5) #' fv <- dnorm #' Fv <- pnorm #' X_matrix <- matrix(rnorm(200), ncol = 2) #' #' cond_dens <- create_cond_density(a, fv, Fv, X_matrix) #' cond_dens(c(0.2, 0.5, 0.8), x = 1.0) #' } #' #' @export create_cond_density <- function( a, fv, Fv, X_matrix){ scalar_conditional_density <-function(u, x) { q_a <- qgraphon(u, a, Fv, X_matrix) num <- fv(q_a - x %*% a) den <- dgraphon(q_a, a, fv, X_matrix) num / den } conditional_density <- function(u, x) { vapply(u, scalar_conditional_density, numeric(1), x = x) } return(conditional_density) }