diff --git a/R/estimators.R b/R/estimators.R new file mode 100644 index 0000000..0addfac --- /dev/null +++ b/R/estimators.R @@ -0,0 +1,177 @@ +# load local files +source(here::here("R", "singular_values.R")) +source(here::here("R", "graphon_distribution.R")) +source(here::here("R","singular_value_plot.R")) +source(here::here("R", "build_network.R")) + +# Helper functions ------------------------------------------------------------- +# helper function for wrapping the parameters of the Q_a creation funciton +# TODO rename this function +make_matrix_creation <- function(seed, n, K, sample_X_fn, fv, Fv, guard) { + function(a) { + compute_matrix(seed, a, n, K, sample_X_fn, fv, Fv, guard) + } +} + +# estimators ------------------------------------------------------------------- + +## Moore-Penrose Inverse ------------------------------------------------------- +#' Moore‑Penrose pseudoinverse of a matrix +#' +#' Computes the Moore‑Penrose generalized inverse of a numeric matrix using a +#' singular‑value decomposition (SVD). Singular values smaller than the +#' tolerance are treated as zero to improve numerical stability. +#' +#' @param A A numeric matrix (or an object coercible to a matrix) whose +#' pseudoinverse is required. +#' @param tol Tolerance for treating singular values as zero. By default it +#' is set to `max(dim(A)) * max(svd(A)$d) * .Machine$double.eps`, which +#' scales with the size of the matrix and machine precision. +#' @return A matrix representing the Moore‑Penrose inverse of `A`. The +#' dimensions of the result are `ncol(A) × nrow(A)`. +#' @examples +#' set.seed(123) +#' A <- matrix(rnorm(12), nrow = 3) +#' A_pinv <- pinv(A) +#' # Verify the Moore‑Penrose properties (A %*% A_pinv %*% A ≈ A) +#' all.equal(A %*% A_pinv %*% A, A) +#' @importFrom stats svd +#' @export +pinv <- function(A, tol = NULL) { + # Coerce to matrix and check type + if (!is.matrix(A)) { + A <- as.matrix(A) + } + if (!is.numeric(A)) { + stop("`A` must be a numeric matrix.", call. = FALSE) + } + + # Singular value decomposition + s <- svd(A) + + # Determine tolerance if not supplied + if (is.null(tol)) { + tol <- max(dim(A)) * max(s$d) * .Machine$double.eps + } + + # Invert non‑zero singular values + d_inv <- ifelse(s$d > tol, 1 / s$d, 0) + + # Construct diagonal matrix of inverted singular values + D_plus <- diag(d_inv, nrow = length(d_inv)) + + # Moore‑Penrose inverse: V %*% D⁺ %*% t(U) + s$v %*% D_plus %*% t(s$u) +} + + + +## Estimate Matrix B ----------------------------------------------------------- +#' Estimate the matrix \$B\$ for a graphon model +#' +#' For a given graphon scaling parameter `rho_n`, a square matrix `Q_a`, and an +#' adjacency matrix `A`, this function computes + +#' $ B = \\rho_n \, Q_a^{+\\,T} \, A \, Q_a^{+} $ + +#' where `Q_a^{+}` denotes the Moore‑Penrose pseudoinverse of `Q_a`. +#' +#' @param rho_n Numeric scalar. The graphon scaling parameter. +#' @param Q_a Square numeric matrix. TODO: write description of the Matrix +#' @param A Square numeric adjacency matrix (same dimension as `Q_a`). +#' @return A numeric matrix of the same dimension as `Q_a` representing the +#' estimated \$B\$. +#' @examples +#' set.seed(42) +#' n <- 5 +#' Q_a <- diag(n) # simple identity basis +#' A <- matrix(rbinom(n^2, 1, 0.3), n, n) +#' rho_n <- 0.5 +#' B_est <- estimate_B_matrix(rho_n, Q_a, A) +#' str(B_est) +#' @importFrom stats svd +#' @export +estimate_B_matrix <- function(rho_n, Q_a, A) { + # ---- Input checks --------------------------------------------------------- + if (!is.numeric(rho_n) || length(rho_n) != 1) { + stop("`rho_n` must be a single numeric value.", call. = FALSE) + } + if (!is.matrix(Q_a) || !is.numeric(Q_a)) { + stop("`Q_a` must be a numeric matrix.", call. = FALSE) + } + if (!is.matrix(A) || !is.numeric(A)) { + stop("`A` must be a numeric matrix.", call. = FALSE) + } + if (nrow(Q_a) != ncol(Q_a)) { + stop("`Q_a` must be square.", call. = FALSE) + } + if (nrow(A) != ncol(A)) { + stop("`A` must be square.", call. = FALSE) + } + if (nrow(Q_a) != nrow(A)) { + stop("Dimensions of `Q_a` and `A` must agree for matrix multiplication.", call. = FALSE) + } + + # ---- Compute pseudoinverse ------------------------------------------------ + pinv_Qa <- pinv(Q_a) # assumes `pinv()` is available in the namespace + + # ---- Estimate B ----------------------------------------------------------- + B <- rho_n * (t(pinv_Qa) %*% A %*% pinv_Qa) + + B +} + +# TODO rename this function +# TODO test the convergence of the function estimate_B +# with given graphon (block function, Hölder continuous functions) +# and given K with growing N +# and other options +# plot the loss function with respect to a +estimate_a <- function(A, # adjacency matrix + a0, # start value + n, + K, + sample_X_fn, + fv, + Fv, + guard +) { + calc_Q_a <- make_matrix_creation(seed, n, K, sample_X_fn, fv, Fv, guard) + + loss_func <- function(a) { + Q_a <- calc_Q_a(a) + pinv_Qa <- pinv(Q_a) + + norm(pinv_Qa %*% Q_a %*% A %*% pinv_Qa %*% Q_a - A)^2 + } + + optim(a0, loss_func) +} + +# test the estimator routines +seed <- 1L +set.seed(seed) +X <- matrix(seq(-1, 1, length.out = 5), ncol = 1) +a <- 2 +n <- 2 +K <- 2 +sample_X_fn <- function(n) {matrix(rnorm(n), ncol = 1L)} +fv <- function(x) {dnorm(x, mean=0, sd=1)} +Fv <- function(x) {pnorm(x, mean=0, sd=1)} +guard <- 1e-12 +v <- seq(0, 0.8, length.out = 5) +phi_fun <- function(x, y) x * y # multiplicative kernel +adj <- compute_adj_matrix( + X_matrix = X, + v = v, + a = a, + phi = phi_fun, + rho_n = 0.5, + Fv = Fv + ) +adj + +# Q_a matrix +Qa <- compute_matrix(seed, a, n, K, sample_X_fn, fv, Fv, guard) + +estimate_B() \ No newline at end of file