# load local files source(here::here("R", "singular_values.R")) source(here::here("R", "graphon_distribution.R")) source(here::here("R","singular_value_plot.R")) source(here::here("R", "build_network.R")) # Helper functions ------------------------------------------------------------- # helper function for wrapping the parameters of the Q_a creation function # TODO rename this function make_matrix_creation <- function(seed, n, K, matrix_X, fv, Fv, guard) { function(a) { compute_matrix(seed=seed, a, n=n, K=K, matrix_X = matrix_X, fv=fv, Fv=Fv, guard=guard) } } # estimators ------------------------------------------------------------------- ## Moore-Penrose Inverse ------------------------------------------------------- #' Moore‑Penrose pseudoinverse of a matrix #' #' Computes the Moore‑Penrose generalized inverse of a numeric matrix using a #' singular‑value decomposition (SVD). Singular values smaller than the #' tolerance are treated as zero to improve numerical stability. #' #' @param A A numeric matrix (or an object coercible to a matrix) whose #' pseudoinverse is required. #' @param tol Tolerance for treating singular values as zero. By default it #' is set to `max(dim(A)) * max(svd(A)$d) * .Machine$double.eps`, which #' scales with the size of the matrix and machine precision. #' @return A matrix representing the Moore‑Penrose inverse of `A`. The #' dimensions of the result are `ncol(A) × nrow(A)`. #' @examples #' set.seed(123) #' A <- matrix(rnorm(12), nrow = 3) #' A_pinv <- pinv(A) #' # Verify the Moore‑Penrose properties (A %*% A_pinv %*% A ≈ A) #' all.equal(A %*% A_pinv %*% A, A) #' @importFrom stats svd #' @export pinv <- function(A, tol = NULL) { # Coerce to matrix and check type if (!is.matrix(A)) { A <- as.matrix(A) } if (!is.numeric(A)) { stop("`A` must be a numeric matrix.", call. = FALSE) } # Singular value decomposition s <- svd(A) # Determine tolerance if not supplied if (is.null(tol)) { tol <- max(dim(A)) * max(s$d) * .Machine$double.eps } # Invert non‑zero singular values d_inv <- ifelse(s$d > tol, 1 / s$d, 0) # Construct diagonal matrix of inverted singular values D_plus <- diag(d_inv, nrow = length(d_inv)) # Moore‑Penrose inverse: V %*% D⁺ %*% t(U) s$v %*% D_plus %*% t(s$u) } ## Estimate Matrix B ----------------------------------------------------------- #' Estimate the matrix \$B\$ for a graphon model #' #' For a given graphon scaling parameter `rho_n`, a square matrix `Q_a`, and an #' adjacency matrix `A`, this function computes #' $ B = \\rho_n \, Q_a^{+\\,T} \, A \, Q_a^{+} $ #' where `Q_a^{+}` denotes the Moore‑Penrose pseudoinverse of `Q_a`. #' #' @param rho_n Numeric scalar. The graphon scaling parameter. #' @param Q_a Square numeric matrix. TODO: write description of the Matrix #' @param A Square numeric adjacency matrix (same dimension as `Q_a`). #' @return A numeric matrix of the same dimension as `Q_a` representing the #' estimated \$B\$. #' @examples #' set.seed(42) #' n <- 5 #' Q_a <- diag(n) # simple identity basis #' A <- matrix(rbinom(n^2, 1, 0.3), n, n) #' rho_n <- 0.5 #' B_est <- estimate_B_matrix(rho_n, Q_a, A) #' str(B_est) #' @importFrom stats svd #' @export estimate_B_matrix <- function(rho_n, Q_a, A) { # ---- Input checks --------------------------------------------------------- if (!is.numeric(rho_n) || length(rho_n) != 1) { stop("`rho_n` must be a single numeric value.", call. = FALSE) } if (!is.matrix(Q_a) || !is.numeric(Q_a)) { stop("`Q_a` must be a numeric matrix.", call. = FALSE) } if (!is.matrix(A) || !is.numeric(A)) { stop("`A` must be a numeric matrix.", call. = FALSE) } if (nrow(A) != ncol(A)) { stop("`A` must be square.", call. = FALSE) } if (ncol(Q_a) != nrow(A)) { stop("Dimensions of `Q_a` and `A` must agree for matrix multiplication.", call. = FALSE) } # ---- Compute pseudoinverse ------------------------------------------------ pinv_Qa <- pinv(Q_a) # assumes `pinv()` is available in the namespace # ---- Estimate B ----------------------------------------------------------- B <- rho_n * (t(pinv_Qa) %*% A %*% pinv_Qa) B } # TODO rename this function # TODO test the convergence of the function estimate_B # with given graphon (block function, Hölder continuous functions) # and given K with growing N # and other options # plot the loss function with respect to a estimate_a <- function(A, # adjacency matrix a0, # start value n, K, sample_X_fn, fv, Fv, guard ) { calc_Q_a <- make_matrix_creation(seed, n, K, sample_X_fn, fv, Fv, guard) loss_func <- function(a) { Q_a <- calc_Q_a(a) pinv_Qa <- pinv(Q_a) norm(pinv_Qa %*% Q_a %*% A %*% pinv_Qa %*% Q_a - A, type="F")^2 } return(loss_func) } #' Calculate the edge density of an undirected graph #' #' @title Edge density from an adjacency matrix #' @description Computes the proportion of possible edges that are present in an #' undirected, unweighted graph represented by a square adjacency matrix. The #' density is defined as \eqn{2E / (V(V-1))} where \eqn{E} is the number of edges #' and \eqn{V} is the number of vertices. This corresponds to the parameter #' \eqn{rho_n}. #' The function checks that the input is a square matrix and treats any #' non‑numeric or missing entries as absent edges. #' #' @param adj_matrix A square numeric matrix representing the adjacency matrix #' of an undirected graph. Entries should be 0/1 (or any truthy numeric) with #' `adj_matrix[i, j] == adj_matrix[j, i]`. Missing values are ignored. #' #' @return A single numeric value giving the edge density \eqn{rho}. #' #' @examples #' # Simple triangle graph (3 nodes, 3 edges) #' A_tri <- matrix(c(0,1,1, #' 1,0,1, #' 1,1,0), nrow = 3, byrow = TRUE) #' calculate_edge_density(A_tri) #' #' # Empty graph (no edges) #' A_empty <- matrix(0, nrow = 4, ncol = 4) #' calculate_edge_density(A_empty) #' #' @export calculate_edge_density <- function(adj_matrix) { # ------------------------------------------------------------------------- # Validate input # ------------------------------------------------------------------------- if (!is.matrix(adj_matrix)) { stop("`adj_matrix` must be a matrix.") } if (nrow(adj_matrix) != ncol(adj_matrix)) { stop("`adj_matrix` must be square (same number of rows and columns).") } # ------------------------------------------------------------------------- # Count edges and nodes # ------------------------------------------------------------------------- edge_idx <- which(upper.tri(adj_matrix, diag = FALSE), arr.ind = TRUE) edge_count <- sum(adj_matrix[edge_idx], na.rm = TRUE) node_count <- nrow(adj_matrix) # ------------------------------------------------------------------------- # Compute density # ------------------------------------------------------------------------- rho <- (2 * edge_count) / (node_count * (node_count-1)) return(rho) } # test the estimator routines seed <- 17L # 121L this seed works exceptionally well set.seed(seed) #X <- matrix(seq(-1, 1, length.out = 5), ncol = 1) a <- 20 n <- 400 K <- 4 rho_n <- log(n) / n sample_X_fn <- function(n) {matrix(rnorm(n), ncol = 1L)} X <- sample_X_fn(n) fv <- function(x) {dnorm(x, mean=0, sd=1)} Fv <- function(x) {pnorm(x, mean=0, sd=1)} guard <- 1e-12 v <- rnorm(n) #seq(0, 0.8, length.out = n) phi_fun <-Vectorize(function(x, y) ifelse(((x > 0.5 && y <= 0.5) || (x <= 0.5 && y > 0.5)), 1.6, 0.4)) # multiplicative kernel adj <- compute_adj_matrix( X_matrix = X, v = v, a = a, phi = phi_fun, rho_n = rho_n, Fv = Fv ) adj # Q_a matrix Qa <- compute_matrix(seed, a=a, n=n, K=K, fv=fv, Fv=Fv, guard=guard, matrix_X=X) calc_Q_a <- make_matrix_creation(seed, n=n, K=K, matrix_X = X, fv=fv, Fv=Fv, guard=guard) loss_func <- function(a) { Q_a <- calc_Q_a(a) pinv_Qa <- pinv(Q_a) norm(pinv_Qa %*% Q_a %*% adj %*% pinv_Qa %*% Q_a - adj, type="F")^2 } plot_as <- seq(-10, 100, length.out=500) loss_vals <- sapply(plot_as, loss_func) plot(plot_as , loss_vals, type="b") abline(v=a) title("Test plot of the loss function") optim(25, loss_func, lower=10, upper=100, method="Brent", control=list(fnscale=1))