experiments with rho_n

2026-06-17 10:43:33 +02:00
parent d0e0c03428
commit 9d9d274487
2 changed files with 100 additions and 26 deletions
@@ -5,11 +5,11 @@ source(here::here("R","singular_value_plot.R"))
 source(here::here("R", "build_network.R"))

 # Helper functions -------------------------------------------------------------
-# helper function for wrapping the parameters of the Q_a creation funciton
+# helper function for wrapping the parameters of the Q_a creation function
 # TODO rename this function
-make_matrix_creation <- function(seed, n, K, sample_X_fn, fv, Fv, guard) {
+make_matrix_creation <- function(seed, n, K, matrix_X, fv, Fv, guard) {
  function(a) {
-    compute_matrix(seed, a, n, K, sample_X_fn, fv, Fv, guard)
+    compute_matrix(seed=seed, a, n=n, K=K, matrix_X = matrix_X, fv=fv, Fv=Fv, guard=guard)
  }
 }

@@ -102,13 +102,10 @@ estimate_B_matrix <- function(rho_n, Q_a, A) {
  if (!is.matrix(A) || !is.numeric(A)) {
    stop("`A` must be a numeric matrix.", call. = FALSE)
  }
-  if (nrow(Q_a) != ncol(Q_a)) {
-    stop("`Q_a` must be square.", call. = FALSE)
-  }
  if (nrow(A) != ncol(A)) {
    stop("`A` must be square.", call. = FALSE)
  }
-  if (nrow(Q_a) != nrow(A)) {
+  if (ncol(Q_a) != nrow(A)) {
    stop("Dimensions of `Q_a` and `A` must agree for matrix multiplication.", call. = FALSE)
  }
  
@@ -142,36 +139,108 @@ estimate_a <- function(A, # adjacency matrix
    Q_a <- calc_Q_a(a)
    pinv_Qa <- pinv(Q_a)
    
-    norm(pinv_Qa %*% Q_a %*% A %*% pinv_Qa %*% Q_a - A)^2
+    norm(pinv_Qa %*% Q_a %*% A %*% pinv_Qa %*% Q_a - A, type="F")^2
  }
  
-  optim(a0, loss_func)
+  return(loss_func)
 }

+#' Calculate the edge density of an undirected graph
+#'
+#' @title Edge density from an adjacency matrix
+#' @description Computes the proportion of possible edges that are present in an
+#'   undirected, unweighted graph represented by a square adjacency matrix.  The
+#'   density is defined as \eqn{2E / (V(V-1))} where \eqn{E} is the number of edges
+#'   and \eqn{V} is the number of vertices.  This corresponds to the parameter
+#'   \eqn{rho_n}.
+#'   The function checks that the input is a square matrix and treats any
+#'   non‑numeric or missing entries as absent edges.
+#'
+#' @param adj_matrix A square numeric matrix representing the adjacency matrix
+#'   of an undirected graph.  Entries should be 0/1 (or any truthy numeric) with
+#'   `adj_matrix[i, j] == adj_matrix[j, i]`.  Missing values are ignored.
+#'
+#' @return A single numeric value giving the edge density \eqn{rho}.
+#'
+#' @examples
+#' # Simple triangle graph (3 nodes, 3 edges)
+#' A_tri <- matrix(c(0,1,1,
+#'                   1,0,1,
+#'                   1,1,0), nrow = 3, byrow = TRUE)
+#' calculate_edge_density(A_tri)
+#'
+#' # Empty graph (no edges)
+#' A_empty <- matrix(0, nrow = 4, ncol = 4)
+#' calculate_edge_density(A_empty)
+#'
+#' @export
+calculate_edge_density <- function(adj_matrix) {
+  # -------------------------------------------------------------------------
+  # Validate input
+  # -------------------------------------------------------------------------
+  if (!is.matrix(adj_matrix)) {
+    stop("`adj_matrix` must be a matrix.")
+  }
+  if (nrow(adj_matrix) != ncol(adj_matrix)) {
+    stop("`adj_matrix` must be square (same number of rows and columns).")
+  }
+  
+  # -------------------------------------------------------------------------
+  # Count edges and nodes
+  # -------------------------------------------------------------------------
+  edge_idx   <- which(upper.tri(adj_matrix, diag = FALSE), arr.ind = TRUE)
+  edge_count <- sum(adj_matrix[edge_idx], na.rm = TRUE)
+  node_count <- nrow(adj_matrix)
+  
+  # -------------------------------------------------------------------------
+  # Compute density
+  # -------------------------------------------------------------------------
+  rho <- (2 * edge_count) / (node_count * (node_count-1))
+  
+  return(rho)
+}
 # test the estimator routines
-seed <- 1L
+seed <- 17L #  121L this seed works exceptionally well
 set.seed(seed)
-X  <- matrix(seq(-1, 1, length.out = 5), ncol = 1)
-a  <- 2
-n <- 2
-K <- 2
+#X  <- matrix(seq(-1, 1, length.out = 5), ncol = 1)
+a  <- 20
+n <- 400
+K <- 4
+rho_n <- log(n) / n
 sample_X_fn <- function(n) {matrix(rnorm(n), ncol = 1L)}
+X <- sample_X_fn(n)
 fv <- function(x) {dnorm(x, mean=0, sd=1)}
 Fv <- function(x) {pnorm(x, mean=0, sd=1)}
 guard <- 1e-12
-v  <- seq(0, 0.8, length.out = 5)
-phi_fun <- function(x, y) x * y          # multiplicative kernel
+v  <- rnorm(n) #seq(0, 0.8, length.out = n)
+phi_fun <-Vectorize(function(x, y) ifelse(((x > 0.5 && y <= 0.5) || (x <= 0.5 && y > 0.5)), 1.6, 0.4))        # multiplicative kernel
 adj <- compute_adj_matrix(
   X_matrix = X,
   v        = v,
   a        = a,
   phi      = phi_fun,
-   rho_n    = 0.5,
+   rho_n    = rho_n,
   Fv       = Fv
 )
 adj

 # Q_a matrix
-Qa <- compute_matrix(seed, a, n, K, sample_X_fn, fv, Fv, guard)
+Qa <- compute_matrix(seed, a=a, n=n, K=K, fv=fv, Fv=Fv, guard=guard, matrix_X=X)

-estimate_B()
+calc_Q_a <- make_matrix_creation(seed, n=n, K=K, matrix_X = X, fv=fv, Fv=Fv, guard=guard)
+
+loss_func <- function(a) {
+  Q_a <- calc_Q_a(a)
+  pinv_Qa <- pinv(Q_a)
+  
+  norm(pinv_Qa %*% Q_a %*% adj %*% pinv_Qa %*% Q_a - adj, type="F")^2
+}
+
+
+plot_as <- seq(-10, 100, length.out=500)
+loss_vals <- sapply(plot_as, loss_func)
+plot(plot_as , loss_vals, type="b")
+abline(v=a)
+title("Test plot of the loss function")
+
+optim(25, loss_func, lower=10, upper=100, method="Brent", control=list(fnscale=1))
@@ -33,10 +33,6 @@ source(here::here("R", "graphon_distribution.R"))
 #'               generated.
 #' @param K      Positive integer.  Number of divisions of the unit interval;
 #'               the resulting grid has length `K+1`.
-#' @param sample_X_fn
-#'               Function with a single argument `n`.  It must return an
-#'               \eqn{n \times p} matrix (or an object coercible to a matrix) of
-#'               covariate samples.
 #' @param fv     Density function of the latent variable \eqn{v}.  Must be
 #'               vectorised (i.e. accept a numeric vector and return a numeric
 #'               vector of the same length).  Typical examples are
@@ -44,8 +40,15 @@ source(here::here("R", "graphon_distribution.R"))
 #' @param Fv     Cumulative distribution function of the latent variable
 #'               \eqn{v}.  Also has to be vectorised.  Typical examples are
 #'               `pnorm`, `pexp`, ….
+#' @param sample_X_fn
+#'               Function with a single argument `n`.  It must return an
+#'               \eqn{n \times p} matrix (or an object coercible to a matrix) of
+#'               covariate samples. It can be NULL, but then `matrix_X` must be
+#'               given.
 #' @param matrix_X matrix with the covariates at each node. Each row corresponds
-#' to a single node with p attributes.
+#' to a single node with p attributes. The default value is `NULL`. If it
+#' is `NULL` then `sample_X_fun` must be given. If both parameters are provided,
+#' then `matrix_X` is used.
 #' @param guard Positive numeric guard value.  Default is `sqrt(.Machine$double.eps)`,
 #'   which is about `1.5e‑8` on most platforms – small enough to be negligible
 #'   for most computations. If it is null, then it is not used. 
@@ -106,9 +109,9 @@ compute_matrix <- function(
    a,
    n,
    K,
-    sample_X_fn,
    fv,
    Fv,
+    sample_X_fn=NULL,
    matrix_X = NULL,
    guard = sqrt(.Machine$double.eps),
    scaled = FALSE
@@ -118,10 +121,12 @@ compute_matrix <- function(
  if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector")
  if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer")
  if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer")
-  if (!is.function(sample_X_fn)) stop("'sample_X_fn' must be a function")
+  if (!(is.function(sample_X_fn) || is.null(sample_X_fn))) stop("'sample_X_fn' must be a function or Null and matrix_X must be given")
  if (!is.function(fv)) stop("'f_v' must be a function")
  if (!is.function(Fv)) stop("'F_v' must be a function")
  if (!is.null(matrix_X) && !is.matrix(matrix_X)) stop("matrix_X must be either null or a matrix")
+  if (is.null(matrix_X) && is.null(sample_X_fn)) stop("Either 'matrix_X' or 'sample_X_fn' must be supplied!")
+  if (!is.null(matrix_X) && !is.null(sample_X_fn)) warning("Both arguments 'matrix_X' and `sample_X_fn` is given. Priority is given by to the first!")
  
  ## 1.2 Generate the Matrix X of covariates ===================================
  # If the argument matrix_X is present, use this matrix, otherwise generate one