add function for plotting singular values automatically

2026-01-26 14:34:52 +01:00
parent cbf8e63f94
commit b18113a0ea
1 changed files with 222 additions and 0 deletions
--- a/R/singular_value_plot.R
+++ b/R/singular_value_plot.R
@@ -0,0 +1,222 @@
+# Load function ----------------------------------------------------------------
+source(here::here("R", "singular_values.R"))
+source(here::here("R", "graphon_distribution.R"))
+
+
+# Convert a call or character to a nicely formatted character string.
+#   * If the user supplied a character, we keep it unchanged.
+#   * If the user supplied a call (e.g. quote(20 / sqrt(x))) we deparse it
+#     and collapse the result to a single line.
+expr_to_label <- function(expr) {
+  if (is.character(expr)) {
+    expr
+  } else {
+    # deparse returns a character vector; collapse to one line
+    paste(deparse(expr), collapse = "")
+  }
+}
+
+
+#' Compute the smallest singular value of a sequence of matrices Q(K)
+#'
+#' @title Smallest singular values for a family of matrices Q(K)
+#' @description
+#'   For a given vector of coefficients `a`, sample size `n` and a maximum
+#'   rank `maxK`, this function repeatedly calls `compute_matrix()` to build a
+#'   matrix `Q` for each rank `K = 1, …, maxK`.  The smallest singular value of
+#'   each `Q` is extracted with `compute_minmax_sv()`.  The result can be
+#'   returned as a numeric vector and, if desired, plotted together with a
+#'   user‑supplied reference curve.
+#'
+#' @param a Numeric vector of coefficients that are passed to `compute_matrix()`.
+#' @param n Positive integer, the sample size used inside the sampling function.
+#' @param maxK Positive integer, the largest rank `K` for which a matrix `Q`
+#'   will be built.  The function will evaluate `K = 1:maxK`.
+#' @param seed Integer (default = 1) used as the random‑seed argument for
+#'   `compute_matrix()`.  Supplying a seed makes the whole procedure
+#'   reproducible.
+#' @param sampler_fn Function that draws a sample of size `n`.  It must accept a
+#'   single argument `n` and return a **numeric matrix** with `n` rows and
+#'   one column (the shape used in the original script).  The default is a
+#'   thin wrapper around `rnorm()`.
+#' @param fv Function giving the density of the underlying distribution
+#'   (default = `dnorm`).  It is passed unchanged to `compute_matrix()`.
+#' @param Fv Function giving the cumulative distribution function
+#'   (default = `pnorm`).  It is passed unchanged to `compute_matrix()`.
+#' @param guard Positive numeric guard used inside `compute_matrix()` to avoid
+#'   division‑by‑zero or log‑of‑zero problems (default = `1e-12`).
+#' @param plot Logical, whether to produce a quick base‑R plot of the
+#'   smallest singular values (`TRUE` by default).  If `FALSE` the function
+#'   only returns the numeric vector.
+#' @param add_curve Logical, whether to overlay a reference curve on the plot.
+#'   Ignored when `plot = FALSE`.  Default = `TRUE`.
+#' @param curve_expr Expression (as a *character* or *call*) that defines the
+#'   reference curve.  The default reproduces the line you used
+#'   `20 / sqrt(x)`.  The expression must be a valid R expression in which the
+#'   variable `x` stands for the horizontal axis.
+#' @param curve_from,curve_to Numeric limits for the reference curve.  By
+#'   default they are set to the range of `K` (`1:maxK`).
+#' @param curve_col Colour of the reference curve (default = `"red"`).
+#' @param curve_lwd Line width of the reference curve (default = 2).
+#' @param log_plot If True, then the y-axis is on a log scale.
+#' @return A list with the following components  
+#'   \item{K}{Integer vector `1:maxK`.}  
+#'   \item{sv}{Numeric vector of the smallest singular values for each `K`.}  
+#'   \item{plot}{If `plot = TRUE`, the value returned by `graphics::plot()`
+#'   (normally `NULL`).  If `plot = FALSE` this element is omitted.}
+#' @examples
+#' ## Run the whole routine with the defaults
+#' res <- smallest_sv_sequence(
+#'   a = c(0.4), n = 400, maxK = 20,
+#'   seed = 3, guard = 1e-12
+#' )
+#' head(res$sv)
+#'
+#' ## Supply a custom sampler (e.g., uniform)
+#' my_sampler <- function(m) matrix(runif(m, -1, 1), ncol = 1)
+#' res2 <- smallest_sv_sequence(
+#'   a = c(0.4), n = 400, maxK = 10,
+#'   sampler_fn = my_sampler,
+#'   plot = FALSE
+#' )
+#' plot(res2$K, res2$sv, type = "b")
+#' @importFrom graphics plot lines curve
+#' @export
+smallest_sv_sequence <- function(
+    a,
+    n,
+    maxK,
+    seed = 1L,
+    sampler_fn = function(m) matrix(rnorm(m), ncol = 1L),
+    fv = dnorm,
+    Fv = pnorm,
+    guard = 1e-12,
+    plot = TRUE,
+    add_curve = TRUE,
+    curve_expr = quote(20 / sqrt(x)),
+    curve_from = NULL,
+    curve_to = NULL,
+    curve_col = "red",
+    curve_lwd = 2, 
+    log_plot = FALSE
+) {
+  ## 1. Input validation =======================================================
+  if (!is.numeric(a) || length(a) == 0) {
+    stop("`a` must be a non‑empty numeric vector.")
+  }
+  if (!is.numeric(n) || length(n) != 1L || n <= 0 || n != as.integer(n)) {
+    stop("`n` must be a positive integer.")
+  }
+  if (!is.numeric(maxK) || length(maxK) != 1L || maxK <= 0 ||
+      maxK != as.integer(maxK)) {
+    stop("`maxK` must be a positive integer.")
+  }
+  if (!is.function(sampler_fn)) {
+    stop("`sampler_fn` must be a function that takes a single integer argument `n`.")
+  }
+  if (!is.function(fv) || !is.function(Fv)) {
+    stop("`fv` and `Fv` must be corresponding density functions  and cdf (e.g., dnorm/ pnorm).")
+  }
+  if (!is.numeric(guard) || length(guard) != 1L || guard <= 0) {
+    stop("`guard` must be a single positive numeric value.")
+  }
+  if (!is.logical(plot) || length(plot) != 1L) {
+    stop("`plot` must be a single logical value.")
+  }
+  if (!is.logical(add_curve) || length(add_curve) != 1L) {
+    stop("`add_curve` must be a single logical value.")
+  }
+  if (!inherits(curve_expr, "call") && !is.character(curve_expr)) {
+    stop("`curve_expr` must be a call (e.g., quote(20/sqrt(x))) or a character string.")
+  }
+  
+  ## 2. Prepare storage ========================================================
+  K_vec <- seq_len(maxK)
+  smallest_sv <- numeric(maxK)
+  
+  ## 3. Main loop – build Q(K) and extract the smallest singular value =========
+  for (K in K_vec) {
+    Q <- compute_matrix(
+      seed = seed,
+      a = a,
+      n = n,
+      K = K,
+      sample_X_fn = sampler_fn,
+      fv = fv,
+      Fv = Fv,
+      guard = guard
+    )
+    
+    sv_res <- compute_minmax_sv(Q)
+    if (!is.list(sv_res) || is.null(sv_res$smallest_singular_value)) {
+      stop("`compute_minmax_sv()` must return a list containing `$smallest_singular_value`.")
+    }
+    smallest_sv[K] <- sv_res$smallest_singular_value
+  }
+  
+  ## 4. Plotting (optional) ====================================================
+  if (plot) {
+    ## Basic scatter/line plot of the singular values
+    plot_args <- list(
+      x = K_vec,
+      y = smallest_sv,
+      type = "b",
+      pch = 19,
+      col = "steelblue",
+      xlab = "K subdivisions",
+      ylab = "Smallest singular value of Q",
+      main = "Smallest singular value vs. K"
+    )
+    if (log_plot) plot_args$log <- "y"
+    
+    do.call(graphics::plot, plot_args)
+    # graphics::plot(
+    #   K_vec, smallest_sv,
+    #   type = "b", pch = 19, col = "steelblue",
+    #   xlab = "K subdivisions", ylab = "Smallest singular value of Q",
+    #   main = "Smallest singular value vs. K"
+    # )
+    ## Add the reference curve if requested
+    if (add_curve) {
+      ## Determine sensible defaults for the curve limits
+      if (is.null(curve_from)) curve_from <- min(K_vec)
+      if (is.null(curve_to))   curve_to   <- max(K_vec)
+      
+      ## `curve()` expects an *expression* where `x` is the variable.
+      ## If the user supplied a character string we turn it into a call.
+      if (is.character(curve_expr)) curve_expr <- parse(text = curve_expr)[[1L]]
+      curve_fun <- function(x) eval(curve_expr)
+      graphics::curve(
+        expr = curve_fun,
+        from = curve_from,
+        to   = curve_to,
+        add  = TRUE,
+        col  = curve_col,
+        lwd  = curve_lwd
+      )
+      
+      # add label with the curve expression
+      label_txt <- expr_to_label(curve_expr)
+      x_pos <- curve_from + 0.9 * (curve_to - curve_from)
+      y_pos <- 0.9 * max(smallest_sv)
+      graphics::text(
+        x = x_pos, y = y_pos,
+        labels = label_txt,
+        col    = curve_col,
+        pos    = 4,          # 4 = right‑justified (so the text sits left of the point)
+        offset = 0.5,        # a small gap between the point and the text
+        cex    = 0.9        # slightly smaller than default text size
+      )
+      
+    }
+  }
+  ## 5. Return a tidy list =====================================================
+  out <- list(
+    K = K_vec,
+    sv = smallest_sv
+  )
+  # if (plot) out$plot <- NULL   ## the plot is already drawn; we return NULL for consistency
+  out
+}
+
+