Files
GraphonSimulation/R/singular_value_plot.R
2026-02-11 19:00:56 +01:00

235 lines
8.8 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Load function ----------------------------------------------------------------
source(here::here("R", "singular_values.R"))
source(here::here("R", "graphon_distribution.R"))
# Convert a call or character to a nicely formatted character string.
# * If the user supplied a character, we keep it unchanged.
# * If the user supplied a call (e.g. quote(20 / sqrt(x))) we deparse it
# and collapse the result to a single line.
expr_to_label <- function(expr) {
if (is.character(expr)) {
expr
} else {
# deparse returns a character vector; collapse to one line
paste(deparse(expr), collapse = "")
}
}
#' Compute the smallest singular value of a sequence of matrices Q(K)
#'
#' @title Smallest singular values for a family of matrices Q(K)
#' @description
#' For a given vector of coefficients `a`, sample size `n` and a maximum
#' rank `maxK`, this function repeatedly calls `compute_matrix()` to build a
#' matrix `Q` for each rank `K = 1, …, maxK`. The smallest singular value of
#' each `Q` is extracted with `compute_minmax_sv()`. The result can be
#' returned as a numeric vector and, if desired, plotted together with a
#' usersupplied reference curve.
#'
#' @param a Numeric vector of coefficients that are passed to `compute_matrix()`.
#' @param n Positive integer, the sample size used inside the sampling function.
#' @param maxK Positive integer, the largest rank `K` for which a matrix `Q`
#' will be built. The function will evaluate `K = 1:maxK`.
#' @param seed Integer (default = 1) used as the randomseed argument for
#' `compute_matrix()`. Supplying a seed makes the whole procedure
#' reproducible.
#' @param sampler_fn Function that draws a sample of size `n`. It must accept a
#' single argument `n` and return a **numeric matrix** with `n` rows and
#' one column (the shape used in the original script). The default is a
#' thin wrapper around `rnorm()`.
#' @param fv Function giving the density of the underlying distribution
#' (default = `dnorm`). It is passed unchanged to `compute_matrix()`.
#' @param Fv Function giving the cumulative distribution function
#' (default = `pnorm`). It is passed unchanged to `compute_matrix()`.
#' @param guard Positive numeric guard used inside `compute_matrix()` to avoid
#' divisionbyzero or logofzero problems (default = `1e-12`).
#' @param plot Logical, whether to produce a quick baseR plot of the
#' smallest singular values (`TRUE` by default). If `FALSE` the function
#' only returns the numeric vector.
#' @param add_curve Logical, whether to overlay a reference curve on the plot.
#' Ignored when `plot = FALSE`. Default = `TRUE`.
#' @param curve_expr Expression (as a *character* or *call*) that defines the
#' reference curve. The default reproduces the line you used
#' `20 / sqrt(x)`. The expression must be a valid R expression in which the
#' variable `x` stands for the horizontal axis.
#' @param curve_from,curve_to Numeric limits for the reference curve. By
#' default they are set to the range of `K` (`1:maxK`).
#' @param curve_col Colour of the reference curve (default = `"red"`).
#' @param curve_lwd Line width of the reference curve (default = 2).
#' @param log_plot If True, then the y-axis is on a log scale.
#' @param main_title Main title for the plot
#' @return A list with the following components
#' \item{K}{Integer vector `1:maxK`.}
#' \item{sv}{Numeric vector of the smallest singular values for each `K`.}
#' \item{plot}{If `plot = TRUE`, the value returned by `graphics::plot()`
#' (normally `NULL`). If `plot = FALSE` this element is omitted.}
#' @examples
#' ## Run the whole routine with the defaults
#' res <- smallest_sv_sequence(
#' a = c(0.4), n = 400, maxK = 20,
#' seed = 3, guard = 1e-12
#' )
#' head(res$sv)
#'
#' ## Supply a custom sampler (e.g., uniform)
#' my_sampler <- function(m) matrix(runif(m, -1, 1), ncol = 1)
#' res2 <- smallest_sv_sequence(
#' a = c(0.4), n = 400, maxK = 10,
#' sampler_fn = my_sampler,
#' plot = FALSE
#' )
#' plot(res2$K, res2$sv, type = "b")
#' @importFrom graphics plot lines curve
#' @export
smallest_sv_sequence <- function(
a,
n,
maxK,
seed = 1L,
sampler_fn = function(m) matrix(rnorm(m), ncol = 1L),
fv = dnorm,
Fv = pnorm,
guard = 1e-12,
plot = TRUE,
add_curve = TRUE,
curve_expr = quote(20 / sqrt(x)),
curve_from = NULL,
curve_to = NULL,
curve_col = "red",
curve_lwd = 2,
log_plot = FALSE,
main_title = "Smallest singular value vs. K"
) {
## 1. Input validation =======================================================
if (!is.numeric(a) || length(a) == 0) {
stop("`a` must be a nonempty numeric vector.")
}
if (!is.numeric(n) || length(n) != 1L || n <= 0 || n != as.integer(n)) {
stop("`n` must be a positive integer.")
}
if (!is.numeric(maxK) || length(maxK) != 1L || maxK <= 0 ||
maxK != as.integer(maxK)) {
stop("`maxK` must be a positive integer.")
}
if (!is.function(sampler_fn)) {
stop("`sampler_fn` must be a function that takes a single integer argument `n`.")
}
if (!is.function(fv) || !is.function(Fv)) {
stop("`fv` and `Fv` must be corresponding density functions and cdf (e.g., dnorm/ pnorm).")
}
if (!is.numeric(guard) || length(guard) != 1L || guard <= 0) {
stop("`guard` must be a single positive numeric value.")
}
if (!is.logical(plot) || length(plot) != 1L) {
stop("`plot` must be a single logical value.")
}
if (!is.logical(add_curve) || length(add_curve) != 1L) {
stop("`add_curve` must be a single logical value.")
}
if (!inherits(curve_expr, "call") && !is.character(curve_expr)) {
stop("`curve_expr` must be a call (e.g., quote(20/sqrt(x))) or a character string.")
}
if (!is.character(main_title)){
stop("`main_title` must be a character vector.")
}
## 2. Prepare storage ========================================================
K_vec <- seq_len(maxK)
smallest_sv <- numeric(maxK)
## 3. Main loop build Q(K) and extract the smallest singular value =========
for (K in K_vec) {
Q <- compute_matrix(
seed = seed,
a = a,
n = n,
K = K,
sample_X_fn = sampler_fn,
fv = fv,
Fv = Fv,
guard = guard
)
Q <- 1 /sqrt(n) * Q
sv_res <- compute_minmax_sv(Q)
if (!is.list(sv_res) || is.null(sv_res$smallest_singular_value)) {
stop("`compute_minmax_sv()` must return a list containing `$smallest_singular_value`.")
}
smallest_sv[K] <- sv_res$smallest_singular_value
}
## 4. Plotting (optional) ====================================================
if (plot) {
## Basic scatter/line plot of the singular values
par(mar = c(5, 4, 4, 8)) # extra space on the right for the legend
plot_args <- list(
x = K_vec,
y = smallest_sv,
type = "b",
pch = 19,
col = "steelblue",
xlab = "K subdivisions",
ylab = "Smallest singular value of Q",
main = main_title
)
if (log_plot) plot_args$log <- "y"
do.call(graphics::plot, plot_args)
# add legend. The par(xpd = ...) allows drawing outside of the plot region.
par(xpd = TRUE)
legend("topright",
inset=c(-0.2,0),
legend=c("SV of Q"),
col="steelblue",
title="Legend",
pch = 16,
bty = "n")
par(xpd = FALSE)
## Add the reference curve if requested
if (add_curve) {
## Determine sensible defaults for the curve limits
if (is.null(curve_from)) curve_from <- min(K_vec)
if (is.null(curve_to)) curve_to <- max(K_vec)
## `curve()` expects an *expression* where `x` is the variable.
## If the user supplied a character string we turn it into a call.
if (is.character(curve_expr)) curve_expr <- parse(text = curve_expr)[[1L]]
curve_fun <- function(x) eval(curve_expr)
graphics::curve(
expr = curve_fun,
from = curve_from,
to = curve_to,
add = TRUE,
col = curve_col,
lwd = curve_lwd
)
# add label with the curve expression
label_txt <- expr_to_label(curve_expr)
x_pos <- curve_from + 0.8 * (curve_to - curve_from)
y_pos <- 0.85 * max(smallest_sv)
graphics::text(
x = x_pos, y = y_pos,
labels = label_txt,
col = curve_col,
pos = 4, # 4 = rightjustified (so the text sits left of the point)
offset = 0.5, # a small gap between the point and the text
cex = 0.9 # slightly smaller than default text size
)
}
}
## 5. Return a tidy list =====================================================
out <- list(
K = K_vec,
sv = smallest_sv
)
# if (plot) out$plot <- NULL ## the plot is already drawn; we return NULL for consistency
out
}