# Load function ---------------------------------------------------------------- source(here::here("R", "singular_values.R")) source(here::here("R", "graphon_distribution.R")) # Convert a call or character to a nicely formatted character string. # * If the user supplied a character, we keep it unchanged. # * If the user supplied a call (e.g. quote(20 / sqrt(x))) we deparse it # and collapse the result to a single line. expr_to_label <- function(expr) { if (is.character(expr)) { expr } else { # deparse returns a character vector; collapse to one line paste(deparse(expr), collapse = "") } } #' Compute the smallest singular value of a sequence of matrices Q(K) #' #' @title Smallest singular values for a family of matrices Q(K) #' @description #' For a given vector of coefficients `a`, sample size `n` and a maximum #' rank `maxK`, this function repeatedly calls `compute_matrix()` to build a #' matrix `Q` for each rank `K = 1, …, maxK`. The smallest singular value of #' each `Q` is extracted with `compute_minmax_sv()`. The result can be #' returned as a numeric vector and, if desired, plotted together with a #' user‑supplied reference curve. #' #' @param a Numeric vector of coefficients that are passed to `compute_matrix()`. #' @param n Positive integer, the sample size used inside the sampling function. #' @param maxK Positive integer, the largest rank `K` for which a matrix `Q` #' will be built. The function will evaluate `K = 1:maxK`. #' @param seed Integer (default = 1) used as the random‑seed argument for #' `compute_matrix()`. Supplying a seed makes the whole procedure #' reproducible. #' @param sampler_fn Function that draws a sample of size `n`. It must accept a #' single argument `n` and return a **numeric matrix** with `n` rows and #' one column (the shape used in the original script). The default is a #' thin wrapper around `rnorm()`. #' @param fv Function giving the density of the underlying distribution #' (default = `dnorm`). It is passed unchanged to `compute_matrix()`. #' @param Fv Function giving the cumulative distribution function #' (default = `pnorm`). It is passed unchanged to `compute_matrix()`. #' @param guard Positive numeric guard used inside `compute_matrix()` to avoid #' division‑by‑zero or log‑of‑zero problems (default = `1e-12`). #' @param plot Logical, whether to produce a quick base‑R plot of the #' smallest singular values (`TRUE` by default). If `FALSE` the function #' only returns the numeric vector. #' @param add_curve Logical, whether to overlay a reference curve on the plot. #' Ignored when `plot = FALSE`. Default = `TRUE`. #' @param curve_expr Expression (as a *character* or *call*) that defines the #' reference curve. The default reproduces the line you used #' `20 / sqrt(x)`. The expression must be a valid R expression in which the #' variable `x` stands for the horizontal axis. #' @param curve_from,curve_to Numeric limits for the reference curve. By #' default they are set to the range of `K` (`1:maxK`). #' @param curve_col Colour of the reference curve (default = `"red"`). #' @param curve_lwd Line width of the reference curve (default = 2). #' @param log_plot If True, then the y-axis is on a log scale. #' @param main_title Main title for the plot #' @return A list with the following components #' \item{K}{Integer vector `1:maxK`.} #' \item{sv}{Numeric vector of the smallest singular values for each `K`.} #' \item{plot}{If `plot = TRUE`, the value returned by `graphics::plot()` #' (normally `NULL`). If `plot = FALSE` this element is omitted.} #' @examples #' ## Run the whole routine with the defaults #' res <- smallest_sv_sequence( #' a = c(0.4), n = 400, maxK = 20, #' seed = 3, guard = 1e-12 #' ) #' head(res$sv) #' #' ## Supply a custom sampler (e.g., uniform) #' my_sampler <- function(m) matrix(runif(m, -1, 1), ncol = 1) #' res2 <- smallest_sv_sequence( #' a = c(0.4), n = 400, maxK = 10, #' sampler_fn = my_sampler, #' plot = FALSE #' ) #' plot(res2$K, res2$sv, type = "b") #' @importFrom graphics plot lines curve #' @export smallest_sv_sequence <- function( a, n, maxK, seed = 1L, sampler_fn = function(m) matrix(rnorm(m), ncol = 1L), fv = dnorm, Fv = pnorm, guard = 1e-12, plot = TRUE, add_curve = TRUE, curve_expr = quote(20 / sqrt(x)), curve_from = NULL, curve_to = NULL, curve_col = "red", curve_lwd = 2, log_plot = FALSE, main_title = "Smallest singular value vs. K" ) { ## 1. Input validation ======================================================= if (!is.numeric(a) || length(a) == 0) { stop("`a` must be a non‑empty numeric vector.") } if (!is.numeric(n) || length(n) != 1L || n <= 0 || n != as.integer(n)) { stop("`n` must be a positive integer.") } if (!is.numeric(maxK) || length(maxK) != 1L || maxK <= 0 || maxK != as.integer(maxK)) { stop("`maxK` must be a positive integer.") } if (!is.function(sampler_fn)) { stop("`sampler_fn` must be a function that takes a single integer argument `n`.") } if (!is.function(fv) || !is.function(Fv)) { stop("`fv` and `Fv` must be corresponding density functions and cdf (e.g., dnorm/ pnorm).") } if (!is.numeric(guard) || length(guard) != 1L || guard <= 0) { stop("`guard` must be a single positive numeric value.") } if (!is.logical(plot) || length(plot) != 1L) { stop("`plot` must be a single logical value.") } if (!is.logical(add_curve) || length(add_curve) != 1L) { stop("`add_curve` must be a single logical value.") } if (!inherits(curve_expr, "call") && !is.character(curve_expr)) { stop("`curve_expr` must be a call (e.g., quote(20/sqrt(x))) or a character string.") } if (!is.character(main_title)){ stop("`main_title` must be a character vector.") } ## 2. Prepare storage ======================================================== K_vec <- seq_len(maxK) smallest_sv <- numeric(maxK) ## 3. Main loop – build Q(K) and extract the smallest singular value ========= for (K in K_vec) { Q <- compute_matrix( seed = seed, a = a, n = n, K = K, sample_X_fn = sampler_fn, fv = fv, Fv = Fv, guard = guard ) Q <- 1 /sqrt(n) * Q sv_res <- compute_minmax_sv(Q) if (!is.list(sv_res) || is.null(sv_res$smallest_singular_value)) { stop("`compute_minmax_sv()` must return a list containing `$smallest_singular_value`.") } smallest_sv[K] <- sv_res$smallest_singular_value } ## 4. Plotting (optional) ==================================================== if (plot) { ## Basic scatter/line plot of the singular values par(mar = c(5, 4, 4, 8)) # extra space on the right for the legend plot_args <- list( x = K_vec, y = smallest_sv, type = "b", pch = 19, col = "steelblue", xlab = "K subdivisions", ylab = "Smallest singular value of Q", main = main_title ) if (log_plot) plot_args$log <- "y" do.call(graphics::plot, plot_args) # add legend. The par(xpd = ...) allows drawing outside of the plot region. par(xpd = TRUE) legend("topright", inset=c(-0.2,0), legend=c("SV of Q"), col="steelblue", title="Legend", pch = 16, bty = "n") par(xpd = FALSE) ## Add the reference curve if requested if (add_curve) { ## Determine sensible defaults for the curve limits if (is.null(curve_from)) curve_from <- min(K_vec) if (is.null(curve_to)) curve_to <- max(K_vec) ## `curve()` expects an *expression* where `x` is the variable. ## If the user supplied a character string we turn it into a call. if (is.character(curve_expr)) curve_expr <- parse(text = curve_expr)[[1L]] curve_fun <- function(x) eval(curve_expr) graphics::curve( expr = curve_fun, from = curve_from, to = curve_to, add = TRUE, col = curve_col, lwd = curve_lwd ) # add label with the curve expression label_txt <- expr_to_label(curve_expr) x_pos <- curve_from + 0.8 * (curve_to - curve_from) y_pos <- 0.85 * max(smallest_sv) graphics::text( x = x_pos, y = y_pos, labels = label_txt, col = curve_col, pos = 4, # 4 = right‑justified (so the text sits left of the point) offset = 0.5, # a small gap between the point and the text cex = 0.9 # slightly smaller than default text size ) } } ## 5. Return a tidy list ===================================================== out <- list( K = K_vec, sv = smallest_sv ) # if (plot) out$plot <- NULL ## the plot is already drawn; we return NULL for consistency out }