Compare commits
27 Commits
85960de4d4
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9d9d274487 | ||
|
|
d0e0c03428 | ||
|
|
37f235915c | ||
|
|
25fe0903be | ||
|
|
dcb1468381 | ||
|
|
106c37a1b6 | ||
|
|
d13eee49ea | ||
|
|
5acc3f4259 | ||
|
|
0921f7eb04 | ||
|
|
9b702d154a | ||
|
|
fe36738ea1 | ||
|
|
a8d7924e82 | ||
|
|
5648083823 | ||
|
|
fcdad672c7 | ||
|
|
76f982069e | ||
|
|
14b4425570 | ||
|
|
8517c5534d | ||
|
|
5cd52f0c5f | ||
|
|
bc1f1cc477 | ||
|
|
87066070b0 | ||
|
|
56125e7099 | ||
|
|
9db48a9a33 | ||
|
|
c2c759bb04 | ||
|
|
b18113a0ea | ||
|
|
cbf8e63f94 | ||
|
|
5be86fef69 | ||
|
|
16a1405f16 |
5
.gitignore
vendored
5
.gitignore
vendored
@@ -49,3 +49,8 @@ po/*~
|
|||||||
# RStudio Connect folder
|
# RStudio Connect folder
|
||||||
rsconnect/
|
rsconnect/
|
||||||
|
|
||||||
|
|
||||||
|
/.quarto/
|
||||||
|
**/*.quarto_ipynb
|
||||||
|
_freeze/
|
||||||
|
.positai
|
||||||
|
|||||||
137
R/build_network.R
Normal file
137
R/build_network.R
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
# Load required helper (the file must define `pgraphon`)
|
||||||
|
if (requireNamespace("here", quietly = TRUE)) {
|
||||||
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
} else {
|
||||||
|
stop("Package `here` is required to locate the helper file.")
|
||||||
|
}
|
||||||
|
|
||||||
|
# TODO: Check the naming consistency across the documentation
|
||||||
|
|
||||||
|
#' Compute an adjacency matrix from a graphon model
|
||||||
|
#'
|
||||||
|
#' @title Generate a the adjacency matrix using a graphon specification
|
||||||
|
#' @description
|
||||||
|
#' Given a covariate matrix `X_matrix`, a vector of intercepts `v`, a
|
||||||
|
#' coefficient vector `a`, a symmetric graphon kernel `phi`, and a scaling
|
||||||
|
#' factor `rho_n`, this function draws a random undirected adjacency matrix.
|
||||||
|
#' The construction follows the steps:
|
||||||
|
#' 1. Compute latent variables \eqn{\xi_i = F_v(a^\top X_i + v_i)}.
|
||||||
|
#' 2. Generate a symmetric matrix of i.i.d. Uniform$[0,1]$ random numbers
|
||||||
|
#' with ones on the diagonal.
|
||||||
|
#' 3. Evaluate the graphon probabilities \eqn{p_{ij}= \rho_n \, \phi(\xi_i,\xi_j)}.
|
||||||
|
#' 4. Set \eqn{A_{ij}=1} if the uniform draw is smaller than the probability,
|
||||||
|
#' otherwise \eqn{0}. The resulting matrix is symmetric with zeros on the
|
||||||
|
#' diagonal.
|
||||||
|
#'
|
||||||
|
#' @param X_matrix A numeric matrix of dimension *n \times d* (covariates).
|
||||||
|
#' Must not contain `NA` or `NaN` values.
|
||||||
|
#' @param v A numeric vector of length *n* (intercepts).
|
||||||
|
#' Must be the same length as the number of rows of `X_matrix`.
|
||||||
|
#' @param a A numeric vector of length *d* (coefficients).
|
||||||
|
#' Must be the same length as the number of columns of `X_matrix`.
|
||||||
|
#' @param phi A binary function `phi(x, y)` returning a numeric value.
|
||||||
|
#' It should be symmetric (`phi(x, y) == phi(y, x)`) and bounded in $[0,1]$.
|
||||||
|
#' @param rho_n A numeric scalar in $[0,1]$ that scales the graphon.
|
||||||
|
#' @param Fv A cumulative distribution function (CDF) used to transform the
|
||||||
|
#' linear predictor. Defaults to the standard normal CDF `pnorm`.
|
||||||
|
#' Must accept a numeric vector and return a numeric vector of the same length.
|
||||||
|
#'
|
||||||
|
#' @return An *n* × *n* integer matrix (`0L`/`1L`) representing the adjacency
|
||||||
|
#' matrix of an undirected graph.
|
||||||
|
#'
|
||||||
|
#' @examples
|
||||||
|
#' ## Simple example with a multiplicative graphon
|
||||||
|
#' set.seed(1)
|
||||||
|
#' X <- matrix(seq(-1, 1, length.out = 5), ncol = 1)
|
||||||
|
#' a <- 2
|
||||||
|
#' v <- seq(0, 0.8, length.out = 5)
|
||||||
|
#' phi_fun <- function(x, y) x * y # multiplicative kernel
|
||||||
|
#' adj <- compute_adj_matrix(
|
||||||
|
#' X_matrix = X,
|
||||||
|
#' v = v,
|
||||||
|
#' a = a,
|
||||||
|
#' phi = phi_fun,
|
||||||
|
#' rho_n = 0.5,
|
||||||
|
#' Fv = punif
|
||||||
|
#' )
|
||||||
|
#' adj
|
||||||
|
#'
|
||||||
|
#' @export
|
||||||
|
compute_adj_matrix <- function(
|
||||||
|
X_matrix,
|
||||||
|
v,
|
||||||
|
a,
|
||||||
|
phi,
|
||||||
|
rho_n,
|
||||||
|
Fv = pnorm
|
||||||
|
) {
|
||||||
|
## -------------------------- Guard rails -------------------------- ##
|
||||||
|
# Missing / NULL checks
|
||||||
|
if (missing(X_matrix) || is.null(X_matrix)) {
|
||||||
|
stop("`X_matrix` is missing or NULL.")
|
||||||
|
}
|
||||||
|
if (missing(v) || is.null(v)) {
|
||||||
|
stop("`v` is missing or NULL.")
|
||||||
|
}
|
||||||
|
if (missing(a) || is.null(a)) {
|
||||||
|
stop("`a` is missing or NULL.")
|
||||||
|
}
|
||||||
|
if (missing(phi) || is.null(phi)) {
|
||||||
|
stop("`phi` is missing or NULL.")
|
||||||
|
}
|
||||||
|
if (missing(rho_n) || is.null(rho_n)) {
|
||||||
|
stop("`rho_n` is missing or NULL.")
|
||||||
|
}
|
||||||
|
if (missing(Fv) || is.null(Fv)) {
|
||||||
|
stop("`Fv` is missing or NULL.")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Type / dimension checks
|
||||||
|
if (!is.matrix(X_matrix) || !is.numeric(X_matrix)) {
|
||||||
|
stop("`X_matrix` must be a numeric matrix.")
|
||||||
|
}
|
||||||
|
if (any(is.na(X_matrix) | is.nan(X_matrix))) {
|
||||||
|
stop("`X_matrix` must not contain NA or NaN values.")
|
||||||
|
}
|
||||||
|
n <- nrow(X_matrix)
|
||||||
|
d <- ncol(X_matrix)
|
||||||
|
|
||||||
|
if (!is.numeric(v) || length(v) != n) {
|
||||||
|
stop("`v` must be a numeric vector of length equal to nrow(`X_matrix`).")
|
||||||
|
}
|
||||||
|
if (!is.numeric(a) || length(a) != d) {
|
||||||
|
stop("`a` must be a numeric vector of length equal to ncol(`X_matrix`).")
|
||||||
|
}
|
||||||
|
if (!is.function(phi)) {
|
||||||
|
stop("`phi` must be a function of two numeric arguments.")
|
||||||
|
}
|
||||||
|
if (!is.numeric(rho_n) || length(rho_n) != 1 || rho_n < 0 || rho_n > 1) {
|
||||||
|
stop("`rho_n` must be a numeric scalar in [0, 1].")
|
||||||
|
}
|
||||||
|
if (!is.function(Fv)) {
|
||||||
|
stop("`Fv` must be a function (CDF) that accepts a numeric vector.")
|
||||||
|
}
|
||||||
|
|
||||||
|
## -------------------------- Core computation -------------------------- ##
|
||||||
|
# 1. Latent variables ξ_i = Fv(aᵀ X_i + v_i)
|
||||||
|
lin_pred <- as.vector(X_matrix %*% a) + v
|
||||||
|
xi <- pgraphon(y = lin_pred, a = a, Fv = Fv, X_matrix = X_matrix)
|
||||||
|
|
||||||
|
# 2. Symmetric matrix of Uniform[0,1] draws with 1 on the diagonal
|
||||||
|
# (only the upper triangle is sampled, then mirrored)
|
||||||
|
rand_mat <- matrix(0, n, n)
|
||||||
|
upper_idx <- which(upper.tri(rand_mat, diag = FALSE), arr.ind = TRUE)
|
||||||
|
rand_mat[upper_idx] <- runif(n * (n - 1) / 2)
|
||||||
|
rand_mat <- rand_mat + t(rand_mat) + diag(1, n)
|
||||||
|
|
||||||
|
# 3. Graphon probabilities p_ij = rho_n * phi(ξ_i, ξ_j)
|
||||||
|
graphon_fun <- function(x, y) rho_n * phi(x, y)
|
||||||
|
probabilities <- outer(xi, xi, FUN = graphon_fun)
|
||||||
|
|
||||||
|
# 4. Adjacency matrix: A_ij = 1 if U_ij < p_ij, else 0
|
||||||
|
adj_mat <- (rand_mat < probabilities) * 1L
|
||||||
|
|
||||||
|
## Return the adjacency matrix
|
||||||
|
adj_mat
|
||||||
|
}
|
||||||
|
|
||||||
246
R/estimators.R
Normal file
246
R/estimators.R
Normal file
@@ -0,0 +1,246 @@
|
|||||||
|
# load local files
|
||||||
|
source(here::here("R", "singular_values.R"))
|
||||||
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
source(here::here("R","singular_value_plot.R"))
|
||||||
|
source(here::here("R", "build_network.R"))
|
||||||
|
|
||||||
|
# Helper functions -------------------------------------------------------------
|
||||||
|
# helper function for wrapping the parameters of the Q_a creation function
|
||||||
|
# TODO rename this function
|
||||||
|
make_matrix_creation <- function(seed, n, K, matrix_X, fv, Fv, guard) {
|
||||||
|
function(a) {
|
||||||
|
compute_matrix(seed=seed, a, n=n, K=K, matrix_X = matrix_X, fv=fv, Fv=Fv, guard=guard)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# estimators -------------------------------------------------------------------
|
||||||
|
|
||||||
|
## Moore-Penrose Inverse -------------------------------------------------------
|
||||||
|
#' Moore‑Penrose pseudoinverse of a matrix
|
||||||
|
#'
|
||||||
|
#' Computes the Moore‑Penrose generalized inverse of a numeric matrix using a
|
||||||
|
#' singular‑value decomposition (SVD). Singular values smaller than the
|
||||||
|
#' tolerance are treated as zero to improve numerical stability.
|
||||||
|
#'
|
||||||
|
#' @param A A numeric matrix (or an object coercible to a matrix) whose
|
||||||
|
#' pseudoinverse is required.
|
||||||
|
#' @param tol Tolerance for treating singular values as zero. By default it
|
||||||
|
#' is set to `max(dim(A)) * max(svd(A)$d) * .Machine$double.eps`, which
|
||||||
|
#' scales with the size of the matrix and machine precision.
|
||||||
|
#' @return A matrix representing the Moore‑Penrose inverse of `A`. The
|
||||||
|
#' dimensions of the result are `ncol(A) × nrow(A)`.
|
||||||
|
#' @examples
|
||||||
|
#' set.seed(123)
|
||||||
|
#' A <- matrix(rnorm(12), nrow = 3)
|
||||||
|
#' A_pinv <- pinv(A)
|
||||||
|
#' # Verify the Moore‑Penrose properties (A %*% A_pinv %*% A ≈ A)
|
||||||
|
#' all.equal(A %*% A_pinv %*% A, A)
|
||||||
|
#' @importFrom stats svd
|
||||||
|
#' @export
|
||||||
|
pinv <- function(A, tol = NULL) {
|
||||||
|
# Coerce to matrix and check type
|
||||||
|
if (!is.matrix(A)) {
|
||||||
|
A <- as.matrix(A)
|
||||||
|
}
|
||||||
|
if (!is.numeric(A)) {
|
||||||
|
stop("`A` must be a numeric matrix.", call. = FALSE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Singular value decomposition
|
||||||
|
s <- svd(A)
|
||||||
|
|
||||||
|
# Determine tolerance if not supplied
|
||||||
|
if (is.null(tol)) {
|
||||||
|
tol <- max(dim(A)) * max(s$d) * .Machine$double.eps
|
||||||
|
}
|
||||||
|
|
||||||
|
# Invert non‑zero singular values
|
||||||
|
d_inv <- ifelse(s$d > tol, 1 / s$d, 0)
|
||||||
|
|
||||||
|
# Construct diagonal matrix of inverted singular values
|
||||||
|
D_plus <- diag(d_inv, nrow = length(d_inv))
|
||||||
|
|
||||||
|
# Moore‑Penrose inverse: V %*% D⁺ %*% t(U)
|
||||||
|
s$v %*% D_plus %*% t(s$u)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Estimate Matrix B -----------------------------------------------------------
|
||||||
|
#' Estimate the matrix \$B\$ for a graphon model
|
||||||
|
#'
|
||||||
|
#' For a given graphon scaling parameter `rho_n`, a square matrix `Q_a`, and an
|
||||||
|
#' adjacency matrix `A`, this function computes
|
||||||
|
|
||||||
|
#' $ B = \\rho_n \, Q_a^{+\\,T} \, A \, Q_a^{+} $
|
||||||
|
|
||||||
|
#' where `Q_a^{+}` denotes the Moore‑Penrose pseudoinverse of `Q_a`.
|
||||||
|
#'
|
||||||
|
#' @param rho_n Numeric scalar. The graphon scaling parameter.
|
||||||
|
#' @param Q_a Square numeric matrix. TODO: write description of the Matrix
|
||||||
|
#' @param A Square numeric adjacency matrix (same dimension as `Q_a`).
|
||||||
|
#' @return A numeric matrix of the same dimension as `Q_a` representing the
|
||||||
|
#' estimated \$B\$.
|
||||||
|
#' @examples
|
||||||
|
#' set.seed(42)
|
||||||
|
#' n <- 5
|
||||||
|
#' Q_a <- diag(n) # simple identity basis
|
||||||
|
#' A <- matrix(rbinom(n^2, 1, 0.3), n, n)
|
||||||
|
#' rho_n <- 0.5
|
||||||
|
#' B_est <- estimate_B_matrix(rho_n, Q_a, A)
|
||||||
|
#' str(B_est)
|
||||||
|
#' @importFrom stats svd
|
||||||
|
#' @export
|
||||||
|
estimate_B_matrix <- function(rho_n, Q_a, A) {
|
||||||
|
# ---- Input checks ---------------------------------------------------------
|
||||||
|
if (!is.numeric(rho_n) || length(rho_n) != 1) {
|
||||||
|
stop("`rho_n` must be a single numeric value.", call. = FALSE)
|
||||||
|
}
|
||||||
|
if (!is.matrix(Q_a) || !is.numeric(Q_a)) {
|
||||||
|
stop("`Q_a` must be a numeric matrix.", call. = FALSE)
|
||||||
|
}
|
||||||
|
if (!is.matrix(A) || !is.numeric(A)) {
|
||||||
|
stop("`A` must be a numeric matrix.", call. = FALSE)
|
||||||
|
}
|
||||||
|
if (nrow(A) != ncol(A)) {
|
||||||
|
stop("`A` must be square.", call. = FALSE)
|
||||||
|
}
|
||||||
|
if (ncol(Q_a) != nrow(A)) {
|
||||||
|
stop("Dimensions of `Q_a` and `A` must agree for matrix multiplication.", call. = FALSE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# ---- Compute pseudoinverse ------------------------------------------------
|
||||||
|
pinv_Qa <- pinv(Q_a) # assumes `pinv()` is available in the namespace
|
||||||
|
|
||||||
|
# ---- Estimate B -----------------------------------------------------------
|
||||||
|
B <- rho_n * (t(pinv_Qa) %*% A %*% pinv_Qa)
|
||||||
|
|
||||||
|
B
|
||||||
|
}
|
||||||
|
|
||||||
|
# TODO rename this function
|
||||||
|
# TODO test the convergence of the function estimate_B
|
||||||
|
# with given graphon (block function, Hölder continuous functions)
|
||||||
|
# and given K with growing N
|
||||||
|
# and other options
|
||||||
|
# plot the loss function with respect to a
|
||||||
|
estimate_a <- function(A, # adjacency matrix
|
||||||
|
a0, # start value
|
||||||
|
n,
|
||||||
|
K,
|
||||||
|
sample_X_fn,
|
||||||
|
fv,
|
||||||
|
Fv,
|
||||||
|
guard
|
||||||
|
) {
|
||||||
|
calc_Q_a <- make_matrix_creation(seed, n, K, sample_X_fn, fv, Fv, guard)
|
||||||
|
|
||||||
|
loss_func <- function(a) {
|
||||||
|
Q_a <- calc_Q_a(a)
|
||||||
|
pinv_Qa <- pinv(Q_a)
|
||||||
|
|
||||||
|
norm(pinv_Qa %*% Q_a %*% A %*% pinv_Qa %*% Q_a - A, type="F")^2
|
||||||
|
}
|
||||||
|
|
||||||
|
return(loss_func)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Calculate the edge density of an undirected graph
|
||||||
|
#'
|
||||||
|
#' @title Edge density from an adjacency matrix
|
||||||
|
#' @description Computes the proportion of possible edges that are present in an
|
||||||
|
#' undirected, unweighted graph represented by a square adjacency matrix. The
|
||||||
|
#' density is defined as \eqn{2E / (V(V-1))} where \eqn{E} is the number of edges
|
||||||
|
#' and \eqn{V} is the number of vertices. This corresponds to the parameter
|
||||||
|
#' \eqn{rho_n}.
|
||||||
|
#' The function checks that the input is a square matrix and treats any
|
||||||
|
#' non‑numeric or missing entries as absent edges.
|
||||||
|
#'
|
||||||
|
#' @param adj_matrix A square numeric matrix representing the adjacency matrix
|
||||||
|
#' of an undirected graph. Entries should be 0/1 (or any truthy numeric) with
|
||||||
|
#' `adj_matrix[i, j] == adj_matrix[j, i]`. Missing values are ignored.
|
||||||
|
#'
|
||||||
|
#' @return A single numeric value giving the edge density \eqn{rho}.
|
||||||
|
#'
|
||||||
|
#' @examples
|
||||||
|
#' # Simple triangle graph (3 nodes, 3 edges)
|
||||||
|
#' A_tri <- matrix(c(0,1,1,
|
||||||
|
#' 1,0,1,
|
||||||
|
#' 1,1,0), nrow = 3, byrow = TRUE)
|
||||||
|
#' calculate_edge_density(A_tri)
|
||||||
|
#'
|
||||||
|
#' # Empty graph (no edges)
|
||||||
|
#' A_empty <- matrix(0, nrow = 4, ncol = 4)
|
||||||
|
#' calculate_edge_density(A_empty)
|
||||||
|
#'
|
||||||
|
#' @export
|
||||||
|
calculate_edge_density <- function(adj_matrix) {
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Validate input
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
if (!is.matrix(adj_matrix)) {
|
||||||
|
stop("`adj_matrix` must be a matrix.")
|
||||||
|
}
|
||||||
|
if (nrow(adj_matrix) != ncol(adj_matrix)) {
|
||||||
|
stop("`adj_matrix` must be square (same number of rows and columns).")
|
||||||
|
}
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Count edges and nodes
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
edge_idx <- which(upper.tri(adj_matrix, diag = FALSE), arr.ind = TRUE)
|
||||||
|
edge_count <- sum(adj_matrix[edge_idx], na.rm = TRUE)
|
||||||
|
node_count <- nrow(adj_matrix)
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Compute density
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
rho <- (2 * edge_count) / (node_count * (node_count-1))
|
||||||
|
|
||||||
|
return(rho)
|
||||||
|
}
|
||||||
|
# test the estimator routines
|
||||||
|
seed <- 17L # 121L this seed works exceptionally well
|
||||||
|
set.seed(seed)
|
||||||
|
#X <- matrix(seq(-1, 1, length.out = 5), ncol = 1)
|
||||||
|
a <- 20
|
||||||
|
n <- 400
|
||||||
|
K <- 4
|
||||||
|
rho_n <- log(n) / n
|
||||||
|
sample_X_fn <- function(n) {matrix(rnorm(n), ncol = 1L)}
|
||||||
|
X <- sample_X_fn(n)
|
||||||
|
fv <- function(x) {dnorm(x, mean=0, sd=1)}
|
||||||
|
Fv <- function(x) {pnorm(x, mean=0, sd=1)}
|
||||||
|
guard <- 1e-12
|
||||||
|
v <- rnorm(n) #seq(0, 0.8, length.out = n)
|
||||||
|
phi_fun <-Vectorize(function(x, y) ifelse(((x > 0.5 && y <= 0.5) || (x <= 0.5 && y > 0.5)), 1.6, 0.4)) # multiplicative kernel
|
||||||
|
adj <- compute_adj_matrix(
|
||||||
|
X_matrix = X,
|
||||||
|
v = v,
|
||||||
|
a = a,
|
||||||
|
phi = phi_fun,
|
||||||
|
rho_n = rho_n,
|
||||||
|
Fv = Fv
|
||||||
|
)
|
||||||
|
adj
|
||||||
|
|
||||||
|
# Q_a matrix
|
||||||
|
Qa <- compute_matrix(seed, a=a, n=n, K=K, fv=fv, Fv=Fv, guard=guard, matrix_X=X)
|
||||||
|
|
||||||
|
calc_Q_a <- make_matrix_creation(seed, n=n, K=K, matrix_X = X, fv=fv, Fv=Fv, guard=guard)
|
||||||
|
|
||||||
|
loss_func <- function(a) {
|
||||||
|
Q_a <- calc_Q_a(a)
|
||||||
|
pinv_Qa <- pinv(Q_a)
|
||||||
|
|
||||||
|
norm(pinv_Qa %*% Q_a %*% adj %*% pinv_Qa %*% Q_a - adj, type="F")^2
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
plot_as <- seq(-10, 100, length.out=500)
|
||||||
|
loss_vals <- sapply(plot_as, loss_func)
|
||||||
|
plot(plot_as , loss_vals, type="b")
|
||||||
|
abline(v=a)
|
||||||
|
title("Test plot of the loss function")
|
||||||
|
|
||||||
|
optim(25, loss_func, lower=10, upper=100, method="Brent", control=list(fnscale=1))
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
# Load the general quantile function, the loading can be improved later
|
# Load the general quantile function, the loading can be improved later
|
||||||
# using the here package.
|
# using the here package.
|
||||||
source("R/qinf.R")
|
source(here::here("R", "qinf.R"))
|
||||||
|
|
||||||
|
|
||||||
# 1. Distribution Function -----------------------------------------------------
|
# 1. Distribution Function -----------------------------------------------------
|
||||||
@@ -67,8 +67,7 @@ pgraphon <- function(
|
|||||||
dev_mat <- outer(y, inner_products, "-")
|
dev_mat <- outer(y, inner_products, "-")
|
||||||
|
|
||||||
# Apply CDF Fv to each deviation
|
# Apply CDF Fv to each deviation
|
||||||
# With sapply we do not have to assume that Fv is vectorized
|
cdf_vals <- Fv(dev_mat)
|
||||||
cdf_values <- sapply(dev_mat, Fv)
|
|
||||||
|
|
||||||
# Row means give the empirical expectation for each y_j
|
# Row means give the empirical expectation for each y_j
|
||||||
out <- rowMeans(cdf_vals)
|
out <- rowMeans(cdf_vals)
|
||||||
@@ -140,8 +139,7 @@ dgraphon <- function(
|
|||||||
dev_mat <- outer(y, inner_products, "-")
|
dev_mat <- outer(y, inner_products, "-")
|
||||||
|
|
||||||
# Apply the density function to each y[j]
|
# Apply the density function to each y[j]
|
||||||
# With sapply we do not have to assume that fv is vectorized
|
dens_vals <- fv(dev_mat)
|
||||||
dens_vals <- sapply(dev_mat, fv)
|
|
||||||
|
|
||||||
# Row means give the empirical expectation for each y_j
|
# Row means give the empirical expectation for each y_j
|
||||||
out <- rowMeans(dens_vals)
|
out <- rowMeans(dens_vals)
|
||||||
|
|||||||
236
R/singular_value_plot.R
Normal file
236
R/singular_value_plot.R
Normal file
@@ -0,0 +1,236 @@
|
|||||||
|
# Load function ----------------------------------------------------------------
|
||||||
|
source(here::here("R", "singular_values.R"))
|
||||||
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
|
||||||
|
|
||||||
|
# expr_to_label ----------------------------------------------------------------
|
||||||
|
# Convert a call or character to a nicely formatted character string.
|
||||||
|
# * If the user supplied a character, we keep it unchanged.
|
||||||
|
# * If the user supplied a call (e.g. quote(20 / sqrt(x))) we deparse it
|
||||||
|
# and collapse the result to a single line.
|
||||||
|
expr_to_label <- function(expr) {
|
||||||
|
if (is.character(expr)) {
|
||||||
|
expr
|
||||||
|
} else {
|
||||||
|
# deparse returns a character vector; collapse to one line
|
||||||
|
paste(deparse(expr), collapse = "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# smallest_sv_sequence ---------------------------------------------------------
|
||||||
|
#' Compute the smallest singular value of a sequence of matrices Q(K)
|
||||||
|
#'
|
||||||
|
#' @title Smallest singular values for a family of matrices Q(K)
|
||||||
|
#' @description
|
||||||
|
#' For a given vector of coefficients `a`, sample size `n` and a maximum
|
||||||
|
#' rank `maxK`, this function repeatedly calls `compute_matrix()` to build a
|
||||||
|
#' matrix `Q` for each rank `K = 1, …, maxK`. The smallest singular value of
|
||||||
|
#' each `Q` is extracted with `compute_minmax_sv()`. The result can be
|
||||||
|
#' returned as a numeric vector and, if desired, plotted together with a
|
||||||
|
#' user‑supplied reference curve.
|
||||||
|
#'
|
||||||
|
#' @param a Numeric vector of coefficients that are passed to `compute_matrix()`.
|
||||||
|
#' @param n Positive integer, the sample size used inside the sampling function.
|
||||||
|
#' @param maxK Positive integer, the largest rank `K` for which a matrix `Q`
|
||||||
|
#' will be built. The function will evaluate `K = 1:maxK`.
|
||||||
|
#' @param seed Integer (default = 1) used as the random‑seed argument for
|
||||||
|
#' `compute_matrix()`. Supplying a seed makes the whole procedure
|
||||||
|
#' reproducible.
|
||||||
|
#' @param sampler_fn Function that draws a sample of size `n`. It must accept a
|
||||||
|
#' single argument `n` and return a **numeric matrix** with `n` rows and
|
||||||
|
#' one column (the shape used in the original script). The default is a
|
||||||
|
#' thin wrapper around `rnorm()`.
|
||||||
|
#' @param fv Function giving the density of the underlying distribution
|
||||||
|
#' (default = `dnorm`). It is passed unchanged to `compute_matrix()`.
|
||||||
|
#' @param Fv Function giving the cumulative distribution function
|
||||||
|
#' (default = `pnorm`). It is passed unchanged to `compute_matrix()`.
|
||||||
|
#' @param guard Positive numeric guard used inside `compute_matrix()` to avoid
|
||||||
|
#' division‑by‑zero or log‑of‑zero problems (default = `1e-12`).
|
||||||
|
#' @param plot Logical, whether to produce a quick base‑R plot of the
|
||||||
|
#' smallest singular values (`TRUE` by default). If `FALSE` the function
|
||||||
|
#' only returns the numeric vector.
|
||||||
|
#' @param add_curve Logical, whether to overlay a reference curve on the plot.
|
||||||
|
#' Ignored when `plot = FALSE`. Default = `TRUE`.
|
||||||
|
#' @param curve_expr Expression (as a *character* or *call*) that defines the
|
||||||
|
#' reference curve. The default reproduces the line you used
|
||||||
|
#' `20 / sqrt(x)`. The expression must be a valid R expression in which the
|
||||||
|
#' variable `x` stands for the horizontal axis.
|
||||||
|
#' @param curve_from,curve_to Numeric limits for the reference curve. By
|
||||||
|
#' default they are set to the range of `K` (`1:maxK`).
|
||||||
|
#' @param curve_col Colour of the reference curve (default = `"red"`).
|
||||||
|
#' @param curve_lwd Line width of the reference curve (default = 2).
|
||||||
|
#' @param log_plot If True, then the y-axis is on a log scale.
|
||||||
|
#' @param main_title Main title for the plot
|
||||||
|
#' @return A list with the following components
|
||||||
|
#' \item{K}{Integer vector `1:maxK`.}
|
||||||
|
#' \item{sv}{Numeric vector of the smallest singular values for each `K`.}
|
||||||
|
#' \item{plot}{If `plot = TRUE`, the value returned by `graphics::plot()`
|
||||||
|
#' (normally `NULL`). If `plot = FALSE` this element is omitted.}
|
||||||
|
#' @examples
|
||||||
|
#' ## Run the whole routine with the defaults
|
||||||
|
#' res <- smallest_sv_sequence(
|
||||||
|
#' a = c(0.4), n = 400, maxK = 20,
|
||||||
|
#' seed = 3, guard = 1e-12
|
||||||
|
#' )
|
||||||
|
#' head(res$sv)
|
||||||
|
#'
|
||||||
|
#' ## Supply a custom sampler (e.g., uniform)
|
||||||
|
#' my_sampler <- function(m) matrix(runif(m, -1, 1), ncol = 1)
|
||||||
|
#' res2 <- smallest_sv_sequence(
|
||||||
|
#' a = c(0.4), n = 400, maxK = 10,
|
||||||
|
#' sampler_fn = my_sampler,
|
||||||
|
#' plot = FALSE
|
||||||
|
#' )
|
||||||
|
#' plot(res2$K, res2$sv, type = "b")
|
||||||
|
#' @importFrom graphics plot lines curve
|
||||||
|
#' @export
|
||||||
|
smallest_sv_sequence <- function(
|
||||||
|
a,
|
||||||
|
n,
|
||||||
|
maxK,
|
||||||
|
seed = 1L,
|
||||||
|
sampler_fn = function(m) matrix(rnorm(m), ncol = 1L),
|
||||||
|
fv = dnorm,
|
||||||
|
Fv = pnorm,
|
||||||
|
guard = 1e-12,
|
||||||
|
plot = TRUE,
|
||||||
|
add_curve = TRUE,
|
||||||
|
curve_expr = quote(20 / sqrt(x)),
|
||||||
|
curve_from = NULL,
|
||||||
|
curve_to = NULL,
|
||||||
|
curve_col = "red",
|
||||||
|
curve_lwd = 2,
|
||||||
|
log_plot = FALSE,
|
||||||
|
main_title = "Smallest singular value vs. K"
|
||||||
|
) {
|
||||||
|
## 1. Input validation =======================================================
|
||||||
|
if (!is.numeric(a) || length(a) == 0) {
|
||||||
|
stop("`a` must be a non‑empty numeric vector.")
|
||||||
|
}
|
||||||
|
if (!is.numeric(n) || length(n) != 1L || n <= 0 || n != as.integer(n)) {
|
||||||
|
stop("`n` must be a positive integer.")
|
||||||
|
}
|
||||||
|
if (!is.numeric(maxK) || length(maxK) != 1L || maxK <= 0 ||
|
||||||
|
maxK != as.integer(maxK)) {
|
||||||
|
stop("`maxK` must be a positive integer.")
|
||||||
|
}
|
||||||
|
if (!is.function(sampler_fn)) {
|
||||||
|
stop("`sampler_fn` must be a function that takes a single integer argument `n`.")
|
||||||
|
}
|
||||||
|
if (!is.function(fv) || !is.function(Fv)) {
|
||||||
|
stop("`fv` and `Fv` must be corresponding density functions and cdf (e.g., dnorm/ pnorm).")
|
||||||
|
}
|
||||||
|
if (!is.numeric(guard) || length(guard) != 1L || guard <= 0) {
|
||||||
|
stop("`guard` must be a single positive numeric value.")
|
||||||
|
}
|
||||||
|
if (!is.logical(plot) || length(plot) != 1L) {
|
||||||
|
stop("`plot` must be a single logical value.")
|
||||||
|
}
|
||||||
|
if (!is.logical(add_curve) || length(add_curve) != 1L) {
|
||||||
|
stop("`add_curve` must be a single logical value.")
|
||||||
|
}
|
||||||
|
if (!inherits(curve_expr, "call") && !is.character(curve_expr)) {
|
||||||
|
stop("`curve_expr` must be a call (e.g., quote(20/sqrt(x))) or a character string.")
|
||||||
|
}
|
||||||
|
if (!is.character(main_title)){
|
||||||
|
stop("`main_title` must be a character vector.")
|
||||||
|
}
|
||||||
|
|
||||||
|
## 2. Prepare storage ========================================================
|
||||||
|
K_vec <- seq_len(maxK)
|
||||||
|
smallest_sv <- numeric(maxK)
|
||||||
|
|
||||||
|
## 3. Main loop – build Q(K) and extract the smallest singular value =========
|
||||||
|
for (K in K_vec) {
|
||||||
|
Q <- compute_matrix(
|
||||||
|
seed = seed,
|
||||||
|
a = a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = sampler_fn,
|
||||||
|
fv = fv,
|
||||||
|
Fv = Fv,
|
||||||
|
guard = guard,
|
||||||
|
scaled = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
sv_res <- compute_minmax_sv(Q)
|
||||||
|
if (!is.list(sv_res) || is.null(sv_res$smallest_singular_value)) {
|
||||||
|
stop("`compute_minmax_sv()` must return a list containing `$smallest_singular_value`.")
|
||||||
|
}
|
||||||
|
smallest_sv[K] <- sv_res$smallest_singular_value
|
||||||
|
}
|
||||||
|
|
||||||
|
## 4. Plotting (optional) ====================================================
|
||||||
|
if (plot) {
|
||||||
|
## Basic scatter/line plot of the singular values
|
||||||
|
par(mar = c(5, 4, 4, 8)) # extra space on the right for the legend
|
||||||
|
plot_args <- list(
|
||||||
|
x = K_vec,
|
||||||
|
y = smallest_sv,
|
||||||
|
type = "b",
|
||||||
|
pch = 19,
|
||||||
|
col = "steelblue",
|
||||||
|
xlab = "K subdivisions",
|
||||||
|
ylab = "Smallest singular value of Q",
|
||||||
|
main = main_title
|
||||||
|
)
|
||||||
|
if (log_plot) plot_args$log <- "y"
|
||||||
|
|
||||||
|
do.call(graphics::plot, plot_args)
|
||||||
|
# add legend. The par(xpd = ...) allows drawing outside of the plot region.
|
||||||
|
par(xpd = TRUE)
|
||||||
|
legend("topright",
|
||||||
|
inset=c(-0.2,0),
|
||||||
|
legend=c("SV of Q"),
|
||||||
|
col="steelblue",
|
||||||
|
title="Legend",
|
||||||
|
pch = 16,
|
||||||
|
bty = "n")
|
||||||
|
par(xpd = FALSE)
|
||||||
|
## Add the reference curve if requested
|
||||||
|
if (add_curve) {
|
||||||
|
## Determine sensible defaults for the curve limits
|
||||||
|
if (is.null(curve_from)) curve_from <- min(K_vec)
|
||||||
|
if (is.null(curve_to)) curve_to <- max(K_vec)
|
||||||
|
|
||||||
|
## `curve()` expects an *expression* where `x` is the variable.
|
||||||
|
## If the user supplied a character string we turn it into a call.
|
||||||
|
if (is.character(curve_expr)) curve_expr <- parse(text = curve_expr)[[1L]]
|
||||||
|
curve_fun <- function(x) eval(curve_expr)
|
||||||
|
graphics::curve(
|
||||||
|
expr = curve_fun,
|
||||||
|
from = curve_from,
|
||||||
|
to = curve_to,
|
||||||
|
add = TRUE,
|
||||||
|
col = curve_col,
|
||||||
|
lwd = curve_lwd
|
||||||
|
)
|
||||||
|
|
||||||
|
# add label with the curve expression
|
||||||
|
label_txt <- expr_to_label(curve_expr)
|
||||||
|
x_pos <- curve_from + 0.8 * (curve_to - curve_from)
|
||||||
|
y_pos <- 0.85 * max(smallest_sv)
|
||||||
|
graphics::text(
|
||||||
|
x = x_pos, y = y_pos,
|
||||||
|
labels = label_txt,
|
||||||
|
col = curve_col,
|
||||||
|
pos = 4, # 4 = right‑justified (so the text sits left of the point)
|
||||||
|
offset = 0.5, # a small gap between the point and the text
|
||||||
|
cex = 0.9 # slightly smaller than default text size
|
||||||
|
)
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
## 5. Return a tidy list =====================================================
|
||||||
|
out <- list(
|
||||||
|
K = K_vec,
|
||||||
|
sv = smallest_sv
|
||||||
|
)
|
||||||
|
# if (plot) out$plot <- NULL ## the plot is already drawn; we return NULL for consistency
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
# TODO improve later using the here package
|
# TODO improve later using the here package
|
||||||
source("R/graphon_distribution.R")
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
|
||||||
# 1. Compute the Matrix Q according to (3.1) -----------------------------------
|
# 1. Compute the Matrix Q according to (3.1) -----------------------------------
|
||||||
#' Compute the matrix **Q**
|
#' Compute the matrix **Q**
|
||||||
@@ -33,10 +33,6 @@ source("R/graphon_distribution.R")
|
|||||||
#' generated.
|
#' generated.
|
||||||
#' @param K Positive integer. Number of divisions of the unit interval;
|
#' @param K Positive integer. Number of divisions of the unit interval;
|
||||||
#' the resulting grid has length `K+1`.
|
#' the resulting grid has length `K+1`.
|
||||||
#' @param sample_X_fn
|
|
||||||
#' Function with a single argument `n`. It must return an
|
|
||||||
#' \eqn{n \times p} matrix (or an object coercible to a matrix) of
|
|
||||||
#' covariate samples.
|
|
||||||
#' @param fv Density function of the latent variable \eqn{v}. Must be
|
#' @param fv Density function of the latent variable \eqn{v}. Must be
|
||||||
#' vectorised (i.e. accept a numeric vector and return a numeric
|
#' vectorised (i.e. accept a numeric vector and return a numeric
|
||||||
#' vector of the same length). Typical examples are
|
#' vector of the same length). Typical examples are
|
||||||
@@ -44,6 +40,21 @@ source("R/graphon_distribution.R")
|
|||||||
#' @param Fv Cumulative distribution function of the latent variable
|
#' @param Fv Cumulative distribution function of the latent variable
|
||||||
#' \eqn{v}. Also has to be vectorised. Typical examples are
|
#' \eqn{v}. Also has to be vectorised. Typical examples are
|
||||||
#' `pnorm`, `pexp`, ….
|
#' `pnorm`, `pexp`, ….
|
||||||
|
#' @param sample_X_fn
|
||||||
|
#' Function with a single argument `n`. It must return an
|
||||||
|
#' \eqn{n \times p} matrix (or an object coercible to a matrix) of
|
||||||
|
#' covariate samples. It can be NULL, but then `matrix_X` must be
|
||||||
|
#' given.
|
||||||
|
#' @param matrix_X matrix with the covariates at each node. Each row corresponds
|
||||||
|
#' to a single node with p attributes. The default value is `NULL`. If it
|
||||||
|
#' is `NULL` then `sample_X_fun` must be given. If both parameters are provided,
|
||||||
|
#' then `matrix_X` is used.
|
||||||
|
#' @param guard Positive numeric guard value. Default is `sqrt(.Machine$double.eps)`,
|
||||||
|
#' which is about `1.5e‑8` on most platforms – small enough to be negligible
|
||||||
|
#' for most computations. If it is null, then it is not used.
|
||||||
|
#' The guard is used for the value k = 0, which can cause arithmetic errors.
|
||||||
|
#' @param scaled Rescales the matrix according to the dimension with the factor
|
||||||
|
#' 1 / sqrt(n). Default value is FALSE.
|
||||||
#'
|
#'
|
||||||
#' @return A numeric matrix **Q** of dimension `K × n`. The \eqn{j}-th row
|
#' @return A numeric matrix **Q** of dimension `K × n`. The \eqn{j}-th row
|
||||||
#' (for `j = 1,…,K`) contains the increments of the CDF evaluated at
|
#' (for `j = 1,…,K`) contains the increments of the CDF evaluated at
|
||||||
@@ -98,26 +109,38 @@ compute_matrix <- function(
|
|||||||
a,
|
a,
|
||||||
n,
|
n,
|
||||||
K,
|
K,
|
||||||
sample_X_fn,
|
|
||||||
fv,
|
fv,
|
||||||
Fv
|
Fv,
|
||||||
|
sample_X_fn=NULL,
|
||||||
|
matrix_X = NULL,
|
||||||
|
guard = sqrt(.Machine$double.eps),
|
||||||
|
scaled = FALSE
|
||||||
) {
|
) {
|
||||||
## 1.1 Check inputs ==========================================================
|
## 1.1 Check inputs ==========================================================
|
||||||
if (!is.numeric(seed) || length(seed) != 1) stop("'seed' must be a single number")
|
if (!is.numeric(seed) || length(seed) != 1) stop("'seed' must be a single number")
|
||||||
if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector")
|
if (!is.numeric(a) || !is.vector(a)) stop("'a' must be a numeric vector")
|
||||||
if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer")
|
if (!is.numeric(n) || length(n) != 1 || n <= 0) stop("'n' must be a positive integer")
|
||||||
if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer")
|
if (!is.numeric(K) || length(K) != 1 || K <= 0) stop("'K' must be a positive integer")
|
||||||
if (!is.function(sample_X_fn)) stop("'sample_X_fn' must be a function")
|
if (!(is.function(sample_X_fn) || is.null(sample_X_fn))) stop("'sample_X_fn' must be a function or Null and matrix_X must be given")
|
||||||
if (!is.function(fv)) stop("'f_v' must be a function")
|
if (!is.function(fv)) stop("'f_v' must be a function")
|
||||||
if (!is.function(Fv)) stop("'F_v' must be a function")
|
if (!is.function(Fv)) stop("'F_v' must be a function")
|
||||||
|
if (!is.null(matrix_X) && !is.matrix(matrix_X)) stop("matrix_X must be either null or a matrix")
|
||||||
|
if (is.null(matrix_X) && is.null(sample_X_fn)) stop("Either 'matrix_X' or 'sample_X_fn' must be supplied!")
|
||||||
|
if (!is.null(matrix_X) && !is.null(sample_X_fn)) warning("Both arguments 'matrix_X' and `sample_X_fn` is given. Priority is given by to the first!")
|
||||||
|
|
||||||
## 1.2 Generate the Matrix X of covariates ===================================
|
## 1.2 Generate the Matrix X of covariates ===================================
|
||||||
# The withr environment allows us to capsulate the global state like the seed
|
# If the argument matrix_X is present, use this matrix, otherwise generate one
|
||||||
# and enables a better reproduction
|
# with sample_X_fn.
|
||||||
X <- withr::with_seed(seed, {
|
if (!is.null(matrix_X)) {
|
||||||
as.matrix(sample_X_fn(n))
|
X <- matrix_X
|
||||||
})
|
} else {
|
||||||
if (nrow(X) != n) stop("`sample_X_fn` must return exactly `n` rows")
|
# The withr environment allows us to encapsulate the global state like the seed
|
||||||
|
# and enables a better reproduction
|
||||||
|
X <- withr::with_seed(seed, {
|
||||||
|
as.matrix(sample_X_fn(n))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if (nrow(X) != n) stop(" the covariate matrix `X` must have exactly `n` rows")
|
||||||
if (ncol(X) != length(a)) {
|
if (ncol(X) != length(a)) {
|
||||||
stop("Number of columns of X (", ncol(X), ") must equal length(a) (", length(a), ")")
|
stop("Number of columns of X (", ncol(X), ") must equal length(a) (", length(a), ")")
|
||||||
}
|
}
|
||||||
@@ -127,6 +150,9 @@ compute_matrix <- function(
|
|||||||
|
|
||||||
## 1.4 Compute the graphon quantiles =========================================
|
## 1.4 Compute the graphon quantiles =========================================
|
||||||
k <- seq(0, K) / K
|
k <- seq(0, K) / K
|
||||||
|
if (!is.null(guard)) {
|
||||||
|
k[1] <- guard
|
||||||
|
}
|
||||||
graphon_quantiles <- qgraphon(k, a = a, Fv = Fv, X_matrix = X)
|
graphon_quantiles <- qgraphon(k, a = a, Fv = Fv, X_matrix = X)
|
||||||
|
|
||||||
## 1.5 Build the matrix Q ====================================================
|
## 1.5 Build the matrix Q ====================================================
|
||||||
@@ -139,6 +165,7 @@ compute_matrix <- function(
|
|||||||
cdf_mat <- Fv(outer(graphon_quantiles, inner_products, "-")) # (K +1) x n matrix
|
cdf_mat <- Fv(outer(graphon_quantiles, inner_products, "-")) # (K +1) x n matrix
|
||||||
Q <- diff(cdf_mat, lag=1) # operates along rows
|
Q <- diff(cdf_mat, lag=1) # operates along rows
|
||||||
|
|
||||||
|
if (scaled) { Q <- 1 / sqrt(n) * Q }
|
||||||
Q
|
Q
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -184,6 +211,9 @@ compute_matrix <- function(
|
|||||||
#' @export
|
#' @export
|
||||||
compute_minmax_sv <- function(M) {
|
compute_minmax_sv <- function(M) {
|
||||||
s <- svd(M, nu=0, nv=0)$d
|
s <- svd(M, nu=0, nv=0)$d
|
||||||
|
|
||||||
|
# just a check if we compute the right thing
|
||||||
|
# s <- sqrt(eigen(M %*% t(M), symmetric = TRUE, only.value=TRUE)$values)
|
||||||
|
|
||||||
list(
|
list(
|
||||||
largest_singular_value = max(s),
|
largest_singular_value = max(s),
|
||||||
|
|||||||
6
_quarto.yml
Normal file
6
_quarto.yml
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
project:
|
||||||
|
type: default
|
||||||
|
|
||||||
|
execute:
|
||||||
|
freeze: auto
|
||||||
|
cache: false
|
||||||
48
notebooks/two-dimensional-calculation-example.qmd
Normal file
48
notebooks/two-dimensional-calculation-example.qmd
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
---
|
||||||
|
title: "Two-dimensional-example"
|
||||||
|
format: html
|
||||||
|
editor: visual
|
||||||
|
---
|
||||||
|
|
||||||
|
# Zwei dimensionales Rechenbeispiel für die Matrix $Q_a$.
|
||||||
|
|
||||||
|
Mit den Eingabedaten:
|
||||||
|
|
||||||
|
- $a = 2.0$
|
||||||
|
- $X = (-0.6264, 0.1836)^\top$
|
||||||
|
- $n = K = 2$
|
||||||
|
und $v \sim \mathcal{N}(0, 1)$
|
||||||
|
|
||||||
|
Daraus ergibt sich eine Matrix $Q_a$ mit
|
||||||
|
|
||||||
|
$$
|
||||||
|
Q_a = \begin{pmatrix} 0.7911 & 0.2089 \\
|
||||||
|
0.2089 & 0.7911 \\
|
||||||
|
\end{pmatrix}
|
||||||
|
$$
|
||||||
|
|
||||||
|
|
||||||
|
Anscheinend ist es so, dass für verschiedene Eingabewerte der Matrix $X$, es immer
|
||||||
|
wieder zu verschiedenen, aber immer noch diagonaldominanten Einträgen kommt.
|
||||||
|
Wieso passiert dies?
|
||||||
|
|
||||||
|
Falls allerdings $X = (0.2167549 -0.5424926)^\top$, dann ist
|
||||||
|
$$
|
||||||
|
Q_a = \begin{pmatrix}
|
||||||
|
0.2239 & 0.7761 \\
|
||||||
|
0.7761 & 0.2239 \\
|
||||||
|
\end{pmatrix}
|
||||||
|
$$
|
||||||
|
wo jetzt die Nebendiagonale dominant ist. Wenn man verschiedene Seeds ausprobiert,
|
||||||
|
so sieht man ein Muster. Doch es gibt auch das Beispiel mit $X = (0.7667960 -0.8164583)^\top$
|
||||||
|
und dann erhalten wir:
|
||||||
|
$$
|
||||||
|
Q_a = \begin{pmatrix}
|
||||||
|
0.4802 & 0.5198 \\
|
||||||
|
0.5198 & 0.4802 \\
|
||||||
|
\end{pmatrix}
|
||||||
|
$$
|
||||||
|
|
||||||
|
Diese Matrix hat immer noch eine dominante Nebendiagonale, aber nicht so stark wie
|
||||||
|
alle anderen bekannten Beispiele. Bei den anderen Berechnungen zeigte sich meistens
|
||||||
|
ein Unterschied von $| a_{11} - a_{12}| > 0.5$.
|
||||||
3
scripts/.gitignore
vendored
3
scripts/.gitignore
vendored
@@ -0,0 +1,3 @@
|
|||||||
|
*.html
|
||||||
|
*plots_dimensions_files/
|
||||||
|
*plots_a_dependence_files/
|
||||||
|
|||||||
83
scripts/plot_Qa_wrt_a.R
Normal file
83
scripts/plot_Qa_wrt_a.R
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
# load local files
|
||||||
|
source(here::here("R", "singular_values.R"))
|
||||||
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
source(here::here("R","singular_value_plot.R"))
|
||||||
|
|
||||||
|
# load libaries for data handling
|
||||||
|
library(ggplot2)
|
||||||
|
library(tidyr)
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
# Line plots -------------------------------------------------------------------
|
||||||
|
# Create a grid of a‑values
|
||||||
|
a_grid <- seq(-20, 20, length.out = 200)
|
||||||
|
|
||||||
|
# function which takes only a to compute Q_c
|
||||||
|
make_matrix <- function(a) { compute_matrix(seed=11513215L,
|
||||||
|
a= a,
|
||||||
|
n = 2,
|
||||||
|
K = 2,
|
||||||
|
sample_X_fn = function(n) {matrix(rnorm(n), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)}
|
||||||
|
|
||||||
|
# Compute the matrices and reshape to long format
|
||||||
|
df_entries <- tibble(a = a_grid) %>%
|
||||||
|
mutate(
|
||||||
|
M = purrr::map(a, make_matrix), # list‑column of matrices
|
||||||
|
m11 = purrr::map_dbl(M, ~ .x[1, 1]),
|
||||||
|
m12 = purrr::map_dbl(M, ~ .x[1, 2]),
|
||||||
|
m21 = purrr::map_dbl(M, ~ .x[2, 1]),
|
||||||
|
m22 = purrr::map_dbl(M, ~ .x[2, 2])
|
||||||
|
) %>%
|
||||||
|
select(a, m11, m12, m21, m22) %>%
|
||||||
|
pivot_longer(-a, names_to = "entry", values_to = "value")
|
||||||
|
|
||||||
|
# Plot
|
||||||
|
ggplot(df_entries, aes(x = a, y = value, colour = entry, linetype = entry)) +
|
||||||
|
geom_line(linewidth = 1) +
|
||||||
|
labs(
|
||||||
|
title = "Matrix entries as a function of the parameter `a`",
|
||||||
|
x = "a",
|
||||||
|
y = "Matrix entry value",
|
||||||
|
colour = "Entry"
|
||||||
|
) +
|
||||||
|
theme_minimal()
|
||||||
|
|
||||||
|
# Heat map for a single larger matrix ------------------------------------------
|
||||||
|
# TODO Daten für 2x2 und 3x3 an Michael schicken
|
||||||
|
# Choose a value of a
|
||||||
|
a0 <- 2
|
||||||
|
M0 <- compute_matrix(seed=9L,
|
||||||
|
a= a0,
|
||||||
|
n = 2,
|
||||||
|
K = 2,
|
||||||
|
sample_X_fn = function(n) {matrix(rnorm(n), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
M0
|
||||||
|
|
||||||
|
# Convert to a tidy data frame for ggplot
|
||||||
|
df_heat <- as.data.frame(M0) %>%
|
||||||
|
mutate(row = row_number()) %>%
|
||||||
|
pivot_longer(-row, names_to = "col", values_to = "value") %>%
|
||||||
|
mutate(
|
||||||
|
col = as.integer(gsub("V", "", col)), # turn "V1","V2" into 1,2
|
||||||
|
row = factor(row, levels = rev(unique(row))) # reverse y‑axis for proper orientation
|
||||||
|
)
|
||||||
|
|
||||||
|
ggplot(df_heat, aes(x = col, y = row, fill = value)) +
|
||||||
|
geom_tile(colour = "grey30", size = 0.5) +
|
||||||
|
scale_fill_gradient2(
|
||||||
|
low = "steelblue", mid = "white", high = "tomato",
|
||||||
|
midpoint = 0.5, limits = c(min(df_heat$value), max(df_heat$value))
|
||||||
|
) +
|
||||||
|
labs(
|
||||||
|
title = sprintf("Heat‑map of the matrix at a = %.2f", a0),
|
||||||
|
x = "Column", y = "Row", fill = "Value"
|
||||||
|
) +
|
||||||
|
theme_minimal() +
|
||||||
|
theme(axis.text = element_blank(),
|
||||||
|
axis.ticks = element_blank())
|
||||||
177
scripts/plot_densities.R
Normal file
177
scripts/plot_densities.R
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
# 1. Load functions and Libraries ----------------------------------------------
|
||||||
|
source("./R/graphon_distribution.R")
|
||||||
|
|
||||||
|
# 2. Set constants -------------------------------------------------------------
|
||||||
|
withr::with_seed(42) # set seed in a local environment
|
||||||
|
|
||||||
|
a0 <- c(0, 0)
|
||||||
|
a1 <- c(1.0)
|
||||||
|
a2 <- c(0.5, -0.3)
|
||||||
|
|
||||||
|
N <- 400 # number of samples for X
|
||||||
|
|
||||||
|
X1 = matrix(rnorm(N))
|
||||||
|
X2 = matrix(rnorm(2 * N), ncol = 2)
|
||||||
|
|
||||||
|
|
||||||
|
# 3. Normally distributed v's --------------------------------------------------
|
||||||
|
## 3.1 Plot a = 0 ==============================================================
|
||||||
|
# In this section we plot the conditional density p(u | x) for v ~ N(0,1) and
|
||||||
|
# a = (0, 0)
|
||||||
|
p1 <- create_cond_density(a0, dnorm, pnorm, X2)
|
||||||
|
givenX <- c(0, 0)
|
||||||
|
p1_plot <- \(x) p1(x, givenX)
|
||||||
|
plot.function(p1_plot, xlab="u", ylab="p(u |x) ")
|
||||||
|
title(main=expression("Conditional density for" ~ v %~% N(0, 1)), line=2)
|
||||||
|
title(
|
||||||
|
main = bquote(
|
||||||
|
a == (.(paste(a0, collapse = ", "))) ~ "," ~
|
||||||
|
N == .(N) ~ "," ~
|
||||||
|
x == (.(paste(givenX, collapse = ", ")))
|
||||||
|
),
|
||||||
|
line = 1,
|
||||||
|
cex = 0.65
|
||||||
|
)
|
||||||
|
|
||||||
|
## 3.2 Plot a != 0, X != 0 =====================================================
|
||||||
|
p2 <- create_cond_density(a2, dnorm, pnorm, X2)
|
||||||
|
givenX <- c(-0.5, 0.5)
|
||||||
|
p2_plot <- \(x) p2(x, givenX)
|
||||||
|
plot.function(p2_plot, xlab="u", ylab="p(u |x) ")
|
||||||
|
title(main=expression("Conditional density for" ~ v %~% N(0, 1)), line=2)
|
||||||
|
title(
|
||||||
|
main = bquote(
|
||||||
|
a == (.(paste(a2, collapse = ", "))) ~ "," ~
|
||||||
|
N == .(N) ~ "," ~
|
||||||
|
x == (.(paste(givenX, collapse = ", ")))
|
||||||
|
),
|
||||||
|
line = 1,
|
||||||
|
cex = 0.65
|
||||||
|
)
|
||||||
|
|
||||||
|
## 3.3 Plot one dimensional X_i ================================================
|
||||||
|
p3 <- create_cond_density(2, dnorm, pnorm, X1)
|
||||||
|
givenX <- c(-3.0)
|
||||||
|
p3_plot <- \(x) p3(x, givenX)
|
||||||
|
plot.function(p3_plot, xlab="u", ylab="p(u |x) ")
|
||||||
|
title(main=expression("Conditional density for" ~ v %~% N(0, 1)), line=2)
|
||||||
|
title(
|
||||||
|
main = bquote(
|
||||||
|
a == (.(paste(c(2), collapse = ", "))) ~ "," ~
|
||||||
|
N == .(N) ~ "," ~
|
||||||
|
x == (.(paste(givenX, collapse = ", ")))
|
||||||
|
),
|
||||||
|
line = 1,
|
||||||
|
cex = 0.65
|
||||||
|
)
|
||||||
|
|
||||||
|
## 3.4 Plot v ~ N(0, 2) ========================================================
|
||||||
|
dens_func <-\(x) dnorm(x, sd=sqrt(2))
|
||||||
|
cdf <- \(x) pnorm(x, sd=sqrt(2))
|
||||||
|
p4 <- create_cond_density(a1, dens_func, cdf, X1)
|
||||||
|
givenX <- c(-0.5)
|
||||||
|
p4_plot <- \(x) p4(x, givenX)
|
||||||
|
plot.function(p4_plot, xlab="u", ylab="p(u |x) ")
|
||||||
|
title(main=expression("Conditional density for" ~ v %~% N(0, 2)), line=2)
|
||||||
|
title(
|
||||||
|
main = bquote(
|
||||||
|
a == (.(paste(a1, collapse = ", "))) ~ "," ~
|
||||||
|
N == .(N) ~ "," ~
|
||||||
|
x == (.(paste(givenX, collapse = ", ")))
|
||||||
|
),
|
||||||
|
line = 1,
|
||||||
|
cex = 0.65
|
||||||
|
)
|
||||||
|
|
||||||
|
# 4. Expontial distribution for v's --------------------------------------------
|
||||||
|
## 4.1 One dimensional X_i =====================================================
|
||||||
|
# This example has a jump in [0.14, 0.15].
|
||||||
|
# The zero can not be explained and it's not a probability as:
|
||||||
|
# integrate(p5_plot, 0.15, 1, subdivisions = 500)
|
||||||
|
# => 0.952492 with absolute error < 0.00011
|
||||||
|
lambda <- 1.0
|
||||||
|
p5 <- create_cond_density(a1, dexp, pexp, X1)
|
||||||
|
givenX <- c(-0.5)
|
||||||
|
p5_plot <- \(x) p5(x, givenX)
|
||||||
|
plot.function(p5_plot, xlab="u", ylab="p(u |x) ")
|
||||||
|
title(main=expression("Conditional density for" ~ v %~% Exp(1)), line=2)
|
||||||
|
title(
|
||||||
|
main = bquote(
|
||||||
|
a == (.(paste(a1, collapse = ", "))) ~ "," ~
|
||||||
|
N == .(N) ~ "," ~
|
||||||
|
x == (.(paste(givenX, collapse = ", ")))
|
||||||
|
),
|
||||||
|
line = 1,
|
||||||
|
cex = 0.65
|
||||||
|
)
|
||||||
|
|
||||||
|
## 4.2 Two dimensional X_i =====================================================
|
||||||
|
lambda <- 1.0
|
||||||
|
p6 <- create_cond_density(a2, dexp, pexp, X2)
|
||||||
|
givenX <- c(-0.5)
|
||||||
|
p6_plot <- \(x) p6(x, givenX)
|
||||||
|
plot.function(p6_plot, xlab="u", ylab="p(u |x) ")
|
||||||
|
title(main=expression("Conditional density for" ~ v %~% Exp(1)), line=2)
|
||||||
|
title(
|
||||||
|
main = bquote(
|
||||||
|
a == (.(paste(a2, collapse = ", "))) ~ "," ~
|
||||||
|
N == .(N) ~ "," ~
|
||||||
|
x == (.(paste(givenX, collapse = ", ")))
|
||||||
|
),
|
||||||
|
line = 1,
|
||||||
|
cex = 0.65
|
||||||
|
)
|
||||||
|
|
||||||
|
## 4.3 Higher Rate of exponential distribtion ==================================
|
||||||
|
lambda <- 2.0
|
||||||
|
dens_func <- \(x) dexp(x, rate=lambda)
|
||||||
|
cdf <- \(x) pexp(x, rate=lambda)
|
||||||
|
p7 <- create_cond_density(a2, dens_func, cdf, X2)
|
||||||
|
givenX <- c(-0.5, 0.5)
|
||||||
|
p7_plot <- \(x) p7(x, givenX)
|
||||||
|
plot.function(p7_plot, xlab="u", ylab="p(u |x) ")
|
||||||
|
title(main=expression("Conditional density for" ~ v %~% Exp(2.0)), line=2)
|
||||||
|
title(
|
||||||
|
main = bquote(
|
||||||
|
a == (.(paste(a2, collapse = ", "))) ~ "," ~
|
||||||
|
N == .(N) ~ "," ~
|
||||||
|
x == (.(paste(givenX, collapse = ", ")))
|
||||||
|
),
|
||||||
|
line = 1,
|
||||||
|
cex = 0.65
|
||||||
|
)
|
||||||
|
|
||||||
|
# 5. Uniform Distribution ------------------------------------------------------
|
||||||
|
|
||||||
|
## Two dimensional X_i =========================================================
|
||||||
|
p8 <- create_cond_density(a2, dunif, punif, X2)
|
||||||
|
givenX <- c(-0.5, 0.5)
|
||||||
|
p8_plot <- \(x) p8(x, givenX)
|
||||||
|
plot.function(p8_plot, xlab="u", ylab="p(u |x) ")
|
||||||
|
title(main=expression("Conditional density for" ~ v %~% U(0,1)), line=2)
|
||||||
|
title(
|
||||||
|
main = bquote(
|
||||||
|
a == (.(paste(a2, collapse = ", "))) ~ "," ~
|
||||||
|
N == .(N) ~ "," ~
|
||||||
|
x == (.(paste(givenX, collapse = ", ")))
|
||||||
|
),
|
||||||
|
line = 1,
|
||||||
|
cex = 0.65
|
||||||
|
)
|
||||||
|
|
||||||
|
## 5.2 One dimensional X_i =====================================================
|
||||||
|
p9 <- create_cond_density(a1, dunif, punif, X1)
|
||||||
|
givenX <- c(-0.5)
|
||||||
|
p9_plot <- \(x) p9(x, givenX)
|
||||||
|
plot.function(p9_plot, xlab="u", ylab="p(u |x) ")
|
||||||
|
title(main=expression("Conditional density for" ~ v %~% U(0,1)), line=2)
|
||||||
|
title(
|
||||||
|
main = bquote(
|
||||||
|
a == (.(paste(a1, collapse = ", "))) ~ "," ~
|
||||||
|
N == .(N) ~ "," ~
|
||||||
|
x == (.(paste(givenX, collapse = ", ")))
|
||||||
|
),
|
||||||
|
line = 1,
|
||||||
|
cex = 0.65
|
||||||
|
)
|
||||||
|
|
||||||
501
scripts/plot_non_normal_X.qmd
Normal file
501
scripts/plot_non_normal_X.qmd
Normal file
@@ -0,0 +1,501 @@
|
|||||||
|
---
|
||||||
|
title: "plots of a dependence"
|
||||||
|
author: "Niclas"
|
||||||
|
format: html
|
||||||
|
editor: visual
|
||||||
|
execute:
|
||||||
|
echo: true
|
||||||
|
working-directory: ../
|
||||||
|
---
|
||||||
|
|
||||||
|
```{r loading libraries}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
# load local files
|
||||||
|
source(here::here("R", "singular_values.R"))
|
||||||
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
source(here::here("R","singular_value_plot.R"))
|
||||||
|
|
||||||
|
# load libaries for data handling
|
||||||
|
library(ggplot2)
|
||||||
|
library(dplyr)
|
||||||
|
library(latex2exp)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the smallest non-zero singular value of $Q$.
|
||||||
|
|
||||||
|
The matrix $Q$ is given by $$Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du$$ with $$p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))}$$ In this document we plot different the smallest eigenvalue in dependence of the parameter $a$ with different "ratios" of the parameters $n$ and $$k = \lfloor n^\alpha \rfloor$$
|
||||||
|
|
||||||
|
with $\alpha = 0.1, 0.2, \dots 0.5$. The data matrix $X$ is a random matrix with i.i.d. distributed entries. We consider $x_{ij} \sim U[0,1]$ and $x_{ij} \sim Exp(\lambda)$.
|
||||||
|
|
||||||
|
## Exponential distribution
|
||||||
|
|
||||||
|
```{r k = n^alpha data generation, rate = 1}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(100, 10000, 100)
|
||||||
|
as <- seq(0, 20, 2)
|
||||||
|
alphas <- seq(0.1, 0.5, 0.1)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results01 <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
param_alpha = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:length(alphas)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- floor(n^alphas[j])
|
||||||
|
if (!K > 0) next # skip if K is equal to zero
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(rexp(n), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
|
||||||
|
results01 <- rbind(results01, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha plotting, rate = 1}
|
||||||
|
# plot the results
|
||||||
|
results01 |>
|
||||||
|
filter(param_a %in% c(2, 6, 12) & param_alpha <= 0.12) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
param_alpha = as.factor(param_alpha)) |>
|
||||||
|
group_by(param_a, param_alpha) |>
|
||||||
|
ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
#geom_function(fun = function(x) {sqrt(x)}, colour="black") +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim Exp(1)$")),
|
||||||
|
colour=latex2exp::TeX("$a$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha data generation, rate = 3}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(100, 10000, 100)
|
||||||
|
as <- seq(0, 20, 2)
|
||||||
|
alphas <- seq(0.1, 0.5, 0.1)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results02 <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
param_alpha = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:length(alphas)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- floor(n^alphas[j])
|
||||||
|
if (!K > 0) next # skip if K is equal to zero
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(rexp(n, rate=3), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
|
||||||
|
results02 <- rbind(results02, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha plotting, rate = 3}
|
||||||
|
results02 |>
|
||||||
|
filter(param_a %in% c(0, 10, 20) & param_alpha < 0.12) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
param_alpha = as.factor(param_alpha)) |>
|
||||||
|
group_by(param_a, param_alpha) |>
|
||||||
|
ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
#geom_function(fun = function(x) {sqrt(x)}, colour="black") +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim Exp(3))$")),
|
||||||
|
colour=latex2exp::TeX("$a$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
|
|
||||||
|
For $a = 0$ the smallest singular value is very close to zero.
|
||||||
|
|
||||||
|
```{r k = n^alpha data generation, rate = 5}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(100, 10000, 100)
|
||||||
|
as <- seq(0, 20, 2)
|
||||||
|
alphas <- seq(0.1, 0.5, 0.1)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results03 <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
param_alpha = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:length(alphas)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- floor(n^alphas[j])
|
||||||
|
if (!K > 0) next # skip if K is equal to zero
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(rexp(n, rate=5), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
|
||||||
|
results03 <- rbind(results03, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha plotting, rate = 5}
|
||||||
|
results03 |>
|
||||||
|
filter(param_a %in% c(0, 10, 20)) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
param_alpha = as.factor(param_alpha)) |>
|
||||||
|
group_by(param_a, param_alpha) |>
|
||||||
|
ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
#geom_function(fun = function(x) {sqrt(x)}, colour="black") +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim Exp(5))$")),
|
||||||
|
colour=latex2exp::TeX("$a$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
|
|
||||||
|
Why is here a perfect match for $\alpha = 0.1$ and $a = 20$ to the square function? The difference is of the order of $10^{-11}$!
|
||||||
|
|
||||||
|
## Uniform distribution
|
||||||
|
|
||||||
|
```{r k = n^alpha data generation, U[0,1]}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(100, 10000, 100)
|
||||||
|
as <- seq(0, 20, 2)
|
||||||
|
alphas <- seq(0.1, 0.5, 0.1)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results04 <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
param_alpha = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:length(alphas)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- floor(n^alphas[j])
|
||||||
|
if (!K > 0) next # skip if K is equal to zero
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(runif(n), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
|
||||||
|
results04 <- rbind(results04, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha plotting, U[0,1]}
|
||||||
|
results04 |>
|
||||||
|
filter(param_a %in% c(2, 10, 20) & param_alpha < 0.22 & param_alpha > 0.12) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
param_alpha = as.factor(param_alpha)) |>
|
||||||
|
group_by(param_a, param_alpha) |>
|
||||||
|
ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
#geom_function(fun = function(x) {sqrt(x)}, colour="black") +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim U[0,1] $")),
|
||||||
|
colour=latex2exp::TeX("$a$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
|
|
||||||
|
Here we have the same effect for $\alpha = 0.1$ and $a = 20$.
|
||||||
|
|
||||||
|
```{r k = n^alpha data generation, U[0,2]}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(100, 10000, 100)
|
||||||
|
as <- seq(0, 20, 2)
|
||||||
|
alphas <- seq(0.1, 0.5, 0.1)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results05 <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
param_alpha = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:length(alphas)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- floor(n^alphas[j])
|
||||||
|
if (!K > 0) next # skip if K is equal to zero
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(runif(n, min = 0, max=2), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
|
||||||
|
results05 <- rbind(results05, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha plotting, U[0,2]}
|
||||||
|
results05 |>
|
||||||
|
filter(param_a %in% c(0, 10, 20)) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
param_alpha = as.factor(param_alpha)) |>
|
||||||
|
group_by(param_a, param_alpha) |>
|
||||||
|
ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
#geom_function(fun = function(x) {sqrt(x)}, colour="black") +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim U[0,2] $")),
|
||||||
|
colour=latex2exp::TeX("$a$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha data generation, N(0,1)}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(100, 10000, 100)
|
||||||
|
as <- seq(0, 20, 2)
|
||||||
|
alphas <- seq(0.1, 0.5, 0.1)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results06 <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
param_alpha = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:length(alphas)) {
|
||||||
|
n <- ns[i]
|
||||||
|
# HERE WE USE THE CEILING AND NOT FLOOR!
|
||||||
|
K <- ceiling(n^alphas[j])
|
||||||
|
if (!K > 0) next # skip if K is equal to zero
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(rnorm(n), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
|
||||||
|
results06 <- rbind(results06, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha plotting, U[0,2]}
|
||||||
|
results06 |>
|
||||||
|
filter(param_a %in% c(0, 10, 20)) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
param_alpha = as.factor(param_alpha)) |>
|
||||||
|
group_by(param_a, param_alpha) |>
|
||||||
|
ggplot(aes(dim_n, ssv * dim_k, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
#geom_function(fun = function(x) {x^(0.5)}, colour="black") +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim N(0,1) $, use ceil function instead of floor for rounding.")),
|
||||||
|
colour=latex2exp::TeX("$a$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha plotting, U[0,2]}
|
||||||
|
results06 |>
|
||||||
|
filter(param_a %in% c(0, 10, 20)) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
param_alpha = as.factor(param_alpha)) |>
|
||||||
|
group_by(param_a, param_alpha) |>
|
||||||
|
ggplot(aes(dim_n, ssv / sqrt(dim_n) * dim_k, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
# geom_function(fun = function(x) {x^(0.5)}, colour="black") +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$ / sqrt(n)"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim N(0,1) $")),
|
||||||
|
colour=latex2exp::TeX("$a$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r}
|
||||||
|
results <- list(results01, results02, results03, results04, results05, results06)
|
||||||
|
save(results, file="results.RData")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Two dimensional example
|
||||||
|
|
||||||
|
```{r k = n^alpha data generation with two dimensions, rate = 1}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(100, 1000, 100)
|
||||||
|
a <- c(1, 1) / sqrt(2)
|
||||||
|
a_norms <- seq(0, 20, 2)
|
||||||
|
alphas <- seq(0.1, 0.5, 0.1)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results07 <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
param_alpha = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a_norm in a_norms) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:length(alphas)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- floor(n^alphas[j])
|
||||||
|
if (!K > 0) next # skip if K is equal to zero
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= a_norm * a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(rexp(2 * n), ncol = 2L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = a_norm, param_alpha=alphas[j], ssv =ssv)
|
||||||
|
results07 <- rbind(results07, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r k = n^alpha plotting, rate = 1}
|
||||||
|
# plot the results
|
||||||
|
results07 |>
|
||||||
|
filter(param_a %in% c(10, 20) & param_alpha < 0.12) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
param_alpha = as.factor(param_alpha)) |>
|
||||||
|
group_by(param_a, param_alpha) |>
|
||||||
|
ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha, interaction(param_a, param_alpha))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
geom_line(aes(dim_n, sqrt(dim_n) / dim_k, shape=param_alpha), linetype = 2) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
#geom_function(fun = function(x) {sqrt(x)}, colour="black") +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$. Black line is $\\sqrt{n}$, and $X \\sim Exp(1)$")),
|
||||||
|
colour=latex2exp::TeX("$a$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
364
scripts/plot_sv.R
Normal file
364
scripts/plot_sv.R
Normal file
@@ -0,0 +1,364 @@
|
|||||||
|
# Load function ----------------------------------------------------------------
|
||||||
|
source(here::here("R", "singular_values.R"))
|
||||||
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
source(here::here("R", "singular_value_plot.R"))
|
||||||
|
|
||||||
|
# https://stackoverflow.com/a/5790430
|
||||||
|
resetPar <- function() {
|
||||||
|
dev.new()
|
||||||
|
op <- par(no.readonly = TRUE)
|
||||||
|
dev.off()
|
||||||
|
op
|
||||||
|
}
|
||||||
|
|
||||||
|
calc_conv_rate <- function(x,y) {
|
||||||
|
if (!is.numeric(x) || length(x) == 0) {
|
||||||
|
stop("`x` must be a non‑empty numeric vector.")
|
||||||
|
}
|
||||||
|
if (!is.numeric(y) || length(y) == 0) {
|
||||||
|
stop("`y` must be a non‑empty numeric vector.")
|
||||||
|
}
|
||||||
|
if (length(x) != length(y)) {
|
||||||
|
stop("`x` and `y` must have the same length.")
|
||||||
|
}
|
||||||
|
|
||||||
|
df <- data.frame("x" = x, "y" = y)
|
||||||
|
lm_model <- lm(log(y) ~ log(x), data=df)
|
||||||
|
C <- exp(coefficients(lm_model)[[1]])
|
||||||
|
alpha <- coefficients(lm_model)[[2]]
|
||||||
|
|
||||||
|
df[, "y_pred"] <- C * df[, "x"]^alpha
|
||||||
|
df[, "residual"] <- df[, "y"] - df[, "y_pred"]
|
||||||
|
|
||||||
|
out <- list(
|
||||||
|
"C" = C,
|
||||||
|
"alpha" = alpha,
|
||||||
|
"obs" = df
|
||||||
|
)
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
calc_exp_conv_rate <- function(x,y) {
|
||||||
|
if (!is.numeric(x) || length(x) == 0) {
|
||||||
|
stop("`x` must be a non‑empty numeric vector.")
|
||||||
|
}
|
||||||
|
if (!is.numeric(y) || length(y) == 0) {
|
||||||
|
stop("`y` must be a non‑empty numeric vector.")
|
||||||
|
}
|
||||||
|
if (length(x) != length(y)) {
|
||||||
|
stop("`x` and `y` must have the same length.")
|
||||||
|
}
|
||||||
|
|
||||||
|
df <- data.frame("x" = x, "y" = y)
|
||||||
|
fit <- nls(y ~ C * exp(r * x^m),
|
||||||
|
start = list(C = min(y), r= -0.5, m = 1))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Nearly match with sample function --------------------------------------------
|
||||||
|
# v ~ N(0,1) and X ~ discrete Uniform on [1:n]
|
||||||
|
out <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 9,
|
||||||
|
maxK= 3,
|
||||||
|
sampler_fn = sample,
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
curve_expr = quote(1 / x^0.545)
|
||||||
|
)
|
||||||
|
conv_rate <- calc_conv_rate(out$K, out$sv)
|
||||||
|
|
||||||
|
# Normally distributed X ~ N(0,1) and v ~ N(0,1) -------------------------------
|
||||||
|
out <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 1200,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
curve_expr = quote(1.5 * exp(-0.95 * x^1.34))
|
||||||
|
#curve_expr = quote( 1/exp(x^1.32))
|
||||||
|
)
|
||||||
|
# convergence rate does not work here, probably because the underlying model
|
||||||
|
# does not work well
|
||||||
|
conv_rate <- calc_conv_rate(out$K[1:20], out$sv[1:20])
|
||||||
|
|
||||||
|
# Uniform distributed X ~ U[0,1] and v ~ N(0,1) --------------------------------
|
||||||
|
out <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(runif(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
curve_expr = quote(1* exp(-1.1 * x^1.5))
|
||||||
|
)
|
||||||
|
# here the optimal fit does not work too, probably other model
|
||||||
|
calc_conv_rate(out$K[1:9], out$sv[1:9])
|
||||||
|
|
||||||
|
# Compare of parameters of Normal distribution ----------------------------------
|
||||||
|
#
|
||||||
|
out_sd0_5 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=0.5)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=0.5)},
|
||||||
|
main_title="Smallest SV of v~ N(0,0.5^2) distribution"
|
||||||
|
)
|
||||||
|
|
||||||
|
out_sd1 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
main_title="Smallest SV of v~ N(0,1) distribution"
|
||||||
|
)
|
||||||
|
|
||||||
|
out_sd2 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=2)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=2)},
|
||||||
|
main_title="Smallest SV of v~ N(0,2^2) distribution"
|
||||||
|
)
|
||||||
|
|
||||||
|
out_sd4 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=4)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=4)},
|
||||||
|
main_title="Smallest SV of v~ N(0,4^2) distribution"
|
||||||
|
)
|
||||||
|
|
||||||
|
par(mar = c(5, 4, 4, 8))
|
||||||
|
plot(out_sd0_5$K, out_sd0_5$sv,
|
||||||
|
type = "b",
|
||||||
|
pch = 19,
|
||||||
|
col = "#D3BA68FF",
|
||||||
|
xlab = "K subdivisions",
|
||||||
|
ylab = "Smallest singular value of Q",
|
||||||
|
main="smallest SV for different variances of a normal distribution",
|
||||||
|
sub = "n = 400, a = 0.5",
|
||||||
|
log="y")
|
||||||
|
lines(out_sd1$K, out_sd1$sv,
|
||||||
|
type="b", pch=19, col="#D5695DFF", add=TRUE)
|
||||||
|
lines(out_sd2$K, out_sd2$sv,
|
||||||
|
type = "b", pch=19, col="#5D8CA8FF", add=TRUE)
|
||||||
|
lines(out_sd4$K, out_sd4$sv,
|
||||||
|
type = "b", pch=19, col="#65A479FF", add=TRUE)
|
||||||
|
par(xpd = TRUE)
|
||||||
|
legend("topright",
|
||||||
|
inset=c(-0.2,0),
|
||||||
|
legend=c("sd=0.5", "sd=1", "sd=2", "sd=4"),
|
||||||
|
col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ),
|
||||||
|
title="Legend",
|
||||||
|
pch = 16,
|
||||||
|
bty = "n")
|
||||||
|
|
||||||
|
# Break phenomena of Exp-distribution ------------------------------------------
|
||||||
|
out_exp <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dexp(x, rate=1)},
|
||||||
|
Fv = function(x) {pexp(x, rate=1)},
|
||||||
|
main_title="Smallest SV of v~ Exp(1) distribution"
|
||||||
|
)
|
||||||
|
|
||||||
|
par(resetPar)
|
||||||
|
plot(out_exp$K, out_exp$sv,
|
||||||
|
log="y",
|
||||||
|
xlab="K subdivsions",
|
||||||
|
ylab="Smallest singular value of Q",
|
||||||
|
col="steelblue",
|
||||||
|
type="b",
|
||||||
|
main="Smallest singular value for v ~ Exp(1)",
|
||||||
|
sub="a = 0.5, n = 400")
|
||||||
|
arrows(8, 1e-5, 6.5, 1e-7, angle=20, lty = 1, lwd=2)
|
||||||
|
text(8.5, 1e-5, "Break only seen for exp-distribution", pos=4)
|
||||||
|
|
||||||
|
## Observations of the break point depending the rate --------------------------
|
||||||
|
#
|
||||||
|
# Note: this also depends on the the parameter n of samples
|
||||||
|
out_exp_1 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 80,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=FALSE,
|
||||||
|
fv = function(x) {dexp(x, rate=1)},
|
||||||
|
Fv = function(x) {pexp(x, rate=1)}
|
||||||
|
)
|
||||||
|
|
||||||
|
out_exp_2 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 80,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=FALSE,
|
||||||
|
fv = function(x) {dexp(x, rate=2)},
|
||||||
|
Fv = function(x) {pexp(x, rate=2)}
|
||||||
|
)
|
||||||
|
|
||||||
|
out_exp_3 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 80,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=FALSE,
|
||||||
|
fv = function(x) {dexp(x, rate=3)},
|
||||||
|
Fv = function(x) {pexp(x, rate=3)}
|
||||||
|
)
|
||||||
|
|
||||||
|
out_exp_4 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 80,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=FALSE,
|
||||||
|
fv = function(x) {dexp(x, rate=4)},
|
||||||
|
Fv = function(x) {pexp(x, rate=4)}
|
||||||
|
)
|
||||||
|
|
||||||
|
par(mar = c(5, 4, 4, 8))
|
||||||
|
plot(out_exp_1$K, out_exp_1$sv,
|
||||||
|
type="b", col="#D3BA68FF", log="y",
|
||||||
|
main="Smallest SV of Q for different rates of Exp-distribution",
|
||||||
|
ylab="Smallest singular value of Q",
|
||||||
|
xlab="K subdivisions",
|
||||||
|
sub="a = 0.5, n = 400, depending also on n")
|
||||||
|
lines(out_exp_2$K, out_exp_2$sv, type="b", col="#D5695DFF")
|
||||||
|
lines(out_exp_3$K, out_exp_3$sv, type="b", col="#5D8CA8FF")
|
||||||
|
lines(out_exp_4$K, out_exp_4$sv, type="b", col="#65A479FF")
|
||||||
|
par(xpd=TRUE)
|
||||||
|
legend("topright",
|
||||||
|
inset=c(-0.2,0),
|
||||||
|
legend=c(expression(lambda == 1), expression(lambda == 2), expression(lambda == 3), expression(lambda == 4)),
|
||||||
|
col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ),
|
||||||
|
title="Rate",
|
||||||
|
pch = 16,
|
||||||
|
bty = "n")
|
||||||
|
|
||||||
|
|
||||||
|
# Use of the gamma distribution ------------------------------------------------
|
||||||
|
# Fix the parameters, such that the mean stays the same and the variance is
|
||||||
|
# changing.
|
||||||
|
# From the documentation
|
||||||
|
# Note that for smallish values of shape (and moderate scale) a large parts of
|
||||||
|
# the mass of the Gamma distribution is on values of x
|
||||||
|
# so near zero that they will be represented as zero in computer arithmetic.
|
||||||
|
# So rgamma may well return values which will be represented as zero.
|
||||||
|
# (This will also happen for very large values of scale since the actual generation is done for scale = 1.)
|
||||||
|
#
|
||||||
|
# Take E(X) = 5, so sigma = 5 / alpha, and with this we have
|
||||||
|
# Var(X) = sigma^2 * alpha = 25 / alpha.
|
||||||
|
# -> Increasing alpha yields lower variance
|
||||||
|
|
||||||
|
# alpha = 1
|
||||||
|
alpha <- 1.0
|
||||||
|
out_gamma_1 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
|
||||||
|
Fv = function(x) {pexp(x, rate=1)},
|
||||||
|
main_title="Smallest SV of v~ Gamma(1, 5) distribution"
|
||||||
|
)
|
||||||
|
|
||||||
|
# alpha = 2
|
||||||
|
alpha <- 2.0
|
||||||
|
out_gamma_2 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
|
||||||
|
Fv = function(x) {pexp(x, rate=1)},
|
||||||
|
main_title="Smallest SV of v~ Gamma(2, 2.5) distribution"
|
||||||
|
)
|
||||||
|
|
||||||
|
# alpha = 3
|
||||||
|
alpha <- 3.0
|
||||||
|
out_gamma_3 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
|
||||||
|
Fv = function(x) {pexp(x, rate=1)},
|
||||||
|
main_title="Smallest SV of v~ Gamma(3, 5/3) distribution"
|
||||||
|
)
|
||||||
|
|
||||||
|
# alpha = 4
|
||||||
|
alpha <- 4.0
|
||||||
|
out_gamma_4 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
|
||||||
|
Fv = function(x) {pexp(x, rate=1)},
|
||||||
|
main_title="Smallest SV of v~ Gamma(3, 5/3) distribution"
|
||||||
|
)
|
||||||
|
|
||||||
|
par(mar = c(5, 4, 4, 8))
|
||||||
|
plot(out_gamma_1$K, out_gamma_1$sv,
|
||||||
|
type="b", col="#D3BA68FF", log="y",
|
||||||
|
main="Smallest SV of Q for variance of the Gamma distribution",
|
||||||
|
ylab="Smallest singular value of Q",
|
||||||
|
xlab="K subdivisions",
|
||||||
|
sub="a = 0.5, n = 400")
|
||||||
|
lines(out_gamma_2$K, out_gamma_2$sv, type="b", col="#D5695DFF")
|
||||||
|
lines(out_gamma_3$K, out_gamma_3$sv, type="b", col="#5D8CA8FF")
|
||||||
|
lines(out_gamma_4$K, out_gamma_4$sv, type="b", col="#65A479FF")
|
||||||
|
par(xpd=TRUE)
|
||||||
|
legend("topright",
|
||||||
|
inset=c(-0.2,0),
|
||||||
|
legend=c(expression(alpha == 1), expression(alpha == 2), expression(alpha == 3), expression(alpha == 4)),
|
||||||
|
col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ),
|
||||||
|
title="Rate",
|
||||||
|
pch = 16,
|
||||||
|
bty = "n")
|
||||||
93
scripts/plot_sv_dependence_n.R
Normal file
93
scripts/plot_sv_dependence_n.R
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# Load function ----------------------------------------------------------------
|
||||||
|
source(here::here("R", "singular_values.R"))
|
||||||
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
source(here::here("R", "singular_value_plot.R"))
|
||||||
|
|
||||||
|
# https://stackoverflow.com/a/5790430
|
||||||
|
resetPar <- function() {
|
||||||
|
dev.new()
|
||||||
|
op <- par(no.readonly = TRUE)
|
||||||
|
dev.off()
|
||||||
|
op
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Compare of parameters of Normal distribution ----------------------------------
|
||||||
|
#
|
||||||
|
out_n400_sd05 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=0.5)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=0.5)},
|
||||||
|
main_title="Smallest SV of v~ N(0,0.5^2) distribution, n = 400"
|
||||||
|
)
|
||||||
|
|
||||||
|
out_n800_sd05 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 800,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=0.5)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=0.5)},
|
||||||
|
main_title="Smallest SV of v~ N(0,0.5^2) distribution, n=800"
|
||||||
|
)
|
||||||
|
|
||||||
|
out_n400_sd2 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 400,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=2)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=2)},
|
||||||
|
main_title="Smallest SV of v~ N(0,2^2) distribution, n=400"
|
||||||
|
)
|
||||||
|
|
||||||
|
out_n800_sd2 <- smallest_sv_sequence(
|
||||||
|
a = c(0.5),
|
||||||
|
n = 800,
|
||||||
|
maxK = 20,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=TRUE,
|
||||||
|
log_plot = TRUE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=2)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=2)},
|
||||||
|
main_title="Smallest SV of v~ N(0,4^2) distribution,n = 800"
|
||||||
|
)
|
||||||
|
|
||||||
|
par(mar = c(5, 4, 4, 8))
|
||||||
|
plot(out_n400_sd05$K, out_n400_sd05$sv,
|
||||||
|
type = "b",
|
||||||
|
pch = 19,
|
||||||
|
col = "#D3BA68FF",
|
||||||
|
xlab = "K subdivisions",
|
||||||
|
ylab = "Smallest singular value of Q",
|
||||||
|
main="smallest SV for different variances of a normal distribution",
|
||||||
|
sub = "n = 400, a = 0.5",
|
||||||
|
log="y")
|
||||||
|
lines(out_n400_sd2$K, out_n400_sd2$sv,
|
||||||
|
type="b", pch=19, col="#D5695DFF", add=TRUE)
|
||||||
|
lines(out_n800_sd05$K, out_n800_sd05$sv,
|
||||||
|
type = "b", pch=19, col="#5D8CA8FF", add=TRUE)
|
||||||
|
lines(out_n800_sd2$K, out_n800_sd2$sv,
|
||||||
|
type = "b", pch=19, col="#65A479FF", add=TRUE)
|
||||||
|
par(xpd = TRUE)
|
||||||
|
legend("topright",
|
||||||
|
inset=c(-0.2,0),
|
||||||
|
legend=c("sd=0.5, n=400", "sd=0.5, n=800", "sd=2, n=400", "sd=2, n=800"),
|
||||||
|
col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ),
|
||||||
|
title="Legend",
|
||||||
|
pch = 16,
|
||||||
|
bty = "n")
|
||||||
315
scripts/plots_a_dependence.qmd
Normal file
315
scripts/plots_a_dependence.qmd
Normal file
@@ -0,0 +1,315 @@
|
|||||||
|
---
|
||||||
|
title: "plots of a dependence"
|
||||||
|
author: "Niclas"
|
||||||
|
format: html
|
||||||
|
editor: visual
|
||||||
|
execute:
|
||||||
|
echo: true
|
||||||
|
working-directory: ../
|
||||||
|
---
|
||||||
|
|
||||||
|
# Plots of the dimensions
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the
|
||||||
|
smallest non-zero singular value of $Q$.
|
||||||
|
|
||||||
|
The matrix $Q$ is given by
|
||||||
|
$$
|
||||||
|
Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du
|
||||||
|
$$
|
||||||
|
with
|
||||||
|
$$
|
||||||
|
p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))}
|
||||||
|
$$
|
||||||
|
In this document we plot different the smallest eigenvalue in dependence of the
|
||||||
|
parameter $a$ with different "ratios" of the parameters $n$ and $k$.
|
||||||
|
|
||||||
|
```{r loading libraries}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
# load local files
|
||||||
|
source(here::here("R", "singular_values.R"))
|
||||||
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
source(here::here("R","singular_value_plot.R"))
|
||||||
|
|
||||||
|
# load libaries for data handling
|
||||||
|
library(ggplot2)
|
||||||
|
library(dplyr)
|
||||||
|
library(latex2exp)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Hyperparameter with n vs. k = log(n)
|
||||||
|
```{r n / log(n) hyperparameters data generation}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(100, 1000, 100)
|
||||||
|
Ks <- floor(log(ns))
|
||||||
|
as <- seq(0, 20, 2)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- Ks[i]
|
||||||
|
# use the default seed 1L
|
||||||
|
out <- smallest_sv_sequence(
|
||||||
|
a = a,
|
||||||
|
n = n,
|
||||||
|
maxK = K,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=FALSE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)}
|
||||||
|
)
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = rep(n, K), dim_k = out$K, param_a = rep(a, K), ssv = out$sv)
|
||||||
|
results <- rbind(results, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r hyperparameter n vs log(n) plotting}
|
||||||
|
results |>
|
||||||
|
mutate(dim_n = as.factor(dim_n)) |>
|
||||||
|
group_by(dim_n) |>
|
||||||
|
filter(dim_k == max(dim_k) & param_a > 0) |>
|
||||||
|
ggplot(aes(param_a, ssv, col=dim_n)) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$a$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = \\lfloor\\log(n) \\rfloor$")),
|
||||||
|
colour=latex2exp::TeX("$n$"),
|
||||||
|
shape=latex2exp::TeX("$a$"))
|
||||||
|
```
|
||||||
|
Here we have relatively large values of the smallest singular value (ssv) of $Q$ since
|
||||||
|
the values for $k$ are relatively small. We have already observed, that with
|
||||||
|
larger $k$ the ssv shrinks rapidly towards zero. However the largest value for $k$
|
||||||
|
is at most $k = `r floor(log(1000))`$ for $n = 1000$.
|
||||||
|
We omitted the value $a = 0$, as this results in a ssv of $10^{-61}$.
|
||||||
|
Note that this is only one sample and it could produce sighlty different results
|
||||||
|
for other seeds.
|
||||||
|
|
||||||
|
|
||||||
|
## Hyperparameter $n vs k = n^alpha$
|
||||||
|
|
||||||
|
```{r n / log(n) hyperparameters data generation}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(100, 1000, 100)
|
||||||
|
as <- seq(0, 20, 2)
|
||||||
|
alphas <- seq(0.1, 0.5, 0.1)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
param_alpha = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:length(alphas)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- floor(n^alphas[j])
|
||||||
|
if (!K > 0) next # skip if K is equal to zero
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(rnorm(n), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
|
||||||
|
results <- rbind(results, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r hyperparameter n / k^alpha = const plotting}
|
||||||
|
results |>
|
||||||
|
filter(dim_n %in% c(100, 500, 1000)) |>
|
||||||
|
mutate(dim_n = as.factor(dim_n),
|
||||||
|
param_alpha = as.factor(param_alpha)) |>
|
||||||
|
group_by(dim_n, param_alpha) |>
|
||||||
|
ggplot(aes(param_a, ssv, col=dim_n, shape=param_alpha)) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$a$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$")),
|
||||||
|
colour=latex2exp::TeX("$n$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
|
Here we use $K = \lfloor n^\alpha\rfloor$ as hyperparameter. Why don't we see
|
||||||
|
any change w.r.t. $a$ for $\alpha = 0.1$?
|
||||||
|
|
||||||
|
|
||||||
|
## Two dimensional example
|
||||||
|
Here consider $p = 2$ covariate variables for each node. In order to make the
|
||||||
|
parameter $a$ invariant to the direction, we sample 5 different vectors for
|
||||||
|
each $a$ and rescale them. We use $K = 5$ as hyperparameter.
|
||||||
|
|
||||||
|
```{r data generation for two d example}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
|
||||||
|
set.seed(10)
|
||||||
|
ns <- seq(100, 1000, 100)
|
||||||
|
as <- matrix(rnorm(8), ncol=4)
|
||||||
|
as_norm <- seq(1, 20, 2)
|
||||||
|
for (i in 1:ncol(as)){
|
||||||
|
as[, i] <- as[, i] / sqrt(sum(as[, i]^2))
|
||||||
|
}
|
||||||
|
|
||||||
|
results <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = integer(),
|
||||||
|
param_a_norm = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a_norm in as_norm) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:ncol(as)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- 5 # floor(sqrt(n))
|
||||||
|
if (!K > 0) next # skip if K is equal to zero
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= as.vector(t(as[, j])),
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(rnorm(2 * n), ncol = 2L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = j, param_a_norm=a_norm, ssv =ssv)
|
||||||
|
results <- rbind(results, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r 2d a parameter plotting}
|
||||||
|
results |>
|
||||||
|
filter(dim_n %in% c(100, 500, 1000)) |>
|
||||||
|
mutate(dim_n = as.factor(dim_n),
|
||||||
|
param_a = as.factor(param_a)) |>
|
||||||
|
group_by(dim_n, param_a) |>
|
||||||
|
ggplot(aes(param_a_norm, ssv, col=dim_n, shape=param_a, interaction(dim_n, param_a))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
#scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$\\|a\\|$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to the norm of $a$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = 5$")),
|
||||||
|
colour=latex2exp::TeX("$n$"),
|
||||||
|
shape=latex2exp::TeX("$\\|\\alpha\\|$"))
|
||||||
|
```
|
||||||
|
```{r plot the vectors}
|
||||||
|
plot(0, 0, xlim=c(-1.5, 1.5), ylim=c(-1.5, 1.5), type="n", xlab="x", ylab="y", asp=1)
|
||||||
|
arrows(0, 0, as[1, ], as[2, ], col=1:4, lwd=2)
|
||||||
|
title(main="Vectors for a rescaled to norm one.")
|
||||||
|
```
|
||||||
|
|
||||||
|
It seems, that the direction of the parameter $a$ has a small influence in the
|
||||||
|
smallest singular value of the matrix $Q$, but not the norm of it??
|
||||||
|
|
||||||
|
|
||||||
|
## Michael's Plot
|
||||||
|
Here we plot for each $n$ the smallest singular value of $Q$. We choose the
|
||||||
|
parameter $K$ as
|
||||||
|
$$
|
||||||
|
K = \lfloor n^\alpha \rfloor
|
||||||
|
$$
|
||||||
|
with $\alpha \in \{0.1, 0.2, 0.3, 0.4, 0.5\}$.
|
||||||
|
|
||||||
|
|
||||||
|
```{r Plot for different alphas}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- seq(250, 10000, 250)
|
||||||
|
as <- c(1.0, 2, 5, 10, 20)
|
||||||
|
alphas <- seq(0.1, 0.5, 0.1)
|
||||||
|
|
||||||
|
set.seed(100)
|
||||||
|
results <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
param_alpha = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
for (j in 1:length(alphas)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- floor(n^alphas[j])
|
||||||
|
if (!K > 1) next # skip if K is equal to zero lr one
|
||||||
|
# use the default seed 1L
|
||||||
|
Q <- compute_matrix(seed=1L,
|
||||||
|
a= a,
|
||||||
|
n = n,
|
||||||
|
K = K,
|
||||||
|
sample_X_fn = function(n) {matrix(rnorm(n), ncol = 1L)},
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)},
|
||||||
|
guard = 1e-12)
|
||||||
|
|
||||||
|
ssv <- compute_minmax_sv(Q)[["smallest_singular_value"]]
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = n, dim_k = K, param_a = a, param_alpha=alphas[j], ssv =ssv)
|
||||||
|
results <- rbind(results, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r hyperparameter n / k^alpha = const plotting}
|
||||||
|
results |>
|
||||||
|
filter(param_alpha > 0.21 & param_alpha <= 0.32) |>
|
||||||
|
mutate(param_alpha = as.factor(param_alpha),
|
||||||
|
param_a = as.factor(param_a)) |>
|
||||||
|
group_by(param_a, param_alpha) |>
|
||||||
|
# filter(dim_k == max(dim_k)) |>
|
||||||
|
ggplot(aes(dim_n, ssv, col=param_a, shape=param_alpha)) +
|
||||||
|
geom_point(size=3.5) +
|
||||||
|
geom_line(size=1.5) +
|
||||||
|
geom_function(fun = function(x) {( sqrt(x)) / (floor(x^0.3))}, col="black" )+
|
||||||
|
# scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $\\alpha$."),
|
||||||
|
subtitle = latex2exp::TeX(("Hyperparameter $k = n^{\\alpha}$")),
|
||||||
|
colour=latex2exp::TeX("$n$"),
|
||||||
|
shape=latex2exp::TeX("$\\alpha$"))
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
145
scripts/plots_dimensions.qmd
Normal file
145
scripts/plots_dimensions.qmd
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
---
|
||||||
|
title: "Plots of n vs. k"
|
||||||
|
author: "Niclas"
|
||||||
|
format: html
|
||||||
|
editor: visual
|
||||||
|
execute:
|
||||||
|
echo: true
|
||||||
|
working-directory: ../
|
||||||
|
---
|
||||||
|
|
||||||
|
# Plots of the dimensions
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
We consider the matrix $QQ^\top$ and look at the smallest eigenvalue, i.e. the
|
||||||
|
smallest non-zero singular value of $Q$.
|
||||||
|
|
||||||
|
The matrix $Q$ is given by
|
||||||
|
$$
|
||||||
|
Q_{ik} = \int_{\frac{k}{K}}^{\frac{k+1}{K}} p_a(u| X_i) \, du
|
||||||
|
$$
|
||||||
|
with
|
||||||
|
$$
|
||||||
|
p_a(u|X) = \frac{f_v(F_a^{-1}(u) - a^\top X)}{f_a(F_a^{-1}(u))}
|
||||||
|
$$
|
||||||
|
|
||||||
|
## Plots of n vs. k
|
||||||
|
|
||||||
|
- The $v$'s are normally distributed with $v \sim \mathcal N(0,1)$
|
||||||
|
- Plot $n = 100, 200, 300, 400$ and $k = 1, \dots, K$ with $K = \sqrt n$.
|
||||||
|
|
||||||
|
```{r Load Libraries}
|
||||||
|
# load local files
|
||||||
|
source(here::here("R", "singular_values.R"))
|
||||||
|
source(here::here("R", "graphon_distribution.R"))
|
||||||
|
source(here::here("R","singular_value_plot.R"))
|
||||||
|
|
||||||
|
# load libaries for data handling
|
||||||
|
library(ggplot2)
|
||||||
|
library(dplyr)
|
||||||
|
library(latex2exp)
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r Compute the data}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
ns <- c(100, 200, 300, 400, 500, 600, 700, 800)
|
||||||
|
Ks <- floor(sqrt(ns))
|
||||||
|
as <- c(0.5, 1.0, 1.5, 2.0)
|
||||||
|
|
||||||
|
# set a global seed
|
||||||
|
set.seed(42)
|
||||||
|
results <- data.frame(dim_n = integer(),
|
||||||
|
dim_k = integer(),
|
||||||
|
param_a = double(),
|
||||||
|
ssv = double())
|
||||||
|
for (a in as) {
|
||||||
|
for (i in 1:length(ns)) {
|
||||||
|
n <- ns[i]
|
||||||
|
K <- Ks[i]
|
||||||
|
# use the default seed 1L
|
||||||
|
out <- smallest_sv_sequence(
|
||||||
|
a = a,
|
||||||
|
n = n,
|
||||||
|
maxK = K,
|
||||||
|
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
|
||||||
|
guard=1e-12,
|
||||||
|
plot=FALSE,
|
||||||
|
fv = function(x) {dnorm(x, mean=0, sd=1)},
|
||||||
|
Fv = function(x) {pnorm(x, mean=0, sd=1)}
|
||||||
|
)
|
||||||
|
|
||||||
|
current_res <- data.frame(dim_n = rep(n, K), dim_k = out$K, param_a = rep(a, K), ssv = out$sv)
|
||||||
|
results <- rbind(results, current_res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
```{r plot the results}
|
||||||
|
#| cache: true
|
||||||
|
#| echo: false
|
||||||
|
#| collapse: true
|
||||||
|
#| fig-cap: "Simulation of the smallest singular values w.r.t. a, n and k"
|
||||||
|
results |>
|
||||||
|
filter(dim_n <= 400) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
dim_n = as.factor(dim_n)) |>
|
||||||
|
group_by(param_a, dim_n) |>
|
||||||
|
ggplot(aes(dim_k, ssv, col=dim_n, shape=param_a, interaction(dim_n, param_a))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$k$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $n$, $k$, and $a$."),
|
||||||
|
colour=latex2exp::TeX("$n$"),
|
||||||
|
shape=latex2exp::TeX("$a$"))
|
||||||
|
```
|
||||||
|
The data for $n = 100$ is covered by the data for $n = 200$.
|
||||||
|
|
||||||
|
## Analysis of the convergence
|
||||||
|
|
||||||
|
We assume that the smallest singular value $\sigma$ can be approximated by:
|
||||||
|
$$
|
||||||
|
\sigma = C \cdot n^\eta \cdot k^\kappa \cdot a^\alpha
|
||||||
|
$$
|
||||||
|
to estimate the coefficients we make a log-transform and perform a linear regression, i.e.
|
||||||
|
$$
|
||||||
|
\log(\sigma) = \log (C) + \eta \log(n) + \kappa\log(k) + \alpha \log(a).
|
||||||
|
$$
|
||||||
|
|
||||||
|
```{r estimate coeffs}
|
||||||
|
model1 <- results |>
|
||||||
|
filter(ssv > 1e-15) |> # exclude to small values
|
||||||
|
lm(formula = log(ssv) ~ log(dim_n) + log(dim_k) + log(param_a))
|
||||||
|
|
||||||
|
summary(model1)
|
||||||
|
plot(model1)
|
||||||
|
|
||||||
|
```
|
||||||
|
## Plot of n vs. ssv
|
||||||
|
|
||||||
|
```{r plot n vs ssv}
|
||||||
|
results |>
|
||||||
|
filter(dim_k %in% c(2, 6, 10)) |>
|
||||||
|
mutate(param_a = as.factor(param_a),
|
||||||
|
dim_k = as.factor(dim_k)) |>
|
||||||
|
group_by(param_a, dim_k) |>
|
||||||
|
ggplot(aes(dim_n, ssv, col=dim_k, shape=param_a, interaction(dim_k, param_a))) +
|
||||||
|
geom_point(size=1.5) +
|
||||||
|
geom_line() +
|
||||||
|
scale_y_log10() +
|
||||||
|
theme_bw() +
|
||||||
|
labs(x=latex2exp::TeX("$n$"),
|
||||||
|
y=latex2exp::TeX("Smallest singular value of $Q$"),
|
||||||
|
title=latex2exp::TeX("Smallest singular value of $Q$ with respect to $n$, $k$, and $a$."),
|
||||||
|
colour=latex2exp::TeX("$k$"),
|
||||||
|
shape=latex2exp::TeX("$a$"))
|
||||||
|
```
|
||||||
|
```{r}
|
||||||
|
Q <- compute_matrix(1, a=0.5, n=10, K = 3, function(n) matrix(rnorm(n), ncol = 1L), fv=dnorm, Fv=pnorm)
|
||||||
|
```
|
||||||
|
|
||||||
BIN
scripts/results.RData
Normal file
BIN
scripts/results.RData
Normal file
Binary file not shown.
Reference in New Issue
Block a user