Files
GraphonSimulation/scripts/plot_sv.R
2026-02-11 19:00:56 +01:00

365 lines
10 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Load function ----------------------------------------------------------------
source(here::here("R", "singular_values.R"))
source(here::here("R", "graphon_distribution.R"))
source(here::here("R", "singular_value_plot.R"))
# https://stackoverflow.com/a/5790430
resetPar <- function() {
dev.new()
op <- par(no.readonly = TRUE)
dev.off()
op
}
calc_conv_rate <- function(x,y) {
if (!is.numeric(x) || length(x) == 0) {
stop("`x` must be a nonempty numeric vector.")
}
if (!is.numeric(y) || length(y) == 0) {
stop("`y` must be a nonempty numeric vector.")
}
if (length(x) != length(y)) {
stop("`x` and `y` must have the same length.")
}
df <- data.frame("x" = x, "y" = y)
lm_model <- lm(log(y) ~ log(x), data=df)
C <- exp(coefficients(lm_model)[[1]])
alpha <- coefficients(lm_model)[[2]]
df[, "y_pred"] <- C * df[, "x"]^alpha
df[, "residual"] <- df[, "y"] - df[, "y_pred"]
out <- list(
"C" = C,
"alpha" = alpha,
"obs" = df
)
out
}
calc_exp_conv_rate <- function(x,y) {
if (!is.numeric(x) || length(x) == 0) {
stop("`x` must be a nonempty numeric vector.")
}
if (!is.numeric(y) || length(y) == 0) {
stop("`y` must be a nonempty numeric vector.")
}
if (length(x) != length(y)) {
stop("`x` and `y` must have the same length.")
}
df <- data.frame("x" = x, "y" = y)
fit <- nls(y ~ C * exp(r * x^m),
start = list(C = min(y), r= -0.5, m = 1))
}
# Nearly match with sample function --------------------------------------------
# v ~ N(0,1) and X ~ discrete Uniform on [1:n]
out <- smallest_sv_sequence(
a = c(0.5),
n = 9,
maxK= 3,
sampler_fn = sample,
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
curve_expr = quote(1 / x^0.545)
)
conv_rate <- calc_conv_rate(out$K, out$sv)
# Normally distributed X ~ N(0,1) and v ~ N(0,1) -------------------------------
out <- smallest_sv_sequence(
a = c(0.5),
n = 1200,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
curve_expr = quote(1.5 * exp(-0.95 * x^1.34))
#curve_expr = quote( 1/exp(x^1.32))
)
# convergence rate does not work here, probably because the underlying model
# does not work well
conv_rate <- calc_conv_rate(out$K[1:20], out$sv[1:20])
# Uniform distributed X ~ U[0,1] and v ~ N(0,1) --------------------------------
out <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(runif(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
curve_expr = quote(1* exp(-1.1 * x^1.5))
)
# here the optimal fit does not work too, probably other model
calc_conv_rate(out$K[1:9], out$sv[1:9])
# Compare of parameters of Normal distribution ----------------------------------
#
out_sd0_5 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
fv = function(x) {dnorm(x, mean=0, sd=0.5)},
Fv = function(x) {pnorm(x, mean=0, sd=0.5)},
main_title="Smallest SV of v~ N(0,0.5^2) distribution"
)
out_sd1 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
fv = function(x) {dnorm(x, mean=0, sd=1)},
Fv = function(x) {pnorm(x, mean=0, sd=1)},
main_title="Smallest SV of v~ N(0,1) distribution"
)
out_sd2 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
fv = function(x) {dnorm(x, mean=0, sd=2)},
Fv = function(x) {pnorm(x, mean=0, sd=2)},
main_title="Smallest SV of v~ N(0,2^2) distribution"
)
out_sd4 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
fv = function(x) {dnorm(x, mean=0, sd=4)},
Fv = function(x) {pnorm(x, mean=0, sd=4)},
main_title="Smallest SV of v~ N(0,4^2) distribution"
)
par(mar = c(5, 4, 4, 8))
plot(out_sd0_5$K, out_sd0_5$sv,
type = "b",
pch = 19,
col = "#D3BA68FF",
xlab = "K subdivisions",
ylab = "Smallest singular value of Q",
main="smallest SV for different variances of a normal distribution",
sub = "n = 400, a = 0.5",
log="y")
lines(out_sd1$K, out_sd1$sv,
type="b", pch=19, col="#D5695DFF", add=TRUE)
lines(out_sd2$K, out_sd2$sv,
type = "b", pch=19, col="#5D8CA8FF", add=TRUE)
lines(out_sd4$K, out_sd4$sv,
type = "b", pch=19, col="#65A479FF", add=TRUE)
par(xpd = TRUE)
legend("topright",
inset=c(-0.2,0),
legend=c("sd=0.5", "sd=1", "sd=2", "sd=4"),
col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ),
title="Legend",
pch = 16,
bty = "n")
# Break phenomena of Exp-distribution ------------------------------------------
out_exp <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
fv = function(x) {dexp(x, rate=1)},
Fv = function(x) {pexp(x, rate=1)},
main_title="Smallest SV of v~ Exp(1) distribution"
)
par(resetPar)
plot(out_exp$K, out_exp$sv,
log="y",
xlab="K subdivsions",
ylab="Smallest singular value of Q",
col="steelblue",
type="b",
main="Smallest singular value for v ~ Exp(1)",
sub="a = 0.5, n = 400")
arrows(8, 1e-5, 6.5, 1e-7, angle=20, lty = 1, lwd=2)
text(8.5, 1e-5, "Break only seen for exp-distribution", pos=4)
## Observations of the break point depending the rate --------------------------
#
# Note: this also depends on the the parameter n of samples
out_exp_1 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 80,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=FALSE,
fv = function(x) {dexp(x, rate=1)},
Fv = function(x) {pexp(x, rate=1)}
)
out_exp_2 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 80,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=FALSE,
fv = function(x) {dexp(x, rate=2)},
Fv = function(x) {pexp(x, rate=2)}
)
out_exp_3 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 80,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=FALSE,
fv = function(x) {dexp(x, rate=3)},
Fv = function(x) {pexp(x, rate=3)}
)
out_exp_4 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 80,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=FALSE,
fv = function(x) {dexp(x, rate=4)},
Fv = function(x) {pexp(x, rate=4)}
)
par(mar = c(5, 4, 4, 8))
plot(out_exp_1$K, out_exp_1$sv,
type="b", col="#D3BA68FF", log="y",
main="Smallest SV of Q for different rates of Exp-distribution",
ylab="Smallest singular value of Q",
xlab="K subdivisions",
sub="a = 0.5, n = 400, depending also on n")
lines(out_exp_2$K, out_exp_2$sv, type="b", col="#D5695DFF")
lines(out_exp_3$K, out_exp_3$sv, type="b", col="#5D8CA8FF")
lines(out_exp_4$K, out_exp_4$sv, type="b", col="#65A479FF")
par(xpd=TRUE)
legend("topright",
inset=c(-0.2,0),
legend=c(expression(lambda == 1), expression(lambda == 2), expression(lambda == 3), expression(lambda == 4)),
col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ),
title="Rate",
pch = 16,
bty = "n")
# Use of the gamma distribution ------------------------------------------------
# Fix the parameters, such that the mean stays the same and the variance is
# changing.
# From the documentation
# Note that for smallish values of shape (and moderate scale) a large parts of
# the mass of the Gamma distribution is on values of x
# so near zero that they will be represented as zero in computer arithmetic.
# So rgamma may well return values which will be represented as zero.
# (This will also happen for very large values of scale since the actual generation is done for scale = 1.)
#
# Take E(X) = 5, so sigma = 5 / alpha, and with this we have
# Var(X) = sigma^2 * alpha = 25 / alpha.
# -> Increasing alpha yields lower variance
# alpha = 1
alpha <- 1.0
out_gamma_1 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
Fv = function(x) {pexp(x, rate=1)},
main_title="Smallest SV of v~ Gamma(1, 5) distribution"
)
# alpha = 2
alpha <- 2.0
out_gamma_2 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
Fv = function(x) {pexp(x, rate=1)},
main_title="Smallest SV of v~ Gamma(2, 2.5) distribution"
)
# alpha = 3
alpha <- 3.0
out_gamma_3 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
Fv = function(x) {pexp(x, rate=1)},
main_title="Smallest SV of v~ Gamma(3, 5/3) distribution"
)
# alpha = 4
alpha <- 4.0
out_gamma_4 <- smallest_sv_sequence(
a = c(0.5),
n = 400,
maxK = 20,
sampler_fn =function(n) matrix(rnorm(n), ncol = 1L),
guard=1e-12,
plot=TRUE,
log_plot = TRUE,
fv = function(x) {dgamma(x, shape = alpha, rate = 5 / alpha)},
Fv = function(x) {pexp(x, rate=1)},
main_title="Smallest SV of v~ Gamma(3, 5/3) distribution"
)
par(mar = c(5, 4, 4, 8))
plot(out_gamma_1$K, out_gamma_1$sv,
type="b", col="#D3BA68FF", log="y",
main="Smallest SV of Q for variance of the Gamma distribution",
ylab="Smallest singular value of Q",
xlab="K subdivisions",
sub="a = 0.5, n = 400")
lines(out_gamma_2$K, out_gamma_2$sv, type="b", col="#D5695DFF")
lines(out_gamma_3$K, out_gamma_3$sv, type="b", col="#5D8CA8FF")
lines(out_gamma_4$K, out_gamma_4$sv, type="b", col="#65A479FF")
par(xpd=TRUE)
legend("topright",
inset=c(-0.2,0),
legend=c(expression(alpha == 1), expression(alpha == 2), expression(alpha == 3), expression(alpha == 4)),
col=c("#D3BA68FF", "#D5695DFF","#5D8CA8FF", "#65A479FF" ),
title="Rate",
pch = 16,
bty = "n")