# load local files
source(here::here("R", "singular_values.R"))
source(here::here("R", "graphon_distribution.R"))
source(here::here("R","singular_value_plot.R"))
source(here::here("R", "build_network.R"))

# Helper functions -------------------------------------------------------------
# helper function for wrapping the parameters of the Q_a creation function
# TODO rename this function
make_matrix_creation <- function(seed, n, K, matrix_X, fv, Fv, guard) {
  function(a) {
    compute_matrix(seed=seed, a, n=n, K=K, matrix_X = matrix_X, fv=fv, Fv=Fv, guard=guard)
  }
}

# estimators -------------------------------------------------------------------

## Moore-Penrose Inverse -------------------------------------------------------
#' Moore‑Penrose pseudoinverse of a matrix
#'
#' Computes the Moore‑Penrose generalized inverse of a numeric matrix using a
#' singular‑value decomposition (SVD). Singular values smaller than the
#' tolerance are treated as zero to improve numerical stability.
#'
#' @param A A numeric matrix (or an object coercible to a matrix) whose
#'   pseudoinverse is required.
#' @param tol Tolerance for treating singular values as zero.  By default it
#'   is set to `max(dim(A)) * max(svd(A)$d) * .Machine$double.eps`, which
#'   scales with the size of the matrix and machine precision.
#' @return A matrix representing the Moore‑Penrose inverse of `A`.  The
#'   dimensions of the result are `ncol(A) × nrow(A)`.
#' @examples
#' set.seed(123)
#' A <- matrix(rnorm(12), nrow = 3)
#' A_pinv <- pinv(A)
#' # Verify the Moore‑Penrose properties (A %*% A_pinv %*% A ≈ A)
#' all.equal(A %*% A_pinv %*% A, A)
#' @importFrom stats svd
#' @export
pinv <- function(A, tol = NULL) {
  # Coerce to matrix and check type
  if (!is.matrix(A)) {
    A <- as.matrix(A)
  }
  if (!is.numeric(A)) {
    stop("`A` must be a numeric matrix.", call. = FALSE)
  }
  
  # Singular value decomposition
  s <- svd(A)
  
  # Determine tolerance if not supplied
  if (is.null(tol)) {
    tol <- max(dim(A)) * max(s$d) * .Machine$double.eps
  }
  
  # Invert non‑zero singular values
  d_inv <- ifelse(s$d > tol, 1 / s$d, 0)
  
  # Construct diagonal matrix of inverted singular values
  D_plus <- diag(d_inv, nrow = length(d_inv))
  
  # Moore‑Penrose inverse: V %*% D⁺ %*% t(U)
  s$v %*% D_plus %*% t(s$u)
}


## Estimate Matrix B -----------------------------------------------------------
#' Estimate the matrix \$B\$ for a graphon model
#'
#' For a given graphon scaling parameter `rho_n`, a square matrix `Q_a`, and an
#' adjacency matrix `A`, this function computes  

#' $ B = \\rho_n \, Q_a^{+\\,T} \, A \, Q_a^{+} $

#' where `Q_a^{+}` denotes the Moore‑Penrose pseudoinverse of `Q_a`. 
#'
#' @param rho_n Numeric scalar.  The graphon scaling parameter.
#' @param Q_a   Square numeric matrix. TODO: write description of the Matrix
#' @param A     Square numeric adjacency matrix (same dimension as `Q_a`).
#' @return A numeric matrix of the same dimension as `Q_a` representing the
#'   estimated \$B\$.
#' @examples
#' set.seed(42)
#' n <- 5
#' Q_a <- diag(n)               # simple identity basis
#' A   <- matrix(rbinom(n^2, 1, 0.3), n, n)
#' rho_n <- 0.5
#' B_est <- estimate_B_matrix(rho_n, Q_a, A)
#' str(B_est)
#' @importFrom stats svd
#' @export
estimate_B_matrix <- function(rho_n, Q_a, A) {
  # ---- Input checks ---------------------------------------------------------
  if (!is.numeric(rho_n) || length(rho_n) != 1) {
    stop("`rho_n` must be a single numeric value.", call. = FALSE)
  }
  if (!is.matrix(Q_a) || !is.numeric(Q_a)) {
    stop("`Q_a` must be a numeric matrix.", call. = FALSE)
  }
  if (!is.matrix(A) || !is.numeric(A)) {
    stop("`A` must be a numeric matrix.", call. = FALSE)
  }
  if (nrow(A) != ncol(A)) {
    stop("`A` must be square.", call. = FALSE)
  }
  if (ncol(Q_a) != nrow(A)) {
    stop("Dimensions of `Q_a` and `A` must agree for matrix multiplication.", call. = FALSE)
  }
  
  # ---- Compute pseudoinverse ------------------------------------------------
  pinv_Qa <- pinv(Q_a)   # assumes `pinv()` is available in the namespace
  
  # ---- Estimate B -----------------------------------------------------------
  B <- rho_n * (t(pinv_Qa) %*% A %*% pinv_Qa)
  
  B
}

# TODO rename this function
# TODO test the convergence of the function estimate_B
# with given graphon (block function, Hölder continuous functions)
# and given K with growing N
# and other options
# plot the loss function with respect to a
estimate_a <- function(A, # adjacency matrix
                       a0, # start value
                       n,
                       K,
                       sample_X_fn,
                       fv,
                       Fv, 
                       guard
) {
  calc_Q_a <- make_matrix_creation(seed, n, K, sample_X_fn, fv, Fv, guard)
  
  loss_func <- function(a) {
    Q_a <- calc_Q_a(a)
    pinv_Qa <- pinv(Q_a)
    
    norm(pinv_Qa %*% Q_a %*% A %*% pinv_Qa %*% Q_a - A, type="F")^2
  }
  
  return(loss_func)
}

#' Calculate the edge density of an undirected graph
#'
#' @title Edge density from an adjacency matrix
#' @description Computes the proportion of possible edges that are present in an
#'   undirected, unweighted graph represented by a square adjacency matrix.  The
#'   density is defined as \eqn{2E / (V(V-1))} where \eqn{E} is the number of edges
#'   and \eqn{V} is the number of vertices.  This corresponds to the parameter
#'   \eqn{rho_n}.
#'   The function checks that the input is a square matrix and treats any
#'   non‑numeric or missing entries as absent edges.
#'
#' @param adj_matrix A square numeric matrix representing the adjacency matrix
#'   of an undirected graph.  Entries should be 0/1 (or any truthy numeric) with
#'   `adj_matrix[i, j] == adj_matrix[j, i]`.  Missing values are ignored.
#'
#' @return A single numeric value giving the edge density \eqn{rho}.
#'
#' @examples
#' # Simple triangle graph (3 nodes, 3 edges)
#' A_tri <- matrix(c(0,1,1,
#'                   1,0,1,
#'                   1,1,0), nrow = 3, byrow = TRUE)
#' calculate_edge_density(A_tri)
#'
#' # Empty graph (no edges)
#' A_empty <- matrix(0, nrow = 4, ncol = 4)
#' calculate_edge_density(A_empty)
#'
#' @export
calculate_edge_density <- function(adj_matrix) {
  # -------------------------------------------------------------------------
  # Validate input
  # -------------------------------------------------------------------------
  if (!is.matrix(adj_matrix)) {
    stop("`adj_matrix` must be a matrix.")
  }
  if (nrow(adj_matrix) != ncol(adj_matrix)) {
    stop("`adj_matrix` must be square (same number of rows and columns).")
  }
  
  # -------------------------------------------------------------------------
  # Count edges and nodes
  # -------------------------------------------------------------------------
  edge_idx   <- which(upper.tri(adj_matrix, diag = FALSE), arr.ind = TRUE)
  edge_count <- sum(adj_matrix[edge_idx], na.rm = TRUE)
  node_count <- nrow(adj_matrix)
  
  # -------------------------------------------------------------------------
  # Compute density
  # -------------------------------------------------------------------------
  rho <- (2 * edge_count) / (node_count * (node_count-1))
  
  return(rho)
}
# test the estimator routines
seed <- 17L #  121L this seed works exceptionally well
set.seed(seed)
#X  <- matrix(seq(-1, 1, length.out = 5), ncol = 1)
a  <- 20
n <- 400
K <- 4
rho_n <- log(n) / n
sample_X_fn <- function(n) {matrix(rnorm(n), ncol = 1L)}
X <- sample_X_fn(n)
fv <- function(x) {dnorm(x, mean=0, sd=1)}
Fv <- function(x) {pnorm(x, mean=0, sd=1)}
guard <- 1e-12
v  <- rnorm(n) #seq(0, 0.8, length.out = n)
phi_fun <-Vectorize(function(x, y) ifelse(((x > 0.5 && y <= 0.5) || (x <= 0.5 && y > 0.5)), 1.6, 0.4))        # multiplicative kernel
adj <- compute_adj_matrix(
   X_matrix = X,
   v        = v,
   a        = a,
   phi      = phi_fun,
   rho_n    = rho_n,
   Fv       = Fv
 )
adj

# Q_a matrix
Qa <- compute_matrix(seed, a=a, n=n, K=K, fv=fv, Fv=Fv, guard=guard, matrix_X=X)

calc_Q_a <- make_matrix_creation(seed, n=n, K=K, matrix_X = X, fv=fv, Fv=Fv, guard=guard)

loss_func <- function(a) {
  Q_a <- calc_Q_a(a)
  pinv_Qa <- pinv(Q_a)
  
  norm(pinv_Qa %*% Q_a %*% adj %*% pinv_Qa %*% Q_a - adj, type="F")^2
}


plot_as <- seq(-10, 100, length.out=500)
loss_vals <- sapply(plot_as, loss_func)
plot(plot_as , loss_vals, type="b")
abline(v=a)
title("Test plot of the loss function")

optim(25, loss_func, lower=10, upper=100, method="Brent", control=list(fnscale=1))