6 Spatial Transcriptomics and Spatial Metabolomics Alignment

6.1 Registration

To calculate the correlation between gene expression and metabolite abundance, we aligned the coordinates of the spatial transcriptomics spots with those of the spatial metabolomics pixels. The detailed alignment algorithm and computational methods are described in the Methods section. Briefly, we first centered the coordinates of the spatial transcriptomics spots (by subtracting their mean) to establish a common reference frame. Subsequently, we applied scaling, rotation, and translation transformations to minimize the spatial discrepancy, so that the transcriptomics spots overlapped as closely as possible with the metabolomics pixels. Because spatial transcriptomics and spatial metabolomics represent distinct omics layers, we relied solely on the overall tissue morphology and outline for alignment, rather than on molecular features, to avoid introducing biases from inter-omics variability.

This shape-based approach enhances robustness across heterogeneous datasets. The alignment process was primarily automated: in the spirit of iterative closest point (ICP) matching, we searched a grid of candidate scaling, rotation, and translation parameters and selected the transformation that minimized the bidirectional nearest-neighbor distance between the two point sets. Manual validation by domain experts was nevertheless essential to confirm accuracy, address edge cases such as tissue deformations, and refine transformations where the automated results showed minor misalignments. This hybrid strategy ensured high-fidelity spatial registration, enabling reliable downstream correlation analyses between gene expression profiles and metabolite levels.

library(tidyverse)
library(FNN)
library(parallel)

# Candidate transformation parameters for the exhaustive grid search.
# Every combination of the values below becomes one row of `moveplain`;
# the ranges and step sizes were chosen empirically.
moveplain <- expand_grid(
  a = seq(from = 0.9, to = 1.1, by = 0.01),  # scale factor for `col`
  b = seq(from = 0.9, to = 1.1, by = 0.01),  # scale factor for `row`
  x0 = seq(from = -10, to = 10, by = 3),     # offset subtracted from `col`
  y0 = seq(from = -10, to = 10, by = 3),     # offset subtracted from `row`
  eta = seq(from = 0, to = 3.14, by = 0.01)  # rotation angle in radians
)

# Spot positions exported by spaceranger; only spots flagged as lying on
# tissue (in_tissue == 1) are kept.
tissue_rna <- dplyr::filter(read_csv(input_tissue_pth), in_tissue == 1)
X <- tissue_rna[, c("col", "row")]

# Pixel positions from the metabolism matrix: discard pixels (columns of
# `metabolism_matrix`, rows of `metabolism_df`) whose total intensity is
# not positive, keeping the two objects in step with each other.
metabolism_matrix_ft <- colSums(metabolism_matrix) > 0
metabolism_matrix <- metabolism_matrix[, metabolism_matrix_ft]
metabolism_df <- metabolism_df[metabolism_matrix_ft, ]

Y <- metabolism_df[, c("x", "y")]
# Empirically chosen rescaling of the pixel coordinates.
# NOTE(review): presumably brings them to the scale of the spot
# coordinates in X -- confirm.
Y$x <- 80 * Y$x
Y$y <- 80 * Y$y
#' Apply one candidate transformation to the spot coordinates
#'
#' Centers the coordinates on their mean, scales each axis, rotates the
#' result about the origin and finally shifts it.
#'
#' @param X Data frame with numeric columns `col` and `row`.
#' @param plain One parameter set (a list or a one-row data frame) with the
#'   elements `a` and `b` (scale factors for `col` and `row`), `eta`
#'   (rotation angle in radians), and `x0` and `y0` (offsets subtracted
#'   from `col` and `row`).
#' @return `X` with its `col` and `row` columns replaced by the transformed
#'   coordinates; any other columns are left untouched.
trainX <- function(X, plain){
  eta <- plain$eta
  a <- plain$a
  b <- plain$b
  x0 <- plain$x0
  y0 <- plain$y0
  
  # Centering
  X$col <- X$col - mean(X$col)
  X$row <- X$row - mean(X$row)
  
  # Anisotropic scaling (stretch / compress each axis independently)
  X$col <- X$col * a
  X$row <- X$row * b
  
  # Rotation. Both new coordinates must be derived from the pre-rotation
  # values, so they are captured before `X$col` is overwritten; reusing the
  # already-rotated `X$col` for the row would distort the shape.
  col_scaled <- X$col
  row_scaled <- X$row
  X$col <- col_scaled * cos(eta) - row_scaled * sin(eta)
  X$row <- col_scaled * sin(eta) + row_scaled * cos(eta)
  
  # Translation
  X$col <- X$col - x0
  X$row <- X$row - y0
  
  X
}

# Score every candidate transformation in `moveplain`. The cost of one
# candidate is the summed distance from each metabolomics pixel to its
# nearest transformed spots plus the summed distance from each transformed
# spot to its nearest pixels, so a mismatch in either direction is
# penalised.
distance_total <- parallel::mclapply(seq_len(nrow(moveplain)), function(i){
  plain <- moveplain[i,]
  Y_hat <- trainX(X, plain)
  # k is set based on experience.
  # get.knnx() searches `data` for the neighbours of each `query` point;
  # get.knn() only searches within a single point set.
  nnlistab <- FNN::get.knnx(data = Y_hat, query = Y, k = 5)
  nnlistba <- FNN::get.knnx(data = Y, query = Y_hat, k = 5)
  # Collapse to a single number per candidate: the two distance matrices
  # have different numbers of rows and cannot be added row-wise.
  sum(nnlistab$nn.dist) + sum(nnlistba$nn.dist)
}, mc.cores = 40L) |> unlist()
# A failed worker yields a non-numeric entry; stop here rather than
# silently storing a misaligned or mistyped cost column.
stopifnot(
  is.numeric(distance_total),
  length(distance_total) == nrow(moveplain)
)
moveplain$distance_total <- distance_total

# Select the parameter set with the smallest total distance.
# with_ties = FALSE guarantees exactly one row, so trainX() receives scalar
# parameters even when several candidates share the minimum.
best_plain <- moveplain |>
  slice_min(order_by = distance_total, n = 1, with_ties = FALSE)
Y_hat_best <- trainX(X, best_plain)

# k is set based on experience.
# For every transformed spot (query) find its k nearest metabolomics pixels
# (data). `nn.index` therefore has one row per spot and holds row indices
# into Y.
Y_hat_bestnn <- FNN::get.knnx(data = Y, query = Y_hat_best, k = 2)

# Generate the metabolome matrix: for every spot barcode, average the
# intensities of its nearest metabolomics pixels. The result is a numeric
# matrix with one row per ion peak and one column per barcode.
combined_metabolism <- Y_hat_bestnn$nn.index |>
  as.data.frame() |>
  mutate(barcode = tissue_rna$barcode) |>
  # One row per (spot, neighbour rank) carrying the neighbour's pixel index.
  pivot_longer(-barcode, names_to = "nst_n", values_to = "metabolism_id") |>
  left_join(
    # Long table of intensities keyed by pixel index; pixels are the
    # columns of `metabolism_matrix`, hence the transpose. A pixel can be
    # the neighbour of several spots, so this join is many-to-many by design.
    t(metabolism_matrix) |>
      as.data.frame() |>
      mutate(metabolism_id = row_number()) |>
      pivot_longer(
        -metabolism_id,
        names_to = "ion_peak",
        values_to = "intensity"
      ),
    by = "metabolism_id"
  ) |>
  group_by(barcode, ion_peak) |>
  # Drop the grouping so pivot_wider() works on a plain tibble.
  summarise(intensity = mean(intensity), .groups = "drop") |>
  pivot_wider(
    id_cols = ion_peak,
    names_from = barcode,
    values_from = intensity
  ) |>
  # `var` takes the column name as a string.
  column_to_rownames(var = "ion_peak") |>
  as.matrix()