gsdensity starts with gene sets. We first need to calculate the relevance between each cell and each gene set. Then we use weighted two-dimensional kernel density estimation (weighted kde2d) to investigate whether the relevant cells show spatial patterns.
# Run the gsdensity pipeline; the steps are very similar to those shown in 'pbmc3k_example'.
# Compute the cell and gene embeddings. Each data point is still referred to as a 'cell' below,
# although in 10x Visium data each one is actually a mixture of cells.
ce <- compute.mca(object = brain)
## 2.13 sec elapsed
## 52.08 sec elapsed
## 4 sec elapsed
# Compute the deviation of each gene set; only the gene sets with more than
# 20 genes are used (gene.set.cutoff = 20).
res <- compute.kld(
  coembed = ce,
  genes.use = rownames(brain),
  gene.set.list = gene.set.list,
  gene.set.cutoff = 20,
  n.grids = 100,
  n.times = 100
)
# Focus on the deviated gene sets from here on; a more stringent alpha level
# cutoff (adjusted p-value < 0.001) is applied.
gene.set.deviated <- res$gene.set[res$p.adj < 0.001]
# length(gene.set.deviated)
# Build a nearest neighbor graph (as an edge list) in the MCA space, and
# collect the cell names used in the next step.
el <- compute.nn.edges(
  coembed = ce,
  nn.use = 300
)
cells <- colnames(brain)
# Compute the relevance between each cell and each deviated gene set
cv.df <- run.rwr.list(
  el = el,
  gene_set_list = gene.set.list[gene.set.deviated],
  cells = cells
)
# Preview the first three cells (rows) and gene sets (columns)
cv.df[seq_len(3), seq_len(3)]
## GOBP_ACIDIC_AMINO_ACID_TRANSPORT
## AAACAAGTATCTCCCA-1 0.0011666124
## AAACACCAATAACTGC-1 0.0005729724
## AAACAGAGCGACTCCT-1 0.0000000000
## GOBP_ACTIN_CYTOSKELETON_REORGANIZATION
## AAACAAGTATCTCCCA-1 0.0001623796
## AAACACCAATAACTGC-1 0.0009500982
## AAACAGAGCGACTCCT-1 0.0000000000
## GOBP_ACTIN_FILAMENT_BASED_MOVEMENT
## AAACAAGTATCTCCCA-1 0.0003621734
## AAACACCAATAACTGC-1 0.0001055359
## AAACAGAGCGACTCCT-1 0.0002774095
# Binarize the relevance data for each gene set
cl.df <- compute.cell.label.df(cv.df)
# Optional filtering step: keep only the terms with more than 100 positive
# cells. The labels are compared in one vectorized step instead of looping
# over columns with apply(). na.rm = TRUE makes sure NA labels are not counted
# as positive: subsetting with x[x == "positive"] keeps NA entries, which
# would inflate the count.
positive.count <- colSums(cl.df == "positive", na.rm = TRUE)
gene.set.deviated.2 <- names(positive.count)[positive.count > 100]
# Find the spatial information of the cells.
# coords.df should be a cell-by-coordinate matrix/data frame, with cells in
# rows and coordinates in columns.
# In this dataset the coordinates are stored with the 'anterior1' image:
coords.df <- brain@images[["anterior1"]]@coordinates[, c("imagerow", "imagecol")]
head(coords.df)
## imagerow imagecol
## AAACAAGTATCTCCCA-1 7475 8501
## AAACACCAATAACTGC-1 8553 2788
## AAACAGAGCGACTCCT-1 3164 7950
## AAACAGCTTTCAGAAG-1 6637 2099
## AAACAGGGTCTATATT-1 7116 2375
## AAACATGGTGAGAGGA-1 8913 1480
# Compute the spatial relevance of gene sets.
# 'weight_df' should have the same format as the output of 'run.rwr.list';
# here we use the terms with more than 100 positive cells. drop = FALSE keeps
# the two-dimensional cell-by-term shape even when only a single term passed
# the filter (otherwise the selection would collapse to a plain vector).
# The parameter 'n' defines how to split the spatial map: n = 10 means that 10
# splits are made in each dimension (100 grids in total) for the kde process.
spatial.klds <- compute.spatial.kld.df(
  spatial.coords = coords.df,
  weight_df = cv.df[, gene.set.deviated.2, drop = FALSE],
  n = 10
)
# Nominate gene sets: keep the highly spatially related terms, i.e. those
# whose value in 'spatial.klds' lies above the 99th percentile.
top.spatial.terms <- spatial.klds[
  spatial.klds > quantile(spatial.klds, probs = 0.99)
]
top.spatial.terms
## GOBP_LAMELLIPODIUM_ASSEMBLY
## 1.385714e-06
## GOBP_NON_MOTILE_CILIUM_ASSEMBLY
## 1.590994e-06
## GOBP_POSITIVE_REGULATION_OF_LAMELLIPODIUM_ASSEMBLY
## 1.722241e-06
## GOBP_POSITIVE_REGULATION_OF_LAMELLIPODIUM_ORGANIZATION
## 1.531802e-06
## GOBP_REGULATION_OF_AMYLOID_PRECURSOR_PROTEIN_CATABOLIC_PROCESS
## 1.428714e-06
# Visualize some of the terms: for each one, store its label propagation
# probability (taken from cv.df, matched by cell name) as a metadata column,
# then draw that column on the tissue with the legend placed on top.
brain@meta.data[["GOBP_POSITIVE_REGULATION_OF_LAMELLIPODIUM_ASSEMBLY"]] <-
  cv.df[rownames(brain@meta.data), "GOBP_POSITIVE_REGULATION_OF_LAMELLIPODIUM_ASSEMBLY"]
SpatialFeaturePlot(
  brain,
  features = "GOBP_POSITIVE_REGULATION_OF_LAMELLIPODIUM_ASSEMBLY"
) +
  theme(legend.position = "top")

brain@meta.data[["GOBP_NON_MOTILE_CILIUM_ASSEMBLY"]] <-
  cv.df[rownames(brain@meta.data), "GOBP_NON_MOTILE_CILIUM_ASSEMBLY"]
SpatialFeaturePlot(
  brain,
  features = "GOBP_NON_MOTILE_CILIUM_ASSEMBLY"
) +
  theme(legend.position = "top")

brain@meta.data[["GOBP_REGULATION_OF_AMYLOID_PRECURSOR_PROTEIN_CATABOLIC_PROCESS"]] <-
  cv.df[rownames(brain@meta.data), "GOBP_REGULATION_OF_AMYLOID_PRECURSOR_PROTEIN_CATABOLIC_PROCESS"]
SpatialFeaturePlot(
  brain,
  features = "GOBP_REGULATION_OF_AMYLOID_PRECURSOR_PROTEIN_CATABOLIC_PROCESS"
) +
  theme(legend.position = "top")
