loading required packages:
load("/sibcb2/bioinformatics2/zhangzhiqiang/CancerMHBs/Fig1/MHB/MHB_correlation/RnBeads_450K_hg19_Probes_GR.RData")
RnBeads_450K_hg19_GR = RnBeads_450K_hg19_GR[RnBeads_450K_hg19_GR$QC]
Tx = fread(file = "/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/Aging_CpG/GSE87571_tableS1_rev.csv.gz") %>%
dplyr::rename(P.value = 4)
T1 = Tx[(Tx$P.value < 10^-7) & (Tx$beta > 0), ]
T2 = Tx[(Tx$P.value < 10^-7) & (Tx$beta < 0), ]
uGR = RnBeads_450K_hg19_GR[names(RnBeads_450K_hg19_GR) %in% as.character(T1$IlmnID)]
dGR = RnBeads_450K_hg19_GR[names(RnBeads_450K_hg19_GR) %in% as.character(T2$IlmnID)]
# MHB
Tx = fread(file = "/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/MHB/MHB_11_cancers_clusters.bed.gz")
mhb = Tx %>% toGRanges()
mhb = mhb[countOverlaps(mhb, RnBeads_450K_hg19_GR, type = "any", ignore.strand = TRUE) > 0]
## Pos
mhb_aging = mhb[countOverlaps(mhb, uGR, type = "any", ignore.strand = TRUE) > 0]
## Neg
mhb_aging_neg = mhb[countOverlaps(mhb, dGR, type = "any", ignore.strand = TRUE) > 0]
# PLOT
myCol <- brewer.pal(3, "Set1")
HM450K_MHB = paste(mhb[1:length(mhb)],sep=":")
MHB_Aging = c(paste(mhb_aging[1:length(mhb_aging)],sep=":"),
paste(mhb_aging_neg[1:length(mhb_aging_neg)],sep=":"))
x <-list("HM450K_MHB" = HM450K_MHB,"MHB_Aging" = MHB_Aging)
p<-ggvenn(x, fill_color = myCol,
stroke_size = 0.5,
set_name_size = 4)
print(p)
# Aging-associated MHB positive/negative
data = data.frame(Tag=rep("Aging_Associated_MHBs",2),
Type = c("Pos","Neg"),Num=c(length(mhb_aging),length(mhb_aging_neg)))
p<-ggplot(data,aes(x=Tag,y=Num,fill=Type)) +
geom_bar(stat="identity",position="fill") +
scale_fill_manual(values=c("#377EB8","#E41A1C")) +
scale_y_continuous(expand=c(0,0),breaks=c(0,0.25,0.5,0.75,1),
labels=100*c(0,0.25,0.5,0.75,1))+
labs(x="",y="Percent (%)") +
theme_bw()+
theme(panel.background = element_blank(),
panel.grid = element_blank(),
plot.title = element_text(hjust=0.5))+
guides(fill=guide_legend(title=NULL))
print(p)
load("/sibcb2/bioinformatics2/zhangzhiqiang/CancerMHBs/Fig1/MHB/MHB_correlation/RnBeads_450K_hg19_Probes_GR.RData")
load("/sibcb2/bioinformatics2/zhangzhiqiang/CancerMHBs/Fig6/Aging_CpG/TCGA_450K_sampleT.RData")
mean0 = function(z) mean(z, na.rm = TRUE)
RnBeads_450K_hg19_GR = RnBeads_450K_hg19_GR[RnBeads_450K_hg19_GR$QC]
# Load the mhb with positive aging
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/Aging_CpG/mhb_aging_pos.RData")
x = findOverlaps(RnBeads_450K_hg19_GR, mhb_aging, type = "any", ignore.strand = TRUE)
cg = names(RnBeads_450K_hg19_GR[queryHits(x)])
id = as.character(mhb_aging)[subjectHits(x)]
# select the TCGA Cancer types Num(Normal) >10
cancer_type = c()
for (i in names(TCGA_450K_sampleT)){
mx <- nrow(TCGA_450K_sampleT[[i]] %>% filter(grepl("N",TypeID)))
if ( mx >= 10 ){
cancer_type = c(cancer_type,i)
}
}
# running all TCGA cancer types
path = "/sibcb1/bioinformatics/dataupload/cancermhaps/Fig1/TCGA_DNAm"
kTypes = cancer_type
Ns = length(kTypes)
for(i in 1:Ns){
Tag = kTypes[i]
# cat("Running", Tag, "\n")
load(paste0(path, "/", Tag, ".RData"))
MM = get(Tag)[cg, ]
Tx = aggregate(MM, by = list(factor(id)), FUN="mean0")
res = as.matrix(Tx[, -1])
sampleT = TCGA_450K_sampleT[[Tag]][colnames(res), ]
sampleT$median = apply(res, 2, function(z) median(z, na.rm = TRUE))
if(i == 1)
mergedT = sampleT
else
mergedT = rbind(mergedT, sampleT)
rm(list = Tag)
}
status = rep("Cancer", nrow(mergedT))
status[mergedT$TypeID == "NT"] = "Normal"
mergedT$status = status
summaryT = aggregate(mergedT$median[mergedT$status == "Cancer"],
by = list(mergedT$cancer[mergedT$status == "Cancer"]),FUN='median')
orderName = as.character(summaryT[order(-summaryT[,2]), 1])
mergedT$cancer = factor(as.character(mergedT$cancer), levels = orderName)
# PLOT
p <- ggplot(mergedT, aes(x = cancer, y = median, color = status)) +
theme_bw() + labs(x = "", y = "MHB-level methylation", title = "") + ylim(0, 1) +
geom_boxplot(width = 0.6, outlier.shape=NA) +
theme(plot.title = element_text(hjust = 0.5),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_colour_manual(values = c("Cancer" = "#E41A1C", "Normal" = "#377EB8"))
print(p)
load("/sibcb2/bioinformatics2/zhangzhiqiang/CancerMHBs/Fig1/MHB/MHB_correlation/RnBeads_450K_hg19_Probes_GR.RData")
RnBeads_450K_hg19_GR = RnBeads_450K_hg19_GR[RnBeads_450K_hg19_GR$HumanMethylation450 & RnBeads_450K_hg19_GR$QC]
mean0 = function(z) mean(z, na.rm = TRUE)
# load MHB aging-pos
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/Aging_CpG/mhb_aging_pos.RData")
# Data processing
Me_Aging_450K <- function(x) {
a = row.names(x)
b = names(RnBeads_450K_hg19_GR)
cab = intersect(a,b)
RnBeads_450K_hg19_GR = RnBeads_450K_hg19_GR[cab,]
# Aging + MHB
index = findOverlaps(RnBeads_450K_hg19_GR, mhb_aging, type = "any", ignore.strand = TRUE)
cg = names(RnBeads_450K_hg19_GR[queryHits(index)])
id = as.character(mhb_aging)[subjectHits(index)]
# Methyl
MM = x[cg, ]
Tx = aggregate(MM, by = list(factor(id)), FUN="mean0")
res = as.matrix(Tx[, -1])
return(res)
}
Me_Aging_RRBS <- function(x) {
region = GRanges(x[c(1:3)])
names(region) = rownames(x)
##### Aging+MHB
index = findOverlaps(region, mhb_aging, type = "any", ignore.strand = TRUE)
cg = names(region[queryHits(index)])
id = as.character(mhb_aging)[subjectHits(index)]
######Methyl
MM = x[cg,-c(1:3)]
Tx = aggregate(MM, by = list(factor(id)), FUN="mean0")
res = as.matrix(Tx[, -1])
return(res)
}
# plot function
plot<-function(data){
p<-ggplot(data,aes(x=Groups,y=res,colour=Groups)) +
geom_boxplot(width = 0.6, outlier.shape=NA) +
geom_jitter(aes(colour = Groups), position = position_jitter(w = 0.1, h = 0.05), size = 1, alpha = 0.4) +
labs(x = "", y = "MHB-level methylation", title = "") + ylim(0, 1) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1),legend.position = "none") +
scale_colour_brewer(palette = "Set1") +
facet_wrap(~set)
return(p)
}
# load GSE69993 Breast cancer
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/data/GSE69993.RData")
GSE69993_Methyl = GSE69993
GSE69993_info = GSE69993_info
# Breast cancer: normal and Tumor
names(GSE69993_info) = gsub("[: ]","_",names(GSE69993_info))
GSE69993_info$tissue_ch1[GSE69993_info$tissue_ch1 == "normal breast epithelial organoid"] <- "Normal"
GSE69993_info$tissue_ch1[GSE69993_info$tissue_ch1 == "breast ductal carcinoma in situ"] <- "DCIS"
# Aging_Methyl
GSE69993_AM = Me_Aging_RRBS(GSE69993_Methyl)
GSE69993_AM = GSE69993_AM[,rownames(GSE69993_info)]
data = GSE69993_info["tissue_ch1"];names(data) = "Groups"
data$Groups = factor(data$Groups,level=c("Normal","DCIS"))
data$res = apply(GSE69993_AM,2,function(x) median(x,na.rm=TRUE))
data$set = "GSE69993"
# rm
rm(list=c("GSE69993_Methyl","GSE69993_info","GSE69993_AM"))
# PLOT
p<-plot(data)
print(p)
# load Dataset: Breast Cancer (GSE66313)
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/data/GSE66313.RData")
GSE66313_Methyl = MeM
GSE66313_info = meta
names(GSE66313_info) = gsub("[: ]","_",names(GSE66313_info))
GSE66313_info$tissue_ch1=gsub("Adjacent-","",GSE66313_info$tissue_ch1)
# Aging_Methyl
GSE66313_AM = Me_Aging_450K(GSE66313_Methyl)
GSE66313_AM = GSE66313_AM[,rownames(GSE66313_info)]
data = GSE66313_info["tissue_ch1"];names(data) = "Groups"
data$Groups = factor(data$Groups,level=c("Normal","DCIS"))
data$res = apply(GSE66313_AM,2,function(x) median(x,na.rm=TRUE))
data$set = "GSE66313"
# rm
rm(list=c("GSE66313_Methyl","GSE66313_info","GSE66313_AM","MeM","meta"))
# PLOT
p<-plot(data)
print(p)
# load Dataset: Colorectal cancer (GSE48684)
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/data/GSE48684.RData")
GSE48684_Methyl = MeM
GSE48684_info = meta
## normal-H ==> normal-C ==> adenoma ==> cancer
## normal_H normal_C adenoma cancer
## 17 24 42 64
names(GSE48684_info) = gsub("[: ]","_",names(GSE48684_info))
GSE48684_info$disease_status_ch1=gsub("-","_",GSE48684_info$disease_status_ch1)
## Aging_Methyl
GSE48684_AM = Me_Aging_450K(GSE48684_Methyl)
GSE48684_AM = GSE48684_AM[,rownames(GSE48684_info)]
data_GSE48684 = GSE48684_info["disease_status_ch1"];names(data_GSE48684) = "Groups"
data_GSE48684$Groups = factor(data_GSE48684$Groups,level=c("normal_H","normal_C","adenoma","cancer"))
data_GSE48684$res = apply(GSE48684_AM,2,function(x) median(x,na.rm=TRUE))
data_GSE48684$set = "GSE48684"
# rm
rm(list=c("GSE48684_Methyl","GSE48684_info","GSE48684_AM","MeM","meta"))
# PLOT
p<-plot(data_GSE48684)
print(p)
# load Dataset: pancreatic cancer (GSE53051)
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/data/GSE53051.RData")
GSE53051_Methyl = MeM
GSE53051_info = meta
names(GSE53051_info) = gsub("[: ]","_",names(GSE53051_info))
GSE53051_info$Groups=paste(GSE53051_info$tissue_ch1,GSE53051_info$phenotype_ch1,GSE53051_info$state_ch1,sep="_")
# Aging_Methyl
GSE53051_AM = Me_Aging_450K(GSE53051_Methyl)
GSE53051_AM = GSE53051_AM[,rownames(GSE53051_info)]
data_GSE53051 = GSE53051_info["Groups"]
#data_GSE48684$Groups = factor(data_GSE48$Groups,level=c("normal_H","normal_C","adenoma","cancer"))
data_GSE53051$res = apply(GSE53051_AM,2,function(x) median(x,na.rm=TRUE))
data_GSE53051$set = "GSE53051"
# filter pancreatic cancer
data_GSE53051_P = data_GSE53051[grep("pancreas",data_GSE53051$Groups),]
data_GSE53051_P$Groups = gsub("pancreas_cancer_cancer","PACA",data_GSE53051_P$Groups)
data_GSE53051_P$Groups = gsub("pancreas_normal_normal","Normal",data_GSE53051_P$Groups)
data_GSE53051_P$Groups = gsub("pancreas_adenoma_cancer","Adenoma",data_GSE53051_P$Groups)
data_GSE53051_P$Groups = factor(data_GSE53051_P$Groups,level=c("Normal","Adenoma","PACA"))
# PLOT
p<-plot(data_GSE53051_P)
print(p)
# load Dataset: Liver cancer (GSE99036)
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/data/GSE99036.RData")
GSE99036_Methyl = MeM
GSE99036_info = meta
## disease stage : liver cirrhosis (LC,n=6), dysplastic nodules (DN,n=11),
## early HCC (eHCC, n=9), and progressed HCC (HCC,n=6)
names(GSE99036_info) = gsub("[: ]","_",names(GSE99036_info))
GSE99036_info$disease_stage_ch1=gsub("-","_",GSE99036_info$disease_stage_ch1)
# Aging_Methyl
GSE99036_AM = Me_Aging_450K(GSE99036_Methyl)
GSE99036_AM = GSE99036_AM[,rownames(GSE99036_info)]
data = GSE99036_info["disease_stage_ch1"];names(data) = "Groups"
data$Groups = factor(data$Groups,level=c("LC","DN_high","eHCC","HCC"))
data$res = apply(GSE99036_AM,2,function(x) median(x,na.rm=TRUE))
data$set = "GSE99036"
# rm
rm(list=c("GSE99036_Methyl","GSE99036_info","GSE99036_AM","MeM","meta"))
# PLOT
p <-plot(data)
print(p)
# load Dataset: Gastric Cancer (GSE103186)
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/data/GSE103186.RData")
GSE103186_Methyl = MeM
GSE103186_info = meta
## intestinal_metaplasia_biopsy_from_gastric_antrum(76),intestinal_metaplasia_biopsy_from_gastric_body(23)
## intestinal_metaplasia_biopsy_from_gastric_cardia (9),mild_intestinal_metaplasia_biopsy_from_gastric_antrum (22)
## normal_biopsy_from_gastric_antrum (39),normal_biopsy_from_gastric_body(11),normal_biopsy_from_gastric_cardia(11)
names(GSE103186_info) = gsub("[: ]","_",names(GSE103186_info))
GSE103186_info$tissue_ch1=gsub(" ","_",GSE103186_info$tissue_ch1)
GSE103186_info$Type[grepl("^intestinal_metaplasia_biopsy",GSE103186_info$tissue_ch1)]<-"IM"
GSE103186_info$Type[grepl("^mild_intestinal_metaplasia_biopsy",GSE103186_info$tissue_ch1)]<-"DIM"
GSE103186_info$Type[grepl("^normal_biopsy",GSE103186_info$tissue_ch1)]<-"NT"
# Aging_Methyl
GSE103186_AM = Me_Aging_450K(GSE103186_Methyl)
GSE103186_AM = GSE103186_AM[,rownames(GSE103186_info)]
data = GSE103186_info["Type"];names(data) = "Groups"
data$Groups = factor(data$Groups,level=c("NT","DIM","IM"))
data$res = apply(GSE103186_AM,2,function(x) median(x,na.rm=TRUE))
data$set = "GSE103186"
# rm
rm(list=c("GSE103186_Methyl","GSE103186_info","GSE103186_AM","MeM","meta"))
# PLOT
p <-plot(data)
print(p)
# load Dataset: Lung Cancer (GSE108123)
load("/sibcb1/bioinformatics/dataupload/cancermhaps/Fig6/data/GSE108123.RData")
GSE108123_Methyl = MeM
GSE108123_info = meta
## Lung carcinoma-in-situ (CIS) lesions are the pre-malignant precursor to lung squamous cell carcinoma
## Control (33) ===> Regressive (38) ====> Progressive (18)
names(GSE108123_info) = gsub("[: ]","_",names(GSE108123_info))
# Aging_Methyl
GSE108123_AM = Me_Aging_450K(GSE108123_Methyl)
GSE108123_AM = GSE108123_AM[,rownames(GSE108123_info)]
data = GSE108123_info["lesion_outcome_ch1"];names(data) = "Groups"
data$Groups = factor(data$Groups,level=c("Control","Regressive","Progressive"))
data$res = apply(GSE108123_AM,2,function(x) median(x,na.rm=TRUE))
data$set = "GSE108123"
# rm
rm(list=c("GSE108123_Methyl","GSE108123_info","GSE108123_AM","MeM","meta"))
# PLOT
p <-plot(data)
print(p)