# Use res_df from memory (no need to read CSV)
all_genes_universe <- rownames (res_df)
N_universe <- length (all_genes_universe)
upregulated_genes <- res_df %>%
filter (category == "Upregulated" ) %>%
rownames ()
downregulated_genes <- res_df %>%
filter (category == "Downregulated" ) %>%
rownames ()
# Read the GAF file
gaf_file <- "data/TriTrypDB-64_TcruziG_GO.gaf"
gaf <- read.delim (gaf_file, header = FALSE , comment.char = "!" , sep = " \t " , quote = "" ,
col.names = c ("DB" , "DB_Object_ID" , "DB_Object_Symbol" , "Qualifier" ,
"GO_ID" , "DB_Reference" , "Evidence_Code" , "With_From" ,
"Aspect" , "DB_Object_Name" , "DB_Object_Synonym" ,
"DB_Object_Type" , "Taxon" , "Date" , "Assigned_By" ,
"Annotation_Extension" , "Gene_Product_Form_ID" ))
go_term_descriptions <- toTable (GOTERM) %>%
dplyr:: select (2 , 3 ) %>%
distinct ()
create_go_list <- function (go_df, desc_df, ontology_code, min_size = 5 ) {
go_filtered <- go_df %>%
filter (Aspect == ontology_code) %>%
dplyr:: select (gene = DB_Object_ID, go_id = GO_ID)
ls_go <- split (go_filtered$ gene, go_filtered$ go_id)
term_names <- desc_df$ Term[match (names (ls_go), desc_df$ go_id)]
names (ls_go) <- term_names
ls_go <- ls_go[! is.na (names (ls_go))]
ls_go <- ls_go[sapply (ls_go, function (x) length (unique (x)) >= min_size)]
return (ls_go)
}
t_cruzi_enrich <- function (ls_go, dge, universe_count) {
n_terms <- sapply (ls_go, function (x) length (unique (x)))
n_intersect <- sapply (ls_go, function (x) { length (intersect (x, dge)) })
n_dge <- length (dge)
intersect_genes <- sapply (ls_go, function (x) {
ids <- intersect (x, dge)
paste0 (ids, collapse = "/" )
})
gene_ratio_numeric <- n_intersect / n_dge
pvalue <- (1 - phyper (n_intersect - 1 , n_terms, universe_count - n_terms, n_dge))
padj <- p.adjust (pvalue, method = "BH" )
res <- data.frame (
description = names (ls_go),
pvalue = pvalue,
padj = padj,
gene_ratio = gene_ratio_numeric,
count = n_intersect,
n_dge = n_dge,
genes = intersect_genes
) %>%
arrange (padj) %>%
filter (count > 0 )
return (res)
}
create_ora_plot <- function (go_enrich_up, go_enrich_down, title) {
if (nrow (go_enrich_up) == 0 & nrow (go_enrich_down) == 0 ) {
message (paste ("No significant terms found for" , title))
return (NULL )
}
if (nrow (go_enrich_up) > 0 ) go_enrich_up <- go_enrich_up %>% mutate (direction = "Upregulated genes" )
if (nrow (go_enrich_down) > 0 ) go_enrich_down <- go_enrich_down %>% mutate (direction = "Downregulated genes" )
go_enrich <- bind_rows (go_enrich_up, go_enrich_down)
go_enrich <- go_enrich %>%
group_by (direction) %>%
slice_min (order_by = padj, n = 10 ) %>%
ungroup ()
go_enrich$ direction <- factor (go_enrich$ direction, levels = c ("Downregulated genes" , "Upregulated genes" ))
p <- ggplot (go_enrich, aes (x = gene_ratio,
y = fct_reorder (description, gene_ratio),
size = count,
col = padj)) +
geom_point () +
facet_wrap (~ direction, scales = "free_y" ) +
scale_color_gradient (low = "black" , high = "#56B1F7" , limits = c (0 , 0.05 ), guide = guide_colorbar (reverse = FALSE )) +
labs (x = "Gene ratio" ,
y = NULL ,
color = "Adjusted \n p-value" ,
size = "Intersection" ,
title = title) +
theme_bw () +
theme (
panel.background = element_rect (fill = "white" , color = "black" ),
panel.grid.major = element_line (linetype = "dotted" , color = "gray60" ),
panel.grid.minor = element_blank (),
strip.background = element_rect (fill = "gray90" , color = "black" , size = 1 ),
strip.text = element_text (color = "black" , face = "plain" , size = 10 ),
axis.text.y = element_text (size = 10 , color = "black" ),
axis.text.x = element_text (size = 9 , color = "black" ),
plot.title = element_text (hjust = 0.5 , face = "plain" ),
legend.position = "right" ,
legend.key = element_blank ()
)
return (p)
}
ls_go_bp <- create_go_list (gaf, go_term_descriptions, "P" , min_size = 5 )
ls_go_cc <- create_go_list (gaf, go_term_descriptions, "C" , min_size = 5 )
ls_go_mf <- create_go_list (gaf, go_term_descriptions, "F" , min_size = 5 )
bp_up <- t_cruzi_enrich (ls_go_bp, upregulated_genes, N_universe) %>% filter (padj <= 0.05 )
bp_down <- t_cruzi_enrich (ls_go_bp, downregulated_genes, N_universe) %>% filter (padj <= 0.05 )
cc_up <- t_cruzi_enrich (ls_go_cc, upregulated_genes, N_universe) %>% filter (padj <= 0.05 )
cc_down <- t_cruzi_enrich (ls_go_cc, downregulated_genes, N_universe) %>% filter (padj <= 0.05 )
mf_up <- t_cruzi_enrich (ls_go_mf, upregulated_genes, N_universe) %>% filter (padj <= 0.05 )
mf_down <- t_cruzi_enrich (ls_go_mf, downregulated_genes, N_universe) %>% filter (padj <= 0.05 )
bp_up |>
mutate (gene_group = "Upregulated" ) |>
bind_rows (bp_down |> mutate (gene_group = "Downregulated" )) |>
write.xlsx ("results/tables/Supplementary_Table_2_BP.xlsx" )
mf_up |>
mutate (gene_group = "Upregulated" ) |>
bind_rows (mf_down |> mutate (gene_group = "Downregulated" )) |>
write.xlsx ("results/tables/Supplementary_Table_3_MF.xlsx" )
p_bp <- create_ora_plot (bp_up, bp_down, title = "G strain vs Y strain - Biological Process" )
if (! is.null (p_bp)) ggsave ("results/figures/bp_enrichment_styled.svg" , p_bp, width = 12 , height = 7 )
p_cc <- create_ora_plot (cc_up, cc_down, title = "G strain vs Y strain - Cellular Component" )
if (! is.null (p_cc)) ggsave ("results/figures/cc_enrichment_styled.svg" , p_cc, width = 12 , height = 7 )
p_mf <- create_ora_plot (mf_up, mf_down, title = "G strain vs Y strain - Molecular Function" )
if (! is.null (p_mf)) ggsave ("results/figures/mf_enrichment_styled.svg" , p_mf, width = 12 , height = 7 )