Get Related Genes

João Cavalcante

03 July, 2023

Extract Autophagy and Mitophagy related genes

library(readr)
library(dplyr)
library(purrr)
library(biomaRt)
library(magrittr)
library(KEGGREST)

get_go_genes <- function(go_term) {
  ensembl <- useEnsembl(biomart = "ensembl", dataset = 'hsapiens_gene_ensembl')

  getBM(
    attributes = c(
      'external_gene_name',
      'go_id',
      'name_1006'
    ),
    filters = 'go',
    values = go_term,
    mart = ensembl
  )
}

Get GO genes

GO terms:

  • autophagy - GO:0006914
  • autophagy of mitocondrion - GO:0000422
  • regulation of autophagy - GO:0010506
  • negative regulation of autophagy - GO:0010507
  • positive regulation of autophagy - GO:0000422
  • mitochondrion organization - GO:0007005
  • mitochondrial fusion - GO:0008053
  • mitochondrial fission - GO:0000266
  • mitochondrion morphogenesis - GO:0070584
  • mitochondrial inner membrane - GO:0005743
  • mitochondrial outer membrane - GO:0005741
  • outer mitochondrial membrane organization - GO:0007008
    • Since human genes were only available for child term “protein insertion into mitochondrial outer membrane” (GO:0045040), we selected from that term instead.
  • inner mitochondrial membrane organization - GO:0007007
  • mitochondrial outer membrane fusion - GO:1990626
  • mitochondrial inner membrane fusion - GO:1990627
go_terms <- c(
  autophagy = "GO:0006914",
  autophagy_of_mitocondrion = "GO:0000422",
  regulation_of_autophagy = "GO:0010506",
  negative_regulation_of_autophagy = "GO:0010507",
  positive_regulation_of_autophagy = "GO:0000422",
  mitochondrion_organization = "GO:0007005",
  mitochondrial_fusion = "GO:0008053",
  mitochondrial_fission = "GO:0000266",
  mitochondrion_morphogenesis = "GO:0070584",
  mitochondrial_inner_membrane = "GO:0005743",
  mitochondrial_outer_membrane = "GO:0005741",
  protein_insertion_into_mitochondrial_outer_membrane = "GO:0045040",
  inner_mitochondrial_membrane_organization = "GO:0007007",
  mitochondrial_outer_membrane_fusion = "GO:1990626",
  mitochondrial_inner_membrane_fusion = "GO:1990627"
  
)

go_selected <- discard(map(go_terms, get_go_genes), ~nrow(.) == 0)

go_renamed <- go_selected %>%
  bind_rows() %>% 
  filter(go_id %in% go_terms) %>%
  filter(external_gene_name != "") %>%
  mutate(database = "GO") %>%
  rename(gene_symbol = external_gene_name,
         annotation_id = go_id,
         annotation_name = name_1006)

Get KEGG genes

KEGG Pathways / BRITE:

  • Mitochondrial Biogenesis - br:hsa03029
  • Autophagy - path:hsa04140
  • Mitophagy - path:hsa04137

We start by downloading gene to pathway mappings from the KEGG API.

download.file(
  "https://ftp.ncbi.nlm.nih.gov/gene/DATA/GENE_INFO/Mammalia/Homo_sapiens.gene_info.gz",
  here::here("data/Homo_sapiens.gene_info.gz")
)

download.file(
  "http://rest.kegg.jp/link/pathway/hsa",
  here::here("data/link_pathway_entrez_hsa.tsv")
)

download.file(
  "http://rest.kegg.jp/link/brite/hsa",
  here::here("data/link_brite_entrez_hsa.tsv")
)

Then, we get the intersection between the datasets.

pathways <- tibble::tribble(
  ~pathway_id,      ~pathway_name,
  "br:hsa03029",  "Mitochondrial Biogenesis",
  "path:hsa04140",  "Autophagy",
  "path:hsa04137",  "Mitophagy"
)

link_pathway_entrez_hsa <- read_tsv(
  here::here("data/link_pathway_entrez_hsa.tsv"),
  col_names = c("entrez_id", "pathway_id"),
  col_types = "cc"
) %>% tidyr::separate(
  entrez_id,
  into = c("species", "entrez_id"),
  sep = ":"
)

link_brite_entrez_hsa <- read_tsv(
  here::here("data/link_brite_entrez_hsa.tsv"),
  col_names = c("entrez_id", "pathway_id"),
  col_types = "cc"
) %>% tidyr::separate(
  entrez_id,
  into = c("species", "entrez_id"),
  sep = ":"
)

# Filtering for pathways/brite of interest
gene_pathways_hsa <- inner_join(link_pathway_entrez_hsa, pathways)
gene_brite_hsa <- inner_join(link_brite_entrez_hsa, pathways)

genes_kegg <- rbind(gene_pathways_hsa, gene_brite_hsa)

Finally, we extract the gene symbols.

entrez_names_hsa <- read_tsv(
  here::here("data/Homo_sapiens.gene_info.gz"),
  skip = 1,
  col_names = c("entrez_id", "gene_symbol"),
  col_types = cols_only("-", "c", "c")
)

genes_kegg_hsa <- genes_kegg %>%
  inner_join(pathways) %>%
  inner_join(entrez_names_hsa) %>%
  dplyr::select(-c(species, entrez_id)) %>%
  rename(annotation_id = pathway_id,
         annotation_name = pathway_name) %>%
  mutate(database = "KEGG")

Merging and writing final dataset

merged <- bind_rows(go_renamed, genes_kegg_hsa)

merged_collapsed <- merged %>%
  group_by(gene_symbol) %>%
  summarise(
    annotation_ids = paste0(unique(annotation_id), collapse="|"),
    annotation_names = paste0(unique(annotation_name), collapse="|"),
    databases = paste0(unique(database), collapse="|"),
    n_annotation = length(unique(annotation_id))
  )

write_csv(merged, here::here("results/selected_genes.csv"))
write_csv(merged_collapsed, here::here("results/selected_genes_collapsed.csv"))