In this section we will cover some essential R functions and packages that facilitate the creation and exploration of networks. We will look at both useful functions in base R as well as their tidverse equivalents:
library(tidyverse)
library(magrittr)
library(igraph)
data(starwars)
starwars
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77. blond fair blue 19.0 male
## 2 C-3PO 167 75. <NA> gold yellow 112. <NA>
## 3 R2-D2 96 32. <NA> white, bl… red 33.0 <NA>
## 4 Darth V… 202 136. none white yellow 41.9 male
## 5 Leia Or… 150 49. brown light brown 19.0 female
## 6 Owen La… 178 120. brown, gr… light blue 52.0 male
## 7 Beru Wh… 165 75. brown light blue 47.0 female
## 8 R5-D4 97 32. <NA> white, red red NA <NA>
## 9 Biggs D… 183 84. black light brown 24.0 male
## 10 Obi-Wan… 182 77. auburn, w… fair blue-gray 57.0 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
starwars$films[[1]]
## [1] "Revenge of the Sith" "Return of the Jedi"
## [3] "The Empire Strikes Back" "A New Hope"
## [5] "The Force Awakens"
starwars %>% select(name, films)
## # A tibble: 87 x 2
## name films
## <chr> <list>
## 1 Luke Skywalker <chr [5]>
## 2 C-3PO <chr [6]>
## 3 R2-D2 <chr [7]>
## 4 Darth Vader <chr [4]>
## 5 Leia Organa <chr [5]>
## 6 Owen Lars <chr [3]>
## 7 Beru Whitesun lars <chr [3]>
## 8 R5-D4 <chr [1]>
## 9 Biggs Darklighter <chr [1]>
## 10 Obi-Wan Kenobi <chr [6]>
## # ... with 77 more rows
sw_character_films <- starwars %>%
select(name, films) %>%
unnest() %>%
rename(from = name,
to = films) %>%
mutate(type = 'APPEARED IN')
sw_character_films
## # A tibble: 173 x 3
## from to type
## <chr> <chr> <chr>
## 1 Luke Skywalker Revenge of the Sith APPEARED IN
## 2 Luke Skywalker Return of the Jedi APPEARED IN
## 3 Luke Skywalker The Empire Strikes Back APPEARED IN
## 4 Luke Skywalker A New Hope APPEARED IN
## 5 Luke Skywalker The Force Awakens APPEARED IN
## 6 C-3PO Attack of the Clones APPEARED IN
## 7 C-3PO The Phantom Menace APPEARED IN
## 8 C-3PO Revenge of the Sith APPEARED IN
## 9 C-3PO Return of the Jedi APPEARED IN
## 10 C-3PO The Empire Strikes Back APPEARED IN
## # ... with 163 more rows
sw_characters <- sw_character_films %>%
select(from) %>%
rename(name = from) %>%
distinct() %>%
mutate(type = 'CHARACTER')
sw_characters
## # A tibble: 87 x 2
## name type
## <chr> <chr>
## 1 Luke Skywalker CHARACTER
## 2 C-3PO CHARACTER
## 3 R2-D2 CHARACTER
## 4 Darth Vader CHARACTER
## 5 Leia Organa CHARACTER
## 6 Owen Lars CHARACTER
## 7 Beru Whitesun lars CHARACTER
## 8 R5-D4 CHARACTER
## 9 Biggs Darklighter CHARACTER
## 10 Obi-Wan Kenobi CHARACTER
## # ... with 77 more rows
sw_films <- sw_character_films %>%
select(to) %>%
rename(name = to) %>%
distinct() %>%
mutate(type = 'FILM')
sw_films
## # A tibble: 7 x 2
## name type
## <chr> <chr>
## 1 Revenge of the Sith FILM
## 2 Return of the Jedi FILM
## 3 The Empire Strikes Back FILM
## 4 A New Hope FILM
## 5 The Force Awakens FILM
## 6 Attack of the Clones FILM
## 7 The Phantom Menace FILM
g <- graph_from_data_frame(sw_character_films, F, rbind(sw_characters, sw_films))
plot(g,
vertex.size = 5,
vertex.color = sapply(V(g)$type,
function(x){switch(x, 'CHARACTER' = 'lightblue', 'FILM' = 'orangered')}),
vertex.label.cex = .65
)
head(starwars)
## # A tibble: 6 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sky… 172 77. blond fair blue 19.0 male
## 2 C-3PO 167 75. <NA> gold yellow 112. <NA>
## 3 R2-D2 96 32. <NA> white, bl… red 33.0 <NA>
## 4 Darth Va… 202 136. none white yellow 41.9 male
## 5 Leia Org… 150 49. brown light brown 19.0 female
## 6 Owen Lars 178 120. brown, gr… light blue 52.0 male
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
sw_net_creator <- tibble(
from = 'name',
to = c('hair_color', 'skin_color', 'eye_color', 'gender', 'homeworld',
'species', 'films', 'vehicles', 'starships'),
type = c('HAS HAIR COLOR', 'HAS SKIN COLOR', 'HAS EYE COLOR', 'HAS GENDER', 'HAS HOMEWORLD',
'HAS SPECIES', 'APPEARS IN', 'PILOTS (VEHICLE)', 'PILOTS (STARSHIP)')
)
sw_el_creator <- function(from, to, type){
df <- starwars %>%
select(from, to) %>%
unnest() %>%
mutate(type = type) %>%
rename_("from" = from,
"to" = to)
}
sw_el <- pmap(sw_net_creator, sw_el_creator) %>%
bind_rows %>%
filter(!is.na(to),
to != "none")
sw_nl <- c(sw_net_creator$from,
sw_net_creator$to) %>%
unique %>%
lapply(function(x){
if(x == 'name'){
tibble(
name = starwars[[x]],
type = 'CHARACTER',
height = starwars[['height']],
mass = starwars[['mass']],
birth_year = starwars[['birth_year']]
)
} else {
tibble(
name = starwars[[x]] %>% unlist,
type = ifelse(str_detect(x, 'color'), 'COLOR', str_to_upper(x))
) %>%
filter(!is.na(name))
}
}) %>%
bind_rows() %>%
distinct() %>%
filter(name != 'none')
sw_g <- graph_from_data_frame(sw_el, T, sw_nl)
sw_g %>%
V(.) %>%
.[str_detect(name, 'Naboo')] %>%
{E(sw_g)[. %--% V(sw_g)]} %>%
.['Padmé Amidala' %--% V(sw_g)] %>%
.[type == 'PILOTS (STARSHIP)']
## + 2/676 edges from 3e45fae (vertex names):
## [1] Padmé Amidala->Naboo star skiff Padmé Amidala->Naboo fighter
sw_nodes <- as_data_frame(sw_g, 'vertices')
sw_edges <- as_data_frame(sw_g, 'edges')
head(sw_nodes)
## name type height mass birth_year
## Luke Skywalker Luke Skywalker CHARACTER 172 77 19.0
## C-3PO C-3PO CHARACTER 167 75 112.0
## R2-D2 R2-D2 CHARACTER 96 32 33.0
## Darth Vader Darth Vader CHARACTER 202 136 41.9
## Leia Organa Leia Organa CHARACTER 150 49 19.0
## Owen Lars Owen Lars CHARACTER 178 120 52.0
head(sw_edges)
## from to type
## 1 Luke Skywalker blond HAS HAIR COLOR
## 2 Leia Organa brown HAS HAIR COLOR
## 3 Owen Lars brown, grey HAS HAIR COLOR
## 4 Beru Whitesun lars brown HAS HAIR COLOR
## 5 Biggs Darklighter black HAS HAIR COLOR
## 6 Obi-Wan Kenobi auburn, white HAS HAIR COLOR
sw_nested_films <- sw_edges %>%
filter(type == 'APPEARS IN') %>%
select(from, to) %>%
rename(character = from) %>%
nest(character)
sw_nested_films
## # A tibble: 7 x 2
## to data
## <chr> <list>
## 1 Revenge of the Sith <data.frame [34 × 1]>
## 2 Return of the Jedi <data.frame [20 × 1]>
## 3 The Empire Strikes Back <data.frame [16 × 1]>
## 4 A New Hope <data.frame [18 × 1]>
## 5 The Force Awakens <data.frame [11 × 1]>
## 6 Attack of the Clones <data.frame [40 × 1]>
## 7 The Phantom Menace <data.frame [34 × 1]>
sw_chr_chr <- sw_edges %>%
filter(type == 'APPEARS IN') %>%
select(from, to) %>%
left_join(sw_nested_films) %>%
unnest()
## Joining, by = "to"
sw_chr_chr %>%
head
## from to character
## 1 Luke Skywalker Revenge of the Sith Luke Skywalker
## 2 Luke Skywalker Revenge of the Sith C-3PO
## 3 Luke Skywalker Revenge of the Sith R2-D2
## 4 Luke Skywalker Revenge of the Sith Darth Vader
## 5 Luke Skywalker Revenge of the Sith Leia Organa
## 6 Luke Skywalker Revenge of the Sith Owen Lars
sw_chr_chr %<>%
group_by(from, character) %>%
summarise(shared_movies = n())
sw_chr_chr %>%
head
## # A tibble: 6 x 3
## # Groups: from [1]
## from character shared_movies
## <chr> <chr> <int>
## 1 Ackbar Ackbar 2
## 2 Ackbar Arvel Crynyd 1
## 3 Ackbar BB8 1
## 4 Ackbar Bib Fortuna 1
## 5 Ackbar Boba Fett 1
## 6 Ackbar C-3PO 1
sw_chr_chr %>%
arrange(desc(shared_movies)) %>%
filter(from != character) %>%
head
## # A tibble: 6 x 3
## # Groups: from [3]
## from character shared_movies
## <chr> <chr> <int>
## 1 C-3PO Obi-Wan Kenobi 6
## 2 C-3PO R2-D2 6
## 3 Obi-Wan Kenobi C-3PO 6
## 4 Obi-Wan Kenobi R2-D2 6
## 5 R2-D2 C-3PO 6
## 6 R2-D2 Obi-Wan Kenobi 6
sw_chr_chr %>%
arrange(desc(shared_movies)) %>%
filter(from != character) %>%
head() %>%
apply(1, sort)
## [,1] [,2] [,3] [,4] [,5]
## [1,] "6" "6" "6" "6" "6"
## [2,] "C-3PO" "C-3PO" "C-3PO" "Obi-Wan Kenobi" "C-3PO"
## [3,] "Obi-Wan Kenobi" "R2-D2" "Obi-Wan Kenobi" "R2-D2" "R2-D2"
## [,6]
## [1,] "6"
## [2,] "Obi-Wan Kenobi"
## [3,] "R2-D2"
sw_chr_chr %>%
arrange(desc(shared_movies)) %>%
filter(from != character) %>%
select(from, character) %>%
head() %>%
apply(1, sort) %>%
t()
## [,1] [,2]
## [1,] "C-3PO" "Obi-Wan Kenobi"
## [2,] "C-3PO" "R2-D2"
## [3,] "C-3PO" "Obi-Wan Kenobi"
## [4,] "Obi-Wan Kenobi" "R2-D2"
## [5,] "C-3PO" "R2-D2"
## [6,] "Obi-Wan Kenobi" "R2-D2"
sw_unique_chr_chr <- sw_chr_chr %>%
select(from, character) %>%
filter(from != character) %>%
apply(1, sort) %>%
t() %>%
as.tibble() %>%
distinct() %>%
rename(from = V1, character = V2)
sw_unique_chr_chr %<>%
left_join(sw_chr_chr)
## Joining, by = c("from", "character")
sw_unique_chr_chr %>%
arrange(desc(shared_movies))
## # A tibble: 1,793 x 3
## from character shared_movies
## <chr> <chr> <int>
## 1 C-3PO Obi-Wan Kenobi 6
## 2 C-3PO R2-D2 6
## 3 Obi-Wan Kenobi R2-D2 6
## 4 C-3PO Palpatine 5
## 5 C-3PO Yoda 5
## 6 Chewbacca Leia Organa 5
## 7 Chewbacca Luke Skywalker 5
## 8 Chewbacca R2-D2 5
## 9 Leia Organa Luke Skywalker 5
## 10 Leia Organa R2-D2 5
## # ... with 1,783 more rows
sw_unique_chr_chr %<>%
rename(to = character) %>%
mutate(type = 'APPEARED WITH')
sw_edges <- list(sw_edges, sw_unique_chr_chr) %>%
bind_rows
sw_g <- graph_from_data_frame(sw_edges, F, sw_nodes)
plot(sw_g,
vertex.size = 5,
vertex.label.cex = .65)
just_char <- sw_g %>%
{. - E(.)[type != 'APPEARED WITH'] } %>%
{. - V(.)[type != 'CHARACTER']}
just_char
## IGRAPH af50f60 UN-B 87 1793 --
## + attr: name (v/c), type (v/c), height (v/n), mass (v/n),
## | birth_year (v/n), type (e/c), shared_movies (e/n)
## + edges from af50f60 (vertex names):
## [1] Ackbar --Arvel Crynyd
## [2] Ackbar --BB8
## [3] Ackbar --Bib Fortuna
## [4] Boba Fett --Ackbar
## [5] C-3PO --Ackbar
## [6] Ackbar --Captain Phasma
## [7] Chewbacca --Ackbar
## + ... omitted several edges
plot(just_char,
vertex.size = 5,
vertex.label.cex = .6)
just_char %>%
{. - E(.)[shared_movies < 4]} %>%
{. - V(.)[degree(.) == 0]} %>%
plot(
.,
vertex.size = 5,
vertex.label.cex = .7
)
V(sw_g)$height <- as.double(V(sw_g)$height)
write_graph(sw_g, '../Data/starwars_knowledge_graph.graphml', 'graphml')
vader_g <- sw_g - E(sw_g)[type == 'APPEARED WITH']
anakin_movies <- vader_g %>%
E() %>%
.[type == 'APPEARS IN'] %>%
(function(x){
vader <- x[V(vader_g)[str_detect(name, 'Vader')] %--% V(vader_g)] %>%
head_of(vader_g, .)
anakin <- x[V(vader_g)[str_detect(name, 'Anakin')] %--% V(vader_g)] %>%
head_of(vader_g, .)
anakin[!anakin %in% vader]
})
vader_g <- vader_g %>%
add_edges(
matrix(c(rep('Darth Vader', 2), anakin_movies$name),2, 2, T),
type = 'APPEARS IN'
)
get_indirect <- function(graph, interest, connector, weightType, edgeType){
el <- as_data_frame(graph, 'edges')
nl <- as_data_frame(graph, 'vertices')
main <- el %>%
filter(type == connector) %>%
select(from, to)
other <- main %>%
rename_("changed1" = interest) %>%
nest(changed1)
main %<>%
rename_('changed2' = interest) %>%
left_join(other) %>%
unnest() %>%
group_by(changed1, changed2) %>%
summarise(weight = n())
unique_main <- main %>%
select(changed1, changed2) %>%
apply(1, sort) %>%
t() %>%
as.tibble() %>%
distinct() %>%
rename(changed1 = V1,
changed2 = V2) %>%
filter(changed1 != changed2) %>%
left_join(main) %>%
mutate(type = edgeType)
names(unique_main) <- c('from', 'to', weightType, 'type')
el <- list(el, unique_main) %>%
bind_rows
graph_from_data_frame(el, F, nl)
}
vader_g <- get_indirect(vader_g, interest= 'from', connector = 'APPEARS IN', weightType = 'shared_movies', edgeType = 'APPEARED WITH')
## Joining, by = "to"
## Joining, by = c("changed1", "changed2")
vader_g %>%
{. - E(.)[type != 'APPEARED WITH']} %>%
{. - V(.)[type != 'CHARACTER']} %>%
{. - E(.)[shared_movies < 4 ]} %>%
{. - V(.)[degree(.) == 0]} %>%
plot(
.,
vertex.size = 5,
vertex.label.cex = .7
)
library(igraph)
library(magrittr)
g <- make_empty_graph(directed = T) +
vertices(c('a', 'b', 'c', 'd', 'e'),
type = "letter",
order = c(1, 2, 3, 4, 5)) +
edges(c('a', 'b',
'b', 'c',
'b', 'd',
'c', 'b',
'c', 'e'),
type = "connection",
weight = c(1, 1, 2, 1, 2))
V(g)
## + 5/5 vertices, named, from cf818ff:
## [1] a b c d e
can also be written as
g %>% V(.)
## + 5/5 vertices, named, from cf818ff:
## [1] a b c d e
The . dictates where the left hand side goes. This . behaves just like the object on the left hand side would.
g %>%
V(.) %>%
.$type
## [1] "letter" "letter" "letter" "letter" "letter"
The right hand side utilize the left hand side in mulitple places
g %>%
V(.) %>%
.$type %>%
paste(., 1:length(.), sep = '_')
## [1] "letter_1" "letter_2" "letter_3" "letter_4" "letter_5"
The right hand side can even be an anonymous function
g %>%
V(.) %>%
.$type %>%
paste(., 1:length(.), sep = '_') %>%
(function(x){
assign('savingStep', x, .GlobalEnv)
x %>%
gsub(1, 'first', .) %>%
gsub(length(.), 'last', .)
})
## [1] "letter_first" "letter_2" "letter_3" "letter_4"
## [5] "letter_last"
The beautiful thing about the anonymous function is that it allows us to save the current state of an object to reference later without preventing us from moving forward with our manipulation on it.
paste(V(g))
## [1] "1" "2" "3" "4" "5"