Introduction

In this section we will cover some essential R functions and packages that facilitate the creation and exploration of networks. We will look at both useful functions in base R as well as their tidverse equivalents:

  1. Iteration:
    • apply
    • *apply and map_*
    • mapply and map2_*
  2. string:
    • stringr
      • str_detect
      • str_replace
    • grep/grepl/gsub
  3. tidyverse
    • un/nest()
    • _join
    • filter
  4. magrittr
  5. miscellaneous
    • t()

Starwars Network

library(tidyverse)
library(magrittr)
library(igraph)
data(starwars)
starwars
## # A tibble: 87 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Luke Sk…    172   77. blond      fair       blue            19.0 male  
##  2 C-3PO       167   75. <NA>       gold       yellow         112.  <NA>  
##  3 R2-D2        96   32. <NA>       white, bl… red             33.0 <NA>  
##  4 Darth V…    202  136. none       white      yellow          41.9 male  
##  5 Leia Or…    150   49. brown      light      brown           19.0 female
##  6 Owen La…    178  120. brown, gr… light      blue            52.0 male  
##  7 Beru Wh…    165   75. brown      light      blue            47.0 female
##  8 R5-D4        97   32. <NA>       white, red red             NA   <NA>  
##  9 Biggs D…    183   84. black      light      brown           24.0 male  
## 10 Obi-Wan…    182   77. auburn, w… fair       blue-gray       57.0 male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
starwars$films[[1]]
## [1] "Revenge of the Sith"     "Return of the Jedi"     
## [3] "The Empire Strikes Back" "A New Hope"             
## [5] "The Force Awakens"
starwars %>% select(name, films)
## # A tibble: 87 x 2
##    name               films    
##    <chr>              <list>   
##  1 Luke Skywalker     <chr [5]>
##  2 C-3PO              <chr [6]>
##  3 R2-D2              <chr [7]>
##  4 Darth Vader        <chr [4]>
##  5 Leia Organa        <chr [5]>
##  6 Owen Lars          <chr [3]>
##  7 Beru Whitesun lars <chr [3]>
##  8 R5-D4              <chr [1]>
##  9 Biggs Darklighter  <chr [1]>
## 10 Obi-Wan Kenobi     <chr [6]>
## # ... with 77 more rows
sw_character_films <- starwars %>%
  select(name, films) %>%
  unnest() %>%
  rename(from = name,
         to = films) %>%
  mutate(type = 'APPEARED IN')

sw_character_films
## # A tibble: 173 x 3
##    from           to                      type       
##    <chr>          <chr>                   <chr>      
##  1 Luke Skywalker Revenge of the Sith     APPEARED IN
##  2 Luke Skywalker Return of the Jedi      APPEARED IN
##  3 Luke Skywalker The Empire Strikes Back APPEARED IN
##  4 Luke Skywalker A New Hope              APPEARED IN
##  5 Luke Skywalker The Force Awakens       APPEARED IN
##  6 C-3PO          Attack of the Clones    APPEARED IN
##  7 C-3PO          The Phantom Menace      APPEARED IN
##  8 C-3PO          Revenge of the Sith     APPEARED IN
##  9 C-3PO          Return of the Jedi      APPEARED IN
## 10 C-3PO          The Empire Strikes Back APPEARED IN
## # ... with 163 more rows
sw_characters <- sw_character_films %>%
  select(from) %>%
  rename(name = from) %>%
  distinct() %>%
  mutate(type = 'CHARACTER')

sw_characters
## # A tibble: 87 x 2
##    name               type     
##    <chr>              <chr>    
##  1 Luke Skywalker     CHARACTER
##  2 C-3PO              CHARACTER
##  3 R2-D2              CHARACTER
##  4 Darth Vader        CHARACTER
##  5 Leia Organa        CHARACTER
##  6 Owen Lars          CHARACTER
##  7 Beru Whitesun lars CHARACTER
##  8 R5-D4              CHARACTER
##  9 Biggs Darklighter  CHARACTER
## 10 Obi-Wan Kenobi     CHARACTER
## # ... with 77 more rows
sw_films <- sw_character_films %>%
  select(to) %>%
  rename(name = to) %>%
  distinct() %>%
  mutate(type = 'FILM') 

sw_films
## # A tibble: 7 x 2
##   name                    type 
##   <chr>                   <chr>
## 1 Revenge of the Sith     FILM 
## 2 Return of the Jedi      FILM 
## 3 The Empire Strikes Back FILM 
## 4 A New Hope              FILM 
## 5 The Force Awakens       FILM 
## 6 Attack of the Clones    FILM 
## 7 The Phantom Menace      FILM
g <- graph_from_data_frame(sw_character_films, F, rbind(sw_characters, sw_films))
plot(g,
     vertex.size = 5,
     vertex.color = sapply(V(g)$type,
                           function(x){switch(x, 'CHARACTER' = 'lightblue', 'FILM' = 'orangered')}),
     vertex.label.cex = .65
     )

generalizing the network creation process

head(starwars)
## # A tibble: 6 x 13
##   name      height  mass hair_color skin_color eye_color birth_year gender
##   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
## 1 Luke Sky…    172   77. blond      fair       blue            19.0 male  
## 2 C-3PO        167   75. <NA>       gold       yellow         112.  <NA>  
## 3 R2-D2         96   32. <NA>       white, bl… red             33.0 <NA>  
## 4 Darth Va…    202  136. none       white      yellow          41.9 male  
## 5 Leia Org…    150   49. brown      light      brown           19.0 female
## 6 Owen Lars    178  120. brown, gr… light      blue            52.0 male  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
sw_net_creator <- tibble(
  from = 'name',
  to = c('hair_color', 'skin_color', 'eye_color', 'gender', 'homeworld', 
         'species', 'films', 'vehicles', 'starships'),
  type = c('HAS HAIR COLOR', 'HAS SKIN COLOR', 'HAS EYE COLOR', 'HAS GENDER', 'HAS HOMEWORLD',
           'HAS SPECIES', 'APPEARS IN', 'PILOTS (VEHICLE)', 'PILOTS (STARSHIP)')
)
sw_el_creator <- function(from, to, type){
  df <- starwars %>%
    select(from, to) %>%
    unnest() %>%
    mutate(type = type) %>%
    rename_("from" = from,
           "to" = to)
}
sw_el <- pmap(sw_net_creator, sw_el_creator) %>%
  bind_rows %>%
  filter(!is.na(to),
         to != "none")
sw_nl <- c(sw_net_creator$from,
           sw_net_creator$to) %>%
  unique %>% 
  lapply(function(x){
    if(x == 'name'){
      tibble(
        name = starwars[[x]],
        type = 'CHARACTER',
        height = starwars[['height']],
        mass = starwars[['mass']],
        birth_year = starwars[['birth_year']]
        ) 
      } else {
        tibble(
          name = starwars[[x]] %>% unlist,
          type = ifelse(str_detect(x, 'color'), 'COLOR', str_to_upper(x))
        ) %>%
          filter(!is.na(name))
      }
    }) %>%
  bind_rows() %>%
  distinct() %>%
  filter(name != 'none')
sw_g <- graph_from_data_frame(sw_el, T, sw_nl)
sw_g %>%
  V(.) %>%
  .[str_detect(name, 'Naboo')] %>%
  {E(sw_g)[. %--% V(sw_g)]} %>%
  .['Padmé Amidala' %--% V(sw_g)] %>%
  .[type == 'PILOTS (STARSHIP)']
## + 2/676 edges from 3e45fae (vertex names):
## [1] Padmé Amidala->Naboo star skiff Padmé Amidala->Naboo fighter

making indirect connections

sw_nodes <- as_data_frame(sw_g, 'vertices')
sw_edges <- as_data_frame(sw_g, 'edges')
head(sw_nodes)
##                          name      type height mass birth_year
## Luke Skywalker Luke Skywalker CHARACTER    172   77       19.0
## C-3PO                   C-3PO CHARACTER    167   75      112.0
## R2-D2                   R2-D2 CHARACTER     96   32       33.0
## Darth Vader       Darth Vader CHARACTER    202  136       41.9
## Leia Organa       Leia Organa CHARACTER    150   49       19.0
## Owen Lars           Owen Lars CHARACTER    178  120       52.0
head(sw_edges)
##                 from            to           type
## 1     Luke Skywalker         blond HAS HAIR COLOR
## 2        Leia Organa         brown HAS HAIR COLOR
## 3          Owen Lars   brown, grey HAS HAIR COLOR
## 4 Beru Whitesun lars         brown HAS HAIR COLOR
## 5  Biggs Darklighter         black HAS HAIR COLOR
## 6     Obi-Wan Kenobi auburn, white HAS HAIR COLOR
sw_nested_films <- sw_edges %>%
  filter(type == 'APPEARS IN') %>%
  select(from, to) %>%
  rename(character = from) %>%
  nest(character)

sw_nested_films
## # A tibble: 7 x 2
##   to                      data                 
##   <chr>                   <list>               
## 1 Revenge of the Sith     <data.frame [34 × 1]>
## 2 Return of the Jedi      <data.frame [20 × 1]>
## 3 The Empire Strikes Back <data.frame [16 × 1]>
## 4 A New Hope              <data.frame [18 × 1]>
## 5 The Force Awakens       <data.frame [11 × 1]>
## 6 Attack of the Clones    <data.frame [40 × 1]>
## 7 The Phantom Menace      <data.frame [34 × 1]>
sw_chr_chr <- sw_edges %>%
  filter(type == 'APPEARS IN') %>%
  select(from, to) %>%
  left_join(sw_nested_films) %>%
  unnest()
## Joining, by = "to"
sw_chr_chr %>%
  head
##             from                  to      character
## 1 Luke Skywalker Revenge of the Sith Luke Skywalker
## 2 Luke Skywalker Revenge of the Sith          C-3PO
## 3 Luke Skywalker Revenge of the Sith          R2-D2
## 4 Luke Skywalker Revenge of the Sith    Darth Vader
## 5 Luke Skywalker Revenge of the Sith    Leia Organa
## 6 Luke Skywalker Revenge of the Sith      Owen Lars
sw_chr_chr %<>% 
  group_by(from, character) %>%
  summarise(shared_movies = n())

sw_chr_chr %>% 
  head
## # A tibble: 6 x 3
## # Groups:   from [1]
##   from   character    shared_movies
##   <chr>  <chr>                <int>
## 1 Ackbar Ackbar                   2
## 2 Ackbar Arvel Crynyd             1
## 3 Ackbar BB8                      1
## 4 Ackbar Bib Fortuna              1
## 5 Ackbar Boba Fett                1
## 6 Ackbar C-3PO                    1
sw_chr_chr %>% 
  arrange(desc(shared_movies)) %>%
  filter(from != character) %>%
  head
## # A tibble: 6 x 3
## # Groups:   from [3]
##   from           character      shared_movies
##   <chr>          <chr>                  <int>
## 1 C-3PO          Obi-Wan Kenobi             6
## 2 C-3PO          R2-D2                      6
## 3 Obi-Wan Kenobi C-3PO                      6
## 4 Obi-Wan Kenobi R2-D2                      6
## 5 R2-D2          C-3PO                      6
## 6 R2-D2          Obi-Wan Kenobi             6
sw_chr_chr %>% 
  arrange(desc(shared_movies)) %>%
  filter(from != character) %>%
  head() %>%
  apply(1, sort)
##      [,1]             [,2]    [,3]             [,4]             [,5]   
## [1,] "6"              "6"     "6"              "6"              "6"    
## [2,] "C-3PO"          "C-3PO" "C-3PO"          "Obi-Wan Kenobi" "C-3PO"
## [3,] "Obi-Wan Kenobi" "R2-D2" "Obi-Wan Kenobi" "R2-D2"          "R2-D2"
##      [,6]            
## [1,] "6"             
## [2,] "Obi-Wan Kenobi"
## [3,] "R2-D2"
sw_chr_chr %>% 
  arrange(desc(shared_movies)) %>%
  filter(from != character) %>%
  select(from, character) %>%
  head() %>%
  apply(1, sort) %>%
  t()
##      [,1]             [,2]            
## [1,] "C-3PO"          "Obi-Wan Kenobi"
## [2,] "C-3PO"          "R2-D2"         
## [3,] "C-3PO"          "Obi-Wan Kenobi"
## [4,] "Obi-Wan Kenobi" "R2-D2"         
## [5,] "C-3PO"          "R2-D2"         
## [6,] "Obi-Wan Kenobi" "R2-D2"
sw_unique_chr_chr <- sw_chr_chr %>%
  select(from, character) %>%
  filter(from != character) %>%
  apply(1, sort) %>%
  t() %>%
  as.tibble() %>%
  distinct() %>%
  rename(from = V1, character = V2)
sw_unique_chr_chr %<>%
  left_join(sw_chr_chr)
## Joining, by = c("from", "character")
sw_unique_chr_chr %>%
  arrange(desc(shared_movies))
## # A tibble: 1,793 x 3
##    from           character      shared_movies
##    <chr>          <chr>                  <int>
##  1 C-3PO          Obi-Wan Kenobi             6
##  2 C-3PO          R2-D2                      6
##  3 Obi-Wan Kenobi R2-D2                      6
##  4 C-3PO          Palpatine                  5
##  5 C-3PO          Yoda                       5
##  6 Chewbacca      Leia Organa                5
##  7 Chewbacca      Luke Skywalker             5
##  8 Chewbacca      R2-D2                      5
##  9 Leia Organa    Luke Skywalker             5
## 10 Leia Organa    R2-D2                      5
## # ... with 1,783 more rows
sw_unique_chr_chr %<>%
  rename(to = character) %>%
  mutate(type = 'APPEARED WITH')
sw_edges <- list(sw_edges, sw_unique_chr_chr) %>%
  bind_rows
sw_g <- graph_from_data_frame(sw_edges, F, sw_nodes)
plot(sw_g,
     vertex.size = 5,
     vertex.label.cex = .65)

just_char <- sw_g  %>% 
  {. - E(.)[type != 'APPEARED WITH'] } %>%
  {. - V(.)[type != 'CHARACTER']}
just_char
## IGRAPH af50f60 UN-B 87 1793 -- 
## + attr: name (v/c), type (v/c), height (v/n), mass (v/n),
## | birth_year (v/n), type (e/c), shared_movies (e/n)
## + edges from af50f60 (vertex names):
##  [1] Ackbar               --Arvel Crynyd  
##  [2] Ackbar               --BB8           
##  [3] Ackbar               --Bib Fortuna   
##  [4] Boba Fett            --Ackbar        
##  [5] C-3PO                --Ackbar        
##  [6] Ackbar               --Captain Phasma
##  [7] Chewbacca            --Ackbar        
## + ... omitted several edges
plot(just_char,
     vertex.size = 5, 
     vertex.label.cex = .6)

just_char %>%
  {. - E(.)[shared_movies < 4]} %>%
  {. - V(.)[degree(.) == 0]} %>% 
  plot(
    .,
    vertex.size = 5,
    vertex.label.cex = .7
  )

V(sw_g)$height <- as.double(V(sw_g)$height)
write_graph(sw_g, '../Data/starwars_knowledge_graph.graphml', 'graphml')

Bonus for after next section

vader_g <- sw_g - E(sw_g)[type == 'APPEARED WITH']

anakin_movies <- vader_g %>%
  E() %>%
  .[type == 'APPEARS IN'] %>%
  (function(x){
    vader <- x[V(vader_g)[str_detect(name, 'Vader')] %--% V(vader_g)] %>%
      head_of(vader_g, .)
    anakin <- x[V(vader_g)[str_detect(name, 'Anakin')] %--% V(vader_g)] %>%
      head_of(vader_g, .)
    anakin[!anakin %in% vader]
  })

vader_g <- vader_g %>% 
  add_edges(
   matrix(c(rep('Darth Vader', 2), anakin_movies$name),2, 2, T),
   type = 'APPEARS IN' 
   )
get_indirect <- function(graph, interest, connector, weightType, edgeType){
  el <- as_data_frame(graph, 'edges')
  nl <- as_data_frame(graph, 'vertices')
  
  main <- el %>%
    filter(type == connector) %>%
    select(from, to)
  
  other <- main %>%
    rename_("changed1" = interest) %>%
    nest(changed1)
  
  main %<>%
    rename_('changed2' = interest) %>%
    left_join(other) %>%
    unnest() %>%
    group_by(changed1, changed2) %>%
    summarise(weight = n()) 
  
  unique_main <- main %>%
    select(changed1, changed2) %>%
    apply(1, sort) %>%
    t() %>%
    as.tibble() %>%
    distinct() %>%
    rename(changed1 = V1,
           changed2 = V2) %>%
    filter(changed1 != changed2) %>%
    left_join(main) %>%
    mutate(type = edgeType)
  
  names(unique_main) <- c('from', 'to', weightType, 'type')
  el <- list(el, unique_main) %>%
    bind_rows
  graph_from_data_frame(el, F, nl)
}

vader_g <- get_indirect(vader_g, interest= 'from', connector = 'APPEARS IN', weightType = 'shared_movies', edgeType = 'APPEARED WITH')
## Joining, by = "to"
## Joining, by = c("changed1", "changed2")
vader_g %>%
 {. - E(.)[type != 'APPEARED WITH']} %>%
 {. - V(.)[type != 'CHARACTER']} %>%
 {. - E(.)[shared_movies < 4 ]} %>%
 {. - V(.)[degree(.) == 0]} %>%
  plot(
    ., 
    vertex.size = 5,
    vertex.label.cex = .7
  )

library(igraph)
library(magrittr)

Quick Review of Magrittr:

g <- make_empty_graph(directed = T) +
  vertices(c('a', 'b', 'c', 'd', 'e'),
           type = "letter",
           order = c(1, 2, 3, 4, 5)) +
  edges(c('a', 'b',
          'b', 'c',
          'b', 'd',
          'c', 'b',
          'c', 'e'),
        type = "connection",
        weight = c(1, 1, 2, 1, 2))
V(g)
## + 5/5 vertices, named, from cf818ff:
## [1] a b c d e

can also be written as

g %>% V(.)
## + 5/5 vertices, named, from cf818ff:
## [1] a b c d e

The . dictates where the left hand side goes. This . behaves just like the object on the left hand side would.

g %>% 
  V(.) %>% 
  .$type
## [1] "letter" "letter" "letter" "letter" "letter"

The right hand side utilize the left hand side in mulitple places

g %>% 
  V(.) %>% 
  .$type %>% 
  paste(., 1:length(.), sep = '_')
## [1] "letter_1" "letter_2" "letter_3" "letter_4" "letter_5"

The right hand side can even be an anonymous function

g %>% 
  V(.) %>% 
  .$type %>%
  paste(., 1:length(.), sep = '_') %>% 
  (function(x){
    assign('savingStep', x, .GlobalEnv)
    
    x %>%
      gsub(1, 'first', .) %>%
      gsub(length(.), 'last', .)
  })
## [1] "letter_first" "letter_2"     "letter_3"     "letter_4"    
## [5] "letter_last"

The beautiful thing about the anonymous function is that it allows us to save the current state of an object to reference later without preventing us from moving forward with our manipulation on it.

paste(V(g))
## [1] "1" "2" "3" "4" "5"