SlideShare a Scribd company logo
1
Agenda
• extract unique rows
• rename columns
• sample data
• extract columns
• slice rows
• arrange rows
• compare tables
• extract/mutate data using predicate functions
• count observations for different levels of a variable
2
Libraries
library(dplyr)
library(readr)
3
Data
## # A tibble: 1,000 x 7
## referrer device bouncers n_visit n_pages duration purchase
## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl>
## 1 google laptop TRUE 10 1 693 FALSE
## 2 yahoo tablet TRUE 9 1 459 FALSE
## 3 direct laptop TRUE 0 1 996 FALSE
## 4 bing tablet FALSE 3 18 468 TRUE
## 5 yahoo mobile TRUE 9 1 955 FALSE
## 6 yahoo laptop FALSE 5 5 135 FALSE
## 7 yahoo mobile TRUE 10 1 75 FALSE
## 8 direct mobile TRUE 10 1 908 FALSE
## 9 bing mobile FALSE 3 19 209 FALSE
## 10 google mobile TRUE 6 1 208 FALSE
## # ... with 990 more rows
4
Data Dictionary
• referrer: referrer website/search engine
• device: device used to visit the website
• bouncers: whether a visit bounced (exited from landing page)
• duration: time spent on the website (in seconds)
• purchase: whether visitor purchased
• n_visit: number of visits
• n_pages: number of pages visited/browsed
5
6
Traffic Sources
distinct(ecom, referrer)
## # A tibble: 5 x 1
## referrer
## <fct>
## 1 google
## 2 yahoo
## 3 direct
## 4 bing
## 5 social
7
Device Types
distinct(ecom, device)
## # A tibble: 3 x 1
## device
## <fct>
## 1 laptop
## 2 tablet
## 3 mobile
8
9
Rename Columns
rename(ecom, time_on_site = duration)
## # A tibble: 1,000 x 7
## referrer device bouncers n_visit n_pages time_on_site purchase
## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl>
## 1 google laptop TRUE 10 1 693 FALSE
## 2 yahoo tablet TRUE 9 1 459 FALSE
## 3 direct laptop TRUE 0 1 996 FALSE
## 4 bing tablet FALSE 3 18 468 TRUE
## 5 yahoo mobile TRUE 9 1 955 FALSE
## 6 yahoo laptop FALSE 5 5 135 FALSE
## 7 yahoo mobile TRUE 10 1 75 FALSE
## 8 direct mobile TRUE 10 1 908 FALSE
## 9 bing mobile FALSE 3 19 209 FALSE
## 10 google mobile TRUE 6 1 208 FALSE
## # ... with 990 more rows
10
11
Sampling Data
sample_n(ecom, size = 700)
## # A tibble: 700 x 7
## referrer device bouncers n_visit n_pages duration purchase
## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl>
## 1 bing laptop FALSE 0 1 21 FALSE
## 2 social tablet FALSE 10 13 247 FALSE
## 3 yahoo tablet FALSE 5 8 80 FALSE
## 4 social tablet TRUE 3 1 111 FALSE
## 5 google mobile FALSE 4 20 420 FALSE
## 6 google laptop TRUE 1 1 958 FALSE
## 7 yahoo tablet TRUE 9 1 459 FALSE
## 8 yahoo mobile FALSE 5 5 150 FALSE
## 9 google mobile FALSE 5 3 66 FALSE
## 10 google tablet TRUE 6 1 551 FALSE
## # ... with 690 more rows
12
Sampling Data
sample_frac(ecom, size = 0.7)
## # A tibble: 700 x 7
## referrer device bouncers n_visit n_pages duration purchase
## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl>
## 1 google laptop FALSE 4 9 117 FALSE
## 2 social mobile FALSE 1 16 336 TRUE
## 3 google tablet FALSE 10 8 200 FALSE
## 4 bing tablet FALSE 5 18 216 FALSE
## 5 social tablet FALSE 9 15 405 FALSE
## 6 direct mobile TRUE 1 1 180 FALSE
## 7 social mobile FALSE 10 14 350 TRUE
## 8 social tablet TRUE 4 1 392 FALSE
## 9 bing mobile FALSE 6 18 342 TRUE
## 10 yahoo tablet TRUE 5 1 10 FALSE
## # ... with 690 more rows
13
14
Sample Data
ecom_mini <- sample_n(ecom, size = 10)
15
Extract Device Column
pull(ecom_mini, device)
## [1] tablet tablet laptop tablet laptop mobile laptop laptop mobile mobile
## Levels: laptop tablet mobile
16
Extract First Column
pull(ecom_mini, 1)
## [1] direct social direct bing bing social bing direct google social
## Levels: bing direct social yahoo google
17
Extract Last Column
pull(ecom_mini, -1)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
18
19
Extract 10 rows starting from 5
slice(ecom, 5:14)
## # A tibble: 10 x 7
## referrer device bouncers n_visit n_pages duration purchase
## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl>
## 1 yahoo mobile TRUE 9 1 955 FALSE
## 2 yahoo laptop FALSE 5 5 135 FALSE
## 3 yahoo mobile TRUE 10 1 75 FALSE
## 4 direct mobile TRUE 10 1 908 FALSE
## 5 bing mobile FALSE 3 19 209 FALSE
## 6 google mobile TRUE 6 1 208 FALSE
## 7 direct laptop TRUE 9 1 738 FALSE
## 8 direct tablet FALSE 6 12 132 FALSE
## 9 direct mobile FALSE 9 14 406 TRUE
## 10 yahoo tablet FALSE 5 8 80 FALSE
20
Extract Last Row
slice(ecom, n())
## # A tibble: 1 x 7
## referrer device bouncers n_visit n_pages duration purchase
## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl>
## 1 google mobile TRUE 9 1 269 FALSE
21
22
Tabulate Referrers
ecom %>%
group_by(referrer) %>%
tally()
## # A tibble: 5 x 2
## referrer n
## <fct> <int>
## 1 bing 194
## 2 direct 191
## 3 social 200
## 4 yahoo 207
## 5 google 208
23
Tabulate Referrers and Bouncers
ecom %>%
group_by(referrer, bouncers) %>%
tally()
## # A tibble: 10 x 3
## # Groups: referrer [5]
## referrer bouncers n
## <fct> <lgl> <int>
## 1 bing FALSE 104
## 2 bing TRUE 90
## 3 direct FALSE 98
## 4 direct TRUE 93
## 5 social FALSE 93
## 6 social TRUE 107
## 7 yahoo FALSE 110
## 8 yahoo TRUE 97
## 9 google FALSE 101
## 10 google TRUE 107
24
Tabulate Referrers and Purchasers
ecom %>%
group_by(referrer, purchase) %>%
tally()
## # A tibble: 10 x 3
## # Groups: referrer [5]
## referrer purchase n
## <fct> <lgl> <int>
## 1 bing FALSE 177
## 2 bing TRUE 17
## 3 direct FALSE 166
## 4 direct TRUE 25
## 5 social FALSE 180
## 6 social TRUE 20
## 7 yahoo FALSE 185
## 8 yahoo TRUE 22
## 9 google FALSE 189
## 10 google TRUE 19
25
Tabulate Referrers & Converts
ecom %>%
group_by(referrer, purchase) %>%
tally() %>%
filter(purchase)
## # A tibble: 5 x 3
## # Groups: referrer [5]
## referrer purchase n
## <fct> <lgl> <int>
## 1 bing TRUE 17
## 2 direct TRUE 25
## 3 social TRUE 20
## 4 yahoo TRUE 22
## 5 google TRUE 19
26
Count
count(ecom, referrer, purchase)
## # A tibble: 10 x 3
## referrer purchase n
## <fct> <lgl> <int>
## 1 bing FALSE 177
## 2 bing TRUE 17
## 3 direct FALSE 166
## 4 direct TRUE 25
## 5 social FALSE 180
## 6 social TRUE 20
## 7 yahoo FALSE 185
## 8 yahoo TRUE 22
## 9 google FALSE 189
## 10 google TRUE 19
27
28
Top 2 Referrers by Orders
ecom %>%
count(referrer, purchase) %>%
filter(purchase) %>%
arrange(desc(n)) %>%
top_n(n = 2)
## Selecting by n
## # A tibble: 2 x 3
## referrer purchase n
## <fct> <lgl> <int>
## 1 direct TRUE 25
## 2 yahoo TRUE 22
29
Between
ecom_sample <- sample_n(ecom, 30)
ecom_sample %>%
pull(n_pages) %>%
between(5, 15)
## [1] TRUE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE
## [12] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## [23] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
30
Case When
ecom %>%
mutate(
repeat_visit = case_when(
n_visit > 0 ~ TRUE,
TRUE ~ FALSE
)
) %>%
select(n_visit, repeat_visit)
## # A tibble: 1,000 x 2
## n_visit repeat_visit
## <dbl> <lgl>
## 1 10 TRUE
## 2 9 TRUE
## 3 0 FALSE
## 4 3 TRUE
## 5 9 TRUE
## 6 5 TRUE
## 7 10 TRUE
## 8 10 TRUE
## 9 3 TRUE
## 10 6 TRUE
## # ... with 990 more rows
31
Select First Observation
ecom %>%
pull(referrer) %>%
nth(1)
## [1] google
## Levels: bing direct social yahoo google
ecom %>%
pull(referrer) %>%
first()
## [1] google
## Levels: bing direct social yahoo google
32
Select 1000th Observation
ecom %>%
pull(referrer) %>%
nth(1000)
## [1] google
## Levels: bing direct social yahoo google
33
Select Last Observation
ecom %>%
pull(referrer) %>%
last()
## [1] google
## Levels: bing direct social yahoo google
34
35

More Related Content

What's hot (15)

PDF
M12 random forest-part01
Raman Kannan
 
PDF
M11 bagging loo cv
Raman Kannan
 
PDF
Easy HTML Tables in RStudio with Tabyl and kableExtra
Barry DeCicco
 
PDF
M09-Cross validating-naive-bayes
Raman Kannan
 
PDF
Adventures on live partitioning
Matteo Melli
 
PDF
Computer practicals(part) Class 12
अयशकांत मिश्र
 
RTF
Seistech SQL code
Simon Hoyle
 
PDF
PLOTCON NYC: Behind Every Great Plot There's a Great Deal of Wrangling
Plotly
 
TXT
Bouncingballs sh
Ben Pope
 
PPTX
Data mining in Apriori and FP-tree
Subhash Rohit
 
PPTX
Python data structures
Harry Potter
 
PDF
Unlocking Museum Systems with Open Source
Richard Barrett-Small
 
PDF
Prediction
Ban Bang
 
PDF
JavaOne2010 Groovy/Spring Roo
Yasuharu Nakano
 
TXT
Quick reference for hql
Rajkumar Asohan, PMP
 
M12 random forest-part01
Raman Kannan
 
M11 bagging loo cv
Raman Kannan
 
Easy HTML Tables in RStudio with Tabyl and kableExtra
Barry DeCicco
 
M09-Cross validating-naive-bayes
Raman Kannan
 
Adventures on live partitioning
Matteo Melli
 
Computer practicals(part) Class 12
अयशकांत मिश्र
 
Seistech SQL code
Simon Hoyle
 
PLOTCON NYC: Behind Every Great Plot There's a Great Deal of Wrangling
Plotly
 
Bouncingballs sh
Ben Pope
 
Data mining in Apriori and FP-tree
Subhash Rohit
 
Python data structures
Harry Potter
 
Unlocking Museum Systems with Open Source
Richard Barrett-Small
 
Prediction
Ban Bang
 
JavaOne2010 Groovy/Spring Roo
Yasuharu Nakano
 
Quick reference for hql
Rajkumar Asohan, PMP
 

Similar to Explore Data using dplyr (13)

PDF
Data Mining Open Ap Is
oscon2007
 
ODP
Beyond PHP - It's not (just) about the code
Wim Godden
 
PDF
Survey and Measure Development in R
George Mount
 
PDF
Bartoz Goralewicz - Advanced Search Summit Napa 2019
Digital Marketers Organization
 
PDF
Duplicates everywhere (Berlin)
Alexey Grigorev
 
ODP
Beyond php - it's not (just) about the code
Wim Godden
 
PDF
Making Table Views Great Again
Lea Marolt Sonnenschein
 
PPTX
4.Data-Visualization.pptx
PratyushJain37
 
DOCX
Ece175 computer programming for engineering applications homework assignment ...
Song Love
 
PDF
Duplicates everywhere (Kiev)
Alexey Grigorev
 
PDF
Dplyr and Plyr
Paul Richards
 
ODP
Beyond php it's not (just) about the code
Wim Godden
 
ODP
Beyond php - it's not (just) about the code
Wim Godden
 
Data Mining Open Ap Is
oscon2007
 
Beyond PHP - It's not (just) about the code
Wim Godden
 
Survey and Measure Development in R
George Mount
 
Bartoz Goralewicz - Advanced Search Summit Napa 2019
Digital Marketers Organization
 
Duplicates everywhere (Berlin)
Alexey Grigorev
 
Beyond php - it's not (just) about the code
Wim Godden
 
Making Table Views Great Again
Lea Marolt Sonnenschein
 
4.Data-Visualization.pptx
PratyushJain37
 
Ece175 computer programming for engineering applications homework assignment ...
Song Love
 
Duplicates everywhere (Kiev)
Alexey Grigorev
 
Dplyr and Plyr
Paul Richards
 
Beyond php it's not (just) about the code
Wim Godden
 
Beyond php - it's not (just) about the code
Wim Godden
 
Ad

More from Rsquared Academy (20)

PDF
Handling Date & Time in R
Rsquared Academy
 
PDF
Joining Data with dplyr
Rsquared Academy
 
PDF
Variables & Data Types in R
Rsquared Academy
 
PDF
How to install & update R packages?
Rsquared Academy
 
PDF
How to get help in R?
Rsquared Academy
 
PDF
Introduction to R
Rsquared Academy
 
PDF
RMySQL Tutorial For Beginners
Rsquared Academy
 
PDF
R Markdown Tutorial For Beginners
Rsquared Academy
 
PDF
R Data Visualization Tutorial: Bar Plots
Rsquared Academy
 
PDF
R Programming: Introduction to Matrices
Rsquared Academy
 
PDF
R Programming: Introduction to Vectors
Rsquared Academy
 
PPTX
R Programming: Variables & Data Types
Rsquared Academy
 
PDF
Data Visualization With R: Learn To Combine Multiple Graphs
Rsquared Academy
 
PDF
R Data Visualization: Learn To Add Text Annotations To Plots
Rsquared Academy
 
PDF
Data Visualization With R: Learn To Modify Font Of Graphical Parameters
Rsquared Academy
 
PDF
Data Visualization With R: Learn To Modify Color Of Plots
Rsquared Academy
 
PDF
Data Visualization With R: Learn To Modify Title, Axis Labels & Range
Rsquared Academy
 
PDF
Data Visualization With R: Introduction
Rsquared Academy
 
PDF
Data Visualization With R
Rsquared Academy
 
PDF
R Programming: Mathematical Functions In R
Rsquared Academy
 
Handling Date & Time in R
Rsquared Academy
 
Joining Data with dplyr
Rsquared Academy
 
Variables & Data Types in R
Rsquared Academy
 
How to install & update R packages?
Rsquared Academy
 
How to get help in R?
Rsquared Academy
 
Introduction to R
Rsquared Academy
 
RMySQL Tutorial For Beginners
Rsquared Academy
 
R Markdown Tutorial For Beginners
Rsquared Academy
 
R Data Visualization Tutorial: Bar Plots
Rsquared Academy
 
R Programming: Introduction to Matrices
Rsquared Academy
 
R Programming: Introduction to Vectors
Rsquared Academy
 
R Programming: Variables & Data Types
Rsquared Academy
 
Data Visualization With R: Learn To Combine Multiple Graphs
Rsquared Academy
 
R Data Visualization: Learn To Add Text Annotations To Plots
Rsquared Academy
 
Data Visualization With R: Learn To Modify Font Of Graphical Parameters
Rsquared Academy
 
Data Visualization With R: Learn To Modify Color Of Plots
Rsquared Academy
 
Data Visualization With R: Learn To Modify Title, Axis Labels & Range
Rsquared Academy
 
Data Visualization With R: Introduction
Rsquared Academy
 
Data Visualization With R
Rsquared Academy
 
R Programming: Mathematical Functions In R
Rsquared Academy
 
Ad

Recently uploaded (20)

PDF
R Cookbook - Processing and Manipulating Geological spatial data with R.pdf
OtnielSimopiaref2
 
PDF
apidays Helsinki & North 2025 - API-Powered Journeys: Mobility in an API-Driv...
apidays
 
PPTX
Climate Action.pptx action plan for climate
justfortalabat
 
PDF
How to Connect Your On-Premises Site to AWS Using Site-to-Site VPN.pdf
Tamanna
 
PPTX
apidays Helsinki & North 2025 - API access control strategies beyond JWT bear...
apidays
 
PPTX
The _Operations_on_Functions_Addition subtruction Multiplication and Division...
mdregaspi24
 
PDF
Building Production-Ready AI Agents with LangGraph.pdf
Tamanna
 
PDF
apidays Helsinki & North 2025 - REST in Peace? Hunting the Dominant Design fo...
apidays
 
PDF
Data Chunking Strategies for RAG in 2025.pdf
Tamanna
 
PDF
OPPOTUS - Malaysias on Malaysia 1Q2025.pdf
Oppotus
 
PPTX
Resmed Rady Landis May 4th - analytics.pptx
Adrian Limanto
 
PDF
What does good look like - CRAP Brighton 8 July 2025
Jan Kierzyk
 
PDF
Product Management in HealthTech (Case Studies from SnappDoctor)
Hamed Shams
 
PPT
Performance Review for Security and Commodity.ppt
chatwithnitin
 
PDF
AUDITABILITY & COMPLIANCE OF AI SYSTEMS IN HEALTHCARE
GAHI Youssef
 
PPTX
apidays Singapore 2025 - Designing for Change, Julie Schiller (Google)
apidays
 
PPT
deep dive data management sharepoint apps.ppt
novaprofk
 
PDF
apidays Helsinki & North 2025 - Monetizing AI APIs: The New API Economy, Alla...
apidays
 
PPTX
apidays Helsinki & North 2025 - From Chaos to Clarity: Designing (AI-Ready) A...
apidays
 
PDF
apidays Helsinki & North 2025 - APIs in the healthcare sector: hospitals inte...
apidays
 
R Cookbook - Processing and Manipulating Geological spatial data with R.pdf
OtnielSimopiaref2
 
apidays Helsinki & North 2025 - API-Powered Journeys: Mobility in an API-Driv...
apidays
 
Climate Action.pptx action plan for climate
justfortalabat
 
How to Connect Your On-Premises Site to AWS Using Site-to-Site VPN.pdf
Tamanna
 
apidays Helsinki & North 2025 - API access control strategies beyond JWT bear...
apidays
 
The _Operations_on_Functions_Addition subtruction Multiplication and Division...
mdregaspi24
 
Building Production-Ready AI Agents with LangGraph.pdf
Tamanna
 
apidays Helsinki & North 2025 - REST in Peace? Hunting the Dominant Design fo...
apidays
 
Data Chunking Strategies for RAG in 2025.pdf
Tamanna
 
OPPOTUS - Malaysias on Malaysia 1Q2025.pdf
Oppotus
 
Resmed Rady Landis May 4th - analytics.pptx
Adrian Limanto
 
What does good look like - CRAP Brighton 8 July 2025
Jan Kierzyk
 
Product Management in HealthTech (Case Studies from SnappDoctor)
Hamed Shams
 
Performance Review for Security and Commodity.ppt
chatwithnitin
 
AUDITABILITY & COMPLIANCE OF AI SYSTEMS IN HEALTHCARE
GAHI Youssef
 
apidays Singapore 2025 - Designing for Change, Julie Schiller (Google)
apidays
 
deep dive data management sharepoint apps.ppt
novaprofk
 
apidays Helsinki & North 2025 - Monetizing AI APIs: The New API Economy, Alla...
apidays
 
apidays Helsinki & North 2025 - From Chaos to Clarity: Designing (AI-Ready) A...
apidays
 
apidays Helsinki & North 2025 - APIs in the healthcare sector: hospitals inte...
apidays
 

Explore Data using dplyr

  • 1. 1
  • 2. Agenda extract unique rows rename columns sample data extract columns slice rows arrange rows compare tables extract/mutate data using predicate functions count observations for different levels of a variable • • • • • • • • • 2
  • 4. Data ## # A tibble: 1,000 x 7 ## referrer device bouncers n_visit n_pages duration purchase ## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl> ## 1 google laptop TRUE 10 1 693 FALSE ## 2 yahoo tablet TRUE 9 1 459 FALSE ## 3 direct laptop TRUE 0 1 996 FALSE ## 4 bing tablet FALSE 3 18 468 TRUE ## 5 yahoo mobile TRUE 9 1 955 FALSE ## 6 yahoo laptop FALSE 5 5 135 FALSE ## 7 yahoo mobile TRUE 10 1 75 FALSE ## 8 direct mobile TRUE 10 1 908 FALSE ## 9 bing mobile FALSE 3 19 209 FALSE ## 10 google mobile TRUE 6 1 208 FALSE ## # ... with 990 more rows 4
  • 5. Data Dictionary referrer: referrer website/search engine device: device used to visit the website bouncers: whether a visit bounced (exited from landing page) duration: time spent on the website (in seconds) purchase: whether visitor purchased n_visit: number of visits n_pages: number of pages visited/browsed • • • • • • • 5
  • 6. 6
  • 7. Traffic Sources distinct(ecom, referrer) ## # A tibble: 5 x 1 ## referrer ## <fct> ## 1 google ## 2 yahoo ## 3 direct ## 4 bing ## 5 social 7
  • 8. Device Types distinct(ecom, device) ## # A tibble: 3 x 1 ## device ## <fct> ## 1 laptop ## 2 tablet ## 3 mobile 8
  • 9. 9
  • 10. Rename Columns rename(ecom, time_on_site = duration) ## # A tibble: 1,000 x 7 ## referrer device bouncers n_visit n_pages time_on_site purchase ## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl> ## 1 google laptop TRUE 10 1 693 FALSE ## 2 yahoo tablet TRUE 9 1 459 FALSE ## 3 direct laptop TRUE 0 1 996 FALSE ## 4 bing tablet FALSE 3 18 468 TRUE ## 5 yahoo mobile TRUE 9 1 955 FALSE ## 6 yahoo laptop FALSE 5 5 135 FALSE ## 7 yahoo mobile TRUE 10 1 75 FALSE ## 8 direct mobile TRUE 10 1 908 FALSE ## 9 bing mobile FALSE 3 19 209 FALSE ## 10 google mobile TRUE 6 1 208 FALSE ## # ... with 990 more rows 10
  • 11. 11
  • 12. Sampling Data sample_n(ecom, size = 700) ## # A tibble: 700 x 7 ## referrer device bouncers n_visit n_pages duration purchase ## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl> ## 1 bing laptop FALSE 0 1 21 FALSE ## 2 social tablet FALSE 10 13 247 FALSE ## 3 yahoo tablet FALSE 5 8 80 FALSE ## 4 social tablet TRUE 3 1 111 FALSE ## 5 google mobile FALSE 4 20 420 FALSE ## 6 google laptop TRUE 1 1 958 FALSE ## 7 yahoo tablet TRUE 9 1 459 FALSE ## 8 yahoo mobile FALSE 5 5 150 FALSE ## 9 google mobile FALSE 5 3 66 FALSE ## 10 google tablet TRUE 6 1 551 FALSE ## # ... with 690 more rows 12
  • 13. Sampling Data sample_frac(ecom, size = 0.7) ## # A tibble: 700 x 7 ## referrer device bouncers n_visit n_pages duration purchase ## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl> ## 1 google laptop FALSE 4 9 117 FALSE ## 2 social mobile FALSE 1 16 336 TRUE ## 3 google tablet FALSE 10 8 200 FALSE ## 4 bing tablet FALSE 5 18 216 FALSE ## 5 social tablet FALSE 9 15 405 FALSE ## 6 direct mobile TRUE 1 1 180 FALSE ## 7 social mobile FALSE 10 14 350 TRUE ## 8 social tablet TRUE 4 1 392 FALSE ## 9 bing mobile FALSE 6 18 342 TRUE ## 10 yahoo tablet TRUE 5 1 10 FALSE ## # ... with 690 more rows 13
  • 14. 14
  • 15. Sample Data ecom_mini <- sample_n(ecom, size = 10) 15
  • 16. Extract Device Column pull(ecom_mini, device) ## [1] tablet tablet laptop tablet laptop mobile laptop laptop mobile mobile ## Levels: laptop tablet mobile 16
  • 17. Extract First Column pull(ecom_mini, 1) ## [1] direct social direct bing bing social bing direct google social ## Levels: bing direct social yahoo google 17
  • 18. Extract Last Column pull(ecom_mini, -1) ## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 18
  • 19. 19
  • 20. Extract 10 rows starting from 5 slice(ecom, 5:14) ## # A tibble: 10 x 7 ## referrer device bouncers n_visit n_pages duration purchase ## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl> ## 1 yahoo mobile TRUE 9 1 955 FALSE ## 2 yahoo laptop FALSE 5 5 135 FALSE ## 3 yahoo mobile TRUE 10 1 75 FALSE ## 4 direct mobile TRUE 10 1 908 FALSE ## 5 bing mobile FALSE 3 19 209 FALSE ## 6 google mobile TRUE 6 1 208 FALSE ## 7 direct laptop TRUE 9 1 738 FALSE ## 8 direct tablet FALSE 6 12 132 FALSE ## 9 direct mobile FALSE 9 14 406 TRUE ## 10 yahoo tablet FALSE 5 8 80 FALSE 20
  • 21. Extract Last Row slice(ecom, n()) ## # A tibble: 1 x 7 ## referrer device bouncers n_visit n_pages duration purchase ## <fct> <fct> <lgl> <dbl> <dbl> <dbl> <lgl> ## 1 google mobile TRUE 9 1 269 FALSE 21
  • 22. 22
  • 23. Tabulate Referrers ecom %>% group_by(referrer) %>% tally() ## # A tibble: 5 x 2 ## referrer n ## <fct> <int> ## 1 bing 194 ## 2 direct 191 ## 3 social 200 ## 4 yahoo 207 ## 5 google 208 23
  • 24. Tabulate Referrers and Bouncers ecom %>% group_by(referrer, bouncers) %>% tally() ## # A tibble: 10 x 3 ## # Groups: referrer [5] ## referrer bouncers n ## <fct> <lgl> <int> ## 1 bing FALSE 104 ## 2 bing TRUE 90 ## 3 direct FALSE 98 ## 4 direct TRUE 93 ## 5 social FALSE 93 ## 6 social TRUE 107 ## 7 yahoo FALSE 110 ## 8 yahoo TRUE 97 ## 9 google FALSE 101 ## 10 google TRUE 107 24
  • 25. Tabulate Referrers and Purchasers ecom %>% group_by(referrer, purchase) %>% tally() ## # A tibble: 10 x 3 ## # Groups: referrer [5] ## referrer purchase n ## <fct> <lgl> <int> ## 1 bing FALSE 177 ## 2 bing TRUE 17 ## 3 direct FALSE 166 ## 4 direct TRUE 25 ## 5 social FALSE 180 ## 6 social TRUE 20 ## 7 yahoo FALSE 185 ## 8 yahoo TRUE 22 ## 9 google FALSE 189 ## 10 google TRUE 19 25
  • 26. Tabulate Referrers & Converts ecom %>% group_by(referrer, purchase) %>% tally() %>% filter(purchase) ## # A tibble: 5 x 3 ## # Groups: referrer [5] ## referrer purchase n ## <fct> <lgl> <int> ## 1 bing TRUE 17 ## 2 direct TRUE 25 ## 3 social TRUE 20 ## 4 yahoo TRUE 22 ## 5 google TRUE 19 26
  • 27. Count count(ecom, referrer, purchase) ## # A tibble: 10 x 3 ## referrer purchase n ## <fct> <lgl> <int> ## 1 bing FALSE 177 ## 2 bing TRUE 17 ## 3 direct FALSE 166 ## 4 direct TRUE 25 ## 5 social FALSE 180 ## 6 social TRUE 20 ## 7 yahoo FALSE 185 ## 8 yahoo TRUE 22 ## 9 google FALSE 189 ## 10 google TRUE 19 27
  • 28. 28
  • 29. Top 2 Referrers by Orders ecom %>% count(referrer, purchase) %>% filter(purchase) %>% arrange(desc(n)) %>% top_n(n = 2) ## Selecting by n ## # A tibble: 2 x 3 ## referrer purchase n ## <fct> <lgl> <int> ## 1 direct TRUE 25 ## 2 yahoo TRUE 22 29
  • 30. Between ecom_sample <- sample_n(ecom, 30) ecom_sample %>% pull(n_pages) %>% between(5, 15) ## [1] TRUE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE ## [12] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE ## [23] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE 30
  • 31. Case When ecom %>% mutate( repeat_visit = case_when( n_visit > 0 ~ TRUE, TRUE ~ FALSE ) ) %>% select(n_visit, repeat_visit) ## # A tibble: 1,000 x 2 ## n_visit repeat_visit ## <dbl> <lgl> ## 1 10 TRUE ## 2 9 TRUE ## 3 0 FALSE ## 4 3 TRUE ## 5 9 TRUE ## 6 5 TRUE ## 7 10 TRUE ## 8 10 TRUE ## 9 3 TRUE ## 10 6 TRUE ## # ... with 990 more rows 31
  • 32. Select First Observation ecom %>% pull(referrer) %>% nth(1) ## [1] google ## Levels: bing direct social yahoo google ecom %>% pull(referrer) %>% first() ## [1] google ## Levels: bing direct social yahoo google 32
  • 33. Select 1000th Observation ecom %>% pull(referrer) %>% nth(1000) ## [1] google ## Levels: bing direct social yahoo google 33
  • 34. Select Last Observation ecom %>% pull(referrer) %>% last() ## [1] google ## Levels: bing direct social yahoo google 34
  • 35. 35