SlideShare a Scribd company logo
AI & MACHINE LEARNING
MODULE 2 : EXPLORATORY DATA ANALYSIS
Basic R objects
•Variables
•Vector
•Array
•Matrices
•Data Frame
BASIC FUNCTIONS
# Load the built-in mtcars dataset into this session.
data()          # with no arguments: list the datasets that are available
data("mtcars")

# Print the complete data frame.
mtcars

# Data dictionary: one line per variable with its type and leading values.
str(mtcars)
dim(mtcars)     # number of rows and columns
names(mtcars)   # column names
class(mtcars)   # class of the object (numeric, matrix, data frame, etc.)

# Top of the data (default row count), the first 10 rows, then the bottom.
head(mtcars)
head(mtcars, 10)
tail(mtcars)

## Variable Identification - Inferences
nrow(mtcars)
ncol(mtcars)

# Columns picked by position: 2 is cyl and 9 is am in the stock dataset.
mean(mtcars[[2]])
median(mtcars[[9]])

# Spread and centre of miles per gallon.
range(mtcars[["mpg"]])
mean(mtcars[["mpg"]])
1. UNIVARIATE ANALYSIS
# Interquartile range of mpg (75th percentile - 25th percentile)
IQR(mtcars$mpg)

## Univariate Analysis (Pattern Recognition)
# Per-column summary of the whole dataset
summary(mtcars)

# Tukey five-number summary (min, lower hinge, median, upper hinge, max)
# for every column. fivenum() works on a single numeric vector and fails
# when it is handed the whole data frame, so it is applied column by column.
# The result is a 5 x ncol(mtcars) matrix with one column per variable.
mtcars_fivenum <- vapply(mtcars, fivenum, numeric(5))
mtcars_fivenum

# Five-number summary of mpg on its own
fivenum(mtcars$mpg)
# Edit the data by hand to correct outliers.
# edit() opens a data editor and waits for the user, so it is only called
# in an interactive session; a batch run (e.g. Rscript) skips the edit and
# carries on with the data unchanged.
if (interactive()) {
  mtcars <- edit(mtcars)
}
mtcars
HISTOGRAM
# Default histogram of miles per gallon
hist(mtcars$mpg)

# Same histogram with filled bars
hist(mtcars$mpg, col = "Red")

# Filled histogram with a different suggested number of bins
hist(mtcars$mpg, col = "red", breaks = 12)

# Histogram with an explicit title and x-axis label; the object returned by
# hist() is kept in h.
x <- mtcars$mpg
h <- hist(
  x,
  main = "Histogram",
  xlab = "Miles Per Gallon",
  col = "red",
  breaks = 10
)
## Histograms can be a poor method for determining the shape of a
## distribution because they are so strongly affected by the number of bins
## used. Kernel density plots are usually a much more effective way to view
## the distribution of a variable.
# Kernel density estimate of mpg, drawn as a plain curve
d <- density(mtcars$mpg)   # the estimate itself
plot(d)                    # draw it

# The same estimate drawn with a title, then filled in:
# polygon() paints the area under the curve red with a blue outline.
d <- density(mtcars$mpg)
plot(d, main = "Kernel Density of Miles Per Gallon")
polygon(d, border = "blue", col = "red")
BAR PLOT
# Frequency table: number of cars for each value of gear
counts <- table(mtcars$gear)
counts

# Vertical bar plots: default fill first, then yellow bars
barplot(counts, xlab = "Number of Gears", main = "Car Distribution")
barplot(counts, xlab = "Number of Gears", main = "Car Distribution",
        col = "Yellow")

# Horizontal bar plots: default bar labels first, then custom labels
counts <- table(mtcars$gear)
barplot(counts, horiz = TRUE, main = "Car Distribution")
barplot(counts, horiz = TRUE, main = "Car Distribution",
        names.arg = c("3 Gears", "4 Gears", "5 Gears"))
BOX PLOT
# Box plot of mpg: vertical first, then laid out horizontally
boxplot(mtcars$mpg)
boxplot(mtcars$mpg, horizontal = TRUE)

# Horizontal box plots with a fill colour; the second one also gets a title
boxplot(mtcars$mpg, col = "Pink", horizontal = TRUE)
boxplot(mtcars$mpg, main = "Mileage", col = "Green", horizontal = TRUE)
2. BIVARIATE ANALYSIS
o Continuous Numerical Variables vs Continuous Numerical Variables
o Continuous Numerical Variables vs Discrete Numerical Variables
o Discrete Numerical Variables vs Categorical Variables
STACKED BAR PLOT
# Stacked bar plot with colours and a legend.
# Rows of the table (vs) become the stacked segments, columns (gear) the bars.
# The legend argument is spelled out as legend.text rather than relying on
# partial matching of the abbreviated name "legend".
counts <- table(mtcars$vs, mtcars$gear)
barplot(counts, main = "Car Distribution by Gears and VS",
        xlab = "Number of Gears", col = c("darkblue", "red"),
        legend.text = rownames(counts))

# Same plot with cyl as the stacked variable (three levels, three colours).
counts <- table(mtcars$cyl, mtcars$gear)
barplot(counts, main = "Car Distribution by Gears and Cylinders",
        xlab = "Number of Gears", col = c("darkblue", "red", "Yellow"),
        legend.text = rownames(counts))
GROUPED BAR PLOT
# Grouped bar plot: beside = TRUE draws the table rows (vs) side by side
# within each gear group instead of stacking them.
# The legend argument is spelled out as legend.text rather than relying on
# partial matching of the abbreviated name "legend".
counts <- table(mtcars$vs, mtcars$gear)
barplot(counts, main = "Car Distribution by Gears and VS",
        xlab = "Number of Gears", col = c("darkblue", "red"),
        legend.text = rownames(counts), beside = TRUE)

# Same plot grouped by cyl (three levels, three colours).
counts <- table(mtcars$cyl, mtcars$gear)
barplot(counts, main = "Car Distribution by Gears and Cylinders",
        xlab = "Number of Gears", col = c("darkblue", "red", "Yellow"),
        legend.text = rownames(counts), beside = TRUE)
BOX PLOT
# Comparative box plot: mpg split by the two levels of vs, one colour each.
boxplot(mtcars$mpg ~ mtcars$vs, horizontal = TRUE, col = c("Red", "Green"))

# Same plot with a title. The title is kept on one line: a string literal
# broken across source lines would put a line break inside the plot title.
boxplot(mtcars$mpg ~ mtcars$vs, horizontal = TRUE, col = c("Red", "Green"),
        main = "Comparative Boxplot")
3. MULTIVARIATE ANALYSIS
## To check the correlation among the variables
# Graphical representation: scatter-plot matrix of the selected variables
pairs(~ mpg + disp + hp + wt + drat + qsec, data = mtcars)

# Numerical representation: correlation matrix of columns 1 to 11.
# The matrix is kept in cor_matrix and then printed.
cor_matrix <- cor(mtcars[, 1:11])
cor_matrix
Artificial inteliggence and machine learning ppt
Artificial inteliggence and machine learning ppt

More Related Content

Similar to Artificial inteliggence and machine learning ppt (20)

TXT
R console
Ananth Raj
 
PDF
Practical Data Science : Data Cleaning and Summarising
HariniMS1
 
PPTX
Descriptive Statistics in R.pptx
Ramakrishna Reddy Bijjam
 
PDF
Introduction to tibbles
Rsquared Academy
 
PDF
Data Visualization With R: Introduction
Rsquared Academy
 
PDF
Data Visualization With R: Learn To Modify Color Of Plots
Rsquared Academy
 
PDF
Linear Model Selection and Regularization (Article 6 - Practical exercises)
Theodore Grammatikopoulos
 
PPTX
Python chart plotting using Matplotlib.pptx
sonali sonavane
 
PDF
MH prediction modeling and validation in r (1) regression 190709
Min-hyung Kim
 
PDF
Manipulating Data using base R package
Rupak Roy
 
PPTX
Using R for Building a Simple and Effective Dashboard
Andrea Gigli
 
PDF
Regression and Classification with R
Yanchang Zhao
 
PDF
Q plot tutorial
Abhik Seal
 
PPTX
Data manipulation and visualization in r 20190711 myanmarucsy
SmartHinJ
 
PDF
Data manipulation with dplyr
Romain Francois
 
PPTX
Data Science.pptx00000000000000000000000
shaikhmismail66
 
PPTX
Income Qualification ppt.pptx
ShilpaSweety2
 
PPTX
R programming language
Alberto Minetti
 
PPTX
Introduction to R
Stacy Irwin
 
PDF
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its author
Vivian S. Zhang
 
R console
Ananth Raj
 
Practical Data Science : Data Cleaning and Summarising
HariniMS1
 
Descriptive Statistics in R.pptx
Ramakrishna Reddy Bijjam
 
Introduction to tibbles
Rsquared Academy
 
Data Visualization With R: Introduction
Rsquared Academy
 
Data Visualization With R: Learn To Modify Color Of Plots
Rsquared Academy
 
Linear Model Selection and Regularization (Article 6 - Practical exercises)
Theodore Grammatikopoulos
 
Python chart plotting using Matplotlib.pptx
sonali sonavane
 
MH prediction modeling and validation in r (1) regression 190709
Min-hyung Kim
 
Manipulating Data using base R package
Rupak Roy
 
Using R for Building a Simple and Effective Dashboard
Andrea Gigli
 
Regression and Classification with R
Yanchang Zhao
 
Q plot tutorial
Abhik Seal
 
Data manipulation and visualization in r 20190711 myanmarucsy
SmartHinJ
 
Data manipulation with dplyr
Romain Francois
 
Data Science.pptx00000000000000000000000
shaikhmismail66
 
Income Qualification ppt.pptx
ShilpaSweety2
 
R programming language
Alberto Minetti
 
Introduction to R
Stacy Irwin
 
Kaggle Winning Solution Xgboost algorithm -- Let us learn from its author
Vivian S. Zhang
 

Recently uploaded (20)

PPTX
2025 July - ABM for B2B in Hubspot - Demand Gen HUG.pptx
mjenkins13
 
PDF
Why Unipac Equipment Leads the Way Among Gantry Crane Manufacturers in Singap...
UnipacEquipment
 
PDF
Two-phase direct-to-chip cooling - Parker Components
Parker Hannifin Corporation
 
PDF
Smart Lead Magnet Review: Effortless Email List Growth with Automated Funnels...
Larry888358
 
PPTX
World First Cardiovascular & Thoracic CT Scanner
arineta37
 
PDF
Maksym Vyshnivetskyi: Управління закупівлями (UA)
Lviv Startup Club
 
PPTX
Understanding ISO 42001 Standard: AI Governance & Compliance Insights from Ad...
Adeptiv AI
 
PPTX
GE%205.pptx BUSINESS EMAIL this report aims
KenSantos27
 
DOCX
RECLAIM STOLEN CRYPTO REVIEW WITH RECUVA HACKER SOLUTIONS
camilamichaelj7
 
PDF
Gabino Barbosa - A Master Of Efficiency
Gabino Barbosa
 
PDF
Agriculture Machinery PartsAgriculture Machinery Parts
mizhanw168
 
PPTX
DECODING AI AGENTS AND WORKFLOW AUTOMATION FOR MODERN RECRUITMENT
José Kadlec
 
PDF
Thane Stenner - An Industry Expert
Thane Stenner
 
PDF
Azumah Resources reaffirms commitment to Ghana amid dispute with Engineers & ...
Kweku Zurek
 
PDF
Top Farewell Gifts for Seniors Under.pdf
ThreadVibe Living
 
DOCX
TCP Communication Flag Txzczczxcxzzxypes.docx
esso24
 
PDF
CBV - GST Collection Report V16. pdf.
writer28
 
PDF
20250703_A. Stotz All Weather Strategy - Performance review July
FINNOMENAMarketing
 
PPTX
Washington University of Health and Science A Choice You Can Trust
Washington University of Health and Science
 
PDF
Raman Bhaumik - A Passion For Service
Raman Bhaumik
 
2025 July - ABM for B2B in Hubspot - Demand Gen HUG.pptx
mjenkins13
 
Why Unipac Equipment Leads the Way Among Gantry Crane Manufacturers in Singap...
UnipacEquipment
 
Two-phase direct-to-chip cooling - Parker Components
Parker Hannifin Corporation
 
Smart Lead Magnet Review: Effortless Email List Growth with Automated Funnels...
Larry888358
 
World First Cardiovascular & Thoracic CT Scanner
arineta37
 
Maksym Vyshnivetskyi: Управління закупівлями (UA)
Lviv Startup Club
 
Understanding ISO 42001 Standard: AI Governance & Compliance Insights from Ad...
Adeptiv AI
 
GE%205.pptx BUSINESS EMAIL this report aims
KenSantos27
 
RECLAIM STOLEN CRYPTO REVIEW WITH RECUVA HACKER SOLUTIONS
camilamichaelj7
 
Gabino Barbosa - A Master Of Efficiency
Gabino Barbosa
 
Agriculture Machinery PartsAgriculture Machinery Parts
mizhanw168
 
DECODING AI AGENTS AND WORKFLOW AUTOMATION FOR MODERN RECRUITMENT
José Kadlec
 
Thane Stenner - An Industry Expert
Thane Stenner
 
Azumah Resources reaffirms commitment to Ghana amid dispute with Engineers & ...
Kweku Zurek
 
Top Farewell Gifts for Seniors Under.pdf
ThreadVibe Living
 
TCP Communication Flag Txzczczxcxzzxypes.docx
esso24
 
CBV - GST Collection Report V16. pdf.
writer28
 
20250703_A. Stotz All Weather Strategy - Performance review July
FINNOMENAMarketing
 
Washington University of Health and Science A Choice You Can Trust
Washington University of Health and Science
 
Raman Bhaumik - A Passion For Service
Raman Bhaumik
 
Ad

Artificial inteliggence and machine learning ppt

  • 1. AI & MACHINE LEARNING
  • 2. MODULE 2 : EXPLORATORY DATA ANALYSIS Basic R objects •Variables •Vector •Array •Matrices •Data Frame
  • 3. BASIC FUNCTIONS # To pull mtcars data in this session data() data("mtcars") #To view the variables / data dictionary mtcars # print mydata #To view the variables / data dictionary str(mtcars) #To view the variables / data dictionary dim(mtcars) # dimensions of an object names(mtcars) class(mtcars) # class of an object (numeric, matrix, data frame, etc)
  • 4. head(mtcars) #To view top records of mtcars # print first 10 rows of mydata head(mtcars, n=10) tail(mtcars) #to view bottom records of mtcars ##Variable Identification - Inferences nrow(mtcars) ncol(mtcars) mean(mtcars[,2]) median(mtcars[,9]) range(mtcars$mpg) mean(mtcars$mpg)
  • 5. 1. UNIVARIATE ANALYSIS #Interquartile range (75 percentile - 25 percentile) IQR(mtcars$mpg) ## Univariate Analysis (Pattern Recognition) summary(mtcars) # Complete summary of the dataset fivenum(mtcars) fivenum(mtcars$mpg) # edit the the data for Outliers mtcars=edit(mtcars) mtcars
  • 6. HISTOGRAM #Histogram hist(mtcars$mpg) hist(mtcars$mpg,col = "Red") # Colored Histogram with Different Number of Bins hist(mtcars$mpg, breaks=12, col="red") # Add labels to the graph x <- mtcars$mpg h<-hist(x, breaks=10, col="red", xlab="Miles Per Gallon", main="Histogram")
  • 7. ##(Histograms can be a poor method for determining the shape of a distribution because it is so strongly affected by the number of bins used.) ##Kernal density plots are usually a much more effective way to view the distribution of a variable. # Kernel Density Plot d <- density(mtcars$mpg) # returns the density data plot(d) # plots the results # Filled Density Plot d <- density(mtcars$mpg) plot(d, main="Kernel Density of Miles Per Gallon") polygon(d, col="red", border="blue")
  • 8. BAR PLOT # Simple Bar Plot counts <- table(mtcars$gear) counts barplot(counts, main="Car Distribution", xlab="Number of Gears") barplot(counts,col="Yellow", main="Car Distribution", xlab="Number of Gears") # Simple Horizontal Bar Plot with Added Labels counts <- table(mtcars$gear) barplot(counts, main="Car Distribution", horiz=TRUE) barplot(counts, main="Car Distribution", horiz=TRUE, names.arg=c("3 Gears", "4 Gears", "5 Gears"))
  • 9. BOX PLOT #Boxplot boxplot(mtcars$mpg) boxplot(mtcars$mpg,horizontal = TRUE) boxplot(mtcars$mpg,horizontal = TRUE,col = "Pink") boxplot(mtcars$mpg,horizontal = TRUE,col = "Green",main="Mileage")
  • 10. 2. BIVARIATE ANALYSIS o Continuous Numerical Variables vs Continuous Numerical Variables o Continuous Numerical Variables vs Discrete Numerical Variables o Discrete Numerical Variables vs Categorical Variables
  • 11. STACKED BAR PLOT # Stacked Bar Plot with Colors and Legend counts <- table(mtcars$vs, mtcars$gear) barplot(counts, main="Car Distribution by Gears and VS", xlab="Number of Gears", col=c("darkblue","red"), legend = rownames(counts)) counts <- table(mtcars$cyl, mtcars$gear) barplot(counts, main="Car Distribution by Gears and Cylinders", xlab="Number of Gears", col=c("darkblue","red","Yellow"), legend = rownames(counts))
  • 12. GROUPED BAR PLOT # Grouped Bar Plot counts <- table(mtcars$vs, mtcars$gear) barplot(counts, main="Car Distribution by Gears and VS", xlab="Number of Gears", col=c("darkblue","red"), legend = rownames(counts), beside=TRUE) counts <- table(mtcars$cyl, mtcars$gear) barplot(counts, main="Car Distribution by Gears and Cylinders", xlab="Number of Gears", col=c("darkblue","red","Yellow"), legend = rownames(counts),beside = TRUE)
  • 14. 3. MULTIVARIATE ANALYSIS ## To check the correlation among the variables pairs(~ mpg + disp + hp + wt + drat + qsec , data = mtcars) #graphical representation cor(mtcars[,c(1,2,3,4,5,6,7,8,9,10,11)])