# Load packages and Data
library(readr) # Reading in data
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.3 ✔ purrr 1.0.2
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.4 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes) # Data visualization
library(RColorBrewer) # Data visualization
# Read the Titanic passenger data from CSV into a tibble named `titanic`.
# The assignment target must be on the left-hand side of `<-`; with the name
# trailing the call the line does not parse as R.
titanic <- read_csv("data/titanic.csv")
Rows: 891 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): Name, Sex, Ticket, Cabin, Embarked
dbl (7): PassengerId, Survived, Pclass, Age, SibSp, Parch, Fare
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Number of rows in the data set (first element of the dimensions).
dim(titanic)[1L]
[1] 891
# Preview the first six rows of the data set.
head(titanic, n = 6L)
# A tibble: 6 × 12
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin
<dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
1 1 0 3 Braund… male 22 1 0 A/5 2… 7.25 <NA>
2 2 1 1 Cuming… fema… 38 1 0 PC 17… 71.3 C85
3 3 1 3 Heikki… fema… 26 0 0 STON/… 7.92 <NA>
4 4 1 1 Futrel… fema… 35 1 0 113803 53.1 C123
5 5 0 3 Allen,… male 35 0 0 373450 8.05 <NA>
6 6 0 3 Moran,… male NA 0 0 330877 8.46 <NA>
# ℹ 1 more variable: Embarked <chr>
# Count how many rows fall into each distinct value of the Pclass column.
table(titanic[["Pclass"]])
1 2 3
216 184 491