Basic Syntax (3)

DF & List

Weekly design


Pre-class video


Data.frame


# Data Frame: build the patient table from four parallel column vectors #
name <- c("Cheolsu", "Chunhyang", "Gildong")
age <- c(22, 20, 25)
gender <- factor(c("M", "F", "M"))
blood.type <- factor(c("A", "O", "B"))
# Each column is named explicitly after the vector it comes from
patients <- data.frame(name = name, age = age, gender = gender,
                       blood.type = blood.type)
patients
       name age gender blood.type
1   Cheolsu  22      M          A
2 Chunhyang  20      F          O
3   Gildong  25      M          B
# The same data frame can also be built in a single data.frame() call:
patients1 <- data.frame(name = c("Cheolsu", "Chunhyang", "Gildong"),
                        age = c(22, 20, 25),
                        # "M" must not carry a trailing space; "M " would
                        # become a separate, third factor level
                        gender = factor(c("M", "F", "M")),
                        blood.type = factor(c("A", "O", "B")))

patients1
       name age gender blood.type
1   Cheolsu  22      M          A
2 Chunhyang  20      F          O
3   Gildong  25      M          B
patients$name # Print the values of the name column
[1] "Cheolsu"   "Chunhyang" "Gildong"  
patients[1, ] # Print row 1 value
     name age gender blood.type
1 Cheolsu  22      M          A
patients[, 2] # Print 2nd column values
[1] 22 20 25
patients[3, 1] # Print the value at row 3, column 1
[1] "Gildong"
patients[patients$name == "Cheolsu", ] # Extract all information about the patient named Cheolsu
     name age gender blood.type
1 Cheolsu  22      M          A
patients[patients$name=="Cheolsu", c("name", "age")] # Extract only Cheolsu's name and age information
     name age
1 Cheolsu  22
head(cars) # Check the cars data set. By default, head() returns the first 6 rows.
  speed dist
1     4    2
2     4   10
3     7    4
4     7   22
5     8   16
6     9   10
attach(cars) # Use the attach function to use each property of cars as a variable
speed # The variable name speed can be used directly.
 [1]  4  4  7  7  8  9 10 10 10 11 11 12 12 12 12 13 13 13 13 14 14 14 14 15 15
[26] 15 16 16 17 17 17 18 18 18 18 19 19 19 20 20 20 20 20 22 23 24 24 24 24 25
detach(cars) # Deactivates the use of each property of cars as a variable through the detach function
# speed # Try to access the variable called speed, but there is no such variable.

# Apply functions using data properties
mean(cars$speed)
[1] 15.4
max(cars$speed)
[1] 25
# Apply a function using the with function
with(cars, mean(speed))
[1] 15.4
with(cars, max(speed))
[1] 25
# Extract only data with speed greater than 20
subset(cars, speed > 20)
   speed dist
44    22   66
45    23   54
46    24   70
47    24   92
48    24   93
49    24  120
50    25   85
# Extract only the dist column for rows with speed over 20; to select multiple columns, separate them with commas inside c()
subset(cars, speed > 20, select = c(dist))
   dist
44   66
45   54
46   70
47   92
48   93
49  120
50   85
# Extract only data excluding dist from data with a speed exceeding 20
subset(cars, speed > 20, select = -c(dist))
   speed
44    22
45    23
46    24
47    24
48    24
49    24
50    25
head(airquality) # airquality data contains NA
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
5    NA      NA 14.3   56     5   5
6    28      NA 14.9   66     5   6
head(na.omit(airquality)) # Drop every row that contains an NA, then show the first rows
  Ozone Solar.R Wind Temp Month Day
1    41     190  7.4   67     5   1
2    36     118  8.0   72     5   2
3    12     149 12.6   74     5   3
4    18     313 11.5   62     5   4
7    23     299  8.6   65     5   7
8    19      99 13.8   59     5   8
# merge(x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE, suffixes = c(".x",".y"), incomparables = NULL, ...)

name = c("Cheolsu", "Chunhyang", "Gildong")
age = c(22, 20, 25)
gender = factor(c("M", "F", "M"))
blood.type = factor(c("A", "O", "B"))
patients1 = data.frame(name, age, gender)
patients1
       name age gender
1   Cheolsu  22      M
2 Chunhyang  20      F
3   Gildong  25      M
patients2 = data.frame(name, blood.type)
patients2
       name blood.type
1   Cheolsu          A
2 Chunhyang          O
3   Gildong          B
patients = merge(patients1, patients2, by = "name")
patients
       name age gender blood.type
1   Cheolsu  22      M          A
2 Chunhyang  20      F          O
3   Gildong  25      M          B
# If the two data frames have no key column with the same name, specify the key column
# of each data frame through the by.x and by.y arguments of the merge function.
name1 = c("Cheolsu", "Chunhyang", "Gildong")
name2 = c("Minsu", "Chunhyang", "Gildong")
age = c(22, 20, 25)
gender = factor(c("M", "F", "M"))
blood.type = factor(c("A", "O", "B"))
patients1 = data.frame(name1, age, gender)
patients1
      name1 age gender
1   Cheolsu  22      M
2 Chunhyang  20      F
3   Gildong  25      M
patients2 = data.frame(name2, blood.type)
patients2
      name2 blood.type
1     Minsu          A
2 Chunhyang          O
3   Gildong          B
patients = merge(patients1, patients2, by.x = "name1", by.y = "name2")
patients
      name1 age gender blood.type
1 Chunhyang  20      F          O
2   Gildong  25      M          B
patients = merge(patients1, patients2, by.x = "name1", by.y = "name2", all = TRUE)
patients
      name1 age gender blood.type
1   Cheolsu  22      M       <NA>
2 Chunhyang  20      F          O
3   Gildong  25      M          B
4     Minsu  NA   <NA>          A
x = array(1:12, c(3, 4))

# Currently x is not a data frame
is.data.frame(x) 
[1] FALSE
as.data.frame(x)
  V1 V2 V3 V4
1  1  4  7 10
2  2  5  8 11
3  3  6  9 12
# Just calling as.data.frame(x) without assigning the result does not turn x into a data frame
is.data.frame(x)
[1] FALSE
# Convert x to data frame format with the as.data.frame function
x = as.data.frame(x)
x
  V1 V2 V3 V4
1  1  4  7 10
2  2  5  8 11
3  3  6  9 12
# Verify that x has been converted to data frame format
is.data.frame(x)
[1] TRUE
# The column names assigned automatically during conversion (V1, V2, ...) can be replaced with the names function.
names(x) = c("1st", "2nd", "3rd", "4th")
x
  1st 2nd 3rd 4th
1   1   4   7  10
2   2   5   8  11
3   3   6   9  12


List



# List #
patients <- data.frame(name = c("Cheolsu", "Chunhyang", "Gildong"),
                       age = c(22, 20, 25),
                       # "M" must not carry a trailing space; "M " would
                       # become a separate, third factor level
                       gender = factor(c("M", "F", "M")),
                       blood.type = factor(c("A", "O", "B")))

no.patients <- data.frame(day = 1:6, no = c(50, 60, 55, 52, 65, 58))


# Combine the two data frames into an unnamed list
listPatients <- list(patients, no.patients)
listPatients
[[1]]
       name age gender blood.type
1   Cheolsu  22      M          A
2 Chunhyang  20      F          O
3   Gildong  25      M          B

[[2]]
  day no
1   1 50
2   2 60
3   3 55
4   4 52
5   5 65
6   6 58
# Give each list element a name
listPatients <- list(patients = patients, no.patients = no.patients)
listPatients
$patients
       name age gender blood.type
1   Cheolsu  22      M          A
2 Chunhyang  20      F          O
3   Gildong  25      M          B

$no.patients
  day no
1   1 50
2   2 60
3   3 55
4   4 52
5   5 65
6   6 58
# Access an element by name with $
listPatients$patients 
       name age gender blood.type
1   Cheolsu  22      M          A
2 Chunhyang  20      F          O
3   Gildong  25      M          B
# Access an element by position with [[ ]]
listPatients[[1]] 
       name age gender blood.type
1   Cheolsu  22      M          A
2 Chunhyang  20      F          O
3   Gildong  25      M          B
# Access an element by its quoted name with [[ ]]
listPatients[["patients"]] 
       name age gender blood.type
1   Cheolsu  22      M          A
2 Chunhyang  20      F          O
3   Gildong  25      M          B
# Enter the element name in ""
listPatients[["no.patients"]] 
  day no
1   1 50
2   2 60
3   3 55
4   4 52
5   5 65
6   6 58
# Calculate the average of no.patients elements
lapply(listPatients$no.patients, mean)
$day
[1] 3.5

$no
[1] 56.66667
# Apply mean() to every column of patients. Only the numeric age column is averaged;
# the non-numeric columns (name, gender, blood.type) return NA with a warning.
lapply(listPatients$patients, mean)
Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
returning NA

Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
returning NA

Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
returning NA
$name
[1] NA

$age
[1] 22.33333

$gender
[1] NA

$blood.type
[1] NA
sapply(listPatients$no.patients, mean)
     day       no 
 3.50000 56.66667 
# With simplify = FALSE, sapply() keeps the result as a list, i.e. the same result as lapply().
# FALSE is spelled out because the shorthand F is an ordinary variable that can be reassigned.
sapply(listPatients$no.patients, mean, simplify = FALSE)
$day
[1] 3.5

$no
[1] 56.66667

Class


Pop-up Qz

# Let V1 be the following vector
V1 = c(1,2,3,4,5,NA,9,10)

How can we get a vector of integers less than 4 from V1?

1) V1[V1 < 4 & !is.na(V1)]

2) V1[V1 < 4 | !is.na(V1)]

3) V1(V1 < 4 | !is.na(V1))

4) V1[V1 < 4] & V1[!is.na(V1)]


# Consider the data frame below
df <- data.frame(name = c("John", "Mary", "Mark"),
                 age  = c(30,16,21),
                 gender = c("M", "F", "M"))
df
  name age gender
1 John  30      M
2 Mary  16      F
3 Mark  21      M

Write R code that selects the rows where gender is male ("M") and age is 19 or above.


Practice: Data Frame & List



  • Korea Media Panel data is used in news articles based on media statistics, such as the article below.


  • Consists of


I made a toy data set from the KMP, shown below.

  • Please download R data set here: List_KMP.RData

  • Place it in a folder named data inside your working directory (the code below loads "data/List_KMP.RData")



load("data/List_KMP.RData")

str(List.KMP)
List of 4
 $ :'data.frame':   10 obs. of  11 variables:
  ..$ pid              : num [1:10] 9920004 12500003 17350004 17670001 23860001 ...
  ..$ age              : num [1:10] 9 26 12 55 70 58 50 68 39 37
  ..$ gender           : Factor w/ 2 levels "Male","Female": 1 1 2 1 1 1 1 1 1 2
  ..$ Mobile.lv        : Factor w/ 5 levels "3G","LTE","LTE-A",..: 5 2 2 2 5 2 2 2 2 3
  ..$ Telecom          : Factor w/ 5 levels "SKT","KT","LG U+",..: 5 2 3 1 5 1 1 1 2 2
  ..$ Smartphone.brand : Factor w/ 8 levels "Samsung","Apple",..: 8 1 1 1 8 1 1 1 1 3
  ..$ sp.mobile        : num [1:10] 0 42 19 38 18 65 32 58 63 54
  ..$ sp.device        : num [1:10] 0 0 0 0 0 0 0 10 0 10
  ..$ sp.online.content: num [1:10] 0 53 19 38 18 65 32 68 63 114
  ..$ sp.offline.contet: num [1:10] 10 22 0 0 0 50 36 0 25 40
  ..$ year             : num [1:10] 2017 2017 2017 2017 2017 ...
 $ :'data.frame':   10 obs. of  11 variables:
  ..$ pid              : num [1:10] 9920004 12500003 17350004 17670001 23860001 ...
  ..$ age              : num [1:10] 10 27 13 56 71 59 51 69 40 38
  ..$ gender           : Factor w/ 2 levels "Male","Female": 1 1 2 1 1 1 1 1 1 2
  ..$ Mobile.lv        : Factor w/ 5 levels "3G","LTE","LTE-A",..: 5 3 3 2 5 2 2 2 3 3
  ..$ Telecom          : Factor w/ 5 levels "SKT","KT","LG U+",..: 5 3 3 2 5 3 1 2 1 2
  ..$ Smartphone.brand : Factor w/ 8 levels "Samsung","Apple",..: 8 1 3 1 8 3 1 3 1 3
  ..$ sp.mobile        : num [1:10] 0 90 20 39 30 80 33 36 40 59
  ..$ sp.device        : num [1:10] 0 60 35 0 0 15 0 10 10 12
  ..$ sp.online.content: num [1:10] 0 359 55 39 30 95 33 46 50 71
  ..$ sp.offline.contet: num [1:10] 8 120 0 0 0 12 20 0 0 100
  ..$ year             : num [1:10] 2018 2018 2018 2018 2018 ...
 $ :'data.frame':   10 obs. of  11 variables:
  ..$ pid              : num [1:10] 9920004 12500003 17350004 17670001 23860001 ...
  ..$ age              : num [1:10] 11 28 14 57 72 60 52 70 41 39
  ..$ gender           : Factor w/ 2 levels "Male","Female": 1 1 2 1 1 1 1 1 1 2
  ..$ Mobile.lv        : Factor w/ 5 levels "3G","LTE","LTE-A",..: 5 2 3 2 2 2 2 2 3 3
  ..$ Telecom          : Factor w/ 5 levels "SKT","KT","LG U+",..: 5 3 3 1 2 1 1 2 3 3
  ..$ Smartphone.brand : Factor w/ 8 levels "Samsung","Apple",..: 8 2 1 1 1 1 1 3 3 3
  ..$ sp.mobile        : num [1:10] 0 60 21 30 32 78 35 54 73 45
  ..$ sp.device        : num [1:10] 0 30 0 0 0 0 0 0 20 15
  ..$ sp.online.content: num [1:10] 0 90 21 30 32 78 35 54 93 60
  ..$ sp.offline.contet: num [1:10] 21 0 0 20 0 24 90 0 20 60
  ..$ year             : num [1:10] 2019 2019 2019 2019 2019 ...
 $ :'data.frame':   10 obs. of  4 variables:
  ..$ pid           : num [1:10] 9920004 12500003 17350004 17670001 23860001 ...
  ..$ Smarphone.time: num [1:10] 0 60 345 90 40 65 170 95 85 75
  ..$ SNS.time      : num [1:10] 0 0 10 0 0 0 0 0 0 0
  ..$ year          : num [1:10] 2019 2019 2019 2019 2019 ...


Personal Data 2017

List.KMP[[1]]
        pid age gender     Mobile.lv       Telecom Smartphone.brand sp.mobile
1   9920004   9   Male No Smartphone No Smartphone    No Smartphone         0
2  12500003  26   Male           LTE            KT          Samsung        42
3  17350004  12 Female           LTE         LG U+          Samsung        19
4  17670001  55   Male           LTE           SKT          Samsung        38
5  23860001  70   Male No Smartphone No Smartphone    No Smartphone        18
6  24450001  58   Male           LTE           SKT          Samsung        65
7  27570001  50   Male           LTE           SKT          Samsung        32
8  53620001  68   Male           LTE           SKT          Samsung        58
9  59570001  39   Male           LTE            KT          Samsung        63
10 65840001  37 Female         LTE-A            KT               LG        54
   sp.device sp.online.content sp.offline.contet year
1          0                 0                10 2017
2          0                53                22 2017
3          0                19                 0 2017
4          0                38                 0 2017
5          0                18                 0 2017
6          0                65                50 2017
7          0                32                36 2017
8         10                68                 0 2017
9          0                63                25 2017
10        10               114                40 2017

Personal Data 2018

List.KMP[[2]]
        pid age gender     Mobile.lv       Telecom Smartphone.brand sp.mobile
1   9920004  10   Male No Smartphone No Smartphone    No Smartphone         0
2  12500003  27   Male         LTE-A         LG U+          Samsung        90
3  17350004  13 Female         LTE-A         LG U+               LG        20
4  17670001  56   Male           LTE            KT          Samsung        39
5  23860001  71   Male No Smartphone No Smartphone    No Smartphone        30
6  24450001  59   Male           LTE         LG U+               LG        80
7  27570001  51   Male           LTE           SKT          Samsung        33
8  53620001  69   Male           LTE            KT               LG        36
9  59570001  40   Male         LTE-A           SKT          Samsung        40
10 65840001  38 Female         LTE-A            KT               LG        59
   sp.device sp.online.content sp.offline.contet year
1          0                 0                 8 2018
2         60               359               120 2018
3         35                55                 0 2018
4          0                39                 0 2018
5          0                30                 0 2018
6         15                95                12 2018
7          0                33                20 2018
8         10                46                 0 2018
9         10                50                 0 2018
10        12                71               100 2018

Personal Data 2019

List.KMP[[3]]
        pid age gender     Mobile.lv       Telecom Smartphone.brand sp.mobile
1   9920004  11   Male No Smartphone No Smartphone    No Smartphone         0
2  12500003  28   Male           LTE         LG U+            Apple        60
3  17350004  14 Female         LTE-A         LG U+          Samsung        21
4  17670001  57   Male           LTE           SKT          Samsung        30
5  23860001  72   Male           LTE            KT          Samsung        32
6  24450001  60   Male           LTE           SKT          Samsung        78
7  27570001  52   Male           LTE           SKT          Samsung        35
8  53620001  70   Male           LTE            KT               LG        54
9  59570001  41   Male         LTE-A         LG U+               LG        73
10 65840001  39 Female         LTE-A         LG U+               LG        45
   sp.device sp.online.content sp.offline.contet year
1          0                 0                21 2019
2         30                90                 0 2019
3          0                21                 0 2019
4          0                30                20 2019
5          0                32                 0 2019
6          0                78                24 2019
7          0                35                90 2019
8          0                54                 0 2019
9         20                93                20 2019
10        15                60                60 2019

Personal Media Diary 2019

List.KMP[[4]]
        pid Smarphone.time SNS.time year
1   9920004              0        0 2019
2  12500003             60        0 2019
3  17350004            345       10 2019
4  17670001             90        0 2019
5  23860001             40        0 2019
6  24450001             65        0 2019
7  27570001            170        0 2019
8  53620001             95        0 2019
9  59570001             85        0 2019
10 65840001             75        0 2019


Let’s name the list elements

# Check the existing names
names(List.KMP)
NULL
# Give names to each element of the list
names(List.KMP) <- c("p17", "p18", "p19", "d19")


How can we extract the first element of the list?

# one way
List.KMP[[1]]
        pid age gender     Mobile.lv       Telecom Smartphone.brand sp.mobile
1   9920004   9   Male No Smartphone No Smartphone    No Smartphone         0
2  12500003  26   Male           LTE            KT          Samsung        42
3  17350004  12 Female           LTE         LG U+          Samsung        19
4  17670001  55   Male           LTE           SKT          Samsung        38
5  23860001  70   Male No Smartphone No Smartphone    No Smartphone        18
6  24450001  58   Male           LTE           SKT          Samsung        65
7  27570001  50   Male           LTE           SKT          Samsung        32
8  53620001  68   Male           LTE           SKT          Samsung        58
9  59570001  39   Male           LTE            KT          Samsung        63
10 65840001  37 Female         LTE-A            KT               LG        54
   sp.device sp.online.content sp.offline.contet year
1          0                 0                10 2017
2          0                53                22 2017
3          0                19                 0 2017
4          0                38                 0 2017
5          0                18                 0 2017
6          0                65                50 2017
7          0                32                36 2017
8         10                68                 0 2017
9          0                63                25 2017
10        10               114                40 2017
# the other way
List.KMP[['p17']]
        pid age gender     Mobile.lv       Telecom Smartphone.brand sp.mobile
1   9920004   9   Male No Smartphone No Smartphone    No Smartphone         0
2  12500003  26   Male           LTE            KT          Samsung        42
3  17350004  12 Female           LTE         LG U+          Samsung        19
4  17670001  55   Male           LTE           SKT          Samsung        38
5  23860001  70   Male No Smartphone No Smartphone    No Smartphone        18
6  24450001  58   Male           LTE           SKT          Samsung        65
7  27570001  50   Male           LTE           SKT          Samsung        32
8  53620001  68   Male           LTE           SKT          Samsung        58
9  59570001  39   Male           LTE            KT          Samsung        63
10 65840001  37 Female         LTE-A            KT               LG        54
   sp.device sp.online.content sp.offline.contet year
1          0                 0                10 2017
2          0                53                22 2017
3          0                19                 0 2017
4          0                38                 0 2017
5          0                18                 0 2017
6          0                65                50 2017
7          0                32                36 2017
8         10                68                 0 2017
9          0                63                25 2017
10        10               114                40 2017

Lists of a list


# Add a fifth element to List.KMP: a nested list holding a single placeholder value (0)
List.KMP[[5]] <- list(0)
# See the structure
str(List.KMP)
List of 5
 $ p17:'data.frame':    10 obs. of  11 variables:
  ..$ pid              : num [1:10] 9920004 12500003 17350004 17670001 23860001 ...
  ..$ age              : num [1:10] 9 26 12 55 70 58 50 68 39 37
  ..$ gender           : Factor w/ 2 levels "Male","Female": 1 1 2 1 1 1 1 1 1 2
  ..$ Mobile.lv        : Factor w/ 5 levels "3G","LTE","LTE-A",..: 5 2 2 2 5 2 2 2 2 3
  ..$ Telecom          : Factor w/ 5 levels "SKT","KT","LG U+",..: 5 2 3 1 5 1 1 1 2 2
  ..$ Smartphone.brand : Factor w/ 8 levels "Samsung","Apple",..: 8 1 1 1 8 1 1 1 1 3
  ..$ sp.mobile        : num [1:10] 0 42 19 38 18 65 32 58 63 54
  ..$ sp.device        : num [1:10] 0 0 0 0 0 0 0 10 0 10
  ..$ sp.online.content: num [1:10] 0 53 19 38 18 65 32 68 63 114
  ..$ sp.offline.contet: num [1:10] 10 22 0 0 0 50 36 0 25 40
  ..$ year             : num [1:10] 2017 2017 2017 2017 2017 ...
 $ p18:'data.frame':    10 obs. of  11 variables:
  ..$ pid              : num [1:10] 9920004 12500003 17350004 17670001 23860001 ...
  ..$ age              : num [1:10] 10 27 13 56 71 59 51 69 40 38
  ..$ gender           : Factor w/ 2 levels "Male","Female": 1 1 2 1 1 1 1 1 1 2
  ..$ Mobile.lv        : Factor w/ 5 levels "3G","LTE","LTE-A",..: 5 3 3 2 5 2 2 2 3 3
  ..$ Telecom          : Factor w/ 5 levels "SKT","KT","LG U+",..: 5 3 3 2 5 3 1 2 1 2
  ..$ Smartphone.brand : Factor w/ 8 levels "Samsung","Apple",..: 8 1 3 1 8 3 1 3 1 3
  ..$ sp.mobile        : num [1:10] 0 90 20 39 30 80 33 36 40 59
  ..$ sp.device        : num [1:10] 0 60 35 0 0 15 0 10 10 12
  ..$ sp.online.content: num [1:10] 0 359 55 39 30 95 33 46 50 71
  ..$ sp.offline.contet: num [1:10] 8 120 0 0 0 12 20 0 0 100
  ..$ year             : num [1:10] 2018 2018 2018 2018 2018 ...
 $ p19:'data.frame':    10 obs. of  11 variables:
  ..$ pid              : num [1:10] 9920004 12500003 17350004 17670001 23860001 ...
  ..$ age              : num [1:10] 11 28 14 57 72 60 52 70 41 39
  ..$ gender           : Factor w/ 2 levels "Male","Female": 1 1 2 1 1 1 1 1 1 2
  ..$ Mobile.lv        : Factor w/ 5 levels "3G","LTE","LTE-A",..: 5 2 3 2 2 2 2 2 3 3
  ..$ Telecom          : Factor w/ 5 levels "SKT","KT","LG U+",..: 5 3 3 1 2 1 1 2 3 3
  ..$ Smartphone.brand : Factor w/ 8 levels "Samsung","Apple",..: 8 2 1 1 1 1 1 3 3 3
  ..$ sp.mobile        : num [1:10] 0 60 21 30 32 78 35 54 73 45
  ..$ sp.device        : num [1:10] 0 30 0 0 0 0 0 0 20 15
  ..$ sp.online.content: num [1:10] 0 90 21 30 32 78 35 54 93 60
  ..$ sp.offline.contet: num [1:10] 21 0 0 20 0 24 90 0 20 60
  ..$ year             : num [1:10] 2019 2019 2019 2019 2019 ...
 $ d19:'data.frame':    10 obs. of  4 variables:
  ..$ pid           : num [1:10] 9920004 12500003 17350004 17670001 23860001 ...
  ..$ Smarphone.time: num [1:10] 0 60 345 90 40 65 170 95 85 75
  ..$ SNS.time      : num [1:10] 0 0 10 0 0 0 0 0 0 0
  ..$ year          : num [1:10] 2019 2019 2019 2019 2019 ...
 $    :List of 1
  ..$ : num 0
# Overwrite the first entry of the fifth element with the integers 1 to 10
List.KMP[[5]][[1]] <- 1:10

# Add a second entry to the fifth element: a 4-row matrix holding 1 to 12
List.KMP[[5]][[2]] <- matrix(1:12, nrow = 4)
List.KMP[[5]]
[[1]]
 [1]  1  2  3  4  5  6  7  8  9 10

[[2]]
     [,1] [,2] [,3]
[1,]    1    5    9
[2,]    2    6   10
[3,]    3    7   11
[4,]    4    8   12


Extract the ‘p17’ element

p17_df <- List.KMP[["p17"]]
p17_df
        pid age gender     Mobile.lv       Telecom Smartphone.brand sp.mobile
1   9920004   9   Male No Smartphone No Smartphone    No Smartphone         0
2  12500003  26   Male           LTE            KT          Samsung        42
3  17350004  12 Female           LTE         LG U+          Samsung        19
4  17670001  55   Male           LTE           SKT          Samsung        38
5  23860001  70   Male No Smartphone No Smartphone    No Smartphone        18
6  24450001  58   Male           LTE           SKT          Samsung        65
7  27570001  50   Male           LTE           SKT          Samsung        32
8  53620001  68   Male           LTE           SKT          Samsung        58
9  59570001  39   Male           LTE            KT          Samsung        63
10 65840001  37 Female         LTE-A            KT               LG        54
   sp.device sp.online.content sp.offline.contet year
1          0                 0                10 2017
2          0                53                22 2017
3          0                19                 0 2017
4          0                38                 0 2017
5          0                18                 0 2017
6          0                65                50 2017
7          0                32                36 2017
8         10                68                 0 2017
9          0                63                25 2017
10        10               114                40 2017
# Summary Statistics
summary(p17_df)
      pid                age           gender          Mobile.lv
 Min.   : 9920004   Min.   : 9.00   Male  :8   3G           :0  
 1st Qu.:17430003   1st Qu.:28.75   Female:2   LTE          :7  
 Median :24155001   Median :44.50              LTE-A        :1  
 Mean   :31235002   Mean   :42.40              5G           :0  
 3rd Qu.:47107501   3rd Qu.:57.25              No Smartphone:2  
 Max.   :65840001   Max.   :70.00                               
                                                                
          Telecom       Smartphone.brand   sp.mobile       sp.device 
 SKT          :4   Samsung      :7       Min.   : 0.00   Min.   : 0  
 KT           :3   No Smartphone:2       1st Qu.:22.25   1st Qu.: 0  
 LG U+        :1   LG           :1       Median :40.00   Median : 0  
 MVNO         :0   Apple        :0       Mean   :38.90   Mean   : 2  
 No Smartphone:2   Pantech      :0       3rd Qu.:57.00   3rd Qu.: 0  
                   Xiaomi       :0       Max.   :65.00   Max.   :10  
                   (Other)      :0                                   
 sp.online.content sp.offline.contet      year     
 Min.   :  0.00    Min.   : 0.00     Min.   :2017  
 1st Qu.: 22.25    1st Qu.: 0.00     1st Qu.:2017  
 Median : 45.50    Median :16.00     Median :2017  
 Mean   : 47.00    Mean   :18.30     Mean   :2017  
 3rd Qu.: 64.50    3rd Qu.:33.25     3rd Qu.:2017  
 Max.   :114.00    Max.   :50.00     Max.   :2017  
                                                   


  • Among the 10 people, how many used a Samsung phone in 2017?

  • How much did people spend on mobile communication on average?


Do the same thing to 2019 data set and answer the questions below.

  1. In 2019, how many people used a Samsung phone?

  2. Draw a boxplot of people’s spending on mobile communication
    *Hint: use boxplot()


Notice

  • About team project