1 Step: Load and Install all packages that will be needed

1.1 Load the required packages (NbClust, factoextra, caret, dplyr, cluster)

library(NbClust)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(caret)
## Loading required package: lattice
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(cluster)

2 Step: Read the Data from csv

2.1 Read the data file

# Load the complete labour-market table; its first column holds the
# field-of-study label and the remaining columns are numeric measures.
df_full <- read.csv(file = "BC_Educ_Labour_Market_Performance.csv")
head(x = df_full)
##                                                 Major.field.of.study
## 1       01. Agriculture, agriculture operations and related sciences
## 2                             03. Natural resources and conservation
## 3                              04. Architecture and related services
## 4              05. Area, ethnic, cultural, gender, and group studies
## 5                 09. Communication, journalism and related programs
## 6   10. Communications technologies/technicians and support services
##   Apprentice_emp Apprentice_unemp Apprentice_med_inc Cert_appren_qual_emp
## 1           62.1              6.7              27865                 60.3
## 2           56.6             10.3              38609                 54.7
## 3           68.2              4.3              45984                 46.2
## 4            0.0              0.0                  0                  0.0
## 5           28.6              0.0                  0                  0.0
## 6           48.9              6.0              31831                 41.8
##   Cert_appren_qual_unemp Cert_appren_qual_med_inc Trades_emp Trades_unemp
## 1                    7.3                    31140       63.3          6.3
## 2                   10.4                    40011       57.5         10.6
## 3                    0.0                    61472       80.0          5.9
## 4                    0.0                        0        0.0          0.0
## 5                    0.0                        0       75.0          0.0
## 6                    4.0                    28844       58.0          7.8
##   Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp Col_cert_dip_med_inc
## 1          26040             69.8                5.2                32337
## 2          37650             69.1                8.2                44690
## 3          34566             70.4                6.4                43779
## 4              0             56.2                9.8                20909
## 5              0             74.2                5.5                39582
## 6          35535             76.3                7.3                40337
##   Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc Bachelor_emp
## 1             73.8                4.5                28301         67.0
## 2             72.9                5.4                35466         76.5
## 3             67.1                3.4                34960         68.4
## 4             57.8               16.1                24822         73.7
## 5             67.4                8.0                33561         79.5
## 6             77.0                7.5                38461         80.8
##   Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp Cert_dip_bach_unemp
## 1            3.9            36145              68.1                 3.9
## 2            5.8            54243              76.9                 5.4
## 3            6.0            41491              73.3                 4.7
## 4            5.2            32952              72.5                 4.8
## 5            4.9            37508              78.7                 5.5
## 6            9.1            46993              81.8                 8.0
##   Cert_dip_bach_med_inc Post_bachelor_emp Post_bachelor_unemp
## 1                 35055              70.6                 4.2
## 2                 58736              77.7                 4.5
## 3                 45375              77.1                 3.8
## 4                 67182              69.5                 4.4
## 5                 38182              75.9                 7.7
## 6                 45896              83.9                 4.1
##   Post_bachelor_med_in
## 1                38302
## 2                65019
## 3                59137
## 4                30863
## 5                59137
## 6                39504

2.2 Extract the numerical columns - without description column 1

# Clustering input: every column except the leading description column.
df_baseline <- df_full[, -1]
head(x = df_baseline)
##   Apprentice_emp Apprentice_unemp Apprentice_med_inc Cert_appren_qual_emp
## 1           62.1              6.7              27865                 60.3
## 2           56.6             10.3              38609                 54.7
## 3           68.2              4.3              45984                 46.2
## 4            0.0              0.0                  0                  0.0
## 5           28.6              0.0                  0                  0.0
## 6           48.9              6.0              31831                 41.8
##   Cert_appren_qual_unemp Cert_appren_qual_med_inc Trades_emp Trades_unemp
## 1                    7.3                    31140       63.3          6.3
## 2                   10.4                    40011       57.5         10.6
## 3                    0.0                    61472       80.0          5.9
## 4                    0.0                        0        0.0          0.0
## 5                    0.0                        0       75.0          0.0
## 6                    4.0                    28844       58.0          7.8
##   Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp Col_cert_dip_med_inc
## 1          26040             69.8                5.2                32337
## 2          37650             69.1                8.2                44690
## 3          34566             70.4                6.4                43779
## 4              0             56.2                9.8                20909
## 5              0             74.2                5.5                39582
## 6          35535             76.3                7.3                40337
##   Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc Bachelor_emp
## 1             73.8                4.5                28301         67.0
## 2             72.9                5.4                35466         76.5
## 3             67.1                3.4                34960         68.4
## 4             57.8               16.1                24822         73.7
## 5             67.4                8.0                33561         79.5
## 6             77.0                7.5                38461         80.8
##   Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp Cert_dip_bach_unemp
## 1            3.9            36145              68.1                 3.9
## 2            5.8            54243              76.9                 5.4
## 3            6.0            41491              73.3                 4.7
## 4            5.2            32952              72.5                 4.8
## 5            4.9            37508              78.7                 5.5
## 6            9.1            46993              81.8                 8.0
##   Cert_dip_bach_med_inc Post_bachelor_emp Post_bachelor_unemp
## 1                 35055              70.6                 4.2
## 2                 58736              77.7                 4.5
## 3                 45375              77.1                 3.8
## 4                 67182              69.5                 4.4
## 5                 38182              75.9                 7.7
## 6                 45896              83.9                 4.1
##   Post_bachelor_med_in
## 1                38302
## 2                65019
## 3                59137
## 4                30863
## 5                59137
## 6                39504

2.3 Normalize the data to the [0, 1] range (min-max scaling) using preProcess from the caret package

# Fit a min-max ("range") scaler on the numeric columns; the fitted object
# stores the per-column ranges that predict() applies afterwards.
preprocess_model <- preProcess(x = df_baseline, method = "range")
summary(object = preprocess_model)
##                   Length Class  Mode    
## dim                2     -none- numeric 
## bc                 0     -none- NULL    
## yj                 0     -none- NULL    
## et                 0     -none- NULL    
## invHyperbolicSine  0     -none- NULL    
## mean               0     -none- NULL    
## std                0     -none- NULL    
## ranges            48     -none- numeric 
## rotation           0     -none- NULL    
## method             2     -none- list    
## thresh             1     -none- numeric 
## pcaComp            0     -none- NULL    
## numComp            0     -none- NULL    
## ica                0     -none- NULL    
## wildcards          2     -none- list    
## k                  1     -none- numeric 
## knnSummary         1     -none- function
## bagImp             0     -none- NULL    
## median             0     -none- NULL    
## data               0     -none- NULL    
## rangeBounds        2     -none- numeric
# Apply the fitted scaler so that every column is rescaled before clustering.
df_transformed <- predict(object = preprocess_model, newdata = df_baseline)
head(x = df_transformed)
##   Apprentice_emp Apprentice_unemp Apprentice_med_inc Cert_appren_qual_emp
## 1      0.8673184           0.1675          0.4820350            0.8445378
## 2      0.7905028           0.2575          0.6678949            0.7661064
## 3      0.9525140           0.1075          0.7954746            0.6470588
## 4      0.0000000           0.0000          0.0000000            0.0000000
## 5      0.3994413           0.0000          0.0000000            0.0000000
## 6      0.6829609           0.1500          0.5506427            0.5854342
##   Cert_appren_qual_unemp Cert_appren_qual_med_inc Trades_emp Trades_unemp
## 1                  0.146                0.4986868    0.79125    0.2202797
## 2                  0.208                0.6407501    0.71875    0.3706294
## 3                  0.000                0.9844341    1.00000    0.2062937
## 4                  0.000                0.0000000    0.00000    0.0000000
## 5                  0.000                0.0000000    0.93750    0.0000000
## 6                  0.080                0.4619179    0.72500    0.2727273
##   Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp Col_cert_dip_med_inc
## 1      0.4287196        0.8960205          0.3636364            0.5845233
## 2      0.6198653        0.8870347          0.5734266            0.8078161
## 3      0.5690907        0.9037227          0.4475524            0.7913488
## 4      0.0000000        0.7214377          0.6853147            0.3779509
## 5      0.0000000        0.9525032          0.3846154            0.7154839
## 6      0.5850442        0.9794608          0.5104895            0.7291313
##   Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc Bachelor_emp
## 1        0.9213483         0.06746627            0.5894691    0.7469342
## 2        0.9101124         0.08095952            0.7387057    0.8528428
## 3        0.8377029         0.05097451            0.7281665    0.7625418
## 4        0.7215980         0.24137931            0.5170065    0.8216276
## 5        0.8414482         0.11994003            0.6990273    0.8862876
## 6        0.9612984         0.11244378            0.8010873    0.9007804
##   Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp Cert_dip_bach_unemp
## 1      0.2727273        0.4932316         0.7660292              0.4875
## 2      0.4055944        0.7401954         0.8650169              0.6750
## 3      0.4195804        0.5661827         0.8245219              0.5875
## 4      0.3636364        0.4496602         0.8155231              0.6000
## 5      0.3426573        0.5118310         0.8852643              0.6875
## 6      0.6363636        0.6412625         0.9201350              1.0000
##   Cert_dip_bach_med_inc Post_bachelor_emp Post_bachelor_unemp
## 1             0.4737995         0.8374852           0.5454545
## 2             0.7938692         0.9217082           0.5844156
## 3             0.6132834         0.9145907           0.4935065
## 4             0.9080244         0.8244365           0.5714286
## 5             0.5160636         0.9003559           1.0000000
## 6             0.6203252         0.9952550           0.5324675
##   Post_bachelor_med_in
## 1            0.5120657
## 2            0.8692496
## 3            0.7906122
## 4            0.4126125
## 5            0.7906122
## 6            0.5281354

3 Step: Determine the number of Classes

3.1 Using the Elbow Method (Scree Plot)

# Elbow (scree) plot: total within-cluster sum of squares against the number
# of k-means clusters.
fviz_nbclust(x = df_transformed, FUNcluster = kmeans, method = "wss")

Based on the elbow method, it can be inferred that 3 clusters is a reasonable choice. However, this needs to be confirmed using a more comprehensive approach.

3.2 Comprehensive approach

# Evaluate every NbClust index for 2 to 10 k-means clusters and take the
# majority vote on the best number of clusters.
res_nbclust <- NbClust(
  data = df_transformed,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "kmeans",
  index = "all"
)
## Warning in pf(beale, pp, df2): NaNs produced

## Warning in pf(beale, pp, df2): NaNs produced

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 1 proposed 2 as the best number of clusters 
## * 16 proposed 3 as the best number of clusters 
## * 2 proposed 4 as the best number of clusters 
## * 1 proposed 7 as the best number of clusters 
## * 1 proposed 8 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## * 1 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  3 
##  
##  
## *******************************************************************

Considering the output (“16 proposed 3 as the best number of clusters”), 3 clusters will be used.

4 Step: Conduct Kmeans analysis using Final Number of Clusters

4.1 Using Kmeans function

# Fit the final 3-cluster k-means solution.
# kmeans() starts from randomly chosen centres, so the cluster numbering and
# even the partition can change between runs. Fix the RNG seed and keep the
# best of several random starts so that the X1/X2/X3 columns derived from this
# fit are reproducible. Any printed output captured from an unseeded run
# should be regenerated.
set.seed(123)
kmResult <- kmeans(df_transformed, centers = 3, nstart = 25)
str(kmResult)
## List of 9
##  $ cluster     : int [1:40] 3 3 3 2 2 3 3 1 3 2 ...
##  $ centers     : num [1:3, 1:24] 0.804 0.136 0.815 0.136 0.125 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:3] "1" "2" "3"
##   .. ..$ : chr [1:24] "Apprentice_emp" "Apprentice_unemp" "Apprentice_med_inc" "Cert_appren_qual_emp" ...
##  $ totss       : num 62.8
##  $ withinss    : num [1:3] 4.55 10.46 13.25
##  $ tot.withinss: num 28.3
##  $ betweenss   : num 34.5
##  $ size        : int [1:3] 5 12 23
##  $ iter        : int 2
##  $ ifault      : int 0
##  - attr(*, "class")= chr "kmeans"

4.1.1 Generate a table of cluster courses

using table()

# Number of rows (fields of study) assigned to each k-means cluster.
table(kmResult[["cluster"]])
## 
##  1  2  3 
##  5 12 23

4.1.2 Display cluster centers/means as a data frame

# Cluster centres as a data frame: one row per cluster, one column per variable.
df_centers <- data.frame(kmResult[["centers"]])
# Transposed view: one row per variable, one column per cluster (X1, X2, X3).
df_centersT <- data.frame(t(df_centers))
df_centersT
##                                  X1          X2         X3
## Apprentice_emp           0.80418994 0.135824022 0.81539956
## Apprentice_unemp         0.13600000 0.125000000 0.16500000
## Apprentice_med_inc       0.70168319 0.014783388 0.51291592
## Cert_appren_qual_emp     0.78375350 0.062558357 0.71605164
## Cert_appren_qual_unemp   0.10440000 0.083333333 0.11330435
## Cert_appren_qual_med_inc 0.66689194 0.000000000 0.40525424
## Trades_emp               0.72400000 0.173958333 0.75896739
## Trades_unemp             0.20979021 0.083333333 0.23654606
## Trades_med_inc           0.56905448 0.004810199 0.46760086
## Col_cert_dip_emp         0.80667522 0.649978605 0.85550036
## Col_cert_dip_unemp       0.46713287 0.388111888 0.41349954
## Col_cert_dip_med_inc     0.68247713 0.415945133 0.64617033
## Uni_cert_dip_emp         0.25917603 0.793279234 0.83357759
## Uni_cert_dip_unemp       0.20000000 0.087331334 0.08676097
## Uni_cert_dip_med_inc     0.00000000 0.540504607 0.67873757
## Bachelor_emp             0.09565217 0.747212932 0.79778004
## Bachelor_unemp           0.00000000 0.348484848 0.39160839
## Bachelor_med_inc         0.00000000 0.529007805 0.57060693
## Cert_dip_bach_emp        0.22159730 0.774371954 0.80339414
## Cert_dip_bach_unemp      0.00000000 0.585416667 0.65434783
## Cert_dip_bach_med_inc    0.00000000 0.480372678 0.52115090
## Post_bachelor_emp        0.25693950 0.805160142 0.83867141
## Post_bachelor_unemp      0.00000000 0.555194805 0.57820440
## Post_bachelor_med_in     0.00000000 0.668237543 0.65396015
# Express the scaled k-means centres in the original measurement units.
# preProcess(method = "range") rescales each column as (x - min) / (max - min)
# (assuming the default 0-1 bounds), so the inverse is
# scaled * (max - min) + min. Multiplying by the maximum alone is only correct
# when a column's minimum is exactly 0.
df_centersT$MaxOriginal <- t(df_baseline %>% summarise_if(is.numeric, max))
min_original <- t(df_baseline %>% summarise_if(is.numeric, min))
range_original <- df_centersT$MaxOriginal - min_original
df_centersT$X1Original <- df_centersT$X1 * range_original + min_original
df_centersT$X2Original <- df_centersT$X2 * range_original + min_original
df_centersT$X3Original <- df_centersT$X3 * range_original + min_original
df_centersT
##                                  X1          X2         X3 MaxOriginal
## Apprentice_emp           0.80418994 0.135824022 0.81539956        71.6
## Apprentice_unemp         0.13600000 0.125000000 0.16500000        40.0
## Apprentice_med_inc       0.70168319 0.014783388 0.51291592     57807.0
## Cert_appren_qual_emp     0.78375350 0.062558357 0.71605164        71.4
## Cert_appren_qual_unemp   0.10440000 0.083333333 0.11330435        50.0
## Cert_appren_qual_med_inc 0.66689194 0.000000000 0.40525424     62444.0
## Trades_emp               0.72400000 0.173958333 0.75896739        80.0
## Trades_unemp             0.20979021 0.083333333 0.23654606        28.6
## Trades_med_inc           0.56905448 0.004810199 0.46760086     60739.0
## Col_cert_dip_emp         0.80667522 0.649978605 0.85550036        77.9
## Col_cert_dip_unemp       0.46713287 0.388111888 0.41349954        14.3
## Col_cert_dip_med_inc     0.68247713 0.415945133 0.64617033     55322.0
## Uni_cert_dip_emp         0.25917603 0.793279234 0.83357759        80.1
## Uni_cert_dip_unemp       0.20000000 0.087331334 0.08676097        66.7
## Uni_cert_dip_med_inc     0.00000000 0.540504607 0.67873757     48011.0
## Bachelor_emp             0.09565217 0.747212932 0.79778004        89.7
## Bachelor_unemp           0.00000000 0.348484848 0.39160839        14.3
## Bachelor_med_inc         0.00000000 0.529007805 0.57060693     73282.0
## Cert_dip_bach_emp        0.22159730 0.774371954 0.80339414        88.9
## Cert_dip_bach_unemp      0.00000000 0.585416667 0.65434783         8.0
## Cert_dip_bach_med_inc    0.00000000 0.480372678 0.52115090     73987.0
## Post_bachelor_emp        0.25693950 0.805160142 0.83867141        84.3
## Post_bachelor_unemp      0.00000000 0.555194805 0.57820440         7.7
## Post_bachelor_med_in     0.00000000 0.668237543 0.65396015     74799.0
##                          X1Original   X2Original   X3Original
## Apprentice_emp                57.58     9.725000    58.382609
## Apprentice_unemp               5.44     5.000000     6.600000
## Apprentice_med_inc         40562.20   854.583333 29650.130435
## Cert_appren_qual_emp          55.96     4.466667    51.126087
## Cert_appren_qual_unemp         5.22     4.166667     5.665217
## Cert_appren_qual_med_inc   41643.40     0.000000 25305.695652
## Trades_emp                    57.92    13.916667    60.717391
## Trades_unemp                   6.00     2.383333     6.765217
## Trades_med_inc             34563.80   292.166667 28401.608696
## Col_cert_dip_emp              62.84    50.633333    66.643478
## Col_cert_dip_unemp             6.68     5.550000     5.913043
## Col_cert_dip_med_inc       37756.00 23010.916667 35747.434783
## Uni_cert_dip_emp              20.76    63.541667    66.769565
## Uni_cert_dip_unemp            13.34     5.825000     5.786957
## Uni_cert_dip_med_inc           0.00 25950.166667 32586.869565
## Bachelor_emp                   8.58    67.025000    71.560870
## Bachelor_unemp                 0.00     4.983333     5.600000
## Bachelor_med_inc               0.00 38766.750000 41815.217391
## Cert_dip_bach_emp             19.70    68.841667    71.421739
## Cert_dip_bach_unemp            0.00     4.683333     5.234783
## Cert_dip_bach_med_inc          0.00 35541.333333 38558.391304
## Post_bachelor_emp             21.66    67.875000    70.700000
## Post_bachelor_unemp            0.00     4.275000     4.452174
## Post_bachelor_med_in           0.00 49983.500000 48915.565217
# Keep only the original-unit columns (column maximum plus the three cluster
# centres), selected by name instead of by position.
df_centersTOriginal <- df_centersT[
  ,
  c("MaxOriginal", "X1Original", "X2Original", "X3Original")
]
df_centersTOriginal
##                          MaxOriginal X1Original   X2Original   X3Original
## Apprentice_emp                  71.6      57.58     9.725000    58.382609
## Apprentice_unemp                40.0       5.44     5.000000     6.600000
## Apprentice_med_inc           57807.0   40562.20   854.583333 29650.130435
## Cert_appren_qual_emp            71.4      55.96     4.466667    51.126087
## Cert_appren_qual_unemp          50.0       5.22     4.166667     5.665217
## Cert_appren_qual_med_inc     62444.0   41643.40     0.000000 25305.695652
## Trades_emp                      80.0      57.92    13.916667    60.717391
## Trades_unemp                    28.6       6.00     2.383333     6.765217
## Trades_med_inc               60739.0   34563.80   292.166667 28401.608696
## Col_cert_dip_emp                77.9      62.84    50.633333    66.643478
## Col_cert_dip_unemp              14.3       6.68     5.550000     5.913043
## Col_cert_dip_med_inc         55322.0   37756.00 23010.916667 35747.434783
## Uni_cert_dip_emp                80.1      20.76    63.541667    66.769565
## Uni_cert_dip_unemp              66.7      13.34     5.825000     5.786957
## Uni_cert_dip_med_inc         48011.0       0.00 25950.166667 32586.869565
## Bachelor_emp                    89.7       8.58    67.025000    71.560870
## Bachelor_unemp                  14.3       0.00     4.983333     5.600000
## Bachelor_med_inc             73282.0       0.00 38766.750000 41815.217391
## Cert_dip_bach_emp               88.9      19.70    68.841667    71.421739
## Cert_dip_bach_unemp              8.0       0.00     4.683333     5.234783
## Cert_dip_bach_med_inc        73987.0       0.00 35541.333333 38558.391304
## Post_bachelor_emp               84.3      21.66    67.875000    70.700000
## Post_bachelor_unemp              7.7       0.00     4.275000     4.452174
## Post_bachelor_med_in         74799.0       0.00 49983.500000 48915.565217

4.2 Validate the Cluster Solution

4.2.1 Load cluster package and do clusplot

# 2-D plot of the k-means partition.
# Spell out TRUE: T is an ordinary variable that can be reassigned.
clusplot(
  df_transformed,
  kmResult$cluster,
  main = "2D Cluster Solution Representation",
  color = TRUE,
  shade = TRUE
)

4.2.2 Silhouette coefficient (use silhouette function)

# Silhouette widths of the k-means assignment, computed on Euclidean
# distances between the scaled rows.
sil_out <- silhouette(
  x = kmResult$cluster,
  dist = dist(x = df_transformed, method = "euclidean")
)

4.2.3 Plot Silhouette results

# Silhouette plot with one colour per cluster. The solution has 3 clusters;
# when fewer colours than clusters are given, plot.silhouette() recycles them
# across the individual bars instead of colouring by cluster.
plot(
  sil_out,
  main = "Silhouette Plot from Kmeans Analysis",
  col = c("red", "green", "blue")
)

4.3 Indices Approach

 # Raise the memory ceiling only where memory.limit() still has an effect:
 # it was a Windows-only setting and is no longer supported from R 4.2.0 on,
 # where calling it merely raises a warning.
 if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
   memory.limit(9999999999)
 }
## Warning: 'memory.limit()' is no longer supported
## [1] Inf

5 STEP: Implementing Hierarchical Cluster Analysis

5.1 Compute the dissimilarity matrix

# Pairwise Euclidean distances between the scaled rows.
dmax <- dist(x = df_transformed, method = "euclidean")

5.2 Feeding the dmax into hclust() clustering function

# Agglomerative clustering on the distance matrix.
# NOTE(review): per the stats::hclust help page, "ward.D2" (not "ward.D") is
# the variant that implements Ward's criterion on unsquared distances --
# confirm that "ward.D" is the intended linkage here.
hclust_out <- hclust(d = dmax, method = "ward.D")

5.3 Plot the dendrogram

# Base-graphics dendrogram; hang = -1 aligns all leaf labels at the bottom.
plot(
  x = hclust_out,
  cex = 0.5,
  hang = -1,
  main = "Dendogram from Hclust Algorithm"
)

5.4 Plot the dendrogram (fviz_dend function)

# Dendrogram cut into 3 groups, with leaf labels coloured by group.
fviz_dend(
  x = hclust_out,
  k = 3,
  color_labels_by_k = TRUE,
  cex = 0.5
)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

5.5 Extract summary results - cut the dendrogram (use cutree function)

 # Cut the tree into 3 groups and count the members of each group.
 clust_mem <- cutree(tree = hclust_out, k = 3)
 table(clust_mem)
## clust_mem
##  1  2  3 
## 25 10  5

5.6 Generate cluster centroids (means) table (use aggregate function)

# Per-cluster column means of the scaled data (hierarchical cluster centroids).
clust_centers <- aggregate(
  x = df_transformed,
  by = list(cluster = clust_mem),
  FUN = mean
)
clust_centers
##   cluster Apprentice_emp Apprentice_unemp Apprentice_med_inc
## 1       1     0.78765363           0.1518         0.47188264
## 2       2     0.06927374           0.1500         0.01774007
## 3       3     0.80418994           0.1360         0.70168319
##   Cert_appren_qual_emp Cert_appren_qual_unemp Cert_appren_qual_med_inc
## 1           0.67478992                0.10424                0.3728339
## 2           0.03501401                0.10000                0.0000000
## 3           0.78375350                0.10440                0.6668919
##   Trades_emp Trades_unemp Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp
## 1   0.752400    0.2176224    0.430192792        0.8479076          0.4190210
## 2   0.073375    0.1000000    0.005772239        0.6278562          0.3692308
## 3   0.724000    0.2097902    0.569054479        0.8066752          0.4671329
##   Col_cert_dip_med_inc Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc
## 1            0.6421069        0.8268664         0.08707646            0.6762042
## 2            0.3800586        0.8019975         0.08665667            0.5191914
## 3            0.6824771        0.2591760         0.20000000            0.0000000
##   Bachelor_emp Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp
## 1   0.79638796      0.3910490        0.5661838         0.8024297
## 2   0.74057971      0.3412587        0.5317459         0.7709786
## 3   0.09565217      0.0000000        0.0000000         0.2215973
##   Cert_dip_bach_unemp Cert_dip_bach_med_inc Post_bachelor_emp
## 1             0.65950             0.5211094         0.8383867
## 2             0.55875             0.4723208         0.7991696
## 3             0.00000             0.0000000         0.2569395
##   Post_bachelor_unemp Post_bachelor_med_in
## 1           0.6025974             0.665933
## 2           0.4896104             0.641161
## 3           0.0000000             0.000000

5.7 Transpose the clust_centers (use t function)

# Put variables in rows and clusters in columns (V1, V2, V3), then drop the
# first row, which only carries the cluster id added by aggregate().
df_centersT1 <- as.data.frame(t(clust_centers))
df_centersT1 <- df_centersT1[-1, ]
# Express the scaled centroids in the original measurement units.
# preProcess(method = "range") rescales each column as (x - min) / (max - min)
# (assuming the default 0-1 bounds), so the inverse is
# scaled * (max - min) + min. Multiplying by the maximum alone is only correct
# when a column's minimum is exactly 0.
df_centersT1$MaxOriginal <- t(df_baseline %>% summarise_if(is.numeric, max))
min_original1 <- t(df_baseline %>% summarise_if(is.numeric, min))
range_original1 <- df_centersT1$MaxOriginal - min_original1
df_centersT1$X1Original <- df_centersT1$V1 * range_original1 + min_original1
df_centersT1$X2Original <- df_centersT1$V2 * range_original1 + min_original1
df_centersT1$X3Original <- df_centersT1$V3 * range_original1 + min_original1
df_centersT1
##                                  V1          V2         V3 MaxOriginal
## Apprentice_emp           0.78765363 0.069273743 0.80418994        71.6
## Apprentice_unemp         0.15180000 0.150000000 0.13600000        40.0
## Apprentice_med_inc       0.47188264 0.017740066 0.70168319     57807.0
## Cert_appren_qual_emp     0.67478992 0.035014006 0.78375350        71.4
## Cert_appren_qual_unemp   0.10424000 0.100000000 0.10440000        50.0
## Cert_appren_qual_med_inc 0.37283390 0.000000000 0.66689194     62444.0
## Trades_emp               0.75240000 0.073375000 0.72400000        80.0
## Trades_unemp             0.21762238 0.100000000 0.20979021        28.6
## Trades_med_inc           0.43019279 0.005772239 0.56905448     60739.0
## Col_cert_dip_emp         0.84790757 0.627856226 0.80667522        77.9
## Col_cert_dip_unemp       0.41902098 0.369230769 0.46713287        14.3
## Col_cert_dip_med_inc     0.64210694 0.380058566 0.68247713     55322.0
## Uni_cert_dip_emp         0.82686642 0.801997503 0.25917603        80.1
## Uni_cert_dip_unemp       0.08707646 0.086656672 0.20000000        66.7
## Uni_cert_dip_med_inc     0.67620420 0.519191435 0.00000000     48011.0
## Bachelor_emp             0.79638796 0.740579710 0.09565217        89.7
## Bachelor_unemp           0.39104895 0.341258741 0.00000000        14.3
## Bachelor_med_inc         0.56618378 0.531745858 0.00000000     73282.0
## Cert_dip_bach_emp        0.80242970 0.770978628 0.22159730        88.9
## Cert_dip_bach_unemp      0.65950000 0.558750000 0.00000000         8.0
## Cert_dip_bach_med_inc    0.52110938 0.472320813 0.00000000     73987.0
## Post_bachelor_emp        0.83838671 0.799169632 0.25693950        84.3
## Post_bachelor_unemp      0.60259740 0.489610390 0.00000000         7.7
## Post_bachelor_med_in     0.66593297 0.641160978 0.00000000     74799.0
##                          X1Original X2Original X3Original
## Apprentice_emp               56.396       4.96      57.58
## Apprentice_unemp              6.072       6.00       5.44
## Apprentice_med_inc        27278.120    1025.50   40562.20
## Cert_appren_qual_emp         48.180       2.50      55.96
## Cert_appren_qual_unemp        5.212       5.00       5.22
## Cert_appren_qual_med_inc  23281.240       0.00   41643.40
## Trades_emp                   60.192       5.87      57.92
## Trades_unemp                  6.224       2.86       6.00
## Trades_med_inc            26129.480     350.60   34563.80
## Col_cert_dip_emp             66.052      48.91      62.84
## Col_cert_dip_unemp            5.992       5.28       6.68
## Col_cert_dip_med_inc      35522.640   21025.60   37756.00
## Uni_cert_dip_emp             66.232      64.24      20.76
## Uni_cert_dip_unemp            5.808       5.78      13.34
## Uni_cert_dip_med_inc      32465.240   24926.90       0.00
## Bachelor_emp                 71.436      66.43       8.58
## Bachelor_unemp                5.592       4.88       0.00
## Bachelor_med_inc          41491.080   38967.40       0.00
## Cert_dip_bach_emp            71.336      68.54      19.70
## Cert_dip_bach_unemp           5.276       4.47       0.00
## Cert_dip_bach_med_inc     38555.320   34945.60       0.00
## Post_bachelor_emp            70.676      67.37      21.66
## Post_bachelor_unemp           4.640       3.77       0.00
## Post_bachelor_med_in      49811.120   47958.20       0.00
# Original-unit summary of the hierarchical solution, rounded to 2 decimals.
# This must read df_centersT1 (hierarchical centroids); subsetting df_centersT
# would repeat the k-means centres instead. Printed output captured from a
# run that used df_centersT should be regenerated.
df_centersT1Original <- round(
  df_centersT1[, c("MaxOriginal", "X1Original", "X2Original", "X3Original")],
  2
)
df_centersT1Original
##                          MaxOriginal X1Original X2Original X3Original
## Apprentice_emp                  71.6      57.58       9.72      58.38
## Apprentice_unemp                40.0       5.44       5.00       6.60
## Apprentice_med_inc           57807.0   40562.20     854.58   29650.13
## Cert_appren_qual_emp            71.4      55.96       4.47      51.13
## Cert_appren_qual_unemp          50.0       5.22       4.17       5.67
## Cert_appren_qual_med_inc     62444.0   41643.40       0.00   25305.70
## Trades_emp                      80.0      57.92      13.92      60.72
## Trades_unemp                    28.6       6.00       2.38       6.77
## Trades_med_inc               60739.0   34563.80     292.17   28401.61
## Col_cert_dip_emp                77.9      62.84      50.63      66.64
## Col_cert_dip_unemp              14.3       6.68       5.55       5.91
## Col_cert_dip_med_inc         55322.0   37756.00   23010.92   35747.43
## Uni_cert_dip_emp                80.1      20.76      63.54      66.77
## Uni_cert_dip_unemp              66.7      13.34       5.82       5.79
## Uni_cert_dip_med_inc         48011.0       0.00   25950.17   32586.87
## Bachelor_emp                    89.7       8.58      67.02      71.56
## Bachelor_unemp                  14.3       0.00       4.98       5.60
## Bachelor_med_inc             73282.0       0.00   38766.75   41815.22
## Cert_dip_bach_emp               88.9      19.70      68.84      71.42
## Cert_dip_bach_unemp              8.0       0.00       4.68       5.23
## Cert_dip_bach_med_inc        73987.0       0.00   35541.33   38558.39
## Post_bachelor_emp               84.3      21.66      67.88      70.70
## Post_bachelor_unemp              7.7       0.00       4.28       4.45
## Post_bachelor_med_in         74799.0       0.00   49983.50   48915.57

6 STEP: Saving the Results

6.1 Attach cluster membership to the original dataset

# Add the hierarchical cluster id of each row as a new column of the full table.
df_full[["clustMem"]] <- clust_mem
head(x = df_full)
##                                                 Major.field.of.study
## 1       01. Agriculture, agriculture operations and related sciences
## 2                             03. Natural resources and conservation
## 3                              04. Architecture and related services
## 4              05. Area, ethnic, cultural, gender, and group studies
## 5                 09. Communication, journalism and related programs
## 6   10. Communications technologies/technicians and support services
##   Apprentice_emp Apprentice_unemp Apprentice_med_inc Cert_appren_qual_emp
## 1           62.1              6.7              27865                 60.3
## 2           56.6             10.3              38609                 54.7
## 3           68.2              4.3              45984                 46.2
## 4            0.0              0.0                  0                  0.0
## 5           28.6              0.0                  0                  0.0
## 6           48.9              6.0              31831                 41.8
##   Cert_appren_qual_unemp Cert_appren_qual_med_inc Trades_emp Trades_unemp
## 1                    7.3                    31140       63.3          6.3
## 2                   10.4                    40011       57.5         10.6
## 3                    0.0                    61472       80.0          5.9
## 4                    0.0                        0        0.0          0.0
## 5                    0.0                        0       75.0          0.0
## 6                    4.0                    28844       58.0          7.8
##   Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp Col_cert_dip_med_inc
## 1          26040             69.8                5.2                32337
## 2          37650             69.1                8.2                44690
## 3          34566             70.4                6.4                43779
## 4              0             56.2                9.8                20909
## 5              0             74.2                5.5                39582
## 6          35535             76.3                7.3                40337
##   Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc Bachelor_emp
## 1             73.8                4.5                28301         67.0
## 2             72.9                5.4                35466         76.5
## 3             67.1                3.4                34960         68.4
## 4             57.8               16.1                24822         73.7
## 5             67.4                8.0                33561         79.5
## 6             77.0                7.5                38461         80.8
##   Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp Cert_dip_bach_unemp
## 1            3.9            36145              68.1                 3.9
## 2            5.8            54243              76.9                 5.4
## 3            6.0            41491              73.3                 4.7
## 4            5.2            32952              72.5                 4.8
## 5            4.9            37508              78.7                 5.5
## 6            9.1            46993              81.8                 8.0
##   Cert_dip_bach_med_inc Post_bachelor_emp Post_bachelor_unemp
## 1                 35055              70.6                 4.2
## 2                 58736              77.7                 4.5
## 3                 45375              77.1                 3.8
## 4                 67182              69.5                 4.4
## 5                 38182              75.9                 7.7
## 6                 45896              83.9                 4.1
##   Post_bachelor_med_in clustMem
## 1                38302        1
## 2                65019        1
## 3                59137        1
## 4                30863        2
## 5                59137        1
## 6                39504        1

6.2 Save the output to file

# Persist the table, including the new clustMem column, as a CSV file.
write.csv(
  x = df_full,
  file = "BC_Educ_Labour_Market_Performance_classified.csv"
)