library(NbClust)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(caret)
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(cluster)
# Load the complete labour-market data set from the CSV file (path is relative
# to the working directory) and preview its first rows.
df_full <- read.csv(file = "BC_Educ_Labour_Market_Performance.csv")
head(x = df_full)
## Major.field.of.study
## 1 01. Agriculture, agriculture operations and related sciences
## 2 03. Natural resources and conservation
## 3 04. Architecture and related services
## 4 05. Area, ethnic, cultural, gender, and group studies
## 5 09. Communication, journalism and related programs
## 6 10. Communications technologies/technicians and support services
## Apprentice_emp Apprentice_unemp Apprentice_med_inc Cert_appren_qual_emp
## 1 62.1 6.7 27865 60.3
## 2 56.6 10.3 38609 54.7
## 3 68.2 4.3 45984 46.2
## 4 0.0 0.0 0 0.0
## 5 28.6 0.0 0 0.0
## 6 48.9 6.0 31831 41.8
## Cert_appren_qual_unemp Cert_appren_qual_med_inc Trades_emp Trades_unemp
## 1 7.3 31140 63.3 6.3
## 2 10.4 40011 57.5 10.6
## 3 0.0 61472 80.0 5.9
## 4 0.0 0 0.0 0.0
## 5 0.0 0 75.0 0.0
## 6 4.0 28844 58.0 7.8
## Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp Col_cert_dip_med_inc
## 1 26040 69.8 5.2 32337
## 2 37650 69.1 8.2 44690
## 3 34566 70.4 6.4 43779
## 4 0 56.2 9.8 20909
## 5 0 74.2 5.5 39582
## 6 35535 76.3 7.3 40337
## Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc Bachelor_emp
## 1 73.8 4.5 28301 67.0
## 2 72.9 5.4 35466 76.5
## 3 67.1 3.4 34960 68.4
## 4 57.8 16.1 24822 73.7
## 5 67.4 8.0 33561 79.5
## 6 77.0 7.5 38461 80.8
## Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp Cert_dip_bach_unemp
## 1 3.9 36145 68.1 3.9
## 2 5.8 54243 76.9 5.4
## 3 6.0 41491 73.3 4.7
## 4 5.2 32952 72.5 4.8
## 5 4.9 37508 78.7 5.5
## 6 9.1 46993 81.8 8.0
## Cert_dip_bach_med_inc Post_bachelor_emp Post_bachelor_unemp
## 1 35055 70.6 4.2
## 2 58736 77.7 4.5
## 3 45375 77.1 3.8
## 4 67182 69.5 4.4
## 5 38182 75.9 7.7
## 6 45896 83.9 4.1
## Post_bachelor_med_in
## 1 38302
## 2 65019
## 3 59137
## 4 30863
## 5 59137
## 6 39504
# Build the modelling table: every column of df_full except the first one
# (the field-of-study description), then preview it.
df_baseline <- df_full[, -1]
head(x = df_baseline)
## Apprentice_emp Apprentice_unemp Apprentice_med_inc Cert_appren_qual_emp
## 1 62.1 6.7 27865 60.3
## 2 56.6 10.3 38609 54.7
## 3 68.2 4.3 45984 46.2
## 4 0.0 0.0 0 0.0
## 5 28.6 0.0 0 0.0
## 6 48.9 6.0 31831 41.8
## Cert_appren_qual_unemp Cert_appren_qual_med_inc Trades_emp Trades_unemp
## 1 7.3 31140 63.3 6.3
## 2 10.4 40011 57.5 10.6
## 3 0.0 61472 80.0 5.9
## 4 0.0 0 0.0 0.0
## 5 0.0 0 75.0 0.0
## 6 4.0 28844 58.0 7.8
## Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp Col_cert_dip_med_inc
## 1 26040 69.8 5.2 32337
## 2 37650 69.1 8.2 44690
## 3 34566 70.4 6.4 43779
## 4 0 56.2 9.8 20909
## 5 0 74.2 5.5 39582
## 6 35535 76.3 7.3 40337
## Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc Bachelor_emp
## 1 73.8 4.5 28301 67.0
## 2 72.9 5.4 35466 76.5
## 3 67.1 3.4 34960 68.4
## 4 57.8 16.1 24822 73.7
## 5 67.4 8.0 33561 79.5
## 6 77.0 7.5 38461 80.8
## Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp Cert_dip_bach_unemp
## 1 3.9 36145 68.1 3.9
## 2 5.8 54243 76.9 5.4
## 3 6.0 41491 73.3 4.7
## 4 5.2 32952 72.5 4.8
## 5 4.9 37508 78.7 5.5
## 6 9.1 46993 81.8 8.0
## Cert_dip_bach_med_inc Post_bachelor_emp Post_bachelor_unemp
## 1 35055 70.6 4.2
## 2 58736 77.7 4.5
## 3 45375 77.1 3.8
## 4 67182 69.5 4.4
## 5 38182 75.9 7.7
## 6 45896 83.9 4.1
## Post_bachelor_med_in
## 1 38302
## 2 65019
## 3 59137
## 4 30863
## 5 59137
## 6 39504
# Estimate a caret pre-processing model with the "range" method on the
# baseline columns, then list the components of the fitted object.
preprocess_model <- preProcess(x = df_baseline, method = "range")
summary(object = preprocess_model)
## Length Class Mode
## dim 2 -none- numeric
## bc 0 -none- NULL
## yj 0 -none- NULL
## et 0 -none- NULL
## invHyperbolicSine 0 -none- NULL
## mean 0 -none- NULL
## std 0 -none- NULL
## ranges 48 -none- numeric
## rotation 0 -none- NULL
## method 2 -none- list
## thresh 1 -none- numeric
## pcaComp 0 -none- NULL
## numComp 0 -none- NULL
## ica 0 -none- NULL
## wildcards 2 -none- list
## k 1 -none- numeric
## knnSummary 1 -none- function
## bagImp 0 -none- NULL
## median 0 -none- NULL
## data 0 -none- NULL
## rangeBounds 2 -none- numeric
# Apply the fitted pre-processing model to the baseline data to obtain the
# rescaled table used by every clustering step, then preview it.
df_transformed <- predict(object = preprocess_model, newdata = df_baseline)
head(x = df_transformed)
## Apprentice_emp Apprentice_unemp Apprentice_med_inc Cert_appren_qual_emp
## 1 0.8673184 0.1675 0.4820350 0.8445378
## 2 0.7905028 0.2575 0.6678949 0.7661064
## 3 0.9525140 0.1075 0.7954746 0.6470588
## 4 0.0000000 0.0000 0.0000000 0.0000000
## 5 0.3994413 0.0000 0.0000000 0.0000000
## 6 0.6829609 0.1500 0.5506427 0.5854342
## Cert_appren_qual_unemp Cert_appren_qual_med_inc Trades_emp Trades_unemp
## 1 0.146 0.4986868 0.79125 0.2202797
## 2 0.208 0.6407501 0.71875 0.3706294
## 3 0.000 0.9844341 1.00000 0.2062937
## 4 0.000 0.0000000 0.00000 0.0000000
## 5 0.000 0.0000000 0.93750 0.0000000
## 6 0.080 0.4619179 0.72500 0.2727273
## Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp Col_cert_dip_med_inc
## 1 0.4287196 0.8960205 0.3636364 0.5845233
## 2 0.6198653 0.8870347 0.5734266 0.8078161
## 3 0.5690907 0.9037227 0.4475524 0.7913488
## 4 0.0000000 0.7214377 0.6853147 0.3779509
## 5 0.0000000 0.9525032 0.3846154 0.7154839
## 6 0.5850442 0.9794608 0.5104895 0.7291313
## Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc Bachelor_emp
## 1 0.9213483 0.06746627 0.5894691 0.7469342
## 2 0.9101124 0.08095952 0.7387057 0.8528428
## 3 0.8377029 0.05097451 0.7281665 0.7625418
## 4 0.7215980 0.24137931 0.5170065 0.8216276
## 5 0.8414482 0.11994003 0.6990273 0.8862876
## 6 0.9612984 0.11244378 0.8010873 0.9007804
## Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp Cert_dip_bach_unemp
## 1 0.2727273 0.4932316 0.7660292 0.4875
## 2 0.4055944 0.7401954 0.8650169 0.6750
## 3 0.4195804 0.5661827 0.8245219 0.5875
## 4 0.3636364 0.4496602 0.8155231 0.6000
## 5 0.3426573 0.5118310 0.8852643 0.6875
## 6 0.6363636 0.6412625 0.9201350 1.0000
## Cert_dip_bach_med_inc Post_bachelor_emp Post_bachelor_unemp
## 1 0.4737995 0.8374852 0.5454545
## 2 0.7938692 0.9217082 0.5844156
## 3 0.6132834 0.9145907 0.4935065
## 4 0.9080244 0.8244365 0.5714286
## 5 0.5160636 0.9003559 1.0000000
## 6 0.6203252 0.9952550 0.5324675
## Post_bachelor_med_in
## 1 0.5120657
## 2 0.8692496
## 3 0.7906122
## 4 0.4126125
## 5 0.7906122
## 6 0.5281354
# Elbow plot: within-cluster sum of squares ("wss") of k-means for a range of
# cluster counts on the rescaled data.
fviz_nbclust(
  x = df_transformed,
  FUNcluster = kmeans,
  method = "wss"
)
Based on the elbow method, it can be inferred that 3 clusters is a good choice. However, this needs to be confirmed using a more comprehensive approach.
# Run the full set of NbClust indices with k-means on Euclidean distances,
# evaluating candidate cluster counts from 2 to 10.
res_nbclust <- NbClust(
  data = df_transformed,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "kmeans",
  index = "all"
)
## Warning in pf(beale, pp, df2): NaNs produced
## Warning in pf(beale, pp, df2): NaNs produced
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 1 proposed 2 as the best number of clusters
## * 16 proposed 3 as the best number of clusters
## * 2 proposed 4 as the best number of clusters
## * 1 proposed 7 as the best number of clusters
## * 1 proposed 8 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
Considering the output “16 proposed 3 as the best number of clusters”, 3 clusters will be used.
# Fit k-means with three clusters on the rescaled data and inspect the
# structure of the fitted object.
# NOTE(review): no RNG seed is set before this call, so the cluster numbering
# may differ from run to run -- confirm whether reproducibility is required.
kmResult <- kmeans(x = df_transformed, centers = 3)
str(object = kmResult)
## List of 9
## $ cluster : int [1:40] 3 3 3 2 2 3 3 1 3 2 ...
## $ centers : num [1:3, 1:24] 0.804 0.136 0.815 0.136 0.125 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:3] "1" "2" "3"
## .. ..$ : chr [1:24] "Apprentice_emp" "Apprentice_unemp" "Apprentice_med_inc" "Cert_appren_qual_emp" ...
## $ totss : num 62.8
## $ withinss : num [1:3] 4.55 10.46 13.25
## $ tot.withinss: num 28.3
## $ betweenss : num 34.5
## $ size : int [1:3] 5 12 23
## $ iter : int 2
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
Cluster sizes, using table():
# Count how many observations k-means assigned to each cluster.
table(kmResult[["cluster"]])
##
## 1 2 3
## 5 12 23
# Cluster centres as a data frame (one row per cluster), then transposed so
# that each variable is a row and each cluster a column. data.frame() is used
# for the transposed table so the numeric cluster labels become the syntactic
# column names X1, X2, X3.
df_centers <- data.frame(kmResult[["centers"]])
df_centersT <- data.frame(
  t(df_centers)
)
df_centersT
## X1 X2 X3
## Apprentice_emp 0.80418994 0.135824022 0.81539956
## Apprentice_unemp 0.13600000 0.125000000 0.16500000
## Apprentice_med_inc 0.70168319 0.014783388 0.51291592
## Cert_appren_qual_emp 0.78375350 0.062558357 0.71605164
## Cert_appren_qual_unemp 0.10440000 0.083333333 0.11330435
## Cert_appren_qual_med_inc 0.66689194 0.000000000 0.40525424
## Trades_emp 0.72400000 0.173958333 0.75896739
## Trades_unemp 0.20979021 0.083333333 0.23654606
## Trades_med_inc 0.56905448 0.004810199 0.46760086
## Col_cert_dip_emp 0.80667522 0.649978605 0.85550036
## Col_cert_dip_unemp 0.46713287 0.388111888 0.41349954
## Col_cert_dip_med_inc 0.68247713 0.415945133 0.64617033
## Uni_cert_dip_emp 0.25917603 0.793279234 0.83357759
## Uni_cert_dip_unemp 0.20000000 0.087331334 0.08676097
## Uni_cert_dip_med_inc 0.00000000 0.540504607 0.67873757
## Bachelor_emp 0.09565217 0.747212932 0.79778004
## Bachelor_unemp 0.00000000 0.348484848 0.39160839
## Bachelor_med_inc 0.00000000 0.529007805 0.57060693
## Cert_dip_bach_emp 0.22159730 0.774371954 0.80339414
## Cert_dip_bach_unemp 0.00000000 0.585416667 0.65434783
## Cert_dip_bach_med_inc 0.00000000 0.480372678 0.52115090
## Post_bachelor_emp 0.25693950 0.805160142 0.83867141
## Post_bachelor_unemp 0.00000000 0.555194805 0.57820440
## Post_bachelor_med_in 0.00000000 0.668237543 0.65396015
# Convert the k-means centres from the rescaled [0, 1] range back to the
# original units of each variable.
#
# The "range" pre-processing maps a column to (x - min) / (max - min), so the
# inverse is scaled * (max - min) + min. Multiplying by the column maximum
# alone is only correct when the column minimum happens to be 0.
col_max <- t(df_baseline %>% summarise(across(where(is.numeric), max)))
col_min <- t(df_baseline %>% summarise(across(where(is.numeric), min)))
col_span <- col_max - col_min

# MaxOriginal stays the fourth column so the positional selection of columns
# 4:7 made later in this script keeps returning the same columns.
df_centersT$MaxOriginal <- col_max
df_centersT$X1Original <- df_centersT$X1 * col_span + col_min
df_centersT$X2Original <- df_centersT$X2 * col_span + col_min
df_centersT$X3Original <- df_centersT$X3 * col_span + col_min
df_centersT
## X1 X2 X3 MaxOriginal
## Apprentice_emp 0.80418994 0.135824022 0.81539956 71.6
## Apprentice_unemp 0.13600000 0.125000000 0.16500000 40.0
## Apprentice_med_inc 0.70168319 0.014783388 0.51291592 57807.0
## Cert_appren_qual_emp 0.78375350 0.062558357 0.71605164 71.4
## Cert_appren_qual_unemp 0.10440000 0.083333333 0.11330435 50.0
## Cert_appren_qual_med_inc 0.66689194 0.000000000 0.40525424 62444.0
## Trades_emp 0.72400000 0.173958333 0.75896739 80.0
## Trades_unemp 0.20979021 0.083333333 0.23654606 28.6
## Trades_med_inc 0.56905448 0.004810199 0.46760086 60739.0
## Col_cert_dip_emp 0.80667522 0.649978605 0.85550036 77.9
## Col_cert_dip_unemp 0.46713287 0.388111888 0.41349954 14.3
## Col_cert_dip_med_inc 0.68247713 0.415945133 0.64617033 55322.0
## Uni_cert_dip_emp 0.25917603 0.793279234 0.83357759 80.1
## Uni_cert_dip_unemp 0.20000000 0.087331334 0.08676097 66.7
## Uni_cert_dip_med_inc 0.00000000 0.540504607 0.67873757 48011.0
## Bachelor_emp 0.09565217 0.747212932 0.79778004 89.7
## Bachelor_unemp 0.00000000 0.348484848 0.39160839 14.3
## Bachelor_med_inc 0.00000000 0.529007805 0.57060693 73282.0
## Cert_dip_bach_emp 0.22159730 0.774371954 0.80339414 88.9
## Cert_dip_bach_unemp 0.00000000 0.585416667 0.65434783 8.0
## Cert_dip_bach_med_inc 0.00000000 0.480372678 0.52115090 73987.0
## Post_bachelor_emp 0.25693950 0.805160142 0.83867141 84.3
## Post_bachelor_unemp 0.00000000 0.555194805 0.57820440 7.7
## Post_bachelor_med_in 0.00000000 0.668237543 0.65396015 74799.0
## X1Original X2Original X3Original
## Apprentice_emp 57.58 9.725000 58.382609
## Apprentice_unemp 5.44 5.000000 6.600000
## Apprentice_med_inc 40562.20 854.583333 29650.130435
## Cert_appren_qual_emp 55.96 4.466667 51.126087
## Cert_appren_qual_unemp 5.22 4.166667 5.665217
## Cert_appren_qual_med_inc 41643.40 0.000000 25305.695652
## Trades_emp 57.92 13.916667 60.717391
## Trades_unemp 6.00 2.383333 6.765217
## Trades_med_inc 34563.80 292.166667 28401.608696
## Col_cert_dip_emp 62.84 50.633333 66.643478
## Col_cert_dip_unemp 6.68 5.550000 5.913043
## Col_cert_dip_med_inc 37756.00 23010.916667 35747.434783
## Uni_cert_dip_emp 20.76 63.541667 66.769565
## Uni_cert_dip_unemp 13.34 5.825000 5.786957
## Uni_cert_dip_med_inc 0.00 25950.166667 32586.869565
## Bachelor_emp 8.58 67.025000 71.560870
## Bachelor_unemp 0.00 4.983333 5.600000
## Bachelor_med_inc 0.00 38766.750000 41815.217391
## Cert_dip_bach_emp 19.70 68.841667 71.421739
## Cert_dip_bach_unemp 0.00 4.683333 5.234783
## Cert_dip_bach_med_inc 0.00 35541.333333 38558.391304
## Post_bachelor_emp 21.66 67.875000 70.700000
## Post_bachelor_unemp 0.00 4.275000 4.452174
## Post_bachelor_med_in 0.00 49983.500000 48915.565217
# Keep only the original-scale columns of the k-means centre table: the
# column maxima and the three back-transformed cluster centres.
original_scale_cols <- c("MaxOriginal", "X1Original", "X2Original", "X3Original")
df_centersTOriginal <- df_centersT[, original_scale_cols]
df_centersTOriginal
## MaxOriginal X1Original X2Original X3Original
## Apprentice_emp 71.6 57.58 9.725000 58.382609
## Apprentice_unemp 40.0 5.44 5.000000 6.600000
## Apprentice_med_inc 57807.0 40562.20 854.583333 29650.130435
## Cert_appren_qual_emp 71.4 55.96 4.466667 51.126087
## Cert_appren_qual_unemp 50.0 5.22 4.166667 5.665217
## Cert_appren_qual_med_inc 62444.0 41643.40 0.000000 25305.695652
## Trades_emp 80.0 57.92 13.916667 60.717391
## Trades_unemp 28.6 6.00 2.383333 6.765217
## Trades_med_inc 60739.0 34563.80 292.166667 28401.608696
## Col_cert_dip_emp 77.9 62.84 50.633333 66.643478
## Col_cert_dip_unemp 14.3 6.68 5.550000 5.913043
## Col_cert_dip_med_inc 55322.0 37756.00 23010.916667 35747.434783
## Uni_cert_dip_emp 80.1 20.76 63.541667 66.769565
## Uni_cert_dip_unemp 66.7 13.34 5.825000 5.786957
## Uni_cert_dip_med_inc 48011.0 0.00 25950.166667 32586.869565
## Bachelor_emp 89.7 8.58 67.025000 71.560870
## Bachelor_unemp 14.3 0.00 4.983333 5.600000
## Bachelor_med_inc 73282.0 0.00 38766.750000 41815.217391
## Cert_dip_bach_emp 88.9 19.70 68.841667 71.421739
## Cert_dip_bach_unemp 8.0 0.00 4.683333 5.234783
## Cert_dip_bach_med_inc 73987.0 0.00 35541.333333 38558.391304
## Post_bachelor_emp 84.3 21.66 67.875000 70.700000
## Post_bachelor_unemp 7.7 0.00 4.275000 4.452174
## Post_bachelor_med_in 74799.0 0.00 49983.500000 48915.565217
# Diagnostic plots for the k-means solution.

# 2D projection of the observations, coloured and shaded by cluster.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
clusplot(
  df_transformed,
  kmResult$cluster,
  main = "2D Cluster Solution Representation",
  color = TRUE,
  shade = TRUE
)

# Silhouette widths of every observation, based on Euclidean distances.
sil_out <- silhouette(
  kmResult$cluster,
  dist(df_transformed, method = "euclidean")
)

# One colour per cluster: plot.silhouette() colours bars cluster-wise only
# when the number of colours equals the number of clusters. With two colours
# for three clusters the colours were recycled bar by bar instead.
plot(
  sil_out,
  main = "Silhouette Plot from Kmeans Analysis",
  col = c("red", "green", "blue")
)
# Hierarchical clustering of the rescaled data.
#
# The former memory.limit() call was dropped: in the R version used for this
# run it is no longer supported and only emitted a warning and returned Inf.

# Pairwise Euclidean distances between observations.
dmax <- dist(df_transformed, method = "euclidean")

# NOTE(review): "ward.D" is applied to unsquared distances here; the hclust()
# documentation states that "ward.D2" is the variant implementing Ward's
# criterion on such input -- confirm which one is intended. Left unchanged
# because switching would alter the cluster memberships used below.
hclust_out <- hclust(dmax, method = "ward.D")

# Base dendrogram, followed by the factoextra version cut into 3 groups.
plot(hclust_out, cex = 0.5, hang = -1, main = "Dendogram from Hclust Algorithm")
fviz_dend(hclust_out, k = 3, color_labels_by_k = TRUE, cex = 0.5)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
# Cut the dendrogram into three groups and tabulate the group sizes.
clust_mem <- cutree(tree = hclust_out, k = 3)
table(clust_mem)
## clust_mem
## 1 2 3
## 25 10 5
# Mean of every rescaled variable within each hierarchical cluster; the
# grouping column is named "cluster".
clust_centers <- aggregate(
  x = df_transformed,
  by = list(cluster = clust_mem),
  FUN = mean
)
clust_centers
## cluster Apprentice_emp Apprentice_unemp Apprentice_med_inc
## 1 1 0.78765363 0.1518 0.47188264
## 2 2 0.06927374 0.1500 0.01774007
## 3 3 0.80418994 0.1360 0.70168319
## Cert_appren_qual_emp Cert_appren_qual_unemp Cert_appren_qual_med_inc
## 1 0.67478992 0.10424 0.3728339
## 2 0.03501401 0.10000 0.0000000
## 3 0.78375350 0.10440 0.6668919
## Trades_emp Trades_unemp Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp
## 1 0.752400 0.2176224 0.430192792 0.8479076 0.4190210
## 2 0.073375 0.1000000 0.005772239 0.6278562 0.3692308
## 3 0.724000 0.2097902 0.569054479 0.8066752 0.4671329
## Col_cert_dip_med_inc Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc
## 1 0.6421069 0.8268664 0.08707646 0.6762042
## 2 0.3800586 0.8019975 0.08665667 0.5191914
## 3 0.6824771 0.2591760 0.20000000 0.0000000
## Bachelor_emp Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp
## 1 0.79638796 0.3910490 0.5661838 0.8024297
## 2 0.74057971 0.3412587 0.5317459 0.7709786
## 3 0.09565217 0.0000000 0.0000000 0.2215973
## Cert_dip_bach_unemp Cert_dip_bach_med_inc Post_bachelor_emp
## 1 0.65950 0.5211094 0.8383867
## 2 0.55875 0.4723208 0.7991696
## 3 0.00000 0.0000000 0.2569395
## Post_bachelor_unemp Post_bachelor_med_in
## 1 0.6025974 0.665933
## 2 0.4896104 0.641161
## 3 0.0000000 0.000000
# Transpose the hierarchical-cluster centres so each variable is a row and
# each cluster a column (V1..V3), dropping the first row, which holds the
# cluster id rather than a variable.
df_centersT1 <- as.data.frame(t(clust_centers))
df_centersT1 <- df_centersT1[-1, ]

# Convert the centres from the rescaled [0, 1] range back to original units.
# The "range" pre-processing maps a column to (x - min) / (max - min), so the
# inverse is scaled * (max - min) + min. Multiplying by the column maximum
# alone is only correct when the column minimum happens to be 0.
col_max <- t(df_baseline %>% summarise(across(where(is.numeric), max)))
col_min <- t(df_baseline %>% summarise(across(where(is.numeric), min)))
col_span <- col_max - col_min

# MaxOriginal stays the fourth column so positional selections of columns 4:7
# on this table return the original-scale columns.
df_centersT1$MaxOriginal <- col_max
df_centersT1$X1Original <- df_centersT1$V1 * col_span + col_min
df_centersT1$X2Original <- df_centersT1$V2 * col_span + col_min
df_centersT1$X3Original <- df_centersT1$V3 * col_span + col_min
df_centersT1
## V1 V2 V3 MaxOriginal
## Apprentice_emp 0.78765363 0.069273743 0.80418994 71.6
## Apprentice_unemp 0.15180000 0.150000000 0.13600000 40.0
## Apprentice_med_inc 0.47188264 0.017740066 0.70168319 57807.0
## Cert_appren_qual_emp 0.67478992 0.035014006 0.78375350 71.4
## Cert_appren_qual_unemp 0.10424000 0.100000000 0.10440000 50.0
## Cert_appren_qual_med_inc 0.37283390 0.000000000 0.66689194 62444.0
## Trades_emp 0.75240000 0.073375000 0.72400000 80.0
## Trades_unemp 0.21762238 0.100000000 0.20979021 28.6
## Trades_med_inc 0.43019279 0.005772239 0.56905448 60739.0
## Col_cert_dip_emp 0.84790757 0.627856226 0.80667522 77.9
## Col_cert_dip_unemp 0.41902098 0.369230769 0.46713287 14.3
## Col_cert_dip_med_inc 0.64210694 0.380058566 0.68247713 55322.0
## Uni_cert_dip_emp 0.82686642 0.801997503 0.25917603 80.1
## Uni_cert_dip_unemp 0.08707646 0.086656672 0.20000000 66.7
## Uni_cert_dip_med_inc 0.67620420 0.519191435 0.00000000 48011.0
## Bachelor_emp 0.79638796 0.740579710 0.09565217 89.7
## Bachelor_unemp 0.39104895 0.341258741 0.00000000 14.3
## Bachelor_med_inc 0.56618378 0.531745858 0.00000000 73282.0
## Cert_dip_bach_emp 0.80242970 0.770978628 0.22159730 88.9
## Cert_dip_bach_unemp 0.65950000 0.558750000 0.00000000 8.0
## Cert_dip_bach_med_inc 0.52110938 0.472320813 0.00000000 73987.0
## Post_bachelor_emp 0.83838671 0.799169632 0.25693950 84.3
## Post_bachelor_unemp 0.60259740 0.489610390 0.00000000 7.7
## Post_bachelor_med_in 0.66593297 0.641160978 0.00000000 74799.0
## X1Original X2Original X3Original
## Apprentice_emp 56.396 4.96 57.58
## Apprentice_unemp 6.072 6.00 5.44
## Apprentice_med_inc 27278.120 1025.50 40562.20
## Cert_appren_qual_emp 48.180 2.50 55.96
## Cert_appren_qual_unemp 5.212 5.00 5.22
## Cert_appren_qual_med_inc 23281.240 0.00 41643.40
## Trades_emp 60.192 5.87 57.92
## Trades_unemp 6.224 2.86 6.00
## Trades_med_inc 26129.480 350.60 34563.80
## Col_cert_dip_emp 66.052 48.91 62.84
## Col_cert_dip_unemp 5.992 5.28 6.68
## Col_cert_dip_med_inc 35522.640 21025.60 37756.00
## Uni_cert_dip_emp 66.232 64.24 20.76
## Uni_cert_dip_unemp 5.808 5.78 13.34
## Uni_cert_dip_med_inc 32465.240 24926.90 0.00
## Bachelor_emp 71.436 66.43 8.58
## Bachelor_unemp 5.592 4.88 0.00
## Bachelor_med_inc 41491.080 38967.40 0.00
## Cert_dip_bach_emp 71.336 68.54 19.70
## Cert_dip_bach_unemp 5.276 4.47 0.00
## Cert_dip_bach_med_inc 38555.320 34945.60 0.00
## Post_bachelor_emp 70.676 67.37 21.66
## Post_bachelor_unemp 4.640 3.77 0.00
## Post_bachelor_med_in 49811.120 47958.20 0.00
# Rounded original-scale centres of the hierarchical clusters (columns 4:7:
# MaxOriginal, X1Original, X2Original, X3Original).
# This must read from df_centersT1 (hierarchical clustering). It previously
# read from df_centersT, the k-means table, so any output captured from that
# earlier run shows the k-means centres and needs to be regenerated.
df_centersT1Original <- round(df_centersT1[, 4:7], 2)
df_centersT1Original
## MaxOriginal X1Original X2Original X3Original
## Apprentice_emp 71.6 57.58 9.72 58.38
## Apprentice_unemp 40.0 5.44 5.00 6.60
## Apprentice_med_inc 57807.0 40562.20 854.58 29650.13
## Cert_appren_qual_emp 71.4 55.96 4.47 51.13
## Cert_appren_qual_unemp 50.0 5.22 4.17 5.67
## Cert_appren_qual_med_inc 62444.0 41643.40 0.00 25305.70
## Trades_emp 80.0 57.92 13.92 60.72
## Trades_unemp 28.6 6.00 2.38 6.77
## Trades_med_inc 60739.0 34563.80 292.17 28401.61
## Col_cert_dip_emp 77.9 62.84 50.63 66.64
## Col_cert_dip_unemp 14.3 6.68 5.55 5.91
## Col_cert_dip_med_inc 55322.0 37756.00 23010.92 35747.43
## Uni_cert_dip_emp 80.1 20.76 63.54 66.77
## Uni_cert_dip_unemp 66.7 13.34 5.82 5.79
## Uni_cert_dip_med_inc 48011.0 0.00 25950.17 32586.87
## Bachelor_emp 89.7 8.58 67.02 71.56
## Bachelor_unemp 14.3 0.00 4.98 5.60
## Bachelor_med_inc 73282.0 0.00 38766.75 41815.22
## Cert_dip_bach_emp 88.9 19.70 68.84 71.42
## Cert_dip_bach_unemp 8.0 0.00 4.68 5.23
## Cert_dip_bach_med_inc 73987.0 0.00 35541.33 38558.39
## Post_bachelor_emp 84.3 21.66 67.88 70.70
## Post_bachelor_unemp 7.7 0.00 4.28 4.45
## Post_bachelor_med_in 74799.0 0.00 49983.50 48915.57
# Store the hierarchical cluster membership next to the original records and
# preview the augmented table.
df_full[["clustMem"]] <- clust_mem
head(x = df_full)
## Major.field.of.study
## 1 01. Agriculture, agriculture operations and related sciences
## 2 03. Natural resources and conservation
## 3 04. Architecture and related services
## 4 05. Area, ethnic, cultural, gender, and group studies
## 5 09. Communication, journalism and related programs
## 6 10. Communications technologies/technicians and support services
## Apprentice_emp Apprentice_unemp Apprentice_med_inc Cert_appren_qual_emp
## 1 62.1 6.7 27865 60.3
## 2 56.6 10.3 38609 54.7
## 3 68.2 4.3 45984 46.2
## 4 0.0 0.0 0 0.0
## 5 28.6 0.0 0 0.0
## 6 48.9 6.0 31831 41.8
## Cert_appren_qual_unemp Cert_appren_qual_med_inc Trades_emp Trades_unemp
## 1 7.3 31140 63.3 6.3
## 2 10.4 40011 57.5 10.6
## 3 0.0 61472 80.0 5.9
## 4 0.0 0 0.0 0.0
## 5 0.0 0 75.0 0.0
## 6 4.0 28844 58.0 7.8
## Trades_med_inc Col_cert_dip_emp Col_cert_dip_unemp Col_cert_dip_med_inc
## 1 26040 69.8 5.2 32337
## 2 37650 69.1 8.2 44690
## 3 34566 70.4 6.4 43779
## 4 0 56.2 9.8 20909
## 5 0 74.2 5.5 39582
## 6 35535 76.3 7.3 40337
## Uni_cert_dip_emp Uni_cert_dip_unemp Uni_cert_dip_med_inc Bachelor_emp
## 1 73.8 4.5 28301 67.0
## 2 72.9 5.4 35466 76.5
## 3 67.1 3.4 34960 68.4
## 4 57.8 16.1 24822 73.7
## 5 67.4 8.0 33561 79.5
## 6 77.0 7.5 38461 80.8
## Bachelor_unemp Bachelor_med_inc Cert_dip_bach_emp Cert_dip_bach_unemp
## 1 3.9 36145 68.1 3.9
## 2 5.8 54243 76.9 5.4
## 3 6.0 41491 73.3 4.7
## 4 5.2 32952 72.5 4.8
## 5 4.9 37508 78.7 5.5
## 6 9.1 46993 81.8 8.0
## Cert_dip_bach_med_inc Post_bachelor_emp Post_bachelor_unemp
## 1 35055 70.6 4.2
## 2 58736 77.7 4.5
## 3 45375 77.1 3.8
## 4 67182 69.5 4.4
## 5 38182 75.9 7.7
## 6 45896 83.9 4.1
## Post_bachelor_med_in clustMem
## 1 38302 1
## 2 65019 1
## 3 59137 1
## 4 30863 2
## 5 59137 1
## 6 39504 1
# Export the full table, including the cluster membership column, to a CSV
# file in the working directory.
write.csv(
  x = df_full,
  file = "BC_Educ_Labour_Market_Performance_classified.csv"
)