# Group objects with k-means, then with Ward's agglomerative clustering.
#
# Input : `Data_UE_2_`, a table imported beforehand (in RStudio:
#         File -> Import Dataset -> From Excel...). Its first column is used
#         as the row names; the remaining columns are passed to scale(),
#         so they are assumed to be numeric -- TODO confirm for new data sets.
# Output: plots on the active graphics device, printed summaries, the file
#         "clusters.xlsx", and the objects `clusters` / `clusters22`.

# Setup ----

# Install only what is missing, so re-running the script does not reinstall
# packages every time.
installed_pkgs <- rownames(installed.packages())
missing_binary <- setdiff(c("ggsignif", "ggpubr"), installed_pkgs)
missing_other <- setdiff(c("factoextra", "dplyr", "writexl"), installed_pkgs)

if (length(missing_binary) > 0) {
  # "win.binary" only exists on Windows; elsewhere use the platform default.
  pkg_type <- if (.Platform$OS.type == "windows") {
    "win.binary"
  } else {
    getOption("pkgType")
  }
  install.packages(missing_binary, type = pkg_type)
}
if (length(missing_other) > 0) {
  install.packages(missing_other)
}

library(factoextra)
library(RColorBrewer)
library(dplyr)
library(writexl)

# Number of groups requested from each method. Choose these after inspecting
# the silhouette plot produced below.
n_clusters_kmeans <- 4
n_clusters_hclust <- 3

# Fixed seed: k-means uses random starting centres, so without it the cluster
# numbering can differ from run to run.
cluster_seed <- 123

# Data preparation ----

# Column 1 becomes the row names and is dropped from the variables.
df <- data.frame(Data_UE_2_, row.names = 1)

# Normalise the variables (scale() centres and scales every column).
mydata <- scale(df)

# K-means ----

# Silhouette plot to help determine the optimal number of clusters.
set.seed(cluster_seed)
fviz_nbclust(mydata, kmeans, method = "silhouette")

# Group similar objects for the indicated number of clusters. The seed is
# reset so this result does not depend on whether the plot above was run.
set.seed(cluster_seed)
km.res <- kmeans(mydata, centers = n_clusters_kmeans, nstart = 25)
print(km.res)

# Average values of the original (unscaled) variables in each cluster.
aggregate(df, by = list(cluster = km.res$cluster), FUN = mean)

# Show the available RColorBrewer palettes (aid for choosing `palette`).
display.brewer.all()

# Cluster visualisation.
fviz_cluster(
  km.res,
  data = mydata,
  palette = "Accent",
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE, # non-overlapping labels
  ggtheme = theme_minimal()
)

# Add the cluster number to the data set.
clusters <- cbind(df, cluster = km.res$cluster)

# Save to Excel; the row names are moved into a "Country" column first.
data <- tibble::rownames_to_column(clusters, "Country")
write_xlsx(data, path = "clusters.xlsx")

# Agglomerative clustering - Ward's method ----
# (the optimal number of groups should be determined beforehand)

hc.res <- eclust(
  mydata,
  "hclust",
  k = n_clusters_hclust,
  hc_metric = "euclidean",
  hc_method = "ward.D2"
)

# Dendrogram.
fviz_dend(hc.res, show_labels = TRUE, palette = "jco", as.ggplot = TRUE)

# Add the cluster number to the output data set.
clusters22 <- cbind(df, cluster = hc.res$cluster)