Browse code

Updates

neobernad authored on 19/06/2024 15:36:13
Showing 4 changed files

1 1
new file mode 100755
... ...
@@ -0,0 +1,56 @@
1
+library(evaluomeR)
2
+
3
+
4
+evaluomeRSupportedCBI()
5
+
6
+dataFrame <- stability(data=ontMetrics, cbi="kmeans", k=3, all_metrics=FALSE, bs=100, L1=10)
7
+assay(dataFrame)
8
+
9
+dataFrame <- stabilityRange(data=ontMetrics, cbi="rskc", k.range=c(3,4), all_metrics=TRUE, bs=100, L1=10)
10
+assay(dataFrame)
11
+
12
+dataFrame <- stabilitySet(data=ontMetrics, k.set=c(3,4), bs=100, cbi="rskc", all_metrics=TRUE, L1=10)
13
+assay(dataFrame)
14
+
15
+dataFrame <- quality(data=ontMetrics, cbi="rskc", k=3, all_metrics=TRUE, L1=10)
16
+assay(dataFrame)
17
+
18
+dataFrame <- qualityRange(data=ontMetrics, cbi="rskc", k.range=c(3,4), all_metrics=TRUE, L1=10)
19
+assay(dataFrame$k_3)
20
+
21
+dataFrame <- qualitySet(data=ontMetrics, cbi="rskc", k.set=c(3,5), all_metrics=TRUE, L1=10)
22
+assay(dataFrame$k_3)
23
+
24
+
25
+# RSKC will not work with a dataframe of 1 column
26
+
27
# Simulate a 60 x f data matrix with three planted groups of 20 rows each.
#
# Every entry is first drawn with rnorm() (standard normal). The leading
# columns (at most 50 of them) are then shifted: rows 1-20 by +mu and
# rows 21-40 by -mu. Rows 41-60 and any columns beyond the 50th are left
# as unshifted noise.
#
# mu: size of the shift applied to the leading columns.
# f:  total number of columns to generate.
#
# Returns a numeric matrix with 60 rows and f columns.
sim <- function(mu, f) {
  D <- matrix(rnorm(60 * f), 60, f)
  # Cap the shifted columns at f: indexing 1:50 on a narrower matrix would
  # stop with "subscript out of bounds". For f >= 50 this is exactly 1:50.
  signal_cols <- seq_len(min(50, f))
  D[1:20, signal_cols] <- D[1:20, signal_cols] + mu
  D[21:40, signal_cols] <- D[21:40, signal_cols] - mu
  D
}
34
+sim
35
+d0<-sim(1,500)# generate a dataset
36
+true<-rep(1:3,each=20) # vector of true cluster labels
37
+d<-d0
38
+ncl<-3
39
+for ( i in 1 : 10){
40
+  d[sample(1:60,1),sample(1:500,1)]<-rnorm(1,mean=0,sd=15)
41
+}
42
+
43
+# The generated dataset looks like this...
44
+pairs(
45
+  d[,c(1,2,3,200)],col=true,
46
+  labels=c("clustering feature 1",
47
+           "clustering feature 2","clustering feature 3",
48
+           "noise feature1"),
49
+  main="The sampling distribution of 60 cases colored by true cluster labels",
50
+  lower.panel=NULL)
51
+
52
+d
53
+
54
+# RSKC works when more than 2 columns are provided
55
+
56
+r3<-RSKC(d[,1:5],ncl,alpha=10/60,L1=6,nstart=200)
0 57
new file mode 100755
... ...
@@ -0,0 +1,36 @@
1
+library(evaluomeR)
2
+
3
# Pair the comma-separated "Cluster_labels" entries of a quality result with
# the comma-separated "Cluster_position" entries.
#
# qualityResult: object accepted by assay(); its assay must expose the
#   columns "Cluster_position" and "Cluster_labels".
#
# Returns a two-column data frame: "Individual" (the split labels) and
# "InCluster" (the split positions). Both columns are list-columns because
# they are built from lists bound together with cbind().
individuals_per_cluster <- function(qualityResult) {
  quality_table <- as.data.frame(assay(qualityResult))

  # Flatten each column of interest into a plain character vector.
  positions_raw <- as.character(unlist(quality_table["Cluster_position"]))
  labels_raw <- as.character(unlist(quality_table["Cluster_labels"]))

  # Only the first element of each vector is split; further rows, if any,
  # are ignored.
  position_items <- as.list(strsplit(positions_raw, ",")[[1]])
  label_items <- as.list(strsplit(labels_raw, ",")[[1]])

  setNames(
    as.data.frame(cbind(label_items, position_items)),
    c("Individual", "InCluster")
  )
}
18
+
19
+data("ontMetrics")
20
+metricsRelevancy = getMetricsRelevancy(ontMetrics, k=3, alpha=0.1, seed=100)
21
+# RSKC output object
22
+metricsRelevancy$rskc
23
+# Trimmed cases from input (row indexes)
24
+metricsRelevancy$trimmed_cases
25
+# Metrics relevancy table
26
+metricsRelevancy$relevancy
27
+
28
+
29
+test = qualityRange(data=ontMetrics, k.range=c(3,3),
30
+                             seed=13007,
31
+                             all_metrics=TRUE,
32
+                             cbi="rskc", L1=2, alpha=0)
33
+
34
+# Show which cluster each individual was assigned to
35
+individuals_per_cluster(test$k_3)
36
+
0 37
new file mode 100755
... ...
@@ -0,0 +1,84 @@
1
+library(evaluomeR)
2
+library(RSKC)
3
+library(sparcl)
4
+seed = 100
5
+dataFrame <- quality(data=ontMetrics, cbi="kmeans", k=3)
6
+assay(dataFrame)
7
+# Metric     Cluster_1_SilScore  Cluster_2_SilScore  Cluster_3_SilScore  Avg_Silhouette_Width Cluster_1_Size Cluster_2_Size Cluster_3_Size
8
+# [1,] "ANOnto"   "0.754894925204277" "0.570241066303214" "0.775876285585267" "0.736742918153759"  "12"           "14"           "54"
9
+# [2,] "AROnto"   "0.837074497995987" "0.509946991883709" "0.959264389073384" "0.786971025529677"  "65"           "13"           "2"
10
+# [3,] "CBOOnto"  "0.766630500367533" "0.574451527320666" "0.470708665744913" "0.72319889705568"   "63"           "15"           "2"
11
+# [4,] "CBOOnto2" "0.766630500367533" "0.574451527320666" "0.470708665744913" "0.72319889705568"   "63"           "15"           "2"
12
+# [5,] "CROnto"   "0.885055456924709" "0.636126752920544" "0"                 "0.855322610912838"  "73"           "6"            "1"
13
+# [6,] "DITOnto"  "0.615581638093901" "0.441137593941046" "0.746848044839846" "0.553468450386794"  "41"           "33"           "6"
14
+# [7,] "INROnto"  "0.760945813444805" "0.506239463726949" "0"                 "0.690941232718754"  "60"           "19"           "1"
15
+# [8,] "LCOMOnto" "0.657281417643165" "0.61764525421598"  "0.722333227599342" "0.652913140794165"  "21"           "40"           "19"
16
+# [9,] "NACOnto"  "0.759522276872854" "0.445845264823784" "0.254826579985626" "0.661322430756974"  "58"           "17"           "5"
17
+# [10,] "NOCOnto"  "0.898396530127955" "0.742673517080307" "0.363472944618239" "0.879183827500925"  "75"           "3"            "2"
18
+# [11,] "NOMOnto"  "0.708789049998754" "0.605603643727872" "0"                 "0.668973564992505"  "55"           "24"           "1"
19
+# [12,] "POnto"    "0.755700546488043" "0.737169134813343" "0.651090644844594" "0.67661537075347"   "8"            "14"           "58"
20
+# [13,] "PROnto"   "0.770018889790615" "0.56606585120985"  "0.636058646833202" "0.668644905329162"  "32"           "24"           "24"
21
+# [14,] "RFCOnto"  "0.672903800663584" "0.571360647044581" "0"                 "0.635298846489826"  "56"           "23"           "1"
22
+# [15,] "RROnto"   "0.636058646833202" "0.56606585120985"  "0.770018889790615" "0.668644905329162"  "24"           "24"           "32"
23
+# [16,] "TMOnto"   "0.782948726523096" "0.50860642260504"  "0.634534477835837" "0.710090639489989"  "56"           "18"           "6"
24
+# [17,] "TMOnto2"  "1"                 "0.73737171744016"  "0.462679160671249" "0.724657891719511"  "16"           "45"           "19"
25
+# [18,] "WMCOnto"  "0.868556472442156" "0.369670756071292" "0.763547528087877" "0.828514820105485"  "72"           "6"            "2"
26
+# [19,] "WMCOnto2" "0.891854974826074" "0.598522433823083" "0.613618761016468" "0.870232442430684"  "74"           "4"            "2"
27
+
28
+dataFrame <- quality(data=ontMetrics, cbi="kmeans", k=4)
29
+assay(dataFrame)
30
+# Metric     Cluster_1_SilScore  Cluster_2_SilScore  Cluster_3_SilScore  Cluster_4_SilScore   Avg_Silhouette_Width Cluster_1_Size Cluster_2_Size Cluster_3_Size Cluster_4_Size
31
+# [1,] "ANOnto"   "0.717030499002753" "0.569222510427433" "0.552363239306396" "0.584449669565973"  "0.600638738086962"  "12"           "11"           "4"            "53"
32
+# [2,] "AROnto"   "0.891757427020894" "0.614385150712436" "0.498602630835942" "0.953766280221553"  "0.813833608784603"  "58"           "13"           "7"            "2"
33
+# [3,] "CBOOnto"  "0.682847685112873" "0.475694878561971" "0.418096612044278" "0.462053414220223"  "0.5843870090796"    "46"           "18"           "14"           "2"
34
+# [4,] "CBOOnto2" "0.682847685112873" "0.475694878561971" "0.418096612044278" "0.462053414220223"  "0.5843870090796"    "46"           "18"           "14"           "2"
35
+# [5,] "CROnto"   "0.931552645421743" "0.615016966742524" "0.460688748724164" "0"                  "0.84502648526675"   "63"           "10"           "6"            "1"
36
+# [6,] "DITOnto"  "0.621392145232729" "0.589638237470761" "0.512852920317478" "0.717462336796908"  "0.582143307479606"  "15"           "35"           "24"           "6"
37
+# [7,] "INROnto"  "0.679354776901229" "0.514845315378322" "0.552323396139528" "0"                  "0.609561353444975"  "46"           "19"           "14"           "1"
38
+# [8,] "LCOMOnto" "0.563584714383498" "0.565734453969461" "0.526937877760086" "0.662861247621334"  "0.57713748864992"   "19"           "19"           "23"           "19"
39
+# [9,] "NACOnto"  "0.763008703189753" "0.507554700154524" "0.610806402578204" "0.0693863149967116" "0.627188990478616"  "42"           "23"           "10"           "5"
40
+# [10,] "NOCOnto"  "0.712806750183687" "0.368068489789737" "0.711626648649838" "0.363472944618239"  "0.600607673118847"  "51"           "24"           "3"            "2"
41
+# [11,] "NOMOnto"  "0.796568957921031" "0.487448631370323" "0.505810544669573" "0"                  "0.620956620752701"  "35"           "25"           "19"           "1"
42
+# [12,] "POnto"    "0.755700546488043" "0.717551583859045" "0.702605079149018" "0.531828315626997"  "0.676374911502771"  "8"            "14"           "42"           "16"
43
+# [13,] "PROnto"   "0.808419016380534" "0.406920889282586" "0.546429726628472" "0.636912857924547"  "0.623564355956028"  "22"           "12"           "23"           "23"
44
+# [14,] "RFCOnto"  "0.708660103503223" "0.527891770926241" "0.575667190561062" "0"                  "0.613856368788046"  "37"           "27"           "15"           "1"
45
+# [15,] "RROnto"   "0.636912857924547" "0.546429726628472" "0.406920889282586" "0.808419016380534"  "0.623564355956028"  "23"           "23"           "12"           "22"
46
+# [16,] "TMOnto"   "0.772548576303018" "0.527581279093128" "0.56435245544769"  "0.756878515673905"  "0.694408411158545"  "48"           "15"           "12"           "5"
47
+# [17,] "TMOnto2"  "1"                 "0.709314170957853" "0.593309463294573" "0.516092763511662"  "0.725408613137789"  "16"           "39"           "19"           "6"
48
+# [18,] "WMCOnto"  "0.811550829534933" "0.517887706724764" "0.232935788267106" "0.751527957476758"  "0.737070037248562"  "62"           "12"           "4"            "2"
49
+# [19,] "WMCOnto2" "0.806794961402285" "0.458575230569131" "0.48724511207104"  "0.613618761016468"  "0.72940235766569"   "61"           "13"           "4"            "2"
50
+
51
+dataFrame <- qualityRange(data=ontMetrics, cbi="kmeans", k.range = c(3,4))
52
+assay(dataFrame$k_4)
53
+# Metric     Cluster_1_SilScore  Cluster_2_SilScore  Cluster_3_SilScore   Cluster_4_SilScore  Avg_Silhouette_Width Cluster_1_Size Cluster_2_Size Cluster_3_Size Cluster_4_Size
54
+# 1  "ANOnto"   "0.569222510427433" "0.552363239306396" "0.584449669565973"  "0.717030499002753" "0.600638738086962"  "11"           "4"            "53"           "12"
55
+# 2  "AROnto"   "0.891757427020894" "0.498602630835942" "0.953766280221553"  "0.614385150712436" "0.813833608784603"  "58"           "7"            "2"            "13"
56
+# 3  "CBOOnto"  "0.682847685112873" "0.475694878561971" "0.418096612044278"  "0.462053414220223" "0.5843870090796"    "46"           "18"           "14"           "2"
57
+# 4  "CBOOnto2" "0.682847685112873" "0.475694878561971" "0.418096612044278"  "0.462053414220223" "0.5843870090796"    "46"           "18"           "14"           "2"
58
+# 5  "CROnto"   "0.615016966742524" "0.931552645421743" "0.460688748724164"  "0"                 "0.84502648526675"   "10"           "63"           "6"            "1"
59
+# 6  "DITOnto"  "0.621392145232729" "0.589638237470761" "0.512852920317478"  "0.717462336796908" "0.582143307479606"  "15"           "35"           "24"           "6"
60
+# 7  "INROnto"  "0.679354776901229" "0.514845315378322" "0.552323396139528"  "0"                 "0.609561353444975"  "46"           "19"           "14"           "1"
61
+# 8  "LCOMOnto" "0.563584714383498" "0.565734453969461" "0.526937877760086"  "0.662861247621334" "0.57713748864992"   "19"           "19"           "23"           "19"
62
+# 9  "NACOnto"  "0.507554700154524" "0.763008703189753" "0.0693863149967116" "0.610806402578204" "0.627188990478616"  "23"           "42"           "5"            "10"
63
+# 10 "NOCOnto"  "0.363472944618239" "0.712806750183687" "0.368068489789737"  "0.711626648649838" "0.600607673118847"  "2"            "51"           "24"           "3"
64
+# 11 "NOMOnto"  "0.796568957921031" "0"                 "0.487448631370323"  "0.505810544669573" "0.620956620752701"  "35"           "1"            "25"           "19"
65
+# 12 "POnto"    "0.717551583859045" "0.702605079149018" "0.531828315626997"  "0.755700546488043" "0.676374911502771"  "14"           "42"           "16"           "8"
66
+# 13 "PROnto"   "0.808419016380534" "0.636912857924547" "0.406920889282586"  "0.546429726628472" "0.623564355956028"  "22"           "23"           "12"           "23"
67
+# 14 "RFCOnto"  "0.708660103503223" "0"                 "0.527891770926241"  "0.575667190561062" "0.613856368788046"  "37"           "1"            "27"           "15"
68
+# 15 "RROnto"   "0.808419016380534" "0.636912857924547" "0.406920889282586"  "0.546429726628472" "0.623564355956028"  "22"           "23"           "12"           "23"
69
+# 16 "TMOnto"   "0.527581279093128" "0.772548576303018" "0.756878515673905"  "0.56435245544769"  "0.694408411158545"  "15"           "48"           "5"            "12"
70
+# 17 "TMOnto2"  "0.593309463294573" "1"                 "0.709314170957853"  "0.516092763511662" "0.725408613137789"  "19"           "16"           "39"           "6"
71
+# 18 "WMCOnto"  "0.811550829534933" "0.517887706724764" "0.751527957476758"  "0.232935788267106" "0.737070037248562"  "62"           "12"           "2"            "4"
72
+# 19 "WMCOnto2" "0.48724511207104"  "0.806794961402285" "0.613618761016468"  "0.458575230569131" "0.72940235766569"   "4"            "61"           "2"            "13"
73
+
74
+dataFrame <- qualityRange(data=ontMetrics, cbi="kmeans", k.range = c(3,4), all_metrics=TRUE, getImages = TRUE)
75
+assay(dataFrame$k_3)
76
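+# NOTE(review): the output pasted below has four clusters and is identical to the k=4, all_metrics=TRUE result shown further down, so it appears to come from dataFrame$k_4 rather than dataFrame$k_3 -- confirm.
+# Metric        Cluster_1_SilScore  Cluster_2_SilScore  Cluster_3_SilScore  Cluster_4_SilScore  Avg_Silhouette_Width Cluster_1_Size Cluster_2_Size Cluster_3_Size Cluster_4_Size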
77
+# 1 "all_metrics" "0.560364615463509" "0.768006541644696" "0.761635263968552" "0.343459043619883" "0.730815149196402"  "2"            "70"           "2"            "6"
78
+
79
+dataFrame <- quality(data=ontMetrics, cbi="kmeans", k=4, all_metrics=TRUE)
80
+assay(dataFrame)
81
+# Metric        Cluster_1_SilScore  Cluster_2_SilScore  Cluster_3_SilScore  Cluster_4_SilScore  Avg_Silhouette_Width
82
+# [1,] "all_metrics" "0.560364615463509" "0.768006541644696" "0.761635263968552" "0.343459043619883" "0.730815149196402"
83
+# Cluster_1_Size Cluster_2_Size Cluster_3_Size Cluster_4_Size
84
+# [1,] "2"            "70"           "2"            "6"
0 85
new file mode 100755
... ...
@@ -0,0 +1,88 @@
1
+library(evaluomeR)
2
+library(RSKC)
3
+library(sparcl)
4
+
5
+dataFrame <- stability(data=ontMetrics, cbi="kmeans", k=3, bs=100)
6
+assay(dataFrame)
7
+# Metric     Mean_stability_k_3
8
+# [1,] "ANOnto"   "0.711599421597794"
9
+# [2,] "AROnto"   "0.834242802235359"
10
+# [3,] "CBOOnto"  "0.836200447888132"
11
+# [4,] "CBOOnto2" "0.836200447888132"
12
+# [5,] "CROnto"   "0.80871022609772"
13
+# [6,] "DITOnto"  "0.802620378293628"
14
+# [7,] "INROnto"  "0.813132039213596"
15
+# [8,] "LCOMOnto" "0.995402775270891"
16
+# [9,] "NACOnto"  "0.705135779579475"
17
+# [10,] "NOCOnto"  "0.902528819875511"
18
+# [11,] "NOMOnto"  "0.793513639960901"
19
+# [12,] "POnto"    "0.660145923222329"
20
+# [13,] "PROnto"   "0.960518110441289"
21
+# [14,] "RFCOnto"  "0.765127486244089"
22
+# [15,] "RROnto"   "0.960518110441289"
23
+# [16,] "TMOnto"   "0.862955680341511"
24
+# [17,] "TMOnto2"  "0.953719590152899"
25
+# [18,] "WMCOnto"  "0.85715656831332"
26
+# [19,] "WMCOnto2" "0.904134166028688"
27
+
28
+dataFrame <- stability(data=ontMetrics, cbi="kmeans", k=5, bs=100)
29
+assay(dataFrame)
30
+# Metric     Mean_stability_k_5
31
+# [1,] "ANOnto"   "0.53661574785721"
32
+# [2,] "AROnto"   "0.808877375863211"
33
+# [3,] "CBOOnto"  "0.773161766854306"
34
+# [4,] "CBOOnto2" "0.773161766854306"
35
+# [5,] "CROnto"   "0.747939612559589"
36
+# [6,] "DITOnto"  "0.738901091226716"
37
+# [7,] "INROnto"  "0.804579603939195"
38
+# [8,] "LCOMOnto" "0.703629344931179"
39
+# [9,] "NACOnto"  "0.663958844840551"
40
+# [10,] "NOCOnto"  "0.899994756895055"
41
+# [11,] "NOMOnto"  "0.758789978458299"
42
+# [12,] "POnto"    "0.646480707690646"
43
+# [13,] "PROnto"   "0.782307410022412"
44
+# [14,] "RFCOnto"  "0.726761185593769"
45
+# [15,] "RROnto"   "0.782307410022412"
46
+# [16,] "TMOnto"   "0.88221333660635"
47
+# [17,] "TMOnto2"  "0.830282245373099"
48
+# [18,] "WMCOnto"  "0.747236615208537"
49
+# [19,] "WMCOnto2" "0.752468990321845"
50
+
51
+dataFrame <- stabilityRange(data=ontMetrics, cbi="kmeans", k.range = c(3,5), bs=100)
52
+assay(dataFrame)
53
+# Metric     Mean_stability_k_3  Mean_stability_k_4  Mean_stability_k_5
54
+# [1,] "ANOnto"   "0.711599421597794" "0.661877018484356" "0.53661574785721"
55
+# [2,] "AROnto"   "0.834242802235359" "0.905679508527523" "0.808877375863211"
56
+# [3,] "CBOOnto"  "0.836200447888132" "0.809715382620901" "0.773161766854306"
57
+# [4,] "CBOOnto2" "0.836200447888132" "0.809715382620901" "0.773161766854306"
58
+# [5,] "CROnto"   "0.80871022609772"  "0.848428661689236" "0.747939612559589"
59
+# [6,] "DITOnto"  "0.802620378293628" "0.801976319968573" "0.738901091226716"
60
+# [7,] "INROnto"  "0.813132039213596" "0.833324929464065" "0.804579603939195"
61
+# [8,] "LCOMOnto" "0.995402775270891" "0.758953924881616" "0.703629344931179"
62
+# [9,] "NACOnto"  "0.705135779579475" "0.679182045909186" "0.663958844840551"
63
+# [10,] "NOCOnto"  "0.902528819875511" "0.844518653163586" "0.899994756895055"
64
+# [11,] "NOMOnto"  "0.793513639960901" "0.779713596698101" "0.758789978458299"
65
+# [12,] "POnto"    "0.660145923222329" "0.795675361207579" "0.646480707690646"
66
+# [13,] "PROnto"   "0.960518110441289" "0.790969731730725" "0.782307410022412"
67
+# [14,] "RFCOnto"  "0.765127486244089" "0.790802265552443" "0.726761185593769"
68
+# [15,] "RROnto"   "0.960518110441289" "0.790969731730725" "0.782307410022412"
69
+# [16,] "TMOnto"   "0.862955680341511" "0.904973710968594" "0.88221333660635"
70
+# [17,] "TMOnto2"  "0.953719590152899" "0.868195348078741" "0.830282245373099"
71
+# [18,] "WMCOnto"  "0.85715656831332"  "0.854182751568963" "0.747236615208537"
72
+# [19,] "WMCOnto2" "0.904134166028688" "0.883417390847072" "0.752468990321845"
73
+
74
+
75
+dataFrame <- stability(data=ontMetrics, cbi="kmeans", k=3, all_metrics = TRUE, bs=100)
76
+assay(dataFrame)
77
+# Metric        Mean_stability_k_3
78
+# [1,] "all_metrics" "0.846238406081907"
79
+
80
+dataFrame <- stability(data=ontMetrics, cbi="kmeans", k=5, all_metrics = TRUE, bs=100)
81
+assay(dataFrame)
82
+# Metric        Mean_stability_k_5
83
+# [1,] "all_metrics" "0.803322946463351"
84
+
85
+dataFrame <- stabilityRange(data=ontMetrics, cbi="kmeans", k.range = c(3,5), all_metrics = TRUE, bs=100)
86
+assay(dataFrame)
87
+# Metric        Mean_stability_k_3  Mean_stability_k_4  Mean_stability_k_5
88
+# [1,] "all_metrics" "0.846238406081907" "0.783588073668732" "0.803322946463351"