The Word Adjacency network dataset is provided as a GML file containing 77 nodes and 254 edges.
# Measure how long it takes to load the Word Adjacency network
t1 <- system.time({
  dataset_path <- system.file("extdata", "word_adjacencies.gml",
                              package = "arlclustering")
  if (!nzchar(dataset_path)) {
    stop("word_adjacencies.gml file not found")
  }

  # Load the graph and touch its basic summary fields
  g <- arlc_get_network_dataset(dataset_path, "Word Adjacency")
  g$graphLabel
  g$totalEdges
  g$totalNodes
  g$averageDegree
})
# Report the elapsed loading time
message("Graph loading Processing Time: ", t1["elapsed"], " seconds\n")
#> Graph loading Processing Time: 0.0139999999999958 seconds

Next, we generate transactions from the graph, obtaining a total of 102 rows.
We obtain the Apriori thresholds for the generated transactions. The following thresholds are used for the Apriori execution: minimum support = 0.03, minimum confidence = 0.5, lift = 20.4, gross rules length = 649, and selection ratio = 6.
# Time the Apriori threshold-estimation step
t3 <- system.time({
  # Scan a small support range to select the best Apriori parameters
  # for the generated transactions
  params <- arlc_get_apriori_thresholds(transactions,
                                      supportRange = seq(0.03, 0.04, by = 0.01),
                                      Conf = 0.5)
  params$minSupp   # selected minimum support
  params$minConf   # selected minimum confidence
  params$bestLift  # best lift value found
  params$lenRules  # number of gross rules at these thresholds
  params$ratio     # selection ratio
})
# Display the total processing time
message("Apriori thresholds Processing Time: ", t3["elapsed"], " seconds\n")
#> Apriori thresholds Processing Time: 0.0109999999999957 seconds

We use the obtained parameters to generate gross rules, where we obtain 649 rules.
# Time the gross-rules generation step
t4 <- system.time({
  minLenRules <- 1
  maxLenRules <- params$lenRules
  # Cap the maximum rule length: guard against a non-finite or
  # oversized value returned by the threshold search
  if (!is.finite(maxLenRules) || maxLenRules > 5 * length(transactions)) {
    maxLenRules <- 5 * length(transactions)
  }
  # Generate the gross rule set with the selected Apriori parameters;
  # minLenRules + 1 skips single-item rules
  grossRules <- arlc_gen_gross_rules(transactions,
                                     minSupp = params$minSupp,
                                     minConf = params$minConf,
                                     minLenRules = minLenRules + 1,
                                     maxLenRules = maxLenRules)
  #grossRules$TotalRulesWithLengthFilter
})
# Display the total processing time
message("Gross rules generation Processing Time: ", t4["elapsed"], " seconds\n")
#> Apriori
#> 
#> Parameter specification:
#>  confidence minval smax arem  aval originalSupport maxtime support minlen
#>         0.5    0.1    1 none FALSE            TRUE       5    0.03      2
#>  maxlen target  ext
#>     510  rules TRUE
#> 
#> Algorithmic control:
#>  filter tree heap memopt load sort verbose
#>     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
#> 
#> Absolute minimum support count: 3 
#> 
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[112 item(s), 102 transaction(s)] done [0.00s].
#> sorting and recoding items ... [80 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 2 3 4 5 done [0.00s].
#> writing ... [649 rule(s)] done [0.00s].
#> creating S4 object  ... done [0.00s].

We filter out redundant rules from the generated gross rules. Next, we filter out non-significant rules from the non-redundant rules, obtaining 528 rule items.
# Time the rule-filtering step: drop redundant rules, then keep only
# the statistically significant ones
t5 <- system.time({
  nonRedundant <- arlc_get_NonR_rules(grossRules$GrossRules)
  NonRSigRules <- arlc_get_significant_rules(transactions,
                                             nonRedundant$FiltredRules)
  #NonRSigRules$TotFiltredRules
})
# Display the total processing time
message("\nClearing rules Processing Time: ", t5["elapsed"], " seconds\n")
#> 
#> Clearing rules Processing Time: 0.204000000000001 seconds

We clean the final set of rules to prepare for clustering. Then, we generate clusters based on the cleaned rules. The total number of identified clusters is 20.
# Time the final cleaning and cluster-generation step
t6 <- system.time({
  finalRules <- arlc_clean_final_rules(NonRSigRules$FiltredRules)
  clusters <- arlc_generate_clusters(finalRules)
  #clusters$TotClusters
})
# Display the total processing time
message("Cleaning final rules Processing Time: ", t6["elapsed"], " seconds\n")
#> Cleaning final rules Processing Time: 0.0380000000000038 seconds

Finally, we visualize the identified clusters.
# Plot the detected communities over the original graph,
# using the dataset label as the figure title.
arlc_clusters_plot(g$graph,
                   g$graphLabel,
                   clusters$Clusters)
#> 
#> Total Identified Clusters: 20
#>  =========================
#>   Community 01:2 3 4 14 18 22 23 25 26 27 32 35 43 45 51 55 67
#>   Community 02:3 10 13 18 19 20 26 27 29 32 39 42 44 45 49 51 52 53 54 55 60 69 73 80 88 103 104 105
#>   Community 03:6 53
#>   Community 04:7 18
#>   Community 05:8 18 33
#>   Community 06:10 15 18 20 42 44 52 105
#>   Community 07:13 18 22 25 27 45 49 51
#>   Community 08:15 18 52 73 105
#>   Community 09:16 18 27 32
#>   Community 10:18 19 20 22 25 26 27 28 29 31 32 33 35 37 38 39 40 42 44 45 46 49 51 52 54 55 60 67 69 71 73 76 77 80 81 82 88 103 104 105 106 107
#>   Community 11:22 24 25 26 27 28 32 35 36 45 51 52 55 67
#>   Community 12:25 26 27 28 32 37 67 71 74 84
#>   Community 13:26 27 32 36 39 43 51
#>   Community 14:27 28 32 43 51 67 82 84
#>   Community 15:28 32 37 66 74 84
#>   Community 16:32 35 36 43 67 84
#>   Community 17:44 73 80 81 88 89 103 104 105
#>   Community 18:52 53 71 73 76 103
#>   Community 19:66 105
#>   Community 20:99 105
#>  =========================