Commit 4abf8b8c authored by Matthias Carnein

Documentation improvements

parent 68a03b31
......@@ -2,43 +2,55 @@
#' Evolutionary Algorithm
#'
#' Reclustering using an evolutionary algorithm.
#' This approach is used by \code{evoStream} but can be used for all micro-clusters.
#' This approach was designed for \code{evoStream} but can also be used for other micro-clustering algorithms.
#' The evolutionary algorithm uses existing clustering solutions and creates small variations of them by combining and randomly modifying them.
#' The modified solutions can yield better partitions and thus can improve the clustering over time.
#' The evolutionary algorithm is incremental, which allows to improve existing macro-clusters instead of recomputing them every time.
#'
#' @param r radius threshold for micro-cluster assignment
#' @param lambda decay rate
#' @param tgap time-interval between outlier detection and clean-up
#' @param k number of macro-clusters
#' @param crossoverRate cross-over rate for the evolutionary algorithm
#' @param generations number of EA generations performed during reclustering
#' @param mutationRate mutation rate for the evolutionary algorithm
#' @param populationSize number of solutions that the evolutionary algorithm maintains
#' @param initializeAfter number of micro-clusters required for the initialization of the evolutionary algorithm.
#' @param incrementalGenerations number of EA generations performed after each observation
#' @param reclusterGenerations number of EA generations performed during reclustering
#'
#' @author Matthias Carnein \email{Matthias.Carnein@@uni-muenster.de}
#'
#' @examples
#' stream <- DSD_Gaussians(k = 3, d = 2)
#' stream <- DSD_Memory(DSD_Gaussians(k = 3, d = 2), 1000)
#'
#' ## online algorithm
#' dbstream <- DSC_DBSTREAM(r=0.1)
#' EA <- DSC_EA(k=3)
#'
#' ## offline algorithm
#' EA <- DSC_EA(k=3, generations=1000)
#'
#' ## create pipeline and insert observations
#' two <- DSC_TwoStage(dbstream, EA)
#' update(two, stream, n=1200)
#' update(two, stream, n=1000)
#'
#' ## plot result
#' reset_stream(stream)
#' plot(two, stream, type="both")
#'
#' update(dbstream, stream, n = 1200)
#' ## if we have time, evaluate additional generations. This can be called at any time, also between observations.
#' two$macro_dsc$RObj$recluster(2000)
#'
#' ## plot improved result
#' reset_stream(stream)
#' plot(two, stream, type="both")
#'
#'
#' ## alternatively: do not create twostage but apply directly
#' update(dbstream, stream, n = 1000)
#' recluster(EA, dbstream)
#' reset_stream(stream)
#' plot(EA, stream)
#'
#'
#' @export
DSC_EA <- function(k, crossoverRate=.8, mutationRate=.001, populationSize=100, generations=2000) {
DSC_EA <- function(k, generations=2000, crossoverRate=.8, mutationRate=.001, populationSize=100) {
EA <- EA_R$new(k, crossoverRate, mutationRate, populationSize, generations)
EA <- EA_R$new(k, generations, crossoverRate, mutationRate, populationSize)
structure(
......@@ -59,9 +71,9 @@ DSC_EA <- function(k, crossoverRate=.8, mutationRate=.001, populationSize=100, g
#' @field mutationRate mutation rate for the evolutionary algorithm
#' @field populationSize number of solutions that the evolutionary algorithm maintains
#' @field k number of macro-clusters
#' @field generations number of EA generations performed during reclustering
#' @field data micro-clusters to recluster
#' @field weights weights of the micro-clusters
#' @field generations number of EA generations performed during reclustering
#' @field C exposed C class
#'
#' @author Matthias Carnein \email{matthias.carnein@@uni-muenster.de}
......@@ -79,13 +91,13 @@ EA_R <- setRefClass("EA",
),
methods = list(
initialize = function(k, crossoverRate, mutationRate, populationSize, generations) {
initialize = function(k, generations, crossoverRate, mutationRate, populationSize) {
k <<- as.integer(k)
generations <<- as.integer(generations)
crossoverRate <<- crossoverRate
mutationRate <<- mutationRate
populationSize <<- as.integer(populationSize)
generations <<- as.integer(generations)
}
)
)
......@@ -117,7 +129,7 @@ EA_R$methods(
} else{
return(clusterAssignment)
}
}
},
recluster = function(generations=1){.self$C$recluster(generations)}
)
......@@ -6,33 +6,33 @@
#' The micro-clusters are then incrementally reclustered using an evolutionary algorithm.
#' Evolutionary algorithms create slight variations by combining and randomly modifying existing solutions.
#' By iteratively selecting better solutions, an evolutionary pressure is created which improves the clustering over time.
#' Since the evolutionary algorithm is incremental, it is possible to apply between observations, e.g. in the idle time of the stream.
#' Since the evolutionary algorithm is incremental, it is possible to apply it between observations, e.g. in the idle time of the stream.
#' Alternatively it can be applied as a traditional reclustering step, or a combination of both.
#' This implementation allows to uses fixed number of generations after each observation and during reclustering.
#' This implementation allows to use a fixed number of generations after each observation and during reclustering.
#'
#' @param r radius threshold for micro-cluster assignment
#' @param lambda decay rate
#' @param tgap time-interval between outlier detection and clean-up
#' @param k number of macro-clusters
#' @param incrementalGenerations number of EA generations performed after each observation
#' @param reclusterGenerations number of EA generations performed during reclustering
#' @param crossoverRate cross-over rate for the evolutionary algorithm
#' @param mutationRate mutation rate for the evolutionary algorithm
#' @param populationSize number of solutions that the evolutionary algorithm maintains
#' @param initializeAfter number of micro-clusters required for the initialization of the evolutionary algorithm.
#' @param incrementalGenerations number of EA generations performed after each observation
#' @param reclusterGenerations number of EA generations performed during reclustering
#'
#' @author Matthias Carnein \email{Matthias.Carnein@@uni-muenster.de}
#'
#' @examples
#' stream <- DSD_Gaussians(k = 3, d = 2)
#' evoStream <- DSC_evoStream(r=0.05, k=3)
#' evoStream <- DSC_evoStream(r=0.05, k=3, incrementalGenerations=5, reclusterGenerations=2000)
#' update(evoStream, stream, n = 1200)
#' plot(evoStream, stream, type = "both")
#'
#' @export
DSC_evoStream <- function(r, lambda=0.001, tgap=100, k=2, crossoverRate=.8, mutationRate=.001, populationSize=100, initializeAfter=2*k, incrementalGenerations=5, reclusterGenerations=2000) {
DSC_evoStream <- function(r, lambda=0.001, tgap=100, incrementalGenerations=5, reclusterGenerations=2000, k=2, crossoverRate=.8, mutationRate=.001, populationSize=100, initializeAfter=2*k) {
evoStream <- evoStream_R$new(r, lambda, tgap, k, crossoverRate, mutationRate, populationSize, initializeAfter, incrementalGenerations, reclusterGenerations)
evoStream <- evoStream_R$new(r, lambda, tgap, incrementalGenerations, reclusterGenerations, k, crossoverRate, mutationRate, populationSize, initializeAfter)
structure(
list(
......@@ -50,7 +50,7 @@ DSC_evoStream <- function(r, lambda=0.001, tgap=100, k=2, crossoverRate=.8, muta
#' Reference Class evoStream_R
#'
#' Reference class mostly used to expose C class object
#' Reference class mostly used to expose the C class object
#'
#' @field C exposed C class
#'
......@@ -110,7 +110,6 @@ evoStream_R$methods(
}
)
evoStream_R$methods(
recluster = function(generations=1) {
.self$C$recluster(generations)
......
......@@ -4,48 +4,54 @@
\alias{DSC_EA}
\title{Evolutionary Algorithm}
\usage{
DSC_EA(k, crossoverRate = 0.8, mutationRate = 0.001,
populationSize = 100, generations = 2000)
DSC_EA(k, generations = 2000, crossoverRate = 0.8,
mutationRate = 0.001, populationSize = 100)
}
\arguments{
\item{k}{number of macro-clusters}
\item{crossoverRate}{cross-over rate for the evolutionary algorithm}
\item{generations}{number of EA generations performed during reclustering}
\item{mutationRate}{mutation rate for the evolutionary algorithm}
\item{r}{radius threshold for micro-cluster assignment}
\item{lambda}{decay rate}
\item{tgap}{time-interval between outlier detection and clean-up}
\item{populationSize}{number of solutions that the evolutionary algorithm maintains}
\item{initializeAfter}{number of micro-clusters required for the initialization of the evolutionary algorithm.}
\item{incrementalGenerations}{number of EA generations performed after each observation}
\item{reclusterGenerations}{number of EA generations performed during reclustering}
}
\description{
Reclustering using an evolutionary algorithm.
This approach is used by \code{evoStream} but can be used for all micro-clusters.
This approach was designed for \code{evoStream} but can also be used for other micro-clustering algorithms.
The evolutionary algorithm uses existing clustering solutions and creates small variations of them by combining and randomly modifying them.
The modified solutions can yield better partitions and thus can improve the clustering over time.
The evolutionary algorithm is incremental, which allows to improve existing macro-clusters instead of recomputing them every time.
}
\examples{
stream <- DSD_Gaussians(k = 3, d = 2)
stream <- DSD_Memory(DSD_Gaussians(k = 3, d = 2), 1000)
## online algorithm
dbstream <- DSC_DBSTREAM(r=0.1)
EA <- DSC_EA(k=3)
## offline algorithm
EA <- DSC_EA(k=3, generations=1000)
## create pipeline and insert observations
two <- DSC_TwoStage(dbstream, EA)
update(two, stream, n=1200)
update(two, stream, n=1000)
## plot result
reset_stream(stream)
plot(two, stream, type="both")
## if we have time, evaluate additional generations. This can be called at any time, also between observations.
EA$RObj$recluster(1000)
## plot improved result
reset_stream(stream)
plot(two, stream, type="both")
update(dbstream, stream, n = 1200)
## alternatively: do not create twostage but apply directly
update(dbstream, stream, n = 1000)
recluster(EA, dbstream)
reset_stream(stream)
plot(EA, stream)
......
......@@ -4,10 +4,10 @@
\alias{DSC_evoStream}
\title{evoStream - Evolutionary Stream Clustering}
\usage{
DSC_evoStream(r, lambda = 0.001, tgap = 100, k = 2,
DSC_evoStream(r, lambda = 0.001, tgap = 100,
incrementalGenerations = 5, reclusterGenerations = 2000, k = 2,
crossoverRate = 0.8, mutationRate = 0.001, populationSize = 100,
initializeAfter = 2 * k, incrementalGenerations = 5,
reclusterGenerations = 2000)
initializeAfter = 2 * k)
}
\arguments{
\item{r}{radius threshold for micro-cluster assignment}
......@@ -16,6 +16,10 @@ DSC_evoStream(r, lambda = 0.001, tgap = 100, k = 2,
\item{tgap}{time-interval between outlier detection and clean-up}
\item{incrementalGenerations}{number of EA generations performed after each observation}
\item{reclusterGenerations}{number of EA generations performed during reclustering}
\item{k}{number of macro-clusters}
\item{crossoverRate}{cross-over rate for the evolutionary algorithm}
......@@ -24,10 +28,6 @@ DSC_evoStream(r, lambda = 0.001, tgap = 100, k = 2,
\item{initializeAfter}{number of micro-clusters required for the initialization of the evolutionary algorithm.}
\item{incrementalGenerations}{number of EA generations performed after each observation}
\item{reclusterGenerations}{number of EA generations performed during reclustering}
\item{populationSize}{number of solutions that the evolutionary algorithm maintains}
}
\description{
......@@ -36,13 +36,13 @@ The online component uses a simplified version of \code{DBSTREAM} to generate mi
The micro-clusters are then incrementally reclustered using an evolutionary algorithm.
Evolutionary algorithms create slight variations by combining and randomly modifying existing solutions.
By iteratively selecting better solutions, an evolutionary pressure is created which improves the clustering over time.
Since the evolutionary algorithm is incremental, it is possible to apply between observations, e.g. in the idle time of the stream.
Since the evolutionary algorithm is incremental, it is possible to apply it between observations, e.g. in the idle time of the stream.
Alternatively it can be applied as a traditional reclustering step, or a combination of both.
This implementation allows to uses fixed number of generations after each observation and during reclustering.
This implementation allows to use a fixed number of generations after each observation and during reclustering.
}
\examples{
stream <- DSD_Gaussians(k = 3, d = 2)
evoStream <- DSC_evoStream(r=0.05, k=3)
evoStream <- DSC_evoStream(r=0.05, k=3, incrementalGenerations=5, reclusterGenerations=2000)
update(evoStream, stream, n = 1200)
plot(evoStream, stream, type = "both")
......
......@@ -19,12 +19,12 @@ Reference class used for Reclustering using an evolutionary algorithm
\item{\code{k}}{number of macro-clusters}
\item{\code{generations}}{number of EA generations performed during reclustering}
\item{\code{data}}{micro-clusters to recluster}
\item{\code{weights}}{weights of the micro-clusters}
\item{\code{generations}}{number of EA generations performed during reclustering}
\item{\code{C}}{exposed C class}
}}
......
......@@ -6,7 +6,7 @@
\alias{evoStream_R}
\title{Reference Class evoStream_R}
\description{
Reference class mostly used to expose C class object
Reference class mostly used to expose the C class object
}
\section{Fields}{
......
......@@ -98,17 +98,17 @@ public:
// since exposed constructors have a limited number of parameters, we expose a setter function and use the default constructor
void setFields(double r, double lambda, int tgap, unsigned int k, double crossoverRate, double mutationRate, int populationSize, unsigned int initializeAfter, int incrementalGenerations, int reclusterGenerations){
void setFields(double r, double lambda, int tgap, int incrementalGenerations, int reclusterGenerations, unsigned int k, double crossoverRate, double mutationRate, int populationSize, unsigned int initializeAfter){
this->r=r;
this->lambda=lambda;
this->tgap=tgap;
this->incrementalGenerations = incrementalGenerations;
this->reclusterGenerations = reclusterGenerations;
this->k=k;
this->crossoverRate=crossoverRate;
this->mutationRate=mutationRate;
this->populationSize=populationSize;
this->initializeAfter=initializeAfter;
this->incrementalGenerations=incrementalGenerations;
this->reclusterGenerations = reclusterGenerations;
this->macroFitness = Rcpp::NumericVector(this->populationSize);
this->omega = pow(2, (-1*lambda * tgap));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment