# Source code for zoo.chronos.detector.anomaly.dbscan_detector

#
# Copyright 2018 Analytics Zoo Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from zoo.chronos.detector.anomaly.abstract import AnomalyDetector

import numpy as np
from sklearn.cluster import DBSCAN


class DBScanDetector(AnomalyDetector):
    """
    Anomaly detector that clusters a univariate series with DBSCAN and
    reports every sample DBSCAN labels as noise (label ``-1``) as an anomaly.

    Example:
        >>> #The dataset to detect is y
        >>> y = numpy.array(...)
        >>> ad = DBScanDetector(eps=0.1, min_samples=6)
        >>> ad.fit(y)
        >>> anomaly_scores = ad.score()
        >>> anomaly_indexes = ad.anomaly_indexes()
    """

    # Keyword arguments that belong to ``DBSCAN.fit`` rather than to the
    # ``DBSCAN`` constructor. They are still forwarded to ``fit`` so callers
    # that relied on the previous pass-through keep working.
    _FIT_ONLY_KWARGS = ("y", "sample_weight")

    def __init__(self, eps=0.01, min_samples=6, **argv):
        """
        Initialize a DBScanDetector.

        :param eps: The maximum distance between two samples for one to be
            considered as the neighborhood of the other. It is a parameter of
            DBSCAN, refer to sklearn.cluster.DBSCAN docs for more details.
        :param min_samples: The number of samples (or total weight) in a
            neighborhood for a point to be considered as a core point. It is
            a parameter of DBSCAN, refer to sklearn.cluster.DBSCAN docs for
            more details.
        :param argv: Other parameters used in DBSCAN. Refer to
            sklearn.cluster.DBSCAN docs for more details.
        """
        self.eps = eps
        self.min_samples = min_samples
        self.argv = argv
        # Both stay None until fit() succeeds; score() and anomaly_indexes()
        # use that to detect an unfitted detector.
        self.anomaly_indexes_ = None
        self.anomaly_scores_ = None

    def check_data(self, arr):
        """
        Validate that the input is a univariate series.

        :param arr: array-like object exposing a ``shape`` attribute.
        :raises ValueError: if ``arr`` has more than one dimension.
        """
        if len(arr.shape) > 1:
            raise ValueError("Only univariate time series is supported")

    def fit(self, y):
        """
        Fit the model

        :param y: the input time series. y must be 1-D numpy array.
        :raises ValueError: if ``y`` has more than one dimension.
        """
        self.check_data(y)

        # The extra keyword arguments are documented as DBSCAN parameters, so
        # they must reach the estimator constructor; DBSCAN.fit would reject
        # them. Only the fit-level keywords are passed on to fit().
        estimator_kwargs = {}
        fit_kwargs = {}
        for key, value in self.argv.items():
            if key in self._FIT_ONLY_KWARGS:
                fit_kwargs[key] = value
            else:
                estimator_kwargs[key] = value

        # DBSCAN expects a 2-D (n_samples, n_features) input, hence the
        # reshape of the 1-D series into a single-feature column.
        clusters = DBSCAN(eps=self.eps,
                          min_samples=self.min_samples,
                          **estimator_kwargs).fit(y.reshape(-1, 1), **fit_kwargs)

        # Samples labelled -1 are the ones DBSCAN assigned to no cluster.
        outlier_indexes = np.where(clusters.labels_ == -1)[0]
        scores = np.zeros_like(y)
        scores[outlier_indexes] = 1

        # Publish results only after clustering succeeded, so a failed refit
        # never leaves scores and indexes out of sync.
        self.anomaly_indexes_ = outlier_indexes
        self.anomaly_scores_ = scores

    def score(self):
        """
        Gets the anomaly scores for each sample.
        Each anomaly score is either 0 or 1, where 1 indicates an anomaly.

        :return: anomaly score for each sample, in an array format with the
            same size as input
        :raises RuntimeError: if fit has not been called yet.
        """
        if self.anomaly_indexes_ is None:
            raise RuntimeError("Please call fit first")
        return self.anomaly_scores_

    def anomaly_indexes(self):
        """
        Gets the indexes of the anomalies.

        :return: the indexes of the anomalies.
        :raises RuntimeError: if fit has not been called yet.
        """
        if self.anomaly_indexes_ is None:
            raise RuntimeError("Please call fit first")
        return self.anomaly_indexes_