Here is an example of sequential filtering with Holt-Winters. The same pattern should work for other kinds of sequential modeling, such as Kalman filtering.
from matplotlib import pyplot
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
seasonality = 10
def model_fn(features, targets):
  """Defines a basic Holt-Winters sequential filtering model in TensorFlow.
  See http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc435.htm"""
  times = features["times"]
  values = features["values"]
  # Initial estimates
  initial_trend = tf.reduce_sum(
      (values[seasonality:2*seasonality] - values[:seasonality])
      / seasonality ** 2)
  initial_smoothed_observation = values[0]
  # Seasonal indices are multiplicative, so having them near 0 leads to
  # instability
  initial_seasonal_indices = 1. + tf.exp(
      tf.get_variable("initial_seasonal_indices", shape=[seasonality]))
  with tf.variable_scope("smoothing_parameters",
                         initializer=tf.zeros_initializer):
    # Trained scalars for smoothing, transformed to be in (0, 1)
    observation_smoothing = tf.sigmoid(
        tf.get_variable(name="observation_smoothing", shape=[]))
    trend_smoothing = tf.sigmoid(
        tf.get_variable(name="trend_smoothing", shape=[]))
    seasonal_smoothing = tf.sigmoid(
        tf.get_variable(name="seasonal_smoothing", shape=[]))
  def filter_function(
      current_index, seasonal_indices, previous_smoothed_observation,
      previous_trend, previous_loss_sum):
    current_time = tf.gather(times, current_index)
    current_observation = tf.gather(values, current_index)
    current_season = current_time % seasonality
    one_step_ahead_prediction = (
        (previous_smoothed_observation + previous_trend)
        * tf.gather(seasonal_indices, current_season))
    new_loss_sum = previous_loss_sum + (
        one_step_ahead_prediction - current_observation) ** 2
    new_smoothed_observation = (
        (observation_smoothing * current_observation
         / tf.gather(seasonal_indices, current_season))
        + ((1. - observation_smoothing)
           * (previous_smoothed_observation + previous_trend)))
    new_trend = (
        (trend_smoothing
         * (new_smoothed_observation - previous_smoothed_observation))
        + (1. - trend_smoothing) * previous_trend)
    updated_seasonal_index = (
        seasonal_smoothing * current_observation / new_smoothed_observation
        + ((1. - seasonal_smoothing)
           * tf.gather(seasonal_indices, current_season)))
    new_seasonal_indices = tf.concat(
        concat_dim=0,
        values=[seasonal_indices[:current_season],
                [updated_seasonal_index],
                seasonal_indices[current_season + 1:]])
    # Preserve shape to keep the while_loop shape invariants happy
    new_seasonal_indices.set_shape(seasonal_indices.get_shape())
    return (current_index + 1, new_seasonal_indices, new_smoothed_observation,
            new_trend, new_loss_sum)
  def while_run_condition(current_index, *unused_args):
    return current_index < tf.shape(times)[0]
  (_, final_seasonal_indices, final_smoothed_observation, final_trend,
   sum_squared_errors) = tf.while_loop(
       cond=while_run_condition,
       body=filter_function,
       loop_vars=[0, initial_seasonal_indices, initial_smoothed_observation,
                  initial_trend, 0.])
  normalized_loss = sum_squared_errors / tf.cast(tf.shape(times)[0],
                                                 dtype=tf.float32)
  train_op = tf.contrib.layers.optimize_loss(
      loss=normalized_loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=0.1,
      optimizer="Adam")
  prediction_times = tf.range(30)
  prediction_values = (
      (final_smoothed_observation + final_trend * tf.cast(prediction_times,
                                                          dtype=tf.float32))
      * tf.cast(tf.gather(params=final_seasonal_indices,
                          indices=prediction_times % seasonality),
                dtype=tf.float32))
  predictions = {"times": prediction_times,
                 "values": prediction_values}
  return predictions, normalized_loss, train_op
# Create a synthetic time series with seasonality, trend, and a little noise
series_length = 50
times = np.arange(series_length, dtype=np.int32)
values = 5. + (
    0.02 * times + np.sin(times * 2 * np.pi / float(seasonality))
    + np.random.normal(size=[series_length], scale=0.2)).astype(np.float32)
# Define an input function to feed the data into our model
input_fn = lambda: ({"times": tf.convert_to_tensor(times, dtype=tf.int32),
                     "values": tf.convert_to_tensor(values, dtype=tf.float32)},
                    {})
# Wrap the model in a tf.learn Estimator for training and inference
estimator = tf.contrib.learn.Estimator(model_fn=model_fn)
estimator.fit(input_fn=input_fn, steps=500)
predictions = estimator.predict(input_fn=input_fn, as_iterable=False)
# Plot the training data and predictions
pyplot.plot(range(series_length), values)
pyplot.plot(series_length + predictions["times"], predictions["values"])
pyplot.show()
Output of Holt-Winters on synthetic data: training data followed by predictions.

When scaling up to longer time series, however, this code becomes quite slow (at the time of writing I was using TensorFlow 0.11.0rc0). The problem is that TensorFlow (and most other tools for automatic differentiation) does not have stellar performance on sequential computation (looping). Usually this is improved by batching the data and working on large chunks, but that is somewhat tricky for sequential models, since there is state that needs to be carried from one timestep to the next.
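To give a rough sense of what batching a stateful model involves, here is a minimal NumPy sketch of processing a long series in fixed-size chunks while carrying the filter state from one chunk to the next. The helper name smooth_chunk and the simple exponential-smoothing update are illustrative assumptions, not the Holt-Winters code above.

# A rough NumPy sketch of the chunking idea: run the filter over one chunk at a
# time and hand its final state to the next chunk. smooth_chunk and the plain
# exponential-smoothing update are illustrative assumptions, not the code above.
import numpy as np

def smooth_chunk(chunk, state, alpha=0.3):
  outputs = []
  for observation in chunk:
    # Standard exponential smoothing update, carrying `state` forward
    state = alpha * observation + (1. - alpha) * state
    outputs.append(state)
  return np.array(outputs, dtype=np.float32), state

long_series = np.random.normal(size=[200]).astype(np.float32)
chunk_size = 20
state = long_series[0]
smoothed_chunks = []
for start in range(0, len(long_series), chunk_size):
  chunk_output, state = smooth_chunk(long_series[start:start + chunk_size], state)
  smoothed_chunks.append(chunk_output)
smoothed = np.concatenate(smoothed_chunks)

In a TensorFlow version of this, each chunk would be one graph execution (or one batch element), with the final state fed back in as the next chunk's initial state.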
A much faster (but less satisfying) approach is to use an autoregressive model. Since there is no state for the model to maintain, we can very easily do minibatch stochastic gradient descent:
import numpy as np
from matplotlib import pyplot
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
seasonality = 10
# Create a synthetic time series with seasonality, trend, and a little noise
series_length = 50
times = np.arange(series_length, dtype=np.int32)
values = 5. + (0.02 * times + np.sin(times * 2 * np.pi / float(seasonality))
               + np.random.normal(size=[series_length], scale=0.2)).astype(
                   np.float32)
# Parameters for stochastic gradient descent
batch_size = 16
window_size = 10
# Define a column format for the linear regression
input_column = tf.contrib.layers.real_valued_column(column_name="input_window",
                                                    dimension=window_size)
def training_input_fn():
  window_starts = tf.random_uniform(shape=[batch_size], dtype=tf.int32,
                                    maxval=series_length - window_size - 1)
  element_indices = (tf.expand_dims(window_starts, 1)
                     + tf.expand_dims(tf.range(window_size), 0))
  return ({input_column: tf.gather(values, element_indices)},
          tf.gather(values, window_starts + window_size))
estimator = tf.contrib.learn.LinearRegressor(feature_columns=[input_column])
estimator.fit(input_fn=training_input_fn, steps=500)
predictions = list(values[-10:])
def predict_input_fn():
  return ({input_column: tf.reshape(predictions[-10:], [1, 10])}, {})
predict_length = 30
for i in xrange(predict_length):
  prediction = estimator.predict(input_fn=predict_input_fn, as_iterable=False)
  predictions.append(prediction[0])
predictions = predictions[10:]
pyplot.plot(range(series_length), values)
pyplot.plot(series_length + np.arange(predict_length), predictions)
pyplot.show()
Output of the autoregressive model on the same synthetic dataset.
Caveats aside, this approach has the advantage of being very easy to implement in TensorFlow.
For clustering, something like k-means could work on time series.
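As a rough sketch of that idea (using scikit-learn's KMeans rather than TensorFlow, with the synthetic series, window length, and number of clusters as arbitrary illustrative choices), one could slide fixed-length windows over a series and cluster the resulting vectors:

# A minimal sketch: cluster fixed-length windows of a time series with k-means.
# The synthetic series, window length, and cluster count are arbitrary choices.
import numpy as np
from sklearn.cluster import KMeans

series = (np.sin(np.arange(200) * 2 * np.pi / 10.)
          + np.random.normal(scale=0.2, size=200))
window_size = 10
windows = np.stack([series[start:start + window_size]
                    for start in range(len(series) - window_size)])
kmeans = KMeans(n_clusters=3)
labels = kmeans.fit_predict(windows)  # one cluster label per window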