Menu

Data Science Dev

Design, develop, deploy

State Space Reconstruction from Time Series Data - Embedding Dimension - Time Delay

I was experimenting with some features related to state space reconstruction trying to determine when a fault occurs from time series data.  Thought I would keep it here in case I needed to refer to it later.  The features are time delay and embedding dimension.  The code is below with Lorenz model generated data.  From the graphs, the time delay = 10 and the embedding dimension = 3.

import numpy as np
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# plot histograms
def plot_histograms(xedges_1d,hist_1d,xedges_2d,yedges_2d,hist_2d):
    # plot histograms
    xedgesctr_1d = xedges_1d[0:len(xedges_1d)-1]+(xedges_1d[1:len(xedges_1d)] - xedges_1d[0:len(xedges_1d)-1])/2.0
    fig = plt.figure(1)
    fig.add_subplot(121)
    plt.bar(xedgesctr_1d,hist_1d)
    # plot 2d histogram
    xedgesctr_2d = xedges_2d[0:len(xedges_2d)-1]+(xedges_2d[1:len(xedges_2d)] - xedges_2d[0:len(xedges_2d)-1])/2.0
    yedgesctr_2d = yedges_2d[0:len(yedges_2d)-1]+(yedges_2d[1:len(yedges_2d)] - yedges_2d[0:len(yedges_2d)-1])/2.0
    ax1 = fig.add_subplot(122,projection='3d')
    _xx, _yy = np.meshgrid(xedgesctr_2d,yedgesctr_2d)
    xp,yp = _xx.ravel(), _yy.ravel()
    top = hist_2d.ravel()
    bottom = np.zeros_like(top)
    width=depth=2
    ax1.bar3d(xp,yp,bottom,width,depth,top)
    plt.show()


# compute delay for state space reconstruction
def compute_delay(data,bins,max_delay):
    MI = np.zeros((max_delay+1))
    for T in range(0,max_delay+1):
        xs = data[0:len(data)-T]
        ys = data[T:len(data)]

        # compute 1D histograms
        hist_1d, xedges_1d = np.histogram(data,bins=bins)
        norm_hist_1d = hist_1d/float(len(data))

        # compute 2D histogram and normalize
        hist_2d, xedges_2d, yedges_2d = np.histogram2d(xs,ys,bins=bins)
        norm_hist_2d = hist_2d/float(len(xs))

        # compute mutual information
        for i in range(0,len(xs)):
            # compute P(x) and P(y)
            x = xs[i]
            y = ys[i]
            xedge_left = xedges_1d[0:len(xedges_1d)-1]
            xedge_right = xedges_1d[1:len(xedges_1d)]                
            xi = np.argwhere((x >= xedge_left) & (x < xedge_right))
            yi = np.argwhere((y >= xedge_left) & (y < xedge_right))
            if len(xi) == 0:
                xi = len(xedge_left)-1
            else:
                xi = xi[0][0]
            if len(yi) == 0:
                yi = len(yedge_left)-1
            else:
                yi = yi[0][0]
            Px = norm_hist_1d[xi]
            Py = norm_hist_1d[yi]
            # compute P(x,y)
            xedge_left = xedges_2d[0:len(xedges_2d)-1]
            xedge_right = xedges_2d[1:len(xedges_2d)]
            yedge_left = yedges_2d[0:len(yedges_2d)-1]
            yedge_right = yedges_2d[1:len(yedges_2d)]
            xi = np.argwhere((x >= xedge_left) & (x < xedge_right))
            yi = np.argwhere((y >= yedge_left) & (y < yedge_right))
            if len(xi) == 0:
                xi = len(xedge_left)-1
            else:
                xi = xi[0][0]
            if len(yi) == 0:
                yi = len(yedge_left)-1
            else:
                yi = yi[0][0]
            Pxy = norm_hist_2d[xi,yi]
            # add to mutual information (shouldn't have to worry about divide by zero)
            MI[T] += Pxy*np.log2(Pxy/(Px*Py))

    # find first minimum of mutual information or use 1 if minimum is on right edge (noisy signal or small range of T)
    time_delay = 0
    mutual_info = MI[0]
    for T in range(1,max_delay+1):
        if mutual_info < MI[T]:
            break
        else:
            mutual_info = MI[T]
            time_delay = T
    if time_delay == max_delay:
        time_delay = 1

    # return result
    return MI, time_delay, xedges_1d, norm_hist_1d, xedges_2d, yedges_2d, norm_hist_2d

# compute embedding dimension
def compute_embdim(data,time_delay,threshold,max_dimension):
    # arrange data into time delay vectors
    x = np.zeros((len(data)-max_dimension*time_delay,max_dimension+1))
    for D in range(0,max_dimension+1):
        x[:,D] = data[D*time_delay:len(data)-((max_dimension-D)*time_delay)]

    # compute percentage of false nearest neighbors
    pfnn = np.zeros((max_dimension+1))
    for D in range(0,max_dimension):
        d1 = x[:,0:D+1]
        d2 = x[:,0:D+2]
        nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(d1)
        distances, indices = nbrs.kneighbors(d1)
        indices = indices[:,1]
        d1_nn = d1[indices,:]
        d2_nn = d2[indices,:]
        d1s = d1-d1_nn
        d2s = np.abs(d2-d2_nn)
        d2s = d2s[:,D+1]
        result = np.zeros((len(d1s)))
        # correction factor for zero distances
        cf = 0
        for i in range(0,len(d1s)):
            v = d1s[i,:]
            Rd = np.sqrt(np.matmul(v,v.T))
            result[i] = d2s[i]/Rd
            if result[i] == 0:
                cf = cf + 1
        result_fnn = (result > threshold).astype('float')
        pfnn[D] = np.sum(result_fnn)/float(len(result_fnn)-cf)

    # find embedding dimension which is when pfnn first goes to zero
    for i in range(0,len(pfnn)):
        embedding_dimension = i + 1
        if pfnn[i] < 0.01:
            break

    return pfnn, embedding_dimension
    
# lorenz equation simulated data
x = np.zeros(4000)
y = np.zeros(4000)
z = np.zeros(4000)
x[0] = -12.0
y[0] = 0.001
z[0] = 0.4
Ts = 0.01
for i in range(0,3999):
    x[i+1] = x[i] + Ts*16.0*(y[i]-x[i])
    y[i+1] = y[i] + Ts*(-x[i]*z[i] + 45.92*x[i] - y[i])
    z[i+1] = z[i] + Ts*(x[i]*y[i] - 4.0*z[i])

# compute time delay and embedding dimension

MI, time_delay, xedges_1d, norm_hist_1d, xedges_2d, yedges_2d, norm_hist_2d = compute_delay(x,int(np.sqrt(1000)),50)
pfnn, embedding_dimension = compute_embdim(x,time_delay,15.0,10)

print 'time delay = ' + str(time_delay)
print 'embedding dimension = ' + str(embedding_dimension)

plot_histograms(xedges_1d, norm_hist_1d, xedges_2d, yedges_2d, norm_hist_2d)

plt.subplot(121)
plt.plot(MI,marker='*')
plt.title('mutual information vs. time delay')
plt.subplot(122)
plt.plot(range(1,len(pfnn)+1),pfnn,marker='*')
plt.title('percentage false nearest neighbors vs. embedding dimension')

plt.show()

 

Go Back



Comment