Menu

# Data Science Dev

## Design, develop, deploy

### State Space Reconstruction from Time Series Data - Embedding Dimension - Time Delay

I was experimenting with some features related to state space reconstruction trying to determine when a fault occurs from time series data.  Thought I would keep it here in case I needed to refer to it later.  The features are time delay and embedding dimension.  The code is below with Lorenz model generated data.  From the graphs, the time delay = 10 and the embedding dimension = 3.  import numpy as np from sklearn.neighbors import NearestNeighbors import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D # plot histograms def plot_histograms(xedges_1d,hist_1d,xedges_2d,yedges_2d,hist_2d):     # plot histograms     xedgesctr_1d = xedges_1d[0:len(xedges_1d)-1]+(xedges_1d[1:len(xedges_1d)] - xedges_1d[0:len(xedges_1d)-1])/2.0     fig = plt.figure(1)     fig.add_subplot(121)     plt.bar(xedgesctr_1d,hist_1d)     # plot 2d histogram     xedgesctr_2d = xedges_2d[0:len(xedges_2d)-1]+(xedges_2d[1:len(xedges_2d)] - xedges_2d[0:len(xedges_2d)-1])/2.0     yedgesctr_2d = yedges_2d[0:len(yedges_2d)-1]+(yedges_2d[1:len(yedges_2d)] - yedges_2d[0:len(yedges_2d)-1])/2.0     ax1 = fig.add_subplot(122,projection='3d')     _xx, _yy = np.meshgrid(xedgesctr_2d,yedgesctr_2d)     xp,yp = _xx.ravel(), _yy.ravel()     top = hist_2d.ravel()     bottom = np.zeros_like(top)     width=depth=2     ax1.bar3d(xp,yp,bottom,width,depth,top)     plt.show() # compute delay for state space reconstruction def compute_delay(data,bins,max_delay):     MI = np.zeros((max_delay+1))     for T in range(0,max_delay+1):         xs = data[0:len(data)-T]         ys = data[T:len(data)]         # compute 1D histograms         hist_1d, xedges_1d = np.histogram(data,bins=bins)         norm_hist_1d = hist_1d/float(len(data))         # compute 2D histogram and normalize         hist_2d, xedges_2d, yedges_2d = np.histogram2d(xs,ys,bins=bins)         norm_hist_2d = hist_2d/float(len(xs))         # compute mutual information         for i in range(0,len(xs)):             # compute P(x) and P(y)             
x = xs[i]             y = ys[i]             xedge_left = xedges_1d[0:len(xedges_1d)-1]             xedge_right = xedges_1d[1:len(xedges_1d)]                             xi = np.argwhere((x >= xedge_left) & (x < xedge_right))             yi = np.argwhere((y >= xedge_left) & (y < xedge_right))             if len(xi) == 0:                 xi = len(xedge_left)-1             else:                 xi = xi             if len(yi) == 0:                 yi = len(yedge_left)-1             else:                 yi = yi             Px = norm_hist_1d[xi]             Py = norm_hist_1d[yi]             # compute P(x,y)             xedge_left = xedges_2d[0:len(xedges_2d)-1]             xedge_right = xedges_2d[1:len(xedges_2d)]             yedge_left = yedges_2d[0:len(yedges_2d)-1]             yedge_right = yedges_2d[1:len(yedges_2d)]             xi = np.argwhere((x >= xedge_left) & (x < xedge_right))             yi = np.argwhere((y >= yedge_left) & (y < yedge_right))             if len(xi) == 0:                 xi = len(xedge_left)-1             else:                 xi = xi             if len(yi) == 0:                 yi = len(yedge_left)-1             else:                 yi = yi             Pxy = norm_hist_2d[xi,yi]             # add to mutual information (shouldn't have to worry about divide by zero)             MI[T] += Pxy*np.log2(Pxy/(Px*Py))     # find first minimum of mutual information or use 1 if minimum is on right edge (noisy signal or small range of T)     time_delay = 0     mutual_info = MI     for T in range(1,max_delay+1):         if mutual_info < MI[T]:             break         else:             mutual_info = MI[T]             time_delay = T     if time_delay == max_delay:         time_delay = 1     # return result     return MI, time_delay, xedges_1d, norm_hist_1d, xedges_2d, yedges_2d, norm_hist_2d # compute embedding dimension def compute_embdim(data,time_delay,threshold,max_dimension):     # arrange data into time delay vectors     x = 
np.zeros((len(data)-max_dimension*time_delay,max_dimension+1))     for D in range(0,max_dimension+1):         x[:,D] = data[D*time_delay:len(data)-((max_dimension-D)*time_delay)]     # compute percentage of false nearest neighbors     pfnn = np.zeros((max_dimension+1))     for D in range(0,max_dimension):         d1 = x[:,0:D+1]         d2 = x[:,0:D+2]         nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(d1)         distances, indices = nbrs.kneighbors(d1)         indices = indices[:,1]         d1_nn = d1[indices,:]         d2_nn = d2[indices,:]         d1s = d1-d1_nn         d2s = np.abs(d2-d2_nn)         d2s = d2s[:,D+1]         result = np.zeros((len(d1s)))         # correction factor for zero distances         cf = 0         for i in range(0,len(d1s)):             v = d1s[i,:]             Rd = np.sqrt(np.matmul(v,v.T))             result[i] = d2s[i]/Rd             if result[i] == 0:                 cf = cf + 1         result_fnn = (result > threshold).astype('float')         pfnn[D] = np.sum(result_fnn)/float(len(result_fnn)-cf)     # find embedding dimension which is when pfnn first goes to zero     for i in range(0,len(pfnn)):         embedding_dimension = i + 1         if pfnn[i] < 0.01:             break     return pfnn, embedding_dimension      # lorenz equation simulated data x = np.zeros(4000) y = np.zeros(4000) z = np.zeros(4000) x = -12.0 y = 0.001 z = 0.4 Ts = 0.01 for i in range(0,3999):     x[i+1] = x[i] + Ts*16.0*(y[i]-x[i])     y[i+1] = y[i] + Ts*(-x[i]*z[i] + 45.92*x[i] - y[i])     z[i+1] = z[i] + Ts*(x[i]*y[i] - 4.0*z[i]) # compute time delay and embedding dimension MI, time_delay, xedges_1d, norm_hist_1d, xedges_2d, yedges_2d, norm_hist_2d = compute_delay(x,int(np.sqrt(1000)),50) pfnn, embedding_dimension = compute_embdim(x,time_delay,15.0,10) print 'time delay = ' + str(time_delay) print 'embedding dimension = ' + str(embedding_dimension) plot_histograms(xedges_1d, norm_hist_1d, xedges_2d, yedges_2d, norm_hist_2d) 
plt.subplot(121) plt.plot(MI,marker='*') plt.title('mutual information vs. time delay') plt.subplot(122) plt.plot(range(1,len(pfnn)+1),pfnn,marker='*') plt.title('percentage false nearest neighbors vs. embedding dimension') plt.show()

## Comments

• August 20, 2019
• August 19, 2019
• August 19, 2019
• August 19, 2019
• August 19, 2019