# -*- coding: utf-8 -*-
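"""
Example and debugging functions for the nolds package.

Each function in this module demonstrates, validates, or profiles one or
more of the measures implemented in nolds. The examples can be run from the
command line via ``python -m nolds.examples <name>`` (see the bottom of this
file for the available names).
"""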
from __future__ import (absolute_import, division,
print_function, unicode_literals)
from builtins import (
bytes, dict, int, list, object, range, str, ascii, chr, hex, input, next,
oct, open, pow, round, super, filter, map, zip
)
from . import measures as nolds
from . import datasets
import numpy as np
def plot_hurst_hist():
"""
Plots a histogram of values obtained for the hurst exponent of uniformly
distributed white noise.
This function requires the package ``matplotlib``.
"""
# local import to avoid dependency for non-debug use
import matplotlib.pyplot as plt
hs = [
nolds.hurst_rs(np.random.random(size=10000), corrected=True)
for _ in range(100)
]
plt.hist(hs, bins=20)
plt.xlabel("esimated value of hurst exponent")
plt.ylabel("number of experiments")
plt.show()
def plot_lyap(maptype="logistic"):
"""
  Creates a bifurcation plot of the given map and superimposes the true
lyapunov exponent as well as the estimates of the largest lyapunov exponent
obtained by ``lyap_r`` and ``lyap_e``. The idea for this plot is taken
from [ll]_.
This function requires the package ``matplotlib``.
References:
.. [ll] Manfred Füllsack, "Lyapunov exponent",
url: http://systems-sciences.uni-graz.at/etextbook/sw2/lyapunov.html
Kwargs:
maptype (str):
can be either ``"logistic"`` for the logistic map or ``"tent"`` for the
tent map.
"""
# local import to avoid dependency for non-debug use
import matplotlib.pyplot as plt
x_start = 0.1
n = 140
nbifur = 40
if maptype == "logistic":
param_name = "r"
param_range = np.arange(2, 4, 0.01)
full_data = np.array([
np.fromiter(datasets.logistic_map(x_start, n, r), dtype="float32")
for r in param_range
])
# It can be proven that the lyapunov exponent of the logistic map
# (or any map that is an iterative application of a function) can be
    # calculated as the mean of the logarithm of the absolute value of the
# derivative at the individual data points.
# For a proof see for example:
# https://blog.abhranil.net/2015/05/15/lyapunov-exponent-of-the-logistic-map-mathematica-code/
# Derivative of logistic map: f(x) = r * x * (1 - x) = r * x - r * x²
# => f'(x) = r - 2 * r * x
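    # In formula form: lambda = lim_{N -> inf} 1/N * sum(log|f'(x_i)|).
    # Points with x = 0.5 are excluded below, because f'(0.5) = 0 and
    # log(0) is undefined.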
lambdas = [
np.mean(np.log(abs(r - 2 * r * x[np.where(x != 0.5)])))
for x, r in zip(full_data, param_range)
]
elif maptype == "tent":
param_name = "$\\mu$"
param_range = np.arange(0, 2, 0.01)
full_data = np.array([
np.fromiter(datasets.tent_map(x_start, n, mu), dtype="float32")
for mu in param_range
])
    # For the tent map the lyapunov exponent is much easier to calculate:
    # the slope of the map is +-mu everywhere, so two trajectories starting
    # at x and x + delta will have a distance of delta * mu^n after n steps.
    # Therefore the lyapunov exponent should be log(mu).
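    # The where= argument skips values mu <= 0, for which the logarithm is
    # undefined; the corresponding entries are overwritten with NaN below.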
lambdas = np.log(param_range, where=param_range > 0)
lambdas[np.where(param_range <= 0)] = np.nan
else:
    raise ValueError("maptype %s not recognized" % maptype)
kwargs_e = {"emb_dim": 6, "matrix_dim": 2}
kwargs_r = {"emb_dim": 6, "lag": 2, "min_tsep": 20, "trajectory_len": 20}
lambdas_e = [max(nolds.lyap_e(d, **kwargs_e)) for d in full_data]
lambdas_r = [nolds.lyap_r(d, **kwargs_r) for d in full_data]
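  # bifurcation plot: for each parameter value we plot the last nbifur
  # iterates of the trajectory, i.e. the attractor that remains after the
  # initial transient has died out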
bifur_x = np.repeat(param_range, nbifur)
bifur = np.reshape(full_data[:, -nbifur:], nbifur * param_range.shape[0])
plt.title("Lyapunov exponent of the %s map" % maptype)
plt.plot(param_range, lambdas, "b-", label="true lyap. exponent")
elab = "estimation using lyap_e"
rlab = "estimation using lyap_r"
plt.plot(param_range, lambdas_e, color="#00AAAA", label=elab)
plt.plot(param_range, lambdas_r, color="#AA00AA", label=rlab)
plt.plot(param_range, np.zeros(len(param_range)), "g--")
plt.plot(bifur_x, bifur, "ro", alpha=0.1, label="bifurcation plot")
plt.ylim((-2, 2))
plt.xlabel(param_name)
plt.ylabel("lyap. exp / %s(x, %s)" % (maptype, param_name))
plt.legend(loc="best")
plt.show()
def profiling():
"""
Runs a profiling test for the function ``lyap_e`` (mainly used for
development)
  This function uses the module ``cProfile`` from the standard library.
"""
import cProfile
n = 10000
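  # test data: a random walk (cumulative sum of centered uniform noise)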
data = np.cumsum(np.random.random(n) - 0.5)
cProfile.runctx('lyap_e(data)', {'lyap_e': nolds.lyap_e}, {'data': data})
def hurst_compare_nvals(data, nvals=None):
"""
Creates a plot that compares the results of different choices for nvals
for the function hurst_rs.
Args:
data (array-like of float):
the input data from which the hurst exponent should be estimated
Kwargs:
nvals (array of int):
a manually selected value for the nvals parameter that should be plotted
in comparison to the default choices
"""
import matplotlib.pyplot as plt
data = np.asarray(data)
n_all = np.arange(2, len(data)+1)
dd_all = nolds.hurst_rs(data, nvals=n_all, debug_data=True, fit="poly")
dd_def = nolds.hurst_rs(data, debug_data=True, fit="poly")
n_def = np.round(np.exp(dd_def[1][0])).astype("int32")
n_div = n_all[np.where(len(data) % n_all[:-1] == 0)]
dd_div = nolds.hurst_rs(data, nvals=n_div, debug_data=True, fit="poly")
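  # The helper below computes log(E[(R/S)_n]) via nolds.expected_rs; this
  # expected value is subtracted from the measured log((R/S)_n) so that the
  # plots only show the deviation from the expected small-sample behavior.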
def corr(nvals):
return [np.log(nolds.expected_rs(n)) for n in nvals]
l_all = plt.plot(dd_all[1][0], dd_all[1][1] - corr(n_all), "o")
l_def = plt.plot(dd_def[1][0], dd_def[1][1] - corr(n_def), "o")
l_div = plt.plot(dd_div[1][0], dd_div[1][1] - corr(n_div), "o")
l_cst = []
t_cst = []
if nvals is not None:
dd_cst = nolds.hurst_rs(data, nvals=nvals, debug_data=True, fit="poly")
l_cst = plt.plot(dd_cst[1][0], dd_cst[1][1] - corr(nvals), "o")
t_cst = ["custom"]
plt.xlabel("log(n)")
plt.ylabel("log((R/S)_n - E[(R/S)_n])")
plt.legend(
l_all + l_def + l_div + l_cst, ["all", "default", "divisors"] + t_cst
)
  labeled_data = zip([dd_all[0], dd_def[0], dd_div[0]], ["all", "def", "div"])
  for h, label in labeled_data:
    print("%s: %.3f" % (label, h))
if nvals is not None:
print("custom: %.3f" % dd_cst[0])
plt.show()
def sampen_default_tolerance():
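  """
  Compares the sample entropy of a logistic map time series calculated with
  the old default tolerance (0.2 times the sample standard deviation) to the
  values obtained with the current default, for embedding dimensions 1 to 29,
  and prints the variance of both result series.
  """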
data = list(datasets.logistic_map(0.34, 1000, r=3.9))
oldtol = 0.2 * np.std(data, ddof=1)
old_res = [
nolds.sampen(data, emb_dim=i, tolerance=oldtol)
for i in range(1, 30)
]
new_res = [
nolds.sampen(data, emb_dim=i)
for i in range(1, 30)
]
for i, old, new in zip(range(1, 30), old_res, new_res):
print("emb_dim={} old={:.3f} corrected={:.3f}".format(i, old, new))
print(" old variance: {:.3f}".format(np.var(old_res)))
print("corrected variance: {:.3f}".format(np.var(new_res)))
def aste_line_fitting(N=100):
"""
  Shows a plot demonstrating that the line fitting in T. Aste's original
  MATLAB code provides the same results as `np.polyfit`.
"""
slope = np.random.random() * 10 - 5
intercept = np.random.random() * 100 - 50
xvals = np.arange(N)
yvals = xvals * slope + intercept + np.random.randn(N)*100
import matplotlib.pyplot as plt
plt.plot(xvals, yvals, "rx", label="data")
plt.plot(
[0, N-1], [intercept, intercept + slope * (N-1)],
"r-", label="true ({:.3f} x + {:.3f})".format(slope, intercept), alpha=0.5
)
i_aste, s_aste = nolds._aste_line_fit(xvals, yvals)
s_np, i_np = np.polyfit(xvals, yvals, 1)
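  # note: np.polyfit returns the coefficient of the highest power first,
  # hence the order (slope, intercept)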
plt.plot(
[0, N-1], [i_aste, i_aste + s_aste * (N-1)],
"b-", label="aste ({:.3f} x + {:.3f})".format(s_aste, i_aste), alpha=0.5
)
plt.plot(
[0, N-1], [i_np, i_np + s_np * (N-1)],
"g-", label="numpy ({:.3f} x + {:.3f})".format(s_np, i_np), alpha=0.5
)
plt.legend()
plt.show()
def hurst_mf_stock(debug=False):
"""
Recreates results from [mfs_1]_ (table at start of section 4) as print
output.
  Unfortunately, as a layman in finance, I could not determine the exact
  data that Di Matteo et al. used. Instead, I use the data from
  `nolds.datasets.load_financial()`.
  Prints H(2) for the following datasets and algorithms.
Datasets (opening values from `load_financial()`):
- jkse: Jakarta Composite Index
- n225: Nikkei 225
- ndx: NASDAQ 100
Algorithms:
- mfhurst_b: GHE according to Barabási et al.
- mfhurst_b + dt: like mfhurst_b, but with linear detrending performed first
- mfhurst_dm: GHE according to Di Matteo et al. (should be identical to
_genhurst)
- _genhurst: GHE according to translated MATLAB code by T. Aste (one of the
co-authors of Di Matteo).
References:
.. [mfs_1] T. Di Matteo, T. Aste, and M. M. Dacorogna, “Scaling behaviors
in differently developed markets,” Physica A: Statistical Mechanics
and its Applications, vol. 324, no. 1–2, pp. 183–188, 2003.
Kwargs:
debug (boolean):
if `True`, a debug plot will be shown for each calculated GHE value
except for the ones generated by `_genhurst`.
"""
print("Dataset mfhurst_b mfhurst_b + dt mfhurst_dm _genhurst")
financial = [
(datasets.jkse, "jkse"), (datasets.n225, "n225"), (datasets.ndx, "ndx")
]
for data, lab in financial:
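    # data[1] holds the actual price values; per the docstring above,
    # column 0 contains the opening values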
data = data[1][:, 0]
data = np.log(data)
dists = range(1, 20)
mfh_b = nolds.mfhurst_b(data, qvals=[2], dists=dists, debug_plot=debug)[0]
mfh_b_dt = nolds.mfhurst_b(
nolds.detrend_data(data, order=1),
qvals=[2], dists=dists, debug_plot=debug
)[0]
mfh_dm = nolds.mfhurst_dm(data, qvals=[2], debug_plot=debug)[0][0]
gh = nolds._genhurst(data, 2)
print("{:10s} {:5.3f} {:5.3f} {:5.3f} {:5.3f}".format(lab, mfh_b, mfh_b_dt, mfh_dm, gh))
def barabasi_1991_figure2():
"""
Recreates figure 2 from [bf2]_.
This figure compares calculated and estimated values for H(q) for
a fractal generated by 9 iterations of the `barabasi1991_fractal` function
with b1 = 0.8 and b2 = 0.5.
References:
.. [bf2] A.-L. Barabási and T. Vicsek, “Multifractality of self-affine
fractals,” Physical Review A, vol. 44, no. 4, pp. 2730–2733, 1991.
"""
import matplotlib.pyplot as plt
b1991 = datasets.barabasi1991_fractal(10000000, 9)
qvals = range(1, 11)
qvals_t = range(-10, 11)
b1 = 0.8
b2 = 0.5
dists = [4 ** i for i in range(6, 11)]
# dists = nolds.logarithmic_n(100, 0.01 * len(b1991), 2)
Hq = nolds.mfhurst_b(b1991, qvals=qvals, dists=dists)
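  # analytic solution from Barabási and Vicsek:
  # H(q) = log((b1^q + b2^q) / 2) / (q * log(1/4))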
Hq_t = [np.log((b1 ** q + b2 ** q) / 2) / np.log(0.25) / q for q in qvals_t]
plt.plot(qvals, Hq, "r+", label="mfhurst_b")
plt.plot(qvals_t, Hq_t, label="calculated value")
plt.legend(loc="best")
plt.xlabel("q")
plt.ylabel("H(q)")
plt.show()
def barabasi_1991_figure3():
"""
Recreates figure 3 from [bf3]_.
This figure compares calculated and estimated values for H(q) for a simple
Brownian motion that moves in unit steps (-1 or +1) in each time step.
References:
.. [bf3] A.-L. Barabási and T. Vicsek, “Multifractality of self-affine
fractals,” Physical Review A, vol. 44, no. 4, pp. 2730–2733, 1991.
"""
import matplotlib.pyplot as plt
brown = np.cumsum(np.random.randint(0, 2, size=10000000)*2-1)
qvals = [-5, -4, -3, -2, -1.1, 0.1, 1, 2, 3, 4, 5]
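  # analytic solution for Brownian motion:
  # H(q) = 0.5 for q > -1 and H(q) = -0.5 / q for q <= -1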
Hq_t = [0.5 if q > -1 else -0.5/q for q in qvals]
dists = [2 ** i for i in range(6, 15)]
# dists = nolds.logarithmic_n(100, 0.01 * len(brown), 1.5)
Hq = nolds.mfhurst_b(brown, qvals=qvals, dists=dists, debug_plot=False)
plt.plot(qvals, Hq, "r+", label="mfhurst_b")
plt.plot(qvals, Hq_t, label="calculated value")
plt.ylim(0, 1)
plt.legend(loc="best")
plt.xlabel("q")
plt.ylabel("H(q)")
plt.show()
def lorenz():
"""
Calculates different measures for the Lorenz system of ordinary
differential equations and compares nolds results with prescribed
results from the literature.
The Lorenz system is a three dimensional dynamical system given
by the following equations:
dx/dt = sigma * (y - x)
dy/dt = rho * x - y - x * z
dz/dt = x * y - beta * z
To test the reconstruction of higher-dimensional phenomena from
  one-dimensional data, the Lorenz system is simulated with a
simple Euler method and then the x-, y-, and z-values are used
as one-dimensional input for the nolds algorithms.
Parameters for Lorenz system:
- sigma = 10
- rho = 28
- beta = 8/3
- dt = 0.012
Algorithms:
- ``lyap_r`` with min_tsep=1000, emb_dim=5, tau=0.01, and lag=5 (see [l_4]_)
- ``lyap_e`` with min_tsep=1000, emb_dim=5, matrix_dim=5, and tau=0.01 (see [l_4]_)
- ``corr_dim`` with emb_dim=10, and fit=poly (see [l_1]_)
- ``hurst_rs`` with fit=poly (see [l_3]_)
- ``dfa`` with default parameters (see [l_5]_)
- ``sampen`` with default parameters (see [l_2]_)
References:
.. [l_1] P. Grassberger and I. Procaccia, “Measuring the strangeness
of strange attractors,” Physica D: Nonlinear Phenomena, vol. 9,
no. 1, pp. 189–208, 1983.
.. [l_2] F. Kaffashi, R. Foglyano, C. G. Wilson, and K. A. Loparo,
“The effect of time delay on Approximate & Sample Entropy
calculations,” Physica D: Nonlinear Phenomena, vol. 237, no. 23,
pp. 3069–3074, 2008, doi: 10.1016/j.physd.2008.06.005.
.. [l_3] V. Suyal, A. Prasad, and H. P. Singh, “Nonlinear Time Series
Analysis of Sunspot Data,” Sol Phys, vol. 260, no. 2, pp. 441–449,
2009, doi: 10.1007/s11207-009-9467-x.
.. [l_4] G. A. Leonov and N. V. Kuznetsov, “On differences and
similarities in the analysis of Lorenz, Chen, and Lu systems,”
Applied Mathematics and Computation, vol. 256, pp. 334–343, 2015,
doi: 10.1016/j.amc.2014.12.132.
.. [l_5] S. Wallot, J. P. Irmer, M. Tschense, N. Kuznetsov, A. Højlund,
and M. Dietz, “A Multivariate Method for Dynamic System Analysis:
Multivariate Detrended Fluctuation Analysis Using Generalized Variance,”
Topics in Cognitive Science, p. tops.12688, Sep. 2023,
doi: 10.1111/tops.12688.
"""
import matplotlib.pyplot as plt
sigma = 10
rho = 28
beta = 8.0/3
start = [0, 22, 10]
n = 10000
skip = 10000
dt = 0.012
data = datasets.lorenz_euler(n + skip, sigma, rho, beta, start=start, dt=dt)[skip:]
# fig = plt.figure()
# ax = fig.add_subplot(111, projection="3d")
# ax.plot(data[:, 0], data[:, 1], data[:, 2])
# plt.show()
# plt.close(fig)
lyap_expected = datasets.lorenz_lyap(sigma, rho, beta)
# Rationale for argument values:
  # start with medium settings for min_tsep and lag, span a large area with
  # trajectory_len, set fit_offset to 0
# up the embedding dimension until you get a clear line in the debug plot
# adjust trajectory_len and fit_offset to split off only the linear part
# in general: the longer the linear part of the plot, the better
lyap_r_args = dict(min_tsep=10, emb_dim=5, tau=dt, lag=5, trajectory_len=28, fit_offset=8, fit="poly")
lyap_rx = nolds.lyap_r(data[:, 0], **lyap_r_args)
lyap_ry = nolds.lyap_r(data[:, 1], **lyap_r_args)
lyap_rz = nolds.lyap_r(data[:, 2], **lyap_r_args)
# Rationale for argument values:
# Start with emb_dim=matrix_dim, medium min_tsep and min_nb
# After that, no good guidelines for stability. :(
# -> Just experiment with settings until you get close to expected value. ¯\_(ツ)_/¯
# NOTE: It seems from this example and `lyapunov-logistic` that lyap_e has a scaling problem.
lyap_e_args = dict(min_tsep=10, emb_dim=5, matrix_dim=5, tau=dt, min_nb=8)
lyap_ex = nolds.lyap_e(data[:, 0], **lyap_e_args)
lyap_ey = nolds.lyap_e(data[:, 1], **lyap_e_args)
lyap_ez = nolds.lyap_e(data[:, 2], **lyap_e_args)
print("Expected Lyapunov exponent: ", lyap_expected)
print("lyap_r(x) : ", lyap_rx)
print("lyap_r(y) : ", lyap_ry)
print("lyap_r(z) : ", lyap_rz)
print("lyap_e(x) : ", lyap_ex)
print("lyap_e(y) : ", lyap_ey)
print("lyap_e(z) : ", lyap_ez)
print()
# Rationale for argument values:
# Start with moderate settings for lag and a large span of rvals.
# Increase emb_dim until you get a clear line in the debug plot
# Clip rvals to select only the linear part of the plot.
  # Increase lag as long as it increases the output. Stop when the output
  # becomes smaller (or when you feel that the lag is unreasonably large).
rvals = nolds.logarithmic_r(1, np.e, 1.1) # determined experimentally
corr_dim_args = dict(emb_dim=5, lag=10, fit="poly", rvals=rvals)
cdx = nolds.corr_dim(data[:, 0], **corr_dim_args)
cdy = nolds.corr_dim(data[:, 1], **corr_dim_args)
cdz = nolds.corr_dim(data[:, 2], **corr_dim_args)
# reference Grassberger-Procaccia 1983
print("Expected correlation dimension: 2.05")
print("corr_dim(x) : ", cdx)
print("corr_dim(y) : ", cdy)
print("corr_dim(z) : ", cdz)
print()
# Rationale for argument values:
# Start with a large range of nvals.
  # Reduce those down by cutting off the first few data points and then only
  # keep the linear-ish looking part of the initial rise.
hurst_rs_args = dict(fit="poly", nvals=nolds.logarithmic_n(10, 70, 1.1))
hx = nolds.hurst_rs(data[:, 0], **hurst_rs_args)
hy = nolds.hurst_rs(data[:, 1], **hurst_rs_args)
hz = nolds.hurst_rs(data[:, 2], **hurst_rs_args)
# reference: Suyal 2009
print("Expected hurst exponent: 0.64 < H < 0.93")
print("hurst_rs(x) : ", hx)
print("hurst_rs(y) : ", hy)
print("hurst_rs(z) : ", hz)
print()
# reference: Wallot 2023, Table 1
# Rationale for argument values: Just follow paper
  # NOTE: Since DFA is quite fast and Wallot 2023 uses different initial
  # values (x = y = z = 0.1 + e) and a different amount of data (100k data
  # points, 1000 runs) and does not report the step size, we use different
  # data here.
data_dfa = datasets.lorenz_euler(120000, 10, 28, 8/3.0, start=[0.1,0.1,0.1], dt=0.002)[20000:]
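  # box sizes from 200 up to one eighth of the data length, growing by a
  # factor of 2^0.2 per step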
nvals = nolds.logarithmic_n(200, len(data_dfa)/8, 2**0.2)
dfa_args = dict(nvals=nvals, order=2, overlap=False, fit_exp="poly")
dx = nolds.dfa(data_dfa[:, 0], **dfa_args)
dy = nolds.dfa(data_dfa[:, 1], **dfa_args)
dz = nolds.dfa(data_dfa[:, 2], **dfa_args)
print("Expected hurst parameter: [1.008 ±0.016, 0.926 ±0.016, 0.650 ±0.22]")
print("dfa(x) : ", dx)
print("dfa(y) : ", dy)
print("dfa(z) : ", dz)
print()
# reference: Kaffashi 2008
# Rationale for argument values: Just follow paper.
sampen_args = dict(emb_dim=2, lag=1)
sx = nolds.sampen(data[:, 0], **sampen_args)
sy = nolds.sampen(data[:, 1], **sampen_args)
sz = nolds.sampen(data[:, 2], **sampen_args)
print("Expected sample entropy: [0.15, 0.15, 0.25]")
print("sampen(x): ", sx)
print("sampen(y): ", sy)
print("sampen(z): ", sz)
if __name__ == "__main__":
# run this with the following command:
# python -m nolds.examples lyapunov-logistic
import sys
def print_options():
print("options are:")
print(" lyapunov-logistic")
print(" lyapunov-tent")
print(" profiling")
print(" hurst-weron2")
print(" hurst-hist")
print(" hurst-nvals")
print(" sampen-tol")
print(" aste-line")
print(" hurst-mf-stock")
print(" lorenz")
if len(sys.argv) < 2:
print("please tell me which tests you want to run")
print_options()
elif sys.argv[1] == "lyapunov-logistic":
plot_lyap()
elif sys.argv[1] == "lyapunov-tent":
plot_lyap("tent")
elif sys.argv[1] == "profiling":
profiling()
elif sys.argv[1] == "hurst-weron2":
n = 1000 if len(sys.argv) < 3 else int(sys.argv[2])
weron_2002_figure2(n)
elif sys.argv[1] == "hurst-hist":
plot_hurst_hist()
elif sys.argv[1] == "hurst-nvals":
hurst_compare_nvals(datasets.brown72)
elif sys.argv[1] == "sampen-tol":
sampen_default_tolerance()
elif sys.argv[1] == "aste-line":
aste_line_fitting()
elif sys.argv[1] == "hurst-mf-stock":
hurst_mf_stock()
elif sys.argv[1] == "hurst-mf-barabasi2":
barabasi_1991_figure2()
elif sys.argv[1] == "hurst-mf-barabasi3":
barabasi_1991_figure3()
elif sys.argv[1] == "lorenz":
lorenz()
else:
print("i do not know any test of that name")
print_options()