diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 392baebf..da54bca5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -92,6 +92,7 @@ jobs: src/mdtools/statistics.py \ src/mdtools/structure.py \ src/mdtools/version.py \ + scripts/discretization/discrete_pos.py \ scripts/discretization/plot_*.py \ scripts/discretization/state_lifetime* \ scripts/dynamics/lifetime_autocorr* \ diff --git a/.vscode/settings.json b/.vscode/settings.json index 9e095781..96702638 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -6,6 +6,7 @@ "cSpell.ignoreWords": [ "Andreas", "Autoregressive", + "BINFILE", "BLANKLINE", "CBAR", "CMPS", @@ -18,6 +19,7 @@ "GROMACS", "GitHub", "GitLab", + "Golay", "Gromacs", "Kimms", "LZMA", @@ -31,6 +33,7 @@ "RMSD", "Rahman", "SLURM", + "Savitzky", "TOPFILE", "TRJFILE", "Thum", @@ -43,7 +46,9 @@ "arange", "arccos", "argmax", + "argmin", "argparse", + "argsort", "asarray", "ascontiguousarray", "astype", @@ -56,6 +61,7 @@ "autodoc", "autosummary", "autoupdate", + "axhline", "basex", "basey", "biggl", @@ -71,12 +77,15 @@ "cffconvert", "cmap", "cmps", + "colname", + "colnum", "colorama", "colorbar", "colorbars", "columnspacing", "commited", "confint", + "contig", "copyto", "cumav", "cumsum", @@ -85,14 +94,17 @@ "docstring", "docstrings", "doctest", + "doctests", "dtrj", "dtype", "edgecolor", "einsum", + "ekin", "endfit", "errorbar", "ezavod", "facecolor", + "flatnonzero", "fontsize", "fragindices", "frameon", @@ -118,6 +130,8 @@ "lbox", "linalg", "linestyle", + "linestyles", + "linewidth", "linspace", "loadtxt", "logx", @@ -154,7 +168,9 @@ "nblocks", "ncols", "ndarray", + "ndarrays", "ndim", + "ndimage", "noqa", "normaltest", "nosignatures", @@ -163,6 +179,7 @@ "offsetbox", "orcid", "outfiles", + "overfitting", "pcolormesh", "pcov", "pdftoppm", @@ -188,11 +205,14 @@ "resnames", "rfft", "rfftfreq", + "rmsd", "rtol", "savefig", "savetxt", + "savgol", "scipy", "sdist", + "searchsorted", "segids", "segindex", "segindices", @@ -210,12 +230,16 @@ "subsy", "taurenmd", "texlive", + "tfic", "titlesonly", + "tlic", "toarray", "toctree", "tqdm", "trapz", "tril", + "triu", + "trjfile", "ufunc", "unbuffered", "uninstallation", diff --git a/AUTHORS.rst b/AUTHORS.rst index 2bc22341..1a531d85 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -30,6 +30,10 @@ Upcoming release :data:`mdtools._metadata.__author__` and in the CITATION.cff file. * Andreas Thum +0.0.6.0 + + * Andreas Thum + 0.0.5.0 * Andreas Thum diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index a7360654..82e9703c 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -6,5 +6,5 @@ # on sphinx >=1.6, <6.0 # => The sphix requirement must not be >=6.0 -sphinx >=5.0, <7.0 +sphinx >=5.0, <8.0 sphinx-rtd-theme >=1.0, <2.0 diff --git a/docs/source/doc_pages/dev_guide/guides_and_good_practices.rst b/docs/source/doc_pages/dev_guide/guides_and_good_practices.rst index 8e1a5185..7990d9fd 100644 --- a/docs/source/doc_pages/dev_guide/guides_and_good_practices.rst +++ b/docs/source/doc_pages/dev_guide/guides_and_good_practices.rst @@ -162,7 +162,7 @@ already established for research data to research software: https://github.com/skills/resolve-merge-conflicts .. _Hello GitHub Actions: https://github.com/skills/hello-github-actions .. _Continuous Integration: - https://github.com/skills/continuous-integration + https://github.com/skills/test-with-actions .. _The Turing Way: https://the-turing-way.netlify.app/index.html .. _Open Science Training Handbook: https://open-science-training-handbook.gitbook.io/book/ diff --git a/scripts/discretization/discrete_pos.py b/scripts/discretization/discrete_pos.py index c643c6db..93b36877 100644 --- a/scripts/discretization/discrete_pos.py +++ b/scripts/discretization/discrete_pos.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # This file is part of MDTools. -# Copyright (C) 2021 The MDTools Development Team and all contributors -# listed in the file AUTHORS.rst +# Copyright (C) 2021-2023 The MDTools Development Team and all +# contributors listed in the file AUTHORS.rst # # MDTools is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the @@ -19,7 +19,7 @@ r""" -Create a discrete posotion trajectory. +Create a discrete position trajectory. Discretize the positions of compounds of a MDAnalysis :class:`~MDAnalysis.core.groups.AtomGroup` in a given spatial @@ -30,88 +30,99 @@ * Allow to choose between center of mass and center of geometry (This feature has to be implemented in :func:`mdtools.structure.discrete_pos_trj`). - * Finish docstring. Options ------- --f Trajectory file. See |supported_coordinate_formats| of - MDAnalysis. --s Topology file. See |supported_topology_formats| of - MDAnalysis. --o Output filename for the discrete trajectory. The discrete - trajectory is written as binary :file:`dtrj.npy` file in a - a compressed |npz_archive| of the given filename. The - discrete trajectory is stored as :class:`numpy.ndarray` of - dtype :attr:`numpy.uint32` and shape ``(n, f)``, where ``n`` - is the number of reference compounds and ``f`` is the number - of frames. The elements of the discrete trajectory are the - states in which a given compound resides at a given frame. ---bins-out Output filename for the bin edges (optional). If provided, - the (average) bin edges used for creating the discrete - trajectory are written to a text file of the given filename. --b First frame to read from the trajectory. Frame numbering - starts at zero. Default: ``0``. --e Last frame to read from the trajectory. This is exclusive, - i.e. the last frame read is actually ``END - 1``. A value - of ``-1`` means to read the very last frame. Default: - ``-1``. ---every Read every n-th frame from the trajectory. Default: ``1``. ---sel Selection string to select a group of atoms for the analysis. - See MDAnalysis' |selection_syntax| for possible choices. ---cmp {'group', 'segments', 'residues', 'fragments', 'atoms'} +-f + Trajectory file. See |supported_coordinate_formats| of MDAnalysis. +-s + Topology file. See |supported_topology_formats| of MDAnalysis. +-o + Output filename for the discrete trajectory. The discrete + trajectory is written as binary :file:`dtrj.npy` file in a a + compressed |npz_archive| of the given filename. The discrete + trajectory is stored as :class:`numpy.ndarray` of dtype + :attr:`numpy.uint32` and shape ``(n, f)``, where ``n`` is the number + of reference compounds and ``f`` is the number of frames. The + elements of the discrete trajectory are the states in which a given + compound resides at a given frame. +--bins-out + Output filename for the bin edges (optional). If provided, the + (average) bin edges used for creating the discrete trajectory are + written to a text file of the given filename. +-b + First frame to read from the trajectory. Frame numbering starts at + zero. Default: ``0``. +-e + Last frame to read from the trajectory. This is exclusive, i.e. the + last frame read is actually ``END - 1``. A value of ``-1`` means to + read the very last frame. Default: ``-1``. +--every + Read every n-th frame from the trajectory. Default: ``1``. +-sel + Selection string to select a group of atoms for the analysis. See + MDAnalysis' |selection_syntax| for possible choices. +--cmp + {'group', 'segments', 'residues', 'fragments', 'atoms'} - The compounds of the selection group whose center of mass - positions should be discretized. Compounds can be 'group' - (the entire selection group), 'segments', 'residues', - 'fragments', or 'atoms'. Refer to the MDAnalysis' user - guide for an |explanation_of_these_terms|. Compounds are - made whole before calculating their centers of mass. The - centers of mass are wrapped back into the primary unit cell - before discretizing their positions. Note that in any case, - even if ``CMP`` is e.g. 'residues', only the atoms belonging - to the selection group are taken into account, even if the - compound might comprise additional atoms that are not - contained in the selection group. Default: ``'atoms'`` --d {'x', 'y', 'z'} + The compounds of the selection group whose center of mass positions + should be discretized. Compounds can be 'group' (the entire + selection group), 'segments', 'residues', 'fragments', or 'atoms'. + Refer to the MDAnalysis' user guide for an + |explanation_of_these_terms|. Compounds are made whole before + calculating their centers of mass. The centers of mass are wrapped + back into the primary unit cell before discretizing their positions. + Note that in any case, even if ``CMP`` is e.g. 'residues', only the + atoms belonging to the selection group are taken into account, even + if the compound might comprise additional atoms that are not + contained in the selection group. Default: ``'atoms'``. +-d {'x', 'y', 'z'} - Direction. The spatial direction in which to bin the - positions of the reference compounds. Default: ``'z'`` + Direction. The spatial direction in which to bin the positions of + the reference compounds. Default: ``'z'``. --bin-start - Point (in Angstrom) on the chosen spatial direction to start - binning. Note that binning naturally starts at zero (origin - of the simulation box). If parsing a start value greater - than zero, the first bin interval will be ``[0, START)``. - In this way you can determine the width of the first bin - independently from the other bins. Note that ``START`` must - lie within the simulation box obtained from the first frame - read and it must be smaller than ``STOP``. Default: ``0`` ---bin-end Point (in Angstrom) on the chosen spatial direction to stop - binning. Note that binning naturally ends at ``lbox + tol`` - (length of the simulation box in the given spatial direction - plus a small tolerance to account for the right-open bin - interval). If parsing a value less than ``lbox``, the last - bin interval will be ``[STOP, lbox+tol)``. In this way you - can determine the width of the last bin independently from - the other bins. Note that ``STOP`` must lie within the - simulation box obtained from the first frame read and it - must be greater than ``START``. Default: ``lbox + tol`` ---bin-num Number of equidistant bins (not bin edges!) to use for - discretizing the given spatial direction between ``START`` - and ``STOP``. Note that two additional bins, ``[0, START)`` - and ``[STOP, lbox+tol)``, are created if ``START`` is not - zero and ``STOP`` is not ``lbox``. Default: ``10`` ---bins Text file containing custom bin edges (in Angstrom). Bin - edges are read from the first column, characters following a - '#' are ignored. Bins do not need to be equidistant. All - bin edges must lie within the simulation box as obtained - from the first frame read. If \--bins is given, it takes - precedence over all other \--bin* flags. ---debug Run in :ref:`debug mode `. + Point (in Angstrom) on the chosen spatial direction to start + binning. Note that binning naturally starts at zero (origin of the + simulation box). If parsing a start value greater than zero, the + first bin interval will be ``[0, START)``. In this way you can + determine the width of the first bin independently from the other + bins. Note that ``START`` must lie within the simulation box + obtained from the first frame read and it must be smaller than + ``STOP``. Default: ``0`` +--bin-end + Point (in Angstrom) on the chosen spatial direction to stop binning. + Note that binning naturally ends at ``lbox + tol`` (length of the + simulation box in the given spatial direction plus a small tolerance + to account for the right-open bin interval). If parsing a value + less than ``lbox``, the last bin interval will be ``[STOP, + lbox+tol)``. In this way you can determine the width of the last + bin independently from the other bins. Note that ``STOP`` must lie + within the simulation box obtained from the first frame read and it + must be greater than ``START``. Default: ``lbox + tol``. +--bin-num + Number of equidistant bins (not bin edges!) to use for discretizing + the given spatial direction between ``START`` and ``STOP``. Note + that two additional bins, ``[0, START)`` and ``[STOP, lbox+tol)``, + are created if ``START`` is not zero and ``STOP`` is not ``lbox``. + Default: ``10``. +--bins + Text file containing custom bin edges (in Angstrom). Bin edges are + read from the first column, characters following a '#' are ignored. + Bins do not need to be equidistant. All bin edges must lie within + the simulation box as obtained from the first frame read. The given + bin edges are sorted in ascending order and and duplicate bin edges + are removed. If \--bins is given, it takes precedence over all + other \--bin* flags. +--tol + The tolerance value added to ``lbox`` to account for the right-open + bin interval of the last bin. Default: ``1e-6``. +--debug + Run in :ref:`debug mode `. See Also -------- :func:`mdtools.structure.discrete_pos_trj` : - Function that creates a discrete posotion trajectory + Function that creates a discrete position trajectory Notes ----- @@ -119,13 +130,9 @@ writes its output to disk. The **simulation box must be orthogonal**, otherwise the discretization -of the center of mass positions of the reference compounds does not work. -For more details about the discretization see the Notes section of -:func:`mdtools.structure.discrete_pos_trj`. - -Examples --------- -TODO +of the center of mass positions of the reference compounds does not +work. For more details about the discretization see the Notes section +of :func:`mdtools.structure.discrete_pos_trj`. """ @@ -133,155 +140,182 @@ # Standard libraries -import sys +import argparse import os +import sys from datetime import datetime, timedelta -# Third party libraries -import psutil -import argparse + +# Third-party libraries import numpy as np -# Local application/library specific imports +import psutil + +# First-party libraries import mdtools as mdt if __name__ == "__main__": timer_tot = datetime.now() proc = psutil.Process() - proc.cpu_percent() # Initiate monitoring of CPU usage + proc.cpu_percent() # Initiate monitoring of CPU usage. parser = argparse.ArgumentParser( description=( - "Create a discrete posotion trajectory. For more" - " information, refer to the documetation of this script." + "Create a discrete position trajectory. For more information," + " refer to the documentation of this script." ) ) parser.add_argument( - '-f', - dest='TRJFILE', + "-f", + dest="TRJFILE", type=str, required=True, - help="Trajectory file." + help="Trajectory file.", ) parser.add_argument( - '-s', - dest='TOPFILE', + "-s", + dest="TOPFILE", type=str, required=True, - help="Topology file." + help="Topology file.", ) parser.add_argument( - '-o', - dest='OUTFILE', + "-o", + dest="OUTFILE", type=str, required=True, - help="Output filename for the discrete trajectory." + help="Output filename for the discrete trajectory.", ) parser.add_argument( - '--bins-out', - dest='OUTFILE_BINS', + "--bins-out", + dest="OUTFILE_BINS", type=str, required=False, default=None, - help="Output filename for the bin edges (optional)." + help="Output filename for the bin edges (optional).", ) parser.add_argument( - '-b', - dest='BEGIN', + "-b", + dest="BEGIN", type=int, required=False, default=0, - help="First frame to read from the trajectory. Frame numbering" - " starts at zero. Default: %(default)s." + help=( + "First frame to read from the trajectory. Frame numbering starts" + " at zero. Default: %(default)s." + ), ) parser.add_argument( - '-e', - dest='END', + "-e", + dest="END", type=int, required=False, default=-1, - help="Last frame to read from the trajectory (exclusive)." - " Default: %(default)s." + help=( + "Last frame to read from the trajectory (exclusive). Default:" + " %(default)s." + ), ) parser.add_argument( - '--every', - dest='EVERY', + "--every", + dest="EVERY", type=int, required=False, default=1, - help="Read every n-th frame from the trajectory. Default:" - " %(default)s." + help=( + "Read every n-th frame from the trajectory. Default: %(default)s." + ), ) parser.add_argument( - '--sel', - dest='SEL', + "--sel", + dest="SEL", type=str, - nargs='+', + nargs="+", required=True, - help="Selection string." + help="Selection string.", ) parser.add_argument( - '--cmp', - dest='CMP', + "--cmp", + dest="CMP", type=str, required=False, - choices=('group', 'segments', 'residues', 'fragments', 'atoms'), - default='atoms', - help="The compounds of the selection group whose center of mass" - " positions should be discretized. Default: %(default)s" + choices=("group", "segments", "residues", "fragments", "atoms"), + default="atoms", + help=( + "The compounds of the selection group whose center of mass" + " positions should be discretized. Default: %(default)s." + ), ) parser.add_argument( - '-d', - dest='DIRECTION', + "-d", + dest="DIRECTION", type=str, required=False, - choices=('x', 'y', 'z'), - default='z', - help="Direction for binning. Default: %(default)s" + choices=("x", "y", "z"), + default="z", + help="Direction for binning. Default: %(default)s.", ) parser.add_argument( - '--bin-start', - dest='START', + "--bin-start", + dest="START", type=float, required=False, default=0, - help="Point (in Angstrom) on the chosen spatial direction to" - " start binning. Default: %(default)s" + help=( + "Point (in Angstrom) on the chosen spatial direction to start" + " binning. Default: %(default)s." + ), ) parser.add_argument( - '--bin-end', - dest='STOP', + "--bin-end", + dest="STOP", type=float, required=False, default=None, - help="Point (in Angstrom) on the chosen spatial direction to" - " stop binning. Default: lbox+tol" + help=( + "Point (in Angstrom) on the chosen spatial direction to stop" + " binning. Default: lbox + tol." + ), ) parser.add_argument( - '--bin-num', - dest='NUM', + "--bin-num", + dest="NUM", type=int, required=False, default=10, - help="Number of equidistant bins (not bin edges!) to use for" - " discretizing the given spatial direction between START" - " and STOP. Default: %(default)s" + help=( + "Number of equidistant bins (not bin edges!) to use for" + " discretizing the given spatial direction between START and STOP." + " Default: %(default)s." + ), ) parser.add_argument( - '--bins', - dest='BINFILE', + "--bins", + dest="BINFILE", type=str, required=False, default=None, - help="Text file containing custom bin edges (in Angstrom). If" - " --bins is given, it takes precedence over all other" - " --bin* flags." + help=( + "Text file containing custom bin edges (in Angstrom). If --bins" + " is given, it takes precedence over all other --bin* flags." + ), ) parser.add_argument( - '--debug', - dest='DEBUG', + "--tol", + dest="TOL", + type=float, + required=False, + default=1e-6, + help=( + "The tolerance value added to lbox to account for the right-open" + " bin interval of the last bin. Default: %(default)s." + ), + ) + parser.add_argument( + "--debug", + dest="DEBUG", required=False, default=False, - action='store_true', - help="Run in debug mode." + action="store_true", + help="Run in debug mode.", ) args = parser.parse_args() print(mdt.rti.run_time_info_str()) @@ -289,10 +323,12 @@ bins = None else: bins = np.loadtxt(args.BINFILE, usecols=0) + if args.TOL < 0: + raise ValueError("--tol ({}) must not be negative.".format(args.TOL)) print("\n") dtrj, bins, lbox_av, time_step = mdt.strc.discrete_pos_trj( - sel=' '.join(args.SEL), + sel=" ".join(args.SEL), topfile=args.TOPFILE, trjfile=args.TRJFILE, begin=args.BEGIN, @@ -304,12 +340,13 @@ bin_stop=args.STOP, bin_num=args.NUM, bins=bins, + tol=args.TOL, return_bins=True, return_lbox=True, return_dt=True, dtype=np.uint32, verbose=True, - debug=args.DEBUG + debug=args.DEBUG, ) print("\n") @@ -320,28 +357,25 @@ print("Created {}".format(args.OUTFILE)) # Bin edges if args.OUTFILE_BINS is not None: - header = ("Bin edges in Angstrom\n" - "Number of bin edges: {:8d}".format(N_CMPS)) + print("Total number of frames: {:>8d}".format(N_FRAMES_TOT)) + print("Frames to read: {:>8d}".format(N_FRAMES)) + print("First frame to read: {:>8d}".format(BEGIN)) + print("Last frame to read: {:>8d}".format(END - 1)) + print("Read every n-th frame: {:>8d}".format(EVERY)) + print("Number of blocks: {:>8d}".format(NBLOCKS)) + print("Frames per block: {:>8d}".format(blocksize)) timer = datetime.now() timer_block = datetime.now() prob = np.full((NBLOCKS, blocksize), np.nan, dtype=float) diff --git a/scripts/discretization/state_lifetime_discrete.py b/scripts/discretization/state_lifetime_discrete.py index b03fba7d..3a7c1a91 100644 --- a/scripts/discretization/state_lifetime_discrete.py +++ b/scripts/discretization/state_lifetime_discrete.py @@ -335,6 +335,12 @@ print("\n") print("Calculating remain probability...") + print("Number of compounds: {:>8d}".format(N_CMPS)) + print("Total number of frames: {:>8d}".format(N_FRAMES_TOT)) + print("Frames to read: {:>8d}".format(N_FRAMES)) + print("First frame to read: {:>8d}".format(BEGIN)) + print("Last frame to read: {:>8d}".format(END - 1)) + print("Read every n-th frame: {:>8d}".format(EVERY)) timer = datetime.now() prob = mdt.dtrj.remain_prob_discrete( dtrj1=dtrj1, diff --git a/scripts/other/attribute_hist.py b/scripts/other/attribute_hist.py index 956bf41d..098d6c5a 100644 --- a/scripts/other/attribute_hist.py +++ b/scripts/other/attribute_hist.py @@ -168,7 +168,6 @@ def _fit_gaussian(xdata, ydata, p0=None): p0=p0, bounds=[(-np.inf, 0), (np.inf, np.inf)], ) - perr = np.sqrt(np.diag(pcov)) except (ValueError, RuntimeError) as err: print("An error has occurred during fitting:") print("{}".format(err), flush=True) @@ -177,20 +176,46 @@ def _fit_gaussian(xdata, ydata, p0=None): popt = np.full(2, np.nan) perr = np.full_like(popt, np.nan) else: + perr = np.sqrt(np.diag(pcov)) fit = mdt.stats.gaussian(xdata, *popt) return fit, popt, perr -def _fit_mb(xdata, ydata): - """Fit the given data by a Maxwell-Boltzmann speed distribution.""" +def _fit_mb(xdata, ydata, temp=None, mass=None): + r""" + Fit the given data by a Maxwell-Boltzmann speed distribution. + + Parameters + ---------- + xdata, ydata : array_like + The data to fit. + temp : float, optional + Temperature of the system in Kelvin. Used for an initial guess + of :math:`\sigma^2 = kT/m`. + mass : float, optional + Mass of the compound in kilograms. Used for an initial guess of + :math:`\sigma^2 = kT/m`. + + Returns + ------- + fit : numpy.ndarray + The y values of the fit. + popt : numpy.ndarray + The fit parameters. + perr : numpy.ndarray + The standard deviation of the fit parameters. + """ func = lambda v, var, drift: mdt.stats.mb_dist( # noqa: E731 v=v, var=var, drift=drift ) + if temp is None: + temp = 273.15 + 25.0 # Default temperature in K. + if mass is None: + mass = 12 * constants.atomic_mass # Default mass in kg. + # Initial guess for `var`: var = kT/m. The factor 1e-4 comes from + # the conversion of (m/s)^2 to (A/ps)^2. + var_guess = 1e-4 * constants.k * temp / mass try: - # Initial guess for `var` is kT/m with T = 273 K and m = 12 u. - # The factor 1e-4 comes from the conversion of (m/s)^2 to - # (A/ps)^2. - var_guess = constants.k * 273 / (12 * constants.atomic_mass) * 1e-4 popt, pcov = optimize.curve_fit( func, xdata=xdata, @@ -198,7 +223,6 @@ def _fit_mb(xdata, ydata): p0=(var_guess, 0), bounds=[(0, 0), (np.inf, np.inf)], ) - perr = np.sqrt(np.diag(pcov)) except (ValueError, RuntimeError) as err: print("An error has occurred during fitting:") print("{}".format(err), flush=True) @@ -207,6 +231,7 @@ def _fit_mb(xdata, ydata): popt = np.full(2, np.nan) perr = np.full_like(popt, np.nan) else: + perr = np.sqrt(np.diag(pcov)) fit = func(xdata, *popt) return fit, popt, perr @@ -672,19 +697,20 @@ def _fit_mb(xdata, ydata): + "{:<14s} {:>50.9e}\n".format("Fit param StD:", perr[1]) ) elif i == N_HISTS - 1 and args.ATTR == "velocities": - # Fit histogram of the Euclidean norm by a Maxwell-Boltzmann - # speed distribution. - # `bin_mids` is given in [A/ps]. - fit, popt, perr = _fit_mb(xdata=bin_mids, ydata=hist_normed) - data = np.column_stack([data, fit]) - aps2ms = 1e2 # Conversion factor [A/ps] -> [m/s]. - ms2aps = 1 / aps2ms # Conversion factor [m/s] -> [A/ps]. - sigma2_ms = popt[0] * aps2ms**2 # sigma^2 in [(m/s)^2]. mass = mdt.strc.cmp_attr( sel, cmp=args.CMP, attr="masses", weights="total" ) mass = np.nanmean(mass) # Mass in [u]. mass_kg = mass * constants.atomic_mass # Mass in [kg]. + # Fit histogram of the Euclidean norm by a Maxwell-Boltzmann + # speed distribution. `bin_mids` is given in [A/ps]. + fit, popt, perr = _fit_mb( + xdata=bin_mids, ydata=hist_normed, mass=mass_kg + ) + data = np.column_stack([data, fit]) + aps2ms = 1e2 # Conversion factor [A/ps] -> [m/s]. + ms2aps = 1 / aps2ms # Conversion factor [m/s] -> [A/ps]. + sigma2_ms = popt[0] * aps2ms**2 # sigma^2 in [(m/s)^2]. temp = mass_kg * sigma2_ms / constants.k # Temperature in [K]. v_p = np.sqrt(2 * constants.k * temp / mass_kg) v_p *= ms2aps # Most probable speed in [A/ps]. diff --git a/src/mdtools/check.py b/src/mdtools/check.py index 0c127b08..1c5281ad 100644 --- a/src/mdtools/check.py +++ b/src/mdtools/check.py @@ -18,7 +18,7 @@ """ Functions for debugging and checking user input or arguments parsed to -other functions, clases, etc. +other functions, classes, etc. """ @@ -99,7 +99,7 @@ def array( Or if the given combination of `shape`, `dim` and `axis` is invalid; `amax` is less than `amin`. - :exc:`numpy.AxisError` + :exc:`numpy.exceptions.AxisError` If `axis` is not ``None`` and the axis is out of bounds for `a`. TypeError If `dtype` is not ``None`` and the data type of `a` is not @@ -644,9 +644,9 @@ def list_of_cms( :class:`NumPy arrays ` and :mod:`SciPy sparse matrices ` is not permitted. shape : tuple, optional - The shape expected for the arrays in `cms`. Default is ``None``, - which means that it is only checked whether all arrays in `cms` - have the same shape, but not what shape that is. + The shape expected for the arrays in `cms`. Default is + ``None``, which means that it is only checked whether all arrays + in `cms` have the same shape, but not what shape that is. dim : int, optional The dimension expected for the arrays in `cms`. If ``None``, the dimension is not checked. @@ -674,7 +674,8 @@ def list_of_cms( `dim` is not ``None`` and the dimension of the arrays in `cms` is not `dim`; The arrays in `cms` contain elements that are less than `amin`; - The arrays in `cms` contain elements that are greater than `amax`. + The arrays in `cms` contain elements that are greater than + `amax`. Or if the given combination of `shape` and `dim` is not satisfiable by @@ -737,8 +738,16 @@ def list_of_cms( return cms -def bins(start, stop, step=None, num=None, amin=0, amax=None, - precision=9, verbose=True): +def bins( # noqa: C901 + start, + stop, + step=None, + num=None, + amin=0, + amax=None, + precision=9, + verbose=True, +): """ Check if start point, end point and step width or number of bins are chosen properly for creating bin edges. @@ -815,11 +824,14 @@ def bins(start, stop, step=None, num=None, amin=0, amax=None, """ # Check input parameters: if step is None and num is None: - raise ValueError("Either 'step' ({}) or 'num' ({}) or both of" - " them must be given".format(step, num)) + raise ValueError( + "Either `step` ({}) or `num` ({}) or both of them must be" + " given".format(step, num) + ) if amin is not None and amax is not None and amax <= amin: - raise ValueError("amax ({}) must be greater than amin ({})" - .format(amax, amin)) + raise ValueError( + "`amax` ({}) must be greater than `amin` ({})".format(amax, amin) + ) # Setting precision for floating point comparison: digits = int(max(len(str(int(start))), len(str(int(stop))))) if step is not None: @@ -849,16 +861,15 @@ def bins(start, stop, step=None, num=None, amin=0, amax=None, if num is not None and int(num) != num: num = int(num) if verbose: - print("'mdtools.check.bins()' set 'num' to {}".format(num)) + print("`mdtools.check.bins()` set `num` to {}".format(num)) if amin is not None and start < amin: start = amin if verbose: - print("'mdtools.check.bins()' set 'start' to {}" - .format(start)) + print("`mdtools.check.bins()` set `start` to {}".format(start)) if amax is not None and stop > amax: stop = amax if verbose: - print("'mdtools.check.bins()' set 'stop' to {}".format(stop)) + print("`mdtools.check.bins()` set `stop` to {}".format(stop)) if stop <= start: if amax is not None and step is not None: if start + step <= amax: @@ -870,25 +881,27 @@ def bins(start, stop, step=None, num=None, amin=0, amax=None, elif amax is None and step is not None: stop = start + step elif amax is None and step is None: - raise ValueError("stop ({}) is equal to or less than start" - " ({})".format(stop, start)) + raise ValueError( + "`stop` ({}) is equal to or less than `start`" + " ({})".format(stop, start) + ) if verbose: - print("'mdtools.check.bins()' set 'stop' to {}".format(stop)) - if step is not None and step > stop - start or step <= 0: + print("`mdtools.check.bins()` set `stop` to {}".format(stop)) + if step is not None and (step > stop - start or step <= 0): if num is not None: step = (stop - start) / num else: step = stop - start if verbose: - print("'mdtools.check.bins()' set 'step' to {}".format(step)) + print("`mdtools.check.bins()` set `step` to {}".format(step)) elif step is None: step = (stop - start) / num if verbose: - print("'mdtools.check.bins()' set 'step' to {}".format(step)) + print("`mdtools.check.bins()` set `step` to {}".format(step)) if num is None or num != (stop - start) / step: num = round((stop - start) / step) if verbose: - print("'mdtools.check.bins()' set 'num' to {}".format(num)) + print("`mdtools.check.bins()` set `num` to {}".format(num)) return float(start), float(stop), float(step), int(num) @@ -990,8 +1003,8 @@ def bin_edges(bins, amin=0, amax=1, right=False, tol=1e-6, verbose=True): Parameters ---------- bins : array_like - Array containing the bin edges. The bin edges are sorted and - dublicates are removed. + Array containing the bin edges. The bin edges are sorted in + ascending order and duplicates are removed. amin : scalar, optional A minimum value that the first bin edge must not undermine. amax : scalar, optional @@ -1031,8 +1044,10 @@ def bin_edges(bins, amin=0, amax=1, right=False, tol=1e-6, verbose=True): ------ ValueError If ``len(bins)`` is zero; - ``bins[0]`` is less than ``amin - tol``; - ``bins[-1]`` is greater than ``amax + tol``. + ``bins[0]`` is less than `amin` or ``amin - tol`` (if `right` is + ``False`` or ``True``); + ``bins[-1]`` is greater than ``amax + tol`` or `amax` (if + `right` is ``False`` or ``True``). See Also -------- @@ -1055,8 +1070,9 @@ def bin_edges(bins, amin=0, amax=1, right=False, tol=1e-6, verbose=True): if len(bins) == 0: raise ValueError("The number of bin edges is zero") if amax <= amin: - raise ValueError("'amax' ({}) must be greater than 'amin' ({})" - .format(amax, amin)) + raise ValueError( + "`amax` ({}) must be greater than `amin` ({})".format(amax, amin) + ) if right: first_bin_edge = amin - tol last_bin_edge = amax @@ -1066,30 +1082,40 @@ def bin_edges(bins, amin=0, amax=1, right=False, tol=1e-6, verbose=True): if np.isclose(bins[0], amin, rtol=0, atol=tol): bins[0] = first_bin_edge if verbose: - print("'mdtools.check.bin_edges()' set 'bins[0]' to {}" - .format(bins[0])) + print( + "`mdtools.check.bin_edges()` set `bins[0]` to" + " {}".format(bins[0]) + ) elif bins[0] > amin: bins = np.insert(bins, 0, first_bin_edge) if verbose: - print("'mdtools.check.bin_edges()' prepended 'bins' with {}" - .format(bins[0])) + print( + "`mdtools.check.bin_edges()` prepended `bins` with" + " {}".format(bins[0]) + ) elif bins[0] < amin: - raise ValueError("The first bin edge ({}) must not be less than" - " 'amin-tol' ({})".format(bins[0], amin - tol)) + raise ValueError( + "The first bin edge ({}) must not be less than `amin - tol`" + " ({})".format(bins[0], amin - tol) + ) if np.isclose(bins[-1], amax, rtol=0, atol=tol): bins[-1] = last_bin_edge if verbose: - print("'mdtools.check.bin_edges()' set 'bins[-1]' to {}" - .format(bins[-1])) + print( + "`mdtools.check.bin_edges()` set `bins[-1]` to" + " {}".format(bins[-1]) + ) elif bins[-1] < amax: bins = np.append(bins, last_bin_edge) if verbose: - print("'mdtools.check.bin_edges()' appended 'bins' with {}" - .format(bins[-1])) + print( + "`mdtools.check.bin_edges()` appended `bins` with" + " {}".format(bins[-1]) + ) elif bins[-1] > amax: - raise ValueError("The last bin edge ({}) must not be greater" - " than 'amax+tol' ({})" - .format(bins[-1], amax + tol)) + raise ValueError( + "The last bin edge ({}) must not be greater than `amax + tol`" + " ({})".format(bins[-1], amax + tol)) return bins @@ -1097,7 +1123,7 @@ def frame_slicing(start, stop, step, n_frames_tot=None, verbose=True): """ Check if the input parameters are suitable for slicing |MDA_trjs|. - Bassically, the same rules as for `slicing numpy arrays`_ apply with + Basically, the same rules as for `slicing numpy arrays`_ apply with the following limitations: * `start` must be positive or zero, but smaller than `stop`. @@ -1211,7 +1237,8 @@ def frame_slicing(start, stop, step, n_frames_tot=None, verbose=True): def block_averaging(n_blocks, n_frames, check_CPUs=False, verbose=True): """ - Check if the number of blocks for block averaging is chosen properly. + Check if the number of blocks for block averaging is chosen + properly. The number of blocks must be greater than zero, but less than the number of available frames. @@ -1248,7 +1275,8 @@ def block_averaging(n_blocks, n_frames, check_CPUs=False, verbose=True): See Also -------- :func:`mdtools.check.frame_slicing` : - Check if the input parameters are suitable for slicing |MDA_trjs| + Check if the input parameters are suitable for slicing + |MDA_trjs| :func:`mdtools.check.frame_lag` : Check if a frame lag ('lag time') is chosen properly :func:`mdtools.check.time_step` : @@ -1298,7 +1326,7 @@ def restarts( .. deprecated:: 0.0.dev0 :func:`mdtools.check.restarts` might be removed in a future - version due to dublicate functionality. Use + version due to duplicate functionality. Use :func:`mdtools.check.frame_lag` instead. Different restarting points are usually used when calculating time @@ -1337,7 +1365,8 @@ def restarts( :func:`mdtools.check.frame_lag` : Check if a frame lag ('lag time') is chosen properly :func:`mdtools.check.frame_slicing` : - Check if the input parameters are suitable for slicing |MDA_trjs| + Check if the input parameters are suitable for slicing + |MDA_trjs| :func:`mdtools.check.block_averaging` : Check if the number of blocks for block averaging is chosen properly @@ -1420,7 +1449,8 @@ def frame_lag( See Also -------- :func:`mdtools.check.frame_slicing` : - Check if the input parameters are suitable for slicing |MDA_trjs| + Check if the input parameters are suitable for slicing + |MDA_trjs| :func:`mdtools.check.block_averaging` : Check if the number of blocks for block averaging is chosen properly @@ -1474,7 +1504,8 @@ def time_step(trj, verbose=True): Parameters ---------- - trj : MDAnalysis.coordinates.base.ReaderBase or MDAnalysis.coordinates.base.FrameIteratorBase + trj : MDAnalysis.coordinates.base.ReaderBase or \ + MDAnalysis.coordinates.base.FrameIteratorBase The |MDA_trj| to check. verbose : bool, optional If ``True``, print progress information to standard output. @@ -1516,7 +1547,8 @@ def masses(ag, flash_test=True): masses before calculating mass dependent quantities like the center of mass. - .. _MDAnalysis always guesses atom masses: https://userguide.mdanalysis.org/formats/guessing.html + .. _MDAnalysis always guesses atom masses: + https://userguide.mdanalysis.org/formats/guessing.html Parameters ---------- @@ -1582,7 +1614,8 @@ def masses_new(ag, verbose=False): of masses before calculating mass dependent quantities like the center of mass. - .. _MDAnalysis always guesses atom masses: https://userguide.mdanalysis.org/formats/guessing.html + .. _MDAnalysis always guesses atom masses: + https://userguide.mdanalysis.org/formats/guessing.html Parameters ---------- diff --git a/src/mdtools/dtrj.py b/src/mdtools/dtrj.py index 2476d8f2..81543ad9 100644 --- a/src/mdtools/dtrj.py +++ b/src/mdtools/dtrj.py @@ -1166,7 +1166,10 @@ def remain_prob( # noqa: C901 restarts = (t0 for t0 in range(0, n_frames - 1, restart)) if verbose: proc = psutil.Process() - restarts = mdt.rti.ProgressBar(restarts, total=n_frames - 2) + n_restarts = int(np.ceil(n_frames / restart)) + restarts = mdt.rti.ProgressBar( + restarts, total=n_restarts, unit="restarts" + ) for t0 in restarts: # When trying to understand the following code, read the "else" # parts fist. Those are the simpler cases upon which the other @@ -1532,7 +1535,10 @@ def remain_prob_discrete( # noqa: C901 restarts = (t0 for t0 in range(0, n_frames - 1, restart)) if verbose: proc = psutil.Process() - restarts = mdt.rti.ProgressBar(restarts, total=n_frames - 2) + n_restarts = int(np.ceil(n_frames / restart)) + restarts = mdt.rti.ProgressBar( + restarts, total=n_restarts, unit="restarts" + ) for t0 in restarts: # When trying to understand the following code, read the "else" # parts fist. Those are the simpler cases upon which the other diff --git a/src/mdtools/file_handler.py b/src/mdtools/file_handler.py index c0b9cb15..4540e36a 100644 --- a/src/mdtools/file_handler.py +++ b/src/mdtools/file_handler.py @@ -715,7 +715,7 @@ def write_matrix_block( outfile.write("\n") -def save_dtrj(fname, dtrj): +def save_dtrj(fname, dtrj, rename=True): """ Save a discrete trajectory to file. @@ -739,6 +739,10 @@ def save_dtrj(fname, dtrj): given in :func:`mdtools.check.dtrj`. Note that the shape of `dtrj` is expanded to ``(1, f)`` if it is only of shape ``(f,)``. + rename : bool, optional + If ``True`` and a file called `fname` already exists, rename it + to ``'fname.bak_timestamp'``. See + :func:`mdtools.file_handler.backup` for more details. See Also -------- @@ -763,6 +767,8 @@ def save_dtrj(fname, dtrj): "{}".format(err), UserWarning, ) + if rename: + mdt.fh.backup(fname) with mdt.fh.xopen(fname, "wb") as fh: np.savez_compressed(fh, dtrj=dtrj) diff --git a/src/mdtools/numpy_helper_functions.py b/src/mdtools/numpy_helper_functions.py index b990e0ad..75f6d5d2 100644 --- a/src/mdtools/numpy_helper_functions.py +++ b/src/mdtools/numpy_helper_functions.py @@ -23,7 +23,7 @@ This module can be called from :mod:`mdtools` via the shortcut ``nph``:: import mdtools as mdt - mdt.nph # insetad of mdt.numpy_helper_functions + mdt.nph # instead of mdt.numpy_helper_functions .. todo:: @@ -340,7 +340,7 @@ def take(a, start=None, stop=None, step=None, axis=None): def ix_along_axis_to_global_ix(ix, axis): """ - Construnct a tuple of index arrays suitable to directly index an + Construct a tuple of index arrays suitable to directly index an array `a` from an array of indices along an axis of `a`. Parameters @@ -378,7 +378,7 @@ def ix_along_axis_to_global_ix(ix, axis): array can generally **not** be used to get the minimum or maximum values of `a` by simply doing ``a[ix]``. Rather, you have to do ``np.squeeze(numpy.take_along_axis(a, numpy.expand_dims(ix, axis=axis), axis=axis))`` - (in this expample, you can of course also use :func:`numpy.amin` and + (in this example, you can of course also use :func:`numpy.amin` and :func:`numpy.amax`). But sometimes, the indices themselves, which would make ``a[ix]`` possible, are required. :func:`numpy.take_along_axis` and :func:`numpy.expand_dims` do not @@ -945,7 +945,7 @@ def ix_of_item_change(a, axis=-1, wrap=False, prepend_zero=False): periodic boundary conditions. Consequently, if the first and the last element of `a` do not match, this is interpreted as item change and the position (index) of this item change is - regared as zero. + regarded as zero. prepend_zero : bool, optional If ``True``, regard the first element of `a` along the given axis as item change and prepend a ``0`` to the returned array of @@ -1024,7 +1024,7 @@ def ix_of_item_change(a, axis=-1, wrap=False, prepend_zero=False): >>> mdt.nph.ix_of_item_change(d, axis=ax, prepend_zero=True) (array([0, 0, 1, 1, 1, 2, 2, 2]), array([0, 1, 0, 1, 2, 0, 2, 3])) - 3-dimensional expample: + 3-dimensional example: >>> e = np.array([[[1, 2, 2], ... [2, 2, 1]], @@ -1134,7 +1134,7 @@ def subtract_mic(x1, x2, amin=None, amax=None, **kwargs): respectively. If `amin` and/or `amax` is an array, they must be broadcastable to a common shape and the difference ``amax - amin`` must be broadcastable to the shape of - ``npumpy.subtract(x1, x2, **kwargs)``. `amin` must be smaller + ``numpy.subtract(x1, x2, **kwargs)``. `amin` must be smaller than `amax`. If the difference between the `x1` and `x2` is larger than ``0.5 * (amax - amin)``, it is wrapped back to lie within that range (see Notes section). @@ -1169,8 +1169,8 @@ def subtract_mic(x1, x2, amin=None, amax=None, **kwargs): function always has dtype ``numpy.float64``. If you want to calculate distance vectors between particle - positions, use :func:`mdtools.box.vdist`, because this funtion - interpretes position and box arrays correctly. + positions, use :func:`mdtools.box.vdist`, because this function + interprets position and box arrays correctly. References ---------- @@ -1367,7 +1367,7 @@ def diff_mic(a, amin=None, amax=None, **kwargs): respectively. If `amin` and/or `amax` is an array, they must be broadcastable to a common shape and the difference ``amax - amin`` must be broadcastable to the shape of - ``npumpy.diff(a, **kwargs)``. `amin` must be smaller than + ``numpy.diff(a, **kwargs)``. `amin` must be smaller than `amax`. If the difference between two elements in `a` is larger than ``0.5 * (amax - amin)``, it is wrapped back to lie within that range (see Notes section). @@ -1562,7 +1562,7 @@ def locate_item_change( # noqa: C901 Array for which to get the positions where its elements change. axis : int, optional The axis along which to search for changing elements. By - default, the search is perfomed along the last axis. + default, the search is performed along the last axis. pin : {"after", "before", "both"} Whether to return the last position ``"before"`` an item change or the first position ``"after"`` an item change. If set to @@ -1597,7 +1597,7 @@ def locate_item_change( # noqa: C901 the last element of `a` do not match, this is interpreted as item change. In this case, the last element of `a` is considered to be the last position before an item change and the - first element of `a` is considered to be the first posistion + first element of `a` is considered to be the first position after an item change. tfic : bool, optional Treat First Item as Change. If ``True``, treat the first item @@ -1656,7 +1656,7 @@ def locate_item_change( # noqa: C901 function, you can use :func:`mdtools.numpy_helper_functions.item_change_ix` instead. - If `mic` is ``True``, this functionen uses + If `mic` is ``True``, this function uses :func:`mdtools.numpy_helper_functions.diff_mic` to calculate the difference between consecutive array elements. Otherwise, :func:`numpy.diff` is used. @@ -2076,7 +2076,7 @@ def locate_item_change( # noqa: C901 [ True, True, True, False], [ True, False, True, True]]) - 3-dimensional expample: + 3-dimensional example: >>> a = np.array([[[1, 2, 2], ... [2, 2, 1]], @@ -2880,7 +2880,7 @@ def item_change_ix(a, axis=-1, *args, **kwargs): Array for which to get all indices where its elements change. axis : int, optional The axis along which to search for changing elements. By - default, the search is perfomed along the last axis. + default, the search is performed along the last axis. kwargs : tuple, optional Additional keyword arguments to parse to :func:`mdtools.numpy_helper_functions.locate_item_change`. See @@ -3287,7 +3287,7 @@ def item_change_ix(a, axis=-1, *args, **kwargs): >>> after_tfic_tlic (array([0, 0, 1, 1, 1, 2, 2, 2]), array([0, 1, 0, 1, 2, 0, 2, 3])) - 3-dimensional expample: + 3-dimensional example: >>> a = np.array([[[1, 2, 2], ... [2, 2, 1]], @@ -3806,7 +3806,7 @@ def item_change_ix(a, axis=-1, *args, **kwargs): >>> mdt.nph.item_change_ix(d, axis=ax, pin="after", tfic=True) (array([0, 0, 1, 1, 1, 2, 2, 2]), array([0, 1, 0, 1, 2, 0, 2, 3])) - 3-dimensional expample: + 3-dimensional example: >>> e = np.array([[[1, 2, 2], ... [2, 2, 1]], @@ -3924,7 +3924,7 @@ def argmin_last(a, axis=None, out=None): See Also -------- :func:`numpy.argmin` : - Returns indices of the first occurence of the minium value + Returns indices of the first occurrence of the minium value :func:`argmax_last` : Same as :func:`argmin_last` but for maximum values :func:`numpy.unravel_index` : @@ -4022,7 +4022,7 @@ def argmax_last(a, axis=None, out=None): See Also -------- :func:`numpy.argmax` : - Returns indices of the first occurence of the maximum value + Returns indices of the first occurrence of the maximum value :func:`argmin_last` : Same as :func:`argmax_last` but for minimum values :func:`numpy.unravel_index` : @@ -4099,7 +4099,7 @@ def ceil_divide(x1, x2, **kwargs): Return the smallest integer greater or equal to the division of the inputs. It is equivalent to ``-(-x1 // x2)`` and can bee regarded - as the oppoiste of :func:`numpy.floor_divide`. + as the opposite of :func:`numpy.floor_divide`. Parameters ---------- @@ -4205,7 +4205,7 @@ def digitize_dd( right-open: [a, b) -> a <= x < b. See :func:`numpy.digitize` for more details. expand_binnumbers : bool, optional - If ``True``, the returned index array is unravled into an array + If ``True``, the returned index array is unraveled into an array of shape ``(D, N)`` where each row gives the bin numbers of the elements of `sample` along the corresponding dimension. If ``False``, the returned index array has shape ``(N,)`` and @@ -4361,36 +4361,334 @@ def digitize_dd( ) -def split_into_consecutive_subarrays(a, step=1, sort=True, debug=False): +def split_into_contig_seqs( + a, step=1, step_tol=1e-08, sort=False, return_ix=False +): """ - Split an array into its subarrays of consecutive numbers. + Split an array into subarrays of contiguous sequences. Parameters ---------- a : array_like - 1-dimensional array which to split into subarrays of consecutive - numbers with stepsize `step`. + 1-dimensional array which to split into subarrays of contiguous + sequences with step size `step`. step : scalar, optional - The stepsize defining the contiguous sequence. + The step size defining the contiguous sequence. + step_tol : scalar, optional + A tolerance value for the step size. Values in `a` are + considered to form a contiguous sequence if their difference + lies within ``step +/- step_tol``. sort : bool, optional - Sort `a` before searching for contiguous sequences. - debug : bool, optional - If ``True``, check the input arguments. + If ``True``, sort `a` before searching for contiguous sequences. + return_ix : bool, optional + If ``True``, return the indices where `a` was split to create + the subarrays. If `sort` is ``True``, these are the indices + where the sorted input array was split. Returns ------- - consecutive_subarrays : list + contig_seqs : list List of subarrays, one for each contiguous sequence of numbers - with stepsize `step` in `a`. + with step size `step` in `a`. + ix_split : numpy.ndarray + Array of indices at which `a` was split. If `sort` is ``True``, + these are the indices at which the sorted input array was split. + + See Also + -------- + :func:`get_const_seqs` : + Get all sequences of constant values in an array + + References + ---------- + Adapted from https://stackoverflow.com/a/7353335. + + Examples + -------- + >>> a = np.array([-3, -2, 0, 3, 2, 4]) + >>> contig_seqs, ix_split = mdt.nph.split_into_contig_seqs( + ... a, return_ix=True + ... ) + >>> contig_seqs + [array([-3, -2]), array([0]), array([3]), array([2]), array([4])] + >>> ix_split + array([2, 3, 4, 5]) + >>> contig_seqs, ix_split = mdt.nph.split_into_contig_seqs( + ... a, sort=True, return_ix=True + ... ) + >>> contig_seqs + [array([-3, -2]), array([0]), array([2, 3, 4])] + >>> ix_split + array([2, 3]) + + >>> a = np.array([0, 1, 2, 5, 7, 9]) + >>> mdt.nph.split_into_contig_seqs(a, step=2) + [array([0]), array([1]), array([2]), array([5, 7, 9])] + >>> a = np.array([0, 1, 2, 2, 1, 0]) + >>> mdt.nph.split_into_contig_seqs(a, step=-1) + [array([0]), array([1]), array([2]), array([2, 1, 0])] + >>> a = np.array([1, 2, 2, 3, 3, 3]) + >>> mdt.nph.split_into_contig_seqs(a, step=0) + [array([1]), array([2, 2]), array([3, 3, 3])] + + Edge cases: + + >>> a = np.array([]) + >>> mdt.nph.split_into_contig_seqs(a, sort=True, return_ix=True) + ([array([], dtype=float64)], array([], dtype=int64)) + >>> a = np.arange(3) + >>> mdt.nph.split_into_contig_seqs(a, sort=True, return_ix=True) + ([array([0, 1, 2])], array([], dtype=int64)) """ a = np.asarray(a) - if debug: - mdt.check.array(a, dim=1) + if a.ndim != 1: + raise ValueError( + "`a` has {} dimension(s) but must be 1-dimensional".format(a.ndim) + ) if sort: a = np.sort(a) - return np.split(a, np.flatnonzero((np.diff(a) != step)) + 1) + is_contiguous = np.isclose(np.diff(a), step, rtol=0, atol=step_tol) + np.bitwise_not(is_contiguous, out=is_contiguous) + ix_split = np.flatnonzero(is_contiguous) + ix_split += 1 + contiguous_seqs = np.split(a, ix_split) + if return_ix: + return contiguous_seqs, ix_split + else: + return contiguous_seqs + + +def get_const_seqs(x, tol=1e-08, sort=False): + """ + Get all sequences of constant values in an array. + + Parameters + ---------- + x : array_like + 1-dimensional array for which to get all sequences of constant + values. + tol : scalar, optional + Tolerance value within which consecutive elements of `x` are + considered to be equal. + sort : bool, optional + If ``True``, sort `x` before searching for sequences of constant + values. + + Returns + ------- + seq_starts : numpy.ndarray + Index array indicating the start of each constant sequence in + `x`. + eq_lengths : numpy.ndarray + The length (i.e. the number of values) of each constant + sequence. + vals : numpy.ndarray + The value of each constant sequence. Because consecutive values + can differ by `tol`, this is the mean value of each constant + sequence. + + See Also + -------- + :func:`split_into_contig_seqs` : + Split an array into subarrays of contiguous sequences. + :func:`find_const_seq_n` : + Find the first sequence of at least `n` constant values in an + array + :func:`find_const_seq_long` : + Find the longest sequence of constant values in an array + + Examples + -------- + >>> a = np.array([0, 2, 2, 4, 4, 4, 3, 3, 1]) + >>> mdt.nph.get_const_seqs(a) + (array([0, 1, 3, 6, 8]), array([1, 2, 3, 2, 1]), array([0., 2., 4., 3., 1.])) + >>> mdt.nph.get_const_seqs(a, sort=True) + (array([0, 1, 2, 4, 6]), array([1, 1, 2, 2, 3]), array([0., 1., 2., 3., 4.])) + >>> np.sort(a) + array([0, 1, 2, 2, 3, 3, 4, 4, 4]) + + Edge cases: + + >>> a = np.array([]) + >>> mdt.nph.get_const_seqs(a, sort=True) + (array([], dtype=int64), array([0]), array([], dtype=float64)) + >>> a = np.ones(3) + >>> mdt.nph.get_const_seqs(a, sort=True) + (array([0]), array([3]), array([1.])) + """ # noqa: E501, W505 + x = np.asarray(x) + seqs, seq_starts = mdt.nph.split_into_contig_seqs( + x, step=0, step_tol=tol, sort=sort, return_ix=True + ) + seq_lengths = np.array([len(seq) for seq in seqs]) + if x.size > 0: + seq_starts = np.insert(seq_starts, 0, 0) + vals = np.array([np.mean(seq) for seq in seqs]) + else: + vals = seqs[0] + return seq_starts, seq_lengths, vals + + +def find_const_seq_n(x, n, tol=1e-08, sort=False): + """ + Find the first sequence of at least `n` constant values in an array. + + Parameters + ---------- + x : array_like + 1-dimensional array in which to find the first sequence of at + least `n` constant values. + tol : scalar, optional + Tolerance value within which consecutive elements of `x` are + considered to be equal. + sort : bool, optional + If ``True``, sort `x` before searching for sequences of constant + values. + + Returns + ------- + start_ix : int + Index indicating the start of the first sequence of at least `n` + constant values in `x`. + length : int + The actual length of this sequence. + val : scalar + The value of this sequence. Because consecutive values can + differ by `tol`, this is the mean value of the sequence. + + See Also + -------- + :func:`get_const_seqs` : + Get all sequences of constant values in an array + :func:`find_const_seq_long` : + Find the longest sequence of constant values in an array + + Notes + ----- + If the array does not contain a sequence of at least `n` constant + values, `(array([]), 0, array([]))`` is returned. + + Examples + -------- + >>> a = np.array([0, 4, 4, 4, 2, 2]) + >>> mdt.nph.find_const_seq_n(a, n=2) + (1, 3, 4.0) + >>> mdt.nph.find_const_seq_n(a, n=2, sort=True) + (1, 2, 2.0) + >>> np.sort(a) + array([0, 2, 2, 4, 4, 4]) + + >>> a = np.array([0, 2, 2, 4, 4, 4, 2, 2, 0]) + >>> mdt.nph.find_const_seq_n(a, n=2) + (1, 2, 2.0) + >>> mdt.nph.find_const_seq_n(a, n=3) + (3, 3, 4.0) + >>> mdt.nph.find_const_seq_n(a, n=2, sort=True) + (0, 2, 0.0) + >>> np.sort(a) + array([0, 0, 2, 2, 2, 2, 4, 4, 4]) + + Edge cases: + + >>> a = np.array([1, 2, 2, 3, 3, 3]) + >>> mdt.nph.find_const_seq_n(a, n=4) + (array([], dtype=int64), 0, array([], dtype=float64)) + >>> mdt.nph.find_const_seq_n(a, n=0) + (0, 1, 1.0) + >>> mdt.nph.find_const_seq_n(a, n=-1) + (0, 1, 1.0) + + >>> a = np.array([]) + >>> mdt.nph.find_const_seq_n(a, n=1, sort=True) + (array([], dtype=int64), 0, array([], dtype=float64)) + >>> a = np.ones(3) + >>> mdt.nph.find_const_seq_n(a, n=1, sort=True) + (0, 3, 1.0) + """ + x = np.asarray(x) + seq_starts, seq_lengths, vals = mdt.nph.get_const_seqs( + x, tol=tol, sort=sort + ) + if x.size == 0: + return seq_starts, seq_lengths[0], vals + else: + ix = np.argmax(seq_lengths >= n) + if seq_lengths[ix] < n: + # This means the array does not contain a sequence of at least + # `n` constant values, i.e. ``not np.any(seq_lengths >= n)``. + return np.array([], dtype=np.int64), 0, np.array([], dtype=vals.dtype) + else: + return seq_starts[ix], seq_lengths[ix], vals[ix] + + +def find_const_seq_long(x, tol=1e-08, sort=False): + """ + Find the longest sequence of constant values in an array. + + If there exist multiple sequences with the longest length, the first + of them is returned. + + Parameters + ---------- + x : array_like + 1-dimensional array in which to find the longest sequence of + constant values. + tol : scalar, optional + Tolerance value within which consecutive elements of `x` are + considered to be equal. + sort : bool, optional + If ``True``, sort `x` before searching for sequences of constant + values. + + Returns + ------- + start_ix : int + Index indicating the start of the longest sequence of constant + values in `x`. + length : int + The actual length of this sequence. + val : scalar + The value of this sequence. Because consecutive values can + differ by `tol`, this is the mean value of the sequence. + + See Also + -------- + :func:`get_const_seqs` : + Get all sequences of constant values in an array. + :func:`find_const_seq_n` + Find the first sequence of at least `n` constant values in an + array + + Examples + -------- + >>> a = np.array([0, 4, 4, 4, 2, 2]) + >>> mdt.nph.find_const_seq_long(a) + (1, 3, 4.0) + >>> mdt.nph.find_const_seq_long(a, sort=True) + (3, 3, 4.0) + >>> np.sort(a) + array([0, 2, 2, 4, 4, 4]) + + Edge cases: + + >>> a = np.array([]) + >>> mdt.nph.find_const_seq_long(a, sort=True) + (array([], dtype=int64), 0, array([], dtype=float64)) + >>> a = np.ones(3) + >>> mdt.nph.find_const_seq_long(a, sort=True) + (0, 3, 1.0) + """ + x = np.asarray(x) + seq_starts, seq_lengths, vals = mdt.nph.get_const_seqs( + x, tol=tol, sort=sort + ) + if x.size == 0: + return seq_starts, seq_lengths[0], vals + else: + ix = np.argmax(seq_lengths) + return seq_starts[ix], seq_lengths[ix], vals[ix] def sequenize(a, step=1, start=0): @@ -4835,7 +5133,7 @@ def cross_section(z, x, y, line, num=None, order=1): `y`. num : int, optional Number of points to use for sampling the cross section along the - given line. If ``None`` (default), approximatly as many sample + given line. If ``None`` (default), approximately as many sample points are used as real data points are on the line. order : int, optional Interpolation order. @@ -4973,7 +5271,7 @@ def cross_section2d(z, ax, x=None, y=None, width=1, mean="arithmetic"): if width == 0 and mean != "arithmetic": print("Setting mean to 'arithmetic', because width is zero") if mean != "arithmetic" and mean != "integral": - raise ValueError("mean must be eithe 'arithmetic' or 'integral'") + raise ValueError("mean must be either 'arithmetic' or 'integral'") width2 = width / 2 cross_sec = np.full(z.shape[zax], np.nan, dtype=np.float64) @@ -5145,7 +5443,7 @@ def find_linear_region( (ideally) preserving the true signal. polyorder : int, optional The order of the polynomial used to fit the samples. `polyorder` - must be less than `window_length` but at least two. Kepp in mind + must be less than `window_length` but at least two. Keep in mind that you are fitting `window_length` data points with a polynomial of order `polyorder`. Thus, in order to avoid overfitting, `polyorder` should in fact be considerably smaller diff --git a/src/mdtools/statistics.py b/src/mdtools/statistics.py index da36e022..8ce0d43d 100644 --- a/src/mdtools/statistics.py +++ b/src/mdtools/statistics.py @@ -1224,7 +1224,8 @@ def exp_dist_log(x, rate): def mb_dist(v, temp=None, mass=None, var=None, drift=0, d=3): r""" - Maxwell-Boltzmann speed distribution in d-dimensional space. [#]_ + Maxwell-Boltzmann speed distribution in :math:`d`-dimensional space. + [#]_ .. math:: @@ -1267,7 +1268,7 @@ def mb_dist(v, temp=None, mass=None, var=None, drift=0, d=3): v : array_like Array of speed values for which to evaluate :math:`p(v)`. temp : scalar or None, optional - Temperature of the particles. Must be provided if `var` is + Temperature of the system. Must be provided if `var` is ``None``. mass : scalar or None, optional Mass of the particles. Must be provided if `var` is ``None``. @@ -1287,6 +1288,11 @@ def mb_dist(v, temp=None, mass=None, var=None, drift=0, d=3): p : scalar or array_like The function values :math:`p(v)` at the given :math:`v` values. + See Also + -------- + :func:`mdtools.statistics.ekin_dist` : + Distribution of kinetic energies in :math:`d`-dimensional space + Notes ----- Be aware of the units of your input values! The unit of the @@ -1402,6 +1408,129 @@ def mb_dist(v, temp=None, mass=None, var=None, drift=0, d=3): return dist +def ekin_dist(ekin, temp, d=3): + r""" + Distribution of kinetic energies in :math:`d`-dimensional space. + [#]_ + + Maxwell-Boltzmann distribution of the kinetic energies of the + particles in a canonical (:math:`NVT`) ensemble: + + .. math:: + + p(E) = \frac{S_d}{2} (\pi k_B T)^{-\frac{d}{2}} + E^{-\frac{d-2}{2}} + \exp{\left( -\frac{E}{k_B T} \right)} + + With the kinetic energy :math:`E`, the Boltzmann constant + :math:`k_B`, the temperature of the system :math:`T` and the surface + area of a :math:`d`-dimensional unit sphere [#]_ + + .. math:: + + S_d = \frac{2\pi^{\frac{d}{2}}}{\Gamma\left(\frac{d}{2}\right)} + = + \begin{cases} + 2 , & d = 1\\ + 2\pi , & d = 2\\ + 4\pi , & d = 3\\ + 2\pi^2 , & d = 4\\ + \vdots & + \end{cases} + + where :math:`\Gamma` is the `gamma function + `_. + + Parameters + ---------- + ekin : array_like + Array of kinetic energy values :math:`E` for which to evaluate + :math:`p(E)`. + temp : scalar + Temperature of the system. + d : int, optional + Number of spatial dimensions in which the particles can move. + + Returns + ------- + p : scalar or array_like + The function values :math:`p(E)` at the given :math:`E` values. + + See Also + -------- + :func:`mdtools.statistics.mb_dist` : + Maxwell-Boltzmann speed distribution in :math:`d`-dimensional + space + + Notes + ----- + Be aware of the units of your input values! The unit of the + Boltzmann constant :math:`k_B` is J/K. Thus, temperatures must + be given in K and energy values must be given in J. + + Note that all arguments should be positive to get a physically + meaningful output. However, it is also possible to parse negative + values to all input arguments. + + The distribution of the kinetic energies :math:`p(E) \text{ d}E` can + be derived from the Maxwell-Boltzmann speed distribution + + .. math:: + + p(v) \text{ d}v = S_d v^{d-1} + \left( \frac{m}{2 \pi k_B T} \right)^{-\frac{d}{2}} + \exp{\left( -\frac{mv^2}{2k_B T} \right)} \text{ d}v + + by substituting :math:`E = \frac{1}{2} mv^2`. + + References + ---------- + .. [#] Wikipedia `Maxwell-Boltzmann distribution § In n-dimensional + space + `_ + .. [#] Wikipedia `n-sphere § Closed forms + `_ + + Examples + -------- + >>> from scipy import constants + >>> temp = 1 / constants.k # Set thermal energy kT to 1 + >>> x = np.linspace(0, 3, 5) + >>> mdt.stats.ekin_dist(x, temp) + array([0. , 0.46159897, 0.30836066, 0.17839543, 0.09730435]) + >>> mdt.stats.ekin_dist(x, temp, d=1) + array([ inf, 0.30773265, 0.10278689, 0.03964343, 0.01621739]) + >>> mdt.stats.ekin_dist(x, temp, d=2) + array([1. , 0.47236655, 0.22313016, 0.10539922, 0.04978707]) + >>> mdt.stats.ekin_dist(x, temp, d=4) + array([0. , 0.35427491, 0.33469524, 0.23714826, 0.14936121]) + + The area below the distribution is one: + + >>> x = np.linspace(0, 20, 200) + >>> # In the 1-dimensional case, the distribution goes to infinity + >>> # for E->0 and therefore cannot be integrated numerically. + >>> for d in range(2, 6): + ... dist = mdt.stats.ekin_dist(x, temp, d) + ... integral = np.trapz(dist, x) + ... np.isclose(integral, 1, rtol=0, atol=1e-2) + True + True + True + True + """ + # Surface area of a d-dimensional unit sphere divided by 2. + sd = np.pi ** (d / 2) + sd /= special.gamma(d / 2) + # Pre-factor + pre = sd * (np.pi * constants.k * temp) ** (-d / 2) + # Distribution + dist = ekin ** ((d - 2) / 2) + dist *= np.exp(-ekin / (constants.k * temp)) + dist *= pre + return dist + + def var_weighted(a, weights=None, axis=None, return_mean=False): r""" Compute the weighted variance along a given axis. diff --git a/src/mdtools/structure.py b/src/mdtools/structure.py index 23846354..02dd2863 100644 --- a/src/mdtools/structure.py +++ b/src/mdtools/structure.py @@ -1015,7 +1015,7 @@ def discrete_pos_trj( # noqa: C901 """ Create a discrete position trajectory. - Discretize the positions of compounds of a MDAnalysis + Discretize the positions of compounds of an MDAnalysis :class:`~MDAnalysis.core.groups.AtomGroup` in a given spatial direction. @@ -1028,7 +1028,7 @@ def discrete_pos_trj( # noqa: C901 sel : MDAnalysis.core.groups.AtomGroup or str 'Selection group': Either a MDAnalysis :class:`~MDAnalysis.core.groups.AtomGroup` (if `trj` is not - ``None``) or a selection string for creating a MDAnalysis + ``None``) or a selection string for creating an MDAnalysis :class:`~MDAnalysis.core.groups.AtomGroup` (if `trj` is ``None``). See MDAnalysis' |selection_syntax| for possible choices of selection strings. @@ -1039,14 +1039,14 @@ def discrete_pos_trj( # noqa: C901 created from `topfile` and `trjfile`. topfile : str, optional Topology file. See |supported_topology_formats| of MDAnalysis. - Ignored if `trj` is not ``None``. + Ignored if `trj` is given. trjfile : str Trajectory file. See |supported_coordinate_formats| of - MDAnalysis. Ignored if `trj` is not ``None``. + MDAnalysis. Ignored if `trj` is given. begin : int, optional First frame to read from a newly created trajectory. Frame - numbering starts at zero. Ignored if `trj` is not ``None``. If - you want to use only specific frames from an already existing + numbering starts at zero. Ignored if `trj` is given. If you + want to use only specific frames from an already existing trajectory, slice the existing trajectory accordingly and parse it as :class:`MDAnalysis.coordinates.base.FrameIteratorSliced` object to the `trj` argument. @@ -1054,10 +1054,10 @@ def discrete_pos_trj( # noqa: C901 Last frame to read from a newly created trajectory. This is exclusive, i.e. the last frame read is actually ``end - 1``. A value of ``-1`` means to read the very last frame. Ignored if - `trj` is not ``None``. + `trj` is given. every : int, optional Read every n-th frame from the newly created trajectory. - Ignored if `trj` is not ``None``. + Ignored if `trj` is given. compound : {'atoms', 'group', 'segments', 'residues', \ 'fragments'}, optional The compounds of the selection group whose center of mass @@ -1111,8 +1111,10 @@ def discrete_pos_trj( # noqa: C901 bins : array_like, optional Array of custom bin edges. Bins do not need to be equidistant. All bin edges must lie within the simulation box as obtained - from the first frame read. If `bins` is given, it takes - precedence over all other bin arguments. + from the first frame read. The given bin edges are sorted + ascending order and and duplicate bin edges are removed. If + `bins` is given, it takes precedence over all other bin + arguments. tol : scalar, optional The tolerance value added to ``lbox`` to account for the right-open bin interval of the last bin. @@ -1142,7 +1144,7 @@ def discrete_pos_trj( # noqa: C901 create an :class:`~MDAnalysis.core.groups.UpdatingAtomGroup`, the number of compounds in this :class:`~MDAnalysis.core.groups.UpdatingAtomGroup` must stay - constant. Ignored if `trj` is not ``None``. + constant. Ignored if `trj` is given. Returns ------- @@ -1184,7 +1186,7 @@ def discrete_pos_trj( # noqa: C901 length in the given spatial direction. Doing so accounts for possible fluctuations of the simulation box (e.g. due to pressure scaling). Note that MDAnalysis always sets the origin of the - simulation box to the origin of the cartesian coordinate system. + simulation box to the origin of the Cartesian coordinate system. All bin intervals are left-closed and right-open, i.e. [a, b) -> a <= x < b. The first bin edge is always zero. The last bin edge @@ -1201,7 +1203,7 @@ def discrete_pos_trj( # noqa: C901 if verbose: timer_tot = datetime.now() proc = psutil.Process() - proc.cpu_percent() # Initiate monitoring of CPU usage + proc.cpu_percent() # Initiate monitoring of CPU usage. print("Running mdtools.structure.discrete_pos_trj()...") if trj is None and (topfile is None or trjfile is None): raise ValueError( @@ -1281,7 +1283,9 @@ def discrete_pos_trj( # noqa: C901 bins = np.linspace(START, STOP, NUM + 1) else: bins = np.unique(bins) / lbox - mdt.check.bin_edges(bins=bins, amin=0, amax=1, tol=tol, verbose=verbose) + bins = mdt.check.bin_edges( + bins=bins, amin=0, amax=1, tol=tol, verbose=verbose + ) if verbose: print("Elapsed time: {}".format(datetime.now() - timer)) print( @@ -1328,7 +1332,7 @@ def discrete_pos_trj( # noqa: C901 print("Time step last frame: {:>12.3f} (ps)".format(trj[END - 1].dt)) timer = datetime.now() trj = mdt.rti.ProgressBar(trj) - lbox_av = 0 # Average box length in the given direction + lbox_av = 0 # Average box length in the given direction. for i, ts in enumerate(trj): mdt.check.box( box=ts.dimensions, with_angles=True, orthorhombic=True, dim=1 diff --git a/src/mdtools/version.py b/src/mdtools/version.py index 350f72a3..4c5cfe48 100644 --- a/src/mdtools/version.py +++ b/src/mdtools/version.py @@ -29,4 +29,4 @@ #: Release version of MDTools in MAJOR-CORE.MAJOR-SCRIPTS.MINOR.PATCH #: format. -__version__ = "0.0.5.0" +__version__ = "0.0.6.0"