Source code for kbkit.io.edr

"""Parser for GROMACS energy (.edr) files."""

import re
import subprocess
import tempfile
from functools import cached_property
from pathlib import Path
from typing import ClassVar

import numpy as np

from kbkit.config.unit_registry import load_unit_registry
from kbkit.utils.format import ENERGY_ALIASES, resolve_attr_key
from kbkit.utils.validation import validate_path


[docs] class EdrParser: """ Interface for extracting energy properties from GROMACS .edr files. Wraps `gmx energy` to provide access to available properties in .edr file. Supports additional properties, `configurational_enthalpy` and `fluctuation properties` (heat capacity and isothermal compressibility). Note that the fluctuation properties return a float object rather than a timeseries. Parameters ---------- edr_path : str or list[str] Path to an .edr file. """ # Standard GROMACS Unit Mapping # Energies: kJ/mol, Temp: K, Press: bar, Density: kg/m^3 GROMACS_UNITS: ClassVar = { "time": "ps", "temperature": "K", "pressure": "bar", "density": "kg/m^3", "volume": "nm^3", "potential": "kJ/mol", "kinetic-en": "kJ/mol", "total-energy": "kJ/mol", "enthalpy": "kJ/mol", } DEFAULT_UNITS: ClassVar = { "time": "ps", "temperature": "K", "pressure": "kPa", "density": "kg/m^3", "volume": "nm^3", "potential": "kJ/mol", "kinetic-en": "kJ/mol", "total-energy": "kJ/mol", "enthalpy": "kJ/mol", "cp": "kJ/mol/K", "cv": "kJ/mol/K", "isothermal-compressibility": "1/kPa", "number-density": "molecule/nm^3", "molar-volume": "cm^3/mol", } FLUCT_PROPS: ClassVar = ("cp", "cv", "isothermal-compressibility") def __init__(self, path: str | Path) -> None: # validate edr_path self.edr_path = validate_path(path, suffix=".edr") # setup unit registry self.ureg = load_unit_registry() self.Q_ = self.ureg.Quantity @cached_property def units(self) -> dict[str, str]: """Returns a dictionary mapping properties to their units.""" return self.DEFAULT_UNITS
[docs] def get_gmx_property(self, name: str, avg: bool = False, **kwargs) -> tuple | float: """Extract gromacs property from energy file. Parameters ---------- name: str Name of property to extract using gmx energy. kwargs: dict[str, str] Dictionary of optional inputs to gmx energy command (e.g., {"-b": 10000}) Returns ------- tuple Tuple of property time series (time, values). """ prop_input = name.lower() + "\n\n" with tempfile.NamedTemporaryFile(mode="w", suffix=".xvg", delete=False) as tmp: tmpfile = tmp.name try: # first build input list cmd = ["gmx", "energy", "-f", str(self.edr_path), "-o", tmpfile] for k, v in kwargs.items(): cmd.extend([str(k), str(v)]) subprocess.run( cmd, input=prop_input, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True, ) time, values = np.loadtxt(tmpfile, comments=["@", "#"], unpack=True) finally: Path(tmpfile).unlink(missing_ok=True) return (time, values) if not avg else values.mean()
@cached_property def data(self) -> dict[str, np.ndarray]: """Extract energy data from EDR file.""" names = set(self._get_property_names()) subset = ["time"] subset.extend([prop for prop in self.GROMACS_UNITS.keys() if prop in names]) if not subset: return {} prop_input = "\n".join(subset) + "\n\n" with tempfile.NamedTemporaryFile(mode="w", suffix=".xvg", delete=False) as tmp: tmpfile = tmp.name try: subprocess.run( ["gmx", "energy", "-f", str(self.edr_path), "-o", tmpfile], input=prop_input, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True, ) # Parse XVG header to get column order (excluding time) column_names = [] with open(tmpfile, "r") as f: for line in f: if line.startswith("@ s") and "legend" in line: name = line.split('"')[1] column_names.append(name) # Load data raw_data = np.loadtxt(tmpfile, comments=["@", "#"], unpack=True) # Build dictionary: column 0 is time, columns 1+ are the legend entries data = {"time": raw_data[0]} for i, col_name in enumerate(column_names): # i corresponds to legend index (s0, s1, s2...) # but data column is i+1 (because column 0 is time) data_column_index = i + 1 # Normalize XVG name to match your property naming convention # "Kinetic En." -> "Kinetic-En", "Total Energy" -> "Total-Energy" normalized_name = col_name.replace(" ", "-").replace(".", "") data[normalized_name.lower()] = raw_data[data_column_index] finally: Path(tmpfile).unlink(missing_ok=True) return data def _get_property_names(self) -> list[str]: """Captures the full list of property names from the GMX menu, handling names with internal spaces or special characters.""" process = subprocess.run( ["gmx", "energy", "-f", str(self.edr_path)], check=False, input="\n", capture_output=True, text=True ) names = [] # We look for lines that start with an index number. # Example line: " 1 Bond 2 Angle 3 Proper-Dih." # Note: GMX usually uses a fixed-width format for this menu. for line in process.stderr.splitlines(): # Only process lines that actually look like the menu if not re.search(r"^\s*\d+\s+", line): continue # Split the line into chunks based on the pattern " (number) " # This captures the text between the numbers parts = re.split(r"\s*(\d+)\s+", line) # re.split with a capture group returns: ['', '1', 'Name1', '2', 'Name2'...] # We want the elements at indices 2, 4, 6... for i in range(2, len(parts), 2): prop_name = parts[i].strip() if prop_name and not prop_name.isdigit(): names.append(prop_name.lower().replace(".", "")) return names
[docs] def available_properties(self) -> list[str]: """ Return a list of available energy properties in the .edr file(s). Returns ------- list[str] Sorted list of property names in .edr files. """ return list(dict.fromkeys(self.data))
[docs] def get(self, name: str, start_time: float = 0, units: str | None = None) -> np.ndarray: r""" Extract time series data for a given property. Parameters ---------- name : str Property name to extract (e.g., "potential", "temperature"). start_time : float, optional time (in ps) after which data should be included. units: str, optional Returns property in desired units. If empty, used default values (See :meth:`units`). Returns ------- np.ndarray Array of values. .. note:: Filters data based on start_time for reproducibility. """ # first check that property is in edr file try: # resolves common property names for select gmx properties prop_key = resolve_attr_key(name, ENERGY_ALIASES).lower() all_values = self.data[prop_key] except KeyError: try: prop_key = name.lower() all_values = self.data[prop_key] except KeyError as e: raise KeyError(f"Property {prop_key} is not available.") from e # get default units from GROMACS gmx_units = self.GROMACS_UNITS.get(prop_key) units = units or self.units.get(prop_key) # get values from EDR parser values = all_values[self.data["time"] > start_time] # convert to desired units return self.Q_(values, gmx_units).to(units).magnitude
[docs] def molar_volume( self, nmol: int, volume: float = 0, start_time: float = 0, units: str | None = None ) -> np.ndarray | float: r""" Calculate molar volume of a simulation. If ensemble is NVT, i.e., `volume` is not accessible in .edr file, an input volume is required (i.e., read from bottom of .gro file in :class:`~kbkit.systems.properties.SystemProperties`). Parameters ---------- nmol: int Number of total molecules in system. volume: float, optional Simulation box volume. start_time: float, optional Start time for enthalpy calculation. units: str, optional Desired output units. Defaults to ``pyedr`` units (kJ/mol). Returns ------- np.ndarray Molar volume of molecular simulation. """ units = units or self.units.get("molar-volume") try: V = self.get("volume", start_time=start_time, units="nm^3") except KeyError: print(f"Warning! 'Volume' not found in '{self.edr_path}'. Falling back on box volume.") V = np.asarray(volume) molar_vol = V / nmol return self.Q_(molar_vol, "nm^3/molecule").to(units).magnitude
[docs] def configurational_enthalpy( self, volume: float = 0, start_time: float = 0, units: str | None = None ) -> np.ndarray: r""" Calculate enthalpy from potential energy. If ensemble is NVT, i.e., `volume` is not accessible in .edr file, an input volume is required (i.e., read from bottom of .gro file in :class:`~kbkit.systems.properties.SystemProperties`). Parameters ---------- volume: float, optional Simulation box volume. start_time: float, optional Start time for enthalpy calculation. units: str, optional Desired output units. Defaults to ``pyedr`` units (kJ/mol). Returns ------- np.ndarray Enthalpy of molecular simulation. Notes ----- Enthalpy, :math:`H`, is calculated from potential energy (:math:`U`) according to: .. math:: H = U + pV where: - :math:`p` is pressure - :math:`V` is volume """ units = units or self.units.get("enthalpy") U = self.get("potential", start_time=start_time, units="kJ/mol") P = self.get("pressure", start_time=start_time, units="kPa") try: V = self.get("volume", start_time=start_time, units="nm^3") except KeyError: print(f"Warning! 'Volume' not found in '{self.edr_path}'. Falling back on box volume.") V = np.asarray(volume) V = self.Q_(V, "nm^3").to("m^3").magnitude H = U + P * V return self.Q_(H, "kJ/mol").to(units).magnitude
[docs] def molar_enthalpy( self, nmol: int, volume: float = 0, start_time: float = 0, units: str | None = None ) -> np.ndarray: r""" Calculate molar enthalpy. Parameters ---------- nmol: int Number of total molecules in system. volume: float, optional Simulation box volume. start_time: float, optional Start time for enthalpy calculation. units: str, optional Desired output units. Defaults to ``pyedr`` units (kJ/mol). Returns ------- np.ndarray Configurational enthalpy normalized by the total number of molecules. See Also -------- :meth:`configurational_enthalpy` """ # get desired units units = units or self.units.get("enthalpy") H = self.configurational_enthalpy(volume=volume, start_time=start_time, units=units) return H / nmol
[docs] def cp(self, nmol: int, volume: float = 0, start_time: float = 0, units: str | None = None) -> float: r""" Calculate constant pressure heat capacity from :meth:`configurational_enthalpy`. Parameters ---------- nmol: int Number of total molecules in system. volume: float, optional Simulation box volume. start_time: float, optional Start time for enthalpy calculation. units: str, optional Desired output units. Defaults to ``pyedr`` units (kJ/mol). Returns ------- float Constant pressure heat capacity. Notes ----- Constant pressure heat capacity, :math:`c_p` is calculated according to: .. math:: \begin{aligned} c_p &= \frac{\langle H^2 \rangle - \langle H \rangle ^2}{k_B T^2} \\ &= \frac{\sigma_H^2}{k_B T^2} \end{aligned} where: - :math:`\langle H^2 \rangle - \langle H \rangle ^2` is the variance of the enthalpy (also writted as :math:`\sigma_H^2`) - :math:`k_B` is Boltzmann constant - :math:`T` is absolute temperature """ # get desired units units = units or self.units.get("cp") # get enthalpy from potential energy H = self.configurational_enthalpy(volume=volume, start_time=start_time, units="kJ/mol") T = self.get("temperature", start_time=start_time) T_avg = T.mean() R = self.ureg("R").to("kJ/mol/K").magnitude # ddof=1 is for sample variance calculations cp = H.var(ddof=1) / nmol / (R * T_avg**2) return self.Q_(cp, "kJ/mol/K").to(units).magnitude
[docs] def cv(self, nmol: int, start_time: float = 0, units: str | None = None) -> float: r""" Calculate constant volume heat capacity. Parameters ---------- nmol: int Number of total molecules in system. start_time: float, optional Start time for enthalpy calculation. units: str, optional Desired output units. Defaults to ``pyedr`` units (kJ/mol). Returns ------- float Constant volume heat capacity. Notes ----- Constant volume heat capacity, :math:`c_v` is calculated according to: .. math:: \begin{aligned} c_v &= \frac{\langle U^2 \rangle - \langle U \rangle ^2}{k_B T^2} \\ &= \frac{\sigma_U^2}{k_B T^2} \end{aligned} where: - :math:`\langle U^2 \rangle - \langle U \rangle ^2` is the variance of the potential (also writted as :math:`\sigma_U^2`) - :math:`k_B` is Boltzmann constant - :math:`T` is absolute temperature """ # get desired units units = units or self.units.get("cv") # get energy properties from potential energy U = self.get("potential", start_time=start_time, units="kJ/mol") T = self.get("temperature", start_time=start_time) T_avg = T.mean() R = self.ureg("R").to("kJ/mol/K").magnitude # ddof=1 is for sample variance calculations cv = U.var(ddof=1) / nmol / (R * T_avg**2) return self.Q_(cv, "kJ/mol/K").to(units).magnitude
[docs] def isothermal_compressibility(self, start_time: float = 0, units: str | None = None) -> float: r""" Isothermal compressibility. Parameters ---------- start_time: float, optional Start time for enthalpy calculation. units: str, optional Desired output units. Defaults to ``pyedr`` units (kJ/mol). Returns ------- float Isothermal compressibility. """ try: V = self.get("volume", start_time=start_time, units="m^3") except KeyError as e: raise KeyError("Isothermal Compressibility cannot be calculated from constant volume simulation!") from e units = units or self.units.get("isothermal-compressibility") R = self.ureg("R").to("kJ/mol/K").magnitude N_A = self.ureg("N_A").to("1/mol").magnitude T = self.get("Temperature", start_time=start_time) kT = N_A * V.var(ddof=1) / (V.mean() * R * T.mean()) return self.Q_(kT, "1/kPa").to(units).magnitude