Source code for thermo.group_contribution.bondi

"""Chemical Engineering Design Library (ChEDL). Utilities for process modeling.
Copyright (C) 2024 Caleb Bell <Caleb.Andrew.Bell@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

This module contains an implementation of the Bondi
group-contribution method.
This functionality requires the RDKit library to work.

.. contents:: :local:


.. autofunction:: thermo.group_contribution.bondi.bondi_van_der_waals_surface_area_volume
.. autofunction:: thermo.group_contribution.bondi.R_from_Van_der_Waals_volume
.. autofunction:: thermo.group_contribution.bondi.Q_from_Van_der_Waals_area


"""
__all__ = ["BONDI_GROUPS", "BONDI_SUBGROUPS", "BondiGroupContribution", "Q_from_Van_der_Waals_area", "R_from_Van_der_Waals_volume", "bondi_van_der_waals_surface_area_volume"]
from thermo.functional_groups import FG_AMIDE, FG_CARBOXYLIC_ACID, count_rings_by_atom_counts, identify_conjugated_bonds, identify_functional_group_atoms
from thermo.group_contribution.group_contribution_base import (
    AROMATIC_BOND,
    DOUBLE_BOND,
    SINGLE_BOND,
    TRIPLE_BOND,
    BaseGroupContribution,
    priority_from_atoms,
    smarts_fragment_priority,
)


# reasonably complete
class BondiGroupContribution(BaseGroupContribution):
    __slots__ = BaseGroupContribution.__slots__ + ("Vw", "Aw",)

    def __init__(self, group, group_id, Vw, Aw, smarts=None,
                 priority=None, atoms=None, bonds=None, hydrogen_from_smarts=False):
        """
        Initialize the GroupContribution object with group name, volume contribution, and polarizability.

        Parameters
        ----------
        - group (str): The name or identifier of the group.
        - group_id (int): Unique identifier for the group.
        - Vw (float): Volume contribution in cm^3/mole.
        - Aw (float): Polarizability contribution in cm^3/mole * 10^9.
        """
        self.group = group
        self.group_id = group_id
        self.Vw = Vw
        self.Aw = Aw
        self.smarts = smarts
        self.atoms = atoms
        self.bonds = bonds
        self.hydrogen_from_smarts = hydrogen_from_smarts
        if priority is None and atoms is not None:
            self.priority = priority_from_atoms(atoms, bonds)
        else:
            self.priority = priority
        self.smart_rdkit = None

    def __repr__(self):
        return f"BondiGroupContribution(group={self.group!r}, group_id={self.group_id!r}, Vw={self.Vw!r}, Aw={self.Aw!r})"

BONDI_GROUPS_BY_ID = {}
BONDI_GROUPS = {}

# TABLE XV in van der Waals Volumes and Radii, 1964
BONDI_GROUPS[1] = BondiGroupContribution("C", 1, 3.33, 0.0, smarts="[CX4;H0]", atoms={"C": 1, "H": 0})
BONDI_GROUPS[2] = BondiGroupContribution("CH", 2, 6.78, 0.57, smarts="[CX4;H1]", atoms={"C": 1, "H": 1})
BONDI_GROUPS[3] = BondiGroupContribution("CH2", 3, 10.23, 1.35, smarts="[CX4;H2]", atoms={"C": 1, "H": 2})
BONDI_GROUPS[4] = BondiGroupContribution("CH3", 4, 13.67, 2.12, smarts="[CX4;H3]", atoms={"C": 1, "H": 3})

BONDI_GROUPS[5] = BondiGroupContribution(
    "CH4", 5, 17.12, 2.90,
    atoms={"C": 1, "H": 4},
    bonds={SINGLE_BOND: 4},
    smarts="[CX4;H4]"
)
# n-paraffins specific correlations: 6.88 + 10.23 Nc for Vw, and 1.54 + 1.35 Nc for Aw

BONDI_GROUPS[6] = BondiGroupContribution(
    "=C=", 6, 6.96, None,
    atoms={"C": 2, "H": 0},
    bonds={DOUBLE_BOND: 2},
    smarts="[C;X2;R0;$(*=,=*)]"
)

# >C=C< (internal double bond with two carbons)
BONDI_GROUPS[7] = BondiGroupContribution(
    ">C=C<", 7, 10.02, 0.61,
    atoms={"C": 2, "H": 0},
    bonds={DOUBLE_BOND: 1, SINGLE_BOND: 4},
    smarts="[C;X3;R0]=[C;X3;R0]"
)

# =CH (terminal double bond with one hydrogen)
BONDI_GROUPS[8] = BondiGroupContribution(
    "=CH-", 8, 8.47, 1.08,
    atoms={"C": 1, "H": 1},
    bonds={DOUBLE_BOND: 1, SINGLE_BOND: 1},
    smarts="[C;H1;X3;$(*=*)]"
)

# =CH2 (terminal double bond with two hydrogens)
BONDI_GROUPS[9] = BondiGroupContribution(
    "=CH2", 9, 11.94, 1.86,
    atoms={"C": 1, "H": 2},
    bonds={DOUBLE_BOND: 1},
    smarts="[C;H2;X3;R0;$(*=*)]"
)

# >C=CH2 (internal double bond with one terminal CH2)
BONDI_GROUPS[10] = BondiGroupContribution(
    ">C=CH2", 10, 16.95, 2.17,
    atoms={"C": 2, "H": 2},
    bonds={DOUBLE_BOND: 1, SINGLE_BOND: 2},
    smarts="[C;X3;R0]=[C;H2;R0]"
)

# >C=CH- (internal double bond with one terminal CH)
BONDI_GROUPS[11] = BondiGroupContribution(
    ">C=CH-", 11, 13.49, 1.39,
    atoms={"C": 2, "H": 1},
    bonds={DOUBLE_BOND: 1, SINGLE_BOND: 3},
    smarts="[C;X3;H0]=[C;X3;H1]"
)

# Acetylenic group definitions
# —C≡ (internal triple bond)
BONDI_GROUPS[12] = BondiGroupContribution(
    "-C≡", 12, 8.05, 0.98,
    atoms={"C": 1},
    bonds={TRIPLE_BOND: 1, SINGLE_BOND: 1},
    smarts="[C;H0;X2;R0;$(*#*)]"
)

# ≡C—H (terminal acetylenic carbon)
BONDI_GROUPS[13] = BondiGroupContribution(
    "≡C-H", 13, 11.55, 1.74,
    atoms={"C": 1, "H": 1},
    bonds={TRIPLE_BOND: 1},
    smarts="[C;H1;X2;R0;$(*#[C;H1;X2;R0;$(*#[C;H1])])][H]"
)
# There is also a version of ≡C- for diacetylene but we it includes Vw and Aw only
# and the value for diacetylene is likely to come from UNIFAC

# >C— (condensation): carbon in a fused aromatic ring system
BONDI_GROUPS[14] = BondiGroupContribution(
    ">C— (condensation)", 14, 4.74, 0.21,
    atoms={"C": 1},
    bonds={AROMATIC_BOND: 2},
    smarts="[c;R2]"
)

# >C— (alkyl): alkyl-substituted aromatic carbon
BONDI_GROUPS[15] = BondiGroupContribution(
    ">C— (alkyl)", 15, 5.54, 0.30,
    atoms={"C": 1},
    bonds={AROMATIC_BOND: 1, SINGLE_BOND: 1},
    smarts="[c;R1;$(C-[C,H])]"
)

# >C—H: terminal aromatic carbon with hydrogen
BONDI_GROUPS[16] = BondiGroupContribution(
    ">C—H", 16, 8.06, 1.00,
    atoms={"C": 1, "H": 1},
    bonds={AROMATIC_BOND: 1, SINGLE_BOND: 1},
    smarts="[cH;R1]"
)

# Benzene
BONDI_GROUPS[17] = BondiGroupContribution(
    "Benzene", 17, 48.36, 6.01,
    atoms={"C": 6},
    bonds={AROMATIC_BOND: 6},
    smarts="c1ccccc1"
)

# Phenyl
BONDI_GROUPS[18] = BondiGroupContribution(
    "Phenyl", 18, 45.84, 5.33,
    atoms={"C": 6},
    bonds={AROMATIC_BOND: 5, SINGLE_BOND: 1},
    smarts="[c1ccccc1]"
)

# Naphthalene
BONDI_GROUPS[19] = BondiGroupContribution(
    "Naphthalene", 19, 73.97, 8.44,
    atoms={"C": 10},
    bonds={AROMATIC_BOND: 12},
    smarts="c1c2ccccc2ccc1"
)

# Naphthyl
BONDI_GROUPS[20] = BondiGroupContribution(
    "Naphthyl", 20, 71.45, 7.76,
    atoms={"C": 10},
    bonds={AROMATIC_BOND: 11, SINGLE_BOND: 1},
    smarts="[c]1ccc2ccccc2c1"
)

# TABLE XVI in van der Waals Volumes and Radii, 1964

# -O- (c.e.): heterocycloaliphatic esters
BONDI_GROUPS[21] = BondiGroupContribution(
    "-O- (c.e.)", 21, 5.20, 0.74,
    atoms={"O": 1},
    bonds={SINGLE_BOND: 2},
    smarts="[O;X2;R1]"
)

# -O- (a.e.): polyalkane ethers
BONDI_GROUPS[22] = BondiGroupContribution(
    "-O- (a.e.)", 22, 3.70, 0.60,
    atoms={"O": 1},
    bonds={SINGLE_BOND: 2},
    smarts="[O;X2;R0]"
)

# -O- (ph.e.): polyphenyl ethers
BONDI_GROUPS[23] = BondiGroupContribution(
    "-O- (ph.e.)", 23, 3.20, 0.54,
    atoms={"O": 1},
    bonds={SINGLE_BOND: 2},
    smarts="[O;X2;$(c1ccccc1)]"
)

# -OH: hydroxyl group
BONDI_GROUPS[24] = BondiGroupContribution(
    "-OH", 24, 8.04, 1.46,
    atoms={"O": 1, "H": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[O;H1]"
)

# >C=O: carbonyl group (non-aromatic)
BONDI_GROUPS[25] = BondiGroupContribution(
    ">C=O", 25, 11.70, 1.60,
    atoms={"C": 1, "O": 1},
    bonds={DOUBLE_BOND: 1, SINGLE_BOND: 2},
    smarts="[C;X3](=O)"
)

# -S- and -SH do not include an Aw contribution

# -S-: sulfur ether
BONDI_GROUPS[26] = BondiGroupContribution(
    "-S-", 26, 10.8, None,
    atoms={"S": 1},
    bonds={SINGLE_BOND: 2},
    smarts="[S;X2]"
)

# -SH: thiol group
BONDI_GROUPS[27] = BondiGroupContribution(
    "-SH", 27, 14.8, None,
    atoms={"S": 1, "H": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[S;H1]"
)

# -NH2 (amino group)
BONDI_GROUPS[28] = BondiGroupContribution(
    "-NH2", 28, 10.54, 1.74,
    atoms={"N": 1, "H": 2},
    bonds={SINGLE_BOND: 1},
    smarts="[N;H2;X3]"
)

# >NH (secondary amine)
BONDI_GROUPS[29] = BondiGroupContribution(
    ">NH", 29, 8.08, 0.99,
    atoms={"N": 1, "H": 1},
    bonds={SINGLE_BOND: 2},
    smarts="[N;H1;X3]"
)

# NX3H0 (tertiary amine)
BONDI_GROUPS[30] = BondiGroupContribution(
    "NX3H0", 30, 4.33, 0.23,
    atoms={"N": 1},
    bonds={SINGLE_BOND: 3},
    smarts="[N;H0;X3]"
)

# -C≡N (nitrile group)
BONDI_GROUPS[31] = BondiGroupContribution(
    "-C≡N", 31, 14.70, 2.19,
    atoms={"C": 1, "N": 1},
    bonds={TRIPLE_BOND: 1, DOUBLE_BOND: 1},
    smarts="[C;X2;H0]#[N;H0;X1]"
)

# -NO2 (nitro group)
BONDI_GROUPS[32] = BondiGroupContribution(
    "-NO2", 32, 16.8, 2.55,
    atoms={"N": 1, "O": 2},
    bonds={SINGLE_BOND: 2, DOUBLE_BOND: 1},
    smarts="[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]"
)

# Phosphorous not sure what they tried to draw, looks wrong, has only Vw

# Fluorine group definitions

# -F (pr): primary aliphatic fluorine, attached to an alkane in the primary position
BONDI_GROUPS[33] = BondiGroupContribution(
    "-F (pr)", 33, 5.72, 1.10,
    atoms={"F": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[F;X1;R0]"
)

# -F (s,t): secondary or tertiary aliphatic fluorine, attached to alkane
BONDI_GROUPS[34] = BondiGroupContribution(
    "-F (s,t)", 34, 6.20, 1.18,
    atoms={"F": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[F;X1;R0;$(C([#6,#1])([#6,#1]))]"
)

# -F (p): per- or polyhalide of an alkane
BONDI_GROUPS[35] = BondiGroupContribution(
    "-F (p)", 35, 6.00, 1.15,
    atoms={"F": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[F$([*;!R]C([F,Cl,Br,I])[#6,F,Cl,Br,I])]"
)

# -F (ph): phenyl fluorine, attached to phenyl ring
BONDI_GROUPS[36] = BondiGroupContribution(
    "-F (ph)", 36, 5.80, 1.10,
    atoms={"F": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[F;$(c1ccccc1)]"
)

# Chlorine group definitions

# -Cl (pr): primary aliphatic chlorine, attached to an alkane in the primary position
BONDI_GROUPS[37] = BondiGroupContribution(
    "-Cl (pr)", 37, 11.62, 1.80,
    atoms={"Cl": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[Cl;X1;R0]"
)

# -Cl (s,t,p): secondary, tertiary, or per/polyhalide of an alkane
BONDI_GROUPS[38] = BondiGroupContribution(
    "-Cl (s,t,p)", 38, 12.24, 1.82,
    atoms={"Cl": 1},
    bonds={SINGLE_BOND: 1},
    smarts=[
        "[Cl;X1;R0;$(C([#6,#1])([#6,#1]))]",
        "[Cl$([*;!R]C([F,Cl,Br,I])[#6,F,Cl,Br,I])]"
    ]
)

# -Cl (v): chlorine attached to a vinyl group
BONDI_GROUPS[39] = BondiGroupContribution(
    "-Cl (v)", 39, 11.65, 1.80,
    atoms={"Cl": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[Cl;$([Cl]C=C)]"
)

# -Cl (ph): phenyl chlorine, attached to phenyl ring
BONDI_GROUPS[40] = BondiGroupContribution(
    "-Cl (ph)", 40, 12.0, 1.81,
    atoms={"Cl": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[Cl;$(c1ccccc1)]"
)

# Bromine group definitions

# -Br (pr): primary aliphatic bromine, attached to an alkane in the primary position
BONDI_GROUPS[41] = BondiGroupContribution(
    "-Br (pr)", 41, 14.40, 2.08,
    atoms={"Br": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[Br;X1;R0]"
)

# -Br (s,t,p): secondary, tertiary, or per/polyhalide of an alkane
BONDI_GROUPS[42] = BondiGroupContribution(
    "-Br (s,t,p)", 42, 14.60, 2.09,
    atoms={"Br": 1},
    bonds={SINGLE_BOND: 1},
    smarts=[
        "[Br;X1;R0;$(C([#6,#1])([#6,#1]))]",
        "[Br$([*;!R]C([F,Cl,Br,I])[#6,F,Cl,Br,I])]"
    ]
)

# -Br (ph): phenyl bromine, attached to phenyl ring
BONDI_GROUPS[43] = BondiGroupContribution(
    "-Br (ph)", 43, 15.12, 2.13,
    atoms={"Br": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[Br;$(c1ccccc1)]"
)

# Iodine group definitions

# -I (pr): primary aliphatic iodine, attached to an alkane in the primary position
BONDI_GROUPS[44] = BondiGroupContribution(
    "-I (pr)", 44, 19.18, 2.48,
    atoms={"I": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[I;X1;R0]"
)

# -I (s,t,p): secondary, tertiary, or per/polyhalide of an alkane
BONDI_GROUPS[45] = BondiGroupContribution(
    "-I (s,t,p)", 45, 20.35, 2.54,
    atoms={"I": 1},
    bonds={SINGLE_BOND: 1},
    smarts=[
        "[I;X1;R0;$(C([#6,#1])([#6,#1]))]",
        "[I$([*;!R]C([F,Cl,Br,I])[#6,F,Cl,Br,I])]"
    ]
)

# -I (ph): phenyl iodine, attached to phenyl ring
BONDI_GROUPS[46] = BondiGroupContribution(
    "-I (ph)", 46, 19.64, 2.51,
    atoms={"I": 1},
    bonds={SINGLE_BOND: 1},
    smarts="[I;$(c1ccccc1)]"
)

# TABLE XVII in van der Waals Volumes and Radii, 1964

# Generic groups for database storage, used in complex calculations

# Trans-condensed and free cyclopentyl/cyclohexyl rings
BONDI_GROUPS[47] = BondiGroupContribution(
    "Trans-condensed/free cycloalkyl", 47, -1.14, -0.57
)
# Cis-condensed cyclic naphthenes
BONDI_GROUPS[48] = BondiGroupContribution(
    "Cis-condensed naphthenes", 48, -2.50, -1.2
)

# Methylene rings condensed to benzene or other aromatic ring systems
BONDI_GROUPS[49] = BondiGroupContribution(
    "Methylene rings condensed to aromatics", 49, -1.66, -0.7
)
# Dioxane rings
BONDI_GROUPS[50] = BondiGroupContribution(
    "Dioxane rings", 50, -1.70, -0.7
)

# Single bonds between conjugated double bonds
BONDI_GROUPS[51] = BondiGroupContribution(
    "Conjugation interrupting bonds", 51, -0.25, None
)

# Single bonds adjacent to carboxyl or amide groups
BONDI_GROUPS[52] = BondiGroupContribution(
    "Bonds near acid/amides", 52, -0.22, None
)



for group in BONDI_GROUPS.values():
    BONDI_GROUPS_BY_ID[group.group_id] = group
catalog = BONDI_GROUPS.values()

BONDI_SUBGROUPS = [BONDI_GROUPS[i] for i in [k for k in BONDI_GROUPS.keys()]]

def count_bonds_near_acid_amide(mol):
    """Count single bonds immediately adjacent to carboxyl or amide groups.
    Each such bond contributes a decrement of 0.22 to Vw.
    """
    from rdkit import Chem
    # Get all atoms in carboxyl and amide groups
    carboxyl_matches = identify_functional_group_atoms(mol, FG_CARBOXYLIC_ACID)
    amide_matches = identify_functional_group_atoms(mol, FG_AMIDE)

    # Combine all group atoms into a set
    group_atoms = set()
    for match in carboxyl_matches + amide_matches:
        group_atoms.update(match)

    # Count qualifying single bonds
    count = 0
    for atom_idx in group_atoms:
        atom = mol.GetAtomWithIdx(atom_idx)
        for neighbor in atom.GetNeighbors():
            neighbor_idx = neighbor.GetIdx()
            # Only count bonds to atoms outside the functional group
            if neighbor_idx not in group_atoms:
                bond = mol.GetBondBetweenAtoms(atom_idx, neighbor_idx)
                if bond.GetBondType() == Chem.rdchem.BondType.SINGLE:
                    count += 1

    return count

def count_dioxane_rings(mol):
    """Count dioxane rings for Bondi decrements.
    Each dioxane ring contributes a decrement of 1.70 to Vw and 0.7 to Aw.
    """
    # Dioxane ring has exactly 2 oxygens and 4 carbons
    return count_rings_by_atom_counts(mol, {"O": 2, "C": 4})

def count_conjugation_interrupting_bonds(mol):
    """Count single bonds between conjugated double bonds.
    Each such bond contributes a decrement of 0.25 to Vw.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to analyze

    Returns
    -------
    int
        Number of single bonds between conjugated double bonds
    """
    conjugated_systems = identify_conjugated_bonds(mol)
    return len(conjugated_systems)

def count_cis_condensed_naphthenes(mol):
    """Count rings that are cis-condensed cyclic naphthenes.
    Each cis-condensed ring contributes a decrement of 2.50 to Vw and 1.2 to Aw.
    """
    # descriptions are very vague, unlikely to get clarification 60 years later
    # does not seem to work with rdkit
    ring_info = mol.GetRingInfo()
    ring_atoms = ring_info.AtomRings()

    cis_fusions = 0
    for i, ring1 in enumerate(ring_atoms):
        for j, ring2 in enumerate(ring_atoms[i+1:], i+1):
            # Find shared atoms at fusion point
            shared = set(ring1) & set(ring2)
            if len(shared) == 2:  # Standard fusion point
                # Get the two fusion atoms
                fusion_atoms = sorted(shared)
                # Check their stereochemistry
                atom1 = mol.GetAtomWithIdx(fusion_atoms[0])
                atom2 = mol.GetAtomWithIdx(fusion_atoms[1])
                if atom1.GetChiralTag() == atom2.GetChiralTag():
                    cis_fusions += 1

    return cis_fusions

def count_transcondensed_and_free_cycloalkyl(mol):
    """Count cyclopentyl and cyclohexyl rings that are either:
    1. Free (not connected to other rings)
    2. Part of trans-condensed ring systems

    Each ring contributes a decrement of 1.14 to Vw and 0.57 to Aw.
    """
    # descriptions are very vague, unlikely to get clarification 60 years later
    # best effort implementation
    from thermo.functional_groups import is_cycloalkane
    if not is_cycloalkane(mol):
        return 0
    ring_info = mol.GetRingInfo()
    ring_atoms = ring_info.AtomRings()
    if len(ring_atoms) == 1:
        return 0

    count = 0

    # Check each ring
    for i, ring in enumerate(ring_atoms):
        # Skip if not 5 or 6-membered
        if len(ring) not in [5, 6]:
            continue

        # Check if it's all carbons (cyclopentyl/cyclohexyl)
        if not all(mol.GetAtomWithIdx(idx).GetSymbol() == "C" for idx in ring):
            continue

        # Find other rings this ring is fused to
        fused_to = []
        for j, other_ring in enumerate(ring_atoms):
            if i != j:
                shared = set(ring) & set(other_ring)
                if len(shared) == 2:  # Standard fusion point
                    fused_to.append((other_ring, shared))

        if not fused_to:
            # This is a free cycloalkyl ring, count it and move to next ring
            count += 1
            continue

        # Check if all fusions to this ring are trans (different)
        all_trans = True
        for other_ring, shared in fused_to:
            # Get the fusion atoms
            fusion_atoms = sorted(shared)
            # Check stereochemistry
            atom1 = mol.GetAtomWithIdx(fusion_atoms[0])
            atom2 = mol.GetAtomWithIdx(fusion_atoms[1])
            if atom1.GetChiralTag() != atom2.GetChiralTag():
                all_trans = False
                break

        if all_trans and fused_to:
            count += 1

    return count

# checked with various groups
def find_methylene_rings_condensed_to_aromatic_rings(mol):
    r"""Given a `rdkit.Chem.rdchem.Mol` object, find all methylene rings that are
    condensed to benzene rings. A methylene ring is defined as a saturated ring where
    all non-shared atoms are CH2 groups.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to analyze

    Returns
    -------
    list
        List of tuples containing the atom indices for each unique methylene ring
        condensed to benzene. The indices are sorted for uniqueness.

    Examples
    --------
    >>> from rdkit.Chem import MolFromSmiles # doctest:+SKIP
    >>> # Indane has one methylene ring
    >>> find_methylene_rings_condensed_to_aromatic_rings(MolFromSmiles('C1CC2=CC=CC=C2C1')) # doctest:+SKIP
    [(0, 1, 2, 7, 8)]
    >>> # Tetralin also has one methylene ring
    >>> find_methylene_rings_condensed_to_aromatic_rings(MolFromSmiles('C1CCC2=CC=CC=C2C1')) # doctest:+SKIP
    [(0, 1, 2, 3, 8, 9)]
    """
    ring_info = mol.GetRingInfo()
    atom_rings = ring_info.AtomRings()

    # Convert to sets for easier intersection operations
    ring_sets = [set(r) for r in atom_rings]

    # Find aromatic rings (benzene rings)
    aromatic_rings = []
    for i, ring in enumerate(ring_sets):
        if all(mol.GetAtomWithIdx(idx).GetIsAromatic() for idx in ring):
            aromatic_rings.append(i)

    # Find potential methylene rings (non-aromatic) condensed to benzene
    methylene_ring_candidates = []
    for arom_idx in aromatic_rings:
        arom_ring = ring_sets[arom_idx]
        for i, ring in enumerate(ring_sets):
            if i == arom_idx:
                continue

            # Check if rings share exactly 2 atoms
            shared = arom_ring.intersection(ring)
            if len(shared) != 2:
                continue

            # Check if shared atoms are adjacent in both rings
            # Convert shared atoms to list for indexing
            shared_list = list(shared)
            for atom1 in shared_list:
                for atom2 in shared_list:
                    if atom1 >= atom2:
                        continue
                    # Check if atoms are bonded
                    bond = mol.GetBondBetweenAtoms(atom1, atom2)
                    if bond is not None:
                        # Quick check - does this ring have at least one CH2?
                        has_ch2 = False
                        for atom_idx in ring:
                            atom = mol.GetAtomWithIdx(atom_idx)
                            if (atom.GetSymbol() == "C" and
                                atom.GetTotalNumHs() == 2 and
                                atom.GetDegree() == 2):
                                has_ch2 = True
                                break
                        if has_ch2:
                            methylene_ring_candidates.append(ring)
                        break
    # Now check each candidate ring to see if it's a true methylene ring
#     print(methylene_ring_candidates, 'methylene_ring_candidates')
    methylene_rings = []
    for ring in methylene_ring_candidates:
        is_methylene = True
        for atom_idx in ring:
            atom = mol.GetAtomWithIdx(atom_idx)
            # Skip the shared atoms (which will be aromatic)
            if atom.GetIsAromatic():
                continue
            # Check if it's a CH2 group:
            # - Must be carbon
            # - Must have exactly 2 hydrogens
            # - Must have exactly 2 bonds (to other ring carbons)
            if (atom.GetSymbol() != "C" or
                atom.GetTotalNumHs() != 2 or
                atom.GetDegree() != 2):
                is_methylene = False
                break

        if is_methylene:
            # Add sorted tuple of atom indices for uniqueness
            methylene_rings.append(tuple(sorted(ring)))

    # Remove any duplicates and return
    return sorted(set(methylene_rings))

def bondi_fragmentation(rdkitmol):
    """
    Fragment an RDKit molecule into Bondi group contributions.

    Parameters
    ----------
    rdkitmol : rdkit.Chem.rdchem.Mol
        RDKit molecule object representing the chemical structure.

    Returns
    -------
    dict
        Dictionary with group_id as keys and their corresponding counts.
    bool
        Success status of the fragmentation process.
    """
    # Perform SMARTS-based fragmentation using the hardcoded catalog
    assignment, _, _, success, _ = smarts_fragment_priority(catalog=BONDI_SUBGROUPS, rdkitmol=rdkitmol)
    if not success:
        return {}, False

    # Dictionary of functions to count special groups
    count_functions = {
        47: lambda: count_transcondensed_and_free_cycloalkyl(rdkitmol),  # Trans-condensed/free cycloalkyl
        48: lambda: count_cis_condensed_naphthenes(rdkitmol),  # Cis-condensed cyclic naphthenes
        49: lambda: len(find_methylene_rings_condensed_to_aromatic_rings(rdkitmol)),  # Methylene rings condensed to aromatics
        50: lambda: count_dioxane_rings(rdkitmol),  # Dioxane rings
        51: lambda: count_conjugation_interrupting_bonds(rdkitmol),  # Conjugation interrupting bonds
        52: lambda: count_bonds_near_acid_amide(rdkitmol)  # Bonds near acid/amides
    }

    # Add counts only if they're non-zero
    for group_id, count_func in count_functions.items():
        count = count_func()
        if count > 0:
            assignment[group_id] = count

    return assignment, True

[docs] def bondi_van_der_waals_surface_area_volume(rdkitmol): """ Calculate the van der Waals volume `V_vdw` and surface area `A_vdw` for a given molecule using Bondi group contributions. This function identifies the Bondi groups present in the molecule using SMARTS-based fragmentation and computes the total van der Waals volume and surface area from the Bondi group contributions method. Parameters ---------- rdkitmol : rdkit.Chem.Mol RDKit molecule object representing the chemical structure of the molecule. Returns ------- V_vdw : float Van der Waals volume, [m^3/mol] A_vdw : float Van der Waals surface area, [m^2/mol] Raises ------ ValueError If the molecule cannot be fragmented successfully. Notes ----- The secondary contributions from Table XVII in [1]_ are: * Decrement per cyclohexyl and per cyclopentyl ring in free and transcondensed cyclic naphthenes * Decrement per ring in cis-condensed cyclic naphthenes * Decrement per methylene ring condensed to benzene or other aromatic ring system * Decrement per dioxane ring * Decrement per single bond between conjugated double bonds * Decrement per single bond adjacent to carboxyl or amide group The implementation follows Bondi's 1964 paper. Examples -------- >>> from rdkit import Chem # doctest:+SKIP >>> from thermo import Chemical # doctest:+SKIP >>> mol = Chemical('decane').rdkitmol # doctest:+SKIP >>> bondi_van_der_waals_surface_area_volume(mol) # doctest:+SKIP (0.00010918, 1504000.0) References ---------- .. [1] Bondi, A. "Van Der Waals Volumes and Radii." The Journal of Physical Chemistry 68, no. 3 (March 1, 1964): 441-451. https://doi.org/10.1021/j100785a001. """ # Fragment the molecule into Bondi group contributions assignment, success = bondi_fragmentation(rdkitmol) if not success: raise ValueError("Could not fragment molecule") V_vdw = 0.0 A_vdw = 0.0 # Calculate primary group contributions for group_id, count in assignment.items(): group = BONDI_GROUPS.get(group_id) if group: V_vdw += group.Vw * count if group.Aw is not None: A_vdw += group.Aw * count # Convert units: # V_vdw: cm³/mol -> m³/mol # A_vdw: cm²/mol x 10⁹ -> m²/mol V_vdw *= 1e-6 A_vdw *= 0.0001 * 1e9 return V_vdw, A_vdw
def R_Q_from_bondi(rdkitmol): V_vdw, A_vdw = bondi_van_der_waals_surface_area_volume(rdkitmol) R = R_from_Van_der_Waals_volume(V_vdw) Q = Q_from_Van_der_Waals_area(A_vdw) return R, Q
[docs] def R_from_Van_der_Waals_volume(V_vdw): r"""Calculates the UNIFAC R parameter from a species' Van der Waals molar volume. .. math:: R_k = \frac{V_{wk}}{15.17} Parameters ---------- V_vdw : float Unnormalized Van der Waals volume, [m^3/mol] Returns ------- R : float R UNIFAC parameter (normalized Van der Waals Volume) [-] Examples -------- >>> R_from_Van_der_Waals_volume(6.826196599999999e-05) 4.4998 Notes ----- This function is the inverse of Van_der_Waals_volume(). """ return V_vdw / 1.517e-05
[docs] def Q_from_Van_der_Waals_area(A_vdw): r"""Calculates the UNIFAC Q parameter from a species' Van der Waals molar surface area. .. math:: Q_k = \frac{A_{wk}}{2.5 \times 10^9} Parameters ---------- A_vdw : float Unnormalized Van der Waals surface area, [m^2/mol] Returns ------- Q : float Q UNIFAC parameter (normalized Van der Waals Area) [-] Examples -------- >>> Q_from_Van_der_Waals_area(964000.0) 3.856 Notes ----- This function is the inverse of Van_der_Waals_area(). """ return A_vdw / 250000.0