Source code for selfies.bond_constraints

import functools
from itertools import product
from typing import Dict, Set, Union

from selfies.constants import ELEMENTS, INDEX_ALPHABET

_DEFAULT_CONSTRAINTS = {
    "H": 1, "F": 1, "Cl": 1, "Br": 1, "I": 1,
    "B": 3, "B+1": 2, "B-1": 4,
    "O": 2, "O+1": 3, "O-1": 1,
    "N": 3, "N+1": 4, "N-1": 2,
    "C": 4, "C+1": 5, "C-1": 3,
    "P": 5, "P+1": 6, "P-1": 4,
    "S": 6, "S+1": 7, "S-1": 5,
    "?": 8
}

_PRESET_CONSTRAINTS = {
    "default": dict(_DEFAULT_CONSTRAINTS),
    "octet_rule": dict(_DEFAULT_CONSTRAINTS),
    "hypervalent": dict(_DEFAULT_CONSTRAINTS)
}
_PRESET_CONSTRAINTS["octet_rule"].update(
    {"S": 2, "S+1": 3, "S-1": 1, "P": 3, "P+1": 4, "P-1": 2}
)
_PRESET_CONSTRAINTS["hypervalent"].update(
    {"Cl": 7, "Br": 7, "I": 7, "N": 5}
)

_current_constraints = _PRESET_CONSTRAINTS["default"]


[docs]def get_preset_constraints(name: str) -> Dict[str, int]: """Returns the preset semantic constraints with the given name. Besides the aforementioned default constraints, :mod:`selfies` offers other preset constraints for convenience; namely, constraints that enforce the `octet rule <https://en.wikipedia.org/wiki/Octet_rule>`_ and constraints that accommodate `hypervalent molecules <https://en.wikipedia.org/wiki/Hypervalent_molecule>`_. The differences between these constraints can be summarized as follows: .. table:: :align: center :widths: auto +-----------------+-----------+---+---+-----+-----+---+-----+-----+ | | Cl, Br, I | N | P | P+1 | P-1 | S | S+1 | S-1 | +-----------------+-----------+---+---+-----+-----+---+-----+-----+ | ``default`` | 1 | 3 | 5 | 6 | 4 | 6 | 7 | 5 | +-----------------+-----------+---+---+-----+-----+---+-----+-----+ | ``octet_rule`` | 1 | 3 | 3 | 4 | 2 | 2 | 3 | 1 | +-----------------+-----------+---+---+-----+-----+---+-----+-----+ | ``hypervalent`` | 7 | 5 | 5 | 6 | 4 | 6 | 7 | 5 | +-----------------+-----------+---+---+-----+-----+---+-----+-----+ :param name: the preset name: ``default`` or ``octet_rule`` or ``hypervalent``. :return: the preset constraints with the specified name, represented as a dictionary which maps atoms (the keys) to their bonding capacities (the values). """ if name not in _PRESET_CONSTRAINTS: raise ValueError("unrecognized preset name '{}'".format(name)) return dict(_PRESET_CONSTRAINTS[name])
[docs]def get_semantic_constraints() -> Dict[str, int]: """Returns the semantic constraints that :mod:`selfies` is currently operating on. :return: the current semantic constraints, represented as a dictionary which maps atoms (the keys) to their bonding capacities (the values). """ global _current_constraints return dict(_current_constraints)
[docs]def set_semantic_constraints( bond_constraints: Union[str, Dict[str, int]] = "default" ) -> None: """Updates the semantic constraints that :mod:`selfies` operates on. If the input is a string, the new constraints are taken to be the preset named ``bond_constraints`` (see :func:`selfies.get_preset_constraints`). Otherwise, the input is a dictionary representing the new constraints. This dictionary maps atoms (the keys) to non-negative bonding capacities (the values); the atoms are specified by strings of the form ``E`` or ``E+C`` or ``E-C``, where ``E`` is an element symbol and ``C`` is a positive integer. For example, one may have: * ``bond_constraints["I-1"] = 0`` * ``bond_constraints["C"] = 4`` This dictionary must also contain the special ``?`` key, which indicates the bond capacities of all atoms that are not explicitly listed in the dictionary. :param bond_constraints: the name of a preset, or a dictionary representing the new semantic constraints. :return: ``None``. """ global _current_constraints if isinstance(bond_constraints, str): _current_constraints = get_preset_constraints(bond_constraints) elif isinstance(bond_constraints, dict): # error checking if "?" not in bond_constraints: raise ValueError("bond_constraints missing '?' as a key") for key, value in bond_constraints.items(): # error checking for keys j = max(key.find("+"), key.find("-")) if key == "?": valid = True elif j == -1: valid = (key in ELEMENTS) else: valid = (key[:j] in ELEMENTS) and key[j + 1:].isnumeric() if not valid: err_msg = "invalid key '{}' in bond_constraints".format(key) raise ValueError(err_msg) # error checking for values if not (isinstance(value, int) and value >= 0): err_msg = "invalid value at " \ "bond_constraints['{}'] = {}".format(key, value) raise ValueError(err_msg) _current_constraints = dict(bond_constraints) else: raise ValueError("bond_constraints must be a str or dict") # clear cache since we changed alphabet get_semantic_robust_alphabet.cache_clear() get_bonding_capacity.cache_clear()
[docs]@functools.lru_cache() def get_semantic_robust_alphabet() -> Set[str]: """Returns a subset of all SELFIES symbols that are constrained by :mod:`selfies` under the current semantic constraints. :return: a subset of all SELFIES symbols that are semantically constrained. """ alphabet_subset = set() bonds = {"": 1, "=": 2, "#": 3} # add atomic symbols for (a, c), (b, m) in product(_current_constraints.items(), bonds.items()): if (m > c) or (a == "?"): continue symbol = "[{}{}]".format(b, a) alphabet_subset.add(symbol) # add branch and ring symbols for i in range(1, 4): alphabet_subset.add("[Ring{}]".format(i)) alphabet_subset.add("[=Ring{}]".format(i)) alphabet_subset.add("[Branch{}]".format(i)) alphabet_subset.add("[=Branch{}]".format(i)) alphabet_subset.add("[#Branch{}]".format(i)) alphabet_subset.update(INDEX_ALPHABET) return alphabet_subset
@functools.lru_cache() def get_bonding_capacity(element: str, charge: int) -> int: """Returns the bonding capacity of a given atom, under the current semantic constraints. :param element: the element of the input atom. :param charge: the charge of the input atom. :return: the bonding capacity of the input atom. """ key = element if charge != 0: key += "{:+}".format(charge) if key in _current_constraints: return _current_constraints[key] else: return _current_constraints["?"]