Skip to content

datamol.descriptors

Various molecular descriptors

compute_many_descriptors(mol, properties_fn=None, add_properties=True)

Compute a list of opinionated molecular properties.

Parameters:

Name Type Description Default
mol Mol

A molecule.

required
properties_fn Dict[str, Union[Callable, str]]

A dictionary mapping each property name to the function that computes it. If None, a default set of properties is used. If a value is a string instead of a function, dm.descriptors.any_descriptor() is used to retrieve the descriptor function.

None
add_properties bool

Whether to also compute the default properties in addition to those given in properties_fn. When a name appears in both, the entry from properties_fn is used.

True

Returns:

Type Description
dict

Computed properties as a dict.

Source code in datamol/descriptors.py
def compute_many_descriptors(
    mol: Mol,
    properties_fn: Dict[str, Union[Callable, str]] = None,
    add_properties: bool = True,
) -> dict:
    """Compute a list of opinionated molecular properties.

    Args:
        mol: A molecule.
        properties_fn: A dictionary mapping each property name to the function
            that computes it. If None, the default set of properties is used.
            If a value is a string instead of a callable,
            `dm.descriptors.any_descriptor()` is used to retrieve the descriptor
            function. The dictionary passed by the caller is never modified.
        add_properties: Whether to also compute the default properties in
            addition to those given in `properties_fn`. When a name appears in
            both, the entry from `properties_fn` is used. Ignored when
            `properties_fn` is None.

    Returns:
        Computed properties as a dict, keyed by property name.
    """

    if properties_fn is None:
        properties_fn = _DEFAULT_PROPERTIES_FN
    elif add_properties:
        # Merge into a fresh dict: calling `setdefault` directly on the
        # caller's mapping would leak the default properties back into it
        # (and into every later call that reuses the same dict).
        merged = dict(properties_fn)
        for name, fn in _DEFAULT_PROPERTIES_FN.items():
            # User-supplied entries win; key order stays "user first, then
            # the defaults that were missing".
            merged.setdefault(name, fn)
        properties_fn = merged

    props = {}
    for k, v in properties_fn.items():

        # A string names a descriptor; resolve it to the actual function.
        if isinstance(v, str):
            v = any_descriptor(v)

        props[k] = v(mol)

    return props

batch_compute_many_descriptors(mols, properties_fn=None, add_properties=True, n_jobs=1, batch_size=None, progress=False, progress_leave=True)

Compute a list of opinionated molecular properties on a list of molecules.

Parameters:

Name Type Description Default
mols List[rdkit.Chem.rdchem.Mol]

A list of molecules.

required
properties_fn Dict[str, Union[Callable, str]]

A dictionary mapping each property name to the function that computes it. If None, a default set of properties is used. If a value is a string instead of a function, dm.descriptors.any_descriptor() is used to retrieve the descriptor function.

None
add_properties bool

Whether to also compute the default properties in addition to those given in properties_fn. When a name appears in both, the entry from properties_fn is used.

True

Returns:

Type Description
DataFrame

A dataframe of computed properties with one row per input molecule.

Source code in datamol/descriptors.py
def batch_compute_many_descriptors(
    mols: List[Mol],
    properties_fn: Dict[str, Union[Callable, str]] = None,
    add_properties: bool = True,
    n_jobs: int = 1,
    batch_size: int = None,
    progress: bool = False,
    progress_leave: bool = True,
) -> pd.DataFrame:
    """Run `compute_many_descriptors` over a list of molecules.

    Args:
        mols: A list of molecules.
        properties_fn: Forwarded unchanged to `compute_many_descriptors`
            (mapping of property name to a callable or a descriptor name;
            None selects the defaults).
        add_properties: Forwarded unchanged to `compute_many_descriptors`.
        n_jobs: Forwarded to `parallelized` as `n_jobs`.
        batch_size: Forwarded to `parallelized` as `batch_size`.
        progress: Forwarded to `parallelized` as `progress`.
        progress_leave: Passed to `parallelized` as the `leave` entry of
            `tqdm_kwargs`.

    Returns:
        A dataframe built from the per-molecule property dicts, one row per
        input molecule.
    """

    # Pin the descriptor options so the mapped callable only needs a molecule.
    per_molecule_fn = functools.partial(
        compute_many_descriptors,
        properties_fn=properties_fn,
        add_properties=add_properties,
    )

    tqdm_options = {"leave": progress_leave}
    rows = parallelized(
        per_molecule_fn,
        mols,
        n_jobs=n_jobs,
        batch_size=batch_size,
        progress=progress,
        tqdm_kwargs=tqdm_options,
    )

    return pd.DataFrame(rows)

mw

CalcExactMolWt( (Mol)mol [, (bool)onlyHeavy=False]) -> float : returns the molecule's exact molecular weight

C++ signature :
    double CalcExactMolWt(RDKit::ROMol [,bool=False])

fsp3

CalcFractionCSP3( (Mol)mol) -> float : returns the fraction of C atoms that are SP3 hybridized

C++ signature :
    double CalcFractionCSP3(RDKit::ROMol)

n_hba

CalcNumHBA( (Mol)mol) -> int : returns the number of H-bond acceptors for a molecule

C++ signature :
    unsigned int CalcNumHBA(RDKit::ROMol)

n_hbd

CalcNumHBD( (Mol)mol) -> int : returns the number of H-bond donors for a molecule

C++ signature :
    unsigned int CalcNumHBD(RDKit::ROMol)

n_rings

CalcNumRings( (Mol)mol) -> int : returns the number of rings for a molecule

C++ signature :
    unsigned int CalcNumRings(RDKit::ROMol)

n_hetero_atoms

CalcNumHeteroatoms( (Mol)mol) -> int : returns the number of heteroatoms for a molecule

C++ signature :
    unsigned int CalcNumHeteroatoms(RDKit::ROMol)

n_heavy_atoms(mol)

Number of heavy atoms in a molecule.

Source code in rdkit/Chem/Lipinski.py
def HeavyAtomCount(mol):
  """ Number of heavy atoms in a molecule.

  Returns whatever `mol.GetNumHeavyAtoms()` reports.
  """
  heavy_atom_count = mol.GetNumHeavyAtoms()
  return heavy_atom_count

n_rotatable_bonds

CalcNumRotatableBonds( (Mol)mol, (bool)strict) -> int : returns the number of rotatable bonds for a molecule. strict = NumRotatableBondsOptions.NonStrict - Simple rotatable bond definition. strict = NumRotatableBondsOptions.Strict - (default) does not count things like amide or ester bonds strict = NumRotatableBondsOptions.StrictLinkages - handles linkages between ring systems. - Single bonds between aliphatic ring Cs are always rotatable. This means that the central bond in CC1CCCC(C)C1-C1C(C)CCCC1C is now considered rotatable; it was not before - Heteroatoms in the linked rings no longer affect whether or not the linking bond is rotatable - the linking bond in systems like Cc1cccc(C)c1-c1c(C)cccc1 is now considered non-rotatable

C++ signature :
    unsigned int CalcNumRotatableBonds(RDKit::ROMol,bool)

CalcNumRotatableBonds( (Mol)mol [, (NumRotatableBondsOptions)strict=rdkit.Chem.rdMolDescriptors.NumRotatableBondsOptions.Default]) -> int : returns the number of rotatable bonds for a molecule. strict = NumRotatableBondsOptions.NonStrict - Simple rotatable bond definition. strict = NumRotatableBondsOptions.Strict - (default) does not count things like amide or ester bonds strict = NumRotatableBondsOptions.StrictLinkages - handles linkages between ring systems. - Single bonds between aliphatic ring Cs are always rotatable. This means that the central bond in CC1CCCC(C)C1-C1C(C)CCCC1C is now considered rotatable; it was not before - Heteroatoms in the linked rings no longer affect whether or not the linking bond is rotatable - the linking bond in systems like Cc1cccc(C)c1-c1c(C)cccc1 is now considered non-rotatable

C++ signature :
    unsigned int CalcNumRotatableBonds(RDKit::ROMol [,RDKit::Descriptors::NumRotatableBondsOptions=rdkit.Chem.rdMolDescriptors.NumRotatableBondsOptions.Default])

n_aliphatic_rings(x, y=<Boost.Python.function object at 0x562ec5af9b00>)

CalcNumAliphaticRings( (Mol)mol) -> int : returns the number of aliphatic (containing at least one non-aromatic bond) rings for a molecule

C++ signature :
    unsigned int CalcNumAliphaticRings(RDKit::ROMol)
Source code in rdkit/Chem/Lipinski.py
# `y=_cfn` stores the object `_cfn` refers to right now as a default argument
# (defaults are evaluated once, when the lambda is created), so `_fn(x)` calls
# that captured callable on `x` even if `_cfn` is rebound afterwards.
_fn = lambda x, y=_cfn: y(x)

n_aromatic_rings(x, y=<Boost.Python.function object at 0x562ec5af97f0>)

CalcNumAromaticRings( (Mol)mol) -> int : returns the number of aromatic rings for a molecule

C++ signature :
    unsigned int CalcNumAromaticRings(RDKit::ROMol)
Source code in rdkit/Chem/Lipinski.py
# `y=_cfn` stores the object `_cfn` refers to right now as a default argument
# (defaults are evaluated once, when the lambda is created), so `_fn(x)` calls
# that captured callable on `x` even if `_cfn` is rebound afterwards.
_fn = lambda x, y=_cfn: y(x)

n_saturated_rings(x, y=<Boost.Python.function object at 0x562ec5af9860>)

CalcNumSaturatedRings( (Mol)mol) -> int : returns the number of saturated rings for a molecule

C++ signature :
    unsigned int CalcNumSaturatedRings(RDKit::ROMol)
Source code in rdkit/Chem/Lipinski.py
# `y=_cfn` stores the object `_cfn` refers to right now as a default argument
# (defaults are evaluated once, when the lambda is created), so `_fn(x)` calls
# that captured callable on `x` even if `_cfn` is rebound afterwards.
_fn = lambda x, y=_cfn: y(x)

n_radical_electrons(mol)

The number of radical electrons the molecule has (says nothing about spin state)

>>> NumRadicalElectrons(Chem.MolFromSmiles('CC'))
0
>>> NumRadicalElectrons(Chem.MolFromSmiles('C[CH3]'))
0
>>> NumRadicalElectrons(Chem.MolFromSmiles('C[CH2]'))
1
>>> NumRadicalElectrons(Chem.MolFromSmiles('C[CH]'))
2
>>> NumRadicalElectrons(Chem.MolFromSmiles('C[C]'))
3

Source code in rdkit/Chem/Descriptors.py
def NumRadicalElectrons(mol):
  """ Total number of radical electrons in the molecule, obtained by adding
      up the per-atom radical electron counts (says nothing about spin state)

    >>> NumRadicalElectrons(Chem.MolFromSmiles('CC'))
    0
    >>> NumRadicalElectrons(Chem.MolFromSmiles('C[CH3]'))
    0
    >>> NumRadicalElectrons(Chem.MolFromSmiles('C[CH2]'))
    1
    >>> NumRadicalElectrons(Chem.MolFromSmiles('C[CH]'))
    2
    >>> NumRadicalElectrons(Chem.MolFromSmiles('C[C]'))
    3

    """
  radical_total = 0
  for current_atom in mol.GetAtoms():
    radical_total += current_atom.GetNumRadicalElectrons()
  return radical_total

tpsa

CalcTPSA( (Mol)mol [, (bool)force=False [, (bool)includeSandP=False]]) -> float : returns the TPSA value for a molecule

C++ signature :
    double CalcTPSA(RDKit::ROMol [,bool=False [,bool=False]])

qed(mol, w=QEDproperties(MW=0.66, ALOGP=0.46, HBA=0.05, HBD=0.61, PSA=0.06, ROTB=0.65, AROM=0.48, ALERTS=0.95), qedProperties=None)

Calculate the weighted sum of ADS mapped properties

some examples from the QED paper, reference values from Peter G's original implementation

>>> m = Chem.MolFromSmiles('N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O')
>>> qed(m)
0.253...
>>> m = Chem.MolFromSmiles('CNC(=NCCSCc1nc[nH]c1C)NC#N')
>>> qed(m)
0.234...
>>> m = Chem.MolFromSmiles('CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12')
>>> qed(m)
0.234...

Source code in rdkit/Chem/QED.py
@setDescriptorVersion(version='1.1.0')
def qed(mol, w=WEIGHT_MEAN, qedProperties=None):
  """ Calculate the weighted sum of ADS mapped properties

  Each property is mapped through `ads` with its entry in `adsParameters`;
  the result is the exponential of the weighted mean of the logarithms of
  those mapped values, using the weights `w`. When `qedProperties` is None
  the properties are computed from `mol` with `properties(mol)`.

  some examples from the QED paper, reference values from Peter G's original implementation
  >>> m = Chem.MolFromSmiles('N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O')
  >>> qed(m)
  0.253...
  >>> m = Chem.MolFromSmiles('CNC(=NCCSCc1nc[nH]c1C)NC#N')
  >>> qed(m)
  0.234...
  >>> m = Chem.MolFromSmiles('CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12')
  >>> qed(m)
  0.234...
  """
  props = properties(mol) if qedProperties is None else qedProperties

  # Map every property value through `ads` with that property's parameters.
  mapped = []
  for prop_name, prop_value in props._asdict().items():
    mapped.append(ads(prop_value, adsParameters[prop_name]))

  # Weights and mapped values are paired positionally.
  weighted_log_total = sum(weight * math.log(value) for weight, value in zip(w, mapped))
  weight_total = sum(w)
  return math.exp(weighted_log_total / weight_total)

clogp(*x, **y)

Wildman-Crippen LogP value

Uses an atom-based scheme based on the values in the paper: S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999)

Arguments

- inMol: a molecule

- addHs: (optional) toggles adding of Hs to the molecule for the calculation. If true, hydrogens will be added to the molecule and used in the calculation.

Source code in rdkit/Chem/Crippen.py
# Forwards every positional and keyword argument to
# rdMolDescriptors.CalcCrippenDescriptors and keeps only element 0 of what it
# returns. NOTE(review): element 0 is presumably the LogP value — confirm the
# layout of the returned sequence against the RDKit API.
MolLogP = lambda *x, **y: rdMolDescriptors.CalcCrippenDescriptors(*x, **y)[0]

sas(m)

Source code in SA_Score/sascorer.py
def calculateScore(m):
    """Compute the score of molecule `m` on a scale clamped to [1, 10].

    The raw value is the sum of three parts: a count-weighted mean of the
    per-fragment scores looked up in `_fscores` (fragment score), a negative
    sum of size/stereo/spiro/bridgehead/macrocycle penalties (features score),
    and a correction applied when the molecule has more atoms than distinct
    fingerprint fragments. The raw value is then rescaled and clamped.

    Args:
        m: the molecule to score (presumably an RDKit Mol - it must work with
            `rdMolDescriptors.GetMorganFingerprint`, `Chem.FindMolChiralCenters`
            and provide `GetNumAtoms()` / `GetRingInfo()`).

    Returns:
        The score as a float between 1.0 and 10.0 inclusive.
    """
    # The fragment score table is loaded on first use.
    if _fscores is None:
        readFragmentScores()

    # --- fragment score ---
    # the 2 passed below is the *radius* of the circular fingerprint
    fps = rdMolDescriptors.GetMorganFingerprint(m, 2).GetNonzeroElements()
    fragmentSum = 0.
    fragmentTotal = 0
    for fragmentId, count in fps.items():
        fragmentTotal += count
        # fragments absent from the table contribute -4 each
        fragmentSum += _fscores.get(fragmentId, -4) * count
    score1 = fragmentSum / fragmentTotal

    # --- features score ---
    nAtoms = m.GetNumAtoms()
    nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ri = m.GetRingInfo()
    nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri)
    # a ring with more than 8 atoms counts as a macrocycle
    hasMacrocycle = any(len(ring) > 8 for ring in ri.AtomRings())

    # ---------------------------------------
    # The macrocycle penalty differs from the paper, which defines:
    #  macrocyclePenalty = math.log10(nMacrocycles+1)
    # A fixed log10(2) generates better results when 2 or more macrocycles
    # are present
    penalties = (
        nAtoms**1.005 - nAtoms,  # size
        math.log10(nChiralCenters + 1),  # stereo
        math.log10(nSpiro + 1),  # spiro
        math.log10(nBridgeheads + 1),  # bridgehead
        math.log10(2) if hasMacrocycle else 0.,  # macrocycle
    )
    score2 = 0.
    for penalty in penalties:
        score2 -= penalty

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthesise
    score3 = math.log(float(nAtoms) / len(fps)) * .5 if nAtoms > len(fps) else 0.

    # need to transform "raw" value into scale between 1 and 10
    rawMin, rawMax = -4.0, 2.5
    sascore = score1 + score2 + score3
    sascore = 11. - (sascore - rawMin + 1) / (rawMax - rawMin) * 9.

    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)

    # clamp to the [1, 10] range
    if sascore > 10.:
        return 10.0
    if sascore < 1.:
        return 1.0
    return sascore