# Source code for thermo.law

'''Chemical Engineering Design Library (ChEDL). Utilities for process modeling.
Copyright (C) 2016, 2017, 2018, 2019 Caleb Bell <Caleb.Andrew.Bell@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''


# Names exported by ``from thermo.law import *``. The raw inventory tables are
# commented out of this list.
__all__ = [
    # 'DSL_data', 'TSCA_data',
    # 'EINECS_data', 'SPIN_data', 'NLP_data',
    'CAN_DSL_flags',
    'TSCA_flags',
    'legal_status_methods',
    'legal_status',
    'HPV_data',
    '_ECHATonnageDict',
    '_EPACDRDict',
    'economic_status',
    'economic_status_methods',
    'load_economic_data',
    'load_law_data',
]

import os

from chemicals.identifiers import CAS_to_int
from chemicals.utils import PY37, can_load_data, os_path_join, to_num

# Identifiers of the individual inventories that `legal_status` can query.
DSL, TSCA, EINECS = 'DSL', 'TSCA', 'EINECS'
NLP, SPIN = 'NLP', 'SPIN'
# Identifier of the method that queries every inventory in one call.
COMBINED = 'COMBINED'
# Status strings reported for a chemical in an inventory.
LISTED, UNLISTED = 'LISTED', 'UNLISTED'




# Maps the integer code found in the 'Registry' column of the Canadian DSL
# table to its description; the position in the tuple below is the code.
CAN_DSL_flags = dict(enumerate((
    LISTED,
    'Non-Domestic Substances List (NDSL)',
    'Significant New Activity (SNAc)',
    'Ministerial Condition pertaining to this substance',
    'Domestic Substances List, removed (DSL_REM)',
    ('Minister of the Environment has imposed a Ministerial '
     'Prohibition pertaining to this substance'),
)))


# Description of each flag column of the TSCA inventory table, keyed by the
# flag's short code.
# NOTE(review): the 'XU' text says "Chemical Date Reporting Rule"; this is
# probably a typo for "Data", but the string is returned to callers at
# runtime so it is left untouched here - confirm before changing.
TSCA_flags = dict(
    UV=('Class 2 substance within the UVCB group; '
        'unknown molecular formula/structural diagram'),
    E='subject of a Section 5(e) Consent Order under TSCA',
    F='subject of a Section 5(f) Rule under TSCA',
    N=('polymeric substance containing no free-radical initiator in its '
       'Inventory name but is considered to cover the designated polymer '
       'made with any free-radical initiator regardless of the amount used'),
    P='commenced Premanufacture Notice (PMN) substance',
    R='subject of a Section 6 risk management rule under TSCA',
    S='identified in a final Significant New Uses Rule',
    SP='identified in a proposed Significant New Uses Rule',
    T='subject of a final Section 4 test rule under TSCA',
    TP='subject of a proposed Section 4 test rule under TSCA',
    XU=('exempt from reporting under Chemical Date Reporting Rule (formerly '
        'the Inventory Update Reporting Rule), i.e., Partial Updating of the '
        'TSCA Inventory Data Base Production and Site Reports (40 CFR 711)'),
    Y1=('exempt polymer that has a number-average molecular weight of '
        '1,000 or greater'),
    Y2=('exempt polymer that is a polyester and is made only from reactants '
        'included in a specified list of low-concern reactants that comprises '
        'one of the eligibility criteria for the exemption rule'),
)

def load_law_data():
    '''Load the chemical inventory tables queried by `legal_status`.

    Reads five data files from the ``Law`` folder located next to this module
    with pandas and stores them in the module-level globals `DSL_data`,
    `TSCA_data`, `EINECS_data`, `SPIN_data` and `NLP_data`. Each table is
    indexed by its first column.

    If `TSCA_data` already exists the function returns immediately without
    touching the disk, so it can be called before every lookup.

    Notes
    -----
    All five files are read into local variables first and the globals are
    only assigned once every read has succeeded. A failure part way through
    (for example a missing file) therefore leaves the module unloaded and the
    next call retries, instead of returning early with some tables undefined.
    '''
    global DSL_data, TSCA_data, EINECS_data, SPIN_data, NLP_data
    try:
        TSCA_data
    except NameError:
        # The global has not been created yet: fall through and load.
        pass
    else:
        return
    # pandas is only imported when the data actually has to be read.
    import pandas as pd
    folder = os_path_join(os.path.dirname(__file__), 'Law')

    # Data is stored as integers to reduce memory usage
    dsl = pd.read_csv(os.path.join(folder, 'Canada Feb 11 2015 - DSL.csv.gz'),
                      sep='\t', index_col=0, compression='gzip')

    tsca = pd.read_csv(os.path.join(folder, 'TSCA Inventory 2016-01.csv.gz'),
                       sep='\t', index_col=0, compression='gzip')

    einecs = pd.read_csv(os.path.join(folder, 'EINECS 2015-03.csv.gz'),
                         index_col=0, compression='gzip')

    spin = pd.read_csv(os.path.join(folder, 'SPIN Inventory 2015-03.csv.gz'),
                       compression='gzip', index_col=0)

    # 161162-67-6 is not a valid CAS number and was removed.
    nlp = pd.read_csv(os.path.join(folder, 'EC Inventory No Longer Polymers (NLP).csv'),
                      sep='\t', index_col=0)

    # Publish everything in one step, only after every file was read.
    DSL_data, TSCA_data, EINECS_data, SPIN_data, NLP_data = (
        dsl, tsca, einecs, spin, nlp)


if PY37:
    def __getattr__(name):
        '''Module-level attribute hook: load the inventory tables the first
        time one of them is requested as an attribute of this module.

        Raises AttributeError for any other missing name.
        '''
        lazy_tables = ('DSL_data', 'TSCA_data',
                       'EINECS_data', 'SPIN_data',
                       'NLP_data')
        if name not in lazy_tables:
            raise AttributeError(f"module {__name__} has no attribute {name}")
        load_law_data()
        return globals()[name]
elif can_load_data:  # pragma: no cover
    # No module-level __getattr__ is defined on this branch, so the tables
    # are loaded at import time when data loading is permitted.
    load_law_data()


# Every `method` value understood by `legal_status`; COMBINED is listed first.
legal_status_methods = [
    COMBINED,
    DSL, TSCA, EINECS, SPIN, NLP,
]


def legal_status(CASRN, method=None, get_methods=False, CASi=None):
    r'''Looks up the legal status of a chemical according to either a specific
    method or with all methods.

    Returns either the status for a specified method, or the
    status of the chemical in all available data sources, in the format
    {source: status}.

    Parameters
    ----------
    CASRN : string
        CASRN [-]

    Returns
    -------
    status : str or list[str] or dict
        Legal status information; a dict of {source: status} for the
        'COMBINED' method, a sorted list of flag descriptions for a TSCA
        entry which has flags set, and a string otherwise [-]
    methods : list, only returned if get_methods == True
        List of methods which can be used to obtain legal status with the
        given inputs

    Raises
    ------
    ValueError
        If `method` is not one of the supported method names

    Other Parameters
    ----------------
    method : string, optional
        A string for the method name to use, as defined by constants in
        legal_status_methods; if not given, 'COMBINED' is used
    get_methods : bool, optional
        If True, function will determine which methods can be used to obtain
        the legal status for the desired chemical, and will return methods
        instead of the status
    CASi : int, optional
        CASRN as an integer, used internally; when given (and nonzero) it is
        used instead of converting `CASRN` [-]

    Notes
    -----
    Supported methods are:

        * **DSL**: Canada Domestic Substance List, [1]_. As extracted on Feb 11, 2015
          from a html list. This list is updated continuously, so this version
          will always be somewhat old. Strictly speaking, there are multiple
          lists but they are all bundled together here. A chemical may be
          'Listed', or be on the 'Non-Domestic Substances List (NDSL)',
          or be on the list of substances with 'Significant New Activity (SNAc)',
          or be on the DSL but with a 'Ministerial Condition pertaining to this
          substance', or have been removed from the DSL, or have had a
          Ministerial prohibition for the substance.
        * **TSCA**: USA EPA Toxic Substances Control Act Chemical Inventory, [2]_.
          This list is as extracted on 2016-01. It is believed this list is
          updated on a periodic basis (> 6 month). A chemical may simply be
          'Listed', or may have certain flags attached to it. All these flags
          are described in the dict TSCA_flags.
        * **EINECS**: European INventory of Existing Commercial chemical
          Substances, [3]_. As extracted from a spreadsheet dynamically
          generated at [1]_. This list was obtained March 2015; a more recent
          revision already exists.
        * **NLP**: No Longer Polymers, a list of chemicals with special
          regulatory exemptions in EINECS. Also described at [3]_.
        * **SPIN**: Substances Prepared in Nordic Countries. Also a boolean
          data type. Retrieved 2015-03 from [4]_.

    Other methods which could be added are:

        * Australia: AICS Australian Inventory of Chemical Substances
        * China: Inventory of Existing Chemical Substances Produced or Imported
          in China (IECSC)
        * Europe: REACH List of Registered Substances
        * India: List of Hazardous Chemicals
        * Japan: ENCS: Inventory of existing and new chemical substances
        * Korea: Existing Chemicals Inventory (KECI)
        * Mexico: INSQ National Inventory of Chemical Substances in Mexico
        * New Zealand:  Inventory of Chemicals (NZIoC)
        * Philippines: PICCS Philippines Inventory of Chemicals and Chemical
          Substances

    Examples
    --------
    >>> legal_status('64-17-5')
    {'DSL': 'LISTED', 'TSCA': 'LISTED', 'EINECS': 'LISTED', 'NLP': 'UNLISTED', 'SPIN': 'LISTED'}

    References
    ----------
    .. [1] Government of Canada.. "Substances Lists" Feb 11, 2015.
       https://www.ec.gc.ca/subsnouvelles-newsubs/default.asp?n=47F768FE-1.
    .. [2] US EPA. "TSCA Chemical Substance Inventory." Accessed April 2016.
       https://www.epa.gov/tsca-inventory.
    .. [3] ECHA. "EC Inventory". Accessed March 2015.
       http://echa.europa.eu/information-on-chemicals/ec-inventory.
    .. [4] SPIN. "SPIN Substances in Products In Nordic Countries." Accessed
       March 2015. http://195.215.202.233/DotNetNuke/default.aspx.
    '''
    if not CASi:
        CASi = CAS_to_int(CASRN)
    methods = [COMBINED, DSL, TSCA, EINECS, NLP, SPIN]
    if get_methods:
        # The list of methods is fixed, so no data needs to be loaded for it.
        return methods
    load_law_data()
    if not method:
        method = COMBINED

    if method == COMBINED:
        # Ask each individual inventory in turn, reusing the integer CAS
        # number so it is only converted once.
        return {source: legal_status(CASRN, method=source, CASi=CASi)
                for source in methods[1:]}

    if method == DSL:
        if CASi in DSL_data.index:
            # The 'Registry' column holds the integer code described by
            # CAN_DSL_flags.
            return CAN_DSL_flags[DSL_data.at[CASi, 'Registry']]
        return UNLISTED

    if method == TSCA:
        if CASi not in TSCA_data.index:
            return UNLISTED
        row = TSCA_data.loc[CASi].to_dict()
        # Every truthy column is a flag that applies to this chemical.
        active_flags = sorted(TSCA_flags[flag]
                              for flag, is_set in row.items() if is_set)
        return active_flags if active_flags else LISTED

    # The remaining inventories only record whether a chemical is present.
    presence_tables = ((EINECS, EINECS_data), (NLP, NLP_data), (SPIN, SPIN_data))
    for source, table in presence_tables:
        if method == source:
            return LISTED if CASi in table.index else UNLISTED

    raise ValueError(f"Unknown legal status method {method!r}; "
                     f"expected one of {methods}")

# Economic data containers; each one is None until `load_economic_data`
# replaces it.
HPV_data = None
_EPACDRDict = None
_ECHATonnageDict = None

def load_economic_data():
    '''Load the production-volume data sets used by `economic_status`.

    Reads three files from the ``Law`` folder located next to this module and
    stores the results in module-level globals:

    * `HPV_data`: a pandas DataFrame read from 'HPV 2015 March 3.csv',
      indexed by its first column.
    * `_ECHATonnageDict`: dict mapping a CAS string to the list of distinct
      tonnage band strings found for it in 'ECHA Tonnage Bands.csv.zip'.
    * `_EPACDRDict`: dict mapping a CAS string to a dict with the keys
      'Manufactured', 'Imported' and 'Exported', each value being the
      corresponding file column divided by 1000.

    If `HPV_data` is already set the function returns immediately.

    Returns
    -------
    None

    Notes
    -----
    Everything is parsed into local variables and the globals are assigned
    only at the very end, with `HPV_data` (the "already loaded" marker) last.
    A failure while reading any file therefore leaves the module unloaded so
    the next call retries, instead of returning early with the dictionaries
    still set to None.
    '''
    global HPV_data, _EPACDRDict, _ECHATonnageDict
    if HPV_data is not None:
        return None
    import zipfile

    import pandas as pd
    folder = os_path_join(os.path.dirname(__file__), 'Law')

    # OECD high production volume chemicals: produced by OECD members in
    # > 1000 tonnes/year.
    # 13061-29-2 not valid and removed
    hpv = pd.read_csv(os.path.join(folder, 'HPV 2015 March 3.csv'),
                      sep='\t', index_col=0)

    tonnage_bands = {}
    with zipfile.ZipFile(os.path.join(folder, 'ECHA Tonnage Bands.csv.zip')) as z:
        with z.open(z.namelist()[0]) as f:
            for line in f:
                # The archive member yields bytes, so decode to text first.
                CAS, band = line.decode("utf-8").strip('\n').split('\t')
                # Keep each band only once per chemical, in first-seen order.
                bands = tonnage_bands.setdefault(CAS, [])
                if band not in bands:
                    bands.append(band)

    # EPA summed reported chemical usages. In metric tonnes/year after
    # conversion. Many producers keep their data confidential.
    # This was originally in terms of lb/year, but rounded to the nearest kg.
    cdr = {}
    with open(os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv'),
              encoding='utf-8') as f:
        next(f)  # discard the first line (presumably the column header)
        for line in f:
            values = line.rstrip().split('\t')
            CAS, manufactured, imported, exported = to_num(values)
            cdr[CAS] = {"Manufactured": manufactured/1000.,
                        "Imported": imported/1000.,
                        "Exported": exported/1000.}

    # Publish only after every source was read; HPV_data goes last because it
    # is the flag checked at the top of this function.
    _ECHATonnageDict = tonnage_bands
    _EPACDRDict = cdr
    HPV_data = hpv


# Display names of the individual data sources known to `economic_status`.
OECD = 'OECD high production volume chemicals'
ECHA = 'European Chemicals Agency Total Tonnage Bands'
EPACDR = 'EPA Chemical Data Reporting (2012)'

# The same names collected in one list.
economic_status_methods = [
    EPACDR,
    ECHA,
    OECD,
]


def economic_status(CASRN, method=None, get_methods=False):  # pragma: no cover
    '''Look up the economic status of a chemical.

    This API is considered experimental, and is expected to be removed in a
    future release in favor of a more complete object-oriented interface.

    Parameters
    ----------
    CASRN : string
        CASRN [-]

    Returns
    -------
    status : str or list[str]
        Economic status information; a list for the 'Combined' and ECHA
        methods, a string for the EPA and OECD methods [-]
    methods : list, only returned if get_methods == True
        'Combined' followed by the name of each data source which contains
        the chemical

    Raises
    ------
    KeyError
        If the EPA or ECHA method is requested explicitly for a chemical
        which is not in that data source
    ValueError
        If `method` is not a recognized method name

    Other Parameters
    ----------------
    method : string, optional
        One of the names in economic_status_methods, or 'Combined'; if not
        given, 'Combined' is used
    get_methods : bool, optional
        If True, return the list of usable methods instead of the status

    Examples
    --------
    >>> economic_status(CASRN='98-00-0')
    ["US public: {'Manufactured': 0.0, 'Imported': 10272.711, 'Exported': 184.127}", '10,000 - 100,000 tonnes per annum', 'OECD HPV Chemicals']

    >>> economic_status(CASRN='13775-50-3')  # SODIUM SESQUISULPHATE
    []
    >>> economic_status(CASRN='98-00-0', method='OECD high production volume chemicals')
    'OECD HPV Chemicals'
    >>> economic_status(CASRN='98-01-1', method='European Chemicals Agency Total Tonnage Bands')
    ['10,000 - 100,000 tonnes per annum']
    '''
    load_economic_data()
    CASi = CAS_to_int(CASRN)

    # The two dictionaries are keyed by the CAS string, the HPV table by the
    # integer form of the CAS number.
    in_epa = CASRN in _EPACDRDict
    in_echa = CASRN in _ECHATonnageDict
    in_oecd = CASi in HPV_data.index

    if get_methods:
        methods = ['Combined']
        if in_epa:
            methods.append(EPACDR)
        if in_echa:
            methods.append(ECHA)
        if in_oecd:
            methods.append(OECD)
        return methods
    if not method:
        method = 'Combined'

    if method == EPACDR:
        return 'US public: ' + str(_EPACDRDict[CASRN])
    if method == ECHA:
        # Hand out a copy so callers cannot alter the cached list.
        return list(_ECHATonnageDict[CASRN])
    if method == OECD:
        # NOTE(review): this label is returned without checking that the
        # chemical is actually in HPV_data, unlike the 'Combined' branch -
        # confirm whether that is intended.
        return 'OECD HPV Chemicals'
    if method == 'Combined':
        status = []
        if in_epa:
            status.append('US public: ' + str(_EPACDRDict[CASRN]))
        if in_echa:
            status.extend(_ECHATonnageDict[CASRN])
        if in_oecd:
            status.append('OECD HPV Chemicals')
        return status
    raise ValueError(f"Unknown economic status method {method!r}; expected "
                     f"'Combined' or one of {[EPACDR, ECHA, OECD]}")