Example Scripts

General and quick start

Get the BSA example mzML file

get_example_BSA_file.main()

Downloads the BSA.mzML example file also used in openMS and Ursgal.

This file is ideally suited for the use with the example scripts to test pyQms.

Will download the BSA1.mzML to the data folder ( ../data/BSA1.mzML )

Usage:

./get_example_BSA_file.py
#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import sys
from urllib import request as request
import os
import shutil

def main():
    """
    Downloads the BSA.mzML example file also used in openMS and Ursgal.

    This file is ideally suited for the use with the example scripts to test
    pyQms.

    Will download the BSA1.mzML to the data folder ( ../data/BSA1.mzML )

    Usage:

        ./get_example_BSA_file.py

    """
    mzML_file = os.path.join(
        os.pardir,
        'data',
        'BSA1.mzML'
    )
    if os.path.exists(mzML_file) is False:
        http_url = 'http://sourceforge.net/p/open-ms/code/HEAD/tree/OpenMS/share/OpenMS/examples/BSA/BSA1.mzML?format=raw'
        basename = os.path.basename(http_url).replace('?','') #Win compatible
        output_path = os.path.join( os.path.dirname(mzML_file), basename)
        with open( output_path, 'wb') as ooo:
            local_filename, headers = request.urlretrieve(
                http_url,
                filename = output_path,
            )
        try:
            shutil.move(
                '{0}?format=raw'.format(mzML_file),
                mzML_file
            )
        except:
            shutil.move(
                '{0}format=raw'.format(mzML_file),
                mzML_file
            )
        print(
            'Saved file as {0}'.format(
                mzML_file,
            )
        )

    return


if __name__ == '__main__':
    main()

Basic usage

Parse ident file and quantify

parse_ident_file_and_quantify.main(ident_file=None, mzml_file=None)

Script to automatically parse Ursgal result files and quantify it via pyQms. Please refer to Documenation of Adaptors for further information.

Ursgal result files or files in mzTab format are read in and used for quantification of the BSA example file.

Note

Use e.g. the BSA1.mzML example file. Please download it first using ‘get_example_BSA_file.py’. Evidence files can also be found in the data folder ‘BSA1_omssa_2_1_9_unified.csv’ or ‘BSA1_omssa_2_1_9.mztab’

Usage:

./parse_ident_file_and_quantify.py <ident_file> <mzml_file>
#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pyqms
import sys
import pickle
import os
import pyqms.adaptors
try:
    import pymzml
except:
    print('Please install pymzML via: pip install pymzml')


def main(ident_file=None, mzml_file=None):
    """
    Script to automatically parse `Ursgal`_ result files and quantify it via
    pyQms. Please refer to Documenation of :doc:`adaptors` for further
    information.

    `Ursgal`_ result files or files in `mzTab` format are read in and used for
    quantification of the BSA example file.

    Note:

        Use e.g. the BSA1.mzML example file. Please download it first using
        'get_example_BSA_file.py'. Evidence files can also be found in the
        data folder 'BSA1_omssa_2_1_9_unified.csv' or 'BSA1_omssa_2_1_9.mztab'

    Usage:

        ./parse_ident_file_and_quantify.py <ident_file> <mzml_file>

    .. _Ursgal:
        https://github.com/ursgal/ursgal

    .. _mzTab:
        http://www.psidev.info/mztab

    """

    if ident_file.upper().endswith('MZTAB'):
        evidence_score_field  = 'search_engine_score[1]'
    else:
        # this is the default value in the adaptor
        evidence_score_field = 'PEP'

    print(
        'Evidence score field "{0}" will be used.'.format(
            evidence_score_field
        )
    )

    fixed_labels, evidences, molecules = pyqms.adaptors.parse_evidence(
        fixed_labels         = None,
        evidence_files       = [ ident_file ],
        evidence_score_field = evidence_score_field
    )

    params = {
        'molecules'        : molecules,
        'charges'          : [1, 2, 3, 4, 5],
        'metabolic_labels' : {'15N' : [0]},
        'fixed_labels'     : fixed_labels,
        'verbose'          : True,
        'evidences'        : evidences
    }

    lib = pyqms.IsotopologueLibrary( **params )

    run = pymzml.run.Reader(
        mzml_file
    )
    out_folder         = os.path.dirname(mzml_file)
    mzml_file_basename = os.path.basename(mzml_file)
    results            = None
    for spectrum in run:
        try:
            # pymzML 2.0.0 style
            scan_time = spectrum.scan_time
        except:
            # scan time will be in seconds
            scan_time = spectrum.get('MS:1000016')
        if spectrum['ms level'] == 1:
            results = lib.match_all(
                mz_i_list = spectrum.centroidedPeaks,
                file_name = mzml_file_basename,
                spec_id   = spectrum['id'],
                spec_rt   = scan_time,
                results   = results
            )
    pickle.dump(
        results,
        open(
            os.path.join(
                out_folder,
                '{0}_pyQms_results.pkl'.format(
                    mzml_file_basename
                )
            ),
            'wb'
        )
    )
    return


if __name__ == '__main__':
    if len( sys.argv ) < 3:
        print(main.__doc__)
    else:
        main(
            ident_file = sys.argv[1],
            mzml_file  = sys.argv[2]
        )

Simple match on peak list

basic_quantification_example.main(mzml=None)

Example script as template for most basic usage of quantification using pyQms.

Use spectrum 1165 of the BSA1.mzML example file. A subrange of the spectrum from m/z 400 to 500 is used.

Usage:
./basic_quantification_example.py

Note

This example does not require a reader to access ms spectra, since a simnple peak liost is used.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pyqms
import sys
import pickle
import os
import pprint


def main( mzml=None):
    """
    Example script as template for most basic usage of quantification using
    pyQms.

    Use spectrum 1165 of the BSA1.mzML example file. A subrange of the spectrum
    from m/z 400 to 500 is used.

    Usage:
        ./basic_quantification_example.py

    Note:
        This example does not require a reader to access ms spectra, since a
        simnple peak liost is used.

    """

    peak_list = [
        (404.2492407565097, 2652.905029296875),
        (405.3003310237508, 4831.56103515625),
        (408.8403673369115, 23153.7109375),
        (409.17476109421705, 10182.2822265625),
        (409.5098740355617, 4770.97412109375),
        (411.17196124490727, 3454.364013671875),
        (413.26627826402705, 6861.84912109375),
        (419.3157903165357, 90201.5625),
        (420.2440507067882, 11098.4716796875),
        (420.31917273788645, 22288.9140625),
        (420.73825281590496, 8159.7099609375),
        (421.2406187369968, 3768.656494140625),
        (427.3787652898548, 5680.43212890625),
        (433.3316647490907, 8430.30859375),
        (434.705984428002, 25924.38671875),
        (435.2080179219357, 11041.2060546875),
        (443.6708762397708, 4081.282470703125),
        (443.69049198141124, 5107.13330078125),
        (443.6974813419733, 9135.3125),
        (443.7112735313511, 2517650.0),
        (443.7282222289076, 5571.26025390625),
        (443.7379762316008, 5227.4033203125),
        (444.1998579474954, 3021.341796875),
        (444.21248374593875, 1156173.75),
        (444.71384916266277, 336326.96875),
        (445.21533524843596, 58547.0703125),
        (445.71700965093, 4182.04345703125),
        (446.1200302053469, 93216.3359375),
        (447.09963627699824, 3806.537109375),
        (447.1169242266495, 59846.37109375),
        (447.3464079857604, 13170.9541015625),
        (448.11566395552086, 9294.5107421875),
        (448.3500303628631, 3213.052490234375),
        (452.1123280000919, 5092.0869140625),
        (461.1934526664677, 4022.537353515625),
        (462.1463969367603, 99732.5),
        (463.14561508666384, 24247.015625),
        (464.1433022096936, 20417.041015625),
        (465.1421080732791, 3222.4052734375),
        (470.1669593722212, 8621.81640625),
        (475.23989190282134, 3369.073974609375),
        (493.27465300375036, 2725.885986328125),
        (496.0077303201583, 8604.0830078125),
    ]
    print('{0:-^100}'.format('Library generation'))
    lib = pyqms.IsotopologueLibrary(
        molecules        = [ 'DDSPDLPK' ],
        charges          = [ 2 ],
        metabolic_labels = None,
        fixed_labels     = None,
        verbose          = True
    )
    print('{0:-^100}'.format('Library generation'))

    results = lib.match_all(
        mz_i_list = peak_list,
        file_name = 'BSA_test',
        spec_id   = 1165,
        spec_rt   = 29.10,
        results   = None
    )
    print()
    print('{0:-^100}'.format('Results summary'))
    for key in results.keys():
        peptide = results.lookup['formula to molecule'][key.formula][0]
        print(
            'For Peptide {0} with formula {1} and charge {2} the following match could be made:'.format(
                peptide,
                key.formula,
                key.charge
            )
        )
        for match in results[key]['data']:
            print(
                '\tAmount {0:1.2f} (scaling_factor) was detected with a matching score of {1:1.2f}'.format(
                    match.scaling_factor,
                    match.score
                )
            )
            print(
                '\tThe follwowing peaks have been matched:'
            )
            for measured_mz, measured_intensity, relative_i, calculated_mz, calculated_intensity in match.peaks:
                print(
                    '\t\t{0:1.6f} m/z @ {1:1.2e} intensity'.format(
                        measured_mz,
                        measured_intensity
                    )
                )
    print('{0:-^100}'.format('Results summary'))
    return


if __name__ == '__main__':
    main()

View result pkl stats

view_result_pkl_stats.main(result_pkl=None)
usage:
./view_result_pkl_stats.py <Path2ResultPkl>

This script will show the stats of a result pkl file. Can be used to query the number of quantified formulas and charge states etc.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pickle
import sys


def main(result_pkl=None):
    '''

    usage:
        ./view_result_pkl_stats.py <Path2ResultPkl>

    This script will show the stats of a result pkl file. Can be used to query
    the number of quantified formulas and charge states etc.

    '''
    results_class = pickle.load(
        open(
            result_pkl,
            'rb'
        )
    )
    print('Result pkl file holds the following information:')
    print()
    for key, value in results_class.index.items():
        print(
            'Number of {0: <20}: {1}'.format(
                key,
                len(value)
            )
        )
        print('\tExample values (up to 5): {0}'.format(list(value)[:5]))
        print()

if __name__ == '__main__':
    if len( sys.argv ) < 2:
        print(main.__doc__)
    else:
        main(
            result_pkl = sys.argv[1],
        )

Access the result class

access_result_class.main(result_pkl=None)
usage:
./access_result_class.py <Path2ResultPkl>

This script will produce a dictionary with all summed up peptide amounts. The main idea is to show how to access the result pkl and loop over the data structure.

Note

Since no filters (score, RT windows, etc.) are applied, this script should not be used to estimate the actual amount of the quantified molecules in the results pkl.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pickle
import sys
import pprint


def main(result_pkl=None):
    '''

    usage:
        ./access_result_class.py <Path2ResultPkl>

    This script will produce a dictionary with all summed up peptide amounts.
    The main idea is to show how to access the result pkl and loop over the
    data structure.

    Note:

        Since no filters (score, RT windows, etc.) are applied, this script
        should not be used to estimate the actual amount of the quantified
        molecules in the results pkl.


    '''
    results_class = pickle.load(
        open(
            result_pkl,
            'rb'
        )
    )

    amount_collector = {}

    for key, value in results_class.items():
        peptide = results_class.lookup['formula to molecule'][key.formula][0]
        if peptide not in amount_collector.keys():
            amount_collector[ peptide ] = {
                'amount' : 0
            }
        for matched_spectrum in value['data']:
            amount_collector[peptide]['amount'] += matched_spectrum.scaling_factor

    pprint.pprint(
        amount_collector
    )


if __name__ == '__main__':
    if len( sys.argv ) < 2:
        print(main.__doc__)
    else:
        main(
            result_pkl = sys.argv[1],
        )

Generate quant summary file

generate_quant_summary_file.main(result_pkl=None)
usage:
./generate_quant_summary_file.py <Path2ResultPkl>

This script will produce quant summary file with all according evidence information which are stored in the result pkl file. Amounts (maxI) will be calculated if possible.

Note

Make sure, an evidence lookup is provided in the results class, so that retention time windows can be defined. Otherwise no meaningful amounts can be calculated.

Warning

Can take very long depending on pkl size!

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pickle
import sys


def main(result_pkl=None):
    '''

    usage:
        ./generate_quant_summary_file.py <Path2ResultPkl>

    This script will produce quant summary file with all according evidence
    information which are stored in the result pkl file. Amounts (maxI) will be
    calculated if possible.

    Note:

        Make sure, an evidence lookup is provided in the results class, so that
        retention time windows can be defined. Otherwise no meaningful amounts
        can be calculated.


    Warning:

        Can take very long depending on pkl size!

    '''
    results_class = pickle.load(
        open(
            result_pkl,
            'rb'
        )
    )
    rt_border_tolerance = 1
    # quant_summary_file  = '{0}_quant_summary.csv'.format(result_pkl)
    quant_summary_file  = '{0}_quant_summary.xlsx'.format(result_pkl)
    results_class.write_rt_info_file(
        output_file         = quant_summary_file,
        list_of_csvdicts    = None,
        trivial_name_lookup = None,
        rt_border_tolerance = rt_border_tolerance,
        update              = True
    )
    results_class.calc_amounts_from_rt_info_file(
        rt_info_file         = quant_summary_file,
        rt_border_tolerance  = rt_border_tolerance,
        calc_amount_function = None
    )
    return

if __name__ == '__main__':
    if len( sys.argv ) < 2:
        print(main.__doc__)
    else:
        main(
            result_pkl = sys.argv[1],
        )

Write raw quant results as csv

write_raw_result_csv.main(result_pkl=None)
usage:
./write_raw_result_csv.py <Path2ResultPkl>

Will write all results of a result pkl into a .csv file. Please refer to Documentation of Result Class for further information.

Warning

The resulting .csv files can become very large depending on the provided pkl file!

Keys in csv:

  • Formula : molecular formula of the molecule (str)
  • Molecule : molecule or trivial name (str)
  • Charge : charge of the molecule (int)
  • ScanID : ScanID of the quantified spectrum (int)
  • Label Percentiles : Labeling percentile ( (element, enrichment in %), )
  • Amount : the determined amount of the molecule
  • Retention Time : retetention time of the ScanID
  • mScore : score of the isotopologue match
  • Filename : filename of spectrum input files
#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pickle
import sys


def main(result_pkl=None):
    '''

    usage:
        ./write_raw_result_csv.py <Path2ResultPkl>

    Will write all results of a result pkl into a .csv file. Please refer to
    Documentation of :doc:`results` for further information.

    Warning:

        The resulting .csv files can become very large depending on the provided
        pkl file!

    Keys in csv:

        * Formula           : molecular formula of the molecule (str)
        * Molecule          : molecule or trivial name (str)
        * Charge            : charge of the molecule (int)
        * ScanID            : ScanID of the quantified spectrum (int)
        * Label Percentiles : Labeling percentile ( (element, enrichment in %), )
        * Amount            : the determined amount of the molecule
        * Retention Time    : retetention time of the ScanID
        * mScore            : score of the isotopologue match
        * Filename          : filename of spectrum input files

    '''
    results_class = pickle.load(
        open(
            result_pkl,
            'rb'
        )
    )

    results_class.write_result_csv(
        output_file_name= '{0}_raw_results.csv'.format(result_pkl)
    )

if __name__ == '__main__':
    if len( sys.argv ) < 2:
        print(main.__doc__)
    else:
        main(
            result_pkl = sys.argv[1],
        )

Write results as mzTab

write_mztab_result.main(result_pkl=None)
usage:
./write_mztab_results.py <Path2ResultPkl>

Will write all results of a result pkl into a .mztab file. Please refer to Documentation of Result Class for further information.

Note

Please note that the ouput in mzTab format is still in beta stage. Since pyQms is a raw quantification tool, some meta data has to be passed/set manually by the user.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pickle
import sys


def main(result_pkl=None):
    '''

    usage:
        ./write_mztab_results.py <Path2ResultPkl>

    Will write all results of a result pkl into a .mztab file. Please refer to
    Documentation of :doc:`results` for further information.

    Note:

        Please note that the ouput in mzTab format is still in beta stage.
        Since pyQms is a raw quantification tool, some meta data has to be
        passed/set manually by the user.

    '''
    results_class = pickle.load(
        open(
            result_pkl,
            'rb'
        )
    )

    results_class.write_result_mztab(
        output_file_name = '{0}_results.mztab'.format(result_pkl)
    )

if __name__ == '__main__':
    if len( sys.argv ) < 2:
        print(main.__doc__)
    else:
        main(
            result_pkl = sys.argv[1],
        )

Write results as mzTab (BSA example)

write_BSA_mztab_results.main(result_pkl=None)
usage:
./write_mztab_results.py <Path2ResultPkl>

Will write all results of a result pkl into a .mztab file. Please refer to Documentation of Result Class for further information.

Warning

This example script is specifically for the BSA1.mzML quantification results, since file specific meta data is passed. Please use ‘write_mztab_results.py’ for a more general script to produce mzTab results.

Note

Please note that the ouput in mzTab format is still in beta stage. Since pyQms is a raw quantification tool, some meta data has to be passed/set manually by the user.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pickle
import sys


def main(result_pkl=None):
    '''

    usage:
        ./write_mztab_results.py <Path2ResultPkl>

    Will write all results of a result pkl into a .mztab file. Please refer to
    Documentation of :doc:`results` for further information.

    Warning:

        This example script is specifically for the BSA1.mzML quantification
        results, since file specific meta data is passed. Please use
        'write_mztab_results.py' for a more general script to produce mzTab
        results.

    Note:

        Please note that the ouput in mzTab format is still in beta stage.
        Since pyQms is a raw quantification tool, some meta data has to be
        passed/set manually by the user.

    '''
    results_class = pickle.load(
        open(
            result_pkl,
            'rb'
        )
    )
    # provide meta data as lists of mztab specific formats. Pass directly
    # mztab correct format.

    mztab_meta_info = {
        'protein_search_engine_score'   : [],
        'psm_search_engine_score'       : ['[MS,MS:1001475,OMSSA:evalue, ]'],
        'fixed_mod'                     : ['[UNIMOD, UNIMOD:4, Carbamidomethyl, ]'],
        'variable_mod'                  : ['[UNIMOD, UNIMOD:35, Oxidation, ]'],
        'study_variable-description'    : ['Standard BSA measurement'],
        'ms_run-location'               : ['BSA1.mzML'],

    }

    results_class.lookup['mztab_meta_info'] = mztab_meta_info

    results_class.write_result_mztab(
        output_file_name = '{0}_results.mztab'.format(result_pkl)
    )

if __name__ == '__main__':
    if len( sys.argv ) < 2:
        print(main.__doc__)
    else:
        main(
            result_pkl = sys.argv[1],
        )

Advanced usage

Parse ident file and quantify (with CAM)

parse_ident_file_and_quantify_with_carbamidomethylation.main(ident_file=None, mzml_file=None)

Script to automatically parse Ursgal result files and quantify it via pyQms.

For evidence files with molecules with Caramidomethylation as fixed modification. These mode will be stripped from the molecules. This is important if an metabolic label (like 15N) is applied. This ensures that the nitrogens pools of the peptides (which are 15N labeled) do not mix up with the nitrogen pool of the Carbamidomethylation (14N since intriduced during sample preparation). Please refer to Documenation of Adaptors for further information.

Ursgal result files or files in mzTab format are read in and used for quantification of the BSA example file.

Note

Use e.g. the BSA1.mzML example file. Please download it first using ‘get_example_BSA_file.py’. Evidence files can also be found in the data folder ‘BSA1_omssa_2_1_9_unified.csv’ or ‘BSA1_omssa_2_1_9.mztab’

Usage:

./parse_ident_file_and_quantify_with_carbamidomethylation.py <ident_file> <mzml_file>
#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pyqms
import sys
import pickle
import os
import pyqms.adaptors
try:
    import pymzml
except:
    print('Please install pymzML via: pip install pymzml')


def main(ident_file=None, mzml_file=None):
    '''

    Script to automatically parse `Ursgal`_ result files and quantify it via
    pyQms.

    For evidence files with molecules with Caramidomethylation as fixed
    modification. These mode will be stripped from the molecules. This is
    important if an metabolic label (like 15N) is applied. This ensures that the
    nitrogens pools of the peptides (which are 15N labeled) do not mix up with
    the nitrogen pool of the Carbamidomethylation (14N since intriduced during
    sample preparation). Please refer to Documenation of :doc:`adaptors` for
    further information.

    `Ursgal`_ result files or files in `mzTab` format are read in and used for
    quantification of the BSA example file.

    Note:

        Use e.g. the BSA1.mzML example file. Please download it first using
        'get_example_BSA_file.py'. Evidence files can also be found in the
        data folder 'BSA1_omssa_2_1_9_unified.csv' or 'BSA1_omssa_2_1_9.mztab'

    Usage:

        ./parse_ident_file_and_quantify_with_carbamidomethylation.py <ident_file> <mzml_file>

    .. _Ursgal:
        https://github.com/ursgal/ursgal

    .. _mzTab:
        http://www.psidev.info/mztab

    '''

    # define the fixed label for Caramidomethyl
    tmp_fixed_labels = {
        'C' : [
            {
                'element_composition' : {'O': 1, 'H': 3, '14N': 1, 'C': 2},
                'evidence_mod_name': 'Carbamidomethyl'
            },
        ]
    }

    formatted_fixed_labels, evidence_lookup, molecule_list = pyqms.adaptors.parse_evidence(
        fixed_labels   = tmp_fixed_labels,
        evidence_files = [ ident_file ],
    )

    params = {
        'molecules'        : molecule_list,
        'charges'          : [1, 2, 3, 4, 5],
        'metabolic_labels' : {'15N' : [0, ]},
        'fixed_labels'     : formatted_fixed_labels,
        'verbose'          : True,
        'evidences'        : evidence_lookup
    }

    lib = pyqms.IsotopologueLibrary( **params )

    run = pymzml.run.Reader(
        mzml_file
    )
    out_folder         = os.path.dirname(mzml_file)
    mzml_file_basename = os.path.basename(mzml_file)
    results = None
    for spectrum in run:
        spec_id = spectrum['id']
        try:
            # pymzML 2.0.0 style
            scan_time = spectrum.scan_time
        except:
            # scan time will be in seconds
            scan_time = spectrum.get('MS:1000016')
        if spectrum['ms level'] == 1:
            results = lib.match_all(
                mz_i_list = spectrum.centroidedPeaks,
                file_name = mzml_file_basename,
                spec_id   = spectrum['id'],
                spec_rt   = scan_time,
                results   = results
            )

    pickle.dump(
        results,
        open(
            os.path.join(
                out_folder,
                '{0}_pyQms_results.pkl'.format(
                    mzml_file_basename
                )
            ),
            'wb'
        )
    )
    return


if __name__ == '__main__':
    if len( sys.argv ) < 3:
        print(main.__doc__)
    else:
        main(
            ident_file = sys.argv[1],
            mzml_file  = sys.argv[2]
        )

Complete quantification - from identification csv to peptide abundances

complete_BSA_quantification.main(ident_file=None, mzml_file=None)

Examples script to demonstrate a (example) workflow from mzML files to peptide abundances. Will plot for every quantified peptide a matched isotopologue chromatogram (MIC). The plots include RT windows, maximum amount in RT window and identification RT(s).

Ursgal result files or files in mzTab format are read in and used for quantification of the BSA example file.

Note

Use e.g. the BSA1.mzML example file. Please download it first using ‘get_example_BSA_file.py’. Evidence files can also be found in the data folder ‘BSA1_omssa_2_1_9_unified.csv’ or ‘BSA1_omssa_2_1_9.mztab’

Usage:

./complete_BSA_quantification.py <ident_file> <mzml_file>

Note

rpy2 is required for all plotting

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pyqms
import sys

import pickle
import os
import pyqms.adaptors
try:
    import pymzml
except:
    print('Please install pymzML via: pip install pymzml')


def main(ident_file = None, mzml_file = None):
    '''
    Examples script to demonstrate a (example) workflow from mzML files to
    peptide abundances. Will plot for every quantified peptide a matched
    isotopologue chromatogram (MIC). The plots include RT windows, maximum
    amount in RT window and identification RT(s).

    `Ursgal`_ result files or files in `mzTab` format are read in and used for
    quantification of the BSA example file.

    Note:

        Use e.g. the BSA1.mzML example file. Please download it first using
        'get_example_BSA_file.py'. Evidence files can also be found in the
        data folder 'BSA1_omssa_2_1_9_unified.csv' or 'BSA1_omssa_2_1_9.mztab'

    Usage:

        ./complete_BSA_quantification.py <ident_file> <mzml_file>

    .. _Ursgal:
        https://github.com/ursgal/ursgal

    .. _mzTab:
        http://www.psidev.info/mztab

    Note:
        rpy2 is required for all plotting

    '''

    # define the fixed label for Carbamidomethyl
    tmp_fixed_labels = {
        'C' : [
            {
                'element_composition' : {'O': 1, 'H': 3, '14N': 1, 'C': 2},
                'evidence_mod_name': 'Carbamidomethyl'
            },
        ]
    }
    if ident_file.upper().endswith('MZTAB'):
        evidence_score_field  = 'search_engine_score[1]'
    else:
        # this is the default value in the adaptor
        evidence_score_field = 'PEP'

    print(
        'Evidence score field "{0}" will be used.'.format(
            evidence_score_field
        )
    )
    formatted_fixed_labels, evidence_lookup, molecule_list = pyqms.adaptors.parse_evidence(
        fixed_labels         = tmp_fixed_labels,
        evidence_files       = [ ident_file ],
        evidence_score_field = evidence_score_field
    )

    params = {
        'molecules'        : molecule_list,
        'charges'          : [1, 2, 3, 4, 5],
        'metabolic_labels' : {'15N' : [0, ]},
        'fixed_labels'     : formatted_fixed_labels,
        'verbose'          : True,
        'evidences'        : evidence_lookup
    }

    lib = pyqms.IsotopologueLibrary( **params )

    run = pymzml.run.Reader(
        mzml_file
    )
    out_folder         = os.path.dirname(mzml_file)
    mzml_file_basename = os.path.basename(mzml_file)
    results = None
    for spectrum in run:
        spec_id   = spectrum['id']
        try:
            # pymzML 2.0.0 style
            scan_time = spectrum.scan_time
        except:
            # scan time will be in seconds
            scan_time = spectrum.get('MS:1000016')
        if spectrum['ms level'] == 1:
            results = lib.match_all(
                mz_i_list = spectrum.centroidedPeaks,
                file_name = mzml_file_basename,
                spec_id   = spectrum['id'],
                spec_rt   = scan_time,
                results   = results
            )
    # print(results)
    out_folder = os.path.join(
        os.path.dirname(ident_file),
        'complete_BSA_quantification'
    )
    if os.path.exists(out_folder) is False:
        os.mkdir(out_folder)
    print()
    print('All results go into folder: {0}'.format(out_folder))
    rt_border_tolerance = 1
    quant_summary_file  = os.path.join(
        out_folder,
        'complete_BSA_quantification_summary.xlsx',
    )
    results.write_rt_info_file(
        output_file         = quant_summary_file,
        list_of_csvdicts    = None,
        trivial_name_lookup = None,
        rt_border_tolerance = rt_border_tolerance,
        update              = True
    )
    calculated_amounts = results.calc_amounts_from_rt_info_file(
        rt_info_file         = quant_summary_file,
        rt_border_tolerance  = rt_border_tolerance,
        calc_amount_function = None, # calc_amount_function
    )
    # print(calculated_amounts)
    formula_charge_to_quant_info = {}
    for line_dict in calculated_amounts:
        formula_charge_to_quant_info[ (line_dict['formula'], int(line_dict['charge'])) ] = {
            'rt'           : line_dict['max I in window (rt)'],
            'amount'       : line_dict['max I in window'],
            'rt start'     : line_dict['start (min)'],
            'rt stop'      : line_dict['stop (min)'],
            'evidence_rts' : [],
        }
        if len(formula_charge_to_quant_info[ (line_dict['formula'], int(line_dict['charge'])) ]['evidence_rts']) == 0:
            for ev_string in line_dict['evidences (min)'].split(';'):
                formula_charge_to_quant_info[ (line_dict['formula'], int(line_dict['charge'])) ]['evidence_rts'].append(
                    round( float( ev_string.split('@')[1] ), 2 )
                )
    import_ok = False
    try:
        import rpy2
        import_ok = True
    except:
        pass
    if import_ok:
        print('Plotting results plot including RT windows, abundances and identifications')
        for key in results.keys():
            short_key = ( key.formula, key.charge )

            match_list = results[key]['data']
            if len(match_list) < 15:
                continue
            file_name = os.path.join(
                out_folder ,
                'MIC_2D_{0}_{1}.pdf'.format(
                    '_'.join(
                        results.lookup['formula to molecule'][ key.formula ]
                    ),
                    key.charge,
                )
            )
            graphics, grdevices = results.init_r_plot(file_name)

            ablines = {
                key : [
                    {
                        'v'   : formula_charge_to_quant_info[short_key]['rt'],
                        'lty' : 2
                    },
                    {
                        'v'   : formula_charge_to_quant_info[short_key]['rt start'],
                        'lty' : 2,
                        'col' : 'blue'
                    },
                    {
                        'v'   : formula_charge_to_quant_info[short_key]['rt stop'],
                        'lty' : 2,
                        'col' : 'blue'
                    },
                ]
            }
            # print(formula_charge_to_quant_info[short_key])
            additional_legends = {
                key : [
                    {
                        'x'    : formula_charge_to_quant_info[short_key]['rt'],
                        'y'    : formula_charge_to_quant_info[short_key]['amount'],
                        'text' : 'max intensity: {0:1.3e}'.format(
                            formula_charge_to_quant_info[short_key]['amount'],
                        ),
                        'pos'  : 3 # above
                    },
                    {
                        'x'    : formula_charge_to_quant_info[short_key]['rt start'],
                        'y'    : formula_charge_to_quant_info[short_key]['amount'] / 2,
                        'text' : 'RT Window start',
                        'pos'  : 4, # right
                        'col'  : 'blue'
                    },
                    {
                        'x'    : formula_charge_to_quant_info[short_key]['rt stop'],
                        'y'    : formula_charge_to_quant_info[short_key]['amount'] / 2,
                        'text' : 'RT window stop',
                        'pos'  : 2, # left,
                        'col'  : 'blue'
                    },
                ]
            }

            for evidence_rt in formula_charge_to_quant_info[short_key]['evidence_rts']:
                ablines[key].append(
                    {
                        'v'   : evidence_rt,
                        'lwd' : 0.5,
                        'col' : 'purple',
                    }
                )
                additional_legends[key].append(
                    {
                        'x'    : evidence_rt,
                        'y'    : 0,
                        'lwd'  : 0.5,
                        'col'  : 'purple',
                        'text' : 'MS2 ident',
                        'pos'  : 4,
                        'srt'  : 45 # rotate label
                    }
                )

            results.plot_MICs_2D(
                [key],
                file_name          = None,
                rt_window          = None,
                i_transform        = None,
                xlimits            = [
                    formula_charge_to_quant_info[short_key]['rt start']-0.05,
                    formula_charge_to_quant_info[short_key]['rt stop']+0.05,

                ],
                additional_legends = additional_legends,
                title              = None,
                zlimits            = None,
                ablines            = ablines,
                graphics           = graphics
            )
            print(
                'Plottted {0}'.format(file_name)
            )

    return


if __name__ == '__main__':
    if len( sys.argv ) < 3:
        print(main.__doc__)
    else:
        main(
            ident_file = sys.argv[1],
            mzml_file  = sys.argv[2]
        )

Example plot including RT borders and identification information

Example plot for peptide ‘DDSPDLPK’ with charge 2 in the BSA1.mzML file.

_images/complete_quantification.png

Plotting and visualizattion

Plot example match

plot_match_examples.main(result_pkl=None)
usage:
./plot_match_examples.py <Path2ResultPkl>

Extracts the match information and plots one example isotopologue match into the ‘data’ folder. Uses the plot function of pymzML (pymzML.plot). Use this script as template for annotating spectra with match information.

Note

Plots only one high scored formula (mScore >0.95) from the result pkl. Use e.g. with the ‘BSA1.mzML_pyQms_results.pkl’ obtained from e.g. example script parse_ident_file_and_quantify_with_carbamidomethylation.py to get example plotting data.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pickle
import sys
import os
try:
    import pymzml
    import pymzml.plot
except:
    print('Please install pymzML via: pip install pymzml')


def main(result_pkl=None):
    '''

    usage:
        ./plot_match_examples.py <Path2ResultPkl>

    Extracts the match information and plots one example isotopologue match into
    the 'data' folder. Uses the plot function of pymzML (`pymzML.plot`_). Use
    this script as template for annotating spectra with match information.

    Note:

        Plots only one high scored formula (mScore >0.95) from the result pkl.
        Use e.g. with the 'BSA1.mzML_pyQms_results.pkl' obtained from e.g.
        example script parse_ident_file_and_quantify_with_carbamidomethylation.py
        to get example plotting data.

    .. _pymzML.plot:
        https://pymzml.github.io/plot.html

    '''
    results_class = pickle.load(
        open(
            result_pkl,
            'rb'
        )
    )

    for key, i, entry in results_class.extract_results():
        if entry.score > 0.95:
            p = pymzml.plot.Factory()
            label_x           = []
            measured_peaks    = []
            matched_peaks     = []
            for measured_mz, measured_intensity, relative_i, calculated_mz, calculated_intensity in entry.peaks:
                if measured_mz is not None:
                    measured_peaks.append( (measured_mz, measured_intensity) )
                    matched_peaks.append( (calculated_mz, calculated_intensity * entry.scaling_factor) )
                    label_x.append(
                        (
                            calculated_mz,
                            '{0:5.3f} ppm'.format(
                                (measured_mz - calculated_mz) / ( measured_mz * 1e-6 )
                            )
                        )
                    )

            mz_only  = [ n[0] for n in measured_peaks ]
            mz_range = [ min(mz_only)-1, max(mz_only)+1 ]
            peptides = results_class.lookup['formula to molecule'][key.formula]
            if len(peptides) > 1:
                continue
            p.newPlot(
                header = 'Formula: {0}; Peptide: {1}; Charge: {2}\n File: {3}; Scan: {4}; RT: {5:1.3f}\n Amount: {6:1.3f}; Score: {7:1.3f}'.format(
                    key.formula,
                    peptides[0],
                    key.charge,
                    key.file_name,
                    entry.spec_id,
                    entry.rt,
                    entry.scaling_factor,
                    entry.score
                ),
                mzRange = mz_range
            )
            p.add(
                measured_peaks,
                color = (0, 0, 0),
                style = 'sticks'
            )
            p.add(
                matched_peaks,
                color = (0, 200, 0),
                style = 'triangles'
            )
            p.add(
                label_x,
                color = (0, 0, 255),
                style = 'label_x'
            )

            plot_name = os.path.join(
                os.pardir,
                'data',
                '{0}_Peptide_{1}_Charge_{2}.xhtml'.format(
                    key.file_name,
                    peptides[0],
                    key.charge
                )
            )
            p.save(
                filename = plot_name,
                mzRange  = mz_range
            )
            print(
                'Plotted file {0}'.format(
                    plot_name
                )
            )
            break


if __name__ == '__main__':
    if len( sys.argv ) < 2:
        print(main.__doc__)
    else:
        main(
            result_pkl = sys.argv[1],
        )

Example match plot

_images/example_match.png

MIC 3D plot

mic_3d_plot.main(pickle_file)
usage:
./mic_3d_plot.py <path_to_pickled_result_class>

Plots 3-dimensional matched isotope chromatograms (MICs) of pyQms quantification results.

Pickled result class can contain thousands of molecules therefore this example script stops plotting after 10 plotted MICs. Otherwise all quantified formula-charge-filename combinations will be plotted!

Use e.g. the BSA data example. Download via ‘get_example_BSA_file.py’ and quantify using ‘parse_ident_file_and_quantify_with_carbmidomethylation.py’.

Note

Installation of R and rpy2 is required.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""

import pickle
import sys
import os

try:
    import rpy2
except:
    print('rpy2 is not installed but required for plotting, please install it and try again')
    print('pip3.4 install rpy2')


def main(pickle_file):
    '''
    usage:
        ./mic_3d_plot.py <path_to_pickled_result_class>

    Plots 3-dimensional matched isotope chromatograms (MICs) of pyQms
    quantification results.

    Pickled result class can contain thousands of molecules therefore this
    example script stops plotting after 10 plotted MICs. Otherwise all
    quantified formula-charge-filename combinations will be plotted!

    Use e.g. the BSA data example. Download via 'get_example_BSA_file.py' and
    quantify using 'parse_ident_file_and_quantify_with_carbmidomethylation.py'.

    Note:

        Installation of R and rpy2 is required.

    '''
    results = pickle.load(
        open( pickle_file, 'rb')
    )
    out_folder = os.path.join(
        os.path.dirname(pickle_file),
        'plots'
    )
    if os.path.exists(out_folder) is False:
        os.mkdir(out_folder)
    print('Plotting into folder: {0}'.format(out_folder))
    if len( results.keys() ) > 10:
        print(
            '''
Result class should not hold more then 10 keys, to prevent plot overflow!
Will stop after 10 plots!
'''
        )
        # sys.exit()
    for n, key in enumerate(results.keys()):
        if n > 10:
            print('Stopping after 10 plots!')
            exit()
        if len(results[key]['data']) <= 15:
            continue
        mzml_filename = key.file_name
        if os.sep in mzml_filename:
            mzml_filename = os.path.basename(mzml_filename)

        file_name = os.path.join(
            out_folder ,
            'MIC_3D_{0}_{1}_{2}_{3}'.format(
                '_'.join(
                    results.lookup['formula to molecule'][ key.formula ]
                ),
                key.charge,
                key.label_percentiles,
                mzml_filename
            )
        )
        results.plot_MIC_3D(
            key,
            file_name = file_name,
        )

    return


if __name__ == '__main__':
    if len(sys.argv) <= 1:
        sys.exit(main.__doc__)
    main( sys.argv[1] )

Example 3D plot

_images/mic_3d.png

MIC 2D plot

mic_2d_plot.main(pickle_file)
usage:
./mic_2d_plot.py <path_to_pickled_result_class>

Plots 2-dimensional matched isotope chromatograms (MICs) of pyQms quantification results.

Pickled result class can contain thousands of molecules therefore this example script stops plotting after 10 plotted MICs. Otherwise all quantified formula-charge-filename combinations will be plotted!

Use e.g. the BSA data example. Download via ‘get_example_BSA_file.py’ and quantify using ‘parse_ident_file_and_quantify_with_carbmidomethylation.py’.

Note

Installation of R and rpy2 is required.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""

import pickle
import sys
import os

try:
    import rpy2
except:
    print('rpy2 is not installed but required for plotting, please install it and try again')
    print('pip3.4 install rpy2')


def main(pickle_file):
    '''
    usage:
        ./mic_2d_plot.py <path_to_pickled_result_class>

    Plots 2-dimensional matched isotope chromatograms (MICs) of pyQms
    quantification results.

    Pickled result class can contain thousands of molecules therefore this
    example script stops plotting after 10 plotted MICs. Otherwise all
    quantified formula-charge-filename combinations will be plotted!

    Use e.g. the BSA data example. Download via 'get_example_BSA_file.py' and
    quantify using 'parse_ident_file_and_quantify_with_carbmidomethylation.py'.

    Note:

        Installation of R and rpy2 is required.

    '''
    results = pickle.load(
        open( pickle_file, 'rb')
    )
    out_folder = os.path.join(
        os.path.dirname(pickle_file),
        'plots'
    )
    if os.path.exists(out_folder) is False:
        os.mkdir(out_folder)
    print('Plotting into folder: {0}'.format(out_folder))
    if len( results.keys() ) > 10:
        print(
            '''
Result class should not hold more then 10 keys, to prevent plot overflow!
Will stop after 10 plots!
'''
        )
        # sys.exit()
    for n, key in enumerate(results.keys()):
        if n > 10:
            print('Stopping after 10 plots!')
            exit()
        if len(results[key]['data']) <= 15:
            continue
        mzml_filename = key.file_name
        if os.sep in mzml_filename:
            mzml_filename = os.path.basename(mzml_filename)

        file_name = os.path.join(
            out_folder ,
            'MIC_2D_{0}_{1}_{2}_{3}.pdf'.format(
                '_'.join(
                    results.lookup['formula to molecule'][ key.formula ]
                ),
                key.charge,
                key.label_percentiles,
                mzml_filename
            )
        )
        graphics, grdevices = results.init_r_plot(file_name)
        results.plot_MICs_2D(
            [key],
            graphics = graphics
        )


    return


if __name__ == '__main__':
    if len(sys.argv) <= 1:
        sys.exit(main.__doc__)
    main( sys.argv[1] )

Example 2D plot

_images/mic_2d.png

Determine m/z and intensity errors

determine_mz_and_i_error.main(result_pkl=None)
usage:
./determine_mz_and_i_error.py <Path2ResultPkl>

This script will determine the apparant mz and intensity error present in the quantifications for the given result pkl.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pickle
import sys
import os


def main(result_pkl=None):
    '''

    usage:
        ./determine_mz_and_i_error.py <Path2ResultPkl>

    This script will determine the apparant mz and intensity error present
    in the quantifications for the given result pkl.


    '''
    results_class = pickle.load(
        open(
            result_pkl,
            'rb'
        )
    )

    plot_name = os.path.join(
        os.path.dirname(result_pkl),
        'mz_and_intensity_error_{0}.pdf'.format(
            os.path.basename(result_pkl)
        )
    )

    results_class._determine_measured_error(
        score_threshold = None,
        topX            = 3,
        filename        = plot_name,
        plot            = True
    )


if __name__ == '__main__':
    if len( sys.argv ) < 2:
        print(main.__doc__)
    else:
        main(
            result_pkl = sys.argv[1],
        )

Visualize errors on spectrum level

visualize_scoring_information.main(mzml=None)

Example script fort visualizing the m/z and intensity error, which is the basis for the scoring of the matches in pyQms.

Use spectrum 1165 of the BSA1.mzML example file. A subrange of the spectrum from m/z 400 to 500 is used.

Usage:
./visualize_scoring_information.py

Note

This example does not require a reader to access MS spectra, since a simnple peak list is used.

#!/usr/bin/env python3
# encoding: utf-8
"""
    pyQms
    -----

    Python module for fast and accurate mass spectrometry data quantification

    :license: MIT, see LICENSE.txt for more details

    Authors:

        * Leufken, J.
        * Niehues, A.
        * Sarin, L.P.
        * Hippler, M.
        * Leidel, S.A.
        * Fufezan, C.

"""
import pyqms
import sys
import pickle
import os
import pprint
from collections import defaultdict as ddict
try:
    import pymzml
except:
    print('Please install pymzML via: pip install pymzml')


def main( mzml=None):
    """
    Example script fort visualizing the m/z and intensity error, which is the
    basis for the scoring of the matches in pyQms.

    Use spectrum 1165 of the BSA1.mzML example file. A subrange of the spectrum
    from m/z 400 to 500 is used.

    Usage:
        ./visualize_scoring_information.py

    Note:
        This example does not require a reader to access MS spectra, since a
        simnple peak list is used.

    """

    peak_list = [
        (404.2492407565097, 2652.905029296875),
        (405.3003310237508, 4831.56103515625),
        (408.8403673369115, 23153.7109375),
        (409.17476109421705, 10182.2822265625),
        (409.5098740355617, 4770.97412109375),
        (411.17196124490727, 3454.364013671875),
        (413.26627826402705, 6861.84912109375),
        (419.3157903165357, 90201.5625),
        (420.2440507067882, 11098.4716796875),
        (420.31917273788645, 22288.9140625),
        (420.73825281590496, 8159.7099609375),
        (421.2406187369968, 3768.656494140625),
        (427.3787652898548, 5680.43212890625),
        (433.3316647490907, 8430.30859375),
        (434.705984428002, 25924.38671875),
        (435.2080179219357, 11041.2060546875),
        (443.6708762397708, 4081.282470703125),
        (443.69049198141124, 5107.13330078125),
        (443.6974813419733, 9135.3125),
        (443.7112735313511, 2517650.0),
        (443.7282222289076, 5571.26025390625),
        (443.7379762316008, 5227.4033203125),
        (444.1998579474954, 3021.341796875),
        (444.21248374593875, 1156173.75),
        (444.71384916266277, 336326.96875),
        (445.21533524843596, 58547.0703125),
        (445.71700965093, 4182.04345703125),
        (446.1200302053469, 93216.3359375),
        (447.09963627699824, 3806.537109375),
        (447.1169242266495, 59846.37109375),
        (447.3464079857604, 13170.9541015625),
        (448.11566395552086, 9294.5107421875),
        (448.3500303628631, 3213.052490234375),
        (452.1123280000919, 5092.0869140625),
        (461.1934526664677, 4022.537353515625),
        (462.1463969367603, 99732.5),
        (463.14561508666384, 24247.015625),
        (464.1433022096936, 20417.041015625),
        (465.1421080732791, 3222.4052734375),
        (470.1669593722212, 8621.81640625),
        (475.23989190282134, 3369.073974609375),
        (493.27465300375036, 2725.885986328125),
        (496.0077303201583, 8604.0830078125),
    ]
    print('{0:-^100}'.format('Library generation'))
    lib = pyqms.IsotopologueLibrary(
        molecules        = [ 'DDSPDLPK' ],
        charges          = [ 2 ],
        metabolic_labels = None,
        fixed_labels     = None,
        verbose          = True
    )
    print('{0:-^100}'.format('Library generation'))

    results = lib.match_all(
        mz_i_list = peak_list,
        file_name = 'BSA_test',
        spec_id   = 1165,
        spec_rt   = 29.10,
        results   = None
    )
    for key, i, entry in results.extract_results():
        p = pymzml.plot.Factory()
        label_mz_error = []
        label_i_error  = []
        measured_peaks = []
        matched_peaks  = []
        peak_info = ddict(list)
        # pprint.pprint(entry.peaks)
        for measured_mz, measured_intensity, relative_i, calculated_mz, calculated_intensity in entry.peaks:
            if measured_mz is not None:
                measured_peaks.append(
                    (
                        measured_mz,
                        measured_intensity
                    )
                )
                matched_peaks.append(
                    (
                        calculated_mz,
                        calculated_intensity * entry.scaling_factor
                    )
                )
                mz_error = (measured_mz - calculated_mz) / ( measured_mz * 1e-6 )
                label_mz_error.append(
                    (
                        calculated_mz,
                        '{0:5.3f} ppm m/z error'.format(
                            mz_error
                        )
                    )
                )
                scaled_intensity = calculated_intensity * entry.scaling_factor
                rel_i_error  = abs(measured_intensity - scaled_intensity) / scaled_intensity

                peak_info['measured peaks'].append(measured_mz)
                peak_info['theoretical peaks'].append(calculated_mz)
                peak_info['relative intensity'].append(relative_i)
                peak_info['scaled matched peaks'].append( calculated_intensity * entry.scaling_factor )
                peak_info['mz error'].append( mz_error)
                peak_info['i error'].append(  rel_i_error )


                if rel_i_error > 1:
                    rel_i_error = 1

                label_i_error.append(
                    (
                        calculated_mz,
                        '{0:5.3f} rel. intensity error'.format(
                            rel_i_error
                        )
                    )
                )



        mz_only  = [ n[0] for n in measured_peaks ]
        mz_range = [ min(mz_only)-1, max(mz_only)+1 ]
        peptide = results.lookup['formula to molecule'][key.formula][0]
        p.newPlot(
            header = 'Formula: {0}; Peptide: {1}; Charge: {2}\n Amount: {3:1.3f}; Score: {4:1.3f}'.format(
                key.formula,
                peptide,
                key.charge,
                entry.scaling_factor,
                entry.score
            ),
            mzRange = mz_range
        )
        p.add(
            measured_peaks,
            color = (0, 0, 0),
            style = 'sticks'
        )
        p.add(
            matched_peaks,
            color = (0, 200, 0),
            style = 'triangles'
        )
        p.add(
            label_mz_error,
            color = (255, 0, 0),
            style = 'label_x'
        )
        p.add(
            label_i_error,
            color = (255, 0, 0),
            style = 'label_x'
        )

        plot_name = os.path.join(
            os.pardir,
            'data',
            'Score_visualization_Peptide_{1}_Charge_{2}.xhtml'.format(
                key.file_name,
                peptide,
                key.charge
            )
        )
        p.save(
            filename = plot_name,
            mzRange  = mz_range
        )
        print(
                'Plotted file {0}'.format(
                    plot_name
                )
            )
        # print(entry)
        print('Match info')
        for key, value_list in sorted(peak_info.items()):
            print(key)
            print('[{0}]'.format(','.join([str(n) for n in value_list])))
            print()
    return


if __name__ == '__main__':
    main()