add PyInstrument and cProfile profilers

This commit is contained in:
Nicola Tarocco 2024-01-15 23:07:14 +01:00
parent 667135595c
commit 00dc79f492
No known key found for this signature in database
GPG key ID: A08DEF00BA54E806
9 changed files with 458 additions and 5 deletions

View file

@ -55,7 +55,7 @@ https://doi.org/10.1016/j.buildenv.2022.109166
***Open Source Acknowledgments***
For a detailed list of the open-source dependencies used in this project along with their respective licenses, please refer to [License Information](open-source-licences/README.md). This includes both the core dependencies specified in the project's requirements and their transitive dependencies.
For a detailed list of the open-source dependencies used in this project along with their respective licenses, please refer to [License Information](open-source-licences/README.md). This includes both the core dependencies specified in the project's requirements and their transitive dependencies.
The information also features a distribution diagram of licenses and a brief description of each of them.
@ -147,7 +147,6 @@ voila caimira/apps/expert/caimira.ipynb --port=8080
Then visit http://localhost:8080.
### Running the tests
```
@ -155,6 +154,18 @@ pip install -e .[test]
pytest ./caimira
```
### Running the profiler
The profiler is enabled in one of the following cases:
- the calculator app is running in `debug` mode
- the environment variable `CAIMIRA_PROFILER_ENABLED` is set to 1
When visiting http://localhost:8080/profiler, you can start a new session and choose between [PyInstrument](https://github.com/joerick/pyinstrument) or [cProfile](https://docs.python.org/3/library/profile.html#module-cProfile). The app includes two different profilers, mainly because they can give different information.
Keep the profiler page open. Then, in another window, navigate to any page in CAiMIRA, for example generate a new report. Refresh the profiler page, and click on the `Report` link to see the profiler output.
The sessions are stored in a local file in the `/tmp` folder. To share it across multiple web nodes, a shared storage should be added to all web nodes. The folder can be customized via the environment variable `CAIMIRA_PROFILER_CACHE_DIR`.
### Building the whole environment for local development
**Simulate the docker build that takes place on openshift with:**

View file

@ -25,6 +25,7 @@ import loky
from tornado.web import Application, RequestHandler, StaticFileHandler
from tornado.httpclient import AsyncHTTPClient, HTTPRequest
import tornado.log
from caimira.profiler import CaimiraProfiler, Profilers
from caimira.store.data_registry import DataRegistry
from caimira.store.data_service import DataService
@ -43,7 +44,53 @@ from .user import AuthenticatedUser, AnonymousUser
# increase the overall CAiMIRA version (found at ``caimira.__version__``).
__version__ = "4.14.3"
LOG = logging.getLogger("APP")
LOG = logging.getLogger("Calculator")
class ProfilerPage(RequestHandler):
"""Render the profiler page.
This class does not inherit from BaseRequestHandler to avoid profiling the
profiler page itself.
"""
def get(self) -> None:
profiler = CaimiraProfiler()
template_environment = self.settings["template_environment"]
template = template_environment.get_template("profiler.html.j2")
report = template.render(
user=AnonymousUser(),
active_page="Profiler",
xsrf_form_html=self.xsrf_form_html(),
is_active=profiler.is_active,
sessions=profiler.sessions,
)
self.finish(report)
def post(self) -> None:
profiler = CaimiraProfiler()
if self.get_argument("start", None) is not None:
name = self.get_argument("name", None)
profiler_type = Profilers.from_str(self.get_argument("profiler_type", ""))
profiler.start_session(name, profiler_type)
elif self.get_argument("stop", None) is not None:
profiler.stop_session()
elif self.get_argument("clear", None) is not None:
profiler.clear_sessions()
self.redirect(CaimiraProfiler.ROOT_URL)
class ProfilerReport(RequestHandler):
"""Render the profiler HTML report."""
def get(self, report_id) -> None:
profiler = CaimiraProfiler()
_, report_html = profiler.get_report(report_id)
if report_html:
self.finish(report_html)
else:
self.send_error(404)
class BaseRequestHandler(RequestHandler):
@ -64,6 +111,22 @@ class BaseRequestHandler(RequestHandler):
else:
self.current_user = AnonymousUser()
profiler = CaimiraProfiler()
if profiler.is_active and not self.request.path.startswith(CaimiraProfiler.ROOT_URL):
self._request_profiler = profiler.start_profiler()
def on_finish(self) -> None:
"""Called at the end of the request."""
profiler = CaimiraProfiler()
if profiler.is_active and self._request_profiler:
profiler.stop_profiler(
profiler=self._request_profiler,
uri=self.request.uri or "",
path=self.request.path,
query=self.request.query,
method=self.request.method,
)
def write_error(self, status_code: int, **kwargs) -> None:
template = self.settings["template_environment"].get_template(
"error.html.j2")
@ -199,6 +262,7 @@ class StaticModel(BaseRequestHandler):
base_url = self.request.protocol + "://" + self.request.host
report_generator: ReportGenerator = self.settings['report_generator']
executor = loky.get_reusable_executor(max_workers=self.settings['handler_worker_pool_size'])
report_task = executor.submit(
report_generator.build_report, base_url, form,
executor_factory=functools.partial(
@ -452,6 +516,13 @@ def make_app(
'filename': 'userguide.html.j2'}),
]
profiler = os.environ.get('CAIMIRA_PROFILER_ENABLED', 0)
if debug or profiler:
urls += [
(get_root_url(CaimiraProfiler.ROOT_URL), ProfilerPage),
(get_root_url(r'{root_url}/(.*)'.format(root_url=CaimiraProfiler.ROOT_URL)), ProfilerReport),
]
interface: str = os.environ.get('CAIMIRA_THEME', '<undefined>')
if interface != '<undefined>' and (interface != '<undefined>' and 'cern' not in interface): urls = list(filter(lambda i: i in base_urls, urls))

View file

@ -58,7 +58,12 @@ def main():
theme_dir = Path(theme_dir).absolute()
assert theme_dir.exists()
app = make_app(debug=debug, APPLICATION_ROOT=args.app_root, calculator_prefix=args.prefix, theme_dir=theme_dir)
app = make_app(
debug=debug,
APPLICATION_ROOT=args.app_root,
calculator_prefix=args.prefix,
theme_dir=theme_dir
)
app.listen(args.port)
IOLoop.current().start()

View file

@ -14,6 +14,7 @@ import matplotlib.pyplot as plt
from caimira import models
from caimira.apps.calculator import markdown_tools
from caimira.profiler import profile
from caimira.store.data_registry import DataRegistry
from ... import monte_carlo as mc
from .model_generator import VirusFormData
@ -114,6 +115,7 @@ def concentrations_with_sr_breathing(form: VirusFormData, model: models.Exposure
return lower_concentrations
@profile
def calculate_report_data(form: VirusFormData, model: models.ExposureModel) -> typing.Dict[str, typing.Any]:
times = interesting_times(model)
short_range_intervals = [interaction.presence.boundaries()[0] for interaction in model.short_range]

View file

@ -0,0 +1,62 @@
{% extends "layout.html.j2" %}
{% block main %}
<div class="container mt-5 mb-5">
<form method="POST">
<h1>Profiler</h1>
{% if is_active %}
<div class="form-group mt-3">
The profiler is running.
</div>
<button type="submit" class="btn btn-primary" name="stop">Stop current session</button>
{% else %}
The profiler is not running.
<div class="form-group mt-3">
<input type="text" class="form-control" name="name" placeholder="Enter a name for the new session">
<label class="mt-3">Choose the profiler:</label>
<div class="btn-group btn-group-toggle" data-toggle="buttons">
<label class="btn btn-secondary active">
<input type="radio" name="profiler_type" id="pyinstrument" value="pyinstrument" autocomplete="off" checked> PyInstrument
</label>
<label class="btn btn-secondary">
<input type="radio" name="profiler_type" id="cprofiler" value="cprofiler" autocomplete="off"> CProfiler
</label>
</div>
</div>
<button type="submit" class="btn btn-primary mt-3" name="start">Start new session</button>
{% endif %}
{{ xsrf_form_html }}
<h3 class="mt-5">Sessions</h3>
{% if sessions %}
<ol>
{% for name, reports in sessions.items() %}
<li>Name: {{ name }}</li>
<ul>
{% if reports %}
{% for report in reports %}
<li>{{ report["ts"] }} - {{ report["method"] }} {{ report["uri"] }} - <a href="/profiler/{{ report["report_id"] }}" target="_blank">Report</a></li>
{% endfor %}
{% else %}
<i>No reports yet!</i>
{% endif %}
</ul>
{% endfor %}
</ol>
{% if not is_active %}
<div class="mt-3">
<button type="submit" class="btn btn-danger btn-sm" name="clear">Clear all sessions</button>
</div>
{% endif %}
{% else %}
<i>No sessions yet!</i>
{% endif %}
</form>
</div>
{% endblock main %}

294
caimira/profiler.py Normal file
View file

@ -0,0 +1,294 @@
import cProfile
import glob
import json
import logging
import os
import pstats
import shutil
import typing
from datetime import datetime
from enum import Enum
from functools import wraps
from pstats import f8, func_std_string
from pyinstrument import Profiler as PyInstrumentProfiler
logger = logging.getLogger("Profiler")
class Profilers(Enum):
PYINSTRUMENT = "pyinstrument"
CPROFILER = "cprofiler"
@staticmethod
def from_str(value):
if value == Profilers.PYINSTRUMENT.value:
return Profilers.PYINSTRUMENT
elif value == Profilers.CPROFILER.value:
return Profilers.CPROFILER
else:
raise NotImplementedError
class PyInstrumentWrapper:
profiler = PyInstrumentProfiler(async_mode=True)
@property
def is_running(self):
return self.profiler.is_running
def start(self):
self.profiler.start()
def stop(self):
self.profiler.stop()
report_html = self.profiler.output_html(timeline=True)
report_id = str(abs(hash(report_html)))
return report_id, Profilers.PYINSTRUMENT.value, report_html
class CProfilerWrapper:
profiler = cProfile.Profile()
_is_running = False
@property
def is_running(self):
return self._is_running
def start(self):
self.profiler.enable()
self._is_running = True
def stop(self):
self.profiler.disable()
self._is_running = False
ps = pstats.Stats(self.profiler).sort_stats("cumulative")
report_html = "<p>"
report_html += f"{ps.total_calls} function calls"
if ps.total_calls != ps.prim_calls:
report_html += " (%d primitive calls)" % ps.prim_calls
report_html += " in %.3f seconds" % ps.total_tt
report_html += "</p>"
report_html += "<p>"
report_html += f"Ordered by: {ps.sort_type}"
report_html += "</p>"
report_html += "<table><thead><tr>"
report_html += "<td>ncalls</td><td>tottime</td><td>percall</td><td>cumtime</td><td>percall</td><td>filename:lineno(function)</td>"
report_html += "</tr></thead><tbody>"
for func in ps.stats:
report_html += "<tr>"
cc, nc, tt, ct, callers = ps.stats[func]
c = str(nc)
if nc != cc:
c = c + "/" + str(cc)
report_html += f"<td>{c.rjust(9)}</td>"
report_html += f"<td>{f8(tt)}</td>"
report_html += f"<td></td>" if nc == 0 else f"<td>{f8(tt/nc)}</td>"
report_html += f"<td>{f8(ct)}</td>"
report_html += f"<td></td>" if cc == 0 else f"<td>{f8(ct/cc)}</td>"
report_html += f"<td>{func_std_string(func)}</td>"
report_html += "</tr>"
report_html += "</tbody></table>"
report_id = str(abs(hash(report_html)))
return report_id, Profilers.CPROFILER.value, report_html
class Profiler:
profiler = None
profiler_type = None
def __init__(self, profiler_type):
self.profiler_type = profiler_type
def start(self):
if self.profiler_type == Profilers.PYINSTRUMENT:
self.profiler = PyInstrumentWrapper()
elif self.profiler_type == Profilers.CPROFILER:
self.profiler = CProfilerWrapper()
else:
raise NotImplementedError
self.profiler.start()
def stop(self, session_name, uri, path="", query="", method=""):
report_id, report_type, report_html = self.profiler.stop()
self.profiler = None
return report_id, dict(
session_name=session_name,
ts=datetime.utcnow().strftime("%Y-%m-%d - %H:%M:%S"),
uri=uri,
path=path,
query=query,
method=method,
report_id=report_id,
report_type=report_type,
report_html=report_html,
)
def is_running(self):
return self.profiler and self.profiler.is_running
class CaimiraProfiler:
CACHE_DIR: str = os.environ.get("CAIMIRA_PROFILER_CACHE_DIR", "/tmp")
ROOT_URL: str = "/profiler"
_cache_dirpath: str = None
_cache_filepath: str = None
def __init__(self, cache_dir: typing.Optional[str] = None):
_cache_dir = os.path.join(self.CACHE_DIR, "caimira-profiler")
self._cache_dirpath = _cache_dir
self._cache_filepath = os.path.join(_cache_dir, "active")
def _set_active(self, name: str, profiler_type: Profilers):
try:
os.access(self._cache_dirpath, os.W_OK)
except Exception as e:
# Handle the exception, e.g., print an error message
logger.warning(f"cannot write in cache folder {self._cache_dirpath}")
if not os.path.exists(self._cache_dirpath):
os.makedirs(self._cache_dirpath)
with open(self._cache_filepath, "w") as fp:
json.dump(dict(name=name, profiler_type=profiler_type.value), fp)
def _set_inactive(self):
with open(self._cache_filepath, "w") as fp:
json.dump(dict(), fp)
@property
def sessions(self):
"""Return all stored sessions."""
if not os.path.exists(self._cache_dirpath):
return {}
reports_by_session = {}
json_files = glob.glob(os.path.join(self._cache_dirpath, "*.json"))
for json_file in json_files:
with open(json_file) as fp:
report = json.load(fp)
session_name = report["session_name"]
reports_by_session.setdefault(session_name, [])
reports_by_session[session_name].append(report)
return reports_by_session
@property
def session(self):
"""Return the session name."""
if not os.path.exists(self._cache_filepath):
return None
with open(self._cache_filepath) as fp:
d = json.load(fp)
return dict(
name=d["name"], profiler_type=Profilers.from_str(d["profiler_type"])
)
@property
def is_active(self):
"""Return True if a session is active."""
if not os.path.exists(self._cache_filepath):
return False
try:
with open(self._cache_filepath) as fp:
d = json.load(fp)
return True if d else False
except json.decoder.JSONDecodeError:
os.remove(self._cache_filepath)
return False
def start_session(
self,
name: typing.Optional[str] = None,
profiler_type: typing.Optional[Profilers] = Profilers.PYINSTRUMENT,
):
"""Start a new session, assigning the given name."""
now = datetime.utcnow().isoformat()
name = f"{name}-{now}" if name else now
self._set_active(name, profiler_type)
def start_profiler(self):
"""Start the profiler.
In the context of HTTP requests, this should be called at the beginning of
each HTTP request.
"""
if not self.is_active:
raise Exception("No active session.")
profiler_type = self.session["profiler_type"]
profiler = Profiler(profiler_type)
profiler.start()
return profiler
def stop_profiler(
self,
profiler,
uri: str,
path: typing.Optional[str] = "",
query: typing.Optional[str] = "",
method: typing.Optional[str] = "",
):
"""Stop the profiler, previously obtained when starting it.
In the context of HTTP requests, this should be called at the end of
each HTTP request.
"""
if not self.is_active:
raise Exception("No active session.")
report_id, report = profiler.stop(
self.session["name"], uri, path, query, method
)
with open(os.path.join(self._cache_dirpath, f"{report_id}.json"), "w") as fp:
json.dump(report, fp)
def stop_session(self):
"""Stop the current active session."""
if not self.is_active:
raise Exception("No active session.")
self._set_inactive()
def get_report(self, report_id: str):
"""Return a report by the given id."""
for reports in self.sessions.values():
for report in reports:
if report["report_id"] == report_id:
return (
Profilers.from_str(report["report_type"]),
report["report_html"],
)
def clear_sessions(self):
"""Delete all sessions."""
if self.is_active:
raise Exception("Session still active.")
shutil.rmtree(self._cache_dirpath)
def profile(func):
@wraps(func)
def wrapper(*args, **kwargs):
profiler = CaimiraProfiler()
if profiler.is_active:
profiler_run = profiler.start_profiler()
result = func(*args, **kwargs)
if profiler.is_active:
profiler.stop_profiler(profiler=profiler_run, uri=func.__name__)
return result
return wrapper

View file

@ -374,6 +374,10 @@ Including transitive dependencies is essential to acknowledge the full spectrum
- License: [BSD License (BSD-2-Clause)](https://pypi.org/project/Pygments/)
#### pyinstrument 4.6.1
- License: [BSD License (BSD-3-Clause)](https://pypi.org/project/pyinstrument/)
#### pyparsing 3.1.1
- License: [MIT License](https://pypi.org/project/pyparsing/)

View file

@ -1,9 +1,9 @@
[tool:pytest]
addopts = --mypy
[mypy]
no_warn_no_return = True
exclude = caimira/profiler.py
[mypy-loky.*]
ignore_missing_imports = True
@ -32,6 +32,9 @@ ignore_missing_imports = True
[mypy-pandas.*]
ignore_missing_imports = True
[mypy-pstats.*]
follow_imports = skip
[mypy-tabulate.*]
ignore_missing_imports = True

View file

@ -30,6 +30,7 @@ REQUIREMENTS: dict = {
'numpy',
'pandas',
'psutil',
'pyinstrument',
'pyjwt',
'python-dateutil',
'retry',