Source code for astropath.profiling

""" Profiling utilities for the PATH calculations.

This module times the two core PATH steps that were targeted for the
numpy speed-up work (see prompts/speed_up.md):

  * ``localization.calc_LWx`` -- the localization term L(w-x)
  * ``bayesian.px_Oi_fixedgrid`` -- the fixed-grid p(x|O_i)

It is modeled on the ``calculations/step_size/Profiling.ipynb``
notebook: a faux FRB, a circular error ellipse, and a couple of
candidate galaxies.  Rather than timing each sub-step interactively, it
sweeps a range of grid sizes (by varying ``step_size``), builds a table
of timings, prints it, and writes a figure.

Run from the command line::

    python -m astropath.profiling

or import and call :func:`run_profiling` directly.
"""
import os
import time

import numpy as np
import pandas

import matplotlib
matplotlib.use('Agg')  # headless-safe; no interactive display needed
import matplotlib.pyplot as plt  # noqa: E402

from astropy.coordinates import SkyCoord
from astropy import units

from astropath import localization
from astropath import bayesian


# Faux FRB and analysis defaults (mirroring the Profiling notebook)
FRB_RADEC = '21h44m25.255s -40d54m00.10s'
BOX_HWIDTH = 90.  # arcsec; half-width of the analysis box
# step_size sweep (arcsec).  ngrid = 2*box_hwidth/step_size, so with
# box_hwidth=90 these give side lengths of 360 ... 7200 pixels.
DEFAULT_STEP_SIZES = [0.5, 0.25, 0.1, 0.05, 0.025]


NCAND = 50  # number of candidate galaxies in the profiling scenario


[docs] def default_setup(ncand=NCAND): """Build the faux-FRB profiling scenario. Mirrors calculations/step_size/Profiling.ipynb (circular 5" error ellipse), but scatters ``ncand`` candidate galaxies over a range of locations and angular sizes so the per-candidate loop in ``px_Oi_fixedgrid`` is exercised realistically. Args: ncand (int, optional): Number of candidate galaxies. Returns: tuple: (localiz, cand_coords, cand_ang_size, theta_prior) localiz (dict): eellipse localization dict. cand_coords (SkyCoord): candidate host coordinates. cand_ang_size (np.ndarray): candidate angular sizes, arcsec. theta_prior (dict): offset-prior parameters. """ frb_coord = SkyCoord(FRB_RADEC, frame='icrs') # Small but not tiny, circular error ellipse eellipse = dict(a=5., b=5., theta=0.) localiz = dict(type='eellipse', center_coord=frb_coord, eellipse=eellipse) # Scatter the candidates deterministically (no RNG) over a range of # position angles and separations from the transient. Separations # span 0.5"-10" so some fall well inside the localization and some # near its edge. idx = np.arange(ncand) pa = (idx * 360. / ncand) * units.deg # spread in PA sep = np.linspace(0.5, 10., ncand) * units.arcsec # spread in offset cand_coords = frb_coord.directional_offset_by(pa, sep) # Range of angular sizes, 0.2"-3.0" cand_ang_size = np.linspace(0.2, 3.0, ncand) # arcsec theta_prior = dict(max=6., PDF='exp', scale=1.) return localiz, cand_coords, cand_ang_size, theta_prior
[docs] def ellipse_setup(ncand=NCAND): """High-axis-ratio ellipse scenario that exercises _Lwx_correction. Unlike :func:`default_setup` (circular localization, where ``b >= phi`` so no correction fires), this uses a long thin error ellipse (a=12.5", b=0.2", axis ratio ~60) with candidate galaxies LARGER than ``b``. ``px_Oi_local`` therefore triggers the ``_Lwx_correction`` for every candidate. The galaxy sizes (1.5"-2.5") are chosen so the correction grid is ~1000x1000 cells at the default step (0.05) -- and stays below the ~5000-cell skip threshold across the whole step sweep. Args: ncand (int, optional): Number of candidate galaxies. Returns: tuple: (localiz, cand_coords, cand_ang_size, theta_prior), same shape as :func:`default_setup`. """ frb_coord = SkyCoord(FRB_RADEC, frame='icrs') # Long, thin error ellipse (large axis ratio) -> b < phi below. eellipse = dict(a=12.5, b=0.2, theta=0.) localiz = dict(type='eellipse', center_coord=frb_coord, eellipse=eellipse) # Scatter candidates around the transient (some land off the thin # ellipse; that is fine -- we are timing, not validating values). idx = np.arange(ncand) pa = (idx * 360. / ncand) * units.deg sep = np.linspace(0.5, 8., ncand) * units.arcsec cand_coords = frb_coord.directional_offset_by(pa, sep) # Galaxy sizes 1.5"-2.5": all > b=0.2" (correction fires), and sized # so the correction grid is ~1000 cells per side at step 0.05. cand_ang_size = np.linspace(1.5, 2.5, ncand) # arcsec theta_prior = dict(max=6., PDF='exp', scale=1.) return localiz, cand_coords, cand_ang_size, theta_prior
[docs] def small_loc_setup(ncand=NCAND): """Very small (sub-arcsec) circular localization scenario. A tiny circular error ellipse (a=b=0.1") with candidate galaxies spanning 0.1"-20". For every galaxy larger than ``b`` the ``_Lwx_correction`` fires, but because the ellipse major axis is tiny its correction grid (~8a/h cells per side) stays small even as the galaxy grid grows -- the opposite regime to :func:`ellipse_setup` (large axis ratio -> large correction grid). This stresses the deeply under-resolved case (grid spacing ``phi*step`` can be many times ``b``). Args: ncand (int, optional): Number of candidate galaxies. Returns: tuple: (localiz, cand_coords, cand_ang_size, theta_prior), same shape as :func:`default_setup`. """ frb_coord = SkyCoord(FRB_RADEC, frame='icrs') # Very small, circular error ellipse eellipse = dict(a=0.1, b=0.1, theta=0.) localiz = dict(type='eellipse', center_coord=frb_coord, eellipse=eellipse) idx = np.arange(ncand) pa = (idx * 360. / ncand) * units.deg sep = np.linspace(0.05, 2.0, ncand) * units.arcsec cand_coords = frb_coord.directional_offset_by(pa, sep) # Wide range of galaxy sizes, 0.1"-20" cand_ang_size = np.linspace(0.1, 20., ncand) # arcsec theta_prior = dict(max=6., PDF='exp', scale=1.) return localiz, cand_coords, cand_ang_size, theta_prior
def _time_call(func, args, npix, reps=None): """Time a single callable, returning the best wall time in seconds. The number of repetitions is scaled down for large grids to keep the overall sweep fast, unless an explicit ``reps`` is given. Args: func (callable): Function to time. args (tuple): Positional arguments passed to ``func``. npix (int): Number of grid pixels (used to pick repetitions). reps (int, optional): Force a fixed number of repetitions. Use reps=1 to time a single call (e.g. to *include* the numba JIT compilation cost in the first measurement). Returns: float: Best-of-reps wall-clock time, seconds. """ if reps is None: # Fewer reps for big grids (they are slow and stable) if npix > 1_000_000: reps = 1 elif npix > 100_000: reps = 2 else: reps = 3 best = np.inf for _ in range(reps): t0 = time.perf_counter() func(*args) best = min(best, time.perf_counter() - t0) return best def _build_grid(localiz, box_hwidth, step_size): """Build the fixed ra/dec grid the same way px_Oi_fixedgrid does. Args: localiz (dict): eellipse localization dict (needs center_coord). box_hwidth (float): Half-width of the analysis box, arcsec. step_size (float): Grid step size, arcsec. Returns: tuple: (ra, dec, ngrid) with ra/dec numpy arrays in deg and ngrid the number of pixels per side. """ ngrid = int(np.round(2 * box_hwidth / step_size)) x = np.linspace(-box_hwidth, box_hwidth, ngrid) xcoord, ycoord = np.meshgrid(x, x) # Flat-sky; RA increases in x center = localiz['center_coord'] cos_dec = np.cos(np.radians(center.dec.deg)) ra = center.ra.deg + xcoord / 3600. / cos_dec dec = center.dec.deg + ycoord / 3600. return ra, dec, ngrid
[docs] def run_profiling(step_sizes=None, box_hwidth=BOX_HWIDTH): """Profile calc_LWx and px_Oi_fixedgrid over a range of grid sizes. Args: step_sizes (list, optional): Grid step sizes (arcsec) to sweep. Defaults to :data:`DEFAULT_STEP_SIZES`. box_hwidth (float, optional): Analysis-box half-width, arcsec. Returns: pandas.DataFrame: One row per step size with columns ``step_size``, ``ngrid``, ``n_pixels``, ``calc_LWx_s``, ``px_Oi_fixedgrid_s`` (numpy) and ``px_Oi_numba_s`` (the numba ``use_numba=True`` path; NaN if numba is unavailable). """ if step_sizes is None: step_sizes = DEFAULT_STEP_SIZES localiz, cand_coords, cand_ang_size, theta_prior = default_setup() # Default correction applied to all px_Oi_fixedgrid timings here correction = 'p_wO' print("Starting profiling sweep over %d step sizes, %d candidates" % (len(step_sizes), len(cand_ang_size))) print(" using correction=%r" % correction) # NOTE: the numba JIT is intentionally NOT pre-warmed. Its # compilation cost is included in the first (smallest-grid) numba # timing below, so the reported numba time reflects a realistic # one-off sandbox call. rows = [] for step_size in step_sizes: # Grid for the standalone calc_LWx timing ra, dec, ngrid = _build_grid(localiz, box_hwidth, step_size) npix = ngrid * ngrid print(" step_size=%.4f grid=%dx%d (%d pix) ..." % (step_size, ngrid, ngrid, npix)) # Time calc_LWx (localization term only) t_lwx = _time_call(localization.calc_LWx, (ra, dec, localiz), npix) print(" calc_LWx: %.1f ms" % (t_lwx * 1e3)) # Time the full fixed-grid p(x|O_i) -- numpy path t_pxoi = _time_call( lambda *a: bayesian.px_Oi_fixedgrid( box_hwidth, localiz, cand_coords, cand_ang_size, theta_prior, step_size=step_size, correction=correction), (), npix) print(" px_Oi_fixedgrid (numpy): %.1f ms" % (t_pxoi * 1e3)) # Time the numba path (use_numba=True), if available. reps=1 so # the first call's JIT compilation is counted in the timing. if bayesian.HAS_NUMBA: t_numba = _time_call( lambda *a: bayesian.px_Oi_fixedgrid( box_hwidth, localiz, cand_coords, cand_ang_size, theta_prior, step_size=step_size, use_numba=True, correction=correction), (), npix, reps=1) print(" px_Oi_fixedgrid (numba): %.1f ms (%.1fx)" % (t_numba * 1e3, t_pxoi / t_numba)) else: t_numba = np.nan rows.append(dict(step_size=step_size, ngrid=ngrid, n_pixels=npix, calc_LWx_s=t_lwx, px_Oi_fixedgrid_s=t_pxoi, px_Oi_numba_s=t_numba)) print("Profiling sweep complete.") return pandas.DataFrame(rows)
[docs] def run_profiling_local(step_sizes=None): """Profile px_Oi_local over a range of (relative) step sizes. ``px_Oi_local`` builds one grid per candidate, sized to the offset prior (box_hwidth = phi*max) with spacing phi*step_size. The per-candidate pixel count is therefore ``ngrid = 2*max/step_size`` (independent of phi), so sweeping ``step_size`` sweeps the per-candidate grid size. The whole multi-candidate call is timed. Three scenarios are timed against this SAME per-candidate galaxy grid (the x-axis of the figure): * ``circular`` (:func:`default_setup`) -- ``b >= phi``, so the ``_Lwx_correction`` never fires: pure galaxy-grid cost. * ``ellipse`` (:func:`ellipse_setup`) -- a long thin localization (a=12.5", b=0.2") with ``b < phi``, so the correction fires every candidate on a ~1000x1000 grid (at the default step). * ``small`` (:func:`small_loc_setup`) -- a tiny circular localization (a=b=0.1") with galaxies 0.1"-20"; the correction fires but its grid stays small (tiny major axis). Comparing the three isolates how the correction cost scales with the localization size. Args: step_sizes (list, optional): Relative step sizes to sweep. Defaults to :data:`DEFAULT_STEP_SIZES`. Returns: pandas.DataFrame: One row per step size with columns ``step_size``, ``ngrid`` (per candidate), ``n_pixels`` (per candidate), ``ncand``, ``px_Oi_local_s`` (circular), ``px_Oi_local_ellipse_s`` (ellipse + correction), ``px_Oi_local_smallloc_s`` (small localization), and ``corr_ngrid`` (representative ellipse correction-grid side). """ if step_sizes is None: step_sizes = DEFAULT_STEP_SIZES loc_c, cc_c, sz_c, theta_prior = default_setup() loc_e, cc_e, sz_e, _ = ellipse_setup() loc_s, cc_s, sz_s, _ = small_loc_setup() ncand = len(sz_c) max_theta = theta_prior['max'] # Representative correction-grid size uses the ellipse major axis and # a typical galaxy size (mirrors _Lwx_correction's window: ~8a/h). a_e = loc_e['eellipse']['a'] phi_mid = float(np.median(sz_e)) print("Starting px_Oi_local profiling (circular + ellipse + small) " "over %d step sizes, %d candidates" % (len(step_sizes), ncand)) rows = [] for step_size in step_sizes: # Per-candidate galaxy grid size (phi cancels out of ngrid) ngrid = int(np.round(2 * max_theta / step_size)) npix = ngrid * ngrid # Representative correction-grid side for the ellipse scenario. h = 2. * (phi_mid * max_theta) / (ngrid - 1) corr_ngrid = 2 * int(np.ceil(4. * a_e / h)) + 1 print(" step_size=%.4f galaxy grid=%dx%d " "ellipse corr grid~%dx%d ..." % (step_size, ngrid, ngrid, corr_ngrid, corr_ngrid)) # Time all three full multi-candidate calls. Use the total # galaxy pixel budget (ncand * npix) to choose the rep count. t_circ = _time_call( lambda *a: bayesian.px_Oi_local( loc_c, cc_c, sz_c, theta_prior, step_size=step_size), (), ncand * npix) t_ell = _time_call( lambda *a: bayesian.px_Oi_local( loc_e, cc_e, sz_e, theta_prior, step_size=step_size), (), ncand * npix) t_small = _time_call( lambda *a: bayesian.px_Oi_local( loc_s, cc_s, sz_s, theta_prior, step_size=step_size), (), ncand * npix) print(" circular: %.1f ms ellipse(+corr): %.1f ms " "small-loc: %.1f ms" % (t_circ * 1e3, t_ell * 1e3, t_small * 1e3)) rows.append(dict(step_size=step_size, ngrid=ngrid, n_pixels=npix, ncand=ncand, px_Oi_local_s=t_circ, px_Oi_local_ellipse_s=t_ell, px_Oi_local_smallloc_s=t_small, corr_ngrid=corr_ngrid)) print("px_Oi_local profiling complete.") return pandas.DataFrame(rows)
[docs] def plot_local_results(df, outfile): """Plot px_Oi_local timing vs per-candidate grid size (log-log). Args: df (pandas.DataFrame): Output of :func:`run_profiling_local`. outfile (str): Path to write the PNG figure. Returns: str: The path the figure was written to. """ sqrt_pix = np.sqrt(df['n_pixels']) # per-candidate grid side length fig, ax = plt.subplots(figsize=(7, 5)) # Circular localization (a=b=5"): b >= phi for every candidate, so # the _Lwx_correction never fires (verified: 0 invocations). ax.plot(sqrt_pix, df['px_Oi_local_s'], 'D-', color='purple', label='px_Oi_local (circular, b>=phi: no correction)') # Ellipse scenario (b=0.2" < phi): the _Lwx_correction fires for # every candidate on a ~1000x1000 grid. if 'px_Oi_local_ellipse_s' in df: ax.plot(sqrt_pix, df['px_Oi_local_ellipse_s'], 's-', color='darkorange', label='px_Oi_local (ellipse, b<phi: L_wx correction)') # Small circular localization (a=b=0.1" < phi): correction fires but # its grid is tiny (small major axis). if 'px_Oi_local_smallloc_s' in df: ax.plot(sqrt_pix, df['px_Oi_local_smallloc_s'], '^-', color='seagreen', label='px_Oi_local (small loc a=b=0.1", b<phi)') # Reference line at 10 s ax.axhline(10., color='dimgray', linestyle='--', linewidth=1.5) ax.set_xscale('log') ax.set_yscale('log') ax.set_xlabel('sqrt(per-candidate grid pixels)') ax.set_ylabel('Time (s)') ax.set_title('px_Oi_local profiling (%d candidates)' % int(df['ncand'].iloc[0])) ax.grid(True, which='both', alpha=0.3) ax.legend() fig.tight_layout() fig.savefig(outfile, dpi=120) plt.close(fig) return outfile
[docs] def plot_results(df, outfile): """Plot the timing results vs grid size on log-log axes. Args: df (pandas.DataFrame): Output of :func:`run_profiling`. outfile (str): Path to write the PNG figure. Returns: str: The path the figure was written to. """ # x-axis in sqrt(pixels) = grid side length sqrt_pix = np.sqrt(df['n_pixels']) fig, ax = plt.subplots(figsize=(7, 5)) ax.plot(sqrt_pix, df['calc_LWx_s'], 'o-', label='calc_LWx') ax.plot(sqrt_pix, df['px_Oi_fixedgrid_s'], 's-', color='red', label='px_Oi_fixedgrid (numpy)') # numba curve (only if it was timed) if 'px_Oi_numba_s' in df and df['px_Oi_numba_s'].notna().any(): ax.plot(sqrt_pix, df['px_Oi_numba_s'], '^-', color='green', label='px_Oi_fixedgrid (numba)') # Reference line at 10 s ax.axhline(10., color='dimgray', linestyle='--', linewidth=1.5) ax.set_xscale('log') ax.set_yscale('log') ax.set_xlabel('sqrt(grid pixels)') ax.set_ylabel('Time (s)') ax.set_title('PATH profiling') ax.grid(True, which='both', alpha=0.3) ax.set_xlim(None, 10000.) ax.legend() fig.tight_layout() fig.savefig(outfile, dpi=120) plt.close(fig) return outfile
[docs] def main(): """Run the profiling sweep, print the table, and save the figure. Args: None Returns: pandas.DataFrame: The timing table (also printed to screen). """ df = run_profiling() # Print a readable table (times in ms for legibility) show = df.copy() show['calc_LWx_ms'] = show['calc_LWx_s'] * 1e3 show['px_Oi_numpy_ms'] = show['px_Oi_fixedgrid_s'] * 1e3 show['px_Oi_numba_ms'] = show['px_Oi_numba_s'] * 1e3 # numba speed-up factor over the numpy path show['numba_speedup'] = (show['px_Oi_fixedgrid_s'] / show['px_Oi_numba_s']) cols = ['step_size', 'ngrid', 'n_pixels', 'calc_LWx_ms', 'px_Oi_numpy_ms', 'px_Oi_numba_ms', 'numba_speedup'] print('\nPATH profiling results (best-of-reps):') print(show[cols].to_string(index=False, float_format=lambda v: '%.3f' % v)) # Save the figure next to this module outfile = os.path.join(os.path.dirname(__file__), 'profiling_timing.png') plot_results(df, outfile) print('\nFigure written to: %s' % outfile) # ---- px_Oi_local (the local-grid method) ---- df_local = run_profiling_local() show_l = df_local.copy() show_l['circular_ms'] = show_l['px_Oi_local_s'] * 1e3 show_l['ellipse_ms'] = show_l['px_Oi_local_ellipse_s'] * 1e3 show_l['smallloc_ms'] = show_l['px_Oi_local_smallloc_s'] * 1e3 cols_l = ['step_size', 'ngrid', 'ncand', 'corr_ngrid', 'circular_ms', 'ellipse_ms', 'smallloc_ms'] print('\npx_Oi_local profiling results (best-of-reps):') print(show_l[cols_l].to_string( index=False, float_format=lambda v: '%.3f' % v)) outfile_local = os.path.join(os.path.dirname(__file__), 'profiling_local_timing.png') plot_local_results(df_local, outfile_local) print('\nFigure written to: %s' % outfile_local) return df
if __name__ == '__main__': main()