{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example Data Reduction\n",
    "\n",
    "This notebook reduces an example dataset from the LCO archive with `banzai`: it sets up a local database, stacks bias, dark and skyflat calibration frames, and then processes the science exposures."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# NOTE(review): these are set before banzai is imported in the next cell; presumably banzai\n",
    "# reads them at import time - confirm before reordering these cells.\n",
    "os.environ['OPENTSDB_PYTHON_METRICS_TEST_MODE'] = 'True'\n",
    "os.environ['API_ROOT'] = \"https://archive-api.lco.global/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import subprocess\n",
    "from glob import glob\n",
    "\n",
    "import requests\n",
    "\n",
    "from banzai.calibrations import make_master_calibrations\n",
    "from banzai import settings\n",
    "from banzai import dbs\n",
    "from banzai.utils.stage_utils import run_pipeline_stages\n",
    "from banzai.logs import set_log_level"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_log_level('INFO')\n",
    "logger = logging.getLogger('banzai')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.makedirs('example_reduction', exist_ok=True)\n",
    "os.environ['DB_ADDRESS'] = 'sqlite:///example_reduction/banzai-example.db'\n",
    "settings.processed_path = os.path.join(os.getcwd(), 'example_reduction')\n",
    "settings.fpack = True\n",
    "settings.db_address = os.environ['DB_ADDRESS']\n",
    "# NOTE: the processed-file patterns used below ('*b91*', '*d91*', '*f91*') assume this level\n",
    "settings.reduction_level = 91"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set up the context object.\n",
    "import banzai.main\n",
    "context = banzai.main.parse_args(settings, parse_system_args=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration\n",
    "# Set to True to download all files locally first, False to access directly from S3\n",
    "DOWNLOAD_FILES_LOCALLY = True\n",
    "\n",
    "# Date range used when stacking the master calibrations; it should contain the data we're using\n",
    "MIN_DATE = '2024-01-01'\n",
    "MAX_DATE = '2026-01-01'\n",
    "\n",
    "# Seconds to wait on the archive before giving up on a request\n",
    "REQUEST_TIMEOUT = 60"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download Data (Optional)\n",
    "Two approaches are available:\n",
    "- **Local download** (default): Download all files first, then process from disk\n",
    "- **S3 direct**: Access files directly from archive during processing\n",
    "\n",
    "Set `DOWNLOAD_FILES_LOCALLY` above to choose. The bad pixel mask (BPM) is always downloaded because it is needed locally for the database setup."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We will be using example data from the LCO archive, from site LSC and instrument sq34\n",
    "\n",
    "# make directories for the test dataset.\n",
    "raw_data_dir = 'example_reduction/lsc/sq34/raw'\n",
    "os.makedirs(raw_data_dir, exist_ok=True)\n",
    "bpm_dir = 'example_reduction/lsc/sq34/bpm/'\n",
    "os.makedirs(bpm_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the basename and archive frame_id for each of the frames we'll use in this example\n",
    "bpm_filename = 'lsc0m409-sq34-20240314-bpm-central30x30.fits.fz'\n",
    "bpm_id = '69359142'\n",
    "\n",
    "raw_frames = {\n",
    "    'lsc0m476-sq34-20250622-0058-f00.fits.fz': '84043242',\n",
    "    'lsc0m476-sq34-20250622-0059-f00.fits.fz': '84043255',\n",
    "    'lsc0m476-sq34-20250622-0060-f00.fits.fz': '84043260',\n",
    "    'lsc0m476-sq34-20250622-0061-f00.fits.fz': '84043265',\n",
    "    'lsc0m476-sq34-20250622-0062-f00.fits.fz': '84043272',\n",
    "    'lsc0m476-sq34-20250625-0060-d00.fits.fz': '84133538',\n",
    "    'lsc0m476-sq34-20250625-0061-d00.fits.fz': '84133758',\n",
    "    'lsc0m476-sq34-20250625-0062-d00.fits.fz': '84134040',\n",
    "    'lsc0m476-sq34-20250625-0063-d00.fits.fz': '84134220',\n",
    "    'lsc0m476-sq34-20250625-0064-d00.fits.fz': '84134361',\n",
    "    'lsc0m476-sq34-20250626-0018-d00.fits.fz': '84149171',\n",
    "    'lsc0m476-sq34-20250626-0019-d00.fits.fz': '84149216',\n",
    "    'lsc0m476-sq34-20250626-0020-d00.fits.fz': '84149302',\n",
    "    'lsc0m476-sq34-20250626-0021-d00.fits.fz': '84149349',\n",
    "    'lsc0m476-sq34-20250626-0022-d00.fits.fz': '84149433',\n",
    "    'lsc0m476-sq34-20250626-0094-e00.fits.fz': '84151307',\n",
    "    'lsc0m476-sq34-20250626-0368-e00.fits.fz': '84167225',\n",
    "    'lsc0m476-sq34-20250626-0452-b00.fits.fz': '84172674',\n",
    "    'lsc0m476-sq34-20250626-0453-b00.fits.fz': '84172677',\n",
    "    'lsc0m476-sq34-20250626-0454-b00.fits.fz': '84172685',\n",
    "    'lsc0m476-sq34-20250626-0455-b00.fits.fz': '84172689',\n",
    "    'lsc0m476-sq34-20250626-0456-b00.fits.fz': '84172695',\n",
    "    'lsc0m476-sq34-20250626-0457-b00.fits.fz': '84172701',\n",
    "    'lsc0m476-sq34-20250626-0458-b00.fits.fz': '84172706',\n",
    "    'lsc0m476-sq34-20250626-0459-b00.fits.fz': '84172716',\n",
    "    'lsc0m476-sq34-20250626-0460-b00.fits.fz': '84172721',\n",
    "    'lsc0m476-sq34-20250626-0461-b00.fits.fz': '84172730'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def download_frame(frame_id, destination):\n",
    "    \"\"\"Download one archive frame to ``destination`` unless that file already exists.\n",
    "\n",
    "    The frame record is requested from ``API_ROOT`` and the file behind its ``url``\n",
    "    entry is fetched completely before ``destination`` is opened, so a failed request\n",
    "    never leaves an empty file behind that a later run would skip as already downloaded.\n",
    "\n",
    "    Raises ``requests.HTTPError`` if either request returns an error status.\n",
    "    \"\"\"\n",
    "    if os.path.exists(destination):\n",
    "        return\n",
    "    frame_record = requests.get(os.environ['API_ROOT'] + f'frames/{frame_id}', timeout=REQUEST_TIMEOUT)\n",
    "    frame_record.raise_for_status()\n",
    "    frame_data = requests.get(frame_record.json()['url'], timeout=REQUEST_TIMEOUT)\n",
    "    frame_data.raise_for_status()\n",
    "    with open(destination, 'wb') as f:\n",
    "        f.write(frame_data.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if DOWNLOAD_FILES_LOCALLY:\n",
    "    # Download the data into the correct directory\n",
    "    # Note that this won't download any files that already exist\n",
    "    for filename, frame_id in raw_frames.items():\n",
    "        download_frame(frame_id, os.path.join(raw_data_dir, filename))\n",
    "else:\n",
    "    print(\"Using S3 direct access - files will be accessed during processing\")\n",
    "\n",
    "# Download the bpm file (always needed locally for database setup)\n",
    "bpm_path = os.path.join(bpm_dir, bpm_filename)\n",
    "download_frame(bpm_id, bpm_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set up the database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_banzai_command(*args):\n",
    "    \"\"\"Run a banzai command-line tool with ``--db-address`` set to ``DB_ADDRESS``.\n",
    "\n",
    "    ``args`` is the executable name followed by its arguments, one string per token.\n",
    "    Raises ``subprocess.CalledProcessError`` if the command exits with a non-zero status,\n",
    "    so a failed setup step stops the notebook instead of being silently ignored.\n",
    "    \"\"\"\n",
    "    subprocess.run([*args, '--db-address=' + os.environ['DB_ADDRESS']], check=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run_banzai_command('banzai_create_db')\n",
    "\n",
    "# This is the site and instrument of the test data we're using\n",
    "run_banzai_command('banzai_add_site', '--site', 'lsc', '--latitude', '-30.1673833333',\n",
    "                   '--longitude', '-70.8047888889', '--elevation', '2198', '--timezone', '-4')\n",
    "run_banzai_command('banzai_add_instrument', '--site', 'lsc', '--camera', 'sq34', '--name', 'sq34',\n",
    "                   '--instrument-type', '0m4-SciCam-QHY600', '--nx', '9600', '--ny', '6422')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add the bpm to the database\n",
    "logger.info(f'Adding bpm {bpm_filename} to the banzai database')\n",
    "run_banzai_command('banzai_add_super_calibration', bpm_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the instrument record"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "instrument = dbs.query_for_instrument(settings.db_address, 'lsc', 'sq34', 'sq34')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define the processing helper\n",
    "The bias, dark, skyflat and science frames are all processed the same way; only the tag in the filename (`b00`, `d00`, `f00`, `e00`) differs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_frames(frame_tag):\n",
    "    \"\"\"Run the pipeline stages on every raw frame whose filename contains ``frame_tag``.\n",
    "\n",
    "    ``frame_tag`` is the filename tag to select, e.g. ``'b00'``. When\n",
    "    ``DOWNLOAD_FILES_LOCALLY`` is set the frames are globbed from the local raw\n",
    "    directories (in sorted order); otherwise they are taken from ``raw_frames`` and\n",
    "    referenced by archive frame id. Each frame is passed to ``run_pipeline_stages``\n",
    "    on its own.\n",
    "    \"\"\"\n",
    "    if DOWNLOAD_FILES_LOCALLY:\n",
    "        # Access files through local filesystem\n",
    "        for raw_file in sorted(glob(f'example_reduction/*/*/raw/*{frame_tag}*')):\n",
    "            run_pipeline_stages([{'path': raw_file}], context)\n",
    "    else:\n",
    "        # Directly access from S3\n",
    "        for filename, frame_id in raw_frames.items():\n",
    "            if frame_tag in filename:\n",
    "                run_pipeline_stages([{'filename': filename, 'frameid': frame_id}], context)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process and Stack Bias Files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_frames('b00')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bias frames need to be manually verified for the first run because we don't have a super bias for automated comparisons\n",
    "def mark_frames_as_good(filenames):\n",
    "    \"\"\"Mark every processed frame matching the glob pattern ``filenames`` as good.\n",
    "\n",
    "    Files are looked up under ``example_reduction/*/*/*/processed/`` and each match is\n",
    "    marked \"good\" in the database by its basename. The number of frames marked is printed.\n",
    "    \"\"\"\n",
    "    processed_files = glob(f'example_reduction/*/*/*/processed/{filenames}')\n",
    "    for processed_file in processed_files:\n",
    "        dbs.mark_frame(os.path.basename(processed_file), \"good\", db_address=os.environ['DB_ADDRESS'])\n",
    "    print(f\"Marked {len(processed_files)} frames as good\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mark_frames_as_good('*b91*')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "make_master_calibrations(instrument, 'BIAS', MIN_DATE, MAX_DATE, context)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process and Stack Dark Files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_frames('d00')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mark_frames_as_good('*d91*')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "make_master_calibrations(instrument, 'DARK', MIN_DATE, MAX_DATE, context)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process and Stack Skyflats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_frames('f00')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mark_frames_as_good('*f91*')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "make_master_calibrations(instrument, 'SKYFLAT', MIN_DATE, MAX_DATE, context)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process the Science Exposure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "process_frames('e00')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "banzai",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}