{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example Data Reduction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ['OPENTSDB_PYTHON_METRICS_TEST_MODE'] = 'True'\n",
    "os.environ['API_ROOT'] = \"https://archive-api.lco.global/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from banzai.calibrations import make_master_calibrations\n",
    "import requests\n",
    "from banzai import settings\n",
    "from banzai import dbs\n",
    "from banzai.utils.stage_utils import run_pipeline_stages\n",
    "import logging\n",
    "from banzai.logs import set_log_level\n",
    "from glob import glob"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_log_level('INFO')\n",
    "logger = logging.getLogger('banzai')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.makedirs('example_reduction', exist_ok=True)\n",
    "os.environ['DB_ADDRESS'] = 'sqlite:///example_reduction/banzai-example.db'\n",
    "settings.processed_path = os.path.join(os.getcwd(), 'example_reduction')\n",
    "settings.fpack = True\n",
    "settings.db_address = os.environ['DB_ADDRESS']\n",
    "settings.reduction_level = 91"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set up the context object.\n",
    "import banzai.main\n",
    "context = banzai.main.parse_args(settings, parse_system_args=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration\n",
    "# Set to True to download all files locally first, False to access directly from S3\n",
    "DOWNLOAD_FILES_LOCALLY = True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download Data (Optional)\n",
    "Two approaches are available:\n",
    "- **Local download**: Download all files first, then process from disk\n",
    "- **S3 direct**: Access files directly from archive during processing (default)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We will be using example data from the LCO archive, from site LSC and instrument sq34\n",
    "\n",
    "# make directories for the test dataset.\n",
    "raw_data_dir = 'example_reduction/lsc/sq34/raw'\n",
    "os.makedirs(raw_data_dir, exist_ok=True)\n",
    "bpm_dir = 'example_reduction/lsc/sq34/bpm/'\n",
    "os.makedirs(bpm_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the basename and archive frame_id for each of the frames we'll use in this example\n",
    "bpm_filename = 'lsc0m409-sq34-20240314-bpm-central30x30.fits.fz'\n",
    "bpm_id = '69359142'\n",
    "\n",
    "raw_frames = {\n",
    "    'lsc0m476-sq34-20250622-0058-f00.fits.fz': '84043242',\n",
    "    'lsc0m476-sq34-20250622-0059-f00.fits.fz': '84043255',\n",
    "    'lsc0m476-sq34-20250622-0060-f00.fits.fz': '84043260',\n",
    "    'lsc0m476-sq34-20250622-0061-f00.fits.fz': '84043265',\n",
    "    'lsc0m476-sq34-20250622-0062-f00.fits.fz': '84043272',\n",
    "    'lsc0m476-sq34-20250625-0060-d00.fits.fz': '84133538',\n",
    "    'lsc0m476-sq34-20250625-0061-d00.fits.fz': '84133758',\n",
    "    'lsc0m476-sq34-20250625-0062-d00.fits.fz': '84134040',\n",
    "    'lsc0m476-sq34-20250625-0063-d00.fits.fz': '84134220',\n",
    "    'lsc0m476-sq34-20250625-0064-d00.fits.fz': '84134361',\n",
    "    'lsc0m476-sq34-20250626-0018-d00.fits.fz': '84149171',\n",
    "    'lsc0m476-sq34-20250626-0019-d00.fits.fz': '84149216',\n",
    "    'lsc0m476-sq34-20250626-0020-d00.fits.fz': '84149302',\n",
    "    'lsc0m476-sq34-20250626-0021-d00.fits.fz': '84149349',\n",
    "    'lsc0m476-sq34-20250626-0022-d00.fits.fz': '84149433',\n",
    "    'lsc0m476-sq34-20250626-0094-e00.fits.fz': '84151307',\n",
    "    'lsc0m476-sq34-20250626-0368-e00.fits.fz': '84167225',\n",
    "    'lsc0m476-sq34-20250626-0452-b00.fits.fz': '84172674',\n",
    "    'lsc0m476-sq34-20250626-0453-b00.fits.fz': '84172677',\n",
    "    'lsc0m476-sq34-20250626-0454-b00.fits.fz': '84172685',\n",
    "    'lsc0m476-sq34-20250626-0455-b00.fits.fz': '84172689',\n",
    "    'lsc0m476-sq34-20250626-0456-b00.fits.fz': '84172695',\n",
    "    'lsc0m476-sq34-20250626-0457-b00.fits.fz': '84172701',\n",
    "    'lsc0m476-sq34-20250626-0458-b00.fits.fz': '84172706',\n",
    "    'lsc0m476-sq34-20250626-0459-b00.fits.fz': '84172716',\n",
    "    'lsc0m476-sq34-20250626-0460-b00.fits.fz': '84172721',\n",
    "    'lsc0m476-sq34-20250626-0461-b00.fits.fz': '84172730'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if DOWNLOAD_FILES_LOCALLY:\n",
    "    # Download the data into the correct directory\n",
    "    # Note that this won't download any files that already exist\n",
    "    for filename, frame_id in raw_frames.items():\n",
    "        if os.path.exists(os.path.join(raw_data_dir, filename)):\n",
    "            continue\n",
    "        archive_url = f'https://archive-api.lco.global/frames/{frame_id}'\n",
    "        frame_info = requests.get(archive_url).json()\n",
    "        with open(os.path.join(raw_data_dir, filename), 'wb') as f:\n",
    "            f.write(requests.get(frame_info['url']).content)\n",
    "else:\n",
    "    print(\"Using S3 direct access - files will be accessed during processing\")\n",
    "\n",
    "# Download the bpm file (always needed locally for database setup)\n",
    "bpm_archive_url = f'https://archive-api.lco.global/frames/{bpm_id}'\n",
    "bpm_path = os.path.join(bpm_dir, bpm_filename)\n",
    "if not os.path.exists(bpm_path):\n",
    "    bpm_frame_info = requests.get(bpm_archive_url).json()\n",
    "    with open(bpm_path, 'wb') as f:\n",
    "        f.write(requests.get(bpm_frame_info['url']).content)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set up the database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.system(f'banzai_create_db --db-address={os.environ[\"DB_ADDRESS\"]}')\n",
    "\n",
    "# This is the site and instrument of the test data we're using\n",
    "os.system(f'banzai_add_site --site lsc --latitude -30.1673833333 --longitude -70.8047888889 --elevation 2198 --timezone -4 --db-address={os.environ[\"DB_ADDRESS\"]}')\n",
    "os.system(f'banzai_add_instrument --site lsc --camera sq34 --name sq34 --instrument-type 0m4-SciCam-QHY600 --nx 9600 --ny 6422 --db-address={os.environ[\"DB_ADDRESS\"]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add the bpm to the database\n",
    "logger.info(f'Adding bpm {bpm_filename} to the banzai database')\n",
    "bpm_filepath = bpm_dir + bpm_filename\n",
    "os.system(f'banzai_add_super_calibration {bpm_filepath} --db-address={os.environ[\"DB_ADDRESS\"]}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the instrument record"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "instrument = dbs.query_for_instrument(settings.db_address, 'lsc', 'sq34', 'sq34')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process and Stack Bias Files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if DOWNLOAD_FILES_LOCALLY:\n",
    "    # Access files through local filesystem\n",
    "    bias_files = glob(os.path.join('example_reduction/*/*/raw/*b00*'))\n",
    "    for bias_file in bias_files:\n",
    "        run_pipeline_stages([{'path': bias_file}], context)\n",
    "else:\n",
    "    # Directly access from S3\n",
    "    bias_frames = {filename: frame_id for filename, frame_id in raw_frames.items() if 'b00' in filename}\n",
    "    for filename, frame_id in bias_frames.items():\n",
    "        run_pipeline_stages([{'filename': filename, 'frameid': frame_id}], context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bias frames need to be manually verified for the first run because we don't have a super bias for automated comparisons\n",
    "def mark_frames_as_good(filenames):\n",
    "    counter = 0\n",
    "    for filename in glob(f'example_reduction/*/*/*/processed/{filenames}'):\n",
    "        counter += 1\n",
    "        dbs.mark_frame(os.path.basename(filename), \"good\", db_address=os.environ['DB_ADDRESS'])\n",
    "    print(f\"Marked {counter} frames as good\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mark_frames_as_good('*b91*')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The date range should contain the data we're using\n",
    "min_date = '2024-01-01'\n",
    "max_date = '2026-01-01'\n",
    "make_master_calibrations(instrument, 'BIAS', min_date, max_date, context)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process and Stack Dark Files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if DOWNLOAD_FILES_LOCALLY:\n",
    "    # Access files through local filesystem\n",
    "    dark_files = glob(os.path.join('example_reduction/*/*/raw/*d00*'))\n",
    "    for dark_file in dark_files:\n",
    "        run_pipeline_stages([{'path': dark_file}], context)\n",
    "else:\n",
    "    # Directly access from S3\n",
    "    dark_frames = {filename: frame_id for filename, frame_id in raw_frames.items() if 'd00' in filename}\n",
    "    for filename, frame_id in dark_frames.items():\n",
    "        run_pipeline_stages([{'filename': filename, 'frameid': frame_id}], context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "mark_frames_as_good('*d91*')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "make_master_calibrations(instrument, 'DARK', '2024-01-01', '2026-01-01', context)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process and Stack Skyflats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if DOWNLOAD_FILES_LOCALLY:\n",
    "    # Access files through local filesystem\n",
    "    flat_files = glob(os.path.join('example_reduction/*/*/raw/*f00*'))\n",
    "    for flat_file in flat_files:\n",
    "        run_pipeline_stages([{'path': flat_file}], context)\n",
    "else:\n",
    "    # Directly access from S3\n",
    "    flat_frames = {filename: frame_id for filename, frame_id in raw_frames.items() if 'f00' in filename}\n",
    "    for filename, frame_id in flat_frames.items():\n",
    "        run_pipeline_stages([{'filename': filename, 'frameid': frame_id}], context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mark_frames_as_good('*f91*')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "make_master_calibrations(instrument, 'SKYFLAT', '2024-01-01', '2026-01-01', context)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Process the Science Exposure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if DOWNLOAD_FILES_LOCALLY:\n",
    "    # Access files through local filesystem\n",
    "    science_files = glob(os.path.join('example_reduction/*/*/raw/*e00*'))\n",
    "    for science_file in science_files:\n",
    "        run_pipeline_stages([{'path': science_file}], context)\n",
    "else:\n",
    "    # Directly access from S3\n",
    "    science_frames = {filename: frame_id for filename, frame_id in raw_frames.items() if 'e00' in filename}\n",
    "    for filename, frame_id in science_frames.items():\n",
    "        run_pipeline_stages([{'filename': filename, 'frameid': frame_id}], context)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "banzai",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}