#!/usr/bin/env python3
"""
Watch https://www.doodledandyrescue.org/all-adoptable-doodles for newly added
female dogs and email their profile links.

Stdlib only — no pip installs needed. Designed to run from cron.

State: seen.json next to this script (override with DOODLE_STATE env var).
First run seeds the state without sending email.

Email config via environment variables (put them in the cron line or a
wrapper script):
  SMTP_HOST   default localhost
  SMTP_PORT   default 25
  SMTP_TLS    starttls | ssl | none  (default: starttls if port 587, else none)
  SMTP_USER   optional — omit for no-auth servers (e.g. local postfix)
  SMTP_PASS   optional — required only with SMTP_USER
  MAIL_FROM   sender address (default: SMTP_USER, or doodle-watch@<hostname>)
  MAIL_TO     where to send alerts (default: SMTP_USER)
"""

import json
import os
import re
import smtplib
import ssl
import sys
import urllib.request
from email.message import EmailMessage
from pathlib import Path

# Listing page that links to every adoptable dog's profile.
LIST_URL = "https://www.doodledandyrescue.org/all-adoptable-doodles"

# Matches profile links on the listing page.
# Group 1 is the absolute profile URL, group 2 is the dog's slug.
LINK_RE = re.compile(
    r'href="('
    r"https://www\.doodledandyrescue\.org/all-adoptable-doodles/"
    r"([a-z0-9-]+)"
    r')"'
)

# Gender appears as e.g. <span ... class="wixui-rich-text__text">Female</span>
# Matching is case-insensitive; group 1 is the gender word as written.
GENDER_RE = re.compile(
    r'class="wixui-rich-text__text"\s*>\s*(Female|Male)\s*<',
    flags=re.IGNORECASE,
)

# Where the slugs seen on the previous run are stored: DOODLE_STATE if set,
# otherwise seen.json in the same directory as this script.
STATE_FILE = Path(
    os.environ.get("DOODLE_STATE", Path(__file__).with_name("seen.json"))
)

# Request headers sent with every fetch (a browser-like User-Agent).
UA = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
}


def fetch(url: str) -> str:
    """Download *url* and return the response body as text.

    The request carries the module-level ``UA`` headers and gives up after
    30 seconds. The body is decoded as UTF-8; undecodable bytes are replaced
    rather than raising.
    """
    request = urllib.request.Request(url, headers=UA)
    with urllib.request.urlopen(request, timeout=30) as response:
        raw = response.read()
    return raw.decode("utf-8", errors="replace")


def get_dogs() -> dict[str, str]:
    """Scrape the listing page and map each dog's slug to its profile URL.

    Returns an empty dict when no profile links match ``LINK_RE``.
    """
    listing_html = fetch(LIST_URL)
    profiles: dict[str, str] = {}
    for link in LINK_RE.finditer(listing_html):
        # Group 1 is the full profile URL, group 2 the trailing slug.
        profiles[link.group(2)] = link.group(1)
    return profiles


def get_gender(profile_url: str) -> str:
    """Fetch a dog's profile page and report its gender.

    Returns ``"Female"`` or ``"Male"`` (normalised capitalisation) based on
    the first ``GENDER_RE`` match, or ``"Unknown"`` when nothing matches.
    """
    found = GENDER_RE.search(fetch(profile_url))
    if found is None:
        return "Unknown"
    return found.group(1).capitalize()


def send_email(new_females: list[tuple[str, str]]) -> None:
    """Email an alert listing newly seen female dogs.

    Connection settings come from the SMTP_* / MAIL_* environment variables
    described in the module docstring.

    Args:
        new_females: ``(slug, profile_url)`` pairs to report.

    Raises:
        SystemExit: if SMTP_TLS is not one of ``starttls``/``ssl``/``none``,
            or if no recipient is configured (neither MAIL_TO nor SMTP_USER).
        ValueError: if SMTP_PORT is not an integer.

    Errors from ``smtplib``/``ssl`` while connecting or sending propagate.
    """
    host = os.environ.get("SMTP_HOST", "localhost")
    port = int(os.environ.get("SMTP_PORT", "25"))
    user = os.environ.get("SMTP_USER")          # optional: omit for no-auth servers
    password = os.environ.get("SMTP_PASS")

    # SMTP_TLS: "starttls" (e.g. port 587), "ssl" (port 465), "none" (port 25/localhost)
    tls = (os.environ.get("SMTP_TLS") or ("starttls" if port == 587 else "none")).lower()
    if tls not in ("starttls", "ssl", "none"):
        # A typo here must not silently downgrade to an unencrypted session,
        # which could send the login credentials in the clear.
        raise SystemExit(f"SMTP_TLS must be starttls, ssl or none (got {tls!r})")

    to = os.environ.get("MAIL_TO", user)
    if not to:
        raise SystemExit("Set MAIL_TO (or SMTP_USER) so I know where to send alerts")

    # Only ask the OS for the hostname when no explicit sender is available.
    sender = os.environ.get("MAIL_FROM") or user
    if not sender:
        sender = f"doodle-watch@{os.uname().nodename}"

    msg = EmailMessage()
    names = ", ".join(slug.replace("-", " ").title() for slug, _ in new_females)
    msg["Subject"] = f"🐶 New female doodle(s): {names}"
    msg["From"] = sender
    msg["To"] = to
    lines = [f"{slug.replace('-', ' ').title()}: {url}" for slug, url in new_females]
    msg.set_content(
        "New female dogs on Doodle Dandy Rescue:\n\n" + "\n".join(lines) + "\n"
    )

    if tls == "ssl":
        # Pass a verifying context explicitly: without one, SMTP_SSL does not
        # check the server certificate, unlike the STARTTLS branch below.
        connection = smtplib.SMTP_SSL(
            host, port, timeout=30, context=ssl.create_default_context()
        )
    else:
        connection = smtplib.SMTP(host, port, timeout=30)

    with connection as s:
        if tls == "starttls":
            s.starttls(context=ssl.create_default_context())
        # Authenticate only when both credentials are supplied; SMTP_USER
        # alone may just be serving as the default From/To address.
        if user and password:
            s.login(user, password)
        s.send_message(msg)


def main() -> int:
    """Run one scrape-and-notify pass.

    Fetches the listing, looks up the gender of every dog not in the saved
    state, emails about new females (except on a seeding run), then saves the
    current listing as the new state.

    Returns:
        0 on success; 1 if the listing page yielded no dogs (state untouched).
    """
    dogs = get_dogs()
    if not dogs:
        print("WARNING: no dogs found on listing page — site layout may have changed",
              file=sys.stderr)
        return 1  # don't wipe state on a bad scrape

    # Read the state directly instead of checking exists() first, so there is
    # no window between the check and the read. A missing file means first run.
    # An empty or corrupt file (e.g. from an interrupted write) is reseeded
    # with a warning instead of crashing on every subsequent cron run.
    try:
        seen = set(json.loads(STATE_FILE.read_text()))
        first_run = False
    except FileNotFoundError:
        seen, first_run = set(), True
    except (ValueError, TypeError):
        print(f"WARNING: could not parse state file {STATE_FILE} — reseeding",
              file=sys.stderr)
        seen, first_run = set(), True

    new_slugs = sorted(set(dogs) - seen)
    new_females = []
    genders = {}
    for slug in new_slugs:
        gender = get_gender(dogs[slug])
        genders[slug] = gender
        if gender == "Female":
            new_females.append((slug, dogs[slug]))

    if first_run:
        print(f"First run: seeding state with {len(dogs)} dogs, no email sent.")
    elif new_females:
        # If sending fails the exception propagates before the state is
        # saved, so the same dogs are reported again on the next run.
        send_email(new_females)
        print(f"Emailed about: {', '.join(s for s, _ in new_females)}")
    if new_slugs:
        print("New dogs:", ", ".join(f"{s} ({genders[s]})" for s in new_slugs))
    else:
        print("No new dogs.")

    # Persist the full current list. Dogs that disappear (adopted) drop out,
    # so a dog re-listed later counts as new again — usually what you want.
    # Write to a sibling temp file and rename it over the old state so an
    # interrupted run never leaves a truncated seen.json behind.
    tmp_file = STATE_FILE.with_name(STATE_FILE.name + ".tmp")
    tmp_file.write_text(json.dumps(sorted(dogs), indent=2))
    tmp_file.replace(STATE_FILE)
    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
