#!/usr/bin/env bash
# Bulk-fetch Buddy Miles images into the local web assets directory.
# -e: exit on any error, -u: error on unset variables, -o pipefail: a pipeline
# fails if any stage fails.
set -euo pipefail

# Everything from here to the closing PY marker runs under python3.
# The quoted delimiter <<'PY' disables shell expansion inside the heredoc.
python3 - <<'PY'
import json
import os
import re
import time
import urllib.parse
import urllib.request

# Destination directory for downloaded images (created below if missing).
OUT_DIR = '/var/www/html/sf/assets/buddy-miles'
# Stop once this many images have been downloaded.
TARGET = 60

os.makedirs(OUT_DIR, exist_ok=True)

# Identify ourselves to the APIs; some endpoints reject requests with no UA.
USER_AGENT = 'bld.sh (buddy miles image fetch)'

def fetch_json(url, retries=3, delay=1.5):
    """GET *url* and return its body parsed as JSON.

    Makes up to *retries* attempts, sleeping *delay* seconds after each
    failure (including the last), then re-raises the final error. Both
    transport errors and JSON parse errors count as retryable failures.
    """
    error = None
    attempt = 0
    while attempt < retries:
        attempt += 1
        try:
            request = urllib.request.Request(url, headers={'User-Agent': USER_AGENT})
            with urllib.request.urlopen(request, timeout=30) as response:
                return json.loads(response.read().decode('utf-8'))
        except Exception as exc:
            error = exc
            time.sleep(delay)
    raise error

def safe_name(name):
    """Sanitize *name* into a filesystem-safe filename stem.

    Strips surrounding whitespace, turns spaces into underscores,
    collapses every remaining run of characters outside [A-Za-z0-9._-]
    into a single underscore, and caps the result at 120 characters.
    Returns 'image' when nothing is left.
    """
    cleaned = re.sub(r'[^A-Za-z0-9._-]+', '_', name.strip().replace(' ', '_'))
    truncated = cleaned[:120]
    return truncated if truncated else 'image'

def download(url, filename):
    """Download *url* into OUT_DIR as *filename*.

    Returns True if a new file was written, False if the target already
    exists. Downloads into a temporary '.part' file and renames it into
    place on success: the original wrote directly to the final path, so
    an interrupted download left a truncated file that the existence
    check above would treat as complete on every later run.
    """
    path = os.path.join(OUT_DIR, filename)
    if os.path.exists(path):
        return False
    tmp_path = path + '.part'
    req = urllib.request.Request(url, headers={'User-Agent': USER_AGENT})
    try:
        with urllib.request.urlopen(req, timeout=60) as resp, open(tmp_path, 'wb') as f:
            f.write(resp.read())
        # Atomic on POSIX: commits the fully-written file to its final name.
        os.replace(tmp_path, path)
    except BaseException:
        # Remove the partial file so a retry starts clean.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return True

def openverse_search(query, page_size=40, max_pages=3):
    """Search the Openverse image API for *query*.

    Walks up to *max_pages* pages of results, collecting
    (image_url, title) pairs, and stops early once the API reports that
    the last page has been reached.
    """
    found = []
    base = 'https://api.openverse.engineering/v1/images/?'
    for page_no in range(1, max_pages + 1):
        query_string = urllib.parse.urlencode(
            {'q': query, 'page_size': page_size, 'page': page_no})
        payload = fetch_json(base + query_string)
        for entry in payload.get('results', []):
            link = entry.get('url') or entry.get('thumbnail')
            caption = entry.get('title') or 'buddy_miles'
            if link:
                found.append((link, caption))
        current = payload.get('page')
        total = payload.get('page_count')
        if current and total and current >= total:
            break
    return found

def commons_search(query, limit=50, offset=0):
    """Search Wikimedia Commons for *query* and return matching page titles.

    Restricted to namespace 6 ('File:'), so every returned title names a
    media file page.
    """
    query_string = urllib.parse.urlencode({
        'action': 'query',
        'format': 'json',
        'list': 'search',
        'srnamespace': '6',
        'srlimit': limit,
        'sroffset': offset,
        'srsearch': query,
    })
    response = fetch_json('https://commons.wikimedia.org/w/api.php?' + query_string)
    hits = response.get('query', {}).get('search', [])
    return [hit['title'] for hit in hits]

def commons_image_url(title, width=1200):
    """Resolve a Commons 'File:' page *title* to a direct image URL.

    Prefers a *width*-limited thumbnail URL and falls back to the
    original upload URL; returns None when the response carries no
    usable image info.
    """
    query_string = urllib.parse.urlencode({
        'action': 'query',
        'format': 'json',
        'titles': title,
        'prop': 'imageinfo',
        'iiprop': 'url',
        'iiurlwidth': width,
    })
    data = fetch_json('https://commons.wikimedia.org/w/api.php?' + query_string)
    # A single title was requested, so only the first page entry matters.
    for page in data.get('query', {}).get('pages', {}).values():
        first = (page.get('imageinfo') or [{}])[0]
        return first.get('thumburl') or first.get('url')
    return None

# Search phrases tried in order against Openverse until TARGET images
# have been collected.
queries = [
    '"Buddy Miles"',
    '"Buddy Miles" drummer',
    '"Buddy Miles" musician',
    '"Buddy Miles Express"',
    '"Band of Gypsys" Buddy Miles',
]

seen = set()  # image URLs already attempted, deduped across all queries
count = 0     # number of files actually written so far

for q in queries:
    for img_url, title in openverse_search(q):
        if count >= TARGET:
            break
        if img_url in seen:
            continue
        seen.add(img_url)
        # Keep the source extension when it looks like an image type;
        # otherwise default to .jpg.
        ext = os.path.splitext(urllib.parse.urlparse(img_url).path)[1].lower()
        if ext not in ['.jpg', '.jpeg', '.png', '.gif', '.webp']:
            ext = '.jpg'
        filename = safe_name(title) + ext
        try:
            if download(img_url, filename):
                count += 1
                print('OK', filename)
        except Exception:
            # Best-effort scrape: any URL that fails to download is skipped.
            continue
    if count >= TARGET:
        break

# Fall back to Wikimedia Commons if Openverse did not yield enough images.
if count < TARGET:
    commons_queries = [
        '"Buddy Miles"',
        # Structured search: files whose 'depicts' (P180) statement is the
        # Wikidata item Q472487 — presumably Buddy Miles; TODO confirm the id.
        'haswbstatement:P180=Q472487',
        'intitle:"Buddy Miles"',
    ]
    for q in commons_queries:
        titles = commons_search(q, limit=50, offset=0)
        for title in titles:
            if count >= TARGET:
                break
            url = commons_image_url(title)
            # Skip unresolvable titles and URLs already tried above.
            if not url or url in seen:
                continue
            seen.add(url)
            ext = os.path.splitext(urllib.parse.urlparse(url).path)[1].lower()
            if ext not in ['.jpg', '.jpeg', '.png', '.gif', '.webp']:
                ext = '.jpg'
            # Drop the 'File:' namespace prefix for the local filename.
            filename = safe_name(title.replace('File:', '')) + ext
            try:
                if download(url, filename):
                    count += 1
                    print('OK', filename)
            except Exception:
                continue
        if count >= TARGET:
            break

print('Downloaded', count, 'images to', OUT_DIR)
if count < TARGET:
    print('WARNING: Only', count, 'images found. Provide more sources if you need 60 exactly.')
PY
