#!/usr/bin/env python3

import asyncio
import websockets
import json
import logging
from datetime import datetime

# --- CONFIGURATION ---
# Jetstream instances as of early 2026
# The query string restricts the subscription to the post collection.
JETSTREAM_URL = (
    "wss://jetstream1.us-east.bsky.network/subscribe"
    "?wantedCollections=app.bsky.feed.post"
)

# Matching posts are appended to this file, one JSON object per line.
DATA_FILE = "nzpol.jsonl"


# Substrings searched for in the lowercased text of each new post.
KEYWORDS = ["#nzpol"]

# Currently unused -- it gets a bit trickier if monitoring for parties such as 'labour',
# as the firehose is everything posted in the world on Bluesky.
#
# Probably better to pivot to making use of 'starter packs' to get metadata
# related to relevant people/organisations to follow.

# One entry per line with a trailing comma, so adding a keyword later cannot
# accidentally concatenate with the previous string literal.
KEYWORDS_EXPANDED = [
    "#nzpol",
    "christopher luxon",
    "chris hipkins",
    # ...
]

# Timestamped, INFO-level log lines for the whole script.
logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)

# Pause between connection attempts, in seconds.
_RECONNECT_DELAY_SECONDS = 5


def _build_record(event, keywords):
    """Return the JSONL record for a keyword-matching new post, or None.

    Args:
        event: One decoded Jetstream message (the result of ``json.loads``).
        keywords: Lowercase substrings to look for in the post text.

    Returns:
        A dict with the keys ``timestamp``, ``did``, ``text``, ``reply_to``,
        ``uri`` and ``langs``; or ``None`` when the event is not a commit
        "create", carries no string ``text``, or matches no keyword.

    Raises:
        KeyError: If a matching event lacks ``did``, or its commit lacks
            ``collection`` or ``rkey``.
    """
    if not isinstance(event, dict) or event.get("kind") != "commit":
        return None

    commit = event.get("commit")
    if not isinstance(commit, dict) or commit.get("operation") != "create":
        return None

    record = commit.get("record")
    if not isinstance(record, dict):
        return None

    text = record.get("text")
    if not isinstance(text, str):
        return None

    lowered = text.lower()
    if not any(keyword in lowered for keyword in keywords):
        return None

    did = event["did"]
    # Metadata students will need for Network Analysis.
    return {
        # Collection time on this machine: datetime.now() without a tz is
        # naive local time, not the time the post was created.
        "timestamp": datetime.now().isoformat(),
        "did": did,  # Author ID
        "text": text,
        "reply_to": record.get("reply"),  # For network mapping
        "uri": f"at://{did}/{commit['collection']}/{commit['rkey']}",
        "langs": record.get("langs"),
    }


async def monitor_firehose():
    """Stream Jetstream events forever and append keyword matches to DATA_FILE.

    Connects to ``JETSTREAM_URL``, inspects every message, and for each newly
    created post whose text contains one of ``KEYWORDS`` (case-insensitive)
    prints the matched text and appends one JSON line to ``DATA_FILE``.

    A malformed or unexpected message is logged and skipped without dropping
    the connection. When the connection ends, whether through an error or a
    clean close, the function waits ``_RECONNECT_DELAY_SECONDS`` and
    reconnects. It never returns normally.
    """
    # The post text is lowercased before matching, so normalise the keywords
    # once up front instead of relying on every entry being lowercase.
    keywords = [keyword.lower() for keyword in KEYWORDS]

    while True:
        try:
            async with websockets.connect(JETSTREAM_URL) as ws:
                logging.info("Connected to Jetstream. Monitoring NZ Politics...")

                async for message in ws:
                    try:
                        record = _build_record(json.loads(message), keywords)
                    except (ValueError, KeyError) as exc:
                        # Bad JSON (ValueError) or a missing identifier
                        # (KeyError) affects only this message; the
                        # connection itself is still healthy.
                        logging.warning("Skipping malformed message: %r", exc)
                        continue

                    if record is None:
                        continue

                    matched_text = record["text"].lower()
                    print(f"Text message matched one of the monitor keywords = {matched_text}")

                    try:
                        with open(DATA_FILE, "a", encoding="utf-8") as f:
                            f.write(json.dumps(record) + "\n")
                    except OSError as exc:
                        # A storage problem is not a connection problem:
                        # report it accurately and keep streaming.
                        logging.error("Could not append to %s: %s", DATA_FILE, exc)

            # Reached when the message iterator ends without raising (for
            # example on a clean close by the server).
            logging.info(
                "Connection closed. Reconnecting in %d seconds...",
                _RECONNECT_DELAY_SECONDS,
            )
        except Exception as e:
            # Top-level boundary: any connection-level failure ends up here
            # and is retried rather than crashing the monitor.
            logging.error(
                "Connection lost: %s. Retrying in %d seconds...",
                e,
                _RECONNECT_DELAY_SECONDS,
            )

        # Back off on every reconnect path so a server that keeps closing
        # the socket cleanly is not hammered in a tight loop.
        await asyncio.sleep(_RECONNECT_DELAY_SECONDS)

if __name__ == "__main__":
    # The monitor loops until interrupted, so Ctrl-C is the normal way to
    # stop it; report a clean stop instead of surfacing a traceback.
    try:
        asyncio.run(monitor_firehose())
    except KeyboardInterrupt:
        logging.info("Interrupted by user. Stopping monitor.")
    
