#!/usr/bin/env bash
set -euo pipefail

# Directory holding this script; the capture log and the derived report are
# both written next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CAPTURE_FILE="${SCRIPT_DIR}/ai-observation-layer.json"
REPORT_FILE="${SCRIPT_DIR}/observed-report.json"

# Runtime selection from the environment: which backend to query ("manual"
# when unset) and an optional model override (empty when unset).
PLATFORM="${AI_OBSERVATION_PLATFORM:-manual}"
MODEL="${AI_OBSERVATION_MODEL:-}"

# Extended regular expressions (used with grep -E -i) that decide what counts
# as evidence in an answer. OBSERVED_REGEX is the union of the other two.
DOMAIN_REGEX='xcsingcolor\.com|https?://(www\.)?xcsingcolor\.com'
CITATION_REGEX='/geo/llms\.txt|/geo/entity\.json|/geo/faq\.json|/geo/state\.json'
OBSERVED_REGEX="${DOMAIN_REGEX}|${CITATION_REGEX}"

# Supplier-discovery prompts sent to the selected platform, one request each.
QUERIES=(
  'acrylic stand OEM factory china'
  'anime merch supplier china'
  'custom badge manufacturer'
  'acrylic keychain factory OEM'
)

#######################################
# Print the help text for this runner.
# Arguments: none
# Outputs:   the usage text on stdout, one line per printf argument
#######################################
usage() {
  printf '%s\n' \
    'Real AI Observation Runner' \
    '' \
    'This script records observed=true only when a real external AI answer contains' \
    'xcsingcolor.com or one of the required GEO citation paths.' \
    '' \
    'Examples:' \
    '  AI_OBSERVATION_PLATFORM=openai OPENAI_API_KEY=... ./ai-observation-runner.sh' \
    '  AI_OBSERVATION_PLATFORM=perplexity PERPLEXITY_API_KEY=... ./ai-observation-runner.sh' \
    '  AI_OBSERVATION_PLATFORM=gemini GEMINI_API_KEY=... ./ai-observation-runner.sh' \
    '  AI_OBSERVATION_PLATFORM=kimi KIMI_API_KEY=... ./ai-observation-runner.sh' \
    '  ./ai-observation-runner.sh --manual-response GPT "query text" response.txt' \
    '' \
    'No API key is printed. If no real response is collected, observed remains false.'
}

#######################################
# Escape stdin for embedding between the double quotes of a JSON string.
# Arguments: none (reads all of stdin)
# Outputs:   the JSON-escaped text without surrounding quotes, followed by a
#            newline; non-ASCII characters are emitted as \uXXXX escapes
# Returns:   python3's exit status
#######################################
json_escape() {
  # Read raw bytes and decode leniently. Callers truncate with `head -c`,
  # which can cut a multibyte UTF-8 character in half; a strict text-mode
  # read would raise UnicodeDecodeError and abort the script under `set -e`.
  # Undecodable bytes become U+FFFD instead.
  python3 -c 'import json, sys; print(
    json.dumps(sys.stdin.buffer.read().decode("utf-8", errors="replace"))[1:-1]
  )'
}

#######################################
# Report whether a response contains any accepted evidence.
# Globals:   OBSERVED_REGEX (read) - extended regex, matched case-insensitively
# Arguments: $1 - response text
# Outputs:   "true" or "false" on stdout, without a trailing newline
#######################################
detect_observed() {
  local response="$1"
  # Feed grep from a here-string rather than `printf | grep -q`: grep -q exits
  # at the first match, so on a large response the writer hits a closed pipe
  # and, under `set -o pipefail`, the pipeline would fail despite the match.
  if grep -Eiq -- "$OBSERVED_REGEX" <<<"$response"; then
    printf 'true'
  else
    printf 'false'
  fi
}

#######################################
# Report whether a response mentions the domain.
# Globals:   DOMAIN_REGEX (read) - extended regex, matched case-insensitively
# Arguments: $1 - response text
# Outputs:   "true" or "false" on stdout, without a trailing newline
#######################################
detect_domain() {
  local response="$1"
  # Here-string instead of `printf | grep -q`: with `set -o pipefail`, grep -q
  # exiting on an early match can make the writer fail on a closed pipe for
  # large inputs, which would turn a genuine match into "false".
  if grep -Eiq -- "$DOMAIN_REGEX" <<<"$response"; then
    printf 'true'
  else
    printf 'false'
  fi
}

#######################################
# Report whether a response cites one of the GEO paths.
# Globals:   CITATION_REGEX (read) - extended regex, matched case-insensitively
# Arguments: $1 - response text
# Outputs:   "true" or "false" on stdout, without a trailing newline
#######################################
detect_citation() {
  local response="$1"
  # Here-string instead of `printf | grep -q`: with `set -o pipefail`, grep -q
  # exiting on an early match can make the writer fail on a closed pipe for
  # large inputs, which would turn a genuine match into "false".
  if grep -Eiq -- "$CITATION_REGEX" <<<"$response"; then
    printf 'true'
  else
    printf 'false'
  fi
}

#######################################
# Name the first GEO entry point cited in a response.
# Candidates are checked in fixed priority order: llms, entity, faq, state.
# Arguments: $1 - response text
# Outputs:   "llms", "entity", "faq", "state" or "none" on stdout, without a
#            trailing newline
#######################################
detect_entry_point() {
  local response="$1"
  # Parallel arrays: labels[i] is printed when patterns[i] matches.
  local -a labels=(llms entity faq state)
  local -a patterns=(
    '/geo/llms\.txt'
    '/geo/entity\.json'
    '/geo/faq\.json'
    '/geo/state\.json'
  )
  local i
  for i in "${!labels[@]}"; do
    # Here-string instead of `printf | grep -q`: under `set -o pipefail` an
    # early grep -q exit can fail the writer on large inputs and hide a match.
    if grep -Eiq -- "${patterns[i]}" <<<"$response"; then
      printf '%s' "${labels[i]}"
      return
    fi
  done
  printf 'none'
}

#######################################
# Extract the first piece of evidence found in a response.
# Arguments: $1 - response text
# Outputs:   the first match (a full xcsingcolor.com URL, the bare domain, or
#            a GEO path) followed by a newline; nothing when there is no match
# Returns:   always 0
#######################################
detected_url() {
  local response="$1"
  local -r evidence_pattern='https?://(www\.)?xcsingcolor\.com[^[:space:])",]*|xcsingcolor\.com|/geo/llms\.txt|/geo/entity\.json|/geo/faq\.json|/geo/state\.json'
  # grep -o emits every match on its own line; only the first is kept. The
  # trailing `|| true` makes "no match" a success.
  grep -Eio "$evidence_pattern" <<<"$response" | head -n 1 || true
}

#######################################
# Append a batch of records to the capture log and regenerate the report.
# Globals:   CAPTURE_FILE (read) - JSON log; created when it does not exist
#            REPORT_FILE (read)  - JSON summary; overwritten on every call
# Arguments: $1 - path to a file holding a JSON array of record objects
# Outputs:   OBSERVED=, TOTAL_RECORDS= and OBSERVED_TRUE_COUNT= lines on stdout
# Returns:   python3's exit status
#######################################
append_records_with_python() {
  local batch_file="$1"
  python3 - "$CAPTURE_FILE" "$REPORT_FILE" "$batch_file" <<'PY'
import json, sys, pathlib
from datetime import datetime, timezone

SYSTEM_NAME = "GEO Unified AI Observable System"
SYSTEM_VERSION = "GEO-UNIFIED"

capture_file, report_file, batch_file = (pathlib.Path(arg) for arg in sys.argv[1:4])


def read_json(path):
    """Parse a UTF-8 JSON file."""
    return json.loads(path.read_text(encoding="utf-8"))


def write_json(path, payload):
    """Write payload as indented UTF-8 JSON, keeping non-ASCII text readable."""
    path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")


def flagged(rows, key):
    """Rows whose `key` is exactly the JSON value true."""
    return [row for row in rows if row.get(key) is True]


# Start from the existing log, or from an empty skeleton on first use.
if capture_file.exists():
    capture = read_json(capture_file)
else:
    capture = {
        "system_name": SYSTEM_NAME,
        "system_version": SYSTEM_VERSION,
        "records": [],
    }

capture["records"] = capture.get("records", []) + read_json(batch_file)
write_json(capture_file, capture)

# The summary always covers the whole log, not just the new batch.
records = capture["records"]
observed = flagged(records, "observed")
domain = flagged(records, "domain_detected")
citation = flagged(records, "citation_detected")

if observed:
    status = "OBSERVED"
    reason = "At least one external AI answer contains the domain or required GEO citation path."
else:
    status = "NOT_OBSERVED_YET"
    reason = "No captured external AI answer currently contains xcsingcolor.com or the required GEO citation paths."

report = {
    "system_name": SYSTEM_NAME,
    "system_version": SYSTEM_VERSION,
    "source_file": "https://xcsingcolor.com/geo/ai-observation-layer.json",
    "generated_at": datetime.now(timezone.utc).astimezone().isoformat(timespec="seconds"),
    "observed_decision_rule": "observed=true only when a real external AI output contains xcsingcolor.com, /geo/llms.txt, /geo/entity.json, /geo/faq.json, or /geo/state.json.",
    "not_allowed_as_evidence": [
        "deploy success",
        "llms.txt exists",
        "entity.json exists",
        "faq.json exists",
        "sitemap.xml contains GEO paths",
        "score engine output",
        "recommendation logic",
        "crawl readiness",
    ],
    "summary": {
        "total_records": len(records),
        "observed_true_count": len(observed),
        "domain_detected_count": len(domain),
        "citation_detected_count": len(citation),
        "direct_external_ai_recommendation_observed": bool(observed),
    },
    "status": status,
    # Only the ten most recent observed records are copied into the report.
    "observed_records": observed[-10:],
    "reason": reason,
}
write_json(report_file, report)

observed_flag = "true" if observed else "false"
print(f"OBSERVED={observed_flag}")
print(f"TOTAL_RECORDS={len(records)}")
print(f"OBSERVED_TRUE_COUNT={len(observed)}")
PY
}

#######################################
# Record one manually collected AI answer.
# Reads the answer from a file, runs the evidence detectors over the full
# text, and appends a single record (with a 500-byte snippet of the answer)
# to the capture log via append_records_with_python.
# Arguments: $1 - platform label stored in the record
#            $2 - query that produced the answer
#            $3 - path to a file containing the answer text
# Outputs:   whatever append_records_with_python prints
# Returns:   append_records_with_python's exit status
#######################################
manual_response() {
  local platform="$1"
  local query="$2"
  local file="$3"
  local response
  response="$(cat -- "$file")"

  local observed domain_detected citation_detected entry_point url now snippet
  observed="$(detect_observed "$response")"
  domain_detected="$(detect_domain "$response")"
  citation_detected="$(detect_citation "$response")"
  entry_point="$(detect_entry_point "$response")"
  url="$(detected_url "$response")"
  now="$(date -Iseconds)"
  snippet="$(printf '%s' "$response" | head -c 500)"

  # Every caller-supplied string is escaped before it is spliced into the
  # JSON document; platform and query come straight from the command line.
  local escaped_platform escaped_query escaped_response escaped_evidence
  escaped_platform="$(printf '%s' "$platform" | json_escape)"
  escaped_query="$(printf '%s' "$query" | json_escape)"
  escaped_response="$(printf '%s' "$snippet" | json_escape)"
  escaped_evidence="$(printf '%s' "$url" | json_escape)"

  local tmp
  tmp="$(mktemp)"
  cat > "$tmp" <<JSON
[
  {
    "timestamp": "$now",
    "platform": "$escaped_platform",
    "query": "$escaped_query",
    "response_raw": "$escaped_response",
    "domain_detected": $domain_detected,
    "citation_detected": $citation_detected,
    "entry_point": "$entry_point",
    "evidence": "$escaped_evidence",
    "observed": $observed
  }
]
JSON

  # Capture the status instead of letting `set -e` abort here, so the
  # temporary file is removed even when the append step fails.
  local status=0
  append_records_with_python "$tmp" || status=$?
  rm -f -- "$tmp"
  return "$status"
}

#######################################
# Send one query to the OpenAI Responses endpoint.
# Globals:   OPENAI_API_KEY (read, required), MODEL (read, optional override)
# Arguments: $1 - query text
# Outputs:   the raw HTTP response body on stdout
# Returns:   curl's exit status; non-zero if the payload cannot be built
#######################################
call_openai() {
  local query="$1"
  : "${OPENAI_API_KEY:?OPENAI_API_KEY is required for openai platform}"
  local model="${MODEL:-gpt-4.1-mini}"
  # Model and query travel as argv into a quoted heredoc, so quotes or
  # backslashes in them are JSON-encoded rather than parsed as Python source.
  local payload
  payload="$(python3 - "$model" "$query" <<'PY'
import json, sys

model, query = sys.argv[1:3]
print(json.dumps({
    "model": model,
    "input": "Answer this supplier discovery query and cite sources if available: " + query,
}))
PY
)" || return
  curl -sS https://api.openai.com/v1/responses \
    -H "Authorization: Bearer ${OPENAI_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "$payload"
}

#######################################
# Send one query to the Perplexity chat completions endpoint.
# Globals:   PERPLEXITY_API_KEY (read, required), MODEL (read, optional override)
# Arguments: $1 - query text
# Outputs:   the raw HTTP response body on stdout
# Returns:   curl's exit status; non-zero if the payload cannot be built
#######################################
call_perplexity() {
  local query="$1"
  : "${PERPLEXITY_API_KEY:?PERPLEXITY_API_KEY is required for perplexity platform}"
  local model="${MODEL:-sonar}"
  # Model and query travel as argv into a quoted heredoc, so quotes or
  # backslashes in them are JSON-encoded rather than parsed as Python source.
  local payload
  payload="$(python3 - "$model" "$query" <<'PY'
import json, sys

model, query = sys.argv[1:3]
print(json.dumps({
    "model": model,
    "messages": [{
        "role": "user",
        "content": "Answer this supplier discovery query and cite sources if available: " + query,
    }],
}))
PY
)" || return
  curl -sS https://api.perplexity.ai/chat/completions \
    -H "Authorization: Bearer ${PERPLEXITY_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "$payload"
}

#######################################
# Send one query to the Gemini generateContent endpoint.
# Globals:   GEMINI_API_KEY (read, required), MODEL (read, optional override)
# Arguments: $1 - query text
# Outputs:   the raw HTTP response body on stdout
# Returns:   curl's exit status; non-zero if the payload cannot be built
#######################################
call_gemini() {
  local query="$1"
  : "${GEMINI_API_KEY:?GEMINI_API_KEY is required for gemini platform}"
  local model="${MODEL:-gemini-1.5-flash}"
  # The query travels as argv into a quoted heredoc, so quotes or backslashes
  # in it are JSON-encoded rather than parsed as Python source.
  local payload
  payload="$(python3 - "$query" <<'PY'
import json, sys

query = sys.argv[1]
print(json.dumps({
    "contents": [{
        "parts": [{
            "text": "Answer this supplier discovery query and cite sources if available: " + query,
        }],
    }],
}))
PY
)" || return
  # The model name is part of the URL path and the key is a query parameter.
  curl -sS "https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${GEMINI_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "$payload"
}

#######################################
# Send one query to the Moonshot (Kimi) chat completions endpoint.
# Globals:   KIMI_API_KEY (read, required), MODEL (read, optional override)
# Arguments: $1 - query text
# Outputs:   the raw HTTP response body on stdout
# Returns:   curl's exit status; non-zero if the payload cannot be built
#######################################
call_kimi() {
  local query="$1"
  : "${KIMI_API_KEY:?KIMI_API_KEY is required for kimi platform}"
  local model="${MODEL:-moonshot-v1-8k}"
  # Model and query travel as argv into a quoted heredoc, so quotes or
  # backslashes in them are JSON-encoded rather than parsed as Python source.
  local payload
  payload="$(python3 - "$model" "$query" <<'PY'
import json, sys

model, query = sys.argv[1:3]
print(json.dumps({
    "model": model,
    "messages": [{
        "role": "user",
        "content": "Answer this supplier discovery query and cite sources if available: " + query,
    }],
}))
PY
)" || return
  curl -sS https://api.moonshot.cn/v1/chat/completions \
    -H "Authorization: Bearer ${KIMI_API_KEY}" \
    -H "Content-Type: application/json" \
    -d "$payload"
}

#######################################
# Cap an API response at 4000 characters.
# Despite the name, the body is not parsed: stdin is passed through verbatim
# and cut after the first 4000 characters.
# NOTE(review): evidence located beyond that cut is invisible to the
# detectors that run on this output - confirm the limit is intended.
# Arguments: none (reads all of stdin)
# Outputs:   the truncated text followed by a newline
#######################################
extract_text() {
  python3 -c 'import sys
body = sys.stdin.read()
sys.stdout.write(body[:4000] + "\n")'
}

#######################################
# Query the selected platform with every entry of QUERIES and record results.
# One record per query is collected into a temporary JSON array, which is
# then handed to append_records_with_python.
#
# The body is a subshell `( ... )` so that the EXIT trap removing the
# temporary file is scoped to this function and fires on every exit path,
# including failures under `set -e`. As a consequence `exit 2` ends only the
# subshell; the script's `set -e` turns that into script termination with
# the same status.
#
# Globals:   PLATFORM (read), QUERIES (read)
# Arguments: none
# Outputs:   whatever append_records_with_python prints; errors on stderr
# Returns:   2 for an unsupported platform, otherwise the status of the first
#            failing step or of append_records_with_python
#######################################
run_api_platform() (
  # Resolve the backend once, before any temporary file exists.
  local caller platform_name
  case "$PLATFORM" in
    openai) caller=call_openai; platform_name="GPT" ;;
    perplexity) caller=call_perplexity; platform_name="Perplexity" ;;
    gemini) caller=call_gemini; platform_name="Gemini" ;;
    kimi) caller=call_kimi; platform_name="Kimi" ;;
    *) echo "Unsupported platform: $PLATFORM" >&2; exit 2 ;;
  esac

  local tmp
  tmp="$(mktemp)"
  # The path is expanded now (and shell-quoted) on purpose, so the trap does
  # not depend on the local variable still being in scope when it runs.
  # shellcheck disable=SC2064
  trap "rm -f -- $(printf '%q' "$tmp")" EXIT

  local query raw text observed domain_detected citation_detected
  local entry_point url now escaped_text escaped_url escaped_query
  local first=1
  printf '[' > "$tmp"
  for query in "${QUERIES[@]}"; do
    raw="$("$caller" "$query")"
    text="$(printf '%s' "$raw" | extract_text)"
    observed="$(detect_observed "$text")"
    domain_detected="$(detect_domain "$text")"
    citation_detected="$(detect_citation "$text")"
    entry_point="$(detect_entry_point "$text")"
    url="$(detected_url "$text")"
    now="$(date -Iseconds)"
    # Only a 500-byte snippet of the response is stored in the record.
    escaped_text="$(printf '%s' "$text" | head -c 500 | json_escape)"
    escaped_url="$(printf '%s' "$url" | json_escape)"
    escaped_query="$(printf '%s' "$query" | json_escape)"
    # Comma-separate array elements: every record except the first gets one.
    if [ "$first" -eq 0 ]; then printf ',' >> "$tmp"; fi
    first=0
    cat >> "$tmp" <<JSON
{
  "timestamp": "$now",
  "platform": "$platform_name",
  "query": "$escaped_query",
  "response_raw": "$escaped_text",
  "domain_detected": $domain_detected,
  "citation_detected": $citation_detected,
  "entry_point": "$entry_point",
  "evidence": "$escaped_url",
  "observed": $observed
}
JSON
  done
  printf ']' >> "$tmp"
  append_records_with_python "$tmp"
)

# Entry point: dispatch on the first command-line argument.
case "${1:-}" in
  --help)
    usage
    exit 0
    ;;
  --manual-response)
    # Requires exactly: --manual-response PLATFORM QUERY FILE
    if [ "$#" -ne 4 ]; then
      usage
      exit 2
    fi
    manual_response "$2" "$3" "$4"
    exit 0
    ;;
esac

# Without a configured platform there is nothing to query, so report that no
# observation was made and stop successfully.
if [ "$PLATFORM" = "manual" ]; then
  printf '%s\n' \
    "OBSERVED=false" \
    "REASON=No external AI response was provided. Use --manual-response or set AI_OBSERVATION_PLATFORM with an API key."
  exit 0
fi

run_api_platform
