#!/usr/bin/env bash
set -euo pipefail
HERE="$(cd "$(dirname "$0")" && pwd)"
# shellcheck source=sg-lib.sh
source "$HERE/sg-lib.sh"

# Print the command-line help text to stdout.
# Arguments: none
# Outputs:   the usage/option summary, one line per printf argument
usage() {
  printf '%s\n' \
    'Usage:' \
    '  sg-install-qe-gpu-src-u [--cuda-cc-policy min|max]' \
    '' \
    'Options:' \
    '  --cuda-cc-policy min|max' \
    '      Auto select policy when multiple GPUs are present.' \
    '      min: choose the lowest compute capability (compatibility-first, default)' \
    '      max: choose the highest compute capability (performance-first)'
}

# Effective settings: each may be pre-set in the environment; otherwise the
# DEFAULT_* values (expected to come from the sourced sg-lib.sh) are used.
PREFIX="${PREFIX:-$DEFAULT_PREFIX}"
WORKDIR="${WORKDIR:-$DEFAULT_WORKDIR}"
LOGDIR="${LOGDIR:-$DEFAULT_LOGDIR}"
JOBS="${JOBS:-$DEFAULT_JOBS}"
CUDA_CC_POLICY="${CUDA_CC_POLICY:-min}"

#######################################
# Parse command-line options.
# Globals:   CUDA_CC_POLICY (written), EC_ACTION (read)
# Arguments: the script's argument list ("$@")
# Accepts:   --cuda-cc-policy VALUE | --cuda-cc-policy=VALUE | -h | --help
# Exits:     0 after printing help; via die on an unknown argument or when
#            --cuda-cc-policy is given without a value.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --cuda-cc-policy)
        # Without this guard `shift 2` fails when the value is missing and
        # `set -e` terminates the script without any explanation.
        [[ $# -ge 2 ]] || die "$EC_ACTION" "--cuda-cc-policy requires a value (min|max)"
        CUDA_CC_POLICY="$2"
        shift 2
        ;;
      --cuda-cc-policy=*)
        CUDA_CC_POLICY="${1#*=}"
        shift
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        die "$EC_ACTION" "unknown arg: $1"
        ;;
    esac
  done
}

parse_args "$@"
# All arguments have been consumed by the parser; clear the positional
# parameters so the rest of the script sees an empty argument list.
set --

# Reject anything but the two supported policies (this also covers a bad
# value supplied through the CUDA_CC_POLICY environment variable).
case "$CUDA_CC_POLICY" in
  min|max) ;;
  *)
    die "$EC_ACTION" "--cuda-cc-policy must be min or max (got: $CUDA_CC_POLICY)"
    ;;
esac

safe_prefix_or_die "$PREFIX"
mkdirp "$WORKDIR" "$LOGDIR"

# Echo the effective settings, one NAME=value line each.
for _sg_setting in PREFIX WORKDIR LOGDIR CUDA_CC_POLICY; do
  log "${_sg_setting}=${!_sg_setting}"
done

# Record a dependency error via doctor_record, then abort via die.
#   $1 - message passed to doctor_record
#   $2 - remediation hint passed to doctor_record
#   $3 - message passed to die
_sg_deps_fail() {
  doctor_record "$PREFIX" "$WORKDIR" "$LOGDIR" "err" "deps" "$1" "$2"
  die "$EC_ACTION" "$3"
}

# deps check
command -v git >/dev/null 2>&1 \
  || _sg_deps_fail "git が見つかりません。" "sudo apt install git など" "git not found"

detect_nvcc >/dev/null 2>&1 \
  || _sg_deps_fail "nvcc が見つかりません（CUDA toolkit）。" "CUDA toolkit を利用可能な環境（module等）にする / 管理者に導入依頼" "nvcc not found"

NVCC="$(detect_nvcc)"
log "nvcc=$NVCC"

# NVHPC is preferred
NVHPC_BIN=""
if ! NVHPC_BIN="$(detect_nvhpc 2>/dev/null)"; then
  # For now, we require NVHPC for STK-014-U (can add GNU route later)
  _sg_deps_fail "NVHPC(nvc) が見つかりません。" "1) module load nvhpc などで nvc をPATHに入れる\n2) もしくは管理者にNVHPC導入依頼\n3) GNU+CUDA ルートは次版で提供予定" "NVHPC (nvc) not found"
else
  log "NVHPC found: $NVHPC_BIN"
fi

# Put the detected NVHPC location first on PATH for the rest of the script.
export PATH="$NVHPC_BIN:$PATH"

# fetch QE source (v7.5 unless QE_VER overrides it)
QE_VER="${QE_VER:-7.5}"
QE_TAG="qe-${QE_VER}"
SRC_DIR="${WORKDIR}/qe-src"
BUILD_DIR="${WORKDIR}/qe-build"
INSTALL_DIR="${PREFIX}/qe-${QE_VER}"

log "QE_VER=$QE_VER (tag=$QE_TAG)"
log "SRC_DIR=$SRC_DIR"
log "BUILD_DIR=$BUILD_DIR"
log "INSTALL_DIR=$INSTALL_DIR"

if [ ! -d "$SRC_DIR/.git" ]; then
  log "Cloning QE source..."
  # Remove any leftover non-git directory; ${SRC_DIR:?} aborts rather than
  # expanding to an empty path.
  rm -rf -- "${SRC_DIR:?}"
  git clone --depth 1 --branch "$QE_TAG" https://gitlab.com/QEF/q-e.git "$SRC_DIR" \
    2>&1 | tee "$LOGDIR/clone.log"
else
  log "QE source already present (skip clone)"
  # The checkout is reused as-is. If QE_VER changed since it was cloned, the
  # build would silently install a different version under qe-${QE_VER}, so
  # compare HEAD with the requested tag and warn on mismatch (non-fatal).
  _sg_head_rev="$(git -C "$SRC_DIR" rev-parse -q --verify 'HEAD^{commit}' 2>/dev/null || true)"
  _sg_tag_rev="$(git -C "$SRC_DIR" rev-parse -q --verify "refs/tags/${QE_TAG}^{commit}" 2>/dev/null || true)"
  if [ -z "$_sg_head_rev" ] || [ "$_sg_head_rev" != "$_sg_tag_rev" ]; then
    echo "[WARN] existing checkout in $SRC_DIR is not at tag $QE_TAG; remove it to force a fresh clone." >&2
  fi
fi

mkdirp "$BUILD_DIR"
cd "$SRC_DIR"

# configure & build (minimal)
log "Configuring..."
# Snapshot the toolchain into env.txt: timestamp, the detected nvcc, the nvc
# found on PATH, and nvc's version banner. A missing nvc only yields empty
# fields; it does not abort the script here.
{
  printf '%s\n' "$(ts)"
  printf 'nvcc: %s\n' "$NVCC"
  printf 'nvc: %s\n' "$(command -v nvc)"
  printf '%s\n' 'nvc --version:'
  printf '%s\n' "$(nvc --version 2>&1 || true)"
} > "$LOGDIR/env.txt"

# QE build is complex; we provide a conservative baseline:
# - build pw.x only
# - user install under PREFIX
# You may need to tune flags per environment.
log "Building (baseline)..."
# --- GPU build knobs (injected) ---
# Override if needed:
#   CUDA_HOME=/path/to/cuda   CUDA_CC=80   CUDA_RUNTIME=13.1
#
# CUDA_HOME resolution order:
#   1. CUDA_HOME from the environment
#   2. NVHPC_CUDA_HOME from the environment
#   3. the parent of the directory holding `nvcc` on PATH
#   4. the parent of the directory holding $NVCC (the value obtained from
#      detect_nvcc earlier in this script), used only when it is an
#      executable path -- NOTE(review): assumes detect_nvcc prints a path;
#      confirm against sg-lib.sh
CUDA_HOME="${CUDA_HOME:-${NVHPC_CUDA_HOME:-}}"
if [ -z "${CUDA_HOME}" ]; then
  _sg_nvcc_path="$(command -v nvcc 2>/dev/null || true)"
  if [ -z "$_sg_nvcc_path" ] && [ -x "${NVCC:-}" ]; then
    _sg_nvcc_path="$NVCC"
  fi
  if [ -n "$_sg_nvcc_path" ]; then
    # <dir of nvcc>/.. with symlinks resolved (pwd -P).
    CUDA_HOME="$(cd "$(dirname "$_sg_nvcc_path")/.." && pwd -P)"
  fi
fi
#######################################
# Pick one CUDA compute capability from the GPUs reported by nvidia-smi.
# Globals:   CUDA_CC_POLICY (read) - "max" selects the highest value;
#            anything else (including unset) selects the lowest.
# Arguments: none
# Outputs:   the chosen capability without the dot (e.g. 8.0 -> 80)
# Returns:   0 on success; 1 if nvidia-smi is missing, prints nothing, or
#            reports no usable capability.
#######################################
detect_cuda_cc() {
  command -v nvidia-smi >/dev/null 2>&1 || return 1

  local raw_list cc_list
  # Strip blanks and CRs only. Newlines must survive: they separate the
  # per-GPU entries, and deleting them fuses "8.0\n9.0" into "8.09.0".
  raw_list="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | tr -d ' \t\r' || true)"
  [ -n "$raw_list" ] || return 1

  # Drop the dot and keep only 2-3 digit entries (filters e.g. "[N/A]").
  # The pattern is spelled out instead of {2,3} so awk implementations
  # without interval-expression support behave the same.
  cc_list="$(printf '%s\n' "$raw_list" | sed 's/\.//g' | awk '/^[0-9][0-9][0-9]?$/' | sort -n)"
  [ -n "$cc_list" ] || return 1

  # cc_list is sorted ascending: first line is the minimum, last the maximum.
  if [ "${CUDA_CC_POLICY:-min}" = "max" ]; then
    printf '%s\n' "${cc_list##*$'\n'}"
  else
    printf '%s\n' "${cc_list%%$'\n'*}"
  fi
}

#######################################
# List the CUDA compute capability of every GPU reported by nvidia-smi.
# Arguments: none
# Outputs:   one capability per line without the dot (e.g. 8.0 -> 80), in
#            the order nvidia-smi reports them; unusable entries are dropped.
# Returns:   1 if nvidia-smi is not available; otherwise the pipeline status.
#######################################
detect_cuda_cc_list() {
  command -v nvidia-smi >/dev/null 2>&1 || return 1
  # Delete blanks, CRs and the dot, but keep newlines: they separate the
  # per-GPU entries, and removing them merges all GPUs into one bogus token.
  # The awk pattern avoids {2,3} for awk builds without interval expressions.
  nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null \
    | tr -d ' \t\r.' \
    | awk '/^[0-9][0-9][0-9]?$/'
}

# Auto-detected capability (empty when detection fails) and the sorted,
# space-joined list of all candidates (used for logging only).
AUTO_CUDA_CC="$(detect_cuda_cc || true)"
AUTO_CUDA_CC_LIST="$(detect_cuda_cc_list | sort -n | tr '\n' ' ' | sed 's/[[:space:]]*$//' || true)"
# An explicit CUDA_CC from the environment always wins over detection.
if [ -n "${AUTO_CUDA_CC:-}" ]; then
  CUDA_CC="${CUDA_CC:-$AUTO_CUDA_CC}"   # e.g. 80, 90
else
  CUDA_CC="${CUDA_CC:-80}"              # fallback
fi
CUDA_RUNTIME="${CUDA_RUNTIME:-13.1}"
if [ -n "${CUDA_HOME:-}" ] && [ -d "${CUDA_HOME}/lib64" ]; then
  # Append the previous value only when it is non-empty. A trailing ':'
  # would add an empty entry, which the dynamic loader interprets as the
  # current working directory.
  export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
else
  echo "[WARN] CUDA_HOME not resolved; GPU configure may fall back to CPU build." >&2
fi

# Optional MPI wrapper directory; put on PATH only if it exists on this host.
MPI_PREF="${MPI_PREF:-/opt/nvidia/hpc_sdk/Linux_x86_64/25.7/comm_libs/12.9/hpcx/hpcx-2.22.1/ompi/bin}"
if [ -d "$MPI_PREF" ]; then
  export PATH="$MPI_PREF:$PATH"
fi

log "CUDA_HOME=${CUDA_HOME:-<unset>}"
log "CUDA_CC=${CUDA_CC}"
log "CUDA_RUNTIME=${CUDA_RUNTIME}"
if [ -n "${AUTO_CUDA_CC:-}" ]; then
  log "CUDA_CC(auto from nvidia-smi; policy=${CUDA_CC_POLICY})=${AUTO_CUDA_CC}"
  log "CUDA_CC(auto candidates)=${AUTO_CUDA_CC_LIST:-<unavailable>}"
else
  log "CUDA_CC(auto from nvidia-smi)=<unavailable>"
fi

# Resolve the compiler wrappers first. A tool that is not on PATH yields an
# empty value, which is still handed to configure as NAME= (no abort here).
_sg_cc_bin="$(command -v nvc || true)"
_sg_f90_bin="$(command -v nvfortran || true)"
_sg_mpif90_bin="$(command -v mpif90 || true)"

_sg_configure_args=(
  --prefix="$INSTALL_DIR"
  --with-cuda="$CUDA_HOME"
  --with-cuda-runtime="$CUDA_RUNTIME"
  --with-cuda-cc="$CUDA_CC"
  CC="$_sg_cc_bin"
  F90="$_sg_f90_bin"
  MPIF90="$_sg_mpif90_bin"
)

# Each step streams to the terminal and to its own log file; with pipefail a
# failing step (not tee) determines the pipeline status.
./configure "${_sg_configure_args[@]}" 2>&1 | tee "$LOGDIR/configure.log"

make -j "$JOBS" pw 2>&1 | tee "$LOGDIR/make_pw.log"

make install 2>&1 | tee "$LOGDIR/make_install.log"

# smoke check: the install must have produced an executable pw.x
PW="$INSTALL_DIR/bin/pw.x"
if [ ! -x "$PW" ]; then
  doctor_record "$PREFIX" "$WORKDIR" "$LOGDIR" "err" "install" "pw.x が生成されていません。" "tail -n 200 $LOGDIR/make_pw.log"
  die "$EC_INTERNAL" "pw.x missing after install"
fi

# Read text on stdin and print the distinct sm_<digits> tokens found in it,
# sorted and space-joined on one line (nothing when there are none).
# Always succeeds: "no match" is a valid result, and under pipefail + set -e
# a failing grep would otherwise abort the caller.
_sg_extract_sm_archs() {
  { grep -aoE 'sm_[0-9]+' || true; } \
    | sort -u \
    | tr '\n' ' ' \
    | sed 's/[[:space:]]*$//'
}

# Record which GPU architectures are embedded in pw.x. This is diagnostic
# only: a missing tool or an empty result falls through to "unknown" rather
# than failing an install that has already succeeded.
SM_LOG="$LOGDIR/pw_sm_arch.txt"
{
  echo "$(ts)"
  echo "pw=$PW"
  sms=""
  if command -v cuobjdump >/dev/null 2>&1; then
    sms="$({ cuobjdump --list-elf "$PW" 2>/dev/null || true; } | _sg_extract_sm_archs)"
  fi
  # Fallback: scan the binary's printable strings when cuobjdump is missing
  # or reported nothing.
  if [ -z "$sms" ] && command -v strings >/dev/null 2>&1; then
    sms="$({ strings "$PW" 2>/dev/null || true; } | _sg_extract_sm_archs)"
  fi
  echo "sm_arch=${sms:-unknown}"
} | tee "$SM_LOG"

doctor_record "$PREFIX" "$WORKDIR" "$LOGDIR" "ok" "install" "インストール完了" "PREFIX=$PREFIX $HERE/sg-verify-qe-gpu-src-u --single"
log "OK: installed pw.x at $PW"
