# インフラ構成図

## 1. Apache Spark 全体アーキテクチャ

```mermaid
graph TB
    subgraph "Client"
        CLI["spark-submit / spark-shell"]
        ConnectClient["Spark Connect Client<br/>(gRPC)"]
        JDBC["JDBC/ODBC Client"]
    end

    subgraph "Spark Application"
        Driver["Driver Process<br/>(SparkContext)"]
        ConnectServer["Spark Connect Server<br/>:15002 (gRPC)"]
        ThriftServer["Thrift Server<br/>(JDBC/ODBC)"]
    end

    subgraph "Cluster Manager"
        StandaloneMaster["Standalone Master<br/>:7077"]
        YARN_RM["YARN ResourceManager"]
        K8S_API["Kubernetes API Server"]
    end

    subgraph "Worker Nodes"
        Executor1["Executor 1<br/>Block Manager :7079"]
        Executor2["Executor 2<br/>Block Manager :7079"]
        ExecutorN["Executor N<br/>Block Manager :7079"]
    end

    subgraph "Storage"
        HDFS["HDFS<br/>(Event Log / Data)"]
        CloudStorage["Cloud Storage<br/>(S3/GCS/ABFS)"]
        LocalDisk["Local Disk<br/>(Shuffle/RDD)"]
    end

    subgraph "Monitoring"
        HistoryServer["History Server<br/>:18080"]
        Prometheus["Prometheus"]
        WebUI["Application UI<br/>:4040"]
    end

    CLI --> Driver
    ConnectClient --> ConnectServer
    JDBC --> ThriftServer
    ConnectServer --> Driver
    ThriftServer --> Driver

    Driver -->|"Resource Request"| StandaloneMaster
    Driver -->|"Resource Request"| YARN_RM
    Driver -->|"Pod Create"| K8S_API

    StandaloneMaster -->|"Launch"| Executor1
    StandaloneMaster -->|"Launch"| Executor2
    StandaloneMaster -->|"Launch"| ExecutorN
    YARN_RM -->|"Container"| Executor1
    YARN_RM -->|"Container"| Executor2
    YARN_RM -->|"Container"| ExecutorN
    K8S_API -->|"Pod"| Executor1
    K8S_API -->|"Pod"| Executor2
    K8S_API -->|"Pod"| ExecutorN

    Driver -->|"Task Dispatch<br/>:7078"| Executor1
    Driver -->|"Task Dispatch<br/>:7078"| Executor2
    Driver -->|"Task Dispatch<br/>:7078"| ExecutorN

    Executor1 <-->|"Shuffle"| Executor2
    Executor2 <-->|"Shuffle"| ExecutorN

    Executor1 --> HDFS
    Executor1 --> CloudStorage
    Executor1 --> LocalDisk
    Driver -->|"Event Log (write)"| HDFS

    Driver --> WebUI
    Driver -.->|"Event Log (via Storage)"| HistoryServer
    HistoryServer -->|"Event Log (read)"| HDFS
    Driver -.->|"/metrics/prometheus"| Prometheus
```

## 2. Standalone クラスタ構成

```mermaid
graph TB
    subgraph "Master Node"
        Master["Spark Master<br/>:7077 (RPC)<br/>:8080 (Web UI)"]
        HS["History Server<br/>:18080"]
    end

    subgraph "Worker Node 1"
        Worker1["Spark Worker<br/>:8081 (Web UI)"]
        Exec1A["Executor 1A"]
        Exec1B["Executor 1B"]
        Worker1 --> Exec1A
        Worker1 --> Exec1B
    end

    subgraph "Worker Node 2"
        Worker2["Spark Worker<br/>:8081 (Web UI)"]
        Exec2A["Executor 2A"]
        Exec2B["Executor 2B"]
        Worker2 --> Exec2A
        Worker2 --> Exec2B
    end

    subgraph "Worker Node N"
        WorkerN["Spark Worker<br/>:8081 (Web UI)"]
        ExecNA["Executor NA"]
        ExecNB["Executor NB"]
        WorkerN --> ExecNA
        WorkerN --> ExecNB
    end

    subgraph "Shared Storage"
        HDFS2["HDFS / NFS"]
    end

    Master -->|"Resource Mgmt"| Worker1
    Master -->|"Resource Mgmt"| Worker2
    Master -->|"Resource Mgmt"| WorkerN

    HS --> HDFS2
    Exec1A --> HDFS2
    Exec2A --> HDFS2
    ExecNA --> HDFS2
```

## 3. Kubernetes クラスタ構成

```mermaid
graph TB
    subgraph "Kubernetes Cluster"
        subgraph "Control Plane"
            APIServer["API Server"]
            Scheduler["Scheduler"]
            ETCD["etcd"]
        end

        subgraph "spark Namespace"
            subgraph "Driver Pod"
                DriverContainer["Driver Container<br/>(azul/zulu-openjdk:21)<br/>Port: 7078 (RPC)<br/>Port: 4040 (UI)"]
                DriverConfVol["ConfigMap Volume<br/>/opt/spark/conf"]
                DriverSecretVol["Secret Volume<br/>/mnt/secrets/spark-kubernetes-credentials"]
            end

            DriverSvc["Driver Service<br/>(Headless / ClusterIP: None)<br/>IPv4 SingleStack"]

            subgraph "Executor Pod 1"
                ExecContainer1["Executor Container<br/>(azul/zulu-openjdk:21)<br/>Port: 7079 (Block Mgr)"]
                ExecLocalDir1["Local Dir Volume<br/>(emptyDir / PVC)"]
            end

            subgraph "Executor Pod 2"
                ExecContainer2["Executor Container<br/>(azul/zulu-openjdk:21)<br/>Port: 7079 (Block Mgr)"]
                ExecLocalDir2["Local Dir Volume<br/>(emptyDir / PVC)"]
            end

            subgraph "Executor Pod N"
                ExecContainerN["Executor Container<br/>..."]
                ExecLocalDirN["Local Dir Volume"]
            end

            SA["ServiceAccount<br/>spark-sa"]
            CR["ClusterRole<br/>spark-role<br/>(pods: *)"]
            CRB["ClusterRoleBinding<br/>spark-role-binding"]
        end
    end

    subgraph "Container Registry"
        Registry["Docker Registry<br/>(pullSecrets)"]
    end

    subgraph "External Storage"
        PVC["PersistentVolumeClaim<br/>(ReadWriteOncePod, one per executor)"]
        ExtStorage["HDFS / Cloud Storage"]
    end

    Scheduler --> APIServer
    APIServer --> ETCD

    ExecContainer1 --> DriverSvc
    ExecContainer2 --> DriverSvc
    ExecContainerN --> DriverSvc
    DriverSvc --> DriverContainer

    DriverContainer -->|"Create Executor Pods"| APIServer
    CRB -->|"subject"| SA
    CRB -->|"roleRef"| CR

    Registry --> DriverContainer
    Registry --> ExecContainer1

    ExecLocalDir1 --> PVC
    ExecLocalDir2 --> PVC
    DriverContainer --> ExtStorage
    ExecContainer1 --> ExtStorage
```

## 4. メトリクス・監視構成

```mermaid
graph LR
    subgraph "Spark Processes"
        MasterProc["Master"]
        WorkerProc["Worker"]
        DriverProc["Driver"]
        ExecutorProc["Executor"]
    end

    subgraph "Metrics Sources"
        JvmSrc["JvmSource<br/>(Heap, GC)"]
        MasterSrc["MasterSource"]
        WorkerSrc["WorkerSource"]
    end

    subgraph "Metrics Sinks"
        MetricsServlet["MetricsServlet<br/>/metrics/json"]
        PromServlet["PrometheusServlet<br/>/metrics/prometheus"]
        ConsoleSink["ConsoleSink"]
        CSVSink["CSVSink<br/>(/tmp/)"]
        JMXSink["JmxSink"]
        GraphiteSink["GraphiteSink<br/>(TCP/UDP)"]
        StatsdSink["StatsdSink<br/>(:8125 UDP)"]
        GangliaSink["GangliaSink<br/>(multicast)"]
        Slf4jSink["Slf4jSink"]
    end

    subgraph "Monitoring Tools"
        Prometheus2["Prometheus"]
        Grafana["Grafana"]
        GraphiteServer["Graphite Server"]
        StatsdServer["StatsD Server"]
    end

    MasterProc --> MasterSrc
    WorkerProc --> WorkerSrc
    DriverProc --> JvmSrc
    ExecutorProc --> JvmSrc

    MasterSrc --> MetricsServlet
    MasterSrc --> PromServlet
    WorkerSrc --> MetricsServlet
    JvmSrc --> MetricsServlet
    JvmSrc --> PromServlet

    MasterSrc --> GraphiteSink
    MasterSrc --> StatsdSink
    MasterSrc --> JMXSink
    MasterSrc --> GangliaSink

    PromServlet --> Prometheus2
    Prometheus2 --> Grafana
    GraphiteSink --> GraphiteServer
    StatsdSink --> StatsdServer
```

## 5. Docker イメージ階層

```mermaid
graph TB
    subgraph "Base Images"
        ZuluJDK["azul/zulu-openjdk:21"]
        UbuntuJammy["ubuntu:jammy-20240911.1"]
    end

    subgraph "Spark Images"
        SparkBase["Spark Base Image<br/>(Scala/Java)<br/>- tini, bash, krb5<br/>- /opt/spark/jars<br/>- /opt/spark/bin<br/>- /opt/spark/sbin<br/>- entrypoint.sh<br/>- UID: 185"]
        SparkPython["Spark Python Image<br/>- python3, pip<br/>- pyspark"]
        SparkR["Spark R Image<br/>- r-base, r-base-dev<br/>- SparkR"]
    end

    subgraph "CI/CD Images"
        InfraImage["Spark Infra Image<br/>- openjdk-17<br/>- Python 3.9-3.13<br/>- PyPy 3.10<br/>- R, Node.js<br/>- gRPC, protobuf"]
    end

    ZuluJDK --> SparkBase
    SparkBase --> SparkPython
    SparkBase --> SparkR
    UbuntuJammy --> InfraImage
```
