# インフラ構成図

## 全体アーキテクチャ

```mermaid
graph TB
    subgraph Client["クライアント"]
        CLI["Flink CLI"]
        WebUI["Web UI / REST API<br/>:8081"]
    end

    subgraph FlinkCluster["Flink Cluster"]
        JM["JobManager<br/>RPC :6123<br/>Blob :6124<br/>REST :8081<br/>Memory: 1600MB"]

        subgraph TaskManagers["TaskManager Pool"]
            TM1["TaskManager 1<br/>Memory: 1728MB<br/>Slots: 1"]
            TM2["TaskManager 2<br/>Memory: 1728MB<br/>Slots: 1"]
            TMn["TaskManager N<br/>Memory: 1728MB<br/>Slots: 1"]
        end
    end

    subgraph Storage["永続ストレージ"]
        HDFS["HDFS<br/>:9000"]
        S3["Amazon S3"]
        GCS["Google Cloud Storage"]
        Azure["Azure Blob Storage"]
    end

    subgraph HA["高可用性"]
        ZK["ZooKeeper<br/>:2181"]
    end

    subgraph Monitoring["監視"]
        Prometheus["Prometheus"]
        OTEL["OpenTelemetry<br/>Collector<br/>gRPC :4317<br/>HTTP :4318"]
        HistoryServer["HistoryServer<br/>:8082"]
    end

    CLI --> JM
    WebUI --> JM
    JM --> TM1
    JM --> TM2
    JM --> TMn
    JM --> ZK
    JM --> HDFS
    JM --> S3
    JM --> GCS
    JM --> Azure
    TM1 --> HDFS
    TM2 --> HDFS
    TMn --> HDFS
    JM --> Prometheus
    JM --> OTEL
    JM --> HistoryServer
```

## Docker Compose デプロイメント（Job Cluster）

```mermaid
graph TB
    subgraph DockerNetwork["Docker Network"]
        subgraph JobCluster["job-cluster Container"]
            JM["JobManager<br/>standalone-job<br/>:8081"]
        end

        subgraph TaskManagers["TaskManager Containers<br/>(scale: N)"]
            TM1["taskmanager 1"]
            TM2["taskmanager 2"]
            TMn["taskmanager N"]
        end
    end

    subgraph Volumes["Shared Volumes"]
        UserLib["/opt/flink/usrlib<br/>USER_LIB"]
        Input["INPUT_VOLUME<br/>INPUT_PATH"]
        Output["OUTPUT_VOLUME<br/>OUTPUT_PATH"]
    end

    JM --> TM1
    JM --> TM2
    JM --> TMn

    JM --> UserLib
    TM1 --> UserLib
    TM2 --> UserLib
    TMn --> UserLib

    JM --> Input
    JM --> Output
    TM1 --> Input
    TM1 --> Output
    TM2 --> Input
    TM2 --> Output
    TMn --> Input
    TMn --> Output
```

## Docker Compose Hadoopセキュアクラスタ

```mermaid
graph TB
    subgraph DockerHadoopNetwork["docker-hadoop-cluster-network"]
        subgraph KDC["kdc.kerberos.com"]
            KDCService["Kerberos KDC<br/>:88, :749<br/>REALM: EXAMPLE.COM"]
        end

        subgraph Master["master.docker-hadoop-cluster-network"]
            NameNode["HDFS NameNode<br/>:9000, :8020"]
            ResourceManager["YARN ResourceManager<br/>:8030-8033, :8088"]
        end

        subgraph Worker1["worker1.docker-hadoop-cluster-network"]
            DataNode1["HDFS DataNode"]
            NodeManager1["YARN NodeManager<br/>:8040, :8042"]
        end

        subgraph Worker2["worker2.docker-hadoop-cluster-network"]
            DataNode2["HDFS DataNode"]
            NodeManager2["YARN NodeManager<br/>:8040, :8042"]
        end
    end

    KDCService --> Master
    KDCService --> Worker1
    KDCService --> Worker2
    Master --> Worker1
    Master --> Worker2
```

## Kubernetes デプロイメント

```mermaid
graph TB
    subgraph K8sCluster["Kubernetes Cluster"]
        subgraph Service["flink-job-cluster Service<br/>(ClusterIP)"]
            SvcRPC["RPC :6123"]
            SvcBlob["Blob :6124"]
            SvcUI["UI :8081"]
        end

        subgraph JobManagerPod["JobManager Pod"]
            InitContainer["Init Container<br/>artifacts-fetcher<br/>(wget jar)"]
            MainContainer["flink-main-container<br/>imagePullPolicy: Never<br/>ephemeral-storage: 256Mi"]
            SidecarLog["sidecar-log-collector<br/>(log upload)"]
        end

        subgraph TaskManagerPods["TaskManager Pods"]
            TM1Pod["TaskManager Pod 1"]
            TM2Pod["TaskManager Pod 2"]
            TMnPod["TaskManager Pod N"]
        end

        subgraph Volumes["Volumes"]
            HostPath["flink-volume-hostpath<br/>/tmp"]
            Artifact["flink-artifact<br/>emptyDir"]
            Logs["flink-logs<br/>emptyDir"]
        end
    end

    Service --> JobManagerPod
    InitContainer --> MainContainer
    MainContainer --> SidecarLog
    JobManagerPod --> TM1Pod
    JobManagerPod --> TM2Pod
    JobManagerPod --> TMnPod
    MainContainer --> HostPath
    MainContainer --> Artifact
    MainContainer --> Logs
```

## NAT環境構成

```mermaid
graph TB
    subgraph DockerHost["Docker Host<br/>HOST_IP"]
        subgraph ExternalPorts["External Ports"]
            JMRpcEx["JM_RPC_EX_PORT"]
            TM1RpcEx["TM_1_RPC_EX_PORT"]
            TM2RpcEx["TM_2_RPC_EX_PORT"]
            TM1DataEx["TM_1_DATA_EX_PORT"]
            TM2DataEx["TM_2_DATA_EX_PORT"]
            WebUIPort["8081"]
        end
    end

    subgraph DockerContainers["Docker Containers"]
        subgraph JC["job-cluster"]
            JMRpcIn["JM_RPC_IN_PORT"]
            JMWeb["8081"]
        end

        subgraph TM1["taskmanager1"]
            TM1RpcIn["TM_RPC_IN_PORT"]
            TM1DataIn["TM_DATA_IN_PORT"]
        end

        subgraph TM2["taskmanager2"]
            TM2RpcIn["TM_RPC_IN_PORT"]
            TM2DataIn["TM_DATA_IN_PORT"]
        end
    end

    JMRpcEx -.->|NAT| JMRpcIn
    TM1RpcEx -.->|NAT| TM1RpcIn
    TM2RpcEx -.->|NAT| TM2RpcIn
    TM1DataEx -.->|NAT| TM1DataIn
    TM2DataEx -.->|NAT| TM2DataIn
    WebUIPort -.->|NAT| JMWeb

    JC -->|extra_hosts| TM1
    JC -->|extra_hosts| TM2
    TM1 -->|extra_hosts| JC
    TM1 -->|extra_hosts| TM2
    TM2 -->|extra_hosts| JC
    TM2 -->|extra_hosts| TM1
```

## 高可用性（HA）構成

```mermaid
graph TB
    subgraph FlinkHA["Flink HA Cluster"]
        JM1["JobManager 1<br/>(Active)"]
        JM2["JobManager 2<br/>(Standby)"]
        JM3["JobManager 3<br/>(Standby)"]
    end

    subgraph ZKQuorum["ZooKeeper Quorum<br/>:2181"]
        ZK1["ZK Node 1"]
        ZK2["ZK Node 2"]
        ZK3["ZK Node 3"]
    end

    subgraph HAStorage["HA Storage<br/>(HDFS/S3)"]
        CheckpointDir["Checkpoint Directory"]
        MetadataDir["HA Metadata<br/>/flink/ha/"]
    end

    subgraph TaskManagers["TaskManagers"]
        TM1["TaskManager 1"]
        TM2["TaskManager 2"]
        TMn["TaskManager N"]
    end

    JM1 -->|Leader Election| ZKQuorum
    JM2 -->|Leader Election| ZKQuorum
    JM3 -->|Leader Election| ZKQuorum

    JM1 -->|Checkpoint| CheckpointDir
    JM1 -->|Metadata| MetadataDir

    JM1 --> TM1
    JM1 --> TM2
    JM1 --> TMn
```

## メトリクス/監視構成

```mermaid
graph LR
    subgraph FlinkCluster["Flink Cluster"]
        JM["JobManager"]
        TM["TaskManagers"]
    end

    subgraph MetricsReporters["Metrics Reporters"]
        JMX["JMX Reporter"]
        PromReporter["Prometheus Reporter"]
        DDReporter["Datadog Reporter"]
        InfluxReporter["InfluxDB Reporter"]
        GraphiteReporter["Graphite Reporter"]
        StatsDReporter["StatsD Reporter"]
        SLF4JReporter["SLF4J Reporter"]
        OTELReporter["OpenTelemetry Reporter"]
    end

    subgraph MonitoringSystems["Monitoring Systems"]
        Prometheus["Prometheus"]
        Datadog["Datadog"]
        InfluxDB["InfluxDB"]
        Graphite["Graphite"]
        StatsD["StatsD Server"]
        OTELCollector["OpenTelemetry Collector<br/>gRPC :4317<br/>HTTP :4318<br/>zpages :55679"]
    end

    subgraph OTELExporters["OTEL Exporters"]
        FileExporter["File Exporter<br/>/data/logs.json"]
        DebugExporter["Debug Exporter"]
    end

    JM --> JMX
    JM --> PromReporter
    JM --> DDReporter
    JM --> InfluxReporter
    JM --> GraphiteReporter
    JM --> StatsDReporter
    JM --> SLF4JReporter
    JM --> OTELReporter

    TM --> JMX
    TM --> PromReporter
    TM --> OTELReporter

    PromReporter --> Prometheus
    DDReporter --> Datadog
    InfluxReporter --> InfluxDB
    GraphiteReporter --> Graphite
    StatsDReporter --> StatsD
    OTELReporter --> OTELCollector

    OTELCollector --> FileExporter
    OTELCollector --> DebugExporter
```

## ファイルシステム連携

```mermaid
graph TB
    subgraph FlinkCore["Flink Core"]
        FSFactory["FileSystem Factory"]
    end

    subgraph FileSystemPlugins["FileSystem Plugins"]
        HadoopFS["flink-hadoop-fs"]
        S3Hadoop["flink-s3-fs-hadoop"]
        S3Presto["flink-s3-fs-presto"]
        AzureFS["flink-azure-fs-hadoop"]
        GCSFS["flink-gs-fs-hadoop"]
        OSSFS["flink-oss-fs-hadoop"]
    end

    subgraph CloudStorage["Cloud Storage"]
        HDFS["HDFS<br/>hdfs://"]
        S3["Amazon S3<br/>s3://"]
        GCS["Google Cloud Storage<br/>gs://"]
        Azure["Azure Blob Storage<br/>wasb://"]
        OSS["Alibaba OSS<br/>oss://"]
    end

    FSFactory --> HadoopFS
    FSFactory --> S3Hadoop
    FSFactory --> S3Presto
    FSFactory --> AzureFS
    FSFactory --> GCSFS
    FSFactory --> OSSFS

    HadoopFS --> HDFS
    S3Hadoop --> S3
    S3Presto --> S3
    AzureFS --> Azure
    GCSFS --> GCS
    OSSFS --> OSS
```

## YARN デプロイメント

```mermaid
graph TB
    subgraph Client["Flink Client"]
        FlinkCLI["Flink CLI<br/>flink run -m yarn-cluster"]
    end

    subgraph YARNCluster["YARN Cluster"]
        RM["ResourceManager<br/>:8088"]

        subgraph ApplicationMaster["Application Master Container"]
            JM["JobManager"]
        end

        subgraph NodeManager1["NodeManager 1"]
            Container1["Container<br/>TaskManager 1"]
        end

        subgraph NodeManager2["NodeManager 2"]
            Container2["Container<br/>TaskManager 2"]
        end

        subgraph NodeManagerN["NodeManager N"]
            ContainerN["Container<br/>TaskManager N"]
        end
    end

    subgraph HDFSCluster["HDFS"]
        FlinkJars["Flink JARs"]
        UserJars["User JARs"]
        Checkpoints["Checkpoints"]
    end

    FlinkCLI -->|Submit| RM
    RM -->|Launch| ApplicationMaster
    JM -->|Request Containers| RM
    RM -->|Allocate| Container1
    RM -->|Allocate| Container2
    RM -->|Allocate| ContainerN

    JM --> Container1
    JM --> Container2
    JM --> ContainerN

    FlinkCLI -->|Upload| FlinkJars
    FlinkCLI -->|Upload| UserJars
    JM -->|Read/Write| Checkpoints
    Container1 -->|Read| UserJars
    Container2 -->|Read| UserJars
    ContainerN -->|Read| UserJars
```
