Vengineerの妄想(準備期間)

人生は短いけど、長いです。人生を楽しみましょう!

Ryzen AI の DMA & NoCs とは?

はじめに

Ryzen AIの公式の内部構造は、このブログに6月18日にアップした

の中にあるスライドのみです。そのスライドを下記に説明のために引用します。

このスライドの中の 「DMA & NoCs」がどうなっているのかを調べてみました。

GMIO, PLIO, DMA

RyzenAI-SWの例題 (opt-1.3b) のフォルダを見てみると、

xclbin/aieml というフォルダがあり、その下に

の中を覗いてみたら、

  • GMIO
  • PLIO
  • DMA

なるものが出てきました。

それぞれ見てみます。

GMIO

下記のように、12個のGMIOがあります。

        "GMIOs": {
            "gmio0": {
                "id": 0,
                "name": "aie_graph.in_[0][0]",
                "logical_name": "aie_graph_in__0__0_",
                "type": 0,
                "shim_column": 0,
                "channel_number": 2,
                "stream_id": 3,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio1": {
                "id": 1,
                "name": "aie_graph.in_[0][1]",
                "logical_name": "aie_graph_in__0__1_",
                "type": 0,
                "shim_column": 0,
                "channel_number": 3,
                "stream_id": 7,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio2": {
                "id": 2,
                "name": "aie_graph.out_[0][0]",
                "logical_name": "aie_graph_out__0__0_",
                "type": 1,
                "shim_column": 0,
                "channel_number": 0,
                "stream_id": 2,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio3": {
                "id": 3,
                "name": "aie_graph.in_[1][0]",
                "logical_name": "aie_graph_in__1__0_",
                "type": 0,
                "shim_column": 1,
                "channel_number": 2,
                "stream_id": 3,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio4": {
                "id": 4,
                "name": "aie_graph.in_[1][1]",
                "logical_name": "aie_graph_in__1__1_",
                "type": 0,
                "shim_column": 1,
                "channel_number": 3,
                "stream_id": 7,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio5": {
                "id": 5,
                "name": "aie_graph.out_[1][0]",
                "logical_name": "aie_graph_out__1__0_",
                "type": 1,
                "shim_column": 1,
                "channel_number": 0,
                "stream_id": 2,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio6": {
                "id": 6,
                "name": "aie_graph.in_[2][0]",
                "logical_name": "aie_graph_in__2__0_",
                "type": 0,
                "shim_column": 2,
                "channel_number": 2,
                "stream_id": 3,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio7": {
                "id": 7,
                "name": "aie_graph.in_[2][1]",
                "logical_name": "aie_graph_in__2__1_",
                "type": 0,
                "shim_column": 2,
                "channel_number": 3,
                "stream_id": 7,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio8": {
                "id": 8,
                "name": "aie_graph.out_[2][0]",
                "logical_name": "aie_graph_out__2__0_",
                "type": 1,
                "shim_column": 2,
                "channel_number": 0,
                "stream_id": 2,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio9": {
                "id": 9,
                "name": "aie_graph.in_[3][0]",
                "logical_name": "aie_graph_in__3__0_",
                "type": 0,
                "shim_column": 3,
                "channel_number": 2,
                "stream_id": 3,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio10": {
                "id": 10,
                "name": "aie_graph.in_[3][1]",
                "logical_name": "aie_graph_in__3__1_",
                "type": 0,
                "shim_column": 3,
                "channel_number": 3,
                "stream_id": 7,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            },
            "gmio11": {
                "id": 11,
                "name": "aie_graph.out_[3][0]",
                "logical_name": "aie_graph_out__3__0_",
                "type": 1,
                "shim_column": 3,
                "channel_number": 0,
                "stream_id": 2,
                "burst_length_in_16byte": 4,
                "pl_kernel_instance_name": "",
                "pl_parameter_index": -1
            }
        },

'name' のところに、aie_graph.in_[3][1] とか aie_graph.out_[3][0] とかあるので、AI Engine が処理に使用する入力ポートや出力ポートに対応するものだと思います。

この中で、

  • 'shim_column' というのがあります。この GMIO は、shim という部分にあるようです。

PLIO

下記のように、4個のPLIOがあります。

        "PLIOs": {
            "plio0": {
                "id": 0,
                "name": "aie_graph.token_out_[0]",
                "logical_name": "TCT0",
                "shim_column": 0,
                "slaveOrMaster": 1,
                "stream_id": 0
            },
            "plio1": {
                "id": 1,
                "name": "aie_graph.token_out_[1]",
                "logical_name": "TCT1",
                "shim_column": 1,
                "slaveOrMaster": 1,
                "stream_id": 0
            },
            "plio2": {
                "id": 2,
                "name": "aie_graph.token_out_[2]",
                "logical_name": "TCT2",
                "shim_column": 2,
                "slaveOrMaster": 1,
                "stream_id": 0
            },
            "plio3": {
                "id": 3,
                "name": "aie_graph.token_out_[3]",
                "logical_name": "TCT3",
                "shim_column": 3,
                "slaveOrMaster": 1,
                "stream_id": 0
            }
        },

こちらも、'shim_column' というのがあります。この PLIO も、shim という部分にあるようです。

DMA

下記のように、dma_ch_configX なるものが 47個あります。

        "DMAChConfigs": {
            "dma_ch_config0": {
                "port_id": 104,
                "port_name": "aie_graph.mtx_[0].in[0]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 0
            },
            "dma_ch_config1": {
                "port_id": 105,
                "port_name": "aie_graph.mtx_[0].in[1]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 1
            },
            "dma_ch_config2": {
                "port_id": 106,
                "port_name": "aie_graph.mtx_[0].in[2]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 2
            },
            "dma_ch_config3": {
                "port_id": 107,
                "port_name": "aie_graph.mtx_[0].in[3]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 3
            },
            "dma_ch_config4": {
                "port_id": 108,
                "port_name": "aie_graph.mtx_[0].in[4]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 4
            },
            "dma_ch_config5": {
                "port_id": 109,
                "port_name": "aie_graph.mtx_[0].in[5]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 5
            },
            "dma_ch_config6": {
                "port_id": 110,
                "port_name": "aie_graph.mtx_[0].out[0]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 0
            },
            "dma_ch_config7": {
                "port_id": 111,
                "port_name": "aie_graph.mtx_[0].out[1]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 1
            },
            "dma_ch_config8": {
                "port_id": 112,
                "port_name": "aie_graph.mtx_[0].out[2]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 2
            },
            "dma_ch_config9": {
                "port_id": 113,
                "port_name": "aie_graph.mtx_[0].out[3]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 3
            },
            "dma_ch_config10": {
                "port_id": 114,
                "port_name": "aie_graph.mtx_[0].out[4]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 4
            },
            "dma_ch_config11": {
                "port_id": 115,
                "port_name": "aie_graph.mtx_[0].out[5]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 5
            },
            "dma_ch_config12": {
                "port_id": 116,
                "port_name": "aie_graph.mtx_[1].in[0]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 0
            },
            "dma_ch_config13": {
                "port_id": 117,
                "port_name": "aie_graph.mtx_[1].in[1]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 1
            },
            "dma_ch_config14": {
                "port_id": 118,
                "port_name": "aie_graph.mtx_[1].in[2]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 2
            },
            "dma_ch_config15": {
                "port_id": 119,
                "port_name": "aie_graph.mtx_[1].in[3]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 3
            },
            "dma_ch_config16": {
                "port_id": 120,
                "port_name": "aie_graph.mtx_[1].in[4]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 4
            },
            "dma_ch_config17": {
                "port_id": 121,
                "port_name": "aie_graph.mtx_[1].in[5]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 5
            },
            "dma_ch_config18": {
                "port_id": 122,
                "port_name": "aie_graph.mtx_[1].out[0]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 0
            },
            "dma_ch_config19": {
                "port_id": 123,
                "port_name": "aie_graph.mtx_[1].out[1]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 1
            },
            "dma_ch_config20": {
                "port_id": 124,
                "port_name": "aie_graph.mtx_[1].out[2]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 2
            },
            "dma_ch_config21": {
                "port_id": 125,
                "port_name": "aie_graph.mtx_[1].out[3]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 3
            },
            "dma_ch_config22": {
                "port_id": 126,
                "port_name": "aie_graph.mtx_[1].out[4]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 4
            },
            "dma_ch_config23": {
                "port_id": 127,
                "port_name": "aie_graph.mtx_[1].out[5]",
                "parent_id": 1,
                "tile_type": 2,
                "column": 1,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 5
            },
            "dma_ch_config24": {
                "port_id": 128,
                "port_name": "aie_graph.mtx_[2].in[0]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 0
            },
            "dma_ch_config25": {
                "port_id": 129,
                "port_name": "aie_graph.mtx_[2].in[1]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 1
            },
            "dma_ch_config26": {
                "port_id": 130,
                "port_name": "aie_graph.mtx_[2].in[2]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 2
            },
            "dma_ch_config27": {
                "port_id": 131,
                "port_name": "aie_graph.mtx_[2].in[3]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 3
            },
            "dma_ch_config28": {
                "port_id": 132,
                "port_name": "aie_graph.mtx_[2].in[4]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 4
            },
            "dma_ch_config29": {
                "port_id": 133,
                "port_name": "aie_graph.mtx_[2].in[5]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 5
            },
            "dma_ch_config30": {
                "port_id": 134,
                "port_name": "aie_graph.mtx_[2].out[0]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 0
            },
            "dma_ch_config31": {
                "port_id": 135,
                "port_name": "aie_graph.mtx_[2].out[1]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 1
            },
            "dma_ch_config32": {
                "port_id": 136,
                "port_name": "aie_graph.mtx_[2].out[2]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 2
            },
            "dma_ch_config33": {
                "port_id": 137,
                "port_name": "aie_graph.mtx_[2].out[3]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 3
            },
            "dma_ch_config34": {
                "port_id": 138,
                "port_name": "aie_graph.mtx_[2].out[4]",
                "parent_id": 2,
                "tile_type": 2,
                "column": 2,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 4
            },
            "dma_ch_config35": {
                "port_id": 139,
                "port_name": "aie_graph.mtx_[3].in[0]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 0
            },
            "dma_ch_config36": {
                "port_id": 140,
                "port_name": "aie_graph.mtx_[3].in[1]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 1
            },
            "dma_ch_config37": {
                "port_id": 141,
                "port_name": "aie_graph.mtx_[3].in[2]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 2
            },
            "dma_ch_config38": {
                "port_id": 142,
                "port_name": "aie_graph.mtx_[3].in[3]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 3
            },
            "dma_ch_config39": {
                "port_id": 143,
                "port_name": "aie_graph.mtx_[3].in[4]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 4
            },
            "dma_ch_config40": {
                "port_id": 144,
                "port_name": "aie_graph.mtx_[3].in[5]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 5
            },
            "dma_ch_config41": {
                "port_id": 145,
                "port_name": "aie_graph.mtx_[3].out[0]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 0
            },
            "dma_ch_config42": {
                "port_id": 146,
                "port_name": "aie_graph.mtx_[3].out[1]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 1
            },
            "dma_ch_config43": {
                "port_id": 147,
                "port_name": "aie_graph.mtx_[3].out[2]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 2
            },
            "dma_ch_config44": {
                "port_id": 148,
                "port_name": "aie_graph.mtx_[3].out[3]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 3
            },
            "dma_ch_config45": {
                "port_id": 149,
                "port_name": "aie_graph.mtx_[3].out[4]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 4
            },
            "dma_ch_config46": {
                "port_id": 150,
                "port_name": "aie_graph.mtx_[3].out[5]",
                "parent_id": 3,
                "tile_type": 2,
                "column": 3,
                "row": 0,
                "s2mm_or_mm2s": 1,
                "channel": 5
            }

ここで最初の dma_ch_config0 を見てみます。

            "dma_ch_config0": {
                "port_id": 104,
                "port_name": "aie_graph.mtx_[0].in[0]",
                "parent_id": 0,
                "tile_type": 2,
                "column": 0,
                "row": 0,
                "s2mm_or_mm2s": 0,
                "channel": 0
            },

port_name のところに、"aie_graph.mtx_[0].in[0]" とあります。aie_graph.mtx_[0] は、SharedBufferToTileMapping のところにあります。これは、Memory Tile の部分で、Memory Tile の dmaChannels のところに上記の dma_ch_configX が接続するっぽいです。

            "SharedBufferToTileMapping": [
                {
                    "graph": "aie_graph",
                    "tile": "memory",
                    "bufferName": "mtx_[0]",
                    "column": 1,
                    "row": 0,
                    "dmaChannels": {
                        "mtx_[0].in[0]": {
                            "channel": 0,
                            "direction": "s2mm"
                        },
                        "mtx_[0].in[1]": {
                            "channel": 1,
                            "direction": "s2mm"
                        },
                        "mtx_[0].in[2]": {
                            "channel": 2,
                            "direction": "s2mm"
                        },
                        "mtx_[0].in[3]": {
                            "channel": 3,
                            "direction": "s2mm"
                        },
                        "mtx_[0].in[4]": {
                            "channel": 4,
                            "direction": "s2mm"
                        },
                        "mtx_[0].in[5]": {
                            "channel": 5,
                            "direction": "s2mm"
                        },
                        "mtx_[0].out[0]": {
                            "channel": 0,
                            "direction": "mm2s"
                        },
                        "mtx_[0].out[1]": {
                            "channel": 1,
                            "direction": "mm2s"
                        },
                        "mtx_[0].out[2]": {
                            "channel": 2,
                            "direction": "mm2s"
                        },
                        "mtx_[0].out[3]": {
                            "channel": 3,
                            "direction": "mm2s"
                        },
                        "mtx_[0].out[4]": {
                            "channel": 4,
                            "direction": "mm2s"
                        },
                        "mtx_[0].out[5]": {
                            "channel": 5,
                            "direction": "mm2s"
                        }
                    }
                },

DMAの片側は、DRAMで、もう一方が AIE Memory Tile の DMA用ポートに繋がる感じではないでしょうか? DRAM側のアドレスは、メモリ割り当てをした時に決まるアドレスになると思います。

AIE Memory Tile に書き込まれたデータを使って、AI Engine が何らかの処理を行い、DRAMに戻す時は、Memory Tile に書き込み、DMAにてDRAMに書き込まれるのだと思います。

おわりに

Ryzen AI の中の DMA & NoCs の中身としては、

  • GMIO
  • PLIO
  • DMA

というものがあることは分かりました。

GMIO と PLIO は、FPGAのAI Engineにもあるので、この部分も基本的には同じものを使っているのでしょうか?