サンプルコード:hello_world.py
引用 @qpu def hello_world(asm): # Load two vectors of length 16 from the host memory (address=uniforms[0]) to VPM setup_dma_load(nrows=2) start_dma_load(uniform) wait_dma_load() # Setup VPM read/write operations setup_vpm_read(nrows=2) setup_vpm_write() # Compute a + b mov(r0, vpm) mov(r1, vpm) fadd(vpm, r0, r1) # Store the result vector from VPM to the host memory (address=uniforms[1]) setup_dma_store(nrows=1) start_dma_store(uniform) wait_dma_store() # Finish the thread exit()
@qpu がなければ、hello_worldはただの関数か?
hello_worldを呼び出しているのは、下記のようなコード。
引用 with Driver() as drv: # Input vectors a = np.random.random(16).astype('float32') b = np.random.random(16).astype('float32') # Copy vectors to shared memory for DMA transfer inp = drv.copy(np.r_[a, b]) out = drv.alloc(16, 'float32') # Run the program drv.execute( n_threads=1, program=drv.program(hello_world), uniforms=[inp.address, out.address] ) print(' a '.center(80, '=')) print(a) print(' b '.center(80, '=')) print(b) print(' a+b '.center(80, '=')) print(out) print(' error '.center(80, '=')) print(np.abs(a+b-out))
hello_worldは、Driverクラスのprogram関数への引数として渡されている。で、そのDriverクラスのprogram関数は、
引用 def program(self, program, *args, **kwargs): if hasattr(program, '__call__'): program = assemble(program, *args, **kwargs) code = memoryview(program).tobytes() if self.code_pos + len(code) > self.data_area_base: raise DriverError('Program too long') code_addr = self.memory.baseaddr + self.code_pos self.memory.base[self.code_pos:self.code_pos+len(code)] = code self.code_pos += len(code) return Program(code_addr, code)
引用、 def assemble(f, *args, **kwargs): 'Assemble QPU program to byte string.' asm = Assembler() f(asm, *args, **kwargs) return asm._get_code()
にて、QPUプログラムをバイト文字列に変換。
そのprogramをDriverクラスのexecute関数にて実行。
drv.execute( n_threads=1, program=drv.program(hello_world), uniforms=[inp.address, out.address] )
Driverクラスのexecute関数
引用 def execute(self, n_threads, program, uniforms = None, timeout = 10000): if not (1 <= n_threads and n_threads <= self.max_threads): raise DriverError('n_threads exceeds max_threads') if uniforms is not None: if not isinstance(uniforms, Array): uniforms = self.array(uniforms, dtype = 'u4') self.message[:n_threads, 0] = uniforms.addresses().reshape(n_threads, -1)[:, 0] else: self.message[:n_threads, 0] = 0 self.message[:n_threads, 1] = program.address r = self.mailbox.execute_qpu(n_threads, self.message.address, 0, timeout) if r > 0: raise DriverError('QPU execution timeout')
program.addressをQPUに渡して実行!
あれ、どこで関数(hello_world)を変換しているのか。。。もう一度、assemble関数を見ると。。。
引用、 def assemble(f, *args, **kwargs): 'Assemble QPU program to byte string.' asm = Assembler() f(asm, *args, **kwargs) return asm._get_code()
このfだ。。。
答えは、@qpu デコレータの実体である qpu関数にあった。
qpu関数
引用 def qpu(f): """Decorator for writing QPU assembly language. To write a QPU assembly program, decorate a function which has a parameter ``asm`` as the first argument with @qpu like this:: @qpu def code(asm): mov(r0, uniform) iadd(r0, r0, 1) ... exit() This code is equivalent to:: def code(asm): asm.mov(asm.r0, asm.uniform) asm.iadd(asm.r0, asm.r0, 1) ... asm.exit() """ args, _, _, _ = inspect.getargspec(f) if 'asm' not in args: raise AssembleError('Argument named \'asm\' is necessary') tree = ast.parse(inspect.getsource(f)) fundef = tree.body[0] fundef.body = SETUP_ASM_ALIASES.body + fundef.body fundef.decorator_list = [] code = compile(tree, '<qpu>', 'exec') scope = {} exec(code, f.__globals__, scope) return scope[f.__name__]
qpu関数のdocstringにあるように、hello_world 関数は以下のコードと等価な関数になる。
def hello_world(asm): asm.setup_dma_load(nrows=2) asm.start_dma_load(asm.uniform) asm.wait_dma_load() asm.setup_vpm_read(nrows=2) asm.setup_vpm_write() asm.mov(asm.r0, asm.vpm) asm.mov(asm.r1, asm.vpm) asm.fadd(asm.vpm, asm.r0, asm.r1) asm.setup_dma_store(nrows=1) asm.start_dma_store(asm.uniform) asm.wait_dma_store() asm.exit()
引用、 def assemble(f, *args, **kwargs): 'Assemble QPU program to byte string.' asm = Assembler() f(asm, *args, **kwargs) return asm._get_code()
で、f(asm, ...) として変換後の hello_world が呼び出され、最後に asm._get_code() でバイト文字列になるということに。。
わかってよかった。。。