; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -global-isel-abort=2 < %s | FileCheck -check-prefix=GISEL %s

; Note: if you're adding tests here, also add them to
; lower-buffer-fat-pointers-contents-legalization.ll to verify the IR produced by
; the lowering.

;;; Legal types. These are natively supported, so no casts should be performed.

; Loads an i8 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_ubyte.
define i8 @load_i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i8, ptr addrspace(7) %p
  ret i8 %ret
}

; Stores an i8 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_byte.
define void @store_i8(i8 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i8 %data, ptr addrspace(7) %p
  ret void
}

; Loads an i16 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_ushort.
define i16 @load_i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i16, ptr addrspace(7) %p
  ret i16 %ret
}

; Stores an i16 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_short.
define void @store_i16(i16 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_short v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_short v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i16 %data, ptr addrspace(7) %p
  ret void
}

; Loads an i32 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dword.
define i32 @load_i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i32, ptr addrspace(7) %p
  ret i32 %ret
}

; Stores an i32 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dword.
define void @store_i32(i32 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i32 %data, ptr addrspace(7) %p
  ret void
}

; Loads an i64 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dwordx2 into v[0:1].
define i64 @load_i64(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i64:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i64:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i64, ptr addrspace(7) %p
  ret i64 %ret
}

; Stores an i64 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dwordx2 from v[0:1].
define void @store_i64(i64 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i64:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i64:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i64 %data, ptr addrspace(7) %p
  ret void
}

; Loads an i128 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect the whole value to arrive via one
; buffer_load_dwordx4 into v[0:3].
define i128 @load_i128(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i128:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i128:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i128, ptr addrspace(7) %p
  ret i128 %ret
}

; Stores an i128 through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect the whole value to be written by one
; buffer_store_dwordx4 from v[0:3].
define void @store_i128(i128 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i128:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i128:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i128 %data, ptr addrspace(7) %p
  ret void
}

; Loads a <1 x i32> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dword, i.e. the same
; instruction the scalar i32 load in this file is checked against.
define <1 x i32> @load_v1i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v1i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v1i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <1 x i32>, ptr addrspace(7) %p
  ret <1 x i32> %ret
}

; Stores a <1 x i32> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dword, i.e. the same
; instruction the scalar i32 store in this file is checked against.
define void @store_v1i32(<1 x i32> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v1i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v1i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <1 x i32> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <2 x i32> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dwordx2 into v[0:1].
define <2 x i32> @load_v2i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x i32>, ptr addrspace(7) %p
  ret <2 x i32> %ret
}

; Stores a <2 x i32> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dwordx2 from v[0:1].
define void @store_v2i32(<2 x i32> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x i32> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <3 x i32> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dwordx3 into v[0:2].
define <3 x i32> @load_v3i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v3i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v3i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <3 x i32>, ptr addrspace(7) %p
  ret <3 x i32> %ret
}

; Stores a <3 x i32> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dwordx3 from v[0:2].
define void @store_v3i32(<3 x i32> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v3i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v3i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <3 x i32> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <4 x i32> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dwordx4 into v[0:3].
define <4 x i32> @load_v4i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v4i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v4i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <4 x i32>, ptr addrspace(7) %p
  ret <4 x i32> %ret
}

; Stores a <4 x i32> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dwordx4 from v[0:3].
define void @store_v4i32(<4 x i32> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v4i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v4i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <4 x i32> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <2 x i16> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect both elements to arrive via one
; buffer_load_dword into v0.
define <2 x i16> @load_v2i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x i16>, ptr addrspace(7) %p
  ret <2 x i16> %ret
}

; Stores a <2 x i16> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect both elements to be written by one
; buffer_store_dword from v0.
define void @store_v2i16(<2 x i16> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x i16> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <4 x i16> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dwordx2 into v[0:1].
define <4 x i16> @load_v4i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v4i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v4i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <4 x i16>, ptr addrspace(7) %p
  ret <4 x i16> %ret
}

; Stores a <4 x i16> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dwordx2 from v[0:1].
define void @store_v4i16(<4 x i16> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v4i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v4i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <4 x i16> %data, ptr addrspace(7) %p
  ret void
}

; Loads an <8 x i16> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dwordx4 into v[0:3].
define <8 x i16> @load_v8i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v8i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v8i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <8 x i16>, ptr addrspace(7) %p
  ret <8 x i16> %ret
}

; Stores an <8 x i16> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dwordx4 from v[0:3].
define void @store_v8i16(<8 x i16> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v8i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v8i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <8 x i16> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <2 x i64> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dwordx4 into v[0:3].
define <2 x i64> @load_v2i64(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2i64:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2i64:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x i64>, ptr addrspace(7) %p
  ret <2 x i64> %ret
}

; Stores a <2 x i64> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dwordx4 from v[0:3].
define void @store_v2i64(<2 x i64> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2i64:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2i64:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x i64> %data, ptr addrspace(7) %p
  ret void
}

; Loads a half through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_ushort.
define half @load_f16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load half, ptr addrspace(7) %p
  ret half %ret
}

; Stores a half through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_short.
define void @store_f16(half %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_short v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_short v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store half %data, ptr addrspace(7) %p
  ret void
}

; Loads a bfloat through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_ushort, the same
; instruction the half load in this file is checked against.
define bfloat @load_bf16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_bf16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_bf16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load bfloat, ptr addrspace(7) %p
  ret bfloat %ret
}

; Stores a bfloat through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_short, the same
; instruction the half store in this file is checked against.
define void @store_bf16(bfloat %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_bf16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_short v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_bf16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_short v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store bfloat %data, ptr addrspace(7) %p
  ret void
}

; Loads a <2 x half> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect both elements to arrive via one
; buffer_load_dword into v0.
define <2 x half> @load_v2f16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x half>, ptr addrspace(7) %p
  ret <2 x half> %ret
}

; Stores a <2 x half> through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect both elements to be written by one
; buffer_store_dword from v0.
define void @store_v2f16(<2 x half> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x half> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <4 x bfloat> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_load_dwordx2 into v[0:1].
define <4 x bfloat> @load_v4bf16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v4bf16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v4bf16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <4 x bfloat>, ptr addrspace(7) %p
  ret <4 x bfloat> %ret
}

; Stores a <4 x bfloat> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_store_dwordx2 from v[0:1].
define void @store_v4bf16(<4 x bfloat> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v4bf16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v4bf16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <4 x bfloat> %data, ptr addrspace(7) %p
  ret void
}

; Loads an <8 x half> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_load_dwordx4 into v[0:3].
define <8 x half> @load_v8f16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v8f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v8f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <8 x half>, ptr addrspace(7) %p
  ret <8 x half> %ret
}

; Stores an <8 x half> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_store_dwordx4 from v[0:3].
define void @store_v8f16(<8 x half> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v8f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v8f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <8 x half> %data, ptr addrspace(7) %p
  ret void
}

; Loads a float through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_load_dword.
define float @load_f32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load float, ptr addrspace(7) %p
  ret float %ret
}

; Stores a float through an addrspace(7) buffer fat pointer cast from %buf.
; Both check prefixes expect a single buffer_store_dword.
define void @store_f32(float %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store float %data, ptr addrspace(7) %p
  ret void
}

; Loads a <2 x float> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_load_dwordx2 into v[0:1].
define <2 x float> @load_v2f32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x float>, ptr addrspace(7) %p
  ret <2 x float> %ret
}

; Stores a <2 x float> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_store_dwordx2 from v[0:1].
define void @store_v2f32(<2 x float> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x float> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <3 x float> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_load_dwordx3 into v[0:2].
define <3 x float> @load_v3f32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v3f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v3f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <3 x float>, ptr addrspace(7) %p
  ret <3 x float> %ret
}

; Stores a <3 x float> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_store_dwordx3 from v[0:2].
define void @store_v3f32(<3 x float> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v3f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v3f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <3 x float> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <4 x float> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_load_dwordx4 into v[0:3].
define <4 x float> @load_v4f32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v4f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v4f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <4 x float>, ptr addrspace(7) %p
  ret <4 x float> %ret
}

; Stores a <4 x float> through an addrspace(7) buffer fat pointer cast from
; %buf. Both check prefixes expect a single buffer_store_dwordx4 from v[0:3].
define void @store_v4f32(<4 x float> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v4f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v4f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <4 x float> %data, ptr addrspace(7) %p
  ret void
}

; Loads an addrspace(0) pointer value through an addrspace(7) buffer fat
; pointer cast from %buf. Both check prefixes expect the pointer to arrive
; via one buffer_load_dwordx2 into v[0:1].
define ptr addrspace(0) @load_p0(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_p0:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_p0:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load ptr addrspace(0), ptr addrspace(7) %p
  ret ptr addrspace(0) %ret
}

; Stores an addrspace(0) pointer value through an addrspace(7) buffer fat
; pointer cast from %buf. Both check prefixes expect the pointer to be
; written by one buffer_store_dwordx2 from v[0:1].
define void @store_p0(ptr addrspace(0) %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_p0:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_p0:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store ptr addrspace(0) %data, ptr addrspace(7) %p
  ret void
}

; Loads an addrspace(1) pointer value through an addrspace(7) buffer fat
; pointer cast from %buf. Both check prefixes expect the pointer to arrive
; via one buffer_load_dwordx2 into v[0:1].
define ptr addrspace(1) @load_p1(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_p1:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_p1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load ptr addrspace(1), ptr addrspace(7) %p
  ret ptr addrspace(1) %ret
}

; Store a ptr addrspace(1) argument through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_store_dwordx2.
define void @store_p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_p1:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_p1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store ptr addrspace(1) %data, ptr addrspace(7) %p
  ret void
}

; Load a ptr addrspace(2) value through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_load_dword.
define ptr addrspace(2) @load_p2(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_p2:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_p2:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load ptr addrspace(2), ptr addrspace(7) %p
  ret ptr addrspace(2) %ret
}

; Store a ptr addrspace(2) argument through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_store_dword.
define void @store_p2(ptr addrspace(2) %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_p2:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_p2:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store ptr addrspace(2) %data, ptr addrspace(7) %p
  ret void
}

; Load a ptr addrspace(3) value through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_load_dword.
define ptr addrspace(3) @load_p3(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_p3:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_p3:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load ptr addrspace(3), ptr addrspace(7) %p
  ret ptr addrspace(3) %ret
}

; Store a ptr addrspace(3) argument through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_store_dword.
define void @store_p3(ptr addrspace(3) %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_p3:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_p3:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store ptr addrspace(3) %data, ptr addrspace(7) %p
  ret void
}

; Load a ptr addrspace(4) value through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_load_dwordx2.
define ptr addrspace(4) @load_p4(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_p4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_p4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load ptr addrspace(4), ptr addrspace(7) %p
  ret ptr addrspace(4) %ret
}

; Store a ptr addrspace(4) argument through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_store_dwordx2.
define void @store_p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_p4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_p4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store ptr addrspace(4) %data, ptr addrspace(7) %p
  ret void
}

; Load a ptr addrspace(5) value through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_load_dword.
define ptr addrspace(5) @load_p5(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_p5:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_p5:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load ptr addrspace(5), ptr addrspace(7) %p
  ret ptr addrspace(5) %ret
}

; Store a ptr addrspace(5) argument through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_store_dword.
define void @store_p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_p5:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_p5:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store ptr addrspace(5) %data, ptr addrspace(7) %p
  ret void
}

; Load a ptr addrspace(6) value through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_load_dword.
define ptr addrspace(6) @load_p6(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_p6:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_p6:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load ptr addrspace(6), ptr addrspace(7) %p
  ret ptr addrspace(6) %ret
}

; Store a ptr addrspace(6) argument through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_store_dword.
define void @store_p6(ptr addrspace(6) %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_p6:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_p6:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store ptr addrspace(6) %data, ptr addrspace(7) %p
  ret void
}

; Load a ptr addrspace(8) value through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_load_dwordx4.
define ptr addrspace(8) @load_p8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_p8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_p8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load ptr addrspace(8), ptr addrspace(7) %p
  ret ptr addrspace(8) %ret
}

; Store a ptr addrspace(8) argument through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_store_dwordx4.
define void @store_p8(ptr addrspace(8) %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_p8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_p8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store ptr addrspace(8) %data, ptr addrspace(7) %p
  ret void
}

; Load a two-element vector of ptr addrspace(1) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect a single buffer_load_dwordx4.
define <2 x ptr addrspace(1)> @load_v2p1(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2p1:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2p1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x ptr addrspace(1)>, ptr addrspace(7) %p
  ret <2 x ptr addrspace(1)> %ret
}

; Store a two-element vector of ptr addrspace(1) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect a single buffer_store_dwordx4.
define void @store_v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2p1:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2p1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x ptr addrspace(1)> %data, ptr addrspace(7) %p
  ret void
}

; Load a two-element vector of ptr addrspace(5) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect a single buffer_load_dwordx2.
define <2 x ptr addrspace(5)> @load_v2p5(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2p5:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2p5:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x ptr addrspace(5)>, ptr addrspace(7) %p
  ret <2 x ptr addrspace(5)> %ret
}

; Store a two-element vector of ptr addrspace(5) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect a single buffer_store_dwordx2.
define void @store_v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2p5:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2p5:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x ptr addrspace(5)> %data, ptr addrspace(7) %p
  ret void
}

; Load a three-element vector of ptr addrspace(5) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect a single buffer_load_dwordx3.
define <3 x ptr addrspace(5)> @load_v3p5(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v3p5:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v3p5:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <3 x ptr addrspace(5)>, ptr addrspace(7) %p
  ret <3 x ptr addrspace(5)> %ret
}

; Store a three-element vector of ptr addrspace(5) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect a single buffer_store_dwordx3.
define void @store_v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v3p5:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v3p5:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <3 x ptr addrspace(5)> %data, ptr addrspace(7) %p
  ret void
}

; Load a four-element vector of ptr addrspace(5) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect a single buffer_load_dwordx4.
define <4 x ptr addrspace(5)> @load_v4p5(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v4p5:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v4p5:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <4 x ptr addrspace(5)>, ptr addrspace(7) %p
  ret <4 x ptr addrspace(5)> %ret
}

; Store a four-element vector of ptr addrspace(5) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect a single buffer_store_dwordx4.
define void @store_v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v4p5:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v4p5:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <4 x ptr addrspace(5)> %data, ptr addrspace(7) %p
  ret void
}

;;; 3 words in a short type. These need to be bitcast to <3 x i32> to be supported.

; Load a six-element half vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_load_dwordx3.
define <6 x half> @load_v6f16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v6f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v6f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <6 x half>, ptr addrspace(7) %p
  ret <6 x half> %ret
}

; Store a six-element half vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_store_dwordx3.
define void @store_v6f16(<6 x half> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v6f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v6f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <6 x half> %data, ptr addrspace(7) %p
  ret void
}

;;; Long types (32-bit elements). Must be split into multiple operations.

; Load a five-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_load_dwordx4 followed by a
; buffer_load_dword at offset 16.
define <5 x float> @load_v5f32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v5f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dword v4, off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v5f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dword v4, off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <5 x float>, ptr addrspace(7) %p
  ret <5 x float> %ret
}

; Store a five-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_store_dwordx4 followed by a
; buffer_store_dword at offset 16.
define void @store_v5f32(<5 x float> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v5f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v5f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <5 x float> %data, ptr addrspace(7) %p
  ret void
}

; Load a six-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_load_dwordx4 followed by a
; buffer_load_dwordx2 at offset 16.
define <6 x float> @load_v6f32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v6f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v6f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <6 x float>, ptr addrspace(7) %p
  ret <6 x float> %ret
}

; Store a six-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_store_dwordx4 followed by a
; buffer_store_dwordx2 at offset 16.
define void @store_v6f32(<6 x float> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v6f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v6f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <6 x float> %data, ptr addrspace(7) %p
  ret void
}

; Load a seven-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_load_dwordx4 followed by a
; buffer_load_dwordx3 at offset 16.
define <7 x float> @load_v7f32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v7f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dwordx3 v[4:6], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v7f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dwordx3 v[4:6], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <7 x float>, ptr addrspace(7) %p
  ret <7 x float> %ret
}

; Store a seven-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_store_dwordx4 followed by a
; buffer_store_dwordx3 at offset 16.
define void @store_v7f32(<7 x float> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v7f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dwordx3 v[4:6], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v7f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dwordx3 v[4:6], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <7 x float> %data, ptr addrspace(7) %p
  ret void
}

; Load an eight-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect two buffer_load_dwordx4 instructions, the
; second at offset 16.
define <8 x float> @load_v8f32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v8f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v8f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <8 x float>, ptr addrspace(7) %p
  ret <8 x float> %ret
}

; Store an eight-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect two buffer_store_dwordx4 instructions, the
; second at offset 16.
define void @store_v8f32(<8 x float> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v8f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v8f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <8 x float> %data, ptr addrspace(7) %p
  ret void
}

; Load a ten-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect two buffer_load_dwordx4 instructions (the
; second at offset 16) followed by a buffer_load_dwordx2 at offset 32.
define <10 x float> @load_v10f32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v10f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; SDAG-NEXT:    buffer_load_dwordx2 v[8:9], off, s[16:19], 0 offset:32
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v10f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; GISEL-NEXT:    buffer_load_dwordx2 v[8:9], off, s[16:19], 0 offset:32
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <10 x float>, ptr addrspace(7) %p
  ret <10 x float> %ret
}

; Store a ten-element float vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect two buffer_store_dwordx4 instructions (the
; second at offset 16) followed by a buffer_store_dwordx2 at offset 32.
define void @store_v10f32(<10 x float> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v10f32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; SDAG-NEXT:    buffer_store_dwordx2 v[8:9], off, s[16:19], 0 offset:32
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v10f32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; GISEL-NEXT:    buffer_store_dwordx2 v[8:9], off, s[16:19], 0 offset:32
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <10 x float> %data, ptr addrspace(7) %p
  ret void
}

; Load a six-element i32 vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_load_dwordx4 followed by a
; buffer_load_dwordx2 at offset 16.
define <6 x i32> @load_v6i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v6i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v6i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <6 x i32>, ptr addrspace(7) %p
  ret <6 x i32> %ret
}

; Store a six-element i32 vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_store_dwordx4 followed by a
; buffer_store_dwordx2 at offset 16.
define void @store_v6i32(<6 x i32> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v6i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v6i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <6 x i32> %data, ptr addrspace(7) %p
  ret void
}

; Load a four-element vector of ptr addrspace(1) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect two buffer_load_dwordx4
; instructions, the second at offset 16.
define <4 x ptr addrspace(1)> @load_v4p1(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v4p1:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v4p1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <4 x ptr addrspace(1)>, ptr addrspace(7) %p
  ret <4 x ptr addrspace(1)> %ret
}

; Store a four-element vector of ptr addrspace(1) through the addrspace(7) pointer
; cast from %buf. Both the SDAG and GISEL checks expect two buffer_store_dwordx4
; instructions, the second at offset 16.
define void @store_v4p1(<4 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v4p1:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v4p1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <4 x ptr addrspace(1)> %data, ptr addrspace(7) %p
  ret void
}

;;; Uneven types with 16-bit elements. Some of these require splitting into multiple operations.

; Load a one-element i16 vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_load_ushort.
define <1 x i16> @load_v1i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v1i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v1i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <1 x i16>, ptr addrspace(7) %p
  ret <1 x i16> %ret
}

; Store a one-element i16 vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a single buffer_store_short.
define void @store_v1i16(<1 x i16> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v1i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_short v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v1i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_short v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <1 x i16> %data, ptr addrspace(7) %p
  ret void
}

; Load a three-element i16 vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_load_dword followed by a
; buffer_load_ushort at offset 4.
define <3 x i16> @load_v3i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v3i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_load_ushort v1, off, s[16:19], 0 offset:4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v3i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ushort v1, off, s[16:19], 0 offset:4
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <3 x i16>, ptr addrspace(7) %p
  ret <3 x i16> %ret
}

; Store a three-element i16 vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_store_dword followed by a
; buffer_store_short at offset 4.
define void @store_v3i16(<3 x i16> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v3i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_store_short v1, off, s[16:19], 0 offset:4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v3i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_store_short v1, off, s[16:19], 0 offset:4
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <3 x i16> %data, ptr addrspace(7) %p
  ret void
}

; Load a five-element i16 vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_load_dwordx2 followed by a
; buffer_load_ushort at offset 8.
define <5 x i16> @load_v5i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v5i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_ushort v2, off, s[16:19], 0 offset:8
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v5i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ushort v2, off, s[16:19], 0 offset:8
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <5 x i16>, ptr addrspace(7) %p
  ret <5 x i16> %ret
}

; Store a five-element i16 vector through the addrspace(7) pointer cast from %buf.
; Both the SDAG and GISEL checks expect a buffer_store_dwordx2 followed by a
; buffer_store_short at offset 8.
define void @store_v5i16(<5 x i16> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v5i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_short v2, off, s[16:19], 0 offset:8
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v5i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_short v2, off, s[16:19], 0 offset:8
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <5 x i16> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <6 x i16> (12 bytes) through a ptr addrspace(7) obtained by
; addrspacecast from the inreg ptr addrspace(8) argument. Both selectors are
; expected to cover the whole vector with a single dwordx3 load.
define <6 x i16> @load_v6i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v6i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v6i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <6 x i16>, ptr addrspace(7) %p
  ret <6 x i16> %ret
}

; Stores a <6 x i16> (12 bytes) through a ptr addrspace(7) obtained by
; addrspacecast from the inreg ptr addrspace(8) argument. Both selectors are
; expected to cover the whole vector with a single dwordx3 store.
define void @store_v6i16(<6 x i16> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v6i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v6i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <6 x i16> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <7 x i16> (14 bytes) through a ptr addrspace(7) obtained by
; addrspacecast from the inreg ptr addrspace(8) argument. Both selectors are
; expected to emit a dwordx3 load at offset 0 plus a ushort load at offset 12.
define <7 x i16> @load_v7i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v7i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_ushort v3, off, s[16:19], 0 offset:12
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v7i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ushort v3, off, s[16:19], 0 offset:12
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <7 x i16>, ptr addrspace(7) %p
  ret <7 x i16> %ret
}

; Stores a <7 x i16> (14 bytes) through a ptr addrspace(7) obtained by
; addrspacecast from the inreg ptr addrspace(8) argument. Both selectors are
; expected to emit a dwordx3 store at offset 0 plus a short store at offset 12.
define void @store_v7i16(<7 x i16> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v7i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_short v3, off, s[16:19], 0 offset:12
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v7i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_short v3, off, s[16:19], 0 offset:12
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <7 x i16> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <9 x i16> (18 bytes) through a ptr addrspace(7) obtained by
; addrspacecast from the inreg ptr addrspace(8) argument. Both selectors are
; expected to emit a dwordx4 load at offset 0 plus a ushort load at offset 16.
define <9 x i16> @load_v9i16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v9i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_ushort v4, off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v9i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ushort v4, off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <9 x i16>, ptr addrspace(7) %p
  ret <9 x i16> %ret
}

; Stores a <9 x i16> (18 bytes) through a ptr addrspace(7) obtained by
; addrspacecast from the inreg ptr addrspace(8) argument. Both selectors are
; expected to emit a dwordx4 store at offset 0 plus a short store at offset 16.
define void @store_v9i16(<9 x i16> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v9i16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_short v4, off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v9i16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_short v4, off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <9 x i16> %data, ptr addrspace(7) %p
  ret void
}

;;; Byte vectors. These need to be:
;;; - Split into multiple operations
;;; - Bitcast if they have a natively supported width

; Loads a <1 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit a
; single ubyte load with no extra ALU instructions.
define <1 x i8> @load_v1i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v1i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v1i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <1 x i8>, ptr addrspace(7) %p
  ret <1 x i8> %ret
}

; Stores a <1 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit a
; single byte store with no extra ALU instructions.
define void @store_v1i8(<1 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v1i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v1i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <1 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <2 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit one
; ushort load into v0 and then shift it right by 8 into v1.
define <2 x i8> @load_v2i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x i8>, ptr addrspace(7) %p
  ret <2 x i8> %ret
}

; Stores a <2 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to combine
; v0 and v1 into one 16-bit value (v1 shifted left by 8, OR'ed with byte 0 of
; v0) and emit a single short store. The GlobalISel output additionally masks
; v1 with 0xff before the shift.
define void @store_v2i8(<2 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_short v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GISEL-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    buffer_store_short v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <3 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit a
; ushort load at offset 0 plus a ubyte load at offset 2. The shift by 8 into v1
; is issued after waiting only for the first load (vmcnt(1)).
define <3 x i8> @load_v3i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v3i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_load_ubyte v2, off, s[16:19], 0 offset:2
; SDAG-NEXT:    s_waitcnt vmcnt(1)
; SDAG-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v3i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ubyte v2, off, s[16:19], 0 offset:2
; GISEL-NEXT:    s_waitcnt vmcnt(1)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <3 x i8>, ptr addrspace(7) %p
  ret <3 x i8> %ret
}

; Stores a <3 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to combine
; v0 and v1 into one 16-bit value for a short store at offset 0, and to store
; v2 with a byte store at offset 2. The GlobalISel output additionally masks v1
; with 0xff before the shift.
define void @store_v3i8(<3 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v3i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_short v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_store_byte v2, off, s[16:19], 0 offset:2
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v3i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GISEL-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    buffer_store_short v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_store_byte v2, off, s[16:19], 0 offset:2
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <3 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <4 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit a
; single dword load into v0 followed by right shifts by 8, 16 and 24 into
; v1, v2 and v3. The two outputs differ only in the order of the shifts.
define <4 x i8> @load_v4i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v4i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v4i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <4 x i8>, ptr addrspace(7) %p
  ret <4 x i8> %ret
}

; Stores a <4 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to pack
; v0..v3 into one dword and emit a single dword store. The packing sequences
; differ: the SelectionDAG output uses 16-bit shifts with SDWA ORs, while the
; GlobalISel output uses masks, 32-bit shifts, v_and_or_b32 and v_or3_b32.
define void @store_v4i8(<4 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v4i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v4i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v5, 8
; GISEL-NEXT:    v_mov_b32_e32 v4, 0xff
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_or_b32 v0, v0, v4, v1
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v2
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <4 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <5 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit a
; dword load at offset 0 plus a ubyte load at offset 4 into v4. The dword in v0
; is then shifted right by 8, 16 and 24 into v1..v3 after waiting only for the
; first load (vmcnt(1)).
define <5 x i8> @load_v5i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v5i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_load_ubyte v4, off, s[16:19], 0 offset:4
; SDAG-NEXT:    s_waitcnt vmcnt(1)
; SDAG-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v5i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ubyte v4, off, s[16:19], 0 offset:4
; GISEL-NEXT:    s_waitcnt vmcnt(1)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <5 x i8>, ptr addrspace(7) %p
  ret <5 x i8> %ret
}

; Stores a <5 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to pack
; v0..v3 into one dword for a dword store at offset 0, and to store v4 with a
; byte store at offset 4. The dword packing sequences differ between the two
; selectors.
define void @store_v5i8(<5 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v5i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_store_byte v4, off, s[16:19], 0 offset:4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v5i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v6, 8
; GISEL-NEXT:    v_mov_b32_e32 v5, 0xff
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_or_b32 v0, v0, v5, v1
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v2
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_store_byte v4, off, s[16:19], 0 offset:4
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <5 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <6 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit a
; dword load at offset 0 and a ushort load at offset 4, but the surrounding
; code diverges: the SelectionDAG output issues the ushort load first and uses
; a 64-bit shift (v_lshrrev_b64) plus two trailing moves, while the GlobalISel
; output uses only 32-bit shifts.
define <6 x i8> @load_v6i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v6i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ushort v6, off, s[16:19], 0 offset:4
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(1)
; SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v6
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_lshrrev_b32_e32 v7, 8, v0
; SDAG-NEXT:    v_lshrrev_b64 v[3:4], 24, v[0:1]
; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; SDAG-NEXT:    v_mov_b32_e32 v4, v6
; SDAG-NEXT:    v_mov_b32_e32 v1, v7
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v6i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ushort v4, off, s[16:19], 0 offset:4
; GISEL-NEXT:    s_waitcnt vmcnt(1)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 8, v4
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <6 x i8>, ptr addrspace(7) %p
  ret <6 x i8> %ret
}

; Stores a <6 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to pack
; v0..v3 into one dword for a dword store at offset 0, and v4/v5 into one
; 16-bit value for a short store at offset 4. The GlobalISel output masks the
; odd-indexed inputs with 0xff and joins the two dword halves with
; v_lshl_or_b32; the SelectionDAG output uses SDWA ORs throughout.
define void @store_v6i8(<6 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v6i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v5, 8, v5
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_store_short v4, off, s[16:19], 0 offset:4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v6i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GISEL-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v3
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GISEL-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v5
; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 8, v2
; GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GISEL-NEXT:    v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_store_short v2, off, s[16:19], 0 offset:4
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <6 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <7 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit
; three loads: a dword at offset 0, a ushort at offset 4 and a ubyte at
; offset 6. The shifts that unpack each piece are interleaved with staged
; waits (vmcnt(2), vmcnt(1), vmcnt(0)).
define <7 x i8> @load_v7i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v7i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_load_ushort v4, off, s[16:19], 0 offset:4
; SDAG-NEXT:    buffer_load_ubyte v6, off, s[16:19], 0 offset:6
; SDAG-NEXT:    s_waitcnt vmcnt(2)
; SDAG-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; SDAG-NEXT:    s_waitcnt vmcnt(1)
; SDAG-NEXT:    v_lshrrev_b32_e32 v5, 8, v4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v7i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ushort v4, off, s[16:19], 0 offset:4
; GISEL-NEXT:    buffer_load_ubyte v6, off, s[16:19], 0 offset:6
; GISEL-NEXT:    s_waitcnt vmcnt(2)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GISEL-NEXT:    s_waitcnt vmcnt(1)
; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 8, v4
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <7 x i8>, ptr addrspace(7) %p
  ret <7 x i8> %ret
}

; Stores a <7 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit
; three stores: a dword at offset 0 packed from v0..v3, a short at offset 4
; packed from v4/v5, and a byte store of v6 at offset 6. The packing sequences
; differ between the two selectors.
define void @store_v7i8(<7 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v7i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    v_lshlrev_b16_e32 v0, 8, v5
; SDAG-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_short v0, off, s[16:19], 0 offset:4
; SDAG-NEXT:    buffer_store_byte v6, off, s[16:19], 0 offset:6
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v7i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v8, 8
; GISEL-NEXT:    v_mov_b32_e32 v7, 0xff
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_or_b32 v0, v0, v7, v1
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v2
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    v_and_b32_e32 v0, 0xff, v5
; GISEL-NEXT:    v_lshlrev_b16_e32 v0, 8, v0
; GISEL-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    buffer_store_short v0, off, s[16:19], 0 offset:4
; GISEL-NEXT:    buffer_store_byte v6, off, s[16:19], 0 offset:6
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <7 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <8 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit a
; single dwordx2 load into v[0:1] and then fill v1..v7 with shifts and moves.
; The SelectionDAG output computes v3 with a 64-bit shift (v_lshrrev_b64),
; whereas the GlobalISel output uses a 32-bit shift of v0.
define <8 x i8> @load_v8i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v8i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_lshrrev_b64 v[3:4], 24, v[0:1]
; SDAG-NEXT:    v_lshrrev_b32_e32 v8, 8, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; SDAG-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; SDAG-NEXT:    v_mov_b32_e32 v4, v1
; SDAG-NEXT:    v_mov_b32_e32 v1, v8
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v8i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v8, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; GISEL-NEXT:    v_mov_b32_e32 v4, v1
; GISEL-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <8 x i8>, ptr addrspace(7) %p
  ret <8 x i8> %ret
}

; Stores a <8 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to pack
; v0..v7 into two dwords and emit a single dwordx2 store. The SelectionDAG
; output stores from v[3:4]; the GlobalISel output stores from v[0:1].
define void @store_v8i8(<8 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v8i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v5, 8, v5
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v5, 8, v7
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dwordx2 v[3:4], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v8i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v9, 8
; GISEL-NEXT:    v_mov_b32_e32 v8, 0xff
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_or_b32 v0, v0, v8, v1
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v2
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v9, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v6
; GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v7
; GISEL-NEXT:    v_and_or_b32 v1, v4, v8, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
; GISEL-NEXT:    v_or3_b32 v1, v1, v2, v3
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <8 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <12 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit a
; single dwordx3 load into v[0:2] and then fill v1..v11 with shifts and moves.
; The SelectionDAG output uses two 64-bit shifts (v_lshrrev_b64) where the
; GlobalISel output uses only 32-bit shifts.
define <12 x i8> @load_v12i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v12i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_mov_b32_e32 v8, v2
; SDAG-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
; SDAG-NEXT:    v_lshrrev_b64 v[3:4], 24, v[0:1]
; SDAG-NEXT:    v_lshrrev_b32_e32 v14, 8, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
; SDAG-NEXT:    v_lshrrev_b64 v[11:12], 24, v[8:9]
; SDAG-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; SDAG-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; SDAG-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; SDAG-NEXT:    v_mov_b32_e32 v4, v1
; SDAG-NEXT:    v_mov_b32_e32 v1, v14
; SDAG-NEXT:    v_mov_b32_e32 v2, v13
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v12i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v13, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v12, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
; GISEL-NEXT:    v_mov_b32_e32 v4, v1
; GISEL-NEXT:    v_mov_b32_e32 v8, v2
; GISEL-NEXT:    v_mov_b32_e32 v1, v13
; GISEL-NEXT:    v_mov_b32_e32 v2, v12
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <12 x i8>, ptr addrspace(7) %p
  ret <12 x i8> %ret
}

; Stores a <12 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to pack
; v0..v11 into three dwords and emit a single dwordx3 store. The SelectionDAG
; output stores from v[6:8]; the GlobalISel output stores from v[0:2].
define void @store_v12i8(<12 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v12i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v9, 8, v9
; SDAG-NEXT:    v_lshlrev_b16_e32 v5, 8, v5
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v9, 8, v11
; SDAG-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v5, 8, v7
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v7, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v6, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dwordx3 v[6:8], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v12i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v13, 8
; GISEL-NEXT:    v_mov_b32_e32 v12, 0xff
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v13, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_or_b32 v0, v0, v12, v1
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v2
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v13, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v6
; GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v7
; GISEL-NEXT:    v_and_or_b32 v1, v4, v12, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
; GISEL-NEXT:    v_or3_b32 v1, v1, v2, v3
; GISEL-NEXT:    v_lshlrev_b32_sdwa v2, v13, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v10
; GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v11
; GISEL-NEXT:    v_and_or_b32 v2, v8, v12, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
; GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <12 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <16 x i8> through a ptr addrspace(7) obtained by addrspacecast from
; the inreg ptr addrspace(8) argument. Both selectors are expected to emit a
; single dwordx4 load into v[0:3] and then fill v1..v15 with shifts and moves.
; The SelectionDAG output uses two 64-bit shifts (v_lshrrev_b64) where the
; GlobalISel output uses only 32-bit shifts.
define <16 x i8> @load_v16i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v16i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_lshrrev_b64 v[18:19], 24, v[0:1]
; SDAG-NEXT:    v_lshrrev_b64 v[11:12], 24, v[2:3]
; SDAG-NEXT:    v_lshrrev_b32_e32 v17, 8, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v16, 16, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; SDAG-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; SDAG-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
; SDAG-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; SDAG-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
; SDAG-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
; SDAG-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
; SDAG-NEXT:    v_mov_b32_e32 v4, v1
; SDAG-NEXT:    v_mov_b32_e32 v8, v2
; SDAG-NEXT:    v_mov_b32_e32 v12, v3
; SDAG-NEXT:    v_mov_b32_e32 v1, v17
; SDAG-NEXT:    v_mov_b32_e32 v2, v16
; SDAG-NEXT:    v_mov_b32_e32 v3, v18
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v16i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
; GISEL-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
; GISEL-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
; GISEL-NEXT:    v_mov_b32_e32 v4, v1
; GISEL-NEXT:    v_mov_b32_e32 v8, v2
; GISEL-NEXT:    v_mov_b32_e32 v12, v3
; GISEL-NEXT:    v_mov_b32_e32 v1, v16
; GISEL-NEXT:    v_mov_b32_e32 v2, v17
; GISEL-NEXT:    v_mov_b32_e32 v3, v18
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <16 x i8>, ptr addrspace(7) %p
  ret <16 x i8> %ret
}

; Stores a <16 x i8> through the ptr addrspace(7) cast of %buf. The checks
; expect the values in v0-v15 to be combined with shift/and/or instructions
; into four dwords that are written by a single buffer_store_dwordx4.
define void @store_v16i8(<16 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v16i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v13, 8, v13
; SDAG-NEXT:    v_lshlrev_b16_e32 v9, 8, v9
; SDAG-NEXT:    v_lshlrev_b16_e32 v5, 8, v5
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v13, 8, v15
; SDAG-NEXT:    v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v9, 8, v11
; SDAG-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v5, 8, v7
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dwordx4 v[9:12], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v16i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v17, 8
; GISEL-NEXT:    v_mov_b32_e32 v16, 0xff
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v17, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_or_b32 v0, v0, v16, v1
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v2
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v17, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v6
; GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v7
; GISEL-NEXT:    v_and_or_b32 v1, v4, v16, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
; GISEL-NEXT:    v_or3_b32 v1, v1, v2, v3
; GISEL-NEXT:    v_lshlrev_b32_sdwa v2, v17, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v10
; GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v11
; GISEL-NEXT:    v_and_or_b32 v2, v8, v16, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
; GISEL-NEXT:    v_lshlrev_b32_sdwa v3, v17, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v14
; GISEL-NEXT:    v_and_b32_e32 v5, 0xff, v15
; GISEL-NEXT:    v_and_or_b32 v3, v12, v16, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
; GISEL-NEXT:    v_or3_b32 v3, v3, v4, v5
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <16 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <32 x i8> through the ptr addrspace(7) cast of %buf. The checks
; expect two buffer_load_dwordx4 instructions (at offset 0 and offset:16),
; followed by right shifts and register moves on the loaded dwords.
define <32 x i8> @load_v32i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v32i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[36:39], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dwordx4 v[32:35], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(1)
; SDAG-NEXT:    v_lshrrev_b64 v[3:4], 24, v[36:37]
; SDAG-NEXT:    v_lshrrev_b64 v[11:12], 24, v[38:39]
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_lshrrev_b64 v[19:20], 24, v[32:33]
; SDAG-NEXT:    v_lshrrev_b64 v[27:28], 24, v[34:35]
; SDAG-NEXT:    v_lshrrev_b32_e32 v1, 8, v36
; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v36
; SDAG-NEXT:    v_lshrrev_b32_e32 v5, 8, v37
; SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v37
; SDAG-NEXT:    v_lshrrev_b32_e32 v7, 24, v37
; SDAG-NEXT:    v_lshrrev_b32_e32 v9, 8, v38
; SDAG-NEXT:    v_lshrrev_b32_e32 v10, 16, v38
; SDAG-NEXT:    v_lshrrev_b32_e32 v13, 8, v39
; SDAG-NEXT:    v_lshrrev_b32_e32 v14, 16, v39
; SDAG-NEXT:    v_lshrrev_b32_e32 v15, 24, v39
; SDAG-NEXT:    v_lshrrev_b32_e32 v17, 8, v32
; SDAG-NEXT:    v_lshrrev_b32_e32 v18, 16, v32
; SDAG-NEXT:    v_lshrrev_b32_e32 v21, 8, v33
; SDAG-NEXT:    v_lshrrev_b32_e32 v22, 16, v33
; SDAG-NEXT:    v_lshrrev_b32_e32 v23, 24, v33
; SDAG-NEXT:    v_lshrrev_b32_e32 v25, 8, v34
; SDAG-NEXT:    v_lshrrev_b32_e32 v26, 16, v34
; SDAG-NEXT:    v_lshrrev_b32_e32 v29, 8, v35
; SDAG-NEXT:    v_lshrrev_b32_e32 v30, 16, v35
; SDAG-NEXT:    v_lshrrev_b32_e32 v31, 24, v35
; SDAG-NEXT:    v_mov_b32_e32 v0, v36
; SDAG-NEXT:    v_mov_b32_e32 v4, v37
; SDAG-NEXT:    v_mov_b32_e32 v8, v38
; SDAG-NEXT:    v_mov_b32_e32 v12, v39
; SDAG-NEXT:    v_mov_b32_e32 v16, v32
; SDAG-NEXT:    v_mov_b32_e32 v20, v33
; SDAG-NEXT:    v_mov_b32_e32 v24, v34
; SDAG-NEXT:    v_mov_b32_e32 v28, v35
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v32i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dwordx4 v[16:19], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(1)
; GISEL-NEXT:    v_lshrrev_b32_e32 v35, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v36, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v37, 24, v0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v32, 8, v16
; GISEL-NEXT:    v_lshrrev_b32_e32 v33, 16, v16
; GISEL-NEXT:    v_lshrrev_b32_e32 v34, 24, v16
; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
; GISEL-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
; GISEL-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
; GISEL-NEXT:    v_lshrrev_b32_e32 v21, 8, v17
; GISEL-NEXT:    v_lshrrev_b32_e32 v22, 16, v17
; GISEL-NEXT:    v_lshrrev_b32_e32 v23, 24, v17
; GISEL-NEXT:    v_lshrrev_b32_e32 v25, 8, v18
; GISEL-NEXT:    v_lshrrev_b32_e32 v26, 16, v18
; GISEL-NEXT:    v_lshrrev_b32_e32 v27, 24, v18
; GISEL-NEXT:    v_lshrrev_b32_e32 v29, 8, v19
; GISEL-NEXT:    v_lshrrev_b32_e32 v30, 16, v19
; GISEL-NEXT:    v_lshrrev_b32_e32 v31, 24, v19
; GISEL-NEXT:    v_mov_b32_e32 v4, v1
; GISEL-NEXT:    v_mov_b32_e32 v8, v2
; GISEL-NEXT:    v_mov_b32_e32 v12, v3
; GISEL-NEXT:    v_mov_b32_e32 v20, v17
; GISEL-NEXT:    v_mov_b32_e32 v24, v18
; GISEL-NEXT:    v_mov_b32_e32 v28, v19
; GISEL-NEXT:    v_mov_b32_e32 v1, v35
; GISEL-NEXT:    v_mov_b32_e32 v2, v36
; GISEL-NEXT:    v_mov_b32_e32 v3, v37
; GISEL-NEXT:    v_mov_b32_e32 v17, v32
; GISEL-NEXT:    v_mov_b32_e32 v18, v33
; GISEL-NEXT:    v_mov_b32_e32 v19, v34
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <32 x i8>, ptr addrspace(7) %p
  ret <32 x i8> %ret
}

; Stores a <32 x i8> through the ptr addrspace(7) cast of %buf. The checks
; expect the bytes to be combined into eight dwords that are written by two
; buffer_store_dwordx4 instructions (at offset 0 and offset:16).
; NOTE(review): the buffer_load_ubyte from s[0:3] at s32 presumably fetches
; the last vector element, passed in memory rather than in a VGPR - confirm
; against the calling convention.
define void @store_v32i8(<32 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v32i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v9, 8, v9
; SDAG-NEXT:    v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v9, 8, v11
; SDAG-NEXT:    v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_load_ubyte v10, off, s[0:3], s32
; SDAG-NEXT:    v_lshlrev_b16_e32 v13, 8, v13
; SDAG-NEXT:    v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v13, 8, v15
; SDAG-NEXT:    v_lshlrev_b16_e32 v5, 8, v5
; SDAG-NEXT:    v_lshlrev_b16_e32 v7, 8, v7
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_lshlrev_b16_e32 v3, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v6, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v11, 8, v29
; SDAG-NEXT:    v_lshlrev_b16_e32 v14, 8, v25
; SDAG-NEXT:    v_lshlrev_b16_e32 v15, 8, v27
; SDAG-NEXT:    v_lshlrev_b16_e32 v21, 8, v21
; SDAG-NEXT:    v_lshlrev_b16_e32 v23, 8, v23
; SDAG-NEXT:    v_lshlrev_b16_e32 v17, 8, v17
; SDAG-NEXT:    v_lshlrev_b16_e32 v19, 8, v19
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    v_or_b32_sdwa v7, v28, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v11, v24, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v14, v26, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v15, v20, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v20, v22, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v16, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v17, v18, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v5, v11, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v4, v15, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v3, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    s_waitcnt vmcnt(1)
; SDAG-NEXT:    v_lshlrev_b16_e32 v0, 8, v10
; SDAG-NEXT:    v_or_b32_sdwa v0, v30, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v6, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dwordx4 v[3:6], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v32i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v31, 8
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v31, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_mov_b32_e32 v32, 0xff
; GISEL-NEXT:    v_and_or_b32 v0, v0, v32, v1
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v31, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v5, 0xff, v7
; GISEL-NEXT:    buffer_load_ubyte v7, off, s[0:3], s32
; GISEL-NEXT:    v_and_or_b32 v1, v4, v32, v1
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v2
; GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v3
; GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v6
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
; GISEL-NEXT:    v_or3_b32 v0, v0, v2, v3
; GISEL-NEXT:    v_or3_b32 v1, v1, v4, v5
; GISEL-NEXT:    v_lshlrev_b32_sdwa v2, v31, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v10
; GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v11
; GISEL-NEXT:    v_and_or_b32 v2, v8, v32, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 24, v4
; GISEL-NEXT:    v_or3_b32 v2, v2, v3, v4
; GISEL-NEXT:    v_lshlrev_b32_sdwa v3, v31, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v14
; GISEL-NEXT:    v_and_b32_e32 v5, 0xff, v15
; GISEL-NEXT:    v_and_or_b32 v3, v12, v32, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
; GISEL-NEXT:    v_or3_b32 v3, v3, v4, v5
; GISEL-NEXT:    v_lshlrev_b32_sdwa v4, v31, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v5, 0xff, v18
; GISEL-NEXT:    v_and_b32_e32 v6, 0xff, v19
; GISEL-NEXT:    v_and_or_b32 v4, v16, v32, v4
; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
; GISEL-NEXT:    v_lshlrev_b32_sdwa v8, v31, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_or3_b32 v4, v4, v5, v6
; GISEL-NEXT:    v_and_b32_e32 v5, 0xff, v22
; GISEL-NEXT:    v_and_b32_e32 v6, 0xff, v23
; GISEL-NEXT:    v_and_or_b32 v8, v20, v32, v8
; GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; GISEL-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
; GISEL-NEXT:    v_or3_b32 v5, v8, v5, v6
; GISEL-NEXT:    v_lshlrev_b32_sdwa v6, v31, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v8, 0xff, v26
; GISEL-NEXT:    v_and_b32_e32 v9, 0xff, v27
; GISEL-NEXT:    v_and_or_b32 v6, v24, v32, v6
; GISEL-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 24, v9
; GISEL-NEXT:    v_or3_b32 v6, v6, v8, v9
; GISEL-NEXT:    v_lshlrev_b32_sdwa v8, v31, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_b32_e32 v9, 0xff, v30
; GISEL-NEXT:    v_and_or_b32 v8, v28, v32, v8
; GISEL-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
; GISEL-NEXT:    v_or3_b32 v7, v8, v9, v7
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <32 x i8> %data, ptr addrspace(7) %p
  ret void
}

;;; Arrays. Need to become vectors.

; Loads a [1 x i32] through the ptr addrspace(7) cast of %buf. The checks
; expect a single buffer_load_dword into v0.
define [1 x i32] @load_a1i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_a1i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_a1i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load [1 x i32], ptr addrspace(7) %p
  ret [1 x i32] %ret
}

; Stores a [1 x i32] through the ptr addrspace(7) cast of %buf. The checks
; expect a single buffer_store_dword of v0.
define void @store_a1i32([1 x i32] %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_a1i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_a1i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store [1 x i32] %data, ptr addrspace(7) %p
  ret void
}

; Loads a [2 x i32] through the ptr addrspace(7) cast of %buf. The checks
; expect a single buffer_load_dwordx2 into v[0:1].
define [2 x i32] @load_a2i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_a2i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_a2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load [2 x i32], ptr addrspace(7) %p
  ret [2 x i32] %ret
}

; Stores a [2 x i32] through the ptr addrspace(7) cast of %buf. The checks
; expect a single buffer_store_dwordx2 of v[0:1].
define void @store_a2i32([2 x i32] %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_a2i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_a2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store [2 x i32] %data, ptr addrspace(7) %p
  ret void
}

; Loads a [2 x half] through the ptr addrspace(7) cast of %buf. The checks
; expect one buffer_load_dword into v0, then a right shift by 16 that places
; the upper half of that dword in v1.
define [2 x half] @load_a2f16(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_a2f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_a2f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load [2 x half], ptr addrspace(7) %p
  ret [2 x half] %ret
}

; Stores a [2 x half] through the ptr addrspace(7) cast of %buf. The checks
; expect v0 and v1 to be combined into one dword before a single
; buffer_store_dword: the default run uses v_perm_b32 with the selector
; 0x5040100, the -global-isel=1 run uses v_and_b32 plus v_lshl_or_b32.
define void @store_a2f16([2 x half] %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_a2f16:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    s_mov_b32 s4, 0x5040100
; SDAG-NEXT:    v_perm_b32 v0, v1, v0, s4
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_a2f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store [2 x half] %data, ptr addrspace(7) %p
  ret void
}

; Loads a [2 x ptr addrspace(1)] through the ptr addrspace(7) cast of %buf.
; The checks expect a single buffer_load_dwordx4 into v[0:3].
define [2 x ptr addrspace(1)] @load_a2p1(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_a2p1:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_a2p1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load [2 x ptr addrspace(1)], ptr addrspace(7) %p
  ret [2 x ptr addrspace(1)] %ret
}

; Stores a [2 x ptr addrspace(1)] through the ptr addrspace(7) cast of %buf.
; The checks expect a single buffer_store_dwordx4 of v[0:3].
define void @store_a2p1([2 x ptr addrspace(1)] %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_a2p1:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_a2p1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store [2 x ptr addrspace(1)] %data, ptr addrspace(7) %p
  ret void
}

;;; Scalars of atypical width. Need to be cast to vectors and split.

; Loads an i40 through the ptr addrspace(7) cast of %buf. The checks expect
; the access to be split into a buffer_load_dword at offset 0 and a
; buffer_load_ubyte at offset:4. The -global-isel=1 checks additionally
; contain a shift/mask/or sequence that rewrites v0 after the dword load.
define i40 @load_i40(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i40:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_load_ubyte v1, off, s[16:19], 0 offset:4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i40:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ubyte v1, off, s[16:19], 0 offset:4
; GISEL-NEXT:    v_mov_b32_e32 v2, 0xff
; GISEL-NEXT:    s_waitcnt vmcnt(1)
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v4, 24, v0
; GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v3
; GISEL-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GISEL-NEXT:    v_lshlrev_b16_e32 v4, 8, v4
; GISEL-NEXT:    v_lshlrev_b16_e32 v3, 8, v3
; GISEL-NEXT:    v_or_b32_e32 v2, v2, v4
; GISEL-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i40, ptr addrspace(7) %p
  ret i40 %ret
}

; Stores an i40 through the ptr addrspace(7) cast of %buf. The checks expect
; the access to be split into a buffer_store_dword of v0 at offset 0 and a
; buffer_store_byte of v1 at offset:4.
define void @store_i40(i40 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i40:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_store_byte v1, off, s[16:19], 0 offset:4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i40:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_store_byte v1, off, s[16:19], 0 offset:4
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i40 %data, ptr addrspace(7) %p
  ret void
}

; Loads an i96 through the ptr addrspace(7) cast of %buf. The checks expect a
; single buffer_load_dwordx3 into v[0:2].
define i96 @load_i96(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i96:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i96:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i96, ptr addrspace(7) %p
  ret i96 %ret
}

; Stores an i96 through the ptr addrspace(7) cast of %buf. The checks expect
; a single buffer_store_dwordx3 of v[0:2].
define void @store_i96(i96 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i96:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i96:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i96 %data, ptr addrspace(7) %p
  ret void
}

; Loads an i160 through the ptr addrspace(7) cast of %buf. The checks expect
; the access to be split into a buffer_load_dwordx4 at offset 0 and a
; buffer_load_dword at offset:16.
define i160 @load_i160(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i160:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dword v4, off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i160:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dword v4, off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i160, ptr addrspace(7) %p
  ret i160 %ret
}

; Stores an i160 through the ptr addrspace(7) cast of %buf. The checks expect
; the access to be split into a buffer_store_dwordx4 of v[0:3] at offset 0
; and a buffer_store_dword of v4 at offset:16.
define void @store_i160(i160 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i160:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i160:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dword v4, off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i160 %data, ptr addrspace(7) %p
  ret void
}

; Loads an i256 through the ptr addrspace(7) cast of %buf. The checks expect
; two buffer_load_dwordx4 instructions, at offset 0 and offset:16.
define i256 @load_i256(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i256:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i256:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i256, ptr addrspace(7) %p
  ret i256 %ret
}

; Stores an i256 through the ptr addrspace(7) cast of %buf. The checks expect
; two buffer_store_dwordx4 instructions: v[0:3] at offset 0 and v[4:7] at
; offset:16.
define void @store_i256(i256 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i256:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i256:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i256 %data, ptr addrspace(7) %p
  ret void
}

;;; Non-byte-sized scalars. Require zero-extension.

; Loads an i7 through the ptr addrspace(7) cast of %buf. The checks expect a
; single buffer_load_ubyte into v0 with no further instructions on the value.
define i7 @load_i7(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i7:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i7:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i7, ptr addrspace(7) %p
  ret i7 %ret
}

; Stores an i7 through the ptr addrspace(7) cast of %buf. The checks expect
; v0 to be masked with 0x7f (clearing the bits above the low seven) before a
; single buffer_store_byte.
define void @store_i7(i7 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i7:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; SDAG-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i7:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; GISEL-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i7 %data, ptr addrspace(7) %p
  ret void
}

; Loads an i4 through the ptr addrspace(7) cast of %buf. The checks expect a
; single buffer_load_ubyte into v0 with no further instructions on the value.
define i4 @load_i4(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_i4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_i4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load i4, ptr addrspace(7) %p
  ret i4 %ret
}

; Stores an i4 through the ptr addrspace(7) cast of %buf. The checks expect
; v0 to be masked with 15 (clearing the bits above the low four) before a
; single buffer_store_byte.
define void @store_i4(i4 %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_i4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_and_b32_e32 v0, 15, v0
; SDAG-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_i4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v0, 15, v0
; GISEL-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store i4 %data, ptr addrspace(7) %p
  ret void
}


;;; Byte-sized vectors of i4. Require casts.

; Loads a <2 x i4> (8 bits total) through a buffer fat pointer. Both selectors
; fetch it with one unsigned byte load and then split it into two nibbles.
; GlobalISel only needs a right shift by 4 for element 1.
; NOTE(review): the SelectionDAG checks store the loaded byte to s[0:3] at s32
; and reload it before splitting - presumably a stack round-trip for the
; i8 -> <2 x i4> cast; confirm whether that is a known codegen inefficiency.
define <2 x i4> @load_v2i4(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2i4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    buffer_store_byte v0, off, s[0:3], s32
; SDAG-NEXT:    buffer_load_ubyte v1, off, s[0:3], s32
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_and_b32_e32 v0, 15, v1
; SDAG-NEXT:    v_lshrrev_b16_e32 v1, 4, v1
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2i4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 4, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x i4>, ptr addrspace(7) %p
  ret <2 x i4> %ret
}

; Stores a <2 x i4> (8 bits total) through a buffer fat pointer. Both selectors
; pack the nibbles into one byte (element 1 shifted left by 4, ORed with the
; masked element 0) and write it with a single byte store to the buffer.
; NOTE(review): the SelectionDAG checks first store the packed byte to s[0:3]
; at s32 and reload it - presumably a stack round-trip for the
; <2 x i4> -> i8 cast; GlobalISel stores the packed byte directly.
define void @store_v2i4(<2 x i4> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2i4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 4, v1
; SDAG-NEXT:    v_and_b32_e32 v0, 15, v0
; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
; SDAG-NEXT:    buffer_store_byte v0, off, s[0:3], s32
; SDAG-NEXT:    buffer_load_ubyte v0, off, s[0:3], s32
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2i4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 15, v1
; GISEL-NEXT:    v_and_b32_e32 v0, 15, v0
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 4, v1
; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GISEL-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x i4> %data, ptr addrspace(7) %p
  ret void
}

; Loads a <4 x i4> (16 bits total) through a buffer fat pointer. Both selectors
; fetch it with one unsigned short load and then split it into four nibbles;
; GlobalISel does so with right shifts by 4, 8 and 12.
; NOTE(review): the SelectionDAG checks store the loaded short to s[0:3] at s32
; and reload it before extracting the nibbles - presumably a stack round-trip
; for the i16 -> <4 x i4> cast; confirm whether that is intended.
define <4 x i4> @load_v4i4(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v4i4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; SDAG-NEXT:    v_mov_b32_e32 v2, 15
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    buffer_store_short v0, off, s[0:3], s32
; SDAG-NEXT:    buffer_load_ushort v1, off, s[0:3], s32
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_lshrrev_b16_e32 v4, 4, v1
; SDAG-NEXT:    v_and_b32_e32 v0, 15, v1
; SDAG-NEXT:    v_lshrrev_b16_e32 v3, 12, v1
; SDAG-NEXT:    v_and_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; SDAG-NEXT:    v_and_b32_e32 v1, 15, v4
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v4i4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 4, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 12, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <4 x i4>, ptr addrspace(7) %p
  ret <4 x i4> %ret
}

; Stores a <4 x i4> (16 bits total) through a buffer fat pointer. Both selectors
; pack the four nibbles into one 16-bit value (shifts by 4, 8 via an SDWA
; BYTE_1 write, and 12) and write it with a single short store. GlobalISel
; additionally masks element 3 with 15 before shifting it.
define void @store_v4i4(<4 x i4> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v4i4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_and_b32_e32 v1, 15, v1
; SDAG-NEXT:    v_and_b32_e32 v0, 15, v0
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 4, v1
; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
; SDAG-NEXT:    v_mov_b32_e32 v1, 15
; SDAG-NEXT:    v_and_b32_sdwa v1, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 12, v3
; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
; SDAG-NEXT:    buffer_store_short v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v4i4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 15, v1
; GISEL-NEXT:    v_and_b32_e32 v0, 15, v0
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 4, v1
; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GISEL-NEXT:    v_mov_b32_e32 v1, 15
; GISEL-NEXT:    v_and_b32_sdwa v1, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GISEL-NEXT:    v_and_b32_e32 v1, 15, v3
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 12, v1
; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GISEL-NEXT:    buffer_store_short v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <4 x i4> %data, ptr addrspace(7) %p
  ret void
}

; Loads an <8 x i4> (32 bits total) through a buffer fat pointer. Both selectors
; fetch it with one dword load and split it into eight nibbles. SelectionDAG
; extracts each nibble with v_and / v_bfe_u32 (the last with a shift by 28);
; GlobalISel uses plain right shifts by multiples of 4.
define <8 x i4> @load_v8i4(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v8i4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v7, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_and_b32_e32 v0, 15, v7
; SDAG-NEXT:    v_bfe_u32 v1, v7, 4, 4
; SDAG-NEXT:    v_bfe_u32 v2, v7, 8, 4
; SDAG-NEXT:    v_bfe_u32 v3, v7, 12, 4
; SDAG-NEXT:    v_bfe_u32 v4, v7, 16, 4
; SDAG-NEXT:    v_bfe_u32 v5, v7, 20, 4
; SDAG-NEXT:    v_bfe_u32 v6, v7, 24, 4
; SDAG-NEXT:    v_lshrrev_b32_e32 v7, 28, v7
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v8i4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 4, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 12, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 20, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v6, 24, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v7, 28, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <8 x i4>, ptr addrspace(7) %p
  ret <8 x i4> %ret
}

; Stores an <8 x i4> (32 bits total) through a buffer fat pointer. Both
; selectors mask the elements with 15, move each into its nibble position
; (shifts or SDWA destination selects) and merge them with v_and_or_b32 /
; v_or3_b32 before a single dword store. SelectionDAG shifts element 7 by 28
; without masking it first; GlobalISel masks it as well.
define void @store_v8i4(<8 x i4> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v8i4:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_and_b32_e32 v1, 15, v1
; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
; SDAG-NEXT:    v_and_or_b32 v0, v0, 15, v1
; SDAG-NEXT:    v_and_b32_e32 v1, 15, v3
; SDAG-NEXT:    v_and_b32_e32 v2, 15, v2
; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 12, v1
; SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
; SDAG-NEXT:    v_or3_b32 v0, v0, v2, v1
; SDAG-NEXT:    v_and_b32_e32 v1, 15, v5
; SDAG-NEXT:    v_mov_b32_e32 v2, 15
; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 20, v1
; SDAG-NEXT:    v_and_b32_sdwa v3, v4, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; SDAG-NEXT:    v_or3_b32 v0, v0, v3, v1
; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 28, v7
; SDAG-NEXT:    v_and_b32_sdwa v2, v6, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; SDAG-NEXT:    v_or3_b32 v0, v0, v2, v1
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v8i4:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 15, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
; GISEL-NEXT:    v_and_or_b32 v0, v0, 15, v1
; GISEL-NEXT:    v_and_b32_e32 v1, 15, v2
; GISEL-NEXT:    v_and_b32_e32 v2, 15, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 12, v2
; GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
; GISEL-NEXT:    v_mov_b32_e32 v1, 15
; GISEL-NEXT:    v_and_b32_e32 v3, 15, v5
; GISEL-NEXT:    v_and_b32_sdwa v2, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GISEL-NEXT:    v_lshlrev_b32_e32 v3, 20, v3
; GISEL-NEXT:    v_or3_b32 v0, v0, v2, v3
; GISEL-NEXT:    v_and_b32_e32 v2, 15, v7
; GISEL-NEXT:    v_and_b32_sdwa v1, v6, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 28, v2
; GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <8 x i4> %data, ptr addrspace(7) %p
  ret void
}

;;; Vectors of non-byte-sized integers.

; Loads a <2 x i6> (12 bits total, not a whole number of bytes) through a
; buffer fat pointer. Both selectors fetch it with one unsigned short load.
; SelectionDAG extracts the elements with a mask of 63 and a 6-bit field
; extract at bit 6; GlobalISel only shifts right by 6 for element 1.
define <2 x i6> @load_v2i6(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v2i6:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ushort v1, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_and_b32_e32 v0, 63, v1
; SDAG-NEXT:    v_bfe_u32 v1, v1, 6, 6
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v2i6:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b16_e32 v1, 6, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <2 x i6>, ptr addrspace(7) %p
  ret <2 x i6> %ret
}

; Stores a <2 x i6> (12 bits total) through a buffer fat pointer. Both selectors
; pack the two elements (element 1 shifted left by 6), mask the result to 12
; bits (0xfff) and write it with a single short store.
; NOTE(review): the SelectionDAG checks apply a 0xffff mask right after the
; 0xfff mask, which looks redundant; confirm whether this is a known missed
; combine before relying on the exact sequence.
define void @store_v2i6(<2 x i6> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v2i6:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 6, v1
; SDAG-NEXT:    v_and_b32_e32 v0, 63, v0
; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
; SDAG-NEXT:    v_and_b32_e32 v0, 0xfff, v0
; SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SDAG-NEXT:    buffer_store_short v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v2i6:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 63, v1
; GISEL-NEXT:    v_and_b32_e32 v0, 63, v0
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 6, v1
; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GISEL-NEXT:    v_and_b32_e32 v0, 0xfff, v0
; GISEL-NEXT:    buffer_store_short v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <2 x i6> %data, ptr addrspace(7) %p
  ret void
}

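;; Blocks of fp6 elements: <32 x i6> is 192 bits, so the tests below bitcast it
;; to/from <6 x i32> in the function signature.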
; Loads a <32 x i6> (192 bits) through a buffer fat pointer and bitcasts it to
; <6 x i32> for the return value. Both selectors are expected to emit a dwordx4
; load at offset 0 plus a dwordx2 load at offset 16, with no bit shuffling.
define <6 x i32> @load_v32i6(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_v32i6:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_v32i6:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load <32 x i6>, ptr addrspace(7) %p
  %ret.cast = bitcast <32 x i6> %ret to <6 x i32>
  ret <6 x i32> %ret.cast
}

; Takes the data as <6 x i32>, bitcasts it to <32 x i6> (192 bits) and stores it
; through a buffer fat pointer. Both selectors are expected to emit a dwordx4
; store at offset 0 plus a dwordx2 store at offset 16, with no bit shuffling.
define void @store_v32i6(<6 x i32> %data.abi, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_v32i6:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_v32i6:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %data = bitcast <6 x i32> %data.abi to <32 x i6>
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store <32 x i6> %data, ptr addrspace(7) %p
  ret void
}

;;; Modifiers

; Volatile load of a <4 x i8> through a buffer fat pointer. Both selectors are
; expected to emit one dword load carrying the glc modifier and then split the
; result into bytes with right shifts by 8, 16 and 24.
define <4 x i8> @volatile_load_v4i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: volatile_load_v4i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0 glc
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: volatile_load_v4i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0 glc
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load volatile <4 x i8>, ptr addrspace(7) %p
  ret <4 x i8> %ret
}

; Volatile store of a <4 x i8> through a buffer fat pointer. Both selectors pack
; the four bytes into one dword and emit a single dword store. Unlike the
; volatile load, the checked store instruction carries no cache modifier.
define void @volatile_store_v4i8(<4 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: volatile_store_v4i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: volatile_store_v4i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v5, 8
; GISEL-NEXT:    v_mov_b32_e32 v4, 0xff
; GISEL-NEXT:    v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GISEL-NEXT:    v_and_or_b32 v0, v0, v4, v1
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v2
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
; GISEL-NEXT:    v_or3_b32 v0, v0, v1, v2
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store volatile <4 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Volatile load of a <6 x i8> (48 bits) through a buffer fat pointer. Both
; selectors split the access into a dword load at offset 0 and an unsigned
; short load at offset 4, each carrying the glc modifier, then unpack the bytes.
define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: volatile_load_v6i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0 glc
; SDAG-NEXT:    buffer_load_ushort v6, off, s[16:19], 0 offset:4 glc
; SDAG-NEXT:    s_waitcnt vmcnt(1)
; SDAG-NEXT:    v_lshrrev_b32_e32 v7, 8, v0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v6
; SDAG-NEXT:    v_lshrrev_b64 v[3:4], 24, v[0:1]
; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; SDAG-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
; SDAG-NEXT:    v_mov_b32_e32 v4, v6
; SDAG-NEXT:    v_mov_b32_e32 v1, v7
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: volatile_load_v6i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0 glc
; GISEL-NEXT:    buffer_load_ushort v4, off, s[16:19], 0 offset:4 glc
; GISEL-NEXT:    s_waitcnt vmcnt(1)
; GISEL-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 8, v4
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load volatile <6 x i8>, ptr addrspace(7) %p
  ret <6 x i8> %ret
}

; Volatile store of a <6 x i8> (48 bits) through a buffer fat pointer. Both
; selectors pack bytes 0-3 into a dword and bytes 4-5 into a short, then emit a
; dword store at offset 0 followed by a short store at offset 4.
define void @volatile_store_v6i8(<6 x i8> %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: volatile_store_v6i8:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
; SDAG-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    v_lshlrev_b16_e32 v5, 8, v5
; SDAG-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; SDAG-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_store_short v4, off, s[16:19], 0 offset:4
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: volatile_store_v6i8:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GISEL-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v3
; GISEL-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GISEL-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v5
; GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GISEL-NEXT:    v_lshlrev_b16_e32 v2, 8, v2
; GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GISEL-NEXT:    v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_store_short v2, off, s[16:19], 0 offset:4
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store volatile <6 x i8> %data, ptr addrspace(7) %p
  ret void
}

; Aggregate case: loads a nested array [2 x [2 x i32]] (four i32 values) through
; a buffer fat pointer. Both selectors are expected to emit one dwordx4 load.
define [2 x [2 x i32]] @load_a2a2i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_a2a2i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_a2a2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load [2 x [2 x i32]], ptr addrspace(7) %p
  ret [2 x [2 x i32]] %ret
}

; Aggregate case: stores a nested array [2 x [2 x i32]] (four i32 values)
; through a buffer fat pointer. Both selectors are expected to emit one dwordx4
; store.
define void @store_a2a2i32([2 x [2 x i32]] %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_a2a2i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_a2a2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store [2 x [2 x i32]] %data, ptr addrspace(7) %p
  ret void
}

; Aggregate case: loads an array of two <2 x i32> vectors through a buffer fat
; pointer. Both selectors are expected to emit one dwordx4 load.
define [2 x <2 x i32>] @load_a2v2i32(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_a2v2i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_a2v2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load [2 x <2 x i32>], ptr addrspace(7) %p
  ret [2 x <2 x i32>] %ret
}

; Aggregate case: stores an array of two <2 x i32> vectors through a buffer fat
; pointer. Both selectors are expected to emit one dwordx4 store.
define void @store_a2v2i32([2 x <2 x i32>] %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_a2v2i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_a2v2i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store [2 x <2 x i32>] %data, ptr addrspace(7) %p
  ret void
}

; Aggregate case: loads a single-member struct { i32 } through a buffer fat
; pointer. Both selectors are expected to emit one dword load.
define { i32 } @load_sl_i32s(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_sl_i32s:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_sl_i32s:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load { i32 }, ptr addrspace(7) %p
  ret { i32 } %ret
}

; Aggregate case: stores a single-member struct { i32 } through a buffer fat
; pointer. Both selectors are expected to emit one dword store.
define void @store_sl_i32s({ i32 } %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_sl_i32s:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_sl_i32s:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store { i32 } %data, ptr addrspace(7) %p
  ret void
}

; Aggregate case: loads a doubly nested struct { { float } } through a buffer
; fat pointer. Both selectors are expected to emit one dword load.
define { { float } } @load_sl_sl_f32ss(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_sl_sl_f32ss:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_sl_sl_f32ss:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load { { float } }, ptr addrspace(7) %p
  ret { { float } } %ret
}

; Aggregate case: stores a doubly nested struct { { float } } through a buffer
; fat pointer. Both selectors are expected to emit one dword store.
define void @store_sl_sl_f32ss({ { float } } %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_sl_sl_f32ss:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_sl_sl_f32ss:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dword v0, off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store { { float } } %data, ptr addrspace(7) %p
  ret void
}

; Aggregate case: loads a struct wrapping one <2 x i32> vector through a buffer
; fat pointer. Both selectors are expected to emit one dwordx2 load.
define { <2 x i32> } @load_sl_v2i32s(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_sl_v2i32s:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_sl_v2i32s:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load { <2 x i32> }, ptr addrspace(7) %p
  ret { <2 x i32> } %ret
}

; Aggregate case: stores a struct wrapping one <2 x i32> vector through a buffer
; fat pointer. Both selectors are expected to emit one dwordx2 store.
define void @store_sl_v2i32s({ <2 x i32> } %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_sl_v2i32s:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_sl_v2i32s:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store { <2 x i32> } %data, ptr addrspace(7) %p
  ret void
}

; Aggregate case: loads a mixed-width struct { i64, i32 } through a buffer fat
; pointer. Both selectors are expected to cover both members with one dwordx3
; load.
define { i64, i32 } @load_sl_i64i32s(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_sl_i64i32s:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_sl_i64i32s:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load { i64, i32 }, ptr addrspace(7) %p
  ret { i64, i32 } %ret
}

; Aggregate case: stores a mixed-width struct { i64, i32 } through a buffer fat
; pointer. Both selectors are expected to cover both members with one dwordx3
; store.
define void @store_sl_i64i32s({ i64, i32 } %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_sl_i64i32s:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_sl_i64i32s:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[16:19], 0
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store { i64, i32 } %data, ptr addrspace(7) %p
  ret void
}

; Aggregate case: loads an array of four i7 values through a buffer fat pointer.
; Unlike the i4 vectors, the elements are not bit-packed: both selectors are
; expected to emit four separate unsigned byte loads at offsets 0, 1, 2 and 3.
define [4 x i7] @load_a4i7(ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: load_a4i7:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; SDAG-NEXT:    buffer_load_ubyte v1, off, s[16:19], 0 offset:1
; SDAG-NEXT:    buffer_load_ubyte v2, off, s[16:19], 0 offset:2
; SDAG-NEXT:    buffer_load_ubyte v3, off, s[16:19], 0 offset:3
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: load_a4i7:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    buffer_load_ubyte v0, off, s[16:19], 0
; GISEL-NEXT:    buffer_load_ubyte v1, off, s[16:19], 0 offset:1
; GISEL-NEXT:    buffer_load_ubyte v2, off, s[16:19], 0 offset:2
; GISEL-NEXT:    buffer_load_ubyte v3, off, s[16:19], 0 offset:3
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %ret = load [4 x i7], ptr addrspace(7) %p
  ret [4 x i7] %ret
}

; Aggregate case: stores an array of four i7 values through a buffer fat
; pointer. Both selectors are expected to mask each element to 7 bits (0x7f)
; and write it with its own byte store at offsets 0, 1, 2 and 3.
define void @store_a4i7([4 x i7] %data, ptr addrspace(8) inreg %buf) {
; SDAG-LABEL: store_a4i7:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; SDAG-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; SDAG-NEXT:    v_and_b32_e32 v0, 0x7f, v1
; SDAG-NEXT:    buffer_store_byte v0, off, s[16:19], 0 offset:1
; SDAG-NEXT:    v_and_b32_e32 v0, 0x7f, v2
; SDAG-NEXT:    buffer_store_byte v0, off, s[16:19], 0 offset:2
; SDAG-NEXT:    v_and_b32_e32 v0, 0x7f, v3
; SDAG-NEXT:    buffer_store_byte v0, off, s[16:19], 0 offset:3
; SDAG-NEXT:    s_waitcnt vmcnt(0)
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: store_a4i7:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; GISEL-NEXT:    buffer_store_byte v0, off, s[16:19], 0
; GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v1
; GISEL-NEXT:    buffer_store_byte v0, off, s[16:19], 0 offset:1
; GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v2
; GISEL-NEXT:    buffer_store_byte v0, off, s[16:19], 0 offset:2
; GISEL-NEXT:    v_and_b32_e32 v0, 0x7f, v3
; GISEL-NEXT:    buffer_store_byte v0, off, s[16:19], 0 offset:3
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  store [4 x i7] %data, ptr addrspace(7) %p
  ret void
}
