; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-SDAG
; RUN: llc < %s -global-isel -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-GISEL

; Variable-index extractelement from <8 x i8>: lane %idx of %x becomes lane 0
; of a <4 x i8> result, and result lanes 1-3 are copied from the constant
; lanes 1-3 of %x. Both expected sequences store %x into a 16-byte stack slot
; and load the selected byte back from it, keeping only the low 3 bits of the
; index (bfxil #0, #3 for SelectionDAG, and #0x7 for GlobalISel). The result
; lanes are written as halfword lanes (v1.h[n]).
; NOTE(review): the GlobalISel sequence forms the byte offset as
; (idx << 1) - idx, which equals idx; this looks like an unsimplified multiply
; by the 1-byte element size -- confirm before treating it as intended.
define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v8s8:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    add x8, sp, #8
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT:    str d0, [sp, #8]
; CHECK-SDAG-NEXT:    umov w9, v0.b[1]
; CHECK-SDAG-NEXT:    bfxil x8, x0, #0, #3
; CHECK-SDAG-NEXT:    ld1 { v1.b }[0], [x8]
; CHECK-SDAG-NEXT:    umov w8, v0.b[2]
; CHECK-SDAG-NEXT:    mov v1.h[1], w9
; CHECK-SDAG-NEXT:    umov w9, v0.b[3]
; CHECK-SDAG-NEXT:    mov v1.h[2], w8
; CHECK-SDAG-NEXT:    mov v1.h[3], w9
; CHECK-SDAG-NEXT:    fmov d0, d1
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v8s8:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GISEL-NEXT:    mov b1, v0.b[1]
; CHECK-GISEL-NEXT:    add x8, sp, #8
; CHECK-GISEL-NEXT:    and x9, x9, #0x7
; CHECK-GISEL-NEXT:    str d0, [sp, #8]
; CHECK-GISEL-NEXT:    mov b2, v0.b[2]
; CHECK-GISEL-NEXT:    lsl x10, x9, #1
; CHECK-GISEL-NEXT:    mov b0, v0.b[3]
; CHECK-GISEL-NEXT:    sub x9, x10, x9
; CHECK-GISEL-NEXT:    ldrb w8, [x8, x9]
; CHECK-GISEL-NEXT:    fmov w9, s1
; CHECK-GISEL-NEXT:    fmov s1, w8
; CHECK-GISEL-NEXT:    fmov w8, s2
; CHECK-GISEL-NEXT:    mov v1.h[1], w9
; CHECK-GISEL-NEXT:    mov v1.h[2], w8
; CHECK-GISEL-NEXT:    fmov w8, s0
; CHECK-GISEL-NEXT:    mov v1.h[3], w8
; CHECK-GISEL-NEXT:    fmov d0, d1
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  ; Lane 0 is the only lane selected by the runtime index.
  %tmp = extractelement <8 x i8> %x, i32 %idx
  %tmp2 = insertelement <4 x i8> undef, i8 %tmp, i32 0
  ; Lanes 1-3 use constant indices, so they need no memory round trip.
  %tmp3 = extractelement <8 x i8> %x, i32 1
  %tmp4 = insertelement <4 x i8> %tmp2, i8 %tmp3, i32 1
  %tmp5 = extractelement <8 x i8> %x, i32 2
  %tmp6 = insertelement <4 x i8> %tmp4, i8 %tmp5, i32 2
  %tmp7 = extractelement <8 x i8> %x, i32 3
  %tmp8 = insertelement <4 x i8> %tmp6, i8 %tmp7, i32 3
  ret <4 x i8> %tmp8
}

; Variable-index extractelement from <16 x i8>: lane %idx of %x becomes lane 0
; of an <8 x i8> result, and result lanes 1-7 are copied from the constant
; lanes 1-7 of %x. Both expected sequences store the full q0 register into a
; 16-byte stack slot and load the selected byte back from it, keeping only the
; low 4 bits of the index (bfxil #0, #4 for SelectionDAG, and #0xf for
; GlobalISel).
; NOTE(review): as in the GlobalISel output for the 8-lane byte case, the byte
; offset is formed as (idx << 1) - idx, which equals idx, and the sequence also
; contains the self-copy "mov v1.b[0], v1.b[0]"; both look like missed
; simplifications rather than required instructions -- confirm.
define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v16s8:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfxil x8, x0, #0, #4
; CHECK-SDAG-NEXT:    ldr b1, [x8]
; CHECK-SDAG-NEXT:    mov v1.b[1], v0.b[1]
; CHECK-SDAG-NEXT:    mov v1.b[2], v0.b[2]
; CHECK-SDAG-NEXT:    mov v1.b[3], v0.b[3]
; CHECK-SDAG-NEXT:    mov v1.b[4], v0.b[4]
; CHECK-SDAG-NEXT:    mov v1.b[5], v0.b[5]
; CHECK-SDAG-NEXT:    mov v1.b[6], v0.b[6]
; CHECK-SDAG-NEXT:    mov v1.b[7], v0.b[7]
; CHECK-SDAG-NEXT:    fmov d0, d1
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v16s8:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0xf
; CHECK-GISEL-NEXT:    mov b2, v0.b[1]
; CHECK-GISEL-NEXT:    mov b3, v0.b[2]
; CHECK-GISEL-NEXT:    lsl x10, x9, #1
; CHECK-GISEL-NEXT:    sub x9, x10, x9
; CHECK-GISEL-NEXT:    ldr b1, [x8, x9]
; CHECK-GISEL-NEXT:    mov v1.b[0], v1.b[0]
; CHECK-GISEL-NEXT:    mov v1.b[1], v2.b[0]
; CHECK-GISEL-NEXT:    mov b2, v0.b[3]
; CHECK-GISEL-NEXT:    mov v1.b[2], v3.b[0]
; CHECK-GISEL-NEXT:    mov b3, v0.b[4]
; CHECK-GISEL-NEXT:    mov v1.b[3], v2.b[0]
; CHECK-GISEL-NEXT:    mov b2, v0.b[5]
; CHECK-GISEL-NEXT:    mov v1.b[4], v3.b[0]
; CHECK-GISEL-NEXT:    mov b3, v0.b[6]
; CHECK-GISEL-NEXT:    mov b0, v0.b[7]
; CHECK-GISEL-NEXT:    mov v1.b[5], v2.b[0]
; CHECK-GISEL-NEXT:    mov v1.b[6], v3.b[0]
; CHECK-GISEL-NEXT:    mov v1.b[7], v0.b[0]
; CHECK-GISEL-NEXT:    fmov d0, d1
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  ; Lane 0 is the only lane selected by the runtime index.
  %tmp = extractelement <16 x i8> %x, i32 %idx
  %tmp2 = insertelement <8 x i8> undef, i8 %tmp, i32 0
  ; Lanes 1-7 use constant indices and are expected to be plain lane moves.
  %tmp3 = extractelement <16 x i8> %x, i32 1
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 1
  %tmp5 = extractelement <16 x i8> %x, i32 2
  %tmp6 = insertelement <8 x i8> %tmp4, i8 %tmp5, i32 2
  %tmp7 = extractelement <16 x i8> %x, i32 3
  %tmp8 = insertelement <8 x i8> %tmp6, i8 %tmp7, i32 3
  %tmp9 = extractelement <16 x i8> %x, i32 4
  %tmp10 = insertelement <8 x i8> %tmp8, i8 %tmp9, i32 4
  %tmp11 = extractelement <16 x i8> %x, i32 5
  %tmp12 = insertelement <8 x i8> %tmp10, i8 %tmp11, i32 5
  %tmp13 = extractelement <16 x i8> %x, i32 6
  %tmp14 = insertelement <8 x i8> %tmp12, i8 %tmp13, i32 6
  %tmp15 = extractelement <16 x i8> %x, i32 7
  %tmp16 = insertelement <8 x i8> %tmp14, i8 %tmp15, i32 7
  ret <8 x i8> %tmp16
}

; Variable-index extractelement from <2 x i16>, returning the selected element
; as a scalar i16. The two expected sequences use different in-memory layouts:
;  - SelectionDAG stores d0 whole, inserts the 1-bit index at bit 2 of the
;    slot address (a 4-byte stride), and loads a 32-bit word into w0.
;  - GlobalISel stores h0 and the low half of v0.s[1] as two adjacent
;    halfwords and loads with a 2-byte stride (ldrh ..., lsl #1).
; NOTE(review): both layouts are consistent with the <2 x i16> argument
; arriving with one element per 32-bit lane of d0 -- presumably the result of
; type legalization; confirm against the AArch64 lowering.
define i16 @test_varidx_extract_v2s16(<2 x i16> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2s16:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    add x8, sp, #8
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str d0, [sp, #8]
; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #1
; CHECK-SDAG-NEXT:    ldr w0, [x8]
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2s16:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GISEL-NEXT:    mov s1, v0.s[1]
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    add x8, sp, #12
; CHECK-GISEL-NEXT:    str h0, [sp, #12]
; CHECK-GISEL-NEXT:    and x9, x9, #0x1
; CHECK-GISEL-NEXT:    str h1, [sp, #14]
; CHECK-GISEL-NEXT:    ldrh w0, [x8, x9, lsl #1]
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  ; The whole function body is the single runtime-indexed extract.
  %tmp = extractelement <2 x i16> %x, i32 %idx
  ret i16 %tmp
}

; Variable-index extractelement from <4 x i16>: lane %idx of %x becomes lane 0
; of a <2 x i16> result, and result lane 1 is copied from constant lane 1 of
; %x. Both expected sequences store d0 into a 16-byte stack slot and reload the
; selected halfword with ld1 into lane 0:
;  - SelectionDAG inserts the 2-bit index at bit 1 of the slot address
;    (bfi #1, #2), i.e. a 2-byte stride.
;  - GlobalISel masks the index with #0x3 and computes the address with a
;    madd against the constant 2.
; Result lane 1 is written as a 32-bit lane (mov v0.s[1], w9).
define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v4s16:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    add x8, sp, #8
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT:    str d0, [sp, #8]
; CHECK-SDAG-NEXT:    umov w9, v0.h[1]
; CHECK-SDAG-NEXT:    bfi x8, x0, #1, #2
; CHECK-SDAG-NEXT:    ld1 { v0.h }[0], [x8]
; CHECK-SDAG-NEXT:    mov v0.s[1], w9
; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v4s16:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov w8, #2 // =0x2
; CHECK-GISEL-NEXT:    add x10, sp, #8
; CHECK-GISEL-NEXT:    and x9, x9, #0x3
; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GISEL-NEXT:    str d0, [sp, #8]
; CHECK-GISEL-NEXT:    madd x8, x9, x8, x10
; CHECK-GISEL-NEXT:    umov w9, v0.h[1]
; CHECK-GISEL-NEXT:    ld1 { v0.h }[0], [x8]
; CHECK-GISEL-NEXT:    mov v0.s[1], w9
; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  ; Lane 0 is the only lane selected by the runtime index.
  %tmp = extractelement <4 x i16> %x, i32 %idx
  %tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 0
  ; Lane 1 uses a constant index.
  %tmp3 = extractelement <4 x i16> %x, i32 1
  %tmp4 = insertelement <2 x i16> %tmp2, i16 %tmp3, i32 1
  ret <2 x i16> %tmp4
}

; Variable-index extractelement from <8 x i16>: lane %idx of %x becomes lane 0
; of a <4 x i16> result, and result lanes 1-3 are copied from the constant
; lanes 1-3 of %x. Both expected sequences store q0 into a 16-byte stack slot
; and load the selected halfword into h1:
;  - SelectionDAG inserts the 3-bit index at bit 1 of the slot address
;    (bfi #1, #3).
;  - GlobalISel masks the index with #0x7 and uses a scaled register offset
;    (lsl #1).
; The remaining lanes are expected to be direct lane-to-lane moves.
define <4 x i16> @test_varidx_extract_v8s16(<8 x i16> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v8s16:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfi x8, x0, #1, #3
; CHECK-SDAG-NEXT:    ldr h1, [x8]
; CHECK-SDAG-NEXT:    mov v1.h[1], v0.h[1]
; CHECK-SDAG-NEXT:    mov v1.h[2], v0.h[2]
; CHECK-SDAG-NEXT:    mov v1.h[3], v0.h[3]
; CHECK-SDAG-NEXT:    fmov d0, d1
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v8s16:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0x7
; CHECK-GISEL-NEXT:    ldr h1, [x8, x9, lsl #1]
; CHECK-GISEL-NEXT:    mov v1.h[1], v0.h[1]
; CHECK-GISEL-NEXT:    mov v1.h[2], v0.h[2]
; CHECK-GISEL-NEXT:    mov v1.h[3], v0.h[3]
; CHECK-GISEL-NEXT:    fmov d0, d1
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  ; Lane 0 is the only lane selected by the runtime index.
  %tmp = extractelement <8 x i16> %x, i32 %idx
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
  ; Lanes 1-3 use constant indices.
  %tmp3 = extractelement <8 x i16> %x, i32 1
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
  %tmp5 = extractelement <8 x i16> %x, i32 2
  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
  %tmp7 = extractelement <8 x i16> %x, i32 3
  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
  ret <4 x i16> %tmp8
}

; Variable-index extractelement from <2 x i32>, returning the selected element
; as a scalar i32. Both expected sequences store d0 into a 16-byte stack slot
; and load one 32-bit word back into w0:
;  - SelectionDAG inserts the 1-bit index at bit 2 of the slot address
;    (bfi #2, #1).
;  - GlobalISel masks the index with #0x1 and uses a scaled register offset
;    (lsl #2).
define i32 @test_varidx_extract_v2s32(<2 x i32> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2s32:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    add x8, sp, #8
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str d0, [sp, #8]
; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #1
; CHECK-SDAG-NEXT:    ldr w0, [x8]
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2s32:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    add x8, sp, #8
; CHECK-GISEL-NEXT:    str d0, [sp, #8]
; CHECK-GISEL-NEXT:    and x9, x9, #0x1
; CHECK-GISEL-NEXT:    ldr w0, [x8, x9, lsl #2]
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  ; The whole function body is the single runtime-indexed extract.
  %tmp = extractelement <2 x i32> %x, i32 %idx
  ret i32 %tmp
}

; Variable-index extractelement from <4 x i32>: lane %idx of %x becomes lane 0
; of a <2 x i32> result, and result lane 1 is copied from constant lane 1 of
; %x. Both expected sequences store q0 into a 16-byte stack slot and load the
; selected word into s1:
;  - SelectionDAG inserts the 2-bit index at bit 2 of the slot address
;    (bfi #2, #2).
;  - GlobalISel masks the index with #0x3 and uses a scaled register offset
;    (lsl #2).
define <2 x i32> @test_varidx_extract_v4s32(<4 x i32> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v4s32:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #2
; CHECK-SDAG-NEXT:    ldr s1, [x8]
; CHECK-SDAG-NEXT:    mov v1.s[1], v0.s[1]
; CHECK-SDAG-NEXT:    fmov d0, d1
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v4s32:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0x3
; CHECK-GISEL-NEXT:    ldr s1, [x8, x9, lsl #2]
; CHECK-GISEL-NEXT:    mov v1.s[1], v0.s[1]
; CHECK-GISEL-NEXT:    fmov d0, d1
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  ; Lane 0 is the only lane selected by the runtime index.
  %tmp = extractelement <4 x i32> %x, i32 %idx
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp, i32 0
  ; Lane 1 uses a constant index.
  %tmp3 = extractelement <4 x i32> %x, i32 1
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

; Variable-index extractelement from <2 x i64>, returning the selected element
; as a scalar i64. Both expected sequences store q0 into a 16-byte stack slot
; and load one 64-bit doubleword back into x0:
;  - SelectionDAG inserts the 1-bit index at bit 3 of the slot address
;    (bfi #3, #1).
;  - GlobalISel masks the index with #0x1 and uses a scaled register offset
;    (lsl #3).
define i64 @test_varidx_extract_v2s64(<2 x i64> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2s64:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfi x8, x0, #3, #1
; CHECK-SDAG-NEXT:    ldr x0, [x8]
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2s64:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0x1
; CHECK-GISEL-NEXT:    ldr x0, [x8, x9, lsl #3]
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  ; The whole function body is the single runtime-indexed extract.
  %tmp = extractelement <2 x i64> %x, i32 %idx
  ret i64 %tmp
}

; Variable-index extractelement from a vector of two pointers, returning the
; selected pointer. Apart from the function label, the expected instruction
; sequences are the same as those listed for test_varidx_extract_v2s64: q0 is
; stored into a 16-byte stack slot and one 64-bit value is loaded back into x0,
; with the index limited to one bit and scaled by 8 bytes (bfi #3, #1 for
; SelectionDAG; and #0x1 plus lsl #3 for GlobalISel).
define ptr @test_varidx_extract_v2p0(<2 x ptr> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2p0:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfi x8, x0, #3, #1
; CHECK-SDAG-NEXT:    ldr x0, [x8]
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2p0:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0x1
; CHECK-GISEL-NEXT:    ldr x0, [x8, x9, lsl #3]
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  ; The whole function body is the single runtime-indexed extract.
  %tmp = extractelement <2 x ptr> %x, i32 %idx
  ret ptr %tmp
}
