; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

%0 = type { i64, i64 }

; 128-bit exclusive load. @llvm.aarch64.ldxp yields a pair of i64 values; the
; IR uses element 0 as the low half and element 1 as the high half when it
; rebuilds the i128. Both instruction selectors are expected to emit a single
; ldxp that writes straight into the x0/x1 return registers.
define dso_local i128 @f0(ptr %p) nounwind readonly {
; CHECK-LABEL: f0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldxp x0, x1, [x0]
; CHECK-NEXT:    ret
entry:
  %pair = tail call %0 @llvm.aarch64.ldxp(ptr %p)
  %hi = extractvalue %0 %pair, 1
  %lo = extractvalue %0 %pair, 0
  %hi.wide = zext i64 %hi to i128
  %lo.wide = zext i64 %lo to i128
  %hi.shifted = shl nuw i128 %hi.wide, 64
  %full = or i128 %hi.shifted, %lo.wide
  ret i128 %full
}

; 128-bit exclusive store. The i128 argument is split into its low and high
; i64 halves, which are passed to @llvm.aarch64.stxp in that order. The
; expected code writes the status into w8 (distinct from the x0/x2/x3
; operands of the stxp) and then copies it into the w0 return register.
define dso_local i32 @f1(ptr %ptr, i128 %val) nounwind {
; CHECK-LABEL: f1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stxp w8, x2, x3, [x0]
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    ret
entry:
  %lo = trunc i128 %val to i64
  %hi.wide = lshr i128 %val, 64
  %hi = trunc i128 %hi.wide to i64
  %status = tail call i32 @llvm.aarch64.stxp(i64 %lo, i64 %hi, ptr %ptr)
  ret i32 %status
}

declare %0 @llvm.aarch64.ldxp(ptr) nounwind
declare i32 @llvm.aarch64.stxp(i64, i64, ptr) nounwind

@var = dso_local global i64 0, align 8

; Exclusive byte load (elementtype(i8)) whose i64 result is truncated to i8
; and zero-extended again before being stored to @var. The SelectionDAG
; expectations store the ldxrb result directly, with no masking instruction;
; the GlobalISel expectations still contain an explicit `and` with #0xff.
define dso_local void @test_load_i8(ptr %addr) {
; CHECK-SD-LABEL: test_load_i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldxrb w8, [x0]
; CHECK-SD-NEXT:    adrp x9, var
; CHECK-SD-NEXT:    str x8, [x9, :lo12:var]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_load_i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldxrb w9, [x0]
; CHECK-GI-NEXT:    adrp x8, var
; CHECK-GI-NEXT:    and x9, x9, #0xff
; CHECK-GI-NEXT:    str x9, [x8, :lo12:var]
; CHECK-GI-NEXT:    ret
  %loaded = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i8) %addr)
  %narrow = trunc i64 %loaded to i8
  %widened = zext i8 %narrow to i64
  store i64 %widened, ptr @var, align 8
  ret void
}

; Exclusive halfword load (elementtype(i16)) followed by a trunc-to-i16 /
; zext-to-i64 round trip and a store to @var. SelectionDAG is expected to
; store the ldxrh result as-is; the GlobalISel expectations keep an explicit
; `and` with #0xffff.
define dso_local void @test_load_i16(ptr %addr) {
; CHECK-SD-LABEL: test_load_i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldxrh w8, [x0]
; CHECK-SD-NEXT:    adrp x9, var
; CHECK-SD-NEXT:    str x8, [x9, :lo12:var]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_load_i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldxrh w9, [x0]
; CHECK-GI-NEXT:    adrp x8, var
; CHECK-GI-NEXT:    and x9, x9, #0xffff
; CHECK-GI-NEXT:    str x9, [x8, :lo12:var]
; CHECK-GI-NEXT:    ret
  %loaded = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i16) %addr)
  %narrow = trunc i64 %loaded to i16
  %widened = zext i16 %narrow to i64
  store i64 %widened, ptr @var, align 8
  ret void
}

; Exclusive word load (elementtype(i32)) followed by a trunc-to-i32 /
; zext-to-i64 round trip and a store to @var. SelectionDAG is expected to
; store the 32-bit ldxr result directly; the GlobalISel expectations keep a
; `mov w9, w9` between the load and the store.
define dso_local void @test_load_i32(ptr %addr) {
; CHECK-SD-LABEL: test_load_i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldxr w8, [x0]
; CHECK-SD-NEXT:    adrp x9, var
; CHECK-SD-NEXT:    str x8, [x9, :lo12:var]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_load_i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldxr w9, [x0]
; CHECK-GI-NEXT:    adrp x8, var
; CHECK-GI-NEXT:    mov w9, w9
; CHECK-GI-NEXT:    str x9, [x8, :lo12:var]
; CHECK-GI-NEXT:    ret
  %loaded = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i32) %addr)
  %narrow = trunc i64 %loaded to i32
  %widened = zext i32 %narrow to i64
  store i64 %widened, ptr @var, align 8
  ret void
}

; Exclusive doubleword load (elementtype(i64)) stored unchanged to @var. Both
; instruction selectors share the same expectations: a 64-bit ldxr followed by
; the adrp/str pair that addresses @var.
define dso_local void @test_load_i64(ptr %addr) {
; CHECK-LABEL: test_load_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldxr x8, [x0]
; CHECK-NEXT:    adrp x9, var
; CHECK-NEXT:    str x8, [x9, :lo12:var]
; CHECK-NEXT:    ret
  %loaded = call i64 @llvm.aarch64.ldxr.p0(ptr elementtype(i64) %addr)
  store i64 %loaded, ptr @var, align 8
  ret void
}


declare i64 @llvm.aarch64.ldxr.p0(ptr) nounwind

; Exclusive byte store of a zero-extended i8. The unnamed leading i32 argument
; takes the first argument register, so the value is expected in w1 and the
; address in x2. The expected code is a lone stxrb that writes its status
; directly into w0, with no separate instruction for the zext.
define dso_local i32 @test_store_i8(i32, i8 %val, ptr %addr) {
; CHECK-LABEL: test_store_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stxrb w0, w1, [x2]
; CHECK-NEXT:    ret
  %wide = zext i8 %val to i64
  %status = call i32 @llvm.aarch64.stxr.p0(i64 %wide, ptr elementtype(i8) %addr)
  ret i32 %status
}

; Exclusive halfword store of a zero-extended i16. The unnamed leading i32
; argument pushes the value into w1 and the address into x2. The expected code
; is a single stxrh with the status written to w0 and no separate instruction
; for the zext.
define dso_local i32 @test_store_i16(i32, i16 %val, ptr %addr) {
; CHECK-LABEL: test_store_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stxrh w0, w1, [x2]
; CHECK-NEXT:    ret
  %wide = zext i16 %val to i64
  %status = call i32 @llvm.aarch64.stxr.p0(i64 %wide, ptr elementtype(i16) %addr)
  ret i32 %status
}

; Exclusive word store of a zero-extended i32. The expected code uses the
; 32-bit data register form (stxr with w1) and writes the status to w0; no
; separate instruction is expected for the zext.
define dso_local i32 @test_store_i32(i32, i32 %val, ptr %addr) {
; CHECK-LABEL: test_store_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stxr w0, w1, [x2]
; CHECK-NEXT:    ret
  %wide = zext i32 %val to i64
  %status = call i32 @llvm.aarch64.stxr.p0(i64 %wide, ptr elementtype(i32) %addr)
  ret i32 %status
}

; Exclusive doubleword store. The i64 value is passed to the intrinsic
; unchanged; the expected code is a single stxr using the 64-bit data register
; x1, with the status written to w0.
define dso_local i32 @test_store_i64(i32, i64 %val, ptr %addr) {
; CHECK-LABEL: test_store_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stxr w0, x1, [x2]
; CHECK-NEXT:    ret
  %status = call i32 @llvm.aarch64.stxr.p0(i64 %val, ptr elementtype(i64) %addr)
  ret i32 %status
}

declare i32 @llvm.aarch64.stxr.p0(i64, ptr) nounwind

; Calls @llvm.aarch64.clrex with no arguments. Both instruction selectors are
; expected to emit a single clrex instruction followed by ret.
define dso_local void @test_clear() {
; CHECK-LABEL: test_clear:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clrex
; CHECK-NEXT:    ret
  call void @llvm.aarch64.clrex()
  ret void
}

declare void @llvm.aarch64.clrex() nounwind

; 128-bit load-acquire exclusive. @llvm.aarch64.ldaxp yields a pair of i64
; values; the IR uses element 0 as the low half and element 1 as the high half
; of the returned i128. The expected code is a single ldaxp that writes
; straight into the x0/x1 return registers.
define dso_local i128 @test_load_acquire_i128(ptr %p) nounwind readonly {
; CHECK-LABEL: test_load_acquire_i128:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldaxp x0, x1, [x0]
; CHECK-NEXT:    ret
entry:
  %pair = tail call %0 @llvm.aarch64.ldaxp(ptr %p)
  %hi = extractvalue %0 %pair, 1
  %lo = extractvalue %0 %pair, 0
  %hi.wide = zext i64 %hi to i128
  %lo.wide = zext i64 %lo to i128
  %hi.shifted = shl nuw i128 %hi.wide, 64
  %full = or i128 %hi.shifted, %lo.wide
  ret i128 %full
}

; 128-bit store-release exclusive. The i128 argument is split into low and
; high i64 halves, which are passed to @llvm.aarch64.stlxp in that order. The
; expected code writes the status into w8 (distinct from the x0/x2/x3
; operands of the stlxp) and then copies it into the w0 return register.
define dso_local i32 @test_store_release_i128(ptr %ptr, i128 %val) nounwind {
; CHECK-LABEL: test_store_release_i128:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stlxp w8, x2, x3, [x0]
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    ret
entry:
  %lo = trunc i128 %val to i64
  %hi.wide = lshr i128 %val, 64
  %hi = trunc i128 %hi.wide to i64
  %status = tail call i32 @llvm.aarch64.stlxp(i64 %lo, i64 %hi, ptr %ptr)
  ret i32 %status
}

declare %0 @llvm.aarch64.ldaxp(ptr) nounwind
declare i32 @llvm.aarch64.stlxp(i64, i64, ptr) nounwind

; Load-acquire exclusive of a byte (elementtype(i8)); the i64 result goes
; through a trunc-to-i8 / zext-to-i64 round trip before being stored to @var.
; SelectionDAG is expected to store the ldaxrb result directly; the GlobalISel
; expectations keep an explicit `and` with #0xff.
define dso_local void @test_load_acquire_i8(ptr %addr) {
; CHECK-SD-LABEL: test_load_acquire_i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldaxrb w8, [x0]
; CHECK-SD-NEXT:    adrp x9, var
; CHECK-SD-NEXT:    str x8, [x9, :lo12:var]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_load_acquire_i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldaxrb w9, [x0]
; CHECK-GI-NEXT:    adrp x8, var
; CHECK-GI-NEXT:    and x9, x9, #0xff
; CHECK-GI-NEXT:    str x9, [x8, :lo12:var]
; CHECK-GI-NEXT:    ret
  %loaded = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i8) %addr)
  %narrow = trunc i64 %loaded to i8
  %widened = zext i8 %narrow to i64
  store i64 %widened, ptr @var, align 8
  ret void
}

; Load-acquire exclusive of a halfword (elementtype(i16)); the i64 result goes
; through a trunc-to-i16 / zext-to-i64 round trip before being stored to @var.
; SelectionDAG is expected to store the ldaxrh result directly; the GlobalISel
; expectations keep an explicit `and` with #0xffff.
define dso_local void @test_load_acquire_i16(ptr %addr) {
; CHECK-SD-LABEL: test_load_acquire_i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldaxrh w8, [x0]
; CHECK-SD-NEXT:    adrp x9, var
; CHECK-SD-NEXT:    str x8, [x9, :lo12:var]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_load_acquire_i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldaxrh w9, [x0]
; CHECK-GI-NEXT:    adrp x8, var
; CHECK-GI-NEXT:    and x9, x9, #0xffff
; CHECK-GI-NEXT:    str x9, [x8, :lo12:var]
; CHECK-GI-NEXT:    ret
  %loaded = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i16) %addr)
  %narrow = trunc i64 %loaded to i16
  %widened = zext i16 %narrow to i64
  store i64 %widened, ptr @var, align 8
  ret void
}

; Load-acquire exclusive of a word (elementtype(i32)); the i64 result goes
; through a trunc-to-i32 / zext-to-i64 round trip before being stored to @var.
; SelectionDAG is expected to store the 32-bit ldaxr result directly; the
; GlobalISel expectations keep a `mov w9, w9` between the load and the store.
define dso_local void @test_load_acquire_i32(ptr %addr) {
; CHECK-SD-LABEL: test_load_acquire_i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldaxr w8, [x0]
; CHECK-SD-NEXT:    adrp x9, var
; CHECK-SD-NEXT:    str x8, [x9, :lo12:var]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_load_acquire_i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldaxr w9, [x0]
; CHECK-GI-NEXT:    adrp x8, var
; CHECK-GI-NEXT:    mov w9, w9
; CHECK-GI-NEXT:    str x9, [x8, :lo12:var]
; CHECK-GI-NEXT:    ret
  %loaded = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i32) %addr)
  %narrow = trunc i64 %loaded to i32
  %widened = zext i32 %narrow to i64
  store i64 %widened, ptr @var, align 8
  ret void
}

; Load-acquire exclusive of a doubleword (elementtype(i64)), stored unchanged
; to @var. Both instruction selectors share the same expectations: a 64-bit
; ldaxr followed by the adrp/str pair that addresses @var.
define dso_local void @test_load_acquire_i64(ptr %addr) {
; CHECK-LABEL: test_load_acquire_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldaxr x8, [x0]
; CHECK-NEXT:    adrp x9, var
; CHECK-NEXT:    str x8, [x9, :lo12:var]
; CHECK-NEXT:    ret
  %loaded = call i64 @llvm.aarch64.ldaxr.p0(ptr elementtype(i64) %addr)
  store i64 %loaded, ptr @var, align 8
  ret void
}


declare i64 @llvm.aarch64.ldaxr.p0(ptr) nounwind

; Store-release exclusive of a zero-extended i8. The unnamed leading i32
; argument takes the first argument register, so the value is expected in w1
; and the address in x2. The expected code is a lone stlxrb with its status in
; w0 and no separate instruction for the zext.
define dso_local i32 @test_store_release_i8(i32, i8 %val, ptr %addr) {
; CHECK-LABEL: test_store_release_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stlxrb w0, w1, [x2]
; CHECK-NEXT:    ret
  %wide = zext i8 %val to i64
  %status = call i32 @llvm.aarch64.stlxr.p0(i64 %wide, ptr elementtype(i8) %addr)
  ret i32 %status
}

; Store-release exclusive of a zero-extended i16. The expected code is a
; single stlxrh taking the value from w1 and the address from x2, with the
; status written to w0 and no separate instruction for the zext.
define dso_local i32 @test_store_release_i16(i32, i16 %val, ptr %addr) {
; CHECK-LABEL: test_store_release_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stlxrh w0, w1, [x2]
; CHECK-NEXT:    ret
  %wide = zext i16 %val to i64
  %status = call i32 @llvm.aarch64.stlxr.p0(i64 %wide, ptr elementtype(i16) %addr)
  ret i32 %status
}

; Store-release exclusive of a zero-extended i32. The expected code uses the
; 32-bit data register form (stlxr with w1) and writes the status to w0; no
; separate instruction is expected for the zext.
define dso_local i32 @test_store_release_i32(i32, i32 %val, ptr %addr) {
; CHECK-LABEL: test_store_release_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stlxr w0, w1, [x2]
; CHECK-NEXT:    ret
  %wide = zext i32 %val to i64
  %status = call i32 @llvm.aarch64.stlxr.p0(i64 %wide, ptr elementtype(i32) %addr)
  ret i32 %status
}

; Store-release exclusive of a doubleword. The i64 value is passed to the
; intrinsic unchanged; the expected code is a single stlxr using the 64-bit
; data register x1, with the status written to w0.
define dso_local i32 @test_store_release_i64(i32, i64 %val, ptr %addr) {
; CHECK-LABEL: test_store_release_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stlxr w0, x1, [x2]
; CHECK-NEXT:    ret
  %status = call i32 @llvm.aarch64.stlxr.p0(i64 %val, ptr elementtype(i64) %addr)
  ret i32 %status
}

; Register-allocation check: the stxp status result must not be assigned a
; register that is also used as one of the stxp inputs, even when the first
; data operand is undef. The expected code keeps the status in w8, uses x9 for
; the undef operand, and copies the status to w0 afterwards.
define dso_local i32 @test_stxp_undef(ptr %p, i64 %x) nounwind {
; CHECK-LABEL: test_stxp_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stxp w8, x9, x1, [x0]
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    ret
  %status = call i32 @llvm.aarch64.stxp(i64 undef, i64 %x, ptr %p)
  ret i32 %status
}

; Inline-asm variant of the stxp-with-undef-operand register-allocation check.
; The status output uses the `=&r` constraint and the first data operand is
; undef; the expected code again keeps the status in w8, separate from the
; x9/x1/x0 inputs, before copying it to w0.
define dso_local i32 @test_stxp_undef_inline_asm(ptr %p, i64 %x) nounwind {
; CHECK-LABEL: test_stxp_undef_inline_asm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    //APP
; CHECK-NEXT:    stxp w8, x9, x1, [x0]
; CHECK-NEXT:    //NO_APP
; CHECK-NEXT:    mov w0, w8
; CHECK-NEXT:    ret
  %status = call i32 asm sideeffect "stxp ${0:w}, ${2}, ${3}, [${1}]", "=&r,r,r,r,~{memory}"(ptr %p, i64 undef, i64 %x)
  ret i32 %status
}

declare i32 @llvm.aarch64.stlxr.p0(i64, ptr) nounwind
