Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1.11 Optimization pass [with no differentiation] creates different results #1993

Closed
wsmoses opened this issue Oct 21, 2024 · 2 comments
Closed

Comments

@wsmoses
Copy link
Member

wsmoses commented Oct 21, 2024

using Enzyme
using LinearAlgebra

# Copy every element of `src`, in iteration order, into a freshly allocated
# 3×3 Float64 matrix and return that matrix.
#
# The destination size is hard-coded and the stores run under `@inbounds`, so
# this is only meaningful when `src` yields exactly 9 elements: more would be
# written past the end of `dest` without a bounds check, fewer would leave
# uninitialized (`undef`) entries in the result.
#
# NOTE(review): the loop shape looks like it was reduced from Base's generic
# `copyto!` fallback (hence the "Dual-index" comment and the `1 - 1`) — TODO
# confirm. Keep the statements exactly as written: the issue is about how this
# particular code is compiled, so restyling it may hide the problem.
function divdriver_herm(src)
    # Uninitialized storage; every entry is expected to be overwritten below.
    dest = Matrix{Float64}(undef, 3, 3)

    # Dual-index implementation
    # `i` is the linear index into `dest`; it starts one before the first slot
    # because the store below increments it before use.
    i = 1 - 1
    @inbounds for a in src
        # `i += 1` evaluates to the incremented value, so this writes slots 1, 2, 3, ...
        dest[i+=1] = a
    end

    return dest
end

# Test input: a Hermitian wrapper around a 3×3 matrix that is already
# symmetric, so a correct element-by-element copy must reproduce these entries.
H = Hermitian(Matrix([4.0 1.0 0.0; 1.0 4.0 1.0; 0.0 1.0 4.0]))

# Run the function through Enzyme with the return and the single argument both
# marked `Const` (the issue title describes this as "no differentiation").
# `[1]` takes the first element of what `autodiff` returns — presumably the
# primal result; TODO confirm against the Enzyme version in use.
@show Enzyme.autodiff(
    ForwardWithPrimal,
    divdriver_herm,
    Const,
    Const(H),
)[1]

# Output reported after the Enzyme call above (presumably its result; it does
# not match the entries of `H`):
# 3×3 Matrix{Float64}:
#  4.0  1.0  4.0
#  1.0  0.0  1.0
#  4.0  1.0  0.0


# Reference run: the same function called directly, without Enzyme.
@show divdriver_herm(H)

# Output reported after the direct call (equal to the entries of `H`):
# 3×3 Matrix{Float64}:
#  4.0  1.0  0.0
#  1.0  4.0  1.0
#  0.0  1.0  4.0
@wsmoses
Copy link
Member Author

wsmoses commented Oct 22, 2024

using Enzyme
using LinearAlgebra


# In-place variant of the reproducer: write the elements of `src`, in iteration
# order, to consecutive linear indices of `dest` starting at 1, then return
# `nothing`.
#
# All stores run under `@inbounds`, and nothing here checks sizes, so `dest`
# must have room for every element that `src` yields.
#
# NOTE(review): the inner `@inbounds` looks redundant with the one on the loop;
# it is left untouched because the issue depends on how this exact code is
# compiled.
function divdriver_herm(dest, src)
    # Dual-index implementation
    # `i` is the linear index into `dest`; it starts at 0 and is incremented
    # before each store.
    i = 1 - 1
    @inbounds for a in src
        @inbounds dest[i+=1] = a
    end
    return nothing
end

# The wrapped 2×2 matrix is not symmetric: 1.0 sits above the diagonal and 2.0
# below it, so the copied values reveal which triangle was actually read.
# NOTE(review): `Hermitian` is assumed to use its default (upper) triangle
# here, which would make the 2.0 invisible through `H` — confirm against the
# LinearAlgebra docs.
H = Hermitian(Matrix([4.0 1.0; 2.0 5.0]))
dest = Matrix{Float64}(undef, 2, 2)

# Run through Enzyme with the return and both arguments marked `Const`; the
# value of the call is discarded and only the contents of `dest` are inspected.
Enzyme.autodiff(
    ForwardWithPrimal,
    divdriver_herm,
    Const,
    Const(dest),
    Const(H),
)[1]
@show dest


# Reference run: fresh destination, same function called directly.
dest = Matrix{Float64}(undef, 2, 2)
divdriver_herm(dest, H)
@show dest

# Reported output — presumably from the Enzyme run above, since a copy through
# the upper-triangle view would be expected to give [4.0 1.0; 1.0 5.0].
# TODO confirm which `@show` this belongs to.
# 4.0  4.0
# 4.0  2.0

@wsmoses
Copy link
Member Author

wsmoses commented Oct 22, 2024

using Enzyme
using LinearAlgebra

# Turn on Enzyme's verbose printing — presumably what produced the
# "after simplification" IR dump recorded after this script; TODO confirm.
Enzyme.API.printall!(true)

# Hand-lowered version of the reproducer: instead of iterating `src`, it reads
# the wrapper's backing array (`src.data`) directly and spells out the loop
# nest explicitly. Returns `nothing`; the result is left in `dest`.
#
# For every (i, j) with 1 <= i, j <= len it reads the backing array at
# (min(i, j), max(i, j)) — i.e. only entries on or above the diagonal — and
# stores the value at linear index (i-1)*2 + j of `dest`.
#
# Constraints visible in the code:
#   * both loops test their exit condition at the bottom, so each body runs at
#     least once; `len` (the first dimension of `src`) is assumed to be >= 1;
#   * the destination stride of 2 is hard-coded, so the written indices are
#     contiguous only when `len == 2`;
#   * all loads and the store are under `@inbounds`, so nothing is checked.
#
# Keep the statements exactly as written: the issue is about how this
# particular code is compiled.
function divdriver_herm(dest, src)
    N = size(src)
    # Backing storage of the wrapper, accessed without going through `getindex`
    # on `src` itself.
    dat = src.data
    len = N[1]

    i = 1
    while true
        j = 1
        while true
            # Pick the element on or above the diagonal: (i, j) when i <= j,
            # otherwise the mirrored position (j, i).
            ld = @inbounds if i <= j
                dat[i, j]
            else
                dat[j, i]
            end
            # Linear store with a fixed stride of 2 per step of `i`.
            @inbounds dest[(i-1) * 2 + j] = ld
            # Bottom-tested exit for the inner loop.
            if j == len
                break
            end
            j += 1
        end
        # Bottom-tested exit for the outer loop.
        if i == len
            break
        end
        i += 1
    end
    return nothing
end


# The wrapped 2×2 matrix is not symmetric: 1.0 sits above the diagonal and 2.0
# below it. `divdriver_herm` only reads positions on or above the diagonal of
# the backing array, so a 2.0 in `dest` means a value was loaded from the
# wrong location.
H = Hermitian(Matrix([4.0 1.0; 2.0 5.0]))
dest = Matrix{Float64}(undef, 2, 2)

# Run through Enzyme with the return and both arguments marked `Const`; the
# value of the call is discarded and only the contents of `dest` are inspected.
Enzyme.autodiff(
    ForwardWithPrimal,
    divdriver_herm,
    Const,
    Const(dest),
    Const(H),
)[1]
@show dest


# Reference run: fresh destination, same function called directly.
dest = Matrix{Float64}(undef, 2, 2)
divdriver_herm(dest, H)
@show dest

# Reported output — presumably from the Enzyme run above, since it contains
# the below-diagonal 2.0 that this function never reads.
# TODO confirm which `@show` this belongs to.
# 4.0  4.0
# 4.0  2.0
after simplification :
; Function Attrs: mustprogress willreturn
define void @preprocess_julia_divdriver_herm_4501_inner.1({} addrspace(10)* nocapture noundef nonnull readonly align 8 dereferenceable(32) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Pointer, [-1,8,0]:Integer, [-1,8,1]:Integer, [-1,8,2]:Integer, [-1,8,3]:Integer, [-1,8,4]:Integer, [-1,8,5]:Integer, [-1,8,6]:Integer, [-1,8,7]:Integer, [-1,8,8]:Pointer, [-1,8,8,-1]:Float@double, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer}" "enzymejl_parmtype"="4699582144" "enzymejl_parmtype_ref"="2" %0, { {} addrspace(10)*, i32 } "enzyme_type"="{[0]:Pointer, [0,0]:Pointer, [0,0,-1]:Float@double, [0,8]:Pointer, [0,8,0]:Integer, [0,8,1]:Integer, [0,8,2]:Integer, [0,8,3]:Integer, [0,8,4]:Integer, [0,8,5]:Integer, [0,8,6]:Integer, [0,8,7]:Integer, [0,8,8]:Pointer, [0,8,8,-1]:Float@double, [0,16]:Integer, [0,17]:Integer, [0,18]:Integer, [0,19]:Integer, [0,20]:Integer, [0,21]:Integer, [0,22]:Integer, [0,23]:Integer, [0,24]:Integer, [0,25]:Integer, [0,26]:Integer, [0,27]:Integer, [0,28]:Integer, [0,29]:Integer, [0,30]:Integer, [0,31]:Integer, [8]:Integer, [9]:Integer, [10]:Integer, [11]:Integer}" "enzymejl_parmtype"="4491982352" "enzymejl_parmtype_ref"="0" %1) local_unnamed_addr #4 !dbg !110 {
entry:
  %.fca.0.extract = extractvalue { {} addrspace(10)*, i32 } %1, 0, !dbg !111
  %pgcstack.i = call {}*** @julia.get_pgcstack() #5
  %ptls_field.i17 = getelementptr inbounds {}**, {}*** %pgcstack.i, i64 2
  %2 = bitcast {}*** %ptls_field.i17 to i64***
  %ptls_load.i1819 = load i64**, i64*** %2, align 8, !tbaa !11
  %3 = getelementptr inbounds i64*, i64** %ptls_load.i1819, i64 2
  %safepoint.i = load i64*, i64** %3, align 8, !tbaa !15
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint.i) #5, !dbg !112
  fence syncscope("singlethread") seq_cst
  %4 = bitcast {} addrspace(10)* %.fca.0.extract to i8 addrspace(10)*, !dbg !114
  %5 = addrspacecast i8 addrspace(10)* %4 to i8 addrspace(11)*, !dbg !114
  %.sroa.08.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(11)* %5, i64 16, !dbg !114
  %.sroa.08.0..sroa_cast = bitcast i8 addrspace(11)* %.sroa.08.0..sroa_idx to i64 addrspace(11)*, !dbg !114
  %.sroa.08.0.copyload = load i64, i64 addrspace(11)* %.sroa.08.0..sroa_cast, align 8, !dbg !114, !tbaa !26, !alias.scope !27, !noalias !117
  %6 = bitcast {} addrspace(10)* %.fca.0.extract to { i8*, {} addrspace(10)* } addrspace(10)*
  %7 = addrspacecast { i8*, {} addrspace(10)* } addrspace(10)* %6 to { i8*, {} addrspace(10)* } addrspace(11)*
  %8 = bitcast {} addrspace(10)* %.fca.0.extract to {} addrspace(10)** addrspace(10)*
  %9 = addrspacecast {} addrspace(10)** addrspace(10)* %8 to {} addrspace(10)** addrspace(11)*
  %10 = getelementptr inbounds { i8*, {} addrspace(10)* }, { i8*, {} addrspace(10)* } addrspace(11)* %7, i64 0, i32 1
  %11 = bitcast {} addrspace(10)* %0 to { i8*, {} addrspace(10)* } addrspace(10)*
  %12 = addrspacecast { i8*, {} addrspace(10)* } addrspace(10)* %11 to { i8*, {} addrspace(10)* } addrspace(11)*
  %13 = bitcast {} addrspace(10)* %0 to {} addrspace(10)** addrspace(10)*
  %14 = addrspacecast {} addrspace(10)** addrspace(10)* %13 to {} addrspace(10)** addrspace(11)*
  %15 = load {} addrspace(10)**, {} addrspace(10)** addrspace(11)* %14, align 8, !tbaa !37, !alias.scope !40, !noalias !41
  %16 = getelementptr inbounds { i8*, {} addrspace(10)* }, { i8*, {} addrspace(10)* } addrspace(11)* %12, i64 0, i32 1
  %17 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %16, align 8, !tbaa !37, !alias.scope !40, !noalias !41, !dereferenceable_or_null !42, !align !43
  %18 = call "enzyme_type"="{[-1]:Pointer, [-1,-1]:Float@double}" {} addrspace(10)* addrspace(13)* @julia.gc_loaded({} addrspace(10)* noundef %17, {} addrspace(10)** noundef %15) #5, !dbg !111
  br label %L6.i, !dbg !120

L6.i:                                             ; preds = %L151.i, %entry
  %iv = phi i64 [ %iv.next, %L151.i ], [ 0, %entry ]
  %iv.next = add nuw nsw i64 %iv, 1
  %19 = add i64 %iv.next, -1
  %20 = mul i64 %19, %.sroa.08.0.copyload
  %21 = shl i64 %19, 1
  br label %L8.i, !dbg !122

L8.i:                                             ; preds = %L127.i, %L6.i
  %iv1 = phi i64 [ %iv.next2, %L127.i ], [ 0, %L6.i ]
  %iv.next2 = add nuw nsw i64 %iv1, 1, !dbg !123
  %.not = icmp sgt i64 %iv.next, %iv.next2, !dbg !123
  br i1 %.not, label %L69.i, label %L11.i, !dbg !125

L11.i:                                            ; preds = %L8.i
  %22 = add i64 %iv.next2, -1, !dbg !126
  %23 = mul i64 %22, %.sroa.08.0.copyload, !dbg !135
  %24 = load {} addrspace(10)**, {} addrspace(10)** addrspace(11)* %9, align 8, !dbg !136, !tbaa !37, !alias.scope !40, !noalias !41
  %25 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %10, align 8, !dbg !136, !tbaa !37, !alias.scope !40, !noalias !41, !dereferenceable_or_null !42, !align !43
  %26 = add i64 %23, %19, !dbg !136
  %27 = call {} addrspace(10)* addrspace(13)* @julia.gc_loaded({} addrspace(10)* noundef %25, {} addrspace(10)** noundef %24) #5, !dbg !136
  %28 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(13)* %27, i64 %26, !dbg !136
  %29 = bitcast {} addrspace(10)* %25 to <{ i64, i8* }> addrspace(10)*, !dbg !137
  %30 = getelementptr inbounds <{ i64, i8* }>, <{ i64, i8* }> addrspace(10)* %29, i32 0, i32 1, !dbg !137
  %31 = load i8*, i8* addrspace(10)* %30, align 8, !dbg !137
  %32 = ptrtoint i8* %31 to i64, !dbg !137
  %33 = ptrtoint {} addrspace(10)** %24 to i64, !dbg !137
  %34 = sub nuw i64 %32, %33, !dbg !137
  %35 = add nuw i64 0, %34, !dbg !137
  %36 = mul i64 %26, 8, !dbg !137
  %37 = add i64 0, %36, !dbg !137
  %38 = add nuw i64 %35, %37, !dbg !137
  %39 = bitcast {} addrspace(10)** addrspace(10)* %8 to {} addrspace(10)* addrspace(10)*, !dbg !137
  %40 = bitcast {} addrspace(10)* addrspace(10)* %39 to {} addrspace(10)*, !dbg !137
  br label %L127.i, !dbg !137

L69.i:                                            ; preds = %L8.i
  %41 = load {} addrspace(10)**, {} addrspace(10)** addrspace(11)* %9, align 8, !dbg !138, !tbaa !37, !alias.scope !40, !noalias !41
  %42 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %10, align 8, !dbg !138, !tbaa !37, !alias.scope !40, !noalias !41, !dereferenceable_or_null !42, !align !43
  %43 = add i64 %iv.next2, -1, !dbg !141
  %44 = add i64 %43, %20, !dbg !138
  %45 = call {} addrspace(10)* addrspace(13)* @julia.gc_loaded({} addrspace(10)* noundef %42, {} addrspace(10)** noundef %41) #5, !dbg !138
  %46 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(13)* %45, i64 %44, !dbg !138
  %47 = bitcast {} addrspace(10)* %42 to <{ i64, i8* }> addrspace(10)*, !dbg !147
  %48 = getelementptr inbounds <{ i64, i8* }>, <{ i64, i8* }> addrspace(10)* %47, i32 0, i32 1, !dbg !147
  %49 = load i8*, i8* addrspace(10)* %48, align 8, !dbg !147
  %50 = ptrtoint i8* %49 to i64, !dbg !147
  %51 = ptrtoint {} addrspace(10)** %41 to i64, !dbg !147
  %52 = sub nuw i64 %50, %51, !dbg !147
  %53 = add nuw i64 0, %52, !dbg !147
  %54 = mul i64 %44, 8, !dbg !147
  %55 = add i64 0, %54, !dbg !147
  %56 = add nuw i64 %53, %55, !dbg !147
  %57 = bitcast {} addrspace(10)** addrspace(10)* %8 to {} addrspace(10)* addrspace(10)*, !dbg !147
  %58 = bitcast {} addrspace(10)* addrspace(10)* %57 to {} addrspace(10)*, !dbg !147
  br label %L127.i, !dbg !147

L127.i:                                           ; preds = %L69.i, %L11.i
  %.pre-phi = phi i64 [ %43, %L69.i ], [ %22, %L11.i ], !dbg !148
  %nodecayed.value_phi3.i.in.in = phi {} addrspace(10)* [ %58, %L69.i ], [ %40, %L11.i ]
  %nodecayedoff.value_phi3.i.in.in = phi i64 [ %56, %L69.i ], [ %38, %L11.i ]
  %59 = bitcast {} addrspace(10)* %nodecayed.value_phi3.i.in.in to <{ i64, i8* }> addrspace(10)*, !dbg !150
  %60 = getelementptr inbounds <{ i64, i8* }>, <{ i64, i8* }> addrspace(10)* %59, i32 0, i32 1, !dbg !150
  %61 = load i8*, i8* addrspace(10)* %60, align 8, !dbg !150
  %62 = bitcast i8* %61 to {} addrspace(10)**, !dbg !150
  %63 = call {} addrspace(10)* addrspace(13)* @julia.gc_loaded({} addrspace(10)* %nodecayed.value_phi3.i.in.in, {} addrspace(10)** %62) #5, !dbg !150
  %64 = bitcast {} addrspace(10)* addrspace(13)* %63 to i8 addrspace(13)*, !dbg !150
  %65 = getelementptr i8, i8 addrspace(13)* %64, i64 %nodecayedoff.value_phi3.i.in.in, !dbg !150
  %66 = bitcast i8 addrspace(13)* %65 to {} addrspace(10)* addrspace(13)*, !dbg !150
  %value_phi3.i.in = bitcast {} addrspace(10)* addrspace(13)* %66 to double addrspace(13)*, !dbg !150
  %value_phi3.i = load double, double addrspace(13)* %value_phi3.i.in, align 8, !dbg !150, !tbaa !90, !alias.scope !93, !noalias !94
  %67 = add i64 %.pre-phi, %21, !dbg !151
  %68 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(13)* %18, i64 %67, !dbg !151
  %69 = bitcast {} addrspace(10)* addrspace(13)* %68 to double addrspace(13)*, !dbg !151
  store double %value_phi3.i, double addrspace(13)* %69, align 8, !dbg !151, !tbaa !90, !alias.scope !93, !noalias !152
  %.not20 = icmp eq i64 %iv.next2, %.sroa.08.0.copyload, !dbg !153
  %70 = add i64 %iv.next2, 1, !dbg !155
  br i1 %.not20, label %L151.i, label %L8.i, !dbg !157

L151.i:                                           ; preds = %L127.i
  %.not21 = icmp eq i64 %iv.next, %.sroa.08.0.copyload, !dbg !158
  %71 = add i64 %iv.next, 1, !dbg !160
  br i1 %.not21, label %julia_divdriver_herm_4501_inner.exit, label %L6.i, !dbg !162

julia_divdriver_herm_4501_inner.exit:             ; preds = %L151.i
  ret void, !dbg !111
}

; Function Attrs: mustprogress willreturn
define internal void @fwddiffejulia_divdriver_herm_4501_inner.1({} addrspace(10)* nocapture noundef nonnull readonly align 8 dereferenceable(32) "enzyme_type"="{[-1]:Pointer, [-1,0]:Pointer, [-1,0,-1]:Float@double, [-1,8]:Pointer, [-1,8,0]:Integer, [-1,8,1]:Integer, [-1,8,2]:Integer, [-1,8,3]:Integer, [-1,8,4]:Integer, [-1,8,5]:Integer, [-1,8,6]:Integer, [-1,8,7]:Integer, [-1,8,8]:Pointer, [-1,8,8,-1]:Float@double, [-1,16]:Integer, [-1,17]:Integer, [-1,18]:Integer, [-1,19]:Integer, [-1,20]:Integer, [-1,21]:Integer, [-1,22]:Integer, [-1,23]:Integer, [-1,24]:Integer, [-1,25]:Integer, [-1,26]:Integer, [-1,27]:Integer, [-1,28]:Integer, [-1,29]:Integer, [-1,30]:Integer, [-1,31]:Integer}" "enzymejl_parmtype"="4699582144" "enzymejl_parmtype_ref"="2" %0, { {} addrspace(10)*, i32 } "enzyme_type"="{[0]:Pointer, [0,0]:Pointer, [0,0,-1]:Float@double, [0,8]:Pointer, [0,8,0]:Integer, [0,8,1]:Integer, [0,8,2]:Integer, [0,8,3]:Integer, [0,8,4]:Integer, [0,8,5]:Integer, [0,8,6]:Integer, [0,8,7]:Integer, [0,8,8]:Pointer, [0,8,8,-1]:Float@double, [0,16]:Integer, [0,17]:Integer, [0,18]:Integer, [0,19]:Integer, [0,20]:Integer, [0,21]:Integer, [0,22]:Integer, [0,23]:Integer, [0,24]:Integer, [0,25]:Integer, [0,26]:Integer, [0,27]:Integer, [0,28]:Integer, [0,29]:Integer, [0,30]:Integer, [0,31]:Integer, [8]:Integer, [9]:Integer, [10]:Integer, [11]:Integer}" "enzymejl_parmtype"="4491982352" "enzymejl_parmtype_ref"="0" %1) local_unnamed_addr #4 !dbg !163 {
entry:
  %"iv'ac" = alloca i64, align 8
  %"iv1'ac" = alloca i64, align 8
  %.fca.0.extract = extractvalue { {} addrspace(10)*, i32 } %1, 0, !dbg !164
  %pgcstack.i = call {}*** @julia.get_pgcstack() #5
  %ptls_field.i17 = getelementptr inbounds {}**, {}*** %pgcstack.i, i64 2
  %2 = bitcast {}*** %ptls_field.i17 to i64***
  %ptls_load.i1819 = load i64**, i64*** %2, align 8, !tbaa !11, !alias.scope !165, !noalias !168
  %3 = getelementptr inbounds i64*, i64** %ptls_load.i1819, i64 2
  %safepoint.i = load i64*, i64** %3, align 8, !tbaa !15, !alias.scope !170, !noalias !173
  fence syncscope("singlethread") seq_cst
  call void @julia.safepoint(i64* %safepoint.i) #5, !dbg !175
  fence syncscope("singlethread") seq_cst
  %4 = bitcast {} addrspace(10)* %.fca.0.extract to i8 addrspace(10)*, !dbg !177
  %5 = addrspacecast i8 addrspace(10)* %4 to i8 addrspace(11)*, !dbg !177
  %.sroa.08.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(11)* %5, i64 16, !dbg !177
  %.sroa.08.0..sroa_cast = bitcast i8 addrspace(11)* %.sroa.08.0..sroa_idx to i64 addrspace(11)*, !dbg !177
  %.sroa.08.0.copyload = load i64, i64 addrspace(11)* %.sroa.08.0..sroa_cast, align 8, !dbg !177, !tbaa !26, !alias.scope !180, !noalias !183
  %6 = bitcast {} addrspace(10)* %.fca.0.extract to { i8*, {} addrspace(10)* } addrspace(10)*
  %7 = addrspacecast { i8*, {} addrspace(10)* } addrspace(10)* %6 to { i8*, {} addrspace(10)* } addrspace(11)*
  %8 = bitcast {} addrspace(10)* %.fca.0.extract to {} addrspace(10)** addrspace(10)*
  %9 = addrspacecast {} addrspace(10)** addrspace(10)* %8 to {} addrspace(10)** addrspace(11)*
  %10 = getelementptr inbounds { i8*, {} addrspace(10)* }, { i8*, {} addrspace(10)* } addrspace(11)* %7, i64 0, i32 1
  %11 = bitcast {} addrspace(10)* %0 to { i8*, {} addrspace(10)* } addrspace(10)*
  %12 = addrspacecast { i8*, {} addrspace(10)* } addrspace(10)* %11 to { i8*, {} addrspace(10)* } addrspace(11)*
  %13 = bitcast {} addrspace(10)* %0 to {} addrspace(10)** addrspace(10)*
  %14 = addrspacecast {} addrspace(10)** addrspace(10)* %13 to {} addrspace(10)** addrspace(11)*
  %15 = load {} addrspace(10)**, {} addrspace(10)** addrspace(11)* %14, align 8, !tbaa !37, !alias.scope !185, !noalias !188
  %16 = getelementptr inbounds { i8*, {} addrspace(10)* }, { i8*, {} addrspace(10)* } addrspace(11)* %12, i64 0, i32 1
  %17 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %16, align 8, !tbaa !37, !alias.scope !185, !noalias !188, !dereferenceable_or_null !42, !align !43
  %18 = call "enzyme_type"="{[-1]:Pointer, [-1,-1]:Float@double}" {} addrspace(10)* addrspace(13)* @julia.gc_loaded({} addrspace(10)* noundef %17, {} addrspace(10)** noundef %15) #5, !dbg !164
  %19 = add i64 %.sroa.08.0.copyload, -1, !dbg !190
  br label %L6.i, !dbg !190

L6.i:                                             ; preds = %L151.i, %entry
  %iv = phi i64 [ %iv.next, %L151.i ], [ 0, %entry ]
  %iv.next = add nuw nsw i64 %iv, 1
  %20 = add i64 %iv.next, -1
  %21 = mul i64 %20, %.sroa.08.0.copyload
  %22 = shl i64 %20, 1
  br label %L8.i, !dbg !192

L8.i:                                             ; preds = %L127.i, %L6.i
  %iv1 = phi i64 [ %iv.next2, %L127.i ], [ 0, %L6.i ]
  %iv.next2 = add nuw nsw i64 %iv1, 1, !dbg !193
  %.not = icmp sgt i64 %iv.next, %iv.next2, !dbg !193
  br i1 %.not, label %L69.i, label %L11.i, !dbg !195

L11.i:                                            ; preds = %L8.i
  %23 = add i64 %iv.next2, -1, !dbg !196
  %24 = mul i64 %23, %.sroa.08.0.copyload, !dbg !205
  %25 = load {} addrspace(10)**, {} addrspace(10)** addrspace(11)* %9, align 8, !dbg !206, !tbaa !37, !alias.scope !207, !noalias !208
  %26 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %10, align 8, !dbg !206, !tbaa !37, !alias.scope !207, !noalias !208, !dereferenceable_or_null !42, !align !43
  %27 = add i64 %24, %20, !dbg !206
  %28 = bitcast {} addrspace(10)* %26 to <{ i64, i8* }> addrspace(10)*, !dbg !209
  %29 = getelementptr inbounds <{ i64, i8* }>, <{ i64, i8* }> addrspace(10)* %28, i32 0, i32 1, !dbg !209
  %30 = load i8*, i8* addrspace(10)* %29, align 8, !dbg !209, !alias.scope !210, !noalias !213
  %31 = ptrtoint i8* %30 to i64, !dbg !209
  %32 = ptrtoint {} addrspace(10)** %25 to i64, !dbg !209
  %33 = sub nuw i64 %31, %32, !dbg !209
  %34 = add nuw i64 0, %33, !dbg !209
  %35 = mul i64 %27, 8, !dbg !209
  %36 = add i64 0, %35, !dbg !209
  %37 = add nuw i64 %34, %36, !dbg !209
  %38 = bitcast {} addrspace(10)** addrspace(10)* %8 to {} addrspace(10)* addrspace(10)*, !dbg !209
  %39 = bitcast {} addrspace(10)* addrspace(10)* %38 to {} addrspace(10)*, !dbg !209
  br label %L127.i, !dbg !209

L69.i:                                            ; preds = %L8.i
  %40 = load {} addrspace(10)**, {} addrspace(10)** addrspace(11)* %9, align 8, !dbg !215, !tbaa !37, !alias.scope !207, !noalias !208
  %41 = load {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %10, align 8, !dbg !215, !tbaa !37, !alias.scope !207, !noalias !208, !dereferenceable_or_null !42, !align !43
  %42 = add i64 %iv.next2, -1, !dbg !218
  %43 = add i64 %42, %21, !dbg !215
  %44 = bitcast {} addrspace(10)* %41 to <{ i64, i8* }> addrspace(10)*, !dbg !224
  %45 = getelementptr inbounds <{ i64, i8* }>, <{ i64, i8* }> addrspace(10)* %44, i32 0, i32 1, !dbg !224
  %46 = load i8*, i8* addrspace(10)* %45, align 8, !dbg !224, !alias.scope !225, !noalias !228
  %47 = ptrtoint i8* %46 to i64, !dbg !224
  %48 = ptrtoint {} addrspace(10)** %40 to i64, !dbg !224
  %49 = sub nuw i64 %47, %48, !dbg !224
  %50 = add nuw i64 0, %49, !dbg !224
  %51 = mul i64 %43, 8, !dbg !224
  %52 = add i64 0, %51, !dbg !224
  %53 = add nuw i64 %50, %52, !dbg !224
  %54 = bitcast {} addrspace(10)** addrspace(10)* %8 to {} addrspace(10)* addrspace(10)*, !dbg !224
  %55 = bitcast {} addrspace(10)* addrspace(10)* %54 to {} addrspace(10)*, !dbg !224
  br label %L127.i, !dbg !224

L127.i:                                           ; preds = %L69.i, %L11.i
  %.pre-phi = phi i64 [ %42, %L69.i ], [ %23, %L11.i ], !dbg !230
  %nodecayed.value_phi3.i.in.in = phi {} addrspace(10)* [ %55, %L69.i ], [ %39, %L11.i ]
  %nodecayedoff.value_phi3.i.in.in = phi i64 [ %53, %L69.i ], [ %37, %L11.i ]
  %56 = bitcast {} addrspace(10)* %nodecayed.value_phi3.i.in.in to <{ i64, i8* }> addrspace(10)*, !dbg !232
  %57 = getelementptr inbounds <{ i64, i8* }>, <{ i64, i8* }> addrspace(10)* %56, i32 0, i32 1, !dbg !232
  %58 = load i8*, i8* addrspace(10)* %57, align 8, !dbg !232, !alias.scope !233, !noalias !236
  %59 = bitcast i8* %58 to {} addrspace(10)**, !dbg !232
  %60 = call {} addrspace(10)* addrspace(13)* @julia.gc_loaded({} addrspace(10)* %nodecayed.value_phi3.i.in.in, {} addrspace(10)** %59) #5, !dbg !232
  %61 = bitcast {} addrspace(10)* addrspace(13)* %60 to i8 addrspace(13)*, !dbg !232
  %62 = getelementptr i8, i8 addrspace(13)* %61, i64 %nodecayedoff.value_phi3.i.in.in, !dbg !232
  %63 = bitcast i8 addrspace(13)* %62 to {} addrspace(10)* addrspace(13)*, !dbg !232
  %value_phi3.i.in = bitcast {} addrspace(10)* addrspace(13)* %63 to double addrspace(13)*, !dbg !232
  %value_phi3.i = load double, double addrspace(13)* %value_phi3.i.in, align 8, !dbg !232, !tbaa !90, !alias.scope !238, !noalias !241
  %64 = add i64 %.pre-phi, %22, !dbg !243
  %65 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(13)* %18, i64 %64, !dbg !243
  %66 = bitcast {} addrspace(10)* addrspace(13)* %65 to double addrspace(13)*, !dbg !243
  store double %value_phi3.i, double addrspace(13)* %66, align 8, !dbg !243, !tbaa !90, !alias.scope !93, !noalias !244
  %.not20 = icmp eq i64 %iv.next2, %.sroa.08.0.copyload, !dbg !247
  br i1 %.not20, label %L151.i, label %L8.i, !dbg !249

L151.i:                                           ; preds = %L127.i
  %.not21 = icmp eq i64 %iv.next, %.sroa.08.0.copyload, !dbg !250
  br i1 %.not21, label %julia_divdriver_herm_4501_inner.exit, label %L6.i, !dbg !252

julia_divdriver_herm_4501_inner.exit:             ; preds = %L151.i
  ret void
}

dest = [2.0e-323 4.0; 4.0 2.0]
dest = [4.0 1.0; 1.0 5.0]

@wsmoses wsmoses closed this as completed Oct 23, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant