Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 213 additions & 0 deletions libs/jit/src/jit.erl
Original file line number Diff line number Diff line change
Expand Up @@ -3359,6 +3359,65 @@ op_gc_bif2(
Arg2Value = Arg2 bsr 4,
Range2 = {Arg2Value, Arg2Value},
op_gc_bif2_bxor(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2);
% '*' with range information on both operands: hand both ranges to
% op_gc_bif2_mul/10, which decides whether the product can be computed inline.
op_gc_bif2(
    MMod,
    MSt0,
    FailLabel,
    Live,
    Bif,
    erlang,
    '*',
    {typed, Lhs, {t_integer, LhsRange}},
    {typed, Rhs, {t_integer, RhsRange}},
    Dest
) ->
    op_gc_bif2_mul(MMod, MSt0, FailLabel, Live, Bif, Lhs, Rhs, Dest, LhsRange, RhsRange);
% '*' with a ranged left operand and a right operand that is an integer
% carrying the immediate-integer tag: the literal's value (tag bits shifted
% out) is turned into a degenerate single-point range.
op_gc_bif2(
    MMod,
    MSt0,
    FailLabel,
    Live,
    Bif,
    erlang,
    '*',
    {typed, Lhs, {t_integer, LhsRange}},
    TaggedLiteral,
    Dest
) when
    is_integer(TaggedLiteral),
    TaggedLiteral band ?TERM_IMMED_TAG_MASK =:= ?TERM_INTEGER_TAG
->
    Literal = TaggedLiteral bsr 4,
    LiteralRange = {Literal, Literal},
    op_gc_bif2_mul(
        MMod, MSt0, FailLabel, Live, Bif, Lhs, TaggedLiteral, Dest, LhsRange, LiteralRange
    );
% 'bsl' with a ranged left operand and a tagged literal shift count: the
% untagged count is passed on so op_gc_bif2_bsl/10 can decide whether the
% shifted value is guaranteed to fit and may be inlined.
op_gc_bif2(
    MMod,
    MSt0,
    FailLabel,
    Live,
    Bif,
    erlang,
    'bsl',
    {typed, Value, {t_integer, ValueRange}},
    TaggedShift,
    Dest
) when
    is_integer(TaggedShift),
    TaggedShift band ?TERM_IMMED_TAG_MASK =:= ?TERM_INTEGER_TAG
->
    op_gc_bif2_bsl(
        MMod, MSt0, FailLabel, Live, Bif, Value, TaggedShift, Dest, ValueRange, TaggedShift bsr 4
    );
% 'bsr' with a ranged left operand and a tagged literal shift count: the
% untagged count is passed on so op_gc_bif2_bsr/10 can decide whether the
% shift may be inlined.
op_gc_bif2(
    MMod,
    MSt0,
    FailLabel,
    Live,
    Bif,
    erlang,
    'bsr',
    {typed, Value, {t_integer, ValueRange}},
    TaggedShift,
    Dest
) when
    is_integer(TaggedShift),
    TaggedShift band ?TERM_IMMED_TAG_MASK =:= ?TERM_INTEGER_TAG
->
    op_gc_bif2_bsr(
        MMod, MSt0, FailLabel, Live, Bif, Value, TaggedShift, Dest, ValueRange, TaggedShift bsr 4
    );
% Default case
op_gc_bif2(
MMod, MSt0, FailLabel, Live, Bif, _Module, _Function, {typed, Arg1, _}, {typed, Arg2, _}, Dest
Expand Down Expand Up @@ -3587,6 +3646,160 @@ op_gc_bif2_bxor(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Rang
op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest)
end.

% Compile-time test for inlining a multiplication.
% Yields true only when both ranges have integer bounds and every product of
% a bound of Range1 with a bound of Range2 lies inside the small-integer
% bounds reported for the backend; any other range shape yields false.
can_inline_mul(Range1, Range2, MMod) ->
    {Lowest, Highest} = small_integer_bounds(MMod),
    case {Range1, Range2} of
        {{LoA, HiA}, {LoB, HiB}} when
            is_integer(LoA), is_integer(HiA), is_integer(LoB), is_integer(HiB)
        ->
            % The extreme values of a product over two intervals are always
            % reached at the interval corners, so checking the four corner
            % products covers the whole result range.
            Corners = [A * B || A <- [LoA, HiA], B <- [LoB, HiB]],
            lists:all(fun(P) -> P >= Lowest andalso P =< Highest end, Corners);
        _ ->
            false
    end.

% Emit code for `Arg1 * Arg2` using the operand ranges known at compile time.
%
% Arg1 is a VM/native register operand; Arg2 is either a tagged literal
% integer (first clause) or another register operand (second clause).
% Range1/Range2 are the operand ranges checked by can_inline_mul/3.
% When the product is not proven to stay a small integer, or the literal is
% not greater than 1, generation is delegated to op_gc_bif2_default/8.
% Returns the updated backend state.
op_gc_bif2_mul(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) when
    is_integer(Arg2)
->
    Arg2Value = Arg2 bsr 4,
    % Literals 0 and 1 and negative literals are left to the generic path:
    % they would need dedicated handling and are typically folded by the
    % compiler anyway.
    case can_inline_mul(Range1, Range2, MMod) andalso Arg2Value > 1 of
        true ->
            {MSt1, Reg} = MMod:move_to_native_register(MSt0, Arg1),
            % Clear the tag bits: Reg now holds value1 << 4.
            {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, bnot (?TERM_IMMED_TAG_MASK)),
            % (value1 << 4) * C = (value1 * C) << 4, so only the tag is missing.
            MSt3 = MMod:mul(MSt2, Reg, Arg2Value),
            MSt4 = MMod:or_(MSt3, Reg, ?TERM_INTEGER_TAG),
            MSt5 = MMod:move_to_vm_register(MSt4, Reg, Dest),
            MMod:free_native_registers(MSt5, [Reg, Dest]);
        false ->
            op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest)
    end;
op_gc_bif2_mul(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, Range2) ->
    case can_inline_mul(Range1, Range2, MMod) of
        true ->
            {MSt1, Reg1} = MMod:move_to_native_register(MSt0, Arg1),
            {MSt2, Reg2} = MMod:move_to_native_register(MSt1, Arg2),
            % Clear the tag bits of the first operand: Reg1 = value1 << 4.
            {MSt3, Reg1} = MMod:and_(MSt2, {free, Reg1}, bnot (?TERM_IMMED_TAG_MASK)),
            % Shifting right by 4 already discards the tag bits of the second
            % operand, so no separate masking instruction is emitted for it.
            % The upper 4 bits of Reg2 after the shift do not matter (whether
            % the backend shift is logical or arithmetic): Reg1 is a multiple
            % of 16, so those bits only contribute above the register width.
            % NOTE(review): this relies on MMod:mul/3 being a wrapping
            % word-sized multiply - confirm for new backends.
            {MSt4, Reg2} = MMod:shift_right(MSt3, {free, Reg2}, 4),
            % (value1 << 4) * value2 = (value1 * value2) << 4
            MSt5 = MMod:mul(MSt4, Reg1, Reg2),
            % Put the integer tag back.
            MSt6 = MMod:or_(MSt5, Reg1, ?TERM_INTEGER_TAG),
            MSt7 = MMod:move_to_vm_register(MSt6, Reg1, Dest),
            MMod:free_native_registers(MSt7, [Reg1, Reg2, Dest]);
        false ->
            op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest)
    end.

% Check if a left shift can be inlined based on the operand range and the
% literal shift amount.
%
% Returns true only when Range1 has integer bounds, the shift amount is
% non-negative, the shift amount plus the 4 tag bits is smaller than the
% register width, and both shifted bounds lie within the small-integer bounds
% of the backend. Otherwise returns false and the caller uses the generic path.
can_inline_bsl(Range1, ShiftAmount, MMod) ->
    {MinSafe, MaxSafe} = small_integer_bounds(MMod),
    % Same width limit as can_inline_bsr/3. Without it a {0, 0} range would
    % accept any shift amount (0 bsl N is always 0) and a native shift by a
    % count at or above the register width would be emitted. The limit also
    % keeps the compile-time `bsl` below from building huge integers for
    % very large literal shift counts.
    WordBits = MMod:word_size() * 8,
    case Range1 of
        {Min1, Max1} when
            is_integer(Min1),
            is_integer(Max1),
            ShiftAmount >= 0,
            ShiftAmount + 4 < WordBits
        ->
            % bsl is monotonic for a fixed non-negative shift, so checking
            % the two shifted bounds covers the whole range.
            MinResult = Min1 bsl ShiftAmount,
            MaxResult = Max1 bsl ShiftAmount,
            MinResult >= MinSafe andalso MaxResult =< MaxSafe;
        _ ->
            false
    end.

% Emit code for `Arg1 bsl ShiftAmount` with a literal shift amount.
% When can_inline_bsl/3 rejects the operation the generic BIF path is used.
% A zero shift is a plain copy into Dest; any other accepted shift clears the
% tag bits, shifts the value in a native register and restores the tag.
op_gc_bif2_bsl(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, ShiftAmount) ->
    case {can_inline_bsl(Range1, ShiftAmount, MMod), ShiftAmount} of
        {false, _} ->
            op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest);
        {true, 0} ->
            % Shifting by zero leaves the term unchanged.
            {MStA, Src} = MMod:move_to_native_register(MSt0, Arg1),
            MStB = MMod:move_to_vm_register(MStA, Src, Dest),
            MMod:free_native_registers(MStB, [Src, Dest]);
        {true, _} ->
            {MStA, Work} = MMod:move_to_native_register(MSt0, Arg1),
            % With the tag bits cleared the register holds value << 4, so a
            % left shift keeps the low 4 bits free for the tag.
            {MStB, Work} = MMod:and_(MStA, {free, Work}, bnot (?TERM_IMMED_TAG_MASK)),
            MStC = MMod:shift_left(MStB, Work, ShiftAmount),
            MStD = MMod:or_(MStC, Work, ?TERM_INTEGER_TAG),
            MStE = MMod:move_to_vm_register(MStD, Work, Dest),
            MMod:free_native_registers(MStE, [Work, Dest])
    end.

% Decide whether a right shift by a literal amount may be inlined.
% Inlining is restricted to operands whose range is known to be non-negative:
% the inline sequence relies on a logical shift right, which would not keep
% the sign of a negative value. The upper bound of the range must also lie
% within the backend's small-integer bounds.
can_inline_bsr(Range1, ShiftAmount, MMod) ->
    {_, Highest} = small_integer_bounds(MMod),
    % The emitted shift count is ShiftAmount + 4 (tag bits included) and has
    % to stay below the register width to be a valid native shift.
    RegisterBits = MMod:word_size() * 8,
    case Range1 of
        {Lo, Hi} when
            is_integer(Lo),
            is_integer(Hi),
            Lo >= 0,
            ShiftAmount >= 0,
            ShiftAmount + 4 < RegisterBits
        ->
            % Shifting a non-negative value right never increases it, so
            % bounding the input bounds the result as well.
            Hi =< Highest;
        _ ->
            false
    end.

% Emit code for `Arg1 bsr ShiftAmount` with a literal shift amount.
% When can_inline_bsr/3 rejects the operation the generic BIF path is used.
% A zero shift is a plain copy into Dest; any other accepted shift is done in
% a native register and re-tagged.
op_gc_bif2_bsr(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest, Range1, ShiftAmount) ->
    case {can_inline_bsr(Range1, ShiftAmount, MMod), ShiftAmount} of
        {false, _} ->
            op_gc_bif2_default(MMod, MSt0, FailLabel, Live, Bif, Arg1, Arg2, Dest);
        {true, 0} ->
            % Shifting by zero leaves the term unchanged.
            {MStA, Src} = MMod:move_to_native_register(MSt0, Arg1),
            MStB = MMod:move_to_vm_register(MStA, Src, Dest),
            MMod:free_native_registers(MStB, [Src, Dest]);
        {true, _} ->
            {MStA, Work} = MMod:move_to_native_register(MSt0, Arg1),
            % One shift by ShiftAmount + 4 drops the 4 tag bits and applies
            % the requested shift at once, so no masking step is needed.
            {MStB, Work} = MMod:shift_right(MStA, {free, Work}, ShiftAmount + 4),
            % Make room for the tag again, then set it.
            MStC = MMod:shift_left(MStB, Work, 4),
            MStD = MMod:or_(MStC, Work, ?TERM_INTEGER_TAG),
            MStE = MMod:move_to_vm_register(MStD, Work, Dest),
            MMod:free_native_registers(MStE, [Work, Dest])
    end.

% Strip the type annotation from an argument: a `{typed, Arg, Type}` triple
% yields `Arg`; any other term is returned unchanged.
unwrap_typed(MaybeTyped) ->
    case MaybeTyped of
        {typed, Inner, _Annotation} -> Inner;
        Untyped -> Untyped
    end.
Expand Down
13 changes: 10 additions & 3 deletions libs/jit/src/jit_aarch64.erl
Original file line number Diff line number Diff line change
Expand Up @@ -2515,10 +2515,10 @@ sub(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State
%% @end
%% @param State current backend state
%% @param Reg register to multiply
%% @param Val constant multiplier (non-negative integer)
%% @param Val multiplier (an integer constant or a register)
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec mul(state(), aarch64_register(), non_neg_integer()) -> state().
-spec mul(state(), aarch64_register(), integer() | aarch64_register()) -> state().
mul(State, _Reg, 1) ->
State;
mul(State, Reg, 2) ->
Expand Down Expand Up @@ -2579,12 +2579,19 @@ mul(
State,
Reg,
Val
) ->
) when is_integer(Val) ->
Temp = first_avail(Avail),
I1 = jit_aarch64_asm:mov(Temp, Val),
I2 = jit_aarch64_asm:mul(Reg, Reg, Temp),
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp), Reg),
State#state{stream = Stream1, regs = Regs1};
mul(
#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DestReg, SrcReg
) when is_atom(SrcReg) ->
I1 = jit_aarch64_asm:mul(DestReg, DestReg, SrcReg),
Stream1 = StreamModule:append(Stream0, I1),
Regs1 = jit_regs:invalidate_reg(Regs0, DestReg),
State#state{stream = Stream1, regs = Regs1}.

%%-----------------------------------------------------------------------------
Expand Down
12 changes: 10 additions & 2 deletions libs/jit/src/jit_armv6m.erl
Original file line number Diff line number Diff line change
Expand Up @@ -3588,6 +3588,7 @@ sub(#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} =
Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp),
State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}.

-spec mul(state(), armv6m_register(), integer() | armv6m_register()) -> state().
mul(State, _Reg, 1) ->
State;
mul(State, Reg, 2) ->
Expand Down Expand Up @@ -3647,7 +3648,7 @@ mul(
#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0,
Reg,
Val
) ->
) when is_integer(Val) ->
Temp = first_avail(Avail),
TempBit = reg_bit(Temp),
AT = Avail band (bnot TempBit),
Expand All @@ -3658,7 +3659,14 @@ mul(
Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp), Reg),
State1#state{
stream = Stream2, available_regs = State1#state.available_regs bor TempBit, regs = Regs1
}.
};
mul(
#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DestReg, SrcReg
) when is_atom(SrcReg) ->
I = jit_armv6m_asm:muls(DestReg, SrcReg),
Stream1 = StreamModule:append(Stream0, I),
Regs1 = jit_regs:invalidate_reg(Regs0, DestReg),
State#state{stream = Stream1, regs = Regs1}.

%%
%% Analysis of AArch64 pattern and ARM Thumb mapping:
Expand Down
12 changes: 10 additions & 2 deletions libs/jit/src/jit_riscv32.erl
Original file line number Diff line number Diff line change
Expand Up @@ -3117,6 +3117,7 @@ sub(#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} =
Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Reg), Temp),
State1#state{available_regs = Avail, stream = Stream2, regs = Regs1}.

-spec mul(state(), riscv32_register(), integer() | riscv32_register()) -> state().
mul(State, _Reg, 1) ->
State;
mul(State, Reg, 2) ->
Expand Down Expand Up @@ -3176,7 +3177,7 @@ mul(
#state{stream_module = StreamModule, available_regs = Avail, regs = Regs0} = State0,
Reg,
Val
) ->
) when is_integer(Val) ->
Temp = first_avail(Avail),
AT = Avail band (bnot reg_bit(Temp)),
State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
Expand All @@ -3188,7 +3189,14 @@ mul(
stream = Stream2,
available_regs = State1#state.available_regs bor reg_bit(Temp),
regs = Regs1
}.
};
mul(
#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DestReg, SrcReg
) when is_atom(SrcReg) ->
I = jit_riscv32_asm:mul(DestReg, DestReg, SrcReg),
Stream1 = StreamModule:append(Stream0, I),
Regs1 = jit_regs:invalidate_reg(Regs0, DestReg),
State#state{stream = Stream1, regs = Regs1}.

%%
%% RISC-V32 implementation (no prolog/epilog needed due to 32 registers):
Expand Down
14 changes: 12 additions & 2 deletions libs/jit/src/jit_x86_64.erl
Original file line number Diff line number Diff line change
Expand Up @@ -2564,6 +2564,7 @@ sub(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State
Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
State#state{stream = Stream1, regs = Regs1}.

-spec mul(state(), x86_64_register(), integer() | x86_64_register()) -> state().
mul(State, _Reg, 1) ->
State;
mul(State, Reg, 2) ->
Expand All @@ -2584,17 +2585,26 @@ mul(
} = State,
Reg,
Val
) when Val < -16#80000000 orelse Val > 16#7FFFFFFF ->
) when is_integer(Val), (Val < -16#80000000 orelse Val > 16#7FFFFFFF) ->
TempReg = first_avail(Avail),
I1 = jit_x86_64_asm:movabsq(Val, TempReg),
I2 = jit_x86_64_asm:imulq(TempReg, Reg),
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, TempReg), Reg),
State#state{stream = Stream1, regs = Regs1};
mul(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Val) ->
mul(#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, Reg, Val) when
is_integer(Val)
->
I1 = jit_x86_64_asm:imulq(Val, Reg),
Stream1 = StreamModule:append(Stream0, I1),
Regs1 = jit_regs:invalidate_reg(Regs0, Reg),
State#state{stream = Stream1, regs = Regs1};
mul(
#state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DestReg, SrcReg
) when is_atom(SrcReg) ->
I1 = jit_x86_64_asm:imulq(SrcReg, DestReg),
Stream1 = StreamModule:append(Stream0, I1),
Regs1 = jit_regs:invalidate_reg(Regs0, DestReg),
State#state{stream = Stream1, regs = Regs1}.

%% Signed integer division: quotient = DividendReg / DivisorReg
Expand Down
Loading
Loading