From b38f8d3867fe848c51b46e0e100cb4101a0af0098642990c73d288d886ee25dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=98=D0=BD=D0=B4=D0=B8=D0=B3?= =?UTF-8?q?=D0=BE?= Date: Wed, 28 Aug 2024 04:00:36 +0000 Subject: [PATCH 1/8] - Updated to 2.1.20240815 (2.1.1723681758). * Changed file luajit2-name.patch. * Added ppc64le support. OBS-URL: https://build.opensuse.org/package/show/devel:languages:lua/luajit2?expand=0&rev=7 --- .gitattributes | 23 ++++++ .gitignore | 1 + baselibs.conf | 1 + luajit2-2.1-20240314.tar.gz | 3 + luajit2-2.1-20240815.tar.gz | 3 + luajit2-name.patch | 156 ++++++++++++++++++++++++++++++++++++ luajit2.changes | 105 ++++++++++++++++++++++++ luajit2.spec | 95 ++++++++++++++++++++++ 8 files changed, 387 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 baselibs.conf create mode 100644 luajit2-2.1-20240314.tar.gz create mode 100644 luajit2-2.1-20240815.tar.gz create mode 100644 luajit2-name.patch create mode 100644 luajit2.changes create mode 100644 luajit2.spec diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text 
+*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/baselibs.conf b/baselibs.conf new file mode 100644 index 0000000..a0db4e9 --- /dev/null +++ b/baselibs.conf @@ -0,0 +1 @@ +libluajit2-5_1-2 diff --git a/luajit2-2.1-20240314.tar.gz b/luajit2-2.1-20240314.tar.gz new file mode 100644 index 0000000..98b6b08 --- /dev/null +++ b/luajit2-2.1-20240314.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efddc4104a0ce720ddf4da3d9bce927f3c5816a8a45a043462ca58914cde271 +size 1162566 diff --git a/luajit2-2.1-20240815.tar.gz b/luajit2-2.1-20240815.tar.gz new file mode 100644 index 0000000..b16de02 --- /dev/null +++ b/luajit2-2.1-20240815.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e59ec13c301c8b2855838b1248def49ef348a3e7563fabef677431706718145 +size 1163521 diff --git a/luajit2-name.patch b/luajit2-name.patch new file mode 100644 index 0000000..55c511b --- /dev/null +++ b/luajit2-name.patch @@ -0,0 +1,156 @@ +diff -Pdpru luajit2-2.1-20240815.orig/Makefile luajit2-2.1-20240815/Makefile +--- luajit2-2.1-20240815.orig/Makefile 2024-08-15 03:29:18.000000000 +0300 ++++ luajit2-2.1-20240815/Makefile 2024-08-28 06:32:57.210186012 +0300 +@@ -38,10 +38,10 @@ DPREFIX= $(DESTDIR)$(PREFIX) + INSTALL_BIN= $(DPREFIX)/bin + INSTALL_LIB= $(DPREFIX)/$(MULTILIB) + INSTALL_SHARE= $(DPREFIX)/share +-INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION) ++INSTALL_DEFINC= $(DPREFIX)/include/luajit2-$(MMVERSION) + INSTALL_INC= $(INSTALL_DEFINC) + +-export INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION) ++export INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit2-$(MMVERSION) + INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit + INSTALL_LMODD= $(INSTALL_SHARE)/lua + INSTALL_LMOD= 
$(INSTALL_LMODD)/$(ABIVER) +@@ -50,23 +50,20 @@ INSTALL_CMOD= $(INSTALL_CMODD)/$(ABIVER) + INSTALL_MAN= $(INSTALL_SHARE)/man/man1 + INSTALL_PKGCONFIG= $(INSTALL_LIB)/pkgconfig + +-INSTALL_TNAME= luajit-$(VERSION) +-INSTALL_TSYMNAME= luajit +-INSTALL_ANAME= libluajit-$(ABIVER).a +-INSTALL_SOSHORT1= libluajit-$(ABIVER).so +-INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER) +-INSTALL_SONAME= libluajit-$(ABIVER).so.$(VERSION) +-INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib +-INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib +-INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(VERSION).dylib +-INSTALL_PCNAME= luajit.pc ++INSTALL_TNAME= luajit2 ++INSTALL_MANNAME= $(INSTALL_TNAME).1 ++INSTALL_SOSHORT1= libluajit2-$(ABIVER).so ++INSTALL_SOSHORT2= libluajit2-$(ABIVER).so.$(MAJVER) ++INSTALL_SONAME= libluajit2-$(ABIVER).so.$(VERSION) ++INSTALL_DYLIBSHORT1= libluajit2-$(ABIVER).dylib ++INSTALL_DYLIBSHORT2= libluajit2-$(ABIVER).$(MAJVER).dylib ++INSTALL_DYLIBNAME= libluajit2-$(ABIVER).$(VERSION).dylib ++INSTALL_PCNAME= luajit2.pc + +-INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) + INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME) + INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT1) + INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT2) + INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME) +-INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME) + INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME) + + INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ +@@ -78,20 +75,27 @@ RM= rm -f + MKDIR= mkdir -p + RMDIR= rmdir 2>/dev/null + SYMLINK= ln -sf +-INSTALL_X= install -m 0755 +-INSTALL_F= install -m 0644 ++INSTALL_X= install -pm0755 ++INSTALL_F= install -pm0644 + UNINSTALL= $(RM) + LDCONFIG= ldconfig -n 2>/dev/null ++SED_MAN= sed -e "s|luajit|luajit2|g" \ ++ -e "s|LuaJIT|LuaJIT2|g" \ ++ -e "s|luajit2.org|github.com/openresty/luajit2|" \ ++ -e "s|Just-In-Time Compiler for the Lua Language|OpenResty's maintained branch of LuaJIT|" + SED_PC= sed -e 
"s|^prefix=.*|prefix=$(PREFIX)|" \ + -e "s|^multilib=.*|multilib=$(MULTILIB)|" \ +- -e "s|^relver=.*|relver=$(RELVER)|" ++ -e "s|^relver=.*|relver=$(RELVER)|" \ ++ -e "s|luajit-|luajit2-|g" \ ++ -e "s|LuaJIT|LuaJIT2|g" \ ++ -e "s|luajit.org|github.com/openresty/luajit2|" \ ++ -e "s|Just-in-time compiler for Lua|OpenResty's maintained branch of LuaJIT|" + ifneq ($(INSTALL_DEFINC),$(INSTALL_INC)) + SED_PC+= -e "s|^includedir=.*|includedir=$(INSTALL_INC)|" + endif + +-FILE_T= luajit +-FILE_A= libluajit.a +-FILE_SO= libluajit.so ++FILE_T= luajit2 ++FILE_SO= libluajit2.so + FILE_MAN= luajit.1 + FILE_PC= luajit.pc + FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h +@@ -130,25 +134,25 @@ install: $(INSTALL_DEP) + @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" + $(MKDIR) $(INSTALL_DIRS) + cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) +- cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : + $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) + cd src && test -f $(FILE_SO) && \ + $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ + ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : +- cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) ++ cd etc && $(SED_MAN) $(FILE_MAN) > $(FILE_MAN).tmp && \ ++ $(INSTALL_F) $(FILE_MAN).tmp $(INSTALL_MAN)/$(INSTALL_MANNAME) && \ ++ $(RM) $(FILE_MAN).tmp + cd etc && $(SED_PC) $(FILE_PC) > $(FILE_PC).tmp && \ + $(INSTALL_F) $(FILE_PC).tmp $(INSTALL_PC) && \ + $(RM) $(FILE_PC).tmp + cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) + cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) +- $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) + @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" + + uninstall: + @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" +- $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) 
$(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) ++ $(UNINSTALL) $(INSTALL_T) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + for file in $(FILES_JITLIB); do \ + $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ + done +diff -Pdpru luajit2-2.1-20240815.orig/src/luaconf.h luajit2-2.1-20240815/src/luaconf.h +--- luajit2-2.1-20240815.orig/src/luaconf.h 2024-08-15 03:29:18.000000000 +0300 ++++ luajit2-2.1-20240815/src/luaconf.h 2024-08-28 06:35:50.295523664 +0300 +@@ -51,7 +51,7 @@ + #endif + + #ifndef LUA_LJDIR +-#define LUA_LJDIR LUA_JROOT "/share/luajit-2.1" ++#define LUA_LJDIR LUA_JROOT "/share/luajit2-2.1" + #endif + + #define LUA_JPATH ";" LUA_LJDIR "/?.lua" +diff -Pdpru luajit2-2.1-20240815.orig/src/Makefile luajit2-2.1-20240815/src/Makefile +--- luajit2-2.1-20240815.orig/src/Makefile 2024-08-15 03:29:18.000000000 +0300 ++++ luajit2-2.1-20240815/src/Makefile 2024-08-28 06:40:25.429926912 +0300 +@@ -214,11 +214,11 @@ TARGET_AR= $(CROSS)ar rcus + TARGET_STRIP= $(CROSS)strip + + TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) +-TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) +-TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib ++TARGET_SONAME= libluajit2-$(ABIVER).so.$(MAJVER) ++TARGET_DYLIBNAME= libluajit2-$(ABIVER).$(MAJVER).dylib + TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) + TARGET_DLLNAME= lua$(NODOTABIVER).dll +-TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a ++TARGET_DLLDOTANAME= libluajit2-$(ABIVER).dll.a + TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) + TARGET_DYNXLDOPTS= + +@@ -544,9 +544,9 @@ LIB_VMDEF= jit/vmdef.lua + LIB_VMDEFP= $(LIB_VMDEF) + + LUAJIT_O= luajit.o +-LUAJIT_A= libluajit.a +-LUAJIT_SO= libluajit.so +-LUAJIT_T= luajit ++LUAJIT_A= libluajit2.a ++LUAJIT_SO= libluajit2.so ++LUAJIT_T= luajit2 + + ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T) + ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ diff --git 
a/luajit2.changes b/luajit2.changes new file mode 100644 index 0000000..1c30e15 --- /dev/null +++ b/luajit2.changes @@ -0,0 +1,105 @@ +------------------------------------------------------------------- +Wed Aug 28 03:54:34 UTC 2024 - Илья Индиго + +- Updated to 2.1.20240815 (2.1.1723681758). + * Changed file luajit2-name.patch. + * Added ppc64le support. + +------------------------------------------------------------------- +Fri May 10 04:09:49 UTC 2024 - Илья Индиго + +- Changed file luajit2-name.patch and fixed package.path for modules. + +------------------------------------------------------------------- +Fri May 3 21:45:30 UTC 2024 - Илья Индиго + +- Removed %check (tests don't run, and it's not known how to run them). + +------------------------------------------------------------------- +Wed May 1 02:13:06 UTC 2024 - Илья Индиго + +- Updated to 2.1.20240314 (2.1.1710398010). +- Refreshed spec-file via spec-cleaner and manual optimizations. +- Added file luajit2-name.patch. +- Renamed package to luajit2. + +------------------------------------------------------------------- +Thu Jan 26 09:08:11 UTC 2023 - Michal Suchanek + +- Opencode %make_build to prevent build failure when not defined. + +------------------------------------------------------------------- +Wed Jan 25 13:46:36 UTC 2023 - Matej Cepl + +- Rewrite moonjit package to build openresty from + https://github.com/openresty/luajit2. + +------------------------------------------------------------------- +Mon Jul 12 17:33:50 UTC 2021 - Matej Cepl + +- Rewrite the packaging to acommodate a possibility of two LuaJIT + implementations in the distribution. + +------------------------------------------------------------------- +Sat Jul 3 19:20:57 UTC 2021 - Matej Cepl + +- Fix Provides ... we don't provide an equvalent of 2.2.0 LuaJIT + (which doesn't exsts). 
+ +------------------------------------------------------------------- +Mon Mar 30 12:31:09 UTC 2020 - Matej Cepl + +- Add moonjit105-string_gsub.patch to fix gsub method, which breaks + other packages (namely neovim). + +------------------------------------------------------------------- +Thu Mar 12 12:09:34 UTC 2020 - Tomáš Chvátal + +- No need to namespace luajit with update-alternatives, we really + want just one version of it to be provided to keep things + simple + +------------------------------------------------------------------- +Thu Mar 12 11:42:21 UTC 2020 - Tomáš Chvátal + +- Update to 2.2.0: + * The v2.2 release of moonjit brings many new features that bring the + project closer to its aim of unifying LuaJIT forks and compatibility + with Lua. +- Do not suffix the version anymore as we support all lua variants, + removes the patch: + * luajit-lua-versioned.patch + +------------------------------------------------------------------- +Wed Dec 18 09:05:04 UTC 2019 - Michel Normand + +- Ignore make check error for PowerPC until upstream solved issue + https://github.com/moonjit/moonjit/issues/40 +- do the same for %arm rather than to disable all %check. 
+ +------------------------------------------------------------------- +Mon Dec 16 09:07:37 UTC 2019 - Guillaume GARDET + +- Disable %check on %arm until the following issue is fixed: + https://github.com/moonjit/moonjit/issues/9 + +------------------------------------------------------------------- +Thu Dec 12 14:12:25 UTC 2019 - Andreas Schwab + +- Use %_lib + +------------------------------------------------------------------- +Mon Dec 9 12:54:59 UTC 2019 - Tomáš Chvátal + +- Obsolete/provide lua51-luajit as it was the old luajit name + +------------------------------------------------------------------- +Fri Dec 6 07:44:53 UTC 2019 - Tomáš Chvátal + +- Fork of luajit with more active upstream +- Version 2.1.2 building on top of 2.1.0-beta3 with following goodies: + * fix for CVE-2019-19391 + * Support for ppc64 ppc64le s390 s390x +- Still carry the patch luajit-lua-versioned.patch to mark it as + compatible with lua 5.1, future 2.2 release should contain + support for 5.2 and 5.3 release of LUA diff --git a/luajit2.spec b/luajit2.spec new file mode 100644 index 0000000..10eeb85 --- /dev/null +++ b/luajit2.spec @@ -0,0 +1,95 @@ +# +# spec file for package luajit2 +# +# Copyright (c) 2024 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%define abi_ver 5.1 +%define so_ver 2 +%define lib_ver 5_1-%{so_ver} +%define major 2.1 +%define minor 20240815 +%define upstream 1723681758 +Name: luajit2 +Version: %{major}.%{minor} +Release: 0 +Summary: OpenResty's maintained branch of LuaJIT +License: MIT +URL: https://github.com/openresty/%{name} +Source0: https://github.com/openresty/%{name}/archive/refs/tags/v%{major}-%{minor}.tar.gz#/%{name}-%{major}-%{minor}.tar.gz +Source1: baselibs.conf +Patch0: %{name}-name.patch +BuildRequires: pkgconfig +Requires: lib%{name}-%{lib_ver} = %{version} + +%description +This is the official OpenResty branch of LuaJIT. It is not to be considered a fork, +since we still regularly synchronize changes from the upstream LuaJIT project. + +%package -n lib%{name}-%{lib_ver} +Summary: Library for LuaJIT2 compiler + +%description -n lib%{name}-%{lib_ver} +Libraries to use LuaJIT2 compiler. + +%package devel +Summary: Devel files for %{name} +Requires: %{name} = %{version} +Requires: lib%{name}-%{lib_ver} = %{version} + +%description devel +Devel files for %{name} package. 
+ +%prep +%autosetup -p1 -n %{name}-%{major}-%{minor} + +%build +%make_build %{?_make_output_sync} \ + Q= \ + DYNAMIC_CC="cc -fPIC" \ + LDCONFIG="true" \ + TARGET_AR="ar rcus" \ + TARGET_STRIP=: \ + PREFIX=%{_prefix} \ + MULTILIB=%{_lib} \ + CFLAGS="%{optflags}" + +%install +%make_install \ + DYNAMIC_CC="cc -fPIC" \ + LDCONFIG="true" \ + TARGET_AR="ar rcus" \ + TARGET_STRIP=: \ + PREFIX=%{_prefix} \ + MULTILIB=%{_lib} + +%post -n lib%{name}-%{lib_ver} -p /sbin/ldconfig +%postun -n lib%{name}-%{lib_ver} -p /sbin/ldconfig + +%files +%{_bindir}/%{name} +%{_mandir}/man1/%{name}.1%{?ext_man} +%{_datadir}/%{name}-%{major} + +%files -n lib%{name}-%{lib_ver} +%{_libdir}/lib%{name}-%{abi_ver}.so.%{so_ver} +%{_libdir}/lib%{name}-%{abi_ver}.so.%{major}.%{upstream} + +%files devel +%{_includedir}/%{name}-%{major} +%{_libdir}/lib%{name}-%{abi_ver}.so +%{_libdir}/pkgconfig/%{name}.pc + +%changelog -- 2.51.1 From d03ca043d25ff4aee7c96f341d067931c973d2a9995ca7d76c0fcd91f27d2528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=98=D0=BD=D0=B4=D0=B8=D0=B3?= =?UTF-8?q?=D0=BE?= Date: Thu, 7 Nov 2024 08:05:04 +0000 Subject: [PATCH 2/8] - Updated to 2.1.20241104 (2.1.1728714540). * FFI: Added missing coercion when recording 64-bit bit.*(). * ARM64: Used ldr literal to load FP constants. 
OBS-URL: https://build.opensuse.org/package/show/devel:languages:lua/luajit2?expand=0&rev=9 --- .gitattributes | 23 ++++++ .gitignore | 1 + baselibs.conf | 1 + luajit2-2.1-20240314.tar.gz | 3 + luajit2-2.1-20240815.tar.gz | 3 + luajit2-2.1-20241104.tar.gz | 3 + luajit2-name.patch | 156 ++++++++++++++++++++++++++++++++++++ luajit2.changes | 112 ++++++++++++++++++++++++++ luajit2.spec | 95 ++++++++++++++++++++++ 9 files changed, 397 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 baselibs.conf create mode 100644 luajit2-2.1-20240314.tar.gz create mode 100644 luajit2-2.1-20240815.tar.gz create mode 100644 luajit2-2.1-20241104.tar.gz create mode 100644 luajit2-name.patch create mode 100644 luajit2.changes create mode 100644 luajit2.spec diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ 
b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/baselibs.conf b/baselibs.conf new file mode 100644 index 0000000..a0db4e9 --- /dev/null +++ b/baselibs.conf @@ -0,0 +1 @@ +libluajit2-5_1-2 diff --git a/luajit2-2.1-20240314.tar.gz b/luajit2-2.1-20240314.tar.gz new file mode 100644 index 0000000..98b6b08 --- /dev/null +++ b/luajit2-2.1-20240314.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efddc4104a0ce720ddf4da3d9bce927f3c5816a8a45a043462ca58914cde271 +size 1162566 diff --git a/luajit2-2.1-20240815.tar.gz b/luajit2-2.1-20240815.tar.gz new file mode 100644 index 0000000..b16de02 --- /dev/null +++ b/luajit2-2.1-20240815.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e59ec13c301c8b2855838b1248def49ef348a3e7563fabef677431706718145 +size 1163521 diff --git a/luajit2-2.1-20241104.tar.gz b/luajit2-2.1-20241104.tar.gz new file mode 100644 index 0000000..3de36b9 --- /dev/null +++ b/luajit2-2.1-20241104.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197a5eb626bc9e0c19dcb025a190735ca1e23890606204bd9ef4c9828d5c4d4a +size 1163274 diff --git a/luajit2-name.patch b/luajit2-name.patch new file mode 100644 index 0000000..55c511b --- /dev/null +++ b/luajit2-name.patch @@ -0,0 +1,156 @@ +diff -Pdpru luajit2-2.1-20240815.orig/Makefile luajit2-2.1-20240815/Makefile +--- luajit2-2.1-20240815.orig/Makefile 2024-08-15 03:29:18.000000000 +0300 ++++ luajit2-2.1-20240815/Makefile 2024-08-28 06:32:57.210186012 +0300 +@@ -38,10 +38,10 @@ DPREFIX= $(DESTDIR)$(PREFIX) + INSTALL_BIN= $(DPREFIX)/bin + INSTALL_LIB= $(DPREFIX)/$(MULTILIB) + INSTALL_SHARE= $(DPREFIX)/share +-INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION) ++INSTALL_DEFINC= $(DPREFIX)/include/luajit2-$(MMVERSION) + INSTALL_INC= $(INSTALL_DEFINC) + +-export INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION) ++export INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit2-$(MMVERSION) + INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit + INSTALL_LMODD= 
$(INSTALL_SHARE)/lua + INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) +@@ -50,23 +50,20 @@ INSTALL_CMOD= $(INSTALL_CMODD)/$(ABIVER) + INSTALL_MAN= $(INSTALL_SHARE)/man/man1 + INSTALL_PKGCONFIG= $(INSTALL_LIB)/pkgconfig + +-INSTALL_TNAME= luajit-$(VERSION) +-INSTALL_TSYMNAME= luajit +-INSTALL_ANAME= libluajit-$(ABIVER).a +-INSTALL_SOSHORT1= libluajit-$(ABIVER).so +-INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER) +-INSTALL_SONAME= libluajit-$(ABIVER).so.$(VERSION) +-INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib +-INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib +-INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(VERSION).dylib +-INSTALL_PCNAME= luajit.pc ++INSTALL_TNAME= luajit2 ++INSTALL_MANNAME= $(INSTALL_TNAME).1 ++INSTALL_SOSHORT1= libluajit2-$(ABIVER).so ++INSTALL_SOSHORT2= libluajit2-$(ABIVER).so.$(MAJVER) ++INSTALL_SONAME= libluajit2-$(ABIVER).so.$(VERSION) ++INSTALL_DYLIBSHORT1= libluajit2-$(ABIVER).dylib ++INSTALL_DYLIBSHORT2= libluajit2-$(ABIVER).$(MAJVER).dylib ++INSTALL_DYLIBNAME= libluajit2-$(ABIVER).$(VERSION).dylib ++INSTALL_PCNAME= luajit2.pc + +-INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) + INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME) + INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT1) + INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT2) + INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME) +-INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME) + INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME) + + INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ +@@ -78,20 +75,27 @@ RM= rm -f + MKDIR= mkdir -p + RMDIR= rmdir 2>/dev/null + SYMLINK= ln -sf +-INSTALL_X= install -m 0755 +-INSTALL_F= install -m 0644 ++INSTALL_X= install -pm0755 ++INSTALL_F= install -pm0644 + UNINSTALL= $(RM) + LDCONFIG= ldconfig -n 2>/dev/null ++SED_MAN= sed -e "s|luajit|luajit2|g" \ ++ -e "s|LuaJIT|LuaJIT2|g" \ ++ -e "s|luajit2.org|github.com/openresty/luajit2|" \ ++ -e "s|Just-In-Time Compiler for the Lua Language|OpenResty's maintained branch of LuaJIT|" + SED_PC= 
sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ + -e "s|^multilib=.*|multilib=$(MULTILIB)|" \ +- -e "s|^relver=.*|relver=$(RELVER)|" ++ -e "s|^relver=.*|relver=$(RELVER)|" \ ++ -e "s|luajit-|luajit2-|g" \ ++ -e "s|LuaJIT|LuaJIT2|g" \ ++ -e "s|luajit.org|github.com/openresty/luajit2|" \ ++ -e "s|Just-in-time compiler for Lua|OpenResty's maintained branch of LuaJIT|" + ifneq ($(INSTALL_DEFINC),$(INSTALL_INC)) + SED_PC+= -e "s|^includedir=.*|includedir=$(INSTALL_INC)|" + endif + +-FILE_T= luajit +-FILE_A= libluajit.a +-FILE_SO= libluajit.so ++FILE_T= luajit2 ++FILE_SO= libluajit2.so + FILE_MAN= luajit.1 + FILE_PC= luajit.pc + FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h +@@ -130,25 +134,25 @@ install: $(INSTALL_DEP) + @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" + $(MKDIR) $(INSTALL_DIRS) + cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) +- cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : + $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) + cd src && test -f $(FILE_SO) && \ + $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ + ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : +- cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) ++ cd etc && $(SED_MAN) $(FILE_MAN) > $(FILE_MAN).tmp && \ ++ $(INSTALL_F) $(FILE_MAN).tmp $(INSTALL_MAN)/$(INSTALL_MANNAME) && \ ++ $(RM) $(FILE_MAN).tmp + cd etc && $(SED_PC) $(FILE_PC) > $(FILE_PC).tmp && \ + $(INSTALL_F) $(FILE_PC).tmp $(INSTALL_PC) && \ + $(RM) $(FILE_PC).tmp + cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) + cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) +- $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) + @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" + + uninstall: + @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" +- $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) 
$(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) ++ $(UNINSTALL) $(INSTALL_T) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + for file in $(FILES_JITLIB); do \ + $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ + done +diff -Pdpru luajit2-2.1-20240815.orig/src/luaconf.h luajit2-2.1-20240815/src/luaconf.h +--- luajit2-2.1-20240815.orig/src/luaconf.h 2024-08-15 03:29:18.000000000 +0300 ++++ luajit2-2.1-20240815/src/luaconf.h 2024-08-28 06:35:50.295523664 +0300 +@@ -51,7 +51,7 @@ + #endif + + #ifndef LUA_LJDIR +-#define LUA_LJDIR LUA_JROOT "/share/luajit-2.1" ++#define LUA_LJDIR LUA_JROOT "/share/luajit2-2.1" + #endif + + #define LUA_JPATH ";" LUA_LJDIR "/?.lua" +diff -Pdpru luajit2-2.1-20240815.orig/src/Makefile luajit2-2.1-20240815/src/Makefile +--- luajit2-2.1-20240815.orig/src/Makefile 2024-08-15 03:29:18.000000000 +0300 ++++ luajit2-2.1-20240815/src/Makefile 2024-08-28 06:40:25.429926912 +0300 +@@ -214,11 +214,11 @@ TARGET_AR= $(CROSS)ar rcus + TARGET_STRIP= $(CROSS)strip + + TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) +-TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) +-TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib ++TARGET_SONAME= libluajit2-$(ABIVER).so.$(MAJVER) ++TARGET_DYLIBNAME= libluajit2-$(ABIVER).$(MAJVER).dylib + TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) + TARGET_DLLNAME= lua$(NODOTABIVER).dll +-TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a ++TARGET_DLLDOTANAME= libluajit2-$(ABIVER).dll.a + TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) + TARGET_DYNXLDOPTS= + +@@ -544,9 +544,9 @@ LIB_VMDEF= jit/vmdef.lua + LIB_VMDEFP= $(LIB_VMDEF) + + LUAJIT_O= luajit.o +-LUAJIT_A= libluajit.a +-LUAJIT_SO= libluajit.so +-LUAJIT_T= luajit ++LUAJIT_A= libluajit2.a ++LUAJIT_SO= libluajit2.so ++LUAJIT_T= luajit2 + + ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T) + ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ diff --git 
a/luajit2.changes b/luajit2.changes new file mode 100644 index 0000000..cf8b671 --- /dev/null +++ b/luajit2.changes @@ -0,0 +1,112 @@ +------------------------------------------------------------------- +Thu Nov 7 08:00:47 UTC 2024 - Илья Индиго + +- Updated to 2.1.20241104 (2.1.1728714540). + * FFI: Added missing coercion when recording 64-bit bit.*(). + * ARM64: Used ldr literal to load FP constants. + +------------------------------------------------------------------- +Wed Aug 28 03:54:34 UTC 2024 - Илья Индиго + +- Updated to 2.1.20240815 (2.1.1723681758). + * Changed file luajit2-name.patch. + * Added ppc64le support. + +------------------------------------------------------------------- +Fri May 10 04:09:49 UTC 2024 - Илья Индиго + +- Changed file luajit2-name.patch and fixed package.path for modules. + +------------------------------------------------------------------- +Fri May 3 21:45:30 UTC 2024 - Илья Индиго + +- Removed %check (tests don't run, and it's not known how to run them). + +------------------------------------------------------------------- +Wed May 1 02:13:06 UTC 2024 - Илья Индиго + +- Updated to 2.1.20240314 (2.1.1710398010). +- Refreshed spec-file via spec-cleaner and manual optimizations. +- Added file luajit2-name.patch. +- Renamed package to luajit2. + +------------------------------------------------------------------- +Thu Jan 26 09:08:11 UTC 2023 - Michal Suchanek + +- Opencode %make_build to prevent build failure when not defined. + +------------------------------------------------------------------- +Wed Jan 25 13:46:36 UTC 2023 - Matej Cepl + +- Rewrite moonjit package to build openresty from + https://github.com/openresty/luajit2. + +------------------------------------------------------------------- +Mon Jul 12 17:33:50 UTC 2021 - Matej Cepl + +- Rewrite the packaging to acommodate a possibility of two LuaJIT + implementations in the distribution. 
+ +------------------------------------------------------------------- +Sat Jul 3 19:20:57 UTC 2021 - Matej Cepl + +- Fix Provides ... we don't provide an equvalent of 2.2.0 LuaJIT + (which doesn't exsts). + +------------------------------------------------------------------- +Mon Mar 30 12:31:09 UTC 2020 - Matej Cepl + +- Add moonjit105-string_gsub.patch to fix gsub method, which breaks + other packages (namely neovim). + +------------------------------------------------------------------- +Thu Mar 12 12:09:34 UTC 2020 - Tomáš Chvátal + +- No need to namespace luajit with update-alternatives, we really + want just one version of it to be provided to keep things + simple + +------------------------------------------------------------------- +Thu Mar 12 11:42:21 UTC 2020 - Tomáš Chvátal + +- Update to 2.2.0: + * The v2.2 release of moonjit brings many new features that bring the + project closer to its aim of unifying LuaJIT forks and compatibility + with Lua. +- Do not suffix the version anymore as we support all lua variants, + removes the patch: + * luajit-lua-versioned.patch + +------------------------------------------------------------------- +Wed Dec 18 09:05:04 UTC 2019 - Michel Normand + +- Ignore make check error for PowerPC until upstream solved issue + https://github.com/moonjit/moonjit/issues/40 +- do the same for %arm rather than to disable all %check. 
+ +------------------------------------------------------------------- +Mon Dec 16 09:07:37 UTC 2019 - Guillaume GARDET + +- Disable %check on %arm until the following issue is fixed: + https://github.com/moonjit/moonjit/issues/9 + +------------------------------------------------------------------- +Thu Dec 12 14:12:25 UTC 2019 - Andreas Schwab + +- Use %_lib + +------------------------------------------------------------------- +Mon Dec 9 12:54:59 UTC 2019 - Tomáš Chvátal + +- Obsolete/provide lua51-luajit as it was the old luajit name + +------------------------------------------------------------------- +Fri Dec 6 07:44:53 UTC 2019 - Tomáš Chvátal + +- Fork of luajit with more active upstream +- Version 2.1.2 building on top of 2.1.0-beta3 with following goodies: + * fix for CVE-2019-19391 + * Support for ppc64 ppc64le s390 s390x +- Still carry the patch luajit-lua-versioned.patch to mark it as + compatible with lua 5.1, future 2.2 release should contain + support for 5.2 and 5.3 release of LUA diff --git a/luajit2.spec b/luajit2.spec new file mode 100644 index 0000000..84035d2 --- /dev/null +++ b/luajit2.spec @@ -0,0 +1,95 @@ +# +# spec file for package luajit2 +# +# Copyright (c) 2024 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%define abi_ver 5.1 +%define so_ver 2 +%define lib_ver 5_1-%{so_ver} +%define major 2.1 +%define minor 20241104 +%define upstream 1728714540 +Name: luajit2 +Version: %{major}.%{minor} +Release: 0 +Summary: OpenResty's maintained branch of LuaJIT +License: MIT +URL: https://github.com/openresty/%{name} +Source0: https://github.com/openresty/%{name}/archive/refs/tags/v%{major}-%{minor}.tar.gz#/%{name}-%{major}-%{minor}.tar.gz +Source1: baselibs.conf +Patch0: %{name}-name.patch +BuildRequires: pkgconfig +Requires: lib%{name}-%{lib_ver} = %{version} + +%description +This is the official OpenResty branch of LuaJIT. It is not to be considered a fork, +since we still regularly synchronize changes from the upstream LuaJIT project. + +%package -n lib%{name}-%{lib_ver} +Summary: Library for LuaJIT2 compiler + +%description -n lib%{name}-%{lib_ver} +Libraries to use LuaJIT2 compiler. + +%package devel +Summary: Devel files for %{name} +Requires: %{name} = %{version} +Requires: lib%{name}-%{lib_ver} = %{version} + +%description devel +Devel files for %{name} package. 
+ +%prep +%autosetup -p1 -n %{name}-%{major}-%{minor} + +%build +%make_build %{?_make_output_sync} \ + Q= \ + DYNAMIC_CC="cc -fPIC" \ + LDCONFIG="true" \ + TARGET_AR="ar rcus" \ + TARGET_STRIP=: \ + PREFIX=%{_prefix} \ + MULTILIB=%{_lib} \ + CFLAGS="%{optflags}" + +%install +%make_install \ + DYNAMIC_CC="cc -fPIC" \ + LDCONFIG="true" \ + TARGET_AR="ar rcus" \ + TARGET_STRIP=: \ + PREFIX=%{_prefix} \ + MULTILIB=%{_lib} + +%post -n lib%{name}-%{lib_ver} -p /sbin/ldconfig +%postun -n lib%{name}-%{lib_ver} -p /sbin/ldconfig + +%files +%{_bindir}/%{name} +%{_mandir}/man1/%{name}.1%{?ext_man} +%{_datadir}/%{name}-%{major} + +%files -n lib%{name}-%{lib_ver} +%{_libdir}/lib%{name}-%{abi_ver}.so.%{so_ver} +%{_libdir}/lib%{name}-%{abi_ver}.so.%{major}.%{upstream} + +%files devel +%{_includedir}/%{name}-%{major} +%{_libdir}/lib%{name}-%{abi_ver}.so +%{_libdir}/pkgconfig/%{name}.pc + +%changelog -- 2.51.1 From 91d46ac4c3ed0484489e8c05f316c14eed0911b9042d8cb821c51b6c6a6dc9fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=98=D0=BD=D0=B4=D0=B8=D0=B3?= =?UTF-8?q?=D0=BE?= Date: Sat, 23 Nov 2024 13:22:34 +0000 Subject: [PATCH 3/8] - Updated to 2.1.20241113 (2.1.1731486438). * Upstream doesn't provide a ChangeLog. 
OBS-URL: https://build.opensuse.org/package/show/devel:languages:lua/luajit2?expand=0&rev=11 --- .gitattributes | 23 ++++++ .gitignore | 1 + baselibs.conf | 1 + luajit2-2.1-20240314.tar.gz | 3 + luajit2-2.1-20240815.tar.gz | 3 + luajit2-2.1-20241104.tar.gz | 3 + luajit2-2.1-20241113.tar.gz | 3 + luajit2-name.patch | 156 ++++++++++++++++++++++++++++++++++++ luajit2.changes | 118 +++++++++++++++++++++++++++ luajit2.spec | 95 ++++++++++++++++++++++ 10 files changed, 406 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 baselibs.conf create mode 100644 luajit2-2.1-20240314.tar.gz create mode 100644 luajit2-2.1-20240815.tar.gz create mode 100644 luajit2-2.1-20241104.tar.gz create mode 100644 luajit2-2.1-20241113.tar.gz create mode 100644 luajit2-name.patch create mode 100644 luajit2.changes create mode 100644 luajit2.spec diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git 
a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/baselibs.conf b/baselibs.conf new file mode 100644 index 0000000..a0db4e9 --- /dev/null +++ b/baselibs.conf @@ -0,0 +1 @@ +libluajit2-5_1-2 diff --git a/luajit2-2.1-20240314.tar.gz b/luajit2-2.1-20240314.tar.gz new file mode 100644 index 0000000..98b6b08 --- /dev/null +++ b/luajit2-2.1-20240314.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efddc4104a0ce720ddf4da3d9bce927f3c5816a8a45a043462ca58914cde271 +size 1162566 diff --git a/luajit2-2.1-20240815.tar.gz b/luajit2-2.1-20240815.tar.gz new file mode 100644 index 0000000..b16de02 --- /dev/null +++ b/luajit2-2.1-20240815.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e59ec13c301c8b2855838b1248def49ef348a3e7563fabef677431706718145 +size 1163521 diff --git a/luajit2-2.1-20241104.tar.gz b/luajit2-2.1-20241104.tar.gz new file mode 100644 index 0000000..3de36b9 --- /dev/null +++ b/luajit2-2.1-20241104.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197a5eb626bc9e0c19dcb025a190735ca1e23890606204bd9ef4c9828d5c4d4a +size 1163274 diff --git a/luajit2-2.1-20241113.tar.gz b/luajit2-2.1-20241113.tar.gz new file mode 100644 index 0000000..f20ad64 --- /dev/null +++ b/luajit2-2.1-20241113.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b269f3a55c420e5a286bbd6b8ef8a5425dbcb4194fa2beb9e22eea277cd6638 +size 1163373 diff --git a/luajit2-name.patch b/luajit2-name.patch new file mode 100644 index 0000000..55c511b --- /dev/null +++ b/luajit2-name.patch @@ -0,0 +1,156 @@ +diff -Pdpru luajit2-2.1-20240815.orig/Makefile luajit2-2.1-20240815/Makefile +--- luajit2-2.1-20240815.orig/Makefile 2024-08-15 03:29:18.000000000 +0300 ++++ luajit2-2.1-20240815/Makefile 2024-08-28 06:32:57.210186012 +0300 +@@ -38,10 +38,10 @@ DPREFIX= $(DESTDIR)$(PREFIX) + INSTALL_BIN= $(DPREFIX)/bin + 
INSTALL_LIB= $(DPREFIX)/$(MULTILIB) + INSTALL_SHARE= $(DPREFIX)/share +-INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION) ++INSTALL_DEFINC= $(DPREFIX)/include/luajit2-$(MMVERSION) + INSTALL_INC= $(INSTALL_DEFINC) + +-export INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION) ++export INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit2-$(MMVERSION) + INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit + INSTALL_LMODD= $(INSTALL_SHARE)/lua + INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) +@@ -50,23 +50,20 @@ INSTALL_CMOD= $(INSTALL_CMODD)/$(ABIVER) + INSTALL_MAN= $(INSTALL_SHARE)/man/man1 + INSTALL_PKGCONFIG= $(INSTALL_LIB)/pkgconfig + +-INSTALL_TNAME= luajit-$(VERSION) +-INSTALL_TSYMNAME= luajit +-INSTALL_ANAME= libluajit-$(ABIVER).a +-INSTALL_SOSHORT1= libluajit-$(ABIVER).so +-INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER) +-INSTALL_SONAME= libluajit-$(ABIVER).so.$(VERSION) +-INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib +-INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib +-INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(VERSION).dylib +-INSTALL_PCNAME= luajit.pc ++INSTALL_TNAME= luajit2 ++INSTALL_MANNAME= $(INSTALL_TNAME).1 ++INSTALL_SOSHORT1= libluajit2-$(ABIVER).so ++INSTALL_SOSHORT2= libluajit2-$(ABIVER).so.$(MAJVER) ++INSTALL_SONAME= libluajit2-$(ABIVER).so.$(VERSION) ++INSTALL_DYLIBSHORT1= libluajit2-$(ABIVER).dylib ++INSTALL_DYLIBSHORT2= libluajit2-$(ABIVER).$(MAJVER).dylib ++INSTALL_DYLIBNAME= libluajit2-$(ABIVER).$(VERSION).dylib ++INSTALL_PCNAME= luajit2.pc + +-INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) + INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME) + INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT1) + INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT2) + INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME) +-INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME) + INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME) + + INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ +@@ -78,20 +75,27 @@ RM= rm -f + MKDIR= mkdir -p + RMDIR= rmdir 2>/dev/null + SYMLINK= ln 
-sf +-INSTALL_X= install -m 0755 +-INSTALL_F= install -m 0644 ++INSTALL_X= install -pm0755 ++INSTALL_F= install -pm0644 + UNINSTALL= $(RM) + LDCONFIG= ldconfig -n 2>/dev/null ++SED_MAN= sed -e "s|luajit|luajit2|g" \ ++ -e "s|LuaJIT|LuaJIT2|g" \ ++ -e "s|luajit2.org|github.com/openresty/luajit2|" \ ++ -e "s|Just-In-Time Compiler for the Lua Language|OpenResty's maintained branch of LuaJIT|" + SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ + -e "s|^multilib=.*|multilib=$(MULTILIB)|" \ +- -e "s|^relver=.*|relver=$(RELVER)|" ++ -e "s|^relver=.*|relver=$(RELVER)|" \ ++ -e "s|luajit-|luajit2-|g" \ ++ -e "s|LuaJIT|LuaJIT2|g" \ ++ -e "s|luajit.org|github.com/openresty/luajit2|" \ ++ -e "s|Just-in-time compiler for Lua|OpenResty's maintained branch of LuaJIT|" + ifneq ($(INSTALL_DEFINC),$(INSTALL_INC)) + SED_PC+= -e "s|^includedir=.*|includedir=$(INSTALL_INC)|" + endif + +-FILE_T= luajit +-FILE_A= libluajit.a +-FILE_SO= libluajit.so ++FILE_T= luajit2 ++FILE_SO= libluajit2.so + FILE_MAN= luajit.1 + FILE_PC= luajit.pc + FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h +@@ -130,25 +134,25 @@ install: $(INSTALL_DEP) + @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" + $(MKDIR) $(INSTALL_DIRS) + cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) +- cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : + $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) + cd src && test -f $(FILE_SO) && \ + $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ + ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : +- cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) ++ cd etc && $(SED_MAN) $(FILE_MAN) > $(FILE_MAN).tmp && \ ++ $(INSTALL_F) $(FILE_MAN).tmp $(INSTALL_MAN)/$(INSTALL_MANNAME) && \ ++ $(RM) $(FILE_MAN).tmp + cd etc && $(SED_PC) $(FILE_PC) > $(FILE_PC).tmp && \ + $(INSTALL_F) $(FILE_PC).tmp $(INSTALL_PC) && \ + $(RM) $(FILE_PC).tmp + cd src && 
$(INSTALL_F) $(FILES_INC) $(INSTALL_INC) + cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) +- $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) + @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" + + uninstall: + @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" +- $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) ++ $(UNINSTALL) $(INSTALL_T) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + for file in $(FILES_JITLIB); do \ + $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ + done +diff -Pdpru luajit2-2.1-20240815.orig/src/luaconf.h luajit2-2.1-20240815/src/luaconf.h +--- luajit2-2.1-20240815.orig/src/luaconf.h 2024-08-15 03:29:18.000000000 +0300 ++++ luajit2-2.1-20240815/src/luaconf.h 2024-08-28 06:35:50.295523664 +0300 +@@ -51,7 +51,7 @@ + #endif + + #ifndef LUA_LJDIR +-#define LUA_LJDIR LUA_JROOT "/share/luajit-2.1" ++#define LUA_LJDIR LUA_JROOT "/share/luajit2-2.1" + #endif + + #define LUA_JPATH ";" LUA_LJDIR "/?.lua" +diff -Pdpru luajit2-2.1-20240815.orig/src/Makefile luajit2-2.1-20240815/src/Makefile +--- luajit2-2.1-20240815.orig/src/Makefile 2024-08-15 03:29:18.000000000 +0300 ++++ luajit2-2.1-20240815/src/Makefile 2024-08-28 06:40:25.429926912 +0300 +@@ -214,11 +214,11 @@ TARGET_AR= $(CROSS)ar rcus + TARGET_STRIP= $(CROSS)strip + + TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) +-TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) +-TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib ++TARGET_SONAME= libluajit2-$(ABIVER).so.$(MAJVER) ++TARGET_DYLIBNAME= libluajit2-$(ABIVER).$(MAJVER).dylib + TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) + TARGET_DLLNAME= lua$(NODOTABIVER).dll +-TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a ++TARGET_DLLDOTANAME= libluajit2-$(ABIVER).dll.a + TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) + 
TARGET_DYNXLDOPTS= + +@@ -544,9 +544,9 @@ LIB_VMDEF= jit/vmdef.lua + LIB_VMDEFP= $(LIB_VMDEF) + + LUAJIT_O= luajit.o +-LUAJIT_A= libluajit.a +-LUAJIT_SO= libluajit.so +-LUAJIT_T= luajit ++LUAJIT_A= libluajit2.a ++LUAJIT_SO= libluajit2.so ++LUAJIT_T= luajit2 + + ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T) + ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ diff --git a/luajit2.changes b/luajit2.changes new file mode 100644 index 0000000..e22b49d --- /dev/null +++ b/luajit2.changes @@ -0,0 +1,118 @@ +------------------------------------------------------------------- +Sat Nov 23 13:05:56 UTC 2024 - Илья Индиго + +- Updated to 2.1.20241113 (2.1.1731486438). + * Upstream doesn't provide a ChangeLog. + +------------------------------------------------------------------- +Thu Nov 7 08:00:47 UTC 2024 - Илья Индиго + +- Updated to 2.1.20241104 (2.1.1728714540). + * FFI: Added missing coercion when recording 64-bit bit.*(). + * ARM64: Used ldr literal to load FP constants. + +------------------------------------------------------------------- +Wed Aug 28 03:54:34 UTC 2024 - Илья Индиго + +- Updated to 2.1.20240815 (2.1.1723681758). + * Changed file luajit2-name.patch. + * Added ppc64le support. + +------------------------------------------------------------------- +Fri May 10 04:09:49 UTC 2024 - Илья Индиго + +- Changed file luajit2-name.patch and fixed package.path for modules. + +------------------------------------------------------------------- +Fri May 3 21:45:30 UTC 2024 - Илья Индиго + +- Removed %check (tests don't run, and it's not known how to run them). + +------------------------------------------------------------------- +Wed May 1 02:13:06 UTC 2024 - Илья Индиго + +- Updated to 2.1.20240314 (2.1.1710398010). +- Refreshed spec-file via spec-cleaner and manual optimizations. +- Added file luajit2-name.patch. +- Renamed package to luajit2. 
+ +------------------------------------------------------------------- +Thu Jan 26 09:08:11 UTC 2023 - Michal Suchanek + +- Opencode %make_build to prevent build failure when not defined. + +------------------------------------------------------------------- +Wed Jan 25 13:46:36 UTC 2023 - Matej Cepl + +- Rewrite moonjit package to build openresty from + https://github.com/openresty/luajit2. + +------------------------------------------------------------------- +Mon Jul 12 17:33:50 UTC 2021 - Matej Cepl + +- Rewrite the packaging to acommodate a possibility of two LuaJIT + implementations in the distribution. + +------------------------------------------------------------------- +Sat Jul 3 19:20:57 UTC 2021 - Matej Cepl + +- Fix Provides ... we don't provide an equvalent of 2.2.0 LuaJIT + (which doesn't exsts). + +------------------------------------------------------------------- +Mon Mar 30 12:31:09 UTC 2020 - Matej Cepl + +- Add moonjit105-string_gsub.patch to fix gsub method, which breaks + other packages (namely neovim). + +------------------------------------------------------------------- +Thu Mar 12 12:09:34 UTC 2020 - Tomáš Chvátal + +- No need to namespace luajit with update-alternatives, we really + want just one version of it to be provided to keep things + simple + +------------------------------------------------------------------- +Thu Mar 12 11:42:21 UTC 2020 - Tomáš Chvátal + +- Update to 2.2.0: + * The v2.2 release of moonjit brings many new features that bring the + project closer to its aim of unifying LuaJIT forks and compatibility + with Lua. 
+- Do not suffix the version anymore as we support all lua variants, + removes the patch: + * luajit-lua-versioned.patch + +------------------------------------------------------------------- +Wed Dec 18 09:05:04 UTC 2019 - Michel Normand + +- Ignore make check error for PowerPC until upstream solved issue + https://github.com/moonjit/moonjit/issues/40 +- do the same for %arm rather than to disable all %check. + +------------------------------------------------------------------- +Mon Dec 16 09:07:37 UTC 2019 - Guillaume GARDET + +- Disable %check on %arm until the following issue is fixed: + https://github.com/moonjit/moonjit/issues/9 + +------------------------------------------------------------------- +Thu Dec 12 14:12:25 UTC 2019 - Andreas Schwab + +- Use %_lib + +------------------------------------------------------------------- +Mon Dec 9 12:54:59 UTC 2019 - Tomáš Chvátal + +- Obsolete/provide lua51-luajit as it was the old luajit name + +------------------------------------------------------------------- +Fri Dec 6 07:44:53 UTC 2019 - Tomáš Chvátal + +- Fork of luajit with more active upstream +- Version 2.1.2 building on top of 2.1.0-beta3 with following goodies: + * fix for CVE-2019-19391 + * Support for ppc64 ppc64le s390 s390x +- Still carry the patch luajit-lua-versioned.patch to mark it as + compatible with lua 5.1, future 2.2 release should contain + support for 5.2 and 5.3 release of LUA diff --git a/luajit2.spec b/luajit2.spec new file mode 100644 index 0000000..30ceb72 --- /dev/null +++ b/luajit2.spec @@ -0,0 +1,95 @@ +# +# spec file for package luajit2 +# +# Copyright (c) 2024 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. 
The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%define abi_ver 5.1 +%define so_ver 2 +%define lib_ver 5_1-%{so_ver} +%define major 2.1 +%define minor 20241113 +%define upstream 1731486438 +Name: luajit2 +Version: %{major}.%{minor} +Release: 0 +Summary: OpenResty's maintained branch of LuaJIT +License: MIT +URL: https://github.com/openresty/%{name} +Source0: https://github.com/openresty/%{name}/archive/refs/tags/v%{major}-%{minor}.tar.gz#/%{name}-%{major}-%{minor}.tar.gz +Source1: baselibs.conf +Patch0: %{name}-name.patch +BuildRequires: pkgconfig +Requires: lib%{name}-%{lib_ver} = %{version} + +%description +This is the official OpenResty branch of LuaJIT. It is not to be considered a fork, +since we still regularly synchronize changes from the upstream LuaJIT project. + +%package -n lib%{name}-%{lib_ver} +Summary: Library for LuaJIT2 compiler + +%description -n lib%{name}-%{lib_ver} +Libraries to use LuaJIT2 compiler. + +%package devel +Summary: Devel files for %{name} +Requires: %{name} = %{version} +Requires: lib%{name}-%{lib_ver} = %{version} + +%description devel +Devel files for %{name} package. 
+ +%prep +%autosetup -p1 -n %{name}-%{major}-%{minor} + +%build +%make_build %{?_make_output_sync} \ + Q= \ + DYNAMIC_CC="cc -fPIC" \ + LDCONFIG="true" \ + TARGET_AR="ar rcus" \ + TARGET_STRIP=: \ + PREFIX=%{_prefix} \ + MULTILIB=%{_lib} \ + CFLAGS="%{optflags}" + +%install +%make_install \ + DYNAMIC_CC="cc -fPIC" \ + LDCONFIG="true" \ + TARGET_AR="ar rcus" \ + TARGET_STRIP=: \ + PREFIX=%{_prefix} \ + MULTILIB=%{_lib} + +%post -n lib%{name}-%{lib_ver} -p /sbin/ldconfig +%postun -n lib%{name}-%{lib_ver} -p /sbin/ldconfig + +%files +%{_bindir}/%{name} +%{_mandir}/man1/%{name}.1%{?ext_man} +%{_datadir}/%{name}-%{major} + +%files -n lib%{name}-%{lib_ver} +%{_libdir}/lib%{name}-%{abi_ver}.so.%{so_ver} +%{_libdir}/lib%{name}-%{abi_ver}.so.%{major}.%{upstream} + +%files devel +%{_includedir}/%{name}-%{major} +%{_libdir}/lib%{name}-%{abi_ver}.so +%{_libdir}/pkgconfig/%{name}.pc + +%changelog -- 2.51.1 From ddc9648a963282bd126f00d4b094cf33eab304707800e10d167da3a6bbb67956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=98=D0=BD=D0=B4=D0=B8=D0=B3?= =?UTF-8?q?=D0=BE?= Date: Sat, 18 Jan 2025 09:21:09 +0000 Subject: [PATCH 4/8] - Updated to 2.1.20250117 (1737090214). * Changed file luajit2-name.patch. * Added riscv support. 
OBS-URL: https://build.opensuse.org/package/show/devel:languages:lua/luajit2?expand=0&rev=13 --- .gitattributes | 23 ++++++ .gitignore | 1 + baselibs.conf | 1 + luajit2-2.1-20240314.tar.gz | 3 + luajit2-2.1-20240815.tar.gz | 3 + luajit2-2.1-20241104.tar.gz | 3 + luajit2-2.1-20241113.tar.gz | 3 + luajit2-2.1-20250117.tar.gz | 3 + luajit2-name.patch | 156 ++++++++++++++++++++++++++++++++++++ luajit2.changes | 125 +++++++++++++++++++++++++++++ luajit2.spec | 95 ++++++++++++++++++++++ 11 files changed, 416 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 baselibs.conf create mode 100644 luajit2-2.1-20240314.tar.gz create mode 100644 luajit2-2.1-20240815.tar.gz create mode 100644 luajit2-2.1-20241104.tar.gz create mode 100644 luajit2-2.1-20241113.tar.gz create mode 100644 luajit2-2.1-20250117.tar.gz create mode 100644 luajit2-name.patch create mode 100644 luajit2.changes create mode 100644 luajit2.spec diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs 
diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..57affb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.osc diff --git a/baselibs.conf b/baselibs.conf new file mode 100644 index 0000000..a0db4e9 --- /dev/null +++ b/baselibs.conf @@ -0,0 +1 @@ +libluajit2-5_1-2 diff --git a/luajit2-2.1-20240314.tar.gz b/luajit2-2.1-20240314.tar.gz new file mode 100644 index 0000000..98b6b08 --- /dev/null +++ b/luajit2-2.1-20240314.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efddc4104a0ce720ddf4da3d9bce927f3c5816a8a45a043462ca58914cde271 +size 1162566 diff --git a/luajit2-2.1-20240815.tar.gz b/luajit2-2.1-20240815.tar.gz new file mode 100644 index 0000000..b16de02 --- /dev/null +++ b/luajit2-2.1-20240815.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e59ec13c301c8b2855838b1248def49ef348a3e7563fabef677431706718145 +size 1163521 diff --git a/luajit2-2.1-20241104.tar.gz b/luajit2-2.1-20241104.tar.gz new file mode 100644 index 0000000..3de36b9 --- /dev/null +++ b/luajit2-2.1-20241104.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197a5eb626bc9e0c19dcb025a190735ca1e23890606204bd9ef4c9828d5c4d4a +size 1163274 diff --git a/luajit2-2.1-20241113.tar.gz b/luajit2-2.1-20241113.tar.gz new file mode 100644 index 0000000..f20ad64 --- /dev/null +++ b/luajit2-2.1-20241113.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b269f3a55c420e5a286bbd6b8ef8a5425dbcb4194fa2beb9e22eea277cd6638 +size 1163373 diff --git a/luajit2-2.1-20250117.tar.gz b/luajit2-2.1-20250117.tar.gz new file mode 100644 index 0000000..e2c3bb3 --- /dev/null +++ b/luajit2-2.1-20250117.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ff3dc2cc97969f7385679da7c9ff96738aa9cc275fa6bab77316eb3340ea8e +size 1163528 diff --git a/luajit2-name.patch b/luajit2-name.patch new file mode 100644 
index 0000000..0174361 --- /dev/null +++ b/luajit2-name.patch @@ -0,0 +1,156 @@ +diff -Pdpru luajit2-2.1-20250117.orig/Makefile luajit2-2.1-20250117/Makefile +--- luajit2-2.1-20250117.orig/Makefile 2025-01-17 08:03:34.000000000 +0300 ++++ luajit2-2.1-20250117/Makefile 2025-01-18 12:03:03.025681445 +0300 +@@ -39,10 +39,10 @@ INSTALL_BIN= $(DPREFIX)/bin + INSTALL_LIB= $(DPREFIX)/$(MULTILIB) + INSTALL_SHARE_= $(PREFIX)/share + INSTALL_SHARE= $(DESTDIR)$(INSTALL_SHARE_) +-INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION) ++INSTALL_DEFINC= $(DPREFIX)/include/luajit2-$(MMVERSION) + INSTALL_INC= $(INSTALL_DEFINC) + +-export INSTALL_LJLIBD= $(INSTALL_SHARE_)/luajit-$(MMVERSION) ++export INSTALL_LJLIBD= $(INSTALL_SHARE_)/luajit2-$(MMVERSION) + INSTALL_JITLIB= $(DESTDIR)$(INSTALL_LJLIBD)/jit + INSTALL_LMODD= $(INSTALL_SHARE)/lua + INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) +@@ -51,23 +51,20 @@ INSTALL_CMOD= $(INSTALL_CMODD)/$(ABIVER) + INSTALL_MAN= $(INSTALL_SHARE)/man/man1 + INSTALL_PKGCONFIG= $(INSTALL_LIB)/pkgconfig + +-INSTALL_TNAME= luajit-$(VERSION) +-INSTALL_TSYMNAME= luajit +-INSTALL_ANAME= libluajit-$(ABIVER).a +-INSTALL_SOSHORT1= libluajit-$(ABIVER).so +-INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER) +-INSTALL_SONAME= libluajit-$(ABIVER).so.$(VERSION) +-INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib +-INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib +-INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(VERSION).dylib +-INSTALL_PCNAME= luajit.pc ++INSTALL_TNAME= luajit2 ++INSTALL_MANNAME= $(INSTALL_TNAME).1 ++INSTALL_SOSHORT1= libluajit2-$(ABIVER).so ++INSTALL_SOSHORT2= libluajit2-$(ABIVER).so.$(MAJVER) ++INSTALL_SONAME= libluajit2-$(ABIVER).so.$(VERSION) ++INSTALL_DYLIBSHORT1= libluajit2-$(ABIVER).dylib ++INSTALL_DYLIBSHORT2= libluajit2-$(ABIVER).$(MAJVER).dylib ++INSTALL_DYLIBNAME= libluajit2-$(ABIVER).$(VERSION).dylib ++INSTALL_PCNAME= luajit2.pc + +-INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) + INSTALL_DYN= $(INSTALL_LIB)/$(INSTALL_SONAME) + 
INSTALL_SHORT1= $(INSTALL_LIB)/$(INSTALL_SOSHORT1) + INSTALL_SHORT2= $(INSTALL_LIB)/$(INSTALL_SOSHORT2) + INSTALL_T= $(INSTALL_BIN)/$(INSTALL_TNAME) +-INSTALL_TSYM= $(INSTALL_BIN)/$(INSTALL_TSYMNAME) + INSTALL_PC= $(INSTALL_PKGCONFIG)/$(INSTALL_PCNAME) + + INSTALL_DIRS= $(INSTALL_BIN) $(INSTALL_LIB) $(INSTALL_INC) $(INSTALL_MAN) \ +@@ -79,20 +76,27 @@ RM= rm -f + MKDIR= mkdir -p + RMDIR= rmdir 2>/dev/null + SYMLINK= ln -sf +-INSTALL_X= install -m 0755 +-INSTALL_F= install -m 0644 ++INSTALL_X= install -pm0755 ++INSTALL_F= install -pm0644 + UNINSTALL= $(RM) + LDCONFIG= ldconfig -n 2>/dev/null ++SED_MAN= sed -e "s|luajit|luajit2|g" \ ++ -e "s|LuaJIT|LuaJIT2|g" \ ++ -e "s|luajit2.org|github.com/openresty/luajit2|" \ ++ -e "s|Just-In-Time Compiler for the Lua Language|OpenResty's maintained branch of LuaJIT|" + SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ + -e "s|^multilib=.*|multilib=$(MULTILIB)|" \ +- -e "s|^relver=.*|relver=$(RELVER)|" ++ -e "s|^relver=.*|relver=$(RELVER)|" \ ++ -e "s|luajit-|luajit2-|g" \ ++ -e "s|LuaJIT|LuaJIT2|g" \ ++ -e "s|luajit.org|github.com/openresty/luajit2|" \ ++ -e "s|Just-in-time compiler for Lua|OpenResty's maintained branch of LuaJIT|" + ifneq ($(INSTALL_DEFINC),$(INSTALL_INC)) + SED_PC+= -e "s|^includedir=.*|includedir=$(INSTALL_INC)|" + endif + +-FILE_T= luajit +-FILE_A= libluajit.a +-FILE_SO= libluajit.so ++FILE_T= luajit2 ++FILE_SO= libluajit2.so + FILE_MAN= luajit.1 + FILE_PC= luajit.pc + FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h +@@ -131,25 +135,25 @@ install: $(INSTALL_DEP) + @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" + $(MKDIR) $(INSTALL_DIRS) + cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) +- cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : + $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) + cd src && test -f $(FILE_SO) && \ + $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ + ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ + $(SYMLINK) $(INSTALL_SONAME) 
$(INSTALL_SHORT1) && \ + $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : +- cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) ++ cd etc && $(SED_MAN) $(FILE_MAN) > $(FILE_MAN).tmp && \ ++ $(INSTALL_F) $(FILE_MAN).tmp $(INSTALL_MAN)/$(INSTALL_MANNAME) && \ ++ $(RM) $(FILE_MAN).tmp + cd etc && $(SED_PC) $(FILE_PC) > $(FILE_PC).tmp && \ + $(INSTALL_F) $(FILE_PC).tmp $(INSTALL_PC) && \ + $(RM) $(FILE_PC).tmp + cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) + cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) +- $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) + @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" + + uninstall: + @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" +- $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) ++ $(UNINSTALL) $(INSTALL_T) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + for file in $(FILES_JITLIB); do \ + $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ + done +diff -Pdpru luajit2-2.1-20250117.orig/src/luaconf.h luajit2-2.1-20250117/src/luaconf.h +--- luajit2-2.1-20250117.orig/src/luaconf.h 2025-01-17 08:03:34.000000000 +0300 ++++ luajit2-2.1-20250117/src/luaconf.h 2025-01-18 12:04:14.635265329 +0300 +@@ -50,7 +50,7 @@ + #endif + + #ifndef LUA_LJDIR +-#define LUA_LJDIR LUA_JROOT "/share/luajit-2.1" ++#define LUA_LJDIR LUA_JROOT "/share/luajit2-2.1" + #endif + + #define LUA_JPATH ";" LUA_LJDIR "/?.lua" +diff -Pdpru luajit2-2.1-20250117.orig/src/Makefile luajit2-2.1-20250117/src/Makefile +--- luajit2-2.1-20250117.orig/src/Makefile 2025-01-17 08:03:34.000000000 +0300 ++++ luajit2-2.1-20250117/src/Makefile 2025-01-18 12:09:11.660041483 +0300 +@@ -214,11 +214,11 @@ TARGET_AR= $(CROSS)ar rcus + TARGET_STRIP= $(CROSS)strip + + TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) +-TARGET_SONAME= libluajit-$(ABIVER).so.$(MAJVER) 
+-TARGET_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).dylib ++TARGET_SONAME= libluajit2-$(ABIVER).so.$(MAJVER) ++TARGET_DYLIBNAME= libluajit2-$(ABIVER).$(MAJVER).dylib + TARGET_DYLIBPATH= $(TARGET_LIBPATH)/$(TARGET_DYLIBNAME) + TARGET_DLLNAME= lua$(NODOTABIVER).dll +-TARGET_DLLDOTANAME= libluajit-$(ABIVER).dll.a ++TARGET_DLLDOTANAME= libluajit2-$(ABIVER).dll.a + TARGET_XSHLDFLAGS= -shared -fPIC -Wl,-soname,$(TARGET_SONAME) + TARGET_DYNXLDOPTS= + +@@ -544,9 +544,9 @@ LIB_VMDEF= jit/vmdef.lua + LIB_VMDEFP= $(LIB_VMDEF) + + LUAJIT_O= luajit.o +-LUAJIT_A= libluajit.a +-LUAJIT_SO= libluajit.so +-LUAJIT_T= luajit ++LUAJIT_A= libluajit2.a ++LUAJIT_SO= libluajit2.so ++LUAJIT_T= luajit2 + + ALL_T= $(LUAJIT_T) $(LUAJIT_A) $(LUAJIT_SO) $(HOST_T) + ALL_HDRGEN= lj_bcdef.h lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h \ diff --git a/luajit2.changes b/luajit2.changes new file mode 100644 index 0000000..a2b2867 --- /dev/null +++ b/luajit2.changes @@ -0,0 +1,125 @@ +------------------------------------------------------------------- +Sat Jan 18 09:16:51 UTC 2025 - Илья Индиго + +- Updated to 2.1.20250117 (1737090214). + * Changed file luajit2-name.patch. + * Added riscv support. + +------------------------------------------------------------------- +Sat Nov 23 13:05:56 UTC 2024 - Илья Индиго + +- Updated to 2.1.20241113 (2.1.1731486438). + * Upstream doesn't provide a ChangeLog. + +------------------------------------------------------------------- +Thu Nov 7 08:00:47 UTC 2024 - Илья Индиго + +- Updated to 2.1.20241104 (2.1.1728714540). + * FFI: Added missing coercion when recording 64-bit bit.*(). + * ARM64: Used ldr literal to load FP constants. + +------------------------------------------------------------------- +Wed Aug 28 03:54:34 UTC 2024 - Илья Индиго + +- Updated to 2.1.20240815 (2.1.1723681758). + * Changed file luajit2-name.patch. + * Added ppc64le support. 
+
+-------------------------------------------------------------------
+Fri May 10 04:09:49 UTC 2024 - Илья Индиго 
+
+- Changed file luajit2-name.patch and fixed package.path for modules.
+
+-------------------------------------------------------------------
+Fri May 3 21:45:30 UTC 2024 - Илья Индиго 
+
+- Removed %check (tests don't run, and it's not known how to run them).
+
+-------------------------------------------------------------------
+Wed May 1 02:13:06 UTC 2024 - Илья Индиго 
+
+- Updated to 2.1.20240314 (2.1.1710398010).
+- Refreshed spec-file via spec-cleaner and manual optimizations.
+- Added file luajit2-name.patch.
+- Renamed package to luajit2.
+
+-------------------------------------------------------------------
+Thu Jan 26 09:08:11 UTC 2023 - Michal Suchanek 
+
+- Opencode %make_build to prevent build failure when not defined.
+
+-------------------------------------------------------------------
+Wed Jan 25 13:46:36 UTC 2023 - Matej Cepl 
+
+- Rewrite moonjit package to build openresty from
+  https://github.com/openresty/luajit2.
+
+-------------------------------------------------------------------
+Mon Jul 12 17:33:50 UTC 2021 - Matej Cepl 
+
+- Rewrite the packaging to accommodate a possibility of two LuaJIT
+  implementations in the distribution.
+
+-------------------------------------------------------------------
+Sat Jul 3 19:20:57 UTC 2021 - Matej Cepl 
+
+- Fix Provides ... we don't provide an equivalent of 2.2.0 LuaJIT
+  (which doesn't exist).
+
+-------------------------------------------------------------------
+Mon Mar 30 12:31:09 UTC 2020 - Matej Cepl 
+
+- Add moonjit105-string_gsub.patch to fix gsub method, which breaks
+  other packages (namely neovim).
+ +------------------------------------------------------------------- +Thu Mar 12 12:09:34 UTC 2020 - Tomáš Chvátal + +- No need to namespace luajit with update-alternatives, we really + want just one version of it to be provided to keep things + simple + +------------------------------------------------------------------- +Thu Mar 12 11:42:21 UTC 2020 - Tomáš Chvátal + +- Update to 2.2.0: + * The v2.2 release of moonjit brings many new features that bring the + project closer to its aim of unifying LuaJIT forks and compatibility + with Lua. +- Do not suffix the version anymore as we support all lua variants, + removes the patch: + * luajit-lua-versioned.patch + +------------------------------------------------------------------- +Wed Dec 18 09:05:04 UTC 2019 - Michel Normand + +- Ignore make check error for PowerPC until upstream solved issue + https://github.com/moonjit/moonjit/issues/40 +- do the same for %arm rather than to disable all %check. + +------------------------------------------------------------------- +Mon Dec 16 09:07:37 UTC 2019 - Guillaume GARDET + +- Disable %check on %arm until the following issue is fixed: + https://github.com/moonjit/moonjit/issues/9 + +------------------------------------------------------------------- +Thu Dec 12 14:12:25 UTC 2019 - Andreas Schwab + +- Use %_lib + +------------------------------------------------------------------- +Mon Dec 9 12:54:59 UTC 2019 - Tomáš Chvátal + +- Obsolete/provide lua51-luajit as it was the old luajit name + +------------------------------------------------------------------- +Fri Dec 6 07:44:53 UTC 2019 - Tomáš Chvátal + +- Fork of luajit with more active upstream +- Version 2.1.2 building on top of 2.1.0-beta3 with following goodies: + * fix for CVE-2019-19391 + * Support for ppc64 ppc64le s390 s390x +- Still carry the patch luajit-lua-versioned.patch to mark it as + compatible with lua 5.1, future 2.2 release should contain + support for 5.2 and 5.3 release of LUA diff --git 
a/luajit2.spec b/luajit2.spec new file mode 100644 index 0000000..76ae14c --- /dev/null +++ b/luajit2.spec @@ -0,0 +1,95 @@ +# +# spec file for package luajit2 +# +# Copyright (c) 2025 SUSE LLC +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via https://bugs.opensuse.org/ +# + + +%define abi_ver 5.1 +%define so_ver 2 +%define lib_ver 5_1-%{so_ver} +%define major 2.1 +%define minor 20250117 +%define upstream 1737090214 +Name: luajit2 +Version: %{major}.%{minor} +Release: 0 +Summary: OpenResty's maintained branch of LuaJIT +License: MIT +URL: https://github.com/openresty/%{name} +Source0: https://github.com/openresty/%{name}/archive/refs/tags/v%{major}-%{minor}.tar.gz#/%{name}-%{major}-%{minor}.tar.gz +Source1: baselibs.conf +Patch0: %{name}-name.patch +BuildRequires: pkgconfig +Requires: lib%{name}-%{lib_ver} = %{version} + +%description +This is the official OpenResty branch of LuaJIT. It is not to be considered a fork, +since we still regularly synchronize changes from the upstream LuaJIT project. + +%package -n lib%{name}-%{lib_ver} +Summary: Library for LuaJIT2 compiler + +%description -n lib%{name}-%{lib_ver} +Libraries to use LuaJIT2 compiler. + +%package devel +Summary: Devel files for %{name} +Requires: %{name} = %{version} +Requires: lib%{name}-%{lib_ver} = %{version} + +%description devel +Devel files for %{name} package. 
+ +%prep +%autosetup -p1 -n %{name}-%{major}-%{minor} + +%build +%make_build %{?_make_output_sync} \ + Q= \ + DYNAMIC_CC="cc -fPIC" \ + LDCONFIG="true" \ + TARGET_AR="ar rcus" \ + TARGET_STRIP=: \ + PREFIX=%{_prefix} \ + MULTILIB=%{_lib} \ + CFLAGS="%{optflags}" + +%install +%make_install \ + DYNAMIC_CC="cc -fPIC" \ + LDCONFIG="true" \ + TARGET_AR="ar rcus" \ + TARGET_STRIP=: \ + PREFIX=%{_prefix} \ + MULTILIB=%{_lib} + +%post -n lib%{name}-%{lib_ver} -p /sbin/ldconfig +%postun -n lib%{name}-%{lib_ver} -p /sbin/ldconfig + +%files +%{_bindir}/%{name} +%{_mandir}/man1/%{name}.1%{?ext_man} +%{_datadir}/%{name}-%{major} + +%files -n lib%{name}-%{lib_ver} +%{_libdir}/lib%{name}-%{abi_ver}.so.%{so_ver} +%{_libdir}/lib%{name}-%{abi_ver}.so.%{major}.%{upstream} + +%files devel +%{_includedir}/%{name}-%{major} +%{_libdir}/lib%{name}-%{abi_ver}.so +%{_libdir}/pkgconfig/%{name}.pc + +%changelog -- 2.51.1 From 94084cd67be89463d07b2968389fd98d2c1147fe9ae7319914b4d170314560db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=98=D0=BD=D0=B4=D0=B8=D0=B3?= =?UTF-8?q?=D0=BE?= Date: Sat, 18 Jan 2025 10:19:27 +0000 Subject: [PATCH 5/8] - Updated to 2.1.20250117 (1737090214). * Changed file luajit2-name.patch. * https://github.com/openresty/luajit2/compare/v2.1-20241203...v2.1-20250117 * Fixed recording of BC_VARG. OBS-URL: https://build.opensuse.org/package/show/devel:languages:lua/luajit2?expand=0&rev=14 --- luajit2.changes | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/luajit2.changes b/luajit2.changes index a2b2867..be03841 100644 --- a/luajit2.changes +++ b/luajit2.changes @@ -1,9 +1,10 @@ ------------------------------------------------------------------- -Sat Jan 18 09:16:51 UTC 2025 - Илья Индиго +Sat Jan 18 10:16:26 UTC 2025 - Илья Индиго - Updated to 2.1.20250117 (1737090214). * Changed file luajit2-name.patch. - * Added riscv support. 
+ * https://github.com/openresty/luajit2/compare/v2.1-20241203...v2.1-20250117 + * Fixed recording of BC_VARG. ------------------------------------------------------------------- Sat Nov 23 13:05:56 UTC 2024 - Илья Индиго -- 2.51.1 From f14ef0d82f260a15ab1cebad0e256abb6cbda0cba741d053912aed3bf2dc03c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=98=D0=BD=D0=B4=D0=B8=D0=B3?= =?UTF-8?q?=D0=BE?= Date: Wed, 9 Jul 2025 12:41:30 +0300 Subject: [PATCH 6/8] - Updated to 2.1.20250529 (1748495995). * Changed file luajit2-name.patch. * https://github.com/openresty/luajit2/compare/v2.1-20250117...v2.1-20250529 --- luajit2-2.1-20250117.tar.gz | 3 --- luajit2-2.1-20250529.tar.gz | 3 +++ luajit2.changes | 7 +++++++ luajit2.spec | 7 +++---- 4 files changed, 13 insertions(+), 7 deletions(-) delete mode 100644 luajit2-2.1-20250117.tar.gz create mode 100644 luajit2-2.1-20250529.tar.gz diff --git a/luajit2-2.1-20250117.tar.gz b/luajit2-2.1-20250117.tar.gz deleted file mode 100644 index e2c3bb3..0000000 --- a/luajit2-2.1-20250117.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:68ff3dc2cc97969f7385679da7c9ff96738aa9cc275fa6bab77316eb3340ea8e -size 1163528 diff --git a/luajit2-2.1-20250529.tar.gz b/luajit2-2.1-20250529.tar.gz new file mode 100644 index 0000000..3af210e --- /dev/null +++ b/luajit2-2.1-20250529.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f6f5ead9fc69603e51c66877614e6b5e93332d552f35d08f932447357ed593 +size 1164879 diff --git a/luajit2.changes b/luajit2.changes index be03841..2327536 100644 --- a/luajit2.changes +++ b/luajit2.changes @@ -1,3 +1,10 @@ +------------------------------------------------------------------- +Wed Jul 9 09:39:48 UTC 2025 - Илья Индиго + +- Updated to 2.1.20250529 (1748495995). + * Changed file luajit2-name.patch. 
+ * https://github.com/openresty/luajit2/compare/v2.1-20250117...v2.1-20250529 + ------------------------------------------------------------------- Sat Jan 18 10:16:26 UTC 2025 - Илья Индиго diff --git a/luajit2.spec b/luajit2.spec index 76ae14c..6587114 100644 --- a/luajit2.spec +++ b/luajit2.spec @@ -20,8 +20,8 @@ %define so_ver 2 %define lib_ver 5_1-%{so_ver} %define major 2.1 -%define minor 20250117 -%define upstream 1737090214 +%define minor 20250529 +%define upstream 1748495995 Name: luajit2 Version: %{major}.%{minor} Release: 0 @@ -75,8 +75,7 @@ Devel files for %{name} package. PREFIX=%{_prefix} \ MULTILIB=%{_lib} -%post -n lib%{name}-%{lib_ver} -p /sbin/ldconfig -%postun -n lib%{name}-%{lib_ver} -p /sbin/ldconfig +%ldconfig_scriptlets -n lib%{name}-%{lib_ver} %files %{_bindir}/%{name} -- 2.51.1 From 1c7789ada1180affc99cc239891329b0b884c125c0ceccbf232c712cf17656fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=98=D0=BB=D1=8C=D1=8F=20=D0=98=D0=BD=D0=B4=D0=B8=D0=B3?= =?UTF-8?q?=D0=BE?= Date: Fri, 11 Jul 2025 17:08:32 +0300 Subject: [PATCH 7/8] - Updated to 2.1.20250529 (1748495995). * https://github.com/openresty/luajit2/compare/v2.1-20250117...v2.1-20250529 --- luajit2.changes | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/luajit2.changes b/luajit2.changes index 2327536..fc4b891 100644 --- a/luajit2.changes +++ b/luajit2.changes @@ -1,8 +1,7 @@ ------------------------------------------------------------------- -Wed Jul 9 09:39:48 UTC 2025 - Илья Индиго +Fri Jul 11 14:07:00 UTC 2025 - Илья Индиго - Updated to 2.1.20250529 (1748495995). - * Changed file luajit2-name.patch. 
* https://github.com/openresty/luajit2/compare/v2.1-20250117...v2.1-20250529 ------------------------------------------------------------------- -- 2.51.1 From e5b799710903889e0c62a445b2041aae54d6647f0e034e44f5da36645491291d Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Wed, 8 Oct 2025 09:25:44 +0200 Subject: [PATCH 8/8] - Updated to 2.1.20250826 (1756211046) * FFI: Fix dangling CType references (again). * Avoid out-of-range PC for stack overflow error from snapshot restore. * x86/x64: Don't use undefined MUL/IMUL zero flag. * Windows: Add lua52compat option to msvcbuild.bat. - Cherry-pick riscv64-support.patch to add RISC-V support - Cherry-pick loong64-support.patch to add LoongArch support - Increase _default_patch_fuzz to 2 to allow patches to apply --- loong64-support.patch | 10134 +++++++++++++++++++++++++++ luajit2-2.1-20250529.tar.gz | 3 - luajit2-2.1-20250826.tar.gz | 3 + luajit2.changes | 12 + luajit2.spec | 10 +- riscv64-support.patch | 12433 ++++++++++++++++++++++++++++++++++ 6 files changed, 22588 insertions(+), 7 deletions(-) create mode 100644 loong64-support.patch delete mode 100644 luajit2-2.1-20250529.tar.gz create mode 100644 luajit2-2.1-20250826.tar.gz create mode 100644 riscv64-support.patch diff --git a/loong64-support.patch b/loong64-support.patch new file mode 100644 index 0000000..178cb00 --- /dev/null +++ b/loong64-support.patch @@ -0,0 +1,10134 @@ +From 8e40aca7b3a919456b15698273e9b00e9250e769 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Sat, 2 Nov 2024 10:19:52 +0100 +Subject: [PATCH] Add support for LoongArch64 + +--- + Makefile | 1 + + dynasm/dasm_loongarch64.h | 443 ++++ + dynasm/dasm_loongarch64.lua | 979 ++++++++ + src/Makefile | 10 +- + src/host/buildvm.c | 2 + + src/host/buildvm_asm.c | 9 + + src/jit/bcsave.lua | 1 + + src/jit/dis_loongarch64.lua | 697 ++++++ + src/lib_jit.c | 3 +- + src/lj_arch.h | 32 + + src/lj_asm.c | 4 + + src/lj_asm_loongarch64.h | 1990 +++++++++++++++ + src/lj_ccall.c | 152 +- + 
src/lj_ccall.h | 17 +- + src/lj_ccallback.c | 58 +- + src/lj_emit_loongarch64.h | 306 +++ + src/lj_frame.h | 9 + + src/lj_gdbjit.c | 12 + + src/lj_jit.h | 15 +- + src/lj_target.h | 4 +- + src/lj_target_loongarch64.h | 313 +++ + src/lj_trace.c | 6 +- + src/lj_vmmath.c | 2 +- + src/vm_loongarch64.dasc | 4625 +++++++++++++++++++++++++++++++++++ + 24 files changed, 9673 insertions(+), 17 deletions(-) + create mode 100644 dynasm/dasm_loongarch64.h + create mode 100644 dynasm/dasm_loongarch64.lua + create mode 100644 src/jit/dis_loongarch64.lua + create mode 100644 src/lj_asm_loongarch64.h + create mode 100644 src/lj_emit_loongarch64.h + create mode 100644 src/lj_target_loongarch64.h + create mode 100644 src/vm_loongarch64.dasc + +diff --git a/Makefile b/Makefile +index f2e4d553b..0c6447df4 100644 +--- a/Makefile ++++ b/Makefile +@@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ + dis_mips64.lua dis_mips64el.lua \ + dis_mips64r6.lua dis_mips64r6el.lua \ + dis_riscv.lua dis_riscv64.lua \ ++ dis_loongarch64.lua \ + vmdef.lua + + ifeq (,$(findstring Windows,$(OS))) +diff --git a/dynasm/dasm_loongarch64.h b/dynasm/dasm_loongarch64.h +new file mode 100644 +index 000000000..6d232ece4 +--- /dev/null ++++ b/dynasm/dasm_loongarch64.h +@@ -0,0 +1,443 @@ ++/* ++** DynASM LoongArch encoding engine. ++** Copyright (C) 2005-2022 Mike Pall. All rights reserved. ++** Released under the MIT license. See dynasm.lua for full copyright notice. ++*/ ++ ++#include ++#include ++#include ++#include ++ ++#define DASM_ARCH "loongarch64" ++ ++#ifndef DASM_EXTERN ++#define DASM_EXTERN(a,b,c,d) 0 ++#endif ++ ++/* Action definitions. */ ++enum { ++ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, ++ /* The following actions need a buffer position. */ ++ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, ++ /* The following actions also have an argument. 
*/ ++ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMM2, ++ DASM__MAX ++}; ++ ++/* Maximum number of section buffer positions for a single dasm_put() call. */ ++#define DASM_MAXSECPOS 25 ++ ++/* DynASM encoder status codes. Action list offset or number are or'ed in. */ ++#define DASM_S_OK 0x00000000 ++#define DASM_S_NOMEM 0x01000000 ++#define DASM_S_PHASE 0x02000000 ++#define DASM_S_MATCH_SEC 0x03000000 ++#define DASM_S_RANGE_I 0x11000000 ++#define DASM_S_RANGE_SEC 0x12000000 ++#define DASM_S_RANGE_LG 0x13000000 ++#define DASM_S_RANGE_PC 0x14000000 ++#define DASM_S_RANGE_REL 0x15000000 ++#define DASM_S_UNDEF_LG 0x21000000 ++#define DASM_S_UNDEF_PC 0x22000000 ++ ++/* Macros to convert positions (8 bit section + 24 bit index). */ ++#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) ++#define DASM_POS2BIAS(pos) ((pos)&0xff000000) ++#define DASM_SEC2POS(sec) ((sec)<<24) ++#define DASM_POS2SEC(pos) ((pos)>>24) ++#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) ++ ++/* Action list type. */ ++typedef const unsigned int *dasm_ActList; ++ ++/* Per-section structure. */ ++typedef struct dasm_Section { ++ int *rbuf; /* Biased buffer pointer (negative section bias). */ ++ int *buf; /* True buffer pointer. */ ++ size_t bsize; /* Buffer size in bytes. */ ++ int pos; /* Biased buffer position. */ ++ int epos; /* End of biased buffer position - max single put. */ ++ int ofs; /* Byte offset into section. */ ++} dasm_Section; ++ ++/* Core structure holding the DynASM encoding state. */ ++struct dasm_State { ++ size_t psize; /* Allocated size of this structure. */ ++ dasm_ActList actionlist; /* Current actionlist pointer. */ ++ int *lglabels; /* Local/global chain/pos ptrs. */ ++ size_t lgsize; ++ int *pclabels; /* PC label chains/pos ptrs. */ ++ size_t pcsize; ++ void **globals; /* Array of globals. */ ++ dasm_Section *section; /* Pointer to active section. */ ++ size_t codesize; /* Total size of all code sections. 
*/ ++ int maxsection; /* 0 <= sectionidx < maxsection. */ ++ int status; /* Status code. */ ++ dasm_Section sections[1]; /* All sections. Alloc-extended. */ ++}; ++ ++/* The size of the core structure depends on the max. number of sections. */ ++#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) ++ ++ ++/* Initialize DynASM state. */ ++void dasm_init(Dst_DECL, int maxsection) ++{ ++ dasm_State *D; ++ size_t psz = 0; ++ Dst_REF = NULL; ++ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); ++ D = Dst_REF; ++ D->psize = psz; ++ D->lglabels = NULL; ++ D->lgsize = 0; ++ D->pclabels = NULL; ++ D->pcsize = 0; ++ D->globals = NULL; ++ D->maxsection = maxsection; ++ memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); ++} ++ ++/* Free DynASM state. */ ++void dasm_free(Dst_DECL) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ for (i = 0; i < D->maxsection; i++) ++ if (D->sections[i].buf) ++ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); ++ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); ++ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); ++ DASM_M_FREE(Dst, D, D->psize); ++} ++ ++/* Setup global label array. Must be called before dasm_setup(). */ ++void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) ++{ ++ dasm_State *D = Dst_REF; ++ D->globals = gl; ++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); ++} ++ ++/* Grow PC label array. Can be called after dasm_setup(), too. */ ++void dasm_growpc(Dst_DECL, unsigned int maxpc) ++{ ++ dasm_State *D = Dst_REF; ++ size_t osz = D->pcsize; ++ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); ++ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); ++} ++ ++/* Setup encoder. 
*/ ++void dasm_setup(Dst_DECL, const void *actionlist) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ D->actionlist = (dasm_ActList)actionlist; ++ D->status = DASM_S_OK; ++ D->section = &D->sections[0]; ++ memset((void *)D->lglabels, 0, D->lgsize); ++ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); ++ for (i = 0; i < D->maxsection; i++) { ++ D->sections[i].pos = DASM_SEC2POS(i); ++ D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; ++ D->sections[i].ofs = 0; ++ } ++} ++ ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) { \ ++ D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) ++#define CKPL(kind, st) \ ++ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ ++ D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) ++#else ++#define CK(x, st) ((void)0) ++#define CKPL(kind, st) ((void)0) ++#endif ++ ++static int dasm_imm2(unsigned int n) ++{ ++ if ((n >> 21) == 0) ++ return n; ++ else if ((n >> 26) == 0) ++ return n; ++ else ++ return -1; ++} ++ ++/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ ++void dasm_put(Dst_DECL, int start, ...) ++{ ++ va_list ap; ++ dasm_State *D = Dst_REF; ++ dasm_ActList p = D->actionlist + start; ++ dasm_Section *sec = D->section; ++ int pos = sec->pos, ofs = sec->ofs; ++ int *b; ++ ++ if (pos >= sec->epos) { ++ DASM_M_GROW(Dst, int, sec->buf, sec->bsize, ++ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); ++ sec->rbuf = sec->buf - DASM_POS2BIAS(pos); ++ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); ++ } ++ ++ b = sec->rbuf; ++ b[pos++] = start; ++ ++ va_start(ap, start); ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ if (action >= DASM__MAX) { ++ ofs += 4; ++ } else { ++ int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; ++ switch (action) { ++ case DASM_STOP: goto stop; ++ case DASM_SECTION: ++ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); ++ D->section = &D->sections[n]; goto stop; ++ case DASM_ESC: p++; ofs += 4; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; ++ case DASM_REL_LG: ++ n = (ins & 2047) - 10; pl = D->lglabels + n; ++ /* Bkwd rel or global. */ ++ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } ++ pl += 10; n = *pl; ++ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ ++ goto linkrel; ++ case DASM_REL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putrel: ++ n = *pl; ++ if (n < 0) { /* Label exists. Get label pos and store it. */ ++ b[pos] = -n; ++ } else { ++ linkrel: ++ b[pos] = n; /* Else link to rel chain, anchored at label. */ ++ *pl = pos; ++ } ++ pos++; ++ break; ++ case DASM_LABEL_LG: ++ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; ++ case DASM_LABEL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putlabel: ++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; ++ } ++ *pl = -pos; /* Label exists now. */ ++ b[pos++] = ofs; /* Store pass1 offset estimate. */ ++ break; ++ case DASM_IMM: ++#ifdef DASM_CHECKS ++ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); ++#endif ++ n >>= ((ins>>10)&31); ++#ifdef DASM_CHECKS ++ if (ins & 0x8000) ++ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); ++ else ++ CK((n>>((ins>>5)&31)) == 0, RANGE_I); ++#endif ++ b[pos++] = n; ++ break; ++ case DASM_IMM2: ++ CK(dasm_imm2((unsigned int)n) != -1, RANGE_I); ++ b[pos++] = n; ++ break; ++ } ++ } ++ } ++stop: ++ va_end(ap); ++ sec->pos = pos; ++ sec->ofs = ofs; ++} ++#undef CK ++ ++/* Pass 2: Link sections, shrink aligns, fix label offsets. 
*/ ++int dasm_link(Dst_DECL, size_t *szp) ++{ ++ dasm_State *D = Dst_REF; ++ int secnum; ++ int ofs = 0; ++ ++#ifdef DASM_CHECKS ++ *szp = 0; ++ if (D->status != DASM_S_OK) return D->status; ++ { ++ int pc; ++ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) ++ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; ++ } ++#endif ++ ++ { /* Handle globals not defined in this translation unit. */ ++ int idx; ++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { ++ int n = D->lglabels[idx]; ++ /* Undefined label: Collapse rel chain and replace with marker (< 0). */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } ++ } ++ } ++ ++ /* Combine all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->rbuf; ++ int pos = DASM_SEC2POS(secnum); ++ int lastpos = sec->pos; ++ ++ while (pos != lastpos) { ++ dasm_ActList p = D->actionlist + b[pos++]; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: p++; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; ++ case DASM_REL_LG: case DASM_REL_PC: pos++; break; ++ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; ++ case DASM_IMM: case DASM_IMM2: pos++; break; ++ } ++ } ++ stop: (void)0; ++ } ++ ofs += sec->ofs; /* Next section starts right after current section. */ ++ } ++ ++ D->codesize = ofs; /* Total size of all code sections */ ++ *szp = ofs; ++ return DASM_S_OK; ++} ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0) ++#else ++#define CK(x, st) ((void)0) ++#endif ++ ++/* Pass 3: Encode sections. 
*/ ++int dasm_encode(Dst_DECL, void *buffer) ++{ ++ dasm_State *D = Dst_REF; ++ char *base = (char *)buffer; ++ unsigned int *cp = (unsigned int *)buffer; ++ int secnum; ++ ++ /* Encode all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->buf; ++ int *endb = sec->rbuf + sec->pos; ++ ++ while (b != endb) { ++ dasm_ActList p = D->actionlist + *b++; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: *cp++ = *p++; break; ++ case DASM_REL_EXT: ++ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); ++ goto patchrel; ++ case DASM_ALIGN: ++ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; ++ break; ++ case DASM_REL_LG: ++ if (n < 0) { ++ n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4); ++ goto patchrel; ++ } ++ /* fallthrough */ ++ case DASM_REL_PC: ++ CK(n >= 0, UNDEF_PC); ++ n = *DASM_POS2PTR(D, n); ++ if (ins & 2048) ++ n = (n + (int)(size_t)base) & 0x0fffffff; ++ else ++ n = n - (int)((char *)cp - base) + 4; ++ patchrel: { ++ unsigned int e = 16 + ((ins >> 12) & 15); ++ CK((n & 3) == 0 && ++ ((n + ((ins & 2048) ? 
0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL); ++ if (!(ins & 0xf800)) { /* BEQ, BNE, BLT, BGE, BLTU, BGEU */ ++ cp[-1] |= (((n >> 2) & 0xffff) << 10); ++ } else if ((ins & 0x5000)) { /* BEQZ, BNEZ, BCEQZ, BCNEZ */ ++ cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x1f0000) >> 16); ++ } else if ((ins & 0xa000)) { /* B, BL */ ++ cp[-1] |= (((n >> 2) & 0xffff) << 10) | (((n >> 2) & 0x3ff0000) >> 16); ++ } ++ } ++ break; ++ case DASM_LABEL_LG: ++ ins &= 2047; if (ins >= 20) D->globals[ins-20] = (void *)(base + n); ++ break; ++ case DASM_LABEL_PC: break; ++ case DASM_IMM2: { ++ //cp[-1] |= ((n>>3) & 4); n &= 0x1f; ++ unsigned int imm2n = dasm_imm2((unsigned int)n); ++ cp[-1] |= ((imm2n&0x3ff0000) | ((imm2n&0xffff))>>10); ++ } ++ break; ++ /* fallthrough */ ++ case DASM_IMM: ++ cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); ++ break; ++ default: *cp++ = ins; break; ++ } ++ } ++ stop: (void)0; ++ } ++ } ++ ++ if (base + D->codesize != (char *)cp) /* Check for phase errors. */ ++ return DASM_S_PHASE; ++ return DASM_S_OK; ++} ++#undef CK ++ ++/* Get PC label offset. */ ++int dasm_getpclabel(Dst_DECL, unsigned int pc) ++{ ++ dasm_State *D = Dst_REF; ++ if (pc*sizeof(int) < D->pcsize) { ++ int pos = D->pclabels[pc]; ++ if (pos < 0) return *DASM_POS2PTR(D, -pos); ++ if (pos > 0) return -1; /* Undefined. */ ++ } ++ return -2; /* Unused or out of range. */ ++} ++ ++#ifdef DASM_CHECKS ++/* Optional sanity checker to call between isolated encoding steps. 
*/ ++int dasm_checkstep(Dst_DECL, int secmatch) ++{ ++ dasm_State *D = Dst_REF; ++ if (D->status == DASM_S_OK) { ++ int i; ++ for (i = 1; i <= 9; i++) { ++ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } ++ D->lglabels[i] = 0; ++ } ++ } ++ if (D->status == DASM_S_OK && secmatch >= 0 && ++ D->section != &D->sections[secmatch]) ++ D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); ++ return D->status; ++} ++#endif ++ +diff --git a/dynasm/dasm_loongarch64.lua b/dynasm/dasm_loongarch64.lua +new file mode 100644 +index 000000000..ba6bf67e3 +--- /dev/null ++++ b/dynasm/dasm_loongarch64.lua +@@ -0,0 +1,979 @@ ++------------------------------------------------------------------------------ ++-- DynASM LoongArch module. ++-- ++-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. ++-- See dynasm.lua for full copyright notice. ++------------------------------------------------------------------------------ ++ ++-- Module information: ++local _info = { ++ arch = "loongarch64", ++ description = "DynASM LoongArch64 module", ++ version = "1.5.0", ++ vernum = 10500, ++ release = "2021-05-02", ++ author = "Mike Pall", ++ license = "MIT", ++} ++ ++-- Exported glue functions for the arch-specific module. ++local _M = { _info = _info } ++ ++-- Cache library functions. ++local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs ++local assert, setmetatable = assert, setmetatable ++local _s = string ++local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char ++local match, gmatch = _s.match, _s.gmatch ++local concat, sort = table.concat, table.sort ++local bit = bit or require("bit") ++local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift ++local tohex = bit.tohex ++ ++-- Inherited tables and callbacks. ++local g_opt, g_arch ++local wline, werror, wfatal, wwarn ++ ++-- Action name list. ++-- CHECK: Keep this in sync with the C code! 
++local action_names = { ++ "STOP", "SECTION", "ESC", "REL_EXT", ++ "ALIGN", "REL_LG", "LABEL_LG", ++ "REL_PC", "LABEL_PC", "IMM", "IMM2", ++} ++ ++-- Maximum number of section buffer positions for dasm_put(). ++-- CHECK: Keep this in sync with the C code! ++local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. ++ ++-- Action name -> action number. ++local map_action = {} ++for n,name in ipairs(action_names) do ++ map_action[name] = n-1 ++end ++ ++-- Action list buffer. ++local actlist = {} ++ ++-- Argument list for next dasm_put(). Start with offset 0 into action list. ++local actargs = { 0 } ++ ++-- Current number of section buffer positions for dasm_put(). ++local secpos = 1 ++ ++------------------------------------------------------------------------------ ++ ++-- Dump action names and numbers. ++local function dumpactions(out) ++ out:write("DynASM encoding engine action codes:\n") ++ for n,name in ipairs(action_names) do ++ local num = map_action[name] ++ out:write(format(" %-10s %02X %d\n", name, num, num)) ++ end ++ out:write("\n") ++end ++ ++-- Write action list buffer as a huge static C array. ++local function writeactions(out, name) ++ local nn = #actlist ++ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end ++ out:write("static const unsigned int ", name, "[", nn, "] = {\n") ++ for i = 1,nn-1 do ++ assert(out:write("0x", tohex(actlist[i]), ",\n")) ++ end ++ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Add word to action list. ++local function wputxw(n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[#actlist+1] = n ++end ++ ++-- Add action to list with optional arg. Advance buffer pos, too. 
++local function waction(action, val, a, num) ++ local w = assert(map_action[action], "bad action name `"..action.."'") ++ wputxw(0xff000000 + w * 0x10000 + (val or 0)) ++ if a then actargs[#actargs+1] = a end ++ if a or num then secpos = secpos + (num or 1) end ++end ++ ++-- Flush action list (intervening C code or buffer pos overflow). ++local function wflush(term) ++ if #actlist == actargs[1] then return end -- Nothing to flush. ++ if not term then waction("STOP") end -- Terminate action list. ++ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) ++ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). ++ secpos = 1 -- The actionlist offset occupies a buffer position, too. ++end ++ ++-- Put escaped word. ++local function wputw(n) ++ if n >= 0xff000000 then waction("ESC") end ++ wputxw(n) ++end ++ ++-- Reserve position for word. ++local function wpos() ++ local pos = #actlist+1 ++ actlist[pos] = "" ++ return pos ++end ++ ++-- Store word to reserved position. ++local function wputpos(pos, n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[pos] = n ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Global label name -> global label number. With auto assignment on 1st use. ++local next_global = 20 ++local map_global = setmetatable({}, { __index = function(t, name) ++ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end ++ local n = next_global ++ if n > 2047 then werror("too many global labels") end ++ next_global = n + 1 ++ t[name] = n ++ return n ++end}) ++ ++-- Dump global labels. ++local function dumpglobals(out, lvl) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("Global labels:\n") ++ for i=20,next_global-1 do ++ out:write(format(" %s\n", t[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write global label enum. 
++local function writeglobals(out, prefix) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("enum {\n") ++ for i=20,next_global-1 do ++ out:write(" ", prefix, t[i], ",\n") ++ end ++ out:write(" ", prefix, "_MAX\n};\n") ++end ++ ++-- Write global label names. ++local function writeglobalnames(out, name) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=20,next_global-1 do ++ out:write(" \"", t[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Extern label name -> extern label number. With auto assignment on 1st use. ++local next_extern = 0 ++local map_extern_ = {} ++local map_extern = setmetatable({}, { __index = function(t, name) ++ -- No restrictions on the name for now. ++ local n = next_extern ++ if n > 2047 then werror("too many extern labels") end ++ next_extern = n + 1 ++ t[name] = n ++ map_extern_[n] = name ++ return n ++end}) ++ ++-- Dump extern labels. ++local function dumpexterns(out, lvl) ++ out:write("Extern labels:\n") ++ for i=0,next_extern-1 do ++ out:write(format(" %s\n", map_extern_[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write extern label names. ++local function writeexternnames(out, name) ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=0,next_extern-1 do ++ out:write(" \"", map_extern_[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Arch-specific maps. ++local map_archdef = { sp="r3", ra="r1" } -- Ext. register name -> int. name. ++ ++local map_type = {} -- Type name -> { ctype, reg } ++local ctypenum = 0 -- Type number (for Dt... macros). ++ ++-- Reverse defines for registers. 
++function _M.revdef(s) ++ if s == "r3" then return "sp" ++ elseif s == "r1" then return "ra" end ++ return s ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Template strings for LoongArch instructions. ++local map_op = { ++ ["clo.w_2"] = "00001000DJ", ++ ["clz.w_2"] = "00001400DJ", ++ ["cto.w_2"] = "00001800DJ", ++ ["ctz.w_2"] = "00001c00DJ", ++ ["clo.d_2"] = "00002000DJ", ++ ["clz.d_2"] = "00002400DJ", ++ ["cto.d_2"] = "00002800DJ", ++ ["ctz.d_2"] = "00002c00DJ", ++ ["revb.2h_2"] = "00003000DJ", ++ ["revb.4h_2"] = "00003400DJ", ++ ["revb.2w_2"] = "00003800DJ", ++ ["revb.d_2"] = "00003c00DJ", ++ ["revh.2w_2"] = "00004000DJ", ++ ["revh.d_2"] = "00004400DJ", ++ ["bitrev.4b_2"] = "00004800DJ", ++ ["bitrev.8b_2"] = "00004c00DJ", ++ ["bitrev.w_2"] = "00005000DJ", ++ ["bitrev.d_2"] = "00005400DJ", ++ ["ext.w.h_2"] = "00005800DJ", ++ ["ext.w.b_2"] = "00005c00DJ", ++ ++ ["add.w_3"] = "00100000DJK", ++ ["add.d_3"] = "00108000DJK", ++ ["sub.w_3"] = "00110000DJK", ++ ["sub.d_3"] = "00118000DJK", ++ slt_3 = "00120000DJK", ++ sltu_3 = "00128000DJK", ++ maskeqz_3 = "00130000DJK", ++ masknez_3 = "00138000DJK", ++ ++ nor_3 = "00140000DJK", ++ and_3 = "00148000DJK", ++ or_3 = "00150000DJK", ++ xor_3 = "00158000DJK", ++ orn_3 = "00160000DJK", ++ andn_3 = "00168000DJK", ++ ["sll.w_3"] = "00170000DJK", ++ ["srl.w_3"] = "00178000DJK", ++ ["sra.w_3"] = "00180000DJK", ++ ["sll.d_3"] = "00188000DJK", ++ ["srl.d_3"] = "00190000DJK", ++ ["sra.d_3"] = "00198000DJK", ++ ["rotr.w_3"] = "001b0000DJK", ++ ["rotr.d_3"] = "001b8000DJK", ++ ["mul.w_3"] = "001c0000DJK", ++ ["mulh.w_3"] = "001c8000DJK", ++ ["mulh.wu_3"] = "001d0000DJK", ++ ["mul.d_3"] = "001d8000DJK", ++ ["mulh.d_3"] = "001e0000DJK", ++ ["mulh.du_3"] = "001e8000DJK", ++ ["mulw.d.w_3"] = "001f0000DJK", ++ ["mulw.d.wu_3"] = "001f8000DJK", ++ ++ ["fabs.h_2"] = "01140000FG", ++ ["fabs.s_2"] = "01140400FG", ++ ["fabs.d_2"] = "01140800FG", ++ ["fneg.h_2"] = "01141000FG", ++ ["fneg.s_2"] = 
"01141400FG", ++ ["fneg.d_2"] = "01141800FG", ++ ["flogb.h_2"] = "01142000FG", ++ ["flogb.s_2"] = "01142400FG", ++ ["flogb.d_2"] = "01142800FG", ++ ["fclass.h_2"] = "01143000FG", ++ ["fclass.s_2"] = "01143400FG", ++ ["fclass.d_2"] = "01143800FG", ++ ["fsqrt.h_2"] = "01144000FG", ++ ["fsqrt.s_2"] = "01144400FG", ++ ["fsqrt.d_2"] = "01144800FG", ++ ["frecip.h_2"] = "01145000FG", ++ ["frecip.s_2"] = "01145400FG", ++ ["frecip.d_2"] = "01145800FG", ++ ["frsqrt.h_2"] = "01146000FG", ++ ["frsqrt.s_2"] = "01146400FG", ++ ["frsqrt.d_2"] = "01146800FG", ++ ["frecipe.h_2"] = "01147000FG", ++ ["frecipe.s_2"] = "01147400FG", ++ ["frecipe.d_2"] = "01147800FG", ++ ["frsqrte.h_2"] = "01148000FG", ++ ["frsqrte.s_2"] = "01148400FG", ++ ["frsqrte.d_2"] = "01148800FG", ++ ++ ["fmov.h_2"] = "01149000FG", ++ ["fmov.s_2"] = "01149400FG", ++ ["fmov.d_2"] = "01149800FG", ++ ["movgr2fr.h_2"] = "0114a000FJ", ++ ["movgr2fr.w_2"] = "0114a400FJ", ++ ["movgr2fr.d_2"] = "0114a800FJ", ++ ["movgr2frh.w_2"] = "0114ac00FJ", ++ ["movfr2gr.h_2"] = "0114b000DG", ++ ["movfr2gr.s_2"] = "0114b400DG", ++ ["movfr2gr.d_2"] = "0114b800DG", ++ ["movfrh2gr.s_2"] = "0114bc00DG", ++ movgr2fcsr_2 = "0114c000SG", ++ movfcsr2gr_2 = "0114c800FR", ++ movfr2cf_2 = "0114d000EG", ++ movcf2fr_2 = "0114d400FA", ++ movgr2cf_2 = "0114d800EG", ++ movcf2gr_2 = "0114dc00DA", ++ ["fcvt.ld.d_2"] = "0114e000FG", ++ ["fcvt.ud.d_2"] = "0114e400FG", ++ ["fcvt.s.d_2"] = "01191800FG", ++ ["fcvt.d.s_2"] = "01192400FG", ++ ["ftintrm.w.s_2"] = "011a0400FG", ++ ["ftintrm.w.d_2"] = "011a0800FG", ++ ["ftintrm.l.s_2"] = "011a2400FG", ++ ["ftintrm.l.d_2"] = "011a2800FG", ++ ["ftintrp.w.s_2"] = "011a4400FG", ++ ["ftintrp.w.d_2"] = "011a4800FG", ++ ["ftintrp.l.s_2"] = "011a6400FG", ++ ["ftintrp.l.d_2"] = "011a6800FG", ++ ["ftintrz.w.s_2"] = "011a8400FG", ++ ["ftintrz.w.d_2"] = "011a8800FG", ++ ["ftintrz.l.s_2"] = "011aa400FG", ++ ["ftintrz.l.d_2"] = "011aa800FG", ++ ["ftintrne.w.s_2"] = "011ac400FG", ++ ["ftintrne.w.d_2"] = "011ac800FG", ++ 
["ftintrne.l.s_2"] = "011ae400FG", ++ ["ftintrne.l.d_2"] = "011ae800FG", ++ ["ftint.w.s_2"] = "011b0400FG", ++ ["ftint.w.d_2"] = "011b0800FG", ++ ["ftint.l.s_2"] = "011b2400FG", ++ ["ftint.l.d_2"] = "011b2800FG", ++ ["ffint.s.w_2"] = "011d1000FG", ++ ["ffint.s.l_2"] = "011d1800FG", ++ ["ffint.d.w_2"] = "011d2000FG", ++ ["ffint.d.l_2"] = "011d2800FG", ++ ["frint.s_2"] = "011e4400FG", ++ ["frint.d_2"] = "011e4800FG", ++ ++ ["fadd.h_3"] = "01000000FGH", ++ ["fadd.s_3"] = "01008000FGH", ++ ["fadd.d_3"] = "01010000FGH", ++ ["fsub.h_3"] = "01020000FGH", ++ ["fsub.s_3"] = "01028000FGH", ++ ["fsub.d_3"] = "01030000FGH", ++ ["fmul.h_3"] = "01040000FGH", ++ ["fmul.s_3"] = "01048000FGH", ++ ["fmul.d_3"] = "01050000FGH", ++ ["fdiv.h_3"] = "01060000FGH", ++ ["fdiv.s_3"] = "01068000FGH", ++ ["fdiv.d_3"] = "01070000FGH", ++ ["fmax.h_3"] = "01080000FGH", ++ ["fmax.s_3"] = "01088000FGH", ++ ["fmax.d_3"] = "01090000FGH", ++ ["fmin.h_3"] = "010a0000FGH", ++ ["fmin.s_3"] = "010a8000FGH", ++ ["fmin.d_3"] = "010b0000FGH", ++ ["fmaxa.h_3"] = "010c0000FGH", ++ ["fmaxa.s_3"] = "010c8000FGH", ++ ["fmaxa.d_3"] = "010d0000FGH", ++ ["fmina.h_3"] = "010e0000FGH", ++ ["fmina.s_3"] = "010e8000FGH", ++ ["fmina.d_3"] = "010f0000FGH", ++ ["fscaleb.h_3"] = "01100000FGH", ++ ["fscaleb.s_3"] = "01108000FGH", ++ ["fscaleb.d_3"] = "01110000FGH", ++ ["fcopysign.h_3"] = "01120000FGH", ++ ["fcopysign.s_3"] = "01128000FGH", ++ ["fcopysign.d_3"] = "01130000FGH", ++ ++ ["fmadd.s_4"] = "08100000FGHi", ++ ["fmadd.d_4"] = "08200000FGHi", ++ ["fnmadd.d_4"] = "08a00000FGHi", ++ ["fmsub.s_4"] = "08500000FGHi", ++ ["fmsub.d_4"] = "08600000FGHi", ++ ["fnmsub.d_4"] = "08e00000FGHi", ++ ++ ["alsl.w_4"] = "00040000DJKQ", ++ ["alsl.wu_4"] = "00060000DJKQ", ++ ["alsl.d_4"] = "002c0000DJKQ", ++ ["bytepick.w_4"] = "00080000DJKQ", ++ ["bytepick.d_4"] = "000c0000DJKB", ++ ++ ["div.w_3"] = "00200000DJK", ++ ["mod.w_3"] = "00208000DJK", ++ ["div.wu_3"] = "00210000DJK", ++ ["mod.wu_3"] = "00218000DJK", ++ ["div.d_3"] = 
"00220000DJK", ++ ["mod.d_3"] = "00228000DJK", ++ ["div.du_3"] = "00230000DJK", ++ ["mod.du_3"] = "00238000DJK", ++ ["crc.w.b.w_3"] = "00240000DJK", ++ ["crc.w.h.w_3"] = "00248000DJK", ++ ["crc.w.w.w_3"] = "00250000DJK", ++ ["crc.w.d.w_3"] = "00258000DJK", ++ ["crcc.w.b.w_3"] = "00260000DJK", ++ ["crcc.w.h.w_3"] = "00268000DJK", ++ ["crcc.w.w.w_3"] = "00270000DJK", ++ ["crcc.w.d.w_3"] = "00278000DJK", ++ ++ break_1 = "002a0000C", ++ syscall_1 = "002b0000C", ++ ++ ["slli.w_3"] = "00408000DJU", ++ ["slli.d_3"] = "00410000DJV", ++ ["srli.w_3"] = "00448000DJU", ++ ["srli.d_3"] = "00450000DJV", ++ ["srai.w_3"] = "00488000DJU", ++ ["srai.d_3"] = "00490000DJV", ++ ["rotri.w_3"] = "004c8000DJU", ++ ["rotri.d_3"] = "004d0000DJV", ++ ++ ["bstrins.w_4"] = "00600000DJMU", ++ ["bstrpick.w_4"] = "00608000DJMU", ++ ["bstrins.d_4"] = "00800000DJNV", ++ ["bstrpick.d_4"] = "00c00000DJNV", ++ slti_3 = "02000000DJX", ++ sltui_3 = "02400000DJX", ++ ["addi.w_3"] = "02800000DJX", ++ ["addi.d_3"] = "02c00000DJX", ++ ["lu52i.d_3"] = "03000000DJX", ++ andi_3 = "03400000DJT", ++ ori_3 = "03800000DJT", ++ xori_3 = "03c00000DJT", ++ ["lu12i.w_2"] = "14000000DZ", ++ ["lu32i.d_2"] = "16000000DZ", ++ pcaddi_2 = "18000000DZ", ++ pcalau12i_2 = "1a000000DZ", ++ pcaddu12i_2 = "1c000000DZ", ++ pcaddu18i_2 = "1e000000DZ", ++ ++ ["ldx.b_3"] = "38000000DJK", ++ ["ldx.h_3"] = "38040000DJK", ++ ["ldx.w_3"] = "38080000DJK", ++ ["ldx.d_3"] = "380c0000DJK", ++ ["stx.b_3"] = "38100000DJK", ++ ["stx.h_3"] = "38140000DJK", ++ ["stx.w_3"] = "38180000DJK", ++ ["stx.d_3"] = "381c0000DJK", ++ ["ldx.bu_3"] = "38200000DJK", ++ ["ldx.hu_3"] = "38240000DJK", ++ ["ldx.wu_3"] = "38280000DJK", ++ ["fldx.s_3"] = "38300000FJK", ++ ["fldx.d_3"] = "38340000FJK", ++ ["fstx.s_3"] = "38380000FJK", ++ ["fstx.d_3"] = "383c0000FJK", ++ ["fldgt.s_3"] = "38740000FJK", ++ ["fldgt.d_3"] = "38748000FJK", ++ ["fldle.s_3"] = "38750000FJK", ++ ["fldle.d_3"] = "38758000FJK", ++ ["fstgt.s_3"] = "38760000FJK", ++ ["fstgt.d_3"] = "38768000FJK", 
++ ["fstle.s_3"] = "38770000FJK", ++ ["fstle.d_3"] = "38778000FJK", ++ ["ldgt.b_3"] = "38780000DJK", ++ ["ldgt.h_3"] = "38788000DJK", ++ ["ldgt.w_3"] = "38790000DJK", ++ ["ldgt.d_3"] = "38798000DJK", ++ ["ldle.b_3"] = "387a0000DJK", ++ ["ldle.h_3"] = "387a8000DJK", ++ ["ldle.w_3"] = "387b0000DJK", ++ ["ldle.d_3"] = "387b8000DJK", ++ ["stgt.b_3"] = "387c0000DJK", ++ ["stgt.h_3"] = "387c8000DJK", ++ ["stgt.w_3"] = "387d0000DJK", ++ ["stgt.d_3"] = "387d8000DJK", ++ ["stle.b_3"] = "387e0000DJK", ++ ["stle.h_3"] = "387e8000DJK", ++ ["stle.w_3"] = "387f0000DJK", ++ ["stle.d_3"] = "387f8000DJK", ++ ++ ["ll.w_3"] = "20000000DJW", ++ ["sc.w_3"] = "21000000DJW", ++ ["ll.d_3"] = "22000000DJW", ++ ["sc.d_3"] = "23000000DJW", ++ ["ldptr.w_3"] = "24000000DJW", ++ ["stptr.w_3"] = "25000000DJW", ++ ["ldptr.d_3"] = "26000000DJW", ++ ["stptr.d_3"] = "27000000DJW", ++ ++ ["ld.b_2"] = "28000000Do", ++ ["ld.h_2"] = "28400000Do", ++ ["ld.w_2"] = "28800000Do", ++ ["ld.d_2"] = "28c00000Do", ++ ["st.b_2"] = "29000000Do", ++ ["st.h_2"] = "29400000Do", ++ ["st.w_2"] = "29800000Do", ++ ["st.d_2"] = "29c00000Do", ++ ["ld.bu_2"] = "2a000000Do", ++ ["ld.hu_2"] = "2a400000Do", ++ ["ld.wu_2"] = "2a800000Do", ++ ["ldx.d_3"] = "380c0000DJK", ++ ["stx.d_3"] = "381c0000DJK", ++ ["fld.s_2"] = "2b000000Fo", ++ ["fst.s_2"] = "2b400000Fo", ++ ["fld.d_2"] = "2b800000Fo", ++ ["fst.d_2"] = "2bc00000Fo", ++ ++ ["fcmp.caf.s_3"] = "0c100000EGH", ++ ["fcmp.saf.s_3"] = "0c108000EGH", ++ ["fcmp.clt.s_3"] = "0c110000EGH", ++ ["fcmp.slt.s_3"] = "0c118000EGH", ++ ["fcmp.ceq.s_3"] = "0c120000EGH", ++ ["fcmp.seq.s_3"] = "0c128000EGH", ++ ["fcmp.cle.s_3"] = "0c130000EGH", ++ ["fcmp.sle.s_3"] = "0c138000EGH", ++ ["fcmp.cun.s_3"] = "0c140000EGH", ++ ["fcmp.sun.s_3"] = "0c148000EGH", ++ ["fcmp.cult.s_3"] = "0c150000EGH", ++ ["fcmp.sult.s_3"] = "0c158000EGH", ++ ["fcmp.cueq.s_3"] = "0c160000EGH", ++ ["fcmp.sueq.s_3"] = "0c168000EGH", ++ ["fcmp.cule.s_3"] = "0c170000EGH", ++ ["fcmp.sule.s_3"] = "0c178000EGH", ++ 
["fcmp.cne.s_3"] = "0c180000EGH", ++ ["fcmp.sne.s_3"] = "0c188000EGH", ++ ["fcmp.cor.s_3"] = "0c1a0000EGH", ++ ["fcmp.sor.s_3"] = "0c1a8000EGH", ++ ["fcmp.cune.s_3"] = "0c1c0000EGH", ++ ["fcmp.sune.s_3"] = "0c1c8000EGH", ++ ["fcmp.caf.d_3"] = "0c200000EGH", ++ ["fcmp.saf.d_3"] = "0c208000EGH", ++ ["fcmp.clt.d_3"] = "0c210000EGH", ++ ["fcmp.slt.d_3"] = "0c218000EGH", ++ ["fcmp.ceq.d_3"] = "0c220000EGH", ++ ["fcmp.seq.d_3"] = "0c228000EGH", ++ ["fcmp.cle.d_3"] = "0c230000EGH", ++ ["fcmp.sle.d_3"] = "0c238000EGH", ++ ["fcmp.cun.d_3"] = "0c240000EGH", ++ ["fcmp.sun.d_3"] = "0c248000EGH", ++ ["fcmp.cult.d_3"] = "0c250000EGH", ++ ["fcmp.sult.d_3"] = "0c258000EGH", ++ ["fcmp.cueq.d_3"] = "0c260000EGH", ++ ["fcmp.sueq.d_3"] = "0c268000EGH", ++ ["fcmp.cule.d_3"] = "0c270000EGH", ++ ["fcmp.sule.d_3"] = "0c278000EGH", ++ ["fcmp.cne.d_3"] = "0c280000EGH", ++ ["fcmp.sne.d_3"] = "0c288000EGH", ++ ["fcmp.cor.d_3"] = "0c2a0000EGH", ++ ["fcmp.sor.d_3"] = "0c2a8000EGH", ++ ["fcmp.cune.d_3"] = "0c2c0000EGH", ++ ["fcmp.sune.d_3"] = "0c2c8000EGH", ++ ++ fsel_4 = "0d000000FGHI", ++ ++ ["addu16i.d_3"] = "10000000DJY", ++ beqz_2 = "40000000JL", ++ bnez_2 = "44000000JL", ++ bceqz_2 = "48000000AL", ++ bcnez_2 = "48000100AL", ++ jirl_3 = "4c000000DJa", ++ b_1 = "50000000P", ++ bl_1 = "54000000P", ++ beq_3 = "58000000JDO", ++ bne_3 = "5c000000JDO", ++ blt_3 = "60000000JDO", ++ bge_3 = "64000000JDO", ++ bltu_3 = "68000000JDO", ++ bgeu_3 = "6c000000JDO", ++} ++ ++------------------------------------------------------------------------------ ++ ++local function parse_gpr(expr) ++ local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") ++ local tp = map_type[tname or expr] ++ if tp then ++ local reg = ovreg or tp.reg ++ if not reg then ++ werror("type `"..(tname or expr).."' needs a register override") ++ end ++ expr = reg ++ end ++ local r = match(expr, "^r([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r, tp end ++ end ++ werror("bad register name `"..expr.."'") 
++end ++ ++local function parse_fpr(expr) ++ local r = match(expr, "^f([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fcsr(expr) ++ local r = match(expr, "^fcsr([0-3])$") ++ if r then ++ r = tonumber(r) ++ return r ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fcc(expr) ++ local r = match(expr, "^fcc([0-7])$") ++ if r then ++ r = tonumber(r) ++ return r ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_imm(imm, bits, shift, scale, signed, action) ++ local n = tonumber(imm) ++ if n then ++ local m = sar(n, scale) ++ if shl(m, scale) == n then ++ if signed then ++ local s = sar(m, bits-1) ++ if s == 0 or s == 1 then return shl(m, shift) ++ elseif s == -1 then return shl(m + shl(1, bits), shift) end ++ else ++ if sar(m, bits) == 0 then return shl(m, shift) end ++ end ++ end ++ werror("out of range immediate1 `"..imm.."'") ++ elseif match(imm, "^[rf]([1-3]?[0-9])$") or ++ match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then ++ werror("expected immediate operand, got register") ++ else ++ waction(action or "IMM", ++ (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) ++ return 0 ++ end ++end ++ ++local function parse_imm21or26(imm, i) ++ local n = tonumber(imm) ++ if n then ++ -- signed ++ local m = sar(n, 0) ++ if shl(m, 0) == n then ++ local s = sar(m, i-1) ++ if s == 0 then ++ return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0) ++ elseif s == -1 then ++ return shl(sub(m, 1, 16), 10) + shl(sub(m, 17, i), 0) ++ end ++ end ++ werror("out of range immediate2 `"..imm.."'") ++ else ++ waction("IMM2", 0, imm) ++ return 0 ++ end ++end ++ ++local function parse_disp(disp) ++ local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") ++ if imm then ++ local r = shl(parse_gpr(reg), 5) ++ local extname = match(imm, "^extern%s+(%S+)$") ++ if extname then ++ waction("REL_EXT", 
map_extern[extname], nil, 1) ++ return r ++ else ++ return r + parse_imm(imm, 12, 10, 0, true) ++ end ++ end ++ local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") ++ if reg and tailr ~= "" then ++ local r, tp = parse_gpr(reg) ++ if tp then ++ waction("IMM", 32768+12*32+10, format(tp.ctypefmt, tailr)) ++ return shl(r, 5) ++ end ++ end ++ werror("bad displacement `"..disp.."'") ++end ++ ++local function parse_label(label, def) ++ local prefix = sub(label, 1, 2) ++ -- =>label (pc label reference) ++ if prefix == "=>" then ++ return "PC", 0, sub(label, 3) ++ end ++ -- ->name (global label reference) ++ if prefix == "->" then ++ return "LG", map_global[sub(label, 3)] ++ end ++ if def then ++ -- [1-9] (local label definition) ++ if match(label, "^[1-9]$") then ++ return "LG", 10+tonumber(label) ++ end ++ else ++ -- [<>][1-9] (local label reference) ++ local dir, lnum = match(label, "^([<>])([1-9])$") ++ if dir then -- Fwd: 1-9, Bkwd: 11-19. ++ return "LG", lnum + (dir == ">" and 0 or 10) ++ end ++ -- extern label (extern label reference) ++ local extname = match(label, "^extern%s+(%S+)$") ++ if extname then ++ return "EXT", map_extern[extname] ++ end ++ end ++ werror("bad label `"..label.."'") ++end ++ ++local function branch_type(op) ++ if shr(op, 26) == 0x16 or shr(op, 26) == 0x17 or shr(op, 26) == 0x18 or ++ shr(op, 26) == 0x19 or shr(op, 26) == 0x1a or shr(op, 26) == 0x1b then ++ return 0 -- BEQ, BNE, BLT, BGE, BLTU, BGEU ++ elseif shr(op, 26) == 0x10 or shr(op, 26) == 0x11 or shr(op, 26) == 0x12 then ++ return 0x5000 -- BEQZ, BNEZ, BCEQZ, BCNEZ ++ elseif band(op, 0xf8000000) == 0x50000000 then return 0xa000 --B, BL ++ else ++ assert(false, "unknown branch type") ++ end ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Handle opcodes defined with template strings. 
++map_op[".template__"] = function(params, template, nparams) ++ if not params then return sub(template, 9) end ++ local op = tonumber(sub(template, 1, 8), 16) ++ local n = 1 ++ ++ -- Limit number of section buffer positions used by a single dasm_put(). ++ -- A single opcode needs a maximum of 2 positions (ins/ext). ++ if secpos+2 > maxsecpos then wflush() end ++ local pos = wpos() ++ ++ -- Process each character. ++ for p in gmatch(sub(template, 9), ".") do ++ if p == "D" then ++ op = op + shl(parse_gpr(params[n]), 0); n = n + 1 ++ elseif p == "J" then ++ op = op + shl(parse_gpr(params[n]), 5); n = n + 1 ++ elseif p == "K" then ++ op = op + shl(parse_gpr(params[n]), 10); n = n + 1 ++ elseif p == "F" then ++ op = op + shl(parse_fpr(params[n]), 0); n = n + 1 ++ elseif p == "G" then ++ op = op + shl(parse_fpr(params[n]), 5); n = n + 1 ++ elseif p == "H" then ++ op = op + shl(parse_fpr(params[n]), 10); n = n + 1 ++ elseif p == "i" then ++ op = op + shl(parse_fpr(params[n]), 15); n = n + 1 ++ elseif p == "I" then ++ op = op + shl(parse_fcc(params[n]), 15); n = n + 1 ++ elseif p == "A" then ++ op = op + shl(parse_fcc(params[n]), 5); n = n + 1 ++ elseif p == "E" then ++ op = op + shl(parse_fcc(params[n]), 0); n = n + 1 ++ elseif p == "S" then ++ op = op + shl(parse_fcsr(params[n]), 0); n = n + 1 ++ elseif p == "R" then ++ op = op + shl(parse_fcsr(params[n]), 5); n = n + 1 ++ elseif p == "U" then ++ op = op + parse_imm(params[n], 5, 10, 0, false); n = n + 1 ++ elseif p == "V" then ++ op = op + parse_imm(params[n], 6, 10, 0, false); n = n + 1 ++ elseif p == "W" then ++ op = op + parse_imm(params[n], 14, 10, 0, true); n = n + 1 ++ elseif p == "X" then ++ op = op + parse_imm(params[n], 12, 10, 0, true); n = n + 1 ++ elseif p == "o" then ++ op = op + parse_disp(params[n]); n = n + 1 ++ elseif p == "Y" then ++ op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1 ++ elseif p == "Z" then ++ op = op + parse_imm(params[n], 20, 5, 0, true); n = n + 1 ++ elseif p == "T" then 
++ op = op + parse_imm(params[n], 12, 10, 0, false); n = n + 1 ++ elseif p == "C" then ++ op = op + parse_imm(params[n], 15, 0, 0, false); n = n + 1 ++ elseif p == "Q" then ++ op = op + parse_imm(params[n], 2, 15, 0, false); n = n + 1 ++ elseif p == "B" then ++ op = op + parse_imm(params[n], 3, 15, 0, false); n = n + 1 ++ elseif p == "M" then ++ op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1 ++ elseif p == "N" then ++ op = op + parse_imm(params[n], 6, 16, 0, false); n = n + 1 ++ elseif p == "O" or p == "L" or p == "P" then ++ local mode, m, s = parse_label(params[n], false) ++ local v = branch_type(op) ++ waction("REL_"..mode, m+v, s, 1) ++ n = n + 1 ++ elseif p == "a" then ++ op = op + parse_imm(params[n], 16, 10, 0, true); n = n + 1 ++ else ++ assert(false) ++ end ++ end ++ wputpos(pos, op) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode to mark the position where the action list is to be emitted. ++map_op[".actionlist_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeactions(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global enum is to be emitted. ++map_op[".globals_1"] = function(params) ++ if not params then return "prefix" end ++ local prefix = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobals(out, prefix) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global names are to be emitted. ++map_op[".globalnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobalnames(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the extern names are to be emitted. 
++map_op[".externnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeexternnames(out, name) end) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Label pseudo-opcode (converted from trailing colon form). ++map_op[".label_1"] = function(params) ++ if not params then return "[1-9] | ->global | =>pcexpr" end ++ if secpos+1 > maxsecpos then wflush() end ++ local mode, n, s = parse_label(params[1], true) ++ if mode == "EXT" then werror("bad label definition") end ++ waction("LABEL_"..mode, n, s, 1) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcodes for data storage. ++map_op[".long_*"] = function(params) ++ if not params then return "imm..." end ++ for _,p in ipairs(params) do ++ local n = tonumber(p) ++ if not n then werror("bad immediate `"..p.."'") end ++ if n < 0 then n = n + 2^32 end ++ wputw(n) ++ if secpos+2 > maxsecpos then wflush() end ++ end ++end ++ ++-- Alignment pseudo-opcode. ++map_op[".align_1"] = function(params) ++ if not params then return "numpow2" end ++ if secpos+1 > maxsecpos then wflush() end ++ local align = tonumber(params[1]) ++ if align then ++ local x = align ++ -- Must be a power of 2 in the range (2 ... 256). ++ for i=1,8 do ++ x = x / 2 ++ if x == 1 then ++ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. ++ return ++ end ++ end ++ end ++ werror("bad alignment") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode for (primitive) type definitions (map to C types). 
++map_op[".type_3"] = function(params, nparams) ++ if not params then ++ return nparams == 2 and "name, ctype" or "name, ctype, reg" ++ end ++ local name, ctype, reg = params[1], params[2], params[3] ++ if not match(name, "^[%a_][%w_]*$") then ++ werror("bad type name `"..name.."'") ++ end ++ local tp = map_type[name] ++ if tp then ++ werror("duplicate type `"..name.."'") ++ end ++ -- Add #type to defines. A bit unclean to put it in map_archdef. ++ map_archdef["#"..name] = "sizeof("..ctype..")" ++ -- Add new type and emit shortcut define. ++ local num = ctypenum + 1 ++ map_type[name] = { ++ ctype = ctype, ++ ctypefmt = format("Dt%X(%%s)", num), ++ reg = reg, ++ } ++ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) ++ ctypenum = num ++end ++map_op[".type_2"] = map_op[".type_3"] ++ ++-- Dump type definitions. ++local function dumptypes(out, lvl) ++ local t = {} ++ for name in pairs(map_type) do t[#t+1] = name end ++ sort(t) ++ out:write("Type definitions:\n") ++ for _,name in ipairs(t) do ++ local tp = map_type[name] ++ local reg = tp.reg or "" ++ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) ++ end ++ out:write("\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Set the current section. ++function _M.section(num) ++ waction("SECTION", num) ++ wflush(true) -- SECTION is a terminal action. ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Dump architecture description. ++function _M.dumparch(out) ++ out:write(format("DynASM %s version %s, released %s\n\n", ++ _info.arch, _info.version, _info.release)) ++ dumpactions(out) ++end ++ ++-- Dump all user defined elements. ++function _M.dumpdef(out, lvl) ++ dumptypes(out, lvl) ++ dumpglobals(out, lvl) ++ dumpexterns(out, lvl) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pass callbacks from/to the DynASM core. 
++function _M.passcb(wl, we, wf, ww) ++ wline, werror, wfatal, wwarn = wl, we, wf, ww ++ return wflush ++end ++ ++-- Setup the arch-specific module. ++function _M.setup(arch, opt) ++ g_arch, g_opt = arch, opt ++end ++ ++-- Merge the core maps and the arch-specific maps. ++function _M.mergemaps(map_coreop, map_def) ++ setmetatable(map_op, { __index = map_coreop }) ++ setmetatable(map_def, { __index = map_archdef }) ++ return map_op, map_def ++end ++ ++return _M ++ ++------------------------------------------------------------------------------ ++ +diff --git a/src/Makefile b/src/Makefile +index 3d2ba4575..db66fc577 100644 +--- a/src/Makefile ++++ b/src/Makefile +@@ -53,6 +53,7 @@ CCOPT_arm64= + CCOPT_ppc= + CCOPT_mips= + CCOPT_riscv64= ++CCOPT_loongarch64= -fwrapv + # + #CCDEBUG= + # Uncomment the next line to generate debug information: +@@ -247,6 +248,10 @@ else + ifneq (,$(findstring LJ_TARGET_S390X ,$(TARGET_TESTARCH))) + TARGET_LJARCH= s390x + else ++ifneq (,$(findstring LJ_TARGET_LOONGARCH64 ,$(TARGET_TESTARCH))) ++ TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE ++ TARGET_LJARCH= loongarch64 ++else + ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__AARCH64EB__=1 +@@ -283,6 +288,7 @@ endif + endif + endif + endif ++endif + + ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) + TARGET_SYS= PS3 +@@ -346,7 +352,9 @@ else + # Find out whether the target toolchain always generates unwind tables. 
+ TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o) + ifneq (,$(findstring E,$(TARGET_TESTUNWIND))) +- TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL ++ ifeq (,$(findstring LJ_TARGET_LOONGARCH64 ,$(TARGET_TESTARCH))) ++ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL ++ endif + endif + endif + ifneq (SunOS,$(TARGET_SYS)) +diff --git a/src/host/buildvm.c b/src/host/buildvm.c +index ff4e01e11..70452d44f 100644 +--- a/src/host/buildvm.c ++++ b/src/host/buildvm.c +@@ -71,6 +71,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); + #include "../dynasm/dasm_s390x.h" + #elif LJ_TARGET_RISCV64 + #include "../dynasm/dasm_riscv.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "../dynasm/dasm_loongarch64.h" + #else + #error "No support for this architecture (yet)" + #endif +diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c +index 0f32546fc..256591492 100644 +--- a/src/host/buildvm_asm.c ++++ b/src/host/buildvm_asm.c +@@ -229,6 +229,15 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + ins, sym); + exit(1); + } ++#elif LJ_TARGET_LOONGARCH64 ++ if ((ins >> 26) == 21) { ++ fprintf(ctx->fp, "\tbl %s\n", sym); ++ } else { ++ fprintf(stderr, ++ "Error: unsupported opcode %08x for %s symbol relocation.\n", ++ ins, sym); ++ exit(1); ++ } + #else + #error "missing relocation support for this architecture" + #endif +diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua +index 81cbb0a11..cf4fc9974 100644 +--- a/src/jit/bcsave.lua ++++ b/src/jit/bcsave.lua +@@ -104,6 +104,7 @@ local map_arch = { + mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, + s390x = { e = "be", b = 64, m = 22, }, + riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, }, ++ loongarch64 = { e = "le", b = 64, m = 258, f = 0x3}, + } + + local 
map_os = { +diff --git a/src/jit/dis_loongarch64.lua b/src/jit/dis_loongarch64.lua +new file mode 100644 +index 000000000..0fe0266be +--- /dev/null ++++ b/src/jit/dis_loongarch64.lua +@@ -0,0 +1,697 @@ ++---------------------------------------------------------------------------- ++-- LuaJIT LoongArch64 disassembler module. ++-- ++-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. ++-- Released under the MIT/X license. See Copyright Notice in luajit.h ++---------------------------------------------------------------------------- ++-- This is a helper module used by the LuaJIT machine code dumper module. ++-- ++-- It disassembles most LoongArch instructions. ++-- NYI: SIMD instructions. ++------------------------------------------------------------------------------ ++ ++local type = type ++local byte, format = string.byte, string.format ++local match, gmatch = string.match, string.gmatch ++local concat = table.concat ++local bit = require("bit") ++local band, bor, bnot, tohex = bit.band, bit.bor, bit.bnot, bit.tohex ++local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift ++ ++------------------------------------------------------------------------------ ++-- Opcode maps ++------------------------------------------------------------------------------ ++ ++local map_18_0 = { -- 18-20:0, 10-17 ++ shift = 10, mask = 255, ++ [4] = "clo.wDJ", ++ [5] = "clz.wDJ", ++ [6] = "cto.wDJ", ++ [7] = "ctz.wDJ", ++ [8] = "clo.dDJ", ++ [9] = "clz.dDJ", ++ [10] = "cto.dDJ", ++ [11] = "ctz.dDJ", ++ [12] = "revb.2hDJ", ++ [13] = "revb.4hDJ", ++ [14] = "revb.2wDJ", ++ [15] = "revb.dDJ", ++ [16] = "revh.2wDJ", ++ [17] = "revh.dDJ", ++ [18] = "bitrev.4bDJ", ++ [19] = "bitrev.8bDJ", ++ [20] = "bitrev.wDJ", ++ [21] = "bitrev.dDJ", ++ [22] = "ext.w.hDJ", ++ [23] = "ext.w.bDJ", ++} ++ ++local map_18_4 = { -- 18-20:4, 15-17 ++ shift = 15, mask = 7, ++ [0] = "add.wDJK", ++ [1] = "add.dDJK", ++ [2] = "sub.wDJK", ++ [3] = "sub.dDJK", ++ [4] = "sltDJK", ++ [5] = "sltuDJK", ++ 
[6] = "maskeqzDJK", ++ [7] = "masknezDJK", ++} ++ ++local map_18_5 = { -- 18-20:5, 15-17 ++ shift = 15, mask = 7, ++ [0] = "norDJK", ++ [1] = "andDJK", ++ [2] = "orDJK", ++ [3] = "xorDJK", ++ [4] = "ornDJK", ++ [5] = "andnDJK", ++ [6] = "sll.wDJK", ++ [7] = "srl.wDJK", ++} ++ ++local map_18_6 = { -- 18-20:6, 15-17 ++ shift = 15, mask = 7, ++ [0] = "sra.wDJK", ++ [1] = "sll.dDJK", ++ [2] = "srl.dDJK", ++ [3] = "sra.dDJK", ++ [6] = "rotr.wDJK", ++ [7] = "rotr.dDJK", ++} ++ ++local map_18_7 = { -- 18-20:7, 15-17 ++ shift = 15, mask = 7, ++ [0] = "mul.wDJK", ++ [1] = "mulh.wDJK", ++ [2] = "mulh.wuDJK", ++ [3] = "mul.dDJK", ++ [4] = "mulh.dDJK", ++ [5] = "mulh.duDJK", ++ [6] = "mulw.d.wDJK", ++ [7] = "mulw.d.wuDJK", ++} ++ ++local map_farith2 = { ++ shift = 10, mask = 31, ++ [1] = "fabs.sFG", ++ [2] = "fabs.dFG", ++ [5] = "fneg.sFG", ++ [6] = "fneg.dFG", ++ [9] = "flogb.sFG", ++ [10] = "flogb.dFG", ++ [13] = "fclass.sFG", ++ [14] = "fclass.dFG", ++ [17] = "fsqrt.sFG", ++ [18] = "fsqrt.dFG", ++ [21] = "frecip.sFG", ++ [22] = "frecip.dFG", ++ [25] = "frsqrt.sFG", ++ [26] = "frsqrt.dFG", ++ [29] = "frecipe.sFG", ++ [30] = "frecipe.dFG", ++ [33] = "frsqrte.sFG", ++ [34] = "frsqrte.dFG", ++} ++ ++local map_fmov = { ++ shift = 10, mask = 31, ++ [5] = "fmov.sFG", ++ [6] = "fmov.dFG", ++ [9] = "movgr2fr.wFJ", ++ [10] = "movgr2fr.dFJ", ++ [11] = "movgr2frh.wFJ", ++ [13] = "movfr2gr.sDG", ++ [14] = "movfr2gr.dDG", ++ [15] = "movfrh2gr.sDG", ++ [16] = "movgr2fcsrSJ", ++ [18] = "movfcsr2grDR", ++ [20] = { shift = 3, mask = 3, [0] = "movfr2cfEG", }, ++ [21] = { shift = 8, mask = 3, [0] = "movcf2frFA", }, ++ [22] = { shift = 3, mask = 3, [0] = "movgr2cfEJ", }, ++ [23] = { shift = 8, mask = 3, [0] = "movcf2grDA", }, ++} ++ ++local map_fconvert = { -- 15-20: 110010 ++ shift = 10, mask = 31, ++ [6] = "fcvt.s.dFG", [9] = "fcvt.d.sFG", ++} ++ ++local map_fconvert1 = { -- 15-20: 110100 ++ shift = 10, mask = 31, ++ [1] = "ftintrm.w.sFG", ++ [2] = "ftintrm.w.dFG", ++ [9] = "ftintrm.l.sFG", 
++ [10] = "ftintrm.l.dFG", ++ [17] = "ftintrp.w.sFG", ++ [18] = "ftintrp.w.dFG", ++ [25] = "ftintrp.l.sFG", ++ [26] = "ftintrp.l.dFG", ++} ++ ++local map_fconvert2 = { -- 15-20: 110101 ++ shift = 10, mask = 31, ++ [1] = "ftintrz.w.sFG", ++ [2] = "ftintrz.w.dFG", ++ [9] = "ftintrz.l.sFG", ++ [10] = "ftintrz.l.dFG", ++ [17] = "ftintrne.w.sFG", ++ [18] = "ftintrne.w.dFG", ++ [25] = "ftintrne.l.sFG", ++ [26] = "ftintrne.l.dFG", ++} ++ ++local map_fconvert3 = { -- 15-20: 110110 ++ shift = 10, mask = 31, ++ [1] = "ftint.w.sFG", ++ [2] = "ftint.w.dFG", ++ [9] = "ftint.l.sFG", ++ [10] = "ftint.l.dFG", ++} ++ ++local map_fconvert4 = { -- 15-20: 111010 ++ shift = 10, mask = 31, ++ [4] = "ffint.s.wFG", ++ [6] = "ffint.s.lFG", ++ [8] = "ffint.d.wFG", ++ [10] = "ffint.d.lFG", ++} ++ ++local map_fconvert5 = { -- 15-20: 111100 ++ shift = 10, mask = 31, ++ [17] = "frint.sFG", ++ [18] = "frint.dFG", ++} ++ ++local map_farith = { -- 22-25:4, 15-21 ++ shift = 15, mask = 127, ++ [1] = "fadd.sFGH", ++ [2] = "fadd.dFGH", ++ [5] = "fsub.sFGH", ++ [6] = "fsub.dFGH", ++ [9] = "fmul.sFGH", ++ [10] = "fmul.dFGH", ++ [13] = "fdiv.sFGH", ++ [14] = "fdiv.dFGH", ++ [17] = "fmax.sFGH", ++ [18] = "fmax.dFGH", ++ [21] = "fmin.sFGH", ++ [22] = "fmin.dFGH", ++ [25] = "fmaxa.sFGH", ++ [26] = "fmaxa.dFGH", ++ [29] = "fmina.sFGH", ++ [30] = "fmina.dFGH", ++ [33] = "fscaleb.sFGH", ++ [34] = "fscaleb.dFGH", ++ [37] = "fcopysign.sFGH", ++ [38] = "fcopysign.dFGH", ++ [40] = map_farith2, [41] = map_fmov, ++ [50] = map_fconvert, [52] = map_fconvert1, ++ [53] = map_fconvert2, [54] = map_fconvert3, ++ [58] = map_fconvert4, [60] = map_fconvert5, ++} ++ ++local map_21_0 = { --21st:0, 18-20 ++ shift = 18, mask = 7, ++ [0] = map_18_0, ++ [1] = { shift = 17, mask = 1, [0] = "alsl.wDJKQ", "alsl.wuDJKQ", }, ++ [2] = {shift = 17, mask = 1, [0] = "bytepick.wDJKQ", }, ++ [3] = "bytepick.dDJKB", ++ [4] = map_18_4, ++ [5] = map_18_5, ++ [6] = map_18_6, ++ [7] = map_18_7, ++} ++ ++local map_21_1 = { --21st:1, 22nd:0, 15-20 
++ shift = 21, mask = 1, ++ [1] = { ++ shift = 18, mask = 7, ++ [0] = { ++ shift = 15, mask = 7, ++ [0] = "div.wDJK", ++ [1] = "mod.wDJK", ++ [2] = "div.wuDJK", ++ [3] = "mod.wuDJK", ++ [4] = "div.dDJK", ++ [5] = "mod.dDJK", ++ [6] = "div.duDJK", ++ [7] = "mod.duDJK", ++ }, ++ [1] = { ++ shift = 18, mask = 7, ++ [0] = "crc.w.b.wDJK", ++ [1] = "crc.w.h.wDJK", ++ [2] = "crc.w.w.wDJK", ++ [3] = "crc.w.d.wDJK", ++ [4] = "crcc.w.b.wDJK", ++ [5] = "crcc.w.h.wDJK", ++ [6] = "crcc.w.w.wDJK", ++ [7] = "crcc.w.d.wDJK", ++ }, ++ [2] = { ++ shift = 15, mask = 7, ++ [4] = breakC, [6] = syscallC, ++ }, ++ [3] = { shift = 17, mask = 1, [0] = "alsl.dDJKQ", }, ++ }, ++} ++ ++local map_22_0 = { ++ shift = 21, mask = 1, ++ [0] = map_21_0, ++ [1] = map_21_1, ++} ++ ++local map_shift = { -- 22nd:1, 21st:0 ++ shift = 16, mask = 31, ++ [0] = { shift = 15, mask = 1, [1] = "slli.wDJU", }, ++ [1] = "slli.dDJV", ++ [4] = { shift = 15, mask = 1, [1] = "srli.wDJU", }, ++ [5] = "srli.dDJV", ++ [8] = { shift = 15, mask = 1, [1] = "srai.wDJU", }, ++ [9] = "srai.dDJV", ++ [12] = { shift = 15, mask = 1, [1] = "rotri.wDJU", }, ++ [13] = "rotri.dDJV", ++} ++ ++local map_22_1 = { -- 22nd:1 ++ shift = 21, mask = 1, ++ [0] = map_shift, ++ [1] = { shift = 15, mask = 1, [0] = "bstrins.wDJMU", [1] = "bstrpick.wDJMU", }, ++} ++ ++local map_26_0 = { ++ shift = 22, mask = 15, ++ [0] = map_22_0, ++ [1] = map_22_1, ++ [2] = "bstrins.dDJNV", ++ [3] = "bstrpick.dDJNV", ++ [4] = map_farith, ++ [8] = "sltiDJX", ++ [9] = "sltuiDJX", ++ [10] = "addi.wDJX", ++ [11] = "addi.dDJX", ++ [12] = "lu52i.dDJX", ++ [13] = "andiDJT", ++ [14] = "oriDJT", ++ [15] = "xoriDJT", ++} ++ ++local map_long_i_5 = { -- Long immediate fixed-point arithmetic. 
++ shift = 25, mask = 1, ++ [0] = "lu12i.wDZ", ++ [1] = "lu32i.dDZ", ++} ++ ++local map_long_i_6 = { ++ shift = 25, mask = 1, ++ [0] = "pcaddiDZ", ++ [1] = "pcalau12iDZ", ++} ++ ++local map_long_i_7 = { ++ shift = 25, mask = 1, ++ [0] = "pcaddu12iDZ", ++ [1] = "pcaddu18iDZ", ++} ++ ++local map_ldst0_14 = { ++ shift = 15, mask = 2047, ++ [0] = "ldx.bDJK", [8] = "ldx.hDJK", [16] = "ldx.wDJK", ++ [24] = "ldx.dDJK", [32] = "stx.bDJK", [40] = "stx.hDJK", ++ [48] = "stx.wDJK", [56] = "stx.dDJK", [64] = "ldx.buDJK", ++ [72] = "ldx.huDJK", [80] = "ldx.wuDJK", [96] = "fldx.sFJK", ++ [104] = "fldx.dFJK", [112] = "fstx.sFJK", [120] = "fstx.dFJK", ++ [232] = "fldgt.sFJK", [233] = "fldgt.dFJK", [234] = "fldle.sFJK", ++ [235] = "fldle.dFJK", [236] = "fstgt.sFJK", [237] = "fstgt.dFJK", ++ [238] = "fstle.sFJK", [239] = "fstle.dFJK", [240] = "ldgt.bDJK", ++ [241] = "ldgt.hDJK", [242] = "ldgt.wDJK", [243] = "ldgt.dDJK", ++ [244] = "ldle.bDJK", [245] = "ldle.hDJK", [246] = "ldle.wDJK", ++ [247] = "ldle.dDJK", [248] = "stgt.bDJK", [249] = "stgt.hDJK", ++ [250] = "stgt.wDJK", [251] = "stgt.dDJK", [252] = "stle.bDJK", ++ [253] = "stle.hDJK", [254] = "stle.wDJK", [255] = "stle.dDJK", ++} ++ ++local map_ldst1_8 = { ++ shift = 24, mask = 3, ++ [0] = "ll.wDJW", ++ [1] = "sc.wDJW", ++ [2] = "ll.dDJW", ++ [3] = "sc.dDJW", ++} ++ ++local map_ldst1_9 = { ++ shift = 24, mask = 3, ++ [0] = "ldptr.wDJW", ++ [1] = "stptr.wDJW", ++ [2] = "ldptr.dDJW", ++ [3] = "stptr.dDJW", ++} ++ ++local map_ldst1_10 = { ++ shift = 22, mask = 15, ++ [0] = "ld.bDJX", ++ [1] = "ld.hDJX", ++ [2] = "ld.wDo", ++ [3] = "ld.dDo", ++ [4] = "st.bDo", ++ [5] = "st.hDo", ++ [6] = "st.wDo", ++ [7] = "st.dDo", ++ [8] = "ld.buDo", ++ [9] = "ld.huDo", ++ [10] = "ld.wuDJX", ++ [12] = "fld.sFo", ++ [13] = "fst.sFo", ++ [14] = "fld.dFo", ++ [15] = "fst.dFo", ++} ++ ++local map_fcmp0 = { ++ shift = 15, mask = 31, ++ [0] = "fcmp.caf.sEGH", ++ [1] = "fcmp.saf.sEGH", ++ [2] = "fcmp.clt.sEGH", ++ [3] = "fcmp.slt.sEGH", ++ [4] = 
"fcmp.ceq.sEGH", ++ [5] = "fcmp.seq.sEGH", ++ [6] = "fcmp.cle.sEGH", ++ [7] = "fcmp.sle.sEGH", ++ [8] = "fcmp.cun.sEGH", ++ [9] = "fcmp.sun.sEGH", ++ [10] = "fcmp.cult.sEGH", ++ [11] ="fcmp.sult.sEGH", ++ [12] = "fcmp.cueq.sEGH", ++ [13] = "fcmp.sueq.sEGH", ++ [14] = "fcmp.cule.sEGH", ++ [15] = "fcmp.sule.sEGH", ++ [16] = "fcmp.cne.sEGH", ++ [17] = "fcmp.sne.sEGH", ++ [20] = "fcmp.cor.sEGH", ++ [21] = "fcmp.sor.sEGH", ++ [24] = "fcmp.cune.sEGH", ++ [25] = "fcmp.sune.sEGH", ++} ++ ++local map_fcmp1 = { ++ shift = 15, mask = 31, ++ [0] = "fcmp.caf.dEGH", ++ [1] = "fcmp.saf.dEGH", ++ [2] = "fcmp.clt.dEGH", ++ [3] = "fcmp.slt.dEGH", ++ [4] = "fcmp.ceq.dEGH", ++ [5] = "fcmp.seq.dEGH", ++ [6] = "fcmp.cle.dEGH", ++ [7] = "fcmp.sle.dEGH", ++ [8] = "fcmp.cun.dEGH", ++ [9] = "fcmp.sun.dEGH", ++ [10] = "fcmp.cult.dEGH", ++ [11] = "fcmp.sult.dEGH", ++ [12] = "fcmp.cueq.dEGH", ++ [13] = "fcmp.sueq.dEGH", ++ [14] = "fcmp.cule.dEGH", ++ [15] = "fcmp.sule.dEGH", ++ [16] = "fcmp.cne.dEGH", ++ [17] = "fcmp.sne.dEGH", ++ [20] = "fcmp.cor.dEGH", ++ [21] = "fcmp.sor.dEGH", ++ [24] = "fcmp.cune.dEGH", ++ [25] = "fcmp.sune.dEGH", ++} ++ ++local map_fcmp = { ++ shift = 20, mask = 63, ++ [1] = { shift = 3, mask = 3, [0] = map_fcmp0, }, ++ [2] = { shift = 3, mask = 3, [0] = map_fcmp1, }, ++ [16] = { shift = 18, mask = 3, [0] = "fselFGHI", }, ++} ++ ++local map_fp = { ++ shift = 20, mask = 15, ++ [1] = "fmadd.sFGHi", ++ [2] = "fmadd.dFGHi", ++ [4] = "fmsub.sFGHi", ++ [5] = "fmsub.dFGHi", ++ [10] = "fnmadd.dFGHi", ++ [14] = "fnmsub.dFGHi", ++} ++ ++local map_init = { ++ shift = 26, mask = 63, ++ [0] = map_26_0, ++ [2] = map_fp, ++ [3] = map_fcmp, ++ [4] = "addu16i.dDJY", ++ [5] = map_long_i_5, ++ [6] = map_long_i_6, ++ [7] = map_long_i_7, ++ [8] = map_ldst1_8, ++ [9] = map_ldst1_9, ++ [10] = map_ldst1_10, ++ [14] = map_ldst0_14, ++ [16] = "beqzJL", ++ [17] = "bnezJL", ++ [18] = { shift = 8, mask = 3, [0] = "bceqzAL", "bcnezAL", }, ++ [19] = "jirlDJa", ++ [20] = "bP", ++ [21] = "blP", ++ [22] 
= "beqJDO", ++ [23] = "bneJDO", ++ [24] = "bltJDO", ++ [25] = "bgeJDO", ++ [26] = "bltuJDO", ++ [27] = "bgeuJDO", ++} ++ ++------------------------------------------------------------------------------ ++ ++local map_gpr = { ++ [0] = "r0", "ra", "r2", "sp", "r4", "r5", "r6", "r7", ++ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", ++ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", ++ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", ++} ++ ++------------------------------------------------------------------------------ ++ ++-- Output a nicely formatted line with an opcode and operands. ++local function putop(ctx, text, operands) ++ local pos = ctx.pos ++ local extra = "" ++ if ctx.rel then ++ local sym = ctx.symtab[ctx.rel] ++ if sym then extra = "\t->"..sym end ++ end ++ if ctx.hexdump > 0 then ++ ctx.out(format("%08x %s %-7s %s%s\n", ++ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) ++ else ++ ctx.out(format("%08x %-7s %s%s\n", ++ ctx.addr+pos, text, concat(operands, ", "), extra)) ++ end ++ ctx.pos = pos + 4 ++end ++ ++-- Fallback for unknown opcodes. ++local function unknown(ctx) ++ return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) ++end ++ ++local function get_le(ctx) ++ local pos = ctx.pos ++ local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) ++ return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) ++end ++ ++-- Decode imm. ++local function decode_si_imm(imm, bits, scale, signed, mask) ++ local n = tonumber(imm) ++ if n then ++ local m = arshift(n, scale) ++ if lshift(m, scale) == n then ++ if signed then ++ local s = arshift(band(m, mask), bits-1) ++ if s == 0 then ++ return m ++ elseif s == 1 then ++ return -(band(bnot(m), mask)+1) ++ end ++ else ++ if arshift(m, bits) == 0 then ++ return m ++ end ++ end ++ end ++ end ++end ++ ++-- Disassemble a single instruction. 
++local function disass_ins(ctx) ++ local op = ctx:get() ++ local operands = {} ++ local last = nil ++ ctx.op = op ++ ctx.rel = nil ++ ++ local opat = ctx.map_pri[rshift(op, 26)] ++ while type(opat) ~= "string" do ++ if not opat then return unknown(ctx) end ++ opat = opat[band(rshift(op, opat.shift), opat.mask)] ++ end ++ local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") ++ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") ++ if altname then pat = pat2 end ++ ++ for p in gmatch(pat, ".") do ++ local x = nil ++ if p == "D" then ++ x = map_gpr[band(rshift(op, 0), 31)] ++ elseif p == "J" then ++ x = map_gpr[band(rshift(op, 5), 31)] ++ elseif p == "K" then ++ x = map_gpr[band(rshift(op, 10), 31)] ++ elseif p == "F" then ++ x = "f"..band(rshift(op, 0), 31) ++ elseif p == "G" then ++ x = "f"..band(rshift(op, 5), 31) ++ elseif p == "H" then ++ x = "f"..band(rshift(op, 10), 31) ++ elseif p == "i" then ++ x = "f"..band(rshift(op, 15), 31) ++ elseif p == "S" then ++ x = "fcsr"..band(rshift(op, 0), 31) ++ elseif p == "R" then ++ x = "fcsr"..band(rshift(op, 5), 31) ++ elseif p == "E" then ++ x = "fcc"..band(rshift(op, 0), 7) ++ elseif p == "A" then ++ x = "fcc"..band(rshift(op, 5), 7) ++ elseif p == "I" then ++ x = "fcc"..band(rshift(op, 15), 7) ++ elseif p == "Q" then -- sa2 ++ x = band(rshift(op, 15), 3) ++ ctx.rel = x ++ x = format("%d", x) ++ elseif p == "B" then -- sa3 ++ x = band(rshift(op, 15), 7) ++ ctx.rel = x ++ x = format("%d", x) ++ elseif p == "M" then -- msbw ++ x = band(rshift(op, 16), 31) ++ ctx.rel = x ++ x = format("%d(0x%x)", x, x) ++ elseif p == "N" then -- msbd ++ x = band(rshift(op, 16), 63) ++ ctx.rel = x ++ x = format("%d(0x%x)", x, x) ++ elseif p == "U" then -- ui5 ++ x = band(rshift(op, 10), 31) ++ ctx.rel = x ++ x = format("%d(0x%x)", x, x) ++ elseif p == "V" then -- ui6 ++ x = band(rshift(op, 10), 63) ++ ctx.rel = x ++ x = format("%d(0x%x)", x, x) ++ elseif p == "T" then -- ui12 ++ x = band(rshift(op, 10), 4095) ++ ctx.rel = x ++ x = 
format("%d(0x%x)", x, x) ++ elseif p == "W" then -- si14 ++ x = band(rshift(op, 10), 16383) ++ x = decode_si_imm(x, 14, 0, true, 0x3fff) ++ ctx.rel = x ++ x = format("%d(0x%04x)", x, band(x, 0x3fff)) ++ elseif p == "X" then -- si12 ++ x = band(rshift(op, 10), 4095) ++ x = decode_si_imm(x, 12, 0, true, 0xfff) ++ ctx.rel = x ++ x = format("%d(0x%03x)", x, band(x, 0xfff)) ++ elseif p == "o" then ++ local disp = band((rshift(op, 10)), 0xfff) ++ operands[#operands] = format("%s, %d", last, disp) ++ elseif p == "Y" then -- si16 ++ x = band(rshift(op, 10), 65535) ++ x = decode_si_imm(x, 16, 0, true, 0xffff) ++ ctx.rel = x ++ x = format("%d(0x%04x)", x, band(x, 0xffff)) ++ elseif p == "Z" then -- si20 ++ x = band(rshift(op, 10), 1048575) ++ x = decode_si_imm(x, 20, 0, true, 0xfffff) ++ ctx.rel = x ++ x = format("%d(0x%05x)", x, band(x, 0xfffff)) ++ elseif p == "C" then -- code ++ x = band(rshift(op, 0), 32767) ++ elseif p == "O" then -- offs[15:0] ++ x = band(rshift(op, 10), 65535) ++ x = decode_si_imm(x, 16, 0, true, 0xffff) ++ ctx.rel = x ++ x = format("%d(0x%04x)", x, band(x, 0xffff)) ++ elseif p == "L" then -- offs[15:0] + offs[20:16] ++ x = lshift(band(op, 31), 16) + band(rshift(op, 10), 65535) ++ x = decode_si_imm(x, 21, 0, true, 0x1fffff) ++ ctx.rel = x ++ x = format("%d(0x%06x)", x, band(x, 0x1fffff)) ++ elseif p == "P" then -- offs[15:0] + offs[25:16] ++ x = lshift(band(op, 1023), 16) + band(rshift(op, 10), 65535) ++ x = decode_si_imm(x, 26, 0, true, 0x3ffffff) ++ ctx.rel = x ++ x = format("%d(0x%07x)", x, band(x, 0x3ffffff)) ++ elseif p == "a" then ++ x = band(rshift(op, 10), 65535) ++ x = decode_si_imm(x, 16, 0, true, 0xffff) ++ ctx.rel = x ++ x = format("%d(0x%04x)", x, band(x, 0xffff)) ++ else ++ assert(false) ++ end ++ if x then operands[#operands+1] = x; last = x end ++ end ++ ++ return putop(ctx, name, operands) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Disassemble a block of code. 
++local function disass_block(ctx, ofs, len) ++ if not ofs then ofs = 0 end ++ local stop = len and ofs+len or #ctx.code ++ stop = stop - stop % 4 ++ ctx.pos = ofs - ofs % 4 ++ ctx.rel = nil ++ while ctx.pos < stop do disass_ins(ctx) end ++end ++ ++-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). ++local function create(code, addr, out) ++ local ctx = {} ++ ctx.code = code ++ ctx.addr = addr or 0 ++ ctx.out = out or io.write ++ ctx.symtab = {} ++ ctx.disass = disass_block ++ ctx.hexdump = 8 ++ ctx.get = get_le ++ ctx.map_pri = map_init ++ return ctx ++end ++ ++-- Simple API: disassemble code (a string) at address and output via out. ++local function disass(code, addr, out) ++ create(code, addr, out):disass() ++end ++ ++-- Return register name for RID. ++local function regname(r) ++ if r < 32 then return map_gpr[r] end ++ return "f"..(r-32) ++end ++ ++-- Public module functions. ++return { ++ create = create, ++ disass = disass, ++ regname = regname ++} ++ +diff --git a/src/lib_jit.c b/src/lib_jit.c +index 296bb7698..7663805a4 100644 +--- a/src/lib_jit.c ++++ b/src/lib_jit.c +@@ -847,7 +847,8 @@ static uint32_t jit_cpudetect(void) + #endif + #elif LJ_TARGET_S390X + /* No optional CPU features to detect (for now). */ +- ++#elif LJ_TARGET_LOONGARCH64 ++ /* No optional CPU features to detect (for now). */ + #elif LJ_TARGET_RISCV64 + #if LJ_HASJIT + +diff --git a/src/lj_arch.h b/src/lj_arch.h +index b85d29e16..c82f7464a 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -35,6 +35,8 @@ + #define LUAJIT_ARCH_s390x 8 + #define LUAJIT_ARCH_RISCV64 9 + #define LUAJIT_ARCH_riscv64 9 ++#define LUAJIT_ARCH_LOONGARCH64 10 ++#define LUAJIT_ARCH_loongarch64 10 + + /* Target OS. 
*/ + #define LUAJIT_OS_OTHER 0 +@@ -73,6 +75,8 @@ + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 + #elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 + #define LUAJIT_TARGET LUAJIT_ARCH_RISCV64 ++#elif defined(__loongarch64) ++#define LUAJIT_TARGET LUAJIT_ARCH_LOONGARCH64 + #else + #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" + #endif +@@ -489,6 +493,20 @@ + #define LJ_TARGET_MASKROT 1 + #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + ++#elif LUAJIT_TARGET == LUAJIT_ARCH_LOONGARCH64 ++#define LJ_ARCH_NAME "loongarch64" ++#define LJ_ARCH_BITS 64 ++#define LJ_ARCH_ENDIAN LUAJIT_LE ++#define LJ_TARGET_LOONGARCH64 1 ++#define LJ_TARGET_GC64 1 ++#define LJ_TARGET_EHRETREG 4 ++#define LJ_TARGET_EHRAREG 1 ++#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ ++#define LJ_TARGET_MASKSHIFT 1 ++#define LJ_TARGET_MASKROT 1 ++#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL ++ + #else + #error "No target architecture defined" + #endif +@@ -519,6 +537,16 @@ + #error "Need at least GCC 4.8 or newer" + #endif + #endif ++#elif LJ_TARGET_LOONGARCH64 ++#if __clang__ ++#if ((__clang_major__ < 8) || ((__clang_major__ == 8) && __clang_minor__ < 0)) && !defined(__NX_TOOLCHAIN_MAJOR__) ++#error "Need at least Clang 8.0 or newer" ++#endif ++#else ++#if (__GNUC__ < 8) || ((__GNUC__ == 8) && __GNUC_MINOR__ < 3) ++#error "Need at least GCC 8.3 or newer" ++#endif ++#endif + #elif !LJ_TARGET_PS3 + #if __clang__ + #if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) +@@ -576,6 +604,10 @@ + #if !defined(__riscv_float_abi_double) + #error "Only RISC-V 64 double float supported for now" + #endif ++#elif LJ_TARGET_LOONGARCH64 ++#if !(defined(_ABILP64) && _LOONGARCH_SIM == _ABILP64) ++#error "Only LOONGARCH lp64d ABI is supported" ++#endif + #endif + #endif + +diff --git a/src/lj_asm.c b/src/lj_asm.c +index d0b679dc4..114df7c4b 100644 +--- a/src/lj_asm.c ++++ 
b/src/lj_asm.c +@@ -229,6 +229,8 @@ static Reg rset_pickrandom(ASMState *as, RegSet rs) + #include "lj_emit_mips.h" + #elif LJ_TARGET_RISCV64 + #include "lj_emit_riscv.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "lj_emit_loongarch64.h" + #else + #error "Missing instruction emitter for target CPU" + #endif +@@ -1714,6 +1716,8 @@ static void asm_loop(ASMState *as) + #include "lj_asm_s390x.h" + #elif LJ_TARGET_RISCV64 + #include "lj_asm_riscv64.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "lj_asm_loongarch64.h" + #else + #error "Missing assembler for target CPU" + #endif +diff --git a/src/lj_asm_loongarch64.h b/src/lj_asm_loongarch64.h +new file mode 100644 +index 000000000..3ab9e4f30 +--- /dev/null ++++ b/src/lj_asm_loongarch64.h +@@ -0,0 +1,1990 @@ ++/* ++** LoongArch IR assembler (SSA IR -> machine code). ++** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++/* -- Register allocator extensions --------------------------------------- */ ++ ++/* Allocate a register with a hint. */ ++static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!ra_hashint(r) && !iscrossref(as, ref)) ++ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ ++ r = ra_allocref(as, ref, allow); ++ } ++ ra_noweak(as, r); ++ return r; ++} ++ ++/* Allocate two source registers for three-operand instructions. 
*/ ++static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) ++{ ++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); ++ Reg left = irl->r, right = irr->r; ++ if (ra_hasreg(left)) { ++ ra_noweak(as, left); ++ if (ra_noreg(right)) ++ right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); ++ else ++ ra_noweak(as, right); ++ } else if (ra_hasreg(right)) { ++ ra_noweak(as, right); ++ left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); ++ } else if (ra_hashint(right)) { ++ right = ra_allocref(as, ir->op2, allow); ++ left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); ++ } else { ++ left = ra_allocref(as, ir->op1, allow); ++ right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); ++ } ++ return left | (right << 8); ++} ++ ++/* -- Guard handling ------------------------------------------------------ */ ++ ++/* Setup exit stub after the end of each trace. */ ++static void asm_exitstub_setup(ASMState *as) ++{ ++ MCode *mxp = as->mctop; ++ if (as->mcp == mxp) ++ --as->mcp; ++ /* st.w TMP, sp, 0; li TMP, traceno; jirl ->vm_exit_handler;*/ ++ *--mxp = LOONGI_JIRL | RID_R0 | LOONGF_J(RID_R20) | 0<<10; ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->T->traceno); ++ *--mxp = *as->mcp; ++ *--mxp = LOONGI_LU52I_D | RID_R20 | LOONGF_J(RID_R20) ++ | LOONGF_I((((uintptr_t)(void *)lj_vm_exit_handler)>>52)&0xfff); ++ *--mxp = LOONGI_LU32I_D | RID_R20 ++ | LOONGF_I20((((uintptr_t)(void *)lj_vm_exit_handler)>>32)&0xfffff); ++ *--mxp = LOONGI_ORI | RID_R20 | LOONGF_J(RID_R20) ++ | LOONGF_I(((uintptr_t)(void *)lj_vm_exit_handler)&0xfff); ++ *--mxp = LOONGI_LU12I_W | RID_R20 ++ | LOONGF_I20((((uintptr_t)(void *)lj_vm_exit_handler)&0xfffff000)>>12); ++ *--mxp = LOONGI_ST_W | LOONGF_D(RID_TMP) | LOONGF_J(RID_SP); ++ as->mctop = mxp; ++} ++ ++/* Keep this in-sync with exitstub_trace_addr(). */ ++#define asm_exitstub_addr(as) ((as)->mctop) ++ ++/* Emit conditional branch to exit for guard. 
*/ ++static void asm_guard(ASMState *as, LOONGIns loongi, Reg rj, Reg rd) ++{ ++ MCode *target = asm_exitstub_addr(as); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->invmcp = NULL; ++ as->loopinv = 1; ++ as->mcp = p; ++ loongi = loongi ^ ((loongi>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond. BEQ BNE BGE BLZ*/ ++ target = p - 1; /* Patch target later in asm_loop_fixup. */ ++ } ++ emit_branch(as, loongi, rj, rd, target); ++ emit_dji(as, LOONGI_ADDI_D, RID_TMP, RID_ZERO, as->snapno); ++} ++ ++static void asm_guard21(ASMState *as, LOONGIns loongi, Reg rj) ++{ ++ MCode *target = asm_exitstub_addr(as); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->invmcp = NULL; ++ as->loopinv = 1; ++ as->mcp = p; ++ loongi = loongi ^ ((loongi>>28) == 4 ? 0x00000100u : 0x04000000u); /* Invert cond. BCEQZ BCNEZ*/ ++ target = p - 1; /* Patch target later in asm_loop_fixup. */ ++ } ++ emit_branch21(as, loongi, rj, target); ++ emit_dji(as, LOONGI_ADDI_D, RID_TMP, RID_ZERO, as->snapno); ++} ++ ++/* -- Operand fusion ------------------------------------------------------ */ ++ ++/* Limit linear search to this distance. Avoids O(n^2) behavior. */ ++#define CONFLICT_SEARCH_LIM 31 ++ ++/* Check if there's no conflicting instruction between curins and ref. */ ++static int noconflict(ASMState *as, IRRef ref, IROp conflict) ++{ ++ IRIns *ir = as->ir; ++ IRRef i = as->curins; ++ if (i > ref + CONFLICT_SEARCH_LIM) ++ return 0; /* Give up, ref is too far away. */ ++ while (--i > ref) ++ if (ir[i].o == conflict) ++ return 0; /* Conflict found. */ ++ return 1; /* Ok, no conflict. */ ++} ++ ++/* Fuse the array base of colocated arrays. */ ++static int32_t asm_fuseabase(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && ++ !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) ++ return (int32_t)sizeof(GCtab); ++ return 0; ++} ++ ++/* Fuse array/hash/upvalue reference into register+offset operand. 
*/ ++static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) ++{ ++ IRIns *ir = IR(ref); ++ if (ra_noreg(ir->r)) { ++ if (ir->o == IR_AREF) { ++ if (mayfuse(as, ref)) { ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki16(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, refa, allow); ++ } ++ } ++ } ++ } else if (ir->o == IR_HREFK) { ++ if (mayfuse(as, ref)) { ++ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); ++ if (checki16(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, ir->op1, allow); ++ } ++ } ++ } else if (ir->o == IR_UREFC) { ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; ++ intptr_t jgl = (intptr_t)J2G(as->J); ++ if ((uintptr_t)(ofs-jgl) < 65536) { ++ *ofsp = ofs-jgl-32768; ++ return RID_JGL; ++ } else { ++ *ofsp = (int16_t)ofs; ++ return ra_allock(as, ofs-(int16_t)ofs, allow); ++ } ++ } ++ } else if (ir->o == IR_TMPREF) { ++ *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); ++ return RID_JGL; ++ } ++ } ++ *ofsp = 0; ++ return ra_alloc1(as, ref, allow); ++} ++ ++/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ ++static void asm_fusexref(ASMState *as, LOONGIns loongi, Reg rd, IRRef ref, ++ RegSet allow, int32_t ofs) ++{ ++ IRIns *ir = IR(ref); ++ Reg base; ++ if (ra_noreg(ir->r) && canfuse(as, ir)) { ++ intptr_t ofs2; ++ if (ir->o == IR_ADD) { ++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), ++ checki12(ofs2))) { ++ ref = ir->op1; ++ ofs = (int32_t)ofs2; ++ } ++ } else if (ir->o == IR_STRREF) { ++ ofs2 = 4096; ++ lj_assertA(ofs == 0, "bad usage"); ++ ofs = (int32_t)sizeof(GCstr); ++ if (irref_isk(ir->op2)) { ++ ofs2 = ofs + get_kval(as, ir->op2); ++ ref = ir->op1; ++ } else if (irref_isk(ir->op1)) { ++ ofs2 = ofs + get_kval(as, ir->op1); ++ ref = ir->op2; ++ } ++ if (!checki12(ofs2)) { ++ /* NYI: Fuse ADD with constant. */ ++ Reg right, left = ra_alloc2(as, ir, allow); ++ right = (left >> 8); left &= 255; ++ emit_dji(as, loongi, rd, RID_TMP, ofs&0xfff); ++ emit_djk(as, LOONGI_ADD_D, RID_TMP, left, right); ++ return; ++ } ++ ofs = ofs2; ++ } ++ } ++ base = ra_alloc1(as, ref, allow); ++ emit_dji(as, loongi, rd, base, ofs&0xfff); ++} ++ ++/* Fuse FP multiply-add/sub. */ ++ ++static int asm_fusemadd(ASMState *as, IRIns *ir, LOONGIns loongi, LOONGIns loongir) ++{ ++ IRRef lref = ir->op1, rref = ir->op2; ++ IRIns *irm; ++ if (lref != rref && ++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && ++ ra_noreg(irm->r)) || ++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && ++ (rref = lref, loongi = loongir, ra_noreg(irm->r))))) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); ++ Reg left = ra_alloc2(as, irm, rset_exclude(rset_exclude(RSET_FPR, dest), add)); ++ Reg right = (left >> 8); left &= 255; ++ emit_djka(as, loongi, (dest & 0x1f), (left & 0x1f), (right & 0x1f), (add & 0x1f)); ++ return 1; ++ } ++ return 0; ++} ++/* -- Calls --------------------------------------------------------------- */ ++ ++/* Generate a call to a C function. 
*/ ++static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) ++{ ++ uint32_t n, nargs = CCI_XNARGS(ci); ++ int32_t ofs = 0; ++ Reg gpr, fpr = REGARG_FIRSTFPR; ++ if ((void *)ci->func) ++ emit_call(as, (void *)ci->func); ++ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) ++ as->cost[gpr] = REGCOST(~0u, ASMREF_L); ++ gpr = REGARG_FIRSTGPR; ++ for (n = 0; n < nargs; n++) { /* Setup args. */ ++ IRRef ref = args[n]; ++ if (ref) { ++ IRIns *ir = IR(ref); ++ if (irt_isfp(ir->t) && (n == 0 || !(ci->flags & CCI_VARARG))) { ++ if (fpr <= REGARG_LASTFPR) { ++ lj_assertA(rset_test(as->freeset, fpr), ++ "reg %d not free", fpr); /* Must have been evicted. */ ++ ra_leftov(as, fpr, ref); ++ fpr++; ++ } else if (gpr <= REGARG_LASTGPR) { ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. */ ++ ra_leftov(as, gpr, ref); ++ gpr++; ++ } else { ++ Reg r = ra_alloc1(as, ref, RSET_FPR); ++ emit_spstore(as, ir, r, ofs); ++ ofs += 8; ++ } ++ } else { ++ if (gpr <= REGARG_LASTGPR) { ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. */ ++ ra_leftov(as, gpr, ref); ++ gpr++; ++ } else { ++ Reg r = ra_alloc1(as, ref, RSET_GPR); ++ emit_spstore(as, ir, r, ofs); ++ ofs += 8; ++ } ++ } ++ } ++ } ++} ++ ++/* Setup result reg/sp for call. Evict scratch regs. */ ++static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ RegSet drop = RSET_SCRATCH; ++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ if (hiop && ra_hasreg((ir+1)->r)) ++ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); /* Evictions must be performed first. */ ++ if (ra_used(ir)) { ++ lj_assertA(!irt_ispri(ir->t), "PRI dest"); ++ if (irt_isfp(ir->t)) { ++ if ((ci->flags & CCI_CASTU64)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ emit_dj(as, irt_isnum(ir->t) ? 
LOONGI_MOVGR2FR_D : LOONGI_MOVGR2FR_W, ++ dest, RID_RET); ++ } else { ++ ra_destreg(as, ir, RID_FPRET); ++ } ++ } else if (hiop) { ++ ra_destpair(as, ir); ++ } else { ++ ra_destreg(as, ir, RID_RET); ++ } ++ } ++} ++ ++static void asm_callx(ASMState *as, IRIns *ir) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ CCallInfo ci; ++ IRRef func; ++ IRIns *irf; ++ ci.flags = asm_callx_flags(as, ir); ++ asm_collectargs(as, ir, &ci, args); ++ asm_setupresult(as, ir, &ci); ++ func = ir->op2; irf = IR(func); ++ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } ++ if (irref_isk(func)) { /* Call to constant address. */ ++ ci.func = (ASMFunction)(void *)get_kval(as, func); ++ } else { /* Need specific register for indirect calls. */ ++ Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R12, RID_MAX_GPR)-RSET_FIXED); ++ *--as->mcp = LOONGI_JIRL | LOONGF_D(RID_RA) | LOONGF_J(freg); ++ ci.func = (ASMFunction)(void *)0; ++ } ++ asm_gencall(as, &ci, args); ++} ++ ++static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) ++{ ++ /* The modified regs must match with the *.dasc implementation. */ ++ RegSet drop = RID2RSET(RID_R12)|RID2RSET(RID_R13)|RID2RSET(RID_F0)| ++ RID2RSET(RID_F4)|RID2RSET(RID_F9)|RID2RSET(RID_F22) ++ |RID2RSET(RID_F23); ++ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ++ ra_evictset(as, drop); ++ ra_destreg(as, ir, RID_FPRET); ++ emit_call(as, (void *)lj_ir_callinfo[id].func); ++ ra_leftov(as, REGARG_FIRSTFPR, ir->op1); ++} ++ ++/* -- Returns ------------------------------------------------------------- */ ++ ++/* Return to lower frame. Guard that it goes to the right spot. */ ++static void asm_retf(ASMState *as, IRIns *ir) ++{ ++ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); ++ void *pc = ir_kptr(IR(ir->op2)); ++ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); ++ as->topslot -= (BCReg)delta; ++ if ((int32_t)as->topslot < 0) as->topslot = 0; ++ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. 
*/ ++ emit_setgl(as, base, jit_base); ++ emit_addptr(as, base, -8*delta); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base)); ++ asm_guard(as, LOONGI_BNE, tmp, ++ ra_allock(as, igcptr(pc), rset_exclude(rset_exclude(RSET_GPR, base), tmp))); ++ emit_dji(as, LOONGI_LD_D, tmp, base, -8&0xfff); ++} ++ ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. */ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ emit_djml(as, LOONGI_BSTRINS_D, RID_TMP, tmp, ++ lj_fls(SBUF_MASK_FLAG), 0); ++ emit_getgl(as, RID_TMP, cur_L); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ ++/* -- Type conversions ---------------------------------------------------- */ ++ ++static void asm_tointg(ASMState *as, IRIns *ir, Reg left) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ asm_guard21(as, LOONGI_BCEQZ, 0); ++ emit_djk(as, LOONGI_FCMP_CEQ_D, 0, tmp, left); ++ emit_dj(as, LOONGI_FFINT_D_W, tmp, tmp); ++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp); ++ emit_dj(as, LOONGI_FTINT_W_D, tmp, left); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_FPR; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, allow); ++ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); ++ Reg tmp = ra_scratch(as, rset_clear(allow, right)); ++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp); ++ emit_djk(as, LOONGI_FADD_D, tmp, left, right); ++} ++ ++static void asm_conv(ASMState *as, IRIns *ir) ++{ ++ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); // source type ++ int stfp = (st == IRT_NUM || st == IRT_FLOAT); ++ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); ++ IRRef lref = ir->op1; ++ lj_assertA(irt_type(ir->t) != st, 
"inconsistent types for CONV"); ++ /* Use GPR to pass floating-point arguments */ ++ if (irt_isfp(ir->t) && ir->r >= RID_R4 && ir->r <= RID_R11) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg ftmp = ra_scratch(as, RSET_FPR); ++ if (stfp) { /* FP to FP conversion. */ ++ emit_dj(as, st == IRT_NUM ? LOONGI_MOVFR2GR_S : LOONGI_MOVFR2GR_D, dest, ftmp); ++ emit_dj(as, st == IRT_NUM ? LOONGI_FCVT_S_D : LOONGI_FCVT_D_S, ++ ftmp, ra_alloc1(as, lref, RSET_FPR)); ++ } else if (st == IRT_U32) { /* U32 to FP conversion. */ ++ /* y = (x ^ 0x80000000) + 2147483648.0 */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, ftmp)); ++ if (irt_isfloat(ir->t)) { ++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, ftmp); ++ emit_dj(as, LOONGI_FCVT_S_D, ftmp, ftmp); ++ } else { ++ emit_dj(as, LOONGI_MOVFR2GR_D, dest, ftmp); ++ } ++ /* Must perform arithmetic with doubles to keep the precision. */ ++ emit_djk(as, LOONGI_FADD_D, ftmp, ftmp, tmp); ++ emit_dj(as, LOONGI_FFINT_D_W, ftmp, ftmp); ++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), ++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); ++ emit_dj(as, LOONGI_MOVGR2FR_W, ftmp, RID_TMP); ++ emit_djk(as, LOONGI_XOR, RID_TMP, RID_TMP, left); ++ emit_dji(as, LOONGI_ADDU16I_D, RID_TMP, RID_R0, 0x8000); ++ } else if(st == IRT_U64) { /* U64 to FP conversion. 
*/ ++ /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, ftmp)); ++ MCLabel l_end = emit_label(as); ++ if (irt_isfloat(ir->t)) { ++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, ftmp); ++ emit_djk(as, LOONGI_FADD_S, ftmp, ftmp, tmp); ++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f), (void *)&as->J->k32[LJ_K32_2P63], ++ rset_exclude(RSET_GPR, left)); ++ emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end); ++ emit_dj(as, LOONGI_FFINT_S_L, ftmp, ftmp); ++ } else { ++ emit_dj(as, LOONGI_MOVFR2GR_D, dest, ftmp); ++ emit_djk(as, LOONGI_FADD_D, ftmp, ftmp, tmp); ++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), (void *)&as->J->k64[LJ_K64_2P63], ++ rset_exclude(RSET_GPR, left)); ++ emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end); ++ emit_dj(as, LOONGI_FFINT_D_L, ftmp, ftmp); ++ } ++ emit_dj(as, LOONGI_MOVGR2FR_D, ftmp, RID_TMP); ++ emit_djml(as, LOONGI_BSTRPICK_D, RID_TMP, left, 62, 0); ++ } else { /* Integer to FP conversion. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ LOONGIns loongi = irt_isfloat(ir->t) ? ++ (st64 ? LOONGI_FFINT_S_L : LOONGI_FFINT_S_W) : ++ (st64 ? LOONGI_FFINT_D_L : LOONGI_FFINT_D_W); ++ emit_dj(as, st64 ? LOONGI_MOVFR2GR_D : LOONGI_MOVFR2GR_S, dest, ftmp); ++ emit_dj(as, loongi, ftmp, ftmp); ++ emit_dj(as, st64 ? LOONGI_MOVGR2FR_D : LOONGI_MOVGR2FR_W, ftmp, left); ++ } ++ } else if (irt_isfp(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ if (stfp) { /* FP to FP conversion. */ ++ emit_dj(as, st == IRT_NUM ? LOONGI_FCVT_S_D : LOONGI_FCVT_D_S, ++ dest, ra_alloc1(as, lref, RSET_FPR)); ++ } else if (st == IRT_U32) { /* U32 to FP conversion. */ ++ /* y = (x ^ 0x80000000) + 2147483648.0 */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); ++ if (irt_isfloat(ir->t)) ++ emit_dj(as, LOONGI_FCVT_S_D, dest, dest); ++ /* Must perform arithmetic with doubles to keep the precision. 
*/ ++ emit_djk(as, LOONGI_FADD_D, dest, dest, tmp); ++ emit_dj(as, LOONGI_FFINT_D_W, dest, dest); ++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), ++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); ++ emit_dj(as, LOONGI_MOVGR2FR_W, dest, RID_TMP); ++ emit_djk(as, LOONGI_XOR, RID_TMP, RID_TMP, left); ++ emit_dji(as, LOONGI_ADDU16I_D, RID_TMP, RID_R0, 0x8000); ++ } else if(st == IRT_U64) { /* U64 to FP conversion. */ ++ /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); ++ MCLabel l_end = emit_label(as); ++ if (irt_isfloat(ir->t)) { ++ emit_djk(as, LOONGI_FADD_S, dest, dest, tmp); ++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f), (void *)&as->J->k32[LJ_K32_2P63], ++ rset_exclude(RSET_GPR, left)); ++ emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end); ++ emit_dj(as, LOONGI_FFINT_S_L, dest, dest); ++ } else { ++ emit_djk(as, LOONGI_FADD_D, dest, dest, tmp); ++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), (void *)&as->J->k64[LJ_K64_2P63], ++ rset_exclude(RSET_GPR, left)); ++ emit_branch(as, LOONGI_BGE, left, RID_ZERO, l_end); ++ emit_dj(as, LOONGI_FFINT_D_L, dest, dest); ++ } ++ emit_dj(as, LOONGI_MOVGR2FR_D, dest, RID_TMP); ++ emit_djml(as, LOONGI_BSTRPICK_D, RID_TMP, left, 62, 0); ++ } else { /* Integer to FP conversion. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ LOONGIns loongi = irt_isfloat(ir->t) ? ++ (st64 ? LOONGI_FFINT_S_L : LOONGI_FFINT_S_W) : ++ (st64 ? LOONGI_FFINT_D_L : LOONGI_FFINT_D_W); ++ emit_dj(as, loongi, dest, dest); ++ emit_dj(as, st64 ? LOONGI_MOVGR2FR_D : LOONGI_MOVGR2FR_W, dest, left); ++ } ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. 
*/ ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); ++ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, lref, RSET_FPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ ++ /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ ++ emit_djk(as, LOONGI_XOR, dest, dest, RID_TMP); ++ emit_dji(as, LOONGI_ADDU16I_D, RID_TMP, RID_R0, 0x8000); ++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp); ++ emit_dj(as, st == IRT_FLOAT ? LOONGI_FTINTRM_W_S : LOONGI_FTINTRM_W_D, ++ tmp, tmp); ++ emit_djk(as, st == IRT_FLOAT ? LOONGI_FSUB_S : LOONGI_FSUB_D, ++ tmp, left, tmp); ++ if (st == IRT_FLOAT) ++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f), ++ (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); ++ else ++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), ++ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); ++ } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ ++ MCLabel l_end; ++ emit_dj(as, LOONGI_MOVFR2GR_D, dest, tmp); ++ l_end = emit_label(as); ++ /* For inputs >= 2^63 add -2^64 and convert again. 
*/ ++ if (st == IRT_NUM) { ++ emit_dj(as, LOONGI_FTINTRZ_L_D, tmp, tmp); ++ emit_djk(as, LOONGI_FADD_D, tmp, left, tmp); ++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), ++ (void *)&as->J->k64[LJ_K64_M2P64], ++ rset_exclude(RSET_GPR, dest)); ++ emit_branch21(as, LOONGI_BCNEZ, 0, l_end); ++ emit_dj(as, LOONGI_FTINTRZ_L_D, tmp, left); ++ emit_djk(as, LOONGI_FCMP_CLT_D, 0, left, tmp); ++ emit_lsptr(as, LOONGI_FLD_D, (tmp & 0x1f), ++ (void *)&as->J->k64[LJ_K64_2P63], ++ rset_exclude(RSET_GPR, dest)); ++ } else { ++ emit_dj(as, LOONGI_FTINTRZ_L_S, tmp, tmp); ++ emit_djk(as, LOONGI_FADD_S, tmp, left, tmp); ++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f), ++ (void *)&as->J->k32[LJ_K32_M2P64], ++ rset_exclude(RSET_GPR, dest)); ++ emit_branch21(as, LOONGI_BCNEZ, 0, l_end); ++ emit_dj(as, LOONGI_FTINTRZ_L_S, tmp, left); ++ emit_djk(as, LOONGI_FCMP_CLT_S, 0, left, tmp); ++ emit_lsptr(as, LOONGI_FLD_S, (tmp & 0x1f), ++ (void *)&as->J->k32[LJ_K32_2P63], ++ rset_exclude(RSET_GPR, dest)); ++ } ++ } else { ++ LOONGIns loongi = irt_is64(ir->t) ? ++ (st == IRT_NUM ? LOONGI_FTINTRZ_L_D : LOONGI_FTINTRZ_L_S) : ++ (st == IRT_NUM ? LOONGI_FTINTRZ_W_D : LOONGI_FTINTRZ_W_S); ++ emit_dj(as, irt_is64(ir->t) ? LOONGI_MOVFR2GR_D : LOONGI_MOVFR2GR_S, dest, left); ++ emit_dj(as, loongi, left, left); ++ } ++ } ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); ++ if ((ir->op2 & IRCONV_SEXT)) { // sign-extend ++ emit_dj(as, st == IRT_I8 ? LOONGI_EXT_W_B : LOONGI_EXT_W_H, dest, left); ++ } else { // zero-extend ++ int msbd = st == IRT_U8 ? 7 : 15; ++ emit_djml(as, LOONGI_BSTRPICK_D, dest, left, msbd, 0); ++ } ++ } else { /* 32/64 bit integer conversions. */ ++ if (irt_is64(ir->t)) { ++ if (st64) { ++ /* 64/64 bit no-op (cast)*/ ++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. 
*/ ++ } else { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ ++ emit_dju(as, LOONGI_SLLI_W, dest, left, 0); ++ } else { /* 32 to 64 bit zero extension. */ ++ emit_djml(as, LOONGI_BSTRPICK_D, dest, left, 31, 0); ++ } ++ } ++ } else { ++ if (st64 && !(ir->op2 & IRCONV_NONE)) { ++ /* This is either a 32 bit reg/reg mov which zeroes the hiword ++ ** or a load of the loword from a 64 bit address. ++ */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_djml(as, LOONGI_BSTRPICK_D, dest, left, 31, 0); ++ } else { /* 32/32 bit no-op (cast). */ ++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ ++ } ++ } ++ } ++ } ++} ++ ++static void asm_strto(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; ++ IRRef args[2]; ++ int32_t ofs = SPOFS_TMP; ++ RegSet drop = RSET_SCRATCH; ++ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ++ ra_evictset(as, drop); ++ if (ir->s) ofs = sps_scale(ir->s); ++ asm_guard(as, LOONGI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ ++ args[0] = ir->op1; /* GCstr *str */ ++ args[1] = ASMREF_TMP1; /* TValue *n */ ++ asm_gencall(as, ci, args); ++ /* Store the result to the spill slot or temp slots. */ ++ Reg tmp = ra_releasetmp(as, ASMREF_TMP1); ++ emit_addk(as, tmp, RID_SP, ofs, RSET_GPR); ++} ++ ++/* -- Memory references --------------------------------------------------- */ ++ ++/* Store tagged value for ref at base+ofs. 
*/ ++static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) ++{ ++ RegSet allow = rset_exclude(RSET_GPR, base); ++ IRIns *ir = IR(ref); ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), ++ "store of IR type %d", irt_type(ir->t)); ++ if (irref_isk(ref)) { ++ TValue k; ++ lj_ir_kvalue(as->J->L, &k, ir); ++ Reg ku64 = ra_allock(as, (int64_t)k.u64, allow); ++ rset_clear(allow, ku64); ++ if (checki12(ofs)) { ++ emit_dji(as, LOONGI_ST_D, ku64, base, ofs&0xfff); ++ } else { ++ emit_djk(as, LOONGI_STX_D, ku64, base, ra_allock(as, ofs, allow)); ++ } ++ } else { ++ Reg src = ra_alloc1(as, ref, allow); ++ rset_clear(allow, src); ++ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); ++ emit_dji(as, LOONGI_ST_D, RID_TMP, base, ofs&0xfff); ++ if (irt_isinteger(ir->t)) { ++ emit_djk(as, LOONGI_ADD_D, RID_TMP, RID_TMP, type); ++ emit_djml(as, LOONGI_BSTRPICK_D, RID_TMP, src, 31, 0); ++ } else { ++ emit_djk(as, LOONGI_ADD_D, RID_TMP, src, type); ++ } ++ } ++} ++ ++/* Get pointer to TValue. */ ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) // todo-new ++{ ++ int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); ++ RegSet allow = RSET_GPR; ++ if ((mode & IRTMPREF_IN1)) { ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if ((mode & IRTMPREF_OUT1)) { ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_addk(as, dest, RID_JGL, tmpofs, allow); ++ emit_lso(as, LOONGI_ST_D, src, RID_JGL, tmpofs, allow); ++ } else if (irref_isk(ref)) { ++ /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, igcptr(ir_knum(ir)), dest); ++ } else { ++ emit_dji(as, LOONGI_ADDI_D, dest, RID_SP, ra_spill(as, ir)&0xfff); ++ } ++ } else { ++ /* Otherwise use g->tmptv to hold the TValue. 
*/ ++ asm_tvstore64(as, dest, 0, ref); ++ emit_addk(as, dest, RID_JGL, tmpofs, RSET_GPR); ++ } ++ } else { ++ emit_addk(as, dest, RID_JGL, tmpofs, RSET_GPR); ++ } ++} ++ ++static void asm_aref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg idx, base; ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki12(ofs)) { ++ base = ra_alloc1(as, refa, RSET_GPR); ++ emit_dji(as, LOONGI_ADDI_D, dest, base, ofs&0xfff); ++ return; ++ } ++ } ++ base = ra_alloc1(as, ir->op1, RSET_GPR); ++ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); ++ emit_djk(as, LOONGI_ADD_D, dest, RID_TMP, base); ++ emit_dju(as, LOONGI_SLLI_D, RID_TMP, idx, 3); ++} ++ ++/* Inlined hash lookup. Specialized for key type and for const keys. ++** The equivalent C code is: ++** Node *n = hashkey(t, key); ++** do { ++** if (lj_obj_equal(&n->key, key)) return &n->val; ++** } while ((n = nextnode(n))); ++** return niltv(L); ++*/ ++static void asm_href(ASMState *as, IRIns *ir, IROp merge) ++{ ++ RegSet allow = RSET_GPR; ++ int destused = ra_used(ir); ++ Reg dest = ra_dest(as, ir, allow); ++ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); ++ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2; ++ Reg cmp64 = RID_NONE; ++ IRRef refkey = ir->op2; ++ IRIns *irkey = IR(refkey); ++ int isk = irref_isk(refkey); ++ IRType1 kt = irkey->t; ++ uint32_t khash; ++ MCLabel l_end, l_loop, l_next; ++ rset_clear(allow, tab); ++ tmp1 = ra_scratch(as, allow); ++ rset_clear(allow, tmp1); ++ tmp2 = ra_scratch(as, allow); ++ rset_clear(allow, tmp2); ++ ++ if (irt_isnum(kt)) { ++ key = ra_alloc1(as, refkey, RSET_FPR); ++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); ++ } else { ++ /* Allocate cmp64 register used for 64-bit comparisons */ ++ if (!isk && irt_isaddr(kt)) { ++ cmp64 = tmp2; ++ } else { ++ int64_t k; ++ if (isk && 
irt_isaddr(kt)) { ++ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; ++ } else { ++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); ++ k = ~((int64_t)~irt_toitype(kt) << 47); ++ } ++ cmp64 = ra_allock(as, k, allow); ++ rset_clear(allow, cmp64); ++ } ++ if (!irt_ispri(kt)) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ } ++ } ++ ++ /* Key not found in chain: jump to exit (if merged) or load niltv. */ ++ l_end = emit_label(as); ++ as->invmcp = NULL; ++ if (merge == IR_NE) ++ asm_guard(as, LOONGI_BEQ, RID_ZERO, RID_ZERO); ++ else if (destused) ++ emit_loada(as, dest, niltvg(J2G(as->J))); ++ ++ /* Follow hash chain until the end. */ ++ l_loop = --as->mcp; ++ emit_move(as, dest, tmp1); ++ emit_dji(as, LOONGI_LD_D, tmp1, dest, (int32_t)offsetof(Node, next)&0xfff); ++ l_next = emit_label(as); ++ ++ /* Type and value comparison. */ ++ if (merge == IR_EQ) { /* Must match asm_guard(). */ ++ l_end = asm_exitstub_addr(as); ++ } ++ if (irt_isnum(kt)) { ++ emit_branch21(as, LOONGI_BCNEZ, 0, l_end); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ emit_djk(as, LOONGI_FCMP_CEQ_D, 0, tmpnum, key); ++ emit_branch(as, LOONGI_BEQ, tmp1, RID_ZERO, l_next); ++ emit_dju(as, LOONGI_SLTUI, tmp1, tmp1, ((int32_t)LJ_TISNUM)&0xfff); ++ emit_dju(as, LOONGI_SRAI_D, tmp1, tmp1, 47); ++ emit_dj(as, LOONGI_MOVGR2FR_D, tmpnum, tmp1); ++ } else { ++ emit_branch(as, LOONGI_BEQ, tmp1, cmp64, l_end); ++ emit_dj32i(as, RID_TMP, RID_ZERO, as->snapno); ++ } ++ emit_dji(as, LOONGI_LD_D, tmp1, dest, (int32_t)offsetof(Node, key.u64)&0xfff); ++ *l_loop = LOONGI_BNE | LOONGF_J(tmp1) | LOONGF_D(RID_ZERO) | LOONGF_I(((as->mcp-l_loop) & 0xffffu)); ++ if (!isk && irt_isaddr(kt)) { ++ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); ++ emit_djk(as, LOONGI_ADD_D, tmp2, key, type); ++ rset_clear(allow, type); ++ } ++ ++ /* Load main position relative to tab->node into dest. */ ++ khash = isk ? 
ir_khash(as, irkey) : 1; ++ if (khash == 0) { ++ emit_dji(as, LOONGI_LD_D, dest, tab, (int32_t)offsetof(GCtab, node)&0xfff); ++ } else { ++ Reg tmphash = tmp1; ++ if (isk) ++ tmphash = ra_allock(as, khash, allow); ++ /* node = tab->node + (idx*32-idx*8) */ ++ emit_djk(as, LOONGI_ADD_D, dest, dest, tmp1); ++ lj_assertA(sizeof(Node) == 24, "bad Node size"); ++ emit_djk(as, LOONGI_SUB_W, tmp1, tmp2, tmp1); ++ emit_dju(as, LOONGI_SLLI_W, tmp1, tmp1, 3); ++ emit_dju(as, LOONGI_SLLI_W, tmp2, tmp1, 5); ++ emit_djk(as, LOONGI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask ++ emit_dji(as, LOONGI_LD_D, dest, tab, ((int32_t)offsetof(GCtab, node))&0xfff); ++ emit_dji(as, LOONGI_LD_W, tmp2, tab, ((int32_t)offsetof(GCtab, hmask))&0xfff); ++ if (isk) { ++ /* Nothing to do. */ ++ } else if (irt_isstr(kt)) { ++ emit_dji(as, LOONGI_LD_W, tmp1, key, ((int32_t)offsetof(GCstr, sid))&0xfff); ++ } else { /* Must match with hash*() in lj_tab.c. */ ++ emit_djk(as, LOONGI_SUB_W, tmp1, tmp1, tmp2); ++ emit_dju(as, LOONGI_ROTRI_W, tmp2, tmp2, (-HASH_ROT3)&0x1f); ++ emit_djk(as, LOONGI_XOR, tmp1, tmp2, tmp1); ++ emit_dju(as, LOONGI_ROTRI_W, tmp1, tmp1, (-HASH_ROT2-HASH_ROT1)&0x1f); ++ emit_djk(as, LOONGI_SUB_W, tmp2, tmp2, dest); ++ emit_djk(as, LOONGI_XOR, tmp2, tmp2, tmp1); ++ emit_dju(as, LOONGI_ROTRI_W, dest, tmp1, (-HASH_ROT1)&0x1f); ++ if (irt_isnum(kt)) { ++ emit_dju(as, LOONGI_SLLI_W, tmp1, tmp1, 1); ++ emit_dju(as, LOONGI_SRAI_D, tmp1, tmp1, 32); // hi ++ emit_dju(as, LOONGI_SLLI_W, tmp2, tmp1, 0); // lo ++ emit_dj(as, LOONGI_MOVFR2GR_D, tmp1, key); ++ } else { ++ checkmclim(as); ++ emit_dju(as, LOONGI_SRAI_D, tmp1, tmp1, 32); // hi ++ emit_dju(as, LOONGI_SLLI_W, tmp2, key, 0); // lo ++ emit_djk(as, LOONGI_ADD_D, tmp1, key, type); ++ } ++ } ++ } ++} ++ ++static void asm_hrefk(ASMState *as, IRIns *ir) ++{ ++ IRIns *kslot = IR(ir->op2); ++ IRIns *irkey = IR(kslot->op1); ++ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); ++ int32_t kofs = ofs + (int32_t)offsetof(Node, key); ++ 
Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; ++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); ++ RegSet allow = rset_exclude(RSET_GPR, node); ++ Reg idx = node; ++ Reg key = ra_scratch(as, allow); ++ int64_t k; ++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); ++ if (ofs > 32736) { ++ idx = dest; ++ rset_clear(allow, dest); ++ kofs = (int32_t)offsetof(Node, key); ++ } else if (ra_hasreg(dest)) { ++ emit_addk(as, dest, node, ofs, allow); ++ } ++ if (irt_ispri(irkey->t)) { ++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); ++ k = ~((int64_t)~irt_toitype(irkey->t) << 47); ++ } else if (irt_isnum(irkey->t)) { ++ k = (int64_t)ir_knum(irkey)->u64; ++ } else { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); ++ } ++ asm_guard(as, LOONGI_BNE, key, ra_allock(as, k, allow)); ++ emit_lso(as, LOONGI_LD_D, key, idx, kofs, allow); ++ if (ofs > 32736) ++ emit_djk(as, LOONGI_ADD_D, dest, node, ra_allock(as, ofs, allow)); ++} ++ ++static void asm_uref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; ++ emit_lsptr(as, LOONGI_LD_D, dest, v, RSET_GPR); ++ } else { ++ Reg uv = ra_scratch(as, RSET_GPR); ++ Reg func = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (ir->o == IR_UREFC) { ++ Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), uv)); ++ asm_guard(as, LOONGI_BEQ, tmp, RID_ZERO); ++ emit_dji(as, LOONGI_ADDI_D, dest, uv, ((int32_t)offsetof(GCupval, tv))&0xfff); ++ emit_dji(as, LOONGI_LD_BU, tmp, uv, ((int32_t)offsetof(GCupval, closed))&0xfff); ++ } else { ++ emit_dji(as, LOONGI_LD_D, dest, uv, ((int32_t)offsetof(GCupval, v))&0xfff); ++ } ++ emit_lso(as, LOONGI_LD_D, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + ++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8), RSET_GPR); ++ } ++} ++ ++static void asm_fref(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); 
UNUSED(ir); ++ lj_assertA(!ra_used(ir), "unfused FREF"); ++} ++ ++static void asm_strref(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg dest = ra_dest(as, ir, allow); ++ Reg base = ra_alloc1(as, ir->op1, allow); ++ IRIns *irr = IR(ir->op2); ++ int32_t ofs = sizeof(GCstr); ++ rset_clear(allow, base); ++ if (irref_isk(ir->op2) && checki12(ofs + irr->i)) { ++ emit_dji(as, LOONGI_ADDI_D, dest, base, (ofs + irr->i)&0xfff); ++ } else { ++ emit_dji(as, LOONGI_ADDI_D, dest, dest, ofs&0xfff); ++ emit_djk(as, LOONGI_ADD_D, dest, base, ra_alloc1(as, ir->op2, allow)); ++ } ++} ++ ++/* -- Loads and stores ---------------------------------------------------- */ ++ ++static LOONGIns asm_fxloadins(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); ++ switch (irt_type(ir->t)) { ++ case IRT_I8: ++ return LOONGI_LD_B; ++ case IRT_U8: ++ return LOONGI_LD_BU; ++ case IRT_I16: ++ return LOONGI_LD_H; ++ case IRT_U16: ++ return LOONGI_LD_HU; ++ case IRT_NUM: ++ lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); ++ return LOONGI_FLD_D; ++ /* fallthrough */ ++ case IRT_FLOAT: ++ return LOONGI_FLD_S; ++ /* fallthrough */ ++ default: ++ return irt_is64(ir->t) ? LOONGI_LD_D : LOONGI_LD_W; ++ } ++} ++ ++static LOONGIns asm_fxstoreins(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); ++ switch (irt_type(ir->t)) { ++ case IRT_I8: case IRT_U8: return LOONGI_ST_B; ++ case IRT_I16: case IRT_U16: return LOONGI_ST_H; ++ case IRT_NUM: ++ lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); ++ if (!LJ_SOFTFP) return LOONGI_FST_D; ++ /* fallthrough */ ++ case IRT_FLOAT: return LOONGI_FST_S; ++ /* fallthrough */ ++ default: return (LJ_64 && irt_is64(ir->t)) ? LOONGI_ST_D : LOONGI_ST_W; ++ } ++} ++ ++static void asm_fload(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg idx, dest = ra_dest(as, ir, allow); ++ rset_clear(allow, dest); ++ LOONGIns loongi = asm_fxloadins(as, ir); ++ int32_t ofs; ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. 
*/ ++ idx = ra_allock(as, (int64_t)J2GG(as->J), allow); ++ ofs = (int32_t)(ir->op2<<2); ++ } else { ++ idx = ra_alloc1(as, ir->op1, allow); ++ if (ir->op2 == IRFL_TAB_ARRAY) { ++ ofs = asm_fuseabase(as, ir->op1); ++ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ ++ emit_dji(as, LOONGI_ADDI_D, dest, idx, ofs); ++ return; ++ } ++ } ++ ofs = field_ofs[ir->op2]; ++ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); ++ } ++ rset_clear(allow, idx); ++ emit_lso(as, loongi, dest, idx, ofs, allow); ++} ++ ++static void asm_fstore(ASMState *as, IRIns *ir) ++{ ++ if (ir->r == RID_SINK) ++ return; ++ Reg src = ra_alloc1(as, ir->op2, RSET_GPR); ++ IRIns *irf = IR(ir->op1); ++ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); ++ int32_t ofs = field_ofs[irf->op2]; ++ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); ++ emit_dji(as, asm_fxstoreins(as, ir), src, idx, ofs&0xfff); ++} ++ ++static void asm_xload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), ++ "unaligned XLOAD"); ++ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); ++} ++ ++static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) ++{ ++ if (ir->r == RID_SINK) ++ return; ++ Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); ++ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, ++ rset_exclude(RSET_GPR, src), ofs); ++} ++ ++#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) ++ ++static void asm_ahuvload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type, idx; ++ RegSet allow = RSET_GPR; ++ int32_t ofs = 0; ++ IRType1 t = ir->t; ++ ++ type = ra_scratch(as, allow); ++ rset_clear(allow, type); ++ ++ if (ra_used(ir)) { ++ lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad load type %d", irt_type(ir->t)); ++ dest = ra_dest(as, ir, irt_isnum(t) ? 
RSET_FPR : allow); ++ rset_clear(allow, dest); ++ if (irt_isaddr(t)) ++ emit_djml(as, LOONGI_BSTRPICK_D, dest, dest, 46, 0); ++ else if (irt_isint(t)) ++ emit_dju(as, LOONGI_SLLI_W, dest, dest, 0); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; ++ rset_clear(allow, idx); ++ if (irt_isnum(t)) { ++ Reg tmp2 = ra_scratch(as, allow); ++ asm_guard(as, LOONGI_BEQ, tmp2, RID_ZERO); ++ emit_dju(as, LOONGI_SLTUI, tmp2, type, ((int32_t)LJ_TISNUM)&0xfff); ++ } else { ++ asm_guard(as, LOONGI_BNE, type, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ if (ra_hasreg(dest)) { ++ if (irt_isnum(t)) { ++ emit_lso(as, LOONGI_FLD_D, dest, idx, ofs, allow); ++ dest = type; ++ } ++ } else { ++ dest = type; ++ } ++ emit_dju(as, LOONGI_SRAI_D, type, dest, 47); ++ emit_lso(as, LOONGI_LD_D, dest, idx, ofs, allow); ++} ++ ++static void asm_ahustore(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg idx, src = RID_NONE, type = RID_NONE; ++ int32_t ofs = 0; ++ if (ir->r == RID_SINK) ++ return; ++ if (irt_isnum(ir->t)) { ++ src = ra_alloc1(as, ir->op2, RSET_FPR); ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ emit_lso(as, LOONGI_FST_D, src, idx, ofs, allow); ++ } else { ++ Reg tmp = RID_TMP; ++ if (irt_ispri(ir->t)) { ++ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); ++ rset_clear(allow, tmp); ++ } else { ++ src = ra_alloc1(as, ir->op2, allow); ++ rset_clear(allow, src); ++ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); ++ rset_clear(allow, type); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ emit_lso(as, LOONGI_ST_D, tmp, idx, ofs, allow); ++ if (ra_hasreg(src)) { ++ if (irt_isinteger(ir->t)) { ++ emit_djk(as, LOONGI_ADD_D, tmp, tmp, type); ++ emit_djml(as, LOONGI_BSTRPICK_D, tmp, src, 31, 0); ++ } else { ++ emit_djk(as, LOONGI_ADD_D, tmp, src, type); ++ } ++ } ++ } ++} ++ ++static void asm_sload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = 
RID_NONE, base; ++ RegSet allow = RSET_GPR; ++ IRType1 t = ir->t; ++ int32_t ofs = 8*((int32_t)ir->op1-2); ++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), ++ "bad parent SLOAD"); /* Handled by asm_head_side(). */ ++ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), ++ "inconsistent SLOAD variant"); ++ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { ++ dest = ra_scratch(as, RSET_FPR); ++ asm_tointg(as, ir, dest); ++ t.irt = IRT_NUM; /* Continue with a regular number type check. */ ++ } else if (ra_used(ir)) { ++ lj_assertA((irt_isnum(ir->t)) || ++ irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad SLOAD type %d", irt_type(ir->t)); ++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++ if (ir->op2 & IRSLOAD_CONVERT) { ++ if (irt_isint(t)) { ++ Reg tmp = ra_scratch(as, RSET_FPR); ++ emit_dj(as, LOONGI_MOVFR2GR_S, dest, tmp); ++ emit_dj(as, LOONGI_FTINTRZ_W_D, tmp, tmp); ++ dest = tmp; ++ t.irt = IRT_NUM; /* Check for original type. */ ++ } else { ++ Reg tmp = ra_scratch(as, RSET_GPR); ++ emit_dj(as, LOONGI_FFINT_D_W, dest, dest); ++ emit_dj(as, LOONGI_MOVGR2FR_W, dest, tmp); ++ dest = tmp; ++ t.irt = IRT_INT; /* Check for original type. */ ++ } ++ } else if (irt_isaddr(t)) { ++ /* Clear type from pointers. */ ++ emit_djml(as, LOONGI_BSTRPICK_D, dest, dest, 46, 0); ++ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { ++ /* Sign-extend integers. 
*/ ++ emit_dju(as, LOONGI_SLLI_W, dest, dest, 0); ++ } ++ goto dotypecheck; ++ } ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++dotypecheck: ++ if ((ir->op2 & IRSLOAD_TYPECHECK)) { ++ if (dest < RID_MAX_GPR) { ++ type = dest; ++ } else { ++ type = ra_scratch(as, allow); ++ } ++ rset_clear(allow, type); ++ Reg tmp1 = ra_scratch(as, allow); ++ if (irt_ispri(t)) { ++ asm_guard(as, LOONGI_BNE, type, ++ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); ++ } else if ((ir->op2 & IRSLOAD_KEYINDEX)) { ++ asm_guard(as, LOONGI_BNE, tmp1, ++ ra_allock(as, (int32_t)LJ_KEYINDEX, allow)); ++ emit_dju(as, LOONGI_SRAI_D, tmp1, type, 32); ++ } else { ++ if (irt_isnum(t)) { ++ asm_guard(as, LOONGI_BEQ, tmp1, RID_ZERO); ++ emit_dji(as, LOONGI_SLTUI, tmp1, tmp1, LJ_TISNUM&0xfff); ++ if (ra_hasreg(dest)) { ++ emit_lso(as, LOONGI_FLD_D, dest, base, ofs, allow); ++ } ++ } else { ++ asm_guard(as, LOONGI_BNE, tmp1, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ emit_dju(as, LOONGI_SRAI_D, tmp1, type, 47); ++ } ++ emit_lso(as, LOONGI_LD_D, type, base, ofs, allow); ++ } else if (ra_hasreg(dest)) { ++ if (irt_isnum(t)) { ++ emit_lso(as, LOONGI_FLD_D, dest, base, ofs, allow); ++ } else { ++ emit_lso(as, irt_isint(t) ? LOONGI_LD_W : LOONGI_LD_D, dest, base, ofs, allow); ++ } ++ } ++} ++ ++/* -- Allocations --------------------------------------------------------- */ ++ ++#if LJ_HASFFI ++static void asm_cnew(ASMState *as, IRIns *ir) ++{ ++ CTState *cts = ctype_ctsG(J2G(as->J)); ++ CTypeID id = (CTypeID)IR(ir->op1)->i; ++ CTSize sz; ++ CTInfo info = lj_ctype_info(cts, id, &sz); ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; ++ IRRef args[4]; ++ RegSet drop = RSET_SCRATCH; ++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), ++ "bad CNEW/CNEWI operands"); ++ ++ as->gcsteps++; ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. 
*/ ++ ra_evictset(as, drop); ++ if (ra_used(ir)) ++ ra_destreg(as, ir, RID_RET); /* GCcdata * */ ++ ++ /* Initialize immutable cdata object. */ ++ if (ir->o == IR_CNEWI) { ++ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); ++ emit_dji(as, sz == 8 ? LOONGI_ST_D : LOONGI_ST_W, ra_alloc1(as, ir->op2, allow), ++ RID_RET, (sizeof(GCcdata))&0xfff); ++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); ++ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ++ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ir->op1; /* CTypeID id */ ++ args[2] = ir->op2; /* CTSize sz */ ++ args[3] = ASMREF_TMP1; /* CTSize align */ ++ asm_gencall(as, ci, args); ++ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); ++ return; ++ } ++ ++ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ ++ emit_dji(as, LOONGI_ST_B, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))&0xfff); ++ emit_dji(as, LOONGI_ST_H, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))&0xfff); ++ emit_dji(as, LOONGI_ADDI_D, RID_RET+1, RID_ZERO, ~LJ_TCDATA&0xfff); ++ emit_dj32i(as, RID_TMP, RID_ZERO, id); ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ASMREF_TMP1; /* MSize size */ ++ asm_gencall(as, ci, args); ++ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); ++} ++#endif ++ ++/* -- Write barriers ------------------------------------------------------ */ ++ ++static void asm_tbar(ASMState *as, IRIns *ir) ++{ ++ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); ++ Reg link = RID_TMP; ++ MCLabel l_end = emit_label(as); ++ emit_dji(as, LOONGI_ST_D, link, tab, ((int32_t)offsetof(GCtab, gclist))&0xfff); ++ emit_dji(as, LOONGI_ST_B, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff); ++ emit_setgl(as, tab, gc.grayagain); // make tab gray again ++ emit_getgl(as, link, gc.grayagain); ++ emit_branch(as, LOONGI_BEQ, RID_TMP, RID_ZERO, 
l_end); // black: not jump ++ emit_djk(as, LOONGI_XOR, mark, mark, RID_TMP); // mark=0: gray ++ emit_dju(as, LOONGI_ANDI, RID_TMP, mark, LJ_GC_BLACK); ++ emit_dji(as, LOONGI_LD_BU, mark, tab, ((int32_t)offsetof(GCtab, marked))&0xfff); ++} ++ ++static void asm_obar(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg obj, val, tmp; ++ /* No need for other object barriers (yet). */ ++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); // Closed upvalue ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ir->op1; /* TValue *tv */ ++ asm_gencall(as, ci, args); ++ obj = IR(ir->op1)->r; ++ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); ++ emit_branch(as, LOONGI_BEQ, tmp, RID_ZERO, l_end); ++ emit_addk(as, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768, RSET_GPR); ++ emit_branch(as, LOONGI_BEQ, RID_TMP, RID_ZERO, l_end); // black: jump ++ emit_dju(as, LOONGI_ANDI, tmp, tmp, LJ_GC_BLACK); ++ emit_dju(as, LOONGI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); ++ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); ++ emit_dji(as, LOONGI_LD_BU, tmp, obj, ++ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))&0xfff); ++ emit_dji(as, LOONGI_LD_BU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))&0xfff); ++} ++ ++/* -- Arithmetic and logic operations ------------------------------------- */ ++ ++static void asm_fparith(ASMState *as, IRIns *ir, LOONGIns loongi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_djk(as, loongi, dest, left, right); ++} ++ ++static void asm_fpunary(ASMState *as, IRIns *ir, LOONGIns loongi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ emit_dj(as, loongi, dest, left); ++} ++ ++static void asm_fpmath(ASMState *as, IRIns *ir) ++{ 
++ IRFPMathOp fpm = (IRFPMathOp)ir->op2; ++ if (fpm <= IRFPM_TRUNC) ++ asm_callround(as, ir, IRCALL_lj_vm_floor + fpm); ++ else if (fpm == IRFPM_SQRT) ++ asm_fpunary(as, ir, LOONGI_FSQRT_D); ++ else ++ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); ++} ++ ++static void asm_add(ASMState *as, IRIns *ir) ++{ ++ IRType1 t = ir->t; ++ if (irt_isnum(t)) { ++ if (!asm_fusemadd(as, ir, LOONGI_FMADD_D, LOONGI_FMADD_D)) ++ asm_fparith(as, ir, LOONGI_FADD_D); ++ return; ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (LOONGF_S_OK(k, 12)) { // si12 ++ if (irt_is64(t)) { ++ emit_dji(as, LOONGI_ADDI_D, dest, left, k&0xfff); ++ } else { ++ emit_dji(as, LOONGI_ADDI_W, dest, left, k&0xfff); ++ } ++ return; ++ } ++ } ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_djk(as, irt_is64(t) ? LOONGI_ADD_D : LOONGI_ADD_W, dest, ++ left, right); ++ } ++} ++ ++static void asm_sub(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ if (!asm_fusemadd(as, ir, LOONGI_FMSUB_D, LOONGI_FNMSUB_D)) ++ asm_fparith(as, ir, LOONGI_FSUB_D); ++ return; ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_djk(as, irt_is64(ir->t) ? 
LOONGI_SUB_D : LOONGI_SUB_W, dest, ++ left, right); ++ } ++} ++ ++static void asm_mul(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fparith(as, ir, LOONGI_FMUL_D); ++ } else ++ { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (irt_is64(ir->t)) { ++ emit_djk(as, LOONGI_MUL_D, dest, left, right); ++ } else { ++ emit_djk(as, LOONGI_MUL_W, dest, left, right); ++ } ++ } ++} ++ ++static void asm_fpdiv(ASMState *as, IRIns *ir) ++{ ++ asm_fparith(as, ir, LOONGI_FDIV_D); ++} ++ ++static void asm_neg(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpunary(as, ir, LOONGI_FNEG_D); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_djk(as, irt_is64(ir->t) ? LOONGI_SUB_D : LOONGI_SUB_W, dest, ++ RID_ZERO, left); ++ } ++} ++ ++#define asm_abs(as, ir) asm_fpunary(as, ir, LOONGI_FABS_D) ++ ++static void asm_arithov(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg right, left, tmp, tmp2, dest = ra_dest(as, ir, allow); ++ rset_clear(allow, dest); ++ lj_assertA(!irt_is64(ir->t), "bad usage"); ++ tmp2 = ra_scratch(as, allow); ++ rset_clear(allow, tmp2); ++ if (irref_isk(ir->op2)) { ++ int k = IR(ir->op2)->i; ++ if (ir->o == IR_SUBOV) k = -k; ++ if (LOONGF_S_OK(k, 12)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ ++ left = ra_alloc1(as, ir->op1, allow); ++ asm_guard(as, k >= 0 ? LOONGI_BNE : LOONGI_BEQ, tmp2, RID_ZERO); ++ emit_djk(as, LOONGI_SLT, tmp2, dest, dest == left ? 
tmp2 : left); ++ emit_dji(as, LOONGI_ADDI_D, dest, left, k&0xfff); ++ if (dest == left) emit_move(as, tmp2, left); ++ return; ++ } ++ } ++ left = ra_alloc2(as, ir, allow); ++ right = (left >> 8); left &= 255; ++ rset_clear(allow, right); ++ rset_clear(allow, left); ++ tmp = ra_scratch(as, allow); ++ asm_guard(as, LOONGI_BLT, tmp2, RID_ZERO); ++ emit_djk(as, LOONGI_AND, tmp2, RID_TMP, tmp); ++ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ ++ emit_djk(as, LOONGI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); ++ } else { /* ((dest^left) & (dest^~right)) < 0 */ ++ emit_djk(as, LOONGI_XOR, RID_TMP, RID_TMP, dest); ++ emit_djk(as, LOONGI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO); ++ } ++ emit_djk(as, LOONGI_XOR, tmp, dest, dest == left ? RID_TMP : left); ++ emit_djk(as, ir->o == IR_ADDOV ? LOONGI_ADD_W : LOONGI_SUB_W, dest, left, right); ++ if (dest == left || dest == right) ++ emit_move(as, RID_TMP, dest == left ? left : right); ++} ++ ++#define asm_addov(as, ir) asm_arithov(as, ir) ++#define asm_subov(as, ir) asm_arithov(as, ir) ++ ++static void asm_mulov(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg tmp, tmp2, right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), ++ right), dest)); ++ tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), ++ right), dest), tmp)); ++ asm_guard(as, LOONGI_BNE, tmp2, tmp); ++ emit_dju(as, LOONGI_SRAI_W, tmp2, dest, 31); ++ emit_djk(as, LOONGI_MUL_W, dest, left, right); // dest: [31:0]+signextend ++ emit_djk(as, LOONGI_MULH_W, tmp, left, right); // tmp: [63:32] ++} ++ ++static void asm_bnot(ASMState *as, IRIns *ir) ++{ ++ Reg left, right, dest = ra_dest(as, ir, RSET_GPR); ++ IRIns *irl = IR(ir->op1); ++ if (mayfuse(as, ir->op1) && irl->o == IR_BOR) { ++ left = ra_alloc2(as, irl, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ 
} else { ++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ right = RID_ZERO; ++ } ++ emit_djk(as, LOONGI_NOR, dest, left, right); ++} ++ ++static void asm_bswap(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (irt_is64(ir->t)) { ++ emit_dj(as, LOONGI_REVH_D, dest, RID_TMP); ++ emit_dj(as, LOONGI_REVB_4H, RID_TMP, left); ++ } else { ++ emit_dju(as, LOONGI_ROTRI_W, dest, RID_TMP, 16); ++ emit_dj(as, LOONGI_REVB_2H, RID_TMP, left); ++ } ++} ++ ++static void asm_bitop(ASMState *as, IRIns *ir, LOONGIns loongi, LOONGIns loongik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (checku12(k)) { ++ emit_dji(as, loongik, dest, left, k&0xfff); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_djk(as, loongi, dest, left, right); ++} ++ ++#define asm_band(as, ir) asm_bitop(as, ir, LOONGI_AND, LOONGI_ANDI) ++#define asm_bor(as, ir) asm_bitop(as, ir, LOONGI_OR, LOONGI_ORI) ++#define asm_bxor(as, ir) asm_bitop(as, ir, LOONGI_XOR, LOONGI_XORI) ++ ++static void asm_bitshift(ASMState *as, IRIns *ir, LOONGIns loongi, LOONGIns loongik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ uint32_t shmask = irt_is64(ir->t) ? 63 : 31; ++ if (irref_isk(ir->op2)) { /* Constant shifts. */ ++ uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmask); ++ emit_dju(as, loongik, dest, left, shift); ++ } else { ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_djk(as, loongi, dest, left, right); /* Shift amount is in rs. */ ++ } ++} ++ ++#define asm_bshl(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, LOONGI_SLL_D, LOONGI_SLLI_D) : \ ++ asm_bitshift(as, ir, LOONGI_SLL_W, LOONGI_SLLI_W)) ++#define asm_bshr(as, ir) (irt_is64(ir->t) ? 
\ ++ asm_bitshift(as, ir, LOONGI_SRL_D, LOONGI_SRLI_D) : \ ++ asm_bitshift(as, ir, LOONGI_SRL_W, LOONGI_SRLI_W)) ++#define asm_bsar(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, LOONGI_SRA_D, LOONGI_SRAI_D) : \ ++ asm_bitshift(as, ir, LOONGI_SRA_W, LOONGI_SRAI_W)) ++#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") ++#define asm_bror(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, LOONGI_ROTR_D, LOONGI_ROTRI_D) : \ ++ asm_bitshift(as, ir, LOONGI_ROTR_W, LOONGI_ROTRI_W)) ++ ++static void asm_min_max(ASMState *as, IRIns *ir, int ismax) ++{ ++ if (irt_isnum(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_djk(as, ismax ? LOONGI_FMAX_D : LOONGI_FMIN_D, dest, left, right); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_djk(as, LOONGI_OR, dest, dest, RID_TMP); ++ if (dest != right) { ++ emit_djk(as, LOONGI_MASKEQZ, RID_TMP, right, RID_TMP); ++ emit_djk(as, LOONGI_MASKNEZ, dest, left, RID_TMP); ++ } else { ++ emit_djk(as, LOONGI_MASKNEZ, RID_TMP, left, RID_TMP); ++ emit_djk(as, LOONGI_MASKEQZ, dest, right, RID_TMP); ++ } ++ emit_djk(as, LOONGI_SLT, RID_TMP, ++ ismax ? left : right, ismax ? right : left); ++ } ++} ++ ++#define asm_min(as, ir) asm_min_max(as, ir, 0) ++#define asm_max(as, ir) asm_min_max(as, ir, 1) ++ ++/* -- Comparisons --------------------------------------------------------- */ ++ ++/* FP comparisons. */ ++static void asm_fpcomp(ASMState *as, IRIns *ir) ++{ ++ IROp op = ir->o; ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ asm_guard21(as, (op&1) ? 
LOONGI_BCNEZ : LOONGI_BCEQZ, 0); ++ switch (op) { ++ case IR_LT: case IR_UGE: ++ emit_djk(as, LOONGI_FCMP_CLT_D, 0, left, right); ++ break; ++ case IR_GE: case IR_ULT: ++ emit_djk(as, LOONGI_FCMP_CULT_D, 0, left, right); ++ break; ++ case IR_LE: case IR_UGT: case IR_ABC: ++ emit_djk(as, LOONGI_FCMP_CLE_D, 0, left, right); ++ break; ++ case IR_ULE: case IR_GT: ++ emit_djk(as, LOONGI_FCMP_CULE_D, 0, left, right); ++ break; ++ case IR_EQ: case IR_NE: ++ emit_djk(as, LOONGI_FCMP_CEQ_D, 0, left, right); ++ break; ++ default: ++ break; ++ } ++} ++ ++/* Integer comparisons. */ ++static void asm_intcomp(ASMState *as, IRIns *ir) ++{ ++ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ ++ /* 00 01 10 11 100 101 110 111 */ ++ IROp op = ir->o; ++ RegSet allow = RSET_GPR; ++ Reg tmp, right, left = ra_alloc1(as, ir->op1, allow); ++ rset_clear(allow, left); ++ if (op == IR_ABC) op = IR_UGT; ++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { ++ switch (op) { ++ case IR_GT: asm_guard(as, LOONGI_BGE, RID_ZERO, left); break; ++ case IR_LE: asm_guard(as, LOONGI_BLT, RID_ZERO, left); break; ++ case IR_GE: asm_guard(as, LOONGI_BLT, left, RID_ZERO); break; ++ case IR_LT: asm_guard(as, LOONGI_BGE, left, RID_ZERO); break; ++ default: break; ++ } ++ return; ++ } ++ tmp = ra_scratch(as, allow); ++ rset_clear(allow, tmp); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if ((op&2)) k++; ++ if (checki12(k)) { ++ asm_guard(as, (op&1) ? LOONGI_BNE : LOONGI_BEQ, tmp, RID_ZERO); ++ emit_dji(as, (op&4) ? LOONGI_SLTUI : LOONGI_SLTI, tmp, left, k&0xfff); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, allow); ++ asm_guard(as, ((op^(op>>1))&1) ? LOONGI_BNE : LOONGI_BEQ, tmp, RID_ZERO); ++ emit_djk(as, (op&4) ? LOONGI_SLTU : LOONGI_SLT, ++ tmp, (op&2) ? right : left, (op&2) ? 
left : right); ++} ++ ++static void asm_comp(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) ++ asm_fpcomp(as, ir); ++ else ++ asm_intcomp(as, ir); ++} ++ ++static void asm_equal(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpcomp(as, ir); ++ } else { ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ asm_guard(as, (ir->o & 1) ? LOONGI_BEQ : LOONGI_BNE, left, right); ++ } ++} ++ ++/* -- Split register ops -------------------------------------------------- */ ++ ++/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++static void asm_hiop(ASMState *as, IRIns *ir) ++{ ++ /* HIOP is marked as a store because it needs its own DCE logic. */ ++ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ ++ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; ++ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ ++ switch ((ir-1)->o) { ++ case IR_CALLN: ++ case IR_CALLL: ++ case IR_CALLS: ++ case IR_CALLXS: ++ if (!uselo) ++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ ++ break; ++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; ++ } ++} ++ ++/* -- Profiling ----------------------------------------------------------- */ ++ ++static void asm_prof(ASMState *as, IRIns *ir) ++{ ++ UNUSED(ir); ++ Reg tmp = ra_scratch(as, RSET_GPR); ++ asm_guard(as, LOONGI_BNE, tmp, RID_ZERO); ++ emit_dju(as, LOONGI_ANDI, tmp, tmp, HOOK_PROFILE); ++ emit_lsglptr2(as, LOONGI_LD_BU, tmp, ++ (int32_t)offsetof(global_State, hookmask)); ++} ++ ++/* -- Stack handling ------------------------------------------------------ */ ++ ++/* Check Lua stack size for overflow. Use exit handler as fallback. */ ++static void asm_stack_check(ASMState *as, BCReg topslot, ++ IRIns *irp, RegSet allow, ExitNo exitno) ++{ ++ /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */ ++ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? 
irp->r : RID_TMP) : RID_BASE; ++ ExitNo oldsnap = as->snapno; ++ rset_clear(allow, pbase); ++ as->snapno = exitno; ++ asm_guard(as, LOONGI_BNE, RID_R20, RID_ZERO); ++ as->snapno = oldsnap; ++ if (allow) { ++ tmp = rset_pickbot(allow); ++ ra_modified(as, tmp); ++ } else { // allow == RSET_EMPTY ++ tmp = RID_RET; ++ emit_dji(as, LOONGI_LD_D, tmp, RID_SP, 0); /* Restore tmp1 register. */ ++ } ++ lj_assertA(checki12(8*topslot), "slot offset %d does not fit in si12", 8*topslot); ++ emit_dji(as, LOONGI_SLTUI, RID_R20, RID_R20, (int32_t)(8*topslot)&0xfff); ++ emit_djk(as, LOONGI_SUB_D, RID_R20, tmp, pbase); ++ emit_dji(as, LOONGI_LD_D, tmp, tmp, offsetof(lua_State, maxstack)); ++ if (pbase == RID_TMP) ++ emit_getgl(as, RID_TMP, jit_base); ++ emit_getgl(as, tmp, cur_L); ++ if (allow == RSET_EMPTY) /* Spill temp register. */ ++ emit_dji(as, LOONGI_ST_D, tmp, RID_SP, 0); ++} ++ ++/* Restore Lua stack from on-trace state. */ ++static void asm_stack_restore(ASMState *as, SnapShot *snap) ++{ ++ SnapEntry *map = &as->T->snapmap[snap->mapofs]; ++#ifdef LUA_USE_ASSERT ++ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; ++#endif ++ MSize n, nent = snap->nent; ++ /* Store the value of all modified slots to the Lua stack. 
*/ ++ for (n = 0; n < nent; n++) { ++ SnapEntry sn = map[n]; ++ BCReg s = snap_slot(sn); ++ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); ++ IRRef ref = snap_ref(sn); ++ IRIns *ir = IR(ref); ++ if ((sn & SNAP_NORESTORE)) ++ continue; ++ if (irt_isnum(ir->t)) { ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_dji(as, LOONGI_FST_D, src, RID_BASE, ofs&0xfff); ++ } else { ++ if ((sn & SNAP_KEYINDEX)) { ++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE); ++ int64_t kki = (int64_t)LJ_KEYINDEX << 32; ++ if (irref_isk(ref)) { ++ emit_djk(as, LOONGI_STX_D, ++ ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow), ++ RID_BASE, RID_R20); ++ emit_d16i(as, RID_R20, ofs); ++ } else { ++ Reg src = ra_alloc1(as, ref, allow); ++ Reg rki = ra_allock(as, kki, rset_exclude(allow, src)); ++ emit_djk(as, LOONGI_STX_D, RID_TMP, RID_BASE, RID_R20); ++ emit_d16i(as, RID_R20, ofs); ++ emit_djk(as, LOONGI_ADD_D, RID_TMP, src, rki); ++ } ++ } else { ++ asm_tvstore64(as, RID_BASE, ofs, ref); ++ } ++ } ++ checkmclim(as); ++ } ++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); ++} ++ ++/* -- GC handling --------------------------------------------------------- */ ++ ++/* Marker to prevent patching the GC check exit. */ ++#define LOONG_NOPATCH_GC_CHECK LOONGI_OR ++ ++/* Check GC threshold and do one or more GC steps. */ ++static void asm_gc_check(ASMState *as) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg tmp1, tmp2; ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ ++ asm_guard(as, LOONGI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. 
*/ ++ *--as->mcp = LOONG_NOPATCH_GC_CHECK; ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ASMREF_TMP2; /* MSize steps */ ++ asm_gencall(as, ci, args); ++ tmp1 = ra_releasetmp(as, ASMREF_TMP1); ++ tmp2 = ra_releasetmp(as, ASMREF_TMP2); ++ ra_allockreg(as, (int64_t)(J2G(as->J)), tmp1); ++ emit_loadi(as, tmp2, as->gcsteps); ++ /* Jump around GC step if GC total < GC threshold. */ ++ emit_branch(as, LOONGI_BLTU, RID_TMP, tmp2, l_end); ++ emit_getgl(as, tmp2, gc.threshold); ++ emit_getgl(as, RID_TMP, gc.total); ++ as->gcsteps = 0; ++ checkmclim(as); ++} ++ ++/* -- Loop handling ------------------------------------------------------- */ ++ ++/* Fixup the loop branch. */ ++static void asm_loop_fixup(ASMState *as) ++{ ++ MCode *p = as->mctop; ++ MCode *target = as->mcp; ++ if (as->loopinv) { /* Inverted loop branch? */ ++ /* asm_guard* already inverted the bceqz/bcnez/beq/bne/blt/bge, and patched the final b. */ ++ uint32_t mask = (p[-2] & 0xfc000000) == 0x48000000 ? 0x1fffffu : 0xffffu; ++ ptrdiff_t delta = target - (p - 2); ++ if (mask == 0x1fffffu) { /* BCEQZ BCNEZ*/ ++ p[-2] = p[-2] | LOONGF_I((uint32_t)delta & 0xffffu) | (((uint32_t)delta & 0x1f0000u) >> 16); ++ } else { /* BEQ BNE BLE BGE BLTU BGEU*/ ++ p[-2] |= LOONGF_I(delta & 0xffffu); ++ } ++ if (p[-1] == 0) ++ p[-1] = LOONGI_NOP; ++ } else { ++ /* b */ ++ ptrdiff_t delta = target - (p - 1); ++ p[-1] = LOONGI_B | LOONGF_I(delta & 0xffffu) | ((delta & 0x3ff0000) >> 16); ++ } ++} ++ ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ UNUSED(as); /* Nothing to do. */ ++} ++ ++/* -- Head of trace ------------------------------------------------------- */ ++ ++/* Coalesce BASE register for a root trace. */ ++static void asm_head_root_base(ASMState *as) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. 
*/ ++ if (r != RID_BASE) ++ emit_move(as, r, RID_BASE); ++ } ++} ++ ++/* Coalesce BASE register for a side trace. */ ++static Reg asm_head_side_base(ASMState *as, IRIns *irp) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (irp->r == r) { ++ return r; /* Same BASE register already coalesced. */ ++ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { ++ emit_move(as, r, irp->r); /* Move from coalesced parent reg. */ ++ return irp->r; ++ } else { ++ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ ++ } ++ } ++ return RID_NONE; ++} ++ ++/* -- Tail of trace ------------------------------------------------------- */ ++ ++/* Fixup the tail code. */ ++static void asm_tail_fixup(ASMState *as, TraceNo lnk) ++{ ++ MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; ++ int32_t spadj = as->T->spadjust; ++ MCode *p = as->mctop - 1; ++ if (spadj == 0) { ++ p[-1] = LOONGI_NOP; ++ } else { ++ p[-1] = LOONGI_ADDI_D|LOONGF_D(RID_SP)|LOONGF_J(RID_SP)|LOONGF_I(spadj); ++ } ++ ++ MCode *tmp = p; ++ *p = LOONGI_B | LOONGF_I((uintptr_t)(target-tmp)&0xffffu) | (((uintptr_t)(target-tmp)&0x3ff0000u) >> 16); ++} ++ ++/* Prepare tail of code. */ ++static void asm_tail_prep(ASMState *as) ++{ ++ MCode *p = as->mctop - 1; /* Leave room for exit branch. */ ++ if (as->loopref) { ++ as->invmcp = as->mcp = p; ++ } else { ++ as->mcp = p-1; /* Leave room for stack pointer adjustment. */ ++ as->invmcp = NULL; ++ } ++ *p = LOONGI_NOP; /* Prevent load/store merging. */ ++} ++ ++/* -- Trace setup --------------------------------------------------------- */ ++ ++/* Ensure there are enough stack slots for call arguments. 
*/ ++static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ uint32_t i, nargs = CCI_XNARGS(ci); ++ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; ++ asm_collectargs(as, ir, ci, args); ++ for (i = 0; i < nargs; i++) { ++ if (args[i] && irt_isfp(IR(args[i])->t)) { ++ if (nfpr > 0) ++ nfpr--; ++ else if (ngpr > 0) ++ ngpr--; ++ else ++ nslots += 2; ++ } else { ++ if (ngpr > 0) ++ ngpr--; ++ else ++ nslots += 2; ++ } ++ } ++ if (nslots > as->evenspill) /* Leave room for args in stack slots. */ ++ as->evenspill = nslots; ++ return REGSP_HINT(RID_RET); ++} ++ ++static void asm_sparejump_setup(ASMState *as) ++{ ++ MCode *mxp = as->mctop; ++ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { ++ mxp -= 4*1; ++ as->mctop = mxp; ++ } ++} ++ ++static void asm_setup_target(ASMState *as) ++{ ++ asm_sparejump_setup(as); ++ asm_exitstub_setup(as); ++} ++ ++/* -- Trace patching ------------------------------------------------------ */ ++ ++/* Patch exit jumps of existing machine code to a new target. */ ++void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) ++{ ++ MCode *p = T->mcode; ++ MCode *pe = (MCode *)((char *)p + T->szmcode); ++ MCode *px = exitstub_trace_addr(T, exitno); ++ MCode *cstart = NULL; ++ MCode *mcarea = lj_mcode_patch(J, p, 0); ++ ++ MCode exitload = LOONGI_ADDI_D | LOONGF_D(RID_TMP) | LOONGF_J(RID_ZERO) | LOONGF_I(exitno&0xfff); ++ ++ for (; p < pe; p++) { ++ if (*p == exitload) { ++ /* Look for exitstub branch, replace with branch to target. */ ++ ptrdiff_t delta = target - p - 1; ++ MCode ins = p[1]; ++ if (((ins ^ ((px-p-1)<<10)) & 0x3fffc00) == 0 && ++ ((ins & 0xfc000000u) == LOONGI_BEQ || ++ (ins & 0xfc000000u) == LOONGI_BNE || ++ (ins & 0xfc000000u) == LOONGI_BLT || ++ (ins & 0xfc000000u) == LOONGI_BGE || ++ (ins & 0xfc000000u) == LOONGI_BLTU)) { ++ /* Patch beq/bne/blt/bge, if within range. 
*/ ++ if (p[-1] == LOONG_NOPATCH_GC_CHECK) { ++ /* nothing */ ++ } else if (LOONGF_S_OK(delta, 16)) { ++ p[1] = (ins & 0xfc0003ffu) | LOONGF_I(delta & 0xffff); ++ *p = LOONGI_NOP; ++ if (!cstart) cstart = p + 1; ++ } ++ } else if (((ins ^ ((((px-p-1)&0xffff)<<10) + (((px-p-1)>>10)&0x1f))) & 0x3fffc1f) == 0 && ++ ((ins & 0xfc000000u) == LOONGI_BCEQZ || ++ (ins & 0xfc000100u) == LOONGI_BCNEZ)) { ++ /* Patch bceqz/bcnez, if within range. */ ++ if (p[-1] == LOONG_NOPATCH_GC_CHECK) { ++ /* nothing */ ++ } else if (LOONGF_S_OK(delta, 21)) { ++ p[1] = (ins & 0xfc0003e0u) | LOONGF_I(delta & 0xffff) | ((delta & 0x1f0000) >> 16); ++ *p = LOONGI_NOP; ++ if (!cstart) cstart = p + 1; ++ } ++ } else if (((ins ^ ((((px-p-1)&0xffff)<<10) + (((px-p-1)>>10)&0x3f))) & 0x3ffffff) == 0 && ++ ((ins & 0xfc000000u) == LOONGI_B)) { ++ /* Patch b. */ ++ lj_assertJ(LOONGF_S_OK(delta, 26), "branch target out of range"); ++ p[1] = (ins & 0xfc000000u) | LOONGF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16); ++ *p = LOONGI_NOP; ++ if (!cstart) cstart = p + 1; ++ } else if (p+2 == pe){ ++ if (p[2] == LOONGI_NOP) { ++ ptrdiff_t delta = target - &p[2]; ++ lj_assertJ(LOONGF_S_OK(delta, 26), "branch target out of range"); ++ p[2] = LOONGI_B | LOONGF_I(delta & 0xffff) | ((delta & 0x3ff0000) >> 16); ++ *p = LOONGI_NOP; ++ if (!cstart) cstart = p + 2; ++ } ++ } ++ } ++ } ++ if (cstart) lj_mcode_sync(cstart, px+1); ++ lj_mcode_patch(J, mcarea, 1); ++} +diff --git a/src/lj_ccall.c b/src/lj_ccall.c +index e4bed4f84..db4abacdc 100644 +--- a/src/lj_ccall.c ++++ b/src/lj_ccall.c +@@ -778,6 +778,95 @@ + } \ + } + ++#elif LJ_TARGET_LOONGARCH64 ++/* -- LoongArch lp64 calling conventions ---------------------------------------- */ ++ ++#define CCALL_HANDLE_STRUCTRET \ ++ /* Return structs of size > 16 by reference. 
*/ \ ++ cc->retref = !(sz <= 16); \ ++ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; ++ ++#define CCALL_HANDLE_STRUCTRET2 \ ++ unsigned int cl = ccall_classify_struct(cts, ctr); \ ++ if ((cl & 4) && (cl >> 8) <= 2) { \ ++ CTSize i = (cl >> 8) - 1; \ ++ do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \ ++ } else { \ ++ if (cl > 1) { \ ++ sp = (uint8_t *)&cc->fpr[0]; \ ++ if ((cl >> 8) > 2) \ ++ sp = (uint8_t *)&cc->gpr[0]; \ ++ } \ ++ memcpy(dp, sp, ctr->size); \ ++ } \ ++ ++#define CCALL_HANDLE_COMPLEXRET \ ++ /* Complex values are returned in 1 or 2 FPRs. */ \ ++ cc->retref = 0; ++ ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ++ ((float *)dp)[0] = cc->fpr[0].f; \ ++ ((float *)dp)[1] = cc->fpr[1].f; \ ++ } else { /* Copy complex double from FPRs. */ \ ++ ((double *)dp)[0] = cc->fpr[0].d; \ ++ ((double *)dp)[1] = cc->fpr[1].d; \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ /* Pass complex double by reference. */ \ ++ if (sz == 4*sizeof(double)) { \ ++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ ++ sz = CTSIZE_PTR; \ ++ } else if (sz == 2*sizeof(float)) { \ ++ isfp = 2; \ ++ sz = 2*CTSIZE_PTR; \ ++ } else { \ ++ isfp = 1; \ ++ sz = 2*CTSIZE_PTR; \ ++ } ++ ++#define CCALL_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ sp = (uint8_t *)&cc->fpr[0].f; ++ ++#define CCALL_HANDLE_STRUCTARG \ ++ /* Pass structs of size >16 by reference. */ \ ++ unsigned int cl = ccall_classify_struct(cts, d); \ ++ nff = cl >> 8; \ ++ if (sz > 16) { \ ++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ ++ sz = CTSIZE_PTR; \ ++ } \ ++ /* Pass struct in FPRs. */ \ ++ if (cl > 1) { \ ++ isfp = (cl & 4) ? 2 : 1; \ ++ } ++ ++ ++#define CCALL_HANDLE_REGARG \ ++ if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \ ++ int n2 = ctype_isvector(d->info) ? 1 : \ ++ isfp == 1 ? 
n : 2; \ ++ if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \ ++ dp = &cc->fpr[nfpr]; \ ++ nfpr += n2; \ ++ goto done; \ ++ } else { \ ++ if (ngpr + n2 <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n2; \ ++ goto done; \ ++ } \ ++ } \ ++ } else { /* Try to pass argument in GPRs. */ \ ++ if (ngpr + n <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -1183,6 +1272,53 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct) + + #endif + ++/* -- LoongArch64 ABI struct classification ---------------------------- */ ++ ++#if LJ_TARGET_LOONGARCH64 ++ ++static unsigned int ccall_classify_struct(CTState *cts, CType *ct) ++{ ++ CTSize sz = ct->size; ++ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); ++ while (ct->sib) { ++ CType *sct; ++ ct = ctype_get(cts, ct->sib); ++ if (ctype_isfield(ct->info)) { ++ sct = ctype_rawchild(cts, ct); ++ if (ctype_isfp(sct->info)) { ++ r |= sct->size; ++ if (!isu) n++; else if (n == 0) n = 1; ++ } else if (ctype_iscomplex(sct->info)) { ++ r |= (sct->size >> 1); ++ if (!isu) n += 2; else if (n < 2) n = 2; ++ } else if (ctype_isstruct(sct->info)) { ++ goto substruct; ++ } else { ++ goto noth; ++ } ++ } else if (ctype_isbitfield(ct->info)) { ++ goto noth; ++ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { ++ sct = ctype_rawchild(cts, ct); ++ substruct: ++ if (sct->size > 0) { ++ unsigned int s = ccall_classify_struct(cts, sct); ++ if (s <= 1) goto noth; ++ r |= (s & 255); ++ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); ++ } ++ } ++ } ++ if ((r == 4 || r == 8) && n <= 4) ++ return r + (n << 8); ++noth: /* Not a homogeneous float/double aggregate. */ ++ return (sz <= 16); /* Return structs of size <= 16 in GPRs. 
*/ ++} ++ ++ ++#endif ++ ++ + /* -- Common C call handling ---------------------------------------------- */ + + /* Infer the destination CTypeID for a vararg argument. */ +@@ -1232,7 +1368,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + #if LJ_TARGET_RISCV64 + int nff = 0; + #endif +- ++#if LJ_TARGET_LOONGARCH64 ++ int nff = 0; ++#endif + /* Clear unused regs to get some determinism in case of misdeclaration. */ + memset(cc->gpr, 0, sizeof(cc->gpr)); + #if CCALL_NUM_FPR +@@ -1426,7 +1564,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + if (isfp && d->size == sizeof(float)) + ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */ + #endif +-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 ++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64 + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) + #if LJ_TARGET_MIPS64 + || (isfp && nsp == 0) +@@ -1474,6 +1612,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i]; + } while (i--); + } ++#elif LJ_TARGET_LOONGARCH64 ++ if (isfp == 2 && nff <= 2) { ++ /* Split complex float into separate registers. */ ++ CTSize i = (sz >> 2) - 1; ++ do { ++ ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; ++ } while (i--); ++ } + #else + UNUSED(isfp); + #endif +@@ -1483,7 +1629,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + if ((int32_t)nsp < 0) nsp = 0; + #endif + +-#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 ++#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64 + cc->nfpr = nfpr; /* Required for vararg functions. 
*/ + #endif + cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); +diff --git a/src/lj_ccall.h b/src/lj_ccall.h +index 609effa0b..ad2e1e9fe 100644 +--- a/src/lj_ccall.h ++++ b/src/lj_ccall.h +@@ -172,6 +172,21 @@ typedef union FPRArg { + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; + } FPRArg; + ++#elif LJ_TARGET_LOONGARCH64 ++ ++#define CCALL_NARG_GPR 8 ++#define CCALL_NARG_FPR 8 ++#define CCALL_NRET_GPR 2 ++#define CCALL_NRET_FPR 2 ++#define CCALL_SPS_EXTRA 3 ++#define CCALL_SPS_FREE 1 ++ ++typedef intptr_t GPRArg; ++typedef union FPRArg { ++ double d; ++ struct { LJ_ENDIAN_LOHI(float f; , float g;) }; ++} FPRArg; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -219,7 +234,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { + uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ + #elif LJ_TARGET_ARM64 + void *retp; /* Aggregate return pointer in x8. */ +-#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 ++#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64 + uint8_t nfpr; /* Number of arguments in FPRs. */ + #endif + #if LJ_32 +diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c +index ef9c13ffc..a563cf1e1 100644 +--- a/src/lj_ccallback.c ++++ b/src/lj_ccallback.c +@@ -95,6 +95,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) + + #define CALLBACK_MCODE_HEAD 68 + ++#elif LJ_TARGET_LOONGARCH64 ++ ++#define CALLBACK_MCODE_HEAD 52 ++ + #else + + /* Missing support for this architecture. 
*/ +@@ -330,6 +334,33 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) + } + return p; + } ++#elif LJ_TARGET_LOONGARCH64 ++static void *callback_mcode_init(global_State *g, uint32_t *page) ++{ ++ uint32_t *p = page; ++ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; ++ uintptr_t ug = (uintptr_t)(void *)g; ++ MSize slot; ++ *p++ = LOONGI_LU12I_W | LOONGF_D(RID_R18) | LOONGF_I20((target >> 12) & 0xfffff); ++ *p++ = LOONGI_LU12I_W | LOONGF_D(RID_R17) | LOONGF_I20((ug >> 12) & 0xfffff); ++ *p++ = LOONGI_ORI | LOONGF_D(RID_R18) | LOONGF_J(RID_R18) | LOONGF_I(target & 0xfff); ++ *p++ = LOONGI_ORI | LOONGF_D(RID_R17) | LOONGF_J(RID_R17) | LOONGF_I(ug & 0xfff); ++ *p++ = LOONGI_LU32I_D | LOONGF_D(RID_R18) | LOONGF_I20((target >> 32) & 0xfffff); ++ *p++ = LOONGI_LU32I_D | LOONGF_D(RID_R17) | LOONGF_I20((ug >> 32) & 0xfffff); ++ *p++ = LOONGI_LU52I_D | LOONGF_D(RID_R18) | LOONGF_J(RID_R18) | LOONGF_I((target >> 52) & 0xfff); ++ *p++ = LOONGI_LU52I_D | LOONGF_D(RID_R17) | LOONGF_J(RID_R17) | LOONGF_I((ug >> 52) & 0xfff); ++ *p++ = LOONGI_NOP; ++ *p++ = LOONGI_NOP; ++ *p++ = LOONGI_NOP; ++ *p++ = LOONGI_NOP; ++ *p++ = LOONGI_JIRL | LOONGF_D(RID_R0) | LOONGF_J(RID_R18) | LOONGF_I(0); ++ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { ++ *p++ = LOONGI_ORI | LOONGF_D(RID_R19) | LOONGF_J(RID_R0) | LOONGF_I(slot & 0xfff); ++ *p = LOONGI_B | LOONGF_I((page-p) & 0xffff) | (((page-p) >> 16) & 0x3ff); ++ p++; ++ } ++ return p; ++} + #else + /* Missing support for this architecture. 
*/ + #define callback_mcode_init(g, p) (p) +@@ -617,6 +648,31 @@ void lj_ccallback_mcode_free(CTState *cts) + if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ + } + ++#elif LJ_TARGET_LOONGARCH64 ++ ++#define CALLBACK_HANDLE_REGARG \ ++ if (isfp) { \ ++ if (nfpr + n <= CCALL_NARG_FPR) { \ ++ sp = &cts->cb.fpr[nfpr]; \ ++ nfpr += n; \ ++ goto done; \ ++ } else if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } else { \ ++ if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++ ++#define CALLBACK_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ ((float *)dp)[1] = *(float *)dp; ++ + #elif LJ_TARGET_RISCV64 + + #define CALLBACK_HANDLE_REGARG \ +@@ -797,7 +853,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } + #endif +-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 ++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64 + /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ + if (ctr->size <= 4 && + (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) +diff --git a/src/lj_emit_loongarch64.h b/src/lj_emit_loongarch64.h +new file mode 100644 +index 000000000..74a293cc2 +--- /dev/null ++++ b/src/lj_emit_loongarch64.h +@@ -0,0 +1,306 @@ ++/* ++** LoongArch instruction emitter. ++** Copyright (C) 2005-2022 Mike Pall. 
See Copyright Notice in luajit.h ++*/ ++ ++static intptr_t get_k64val(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_KINT64) { ++ return (intptr_t)ir_kint64(ir)->u64; ++ } else if (ir->o == IR_KGC) { ++ return (intptr_t)ir_kgc(ir); ++ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { ++ return (intptr_t)ir_kptr(ir); ++ } else { ++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, ++ "bad 64 bit const IR op %d", ir->o); ++ return ir->i; /* Sign-extended. */ ++ } ++} ++ ++#define get_kval(as, ref) get_k64val(as, ref) ++ ++/* -- Emit basic instructions --------------------------------------------- */ ++ ++static void emit_djk(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, Reg rk) ++{ ++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_K(rk & 0x1f); ++} ++ ++#define emit_dj(as, loongi, rd, rj) emit_djk(as, loongi, rd, rj, 0) ++ ++static void emit_di(ASMState *as, LOONGIns loongi, Reg rd, int32_t i) ++{ ++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_I20(i & 0xfffff); ++} ++ ++static void emit_dji(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, int32_t i) ++{ ++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_I(i); ++} ++ ++static void emit_dju(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, uint32_t u) ++{ ++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_I(u); ++} ++ ++#define checki12(x) LOONGF_S_OK(x, 12) ++#define checku12(x) ((x) == ((x) & 0xfff)) ++ ++static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); ++static void ra_allockreg(ASMState *as, intptr_t k, Reg r); ++static Reg ra_scratch(ASMState *as, RegSet allow); ++ ++static void emit_dj32i(ASMState *as, Reg rd, Reg rj, int32_t i) ++{ ++ if (checki12(i)) { ++ *--as->mcp = LOONGI_ADDI_D | LOONGF_D(rd) | LOONGF_J(rj) | LOONGF_I(i&0xfff); ++ } else { ++ emit_djk(as, LOONGI_ADD_D, rd, RID_R20, rj); ++ emit_dju(as, LOONGI_ORI, RID_R20, RID_R20, i&0xfff); ++ emit_di(as, LOONGI_LU12I_W, RID_R20, 
(i>>12)&0xfffff); ++ } ++} ++ ++static void emit_d16i(ASMState *as, Reg rd, int32_t i) ++{ ++ emit_dji(as, LOONGI_SRAI_D, rd, rd, 16); ++ emit_dji(as, LOONGI_ADDU16I_D, rd, RID_ZERO, (i&0xffff)); ++} ++ ++static void emit_djml(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, uint32_t m, uint32_t l) ++{ ++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_I(l & 0x3f) | LOONGF_M(m & 0x3f); ++} ++ ++static void emit_djka(ASMState *as, LOONGIns loongi, Reg rd, Reg rj, Reg rk, Reg ra) ++{ ++ *--as->mcp = loongi | LOONGF_D(rd & 0x1f) | LOONGF_J(rj & 0x1f) | LOONGF_K(rk & 0x1f) | LOONGF_A(ra & 0x1f); ++} ++ ++static void emit_b_bl(ASMState *as, LOONGIns loongi, uint32_t i) ++{ ++ *--as->mcp = loongi | LOONGF_I(i & 0xffff) | ((i >> 16) & 0x3ff); ++} ++ ++ ++/* -- Emit loads/stores --------------------------------------------------- */ ++ ++/* Prefer rematerialization of BASE/L from global_State over spills. */ ++#define emit_canremat(ref) ((ref) <= REF_BASE) ++ ++ ++/* Load a 32 bit constant into a GPR. */ ++static void emit_loadi(ASMState *as, Reg r, int32_t i) ++{ ++ emit_dj32i(as, r, RID_ZERO, i); ++} ++ ++/* Load a 64 bit constant into a GPR. */ ++static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) ++{ ++ if (checki32((int64_t)u64)) { ++ emit_dj32i(as, r, RID_ZERO, (int32_t)u64); ++ } else { ++ *--as->mcp = LOONGI_LU52I_D | LOONGF_D(r) | LOONGF_J(r) | LOONGF_I((u64>>52)&0xfff); ++ *--as->mcp = LOONGI_LU32I_D | LOONGF_D(r) | LOONGF_I20((u64>>32)&0xfffff); ++ *--as->mcp = LOONGI_ORI | LOONGF_D(r) | LOONGF_J(r) | LOONGF_I(u64&0xfff); ++ *--as->mcp = LOONGI_LU12I_W | LOONGF_D(r) | LOONGF_I20((u64>>12)&0xfffff); ++ } ++} ++ ++#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) ++ ++/* Get/set from constant pointer. 
*/ ++static void emit_lsptr(ASMState *as, LOONGIns loongi, Reg r, void *p, RegSet allow) ++{ ++ intptr_t jgl = (intptr_t)(J2G(as->J)); ++ intptr_t i = (intptr_t)(p); ++ Reg base; ++ if ((uint32_t)(i-jgl) < 65536) { ++ i = i-jgl-32768; ++ base = RID_JGL; ++ } else { ++ base = ra_allock(as, i-(int16_t)i, allow); ++ } ++ if (checki12(i)) { ++ emit_dji(as, loongi, r, base, i&0xfff); ++ } ++ else { ++ /* ld.d->ldx.d, fld.d->fldx.d, ld.s->fldx.s */ ++ if (loongi == LOONGI_LD_D) ++ loongi = LOONGI_LDX_D; ++ else if (loongi == LOONGI_FLD_D) ++ loongi = LOONGI_FLDX_D; ++ else if (loongi == LOONGI_FLD_S) ++ loongi = LOONGI_FLDX_S; ++ emit_djk(as, loongi, r, base, RID_R20); ++ ++ /* move i to a GPR */ ++ emit_d16i(as, RID_R20, i); // i&0xffff ++ } ++} ++ ++/* Load 64 bit IR constant into register. */ ++static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) ++{ ++ const uint64_t *k = &ir_k64(ir)->u64; ++ Reg r64 = r; ++ if (rset_test(RSET_FPR, r)) { ++ r64 = RID_TMP; ++ emit_dj(as, LOONGI_MOVGR2FR_D, r, r64); ++ } ++ if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) ++ emit_lsptr(as, LOONGI_LD_D, r64, (void *)k, 0); /*To copy a doubleword from a GPR to an FPR*/ ++ else ++ emit_loadu64(as, r64, *k); ++} ++ ++/* Get/set global_State fields. */ ++static void emit_lsglptr2(ASMState *as, LOONGIns loongi, Reg r, int32_t ofs) ++{ ++ emit_djk(as, loongi, r, RID_JGL, RID_R20); ++ emit_loadi(as, RID_R20, (ofs-32768)); ++} ++ ++#define emit_getgl(as, r, field) \ ++ emit_lsglptr2(as, LOONGI_LDX_D, (r), (int32_t)offsetof(global_State, field)) ++#define emit_setgl(as, r, field) \ ++ emit_lsglptr2(as, LOONGI_STX_D, (r), (int32_t)offsetof(global_State, field)) ++ ++/* Trace number is determined from per-trace exit stubs. */ ++#define emit_setvmstate(as, i) UNUSED(i) ++ ++/* -- Emit control-flow instructions -------------------------------------- */ ++ ++/* Label for internal jumps. */ ++typedef MCode *MCLabel; ++ ++/* Return label pointing to current PC. 
*/ ++#define emit_label(as) ((as)->mcp) ++ ++static void emit_branch(ASMState *as, LOONGIns loongi, Reg rj, Reg rd, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - (p - 1); ++ lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); ++ /*BEQ BNE BGE BLZ*/ ++ *--p = loongi | LOONGF_D(rd) | LOONGF_J(rj) | LOONGF_I(((uint32_t)delta & 0xffffu)); ++ as->mcp = p; ++} ++ ++static void emit_branch21(ASMState *as, LOONGIns loongi, Reg rj, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - (p - 1); ++ lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range"); ++ *--p = loongi | LOONGF_J(rj) | LOONGF_I(((uint32_t)delta & 0xffffu)) ++ | (((uint32_t)delta & 0x1f0000u)>>16); /*BEQZ BNEZ BCEQZ BCNEZ*/ ++ as->mcp = p; ++} ++ ++static void emit_jmp(ASMState *as, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - (p - 1); ++ emit_b_bl(as, LOONGI_B, (delta&0x3ffffff)); /*offs 26*/ ++} ++ ++#define emit_move(as, dst, src) \ ++ emit_djk(as, LOONGI_OR, (dst), (src), RID_ZERO) ++ ++static void emit_call(ASMState *as, void *target) ++{ ++ MCode *p = --as->mcp; ++ ptrdiff_t delta = (char *)target - (char *)p; ++ if (LOONGF_S_OK(delta>>2, 26)) { ++ *p = LOONGI_BL | LOONGF_I((delta>>2) & 0xffff) | (((delta>>2) >> 16) & 0x3ff); ++ } else { /* Target out of range: need indirect call. */ ++ Reg r = ra_allock(as, (intptr_t)target, RSET_RANGE(RID_R12, RID_R19+1)); ++ *p = LOONGI_JIRL | LOONGF_D(RID_RA) | LOONGF_J(r) | LOONGF_I(0); ++ } ++} ++ ++/* -- Emit generic operations --------------------------------------------- */ ++ ++/* Generic move between two regs. */ ++static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) ++{ ++ if (dst < RID_MAX_GPR && src >= RID_MIN_FPR) ++ emit_dj(as, irt_isnum(ir->t) ? LOONGI_MOVFR2GR_D : LOONGI_MOVFR2GR_S, dst, src); ++ else if (dst < RID_MAX_GPR) ++ emit_move(as, dst, src); ++ else ++ emit_dj(as, irt_isnum(ir->t) ? 
LOONGI_FMOV_D : LOONGI_FMOV_S, dst, src); ++} ++ ++/* Emit an arithmetic operation with a constant operand. */ ++static void emit_addk(ASMState *as, Reg dest, Reg src, int32_t i, RegSet allow) ++{ ++ if (checki12(i)) { ++ emit_dji(as, LOONGI_ADDI_D, dest, src, i&0xfff); ++ } else { ++ Reg src2 = ra_allock(as, i, allow); ++ emit_djk(as, LOONGI_ADD_D, dest, src, src2); ++ } ++} ++ ++static void emit_lso(ASMState *as, LOONGIns loongi, Reg dest, Reg src, int64_t i, RegSet allow) ++{ ++ if (checki12(i)) { ++ emit_dji(as, loongi, dest, src, i&0xfff); ++ } else { ++ LOONGIns loongk = LOONGI_NOP; ++ switch (loongi) { ++ case LOONGI_LD_D: loongk = LOONGI_LDX_D; break; ++ case LOONGI_LD_W: loongk = LOONGI_LDX_W; break; ++ case LOONGI_ST_D: loongk = LOONGI_STX_D; break; ++ case LOONGI_FLD_D: loongk = LOONGI_FLDX_D; break; ++ case LOONGI_FST_D: loongk = LOONGI_FSTX_D; break; ++ case LOONGI_LD_B: loongk = LOONGI_LDX_B; break; ++ case LOONGI_LD_BU: loongk = LOONGI_LDX_BU; break; ++ case LOONGI_LD_H: loongk = LOONGI_LDX_H; break; ++ case LOONGI_LD_HU: loongk = LOONGI_LDX_HU; break; ++ case LOONGI_FLD_S: loongk = LOONGI_FLDX_S; break; ++ default: break; ++ } ++ //Reg src2 = ra_allock(as, i, allow); ++ Reg src2 = ra_scratch(as, allow); ++ emit_djk(as, loongk, dest, src, src2); ++ emit_d16i(as, src2, i); ++ } ++} ++ ++/* Generic load of register with base and (small) offset address. */ ++static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) { ++ emit_djk(as, irt_is64(ir->t) ? LOONGI_LDX_D : LOONGI_LDX_W, r, base, RID_R20); ++ } else { ++ emit_djk(as, irt_isnum(ir->t) ? LOONGI_FLDX_D : LOONGI_FLDX_S, r, base, RID_R20); ++ } ++ emit_d16i(as, RID_R20, ofs); ++} ++ ++/* Generic store of register with base and (small) offset address. */ ++static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) { ++ emit_djk(as, irt_is64(ir->t) ? 
LOONGI_STX_D : LOONGI_STX_W, r, base, RID_R20); ++ } else { ++ emit_djk(as, irt_isnum(ir->t) ? LOONGI_FSTX_D : LOONGI_FSTX_S, (r&31), base, RID_R20); ++ } ++ emit_d16i(as, RID_R20, ofs); ++} ++ ++/* Add offset to pointer. */ ++static void emit_addptr(ASMState *as, Reg r, int32_t ofs) ++{ ++ if (ofs) { ++ emit_addk(as, r, r, ofs, rset_exclude(RSET_GPR, r)); ++ } ++} ++ ++ ++#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) +diff --git a/src/lj_frame.h b/src/lj_frame.h +index 440e83c36..853572c61 100644 +--- a/src/lj_frame.h ++++ b/src/lj_frame.h +@@ -155,6 +155,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + #define CFRAME_SIZE (10*8) + #define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) + #define CFRAME_SHIFT_MULTRES 0 ++#elif LJ_TARGET_LOONGARCH64 ++#define CFRAME_OFS_ERRF 196 ++#define CFRAME_OFS_NRES 192 ++#define CFRAME_OFS_PREV 184 ++#define CFRAME_OFS_L 176 ++#define CFRAME_OFS_PC 168 ++#define CFRAME_SIZE 200 ++#define CFRAME_OFS_MULTRES 0 ++#define CFRAME_SHIFT_MULTRES 3 + #else + #define CFRAME_OFS_PREV (4*8) + #if LJ_GC64 +diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c +index f1a208bd4..f0ffe32bd 100644 +--- a/src/lj_gdbjit.c ++++ b/src/lj_gdbjit.c +@@ -309,6 +309,9 @@ enum { + #elif LJ_TARGET_RISCV64 + DW_REG_SP = 2, + DW_REG_RA = 1, ++#elif LJ_TARGET_LOONGARCH64 ++ DW_REG_SP = 3, ++ DW_REG_RA = 1, + #else + #error "Unsupported target architecture" + #endif +@@ -388,6 +391,8 @@ static const ELFheader elfhdr_template = { + .machine = 8, + #elif LJ_TARGET_RISCV64 + .machine = 243, ++#elif LJ_TARGET_LOONGARCH64 ++ .machine = 258, + #else + #error "Unsupported target architecture" + #endif +@@ -606,6 +611,13 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) + DB(DW_CFA_offset|32|9); DUV(29); + DB(DW_CFA_offset|32|8); DUV(30); + } ++#elif LJ_TARGET_LOONGARCH64 ++ { ++ int i; ++ DB(DW_CFA_offset|30); DUV(2); ++ for (i = 31; i >= 23; i--) { DB(DW_CFA_offset|i); DUV(3+(31-i)); } ++ for (i = 31; i >= 24; i--) 
{ DB(DW_CFA_offset|32|i); DUV(43-i); } ++ } + #else + #error "Unsupported target architecture" + #endif +diff --git a/src/lj_jit.h b/src/lj_jit.h +index 4c3a5dbdf..cc6e243b4 100644 +--- a/src/lj_jit.h ++++ b/src/lj_jit.h +@@ -106,6 +106,10 @@ struct riscv_hwprobe { + + #endif + ++//#elif LJ_TARGET_LOONGARCH64 ++//#define JIT_F_GS464V (JIT_F_CPU << 0) ++//#define JIT_F_CPUSTRING "\6GS464V" ++ + #else + + #define JIT_F_CPUSTRING "" +@@ -407,7 +411,7 @@ enum { + LJ_K64_M2P64_31 = LJ_K64_M2P64, + #endif + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + LJ_K64_2P31, /* 2^31 */ + #if LJ_64 + LJ_K64_2P63, /* 2^63 */ +@@ -416,7 +420,7 @@ enum { + #endif + LJ_K64__MAX, + }; +-#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS) ++#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64) + + enum { + #if LJ_TARGET_X86ORX64 +@@ -426,16 +430,17 @@ enum { + LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ + LJ_K32_2P52, /* 2^52 */ + #endif +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + LJ_K32_2P31, /* 2^31 */ + #endif +-#if LJ_TARGET_MIPS64 ++#if LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64 + LJ_K32_2P63, /* 2^63 */ + LJ_K32_M2P64, /* -2^64 */ + #endif + LJ_K32__MAX + }; +-#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS) ++#define LJ_K32__USED \ ++ (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64) + + /* Get 16 byte aligned pointer to SIMD constant. */ + #define LJ_KSIMD(J, n) \ +diff --git a/src/lj_target.h b/src/lj_target.h +index a79f5d6a0..5a3490ca3 100644 +--- a/src/lj_target.h ++++ b/src/lj_target.h +@@ -55,7 +55,7 @@ typedef uint32_t RegSP; + /* Bitset for registers. 32 registers suffice for most architectures. + ** Note that one set holds bits for both GPRs and FPRs. 
+ */ +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64 + typedef uint64_t RegSet; + #define RSET_BITS 6 + #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) +@@ -147,6 +147,8 @@ typedef uint32_t RegCost; + #include "lj_target_s390x.h" + #elif LJ_TARGET_RISCV64 + #include "lj_target_riscv.h" ++#elif LJ_TARGET_LOONGARCH64 ++#include "lj_target_loongarch64.h" + #else + #error "Missing include for target CPU" + #endif +diff --git a/src/lj_target_loongarch64.h b/src/lj_target_loongarch64.h +new file mode 100644 +index 000000000..100f5e876 +--- /dev/null ++++ b/src/lj_target_loongarch64.h +@@ -0,0 +1,313 @@ ++/* ++** Definitions for LoongArch CPUs. ++** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++#ifndef _LJ_TARGET_LOONGARCH_H ++#define _LJ_TARGET_LOONGARCH_H ++ ++/* -- Registers IDs ------------------------------------------------------- */ ++ ++#define GPRDEF(_) \ ++ _(R0) _(RA) _(R2) _(SP) _(R4) _(R5) _(R6) _(R7) \ ++ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ ++ _(R16) _(R17) _(R18) _(R19) _(R20) _(X) _(R22) _(R23) \ ++ _(R24) _(R25) _(R26) _(R27) _(R28) _(R29) _(R30) _(R31) ++#define FPRDEF(_) \ ++ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ ++ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ ++ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ ++ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) ++#define VRIDDEF(_) ++ ++#define RIDENUM(name) RID_##name, ++ ++enum { ++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ ++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ ++ RID_MAX, ++ RID_ZERO = RID_R0, ++ RID_TMP = RID_RA, ++ ++ /* Calling conventions. */ ++ RID_RET = RID_R4, ++ ++ RID_RETHI = RID_R5, ++ RID_RETLO = RID_R4, ++ ++ RID_FPRET = RID_F0, ++ ++ /* These definitions must match with the *.dasc file(s): */ ++ RID_BASE = RID_R23, /* Interpreter BASE. 
*/ ++ RID_LPC = RID_R25, /* Interpreter PC. */ ++ RID_DISPATCH = RID_R26, /* Interpreter DISPATCH table. */ ++ RID_LREG = RID_R27, /* Interpreter L. */ ++ RID_JGL = RID_R22, /* On-trace: global_State + 32768. */ ++ ++ /* Register ranges [min, max) and number of registers. */ ++ RID_MIN_GPR = RID_R0, ++ RID_MAX_GPR = RID_R31+1, ++ RID_MIN_FPR = RID_MAX_GPR, ++ RID_MAX_FPR = RID_F31+1, ++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, ++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR ++}; ++ ++#define RID_NUM_KREF RID_NUM_GPR ++#define RID_MIN_KREF RID_R0 ++ ++/* -- Register sets ------------------------------------------------------- */ ++ ++/* Make use of all registers, except ZERO, TMP, R2, SP, JGL, R20 and X. */ ++#define RSET_FIXED \ ++ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_R2)|\ ++ RID2RSET(RID_SP)|RID2RSET(RID_JGL)|RID2RSET(RID_R20)|\ ++ RID2RSET(RID_X)) ++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) ++#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) ++#define RSET_ALL (RSET_GPR|RSET_FPR) ++#define RSET_INIT RSET_ALL ++ ++/* scratch register. */ ++#define RSET_SCRATCH_GPR RSET_RANGE(RID_R4, RID_R19+1) ++#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F23+1) ++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) ++#define REGARG_FIRSTGPR RID_R4 ++#define REGARG_LASTGPR RID_R11 ++#define REGARG_NUMGPR 8 ++#define REGARG_FIRSTFPR RID_F0 ++#define REGARG_LASTFPR RID_F7 ++#define REGARG_NUMFPR 8 ++ ++/* -- Spill slots --------------------------------------------------------- */ ++ ++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. ++** ++** SPS_FIXED: Available fixed spill slots in interpreter frame. ++** This definition must match with the *.dasc file(s). ++** ++** SPS_FIRST: First spill slot for general use. 
++*/ ++#define SPS_FIXED 4 ++#define SPS_FIRST 4 ++ ++#define SPOFS_TMP 0 ++ ++#define sps_scale(slot) (4 * (int32_t)(slot)) ++#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) ++ ++/* -- Exit state ---------------------------------------------------------- */ ++ ++/* This definition must match with the *.dasc file(s). */ ++typedef struct { ++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ ++ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ ++ int32_t spill[256]; /* Spill slots. */ ++} ExitState; ++ ++/* Highest exit + 1 indicates stack check. */ ++#define EXITSTATE_CHECKEXIT 1 ++ ++/* Return the address of a per-trace exit stub. */ ++static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) ++{ ++ while (*p == 0x03400000) p++; /* Skip LOONGI_NOP. */ ++ return p; ++} ++/* Avoid dependence on lj_jit.h if only including lj_target.h. */ ++#define exitstub_trace_addr(T, exitno) \ ++ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode)) ++ ++/* -- Instructions -------------------------------------------------------- */ ++ ++/* Instruction fields. */ ++#define LOONGF_D(r) (r) ++#define LOONGF_J(r) ((r) << 5) ++#define LOONGF_K(r) ((r) << 10) ++#define LOONGF_A(r) ((r) << 15) ++#define LOONGF_I(n) ((n) << 10) ++#define LOONGF_I20(n) ((n) << 5) ++#define LOONGF_M(n) ((n) << 16) ++ ++/* Check for valid field range. */ ++#define LOONGF_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) ++ ++typedef enum LOONGIns { ++/* Integer instructions. 
*/ ++ LOONGI_MOVE = 0x00150000, ++ LOONGI_NOP = 0x03400000, ++ ++ LOONGI_AND = 0x00148000, ++ LOONGI_ANDI = 0x03400000, ++ LOONGI_OR = 0x00150000, ++ LOONGI_ORI = 0x03800000, ++ LOONGI_XOR = 0x00158000, ++ LOONGI_XORI = 0x03c00000, ++ LOONGI_NOR = 0x00140000, ++ ++ LOONGI_SLT = 0x00120000, ++ LOONGI_SLTU = 0x00128000, ++ LOONGI_SLTI = 0x02000000, ++ LOONGI_SLTUI = 0x02400000, ++ ++ LOONGI_ADD_W = 0x00100000, ++ LOONGI_ADDI_W = 0x02800000, ++ LOONGI_SUB_W = 0x00110000, ++ LOONGI_MUL_W = 0x001c0000, ++ LOONGI_MULH_W = 0x001c8000, ++ LOONGI_DIV_W = 0x00200000, ++ LOONGI_DIV_WU = 0x00210000, ++ ++ LOONGI_SLLI_W = 0x00408000, ++ LOONGI_SRLI_W = 0x00448000, ++ LOONGI_SRAI_W = 0x00488000, ++ LOONGI_ROTRI_W = 0x004c8000, ++ LOONGI_ROTRI_D = 0x004d0000, ++ LOONGI_SLL_W = 0x00170000, ++ LOONGI_SRL_W = 0x00178000, ++ LOONGI_SRA_W = 0x00180000, ++ LOONGI_ROTR_W = 0x001b0000, ++ LOONGI_ROTR_D = 0x001b8000, ++ ++ LOONGI_EXT_W_B = 0x00005c00, ++ LOONGI_EXT_W_H = 0x00005800, ++ LOONGI_REVB_2H = 0x00003000, ++ LOONGI_REVB_4H = 0x00003400, ++ ++ LOONGI_ALSL_W = 0x00040000, ++ LOONGI_ALSL_D = 0x002c0000, ++ ++ LOONGI_B = 0x50000000, ++ LOONGI_BL = 0x54000000, ++ LOONGI_JIRL = 0x4c000000, ++ ++ LOONGI_BEQ = 0x58000000, ++ LOONGI_BNE = 0x5c000000, ++ LOONGI_BLT = 0x60000000, ++ LOONGI_BGE = 0x64000000, ++ LOONGI_BGEU = 0x6c000000, ++ LOONGI_BLTU = 0x68000000, ++ LOONGI_BCEQZ = 0x48000000, ++ LOONGI_BCNEZ = 0x48000100, ++ ++ /* Load/store instructions. 
*/ ++ LOONGI_LD_W = 0x28800000, ++ LOONGI_LD_D = 0x28c00000, ++ LOONGI_ST_W = 0x29800000, ++ LOONGI_ST_D = 0x29c00000, ++ LOONGI_LD_B = 0x28000000, ++ LOONGI_ST_B = 0x29000000, ++ LOONGI_LD_H = 0x28400000, ++ LOONGI_ST_H = 0x29400000, ++ LOONGI_LD_BU = 0x2a000000, ++ LOONGI_LD_HU = 0x2a400000, ++ LOONGI_LDX_B = 0x38000000, ++ LOONGI_LDX_BU = 0x38200000, ++ LOONGI_LDX_H = 0x38040000, ++ LOONGI_LDX_HU = 0x38240000, ++ LOONGI_LDX_D = 0x380c0000, ++ LOONGI_STX_D = 0x381c0000, ++ LOONGI_LDX_W = 0x38080000, ++ LOONGI_STX_W = 0x38180000, ++ LOONGI_STX_B = 0x38100000, ++ LOONGI_STX_H = 0x38140000, ++ LOONGI_FLD_S = 0x2b000000, ++ LOONGI_FST_S = 0x2b400000, ++ LOONGI_FLD_D = 0x2b800000, ++ LOONGI_FST_D = 0x2bc00000, ++ LOONGI_FLDX_D = 0x38340000, ++ LOONGI_FLDX_S = 0x38300000, ++ LOONGI_FSTX_D = 0x383c0000, ++ LOONGI_FSTX_S = 0x38380000, ++ ++ LOONGI_ADD_D = 0x00108000, ++ LOONGI_ADDI_D = 0x02c00000, ++ LOONGI_ADDU16I_D = 0x10000000, ++ LOONGI_LU12I_W = 0x14000000, ++ LOONGI_LU32I_D = 0x16000000, ++ LOONGI_LU52I_D = 0x3000000, ++ LOONGI_SUB_D = 0x00118000, ++ LOONGI_DIV_D = 0x00220000, ++ LOONGI_DIV_DU = 0x00230000, ++ LOONGI_MUL_D = 0x001d8000, ++ ++ LOONGI_SLLI_D = 0x00410000, ++ LOONGI_SRLI_D = 0x00450000, ++ LOONGI_SLL_D = 0x00188000, ++ LOONGI_SRL_D = 0x00190000, ++ LOONGI_SRAI_D = 0x00490000, ++ LOONGI_SRA_D = 0x00198000, ++ LOONGI_REVH_D = 0x00004400, ++ ++ /* Extract/insert instructions. */ ++ LOONGI_BSTRPICK_D = 0x00c00000, ++ LOONGI_BSTRINS_D = 0x00800000, ++ ++ LOONGI_MASKEQZ = 0x00130000, ++ LOONGI_MASKNEZ = 0x00138000, ++ ++ /* FP instructions. 
*/ ++ LOONGI_FRINT_S = 0x011e4400, ++ LOONGI_FRINT_D = 0x011e4800, ++ LOONGI_FTINTRM_L_D = 0x011a2800, ++ LOONGI_FTINTRP_L_D = 0x011a6800, ++ LOONGI_FTINTRNE_L_D = 0x011ae800, ++ ++ LOONGI_FMOV_S = 0x01149400, ++ LOONGI_FMOV_D = 0x01149800, ++ ++ LOONGI_FABS_D = 0x01140800, ++ LOONGI_FNEG_D = 0x01141800, ++ ++ LOONGI_FADD_D = 0x01010000, ++ LOONGI_FSUB_D = 0x01030000, ++ LOONGI_FMUL_D = 0x01050000, ++ LOONGI_FDIV_D = 0x01070000, ++ LOONGI_FSQRT_D = 0x01144800, ++ ++ LOONGI_FMIN_D = 0x010b0000, ++ LOONGI_FMAX_D = 0x01090000, ++ ++ LOONGI_FADD_S = 0x01008000, ++ LOONGI_FSUB_S = 0x01028000, ++ ++ LOONGI_FMADD_S = 0x08100000, ++ LOONGI_FMADD_D = 0x08200000, ++ LOONGI_FNMADD_D = 0x08a00000, ++ LOONGI_FMSUB_S = 0x08500000, ++ LOONGI_FMSUB_D = 0x08600000, ++ LOONGI_FNMSUB_D = 0x08e00000, ++ ++ LOONGI_FCVT_D_S = 0x01192400, ++ LOONGI_FTINT_W_S = 0x011b0400, ++ LOONGI_FCVT_S_D = 0x01191800, ++ LOONGI_FTINT_W_D = 0x011b0800, ++ LOONGI_FFINT_S_W = 0x011d1000, ++ LOONGI_FFINT_D_W = 0x011d2000, ++ LOONGI_FFINT_S_L = 0x011d1800, ++ LOONGI_FFINT_D_L = 0x011d2800, ++ ++ LOONGI_FTINTRZ_W_S = 0x011a8400, ++ LOONGI_FTINTRZ_W_D = 0x011a8800, ++ LOONGI_FTINTRZ_L_S = 0x011aa400, ++ LOONGI_FTINTRZ_L_D = 0x011aa800, ++ LOONGI_FTINTRM_W_S = 0x011a0400, ++ LOONGI_FTINTRM_W_D = 0x011a0800, ++ ++ LOONGI_MOVFR2GR_S = 0x0114b400, ++ LOONGI_MOVGR2FR_W = 0x0114a400, ++ LOONGI_MOVGR2FR_D = 0x0114a800, ++ LOONGI_MOVFR2GR_D = 0x0114b800, ++ ++ LOONGI_FCMP_CEQ_D = 0x0c220000, ++ LOONGI_FCMP_CLT_S = 0x0c110000, ++ LOONGI_FCMP_CLT_D = 0x0c210000, ++ LOONGI_FCMP_CLE_D = 0x0c230000, ++ LOONGI_FCMP_CULE_D = 0x0c270000, ++ LOONGI_FCMP_CULT_D = 0x0c250000, ++ LOONGI_FCMP_CNE_D = 0x0c280000, ++ LOONGI_FSEL = 0x0d000000, ++} LOONGIns; ++ ++#endif ++ +diff --git a/src/lj_trace.c b/src/lj_trace.c +index 6dc77e286..4756d4b2b 100644 +--- a/src/lj_trace.c ++++ b/src/lj_trace.c +@@ -334,17 +334,17 @@ void lj_trace_initstate(global_State *g) + J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); + 
J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; + #endif +-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 ++#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 || LJ_TARGET_LOONGARCH64 + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); + #endif + #if LJ_TARGET_PPC + J->k32[LJ_K32_2P52_2P31] = 0x59800004; + J->k32[LJ_K32_2P52] = 0x59800000; + #endif +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + J->k32[LJ_K32_2P31] = 0x4f000000; + #endif +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_LOONGARCH64 + J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); + #if LJ_64 + J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); +diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c +index 1ee32d018..7a045f41b 100644 +--- a/src/lj_vmmath.c ++++ b/src/lj_vmmath.c +@@ -70,7 +70,7 @@ double lj_vm_foldarith(double x, double y, int op) + /* -- Helper functions for generated machine code ------------------------- */ + + #if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \ +- || LJ_TARGET_RISCV64 ++ || LJ_TARGET_RISCV64 || LJ_TARGET_LOONGARCH64 + int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) + { + uint32_t y, ua, ub; +diff --git a/src/vm_loongarch64.dasc b/src/vm_loongarch64.dasc +new file mode 100644 +index 000000000..9458e7e2f +--- /dev/null ++++ b/src/vm_loongarch64.dasc +@@ -0,0 +1,4625 @@ ++|// Low-level VM code for LoongArch CPUs. ++|// Bytecode interpreter, fast functions and helper functions. ++|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h ++| ++|.arch loongarch64 ++|.section code_op, code_sub ++| ++|.actionlist build_actionlist ++|.globals GLOB_ ++|.globalnames globnames ++|.externnames extnames ++| ++|//----------------------------------------------------------------------- ++| ++|// Fixed register assignments for the interpreter. 
++|// Don't use: r0 = 0, r1 = ra, r2 = tp, r3 = sp, r21 = reserved ++| ++| ++|// The following must be C callee-save (but BASE is often refetched). ++|.define BASE, r23 // Base of current Lua stack frame. ++|.define KBASE, r24 // Constants of current Lua function. ++|.define PC, r25 // Next PC. ++|.define DISPATCH, r26 // Opcode dispatch table. ++|.define LREG, r27 // Register holding lua_State (also in SAVE_L). ++|.define MULTRES, r28 // Size of multi-result: (nresults+1)*8. ++| ++|.define JGL, r22 // On-trace: global_State + 32768. ++| ++|// Constants for type-comparisons, stores and conversions. C callee-save. ++|.define TISNIL, r22 ++|.define TISNUM, r29 ++|.define TOBIT, f30 // 2^52 + 2^51. ++| ++|// The following temporaries are not saved across C calls, except for RA. ++|.define RA, r30 // Callee-save. ++|.define RB, r8 ++|.define RC, r9 ++|.define RD, r10 ++|.define INS, r11 ++| ++|.define TMP0, r12 ++|.define TMP1, r13 ++|.define TMP2, r14 ++|.define TMP3, r15 ++|.define TMP4, r17 ++| ++|// Loongarch lp64 calling convention. ++|.define CARG1, r4 ++|.define CARG2, r5 ++|.define CARG3, r6 ++|.define CARG4, r7 ++|.define CARG5, r8 ++|.define CARG6, r9 ++|.define CARG7, r10 ++|.define CARG8, r11 ++| ++|.define CRET1, r4 ++|.define CRET2, r5 ++| ++|.define FARG1, f0 ++|.define FARG2, f1 ++|.define FARG3, f2 ++|.define FARG4, f3 ++|.define FARG5, f4 ++|.define FARG6, f5 ++|.define FARG7, f6 ++|.define FARG8, f7 ++| ++|.define FRET1, f0 ++|.define FRET2, f1 ++| ++|.define FTMP0, f8 ++|.define FTMP1, f9 ++|.define FTMP2, f10 ++|.define FTMP3, f22 ++|.define FTMP4, f23 ++| ++|.define FCC0, fcc0 ++|.define FCC1, fcc1 ++| ++|// Stack layout while in interpreter. Must match with lj_frame.h. ++|// LoongArch64 hard-float. ++| ++|.define CFRAME_SPACE, 200 // Delta for sp. ++| ++|//----- 16 byte aligned, <-- sp entering interpreter ++|.define SAVE_ERRF, 196 // 32 bit values. ++|.define SAVE_NRES, 192 ++|.define SAVE_CFRAME, 184 // 64 bit values. 
++|.define SAVE_L, 176 ++|.define SAVE_PC, 168 ++|//----- 16 byte aligned ++|.define SAVE_GPR_, 80 // .. 80+11*8: 64 bit GPR saves. ++|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. ++| ++| ++|.define TMPD, 0 ++|//----- 16 byte aligned ++| ++|.define TMPD_OFS, 0 ++| ++|//----------------------------------------------------------------------- ++| ++|.macro saveregs ++| addi.d sp, sp, -CFRAME_SPACE ++| st.d ra, SAVE_GPR_+10*8(sp) ++| st.d r22, SAVE_GPR_+9*8(sp) ++| st.d r31, SAVE_GPR_+8*8(sp) ++| fst.d f31, SAVE_FPR_+7*8(sp) ++| st.d r30, SAVE_GPR_+7*8(sp) ++| fst.d f30, SAVE_FPR_+6*8(sp) ++| st.d r29, SAVE_GPR_+6*8(sp) ++| fst.d f29, SAVE_FPR_+5*8(sp) ++| st.d r28, SAVE_GPR_+5*8(sp) ++| fst.d f28, SAVE_FPR_+4*8(sp) ++| st.d r27, SAVE_GPR_+4*8(sp) ++| fst.d f27, SAVE_FPR_+3*8(sp) ++| st.d r26, SAVE_GPR_+3*8(sp) ++| fst.d f26, SAVE_FPR_+2*8(sp) ++| st.d r25, SAVE_GPR_+2*8(sp) ++| fst.d f25, SAVE_FPR_+1*8(sp) ++| st.d r24, SAVE_GPR_+1*8(sp) ++| fst.d f24, SAVE_FPR_+0*8(sp) ++| st.d r23, SAVE_GPR_+0*8(sp) ++|.endmacro ++| ++|.macro restoreregs_ret ++| ld.d ra, SAVE_GPR_+10*8(sp) ++| ld.d r22, SAVE_GPR_+9*8(sp) ++| ld.d r31, SAVE_GPR_+8*8(sp) ++| ld.d r30, SAVE_GPR_+7*8(sp) ++| fld.d f31, SAVE_FPR_+7*8(sp) ++| ld.d r29, SAVE_GPR_+6*8(sp) ++| fld.d f30, SAVE_FPR_+6*8(sp) ++| ld.d r28, SAVE_GPR_+5*8(sp) ++| fld.d f29, SAVE_FPR_+5*8(sp) ++| ld.d r27, SAVE_GPR_+4*8(sp) ++| fld.d f28, SAVE_FPR_+4*8(sp) ++| ld.d r26, SAVE_GPR_+3*8(sp) ++| fld.d f27, SAVE_FPR_+3*8(sp) ++| ld.d r25, SAVE_GPR_+2*8(sp) ++| fld.d f26, SAVE_FPR_+2*8(sp) ++| ld.d r24, SAVE_GPR_+1*8(sp) ++| fld.d f25, SAVE_FPR_+1*8(sp) ++| ld.d r23, SAVE_GPR_+0*8(sp) ++| fld.d f24, SAVE_FPR_+0*8(sp) ++| addi.d sp, sp, CFRAME_SPACE ++| jirl r0, ra, 0 ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|.macro .STXW, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| stx.w a, b, r20 ++|.endmacro ++| ++|.macro .STXD, a, b, c ++| addu16i.d r20, r0, c ++| 
srai.d r20, r20, 16 ++| stx.d a, b, r20 ++|.endmacro ++| ++|.macro .LDXW, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| ldx.w a, b, r20 ++|.endmacro ++| ++|.macro .LDXD, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| ldx.d a, b, r20 ++|.endmacro ++| ++|.macro .LDXBU, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| ldx.bu a, b, r20 ++|.endmacro ++| ++|.macro .ADD16I, a, b, c ++| addu16i.d r20, r0, c ++| srai.d r20, r20, 16 ++| add.d a, b, r20 ++|.endmacro ++| ++|// Type definitions. Some of these are only used for documentation. ++|.type L, lua_State, LREG ++|.type GL, global_State ++|.type TVALUE, TValue ++|.type GCOBJ, GCobj ++|.type STR, GCstr ++|.type TAB, GCtab ++|.type LFUNC, GCfuncL ++|.type CFUNC, GCfuncC ++|.type PROTO, GCproto ++|.type UPVAL, GCupval ++|.type NODE, Node ++|.type NARGS8, int ++|.type TRACE, GCtrace ++|.type SBUF, SBuf ++| ++|//----------------------------------------------------------------------- ++| ++|// Trap for not-yet-implemented parts. ++|.macro NYI; break 0; .endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|// Access to frame relative to BASE. ++|.define FRAME_PC, -8 ++|.define FRAME_FUNC, -16 ++| ++|//----------------------------------------------------------------------- ++| ++|// Endian-specific defines. LoongArch is little endian. ++|.define OFS_RD, 2 ++|.define OFS_RA, 1 ++|.define OFS_OP, 0 ++| ++|// Instruction decode. 
++|.macro decode_BC4b, dst; slli.w dst, dst, 2; .endmacro ++|.macro decode_BC8b, dst; slli.w dst, dst, 3; .endmacro ++|.macro decode_OP, dst, ins; andi dst, ins, 0xff; .endmacro ++|.macro decode_RA, dst, ins; bstrpick.d dst, ins, 15, 8; decode_BC8b dst; .endmacro ++|.macro decode_RB, dst, ins; bstrpick.d dst, ins, 31, 24; decode_BC8b dst; .endmacro ++|.macro decode_RC, dst, ins; bstrpick.d dst, ins, 23, 16; decode_BC8b dst; .endmacro ++|.macro decode_RD, dst, ins; bstrpick.d dst, ins, 31, 16; decode_BC8b dst; .endmacro ++|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro ++| ++|// Instruction fetch. ++|.macro ins_NEXT1 ++| ld.w INS, 0(PC) ++| addi.d PC, PC, 4 ++|.endmacro ++|// Instruction decode+dispatch. ++|.macro ins_NEXT2 ++| decode_OP TMP1, INS ++| decode_BC8b TMP1 ++| add.d TMP0, DISPATCH, TMP1 ++| ld.d TMP4, 0(TMP0) ++| decode_RD RD, INS ++| decode_RA RA, INS ++| jirl r0, TMP4, 0 ++|.endmacro ++|.macro ins_NEXT ++| ins_NEXT1 ++| ins_NEXT2 ++|.endmacro ++| ++|// Instruction footer. ++|.if 1 ++| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. ++| .define ins_next, ins_NEXT ++| .define ins_next_, ins_NEXT ++| .define ins_next1, ins_NEXT1 ++| .define ins_next2, ins_NEXT2 ++|.else ++| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. ++| // Affects only certain kinds of benchmarks (and only with -j off). ++| .macro ins_next ++| b ->ins_next ++| .endmacro ++| .macro ins_next1 ++| .endmacro ++| .macro ins_next2 ++| b ->ins_next ++| .endmacro ++| .macro ins_next_ ++| ->ins_next: ++| ins_NEXT ++| .endmacro ++|.endif ++| ++|// Call decode and dispatch. 
++|.macro ins_callt ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++| ld.d PC, LFUNC:RB->pc ++| ld.w INS, 0(PC) ++| addi.d PC, PC, 4 ++| decode_OP TMP1, INS ++| decode_RA RA, INS ++| decode_BC8b TMP1 ++| add.d TMP0, DISPATCH, TMP1 ++| ld.d TMP0, 0(TMP0) ++| add.d RA, RA, BASE ++| jirl r0, TMP0, 0 ++|.endmacro ++| ++|.macro ins_call ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC ++| st.d PC, FRAME_PC(BASE) ++| ins_callt ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|.macro branch_RD ++| srli.w TMP0, RD, 1 ++| addu16i.d TMP4, r0, -0x2 // -BCBIAS_J*4 ++| add.w TMP0, TMP0, TMP4 // (jump - 0x8000)<<2 ++| add.d PC, PC, TMP0 ++|.endmacro ++| ++|// Assumes DISPATCH is relative to GL. ++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) ++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) ++| ++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) ++| ++|.macro hotcheck, delta, target ++| srli.d TMP1, PC, 1 ++| andi TMP1, TMP1, 126 ++| add.d TMP1, TMP1, DISPATCH ++| ld.hu TMP2, GG_DISP2HOT(TMP1) ++| addi.w TMP2, TMP2, -delta ++| st.h TMP2, GG_DISP2HOT(TMP1) ++| blt TMP2, r0, target ++|.endmacro ++| ++|.macro hotloop ++| hotcheck HOTCOUNT_LOOP, ->vm_hotloop ++|.endmacro ++| ++|.macro hotcall ++| hotcheck HOTCOUNT_CALL, ->vm_hotcall ++|.endmacro ++| ++|// Set current VM state. Uses TMP0. ++|.macro li_vmstate, st; addi.w TMP0, r0, ~LJ_VMST_..st; .endmacro ++|.macro st_vmstate; .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate); .endmacro ++| ++|// Move table write barrier back. Overwrites mark and tmp. ++|.macro barrierback, tab, mark, tmp, target ++| .LDXD tmp, DISPATCH, DISPATCH_GL(gc.grayagain) ++| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) ++| .STXD tab, DISPATCH, DISPATCH_GL(gc.grayagain) ++| st.b mark, tab->marked ++| st.d tmp, tab->gclist ++| b target ++|.endmacro ++| ++|// Clear type tag. 
Isolate lowest 47 bits of reg. ++|.macro cleartp, reg; bstrpick.d reg, reg, 46, 0; .endmacro ++|.macro cleartp, dst, reg; bstrpick.d dst, reg, 46, 0; .endmacro ++| ++|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. ++|.macro settp, dst, tp; bstrins.d dst, tp, 63, 47; .endmacro ++| ++|// Extract (negative) type tag. ++|.macro gettp, dst, src; srai.d dst, src, 47; .endmacro ++| ++|// Macros to check the TValue type and extract the GCobj. Branch on failure. ++|.macro checktp, reg, tp, target ++| gettp TMP4, reg ++| addi.d TMP4, TMP4, tp ++| cleartp reg ++| bnez TMP4, target ++|.endmacro ++|.macro checktp, dst, reg, tp, target ++| gettp TMP4, reg ++| addi.d TMP4, TMP4, tp ++| cleartp dst, reg ++| bnez TMP4, target ++|.endmacro ++|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro ++|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro ++|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro ++|.macro checkint, reg, target ++| gettp TMP4, reg ++| bne TMP4, TISNUM, target ++|.endmacro ++|.macro checknum, reg, target ++| gettp TMP4, reg ++| sltui TMP4, TMP4, LJ_TISNUM ++| beqz TMP4, target ++|.endmacro ++| ++|.macro mov_false, reg ++| addi.d reg, r0, 0x0001 ++| slli.d reg, reg, 47 ++| nor reg, reg, r0 ++|.endmacro ++|.macro mov_true, reg ++| addi.d reg, r0, 0x0001 ++| slli.d reg, reg, 48 ++| nor reg, reg, r0 ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++ ++/* Generate subroutines used by opcodes and other parts of the VM. */ ++/* The .code_sub section should be last to help static branch prediction. */ ++static void build_subroutines(BuildCtx *ctx) ++{ ++ |.code_sub ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Return handling ---------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_returnp: ++ | // See vm_return. 
Also: TMP2 = previous base. ++ | andi TMP0, PC, FRAME_P ++ | ++ | // Return from pcall or xpcall fast func. ++ | mov_true TMP1 ++ | beqz TMP0, ->cont_dispatch ++ | ld.d PC, FRAME_PC(TMP2) // Fetch PC of previous frame. ++ | or BASE, TMP2, r0 // Restore caller base. ++ | // Prepending may overwrite the pcall frame, so do it at the end. ++ | st.d TMP1, -8(RA) // Prepend true to results. ++ | addi.d RA, RA, -8 ++ | ++ |->vm_returnc: ++ | addi.w RD, RD, 8 // RD = (nresults+1)*8. ++ | andi TMP0, PC, FRAME_TYPE ++ | addi.w CRET1, r0, LUA_YIELD ++ | beqz RD, ->vm_unwind_c_eh ++ | or MULTRES, RD, r0 ++ | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. ++ | ++ |->vm_return: ++ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return ++ | // TMP0 = PC & FRAME_TYPE ++ | addi.w TMP2, r0, -8 // TMP2 = 0xfffffff8 ++ | xori TMP0, TMP0, FRAME_C ++ | and TMP2, PC, TMP2 ++ | sub.d TMP2, BASE, TMP2 // TMP2 = previous base. ++ | bnez TMP0, ->vm_returnp ++ | ++ | addi.w TMP1, RD, -8 ++ | st.d TMP2, L->base ++ | li_vmstate C ++ | ld.w TMP2, SAVE_NRES(sp) ++ | addi.d BASE, BASE, -16 ++ | st_vmstate ++ | slli.w TMP2, TMP2, 3 ++ | beqz TMP1, >2 ++ |1: ++ | addi.w TMP1, TMP1, -8 ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | st.d CRET1, 0(BASE) ++ | addi.d BASE, BASE, 8 ++ | bnez TMP1, <1 ++ | ++ |2: ++ | bne TMP2, RD, >6 ++ |3: ++ | st.d BASE, L->top // Store new top. ++ | ++ |->vm_leave_cp: ++ | ld.d TMP0, SAVE_CFRAME(sp) // Restore previous C frame. ++ | or CRET1, r0, r0 // Ok return status for vm_pcall. ++ | st.d TMP0, L->cframe ++ | ++ |->vm_leave_unw: ++ | restoreregs_ret ++ | ++ |6: ++ | ld.d TMP1, L->maxstack ++ | slt TMP0, TMP2, RD ++ | // More results wanted. Check stack size and fill up results with nil. ++ | slt TMP1, BASE, TMP1 ++ | bnez TMP0, >7 ++ | beqz TMP1, >8 ++ | st.d TISNIL, 0(BASE) ++ | addi.w RD, RD, 8 ++ | addi.d BASE, BASE, 8 ++ | b <2 ++ | ++ |7: // Less results wanted. 
++ | sub.w TMP0, RD, TMP2 ++ | sub.d TMP0, BASE, TMP0 // Either keep top or shrink it. ++ | maskeqz TMP0, TMP0, TMP2 // LUA_MULTRET+1 case? ++ | masknez BASE, BASE, TMP2 ++ | or BASE, BASE, TMP0 ++ | b <3 ++ | ++ |8: // Corner case: need to grow stack for filling up results. ++ | // This can happen if: ++ | // - A C function grows the stack (a lot). ++ | // - The GC shrinks the stack in between. ++ | // - A return back from a lua_call() with (high) nresults adjustment. ++ | ++ | st.d BASE, L->top // Save current top held in BASE (yes). ++ | or MULTRES, RD, r0 ++ | srli.w CARG2, TMP2, 3 ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ld.w TMP2, SAVE_NRES(sp) ++ | ld.d BASE, L->top // Need the (realloced) L->top in BASE. ++ | or RD, MULTRES, r0 ++ | slli.w TMP2, TMP2, 3 ++ | b <2 ++ | ++ |->vm_unwind_c: // Unwind C stack, return from vm_pcall. ++ | // (void *cframe, int errcode) ++ | or sp, CARG1, r0 ++ | or CRET1, CARG2, r0 ++ |->vm_unwind_c_eh: // Landing pad for external unwinder. ++ | ld.d L, SAVE_L(sp) ++ | addi.w TMP0, r0, ~LJ_VMST_C ++ | ld.d GL:TMP1, L->glref ++ | st.w TMP0, GL:TMP1->vmstate ++ | b ->vm_leave_unw ++ | ++ |->vm_unwind_ff: // Unwind C stack, return from ff pcall. ++ | // (void *cframe) ++ | addi.d TMP3, r0, CFRAME_RAWMASK ++ | and sp, CARG1, TMP3 ++ |->vm_unwind_ff_eh: // Landing pad for external unwinder. ++ | ld.d L, SAVE_L(sp) ++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | addi.d TISNIL, r0, LJ_TNIL ++ | addi.d TISNUM, r0, LJ_TISNUM ++ | ld.d BASE, L->base ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | movgr2fr.w TOBIT, TMP3 ++ | mov_false TMP1 ++ | li_vmstate INTERP ++ | ld.d PC, FRAME_PC(BASE) // Fetch PC of previous frame. ++ | fcvt.d.s TOBIT, TOBIT ++ | addi.d RA, BASE, -8 // Results start at BASE-8. ++ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP ++ | st.d TMP1, 0(RA) // Prepend false to error message. 
++ | st_vmstate ++ | addi.d RD, r0, 16 // 2 results: false + error message. ++ | b ->vm_returnc ++ | ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Grow stack for calls ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_growstack_c: // Grow stack for C function. ++ | addi.d CARG2, r0, LUA_MINSTACK ++ | b >2 ++ | ++ |->vm_growstack_l: // Grow stack for Lua function. ++ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC ++ | add.d RC, BASE, RC ++ | sub.d RA, RA, BASE ++ | st.d BASE, L->base ++ | addi.d PC, PC, 4 // Must point after first instruction. ++ | st.d RC, L->top ++ | srli.w CARG2, RA, 3 ++ |2: ++ | // L->base = new base, L->top = top ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ld.d BASE, L->base ++ | ld.d RC, L->top ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | sub.d RC, RC, BASE ++ | cleartp LFUNC:RB ++ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++ | ins_callt // Just retry the call. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Entry points into the assembler VM --------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_resume: // Setup C frame and resume thread. ++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) ++ | saveregs ++ | or L, CARG1, r0 ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | or BASE, CARG2, r0 ++ | ld.bu TMP1, L->status ++ | st.d L, SAVE_L(sp) ++ | addi.d PC, r0, FRAME_CP ++ | addi.d TMP0, sp, CFRAME_RESUME ++ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP ++ | st.w r0, SAVE_NRES(sp) ++ | st.w r0, SAVE_ERRF(sp) ++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. 
++ | st.d r0, SAVE_CFRAME(sp) ++ | st.d TMP0, L->cframe ++ | beqz TMP1, >3 ++ | ++ | // Resume after yield (like a return). ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | or RA, BASE, r0 ++ | ld.d BASE, L->base ++ | ld.d TMP1, L->top ++ | ld.d PC, FRAME_PC(BASE) ++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | sub.d RD, TMP1, BASE ++ | movgr2fr.w TOBIT, TMP3 ++ | st.b r0, L->status ++ | fcvt.d.s TOBIT, TOBIT ++ | li_vmstate INTERP ++ | addi.d RD, RD, 8 ++ | st_vmstate ++ | or MULTRES, RD, r0 ++ | andi TMP0, PC, FRAME_TYPE ++ | addi.d TISNIL, r0, LJ_TNIL ++ | addi.d TISNUM, r0, LJ_TISNUM ++ | beqz TMP0, ->BC_RET_Z ++ | b ->vm_return ++ | ++ |->vm_pcall: // Setup protected C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) ++ | saveregs ++ | st.w CARG4, SAVE_ERRF(sp) ++ | addi.d PC, r0, FRAME_CP ++ | b >1 ++ | ++ |->vm_call: // Setup C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1) ++ | saveregs ++ | addi.d PC, r0, FRAME_C ++ | ++ |1: // Entry point for vm_pcall above (PC = ftype). ++ | ld.d TMP1, L:CARG1->cframe ++ | or L, CARG1, r0 ++ | st.w CARG3, SAVE_NRES(sp) ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | st.d CARG1, SAVE_L(sp) ++ | or BASE, CARG2, r0 ++ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP ++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | st.d TMP1, SAVE_CFRAME(sp) ++ | st.d sp, L->cframe // Add our C frame to cframe chain. ++ | ++ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | ld.d TMP2, L->base // TMP2 = old base (used in vmeta_call). ++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
++ | ld.d TMP1, L->top ++ | movgr2fr.w TOBIT, TMP3 ++ | add.d PC, PC, BASE ++ | sub.d NARGS8:RC, TMP1, BASE ++ | addi.d TISNUM, r0, LJ_TISNUM ++ | sub.d PC, PC, TMP2 // PC = frame delta + frame type ++ | fcvt.d.s TOBIT, TOBIT ++ | li_vmstate INTERP ++ | addi.d TISNIL, r0, LJ_TNIL ++ | st_vmstate ++ | ++ |->vm_call_dispatch: ++ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | checkfunc LFUNC:RB, ->vmeta_call ++ | ++ |->vm_call_dispatch_f: ++ | ins_call ++ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC ++ | ++ |->vm_cpcall: // Setup protected C frame, call C. ++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) ++ | saveregs ++ | or L, CARG1, r0 ++ | ld.d TMP0, L:CARG1->stack ++ | st.d CARG1, SAVE_L(sp) ++ | ld.d TMP1, L->top ++ | ld.d DISPATCH, L->glref // Setup pointer to dispatch table. ++ | st.d CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | sub.d TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). ++ | ld.d TMP1, L->cframe ++ | .ADD16I DISPATCH, DISPATCH, GG_G2DISP ++ | st.w TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame. ++ | st.w r0, SAVE_ERRF(sp) // No error function. ++ | st.d TMP1, SAVE_CFRAME(sp) ++ | st.d sp, L->cframe // Add our C frame to cframe chain. ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | jirl r1, CARG4, 0 // (lua_State *L, lua_CFunction func, void *ud) ++ | or BASE, CRET1, r0 ++ | addi.d PC, r0, FRAME_CP ++ | bnez CRET1, <3 // Else continue with the call. ++ | b ->vm_leave_cp // No base? Just remove C frame. 
++ | ++ |//----------------------------------------------------------------------- ++ |//-- Metamethod handling ------------------------------------------------ ++ |//----------------------------------------------------------------------- ++ | ++ |//-- Continuation dispatch ---------------------------------------------- ++ | ++ |->cont_dispatch: ++ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 ++ | ld.d TMP0, -32(BASE) // Continuation. ++ | or RB, BASE, r0 ++ | or BASE, TMP2, r0 // Restore caller BASE. ++ | ld.d LFUNC:TMP1, FRAME_FUNC(TMP2) ++ |.if FFI ++ | sltui TMP3, TMP0, 2 ++ |.endif ++ | ld.d PC, -24(RB) // Restore PC from [cont|PC]. ++ | cleartp LFUNC:TMP1 ++ | add.d TMP2, RA, RD ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | st.d TISNIL, -8(TMP2) // Ensure one valid arg. ++ |.if FFI ++ | bnez TMP3, >1 ++ |.endif ++ | // BASE = base, RA = resultptr, RB = meta base ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | jirl r0, TMP0, 0 // Jump to continuation. ++ | ++ |.if FFI ++ |1: ++ | addi.d TMP1, RB, -32 ++ | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. ++ | // cont = 0: tailcall from C function. 
++ | sub.d RC, TMP1, BASE ++ | b ->vm_call_tail ++ |.endif ++ | ++ |->cont_cat: // RA = resultptr, RB = meta base ++ | ld.w INS, -4(PC) ++ | addi.d CARG2, RB, -32 ++ | ld.d TMP0, 0(RA) ++ | decode_RB MULTRES, INS ++ | decode_RA RA, INS ++ | add.d TMP1, BASE, MULTRES ++ | st.d BASE, L->base ++ | sub.d CARG3, CARG2, TMP1 ++ | st.d TMP0, 0(CARG2) ++ | bne TMP1, CARG2, ->BC_CAT_Z ++ | add.d RA, BASE, RA ++ | st.d TMP0, 0(RA) ++ | b ->cont_nop ++ | ++ |//-- Table indexing metamethods ----------------------------------------- ++ | ++ |->vmeta_tgets1: ++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | addi.d TMP0, r0, LJ_TSTR ++ | settp STR:RC, TMP0 ++ | st.d STR:RC, 0(CARG3) ++ | b >1 ++ | ++ |->vmeta_tgets: ++ | .ADD16I CARG2, DISPATCH, DISPATCH_GL(tmptv) ++ | addi.d TMP0, r0, LJ_TTAB ++ | addi.d TMP1, r0, LJ_TSTR ++ | settp TAB:RB, TMP0 ++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv2) ++ | st.d TAB:RB, 0(CARG2) ++ | settp STR:RC, TMP1 ++ | st.d STR:RC, 0(CARG3) ++ | b >1 ++ | ++ |->vmeta_tgetb: // TMP0 = index ++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | settp TMP0, TISNUM ++ | st.d TMP0, 0(CARG3) ++ | ++ |->vmeta_tgetv: ++ |1: ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | beqz CRET1, >3 ++ | ld.d TMP0, 0(CRET1) ++ | st.d TMP0, 0(RA) ++ | ins_next ++ | ++ |3: // Call __index metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k ++ | addi.d TMP1, BASE, -FRAME_CONT ++ | addi.d NARGS8:RC, r0, 16 // 2 args for func(t, k). ++ | ld.d BASE, L->top ++ | st.d PC, -24(BASE) // [cont|PC] ++ | sub.d PC, BASE, TMP1 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | cleartp LFUNC:RB ++ | b ->vm_call_dispatch_f ++ | ++ |->vmeta_tgetr: ++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. 
++ | or TMP1, TISNIL, r0 ++ | beqz CRET1, ->BC_TGETR_Z ++ | ld.d TMP1, 0(CRET1) ++ | b ->BC_TGETR_Z ++ | ++ |//----------------------------------------------------------------------- ++ | ++ |->vmeta_tsets1: ++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | addi.d TMP0, r0, LJ_TSTR ++ | settp STR:RC, TMP0 ++ | st.d STR:RC, 0(CARG3) ++ | b >1 ++ | ++ |->vmeta_tsets: ++ | .ADD16I CARG2, DISPATCH, DISPATCH_GL(tmptv) ++ | addi.d TMP0, r0, LJ_TTAB ++ | addi.d TMP1, r0, LJ_TSTR ++ | settp TAB:RB, TMP0 ++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv2) ++ | st.d TAB:RB, 0(CARG2) ++ | settp STR:RC, TMP1 ++ | st.d STR:RC, 0(CARG3) ++ | b >1 ++ | ++ |->vmeta_tsetb: // TMP0 = index ++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | settp TMP0, TISNUM ++ | st.d TMP0, 0(CARG3) ++ | ++ |->vmeta_tsetv: ++ |1: ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | ld.d TMP2, 0(RA) ++ | beqz CRET1, >3 ++ | // NOBARRIER: lj_meta_tset ensures the table is not black. ++ | st.d TMP2, 0(CRET1) ++ | ins_next ++ | ++ |3: // Call __newindex metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) ++ | addi.d TMP1, BASE, -FRAME_CONT ++ | ld.d BASE, L->top ++ | st.d PC, -24(BASE) // [cont|PC] ++ | sub.d PC, BASE, TMP1 ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | addi.d NARGS8:RC, r0, 24 // 3 args for func(t, k, v) ++ | cleartp LFUNC:RB ++ | st.d TMP2, 16(BASE) // Copy value to third argument. ++ | b ->vm_call_dispatch_f ++ | ++ |->vmeta_tsetr: ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) ++ | // Returns TValue *. ++ | b ->BC_TSETR_Z ++ | ++ |//-- Comparison metamethods --------------------------------------------- ++ | ++ |->vmeta_comp: ++ | // RA/RD point to o1/o2. 
++ | or CARG2, RA, r0 ++ | or CARG3, RD, r0 ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | decode_OP CARG4, INS ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) ++ | // Returns 0/1 or TValue * (metamethod). ++ |3: ++ | sltui TMP1, CRET1, 2 ++ | beqz TMP1, ->vmeta_binop ++ | sub.w TMP2, r0, CRET1 ++ |4: ++ | ld.hu RD, OFS_RD(PC) ++ | addi.d PC, PC, 4 ++ | addu16i.d TMP1, r0, -0x2 // -BCBIAS_J*4 ++ | slli.w RD, RD, 2 ++ | add.w RD, RD, TMP1 ++ | and RD, RD, TMP2 ++ | add.d PC, PC, RD ++ |->cont_nop: ++ | ins_next ++ | ++ |->cont_ra: // RA = resultptr ++ | ld.bu TMP1, -4+OFS_RA(PC) ++ | ld.d TMP2, 0(RA) ++ | slli.w TMP1, TMP1, 3 ++ | add.d TMP1, BASE, TMP1 ++ | st.d TMP2, 0(TMP1) ++ | b ->cont_nop ++ | ++ |->cont_condt: // RA = resultptr ++ | ld.d TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | sltui TMP1, TMP0, LJ_TISTRUECOND ++ | sub.w TMP2, r0, TMP1 // Branch if result is true. ++ | b <4 ++ | ++ |->cont_condf: // RA = resultptr ++ | ld.d TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | sltui TMP1, TMP0, LJ_TISTRUECOND ++ | addi.w TMP2, TMP1, -1 // Branch if result is false. ++ | b <4 ++ | ++ |->vmeta_equal: ++ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. ++ | cleartp LFUNC:CARG3, CARG2 ++ | cleartp LFUNC:CARG2, CARG1 ++ | or CARG4, TMP0, r0 ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) ++ | // Returns 0/1 or TValue * (metamethod). ++ | b <3 ++ | ++ |->vmeta_equal_cd: ++ |.if FFI ++ | or CARG2, INS, r0 ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) ++ | // Returns 0/1 or TValue * (metamethod). 
++ | b <3 ++ |.endif ++ | ++ |->vmeta_istype: ++ | addi.d PC, PC, -4 ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | srli.w CARG2, RA, 3 ++ | srli.w CARG3, RD, 3 ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) ++ | b ->cont_nop ++ | ++ |//-- Arithmetic metamethods --------------------------------------------- ++ | ++ |->vmeta_unm: ++ | or RC, RB, r0 ++ | ++ |->vmeta_arith: ++ | st.d BASE, L->base ++ | or CARG2, RA, r0 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG3, RB, r0 ++ | or CARG4, RC, r0 ++ | decode_OP CARG5, INS ++ | or CARG1, L, r0 ++ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | // Returns NULL (finished) or TValue * (metamethod). ++ | beqz CRET1, ->cont_nop ++ | ++ | // Call metamethod for binary op. ++ |->vmeta_binop: ++ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 ++ | sub.d TMP1, CRET1, BASE ++ | st.d PC, -24(CRET1) // [cont|PC] ++ | or TMP2, BASE, r0 ++ | addi.d PC, TMP1, FRAME_CONT ++ | or BASE, CRET1, r0 ++ | addi.d NARGS8:RC, r0, 16 // 2 args for func(o1, o2). ++ | b ->vm_call_dispatch ++ | ++ |->vmeta_len: ++ | // CARG2 already set by BC_LEN. ++#if LJ_52 ++ | or MULTRES, CARG1, r0 ++#endif ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_meta_len // (lua_State *L, TValue *o) ++ | // Returns NULL (retry) or TValue * (metamethod base). ++#if LJ_52 ++ | bnez CRET1, ->vmeta_binop // Binop call for compatibility. ++ | or CARG1, MULTRES, r0 ++ | b ->BC_LEN_Z ++#else ++ | b ->vmeta_binop // Binop call for compatibility. ++#endif ++ | ++ |//-- Call metamethod ---------------------------------------------------- ++ | ++ |->vmeta_call: // Resolve and call __call metamethod. ++ | // TMP2 = old base, BASE = new base, RC = nargs*8 ++ | st.d TMP2, L->base // This is the callers base! 
++ | addi.d CARG2, BASE, -16 ++ | st.d PC, SAVE_PC(sp) ++ | add.d CARG3, BASE, RC ++ | or CARG1, L, r0 ++ | or MULTRES, NARGS8:RC, r0 ++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now. ++ | cleartp LFUNC:RB ++ | ins_call ++ | ++ |->vmeta_callt: // Resolve __call for BC_CALLT. ++ | // BASE = old base, RA = new base, RC = nargs*8 ++ | st.d BASE, L->base ++ | addi.d CARG2, RA, -16 ++ | st.d PC, SAVE_PC(sp) ++ | add.d CARG3, RA, RC ++ | or CARG1, L, r0 ++ | or MULTRES, NARGS8:RC, r0 ++ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | ld.d RB, FRAME_FUNC(RA) // Guaranteed to be a function here. ++ | ld.d TMP1, FRAME_PC(BASE) ++ | addi.d NARGS8:RC, MULTRES, 8 // Got one more argument now. ++ | cleartp LFUNC:CARG3, RB ++ | b ->BC_CALLT_Z ++ | ++ |//-- Argument coercion for 'for' statement ------------------------------ ++ | ++ |->vmeta_for: ++ | st.d BASE, L->base ++ | or CARG2, RA, r0 ++ | st.d PC, SAVE_PC(sp) ++ | or MULTRES, INS, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_meta_for // (lua_State *L, TValue *base) ++ |.if JIT ++ | decode_OP TMP0, MULTRES ++ | addi.d TMP1, r0, BC_JFORI ++ |.endif ++ | decode_RA RA, MULTRES ++ | decode_RD RD, MULTRES ++ |.if JIT ++ | beq TMP0, TMP1, =>BC_JFORI ++ |.endif ++ | b =>BC_FORI ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Fast functions ----------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro .ffunc, name ++ |->ff_ .. name: ++ |.endmacro ++ | ++ |.macro .ffunc_1, name ++ |->ff_ .. name: ++ | ld.d CARG1, 0(BASE) ++ | beqz NARGS8:RC, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_2, name ++ |->ff_ .. 
name: ++ | sltui TMP0, NARGS8:RC, 16 ++ | ld.d CARG1, 0(BASE) ++ | ld.d CARG2, 8(BASE) ++ | bnez TMP0, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_n, name ++ |->ff_ .. name: ++ | ld.d CARG1, 0(BASE) ++ | fld.d FARG1, 0(BASE) ++ | beqz NARGS8:RC, ->fff_fallback ++ | checknum CARG1, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_nn, name ++ |->ff_ .. name: ++ | ld.d CARG1, 0(BASE) ++ | ld.d CARG2, 8(BASE) ++ | sltui TMP0, NARGS8:RC, 16 ++ | gettp TMP1, CARG1 ++ | bnez TMP0, ->fff_fallback ++ | gettp TMP2, CARG2 ++ | sltui TMP1, TMP1, LJ_TISNUM ++ | sltui TMP2, TMP2, LJ_TISNUM ++ | fld.d FARG1, 0(BASE) ++ | and TMP1, TMP1, TMP2 ++ | fld.d FARG2, 8(BASE) ++ | beqz TMP1, ->fff_fallback ++ |.endmacro ++ | ++ |// Inlined GC threshold check. ++ |.macro ffgccheck ++ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total) ++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold) ++ | bltu TMP0, TMP1, >1 ++ | bl ->fff_gcstep ++ |1: ++ |.endmacro ++ | ++ |//-- Base library: checks ----------------------------------------------- ++ |.ffunc_1 assert ++ | gettp TMP1, CARG1 ++ |// ld.d PC, FRAME_PC(BASE) ++ | sltui TMP1, TMP1, LJ_TISTRUECOND ++ | addi.d RA, BASE, -16 ++ | beqz TMP1, ->fff_fallback ++ | ld.d PC, FRAME_PC(BASE) ++ | addi.w RD, NARGS8:RC, 8 // Compute (nresults+1)*8. ++ | addi.d TMP1, BASE, 8 ++ | add.d TMP2, RA, RD ++ | st.d CARG1, 0(RA) ++ | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. 
++ |1: ++ | ld.d TMP0, 0(TMP1) ++ | st.d TMP0, -16(TMP1) ++ | or TMP3, TMP1, r0 ++ | addi.d TMP1, TMP1, 8 ++ | bne TMP3, TMP2, <1 ++ | b ->fff_res ++ | ++ |.ffunc_1 type ++ | gettp TMP0, CARG1 ++ | addi.w TMP1, r0, ~LJ_TISNUM ++ | sltu TMP2, TISNUM, TMP0 ++ | nor TMP3, TMP0, r0 ++ | masknez TMP1, TMP1, TMP2 ++ | maskeqz TMP3, TMP3, TMP2 ++ | or TMP3, TMP3, TMP1 ++ | slli.d TMP3, TMP3, 3 ++ | add.d TMP3, CFUNC:RB, TMP3 ++ | ld.d CARG1, CFUNC:TMP3->upvalue ++ | b ->fff_restv ++ | ++ |//-- Base library: getters and setters --------------------------------- ++ | ++ |.ffunc_1 getmetatable ++ | gettp TMP2, CARG1 ++ | addi.d TMP0, TMP2, -LJ_TTAB ++ | addi.d TMP1, TMP2, -LJ_TUDATA ++ | maskeqz TMP0, TMP1, TMP0 ++ | cleartp TAB:CARG1 ++ | bnez TMP0, >6 ++ |1: // Field metatable must be at same offset for GCtab and GCudata! ++ | ld.d TAB:RB, TAB:CARG1->metatable ++ |2: ++ | .LDXD STR:RC, DISPATCH, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable]) ++ | addi.d CARG1, r0, LJ_TNIL ++ | beqz TAB:RB, ->fff_restv ++ | ld.w TMP0, TAB:RB->hmask ++ | ld.w TMP1, STR:RC->sid ++ | ld.d NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask ++ | slli.d TMP0, TMP1, 5 ++ | slli.d TMP1, TMP1, 3 ++ | sub.d TMP1, TMP0, TMP1 ++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | addi.w CARG4, r0, LJ_TSTR ++ | settp STR:RC, CARG4 // Tagged key to look for. ++ |3: // Rearranged logic, because we expect _not_ to find the key. ++ | ld.d TMP0, NODE:TMP2->key ++ | ld.d CARG1, NODE:TMP2->val ++ | ld.d NODE:TMP2, NODE:TMP2->next ++ | addi.d TMP3, r0, LJ_TTAB ++ | beq RC, TMP0, >5 ++ | bnez NODE:TMP2, <3 ++ |4: ++ | or CARG1, RB, r0 ++ | settp CARG1, TMP3 ++ | b ->fff_restv // Not found, keep default result. ++ |5: ++ | bne CARG1, TISNIL, ->fff_restv ++ | b <4 // Ditto for nil value. 
++ | ++ |6: ++ | sltui TMP3, TMP2, LJ_TISNUM ++ | maskeqz TMP0, TISNUM, TMP3 ++ | masknez TMP3, TMP2, TMP3 ++ | or TMP2, TMP0, TMP3 ++ | slli.d TMP2, TMP2, 3 ++ | sub.d TMP0, DISPATCH, TMP2 ++ | .LDXD TAB:RB, TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8 ++ | b <2 ++ | ++ |.ffunc_2 setmetatable ++ | // Fast path: no mt for table yet and not clearing the mt. ++ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | gettp TMP3, CARG2 ++ | ld.d TAB:TMP0, TAB:TMP1->metatable ++ | ld.bu TMP2, TAB:TMP1->marked ++ | addi.d TMP3, TMP3, -LJ_TTAB ++ | cleartp TAB:CARG2 ++ | or TMP3, TMP3, TAB:TMP0 ++ | bnez TMP3, ->fff_fallback ++ | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table) ++ | st.d TAB:CARG2, TAB:TMP1->metatable ++ | beqz TMP3, ->fff_restv ++ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv ++ | ++ |.ffunc rawget ++ | ld.d CARG2, 0(BASE) ++ | sltui TMP0, NARGS8:RC, 16 ++ | gettp TMP1, CARG2 ++ | cleartp CARG2 ++ | addi.d TMP1, TMP1, -LJ_TTAB ++ | or TMP0, TMP0, TMP1 ++ | addi.d CARG3, BASE, 8 ++ | bnez TMP0, ->fff_fallback ++ | or CARG1, L, r0 ++ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) ++ | // Returns cTValue *. ++ | ld.d CARG1, 0(CRET1) ++ | b ->fff_restv ++ | ++ |//-- Base library: conversions ------------------------------------------ ++ | ++ |.ffunc tonumber ++ | // Only handles the number case inline (without a base argument). ++ | ld.d CARG1, 0(BASE) ++ | xori TMP0, NARGS8:RC, 8 // Exactly one number argument. ++ | gettp TMP1, CARG1 ++ | sltu TMP1, TISNUM, TMP1 ++ | or TMP0, TMP0, TMP1 ++ | bnez TMP0, ->fff_fallback // No args or CARG1 is not number ++ | b ->fff_restv ++ | ++ |.ffunc_1 tostring ++ | // Only handles the string or number case inline. ++ | gettp TMP0, CARG1 ++ | addi.d TMP1, TMP0, -LJ_TSTR ++ | // A __tostring method in the string base metatable is ignored. ++ | beqz TMP1, ->fff_restv // String key? ++ | // Handle numbers inline, unless a number base metatable is present. 
++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM]) ++ | sltu TMP0, TISNUM, TMP0 ++ | st.d BASE, L->base // Add frame since C call can throw. ++ | or TMP0, TMP0, TMP1 ++ | bnez TMP0, ->fff_fallback ++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | ffgccheck ++ | or CARG1, L, r0 ++ | or CARG2, BASE, r0 ++ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) ++ | // Returns GCstr *. ++ | addi.d TMP1, r0, LJ_TSTR ++ |// ld.d BASE, L->base ++ | settp CARG1, TMP1 ++ | b ->fff_restv ++ | ++ |//-- Base library: iterators ------------------------------------------- ++ | ++ |.ffunc_1 next ++ | checktp CARG1, -LJ_TTAB, ->fff_fallback ++ | add.d TMP0, BASE, NARGS8:RC ++ | ld.d PC, FRAME_PC(BASE) ++ | st.d TISNIL, 0(TMP0) // Set missing 2nd arg to nil. ++ | addi.d CARG2, BASE, 8 ++ | addi.d CARG3, BASE, -16 ++ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | // Returns 1=found, 0=end, -1=error. ++ |// addi.d RA, BASE, -16 ++ | addi.d RD, r0, (2+1)*8 ++ | blt r0, CRET1, ->fff_res // Found key/value. ++ | or TMP1, CRET1, r0 ++ | or CARG1, TISNIL, r0 ++ | beqz TMP1, ->fff_restv // End of traversal: return nil. ++ | ld.d CFUNC:RB, FRAME_FUNC(BASE) ++ | addi.w RC, r0, 2*8 ++ | cleartp CFUNC:RB ++ | b ->fff_fallback // Invalid key. 
++ | ++ |.ffunc_1 pairs ++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | ld.d PC, FRAME_PC(BASE) ++#if LJ_52 ++ | ld.d TAB:TMP2, TAB:TMP1->metatable ++ | ld.d TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++ | bnez TAB:TMP2, ->fff_fallback ++#else ++ | ld.d TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++#endif ++ | st.d TISNIL, 0(BASE) ++ | st.d CARG1, -8(BASE) ++ | st.d TMP0, 0(RA) ++ | addi.d RD, r0, (3+1)*8 ++ | b ->fff_res ++ | ++ |.ffunc_2 ipairs_aux ++ | checktab CARG1, ->fff_fallback ++ | checkint CARG2, ->fff_fallback ++ | ld.w TMP0, TAB:CARG1->asize ++ | ld.d TMP1, TAB:CARG1->array ++ | ld.d PC, FRAME_PC(BASE) ++ | slli.w TMP2, CARG2, 0 ++ | addi.w TMP2, TMP2, 1 ++ | sltu TMP3, TMP2, TMP0 ++ | addi.d RA, BASE, -16 ++ | bstrpick.d TMP0, TMP2, 31, 0 ++ | settp TMP0, TISNUM ++ | st.d TMP0, 0(RA) ++ | beqz TMP3, >2 // Not in array part? ++ | slli.d TMP3, TMP2, 3 ++ | add.d TMP3, TMP1, TMP3 ++ | ld.d TMP1, 0(TMP3) ++ |1: ++ | addi.d RD, r0, (0+1)*8 ++ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. ++ | st.d TMP1, -8(BASE) ++ | addi.d RD, r0, (2+1)*8 ++ | b ->fff_res ++ |2: // Check for empty hash part first. Otherwise call C function. ++ | ld.w TMP0, TAB:CARG1->hmask ++ | addi.d RD, r0, (0+1)*8 ++ | beqz TMP0, ->fff_res ++ | or CARG2, TMP2, r0 ++ | bl extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. 
++ | addi.d RD, r0, (0+1)*8 ++ | beqz CRET1, ->fff_res ++ | ld.d TMP1, 0(CRET1) ++ | b <1 ++ | ++ |.ffunc_1 ipairs ++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | ld.d PC, FRAME_PC(BASE) ++#if LJ_52 ++ | ld.d TAB:TMP2, TAB:TMP1->metatable ++#endif ++ | ld.d CFUNC:TMP0, CFUNC:RB->upvalue[0] ++ | addi.d RA, BASE, -16 ++#if LJ_52 ++ | bnez TAB:TMP2, ->fff_fallback ++#endif ++ | slli.d TMP1, TISNUM, 47 ++ | st.d CARG1, -8(BASE) ++ | st.d TMP1, 0(BASE) ++ | st.d CFUNC:TMP0, 0(RA) ++ | addi.d RD, r0, (3+1)*8 ++ | b ->fff_res ++ | ++ |//-- Base library: catch errors ---------------------------------------- ++ | ++ |.ffunc pcall ++ | addi.d NARGS8:RC, NARGS8:RC, -8 ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | or TMP2, BASE, r0 ++ | blt NARGS8:RC, r0, ->fff_fallback ++ | addi.d BASE, BASE, 16 ++ | // Remember active hook before pcall. ++ | srli.w TMP3, TMP3, HOOK_ACTIVE_SHIFT ++ | andi TMP3, TMP3, 1 ++ | addi.d PC, TMP3, 16+FRAME_PCALL ++ | beqz NARGS8:RC, ->vm_call_dispatch ++ |1: ++ | add.d TMP0, BASE, NARGS8:RC ++ |2: ++ | ld.d TMP1, -16(TMP0) ++ | st.d TMP1, -8(TMP0) ++ | addi.d TMP0, TMP0, -8 ++ | bne TMP0, BASE, <2 ++ | b ->vm_call_dispatch ++ | ++ |.ffunc xpcall ++ | addi.d NARGS8:TMP0, NARGS8:RC, -16 ++ | ld.d CARG1, 0(BASE) ++ | ld.d CARG2, 8(BASE) ++ | .LDXBU TMP1, DISPATCH, DISPATCH_GL(hookmask) ++ | blt NARGS8:TMP0, r0, ->fff_fallback ++ | gettp TMP2, CARG2 ++ | addi.d TMP2, TMP2, -LJ_TFUNC ++ | bnez TMP2, ->fff_fallback // Traceback must be a function. ++ | or TMP2, BASE, r0 ++ | or NARGS8:RC, NARGS8:TMP0, r0 ++ | addi.d BASE, BASE, 24 ++ | // Remember active hook before pcall. ++ | srli.w TMP3, TMP3, HOOK_ACTIVE_SHIFT ++ | st.d CARG2, 0(TMP2) // Swap function and traceback. 
++ | andi TMP3, TMP3, 1 ++ | st.d CARG1, 8(TMP2) ++ | addi.d PC, TMP3, 24+FRAME_PCALL ++ | beqz NARGS8:RC, ->vm_call_dispatch ++ | b <1 ++ | ++ |//-- Coroutine library -------------------------------------------------- ++ | ++ |.macro coroutine_resume_wrap, resume ++ |.if resume ++ |.ffunc_1 coroutine_resume ++ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback ++ |.else ++ |.ffunc coroutine_wrap_aux ++ | ld.d L:CARG1, CFUNC:RB->upvalue[0].gcr ++ | cleartp L:CARG1 ++ |.endif ++ | ld.bu TMP0, L:CARG1->status ++ | ld.d TMP1, L:CARG1->cframe ++ | ld.d CARG2, L:CARG1->top ++ | ld.d TMP2, L:CARG1->base ++ | addi.w CARG4, TMP0, -LUA_YIELD ++ | add.d CARG3, CARG2, TMP0 ++ | addi.d TMP3, CARG2, 8 ++ | masknez CARG2, CARG2, CARG4 ++ | maskeqz TMP3, TMP3, CARG4 ++ | or CARG2, TMP3, CARG2 ++ | blt r0, CARG4, ->fff_fallback // st > LUA_YIELD? ++ | xor TMP2, TMP2, CARG3 ++ | or CARG4, TMP2, TMP0 ++ | bnez TMP1, ->fff_fallback // cframe != 0? ++ | ld.d TMP0, L:CARG1->maxstack ++ | ld.d PC, FRAME_PC(BASE) ++ | beqz CARG4, ->fff_fallback // base == top && st == 0? ++ | add.d TMP2, CARG2, NARGS8:RC ++ | sltu CARG4, TMP0, TMP2 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | bnez CARG4, ->fff_fallback // Stack overflow? ++ |1: ++ |.if resume ++ | addi.d BASE, BASE, 8 // Keep resumed thread in stack for GC. ++ | addi.d NARGS8:RC, NARGS8:RC, -8 ++ | addi.d TMP2, TMP2, -8 ++ |.endif ++ | st.d TMP2, L:CARG1->top ++ | st.d BASE, L->top ++ | add.d TMP1, BASE, NARGS8:RC ++ | or CARG3, CARG2, r0 ++ |2: // Move args to coroutine. ++ | ld.d TMP0, 0(BASE) ++ | sltu TMP3, BASE, TMP1 ++ | addi.d BASE, BASE, 8 ++ | beqz TMP3, >3 ++ | st.d TMP0, 0(CARG3) ++ | addi.d CARG3, CARG3, 8 ++ | b <2 ++ |3: ++ | or L:RA, L:CARG1, r0 ++ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) ++ | // Returns thread status. 
++ |4: ++ | ld.d TMP2, L:RA->base ++ | sltui TMP1, CRET1, LUA_YIELD+1 ++ | ld.d TMP3, L:RA->top ++ | li_vmstate INTERP ++ | ld.d BASE, L->base ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | st_vmstate ++ | sub.d RD, TMP3, TMP2 ++ | beqz TMP1, >8 ++ | ld.d TMP0, L->maxstack ++ | add.d TMP1, BASE, RD ++ | beqz RD, >6 // No results? ++ | add.d TMP3, TMP2, RD ++ | bltu TMP0, TMP1, >9 // Need to grow stack? ++ | st.d TMP2, L:RA->top // Clear coroutine stack. ++ | or TMP1, BASE, r0 ++ |5: // Move results from coroutine. ++ | ld.d TMP0, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | st.d TMP0, 0(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | bltu TMP2, TMP3, <5 ++ |6: ++ |.if resume ++ | mov_true TMP1 ++ | addi.d RD, RD, 16 ++ |7: ++ | st.d TMP1, -8(BASE) // Prepend true/false to results. ++ | addi.d RA, BASE, -8 ++ |.else ++ | or RA, BASE, r0 ++ | addi.d RD, RD, 8 ++ |.endif ++ | andi TMP0, PC, FRAME_TYPE ++ | st.d PC, SAVE_PC(sp) ++ | or MULTRES, RD, r0 ++ | beqz TMP0, ->BC_RET_Z ++ | b ->vm_return ++ | ++ |8: // Coroutine returned with error (at co->top-1). ++ |.if resume ++ | addi.d TMP3, TMP3, -8 ++ | mov_false TMP1 ++ | addi.w RD, r0, (2+1)*8 ++ | ld.d TMP0, 0(TMP3) ++ | st.d TMP3, L:RA->top // Remove error from coroutine stack. ++ | st.d TMP0, 0(BASE) // Copy error message. ++ | b <7 ++ |.else ++ | or CARG1, L, r0 ++ | or CARG2, L:RA, r0 ++ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) ++ |.endif ++ | ++ |9: // Handle stack expansion on return from yield. 
++ | or CARG1, L, r0 ++ | srli.w CARG2, RD, 3 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | addi.d CRET1, r0, 0 ++ | b <4 ++ |.endmacro ++ | ++ | coroutine_resume_wrap 1 // coroutine.resume ++ | coroutine_resume_wrap 0 // coroutine.wrap ++ | ++ |.ffunc coroutine_yield ++ | ld.d TMP0, L->cframe ++ | add.d TMP1, BASE, NARGS8:RC ++ | addi.w CRET1, r0, LUA_YIELD ++ | st.d BASE, L->base ++ | andi TMP0, TMP0, CFRAME_RESUME ++ | st.d TMP1, L->top ++ | beqz TMP0, ->fff_fallback ++ | st.d r0, L->cframe ++ | st.b CRET1, L->status ++ | b ->vm_leave_unw ++ | ++ |//-- Math library ------------------------------------------------------- ++ | ++ |.macro math_round, func ++ |->ff_math_ .. func: ++ | ld.d CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | beqz NARGS8:RC, ->fff_fallback ++ | beq TMP0, TISNUM, ->fff_restv ++ | fld.d FARG1, 0(BASE) ++ | bgeu TMP0, TISNUM, ->fff_fallback ++ | bl ->vm_ .. func ++ | b ->fff_resn ++ |.endmacro ++ | ++ | math_round floor ++ | math_round ceil ++ | ++ |.ffunc_1 math_abs ++ | gettp CARG2, CARG1 ++ | addi.d TMP2, CARG2, -LJ_TISNUM ++ | slli.w TMP1, CARG1, 0 ++ | bnez TMP2, >1 ++ | srai.w TMP0, TMP1, 31 // Extract sign. int ++ | xor TMP1, TMP1, TMP0 ++ | sub.d CARG1, TMP1, TMP0 ++ | slli.d TMP3, CARG1, 32 ++ | settp CARG1, TISNUM ++ | bge TMP3, r0, ->fff_restv ++ | ori CARG1, r0, 0x41e // 2^31 as a double. ++ | slli.w CARG1, CARG1, 4 // 0x41e0 ++ | slli.d CARG1, CARG1, 48 ++ | b ->fff_restv ++ |1: ++ | sltui TMP2, CARG2, LJ_TISNUM ++ | bstrpick.d CARG1, CARG1, 62, 0 ++ | beqz TMP2, ->fff_fallback // int ++ |// fallthrough ++ | ++ |->fff_restv: ++ | // CARG1 = TValue result. ++ | ld.d PC, FRAME_PC(BASE) ++ | st.d CARG1, -16(BASE) ++ |->fff_res1: ++ | // RA = results, PC = return. ++ | addi.d RD, r0, (1+1)*8 ++ |->fff_res: ++ | // RA = results, RD = (nresults+1)*8, PC = return. 
++ | andi TMP0, PC, FRAME_TYPE ++ | or MULTRES, RD, r0 ++ | addi.d RA, BASE, -16 ++ | bnez TMP0, ->vm_return ++ | ld.w INS, -4(PC) ++ | decode_RB RB, INS ++ |5: ++ | sltu TMP2, RD, RB ++ | decode_RA TMP0, INS ++ | bnez TMP2, >6 // More results expected? ++ | // Adjust BASE. KBASE is assumed to be set for the calling frame. ++ | sub.d BASE, RA, TMP0 ++ | ins_next ++ | ++ |6: // Fill up results with nil. ++ | add.d TMP1, RA, RD ++ | addi.d RD, RD, 8 ++ | st.d TISNIL, -8(TMP1) ++ | b <5 ++ | ++ |.macro math_extern, func ++ | .ffunc_n math_ .. func ++ | bl extern func ++ | b ->fff_resn ++ |.endmacro ++ | ++ |.macro math_extern2, func ++ | .ffunc_nn math_ .. func ++ | bl extern func ++ | b ->fff_resn ++ |.endmacro ++ | ++ |.ffunc_n math_sqrt ++ | fsqrt.d FRET1, FARG1 ++ |->fff_resn: ++ | ld.d PC, FRAME_PC(BASE) ++ | fst.d FRET1, -16(BASE) ++ | b ->fff_res1 ++ | ++ |.ffunc math_log ++ | addi.d TMP1, r0, 8 ++ | ld.d CARG1, 0(BASE) ++ | fld.d FARG1, 0(BASE) ++ | bne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. 
++ | checknum CARG1, ->fff_fallback ++ | bl extern log ++ | b ->fff_resn ++ | ++ | math_extern log10 ++ | math_extern exp ++ | math_extern sin ++ | math_extern cos ++ | math_extern tan ++ | math_extern asin ++ | math_extern acos ++ | math_extern atan ++ | math_extern sinh ++ | math_extern cosh ++ | math_extern tanh ++ | math_extern2 pow ++ | math_extern2 atan2 ++ | math_extern2 fmod ++ | ++ |.ffunc_2 math_ldexp ++ | checknum CARG1, ->fff_fallback ++ | checkint CARG2, ->fff_fallback ++ | fld.d FARG1, 0(BASE) ++ | ld.w CARG1, 8(BASE) ++ | bl extern ldexp // (double x, int exp) ++ | b ->fff_resn ++ | ++ |.ffunc_n math_frexp ++ | ld.d PC, FRAME_PC(BASE) ++ | .ADD16I CARG1, DISPATCH, DISPATCH_GL(tmptv) ++ | bl extern frexp ++ | .LDXW TMP1, DISPATCH, DISPATCH_GL(tmptv) ++ | movgr2fr.w FARG2, TMP1 ++ | fst.d FRET1, -16(BASE) ++ | ffint.d.w FARG2, FARG2 ++ | fst.d FARG2, -8(BASE) ++ | addi.d RD, r0, (2+1)*8 ++ | b ->fff_res ++ | ++ |.ffunc_n math_modf ++ | addi.d CARG1, BASE, -16 ++ | ld.d PC, FRAME_PC(BASE) ++ | bl extern modf ++ | fst.d FRET1, -8(BASE) ++ | addi.d RD, r0, (2+1)*8 ++ | b ->fff_res ++ | ++ |.macro math_minmax, name, intins, intinsc, fpins ++ | .ffunc_1 name ++ | add.d TMP3, BASE, NARGS8:RC ++ | addi.d TMP2, BASE, 8 ++ | checkint CARG1, >4 ++ |1: // Handle integers. ++ | ld.d CARG2, 0(TMP2) ++ | beq TMP2, TMP3, ->fff_restv ++ | slli.w CARG1, CARG1, 0 ++ | checkint CARG2, >3 ++ | slli.w CARG2, CARG2, 0 ++ | slt TMP0, CARG1, CARG2 ++ | intins TMP1, CARG2, TMP0 ++ | intinsc CARG1, CARG1, TMP0 ++ | or CARG1, CARG1, TMP1 ++ | addi.d TMP2, TMP2, 8 ++ | bstrpick.d CARG1, CARG1, 31, 0 ++ | settp CARG1, TISNUM ++ | b <1 ++ | ++ |3: // Convert intermediate result to number and continue with number loop. ++ | movgr2fr.w FTMP3, CARG1 ++ | checknum CARG2, ->fff_fallback ++ | ffint.d.w FTMP3, FTMP3 ++ | fld.d FARG1, 0(TMP2) ++ | fmov.d FTMP4, FARG1 ++ | b >6 ++ | ++ |4: ++ | fld.d FTMP3, 0(BASE) ++ |5: // Handle numbers. 
++ | ld.d CARG2, 0(TMP2) ++ | checknum CARG1, ->fff_fallback ++ | fld.d FTMP4, 0(TMP2) ++ | beq TMP2, TMP3, ->fff_resn ++ | checknum CARG2, >7 ++ |6: ++ | fpins FRET1, FTMP3, FTMP4 ++ | fmov.d FTMP3, FRET1 ++ | addi.d TMP2, TMP2, 8 ++ | b <5 ++ | ++ |7: // Convert integer to number and continue with number loop. ++ | fld.s FARG1, 0(TMP2) ++ | checkint CARG2, ->fff_fallback ++ | ffint.d.w FARG1, FARG1 ++ | b <6 ++ |.endmacro ++ | ++ | math_minmax math_min, masknez, maskeqz, fmin.d ++ | math_minmax math_max, maskeqz, masknez, fmax.d ++ | ++ |//-- String library ----------------------------------------------------- ++ | ++ |.ffunc string_byte // Only handle the 1-arg case here. ++ | ld.d CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori TMP1, NARGS8:RC, 8 ++ | addi.d TMP0, TMP0, -LJ_TSTR ++ | or TMP1, TMP1, TMP0 ++ | cleartp STR:CARG1 ++ | bnez TMP1, ->fff_fallback // Need exactly 1 string argument. ++ | ld.w TMP0, STR:CARG1->len ++ | ld.d PC, FRAME_PC(BASE) ++ | sltu RD, r0, TMP0 ++ | ld.bu TMP2, STR:CARG1[1] // Access is always ok (NUL at end). ++ | addi.w RD, RD, 1 ++ | slli.w RD, RD, 3 // RD = ((str->len != 0)+1)*8 ++ | settp TMP2, TISNUM ++ | st.d TMP2, -16(BASE) ++ | b ->fff_res ++ | ++ |.ffunc string_char // Only handle the 1-arg case here. ++ | ffgccheck ++ | ld.d CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori TMP1, NARGS8:RC, 8 // Need exactly 1 argument. ++ | addi.d TMP0, TMP0, -LJ_TISNUM // Integer. ++ | addi.d TMP2, r0, 255 ++ | slli.w CARG1, CARG1, 0 ++ | or TMP1, TMP1, TMP0 ++ | sltu TMP2, TMP2, CARG1 // !(255 < n). ++ | or TMP1, TMP1, TMP2 ++ | addi.d CARG3, r0, 1 ++ | bnez TMP1, ->fff_fallback ++ | addi.d CARG2, sp, TMPD_OFS ++ | st.b CARG1, TMPD(sp) ++ |->fff_newstr: ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_str_new // (lua_State *L, char *str, size_t l) ++ | // Returns GCstr *. 
++ | ld.d BASE, L->base ++ |->fff_resstr: ++ | addi.d TMP1, r0, LJ_TSTR ++ | settp CRET1, TMP1 ++ | b ->fff_restv ++ | ++ |.ffunc string_sub ++ | ffgccheck ++ | ld.d CARG1, 0(BASE) ++ | ld.d CARG2, 8(BASE) ++ | ld.d CARG3, 16(BASE) ++ | addi.d TMP0, NARGS8:RC, -16 ++ | gettp TMP1, CARG1 ++ | blt TMP0, r0, ->fff_fallback ++ | cleartp STR:CARG1, CARG1 ++ | addi.w CARG4, r0, -1 ++ | beqz TMP0, >1 ++ | slli.w CARG4, CARG3, 0 ++ | checkint CARG3, ->fff_fallback ++ |1: ++ | checkint CARG2, ->fff_fallback ++ | addi.d TMP0, TMP1, -LJ_TSTR ++ | slli.w CARG3, CARG2, 0 ++ | bnez TMP0, ->fff_fallback ++ | ld.w CARG2, STR:CARG1->len ++ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end ++ | addi.w TMP0, CARG2, 1 ++ | slt TMP3, CARG4, r0 ++ | add.w TMP2, CARG4, TMP0 ++ | slt TMP1, CARG3, r0 ++ | maskeqz TMP2, TMP2, TMP3 ++ | masknez CARG4, CARG4, TMP3 ++ | or CARG4, TMP2, CARG4 // if (end < 0) end += len+1 ++ | add.w TMP2, CARG3, TMP0 ++ | maskeqz TMP2, TMP2, TMP1 ++ | masknez CARG3, CARG3, TMP1 ++ | or CARG3, TMP2, CARG3 // if (start < 0) start += len+1 ++ | addi.d TMP3, r0, 1 ++ | slt TMP2, CARG4, r0 ++ | slt TMP1, r0, CARG3 ++ | masknez CARG4, CARG4, TMP2 // if (end < 0) end = 0 ++ | maskeqz CARG3, CARG3, TMP1 ++ | masknez TMP3, TMP3, TMP1 ++ | slt TMP2, CARG2, CARG4 ++ | or CARG3, TMP3, CARG3 // if (start < 1) start = 1 ++ | masknez CARG4, CARG4, TMP2 ++ | maskeqz CARG2, CARG2, TMP2 ++ | or CARG4, CARG2, CARG4 // if (end > len) end = len ++ | add.d CARG2, STR:CARG1, CARG3 ++ | sub.d CARG3, CARG4, CARG3 // len = end - start ++ | addi.d CARG2, CARG2, sizeof(GCstr)-1 ++ | addi.w CARG3, CARG3, 1 // len += 1 ++ | bge CARG3, r0, ->fff_newstr ++ |->fff_emptystr: // Return empty string. ++ | addi.d TMP1, r0, LJ_TSTR ++ | .ADD16I STR:CARG1, DISPATCH, DISPATCH_GL(strempty) ++ | settp CARG1, TMP1 ++ | b ->fff_restv ++ | ++ |.macro ffstring_op, name ++ | .ffunc string_ .. 
name ++ | ffgccheck ++ | ld.d CARG2, 0(BASE) ++ | beqz NARGS8:RC, ->fff_fallback ++ | checkstr STR:CARG2, ->fff_fallback ++ | .ADD16I SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) ++ | ld.d TMP0, SBUF:CARG1->b ++ | st.d L, SBUF:CARG1->L ++ | st.d BASE, L->base ++ | st.d TMP0, SBUF:CARG1->w ++ | st.d PC, SAVE_PC(sp) ++ | bl extern lj_buf_putstr_ .. name ++ |// or SBUF:CARG1, SBUF:CRET1, r0 ++ | bl extern lj_buf_tostr ++ | ld.d BASE, L->base ++ | b ->fff_resstr ++ |.endmacro ++ | ++ |ffstring_op reverse ++ |ffstring_op lower ++ |ffstring_op upper ++ | ++ |//-- Bit library -------------------------------------------------------- ++ | ++ |->vm_tobit_fb: ++ | fld.d FARG1, 0(BASE) ++ | beqz TMP1, ->fff_fallback ++ | fadd.d FARG1, FARG1, TOBIT ++ | movfr2gr.s CRET1, FARG1 ++ | bstrpick.d CRET1, CRET1, 31, 0 ++ | jirl r0, ra, 0 ++ | ++ |.macro .ffunc_bit, name ++ | .ffunc_1 bit_..name ++ | gettp TMP0, CARG1 ++ | bstrpick.d CRET1, CARG1, 31, 0 ++ | beq TMP0, TISNUM, >1 ++ | sltui TMP1, TMP0, LJ_TISNUM ++ | bl ->vm_tobit_fb ++ |1: ++ |.endmacro ++ | ++ |.macro .ffunc_bit_op, name, bins ++ | .ffunc_bit name ++ | addi.d TMP2, BASE, 8 ++ | add.d TMP3, BASE, NARGS8:RC ++ |1: ++ | ld.d TMP1, 0(TMP2) ++ | beq TMP2, TMP3, ->fff_resi ++ | gettp TMP0, TMP1 ++ | addi.d TMP2, TMP2, 8 ++ | bne TMP0, TISNUM, >2 ++ | bstrpick.d TMP1, TMP1, 31, 0 ++ | bins CRET1, CRET1, TMP1 ++ | b <1 ++ |2: ++ | fld.d FARG1, -8(TMP2) ++ | sltui TMP0, TMP0, LJ_TISNUM ++ | fadd.d FARG1, FARG1, TOBIT ++ | beqz TMP0, ->fff_fallback ++ | movfr2gr.s TMP1, FARG1 ++ | bstrpick.d TMP1, TMP1, 31, 0 ++ | bins CRET1, CRET1, TMP1 ++ | b <1 ++ |.endmacro ++ | ++ |.ffunc_bit_op band, and ++ |.ffunc_bit_op bor, or ++ |.ffunc_bit_op bxor, xor ++ | ++ |.ffunc_bit bswap ++ | srli.d TMP0, CRET1, 8 ++ | srli.d TMP1, CRET1, 24 ++ | srli.d TMP2,TMP0, 8 ++ | andi TMP3, TMP2, 0xff ++ | slli.d TMP3, TMP3, 8 ++ | bstrins.d TMP1, CRET1, 31, 24 ++ | bstrins.d TMP3, TMP0, 23, 16 ++ | or CRET1, TMP1, TMP3 ++ | b ->fff_resi ++ | ++ 
|.ffunc_bit tobit ++ |->fff_resi: ++ | settp CARG1, TISNUM // CARG1 = CRET1 ++ | b ->fff_restv ++ | ++ |.ffunc_bit bnot ++ | nor CRET1, CRET1, r0 ++ | bstrpick.d CRET1, CRET1, 31, 0 ++ | b ->fff_resi ++ | ++ |.macro .ffunc_bit_sh, name, shins, shmod ++ | .ffunc_2 bit_..name ++ | gettp TMP0, CARG1 ++ | beq TMP0, TISNUM, >1 ++ | sltui TMP1, TMP0, LJ_TISNUM ++ | bl ->vm_tobit_fb ++ |// or CARG1, CRET1, r0 // CARG1 = CRET1 ++ |1: ++ | gettp TMP0, CARG2 ++ | bstrpick.d CARG2, CARG2, 31, 0 ++ | bne TMP0, TISNUM, ->fff_fallback ++ | slli.w CARG1, CARG1, 0 ++ |.if shmod == 1 ++ | sub.w CARG2, r0, CARG2 ++ |.endif ++ | shins CRET1, CARG1, CARG2 ++ | bstrpick.d CRET1, CRET1, 31, 0 ++ | b ->fff_resi ++ |.endmacro ++ | ++ |.ffunc_bit_sh lshift, sll.w, 0 ++ |.ffunc_bit_sh rshift, srl.w, 0 ++ |.ffunc_bit_sh arshift, sra.w, 0 ++ |.ffunc_bit_sh rol, rotr.w, 1 ++ |.ffunc_bit_sh ror, rotr.w, 0 ++ | ++ |//----------------------------------------------------------------------- ++ | ++ |->fff_fallback: // Call fast function fallback handler. ++ | // BASE = new base, RB = CFUNC, RC = nargs*8 ++ | ld.d PC, FRAME_PC(BASE) // Fallback may overwrite PC. ++ | ld.d CARG3, CFUNC:RB->f ++ | add.d TMP1, BASE, NARGS8:RC ++ | st.d BASE, L->base ++ | addi.d TMP0, TMP1, 8*LUA_MINSTACK ++ | ld.d TMP2, L->maxstack ++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | st.d TMP1, L->top ++ | or CARG1, L, r0 ++ | bltu TMP2, TMP0, >5 // Need to grow stack. ++ | jirl r1, CARG3, 0 // (lua_State *L) ++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1. ++ | ld.d BASE, L->base ++ | slli.w RD, CRET1, 3 ++ | blt r0, CRET1, ->fff_res // Returned nresults+1? ++ |1: // Returned 0 or -1: retry fast path. ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | ld.d TMP0, L->top ++ | sub.d NARGS8:RC, TMP0, BASE ++ | cleartp LFUNC:RB ++ | bnez CRET1, ->vm_call_tail // Returned -1? ++ | ins_callt // Returned 0: retry fast path. ++ | ++ |// Reconstruct previous base for vmeta_call during tailcall. 
++ |->vm_call_tail: ++ | andi TMP0, PC, FRAME_TYPE ++ | addi.d TMP2, r0, ~FRAME_TYPEP // TODO ++ | and TMP1, PC, TMP2 ++ | bnez TMP0, >3 ++ | ld.bu TMP1, OFS_RA(PC) ++ | slli.w TMP1, TMP1, 3 ++ | addi.w TMP1, TMP1, 16 ++ |3: ++ | sub.d TMP2, BASE, TMP1 ++ | b ->vm_call_dispatch // Resolve again for tailcall. ++ | ++ |5: // Grow stack for fallback handler. ++ | addi.d CARG2, r0, LUA_MINSTACK ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | ld.d BASE, L->base ++ | addi.d CRET1, r0, 0 // Set zero-flag to force retry. ++ | b <1 ++ | ++ |->fff_gcstep: // Call GC step function. ++ | // BASE = new base, RC = nargs*8 ++ | or MULTRES, ra, r0 ++ | add.d TMP0, BASE, NARGS8:RC // Calculate L->top. ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | or CARG1, L, r0 ++ | st.d TMP0, L->top ++ | bl extern lj_gc_step // (lua_State *L) ++ | ld.d BASE, L->base ++ |// or ra, MULTRES, r0 ++ | ld.d TMP0, L->top ++ | ld.d CFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp CFUNC:RB ++ | sub.d NARGS8:RC, TMP0, BASE ++ | jirl r0, MULTRES, 0 ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Special dispatch targets ------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_record: // Dispatch target for recording phase. ++ |.if JIT ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | andi TMP1, TMP3, HOOK_VMEVENT // No recording while in vmevent. ++ | bnez TMP1, >5 ++ | // Decrement the hookcount for consistency, but always do the call. ++ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | andi TMP1, TMP3, HOOK_ACTIVE ++ | bnez TMP1, >1 ++ | addi.w TMP2, TMP2, -1 ++ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT ++ | beqz TMP1, >1 ++ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | b >1 ++ |.endif ++ | ++ |->vm_rethook: // Dispatch target for return hooks. 
++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? ++ | beqz TMP1, >1 ++ |5: // Re-dispatch to static ins. ++ | ld.d TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. ++ | jirl r0, TMP1, 0 ++ | ++ |->vm_inshook: // Dispatch target for instr/line hooks. ++ | .LDXBU TMP3, DISPATCH, DISPATCH_GL(hookmask) ++ | .LDXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? ++ | bnez TMP1, <5 ++ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT ++ | addi.w TMP2, TMP2, -1 ++ | beqz TMP1, <5 ++ | .STXW TMP2, DISPATCH, DISPATCH_GL(hookcount) ++ | beqz TMP2, >1 ++ | andi TMP1, TMP3, LUA_MASKLINE ++ | beqz TMP1, <5 ++ |1: ++ | st.w MULTRES, TMPD(sp) ++ | or CARG2, PC, r0 ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. ++ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) ++ |3: ++ | ld.d BASE, L->base ++ |4: // Re-dispatch to static ins. ++ | ld.w INS, -4(PC) ++ | decode_OP TMP1, INS ++ | decode_BC8b TMP1 ++ | add.d TMP0, DISPATCH, TMP1 ++ | decode_RD RD, INS ++ | ld.d TMP1, GG_DISP2STATIC(TMP0) ++ | decode_RA RA, INS ++ | jirl r0, TMP1, 0 ++ | ++ |->cont_hook: // Continue from hook yield. ++ | addi.d PC, PC, 4 ++ | ld.w MULTRES, -24(RB) // Restore MULTRES for *M ins. ++ | b <4 ++ | ++ |->vm_hotloop: // Hot loop counter underflow. ++ |.if JIT ++ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE) ++ | .ADD16I CARG1, DISPATCH, GG_DISP2J ++ | cleartp LFUNC:TMP1 ++ | st.d PC, SAVE_PC(sp) ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | or CARG2, PC, r0 ++ | .STXD L, DISPATCH, DISPATCH_J(L) ++ | ld.bu TMP1, PC2PROTO(framesize)(TMP1) ++ | st.d BASE, L->base ++ | slli.d TMP1, TMP1, 3 ++ | add.d TMP1, BASE, TMP1 ++ | st.d TMP1, L->top ++ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) ++ | b <3 ++ |.endif ++ | ++ | ++ |->vm_callhook: // Dispatch target for call hooks. 
++ | or CARG2, PC, r0 ++ |.if JIT ++ | b >1 ++ |.endif ++ | ++ |->vm_hotcall: // Hot call counter underflow. ++ |.if JIT ++ | ori CARG2, PC, 1 ++ |1: ++ |.endif ++ | add.d TMP0, BASE, RC ++ | st.d PC, SAVE_PC(sp) ++ | st.d BASE, L->base ++ | sub.d RA, RA, BASE ++ | st.d TMP0, L->top ++ | or CARG1, L, r0 ++ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) ++ | // Returns ASMFunction. ++ | ld.d BASE, L->base ++ | ld.d TMP0, L->top ++ | st.d r0, SAVE_PC(sp) // Invalidate for subsequent line hook. ++ | add.d RA, BASE, RA ++ | sub.d NARGS8:RC, TMP0, BASE ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp LFUNC:RB ++ | ld.w INS, -4(PC) ++ | jirl r0, CRET1, 0 ++ | ++ |->cont_stitch: // Trace stitching. ++ |.if JIT ++ | // RA = resultptr, RB = meta base ++ | ld.w INS, -4(PC) ++ | ld.d TRACE:TMP2, -40(RB) // Save previous trace. ++ | decode_RA RC, INS ++ | addi.d TMP1, MULTRES, -8 ++ | cleartp TRACE:TMP2 ++ | add.d RC, BASE, RC // Call base. ++ | beqz TMP1, >2 ++ |1: // Move results down. ++ | ld.d CARG1, 0(RA) ++ | addi.d TMP1, TMP1, -8 ++ | addi.d RA, RA, 8 ++ | st.d CARG1, 0(RC) ++ | addi.d RC, RC, 8 ++ | bnez TMP1, <1 ++ |2: ++ | decode_RA RA, INS ++ | decode_RB RB, INS ++ | add.d RA, RA, RB ++ | add.d RA, BASE, RA ++ |3: ++ | sltu TMP1, RC, RA ++ | bnez TMP1, >9 // More results wanted? ++ | ++ | ld.hu TMP3, TRACE:TMP2->traceno ++ | ld.hu RD, TRACE:TMP2->link ++ | beq RD, TMP3, ->cont_nop // Blacklisted. ++ | slli.w RD, RD, 3 ++ | bnez RD, =>BC_JLOOP // Jump to stitched trace. ++ | ++ | // Stitch a new trace to the previous trace. ++ | st.w TMP3, DISPATCH_J(exitno)(DISPATCH) ++ | .STXD L, DISPATCH, DISPATCH_J(L) ++ | st.d BASE, L->base ++ | .ADD16I CARG1, DISPATCH, GG_DISP2J ++ | or CARG2, PC, r0 ++ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) ++ | ld.d BASE, L->base ++ | b ->cont_nop ++ | ++ |9: ++ | st.d TISNIL, 0(RC) ++ | addi.d RC, RC, 8 ++ | b <3 ++ |.endif ++ | ++ |->vm_profhook: // Dispatch target for profiler hook. 
++#if LJ_HASPROFILE ++ | or CARG1, L, r0 ++ | or CARG2, PC, r0 ++ | st.d BASE, L->base ++ | st.w MULTRES, TMPD(sp) ++ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) ++ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. ++ | addi.d PC, PC, -4 ++ | ld.d BASE, L->base ++ | b ->cont_nop ++#endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Trace exit handler ------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro savex_, a, b ++ | fst.d f..a, a*8(sp) ++ | fst.d f..b, b*8(sp) ++ | st.d r..a, 32*8+a*8(sp) ++ | st.d r..b, 32*8+b*8(sp) ++ |.endmacro ++ | ++ |->vm_exit_handler: ++ |.if JIT ++ | addi.d sp, sp, -(32*8+32*8) ++ | savex_ 0, 2 ++ | savex_ 4, 5 ++ | savex_ 6, 7 ++ | savex_ 8, 9 ++ | savex_ 10, 11 ++ | savex_ 12, 13 ++ | savex_ 14, 15 ++ | savex_ 16, 17 ++ | savex_ 18, 19 ++ | savex_ 20, 21 ++ | savex_ 22, 23 ++ | savex_ 24, 25 ++ | savex_ 26, 27 ++ | savex_ 28, 29 ++ | savex_ 30, 31 ++ | fst.d f1, 1*8(sp) ++ | fst.d f3, 3*8(sp) ++ | st.d r0, 32*8+1*8(sp) // Clear RID_TMP. ++ | addi.d TMP2, sp, 32*8+32*8 // Recompute original value of sp. ++ | st.d TMP2, 32*8+3*8(sp) // Store sp in RID_SP ++ | li_vmstate EXIT ++ | .ADD16I DISPATCH, JGL, -GG_DISP2G-32768 ++ | ld.w TMP1, 0(TMP2) // Load exit number. ++ | st_vmstate ++ | .LDXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | .LDXD BASE, DISPATCH, DISPATCH_GL(jit_base) ++ | .STXD L, DISPATCH, DISPATCH_J(L) ++ | st.w ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. ++ | st.d BASE, L->base ++ | st.w TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. ++ | .ADD16I CARG1, DISPATCH, GG_DISP2J ++ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base) ++ | or CARG2, sp, r0 ++ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) ++ | // Returns MULTRES (unscaled) or negated error code. 
++ | ld.d TMP1, L->cframe ++ | addi.d TMP2, r0, -4 ++ | ld.d BASE, L->base ++ | and sp, TMP1, TMP2 ++ | ld.d PC, SAVE_PC(sp) // Get SAVE_PC. ++ | st.d L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield). ++ | b >1 ++ |.endif ++ | ++ |->vm_exit_interp: ++ |.if JIT ++ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. ++ | ld.d L, SAVE_L(sp) ++ | .ADD16I DISPATCH, JGL, -GG_DISP2G-32768 ++ | st.d BASE, L->base ++ |1: ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | sltui TMP0, CRET1, -LUA_ERRERR ++ | beqz TMP0, >9 ++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | slli.d MULTRES, CRET1, 3 ++ | cleartp LFUNC:RB ++ | st.w MULTRES, TMPD(sp) ++ | addi.d TISNIL, r0, LJ_TNIL ++ | addi.d TISNUM, r0, LJ_TISNUM // Setup type comparison constants. ++ | movgr2fr.w TOBIT, TMP3 ++ | ld.d TMP1, LFUNC:RB->pc ++ | .STXD r0, DISPATCH, DISPATCH_GL(jit_base) ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | fcvt.d.s TOBIT, TOBIT ++ | // Modified copy of ins_next which handles function header dispatch, too. ++ | ld.w INS, 0(PC) ++ | addi.d PC, PC, 4 ++ | addi.d CRET1, CRET1, 17 ++ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 ++ | .STXW TISNIL, DISPATCH, DISPATCH_GL(vmstate) ++ | decode_RD RD, INS ++ | beqz CRET1, >5 ++ | decode_OP TMP1, INS ++ | decode_BC8b TMP1 ++ | add.d TMP0, DISPATCH, TMP1 ++ | sltui TMP2, TMP1, BC_FUNCF*8 ++ | ld.d TMP3, 0(TMP0) ++ | decode_RA RA, INS ++ | beqz TMP2, >2 ++ | jirl r0, TMP3, 0 ++ |2: ++ | sltui TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? ++ | ld.d TMP1, FRAME_PC(BASE) ++ | bnez TMP2, >3 ++ | // Check frame below fast function. ++ | andi TMP0, TMP1, FRAME_TYPE ++ | bnez TMP0, >3 // Trace stitching continuation? ++ | // Otherwise set KBASE for Lua function below fast function. 
++ | ld.w TMP2, -4(TMP1) ++ | decode_RA TMP0, TMP2 ++ | sub.d TMP1, BASE, TMP0 ++ | ld.d LFUNC:TMP2, -32(TMP1) ++ | cleartp LFUNC:TMP2 ++ | ld.d TMP1, LFUNC:TMP2->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ |3: ++ | addi.d RC, MULTRES, -8 ++ | add.d RA, RA, BASE ++ | jirl r0, TMP3, 0 ++ | ++ |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. ++ | .LDXD TMP0, DISPATCH, DISPATCH_J(trace) ++ | add.d TMP0, TMP0, RD ++ | ld.d TRACE:TMP2, 0(TMP0) ++ | ld.w INS, TRACE:TMP2->startins ++ | decode_OP TMP1, INS ++ | decode_BC8b TMP1 ++ | add.d TMP0, DISPATCH, TMP1 ++ | decode_RD RD, INS ++ | ld.d TMP4, GG_DISP2STATIC(TMP0) ++ | decode_RA RA, INS ++ | jirl r0, TMP4, 0 ++ | ++ |9: // Rethrow error from the right C frame. ++ | sub.w CARG2, r0, CRET1 //TODO LA: sub.w no trap ++ | or CARG1, L, r0 ++ | bl extern lj_err_trace // (lua_State *L, int errcode) ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Math helper functions ---------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Hard-float round to integer. ++ |// Modifies TMP0, TMP1, FARG1, FARG5, FTMP1, FTMP3, FTMP4 ++ |.macro vm_round_hf, func ++ | addu16i.d TMP0, r0, 0x4330 // Hiword of 2^52 (double). ++ | slli.d TMP0, TMP0, 32 ++ | movgr2fr.d FARG5, TMP0 ++ | fabs.d FTMP4, FARG1 // |x| ++ | movfr2gr.d TMP1, FARG1 ++ | fcmp.clt.d FCC0, FTMP4, FARG5 ++ | fadd.d FTMP3, FTMP4, FARG5 // (|x| + 2^52) - 2^52 ++ | fsub.d FTMP3, FTMP3, FARG5 ++ | bceqz FCC0, >1 // Truncate only if |x| < 2^52. ++ | slt TMP1, TMP1, r0 ++ |.if "func" == "ceil" ++ | addu16i.d TMP0, r0, 0xbff0 ++ |.else ++ | addu16i.d TMP0, r0, 0x3ff0 // Hiword of +1 (double). ++ |.endif ++ |.if "func" == "trunc" ++ | slli.d TMP0, TMP0, 32 ++ | movgr2fr.d FARG5, TMP0 ++ | fcmp.clt.d FCC0, FTMP4, FRET1 // |x| < result? 
++ | fsub.d FTMP4, FTMP3, FARG5 ++ | fsel FTMP1, FTMP3, FTMP4, FCC0 ++ | movgr2fr.d FTMP3, TMP1 ++ | fneg.d FTMP4, FTMP1 ++ | movfr2cf FCC0, FTMP3 ++ | fsel FTMP3, FTMP1, FTMP4, FCC0 ++ | jirl r0, ra, 0 ++ |.else ++ | fneg.d FTMP4, FTMP3 ++ | slli.d TMP0, TMP0, 32 ++ | movgr2fr.d FARG5, TMP0 ++ | movgr2fr.d FTMP1, TMP1 ++ | movfr2cf FCC0, FTMP1 ++ | fsel FTMP1, FTMP3, FTMP4, FCC0 ++ |.if "func" == "ceil" ++ | fcmp.clt.d FCC0, FTMP1, FARG1 // x > result? ++ |.else ++ | fcmp.clt.d FCC0, FARG1, FTMP1 // x < result? ++ |.endif ++ | fsub.d FTMP4, FTMP1, FARG5 // If yes, subtract +-1. ++ | fsel FTMP3, FTMP1, FTMP4, FCC0 ++ | fmov.d FARG1, FTMP3 ++ | jirl r0, ra, 0 ++ |.endif ++ |1: ++ | fmov.d FTMP3, FARG1 ++ | jirl r0, ra, 0 ++ |.endmacro ++ | ++ | ++ |->vm_floor: ++ | vm_round_hf floor ++ |->vm_ceil: ++ | vm_round_hf ceil ++ |->vm_trunc: ++ |.if JIT ++ | vm_round_hf trunc ++ |.endif ++ | ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Miscellaneous functions -------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_IDX, CARG2 ++ |.define NEXT_ASIZE, CARG3 ++ |.define NEXT_NIL, CARG4 ++ |.define NEXT_TMP0, TMP0 ++ |.define NEXT_TMP1, TMP1 ++ |.define NEXT_TMP2, TMP2 ++ |.define NEXT_RES_VK, CRET1 ++ |.define NEXT_RES_IDX, CRET2 ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL, 0(sp) ++ |.define NEXT_RES_KEY, 8(sp) ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in CRET2. ++ |->vm_next: ++ |.if JIT ++ | ld.w NEXT_ASIZE, NEXT_TAB->asize ++ | ld.d NEXT_TMP0, NEXT_TAB->array ++ | addi.d NEXT_NIL, r0, LJ_TNIL ++ |1: // Traverse array part. 
++ | sltu TMP3, NEXT_IDX, NEXT_ASIZE ++ | slli.w NEXT_TMP1, NEXT_IDX, 3 ++ | add.d NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 ++ | beqz TMP3, >5 ++ | addi.d TMP3, r0, LJ_TISNUM ++ | ld.d NEXT_TMP2, 0(NEXT_TMP1) ++ | slli.d TMP3, TMP3, 47 ++ | or NEXT_TMP1, NEXT_IDX, TMP3 ++ | addi.w NEXT_IDX, NEXT_IDX, 1 ++ | beq NEXT_TMP2, NEXT_NIL, <1 ++ | st.d NEXT_TMP2, NEXT_RES_VAL ++ | st.d NEXT_TMP1, NEXT_RES_KEY ++ | or NEXT_RES_VK, NEXT_RES_PTR, r0 ++ | or NEXT_RES_IDX, NEXT_IDX, r0 ++ | jirl r0, ra, 0 ++ | ++ |5: // Traverse hash part. ++ | sub.w NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE ++ | ld.w NEXT_TMP0, NEXT_TAB->hmask ++ | ld.d NODE:NEXT_RES_VK, NEXT_TAB->node ++ | slli.w NEXT_TMP2, NEXT_RES_IDX, 5 ++ | slli.w TMP3, NEXT_RES_IDX, 3 ++ | sub.w TMP3, NEXT_TMP2, TMP3 ++ | add.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, TMP3 ++ |6: ++ | sltu TMP3, NEXT_TMP0, NEXT_RES_IDX ++ | bnez TMP3, >8 ++ | ld.d NEXT_TMP2, NODE:NEXT_RES_VK->val ++ | addi.w NEXT_RES_IDX, NEXT_RES_IDX, 1 ++ | bne NEXT_TMP2, NEXT_NIL, >9 ++ | // Skip holes in hash part. ++ | addi.d NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) ++ | b <6 ++ | ++ |8: // End of iteration. Set the key to nil (not the value). ++ | st.d NEXT_NIL, NEXT_RES_KEY ++ | or NEXT_RES_VK, NEXT_RES_PTR, r0 ++ |9: ++ | add.w NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE ++ | jirl r0, ra, 0 ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- FFI helper functions ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Handler for callback functions. Callback slot number in r19, g in r17. 
++ |->vm_ffi_callback: ++ |.if FFI ++ |.type CTSTATE, CTState, PC ++ | saveregs ++ | ld.d CTSTATE, GL:r17->ctype_state ++ | .ADD16I DISPATCH, r17, GG_G2DISP ++ | st.w r19, CTSTATE->cb.slot ++ | st.d CARG1, CTSTATE->cb.gpr[0] ++ | fst.d FARG1, CTSTATE->cb.fpr[0] ++ | st.d CARG2, CTSTATE->cb.gpr[1] ++ | fst.d FARG2, CTSTATE->cb.fpr[1] ++ | st.d CARG3, CTSTATE->cb.gpr[2] ++ | fst.d FARG3, CTSTATE->cb.fpr[2] ++ | st.d CARG4, CTSTATE->cb.gpr[3] ++ | fst.d FARG4, CTSTATE->cb.fpr[3] ++ | st.d CARG5, CTSTATE->cb.gpr[4] ++ | fst.d FARG5, CTSTATE->cb.fpr[4] ++ | st.d CARG6, CTSTATE->cb.gpr[5] ++ | fst.d FARG6, CTSTATE->cb.fpr[5] ++ | st.d CARG7, CTSTATE->cb.gpr[6] ++ | fst.d FARG7, CTSTATE->cb.fpr[6] ++ | st.d CARG8, CTSTATE->cb.gpr[7] ++ | fst.d FARG8, CTSTATE->cb.fpr[7] ++ | addi.d TMP0, sp, CFRAME_SPACE ++ | st.d TMP0, CTSTATE->cb.stack ++ | st.d r0, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | or CARG1, CTSTATE, r0 ++ | or CARG2, sp, r0 ++ | bl extern lj_ccallback_enter // (CTState *cts, void *cf) ++ | // Returns lua_State *. ++ | ld.d BASE, L:CRET1->base ++ | ld.d RC, L:CRET1->top ++ | or L, CRET1, r0 ++ | addu16i.d TMP3, r0, 0x59c0 // TOBIT = 2^52 + 2^51 (float). ++ | ld.d LFUNC:RB, FRAME_FUNC(BASE) ++ | movgr2fr.w TOBIT, TMP3 ++ | addi.d TISNIL, r0, LJ_TNIL ++ | addi.d TISNUM, r0, LJ_TISNUM ++ | li_vmstate INTERP ++ | sub.w RC, RC, BASE ++ | cleartp LFUNC:RB ++ | st_vmstate ++ | fcvt.d.s TOBIT, TOBIT ++ | ins_callt ++ |.endif ++ | ++ |->cont_ffi_callback: // Return from FFI callback. ++ |.if FFI ++ | .LDXD CTSTATE, DISPATCH, DISPATCH_GL(ctype_state) ++ | st.d BASE, L->base ++ | st.d RB, L->top ++ | st.d L, CTSTATE->L ++ | or CARG1, CTSTATE, r0 ++ | or CARG2, RA, r0 ++ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) ++ | fld.d FRET1, CTSTATE->cb.fpr[0] ++ | ld.d CRET1, CTSTATE->cb.gpr[0] ++ | fld.d FRET2, CTSTATE->cb.fpr[1] ++ | ld.d CRET2, CTSTATE->cb.gpr[1] ++ | b ->vm_leave_unw ++ |.endif ++ | ++ |->vm_ffi_call: // Call C function via FFI. 
++ | // Caveat: needs special frame unwinding, see below. ++ |.if FFI ++ | .type CCSTATE, CCallState, CARG1 ++ | ld.w TMP1, CCSTATE->spadj ++ | ld.bu CARG2, CCSTATE->nsp ++ | ld.bu CARG3, CCSTATE->nfpr ++ | or TMP2, sp, r0 ++ | sub.d sp, sp, TMP1 ++ | st.d ra, -8(TMP2) ++ | slli.w CARG2, CARG2, 3 ++ | st.d r23, -16(TMP2) ++ | st.d CCSTATE, -24(TMP2) ++ | or r23, TMP2, r0 ++ | addi.d TMP1, CCSTATE, offsetof(CCallState, stack) ++ | or TMP2, sp, r0 ++ | add.d TMP3, TMP1, CARG2 ++ | beqz CARG2, >2 ++ |1: ++ | ld.d TMP0, 0(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | sltu TMP4, TMP1, TMP3 ++ | st.d TMP0, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | bnez TMP4, <1 ++ |2: ++ | beqz CARG3, >3 ++ | fld.d FARG1, CCSTATE->fpr[0] ++ | fld.d FARG2, CCSTATE->fpr[1] ++ | fld.d FARG3, CCSTATE->fpr[2] ++ | fld.d FARG4, CCSTATE->fpr[3] ++ | fld.d FARG5, CCSTATE->fpr[4] ++ | fld.d FARG6, CCSTATE->fpr[5] ++ | fld.d FARG7, CCSTATE->fpr[6] ++ | fld.d FARG8, CCSTATE->fpr[7] ++ |3: ++ | ld.d TMP3, CCSTATE->func ++ | ld.d CARG2, CCSTATE->gpr[1] ++ | ld.d CARG3, CCSTATE->gpr[2] ++ | ld.d CARG4, CCSTATE->gpr[3] ++ | ld.d CARG5, CCSTATE->gpr[4] ++ | ld.d CARG6, CCSTATE->gpr[5] ++ | ld.d CARG7, CCSTATE->gpr[6] ++ | ld.d CARG8, CCSTATE->gpr[7] ++ | ld.d CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. ++ | jirl r1, TMP3, 0 ++ | ld.d CCSTATE:TMP1, -24(r23) ++ | ld.d TMP2, -16(r23) ++ | ld.d ra, -8(r23) ++ | st.d CRET1, CCSTATE:TMP1->gpr[0] ++ | st.d CRET2, CCSTATE:TMP1->gpr[1] ++ | fst.d FRET1, CCSTATE:TMP1->fpr[0] ++ | fst.d FRET2, CCSTATE:TMP1->fpr[1] ++ | or sp, r23, r0 ++ | or r23, TMP2, r0 ++ | jirl r0, ra, 0 ++ |.endif ++ |// Note: vm_ffi_call must be the last function in this object file! ++ | ++ |//----------------------------------------------------------------------- ++} ++ ++/* Generate the code for a single instruction. 
*/ ++static void build_ins(BuildCtx *ctx, BCOp op, int defop) ++{ ++ int vk = 0; ++ |=>defop: ++ ++ switch (op) { ++ ++ /* -- Comparison ops ---------------------------------------------------- */ ++ ++ /* Remember: all ops branch for a true comparison, fall through otherwise. */ ++ ++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | add.d RA, BASE, RA ++ | add.d RD, BASE, RD ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | ld.d CARG1, 0(RA) ++ | ld.d CARG2, 0(RD) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ } else { ++ | ld.d CARG2, 0(RA) ++ | ld.d CARG1, 0(RD) ++ | gettp CARG3, CARG2 ++ | gettp CARG4, CARG1 ++ } ++ | ld.hu TMP2, OFS_RD(PC) // TMP2=jump ++ | addi.d PC, PC, 4 ++ | bne CARG3, TISNUM, >2 ++ | decode_BC4b TMP2 ++ | bne CARG4, TISNUM, >5 ++ | slli.w CARG1, CARG1, 0 ++ | slli.w CARG2, CARG2, 0 ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ | slt TMP1, CARG1, CARG2 ++ | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 ++ if (op == BC_ISLT || op == BC_ISGT) { ++ | maskeqz TMP2, TMP2, TMP1 ++ } else { ++ | masknez TMP2, TMP2,TMP1 ++ } ++ |1: ++ | add.d PC, PC, TMP2 ++ | ins_next ++ | ++ |2: // RA is not an integer. ++ | sltui TMP1, CARG3, LJ_TISNUM ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ | beqz TMP1, ->vmeta_comp ++ | sltui TMP1, CARG4, LJ_TISNUM ++ | decode_BC4b TMP2 ++ | beqz TMP1, >4 ++ | movgr2fr.d FTMP0, CARG1 ++ | movgr2fr.d FTMP2, CARG2 ++ |3: // RA and RD are both numbers. ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | fcmp.clt.d FCC0, FTMP0, FTMP2 ++ } else { ++ | fcmp.cult.d FCC0, FTMP0, FTMP2 ++ } ++ | add.w TMP2, TMP2, TMP3 ++ | movcf2gr TMP3, FCC0 ++ if (op == BC_ISLT || op == BC_ISGT) { ++ | maskeqz TMP2, TMP2, TMP3 ++ } else { ++ | masknez TMP2, TMP2, TMP3 ++ } ++ | b <1 ++ | ++ |4: // RA is a number, RD is not a number. ++ | // RA is a number, RD is an integer. Convert RD to a number. 
++ | bne CARG4, TISNUM, ->vmeta_comp ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | movgr2fr.w FTMP2, CARG2 ++ | movgr2fr.d FTMP0, CARG1 ++ | ffint.d.w FTMP2, FTMP2 ++ } else { ++ | movgr2fr.w FTMP0, CARG1 ++ | movgr2fr.d FTMP2, CARG2 ++ | ffint.d.w FTMP0, FTMP0 ++ } ++ | b <3 ++ | ++ |5: // RA is an integer, RD is not an integer ++ | sltui TMP1, CARG4, LJ_TISNUM ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ | beqz TMP1, ->vmeta_comp ++ | // RA is an integer, RD is a number. Convert RA to a number. ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | movgr2fr.w FTMP0, CARG1 ++ | movgr2fr.d FTMP2, CARG2 ++ | ffint.d.w FTMP0, FTMP0 ++ } else { ++ | movgr2fr.w FTMP2, CARG2 ++ | movgr2fr.d FTMP0, CARG1 ++ | ffint.d.w FTMP2, FTMP2 ++ } ++ | b <3 ++ break; ++ ++ case BC_ISEQV: case BC_ISNEV: ++ vk = op == BC_ISEQV; ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | add.d RA, BASE, RA ++ | add.d RD, BASE, RD ++ | addi.d PC, PC, 4 ++ | ld.d CARG1, 0(RA) ++ | ld.d CARG2, 0(RD) ++ | ld.hu TMP2, -4+OFS_RD(PC) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | sltu TMP0, TISNUM, CARG3 ++ | sltu TMP1, TISNUM, CARG4 ++ | or TMP0, TMP0, TMP1 ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ if (vk) { ++ | beqz TMP0, ->BC_ISEQN_Z ++ } else { ++ | beqz TMP0, ->BC_ISNEN_Z ++ } ++ |// Either or both types are not numbers. ++ |.if FFI ++ | // Check if RA or RD is a cdata. ++ | addi.w TMP0, r0, LJ_TCDATA ++ | beq CARG3, TMP0, ->vmeta_equal_cd ++ | beq CARG4, TMP0, ->vmeta_equal_cd ++ |.endif ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ | decode_BC4b TMP2 ++ | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2 ++ | bne CARG1, CARG2, >2 ++ | // Tag and value are equal. ++ if (vk) { ++ |->BC_ISEQV_Z: ++ | add.d PC, PC, TMP2 ++ } ++ |1: ++ | ins_next ++ | ++ |2: // Check if the tags are the same and it's a table or userdata. ++ | xor TMP3, CARG3, CARG4 // Same type? ++ | sltui TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? 
TMP0=1 ++ | masknez TMP0, TMP0, TMP3 // TMP0=0: not same type, or same type table/userdata ++ | cleartp TAB:TMP1, CARG1 ++ if (vk) { ++ | beqz TMP0, <1 ++ } else { ++ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. ++ } ++ | // Different tables or userdatas. Need to check __eq metamethod. ++ | // Field metatable must be at same offset for GCtab and GCudata! ++ | ld.d TAB:TMP3, TAB:TMP1->metatable ++ if (vk) { ++ | beqz TAB:TMP3, <1 // No metatable? ++ | ld.bu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<BC_ISEQV_Z // No metatable? ++ | ld.bu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? ++ } ++ | b ->vmeta_equal // Handle __eq metamethod. ++ break; ++ ++ case BC_ISEQS: case BC_ISNES: ++ vk = op == BC_ISEQS; ++ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target ++ | add.d RA, BASE, RA ++ | addi.d PC, PC, 4 ++ | ld.d CARG1, 0(RA) ++ | sub.d RD, KBASE, RD ++ | ld.hu TMP2, -4+OFS_RD(PC) ++ | ld.d CARG2, -8(RD) // KBASE-8-str_const*8 ++ |.if FFI ++ | gettp CARG3, CARG1 ++ | addi.w TMP1, r0, LJ_TCDATA ++ |.endif ++ | addi.w TMP0, r0, LJ_TSTR ++ | decode_BC4b TMP2 ++ | settp CARG2, TMP0 ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ |.if FFI ++ | beq CARG3, TMP1, ->vmeta_equal_cd ++ |.endif ++ | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D ++ | add.w TMP2, TMP2, TMP3 ++ if (vk) { ++ | masknez TMP2, TMP2, TMP0 ++ } else { ++ | maskeqz TMP2, TMP2, TMP0 ++ } ++ | add.d PC, PC, TMP2 ++ | ins_next ++ break; ++ ++ case BC_ISEQN: case BC_ISNEN: ++ vk = op == BC_ISEQN; ++ | // RA = src*8, RD = num_const*8, JMP with RD = target ++ | add.d RA, BASE, RA ++ | add.d RD, KBASE, RD ++ | ld.d CARG1, 0(RA) ++ | ld.d CARG2, 0(RD) ++ | ld.hu TMP2, OFS_RD(PC) ++ | addi.d PC, PC, 4 ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ if (vk) { ++ |->BC_ISEQN_Z: ++ } else { ++ |->BC_ISNEN_Z: ++ } ++ | decode_BC4b TMP2 ++ | bne CARG3, TISNUM, >4 ++ | add.w TMP2, TMP2, TMP3 ++ | 
bne CARG4, TISNUM, >6 ++ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D ++ |1: ++ if (vk) { ++ | masknez TMP2, TMP2, TMP0 ++ | add.d PC, PC, TMP2 ++ |2: ++ } else { ++ | maskeqz TMP2, TMP2, TMP0 ++ |2: ++ | add.d PC, PC, TMP2 ++ } ++ |3: ++ | ins_next ++ | ++ |4: // RA is not an integer. ++ | sltu TMP0, CARG3, TISNUM ++ | add.w TMP2, TMP2, TMP3 ++ |.if FFI ++ | beqz TMP0, >7 ++ |.else ++ | beqz TMP0, <2 ++ |.endif ++ | movgr2fr.d FTMP0, CARG1 ++ | movgr2fr.d FTMP2, CARG2 ++ | bne CARG4, TISNUM, >5 ++ |// RA is a number, RD is an integer. ++ | ffint.d.w FTMP2, FTMP2 ++ | ++ |5: // RA and RD are both numbers. ++ | fcmp.cune.d FCC0, FTMP0, FTMP2 ++ | movcf2gr TMP0, FCC0 ++ | b <1 ++ | ++ |6: // RA is an integer, RD is a number. ++ | sltu TMP0, CARG4, TISNUM ++ |.if FFI ++ | beqz TMP0, >8 ++ |.else ++ | beqz TMP0, <2 ++ |.endif ++ | movgr2fr.w FTMP0, CARG1 ++ | movgr2fr.d FTMP2, CARG2 ++ | ffint.d.w FTMP0, FTMP0 ++ | b <5 ++ | ++ |.if FFI ++ |7: // RA not int, not number ++ | addi.w TMP0, r0, LJ_TCDATA ++ | bne CARG3, TMP0, <2 ++ | b ->vmeta_equal_cd ++ | ++ |8: // RD not int, not number ++ | addi.w TMP0, r0, LJ_TCDATA ++ | bne CARG4, TMP0, <2 ++ | b ->vmeta_equal_cd ++ |.endif ++ break; ++ ++ case BC_ISEQP: case BC_ISNEP: ++ vk = op == BC_ISEQP; ++ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target ++ | add.d RA, BASE, RA ++ | srli.w TMP0, RD, 3 ++ | ld.d TMP1, 0(RA) ++ | nor TMP0, TMP0, r0 // ~TMP0: ~0 ~1 ~2 ++ | ld.hu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target ++ | gettp TMP1, TMP1 ++ | addi.d PC, PC, 4 ++ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D ++ |.if FFI ++ | addi.w TMP3, r0, LJ_TCDATA ++ | beq TMP1, TMP3, ->vmeta_equal_cd ++ |.endif ++ | decode_BC4b TMP2 ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ | add.w TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 ++ if (vk) { ++ | masknez TMP2, TMP2, TMP0 ++ } else { ++ | maskeqz TMP2, TMP2, TMP0 ++ } ++ | add.d PC, PC, TMP2 ++ | ins_next ++ break; ++ ++ /* -- Unary test and 
copy ops ------------------------------------------- */ ++ ++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: ++ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target ++ | add.d RD, BASE, RD ++ | ld.hu TMP2, OFS_RD(PC) ++ | ld.d TMP0, 0(RD) ++ | addi.d PC, PC, 4 ++ | gettp TMP0, TMP0 ++ | add.d RA, BASE, RA ++ | sltui TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false ++ | decode_BC4b TMP2 ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ | ld.d CRET1, 0(RD) ++ | add.w TMP2, TMP2, TMP3 // (jump-0x8000)<<2 ++ if (op == BC_IST || op == BC_ISTC) { ++ | beqz TMP0, >1 ++ if (op == BC_ISTC) { ++ | st.d CRET1, 0(RA) ++ } ++ } else { ++ | bnez TMP0, >1 ++ if (op == BC_ISFC) { ++ | st.d CRET1, 0(RA) ++ } ++ } ++ | add.d PC, PC, TMP2 ++ |1: ++ | ins_next ++ break; ++ ++ case BC_ISTYPE: ++ | // RA = src*8, RD = -type*8 ++ | add.d TMP0, BASE, RA ++ | srli.w TMP1, RD, 3 ++ | ld.d TMP0, 0(TMP0) ++ | gettp TMP0, TMP0 ++ | add.d TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0 ++ | bnez TMP0, ->vmeta_istype ++ | ins_next ++ break; ++ case BC_ISNUM: ++ | // RA = src*8, RD = -(TISNUM-1)*8 ++ | add.d TMP0, BASE, RA ++ | ld.d TMP0, 0(TMP0) ++ | checknum TMP0, ->vmeta_istype ++ | ins_next ++ break; ++ ++ /* -- Unary ops --------------------------------------------------------- */ ++ ++ case BC_MOV: ++ | // RA = dst*8, RD = src*8 ++ | add.d RD, BASE, RD ++ | add.d RA, BASE, RA ++ | ld.d TMP0, 0(RD) ++ | ins_next1 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_NOT: ++ | // RA = dst*8, RD = src*8 ++ | add.d RD, BASE, RD ++ | add.d RA, BASE, RA ++ | ld.d TMP0, 0(RD) ++ | addi.d TMP1, r0, LJ_TTRUE ++ | ins_next1 ++ | gettp TMP0, TMP0 ++ | sltu TMP0, TMP1, TMP0 ++ | addi.w TMP0, TMP0, 1 ++ | slli.d TMP0, TMP0, 47 ++ | nor TMP0, TMP0, r0 ++ | st.d TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_UNM: ++ | // RA = dst*8, RD = src*8 ++ | add.d RB, BASE, RD ++ | add.d RA, BASE, RA ++ | ld.d TMP0, 0(RB) ++ | addu16i.d TMP1, r0, 0x8000 ++ | gettp CARG3, TMP0 ++ | 
bne CARG3, TISNUM, >1 ++ | sub.w TMP0, r0, TMP0 ++ | beq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31. ++ | bstrpick.d TMP0, TMP0, 31, 0 ++ | settp TMP0, TISNUM ++ | b >2 ++ |1: ++ | sltui TMP3, CARG3, LJ_TISNUM ++ | slli.d TMP1, TMP1, 32 ++ | beqz TMP3, ->vmeta_unm ++ | xor TMP0, TMP0, TMP1 // sign => ~sign ++ |2: ++ | st.d TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_LEN: ++ | // RA = dst*8, RD = src*8 ++ | add.d CARG2, BASE, RD ++ | ld.d TMP0, 0(CARG2) ++ | add.d RA, BASE, RA ++ | gettp TMP1, TMP0 ++ | addi.d TMP2, TMP1, -LJ_TSTR ++ | cleartp STR:CARG1, TMP0 ++ | bnez TMP2, >2 ++ | ld.w CARG1, STR:CARG1->len ++ |1: ++ | settp CARG1, TISNUM ++ | st.d CARG1, 0(RA) ++ | ins_next ++ |2: ++ | addi.d TMP2, TMP1, -LJ_TTAB ++ | bnez TMP2, ->vmeta_len ++#if LJ_52 ++ | ld.d TAB:TMP2, TAB:CARG1->metatable ++ | bnez TAB:TMP2, >9 ++ |3: ++#endif ++ |->BC_LEN_Z: ++ | bl extern lj_tab_len // (GCtab *t) ++ | // Returns uint32_t (but less than 2^31). ++ | b <1 ++#if LJ_52 ++ |9: ++ | ld.bu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_len ++#endif ++ break; ++ ++ /* -- Binary ops -------------------------------------------------------- */ ++ ++ |.macro fpmod, a, b, c ++ | fdiv.d FARG1, b, c ++ | bl ->vm_floor // floor(b/c) ++ | fmul.d a, FRET1, c ++ | fsub.d a, b, a // b - floor(b/c)*c ++ |.endmacro ++ | ++ |.macro ins_arithpre ++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ++ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ++ ||if (vk == 1) { ++ | // RA = dst*8, RB = num_const*8, RC = src1*8 ++ | decode_RB RC, INS ++ | decode_RDtoRC8 RB, RD ++ ||} else { ++ | // RA = dst*8, RB = src1*8, RC = num_const*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ ||} ++ ||switch (vk) { ++ ||case 0: // suffix is VN ++ | add.d RB, BASE, RB ++ | add.d RC, KBASE, RC ++ || break; ++ ||case 1: // suffix is NV ++ | add.d RC, BASE, RC ++ | add.d RB, KBASE, RB ++ || break; ++ ||default: // CAT or suffix is VV ++ | add.d RB, BASE, RB ++ | add.d RC, BASE, RC ++ 
|| break; ++ ||} ++ |.endmacro ++ | ++ |.macro ins_arithfp, fpins, itype1, itype2 ++ | fld.d FTMP0, 0(RB) ++ | sltu itype1, itype1, TISNUM ++ | sltu itype2, itype2, TISNUM ++ | fld.d FTMP2, 0(RC) ++ | and itype1, itype1, itype2 ++ | add.d RA, BASE, RA ++ | beqz itype1, ->vmeta_arith ++ | fpins FRET1, FTMP0, FTMP2 ++ | ins_next1 ++ | fst.d FRET1, 0(RA) ++ | ins_next2 ++ |.endmacro ++ | ++ |.macro ins_arithead, itype1, itype2, tval1, tval2 ++ | ld.d tval1, 0(RB) ++ | ld.d tval2, 0(RC) ++ | // Check for two integers. ++ | gettp itype1, tval1 ++ | gettp itype2, tval2 ++ |.endmacro ++ | ++ |.macro ins_arithdn, intins, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | bne TMP0, TISNUM, >1 ++ | bne TMP1, TISNUM, >1 ++ | slli.w CARG3, CARG1, 0 ++ | slli.w CARG4, CARG2, 0 ++ |.if "intins" == "add.w" ++ | intins CRET1, CARG3, CARG4 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. ++ | xor TMP2, CRET1, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add.d RA, BASE, RA ++ | blt TMP1, r0, ->vmeta_arith ++ |.elif "intins" == "sub.w" ++ | intins CRET1, CARG3, CARG4 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. ++ | xor TMP2, CARG3, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add.d RA, BASE, RA ++ | blt TMP1, r0, ->vmeta_arith ++ |.elif "intins" == "mulw.d.w" ++ | mul.w CRET1, CARG3, CARG4 ++ | mulh.w TMP2, CARG3, CARG4 ++ | srai.w TMP1, CRET1, 31 // 63-32bit not all 0 or 1: overflow. ++ | add.d RA, BASE, RA ++ | bne TMP1, TMP2, ->vmeta_arith ++ |.endif ++ | bstrpick.d CRET1, CRET1, 31, 0 ++ | settp CRET1, TISNUM ++ | st.d CRET1, 0(RA) ++ | ins_next ++ |1: // Check for two numbers. 
++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ | ++ |.macro ins_arithdiv, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ | ++ |.macro ins_arithmod, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | bne TMP0, TISNUM, >1 ++ | bne TMP1, TISNUM, >1 ++ | slli.w CARG1, CARG1, 0 ++ | slli.w CARG2, CARG2, 0 ++ | add.d RA, BASE, RA ++ | beqz CARG2, ->vmeta_arith ++ | bl extern lj_vm_modi ++ | bstrpick.d CRET1, CRET1, 31, 0 ++ | settp CRET1, TISNUM ++ | st.d CRET1, 0(RA) ++ | ins_next ++ |1: // Check for two numbers. ++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ ++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: ++ | ins_arithdn add.w, fadd.d ++ break; ++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: ++ | ins_arithdn sub.w, fsub.d ++ break; ++ case BC_MULVN: case BC_MULNV: case BC_MULVV: ++ | ins_arithdn mulw.d.w, fmul.d ++ break; ++ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: ++ | ins_arithdiv fdiv.d ++ break; ++ case BC_MODVN: case BC_MODNV: case BC_MODVV: ++ | ins_arithmod fpmod ++ break; ++ case BC_POW: ++ | ins_arithpre ++ | ld.d CARG1, 0(RB) ++ | ld.d CARG2, 0(RC) ++ | gettp TMP0, CARG1 ++ | gettp TMP1, CARG2 ++ | sltui TMP0, TMP0, LJ_TISNUM ++ | sltui TMP1, TMP1, LJ_TISNUM ++ | and TMP0, TMP0, TMP1 ++ | add.d RA, BASE, RA ++ | beqz TMP0, ->vmeta_arith ++ | fld.d FARG1, 0(RB) ++ | fld.d FARG2, 0(RC) ++ | bl extern pow ++ | ins_next1 ++ | fst.d FRET1, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_CAT: ++ | // RA = dst*8, RB = src_start*8, RC = src_end*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | sub.d CARG3, RC, RB ++ | st.d BASE, L->base ++ | add.d CARG2, BASE, RC ++ | or MULTRES, RB, r0 ++ |->BC_CAT_Z: ++ | srli.w CARG3, CARG3, 3 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) ++ | // Returns NULL (finished) or TValue * (metamethod). 
++ | ld.d BASE, L->base ++ | bnez CRET1, ->vmeta_binop ++ | add.d RB, BASE, MULTRES ++ | ld.d TMP0, 0(RB) ++ | add.d RA, BASE, RA ++ | st.d TMP0, 0(RA) ++ | ins_next ++ break; ++ ++ /* -- Constant ops ------------------------------------------------------ */ ++ ++ case BC_KSTR: ++ | // RA = dst*8, RD = str_const*8 (~) ++ | sub.d TMP1, KBASE, RD ++ | addi.w TMP2, r0, LJ_TSTR ++ | ld.d TMP0, -8(TMP1) // KBASE-8-str_const*8 ++ | add.d RA, BASE, RA ++ | settp TMP0, TMP2 ++ | st.d TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_KCDATA: ++ |.if FFI ++ | // RA = dst*8, RD = cdata_const*8 (~) ++ | sub.d TMP1, KBASE, RD ++ | ld.d TMP0, -8(TMP1) // KBASE-8-cdata_const*8 ++ | addi.w TMP2, r0, LJ_TCDATA ++ | add.d RA, BASE, RA ++ | settp TMP0, TMP2 ++ | st.d TMP0, 0(RA) ++ | ins_next ++ |.endif ++ break; ++ case BC_KSHORT: ++ | // RA = dst*8, RD = int16_literal*8 ++ | srai.w RD, INS, 16 ++ | add.d RA, BASE, RA ++ | bstrpick.d RD, RD, 31, 0 ++ | settp RD, TISNUM ++ | st.d RD, 0(RA) ++ | ins_next ++ break; ++ case BC_KNUM: ++ | // RA = dst*8, RD = num_const*8 ++ | add.d RD, KBASE, RD ++ | add.d RA, BASE, RA ++ | ld.d TMP0, 0(RD) ++ | st.d TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_KPRI: ++ | // RA = dst*8, RD = primitive_type*8 (~) ++ | add.d RA, BASE, RA ++ | slli.d TMP0, RD, 44 // 44+3 ++ | nor TMP0, TMP0, r0 ++ | st.d TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_KNIL: ++ | // RA = base*8, RD = end*8 ++ | add.d RA, BASE, RA ++ | st.d TISNIL, 0(RA) ++ | addi.d RA, RA, 8 ++ | add.d RD, BASE, RD ++ |1: ++ | st.d TISNIL, 0(RA) ++ | slt TMP0, RA, RD ++ | addi.d RA, RA, 8 ++ | bnez TMP0, <1 ++ | ins_next ++ break; ++ ++ /* -- Upvalue and function ops ------------------------------------------ */ ++ ++ case BC_UGET: ++ | // RA = dst*8, RD = uvnum*8 ++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE) ++ | add.d RA, BASE, RA ++ | cleartp LFUNC:TMP0 ++ | add.d RD, RD, LFUNC:TMP0 ++ | ld.d UPVAL:TMP0, LFUNC:RD->uvptr ++ | ld.d TMP1, UPVAL:TMP0->v ++ | ld.d TMP2, 0(TMP1) ++ | ins_next1 ++ | st.d 
TMP2, 0(RA) ++ | ins_next2 ++ break; ++ case BC_USETV: ++ | // RA = uvnum*8, RD = src*8 ++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE) ++ | add.d RD, BASE, RD ++ | cleartp LFUNC:TMP0 ++ | add.d RA, RA, LFUNC:TMP0 ++ | ld.d UPVAL:TMP0, LFUNC:RA->uvptr ++ | ld.d CRET1, 0(RD) ++ | ld.bu TMP3, UPVAL:TMP0->marked ++ | ld.d CARG2, UPVAL:TMP0->v ++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) ++ | ld.bu TMP0, UPVAL:TMP0->closed ++ | gettp TMP2, CRET1 ++ | st.d CRET1, 0(CARG2) ++ | or TMP3, TMP3, TMP0 ++ | addi.d TMP0, r0, LJ_GC_BLACK|1 ++ | addi.d TMP2, TMP2, -(LJ_TNUMX+1) ++ | beq TMP3, TMP0, >2 // Upvalue is closed and black? ++ |1: ++ | ins_next ++ | ++ |2: // Check if new value is collectable. ++ | sltui TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1) ++ | cleartp GCOBJ:CRET1, CRET1 ++ | beqz TMP0, <1 // tvisgcv(v) ++ | ld.bu TMP3, GCOBJ:CRET1->gch.marked ++ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) ++ | beqz TMP3, <1 ++ | // Crossed a write barrier. Move the barrier forward. ++ | .ADD16I CARG1, DISPATCH, GG_DISP2G ++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | b <1 ++ break; ++ case BC_USETS: ++ | // RA = uvnum*8, RD = str_const*8 (~) ++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE) ++ | sub.d TMP1, KBASE, RD ++ | cleartp LFUNC:TMP0 ++ | add.d RA, RA, LFUNC:TMP0 ++ | ld.d UPVAL:TMP0, LFUNC:RA->uvptr ++ | ld.d STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 ++ | ld.bu TMP2, UPVAL:TMP0->marked ++ | ld.d CARG2, UPVAL:TMP0->v ++ | ld.bu TMP3, STR:TMP1->marked ++ | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv) ++ | ld.bu TMP2, UPVAL:TMP0->closed ++ | addi.d TMP0, r0, LJ_TSTR ++ | settp TMP1, TMP0 ++ | st.d TMP1, 0(CARG2) ++ | bnez TMP4, >2 ++ |1: ++ | ins_next ++ | ++ |2: // Check if string is white and ensure upvalue is closed. ++ | beqz TMP2, <1 ++ | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str) ++ | beqz TMP0, <1 ++ | // Crossed a write barrier. Move the barrier forward. 
++ | .ADD16I CARG1, DISPATCH, GG_DISP2G ++ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | b <1 ++ break; ++ case BC_USETN: ++ | // RA = uvnum*8, RD = num_const*8 ++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE) ++ | add.d RD, KBASE, RD ++ | cleartp LFUNC:TMP0 ++ | add.d TMP0, RA, LFUNC:TMP0 ++ | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr ++ | ld.d TMP1, 0(RD) ++ | ld.d TMP0, UPVAL:TMP0->v ++ | st.d TMP1, 0(TMP0) ++ | ins_next ++ break; ++ case BC_USETP: ++ | // RA = uvnum*8, RD = primitive_type*8 (~) ++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE) ++ | slli.d TMP2, RD, 44 ++ | cleartp LFUNC:TMP0 ++ | add.d TMP0, RA, LFUNC:TMP0 ++ | nor TMP2, TMP2, r0 ++ | ld.d UPVAL:TMP0, LFUNC:TMP0->uvptr ++ | ld.d TMP1, UPVAL:TMP0->v ++ | st.d TMP2, 0(TMP1) ++ | ins_next ++ break; ++ ++ case BC_UCLO: ++ | // RA = level*8, RD = target ++ | ld.d TMP2, L->openupval ++ | branch_RD // Do this first since RD is not saved. ++ | st.d BASE, L->base ++ | or CARG1, L, r0 ++ | beqz TMP2, >1 ++ | add.d CARG2, BASE, RA ++ | bl extern lj_func_closeuv // (lua_State *L, TValue *level) ++ | ld.d BASE, L->base ++ |1: ++ | ins_next ++ break; ++ ++ case BC_FNEW: ++ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) ++ | sub.d TMP1, KBASE, RD ++ | ld.d CARG3, FRAME_FUNC(BASE) ++ | ld.d CARG2, -8(TMP1) // KBASE-8-tab_const*8 ++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | cleartp CARG3 ++ | or CARG1, L, r0 ++ | // (lua_State *L, GCproto *pt, GCfuncL *parent) ++ | bl extern lj_func_newL_gc ++ | // Returns GCfuncL *. 
++ | addi.d TMP0, r0, LJ_TFUNC ++ | ld.d BASE, L->base ++ | settp CRET1, TMP0 ++ | add.d RA, BASE, RA ++ | st.d CRET1, 0(RA) ++ | ins_next ++ break; ++ ++ /* -- Table ops --------------------------------------------------------- */ ++ ++ case BC_TNEW: ++ case BC_TDUP: ++ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) ++ | .LDXD TMP0, DISPATCH, DISPATCH_GL(gc.total) ++ | .LDXD TMP1, DISPATCH, DISPATCH_GL(gc.threshold) ++ | st.d BASE, L->base ++ | sltu TMP2, TMP0, TMP1 ++ | st.d PC, SAVE_PC(sp) ++ | beqz TMP2, >5 ++ |1: ++ if (op == BC_TNEW) { ++ | srli.w CARG2, RD, 3 ++ | andi CARG2, CARG2, 0x7ff ++ | ori TMP0, r0, 0x801 ++ | addi.w TMP2, CARG2, -0x7ff ++ | srli.w CARG3, RD, 14 ++ | masknez TMP0, TMP0, TMP2 ++ | maskeqz CARG2, CARG2, TMP2 ++ | or CARG2, CARG2, TMP0 ++ | // (lua_State *L, int32_t asize, uint32_t hbits) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_new ++ | // Returns Table *. ++ } else { ++ | sub.d TMP1, KBASE, RD ++ | or CARG1, L, r0 ++ | ld.d CARG2, -8(TMP1) // KBASE-8-str_const*8 ++ | bl extern lj_tab_dup // (lua_State *L, Table *kt) ++ | // Returns Table *. 
++ } ++ | addi.d TMP0, r0, LJ_TTAB ++ | ld.d BASE, L->base ++ | ins_next1 ++ | settp CRET1, TMP0 ++ | add.d RA, BASE, RA ++ | st.d CRET1, 0(RA) ++ | ins_next2 ++ |5: ++ | or MULTRES, RD, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_gc_step_fixtop // (lua_State *L) ++ | or RD, MULTRES, r0 ++ | b <1 ++ break; ++ ++ case BC_GGET: ++ | // RA = dst*8, RD = str_const*8 (~) ++ case BC_GSET: ++ | // RA = src*8, RD = str_const*8 (~) ++ | ld.d LFUNC:TMP0, FRAME_FUNC(BASE) ++ | sub.d TMP1, KBASE, RD ++ | ld.d STR:RC, -8(TMP1) // KBASE-8-str_const*8 ++ | cleartp LFUNC:TMP0 ++ | ld.d TAB:RB, LFUNC:TMP0->env ++ | add.d RA, BASE, RA ++ if (op == BC_GGET) { ++ | b ->BC_TGETS_Z ++ } else { ++ | b ->BC_TSETS_Z ++ } ++ break; ++ ++ case BC_TGETV: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG2, BASE, RB ++ | add.d CARG3, BASE, RC ++ | ld.d TAB:RB, 0(CARG2) ++ | ld.d TMP2, 0(CARG3) ++ | add.d RA, BASE, RA ++ | checktab TAB:RB, ->vmeta_tgetv ++ | gettp TMP3, TMP2 ++ | ld.w TMP0, TAB:RB->asize ++ | bne TMP3, TISNUM, >5 // Integer key? ++ | slli.w TMP2, TMP2, 0 ++ | ld.d TMP1, TAB:RB->array ++ | sltu TMP3, TMP2, TMP0 //array part (keys = [0, asize-1]) ++ | slli.w TMP2, TMP2, 3 ++ | beqz TMP3, ->vmeta_tgetv // Integer key and in array part? ++ | add.d TMP2, TMP1, TMP2 ++ | ld.d CRET1, 0(TMP2) ++ | beq CRET1, TISNIL, >2 ++ |1: ++ | st.d CRET1, 0(RA) ++ | ins_next ++ | ++ |2: // Check for __index if table value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgetv ++ | ++ |5: ++ | addi.d TMP0, r0, LJ_TSTR ++ | cleartp RC, TMP2 ++ | bne TMP3, TMP0, ->vmeta_tgetv // String key? 
++ | b ->BC_TGETS_Z ++ break; ++ case BC_TGETS: ++ | // RA = dst*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG2, BASE, RB ++ | sub.d CARG3, KBASE, RC ++ | ld.d TAB:RB, 0(CARG2) ++ | add.d RA, BASE, RA ++ | ld.d STR:RC, -8(CARG3) // KBASE-8-str_const*8 ++ | checktab TAB:RB, ->vmeta_tgets1 ++ |->BC_TGETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 ++ | ld.w TMP0, TAB:RB->hmask ++ | ld.w TMP1, STR:RC->sid ++ | ld.d NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask ++ | slli.w TMP0, TMP1, 5 ++ | slli.w TMP1, TMP1, 3 ++ | sub.w TMP1, TMP0, TMP1 ++ | addi.d TMP3, r0, LJ_TSTR ++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ |1: ++ | ld.d CARG1, NODE:TMP2->key ++ | ld.d CARG2, NODE:TMP2->val ++ | ld.d NODE:TMP1, NODE:TMP2->next ++ | ld.d TAB:TMP3, TAB:RB->metatable ++ | bne CARG1, RC, >4 ++ | beq CARG2, TISNIL, >5 // Key found, but nil value? ++ |3: ++ | st.d CARG2, 0(RA) ++ | ins_next ++ | ++ |4: // Follow hash chain. ++ | or NODE:TMP2, NODE:TMP1, r0 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, nil result. ++ | ++ |5: // Check for __index if table value is nil. ++ | or CARG2, TISNIL, r0 ++ | beqz TAB:TMP3, <3 // No metatable: done. ++ | ld.bu TMP0, TAB:TMP3->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgets ++ break; ++ case BC_TGETB: ++ | // RA = dst*8, RB = table*8, RC = index*8 ++ | decode_RB RB, INS ++ | add.d CARG2, BASE, RB ++ | decode_RDtoRC8 RC, RD ++ | ld.d TAB:RB, 0(CARG2) ++ | add.d RA, BASE, RA ++ | srli.w TMP0, RC, 3 ++ | checktab TAB:RB, ->vmeta_tgetb ++ | ld.w TMP1, TAB:RB->asize ++ | ld.d TMP2, TAB:RB->array ++ | sltu TMP1, TMP0, TMP1 ++ | add.d RC, TMP2, RC ++ | beqz TMP1, ->vmeta_tgetb ++ | ld.d CRET1, 0(RC) ++ | beq CRET1, TISNIL, >5 ++ |1: ++ | st.d CRET1, 0(RA) ++ | ins_next ++ | ++ |5: // Check for __index if table value is nil. 
++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! ++ break; ++ case BC_TGETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add.d RB, BASE, RB ++ | add.d RC, BASE, RC ++ | ld.d TAB:CARG1, 0(RB) ++ | ld.w CARG2, 0(RC) ++ | add.d RA, BASE, RA ++ | cleartp TAB:CARG1 ++ | ld.w TMP0, TAB:CARG1->asize ++ | ld.d TMP1, TAB:CARG1->array ++ | sltu TMP0, CARG2, TMP0 ++ | slli.w TMP2, CARG2, 3 ++ | add.d TMP3, TMP1, TMP2 ++ | beqz TMP0, ->vmeta_tgetr // In array part? ++ | ld.d TMP1, 0(TMP3) ++ |->BC_TGETR_Z: ++ | ins_next1 ++ | st.d TMP1, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_TSETV: ++ | // RA = src*8, RB = table*8, RC = key*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG2, BASE, RB ++ | add.d CARG3, BASE, RC ++ | ld.d TAB:RB, 0(CARG2) ++ | ld.d TMP2, 0(CARG3) ++ | add.d RA, BASE, RA ++ | checktab TAB:RB, ->vmeta_tsetv ++ | slli.w RC, TMP2, 0 ++ | checkint TMP2, >5 ++ | ld.w TMP0, TAB:RB->asize ++ | ld.d TMP1, TAB:RB->array ++ | sltu TMP0, RC, TMP0 ++ | slli.w TMP2, RC, 3 ++ | beqz TMP0, ->vmeta_tsetv // Integer key and in array part? ++ | add.d TMP1, TMP1, TMP2 ++ | ld.bu TMP3, TAB:RB->marked ++ | ld.d TMP0, 0(TMP1) ++ | ld.d CRET1, 0(RA) ++ | beq TMP0, TISNIL, >3 ++ |1: ++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) ++ | st.d CRET1, 0(TMP1) ++ | bnez TMP2, >7 ++ |2: ++ | ins_next ++ | ++ |3: // Check for __newindex if previous value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP2, TAB:TMP2->nomm ++ | andi TMP2, TMP2, 1<vmeta_tsetv ++ |5: ++ | gettp TMP0, TMP2 ++ | addi.d TMP0, TMP0, -LJ_TSTR ++ | bnez TMP0, ->vmeta_tsetv ++ | cleartp STR:RC, TMP2 ++ | b ->BC_TSETS_Z // String key? ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. 
++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETS: ++ | // RA = src*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG2, BASE, RB ++ | sub.d CARG3, KBASE, RC ++ | ld.d TAB:RB, 0(CARG2) ++ | ld.d RC, -8(CARG3) // KBASE-8-str_const*8 ++ | add.d RA, BASE, RA ++ | cleartp STR:RC ++ | checktab TAB:RB, ->vmeta_tsets1 ++ |->BC_TSETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 ++ | ld.w TMP0, TAB:RB->hmask ++ | ld.w TMP1, STR:RC->sid ++ | ld.d NODE:TMP2, TAB:RB->node ++ | st.b r0, TAB:RB->nomm // Clear metamethod cache. ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask ++ | slli.w TMP0, TMP1, 5 ++ | slli.w TMP1, TMP1, 3 ++ | sub.w TMP1, TMP0, TMP1 ++ | addi.d TMP3, r0, LJ_TSTR ++ | add.d NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ | fld.d FTMP0, 0(RA) ++ |1: ++ | ld.d TMP0, NODE:TMP2->key ++ | ld.d CARG2, NODE:TMP2->val ++ | ld.d NODE:TMP1, NODE:TMP2->next ++ | ld.bu TMP3, TAB:RB->marked ++ | bne TMP0, RC, >5 ++ | ld.d TAB:TMP0, TAB:RB->metatable ++ | beq CARG2, TISNIL, >4 // Key found, but nil value? ++ |2: ++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(table) ++ | fst.d FTMP0, NODE:TMP2->val ++ | bnez TMP3, >7 ++ |3: ++ | ins_next ++ | ++ |4: // Check for __newindex if previous value is nil. ++ | beqz TAB:TMP0, <2 // No metatable: done. ++ | ld.bu TMP0, TAB:TMP0->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets ++ | ++ |5: // Follow hash chain. ++ | or NODE:TMP2, NODE:TMP1, r0 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, add a new one ++ | ++ | // But check for __newindex first. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | .ADD16I CARG3, DISPATCH, DISPATCH_GL(tmptv) ++ | beqz TAB:TMP2, >6 // No metatable: continue. ++ | ld.bu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. 
++ |6: ++ | st.d RC, 0(CARG3) ++ | st.d BASE, L->base ++ | or CARG2, TAB:RB, r0 ++ | st.d PC, SAVE_PC(sp) ++ | or CARG1, L, r0 ++ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k ++ | // Returns TValue *. ++ | ld.d BASE, L->base ++ | fst.d FTMP0, 0(CRET1) ++ | b <3 // No 2nd write barrier needed. ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <3 ++ break; ++ case BC_TSETB: ++ | // RA = src*8, RB = table*8, RC = index*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG2, BASE, RB ++ | add.d RA, BASE, RA ++ | ld.d TAB:RB, 0(CARG2) ++ | srli.w TMP0, RC, 3 ++ | checktab RB, ->vmeta_tsetb ++ | ld.w TMP1, TAB:RB->asize ++ | ld.d TMP2, TAB:RB->array ++ | sltu TMP1, TMP0, TMP1 ++ | add.d RC, TMP2, RC ++ | beqz TMP1, ->vmeta_tsetb ++ | ld.d TMP1, 0(RC) ++ | ld.bu TMP3, TAB:RB->marked ++ | beq TMP1, TISNIL, >5 ++ |1: ++ | ld.d CRET1, 0(RA) ++ | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table) ++ | st.d CRET1, 0(RC) ++ | bnez TMP1, >7 ++ |2: ++ | ins_next ++ | ++ |5: // Check for __newindex if previous value is nil. ++ | ld.d TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | ld.bu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. 
++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add.d CARG1, BASE, RB ++ | add.d CARG3, BASE, RC ++ | ld.d TAB:CARG2, 0(CARG1) ++ | ld.w CARG3, 0(CARG3) ++ | cleartp TAB:CARG2 ++ | ld.bu TMP3, TAB:CARG2->marked ++ | ld.w TMP0, TAB:CARG2->asize ++ | ld.d TMP1, TAB:CARG2->array ++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) ++ | add.d RA, BASE, RA ++ | bnez TMP2, >7 ++ |2: ++ | sltu TMP0, CARG3, TMP0 ++ | slli.w TMP2, CARG3, 3 ++ | add.d CRET1, TMP1, TMP2 ++ | beqz TMP0, ->vmeta_tsetr // In array part? ++ |->BC_TSETR_Z: ++ | ld.d TMP1, 0(RA) ++ | ins_next1 ++ | st.d TMP1, 0(CRET1) ++ | ins_next2 ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, CRET1, <2 ++ break; ++ ++ case BC_TSETM: ++ | // RA = base*8 (table at base-1), RD = num_const*8 (start index) ++ | add.d RA, BASE, RA ++ |1: ++ | add.d TMP3, KBASE, RD ++ | ld.d TAB:CARG2, -8(RA) // Guaranteed to be a table. ++ | addi.w TMP0, MULTRES, -8 ++ | ld.w TMP3, 0(TMP3) // Integer constant is in lo-word. ++ | srli.w CARG3, TMP0, 3 ++ | beqz TMP0, >4 // Nothing to copy? ++ | cleartp TAB:CARG2 ++ | add.w CARG3, CARG3, TMP3 ++ | ld.w TMP2, TAB:CARG2->asize ++ | slli.w TMP1, TMP3, 3 ++ | ld.bu TMP3, TAB:CARG2->marked ++ | ld.d CARG1, TAB:CARG2->array ++ | sltu TMP4, TMP2, CARG3 ++ | add.d TMP2, RA, TMP0 ++ | bnez TMP4, >5 ++ | add.d TMP1, TMP1, CARG1 ++ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) ++ |3: // Copy result slots to table. ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | sltu TMP4, RA, TMP2 ++ | st.d CRET1, 0(TMP1) ++ | addi.d TMP1, TMP1, 8 ++ | bnez TMP4, <3 ++ | bnez TMP0, >7 ++ |4: ++ | ins_next ++ | ++ |5: // Need to resize array part. 
++ | st.d BASE, L->base ++ | st.d PC, SAVE_PC(sp) ++ | or BASE, RD, r0 ++ | or CARG1, L, r0 ++ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) ++ | // Must not reallocate the stack. ++ | or RD, BASE, r0 ++ | ld.d BASE, L->base // Reload BASE for lack of a saved register. ++ | b <1 ++ | ++ |7: // Possible table write barrier for any value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, TMP0, <4 ++ break; ++ ++ /* -- Calls and vararg handling ----------------------------------------- */ ++ ++ case BC_CALLM: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ | add.w NARGS8:RC, NARGS8:RC, MULTRES ++ | b ->BC_CALL_Z ++ break; ++ case BC_CALL: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ |->BC_CALL_Z: ++ | or TMP2, BASE, r0 ++ | add.d BASE, BASE, RA ++ | ld.d LFUNC:RB, 0(BASE) ++ | addi.d BASE, BASE, 16 ++ | addi.w NARGS8:RC, NARGS8:RC, -8 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_CALLMT: ++ | // RA = base*8, (RB = 0,) RC = extra_nargs*8 ++ | add.w NARGS8:RD, NARGS8:RD, MULTRES ++ | b ->BC_CALLT_Z1 ++ break; ++ case BC_CALLT: ++ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 ++ |->BC_CALLT_Z1: ++ | add.d RA, BASE, RA ++ | ld.d LFUNC:RB, 0(RA) ++ | or NARGS8:RC, RD, r0 ++ | ld.d TMP1, FRAME_PC(BASE) ++ | addi.d RA, RA, 16 ++ | addi.w NARGS8:RC, NARGS8:RC, -8 ++ | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt ++ |->BC_CALLT_Z: ++ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. ++ | ld.bu TMP3, LFUNC:CARG3->ffid ++ | xori TMP2, TMP1, FRAME_VARG ++ | bnez TMP0, >7 ++ |1: ++ | st.d LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. ++ | sltui CARG4, TMP3, 2 // (> FF_C) Calling a fast function? 
++ | or TMP2, BASE, r0 ++ | or RB, CARG3, r0 ++ | or TMP3, NARGS8:RC, r0 ++ | beqz NARGS8:RC, >3 ++ |2: ++ | ld.d CRET1, 0(RA) ++ | addi.d RA, RA, 8 ++ | addi.w TMP3, TMP3, -8 ++ | st.d CRET1, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | bnez TMP3, <2 ++ |3: ++ | or TMP0, TMP0, CARG4 ++ | beqz TMP0, >5 ++ |4: ++ | ins_callt ++ | ++ |5: // Tailcall to a fast function with a Lua frame below. ++ | ld.w INS, -4(TMP1) ++ | decode_RA RA, INS ++ | sub.d TMP1, BASE, RA ++ | ld.d TMP1, -32(TMP1) ++ | cleartp LFUNC:TMP1 ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. ++ | b <4 ++ | ++ |7: // Tailcall from a vararg function. ++ | andi CARG4, TMP2, FRAME_TYPEP ++ | sub.d TMP2, BASE, TMP2 // Relocate BASE down. ++ | bnez CARG4, <1 // Vararg frame below? ++ | or BASE, TMP2, r0 ++ | ld.d TMP1, FRAME_PC(TMP2) ++ | andi TMP0, TMP1, FRAME_TYPE ++ | b <1 ++ break; ++ ++ case BC_ITERC: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) ++ | or TMP2, BASE, r0 // Save old BASE for vmeta_call. ++ | add.d BASE, BASE, RA ++ | ld.d RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1. ++ | ld.d CARG1, -16(BASE) ++ | ld.d CARG2, -8(BASE) ++ | addi.d NARGS8:RC, r0, 16 // Iterators get 2 arguments. ++ | st.d RB, 0(BASE) // Copy callable. ++ | st.d CARG1, 16(BASE) // Copy state. ++ | st.d CARG2, 24(BASE) // Copy control var. ++ | addi.d BASE, BASE, 16 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_ITERN: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) ++ |.if JIT ++ | hotloop ++ |.endif ++ |->vm_IITERN: ++ | add.d RA, BASE, RA ++ | ld.d TAB:RB, -16(RA) ++ | ld.w RC, -8(RA) // Get index from control var. ++ | cleartp TAB:RB ++ | addi.d PC, PC, 4 ++ | ld.w TMP0, TAB:RB->asize ++ | ld.d TMP1, TAB:RB->array ++ | slli.d CARG3, TISNUM, 47 ++ |1: // Traverse array part. ++ | sltu TMP2, RC, TMP0 ++ | slli.w TMP3, RC, 3 ++ | beqz TMP2, >5 // Index points after array part? 
++ | add.d TMP3, TMP1, TMP3 ++ | ld.d CARG1, 0(TMP3) ++ | ld.hu RD, -4+OFS_RD(PC) // ITERL RD ++ | or TMP2, RC, CARG3 ++ | addi.w RC, RC, 1 ++ | beq CARG1, TISNIL, <1 // Skip holes in array part. ++ | st.d TMP2, 0(RA) ++ | st.d CARG1, 8(RA) ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ | decode_BC4b RD ++ | add.d RD, RD, TMP3 ++ | st.w RC, -8(RA) // Update control var. ++ | add.d PC, PC, RD ++ |3: ++ | ins_next ++ | ++ |5: // Traverse hash part. ++ | ld.w TMP1, TAB:RB->hmask ++ | sub.w RC, RC, TMP0 ++ | ld.d TMP2, TAB:RB->node ++ |6: ++ | sltu CARG1, TMP1, RC // End of iteration? Branch to ITERL+1. ++ | slli.w TMP3, RC, 5 ++ | bnez CARG1, <3 ++ | slli.w RB, RC, 3 ++ | sub.w TMP3, TMP3, RB ++ | add.d NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8) ++ | ld.d CARG1, 0(NODE:TMP3) ++ | ld.hu RD, -4+OFS_RD(PC) // ITERL RD ++ | addi.w RC, RC, 1 ++ | beq CARG1, TISNIL, <6 // Skip holes in hash part. ++ | ld.d CARG2, NODE:TMP3->key ++ | addu16i.d TMP3, r0, -0x2 // -BCBIAS_J*4 ++ | st.d CARG1, 8(RA) ++ | add.w RC, RC, TMP0 ++ | decode_BC4b RD ++ | add.w RD, RD, TMP3 ++ | st.d CARG2, 0(RA) ++ | add.d PC, PC, RD ++ | st.w RC, -8(RA) // Update control var. 
++ | b <3 ++ break; ++ ++ case BC_ISNEXT: ++ | // RA = base*8, RD = target (points to ITERN) ++ | add.d RA, BASE, RA ++ | srli.w TMP0, RD, 1 ++ | ld.d CFUNC:CARG1, -24(RA) ++ | add.d TMP0, PC, TMP0 ++ | ld.d CARG2, -16(RA) ++ | ld.d CARG3, -8(RA) ++ | addu16i.d TMP2, r0, -0x2 // -BCBIAS_J*4 ++ | checkfunc CFUNC:CARG1, >5 ++ | gettp CARG2, CARG2 ++ | addi.d CARG2, CARG2, -LJ_TTAB ++ | ld.bu TMP1, CFUNC:CARG1->ffid ++ | addi.d CARG3, CARG3, -LJ_TNIL ++ | or TMP3, CARG2, CARG3 ++ | addi.d TMP1, TMP1, -FF_next_N ++ | or TMP3, TMP3, TMP1 ++ | addu16i.d TMP1, r0, 0xfffe // LJ_KEYINDEX >> 16 ++ | bnez TMP3, >5 ++ | add.d PC, TMP0, TMP2 ++ | slli.d TMP1, TMP1, 16 ++ | addu16i.d TMP1, TMP1, 0x7fff // LJ_KEYINDEX & 0xffff ++ | slli.d TMP1, TMP1, 16 ++ | st.d TMP1, -8(RA) ++ |1: ++ | ins_next ++ |5: // Despecialize bytecode if any of the checks fail. ++ | addi.d TMP3, r0, BC_JMP ++ | addi.d TMP1, r0, BC_ITERC ++ | st.b TMP3, -4+OFS_OP(PC) ++ | add.d PC, TMP0, TMP2 ++ |.if JIT ++ | ld.b TMP0, OFS_OP(PC) ++ | addi.d TMP3, r0, BC_ITERN ++ | ld.hu TMP2, OFS_RD(PC) ++ | bne TMP0, TMP3, >6 ++ |.endif ++ | st.b TMP1, OFS_OP(PC) ++ | b <1 ++ |.if JIT ++ |6: // Unpatch JLOOP. ++ | .LDXD TMP0, DISPATCH, DISPATCH_J(trace) ++ | slli.w TMP2, TMP2, 3 ++ | add.d TMP0, TMP0, TMP2 ++ | ld.d TRACE:TMP2, 0(TMP0) ++ | ld.w TMP0, TRACE:TMP2->startins ++ | addi.d TMP3, r0, -256 ++ | and TMP0, TMP0, TMP3 ++ | or TMP0, TMP0, TMP1 ++ | st.w TMP0, 0(PC) ++ | b <1 ++ |.endif ++ break; ++ ++ case BC_VARG: ++ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 ++ | ld.d TMP0, FRAME_PC(BASE) ++ | decode_RDtoRC8 RC, RD ++ | decode_RB RB, INS ++ | add.d RC, BASE, RC ++ | add.d RA, BASE, RA ++ | addi.d RC, RC, FRAME_VARG ++ | add.d TMP2, RA, RB ++ | addi.d TMP3, BASE, -16 // TMP3 = vtop ++ | sub.d RC, RC, TMP0 // RC = vbase ++ | // Note: RC may now be even _above_ BASE if nargs was < numparams. ++ | sub.d TMP1, TMP3, RC ++ | beqz RB, >5 // Copy all varargs? 
++ | addi.d TMP2, TMP2, -16 ++ |1: // Copy vararg slots to destination slots. ++ | ld.d CARG1, 0(RC) ++ | sltu TMP0, RC, TMP3 ++ | addi.d RC, RC, 8 ++ | maskeqz CARG1, CARG1, TMP0 ++ | masknez TMP0, TISNIL, TMP0 ++ | or CARG1, CARG1, TMP0 ++ | st.d CARG1, 0(RA) ++ | sltu TMP0, RA, TMP2 ++ | addi.d RA, RA, 8 ++ | bnez TMP0, <1 ++ |3: ++ | ins_next ++ | ++ |5: // Copy all varargs. ++ | ld.d TMP0, L->maxstack ++ | addi.d MULTRES, r0, 8 // MULTRES = (0+1)*8 ++ | bge r0, TMP1, <3 // No vararg slots? ++ | add.d TMP2, RA, TMP1 ++ | sltu TMP2, TMP0, TMP2 ++ | addi.d MULTRES, TMP1, 8 ++ | bnez TMP2, >7 ++ |6: ++ | ld.d CRET1, 0(RC) ++ | addi.d RC, RC, 8 ++ | st.d CRET1, 0(RA) ++ | sltu TMP0, RC, TMP3 ++ | addi.d RA, RA, 8 ++ | bnez TMP0, <6 // More vararg slots? ++ | b <3 ++ | ++ |7: // Grow stack for varargs. ++ | st.d RA, L->top ++ | sub.d RA, RA, BASE ++ | st.d BASE, L->base ++ | sub.d BASE, RC, BASE // Need delta, because BASE may change. ++ | st.d PC, SAVE_PC(sp) ++ | srli.w CARG2, TMP1, 3 ++ | or CARG1, L, r0 ++ | bl extern lj_state_growstack // (lua_State *L, int n) ++ | or RC, BASE, r0 ++ | ld.d BASE, L->base ++ | add.d RA, BASE, RA ++ | add.d RC, BASE, RC ++ | addi.d TMP3, BASE, -16 ++ | b <6 ++ break; ++ ++ /* -- Returns ----------------------------------------------------------- */ ++ ++ case BC_RETM: ++ | // RA = results*8, RD = extra_nresults*8 ++ | add.w RD, RD, MULTRES ++ | b ->BC_RET_Z1 ++ break; ++ ++ case BC_RET: ++ | // RA = results*8, RD = (nresults+1)*8 ++ |->BC_RET_Z1: ++ | ld.d PC, FRAME_PC(BASE) ++ | add.d RA, BASE, RA ++ | or MULTRES, RD, r0 ++ |1: ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | ++ |->BC_RET_Z: ++ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return ++ | ld.w INS, -4(PC) ++ | addi.d TMP2, BASE, -16 ++ | addi.d RC, RD, -8 ++ | decode_RA TMP0, INS ++ | decode_RB RB, INS ++ | add.d TMP3, TMP2, RB ++ | sub.d BASE, TMP2, TMP0 ++ | beqz RC, >3 ++ |2: ++ | ld.d CRET1, 0(RA) ++ | 
addi.d RA, RA, 8 ++ | addi.d RC, RC, -8 ++ | st.d CRET1, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | bnez RC, <2 ++ |3: ++ | addi.d TMP3, TMP3, -8 ++ |5: ++ | sltu TMP0, TMP2, TMP3 ++ | ld.d LFUNC:TMP1, FRAME_FUNC(BASE) ++ | bnez TMP0, >6 ++ | cleartp LFUNC:TMP1 ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | ins_next ++ | ++ |6: // Fill up results with nil. ++ | st.d TISNIL, 0(TMP2) ++ | addi.d TMP2, TMP2, 8 ++ | b <5 ++ | ++ |->BC_RETV_Z: // Non-standard return case. ++ | andi TMP2, TMP1, FRAME_TYPEP ++ | bnez TMP2, ->vm_return ++ | // Return from vararg function: relocate BASE down. ++ | sub.d BASE, BASE, TMP1 ++ | ld.d PC, FRAME_PC(BASE) ++ | b <1 ++ break; ++ ++ case BC_RET0: case BC_RET1: ++ | // RA = results*8, RD = (nresults+1)*8 ++ | ld.d PC, FRAME_PC(BASE) ++ | add.d RA, BASE, RA ++ | or MULTRES, RD, r0 ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | ld.w INS, -4(PC) ++ | addi.d TMP2, BASE, -16 ++ if (op == BC_RET1) { ++ | ld.d CRET1, 0(RA) ++ } ++ | decode_RB RB, INS ++ | decode_RA RA, INS ++ | sub.d BASE, TMP2, RA ++ if (op == BC_RET1) { ++ | st.d CRET1, 0(TMP2) ++ } ++ |5: ++ | sltu TMP0, RD, RB ++ | ld.d TMP1, FRAME_FUNC(BASE) ++ | bnez TMP0, >6 ++ | cleartp LFUNC:TMP1 ++ | ld.d TMP1, LFUNC:TMP1->pc ++ | ins_next1 ++ | ld.d KBASE, PC2PROTO(k)(TMP1) ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | addi.d TMP2, TMP2, 8 ++ | addi.d RD, RD, 8 ++ if (op == BC_RET1) { ++ | st.d TISNIL, 0(TMP2) ++ } else { ++ | st.d TISNIL, -8(TMP2) ++ } ++ | b <5 ++ break; ++ ++ /* -- Loops and branches ------------------------------------------------ */ ++ ++ case BC_FORL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IFORL follows. 
++ break; ++ ++ case BC_JFORI: ++ case BC_JFORL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_FORI: ++ case BC_IFORL: ++ | // RA = base*8, RD = target (after end of loop or start of loop) ++ vk = (op == BC_IFORL || op == BC_JFORL); ++ | add.d RA, BASE, RA ++ | ld.d CARG1, FORL_IDX*8(RA) // CARG1 = IDX ++ | ld.d CARG2, FORL_STEP*8(RA) // CARG2 = STEP ++ | ld.d CARG3, FORL_STOP*8(RA) // CARG3 = STOP ++ | gettp CARG4, CARG1 ++ | gettp CARG5, CARG2 ++ | gettp CARG6, CARG3 ++ if (op != BC_JFORL) { ++ | srli.w RD, RD, 1 ++ | addu16i.d TMP2, r0, -0x2 // -BCBIAS_J<<2 ++ | add.d TMP2, RD, TMP2 ++ } ++ | bne CARG4, TISNUM, >3 ++ | slli.w CARG4, CARG1, 0 // start ++ | slli.w CARG3, CARG3, 0 // stop ++ if (!vk) { // init ++ | bne CARG6, TISNUM, ->vmeta_for ++ | bne CARG5, TISNUM, ->vmeta_for ++ | bstrpick.d TMP0, CARG2, 31, 31 // sign ++ | slt CARG2, CARG3, CARG4 ++ | slt TMP1, CARG4, CARG3 ++ | maskeqz TMP1, TMP1, TMP0 ++ | masknez CARG2, CARG2, TMP0 ++ | or CARG2, CARG2, TMP1 // CARG2=0: +,start <= stop or -,start >= stop ++ } else { ++ | slli.w CARG5, CARG2, 0 // step ++ | add.w CARG1, CARG4, CARG5 // start + step ++ | xor TMP3, CARG1, CARG4 // y^a ++ | xor TMP1, CARG1, CARG5 // y^b ++ | and TMP3, TMP3, TMP1 ++ | slt TMP1, CARG1, CARG3 // start+step < stop ? ++ | slt CARG3, CARG3, CARG1 // stop < start+step ? ++ | slt TMP0, CARG5, r0 // step < 0 ? ++ | slt TMP3, TMP3, r0 // ((y^a) & (y^b)) < 0: overflow. 
++ | maskeqz TMP1, TMP1, TMP0 ++ | masknez CARG3, CARG3, TMP0 ++ | or CARG3, CARG3, TMP1 ++ | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue ++ | bstrpick.d CARG1, CARG1, 31, 0 ++ | settp CARG1, TISNUM ++ | st.d CARG1, FORL_IDX*8(RA) ++ } ++ |1: ++ if (op == BC_FORI) { ++ | maskeqz TMP2, TMP2, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS ++ | add.d PC, PC, TMP2 ++ } else if (op == BC_JFORI) { ++ | add.d PC, PC, TMP2 ++ | ld.hu RD, -4+OFS_RD(PC) ++ } else if (op == BC_IFORL) { ++ | masknez TMP2, TMP2, CARG2 // CARG2!=0: next INS; CARG2==0: jump back ++ | add.d PC, PC, TMP2 ++ } ++ | ins_next1 ++ | st.d CARG1, FORL_EXT*8(RA) ++ |2: ++ if (op == BC_JFORI) { ++ | decode_BC8b RD ++ | beqz CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop ++ } else if (op == BC_JFORL) { ++ | beqz CARG2, =>BC_JLOOP ++ } ++ | ins_next2 ++ | ++ |3: // FP loop. ++ | fld.d FTMP0, FORL_IDX*8(RA) // start ++ | fld.d FTMP1, FORL_STOP*8(RA) // stop ++ | ld.d TMP0, FORL_STEP*8(RA) // step ++ | slt TMP0, TMP0, r0 // step < 0 ? ++ | movgr2fr.d FTMP2, TMP0 ++ if (!vk) { ++ | sltui TMP3, CARG4, LJ_TISNUM // start is number ? ++ | sltui TMP0, CARG5, LJ_TISNUM // step is number ? ++ | sltui TMP1, CARG6, LJ_TISNUM // stop is number ? ++ | and TMP3, TMP3, TMP1 ++ | and TMP0, TMP0, TMP3 ++ | beqz TMP0, ->vmeta_for // if start or step or stop isn't number ++ | fcmp.clt.d FCC0, FTMP0, FTMP1 // start < stop ? ++ | fcmp.clt.d FCC1, FTMP1, FTMP0 // stop < start ? ++ | movcf2fr FTMP3, FCC0 ++ | movcf2fr FTMP4, FCC1 ++ | movfr2cf FCC0, FTMP2 ++ | fsel FTMP2, FTMP4, FTMP3, FCC0 ++ | movfr2gr.d CARG2, FTMP2 // CARG2=0:+,startstop ++ | b <1 ++ } else { ++ | fld.d FTMP3, FORL_STEP*8(RA) ++ | fadd.d FTMP0, FTMP0, FTMP3 // start + step ++ | fcmp.clt.d FCC0, FTMP0, FTMP1 // start + step < stop ? 
++ | fcmp.clt.d FCC1, FTMP1, FTMP0 ++ | movcf2fr FTMP3, FCC0 ++ | movcf2fr FTMP4, FCC1 ++ | movfr2cf FCC0, FTMP2 ++ | fsel FTMP2, FTMP4, FTMP3, FCC0 ++ | movfr2gr.d CARG2, FTMP2 ++ if (op == BC_IFORL) { ++ | masknez TMP2, TMP2, CARG2 ++ | add.d PC, PC, TMP2 ++ } ++ | fst.d FTMP0, FORL_IDX*8(RA) ++ | ins_next1 ++ | fst.d FTMP0, FORL_EXT*8(RA) ++ | b <2 ++ } ++ break; ++ ++ case BC_ITERL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IITERL follows. ++ break; ++ ++ case BC_JITERL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IITERL: ++ | // RA = base*8, RD = target ++ | add.d RA, BASE, RA ++ | ld.d TMP1, 0(RA) ++ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. ++ if (op == BC_JITERL) { ++ | st.d TMP1,-8(RA) ++ | b =>BC_JLOOP ++ } else { ++ | branch_RD // Otherwise save control var + branch. ++ | st.d TMP1, -8(RA) ++ } ++ |1: ++ | ins_next ++ break; ++ ++ case BC_LOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | // Note: RA/RD is only used by trace recorder to determine scope/extent ++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop. ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_ILOOP follows. ++ break; ++ ++ case BC_ILOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | ins_next ++ break; ++ ++ case BC_JLOOP: ++ |.if JIT ++ | // RA = base*8 (ignored), RD = traceno*8 ++ | .LDXD TMP0, DISPATCH, DISPATCH_J(trace) ++ | add.d TMP0, TMP0, RD ++ | // Traces on LOONGARCH don't store the trace number, so use 0. 
++ | .STXD r0, DISPATCH, DISPATCH_GL(vmstate) ++ | ld.d TRACE:TMP1, 0(TMP0) ++ | .STXD BASE, DISPATCH, DISPATCH_GL(jit_base) // store Current JIT code L->base ++ | ld.d TMP1, TRACE:TMP1->mcode ++ | .ADD16I JGL, DISPATCH, GG_DISP2G+32768 ++ | .STXD L, DISPATCH, DISPATCH_GL(tmpbuf.L) ++ | jirl r0, TMP1, 0 ++ |.endif ++ break; ++ ++ case BC_JMP: ++ | // RA = base*8 (only used by trace recorder), RD = target ++ | branch_RD // PC + (jump - 0x8000)<<2 ++ | ins_next ++ break; ++ ++ /* -- Function headers -------------------------------------------------- */ ++ ++ case BC_FUNCF: ++ |.if JIT ++ | hotcall ++ |.endif ++ case BC_FUNCV: /* NYI: compiled vararg functions. */ ++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. ++ break; ++ ++ case BC_JFUNCF: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IFUNCF: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | ld.d TMP2, L->maxstack ++ | ld.bu TMP1, -4+PC2PROTO(numparams)(PC) ++ | ld.d KBASE, -4+PC2PROTO(k)(PC) ++ | sltu TMP0, TMP2, RA ++ | slli.w TMP1, TMP1, 3 // numparams*8 ++ | bnez TMP0, ->vm_growstack_l ++ |2: ++ | sltu TMP0, NARGS8:RC, TMP1 // Check for missing parameters. ++ | bnez TMP0, >3 ++ if (op == BC_JFUNCF) { ++ | decode_RD RD, INS ++ | b =>BC_JLOOP ++ } else { ++ | ins_next ++ } ++ | ++ |3: // Clear missing parameters. ++ | add.d TMP0, BASE, NARGS8:RC ++ | st.d TISNIL, 0(TMP0) ++ | addi.w NARGS8:RC, NARGS8:RC, 8 ++ | b <2 ++ break; ++ ++ case BC_JFUNCV: ++#if !LJ_HASJIT ++ break; ++#endif ++ | NYI // NYI: compiled vararg functions ++ break; /* NYI: compiled vararg functions. */ ++ ++ case BC_IFUNCV: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | addi.w TMP0, r0, LJ_TFUNC ++ | add.d TMP1, BASE, RC ++ | ld.d TMP2, L->maxstack ++ | settp LFUNC:RB, TMP0 ++ | add.d TMP0, RA, RC ++ | st.d LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. 
++ | addi.d TMP3, RC, 16+FRAME_VARG ++ | sltu TMP0, TMP0, TMP2 ++ | ld.d KBASE, -4+PC2PROTO(k)(PC) ++ | st.d TMP3, 8(TMP1) // Store delta + FRAME_VARG. ++ | beqz TMP0, ->vm_growstack_l ++ | ld.bu TMP2, -4+PC2PROTO(numparams)(PC) ++ | or RA, BASE, r0 ++ | or RC, TMP1, r0 ++ | ins_next1 ++ | addi.d BASE, TMP1, 16 ++ | beqz TMP2, >2 ++ |1: ++ | ld.d TMP0, 0(RA) ++ | sltu CARG2, RA, RC // Less args than parameters? ++ | or CARG1, TMP0, r0 ++ | addi.d RA, RA, 8 ++ | addi.d TMP1, TMP1, 8 ++ | addi.w TMP2, TMP2, -1 ++ | beqz CARG2, >3 ++ | masknez TMP3, CARG1, CARG2 // Clear old fixarg slot (help the GC). ++ | maskeqz CARG1, TISNIL, CARG2 ++ | or CARG1, CARG1, TMP3 ++ | st.d CARG1, -8(RA) ++ | st.d TMP0, 8(TMP1) ++ | bnez TMP2, <1 ++ |2: ++ | ins_next2 ++ |3: ++ | maskeqz TMP0, TMP0, CARG2 // Clear missing fixargs. ++ | masknez TMP3, TISNIL, CARG2 ++ | or TMP0, TMP0, TMP3 ++ | st.d TMP0, 8(TMP1) ++ | bnez TMP2, <1 ++ | b <2 ++ break; ++ ++ case BC_FUNCC: ++ case BC_FUNCCW: ++ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 ++ if (op == BC_FUNCC) { ++ | ld.d CARG4, CFUNC:RB->f ++ } else { ++ | .LDXD CARG4, DISPATCH, DISPATCH_GL(wrapf) ++ } ++ | add.d TMP1, RA, NARGS8:RC ++ | ld.d TMP2, L->maxstack ++ | add.d RC, BASE, NARGS8:RC ++ | st.d BASE, L->base // base of currently excuting function ++ | st.d RC, L->top ++ | sltu TMP3, TMP2, TMP1 ++ | li_vmstate C // addi.w TMP0, r0, ~LJ_VMST_C ++ if (op == BC_FUNCCW) { ++ | ld.d CARG2, CFUNC:RB->f ++ } ++ | or CARG1, L, r0 ++ | bnez TMP3, ->vm_growstack_c // Need to grow stack. ++ | st_vmstate // .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate) ++ | jirl r1, CARG4, 0 // (lua_State *L [, lua_CFunction f]) ++ | // Returns nresults. ++ | ld.d BASE, L->base ++ | ld.d TMP1, L->top ++ | .STXD L, DISPATCH, DISPATCH_GL(cur_L) ++ | slli.w RD, CRET1, 3 ++ | li_vmstate INTERP ++ | ld.d PC, FRAME_PC(BASE) // Fetch PC of caller. 
++ | sub.d RA, TMP1, RD // RA = L->top - nresults*8 ++ | st_vmstate ++ | b ->vm_returnc ++ break; ++ ++ /* ---------------------------------------------------------------------- */ ++ ++ default: ++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); ++ exit(2); ++ break; ++ } ++} ++ ++static int build_backend(BuildCtx *ctx) ++{ ++ int op; ++ ++ dasm_growpc(Dst, BC__MAX); ++ ++ build_subroutines(ctx); ++ ++ |.code_op ++ for (op = 0; op < BC__MAX; op++) ++ build_ins(ctx, (BCOp)op, op); ++ ++ return BC__MAX; ++} ++ ++/* Emit pseudo frame-info for all assembler functions. */ ++static void emit_asm_debug(BuildCtx *ctx) ++{ ++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); ++ int i; ++ switch (ctx->mode) { ++ case BUILD_elfasm: ++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe0:\n" ++ "\t.4byte .LECIE0-.LSCIE0\n" ++ ".LSCIE0:\n" ++ "\t.4byte 0xffffffff\n" ++ "\t.byte 0x1\n" ++ "\t.string \"\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. 
*/ ++ "\t.byte 0xc\n\t.uleb128 3\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 3\n" ++ ".LECIE0:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE0:\n" ++ "\t.4byte .LEFDE0-.LASFDE0\n" ++ ".LASFDE0:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte .Lbegin\n" ++ "\t.8byte %d\n" ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra*/ ++ "\t.byte 0x96\n\t.uleb128 2*6\n", /* offset fp */ ++ fcofs, CFRAME_SIZE); ++ for (i = 31; i >= 23; i--) /* offset r31-r23 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(31-i+7)); ++ for (i = 31; i >= 24; i--) /* offset f31-f24 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(31-i+16)); ++ fprintf(ctx->fp, ++ "\t.align 3\n" ++ ".LEFDE0:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".LSFDE1:\n" ++ "\t.4byte .LEFDE1-.LASFDE1\n" ++ ".LASFDE1:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.4byte lj_vm_ffi_call\n" ++ "\t.4byte %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra*/ ++ "\t.byte 0x96\n\t.uleb128 2*6\n" /* offset fp */ ++ "\t.align 3\n" ++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#if !LJ_NO_UNWIND ++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe1:\n" ++ "\t.4byte .LECIE1-.LSCIE1\n" ++ ".LSCIE1:\n" ++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zPR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. 
*/ ++ "\t.uleb128 6\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.4byte lj_err_unwind_dwarf-.\n" ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 3\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE1:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE2:\n" ++ "\t.4byte .LEFDE2-.LASFDE2\n" ++ ".LASFDE2:\n" ++ "\t.4byte .LASFDE2-.Lframe1\n" ++ "\t.4byte .Lbegin-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra*/ ++ "\t.byte 0x96\n\t.uleb128 2*6\n", /* offset fp */ ++ fcofs); ++ for (i = 31; i >= 23; i--) /* offset r23-r31 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(31-i+7)); ++ for (i = 31; i >= 24; i--) /* offset f24-f31 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(31-i+16)); ++ fprintf(ctx->fp, ++ "\t.align 2\n" ++ ".LEFDE2:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".Lframe2:\n" ++ "\t.4byte .LECIE2-.LSCIE2\n" ++ ".LSCIE2:\n" ++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. 
*/ ++ "\t.uleb128 1\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 3\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE2:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE3:\n" ++ "\t.4byte .LEFDE3-.LASFDE3\n" ++ ".LASFDE3:\n" ++ "\t.4byte .LASFDE3- .Lframe2\n" ++ "\t.4byte lj_vm_ffi_call-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0x81\n\t.uleb128 2*5\n" /* offset ra*/ ++ "\t.byte 0x96\n\t.uleb128 2*6\n" /* offset fp */ ++ "\t.align 2\n" ++ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#endif ++#if !LJ_NO_UNWIND ++ /* NYI */ ++#endif ++ break; ++ default: ++ break; ++ } ++} ++ diff --git a/luajit2-2.1-20250529.tar.gz b/luajit2-2.1-20250529.tar.gz deleted file mode 100644 index 3af210e..0000000 --- a/luajit2-2.1-20250529.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a3f6f5ead9fc69603e51c66877614e6b5e93332d552f35d08f932447357ed593 -size 1164879 diff --git a/luajit2-2.1-20250826.tar.gz b/luajit2-2.1-20250826.tar.gz new file mode 100644 index 0000000..5de0b70 --- /dev/null +++ b/luajit2-2.1-20250826.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a49743ad6ce4b7f19aac71b55a08052c1feb62750f051982082c12bf62f39c0 +size 1164966 diff --git a/luajit2.changes b/luajit2.changes index fc4b891..10e43fc 100644 --- a/luajit2.changes +++ b/luajit2.changes @@ -1,3 +1,15 @@ +------------------------------------------------------------------- +Wed Oct 8 07:29:03 UTC 2025 - John Paul Adrian Glaubitz + +- Updated to 2.1.20250826 (1756211046) + * FFI: Fix dangling CType references (again). + * Avoid out-of-range PC for stack overflow error from snapshot restore. + * x86/x64: Don't use undefined MUL/IMUL zero flag. + * Windows: Add lua52compat option to msvcbuild.bat. 
+- Cherry-pick riscv64-support.patch to add RISC-V support +- Cherry-pick loong64-support.patch to add LoongArch support +- Increase _default_patch_fuzz to 2 to allow patches to apply + ------------------------------------------------------------------- Fri Jul 11 14:07:00 UTC 2025 - Илья Индиго diff --git a/luajit2.spec b/luajit2.spec index 6587114..5a57511 100644 --- a/luajit2.spec +++ b/luajit2.spec @@ -1,7 +1,7 @@ # # spec file for package luajit2 # -# Copyright (c) 2025 SUSE LLC +# Copyright (c) 2025 SUSE LLC and contributors # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -15,13 +15,13 @@ # Please submit bugfixes or comments via https://bugs.opensuse.org/ # - +%global _default_patch_fuzz 2 %define abi_ver 5.1 %define so_ver 2 %define lib_ver 5_1-%{so_ver} %define major 2.1 -%define minor 20250529 -%define upstream 1748495995 +%define minor 20250826 +%define upstream 1756211046 Name: luajit2 Version: %{major}.%{minor} Release: 0 @@ -31,6 +31,8 @@ URL: https://github.com/openresty/%{name} Source0: https://github.com/openresty/%{name}/archive/refs/tags/v%{major}-%{minor}.tar.gz#/%{name}-%{major}-%{minor}.tar.gz Source1: baselibs.conf Patch0: %{name}-name.patch +Patch1: https://patch-diff.githubusercontent.com/raw/openresty/luajit2/pull/236.patch#/riscv64-support.patch#/riscv64-support.patch +Patch2: https://github.com/openresty/luajit2/pull/245/commits/8e40aca7b3a919456b15698273e9b00e9250e769.patch#/loong64-support.patch BuildRequires: pkgconfig Requires: lib%{name}-%{lib_ver} = %{version} diff --git a/riscv64-support.patch b/riscv64-support.patch new file mode 100644 index 0000000..169e605 --- /dev/null +++ b/riscv64-support.patch @@ -0,0 +1,12433 @@ +From f97b378df83699f05cb34b463f3add4ce70e06a0 Mon Sep 17 00:00:00 2001 +From: gns +Date: Tue, 5 Mar 2024 17:09:31 +0800 +Subject: [PATCH 01/22] riscv(support): add RISC-V 64 arch base definition + +--- + 
src/lj_arch.h | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/src/lj_arch.h b/src/lj_arch.h +index 9a4eac68a..8a0f058dd 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -33,6 +33,8 @@ + #define LUAJIT_ARCH_mips64 7 + #define LUAJIT_ARCH_S390X 8 + #define LUAJIT_ARCH_s390x 8 ++#define LUAJIT_ARCH_RISCV64 9 ++#define LUAJIT_ARCH_riscv64 9 + + /* Target OS. */ + #define LUAJIT_OS_OTHER 0 +@@ -69,6 +71,8 @@ + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 + #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 ++#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 ++#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64 + #else + #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" + #endif +@@ -470,6 +474,20 @@ + #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + #define LJ_TARGET_GC64 1 + #define LJ_ARCH_NOJIT 1 /* NYI */ ++#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64 ++ ++#define LJ_ARCH_NAME "riscv64" ++#define LJ_ARCH_BITS 64 ++#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */ ++#define LJ_TARGET_RISCV64 1 ++#define LJ_TARGET_GC64 1 ++#define LJ_TARGET_EHRETREG 10 ++#define LJ_TARGET_EHRAREG 1 ++#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\ ++ AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */ ++#define LJ_TARGET_MASKSHIFT 1 ++#define LJ_TARGET_MASKROT 1 ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + + #else + #error "No target architecture defined" +@@ -554,6 +572,10 @@ + #error "Only n64 ABI supported for MIPS64" + #undef LJ_TARGET_MIPS + #endif ++#elif LJ_TARGET_RISCV64 ++#if !defined(__riscv_float_abi_double) ++#error "Only RISC-V 64 double float supported for now" ++#endif + #endif + #endif + + +From 02209d5dad00c42b8c11d5c02474e5c2d4ac7028 Mon Sep 17 00:00:00 2001 +From: gns +Date: Tue, 5 Mar 2024 17:11:11 +0800 +Subject: [PATCH 02/22] riscv(dynasm): add RISC-V support + +--- + 
dynasm/dasm_riscv.h | 435 ++++++++++++++++++ + dynasm/dasm_riscv.lua | 979 ++++++++++++++++++++++++++++++++++++++++ + dynasm/dasm_riscv32.lua | 12 + + dynasm/dasm_riscv64.lua | 12 + + 4 files changed, 1438 insertions(+) + create mode 100644 dynasm/dasm_riscv.h + create mode 100644 dynasm/dasm_riscv.lua + create mode 100644 dynasm/dasm_riscv32.lua + create mode 100644 dynasm/dasm_riscv64.lua + +diff --git a/dynasm/dasm_riscv.h b/dynasm/dasm_riscv.h +new file mode 100644 +index 000000000..b2739fdbb +--- /dev/null ++++ b/dynasm/dasm_riscv.h +@@ -0,0 +1,435 @@ ++/* ++** DynASM RISC-V encoding engine. ++** Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++** Released under the MIT license. See dynasm.lua for full copyright notice. ++** ++** Contributed by gns from PLCT Lab, ISCAS. ++*/ ++ ++#include ++#include ++#include ++#include ++ ++#define DASM_ARCH "riscv" ++ ++#ifndef DASM_EXTERN ++#define DASM_EXTERN(a,b,c,d) 0 ++#endif ++ ++/* Action definitions. */ ++enum { ++ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, ++ /* The following actions need a buffer position. */ ++ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, ++ /* The following actions also have an argument. */ ++ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, ++ DASM__MAX ++}; ++ ++/* Maximum number of section buffer positions for a single dasm_put() call. */ ++#define DASM_MAXSECPOS 25 ++ ++/* DynASM encoder status codes. Action list offset or number are or'ed in. */ ++#define DASM_S_OK 0x00000000 ++#define DASM_S_NOMEM 0x01000000 ++#define DASM_S_PHASE 0x02000000 ++#define DASM_S_MATCH_SEC 0x03000000 ++#define DASM_S_RANGE_I 0x11000000 ++#define DASM_S_RANGE_SEC 0x12000000 ++#define DASM_S_RANGE_LG 0x13000000 ++#define DASM_S_RANGE_PC 0x14000000 ++#define DASM_S_RANGE_REL 0x15000000 ++#define DASM_S_UNDEF_LG 0x21000000 ++#define DASM_S_UNDEF_PC 0x22000000 ++ ++/* Macros to convert positions (8 bit section + 24 bit index). 
*/ ++#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) ++#define DASM_POS2BIAS(pos) ((pos)&0xff000000) ++#define DASM_SEC2POS(sec) ((sec)<<24) ++#define DASM_POS2SEC(pos) ((pos)>>24) ++#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) ++ ++/* Action list type. */ ++typedef const unsigned int *dasm_ActList; ++ ++/* Per-section structure. */ ++typedef struct dasm_Section { ++ int *rbuf; /* Biased buffer pointer (negative section bias). */ ++ int *buf; /* True buffer pointer. */ ++ size_t bsize; /* Buffer size in bytes. */ ++ int pos; /* Biased buffer position. */ ++ int epos; /* End of biased buffer position - max single put. */ ++ int ofs; /* Byte offset into section. */ ++} dasm_Section; ++ ++/* Core structure holding the DynASM encoding state. */ ++struct dasm_State { ++ size_t psize; /* Allocated size of this structure. */ ++ dasm_ActList actionlist; /* Current actionlist pointer. */ ++ int *lglabels; /* Local/global chain/pos ptrs. */ ++ size_t lgsize; ++ int *pclabels; /* PC label chains/pos ptrs. */ ++ size_t pcsize; ++ void **globals; /* Array of globals. */ ++ dasm_Section *section; /* Pointer to active section. */ ++ size_t codesize; /* Total size of all code sections. */ ++ int maxsection; /* 0 <= sectionidx < maxsection. */ ++ int status; /* Status code. */ ++ dasm_Section sections[1]; /* All sections. Alloc-extended. */ ++}; ++ ++/* The size of the core structure depends on the max. number of sections. */ ++#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) ++ ++ ++/* Initialize DynASM state. 
*/ ++void dasm_init(Dst_DECL, int maxsection) ++{ ++ dasm_State *D; ++ size_t psz = 0; ++ Dst_REF = NULL; ++ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); ++ D = Dst_REF; ++ D->psize = psz; ++ D->lglabels = NULL; ++ D->lgsize = 0; ++ D->pclabels = NULL; ++ D->pcsize = 0; ++ D->globals = NULL; ++ D->maxsection = maxsection; ++ memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); ++} ++ ++/* Free DynASM state. */ ++void dasm_free(Dst_DECL) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ for (i = 0; i < D->maxsection; i++) ++ if (D->sections[i].buf) ++ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); ++ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); ++ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); ++ DASM_M_FREE(Dst, D, D->psize); ++} ++ ++/* Setup global label array. Must be called before dasm_setup(). */ ++void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) ++{ ++ dasm_State *D = Dst_REF; ++ D->globals = gl; ++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); ++} ++ ++/* Grow PC label array. Can be called after dasm_setup(), too. */ ++void dasm_growpc(Dst_DECL, unsigned int maxpc) ++{ ++ dasm_State *D = Dst_REF; ++ size_t osz = D->pcsize; ++ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); ++ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); ++} ++ ++/* Setup encoder. 
*/ ++void dasm_setup(Dst_DECL, const void *actionlist) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ D->actionlist = (dasm_ActList)actionlist; ++ D->status = DASM_S_OK; ++ D->section = &D->sections[0]; ++ memset((void *)D->lglabels, 0, D->lgsize); ++ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); ++ for (i = 0; i < D->maxsection; i++) { ++ D->sections[i].pos = DASM_SEC2POS(i); ++ D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; ++ D->sections[i].ofs = 0; ++ } ++} ++ ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) { \ ++ D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) ++#define CKPL(kind, st) \ ++ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ ++ D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) ++#else ++#define CK(x, st) ((void)0) ++#define CKPL(kind, st) ((void)0) ++#endif ++ ++static int dasm_imms(int n) ++{ ++ return (n >= -2048 && n < 2048) ? n : 4096; ++} ++/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ ++void dasm_put(Dst_DECL, int start, ...) ++{ ++ va_list ap; ++ dasm_State *D = Dst_REF; ++ dasm_ActList p = D->actionlist + start; ++ dasm_Section *sec = D->section; ++ int pos = sec->pos, ofs = sec->ofs; ++ int *b; ++ ++ if (pos >= sec->epos) { ++ DASM_M_GROW(Dst, int, sec->buf, sec->bsize, ++ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); ++ sec->rbuf = sec->buf - DASM_POS2BIAS(pos); ++ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); ++ } ++ ++ b = sec->rbuf; ++ b[pos++] = start; ++ ++ va_start(ap, start); ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 20); ++ if (action >= DASM__MAX || (ins & 0xf)) { ++ ofs += 4; ++ } else { ++ ins >>= 4; ++ int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; ++ switch (action) { ++ case DASM_STOP: goto stop; ++ case DASM_SECTION: ++ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); ++ D->section = &D->sections[n]; goto stop; ++ case DASM_ESC: p++; ofs += 4; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; ++ case DASM_REL_LG: ++ n = (ins & 2047) - 10; pl = D->lglabels + n; ++ /* Bkwd rel or global. */ ++ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } ++ pl += 10; n = *pl; ++ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ ++ goto linkrel; ++ case DASM_REL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putrel: ++ n = *pl; ++ if (n < 0) { /* Label exists. Get label pos and store it. */ ++ b[pos] = -n; ++ } else { ++ linkrel: ++ b[pos] = n; /* Else link to rel chain, anchored at label. */ ++ *pl = pos; ++ } ++ pos++; ++ break; ++ case DASM_LABEL_LG: ++ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; ++ case DASM_LABEL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putlabel: ++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; ++ } ++ *pl = -pos; /* Label exists now. */ ++ b[pos++] = ofs; /* Store pass1 offset estimate. */ ++ break; ++ case DASM_IMM: ++#ifdef DASM_CHECKS ++ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); ++#endif ++ n >>= ((ins>>10)&31); ++#ifdef DASM_CHECKS ++ if (ins & 0x8000) ++ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); ++ else ++ CK((n>>((ins>>5)&31)) == 0, RANGE_I); ++#endif ++ b[pos++] = n; ++ break; ++ case DASM_IMMS: ++#ifdef DASM_CHECKS ++ CK(dasm_imms(n) != 4096, RANGE_I); ++#endif ++ b[pos++] = n; ++ break; ++ } ++ } ++ } ++stop: ++ va_end(ap); ++ sec->pos = pos; ++ sec->ofs = ofs; ++} ++#undef CK ++ ++/* Pass 2: Link sections, shrink aligns, fix label offsets. 
*/ ++int dasm_link(Dst_DECL, size_t *szp) ++{ ++ dasm_State *D = Dst_REF; ++ int secnum; ++ int ofs = 0; ++ ++#ifdef DASM_CHECKS ++ *szp = 0; ++ if (D->status != DASM_S_OK) return D->status; ++ { ++ int pc; ++ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) ++ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; ++ } ++#endif ++ ++ { /* Handle globals not defined in this translation unit. */ ++ int idx; ++ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { ++ int n = D->lglabels[idx]; ++ /* Undefined label: Collapse rel chain and replace with marker (< 0). */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } ++ } ++ } ++ ++ /* Combine all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->rbuf; ++ int pos = DASM_SEC2POS(secnum); ++ int lastpos = sec->pos; ++ ++ while (pos != lastpos) { ++ dasm_ActList p = D->actionlist + b[pos++]; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 20); ++ if (ins & 0xf) continue; else ins >>= 4; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: p++; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; ++ case DASM_REL_LG: case DASM_REL_PC: pos++; break; ++ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; ++ case DASM_IMM: case DASM_IMMS: pos++; break; ++ } ++ } ++ stop: (void)0; ++ } ++ ofs += sec->ofs; /* Next section starts right after current section. */ ++ } ++ ++ D->codesize = ofs; /* Total size of all code sections */ ++ *szp = ofs; ++ return DASM_S_OK; ++} ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0) ++#else ++#define CK(x, st) ((void)0) ++#endif ++ ++/* Pass 3: Encode sections. 
*/ ++int dasm_encode(Dst_DECL, void *buffer) ++{ ++ dasm_State *D = Dst_REF; ++ char *base = (char *)buffer; ++ unsigned int *cp = (unsigned int *)buffer; ++ int secnum; ++ ++ /* Encode all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->buf; ++ int *endb = sec->rbuf + sec->pos; ++ ++ while (b != endb) { ++ dasm_ActList p = D->actionlist + *b++; ++ while (1) { ++ unsigned int ins = *p++; ++ if (ins & 0xf) { *cp++ = ins; continue; } ++ unsigned int action = (ins >> 20); ++ unsigned int val = (ins >> 4); ++ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: *cp++ = *p++; break; ++ case DASM_REL_EXT: ++ n = DASM_EXTERN(Dst, (unsigned char *)cp, (val & 2047), 1); ++ goto patchrel; ++ case DASM_ALIGN: ++ val &= 255; while ((((char *)cp - base) & val)) *cp++ = 0x60000000; ++ break; ++ case DASM_REL_LG: ++ if (n < 0) { ++ n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4); ++ goto patchrel; ++ } ++ /* fallthrough */ ++ case DASM_REL_PC: ++ CK(n >= 0, UNDEF_PC); ++ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; ++ patchrel: ++ if (val & 2048) { /* B */ ++ CK((n & 1) == 0 && ((n + 0x1000) >> 13) == 0, RANGE_REL); ++ cp[-1] |= ((n << 19) & 0x80000000) | ((n << 20) & 0x7e000000) ++ | ((n << 7) & 0x00000f00) | ((n >> 4) & 0x00000080); ++ } else { /* J */ ++ CK((n & 1) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL); ++ cp[-1] |= ((n << 11) & 0x80000000) | ((n << 20) & 0x7fe00000) ++ | ((n << 9) & 0x00100000) | (n & 0x000ff000); ++ } ++ break; ++ case DASM_LABEL_LG: ++ val &= 2047; if (val >= 20) D->globals[val-20] = (void *)(base + n); ++ break; ++ case DASM_LABEL_PC: break; ++ case DASM_IMM: ++ cp[-1] |= (n & ((1<<((val>>5)&31))-1)) << (val&31); ++ break; ++ case DASM_IMMS: ++ cp[-1] |= (((n << 20) & 0xfe000000) | ((n << 7) & 
0x00000f80)); ++ break; ++ default: *cp++ = ins; break; ++ } ++ } ++ stop: (void)0; ++ } ++ } ++ ++ if (base + D->codesize != (char *)cp) /* Check for phase errors. */ ++ return DASM_S_PHASE; ++ return DASM_S_OK; ++} ++#undef CK ++ ++/* Get PC label offset. */ ++int dasm_getpclabel(Dst_DECL, unsigned int pc) ++{ ++ dasm_State *D = Dst_REF; ++ if (pc*sizeof(int) < D->pcsize) { ++ int pos = D->pclabels[pc]; ++ if (pos < 0) return *DASM_POS2PTR(D, -pos); ++ if (pos > 0) return -1; /* Undefined. */ ++ } ++ return -2; /* Unused or out of range. */ ++} ++ ++#ifdef DASM_CHECKS ++/* Optional sanity checker to call between isolated encoding steps. */ ++int dasm_checkstep(Dst_DECL, int secmatch) ++{ ++ dasm_State *D = Dst_REF; ++ if (D->status == DASM_S_OK) { ++ int i; ++ for (i = 1; i <= 9; i++) { ++ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } ++ D->lglabels[i] = 0; ++ } ++ } ++ if (D->status == DASM_S_OK && secmatch >= 0 && ++ D->section != &D->sections[secmatch]) ++ D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); ++ return D->status; ++} ++#endif ++ +diff --git a/dynasm/dasm_riscv.lua b/dynasm/dasm_riscv.lua +new file mode 100644 +index 000000000..4c8518f16 +--- /dev/null ++++ b/dynasm/dasm_riscv.lua +@@ -0,0 +1,979 @@ ++------------------------------------------------------------------------------ ++-- DynASM RISC-V module. ++-- ++-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++-- See dynasm.lua for full copyright notice. ++-- ++-- Contributed by gns from PLCT Lab, ISCAS. ++------------------------------------------------------------------------------ ++ ++local riscv32 = riscv32 ++local riscv64 = riscv64 ++ ++-- Module information: ++local _info = { ++ arch = riscv32 and "riscv32" or riscv64 and "riscv64", ++ description = "DynASM RISC-V module", ++ version = "1.5.0", ++ vernum = 10500, ++ release = "2022-07-12", ++ author = "Mike Pall", ++ license = "MIT", ++} ++ ++-- Exported glue functions for the arch-specific module. 
++local _M = { _info = _info } ++ ++-- Cache library functions. ++local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs ++local assert, setmetatable = assert, setmetatable ++local _s = string ++local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char ++local match, gmatch = _s.match, _s.gmatch ++local concat, sort = table.concat, table.sort ++local bit = bit or require("bit") ++local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift ++local tohex = bit.tohex ++ ++local function __orderedIndexGen(t) ++ local orderedIndex = {} ++ for key in pairs(t) do ++ table.insert(orderedIndex, key) ++ end ++ table.sort( orderedIndex ) ++ return orderedIndex ++end ++ ++local function __orderedNext(t, state) ++ local key = nil ++ if state == nil then ++ t.__orderedIndex = __orderedIndexGen(t) ++ key = t.__orderedIndex[1] ++ else ++ local j = 0 ++ for _,_ in pairs(t.__orderedIndex) do j = j + 1 end ++ for i = 1, j do ++ if t.__orderedIndex[i] == state then ++ key = t.__orderedIndex[i+1] ++ end ++ end ++ end ++ ++ if key then ++ return key, t[key] ++ end ++ ++ t.__orderedIndex = nil ++ return ++end ++ ++local function opairs(t) ++ return __orderedNext, t, nil ++end ++ ++-- Inherited tables and callbacks. ++local g_opt, g_arch ++local wline, werror, wfatal, wwarn ++ ++-- Action name list. ++-- CHECK: Keep this in sync with the C code! ++local action_names = { ++ "STOP", "SECTION", "ESC", "REL_EXT", ++ "ALIGN", "REL_LG", "LABEL_LG", ++ "REL_PC", "LABEL_PC", "IMM", "IMMS", ++} ++ ++-- Maximum number of section buffer positions for dasm_put(). ++-- CHECK: Keep this in sync with the C code! ++local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. ++ ++-- Action name -> action number. ++local map_action = {} ++for n,name in ipairs(action_names) do ++ map_action[name] = n-1 ++end ++ ++-- Action list buffer. ++local actlist = {} ++ ++-- Argument list for next dasm_put(). Start with offset 0 into action list. 
++local actargs = { 0 } ++ ++-- Current number of section buffer positions for dasm_put(). ++local secpos = 1 ++ ++------------------------------------------------------------------------------ ++ ++-- Dump action names and numbers. ++local function dumpactions(out) ++ out:write("DynASM encoding engine action codes:\n") ++ for n,name in ipairs(action_names) do ++ local num = map_action[name] ++ out:write(format(" %-10s %02X %d\n", name, num, num)) ++ end ++ out:write("\n") ++end ++ ++-- Write action list buffer as a huge static C array. ++local function writeactions(out, name) ++ local nn = #actlist ++ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end ++ out:write("static const unsigned int ", name, "[", nn, "] = {\n") ++ for i = 1,nn-1 do ++ assert(out:write("0x", tohex(actlist[i]), ",\n")) ++ end ++ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Add word to action list. ++local function wputxw(n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[#actlist+1] = n ++end ++ ++-- Add action to list with optional arg. Advance buffer pos, too. ++local function waction(action, val, a, num) ++ local w = assert(map_action[action], "bad action name `"..action.."'") ++ wputxw(w * 0x100000 + (val or 0) * 16) ++ if a then actargs[#actargs+1] = a end ++ if a or num then secpos = secpos + (num or 1) end ++end ++ ++-- Flush action list (intervening C code or buffer pos overflow). ++local function wflush(term) ++ if #actlist == actargs[1] then return end -- Nothing to flush. ++ if not term then waction("STOP") end -- Terminate action list. ++ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) ++ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). ++ secpos = 1 -- The actionlist offset occupies a buffer position, too. ++end ++ ++-- Put escaped word. 
++local function wputw(n) ++ if band(n, 0xf) == 0 then waction("ESC") end ++ wputxw(n) ++end ++ ++-- Reserve position for word. ++local function wpos() ++ local pos = #actlist+1 ++ actlist[pos] = "" ++ return pos ++end ++ ++-- Store word to reserved position. ++local function wputpos(pos, n) ++ assert(n >= -0x80000000 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[pos] = n ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Global label name -> global label number. With auto assignment on 1st use. ++local next_global = 20 ++local map_global = setmetatable({}, { __index = function(t, name) ++ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end ++ local n = next_global ++ if n > 2047 then werror("too many global labels") end ++ next_global = n + 1 ++ t[name] = n ++ return n ++end}) ++ ++-- Dump global labels. ++local function dumpglobals(out, lvl) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("Global labels:\n") ++ for i=20,next_global-1 do ++ out:write(format(" %s\n", t[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write global label enum. ++local function writeglobals(out, prefix) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("enum {\n") ++ for i=20,next_global-1 do ++ out:write(" ", prefix, t[i], ",\n") ++ end ++ out:write(" ", prefix, "_MAX\n};\n") ++end ++ ++-- Write global label names. ++local function writeglobalnames(out, name) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=20,next_global-1 do ++ out:write(" \"", t[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Extern label name -> extern label number. With auto assignment on 1st use. 
++local next_extern = 0 ++local map_extern_ = {} ++local map_extern = setmetatable({}, { __index = function(t, name) ++ -- No restrictions on the name for now. ++ local n = next_extern ++ if n > 2047 then werror("too many extern labels") end ++ next_extern = n + 1 ++ t[name] = n ++ map_extern_[n] = name ++ return n ++end}) ++ ++-- Dump extern labels. ++local function dumpexterns(out, lvl) ++ out:write("Extern labels:\n") ++ for i=0,next_extern-1 do ++ out:write(format(" %s\n", map_extern_[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write extern label names. ++local function writeexternnames(out, name) ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=0,next_extern-1 do ++ out:write(" \"", map_extern_[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Arch-specific maps. ++local map_archdef = { ++ ra = "x1", sp = "x2", ++} -- Ext. register name -> int. name. ++ ++local map_type = {} -- Type name -> { ctype, reg } ++local ctypenum = 0 -- Type number (for Dt... macros). ++ ++-- Reverse defines for registers. ++function _M.revdef(s) ++ if s == "x1" then return "ra" ++ elseif s == "x2" then return "sp" end ++ return s ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Template strings for RISC-V instructions. 
++local map_op = {} ++ ++local map_op_rv32imafd = { ++ ++ -- RV32I ++ lui_2 = "00000037DU", ++ auipc_2 = "00000017DA", ++ ++ jal_2 = "0000006fDJ", ++ jalr_3 = "00000067DRJ", ++ -- pseudo-instrs ++ j_1 = "0000006fJ", ++ jal_1 = "000000efJ", ++ jr_1 = "00000067R", ++ jalr_1 = "000000e7R", ++ jalr_2 = "000000e7RJ", ++ ++ beq_3 = "00000063RrB", ++ bne_3 = "00001063RrB", ++ blt_3 = "00004063RrB", ++ bge_3 = "00005063RrB", ++ bltu_3 = "00006063RrB", ++ bgeu_3 = "00007063RrB", ++ -- pseudo-instrs ++ bnez_2 = "00001063RB", ++ beqz_2 = "00000063RB", ++ blez_2 = "00005063rB", ++ bgez_2 = "00005063RB", ++ bltz_2 = "00004063RB", ++ bgtz_2 = "00004063rB", ++ bgt_3 = "00004063rRB", ++ ble_3 = "00005063rRB", ++ bgtu_3 = "00006063rRB", ++ bleu_3 = "00007063rRB", ++ ++ lb_2 = "00000003DL", ++ lh_2 = "00001003DL", ++ lw_2 = "00002003DL", ++ lbu_2 = "00004003DL", ++ lhu_2 = "00005003DL", ++ ++ sb_2 = "00000023rS", ++ sh_2 = "00001023rS", ++ sw_2 = "00002023rS", ++ ++ addi_3 = "00000013DRI", ++ slti_3 = "00002013DRI", ++ sltiu_3 = "00003013DRI", ++ xori_3 = "00004013DRI", ++ ori_3 = "00006013DRI", ++ andi_3 = "00007013DRI", ++ slli_3 = "00001013DRi", ++ srli_3 = "00005013DRi", ++ srai_3 = "40005013DRi", ++ -- pseudo-instrs ++ seqz_2 = "00103013DR", ++ ["zext.b_2"] = "0ff07013DR", ++ ++ add_3 = "00000033DRr", ++ sub_3 = "40000033DRr", ++ sll_3 = "00001033DRr", ++ slt_3 = "00002033DRr", ++ sltu_3 = "00003033DRr", ++ xor_3 = "00004033DRr", ++ srl_3 = "00005033DRr", ++ sra_3 = "40005033DRr", ++ or_3 = "00006033DRr", ++ and_3 = "00007033DRr", ++ -- pseudo-instrs ++ snez_2 = "00003033Dr", ++ sltz_2 = "00002033DR", ++ sgtz_2 = "00002033Dr", ++ ++ ecall_0 = "00000073", ++ ebreak_0 = "00100073", ++ ++ nop_0 = "00000013", ++ li_2 = "00000013DI", ++ mv_2 = "00000013DR", ++ not_2 = "fff04013DR", ++ neg_2 = "40000033Dr", ++ ret_0 = "00008067", ++ ++ -- RV32M ++ mul_3 = "02000033DRr", ++ mulh_3 = "02001033DRr", ++ mulhsu_3 = "02002033DRr", ++ mulhu_3 = "02003033DRr", ++ div_3 = "02004033DRr", ++ 
divu_3 = "02005033DRr", ++ rem_3 = "02006033DRr", ++ remu_3 = "02007033DRr", ++ ++ -- RV32A ++ ["lr.w_2"] = "c0000053FR", ++ ["sc.w_2"] = "c0001053FRr", ++ ["amoswap.w_3"] = "c0002053FRr", ++ ["amoadd.w_3"] = "c0003053FRr", ++ ["amoxor.w_3"] = "c0004053FRr", ++ ["amoor.w_3"] = "c0005053FRr", ++ ["amoand.w_3"] = "c0006053FRr", ++ ["amomin.w_3"] = "c0007053FRr", ++ ["amomax.w_3"] = "c0008053FRr", ++ ["amominu.w_3"] = "c0009053FRr", ++ ["amomaxu.w_3"] = "c000a053FRr", ++ ++ -- RV32F ++ ["flw_2"] = "00002007FL", ++ ["fsw_2"] = "00002027gS", ++ ++ ["fmadd.s_4"] = "00000043FGgH", ++ ["fmsub.s_4"] = "00000047FGgH", ++ ["fnmsub.s_4"] = "0000004bFGgH", ++ ["fnmadd.s_4"] = "0000004fFGgH", ++ ["fmadd.s_5"] = "00000043FGgHM", ++ ["fmsub.s_5"] = "00000047FGgHM", ++ ["fnmsub.s_5"] = "0000004bFGgHM", ++ ["fnmadd.s_5"] = "0000004fFGgHM", ++ ++ ["fadd.s_3"] = "00000053FGg", ++ ["fsub.s_3"] = "08000053FGg", ++ ["fmul.s_3"] = "10000053FGg", ++ ["fdiv.s_3"] = "18000053FGg", ++ ["fsqrt.s_2"] = "58000053FG", ++ ["fadd.s_4"] = "00000053FGgM", ++ ["fsub.s_4"] = "08000053FGgM", ++ ["fmul.s_4"] = "10000053FGgM", ++ ["fdiv.s_4"] = "18000053FGgM", ++ ["fsqrt.s_3"] = "58000053FGM", ++ ++ ["fsgnj.s_3"] = "20000053FGg", ++ ["fsgnjn.s_3"] = "20001053FGg", ++ ["fsgnjx.s_3"] = "20002053FGg", ++ ++ ["fmin.s_3"] = "28000053FGg", ++ ["fmax.s_3"] = "28001053FGg", ++ ++ ["fcvt.w.s_2"] = "c0000053DG", ++ ["fcvt.wu.s_2"] = "c0100053DG", ++ ["fcvt.w.s_3"] = "c0000053DGM", ++ ["fcvt.wu.s_3"] = "c0100053DGM", ++ ["fmv.x.w_2"] = "e0000053DG", ++ ++ ["feq.s_3"] = "a0002053DGg", ++ ["flt.s_3"] = "a0001053DGg", ++ ["fle.s_3"] = "a0000053DGg", ++ ++ ["fclass.s_2"] = "e0001053DG", ++ ++ ["fcvt.s.w_2"] = "d0000053FR", ++ ["fcvt.s.wu_2"] = "d0100053FR", ++ ["fcvt.s.w_3"] = "d0000053FRM", ++ ["fcvt.s.wu_3"] = "d0100053FRM", ++ ["fmv.w.x_2"] = "f0000053FR", ++ ++ -- RV32D ++ ["fld_2"] = "00003007FL", ++ ["fsd_2"] = "00003027gS", ++ ++ ["fmadd.d_4"] = "02000043FGgH", ++ ["fmsub.d_4"] = "02000047FGgH", ++ ["fnmsub.d_4"] 
= "0200004bFGgH", ++ ["fnmadd.d_4"] = "0200004fFGgH", ++ ["fmadd.d_5"] = "02000043FGgHM", ++ ["fmsub.d_5"] = "02000047FGgHM", ++ ["fnmsub.d_5"] = "0200004bFGgHM", ++ ["fnmadd.d_5"] = "0200004fFGgHM", ++ ++ ["fadd.d_3"] = "02000053FGg", ++ ["fsub.d_3"] = "0a000053FGg", ++ ["fmul.d_3"] = "12000053FGg", ++ ["fdiv.d_3"] = "1a000053FGg", ++ ["fsqrt.d_2"] = "5a000053FG", ++ ["fadd.d_4"] = "02000053FGgM", ++ ["fsub.d_4"] = "0a000053FGgM", ++ ["fmul.d_4"] = "12000053FGgM", ++ ["fdiv.d_4"] = "1a000053FGgM", ++ ["fsqrt.d_3"] = "5a000053FGM", ++ ++ ["fsgnj.d_3"] = "22000053FGg", ++ ["fsgnjn.d_3"] = "22001053FGg", ++ ["fsgnjx.d_3"] = "22002053FGg", ++ ["fmin.d_3"] = "2a000053FGg", ++ ["fmax.d_3"] = "2a001053FGg", ++ ["fcvt.s.d_2"] = "40100053FG", ++ ["fcvt.d.s_2"] = "42000053FG", ++ ["feq.d_3"] = "a2002053DGg", ++ ["flt.d_3"] = "a2001053DGg", ++ ["fle.d_3"] = "a2000053DGg", ++ ["fclass.d_2"] = "e2001053DG", ++ ["fcvt.w.d_2"] = "c2000053DG", ++ ["fcvt.wu.d_2"] = "c2100053DG", ++ ["fcvt.d.w_2"] = "d2000053FR", ++ ["fcvt.d.wu_2"] = "d2100053FR", ++ ["fcvt.w.d_3"] = "c2000053DGM", ++ ["fcvt.wu.d_3"] = "c2100053DGM", ++ ["fcvt.d.w_3"] = "d2000053FRM", ++ ["fcvt.d.wu_3"] = "d2100053FRM", ++ ++ ["fmv.d_2"] = "22000053FY", ++ ["fneg.d_2"] = "22001053FY", ++ ["fabs.d_2"] = "22002053FY", ++ ++} ++ ++local map_op_rv64imafd = { ++ ++ -- RV64I ++ lwu_2 = "00006003DL", ++ ld_2 = "00003003DL", ++ ++ sd_2 = "00003023rS", ++ ++ slli_3 = "00001013DRj", ++ srli_3 = "00005013DRj", ++ srai_3 = "40005013DRj", ++ ++ addiw_3 = "0000001bDRI", ++ slliw_3 = "0000101bDRi", ++ srliw_3 = "0000501bDRi", ++ sraiw_3 = "4000501bDRi", ++ ++ addw_3 = "0000003bDRr", ++ subw_3 = "4000003bDRr", ++ sllw_3 = "0000103bDRr", ++ srlw_3 = "0000503bDRr", ++ sraw_3 = "4000503bDRr", ++ ++ negw_2 = "4000003bDr", ++ ["sext.w_2"] = "0000001bDR", ++ ++ -- RV64M ++ mulw_3 = "0200003bDRr", ++ divw_3 = "0200403bDRr", ++ divuw_3 = "0200503bDRr", ++ remw_3 = "0200603bDRr", ++ remuw_3 = "0200703bDRr", ++ ++ -- RV64A ++ ["lr.d_2"] = 
"c2000053FR", ++ ["sc.d_2"] = "c2001053FRr", ++ ["amoswap.d_3"] = "c2002053FRr", ++ ["amoadd.d_3"] = "c2003053FRr", ++ ["amoxor.d_3"] = "c2004053FRr", ++ ["amoor.d_3"] = "c2005053FRr", ++ ["amoand.d_3"] = "c2006053FRr", ++ ["amomin.d_3"] = "c2007053FRr", ++ ["amomax.d_3"] = "c2008053FRr", ++ ["amominu.d_3"] = "c2009053FRr", ++ ["amomaxu.d_3"] = "c200a053FRr", ++ ++ -- RV64F ++ ["fcvt.l.s_2"] = "c0200053DG", ++ ["fcvt.lu.s_2"] = "c0300053DG", ++ ["fcvt.l.s_3"] = "c0200053DGM", ++ ["fcvt.lu.s_3"] = "c0300053DGM", ++ ["fcvt.s.l_2"] = "d0200053FR", ++ ["fcvt.s.lu_2"] = "d0300053FR", ++ ["fcvt.s.l_3"] = "d0200053FRM", ++ ["fcvt.s.lu_3"] = "d0300053FRM", ++ ++ -- RV64D ++ ["fcvt.l.d_2"] = "c2200053DG", ++ ["fcvt.lu.d_2"] = "c2300053DG", ++ ["fcvt.l.d_3"] = "c2200053DGM", ++ ["fcvt.lu.d_3"] = "c2300053DGM", ++ ["fmv.x.d_2"] = "e2000053DG", ++ ["fcvt.d.l_2"] = "d2200053FR", ++ ["fcvt.d.lu_2"] = "d2300053FR", ++ ["fcvt.d.l_3"] = "d2200053FRM", ++ ["fcvt.d.lu_3"] = "d2300053FRM", ++ ["fmv.d.x_2"] = "f2000053FR", ++ ++} ++ ++local map_op_zicsr = { ++ csrrw_3 = "00001073DCR", ++ csrrs_3 = "00002073DCR", ++ csrrc_3 = "00003073DCR", ++ csrrwi_3 = "00005073DCu", ++ csrrsi_3 = "00006073DCu", ++ csrrci_3 = "00007073DCu", ++ ++ -- pseudo-ops ++ csrrw_2 = "00001073DC", ++ csrrs_2 = "00002073CR", ++ csrrc_2 = "00003073CR", ++ csrrwi_2 = "00005073Cu", ++ csrrsi_2 = "00006073Cu", ++ csrrci_2 = "00007073Cu", ++ ++ rdinstret_1 = "C0202073D", ++ rdcycle_1 = "C0002073D", ++ rdtime_1 = "C0102073D", ++ rdinstreth_1 = "C8202073D", ++ rdcycleh_1 = "C8002073D", ++ rdtimeh_1 = "C8102073D", ++ ++ frcsr_1 = "00302073D", ++ fscsr_2 = "00301073DR", ++ fscsr_1 = "00301073R", ++ frrm_1 = "00202073D", ++ fsrm_2 = "00201073DR", ++ fsrm_1 = "00201073R", ++ fsrmi_2 = "00205073Du", ++ fsrmi_1 = "00205073u", ++ frflags_1 = "00102073D", ++ fsflags_2 = "00101073DR", ++ fsflagsi_2 = "00105073Du", ++ fsflagsi_1 = "00105073u", ++} ++ ++local map_op_zifencei = { ++ ["fence.i_3"] = "0000100fDRI", ++} ++ ++local 
list_map_op_rv32 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_zifencei, ['c'] = map_op_zicsr } ++local list_map_op_rv64 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_rv64imafd, ['c'] = map_op_zifencei, ['d'] = map_op_zicsr } ++ ++if riscv32 then for _, map in opairs(list_map_op_rv32) do ++ for k, v in pairs(map) do map_op[k] = v end ++ end ++end ++if riscv64 then for _, map in opairs(list_map_op_rv64) do ++ for k, v in pairs(map) do map_op[k] = v end ++ end ++end ++ ++------------------------------------------------------------------------------ ++ ++local function parse_gpr(expr) ++ local tname, ovreg = match(expr, "^([%w_]+):(x[1-3]?[0-9])$") ++ local tp = map_type[tname or expr] ++ if tp then ++ local reg = ovreg or tp.reg ++ if not reg then ++ werror("type `"..(tname or expr).."' needs a register override") ++ end ++ expr = reg ++ end ++ local r = match(expr, "^x([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r, tp end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fpr(expr) ++ local r = match(expr, "^f([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_imm(imm, bits, shift, scale, signed, action) ++ local n = tonumber(imm) ++ if n then ++ local m = sar(n, scale) ++ if shl(m, scale) == n then ++ if signed then ++ local s = sar(m, bits-1) ++ if s == 0 then return shl(m, shift) ++ elseif s == -1 then return shl(m + shl(1, bits), shift) end ++ else ++ if sar(m, bits) == 0 then return shl(m, shift) end ++ end ++ end ++ werror("out of range immediate `"..imm.."'") ++ elseif match(imm, "^[xf]([1-3]?[0-9])$") or ++ match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then ++ werror("expected immediate operand, got register") ++ else ++ waction(action or "IMM", ++ (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) ++ return 0 ++ end ++end ++ ++local function parse_csr(expr) ++ local r = 
match(expr, "^([1-4]?[0-9]?[0-9]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 4095 then return r end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_imms(imm) ++ local n = tonumber(imm) ++ if n then ++ if n >= -2048 and n < 2048 then ++ local imm5, imm7 = band(n, 0x1f), shr(band(n, 0xfe0), 5) ++ return shl(imm5, 7) + shl(imm7, 25) ++ end ++ werror("out of range immediate `"..imm.."'") ++ elseif match(imm, "^[xf]([1-3]?[0-9])$") or ++ match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then ++ werror("expected immediate operand, got register") ++ else ++ waction("IMMS", 0, imm); return 0 ++ end ++end ++ ++local function parse_rm(mode) ++ local rnd_mode = { ++ rne = 0, rtz = 1, rdn = 2, rup = 3, rmm = 4, dyn = 7 ++ } ++ local n = rnd_mode[mode] ++ if n then return n ++ else werror("bad rounding mode `"..mode.."'") end ++end ++ ++local function parse_disp(disp, mode) ++ local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") ++ if imm then ++ local r = shl(parse_gpr(reg), 15) ++ local extname = match(imm, "^extern%s+(%S+)$") ++ if extname then ++ waction("REL_EXT", map_extern[extname], nil, 1) ++ return r ++ else ++ if mode == "load" then ++ return r + parse_imm(imm, 12, 20, 0, true) ++ elseif mode == "store" then ++ return r + parse_imms(imm) ++ else ++ werror("bad displacement mode '"..mode.."'") ++ end ++ end ++ end ++ local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") ++ if reg and tailr ~= "" then ++ local r, tp = parse_gpr(reg) ++ if tp then ++ if mode == "load" then ++ waction("IMM", 32768+12*32+20, format(tp.ctypefmt, tailr)) ++ elseif mode == "store" then ++ waction("IMMS", 0, format(tp.ctypefmt, tailr)) ++ else ++ werror("bad displacement mode '"..mode.."'") ++ end ++ return shl(r, 15) ++ end ++ end ++ werror("bad displacement `"..disp.."'") ++end ++ ++local function parse_label(label, def) ++ local prefix = sub(label, 1, 2) ++ -- =>label (pc label reference) ++ if prefix == "=>" then ++ return "PC", 0, sub(label, 3) ++ end ++ 
-- ->name (global label reference) ++ if prefix == "->" then ++ return "LG", map_global[sub(label, 3)] ++ end ++ if def then ++ -- [1-9] (local label definition) ++ if match(label, "^[1-9]$") then ++ return "LG", 10+tonumber(label) ++ end ++ else ++ -- [<>][1-9] (local label reference) ++ local dir, lnum = match(label, "^([<>])([1-9])$") ++ if dir then -- Fwd: 1-9, Bkwd: 11-19. ++ return "LG", lnum + (dir == ">" and 0 or 10) ++ end ++ -- extern label (extern label reference) ++ local extname = match(label, "^extern%s+(%S+)$") ++ if extname then ++ return "EXT", map_extern[extname] ++ end ++ end ++ werror("bad label `"..label.."'") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Handle opcodes defined with template strings. ++map_op[".template__"] = function(params, template, nparams) ++ if not params then return sub(template, 9) end ++ local op = tonumber(sub(template, 1, 8), 16) ++ local n = 1 ++ ++ -- Limit number of section buffer positions used by a single dasm_put(). ++ -- A single opcode needs a maximum of 2 positions (ins/ext). ++ if secpos+2 > maxsecpos then wflush() end ++ local pos = wpos() ++ ++ -- Process each character. 
++ for p in gmatch(sub(template, 9), ".") do ++ if p == "D" then -- gpr rd ++ op = op + shl(parse_gpr(params[n]), 7); n = n + 1 ++ elseif p == "R" then -- gpr rs1 ++ op = op + shl(parse_gpr(params[n]), 15); n = n + 1 ++ elseif p == "r" then -- gpr rs2 ++ op = op + shl(parse_gpr(params[n]), 20); n = n + 1 ++ elseif p == "F" then -- fpr rd ++ op = op + shl(parse_fpr(params[n]), 7); n = n + 1 ++ elseif p == "G" then -- fpr rs1 ++ op = op + shl(parse_fpr(params[n]), 15); n = n + 1 ++ elseif p == "g" then -- fpr rs2 ++ op = op + shl(parse_fpr(params[n]), 20); n = n + 1 ++ elseif p == "H" then -- fpr rs3 ++ op = op + shl(parse_fpr(params[n]), 27); n = n + 1 ++ elseif p == "C" then -- csr ++ op = op + shl(parse_csr(params[n]), 20); n = n + 1 ++ elseif p == "M" then -- fpr rounding mode ++ op = op + shl(parse_rm(params[n]), 12); n = n + 1 ++ elseif p == "Y" then -- fpr psuedo-op ++ local r = parse_fpr(params[n]) ++ op = op + shl(r, 15) + shl(r, 20); n = n + 1 ++ elseif p == "I" then -- I-type imm12 ++ op = op + parse_imm(params[n], 12, 20, 0, true); n = n + 1 ++ elseif p == "i" then -- I-type shamt5 ++ op = op + parse_imm(params[n], 5, 20, 0, false); n = n + 1 ++ elseif p == "j" then -- I-type shamt6 ++ op = op + parse_imm(params[n], 6, 20, 0, false); n = n + 1 ++ elseif p == "u" then -- I-type uimm ++ op = op + parse_imm(params[n], 5, 15, 0, false); n = n + 1 ++ elseif p == "U" then -- U-type imm20 ++ op = op + parse_imm(params[n], 20, 12, 0, false); n = n + 1 ++ elseif p == "L" then -- load ++ op = op + parse_disp(params[n], "load"); n = n + 1 ++ elseif p == "S" then -- store ++ op = op + parse_disp(params[n], "store"); n = n + 1 ++ elseif p == "B" or p == "J" then -- control flow ++ local mode, m, s = parse_label(params[n], false) ++ if p == "B" then m = m + 2048 end ++ waction("REL_"..mode, m, s, 1); n = n + 1 ++ elseif p == "A" then -- AUIPC ++ local mode, m, s = parse_label(params[n], false) ++ waction("REL_"..mode, m, s, 1); n = n + 1 ++ else ++ assert(false) ++ end 
++ end ++ wputpos(pos, op) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode to mark the position where the action list is to be emitted. ++map_op[".actionlist_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeactions(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global enum is to be emitted. ++map_op[".globals_1"] = function(params) ++ if not params then return "prefix" end ++ local prefix = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobals(out, prefix) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global names are to be emitted. ++map_op[".globalnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobalnames(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the extern names are to be emitted. ++map_op[".externnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeexternnames(out, name) end) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Label pseudo-opcode (converted from trailing colon form). ++map_op[".label_1"] = function(params) ++ if not params then return "[1-9] | ->global | =>pcexpr" end ++ if secpos+1 > maxsecpos then wflush() end ++ local mode, n, s = parse_label(params[1], true) ++ if mode == "EXT" then werror("bad label definition") end ++ waction("LABEL_"..mode, n, s, 1) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcodes for data storage. ++map_op[".long_*"] = function(params) ++ if not params then return "imm..." 
end ++ for _,p in ipairs(params) do ++ local n = tonumber(p) ++ if not n then werror("bad immediate `"..p.."'") end ++ if n < 0 then n = n + 2^32 end ++ wputw(n) ++ if secpos+2 > maxsecpos then wflush() end ++ end ++end ++ ++-- Alignment pseudo-opcode. ++map_op[".align_1"] = function(params) ++ if not params then return "numpow2" end ++ if secpos+1 > maxsecpos then wflush() end ++ local align = tonumber(params[1]) ++ if align then ++ local x = align ++ -- Must be a power of 2 in the range (2 ... 256). ++ for i=1,8 do ++ x = x / 2 ++ if x == 1 then ++ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. ++ return ++ end ++ end ++ end ++ werror("bad alignment") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode for (primitive) type definitions (map to C types). ++map_op[".type_3"] = function(params, nparams) ++ if not params then ++ return nparams == 2 and "name, ctype" or "name, ctype, reg" ++ end ++ local name, ctype, reg = params[1], params[2], params[3] ++ if not match(name, "^[%a_][%w_]*$") then ++ werror("bad type name `"..name.."'") ++ end ++ local tp = map_type[name] ++ if tp then ++ werror("duplicate type `"..name.."'") ++ end ++ -- Add #type to defines. A bit unclean to put it in map_archdef. ++ map_archdef["#"..name] = "sizeof("..ctype..")" ++ -- Add new type and emit shortcut define. ++ local num = ctypenum + 1 ++ map_type[name] = { ++ ctype = ctype, ++ ctypefmt = format("Dt%X(%%s)", num), ++ reg = reg, ++ } ++ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) ++ ctypenum = num ++end ++map_op[".type_2"] = map_op[".type_3"] ++ ++-- Dump type definitions. 
++local function dumptypes(out, lvl) ++ local t = {} ++ for name in pairs(map_type) do t[#t+1] = name end ++ sort(t) ++ out:write("Type definitions:\n") ++ for _,name in ipairs(t) do ++ local tp = map_type[name] ++ local reg = tp.reg or "" ++ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) ++ end ++ out:write("\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Set the current section. ++function _M.section(num) ++ waction("SECTION", num) ++ wflush(true) -- SECTION is a terminal action. ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Dump architecture description. ++function _M.dumparch(out) ++ out:write(format("DynASM %s version %s, released %s\n\n", ++ _info.arch, _info.version, _info.release)) ++ dumpactions(out) ++end ++ ++-- Dump all user defined elements. ++function _M.dumpdef(out, lvl) ++ dumptypes(out, lvl) ++ dumpglobals(out, lvl) ++ dumpexterns(out, lvl) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pass callbacks from/to the DynASM core. ++function _M.passcb(wl, we, wf, ww) ++ wline, werror, wfatal, wwarn = wl, we, wf, ww ++ return wflush ++end ++ ++-- Setup the arch-specific module. ++function _M.setup(arch, opt) ++ g_arch, g_opt = arch, opt ++end ++ ++-- Merge the core maps and the arch-specific maps. ++function _M.mergemaps(map_coreop, map_def) ++ setmetatable(map_op, { __index = map_coreop }) ++ setmetatable(map_def, { __index = map_archdef }) ++ return map_op, map_def ++end ++ ++return _M ++ ++------------------------------------------------------------------------------ ++ +diff --git a/dynasm/dasm_riscv32.lua b/dynasm/dasm_riscv32.lua +new file mode 100644 +index 000000000..f194ce1dc +--- /dev/null ++++ b/dynasm/dasm_riscv32.lua +@@ -0,0 +1,12 @@ ++------------------------------------------------------------------------------ ++-- DynASM RISC-V 32 module. 
++-- ++-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++-- See dynasm.lua for full copyright notice. ++------------------------------------------------------------------------------ ++-- This module just sets 32 bit mode for the combined RISC-V module. ++-- All the interesting stuff is there. ++------------------------------------------------------------------------------ ++ ++riscv32 = true -- Using a global is an ugly, but effective solution. ++return require("dasm_riscv") +diff --git a/dynasm/dasm_riscv64.lua b/dynasm/dasm_riscv64.lua +new file mode 100644 +index 000000000..25274395d +--- /dev/null ++++ b/dynasm/dasm_riscv64.lua +@@ -0,0 +1,12 @@ ++------------------------------------------------------------------------------ ++-- DynASM RISC-V 64 module. ++-- ++-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++-- See dynasm.lua for full copyright notice. ++------------------------------------------------------------------------------ ++-- This module just sets 64 bit mode for the combined RISC-V module. ++-- All the interesting stuff is there. ++------------------------------------------------------------------------------ ++ ++riscv64 = true -- Using a global is an ugly, but effective solution. ++return require("dasm_riscv") + +From 8ea7722fe97496df6634df25f34e2c9bad12b95c Mon Sep 17 00:00:00 2001 +From: gns +Date: Tue, 5 Mar 2024 18:03:33 +0800 +Subject: [PATCH 03/22] riscv(interp): add register definition + +--- + src/vm_riscv64.dasc | 81 +++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 81 insertions(+) + create mode 100644 src/vm_riscv64.dasc + +diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc +new file mode 100644 +index 000000000..41c14eebb +--- /dev/null ++++ b/src/vm_riscv64.dasc +@@ -0,0 +1,81 @@ ++|// Low-level VM code for RISC-V 64 CPUs. ++|// Bytecode interpreter, fast functions and helper functions. ++|// Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h ++|// ++|// Contributed by gns from PLCT Lab, ISCAS. ++| ++|.arch riscv64 ++|.section code_op, code_sub ++| ++|.actionlist build_actionlist ++|.globals GLOB_ ++|.globalnames globnames ++|.externnames extnames ++| ++|// Note: The ragged indentation of the instructions is intentional. ++|// The starting columns indicate data dependencies. ++| ++|//----------------------------------------------------------------------- ++| ++|// Fixed register assignments for the interpreter. ++|// Don't use: x0 = 0, x1 = ra, x2 = sp, x3 = gp, x4 = tp ++| ++| ++|// The following must be C callee-save (but BASE is often refetched). ++|.define BASE, x18 // Base of current Lua stack frame. ++|.define KBASE, x19 // Constants of current Lua function. ++|.define PC, x20 // Next PC. ++|.define GLREG, x21 // Global state. ++|.define DISPATCH, x22 // Opcode dispatch table. ++|.define LREG, x23 // Register holding lua_State (also in SAVE_L). ++|.define MULTRES, x24 // Size of multi-result: (nresults+1)*8. ++| ++|// Constants for type-comparisons, stores and conversions. C callee-save. ++|.define TISNIL, x8 ++|.define TISNUM, x25 ++|.define TOBIT, f27 // 2^52 + 2^51. ++| ++|// The following temporaries are not saved across C calls, except for RA. ++|.define RA, x9 // Callee-save. ++|.define RB, x14 ++|.define RC, x15 ++|.define RD, x16 ++|.define INS, x17 ++| ++|.define TMP0, x6 ++|.define TMP1, x7 ++|.define TMP2, x28 ++|.define TMP3, x29 ++|.define TMP4, x30 ++| ++|// RISC-V lp64d calling convention. 
++|.define CFUNCADDR, x5 ++|.define CARG1, x10 ++|.define CARG2, x11 ++|.define CARG3, x12 ++|.define CARG4, x13 ++|.define CARG5, x14 ++|.define CARG6, x15 ++|.define CARG7, x16 ++|.define CARG8, x17 ++| ++|.define CRET1, x10 ++|.define CRET2, x11 ++| ++|.define FARG1, f10 ++|.define FARG2, f11 ++|.define FARG3, f12 ++|.define FARG4, f13 ++|.define FARG5, f14 ++|.define FARG6, f15 ++|.define FARG7, f16 ++|.define FARG8, f17 ++| ++|.define FRET1, f10 ++|.define FRET2, f11 ++| ++|.define FTMP0, f0 ++|.define FTMP1, f1 ++|.define FTMP2, f2 ++|.define FTMP3, f3 ++|.define FTMP4, f4 + +From 2ed44d34e70e5c96ce9c0d1fa61b1a7b5fbddff6 Mon Sep 17 00:00:00 2001 +From: gns +Date: Tue, 5 Mar 2024 18:05:22 +0800 +Subject: [PATCH 04/22] riscv(interp): add frame definition + +--- + src/lj_frame.h | 9 +++++ + src/vm_riscv64.dasc | 83 +++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 92 insertions(+) + +diff --git a/src/lj_frame.h b/src/lj_frame.h +index bcc14fadd..ff04d0df9 100644 +--- a/src/lj_frame.h ++++ b/src/lj_frame.h +@@ -287,6 +287,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + ** need to change to 3. + */ + #define CFRAME_SHIFT_MULTRES 0 ++#elif LJ_TARGET_RISCV64 ++#define CFRAME_OFS_ERRF 252 ++#define CFRAME_OFS_NRES 248 ++#define CFRAME_OFS_PREV 240 ++#define CFRAME_OFS_L 232 ++#define CFRAME_OFS_PC 224 ++#define CFRAME_OFS_MULTRES 0 ++#define CFRAME_SIZE 256 ++#define CFRAME_SHIFT_MULTRES 3 + #else + #error "Missing CFRAME_* definitions for this architecture" + #endif +diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc +index 41c14eebb..63977f5f6 100644 +--- a/src/vm_riscv64.dasc ++++ b/src/vm_riscv64.dasc +@@ -79,3 +79,86 @@ + |.define FTMP2, f2 + |.define FTMP3, f3 + |.define FTMP4, f4 ++| ++|// Stack layout while in interpreter. Must match with lj_frame.h. ++|// RISC-V 64 lp64d. ++| ++|.define CFRAME_SPACE, 256 // Delta for sp. 
++| ++|//----- 16 byte aligned, <-- sp entering interpreter ++|.define SAVE_ERRF, 252 // 32 bit values. ++|.define SAVE_NRES, 248 ++|.define SAVE_CFRAME, 240 // 64 bit values. ++|.define SAVE_L, 232 ++|.define SAVE_PC, 224 ++|//----- 16 byte aligned ++|// Padding 216 ++|.define SAVE_GPR_, 112 // .. 112+13*8: 64 bit GPR saves. ++|.define SAVE_FPR_, 16 // .. 16+12*8: 64 bit FPR saves. ++| ++| ++|.define TMPD, 0 ++|//----- 16 byte aligned ++| ++|.define TMPD_OFS, 0 ++| ++|//----------------------------------------------------------------------- ++| ++|.macro saveregs ++| addi sp, sp, -CFRAME_SPACE ++| fsd f27, SAVE_FPR_+11*8(sp) ++| fsd f26, SAVE_FPR_+10*8(sp) ++| fsd f25, SAVE_FPR_+9*8(sp) ++| fsd f24, SAVE_FPR_+8*8(sp) ++| fsd f23, SAVE_FPR_+7*8(sp) ++| fsd f22, SAVE_FPR_+6*8(sp) ++| fsd f21, SAVE_FPR_+5*8(sp) ++| fsd f20, SAVE_FPR_+4*8(sp) ++| fsd f19, SAVE_FPR_+3*8(sp) ++| fsd f18, SAVE_FPR_+2*8(sp) ++| fsd f9, SAVE_FPR_+1*8(sp) ++| fsd f8, SAVE_FPR_+0*8(sp) ++| sd ra, SAVE_GPR_+12*8(sp) ++| sd x27, SAVE_GPR_+11*8(sp) ++| sd x26, SAVE_GPR_+10*8(sp) ++| sd x25, SAVE_GPR_+9*8(sp) ++| sd x24, SAVE_GPR_+8*8(sp) ++| sd x23, SAVE_GPR_+7*8(sp) ++| sd x22, SAVE_GPR_+6*8(sp) ++| sd x21, SAVE_GPR_+5*8(sp) ++| sd x20, SAVE_GPR_+4*8(sp) ++| sd x19, SAVE_GPR_+3*8(sp) ++| sd x18, SAVE_GPR_+2*8(sp) ++| sd x9, SAVE_GPR_+1*8(sp) ++| sd x8, SAVE_GPR_+0*8(sp) ++|.endmacro ++| ++|.macro restoreregs_ret ++| ld ra, SAVE_GPR_+12*8(sp) ++| ld x27, SAVE_GPR_+11*8(sp) ++| ld x26, SAVE_GPR_+10*8(sp) ++| ld x25, SAVE_GPR_+9*8(sp) ++| ld x24, SAVE_GPR_+8*8(sp) ++| ld x23, SAVE_GPR_+7*8(sp) ++| ld x22, SAVE_GPR_+6*8(sp) ++| ld x21, SAVE_GPR_+5*8(sp) ++| ld x20, SAVE_GPR_+4*8(sp) ++| ld x19, SAVE_GPR_+3*8(sp) ++| ld x18, SAVE_GPR_+2*8(sp) ++| ld x9, SAVE_GPR_+1*8(sp) ++| ld x8, SAVE_GPR_+0*8(sp) ++| fld f27, SAVE_FPR_+11*8(sp) ++| fld f26, SAVE_FPR_+10*8(sp) ++| fld f25, SAVE_FPR_+9*8(sp) ++| fld f24, SAVE_FPR_+8*8(sp) ++| fld f23, SAVE_FPR_+7*8(sp) ++| fld f22, SAVE_FPR_+6*8(sp) ++| fld f21, 
SAVE_FPR_+5*8(sp) ++| fld f20, SAVE_FPR_+4*8(sp) ++| fld f19, SAVE_FPR_+3*8(sp) ++| fld f18, SAVE_FPR_+2*8(sp) ++| fld f9, SAVE_FPR_+1*8(sp) ++| fld f8, SAVE_FPR_+0*8(sp) ++| addi sp, sp, CFRAME_SPACE ++| ret ++|.endmacro + +From 175b2690b0f35f4084f8fe92de4ce66f7d39d9ae Mon Sep 17 00:00:00 2001 +From: gns +Date: Tue, 5 Mar 2024 18:07:52 +0800 +Subject: [PATCH 05/22] riscv(interp): add helper macros and typedefs + +--- + src/vm_riscv64.dasc | 353 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 353 insertions(+) + +diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc +index 63977f5f6..562d610dd 100644 +--- a/src/vm_riscv64.dasc ++++ b/src/vm_riscv64.dasc +@@ -162,3 +162,356 @@ + | addi sp, sp, CFRAME_SPACE + | ret + |.endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|// Pseudo-instruction macros ++|// Be cautious with local label 9 since we use them here! ++|.macro bxeq, a, b, tgt ++| bne a, b, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxne, a, b, tgt ++| beq a, b, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxlt, a, b, tgt ++| bge a, b, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxge, a, b, tgt ++| blt a, b, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxgt, a, b, tgt ++| bge b, a, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxle, a, b, tgt ++| blt b, a, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxltu, a, b, tgt ++| bgeu a, b, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxgeu, a, b, tgt ++| bltu a, b, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxgtu, a, b, tgt ++| bgeu b, a, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxleu, a, b, tgt ++| bltu b, a, >9 ++| j tgt ++|9: ++|.endmacro ++| ++|.macro bxeqz, a, tgt ++| bxeq a, x0, tgt ++|.endmacro ++| ++|.macro bxnez, a, tgt ++| bxne a, x0, tgt ++|.endmacro ++| ++|.macro bxlez, a, tgt ++| bxge x0, a, tgt ++|.endmacro ++| ++|.macro bxgez, a, tgt ++| bxge a, x0, tgt ++|.endmacro ++| ++|.macro bxltz, a, tgt ++| bxlt a, x0, tgt ++|.endmacro 
++| ++|.macro bxgtz, a, tgt ++| bxlt x0, a, tgt ++|.endmacro ++| ++|.macro lxi, a, b ++| lui a, (b)&0xfffff ++| srai a, a, 12 ++|.endmacro ++| ++|.macro lzi, a, b ++| lui a, (b)&0xfffff ++| srli a, a, 12 ++|.endmacro ++| ++|.macro addxi, a, b, c ++| lui x31, (c)&0xfffff ++| srai x31, x31, 12 ++| add a, x31, b ++|.endmacro ++| ++|.macro sext.b, a, b ++| slli a, b, 56 ++| srai a, a, 56 ++|.endmacro ++| ++|.macro sext.h, a, b ++| slli a, b, 48 ++| srai a, a, 48 ++|.endmacro ++| ++|.macro zext.h, a, b ++| slli a, b, 48 ++| srli a, a, 48 ++|.endmacro ++| ++|.macro zext.w, a, b ++| slli a, b, 32 ++| srli a, a, 32 ++|.endmacro ++| ++|.macro bfextri, a, b, c, d ++| slli a, b, (63-c) ++| srli a, a, (d+63-c) ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|// Type definitions. Some of these are only used for documentation. ++|.type L, lua_State, LREG ++|.type GL, global_State, GLREG ++|.type TVALUE, TValue ++|.type GCOBJ, GCobj ++|.type STR, GCstr ++|.type TAB, GCtab ++|.type LFUNC, GCfuncL ++|.type CFUNC, GCfuncC ++|.type PROTO, GCproto ++|.type UPVAL, GCupval ++|.type NODE, Node ++|.type NARGS8, int ++|.type TRACE, GCtrace ++|.type SBUF, SBuf ++| ++|//----------------------------------------------------------------------- ++| ++|// Trap for not-yet-implemented parts. ++|.macro NYI; .long 0x00100073; .endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|// Access to frame relative to BASE. ++|.define FRAME_PC, -8 ++|.define FRAME_FUNC, -16 ++| ++|//----------------------------------------------------------------------- ++| ++|// Endian-specific defines. RISC-V only has little endian ABI for now. ++|.define OFS_RD, 2 ++|.define OFS_RA, 1 ++|.define OFS_OP, 0 ++| ++|// Instruction decode. 
++|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro ++|.macro decode_BC4b, dst; slliw dst, dst, 2; .endmacro ++|.macro decode_BC8b, dst; slliw dst, dst, 3; .endmacro ++|.macro decode_RX8b, dst; andi dst, dst, 0x7f8; .endmacro ++| ++|.macro decode_OP8a, dst, ins; decode_OP1 dst, ins; .endmacro ++|.macro decode_OP8b, dst; decode_BC8b dst; .endmacro ++|.macro decode_RA8a, dst, ins; srliw dst, ins, 5; .endmacro ++|.macro decode_RA8b, dst; decode_RX8b dst; .endmacro ++|.macro decode_RB8a, dst, ins; srliw dst, ins, 21; .endmacro ++|.macro decode_RB8b, dst; decode_RX8b dst; .endmacro ++|.macro decode_RC8a, dst, ins; srliw dst, ins, 13; .endmacro ++|.macro decode_RC8b, dst; decode_RX8b dst; .endmacro ++|.macro decode_RD8a, dst, ins; srliw dst, ins, 16; .endmacro ++|.macro decode_RD4b, dst; decode_BC4b dst; .endmacro ++|.macro decode_RD8b, dst; decode_BC8b dst; .endmacro ++|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro ++| ++|.macro decode_OP8, dst, ins; decode_OP1 dst, ins; decode_BC8b dst; .endmacro ++|.macro decode_RA8, dst, ins; decode_RA8a dst, ins; decode_RA8b dst; .endmacro ++|.macro decode_RB8, dst, ins; decode_RB8a dst, ins; decode_RB8b dst; .endmacro ++|.macro decode_RC8, dst, ins; decode_RC8a dst, ins; decode_RC8b dst; .endmacro ++|.macro decode_RD8, dst, ins; decode_RD8a dst, ins; decode_RD8b dst; .endmacro ++| ++|// Instruction fetch. ++|.macro ins_NEXT1 ++| lw INS, 0(PC) ++| addi PC, PC, 4 ++|.endmacro ++|// Instruction decode+dispatch. ++|.macro ins_NEXT2 ++| decode_OP8 TMP1, INS ++| add TMP0, DISPATCH, TMP1 ++| decode_RD8a RD, INS ++| ld TMP4, 0(TMP0) ++| decode_RA8a RA, INS ++| decode_RD8b RD ++| decode_RA8b RA ++| jr TMP4 ++|.endmacro ++|.macro ins_NEXT ++| ins_NEXT1 ++| ins_NEXT2 ++|.endmacro ++| ++|// Instruction footer. ++|.if 1 ++| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. 
++| .define ins_next, ins_NEXT ++| .define ins_next_, ins_NEXT ++| .define ins_next1, ins_NEXT1 ++| .define ins_next2, ins_NEXT2 ++|.else ++| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. ++| // Affects only certain kinds of benchmarks (and only with -j off). ++| .macro ins_next ++| j ->ins_next ++| .endmacro ++| .macro ins_next1 ++| .endmacro ++| .macro ins_next2 ++| j ->ins_next ++| .endmacro ++| .macro ins_next_ ++| ->ins_next: ++| ins_NEXT ++| .endmacro ++|.endif ++| ++|// Call decode and dispatch. ++|.macro ins_callt ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++| ld PC, LFUNC:RB->pc ++| lw INS, 0(PC) ++| addi PC, PC, 4 ++| decode_OP8 TMP1, INS ++| decode_RA8 RA, INS ++| add TMP0, DISPATCH, TMP1 ++| ld TMP0, 0(TMP0) ++| add RA, RA, BASE ++| jr TMP0 ++|.endmacro ++| ++|.macro ins_call ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC ++| sd PC, FRAME_PC(BASE) ++| ins_callt ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|.macro branch_RD ++| srliw TMP0, RD, 1 ++| lui TMP4, (-(BCBIAS_J*4 >> 12)) & 0xfffff ++| addw TMP0, TMP0, TMP4 ++| add PC, PC, TMP0 ++|.endmacro ++| ++|// Assumes J is relative to GL. Some J members might be out of range though. ++#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) ++| ++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) ++| ++|.macro call_intern, curfunc, func ++|->curfunc .. _pcrel_ .. func: ++| auipc CFUNCADDR, extern %pcrel_hi(func) ++| jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func) ++|.endmacro ++|.macro call_extern, func ++| call extern func ++| empty ++|.endmacro ++| ++|// Set current VM state. Uses TMP0. ++|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro ++|.macro st_vmstate; sw TMP0, GL->vmstate; .endmacro ++| ++|// Move table write barrier back. Overwrites mark and tmp. 
++|.macro barrierback, tab, mark, tmp, target ++| ld tmp, GL->gc.grayagain ++| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) ++| sd tab, GL->gc.grayagain ++| sb mark, tab->marked ++| sd tmp, tab->gclist ++| j target ++|.endmacro ++| ++|// Clear type tag. Isolate lowest 64-17=47 bits of reg. ++|.macro cleartp, reg; slli reg, reg, 17; srli reg, reg, 17; .endmacro ++|.macro cleartp, dst, reg; slli dst, reg, 17; srli dst, dst, 17; .endmacro ++| ++|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. ++|.macro settp_a, dst; cleartp dst; .endmacro ++|.macro settp_a, dst, src; cleartp dst, src; .endmacro ++|.macro settp_b, dst, tp; ++| slli x31, tp, 47 ++| or dst, dst, x31 ++|.endmacro ++|.macro settp_b, dst, src, tp; ++| slli x31, tp, 47 ++| or dst, src, x31 ++|.endmacro ++|.macro settp, dst, tp; settp_a dst; settp_b dst, tp; .endmacro ++|.macro settp, dst, src, tp; settp_a dst, src; settp_b dst, dst, tp; .endmacro ++| ++|// Extract (negative) type tag. ++|.macro gettp, dst, src; srai dst, src, 47; .endmacro ++| ++|// Macros to check the TValue type and extract the GCobj. Branch on failure. 
++|.macro checktp, reg, tp, target ++| gettp TMP4, reg ++| addi TMP4, TMP4, tp ++| cleartp reg ++| bxnez TMP4, target ++|.endmacro ++|.macro checktp, dst, reg, tp, target ++| gettp TMP4, reg ++| addi TMP4, TMP4, tp ++| cleartp dst, reg ++| bxnez TMP4, target ++|.endmacro ++|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro ++|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro ++|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro ++|.macro checkint, reg, target ++| gettp TMP4, reg ++| bxne TMP4, TISNUM, target ++|.endmacro ++|.macro checknum, reg, target ++| gettp TMP4, reg ++| sltiu TMP4, TMP4, LJ_TISNUM ++| bxeqz TMP4, target ++|.endmacro ++| ++|.macro mov_false, reg ++| li reg, 0x001 ++| slli reg, reg, 47 ++| not reg, reg ++|.endmacro ++|.macro mov_true, reg ++| li reg, 0x001 ++| slli reg, reg, 48 ++| not reg, reg ++|.endmacro ++| ++|//----------------------------------------------------------------------- + +From 6102ffbb6d350ba073874c3fde83a0e28ab84727 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 08:40:08 +0800 +Subject: [PATCH 06/22] riscv(interp): add base assembly interpreter VM + +--- + src/lj_vm.h | 3 + + src/lj_vmmath.c | 3 +- + src/vm_riscv64.dasc | 3584 ++++++++++++++++++++++++++++++++++++++++++- + 3 files changed, 3586 insertions(+), 4 deletions(-) + +diff --git a/src/lj_vm.h b/src/lj_vm.h +index 9cc42613d..774944725 100644 +--- a/src/lj_vm.h ++++ b/src/lj_vm.h +@@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); + #if LJ_TARGET_PPC + void lj_vm_cachesync(void *start, void *end); + #endif ++#if LJ_TARGET_RISCV64 ++void lj_vm_fence_rw_rw(); ++#endif + LJ_ASMF double lj_vm_foldarith(double x, double y, int op); + #if LJ_HASJIT + LJ_ASMF double lj_vm_foldfpm(double x, int op); +diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c +index 2c9b96cce..a31a4adf5 100644 +--- a/src/lj_vmmath.c ++++ b/src/lj_vmmath.c +@@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, 
double y, int op) + + /* -- Helper functions for generated machine code ------------------------- */ + +-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS ++#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \ ++ || LJ_TARGET_RISCV64 + int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) + { + uint32_t y, ua, ub; +diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc +index 562d610dd..87327c798 100644 +--- a/src/vm_riscv64.dasc ++++ b/src/vm_riscv64.dasc +@@ -437,9 +437,11 @@ + | auipc CFUNCADDR, extern %pcrel_hi(func) + | jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func) + |.endmacro +-|.macro call_extern, func +-| call extern func +-| empty ++|.macro call_extern, curfunc, func ++|->curfunc .. _got_pcrel_ .. func: ++| auipc CFUNCADDR, extern %got_pcrel_hi(func) ++| ld CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _got_pcrel_ .. func)(CFUNCADDR) ++| jalr CFUNCADDR + |.endmacro + | + |// Set current VM state. Uses TMP0. +@@ -515,3 +517,3579 @@ + |.endmacro + | + |//----------------------------------------------------------------------- ++ ++/* Generate subroutines used by opcodes and other parts of the VM. */ ++/* The .code_sub section should be last to help static branch prediction. */ ++static void build_subroutines(BuildCtx *ctx) ++{ ++ |.code_sub ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Return handling ---------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_returnp: ++ | // See vm_return. Also: TMP2 = previous base. ++ | andi TMP0, PC, FRAME_P ++ | ++ | // Return from pcall or xpcall fast func. ++ | mov_true TMP1 ++ | bxeqz TMP0, ->cont_dispatch ++ | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. ++ | mv BASE, TMP2 // Restore caller base. ++ | // Prepending may overwrite the pcall frame, so do it at the end. 
++ | sd TMP1, -8(RA) // Prepend true to results. ++ | addi RA, RA, -8 ++ | ++ |->vm_returnc: ++ | addiw RD, RD, 8 // RD = (nresults+1)*8. ++ | andi TMP0, PC, FRAME_TYPE ++ | li CRET1, LUA_YIELD ++ | bxeqz RD, ->vm_unwind_c_eh ++ | mv MULTRES, RD ++ | bxeqz TMP0, ->BC_RET_Z // Handle regular return to Lua. ++ | ++ |->vm_return: ++ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return ++ | // TMP0 = PC & FRAME_TYPE ++ | andi TMP2, PC, ~FRAME_TYPEP ++ | xori TMP0, TMP0, FRAME_C ++ | sub TMP2, BASE, TMP2 // TMP2 = previous base. ++ | bxnez TMP0, ->vm_returnp ++ | ++ | addiw TMP1, RD, -8 ++ | sd TMP2, L->base ++ | li_vmstate C ++ | lw TMP2, SAVE_NRES(sp) ++ | addi BASE, BASE, -16 ++ | st_vmstate ++ | slliw TMP2, TMP2, 3 ++ | beqz TMP1, >2 ++ |1: ++ | addiw TMP1, TMP1, -8 ++ | ld CRET1, 0(RA) ++ | addi RA, RA, 8 ++ | sd CRET1, 0(BASE) ++ | addi BASE, BASE, 8 ++ | bnez TMP1, <1 ++ | ++ |2: ++ | bne TMP2, RD, >6 ++ |3: ++ | sd BASE, L->top // Store new top. ++ | ++ |->vm_leave_cp: ++ | ld TMP0, SAVE_CFRAME(sp) // Restore previous C frame. ++ | mv CRET1, x0 // Ok return status for vm_pcall. ++ | sd TMP0, L->cframe ++ | ++ |->vm_leave_unw: ++ | restoreregs_ret ++ | ++ |6: ++ | ld TMP1, L->maxstack ++ | blt TMP2, RD, >7 ++ | // More results wanted. Check stack size and fill up results with nil. ++ | bge BASE, TMP1, >9 ++ | sd TISNIL, 0(BASE) ++ | addiw RD, RD, 8 ++ | addi BASE, BASE, 8 ++ | j <2 ++ | ++ |7: // Less results wanted. ++ | subw TMP0, RD, TMP2 ++ | sub TMP0, BASE, TMP0 // Either keep top or shrink it. ++ | beqz TMP2, >8 ++ | mv BASE, TMP0 // LUA_MULTRET+1 case ++ |8: ++ | j <3 ++ | ++ |9: // Corner case: need to grow stack for filling up results. ++ | // This can happen if: ++ | // - A C function grows the stack (a lot). ++ | // - The GC shrinks the stack in between. ++ | // - A return back from a lua_call() with (high) nresults adjustment. ++ | ++ | sd BASE, L->top // Save current top held in BASE (yes). 
++ | mv MULTRES, RD ++ | srliw CARG2, TMP2, 3 ++ | mv CARG1, L ++ | call_intern vm_leave_unw, lj_state_growstack // (lua_State *L, int n) ++ | lw TMP2, SAVE_NRES(sp) ++ | ld BASE, L->top // Need the (realloced) L->top in BASE. ++ | mv RD, MULTRES ++ | slliw TMP2, TMP2, 3 ++ | j <2 ++ | ++ |->vm_unwind_c: // Unwind C stack, return from vm_pcall. ++ | // (void *cframe, int errcode) ++ | mv sp, CARG1 ++ | mv CRET1, CARG2 ++ |->vm_unwind_c_eh: // Landing pad for external unwinder. ++ | ld L, SAVE_L(sp) ++ | li TMP0, ~LJ_VMST_C ++ | ld GL, L->glref ++ | sw TMP0, GL->vmstate ++ | j ->vm_leave_unw ++ | ++ |->vm_unwind_ff: // Unwind C stack, return from ff pcall. ++ | // (void *cframe) ++ | andi sp, CARG1, CFRAME_RAWMASK ++ |->vm_unwind_ff_eh: // Landing pad for external unwinder. ++ | ld L, SAVE_L(sp) ++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). ++ | li TISNIL, LJ_TNIL ++ | li TISNUM, LJ_TISNUM ++ | ld BASE, L->base ++ | ld GL, L->glref // Setup pointer to global state. ++ | slli TMP3, TMP3, 32 ++ | mov_false TMP1 ++ | li_vmstate INTERP ++ | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. ++ | fmv.d.x TOBIT, TMP3 ++ | addi RA, BASE, -8 // Results start at BASE-8. ++ | addxi DISPATCH, GL, GG_G2DISP ++ | sd TMP1, -8(BASE) // Prepend false to error message. ++ | st_vmstate ++ | li RD, 16 // 2 results: false + error message. ++ | j ->vm_returnc ++ | ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Grow stack for calls ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_growstack_c: // Grow stack for C function. ++ | li CARG2, LUA_MINSTACK ++ | j >2 ++ | ++ |->vm_growstack_l: // Grow stack for Lua function. ++ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC ++ | add RC, BASE, RC ++ | sub RA, RA, BASE ++ | sd BASE, L->base ++ | addi PC, PC, 4 // Must point after first instruction. 
++ | sd RC, L->top ++ | srliw CARG2, RA, 3 ++ |2: ++ | // L->base = new base, L->top = top ++ | sd PC, SAVE_PC(sp) ++ | mv CARG1, L ++ | call_intern vm_growstack_l, lj_state_growstack // (lua_State *L, int n) ++ | ld BASE, L->base ++ | ld RC, L->top ++ | ld LFUNC:RB, FRAME_FUNC(BASE) ++ | sub RC, RC, BASE ++ | cleartp LFUNC:RB ++ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++ | ins_callt // Just retry the call. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Entry points into the assembler VM --------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_resume: // Setup C frame and resume thread. ++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) ++ | saveregs ++ | mv L, CARG1 ++ | ld GL, L->glref // Setup pointer to global state. ++ | mv BASE, CARG2 ++ | lbu TMP1, L->status ++ | sd L, SAVE_L(sp) ++ | li PC, FRAME_CP ++ | addi TMP0, sp, CFRAME_RESUME ++ | addxi DISPATCH, GL, GG_G2DISP ++ | sw x0, SAVE_NRES(sp) ++ | sw x0, SAVE_ERRF(sp) ++ | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | sd x0, SAVE_CFRAME(sp) ++ | sd TMP0, L->cframe ++ | beqz TMP1, >3 ++ | ++ | // Resume after yield (like a return). ++ | sd L, GL->cur_L ++ | mv RA, BASE ++ | ld BASE, L->base ++ | ld TMP1, L->top ++ | ld PC, FRAME_PC(BASE) ++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). ++ | sub RD, TMP1, BASE ++ | slli TMP3, TMP3, 32 ++ | sb x0, L->status ++ | fmv.d.x TOBIT, TMP3 ++ | li_vmstate INTERP ++ | addi RD, RD, 8 ++ | st_vmstate ++ | mv MULTRES, RD ++ | andi TMP0, PC, FRAME_TYPE ++ | li TISNIL, LJ_TNIL ++ | li TISNUM, LJ_TISNUM ++ | bxeqz TMP0, ->BC_RET_Z ++ | j ->vm_return ++ | ++ |->vm_pcall: // Setup protected C frame and enter VM. 
++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) ++ | saveregs ++ | sw CARG4, SAVE_ERRF(sp) ++ | li PC, FRAME_CP ++ | j >1 ++ | ++ |->vm_call: // Setup C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1) ++ | saveregs ++ | li PC, FRAME_C ++ | ++ |1: // Entry point for vm_pcall above (PC = ftype). ++ | ld TMP1, L:CARG1->cframe ++ | mv L, CARG1 ++ | sw CARG3, SAVE_NRES(sp) ++ | ld GL, L->glref // Setup pointer to global state. ++ | sd CARG1, SAVE_L(sp) ++ | mv BASE, CARG2 ++ | addxi DISPATCH, GL, GG_G2DISP ++ | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | sd TMP1, SAVE_CFRAME(sp) ++ | sd sp, L->cframe // Add our C frame to cframe chain. ++ | ++ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). ++ | sd L, GL->cur_L ++ | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). ++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). ++ | ld TMP1, L->top ++ | slli TMP3, TMP3, 32 ++ | add PC, PC, BASE ++ | sub NARGS8:RC, TMP1, BASE ++ | li TISNUM, LJ_TISNUM ++ | sub PC, PC, TMP2 // PC = frame delta + frame type ++ | fmv.d.x TOBIT, TMP3 ++ | li_vmstate INTERP ++ | li TISNIL, LJ_TNIL ++ | st_vmstate ++ | ++ |->vm_call_dispatch: ++ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC ++ | ld LFUNC:RB, FRAME_FUNC(BASE) ++ | checkfunc LFUNC:RB, ->vmeta_call ++ | ++ |->vm_call_dispatch_f: ++ | ins_call ++ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC ++ | ++ |->vm_cpcall: // Setup protected C frame, call C. ++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) ++ | saveregs ++ | mv L, CARG1 ++ | ld TMP0, L:CARG1->stack ++ | sd CARG1, SAVE_L(sp) ++ | ld TMP1, L->top ++ | ld GL, L->glref // Setup pointer to global state. ++ | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). ++ | ld TMP1, L->cframe ++ | addxi DISPATCH, GL, GG_G2DISP ++ | sw TMP0, SAVE_NRES(sp) // Neg. 
delta means cframe w/o frame. ++ | sw x0, SAVE_ERRF(sp) // No error function. ++ | sd TMP1, SAVE_CFRAME(sp) ++ | sd sp, L->cframe // Add our C frame to cframe chain. ++ | sd L, GL->cur_L ++ | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) ++ | mv BASE, CRET1 ++ | li PC, FRAME_CP ++ | bnez CRET1, <3 // Else continue with the call. ++ | j ->vm_leave_cp // No base? Just remove C frame. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Metamethod handling ------------------------------------------------ ++ |//----------------------------------------------------------------------- ++ | ++ |//-- Continuation dispatch ---------------------------------------------- ++ | ++ |->cont_dispatch: ++ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 ++ | ld TMP0, -32(BASE) // Continuation. ++ | mv RB, BASE ++ | mv BASE, TMP2 // Restore caller BASE. ++ | ld LFUNC:TMP1, FRAME_FUNC(TMP2) ++ | ld PC, -24(RB) // Restore PC from [cont|PC]. ++ | cleartp LFUNC:TMP1 ++ | add TMP2, RA, RD ++ | ld TMP1, LFUNC:TMP1->pc ++ | sd TISNIL, -8(TMP2) // Ensure one valid arg. ++ | // BASE = base, RA = resultptr, RB = meta base ++ | ld KBASE, PC2PROTO(k)(TMP1) ++ | jr TMP0 // Jump to continuation. 
++ | ++ |->cont_cat: // RA = resultptr, RB = meta base ++ | lw INS, -4(PC) ++ | addi CARG2, RB, -32 ++ | ld TMP0, 0(RA) ++ | decode_RB8 MULTRES, INS ++ | decode_RA8 RA, INS ++ | add TMP1, BASE, MULTRES ++ | sd BASE, L->base ++ | sub CARG3, CARG2, TMP1 ++ | sd TMP0, 0(CARG2) ++ | bxne TMP1, CARG2, ->BC_CAT_Z ++ | add RA, BASE, RA ++ | sd TMP0, 0(RA) ++ | j ->cont_nop ++ | ++ |//-- Table indexing metamethods ----------------------------------------- ++ | ++ |->vmeta_tgets1: ++ | addi CARG3, GL, offsetof(global_State, tmptv) ++ | li TMP0, LJ_TSTR ++ | settp STR:RC, TMP0 ++ | sd STR:RC, 0(CARG3) ++ | j >1 ++ | ++ |->vmeta_tgets: ++ | addi CARG2, GL, offsetof(global_State, tmptv) ++ | addi CARG3, GL, offsetof(global_State, tmptv2) ++ | li TMP0, LJ_TTAB ++ | li TMP1, LJ_TSTR ++ | settp TAB:RB, TMP0 ++ | settp STR:RC, TMP1 ++ | sd TAB:RB, 0(CARG2) ++ | sd STR:RC, 0(CARG3) ++ | j >1 ++ | ++ |->vmeta_tgetb: // TMP0 = index ++ | addi CARG3, GL, offsetof(global_State, tmptv) ++ | settp TMP0, TISNUM ++ | sd TMP0, 0(CARG3) ++ | ++ |->vmeta_tgetv: ++ |1: ++ | sd BASE, L->base ++ | mv CARG1, L ++ | sd PC, SAVE_PC(sp) ++ | // (lua_State *L, TValue *o, TValue *k) ++ | call_intern vmeta_tgetv, lj_meta_tget ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | beqz CRET1, >3 ++ | ld TMP0, 0(CRET1) ++ | ins_next1 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ | ++ |3: // Call __index metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k ++ | addi TMP1, BASE, -FRAME_CONT ++ | li NARGS8:RC, 16 // 2 args for func(t, k). ++ | ld BASE, L->top ++ | sd PC, -24(BASE) // [cont|PC] ++ | sub PC, BASE, TMP1 ++ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | cleartp LFUNC:RB ++ | j ->vm_call_dispatch_f ++ | ++ |->vmeta_tgetr: ++ | call_intern vmeta_tgetr, lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. 
++ | mv TMP1, TISNIL
++ | bxeqz CRET1, ->BC_TGETR_Z
++ | ld TMP1, 0(CRET1)
++ | j ->BC_TGETR_Z
++ |
++ |//-----------------------------------------------------------------------
++ |
++ |->vmeta_tsets1:
++ | addi CARG3, GL, offsetof(global_State, tmptv)
++ | li TMP0, LJ_TSTR
++ | settp STR:RC, TMP0
++ | sd STR:RC, 0(CARG3)
++ | j >1
++ |
++ |->vmeta_tsets:
++ | addi CARG2, GL, offsetof(global_State, tmptv)
++ | addi CARG3, GL, offsetof(global_State, tmptv2)
++ | li TMP0, LJ_TTAB
++ | li TMP1, LJ_TSTR
++ | settp TAB:RB, TMP0
++ | settp STR:RC, TMP1
++ | sd TAB:RB, 0(CARG2)
++ | sd STR:RC, 0(CARG3)
++ | j >1
++ |
++ |->vmeta_tsetb: // TMP0 = index
++ | addi CARG3, GL, offsetof(global_State, tmptv)
++ | settp TMP0, TISNUM
++ | sd TMP0, 0(CARG3)
++ |
++ |->vmeta_tsetv:
++ |1:
++ | sd BASE, L->base
++ | mv CARG1, L
++ | sd PC, SAVE_PC(sp)
++ | // (lua_State *L, TValue *o, TValue *k)
++ | call_intern vmeta_tsetv, lj_meta_tset
++ | // Returns TValue * (finished) or NULL (metamethod).
++ | ld TMP2, 0(RA)
++ | beqz CRET1, >3
++ | ins_next1
++ | // NOBARRIER: lj_meta_tset ensures the table is not black.
++ | sd TMP2, 0(CRET1)
++ | ins_next2
++ |
++ |3: // Call __newindex metamethod.
++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
++ | addi TMP1, BASE, -FRAME_CONT
++ | ld BASE, L->top
++ | sd PC, -24(BASE) // [cont|PC]
++ | sub PC, BASE, TMP1
++ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
++ | li NARGS8:RC, 24 // 3 args for func(t, k, v)
++ | cleartp LFUNC:RB
++ | sd TMP2, 16(BASE) // Copy value to third argument.
++ | j ->vm_call_dispatch_f
++ |
++ |->vmeta_tsetr:
++ | sd BASE, L->base
++ | mv CARG1, L
++ | sd PC, SAVE_PC(sp)
++ | // (lua_State *L, GCtab *t, int32_t key)
++ | call_intern vmeta_tsetr, lj_tab_setinth
++ | // Returns TValue *.
++ | j ->BC_TSETR_Z
++ |
++ |//-- Comparison metamethods ---------------------------------------------
++ |
++ |->vmeta_comp:
++ | // RA/RD point to o1/o2. 
++ | mv CARG2, RA ++ | mv CARG3, RD ++ | addi PC, PC, -4 ++ | sd BASE, L->base ++ | mv CARG1, L ++ | decode_OP1 CARG4, INS ++ | sd PC, SAVE_PC(sp) ++ | // (lua_State *L, TValue *o1, *o2, int op) ++ | call_intern vmeta_comp, lj_meta_comp ++ | // Returns 0/1 or TValue * (metamethod). ++ |3: ++ | sltiu TMP1, CRET1, 2 ++ | bxeqz TMP1, ->vmeta_binop ++ | negw TMP2, CRET1 ++ |4: ++ | lhu RD, OFS_RD(PC) ++ | addi PC, PC, 4 ++ | lui TMP1, (-(BCBIAS_J*4 >> 12)) & 0xfffff ++ | slliw RD, RD, 2 ++ | addw RD, RD, TMP1 ++ | and RD, RD, TMP2 ++ | add PC, PC, RD ++ |->cont_nop: ++ | ins_next ++ | ++ |->cont_ra: // RA = resultptr ++ | lbu TMP1, -4+OFS_RA(PC) ++ | ld TMP2, 0(RA) ++ | slliw TMP1, TMP1, 3 ++ | add TMP1, BASE, TMP1 ++ | sd TMP2, 0(TMP1) ++ | j ->cont_nop ++ | ++ |->cont_condt: // RA = resultptr ++ | ld TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | sltiu TMP1, TMP0, LJ_TISTRUECOND ++ | negw TMP2, TMP1 // Branch if result is true. ++ | j <4 ++ | ++ |->cont_condf: // RA = resultptr ++ | ld TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | sltiu TMP1, TMP0, LJ_TISTRUECOND ++ | addiw TMP2, TMP1, -1 // Branch if result is false. ++ | j <4 ++ | ++ |->vmeta_equal: ++ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. ++ | cleartp LFUNC:CARG3, CARG2 ++ | cleartp LFUNC:CARG2, CARG1 ++ | mv CARG4, TMP0 ++ | addi PC, PC, -4 ++ | sd BASE, L->base ++ | mv CARG1, L ++ | sd PC, SAVE_PC(sp) ++ | // (lua_State *L, GCobj *o1, *o2, int ne) ++ | call_intern vmeta_equal, lj_meta_equal ++ | // Returns 0/1 or TValue * (metamethod). 
++ | j <3 ++ | ++ |->vmeta_istype: ++ | addi PC, PC, -4 ++ | sd BASE, L->base ++ | mv CARG1, L ++ | srliw CARG2, RA, 3 ++ | srliw CARG3, RD, 3 ++ | sd PC, SAVE_PC(sp) ++ | // (lua_State *L, TValue *o, BCReg tp) ++ | call_intern vmeta_istype, lj_meta_istype ++ | j ->cont_nop ++ | ++ |//-- Arithmetic metamethods --------------------------------------------- ++ | ++ |->vmeta_unm: ++ | mv RC, RB ++ | ++ |->vmeta_arith: ++ | mv CARG1, L ++ | sd BASE, L->base ++ | mv CARG2, RA ++ | sd PC, SAVE_PC(sp) ++ | mv CARG3, RB ++ | mv CARG4, RC ++ | decode_OP1 CARG5, INS ++ | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | call_intern vmeta_arith, lj_meta_arith ++ | // Returns NULL (finished) or TValue * (metamethod). ++ | bxeqz CRET1, ->cont_nop ++ | ++ | // Call metamethod for binary op. ++ |->vmeta_binop: ++ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 ++ | sub TMP1, CRET1, BASE ++ | sd PC, -24(CRET1) // [cont|PC] ++ | mv TMP2, BASE ++ | addi PC, TMP1, FRAME_CONT ++ | mv BASE, CRET1 ++ | li NARGS8:RC, 16 // 2 args for func(o1, o2). ++ | j ->vm_call_dispatch ++ | ++ |->vmeta_len: ++ | // CARG2 already set by BC_LEN. ++#if LJ_52 ++ | mv MULTRES, CARG1 ++#endif ++ | sd BASE, L->base ++ | mv CARG1, L ++ | sd PC, SAVE_PC(sp) ++ | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o) ++ | // Returns NULL (retry) or TValue * (metamethod base). ++#if LJ_52 ++ | bxnez CRET1, ->vmeta_binop // Binop call for compatibility. ++ | mv CARG1, MULTRES ++ | j ->BC_LEN_Z ++#else ++ | j ->vmeta_binop // Binop call for compatibility. ++#endif ++ | ++ |//-- Call metamethod ---------------------------------------------------- ++ | ++ |->vmeta_call: // Resolve and call __call metamethod. ++ | // TMP2 = old base, BASE = new base, RC = nargs*8 ++ | mv CARG1, L ++ | sd TMP2, L->base // This is the callers base! 
++ | addi CARG2, BASE, -16 ++ | sd PC, SAVE_PC(sp) ++ | add CARG3, BASE, RC ++ | mv MULTRES, NARGS8:RC ++ | // (lua_State *L, TValue *func, TValue *top) ++ | call_intern vmeta_call, lj_meta_call ++ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. ++ | cleartp LFUNC:RB ++ | ins_call ++ | ++ |->vmeta_callt: // Resolve __call for BC_CALLT. ++ | // BASE = old base, RA = new base, RC = nargs*8 ++ | mv CARG1, L ++ | sd BASE, L->base ++ | addi CARG2, RA, -16 ++ | sd PC, SAVE_PC(sp) ++ | add CARG3, RA, RC ++ | mv MULTRES, NARGS8:RC ++ | // (lua_State *L, TValue *func, TValue *top) ++ | call_intern vmeta_callt, lj_meta_call ++ | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. ++ | ld TMP1, FRAME_PC(BASE) ++ | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. ++ | cleartp LFUNC:CARG3, RB ++ | j ->BC_CALLT_Z ++ | ++ |//-- Argument coercion for 'for' statement ------------------------------ ++ | ++ |->vmeta_for: ++ | mv CARG1, L ++ | sd BASE, L->base ++ | mv CARG2, RA ++ | sd PC, SAVE_PC(sp) ++ | mv MULTRES, INS ++ | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base) ++ | decode_RA8 RA, MULTRES ++ | decode_RD8 RD, MULTRES ++ | j =>BC_FORI ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Fast functions ----------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro .ffunc, name ++ |->ff_ .. name: ++ |.endmacro ++ | ++ |.macro .ffunc_1, name ++ |->ff_ .. name: ++ | ld CARG1, 0(BASE) ++ | bxeqz NARGS8:RC, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_2, name ++ |->ff_ .. name: ++ | sltiu TMP0, NARGS8:RC, 16 ++ | ld CARG1, 0(BASE) ++ | ld CARG2, 8(BASE) ++ | bxnez TMP0, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_n, name ++ |->ff_ .. 
name: ++ | ld CARG1, 0(BASE) ++ | fld FARG1, 0(BASE) ++ | bxeqz NARGS8:RC, ->fff_fallback ++ | checknum CARG1, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_nn, name ++ |->ff_ .. name: ++ | ld CARG1, 0(BASE) ++ | sltiu TMP0, NARGS8:RC, 16 ++ | ld CARG2, 8(BASE) ++ | bxnez TMP0, ->fff_fallback ++ | gettp TMP1, CARG1 ++ | gettp TMP2, CARG2 ++ | sltiu TMP1, TMP1, LJ_TISNUM ++ | sltiu TMP2, TMP2, LJ_TISNUM ++ | fld FARG1, 0(BASE) ++ | and TMP1, TMP1, TMP2 ++ | fld FARG2, 8(BASE) ++ | bxeqz TMP1, ->fff_fallback ++ |.endmacro ++ | ++ |// Inlined GC threshold check. ++ |.macro ffgccheck ++ | ld TMP0, GL->gc.total ++ | ld TMP1, GL->gc.threshold ++ | bltu TMP0, TMP1, >1 ++ | jal ->fff_gcstep ++ |1: ++ |.endmacro ++ | ++ |//-- Base library: checks ----------------------------------------------- ++ |.ffunc_1 assert ++ | gettp TMP1, CARG1 ++ | sltiu TMP1, TMP1, LJ_TISTRUECOND ++ | addi RA, BASE, -16 ++ | bxeqz TMP1, ->fff_fallback ++ | ld PC, FRAME_PC(BASE) ++ | addiw RD, NARGS8:RC, 8 // Compute (nresults+1)*8. ++ | addi TMP1, BASE, 8 ++ | add TMP2, RA, RD ++ | sd CARG1, -16(BASE) ++ | bne BASE, TMP2, >1 ++ | j ->fff_res // Done if exactly 1 argument. ++ |1: ++ | ld TMP0, 0(TMP1) ++ | sd TMP0, -16(TMP1) ++ | mv TMP3, TMP1 ++ | addi TMP1, TMP1, 8 ++ | bne TMP3, TMP2, <1 ++ | j ->fff_res ++ | ++ |.ffunc_1 type ++ | gettp TMP0, CARG1 ++ | not TMP3, TMP0 ++ | bltu TISNUM, TMP0, >1 ++ | li TMP3, ~LJ_TISNUM ++ |1: ++ | slli TMP3, TMP3, 3 ++ | add TMP3, CFUNC:RB, TMP3 ++ | ld CARG1, CFUNC:TMP3->upvalue ++ | j ->fff_restv ++ | ++ |//-- Base library: getters and setters --------------------------------- ++ | ++ |.ffunc_1 getmetatable ++ | gettp TMP2, CARG1 ++ | addi TMP0, TMP2, -LJ_TTAB ++ | addi TMP1, TMP2, -LJ_TUDATA ++ | snez TMP0, TMP0 ++ | neg TMP0, TMP0 ++ | and TMP0, TMP0, TMP1 ++ | cleartp TAB:CARG1 ++ | bnez TMP0, >6 ++ |1: // Field metatable must be at same offset for GCtab and GCudata! 
++ | ld TAB:RB, TAB:CARG1->metatable ++ |2: ++ | ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] ++ | li CARG1, LJ_TNIL ++ | bxeqz TAB:RB, ->fff_restv ++ | lw TMP0, TAB:RB->hmask ++ | lw TMP1, STR:RC->sid ++ | ld NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask ++ | slli TMP0, TMP1, 5 ++ | slli TMP1, TMP1, 3 ++ | sub TMP1, TMP0, TMP1 ++ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | li CARG4, LJ_TSTR ++ | settp STR:RC, CARG4 // Tagged key to look for. ++ |3: // Rearranged logic, because we expect _not_ to find the key. ++ | ld TMP0, NODE:TMP2->key ++ | ld CARG1, NODE:TMP2->val ++ | ld NODE:TMP2, NODE:TMP2->next ++ | li TMP3, LJ_TTAB ++ | beq RC, TMP0, >5 ++ | bnez NODE:TMP2, <3 ++ |4: ++ | settp CARG1, RB, TMP3 ++ | j ->fff_restv // Not found, keep default result. ++ |5: ++ | bxne CARG1, TISNIL, ->fff_restv ++ | j <4 // Ditto for nil value. ++ | ++ |6: ++ | sltiu TMP3, TMP2, LJ_TISNUM ++ | neg TMP4, TMP3 ++ | xor TMP0, TMP2, TISNUM // TMP2 = TMP3 ? TISNUM : TMP2 ++ | and TMP0, TMP0, TMP4 ++ | xor TMP2, TMP0, TMP2 ++ | slli TMP2, TMP2, 3 ++ | sub TMP0, GL, TMP2 ++ | ld TAB:RB, (offsetof(global_State, gcroot[GCROOT_BASEMT])-8)(TMP0) ++ | j <2 ++ | ++ |.ffunc_2 setmetatable ++ | // Fast path: no mt for table yet and not clearing the mt. 
++ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | gettp TMP3, CARG2 ++ | ld TAB:TMP0, TAB:TMP1->metatable ++ | lbu TMP2, TAB:TMP1->marked ++ | addi TMP3, TMP3, -LJ_TTAB ++ | cleartp TAB:CARG2 ++ | or TMP3, TMP3, TAB:TMP0 ++ | bxnez TMP3, ->fff_fallback ++ | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table) ++ | sd TAB:CARG2, TAB:TMP1->metatable ++ | bxeqz TMP3, ->fff_restv ++ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv ++ | ++ |.ffunc rawget ++ | ld CARG2, 0(BASE) ++ | sltiu TMP0, NARGS8:RC, 16 ++ | gettp TMP1, CARG2 ++ | cleartp CARG2 ++ | addi TMP1, TMP1, -LJ_TTAB ++ | or TMP0, TMP0, TMP1 ++ | addi CARG3, BASE, 8 ++ | bxnez TMP0, ->fff_fallback ++ | mv CARG1, L ++ | call_intern ff_rawget, lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) ++ | // Returns cTValue *. ++ | ld CARG1, 0(CRET1) ++ | j ->fff_restv ++ | ++ |//-- Base library: conversions ------------------------------------------ ++ | ++ |.ffunc tonumber ++ | // Only handles the number case inline (without a base argument). ++ | ld CARG1, 0(BASE) ++ | xori TMP0, NARGS8:RC, 8 // Exactly one number argument. ++ | gettp TMP1, CARG1 ++ | sltu TMP1, TISNUM, TMP1 ++ | or TMP0, TMP0, TMP1 ++ | bxnez TMP0, ->fff_fallback // No args or CARG1 is not number ++ | j ->fff_restv ++ | ++ |.ffunc_1 tostring ++ | // Only handles the string or number case inline. ++ | gettp TMP0, CARG1 ++ | addi TMP1, TMP0, -LJ_TSTR ++ | // A __tostring method in the string base metatable is ignored. ++ | bxeqz TMP1, ->fff_restv // String key? ++ | // Handle numbers inline, unless a number base metatable is present. ++ | ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM] ++ | sltu TMP0, TISNUM, TMP0 ++ | sd BASE, L->base // Add frame since C call can throw. ++ | or TMP0, TMP0, TMP1 ++ | bxnez TMP0, ->fff_fallback ++ | sd PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | ffgccheck ++ | mv CARG1, L ++ | mv CARG2, BASE ++ | call_intern ff_tostring, lj_strfmt_number // (lua_State *L, cTValue *o) ++ | // Returns GCstr *. 
++ | li TMP1, LJ_TSTR ++ | ld BASE, L->base ++ | settp CARG1, TMP1 ++ | j ->fff_restv ++ | ++ |//-- Base library: iterators ------------------------------------------- ++ | ++ |.ffunc_1 next ++ | checktp CARG1, -LJ_TTAB, ->fff_fallback ++ | add TMP0, BASE, NARGS8:RC ++ | ld PC, FRAME_PC(BASE) ++ | sd TISNIL, 0(TMP0) // Set missing 2nd arg to nil. ++ | addi CARG2, BASE, 8 ++ | addi CARG3, BASE, -16 ++ | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | // Returns 1=found, 0=end, -1=error. ++ | li RD, (2+1)*8 ++ | bxgtz CRET1, ->fff_res // Found key/value. ++ | mv TMP1, CRET1 ++ | mv CARG1, TISNIL ++ | bxeqz TMP1, ->fff_restv // End of traversal: return nil. ++ | ld CFUNC:RB, FRAME_FUNC(BASE) ++ | li RC, 2*8 ++ | cleartp CFUNC:RB ++ | j ->fff_fallback // Invalid key. ++ | ++ |.ffunc_1 pairs ++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | ld PC, FRAME_PC(BASE) ++#if LJ_52 ++ | ld TAB:TMP2, TAB:TMP1->metatable ++ | ld TMP0, CFUNC:RB->upvalue[0] ++ | bxnez TAB:TMP2, ->fff_fallback ++#else ++ | ld TMP0, CFUNC:RB->upvalue[0] ++#endif ++ | sd TISNIL, 0(BASE) ++ | sd CARG1, -8(BASE) ++ | sd TMP0, -16(BASE) ++ | li RD, (3+1)*8 ++ | j ->fff_res ++ | ++ |.ffunc_2 ipairs_aux ++ | checktab CARG1, ->fff_fallback ++ | checkint CARG2, ->fff_fallback ++ | lw TMP0, TAB:CARG1->asize ++ | ld TMP1, TAB:CARG1->array ++ | ld PC, FRAME_PC(BASE) ++ | sext.w TMP2, CARG2 ++ | addiw TMP2, TMP2, 1 ++ | sltu TMP3, TMP2, TMP0 ++ | zext.w TMP0, TMP2 ++ | settp_b TMP0, TISNUM ++ | sd TMP0, -16(BASE) ++ | beqz TMP3, >2 // Not in array part? ++ | slli TMP3, TMP2, 3 ++ | add TMP3, TMP1, TMP3 ++ | ld TMP1, 0(TMP3) ++ |1: ++ | li RD, (0+1)*8 ++ | bxeq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. ++ | sd TMP1, -8(BASE) ++ | li RD, (2+1)*8 ++ | j ->fff_res ++ |2: // Check for empty hash part first. Otherwise call C function. 
++ | lw TMP0, TAB:CARG1->hmask ++ | li RD, (0+1)*8 ++ | bxeqz TMP0, ->fff_res ++ | mv CARG2, TMP2 ++ | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. ++ | li RD, (0+1)*8 ++ | bxeqz CRET1, ->fff_res ++ | ld TMP1, 0(CRET1) ++ | j <1 ++ | ++ |.ffunc_1 ipairs ++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | ld PC, FRAME_PC(BASE) ++#if LJ_52 ++ | ld TAB:TMP2, TAB:TMP1->metatable ++#endif ++ | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] ++#if LJ_52 ++ | bxnez TAB:TMP2, ->fff_fallback ++#endif ++ | slli TMP1, TISNUM, 47 ++ | sd CARG1, -8(BASE) ++ | sd TMP1, 0(BASE) ++ | sd CFUNC:TMP0, -16(BASE) ++ | li RD, (3+1)*8 ++ | j ->fff_res ++ | ++ |//-- Base library: catch errors ---------------------------------------- ++ | ++ |.ffunc pcall ++ | ld TMP1, L->maxstack ++ | add TMP2, BASE, NARGS8:RC ++ | bxltu TMP1, TMP2, ->fff_fallback ++ | addi NARGS8:TMP0, NARGS8:RC, -8 ++ | lbu TMP3, GL->hookmask ++ | mv TMP2, BASE ++ | bxltz NARGS8:TMP0, ->fff_fallback ++ | mv NARGS8:RC, NARGS8:TMP0 ++ | addi BASE, BASE, 16 ++ | // Remember active hook before pcall. ++ | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT ++ | andi TMP3, TMP3, 1 ++ | addi PC, TMP3, 16+FRAME_PCALL ++ | bxeqz NARGS8:RC, ->vm_call_dispatch ++ |1: ++ | add TMP0, BASE, NARGS8:RC ++ |2: ++ | ld TMP1, -16(TMP0) ++ | sd TMP1, -8(TMP0) ++ | addi TMP0, TMP0, -8 ++ | bne TMP0, BASE, <2 ++ | j ->vm_call_dispatch ++ | ++ |.ffunc xpcall ++ | ld TMP1, L->maxstack ++ | add TMP2, BASE, NARGS8:RC ++ | bxltu TMP1, TMP2, ->fff_fallback ++ | addi NARGS8:TMP0, NARGS8:RC, -16 ++ | ld CARG1, 0(BASE) ++ | ld CARG2, 8(BASE) ++ | lbu TMP1, GL->hookmask ++ | bxltz NARGS8:TMP0, ->fff_fallback ++ | gettp TMP2, CARG2 ++ | addi TMP2, TMP2, -LJ_TFUNC ++ | bxnez TMP2, ->fff_fallback // Traceback must be a function. ++ | mv TMP2, BASE ++ | mv NARGS8:RC, NARGS8:TMP0 ++ | addi BASE, BASE, 24 ++ | // Remember active hook before pcall. 
++ | srliw TMP3, TMP1, HOOK_ACTIVE_SHIFT
++ | sd CARG2, 0(TMP2) // Swap function and traceback.
++ | andi TMP3, TMP3, 1
++ | sd CARG1, 8(TMP2)
++ | addi PC, TMP3, 24+FRAME_PCALL
++ | bnez NARGS8:RC, <1
++ | j ->vm_call_dispatch
++ |
++ |//-- Coroutine library --------------------------------------------------
++ |
++ |.macro coroutine_resume_wrap, resume
++ |.if resume
++ |.ffunc_1 coroutine_resume
++ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
++ |.else
++ |.ffunc coroutine_wrap_aux
++ | ld L:CARG1, CFUNC:RB->upvalue[0].gcr
++ | cleartp L:CARG1
++ |.endif
++ | lbu TMP0, L:CARG1->status
++ | ld TMP1, L:CARG1->cframe
++ | ld CARG2, L:CARG1->top
++ | ld TMP2, L:CARG1->base
++ | addiw CARG4, TMP0, -LUA_YIELD
++ | add CARG3, CARG2, TMP0
++ | addi TMP3, CARG2, 8
++ | seqz TMP4, CARG4
++ | neg TMP4, TMP4
++ | xor CARG2, CARG2, TMP3 // CARG2 = TMP4 ? CARG2 : TMP3
++ | and CARG2, CARG2, TMP4
++ | xor CARG2, TMP3, CARG2
++ | bxgtz CARG4, ->fff_fallback // st > LUA_YIELD?
++ | xor TMP2, TMP2, CARG3
++ | or CARG4, TMP2, TMP0
++ | bxnez TMP1, ->fff_fallback // cframe != 0?
++ | ld TMP0, L:CARG1->maxstack
++ | ld PC, FRAME_PC(BASE)
++ | bxeqz CARG4, ->fff_fallback // base == top && st == 0?
++ | add TMP2, CARG2, NARGS8:RC
++ | sd BASE, L->base
++ | sd PC, SAVE_PC(sp)
++ | bxltu TMP0, TMP2, ->fff_fallback // Stack overflow?
++ |1:
++ |.if resume
++ | addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
++ | addi NARGS8:RC, NARGS8:RC, -8
++ | addi TMP2, TMP2, -8
++ |.endif
++ | sd TMP2, L:CARG1->top
++ | sd BASE, L->top
++ | add TMP1, BASE, NARGS8:RC
++ | mv CARG3, CARG2
++ |2: // Move args to coroutine.
++ | ld TMP0, 0(BASE)
++ | sltu TMP3, BASE, TMP1
++ | addi BASE, BASE, 8
++ | beqz TMP3, >3
++ | sd TMP0, 0(CARG3)
++ | addi CARG3, CARG3, 8
++ | j <2
++ |3:
++ | mv L:RA, L:CARG1
++ | jal ->vm_resume // (lua_State *L, TValue *base, 0, 0)
++ | // Returns thread status. 
++ |4: ++ | ld TMP2, L:RA->base ++ | sltiu TMP1, CRET1, LUA_YIELD+1 ++ | ld TMP3, L:RA->top ++ | li_vmstate INTERP ++ | ld BASE, L->base ++ | sd L, GL->cur_L ++ | st_vmstate ++ | sub RD, TMP3, TMP2 ++ | beqz TMP1, >8 ++ | ld TMP0, L->maxstack ++ | add TMP1, BASE, RD ++ | beqz RD, >6 // No results? ++ | add TMP3, TMP2, RD ++ | bltu TMP0, TMP1, >9 // Need to grow stack? ++ | sd TMP2, L:RA->top // Clear coroutine stack. ++ | mv TMP1, BASE ++ |5: // Move results from coroutine. ++ | ld TMP0, 0(TMP2) ++ | addi TMP2, TMP2, 8 ++ | sd TMP0, 0(TMP1) ++ | addi TMP1, TMP1, 8 ++ | bltu TMP2, TMP3, <5 ++ |6: ++ |.if resume ++ | mov_true TMP1 ++ | addi RD, RD, 16 ++ |7: ++ | sd TMP1, -8(BASE) // Prepend true/false to results. ++ | addi RA, BASE, -8 ++ |.else ++ | mv RA, BASE ++ | addi RD, RD, 8 ++ |.endif ++ | andi TMP0, PC, FRAME_TYPE ++ | sd PC, SAVE_PC(sp) ++ | mv MULTRES, RD ++ |// bxeqz TMP0, ->BC_RET_Z // Local label 9 in use ++ | bnez TMP0, >6 ++ | j ->BC_RET_Z ++ |6: ++ | j ->vm_return ++ | ++ |8: // Coroutine returned with error (at co->top-1). ++ |.if resume ++ | addi TMP3, TMP3, -8 ++ | mov_false TMP1 ++ | li RD, (2+1)*8 ++ | ld TMP0, 0(TMP3) ++ | sd TMP3, L:RA->top // Remove error from coroutine stack. ++ | sd TMP0, 0(BASE) // Copy error message. ++ | j <7 ++ |.else ++ | mv CARG1, L ++ | mv CARG2, L:RA ++ | // (lua_State *L, lua_State *co) ++ | call_intern ff_coroutine_wrap_aux, lj_ffh_coroutine_wrap_err ++ |.endif ++ | ++ |9: // Handle stack expansion on return from yield. 
++ | mv CARG1, L ++ | srliw CARG2, RD, 3 ++ | // (lua_State *L, int n) ++ |.if resume ++ | call_intern ff_coroutine_resume, lj_state_growstack ++ |.else ++ | call_intern ff_coroutine_wrap_aux, lj_state_growstack ++ |.endif ++ | mv CRET1, x0 ++ | j <4 ++ |.endmacro ++ | ++ | coroutine_resume_wrap 1 // coroutine.resume ++ | coroutine_resume_wrap 0 // coroutine.wrap ++ | ++ |.ffunc coroutine_yield ++ | ld TMP0, L->cframe ++ | add TMP1, BASE, NARGS8:RC ++ | li CRET1, LUA_YIELD ++ | sd BASE, L->base ++ | andi TMP0, TMP0, CFRAME_RESUME ++ | sd TMP1, L->top ++ | bxeqz TMP0, ->fff_fallback ++ | sd x0, L->cframe ++ | sb CRET1, L->status ++ | j ->vm_leave_unw ++ | ++ |//-- Math library ------------------------------------------------------- ++ | ++ |.macro math_round, func, rm ++ |->ff_math_ .. func: ++ | ld CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | bxeqz NARGS8:RC, ->fff_fallback ++ | fmv.d.x FARG1, CARG1 ++ | bxeq TMP0, TISNUM, ->fff_restv ++ | srli TMP1, CARG1, 52 // Extract exponent (and sign). ++ | bxgeu TMP0, TISNUM, ->fff_fallback ++ | andi TMP1, TMP1, 0x7ff // Extract exponent. ++ | slti TMP2, TMP1, 1023 + 52 + 1 // 1023: Bias, 52: Max fraction ++ | bxeqz TMP2, ->fff_resn // Less than 2^52 / Not NaN? ++ | fcvt.l.d TMP3, FARG1, rm ++ | fcvt.d.l FTMP1, TMP3 ++ | fsgnj.d FRET1, FTMP1, FARG1 ++ | j ->fff_resn ++ |.endmacro ++ | ++ | math_round floor, rdn ++ | math_round ceil, rup ++ | ++ |.ffunc_1 math_abs ++ | gettp CARG2, CARG1 ++ | addi TMP2, CARG2, -LJ_TISNUM ++ | sext.w TMP1, CARG1 ++ | bnez TMP2, >1 ++ | sraiw TMP0, TMP1, 31 // Extract sign. int ++ | xor TMP1, TMP1, TMP0 ++ | sub CARG1, TMP1, TMP0 ++ | slli TMP3, CARG1, 32 ++ | settp CARG1, TISNUM ++ | bxgez TMP3, ->fff_restv ++ | lui CARG1, 0x41e00 // 2^31 as a double. 
++ | slli CARG1, CARG1, 32 ++ | j ->fff_restv ++ |1: ++ | sltiu TMP2, CARG2, LJ_TISNUM ++ | slli CARG1, CARG1, 1 ++ | srli CARG1, CARG1, 1 ++ | bxeqz TMP2, ->fff_fallback // int ++ |// fallthrough ++ | ++ |->fff_restv: ++ | // CARG1 = TValue result. ++ | ld PC, FRAME_PC(BASE) ++ | sd CARG1, -16(BASE) ++ |->fff_res1: ++ | // RA = results, PC = return. ++ | li RD, (1+1)*8 ++ |->fff_res: ++ | // RA = results, RD = (nresults+1)*8, PC = return. ++ | andi TMP0, PC, FRAME_TYPE ++ | mv MULTRES, RD ++ | addi RA, BASE, -16 ++ | bxnez TMP0, ->vm_return ++ | lw INS, -4(PC) ++ | decode_RB8 RB, INS ++ |5: ++ | bltu RD, RB, >6 // More results expected? ++ | decode_RA8a TMP0, INS ++ | ins_next1 ++ | decode_RA8b TMP0 ++ | // Adjust BASE. KBASE is assumed to be set for the calling frame. ++ | sub BASE, RA, TMP0 ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | add TMP1, RA, RD ++ | addi RD, RD, 8 ++ | sd TISNIL, -8(TMP1) ++ | j <5 ++ | ++ |.macro math_extern, func ++ | .ffunc_n math_ .. func ++ | call_extern ff_math_extern, func ++ | j ->fff_resn ++ |.endmacro ++ | ++ |.macro math_extern2, func ++ | .ffunc_nn math_ .. func ++ | call_extern ff_math_extern2, func ++ | j ->fff_resn ++ |.endmacro ++ | ++ |.ffunc_n math_sqrt ++ | fsqrt.d FRET1, FARG1 ++ |->fff_resn: ++ | ld PC, FRAME_PC(BASE) ++ | fsd FRET1, -16(BASE) ++ | j ->fff_res1 ++ | ++ |.ffunc math_log ++ | li TMP1, 8 ++ | ld CARG1, 0(BASE) ++ | fld FARG1, 0(BASE) ++ | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. 
++ | checknum CARG1, ->fff_fallback ++ | call_extern ff_math_log, log ++ | j ->fff_resn ++ | ++ | math_extern log10 ++ | math_extern exp ++ | math_extern sin ++ | math_extern cos ++ | math_extern tan ++ | math_extern asin ++ | math_extern acos ++ | math_extern atan ++ | math_extern sinh ++ | math_extern cosh ++ | math_extern tanh ++ | math_extern2 pow ++ | math_extern2 atan2 ++ | math_extern2 fmod ++ | ++ |.ffunc_2 math_ldexp ++ | checknum CARG1, ->fff_fallback ++ | checkint CARG2, ->fff_fallback ++ | fld FARG1, 0(BASE) ++ | lw CARG1, 8(BASE) ++ | call_extern ff_math_ldexp, ldexp // (double x, int exp) ++ | j ->fff_resn ++ | ++ |.ffunc_n math_frexp ++ | ld PC, FRAME_PC(BASE) ++ | addi CARG1, GL, offsetof(global_State, tmptv) ++ | call_extern ff_math_frexp, frexp ++ | lw TMP1, GL->tmptv ++ | fcvt.d.w FARG2, TMP1 ++ | fsd FRET1, -16(BASE) ++ | fsd FARG2, -8(BASE) ++ | li RD, (2+1)*8 ++ | j ->fff_res ++ | ++ |.ffunc_n math_modf ++ | addi CARG1, BASE, -16 ++ | ld PC, FRAME_PC(BASE) ++ | call_extern ff_math_modf, modf ++ | fsd FRET1, -8(BASE) ++ | li RD, (2+1)*8 ++ | j ->fff_res ++ | ++ |.macro math_minmax, name, ismax ++ | .ffunc_1 name ++ | add RB, BASE, NARGS8:RC ++ | addi RA, BASE, 8 ++ | checkint CARG1, >4 ++ |1: // Handle integers. ++ | ld CARG2, 0(RA) ++ | bxeq RA, RB, ->fff_restv ++ | sext.w CARG1, CARG1 ++ | checkint CARG2, >3 ++ | sext.w CARG2, CARG2 ++ | slt TMP0, CARG1, CARG2 ++ |.if ismax ++ | addi TMP1, TMP0, -1 ++ |.else ++ | neg TMP1, TMP0 ++ |.endif ++ | xor TMP2, CARG1, CARG2 // CARG1 = TMP1 ? CARG1 : CARG2 ++ | and TMP2, TMP2, TMP1 ++ | xor CARG1, CARG2, TMP2 ++ | addi RA, RA, 8 ++ | zext.w CARG1, CARG1 ++ | settp_b CARG1, TISNUM ++ | j <1 ++ |3: // Convert intermediate result to number and continue below. ++ | fcvt.d.w FARG1, CARG1 ++ | checknum CARG2, ->fff_fallback ++ | fld FARG2, 0(RA) ++ | j >6 ++ | ++ |4: ++ | fld FARG1, 0(BASE) ++ | checknum CARG1, ->fff_fallback ++ |5: // Handle numbers. 
++ | ld CARG2, 0(RA) ++ | fld FARG2, 0(RA) ++ | bxgeu RA, RB, ->fff_resn ++ | checknum CARG2, >7 ++ |6: ++ |.if ismax ++ | flt.d TMP0, FARG2, FARG1 ++ |.else // min ++ | flt.d TMP0, FARG1, FARG2 ++ |.endif ++ | bnez TMP0, >8 // skip swap ++ | fmv.d FARG1, FARG2 ++ |8: ++ | addi RA, RA, 8 ++ | j <5 ++ |7: // Convert integer to number and continue above. ++ | checkint CARG2, ->fff_fallback ++ | fcvt.d.w FARG2, CARG2 ++ | j <6 ++ |.endmacro ++ | ++ | math_minmax math_min, 0 ++ | math_minmax math_max, 1 ++ | ++ |//-- String library ----------------------------------------------------- ++ | ++ |.ffunc string_byte // Only handle the 1-arg case here. ++ | ld CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori TMP1, NARGS8:RC, 8 ++ | addi TMP0, TMP0, -LJ_TSTR ++ | or TMP1, TMP1, TMP0 ++ | cleartp STR:CARG1 ++ | bxnez TMP1, ->fff_fallback // Need exactly 1 string argument. ++ | lw TMP0, STR:CARG1->len ++ | ld PC, FRAME_PC(BASE) ++ | snez RD, TMP0 ++ | lbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end). ++ | addiw RD, RD, 1 ++ | slliw RD, RD, 3 // RD = ((str->len != 0)+1)*8 ++ | settp_b TMP2, TISNUM ++ | sd TMP2, -16(BASE) ++ | j ->fff_res ++ | ++ |.ffunc string_char // Only handle the 1-arg case here. ++ | ffgccheck ++ | ld CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori TMP1, NARGS8:RC, 8 // Need exactly 1 argument. ++ | addi TMP0, TMP0, -LJ_TISNUM // Integer. ++ | li TMP2, 255 ++ | sext.w CARG1, CARG1 ++ | or TMP1, TMP1, TMP0 ++ | sltu TMP2, TMP2, CARG1 // !(255 < n). ++ | or TMP1, TMP1, TMP2 ++ | li CARG3, 1 ++ | bxnez TMP1, ->fff_fallback ++ | addi CARG2, sp, TMPD_OFS ++ | sb CARG1, TMPD(sp) ++ |->fff_newstr: ++ | sd BASE, L->base ++ | sd PC, SAVE_PC(sp) ++ | mv CARG1, L ++ | // (lua_State *L, const char *str, size_t l) ++ | call_intern fff_newstr, lj_str_new ++ | // Returns GCstr *. 
++ | ld BASE, L->base ++ |->fff_resstr: ++ | li TMP1, LJ_TSTR ++ | settp CRET1, TMP1 ++ | j ->fff_restv ++ | ++ |.ffunc string_sub ++ | ffgccheck ++ | ld CARG1, 0(BASE) ++ | ld CARG2, 8(BASE) ++ | ld CARG3, 16(BASE) ++ | addi TMP0, NARGS8:RC, -16 ++ | gettp TMP1, CARG1 ++ | bxltz TMP0, ->fff_fallback ++ | cleartp STR:CARG1, CARG1 ++ | li CARG4, -1 ++ | beqz TMP0, >1 ++ | sext.w CARG4, CARG3 ++ | checkint CARG3, ->fff_fallback ++ |1: ++ | checkint CARG2, ->fff_fallback ++ | addi TMP0, TMP1, -LJ_TSTR ++ | sext.w CARG3, CARG2 ++ | bxnez TMP0, ->fff_fallback ++ | lw CARG2, STR:CARG1->len ++ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end ++ | addiw TMP0, CARG2, 1 ++ | bgez CARG4, >2 ++ | addw CARG4, CARG4, TMP0 // if (end < 0) end += len+1 ++ |2: ++ | bgez CARG3, >3 ++ | addw CARG3, CARG3, TMP0 // if (start < 0) start += len+1 ++ |3: ++ | bgez CARG4, >4 ++ | mv CARG4, x0 // if (end < 0) end = 0 ++ |4: ++ | bgtz CARG3, >5 ++ | li CARG3, 1 // if (start < 1) start = 1 ++ |5: ++ | ble CARG4, CARG2, >6 ++ | mv CARG4, CARG2 // if (end > len) end = len ++ |6: ++ | add CARG2, STR:CARG1, CARG3 ++ | sub CARG3, CARG4, CARG3 // len = end - start ++ | addi CARG2, CARG2, sizeof(GCstr)-1 ++ | addiw CARG3, CARG3, 1 // len += 1 ++ | bxgez CARG3, ->fff_newstr ++ |->fff_emptystr: // Return empty string. ++ | li TMP1, LJ_TSTR ++ | addi STR:CARG1, GL, offsetof(global_State, strempty) ++ | settp CARG1, TMP1 ++ | j ->fff_restv ++ | ++ |.macro ffstring_op, name ++ | .ffunc string_ .. name ++ | ffgccheck ++ | ld CARG2, 0(BASE) ++ | bxeqz NARGS8:RC, ->fff_fallback ++ | checkstr STR:CARG2, ->fff_fallback ++ | addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf) ++ | ld TMP0, SBUF:CARG1->b ++ | sd L, SBUF:CARG1->L ++ | sd BASE, L->base ++ | sd TMP0, SBUF:CARG1->w ++ | sd PC, SAVE_PC(sp) ++ | call_intern ff_string_ .. name, lj_buf_putstr_ .. name ++ | call_intern ff_string_ .. 
name, lj_buf_tostr // CARG1 = CRET1 ++ | ld BASE, L->base ++ | j ->fff_resstr ++ |.endmacro ++ | ++ |ffstring_op reverse ++ |ffstring_op lower ++ |ffstring_op upper ++ | ++ |//-- Bit library -------------------------------------------------------- ++ | ++ |->vm_tobit_fb: ++ | fld FARG1, 0(BASE) ++ | bxeqz TMP1, ->fff_fallback ++ | fadd.d FARG1, FARG1, TOBIT ++ | fmv.x.w CRET1, FARG1 ++ | zext.w CRET1, CRET1 ++ | ret ++ | ++ |.macro .ffunc_bit, name ++ | .ffunc_1 bit_..name ++ | gettp TMP0, CARG1 ++ | zext.w CRET1, CARG1 ++ | beq TMP0, TISNUM, >1 ++ | sltiu TMP1, TMP0, LJ_TISNUM ++ | jal ->vm_tobit_fb ++ |1: ++ |.endmacro ++ | ++ |.macro .ffunc_bit_op, name, bins ++ | .ffunc_bit name ++ | addi TMP2, BASE, 8 ++ | add TMP3, BASE, NARGS8:RC ++ |1: ++ | ld TMP1, 0(TMP2) ++ | bxeq TMP2, TMP3, ->fff_resi ++ | gettp TMP0, TMP1 ++ | addi TMP2, TMP2, 8 ++ | bne TMP0, TISNUM, >2 ++ | zext.w TMP1, TMP1 ++ | bins CRET1, CRET1, TMP1 ++ | j <1 ++ |2: ++ | fld FARG1, -8(TMP2) ++ | sltiu TMP0, TMP0, LJ_TISNUM ++ | fadd.d FARG1, FARG1, TOBIT ++ | bxeqz TMP0, ->fff_fallback ++ | fmv.x.w TMP1, FARG1 ++ | zext.w TMP1, TMP1 ++ | bins CRET1, CRET1, TMP1 ++ | j <1 ++ |.endmacro ++ | ++ |.ffunc_bit_op band, and ++ |.ffunc_bit_op bor, or ++ |.ffunc_bit_op bxor, xor ++ | ++ |.ffunc_bit bswap ++ | srliw CARG2, CARG1, 8 ++ | lui CARG3, 16 ++ | addiw CARG3, CARG3, -256 ++ | and CARG2, CARG2, CARG3 ++ | srliw CARG3, CARG1, 24 ++ | or CARG2, CARG2, CARG3 ++ | slli CARG3, CARG1, 8 ++ | lui CARG4, 0x00ff0 ++ | and CARG3, CARG3, CARG4 ++ | slli CARG1, CARG1, 24 ++ | or CARG1, CARG1, CARG3 ++ | or CARG1, CARG1, CARG2 ++ | slli CARG1, CARG1, 32 ++ | srli CARG1, CARG1, 32 ++ | j ->fff_resi ++ | ++ |.ffunc_bit tobit ++ |->fff_resi: ++ | settp CARG1, TISNUM // CARG1 = CRET1 ++ | j ->fff_restv ++ | ++ |.ffunc_bit bnot ++ | not CRET1, CRET1 ++ | zext.w CRET1, CRET1 ++ | j ->fff_resi ++ | ++ |.macro .ffunc_bit_sh, name, shins ++ | .ffunc_2 bit_..name ++ | gettp TMP0, CARG1 ++ | beq TMP0, TISNUM, >1 ++ | 
sltiu TMP1, TMP0, LJ_TISNUM ++ | jal ->vm_tobit_fb ++ |// mv CARG1, CRET1 // CARG1 = CRET1 ++ |1: ++ | gettp TMP0, CARG2 ++ | zext.w CARG2, CARG2 ++ | bxne TMP0, TISNUM, ->fff_fallback ++ | sext.w CARG1, CARG1 ++ | shins CRET1, CARG1, CARG2 ++ | zext.w CRET1, CRET1 ++ | j ->fff_resi ++ |.endmacro ++ | ++ |.ffunc_bit_sh lshift, sllw ++ |.ffunc_bit_sh rshift, srlw ++ |.ffunc_bit_sh arshift, sraw ++ | ++ |.macro .ffunc_bit_rot, name, rotinsa, rotinsb ++ | .ffunc_2 bit_..name ++ | gettp TMP0, CARG1 ++ | beq TMP0, TISNUM, >1 ++ | sltiu TMP1, TMP0, LJ_TISNUM ++ | jal ->vm_tobit_fb ++ |// mv CARG1, CRET1 // CARG1 = CRET1 ++ |1: ++ | gettp TMP0, CARG2 ++ | zext.w CARG2, CARG2 ++ | bxne TMP0, TISNUM, ->fff_fallback ++ | sext.w CARG1, CARG1 ++ | neg TMP2, CARG2 ++ | rotinsa TMP1, CARG1, CARG2 ++ | rotinsb TMP0, CARG1, TMP2 ++ | or CRET1, TMP0, TMP1 ++ | zext.w CRET1, CRET1 ++ | j ->fff_resi ++ |.endmacro ++ | ++ |.ffunc_bit_rot rol, sllw, srlw ++ |.ffunc_bit_rot ror, srlw, sllw ++ | ++ |//----------------------------------------------------------------------- ++ | ++ |->fff_fallback: // Call fast function fallback handler. ++ | // BASE = new base, RB = CFUNC, RC = nargs*8 ++ | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. ++ | ld CARG3, CFUNC:RB->f ++ | add TMP1, BASE, NARGS8:RC ++ | sd BASE, L->base ++ | addi TMP0, TMP1, 8*LUA_MINSTACK ++ | ld TMP2, L->maxstack ++ | sd PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | sd TMP1, L->top ++ | mv CARG1, L ++ | bltu TMP2, TMP0, >5 // Need to grow stack. ++ | jalr CARG3 // (lua_State *L) ++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1. ++ | ld BASE, L->base ++ | slliw RD, CRET1, 3 ++ | bxgtz CRET1, ->fff_res // Returned nresults+1? ++ |1: // Returned 0 or -1: retry fast path. ++ | ld LFUNC:RB, FRAME_FUNC(BASE) ++ | ld TMP0, L->top ++ | sub NARGS8:RC, TMP0, BASE ++ | cleartp LFUNC:RB ++ | bxnez CRET1, ->vm_call_tail // Returned -1? ++ | ins_callt // Returned 0: retry fast path. 
++ | ++ |// Reconstruct previous base for vmeta_call during tailcall. ++ |->vm_call_tail: ++ | andi TMP0, PC, FRAME_TYPE ++ | andi TMP1, PC, ~FRAME_TYPEP // TODO ++ | bnez TMP0, >3 ++ | lbu TMP1, OFS_RA(PC) ++ | slliw TMP1, TMP1, 3 ++ | addiw TMP1, TMP1, 16 ++ |3: ++ | sub TMP2, BASE, TMP1 ++ | j ->vm_call_dispatch // Resolve again for tailcall. ++ | ++ |5: // Grow stack for fallback handler. ++ | li CARG2, LUA_MINSTACK ++ | mv CARG1, L ++ | call_intern vm_call_tail, lj_state_growstack // (lua_State *L, int n) ++ | ld BASE, L->base ++ | mv CRET1, x0 // Set zero-flag to force retry. ++ | j <1 ++ | ++ |->fff_gcstep: // Call GC step function. ++ | // BASE = new base, RC = nargs*8 ++ | mv MULTRES, ra ++ | add TMP0, BASE, NARGS8:RC // Calculate L->top. ++ | sd BASE, L->base ++ | sd PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | mv CARG1, L ++ | sd TMP0, L->top ++ | call_intern fff_gc_step, lj_gc_step // (lua_State *L) ++ | ld BASE, L->base ++ | mv ra, MULTRES // Help return address predictor. ++ | ld TMP0, L->top ++ | ld CFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp CFUNC:RB ++ | sub NARGS8:RC, TMP0, BASE ++ | ret ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Special dispatch targets ------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_record: // Dispatch target for recording phase. ++ | ++ |->vm_rethook: // Dispatch target for return hooks. ++ | lbu TMP3, GL->hookmask ++ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? ++ | beqz TMP1, >1 ++ |5: // Re-dispatch to static ins. ++ | ld TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. ++ | jr TMP1 ++ | ++ |->vm_inshook: // Dispatch target for instr/line hooks. ++ | lbu TMP3, GL->hookmask ++ | lw TMP2, GL->hookcount ++ | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? 
++ | bnez TMP1, <5 ++ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT ++ | addiw TMP2, TMP2, -1 ++ | beqz TMP1, <5 ++ | sw TMP2, GL->hookcount ++ | beqz TMP2, >1 ++ | andi TMP1, TMP3, LUA_MASKLINE ++ | beqz TMP1, <5 ++ |1: ++ | sw MULTRES, TMPD(sp) ++ | mv CARG2, PC ++ | sd BASE, L->base ++ | mv CARG1, L ++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. ++ | call_intern vm_inshook, lj_dispatch_ins // (lua_State *L, const BCIns *pc) ++ |3: ++ | ld BASE, L->base ++ |4: // Re-dispatch to static ins. ++ | lw INS, -4(PC) ++ | decode_OP8 TMP1, INS ++ | add TMP0, DISPATCH, TMP1 ++ | decode_RD8a RD, INS ++ | ld TMP1, GG_DISP2STATIC(TMP0) ++ | decode_RA8 RA, INS ++ | decode_RD8b RD ++ | jr TMP1 ++ | ++ |->cont_hook: // Continue from hook yield. ++ | addi PC, PC, 4 ++ | lw MULTRES, -24(RB) // Restore MULTRES for *M ins. ++ | j <4 ++ | ++ | ++ |->vm_callhook: // Dispatch target for call hooks. ++ | mv CARG2, PC ++ | ++ |->cont_stitch: // Trace stitching. ++ | ++ |->vm_profhook: // Dispatch target for profiler hook. ++#if LJ_HASPROFILE ++ | mv CARG1, L ++ | mv CARG2, PC ++ | sd BASE, L->base ++ | sw MULTRES, TMPD(sp) ++ | // (lua_State *L, const BCIns *pc) ++ | call_intern vm_profhook, lj_dispatch_profile ++ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. ++ | addi PC, PC, -4 ++ | ld BASE, L->base ++ | j ->cont_nop ++#endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Math helper functions ---------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ | ++ |// Hard-float round to integer. ++ |// Modifies TMP0, FARG1, FARG5 ++ |.macro vm_round, rm ++ | fmv.x.d TMP0, FARG1 ++ | srli TMP0, TMP0, 52 // Extract exponent (and sign). ++ | andi TMP0, TMP0, 0x7ff // Extract exponent. ++ | addi TMP0, TMP0, -1075 ++ | bgtz TMP0, >1 // Less than 2^52 / Not NaN? 
++ | fcvt.l.d TMP0, FARG1, rm ++ | fcvt.d.l FARG5, TMP0 ++ | fsgnj.d FRET1, FARG5, FARG1 ++ |1: ++ | ret ++ |.endmacro ++ | ++ | ++ |->vm_floor: ++ | vm_round rdn ++ |->vm_ceil: ++ | vm_round rup ++ | ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Miscellaneous functions -------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// void lj_vm_fence_rw_rw() ++ |->vm_fence_rw_rw: ++ |.if JIT or FFI ++ | .long 0x0330000f ++ | ret ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++} ++ ++/* Generate the code for a single instruction. */ ++static void build_ins(BuildCtx *ctx, BCOp op, int defop) ++{ ++ int vk = 0; ++ |=>defop: ++ ++ switch (op) { ++ ++ /* -- Comparison ops ---------------------------------------------------- */ ++ ++ /* Remember: all ops branch for a true comparison, fall through otherwise. */ ++ ++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | add RA, BASE, RA ++ | add RD, BASE, RD ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | ld CARG1, 0(RA) ++ | ld CARG2, 0(RD) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ } else { ++ | ld CARG2, 0(RA) ++ | ld CARG1, 0(RD) ++ | gettp CARG3, CARG2 ++ | gettp CARG4, CARG1 ++ } ++ | lhu TMP2, OFS_RD(PC) // TMP2=jump ++ | addi PC, PC, 4 ++ | bne CARG3, TISNUM, >2 ++ | decode_BC4b TMP2 ++ | bne CARG4, TISNUM, >5 ++ | sext.w CARG1, CARG1 ++ | sext.w CARG2, CARG2 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | slt TMP1, CARG1, CARG2 ++ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 ++ if (op == BC_ISLT || op == BC_ISGT) { ++ | neg TMP1, TMP1 ++ } else { ++ | addi TMP1, TMP1, -1 ++ } ++ | and TMP2, TMP2, TMP1 ++ |1: ++ | add PC, PC, TMP2 ++ | ins_next ++ | ++ |2: // RA is not an integer. 
++ | sltiu TMP1, CARG3, LJ_TISNUM ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | bxeqz TMP1, ->vmeta_comp ++ | sltiu TMP1, CARG4, LJ_TISNUM ++ | decode_BC4b TMP2 ++ | beqz TMP1, >4 ++ | fmv.d.x FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ |3: // RA and RD are both numbers. ++ | addw TMP2, TMP2, TMP3 ++ if (op == BC_ISLT) { ++ | flt.d TMP3, FTMP0, FTMP2 ++ | neg TMP3, TMP3 ++ } else if (op == BC_ISGE) { ++ | flt.d TMP3, FTMP0, FTMP2 ++ | addi TMP3, TMP3, -1 ++ } else if (op == BC_ISLE) { ++ | fle.d TMP3, FTMP2, FTMP0 ++ | neg TMP3, TMP3 ++ } else if (op == BC_ISGT) { ++ | fle.d TMP3, FTMP2, FTMP0 ++ | addi TMP3, TMP3, -1 ++ } ++ | and TMP2, TMP2, TMP3 ++ | j <1 ++ | ++ |4: // RA is a number, RD is not a number. ++ | // RA is a number, RD is an integer. Convert RD to a number. ++ | bxne CARG4, TISNUM, ->vmeta_comp ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | fcvt.d.w FTMP2, CARG2 ++ | fmv.d.x FTMP0, CARG1 ++ } else { ++ | fcvt.d.w FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ } ++ | j <3 ++ | ++ |5: // RA is an integer, RD is not an integer ++ | sltiu TMP1, CARG4, LJ_TISNUM ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | bxeqz TMP1, ->vmeta_comp ++ | // RA is an integer, RD is a number. Convert RA to a number. 
++ if (op == BC_ISLT || op == BC_ISGE) { ++ | fcvt.d.w FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ } else { ++ | fcvt.d.w FTMP2, CARG2 ++ | fmv.d.x FTMP0, CARG1 ++ } ++ | j <3 ++ break; ++ ++ case BC_ISEQV: case BC_ISNEV: ++ vk = op == BC_ISEQV; ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | add RA, BASE, RA ++ | add RD, BASE, RD ++ | addi PC, PC, 4 ++ | ld CARG1, 0(RA) ++ | ld CARG2, 0(RD) ++ | lhu TMP2, -4+OFS_RD(PC) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | sltu TMP0, TISNUM, CARG3 ++ | sltu TMP1, TISNUM, CARG4 ++ | or TMP0, TMP0, TMP1 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ if (vk) { ++ | beqz TMP0, ->BC_ISEQN_Z ++ } else { ++ | beqz TMP0, ->BC_ISNEN_Z ++ } ++ |// Either or both types are not numbers. ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | decode_BC4b TMP2 ++ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 ++ | bne CARG1, CARG2, >2 ++ | // Tag and value are equal. ++ if (vk) { ++ |->BC_ISEQV_Z: ++ | add PC, PC, TMP2 ++ } ++ |1: ++ | ins_next ++ | ++ |2: // Check if the tags are the same and it's a table or userdata. ++ | xor TMP3, CARG3, CARG4 // Same type? ++ | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? TMP0=1 ++ | beqz TMP3, >3 ++ | mv TMP0, x0 // TMP0=0: not same type, or same type table/userdata ++ |3: ++ | cleartp TAB:TMP1, CARG1 ++ if (vk) { ++ | beqz TMP0, <1 ++ } else { ++ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. ++ } ++ | // Different tables or userdatas. Need to check __eq metamethod. ++ | // Field metatable must be at same offset for GCtab and GCudata! ++ | ld TAB:TMP3, TAB:TMP1->metatable ++ if (vk) { ++ | beqz TAB:TMP3, <1 // No metatable? ++ | lbu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<BC_ISEQV_Z // No metatable? ++ | lbu TMP3, TAB:TMP3->nomm ++ | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? ++ } ++ | j ->vmeta_equal // Handle __eq metamethod. 
++ break; ++ ++ case BC_ISEQS: case BC_ISNES: ++ vk = op == BC_ISEQS; ++ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target ++ | add RA, BASE, RA ++ | addi PC, PC, 4 ++ | ld CARG1, 0(RA) ++ | sub RD, KBASE, RD ++ | lhu TMP2, -4+OFS_RD(PC) ++ | ld CARG2, -8(RD) // KBASE-8-str_const*8 ++ | li TMP0, LJ_TSTR ++ | decode_BC4b TMP2 ++ | settp CARG2, TMP0 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D ++ | addw TMP2, TMP2, TMP3 ++ if (vk) { ++ | seqz TMP4, TMP0 ++ } else { ++ | snez TMP4, TMP0 ++ } ++ | neg TMP4, TMP4 ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ | ins_next ++ break; ++ ++ case BC_ISEQN: case BC_ISNEN: ++ vk = op == BC_ISEQN; ++ | // RA = src*8, RD = num_const*8, JMP with RD = target ++ | add RA, BASE, RA ++ | add RD, KBASE, RD ++ | ld CARG1, 0(RA) ++ | ld CARG2, 0(RD) ++ | lhu TMP2, OFS_RD(PC) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | addi PC, PC, 4 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ if (vk) { ++ |->BC_ISEQN_Z: ++ } else { ++ |->BC_ISNEN_Z: ++ } ++ | decode_BC4b TMP2 ++ | bne CARG3, TISNUM, >4 ++ | addw TMP2, TMP2, TMP3 ++ | bne CARG4, TISNUM, >6 ++ | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D ++ |1: ++ if (vk) { ++ | seqz TMP4, TMP0 ++ | neg TMP4, TMP4 ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ |2: ++ } else { ++ | snez TMP4, TMP0 ++ | neg TMP4, TMP4 ++ | and TMP2, TMP2, TMP4 ++ |2: ++ | add PC, PC, TMP2 ++ } ++ |3: ++ | ins_next ++ | ++ |4: // RA is not an integer. ++ | addw TMP2, TMP2, TMP3 ++ | bgeu CARG3, TISNUM, <2 ++ | fmv.d.x FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ | bne CARG4, TISNUM, >5 ++ |// RA is a number, RD is an integer. ++ | fcvt.d.w FTMP2, CARG2 ++ | ++ |5: // RA and RD are both numbers. ++ | feq.d TMP0, FTMP0, FTMP2 ++ | seqz TMP0, TMP0 ++ | j <1 ++ | ++ |6: // RA is an integer, RD is a number. 
++ | bgeu CARG4, TISNUM, <2 ++ | fcvt.d.w FTMP0, CARG1 ++ | fmv.d.x FTMP2, CARG2 ++ | j <5 ++ | ++ break; ++ ++ case BC_ISEQP: case BC_ISNEP: ++ vk = op == BC_ISEQP; ++ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target ++ | add RA, BASE, RA ++ | srliw TMP0, RD, 3 ++ | ld TMP1, 0(RA) ++ | not TMP0, TMP0 // ~TMP0: ~0 ~1 ~2 ++ | lhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target ++ | gettp TMP1, TMP1 ++ | addi PC, PC, 4 ++ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D ++ | decode_BC4b TMP2 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 ++ if (vk) { ++ | seqz TMP4, TMP0 ++ } else { ++ | snez TMP4, TMP0 ++ } ++ | neg TMP4, TMP4 ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ | ins_next ++ break; ++ ++ /* -- Unary test and copy ops ------------------------------------------- */ ++ ++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: ++ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target ++ | add RD, BASE, RD ++ | lhu TMP2, OFS_RD(PC) ++ | ld TMP0, 0(RD) ++ | addi PC, PC, 4 ++ | gettp TMP0, TMP0 ++ | add RA, BASE, RA ++ | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false ++ | decode_BC4b TMP2 ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | ld CRET1, 0(RD) ++ | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 ++ if (op == BC_IST || op == BC_ISTC) { ++ | beqz TMP0, >1 ++ if (op == BC_ISTC) { ++ | sd CRET1, 0(RA) ++ } ++ } else { ++ | bnez TMP0, >1 ++ if (op == BC_ISFC) { ++ | sd CRET1, 0(RA) ++ } ++ } ++ | add PC, PC, TMP2 ++ |1: ++ | ins_next ++ break; ++ ++ case BC_ISTYPE: ++ | // RA = src*8, RD = -type*8 ++ | add TMP0, BASE, RA ++ | srliw TMP1, RD, 3 ++ | ld TMP0, 0(TMP0) ++ | gettp TMP0, TMP0 ++ | add TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0 ++ | bxnez TMP0, ->vmeta_istype ++ | ins_next ++ break; ++ case BC_ISNUM: ++ | // RA = src*8, RD = -(TISNUM-1)*8 ++ | add TMP0, BASE, RA ++ | ld TMP0, 0(TMP0) ++ | checknum TMP0, 
->vmeta_istype ++ | ins_next ++ break; ++ ++ /* -- Unary ops --------------------------------------------------------- */ ++ ++ case BC_MOV: ++ | // RA = dst*8, RD = src*8 ++ | add RD, BASE, RD ++ | add RA, BASE, RA ++ | ld TMP0, 0(RD) ++ | ins_next1 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_NOT: ++ | // RA = dst*8, RD = src*8 ++ | add RD, BASE, RD ++ | add RA, BASE, RA ++ | ld TMP0, 0(RD) ++ | li TMP1, LJ_TTRUE ++ | ins_next1 ++ | gettp TMP0, TMP0 ++ | sltu TMP0, TMP1, TMP0 ++ | addiw TMP0, TMP0, 1 ++ | slli TMP0, TMP0, 47 ++ | not TMP0, TMP0 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_UNM: ++ | // RA = dst*8, RD = src*8 ++ | add RB, BASE, RD ++ | add RA, BASE, RA ++ | ld TMP0, 0(RB) ++ | lui TMP1, 0x80000 ++ | gettp CARG3, TMP0 ++ | bne CARG3, TISNUM, >1 ++ | negw TMP0, TMP0 ++ | bxeq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31. ++ | zext.w TMP0, TMP0 ++ | settp_b TMP0, TISNUM ++ | j >2 ++ |1: ++ | sltiu TMP3, CARG3, LJ_TISNUM ++ | slli TMP1, TMP1, 32 ++ | bxeqz TMP3, ->vmeta_unm ++ | xor TMP0, TMP0, TMP1 // sign => ~sign ++ |2: ++ | sd TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_LEN: ++ | // RA = dst*8, RD = src*8 ++ | add CARG2, BASE, RD ++ | ld TMP0, 0(CARG2) ++ | add RA, BASE, RA ++ | gettp TMP1, TMP0 ++ | addi TMP2, TMP1, -LJ_TSTR ++ | cleartp STR:CARG1, TMP0 ++ | bnez TMP2, >2 ++ | lwu CARG1, STR:CARG1->len ++ |1: ++ | settp_b CARG1, TISNUM ++ | sd CARG1, 0(RA) ++ | ins_next ++ |2: ++ | addi TMP2, TMP1, -LJ_TTAB ++ | bxnez TMP2, ->vmeta_len ++#if LJ_52 ++ | ld TAB:TMP2, TAB:CARG1->metatable ++ | bnez TAB:TMP2, >9 ++ |3: ++#endif ++ |->BC_LEN_Z: ++ | call_intern BC_LEN, lj_tab_len // (GCtab *t) ++ | // Returns uint32_t (but less than 2^31). 
++ | j <1 ++#if LJ_52 ++ |9: ++ | lbu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_len ++#endif ++ break; ++ ++ /* -- Binary ops -------------------------------------------------------- */ ++ ++ |.macro fpmod, a, b, c ++ | fdiv.d FARG1, b, c ++ | jal ->vm_floor // floor(b/c) ++ | fmul.d a, FRET1, c ++ | fsub.d a, b, a // b - floor(b/c)*c ++ |.endmacro ++ | ++ |.macro ins_arithpre ++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ++ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ++ ||if (vk == 1) { ++ | // RA = dst*8, RB = num_const*8, RC = src1*8 ++ | decode_RB8 RC, INS ++ | decode_RDtoRC8 RB, RD ++ ||} else { ++ | // RA = dst*8, RB = src1*8, RC = num_const*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ ||} ++ ||switch (vk) { ++ ||case 0: // suffix is VN ++ | add RB, BASE, RB ++ | add RC, KBASE, RC ++ || break; ++ ||case 1: // suffix is NV ++ | add RC, BASE, RC ++ | add RB, KBASE, RB ++ || break; ++ ||default: // CAT or suffix is VV ++ | add RB, BASE, RB ++ | add RC, BASE, RC ++ || break; ++ ||} ++ |.endmacro ++ | ++ |.macro ins_arithfp, fpins, itype1, itype2 ++ | fld FTMP0, 0(RB) ++ | sltu itype1, itype1, TISNUM ++ | sltu itype2, itype2, TISNUM ++ | fld FTMP2, 0(RC) ++ | and itype1, itype1, itype2 ++ | add RA, BASE, RA ++ | bxeqz itype1, ->vmeta_arith ++ | fpins FRET1, FTMP0, FTMP2 ++ | ins_next1 ++ | fsd FRET1, 0(RA) ++ | ins_next2 ++ |.endmacro ++ | ++ |.macro ins_arithead, itype1, itype2, tval1, tval2 ++ | ld tval1, 0(RB) ++ | ld tval2, 0(RC) ++ | // Check for two integers. ++ | gettp itype1, tval1 ++ | gettp itype2, tval2 ++ |.endmacro ++ | ++ |.macro ins_arithdn, intins, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | bne TMP0, TISNUM, >1 ++ | bne TMP1, TISNUM, >1 ++ | sext.w CARG3, CARG1 ++ | sext.w CARG4, CARG2 ++ |.if "intins" == "addw" ++ | intins CRET1, CARG3, CARG4 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. 
++ | xor TMP2, CRET1, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add RA, BASE, RA ++ | bxltz TMP1, ->vmeta_arith ++ |.elif "intins" == "subw" ++ | intins CRET1, CARG3, CARG4 ++ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. ++ | xor TMP2, CARG3, CARG4 ++ | and TMP1, TMP1, TMP2 ++ | add RA, BASE, RA ++ | bxltz TMP1, ->vmeta_arith ++ |.elif "intins" == "mulw" ++ | mul TMP2, CARG3, CARG4 ++ | add RA, BASE, RA ++ | sext.w CRET1, TMP2 ++ | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow. ++ |.endif ++ | zext.w CRET1, CRET1 ++ | settp_b CRET1, TISNUM ++ | sd CRET1, 0(RA) ++ | ins_next ++ |1: // Check for two numbers. ++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ | ++ |.macro ins_arithdiv, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ | ++ |.macro ins_arithmod, fpins, BC ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | bne TMP0, TISNUM, >1 ++ | bne TMP1, TISNUM, >1 ++ | sext.w CARG1, CARG1 ++ | sext.w CARG2, CARG2 ++ | add RA, BASE, RA ++ | bxeqz CARG2, ->vmeta_arith ++ | call_intern BC, lj_vm_modi ++ | zext.w CRET1, CRET1 ++ | settp_b CRET1, TISNUM ++ | sd CRET1, 0(RA) ++ | ins_next ++ |1: // Check for two numbers. 
++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ ++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: ++ | ins_arithdn addw, fadd.d ++ break; ++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: ++ | ins_arithdn subw, fsub.d ++ break; ++ case BC_MULVN: case BC_MULNV: case BC_MULVV: ++ | ins_arithdn mulw, fmul.d ++ break; ++ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: ++ | ins_arithdiv fdiv.d ++ break; ++ case BC_MODVN: ++ | ins_arithmod fpmod, BC_MODVN ++ break; ++ case BC_MODNV: ++ | ins_arithmod fpmod, BC_MODNV ++ break; ++ case BC_MODVV: ++ | ins_arithmod fpmod, BC_MODVV ++ break; ++ case BC_POW: ++ | ins_arithpre ++ | ld CARG1, 0(RB) ++ | ld CARG2, 0(RC) ++ | gettp TMP0, CARG1 ++ | gettp TMP1, CARG2 ++ | sltiu TMP0, TMP0, LJ_TISNUM ++ | sltiu TMP1, TMP1, LJ_TISNUM ++ | and TMP0, TMP0, TMP1 ++ | add RA, BASE, RA ++ | bxeqz TMP0, ->vmeta_arith ++ | fld FARG1, 0(RB) ++ | fld FARG2, 0(RC) ++ | call_extern BC_POW, pow ++ | ins_next1 ++ | fsd FRET1, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_CAT: ++ | // RA = dst*8, RB = src_start*8, RC = src_end*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | sub CARG3, RC, RB ++ | sd BASE, L->base ++ | add CARG2, BASE, RC ++ | mv MULTRES, RB ++ |->BC_CAT_Z: ++ | srliw CARG3, CARG3, 3 ++ | sd PC, SAVE_PC(sp) ++ | mv CARG1, L ++ | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left) ++ | // Returns NULL (finished) or TValue * (metamethod). 
++ | ld BASE, L->base ++ | bxnez CRET1, ->vmeta_binop ++ | add RB, BASE, MULTRES ++ | ld TMP0, 0(RB) ++ | add RA, BASE, RA ++ | sd TMP0, 0(RA) ++ | ins_next ++ break; ++ ++ /* -- Constant ops ------------------------------------------------------ */ ++ ++ case BC_KSTR: ++ | // RA = dst*8, RD = str_const*8 (~) ++ | sub TMP1, KBASE, RD ++ | li TMP2, LJ_TSTR ++ | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 ++ | add RA, BASE, RA ++ | settp TMP0, TMP2 ++ | sd TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_KCDATA: ++ break; ++ case BC_KSHORT: ++ | // RA = dst*8, RD = int16_literal*8 ++ | sraiw RD, INS, 16 ++ | add RA, BASE, RA ++ | zext.w RD, RD ++ | ins_next1 ++ | settp_b RD, TISNUM ++ | sd RD, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KNUM: ++ | // RA = dst*8, RD = num_const*8 ++ | add RD, KBASE, RD ++ | add RA, BASE, RA ++ | ld TMP0, 0(RD) ++ | ins_next1 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KPRI: ++ | // RA = dst*8, RD = primitive_type*8 (~) ++ | add RA, BASE, RA ++ | slli TMP0, RD, 44 // 44+3 ++ | not TMP0, TMP0 ++ | ins_next1 ++ | sd TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_KNIL: ++ | // RA = base*8, RD = end*8 ++ | add RA, BASE, RA ++ | sd TISNIL, 0(RA) ++ | addi RA, RA, 8 ++ | add RD, BASE, RD ++ |1: ++ | sd TISNIL, 0(RA) ++ | slt TMP0, RA, RD ++ | addi RA, RA, 8 ++ | bnez TMP0, <1 ++ | ins_next ++ break; ++ ++ /* -- Upvalue and function ops ------------------------------------------ */ ++ ++ case BC_UGET: ++ | // RA = dst*8, RD = uvnum*8 ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | add RA, BASE, RA ++ | cleartp LFUNC:TMP0 ++ | add RD, RD, LFUNC:TMP0 ++ | ld UPVAL:TMP0, LFUNC:RD->uvptr ++ | ld TMP1, UPVAL:TMP0->v ++ | ld TMP2, 0(TMP1) ++ | ins_next1 ++ | sd TMP2, 0(RA) ++ | ins_next2 ++ break; ++ case BC_USETV: ++ | // RA = uvnum*8, RD = src*8 ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | add RD, BASE, RD ++ | cleartp LFUNC:TMP0 ++ | add RA, RA, LFUNC:TMP0 ++ | ld UPVAL:TMP0, LFUNC:RA->uvptr ++ | ld CRET1, 0(RD) ++ | lbu TMP3, UPVAL:TMP0->marked ++ 
| ld CARG2, UPVAL:TMP0->v ++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) ++ | lbu TMP0, UPVAL:TMP0->closed ++ | gettp TMP2, CRET1 ++ | sd CRET1, 0(CARG2) ++ | or TMP3, TMP3, TMP0 ++ | li TMP0, LJ_GC_BLACK|1 ++ | addi TMP2, TMP2, -(LJ_TNUMX+1) ++ | beq TMP3, TMP0, >2 // Upvalue is closed and black? ++ |1: ++ | ins_next ++ | ++ |2: // Check if new value is collectable. ++ | sltiu TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1) ++ | cleartp GCOBJ:CRET1, CRET1 ++ | beqz TMP0, <1 // tvisgcv(v) ++ | lbu TMP3, GCOBJ:CRET1->gch.marked ++ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) ++ | beqz TMP3, <1 ++ | // Crossed a write barrier. Move the barrier forward. ++ | mv CARG1, GL ++ | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | j <1 ++ break; ++ case BC_USETS: ++ | // RA = uvnum*8, RD = str_const*8 (~) ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | sub TMP1, KBASE, RD ++ | cleartp LFUNC:TMP0 ++ | add RA, RA, LFUNC:TMP0 ++ | ld UPVAL:TMP0, LFUNC:RA->uvptr ++ | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 ++ | lbu TMP2, UPVAL:TMP0->marked ++ | ld CARG2, UPVAL:TMP0->v ++ | lbu TMP3, STR:TMP1->marked ++ | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv) ++ | lbu TMP2, UPVAL:TMP0->closed ++ | li TMP0, LJ_TSTR ++ | settp TMP1, TMP0 ++ | sd TMP1, 0(CARG2) ++ | bnez TMP4, >2 ++ |1: ++ | ins_next ++ | ++ |2: // Check if string is white and ensure upvalue is closed. ++ | beqz TMP2, <1 ++ | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str) ++ | beqz TMP0, <1 ++ | // Crossed a write barrier. Move the barrier forward. 
++ | mv CARG1, GL ++ | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | j <1 ++ break; ++ case BC_USETN: ++ | // RA = uvnum*8, RD = num_const*8 ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | add RD, KBASE, RD ++ | cleartp LFUNC:TMP0 ++ | add TMP0, RA, LFUNC:TMP0 ++ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr ++ | ld TMP1, 0(RD) ++ | ld TMP0, UPVAL:TMP0->v ++ | sd TMP1, 0(TMP0) ++ | ins_next ++ break; ++ case BC_USETP: ++ | // RA = uvnum*8, RD = primitive_type*8 (~) ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | slli TMP2, RD, 44 ++ | cleartp LFUNC:TMP0 ++ | add TMP0, RA, LFUNC:TMP0 ++ | not TMP2, TMP2 ++ | ld UPVAL:TMP0, LFUNC:TMP0->uvptr ++ | ld TMP1, UPVAL:TMP0->v ++ | sd TMP2, 0(TMP1) ++ | ins_next ++ break; ++ ++ case BC_UCLO: ++ | // RA = level*8, RD = target ++ | ld TMP2, L->openupval ++ | branch_RD // Do this first since RD is not saved. ++ | sd BASE, L->base ++ | mv CARG1, L ++ | beqz TMP2, >1 ++ | add CARG2, BASE, RA ++ | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level) ++ | ld BASE, L->base ++ |1: ++ | ins_next ++ break; ++ ++ case BC_FNEW: ++ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) ++ | sub TMP1, KBASE, RD ++ | ld CARG3, FRAME_FUNC(BASE) ++ | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 ++ | sd BASE, L->base ++ | sd PC, SAVE_PC(sp) ++ | cleartp CARG3 ++ | mv CARG1, L ++ | // (lua_State *L, GCproto *pt, GCfuncL *parent) ++ | call_intern BC_FNEW, lj_func_newL_gc ++ | // Returns GCfuncL *. 
++ | li TMP0, LJ_TFUNC ++ | ld BASE, L->base ++ | settp CRET1, TMP0 ++ | add RA, BASE, RA ++ | sd CRET1, 0(RA) ++ | ins_next ++ break; ++ ++ /* -- Table ops --------------------------------------------------------- */ ++ ++ case BC_TNEW: ++ case BC_TDUP: ++ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) ++ | ld TMP0, GL->gc.total ++ | ld TMP1, GL->gc.threshold ++ | sd BASE, L->base ++ | sd PC, SAVE_PC(sp) ++ | bgeu TMP0, TMP1, >5 ++ |1: ++ if (op == BC_TNEW) { ++ | srliw CARG2, RD, 3 ++ | andi CARG2, CARG2, 0x7ff ++ | lzi TMP0, 0x801 ++ | addiw TMP2, CARG2, -0x7ff ++ | srliw CARG3, RD, 14 ++ | seqz TMP3, TMP2 ++ | neg TMP4, TMP3 ++ | xor CARG1, TMP0, CARG2 // CARG2 = TMP3 ? TMP0 : CARG2 ++ | and CARG1, CARG1, TMP4 ++ | xor CARG2, CARG2, CARG1 ++ | mv CARG1, L ++ | // (lua_State *L, int32_t asize, uint32_t hbits) ++ | call_intern BC_TNEW, lj_tab_new ++ | // Returns Table *. ++ } else { ++ | sub TMP1, KBASE, RD ++ | mv CARG1, L ++ | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 ++ | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt) ++ | // Returns Table *. 
++ } ++ | li TMP0, LJ_TTAB ++ | ld BASE, L->base ++ | ins_next1 ++ | settp CRET1, TMP0 ++ | add RA, BASE, RA ++ | sd CRET1, 0(RA) ++ | ins_next2 ++ |5: ++ | mv MULTRES, RD ++ | mv CARG1, L ++ if (op == BC_TNEW) { ++ | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L) ++ } else { ++ | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L) ++ } ++ | mv RD, MULTRES ++ | j <1 ++ break; ++ ++ case BC_GGET: ++ | // RA = dst*8, RD = str_const*8 (~) ++ case BC_GSET: ++ | // RA = src*8, RD = str_const*8 (~) ++ | ld LFUNC:TMP0, FRAME_FUNC(BASE) ++ | sub TMP1, KBASE, RD ++ | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 ++ | cleartp LFUNC:TMP0 ++ | ld TAB:RB, LFUNC:TMP0->env ++ | add RA, BASE, RA ++ if (op == BC_GGET) { ++ | j ->BC_TGETS_Z ++ } else { ++ | j ->BC_TSETS_Z ++ } ++ break; ++ ++ case BC_TGETV: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | add CARG3, BASE, RC ++ | ld TAB:RB, 0(CARG2) ++ | ld TMP2, 0(CARG3) ++ | add RA, BASE, RA ++ | checktab TAB:RB, ->vmeta_tgetv ++ | gettp TMP3, TMP2 ++ | lw TMP0, TAB:RB->asize ++ | bne TMP3, TISNUM, >5 // Integer key? ++ | sext.w TMP2, TMP2 ++ | ld TMP1, TAB:RB->array ++ | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? ++ | slliw TMP2, TMP2, 3 ++ | add TMP2, TMP1, TMP2 ++ | ld CRET1, 0(TMP2) ++ | beq CRET1, TISNIL, >2 ++ |1: ++ | sd CRET1, 0(RA) ++ | ins_next ++ | ++ |2: // Check for __index if table value is nil. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | lbu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgetv ++ | ++ |5: ++ | li TMP0, LJ_TSTR ++ | cleartp RC, TMP2 ++ | bxne TMP3, TMP0, ->vmeta_tgetv // String key? 
++ | j ->BC_TGETS_Z ++ break; ++ case BC_TGETS: ++ | // RA = dst*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | sub CARG3, KBASE, RC ++ | ld TAB:RB, 0(CARG2) ++ | add RA, BASE, RA ++ | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 ++ | checktab TAB:RB, ->vmeta_tgets1 ++ |->BC_TGETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 ++ | lw TMP0, TAB:RB->hmask ++ | lw TMP1, STR:RC->sid ++ | ld NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask ++ | slliw TMP0, TMP1, 5 ++ | slliw TMP1, TMP1, 3 ++ | subw TMP1, TMP0, TMP1 ++ | li TMP3, LJ_TSTR ++ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ |1: ++ | ld CARG1, NODE:TMP2->key ++ | ld CARG2, NODE:TMP2->val ++ | ld NODE:TMP1, NODE:TMP2->next ++ | ld TAB:TMP3, TAB:RB->metatable ++ | bne CARG1, RC, >4 ++ | beq CARG2, TISNIL, >5 // Key found, but nil value? ++ |3: ++ | sd CARG2, 0(RA) ++ | ins_next ++ | ++ |4: // Follow hash chain. ++ | mv NODE:TMP2, NODE:TMP1 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, nil result. ++ | ++ |5: // Check for __index if table value is nil. ++ | mv CARG2, TISNIL ++ | beqz TAB:TMP3, <3 // No metatable: done. ++ | lbu TMP0, TAB:TMP3->nomm ++ | andi TMP0, TMP0, 1<vmeta_tgets ++ break; ++ case BC_TGETB: ++ | // RA = dst*8, RB = table*8, RC = index*8 ++ | decode_RB8 RB, INS ++ | add CARG2, BASE, RB ++ | decode_RDtoRC8 RC, RD ++ | ld TAB:RB, 0(CARG2) ++ | add RA, BASE, RA ++ | srliw TMP0, RC, 3 ++ | checktab TAB:RB, ->vmeta_tgetb ++ | lw TMP1, TAB:RB->asize ++ | ld TMP2, TAB:RB->array ++ | bxgeu TMP0, TMP1, ->vmeta_tgetb ++ | add RC, TMP2, RC ++ | ld CRET1, 0(RC) ++ | beq CRET1, TISNIL, >5 ++ |1: ++ | sd CRET1, 0(RA) ++ | ins_next ++ | ++ |5: // Check for __index if table value is nil. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. 
++ | lbu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! ++ break; ++ case BC_TGETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add RB, BASE, RB ++ | add RC, BASE, RC ++ | ld TAB:CARG1, 0(RB) ++ | lw CARG2, 0(RC) ++ | add RA, BASE, RA ++ | cleartp TAB:CARG1 ++ | lw TMP0, TAB:CARG1->asize ++ | ld TMP1, TAB:CARG1->array ++ | bxgeu CARG2, TMP0, ->vmeta_tgetr // In array part? ++ | slliw TMP2, CARG2, 3 ++ | add TMP3, TMP1, TMP2 ++ | ld TMP1, 0(TMP3) ++ |->BC_TGETR_Z: ++ | ins_next1 ++ | sd TMP1, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_TSETV: ++ | // RA = src*8, RB = table*8, RC = key*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | add CARG3, BASE, RC ++ | ld TAB:RB, 0(CARG2) ++ | ld TMP2, 0(CARG3) ++ | add RA, BASE, RA ++ | checktab TAB:RB, ->vmeta_tsetv ++ | sext.w RC, TMP2 ++ | checkint TMP2, >5 ++ | lw TMP0, TAB:RB->asize ++ | ld TMP1, TAB:RB->array ++ | bxgeu RC, TMP0, ->vmeta_tsetv // Integer key and in array part? ++ | slliw TMP2, RC, 3 ++ | add TMP1, TMP1, TMP2 ++ | lbu TMP3, TAB:RB->marked ++ | ld TMP0, 0(TMP1) ++ | ld CRET1, 0(RA) ++ | beq TMP0, TISNIL, >3 ++ |1: ++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) ++ | sd CRET1, 0(TMP1) ++ | bnez TMP2, >7 ++ |2: ++ | ins_next ++ | ++ |3: // Check for __newindex if previous value is nil. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | lbu TMP2, TAB:TMP2->nomm ++ | andi TMP2, TMP2, 1<vmeta_tsetv ++ |5: ++ | gettp TMP0, TMP2 ++ | addi TMP0, TMP0, -LJ_TSTR ++ | bxnez TMP0, ->vmeta_tsetv ++ | cleartp STR:RC, TMP2 ++ | j ->BC_TSETS_Z // String key? ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. 
++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETS: ++ | // RA = src*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | sub CARG3, KBASE, RC ++ | ld TAB:RB, 0(CARG2) ++ | ld RC, -8(CARG3) // KBASE-8-str_const*8 ++ | add RA, BASE, RA ++ | cleartp STR:RC ++ | checktab TAB:RB, ->vmeta_tsets1 ++ |->BC_TSETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 ++ | lw TMP0, TAB:RB->hmask ++ | lw TMP1, STR:RC->sid ++ | ld NODE:TMP2, TAB:RB->node ++ | sb x0, TAB:RB->nomm // Clear metamethod cache. ++ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask ++ | slliw TMP0, TMP1, 5 ++ | slliw TMP1, TMP1, 3 ++ | subw TMP1, TMP0, TMP1 ++ | li TMP3, LJ_TSTR ++ | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ | fld FTMP0, 0(RA) ++ |1: ++ | ld TMP0, NODE:TMP2->key ++ | ld CARG2, NODE:TMP2->val ++ | ld NODE:TMP1, NODE:TMP2->next ++ | lbu TMP3, TAB:RB->marked ++ | bne TMP0, RC, >5 ++ | ld TAB:TMP0, TAB:RB->metatable ++ | beq CARG2, TISNIL, >4 // Key found, but nil value? ++ |2: ++ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(table) ++ | fsd FTMP0, NODE:TMP2->val ++ | bnez TMP3, >7 ++ |3: ++ | ins_next ++ | ++ |4: // Check for __newindex if previous value is nil. ++ | beqz TAB:TMP0, <2 // No metatable: done. ++ | lbu TMP0, TAB:TMP0->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets ++ | ++ |5: // Follow hash chain. ++ | mv NODE:TMP2, NODE:TMP1 ++ | bnez NODE:TMP1, <1 ++ | // End of hash chain: key not found, add a new one ++ | ++ | // But check for __newindex first. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | addi CARG3, GL, offsetof(global_State, tmptv) ++ | beqz TAB:TMP2, >6 // No metatable: continue. ++ | lbu TMP0, TAB:TMP2->nomm ++ | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. 
++ |6: ++ | sd RC, 0(CARG3) ++ | sd BASE, L->base ++ | mv CARG2, TAB:RB ++ | sd PC, SAVE_PC(sp) ++ | mv CARG1, L ++ | // (lua_State *L, GCtab *t, TValue *k) ++ | call_intern BC_TSETS, lj_tab_newkey ++ | // Returns TValue *. ++ | ld BASE, L->base ++ | fsd FTMP0, 0(CRET1) ++ | j <3 // No 2nd write barrier needed. ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <3 ++ break; ++ case BC_TSETB: ++ | // RA = src*8, RB = table*8, RC = index*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG2, BASE, RB ++ | add RA, BASE, RA ++ | ld TAB:RB, 0(CARG2) ++ | srliw TMP0, RC, 3 ++ | checktab RB, ->vmeta_tsetb ++ | lw TMP1, TAB:RB->asize ++ | ld TMP2, TAB:RB->array ++ | bxgeu TMP0, TMP1, ->vmeta_tsetb ++ | add RC, TMP2, RC ++ | ld TMP1, 0(RC) ++ | lbu TMP3, TAB:RB->marked ++ | beq TMP1, TISNIL, >5 ++ |1: ++ | ld CRET1, 0(RA) ++ | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table) ++ | sd CRET1, 0(RC) ++ | bnez TMP1, >7 ++ |2: ++ | ins_next ++ | ++ |5: // Check for __newindex if previous value is nil. ++ | ld TAB:TMP2, TAB:RB->metatable ++ | beqz TAB:TMP2, <1 // No metatable: done. ++ | lbu TMP1, TAB:TMP2->nomm ++ | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB8 RB, INS ++ | decode_RDtoRC8 RC, RD ++ | add CARG1, BASE, RB ++ | add CARG3, BASE, RC ++ | ld TAB:CARG2, 0(CARG1) ++ | lw CARG3, 0(CARG3) ++ | cleartp TAB:CARG2 ++ | lbu TMP3, TAB:CARG2->marked ++ | lw TMP0, TAB:CARG2->asize ++ | ld TMP1, TAB:CARG2->array ++ | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) ++ | add RA, BASE, RA ++ | bnez TMP2, >7 ++ |2: ++ | bxgeu CARG3, TMP0, ->vmeta_tsetr // In array part? 
++ | slliw TMP2, CARG3, 3 ++ | add CRET1, TMP1, TMP2 ++ |->BC_TSETR_Z: ++ | ld TMP1, 0(RA) ++ | ins_next1 ++ | sd TMP1, 0(CRET1) ++ | ins_next2 ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, CRET1, <2 ++ break; ++ ++ case BC_TSETM: ++ | // RA = base*8 (table at base-1), RD = num_const*8 (start index) ++ | add RA, BASE, RA ++ |1: ++ | add TMP3, KBASE, RD ++ | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. ++ | addiw TMP0, MULTRES, -8 ++ | lw TMP3, 0(TMP3) // Integer constant is in lo-word. ++ | srliw CARG3, TMP0, 3 ++ | beqz TMP0, >4 // Nothing to copy? ++ | cleartp TAB:CARG2 ++ | addw CARG3, CARG3, TMP3 ++ | lw TMP2, TAB:CARG2->asize ++ | slliw TMP1, TMP3, 3 ++ | lbu TMP3, TAB:CARG2->marked ++ | ld CARG1, TAB:CARG2->array ++ | bltu TMP2, CARG3, >5 ++ | add TMP2, RA, TMP0 ++ | add TMP1, TMP1, CARG1 ++ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) ++ |3: // Copy result slots to table. ++ | ld CRET1, 0(RA) ++ | addi RA, RA, 8 ++ | sd CRET1, 0(TMP1) ++ | addi TMP1, TMP1, 8 ++ | bltu RA, TMP2, <3 ++ | bnez TMP0, >7 ++ |4: ++ | ins_next ++ | ++ |5: // Need to resize array part. ++ | sd BASE, L->base ++ | sd PC, SAVE_PC(sp) ++ | mv BASE, RD ++ | mv CARG1, L ++ | // (lua_State *L, GCtab *t, int nasize) ++ | call_intern BC_TSETM, lj_tab_reasize ++ | // Must not reallocate the stack. ++ | mv RD, BASE ++ | ld BASE, L->base // Reload BASE for lack of a saved register. ++ | j <1 ++ | ++ |7: // Possible table write barrier for any value. Skip valiswhite check. 
++ | barrierback TAB:CARG2, TMP3, TMP0, <4 ++ break; ++ ++ /* -- Calls and vararg handling ----------------------------------------- */ ++ ++ case BC_CALLM: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ | addw NARGS8:RC, NARGS8:RC, MULTRES ++ | j ->BC_CALL_Z ++ break; ++ case BC_CALL: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ |->BC_CALL_Z: ++ | mv TMP2, BASE ++ | add BASE, BASE, RA ++ | ld LFUNC:RB, 0(BASE) ++ | addi BASE, BASE, 16 ++ | addiw NARGS8:RC, NARGS8:RC, -8 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_CALLMT: ++ | // RA = base*8, (RB = 0,) RC = extra_nargs*8 ++ | addw NARGS8:RD, NARGS8:RD, MULTRES ++ | j ->BC_CALLT_Z1 ++ break; ++ case BC_CALLT: ++ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 ++ |->BC_CALLT_Z1: ++ | add RA, BASE, RA ++ | ld LFUNC:RB, 0(RA) ++ | mv NARGS8:RC, RD ++ | ld TMP1, FRAME_PC(BASE) ++ | addi RA, RA, 16 ++ | addiw NARGS8:RC, NARGS8:RC, -8 ++ | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt ++ |->BC_CALLT_Z: ++ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. ++ | lbu TMP3, LFUNC:CARG3->ffid ++ | xori TMP2, TMP1, FRAME_VARG ++ | bnez TMP0, >7 ++ |1: ++ | sd LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. ++ | sltiu CARG4, TMP3, 2 // (> FF_C) Calling a fast function? ++ | mv TMP2, BASE ++ | mv RB, CARG3 ++ | mv TMP3, NARGS8:RC ++ | beqz NARGS8:RC, >3 ++ |2: ++ | ld CRET1, 0(RA) ++ | addi RA, RA, 8 ++ | addiw TMP3, TMP3, -8 ++ | sd CRET1, 0(TMP2) ++ | addi TMP2, TMP2, 8 ++ | bnez TMP3, <2 ++ |3: ++ | or TMP0, TMP0, CARG4 ++ | beqz TMP0, >5 ++ |4: ++ | ins_callt ++ | ++ |5: // Tailcall to a fast function with a Lua frame below. ++ | lw INS, -4(TMP1) ++ | decode_RA8 RA, INS ++ | sub TMP1, BASE, RA ++ | ld TMP1, -32(TMP1) ++ | cleartp LFUNC:TMP1 ++ | ld TMP1, LFUNC:TMP1->pc ++ | ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. 
++ | j <4 ++ | ++ |7: // Tailcall from a vararg function. ++ | andi CARG4, TMP2, FRAME_TYPEP ++ | sub TMP2, BASE, TMP2 // Relocate BASE down. ++ | bnez CARG4, <1 // Vararg frame below? ++ | mv BASE, TMP2 ++ | ld TMP1, FRAME_PC(TMP2) ++ | andi TMP0, TMP1, FRAME_TYPE ++ | j <1 ++ break; ++ ++ case BC_ITERC: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) ++ | mv TMP2, BASE // Save old BASE for vmeta_call. ++ | add BASE, BASE, RA ++ | ld RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1. ++ | ld CARG1, -16(BASE) ++ | ld CARG2, -8(BASE) ++ | li NARGS8:RC, 16 // Iterators get 2 arguments. ++ | sd RB, 0(BASE) // Copy callable. ++ | sd CARG1, 16(BASE) // Copy state. ++ | sd CARG2, 24(BASE) // Copy control var. ++ | addi BASE, BASE, 16 ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_ITERN: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) ++ |->vm_IITERN: ++ | add RA, BASE, RA ++ | ld TAB:RB, -16(RA) ++ | lw RC, -8(RA) // Get index from control var. ++ | cleartp TAB:RB ++ | addi PC, PC, 4 ++ | lw TMP0, TAB:RB->asize ++ | ld TMP1, TAB:RB->array ++ | slli CARG3, TISNUM, 47 ++ |1: // Traverse array part. ++ | bleu TMP0, RC, >5 // Index points after array part? ++ | slliw TMP3, RC, 3 ++ | add TMP3, TMP1, TMP3 ++ | ld CARG1, 0(TMP3) ++ | lhu RD, -4+OFS_RD(PC) // ITERL RD ++ | or TMP2, RC, CARG3 ++ | addiw RC, RC, 1 ++ | beq CARG1, TISNIL, <1 // Skip holes in array part. ++ | sd TMP2, 0(RA) ++ | sd CARG1, 8(RA) ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | decode_BC4b RD ++ | add RD, RD, TMP3 ++ | sw RC, -8(RA) // Update control var. ++ | add PC, PC, RD ++ |3: ++ | ins_next ++ | ++ |5: // Traverse hash part. ++ | lw TMP1, TAB:RB->hmask ++ | subw RC, RC, TMP0 ++ | ld TMP2, TAB:RB->node ++ |6: ++ | bltu TMP1, RC, <3 // End of iteration? Branch to ITERL+1. 
++ | slliw TMP3, RC, 5 ++ | slliw RB, RC, 3 ++ | subw TMP3, TMP3, RB ++ | add NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8) ++ | ld CARG1, 0(NODE:TMP3) ++ | lhu RD, -4+OFS_RD(PC) // ITERL RD ++ | addiw RC, RC, 1 ++ | beq CARG1, TISNIL, <6 // Skip holes in hash part. ++ | ld CARG2, NODE:TMP3->key ++ | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | sd CARG1, 8(RA) ++ | addw RC, RC, TMP0 ++ | decode_BC4b RD ++ | addw RD, RD, TMP3 ++ | sd CARG2, 0(RA) ++ | add PC, PC, RD ++ | sw RC, -8(RA) // Update control var. ++ | j <3 ++ break; ++ ++ case BC_ISNEXT: ++ | // RA = base*8, RD = target (points to ITERN) ++ | add RA, BASE, RA ++ | srliw TMP0, RD, 1 ++ | ld CFUNC:CARG1, -24(RA) ++ | add TMP0, PC, TMP0 ++ | ld CARG2, -16(RA) ++ | ld CARG3, -8(RA) ++ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ | checkfunc CFUNC:CARG1, >5 ++ | gettp CARG2, CARG2 ++ | addi CARG2, CARG2, -LJ_TTAB ++ | lbu TMP1, CFUNC:CARG1->ffid ++ | addi CARG3, CARG3, -LJ_TNIL ++ | or TMP3, CARG2, CARG3 ++ | addi TMP1, TMP1, -FF_next_N ++ | or TMP3, TMP3, TMP1 ++ | lui TMP1, ((LJ_KEYINDEX - (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)) >> 12) & 0xfffff ++ | bnez TMP3, >5 ++ | add PC, TMP0, TMP2 ++ | addi TMP1, TMP1, (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800) ++ | slli TMP1, TMP1, 32 ++ | sd TMP1, -8(RA) ++ |1: ++ | ins_next ++ |5: // Despecialize bytecode if any of the checks fail. ++ | li TMP3, BC_JMP ++ | li TMP1, BC_ITERC ++ | sb TMP3, -4+OFS_OP(PC) ++ | add PC, TMP0, TMP2 ++ | sb TMP1, OFS_OP(PC) ++ | j <1 ++ break; ++ ++ case BC_VARG: ++ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 ++ | ld TMP0, FRAME_PC(BASE) ++ | decode_RDtoRC8 RC, RD ++ | decode_RB8 RB, INS ++ | add RC, BASE, RC ++ | add RA, BASE, RA ++ | addi RC, RC, FRAME_VARG ++ | add TMP2, RA, RB ++ | addi TMP3, BASE, -16 // TMP3 = vtop ++ | sub RC, RC, TMP0 // RC = vbase ++ | // Note: RC may now be even _above_ BASE if nargs was < numparams. 
++ | sub TMP1, TMP3, RC ++ | beqz RB, >5 // Copy all varargs? ++ | addi TMP2, TMP2, -16 ++ |1: // Copy vararg slots to destination slots. ++ | ld CARG1, 0(RC) ++ | sltu TMP0, RC, TMP3 ++ | addi RC, RC, 8 ++ | bnez TMP0, >2 ++ | mv CARG1, TISNIL ++ |2: ++ | sd CARG1, 0(RA) ++ | sltu TMP0, RA, TMP2 ++ | addi RA, RA, 8 ++ | bnez TMP0, <1 ++ |3: ++ | ins_next ++ | ++ |5: // Copy all varargs. ++ | ld TMP0, L->maxstack ++ | li MULTRES, 8 // MULTRES = (0+1)*8 ++ | blez TMP1, <3 // No vararg slots? ++ | add TMP2, RA, TMP1 ++ | addi MULTRES, TMP1, 8 ++ | bltu TMP0, TMP2, >7 ++ |6: ++ | ld CRET1, 0(RC) ++ | addi RC, RC, 8 ++ | sd CRET1, 0(RA) ++ | addi RA, RA, 8 ++ | bltu RC, TMP3, <6 // More vararg slots? ++ | j <3 ++ | ++ |7: // Grow stack for varargs. ++ | sd RA, L->top ++ | sub RA, RA, BASE ++ | sd BASE, L->base ++ | sub BASE, RC, BASE // Need delta, because BASE may change. ++ | sd PC, SAVE_PC(sp) ++ | srliw CARG2, TMP1, 3 ++ | mv CARG1, L ++ | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n) ++ | mv RC, BASE ++ | ld BASE, L->base ++ | add RA, BASE, RA ++ | add RC, BASE, RC ++ | addi TMP3, BASE, -16 ++ | j <6 ++ break; ++ ++ /* -- Returns ----------------------------------------------------------- */ ++ ++ case BC_RETM: ++ | // RA = results*8, RD = extra_nresults*8 ++ | addw RD, RD, MULTRES ++ | j ->BC_RET_Z1 ++ break; ++ ++ case BC_RET: ++ | // RA = results*8, RD = (nresults+1)*8 ++ |->BC_RET_Z1: ++ | ld PC, FRAME_PC(BASE) ++ | add RA, BASE, RA ++ | mv MULTRES, RD ++ |1: ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | ++ |->BC_RET_Z: ++ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return ++ | lw INS, -4(PC) ++ | addi TMP2, BASE, -16 ++ | addi RC, RD, -8 ++ | decode_RA8 TMP0, INS ++ | decode_RB8 RB, INS ++ | sub BASE, TMP2, TMP0 ++ | add TMP3, TMP2, RB ++ | beqz RC, >3 ++ |2: ++ | ld CRET1, 0(RA) ++ | addi RA, RA, 8 ++ | addi RC, RC, -8 ++ | sd CRET1, 0(TMP2) ++ | addi TMP2, TMP2, 8 ++ | bnez 
RC, <2 ++ |3: ++ | addi TMP3, TMP3, -8 ++ |5: ++ | bltu TMP2, TMP3, >6 ++ | ld LFUNC:TMP1, FRAME_FUNC(BASE) ++ | cleartp LFUNC:TMP1 ++ | ld TMP1, LFUNC:TMP1->pc ++ | ld KBASE, PC2PROTO(k)(TMP1) ++ | ins_next ++ | ++ |6: // Fill up results with nil. ++ | sd TISNIL, 0(TMP2) ++ | addi TMP2, TMP2, 8 ++ | j <5 ++ | ++ |->BC_RETV_Z: // Non-standard return case. ++ | andi TMP2, TMP1, FRAME_TYPEP ++ | bxnez TMP2, ->vm_return ++ | // Return from vararg function: relocate BASE down. ++ | sub BASE, BASE, TMP1 ++ | ld PC, FRAME_PC(BASE) ++ | j <1 ++ break; ++ ++ case BC_RET0: case BC_RET1: ++ | // RA = results*8, RD = (nresults+1)*8 ++ | ld PC, FRAME_PC(BASE) ++ | add RA, BASE, RA ++ | mv MULTRES, RD ++ | andi TMP0, PC, FRAME_TYPE ++ | xori TMP1, PC, FRAME_VARG ++ | bnez TMP0, ->BC_RETV_Z ++ | lw INS, -4(PC) ++ | addi TMP2, BASE, -16 ++ if (op == BC_RET1) { ++ | ld CRET1, 0(RA) ++ } ++ | decode_RB8 RB, INS ++ | decode_RA8 RA, INS ++ | sub BASE, TMP2, RA ++ if (op == BC_RET1) { ++ | sd CRET1, 0(TMP2) ++ } ++ |5: ++ | bltu RD, RB, >6 ++ | ld TMP1, FRAME_FUNC(BASE) ++ | cleartp LFUNC:TMP1 ++ | ld TMP1, LFUNC:TMP1->pc ++ | ins_next1 ++ | ld KBASE, PC2PROTO(k)(TMP1) ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | addi TMP2, TMP2, 8 ++ | addi RD, RD, 8 ++ if (op == BC_RET1) { ++ | sd TISNIL, 0(TMP2) ++ } else { ++ | sd TISNIL, -8(TMP2) ++ } ++ | j <5 ++ break; ++ ++ /* -- Loops and branches ------------------------------------------------ */ ++ ++ case BC_FORL: ++ | // Fall through. Assumes BC_IFORL follows. 
++ break; ++ ++ case BC_JFORI: ++ case BC_JFORL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_FORI: ++ case BC_IFORL: ++ | // RA = base*8, RD = target (after end of loop or start of loop) ++ vk = (op == BC_IFORL || op == BC_JFORL); ++ | add RA, BASE, RA ++ | ld CARG1, FORL_IDX*8(RA) // CARG1 = IDX ++ | ld CARG2, FORL_STEP*8(RA) // CARG2 = STEP ++ | ld CARG3, FORL_STOP*8(RA) // CARG3 = STOP ++ | gettp CARG4, CARG1 ++ | gettp CARG5, CARG2 ++ | gettp CARG6, CARG3 ++ if (op != BC_JFORL) { ++ | srliw RD, RD, 1 ++ | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J<<2 ++ | add TMP2, RD, TMP2 ++ } ++ | bne CARG4, TISNUM, >3 ++ | sext.w CARG4, CARG1 // start ++ | sext.w CARG3, CARG3 // stop ++ if (!vk) { // init ++ | bxne CARG6, TISNUM, ->vmeta_for ++ | bxne CARG5, TISNUM, ->vmeta_for ++ | bfextri TMP0, CARG2, 31, 31 // sign ++ | slt CARG2, CARG3, CARG4 ++ | slt TMP1, CARG4, CARG3 ++ | neg TMP4, TMP0 ++ | xor TMP0, TMP1, CARG2 // CARG2 = TMP0 ? TMP1 : CARG2 ++ | and TMP0, TMP0, TMP4 ++ | xor CARG2, CARG2, TMP0 // CARG2=0: +,start <= stop or -,start >= stop ++ } else { ++ | sext.w CARG5, CARG2 // step ++ | addw CARG1, CARG4, CARG5 // start + step ++ | xor TMP3, CARG1, CARG4 // y^a ++ | xor TMP1, CARG1, CARG5 // y^b ++ | and TMP3, TMP3, TMP1 ++ | slt TMP1, CARG1, CARG3 // start+step < stop ? ++ | slt CARG3, CARG3, CARG1 // stop < start+step ? ++ | sltz TMP0, CARG5 // step < 0 ? ++ | sltz TMP3, TMP3 // ((y^a) & (y^b)) < 0: overflow. ++ | neg TMP4, TMP0 ++ | xor TMP1, TMP1, CARG3 // CARG3 = TMP0 ? 
TMP1 : CARG3 ++ | and TMP1, TMP1, TMP4 ++ | xor CARG3, CARG3, TMP1 ++ | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue ++ | zext.w CARG1, CARG1 ++ | settp_b CARG1, TISNUM ++ | sd CARG1, FORL_IDX*8(RA) ++ } ++ |1: ++ if (op == BC_FORI) { ++ | neg TMP4, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ } else if (op == BC_JFORI) { ++ | add PC, PC, TMP2 ++ | lhu RD, -4+OFS_RD(PC) ++ } else if (op == BC_IFORL) { ++ | addi TMP4, CARG2, -1 // CARG2!=0: next INS; CARG2==0: jump back ++ | and TMP2, TMP2, TMP4 ++ | add PC, PC, TMP2 ++ } ++ | ins_next1 ++ | sd CARG1, FORL_EXT*8(RA) ++ |2: ++ if (op == BC_JFORI) { ++ | decode_RD8b RD ++ | beqz CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop ++ } else if (op == BC_JFORL) { ++ | beqz CARG2, =>BC_JLOOP ++ } ++ | ins_next2 ++ | ++ |3: // FP loop. ++ | fld FTMP0, FORL_IDX*8(RA) // start ++ | fld FTMP1, FORL_STOP*8(RA) // stop ++ | ld TMP0, FORL_STEP*8(RA) // step ++ | sltz CARG2, TMP0 // step < 0 ? ++ | neg CARG2, CARG2 ++ if (!vk) { ++ | sltiu TMP3, CARG4, LJ_TISNUM // start is number ? ++ | sltiu TMP0, CARG5, LJ_TISNUM // step is number ? ++ | sltiu TMP1, CARG6, LJ_TISNUM // stop is number ? ++ | and TMP3, TMP3, TMP1 ++ | and TMP0, TMP0, TMP3 ++ | bxeqz TMP0, ->vmeta_for // if start or step or stop isn't number ++ | flt.d TMP3, FTMP0, FTMP1 // start < stop ? ++ | flt.d TMP4, FTMP1, FTMP0 // stop < start ? ++ | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4 ++ | and TMP0, TMP0, CARG2 ++ | xor CARG2, TMP4, TMP0 // CARG2=0:+,startstop ++ | j <1 ++ } else { ++ | fld FTMP3, FORL_STEP*8(RA) ++ | fadd.d FTMP0, FTMP0, FTMP3 // start + step ++ | flt.d TMP3, FTMP0, FTMP1 // start + step < stop ? ++ | flt.d TMP4, FTMP1, FTMP0 ++ | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? 
TMP3 : TMP4 ++ | and TMP0, TMP0, CARG2 ++ | xor CARG2, TMP4, TMP0 ++ if (op == BC_IFORL) { ++ | addi TMP3, CARG2, -1 ++ | and TMP2, TMP2, TMP3 ++ | add PC, PC, TMP2 ++ } ++ | fsd FTMP0, FORL_IDX*8(RA) ++ | ins_next1 ++ | fsd FTMP0, FORL_EXT*8(RA) ++ | j <2 ++ } ++ break; ++ ++ case BC_ITERL: ++ | // Fall through. Assumes BC_IITERL follows. ++ break; ++ ++ case BC_JITERL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IITERL: ++ | // RA = base*8, RD = target ++ | add RA, BASE, RA ++ | ld TMP1, 0(RA) ++ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. ++ if (op == BC_JITERL) { ++ | sd TMP1,-8(RA) ++ | j =>BC_JLOOP ++ } else { ++ | branch_RD // Otherwise save control var + branch. ++ | sd TMP1, -8(RA) ++ } ++ |1: ++ | ins_next ++ break; ++ ++ case BC_LOOP: ++ | // Fall through. Assumes BC_ILOOP follows. ++ break; ++ ++ case BC_ILOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | ins_next ++ break; ++ ++ case BC_JLOOP: ++ break; ++ ++ case BC_JMP: ++ | // RA = base*8 (only used by trace recorder), RD = target ++ | branch_RD // PC + (jump - 0x8000)<<2 ++ | ins_next ++ break; ++ ++ /* -- Function headers -------------------------------------------------- */ ++ ++ case BC_FUNCF: ++ case BC_FUNCV: /* NYI: compiled vararg functions. */ ++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. ++ break; ++ ++ case BC_JFUNCF: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IFUNCF: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | ld TMP2, L->maxstack ++ | lbu TMP1, -4+PC2PROTO(numparams)(PC) ++ | ld KBASE, -4+PC2PROTO(k)(PC) ++ | bxltu TMP2, RA, ->vm_growstack_l ++ | slliw TMP1, TMP1, 3 // numparams*8 ++ |2: ++ | bltu NARGS8:RC, TMP1, >3 // Check for missing parameters. ++ if (op == BC_JFUNCF) { ++ | decode_RD8 RD, INS ++ | j =>BC_JLOOP ++ } else { ++ | ins_next ++ } ++ | ++ |3: // Clear missing parameters. 
++ | add TMP0, BASE, NARGS8:RC ++ | sd TISNIL, 0(TMP0) ++ | addiw NARGS8:RC, NARGS8:RC, 8 ++ | j <2 ++ break; ++ ++ case BC_JFUNCV: ++#if !LJ_HASJIT ++ break; ++#endif ++ | NYI // NYI: compiled vararg functions ++ break; /* NYI: compiled vararg functions. */ ++ ++ case BC_IFUNCV: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | li TMP0, LJ_TFUNC ++ | add TMP1, BASE, RC ++ | ld TMP2, L->maxstack ++ | settp LFUNC:RB, TMP0 ++ | add TMP0, RA, RC ++ | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. ++ | addi TMP2, TMP2, -8 ++ | addi TMP3, RC, 16+FRAME_VARG ++ | ld KBASE, -4+PC2PROTO(k)(PC) ++ | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. ++ | bxgeu TMP0, TMP2, ->vm_growstack_l ++ | lbu TMP2, -4+PC2PROTO(numparams)(PC) ++ | mv RA, BASE ++ | mv RC, TMP1 ++ | ins_next1 ++ | addi BASE, TMP1, 16 ++ | beqz TMP2, >2 ++ |1: ++ | ld TMP0, 0(RA) ++ | sltu CARG2, RA, RC // Less args than parameters? ++ | addi RA, RA, 8 ++ | addi TMP1, TMP1, 8 ++ | addiw TMP2, TMP2, -1 ++ | beqz CARG2, >3 ++ | neg TMP4, CARG2 // Clear old fixarg slot (help the GC). ++ | xor TMP3, TISNIL, TMP0 // CARG1 = CARG2 ? TISNIL : TMP0 ++ | and TMP3, TMP3, TMP4 ++ | xor CARG1, TMP0, TMP3 ++ | sd CARG1, -8(RA) ++ | sd TMP0, 8(TMP1) ++ | bnez TMP2, <1 ++ |2: ++ | ins_next2 ++ |3: ++ | neg TMP4, CARG2 // Clear missing fixargs. ++ | xor TMP3, TMP0, TISNIL // TMP0 = CARG2 ? TMP0 : TISNIL ++ | and TMP3, TMP3, TMP4 ++ | xor TMP0, TISNIL, TMP3 ++ | sd TMP0, 8(TMP1) ++ | bnez TMP2, <1 ++ | j <2 ++ break; ++ ++ case BC_FUNCC: ++ case BC_FUNCCW: ++ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 ++ if (op == BC_FUNCC) { ++ | ld CARG4, CFUNC:RB->f ++ } else { ++ | ld CARG4, GL->wrapf ++ } ++ | add TMP1, RA, NARGS8:RC ++ | ld TMP2, L->maxstack ++ | add RC, BASE, NARGS8:RC ++ | sd BASE, L->base // base of currently excuting function ++ | sd RC, L->top ++ | bxgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. 
++ | li_vmstate C // li TMP0, ~LJ_VMST_C ++ if (op == BC_FUNCCW) { ++ | ld CARG2, CFUNC:RB->f ++ } ++ | mv CARG1, L ++ | st_vmstate // sw TMP0, GL->vmstate ++ | jalr CARG4 // (lua_State *L [, lua_CFunction f]) ++ | // Returns nresults. ++ | ld BASE, L->base ++ | ld TMP1, L->top ++ | sd L, GL->cur_L ++ | slliw RD, CRET1, 3 ++ | li_vmstate INTERP ++ | ld PC, FRAME_PC(BASE) // Fetch PC of caller. ++ | sub RA, TMP1, RD // RA = L->top - nresults*8 ++ | st_vmstate ++ | j ->vm_returnc ++ break; ++ ++ /* ---------------------------------------------------------------------- */ ++ ++ default: ++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); ++ exit(2); ++ break; ++ } ++} ++ ++static int build_backend(BuildCtx *ctx) ++{ ++ int op; ++ ++ dasm_growpc(Dst, BC__MAX); ++ ++ build_subroutines(ctx); ++ ++ |.code_op ++ for (op = 0; op < BC__MAX; op++) ++ build_ins(ctx, (BCOp)op, op); ++ ++ return BC__MAX; ++} ++ ++/* Emit pseudo frame-info for all assembler functions. */ ++static void emit_asm_debug(BuildCtx *ctx) ++{ ++ ++} + +From 90516ab709341b731c1d55542ad253e160344961 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:19:25 +0800 +Subject: [PATCH 07/22] riscv(support): add target definition + +--- + src/lj_target.h | 4 +- + src/lj_target_riscv.h | 542 ++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 545 insertions(+), 1 deletion(-) + create mode 100644 src/lj_target_riscv.h + +diff --git a/src/lj_target.h b/src/lj_target.h +index 6548816a6..160be2a78 100644 +--- a/src/lj_target.h ++++ b/src/lj_target.h +@@ -55,7 +55,7 @@ typedef uint32_t RegSP; + /* Bitset for registers. 32 registers suffice for most architectures. + ** Note that one set holds bits for both GPRs and FPRs. 
+ */ +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 + typedef uint64_t RegSet; + #define RSET_BITS 6 + #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) +@@ -145,6 +145,8 @@ typedef uint32_t RegCost; + #include "lj_target_mips.h" + #elif LJ_TARGET_S390X + #include "lj_target_s390x.h" ++#elif LJ_TARGET_RISCV64 ++#include "lj_target_riscv.h" + #else + #error "Missing include for target CPU" + #endif +diff --git a/src/lj_target_riscv.h b/src/lj_target_riscv.h +new file mode 100644 +index 000000000..22948dc5a +--- /dev/null ++++ b/src/lj_target_riscv.h +@@ -0,0 +1,542 @@ ++/* ++** Definitions for RISC-V CPUs. ++** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ++*/ ++ ++#ifndef _LJ_TARGET_RISCV_H ++#define _LJ_TARGET_RISCV_H ++ ++/* -- Registers IDs ------------------------------------------------------- */ ++ ++#define GPRDEF(_) \ ++ _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \ ++ _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ ++ _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ ++ _(X24) _(X25) _(X26) _(X27) _(X28) _(X29) _(X30) _(X31) ++#define FPRDEF(_) \ ++ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ ++ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ ++ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ ++ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) ++#define VRIDDEF(_) ++ ++#define RIDENUM(name) RID_##name, ++ ++enum { ++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ ++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ ++ RID_MAX, ++ RID_ZERO = RID_X0, ++ RID_TMP = RID_RA, ++ RID_GP = RID_X3, ++ RID_TP = RID_X4, ++ ++ /* Calling conventions. */ ++ RID_RET = RID_X10, ++ RID_RETLO = RID_X10, ++ RID_RETHI = RID_X11, ++ RID_FPRET = RID_F10, ++ RID_CFUNCADDR = RID_X5, ++ ++ /* These definitions must match with the *.dasc file(s): */ ++ RID_BASE = RID_X18, /* Interpreter BASE. 
*/ ++ RID_LPC = RID_X20, /* Interpreter PC. */ ++ RID_GL = RID_X21, /* Interpreter GL. */ ++ RID_LREG = RID_X23, /* Interpreter L. */ ++ ++ /* Register ranges [min, max) and number of registers. */ ++ RID_MIN_GPR = RID_X0, ++ RID_MAX_GPR = RID_X31+1, ++ RID_MIN_FPR = RID_MAX_GPR, ++ RID_MAX_FPR = RID_F31+1, ++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, ++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ ++}; ++ ++#define RID_NUM_KREF RID_NUM_GPR ++#define RID_MIN_KREF RID_X0 ++ ++/* -- Register sets ------------------------------------------------------- */ ++ ++/* Make use of all registers, except ZERO, TMP, SP, GP, TP, CFUNCADDR and GL. */ ++#define RSET_FIXED \ ++ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ ++ RID2RSET(RID_GP)|RID2RSET(RID_TP)|RID2RSET(RID_GL)) ++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) ++#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) ++ ++#define RSET_ALL (RSET_GPR|RSET_FPR) ++#define RSET_INIT RSET_ALL ++ ++#define RSET_SCRATCH_GPR \ ++ (RSET_RANGE(RID_X5, RID_X7+1)|RSET_RANGE(RID_X28, RID_X31+1)|\ ++ RSET_RANGE(RID_X10, RID_X17+1)) ++ ++#define RSET_SCRATCH_FPR \ ++ (RSET_RANGE(RID_F0, RID_F7+1)|RSET_RANGE(RID_F10, RID_F17+1)|\ ++ RSET_RANGE(RID_F28, RID_F31+1)) ++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) ++ ++#define REGARG_FIRSTGPR RID_X10 ++#define REGARG_LASTGPR RID_X17 ++#define REGARG_NUMGPR 8 ++ ++#define REGARG_FIRSTFPR RID_F10 ++#define REGARG_LASTFPR RID_F17 ++#define REGARG_NUMFPR 8 ++ ++/* -- Spill slots --------------------------------------------------------- */ ++ ++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. ++** ++** SPS_FIXED: Available fixed spill slots in interpreter frame. ++** This definition must match with the *.dasc file(s). ++** ++** SPS_FIRST: First spill slot for general use. 
++*/ ++#if LJ_32 ++#define SPS_FIXED 5 ++#else ++#define SPS_FIXED 4 ++#endif ++#define SPS_FIRST 4 ++ ++#define SPOFS_TMP 0 ++ ++#define sps_scale(slot) (4 * (int32_t)(slot)) ++#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) ++ ++/* -- Exit state ---------------------------------------------------------- */ ++/* This definition must match with the *.dasc file(s). */ ++typedef struct { ++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ ++ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ ++ int32_t spill[256]; /* Spill slots. */ ++} ExitState; ++ ++/* Highest exit + 1 indicates stack check. */ ++#define EXITSTATE_CHECKEXIT 1 ++ ++/* Return the address of a per-trace exit stub. */ ++static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) ++{ ++ while (*p == 0x00000013) p++; /* Skip RISCVI_NOP. */ ++ return p + 4 + exitno; ++} ++/* Avoid dependence on lj_jit.h if only including lj_target.h. */ ++#define exitstub_trace_addr(T, exitno) \ ++ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) ++ ++/* -- Instructions -------------------------------------------------------- */ ++ ++/* Instruction fields. */ ++#define RISCVF_D(d) (((d)&31) << 7) ++#define RISCVF_S1(r) (((r)&31) << 15) ++#define RISCVF_S2(r) (((r)&31) << 20) ++#define RISCVF_S3(r) (((r)&31) << 27) ++#define RISCVF_FUNCT2(f) (((f)&3) << 25) ++#define RISCVF_FUNCT3(f) (((f)&7) << 12) ++#define RISCVF_FUNCT7(f) (((f)&127) << 25) ++#define RISCVF_SHAMT(s) ((s) << 20) ++#define RISCVF_RM(m) (((m)&7) << 12) ++#define RISCVF_IMMI(i) ((i) << 20) ++#define RISCVF_IMMS(i) (((i)&0xfe0) << 20 | ((i)&0x1f) << 7) ++#define RISCVF_IMMB(i) (((i)&0x1000) << 19 | ((i)&0x800) >> 4 | ((i)&0x7e0) << 20 | ((i)&0x1e) << 7) ++#define RISCVF_IMMU(i) (((i)&0xfffff) << 12) ++#define RISCVF_IMMJ(i) (((i)&0x100000) << 11 | ((i)&0xff000) | ((i)&0x800) << 9 | ((i)&0x7fe) << 20) ++ ++/* Encode helpers. 
*/ ++#define RISCVF_W_HI(w) ((w) - ((((w)&0xfff)^0x800) - 0x800)) ++#define RISCVF_W_LO(w) ((w)&0xfff) ++#define RISCVF_HI(i) ((RISCVF_W_HI(i) >> 12) & 0xfffff) ++#define RISCVF_LO(i) RISCVF_W_LO(i) ++ ++/* Check for valid field range. */ ++#define RISCVF_SIMM_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) ++#define RISCVF_UIMM_OK(x, b) (((x) >> (b)) == 0) ++#define checku11(i) RISCVF_UIMM_OK(i, 11) ++#define checki12(i) RISCVF_SIMM_OK(i, 12) ++#define checki13(i) RISCVF_SIMM_OK(i, 13) ++#define checki20(i) RISCVF_SIMM_OK(i, 20) ++#define checki21(i) RISCVF_SIMM_OK(i, 21) ++#define checki32auipc(i) (checki32(i) && (int32_t)(i) < 0x7ffff800) ++ ++typedef enum RISCVIns { ++ ++ /* --- RVI --- */ ++ RISCVI_LUI = 0x00000037, ++ RISCVI_AUIPC = 0x00000017, ++ ++ RISCVI_JAL = 0x0000006f, ++ RISCVI_JALR = 0x00000067, ++ ++ RISCVI_ADDI = 0x00000013, ++ RISCVI_SLTI = 0x00002013, ++ RISCVI_SLTIU = 0x00003013, ++ RISCVI_XORI = 0x00004013, ++ RISCVI_ORI = 0x00006013, ++ RISCVI_ANDI = 0x00007013, ++ ++ RISCVI_SLLI = 0x00001013, ++ RISCVI_SRLI = 0x00005013, ++ RISCVI_SRAI = 0x40005013, ++ ++ RISCVI_ADD = 0x00000033, ++ RISCVI_SUB = 0x40000033, ++ RISCVI_SLL = 0x00001033, ++ RISCVI_SLT = 0x00002033, ++ RISCVI_SLTU = 0x00003033, ++ RISCVI_XOR = 0x00004033, ++ RISCVI_SRL = 0x00005033, ++ RISCVI_SRA = 0x40005033, ++ RISCVI_OR = 0x00006033, ++ RISCVI_AND = 0x00007033, ++ ++ RISCVI_LB = 0x00000003, ++ RISCVI_LH = 0x00001003, ++ RISCVI_LW = 0x00002003, ++ RISCVI_LBU = 0x00004003, ++ RISCVI_LHU = 0x00005003, ++ RISCVI_SB = 0x00000023, ++ RISCVI_SH = 0x00001023, ++ RISCVI_SW = 0x00002023, ++ ++ RISCVI_BEQ = 0x00000063, ++ RISCVI_BNE = 0x00001063, ++ RISCVI_BLT = 0x00004063, ++ RISCVI_BGE = 0x00005063, ++ RISCVI_BLTU = 0x00006063, ++ RISCVI_BGEU = 0x00007063, ++ ++ RISCVI_ECALL = 0x00000073, ++ RISCVI_EBREAK = 0x00100073, ++ ++ RISCVI_NOP = 0x00000013, ++ RISCVI_MV = 0x00000013, ++ RISCVI_NOT = 0xfff04013, ++ RISCVI_NEG = 0x40000033, ++ RISCVI_RET = 0x00008067, ++ RISCVI_ZEXT_B = 0x0ff07013, 
++ ++#if LJ_TARGET_RISCV64 ++ RISCVI_LWU = 0x00007003, ++ RISCVI_LD = 0x00003003, ++ RISCVI_SD = 0x00003023, ++ ++ RISCVI_ADDIW = 0x0000001b, ++ ++ RISCVI_SLLIW = 0x0000101b, ++ RISCVI_SRLIW = 0x0000501b, ++ RISCVI_SRAIW = 0x4000501b, ++ ++ RISCVI_ADDW = 0x0000003b, ++ RISCVI_SUBW = 0x4000003b, ++ RISCVI_SLLW = 0x0000103b, ++ RISCVI_SRLW = 0x0000503b, ++ RISCVI_SRAW = 0x4000503b, ++ ++ RISCVI_NEGW = 0x4000003b, ++ RISCVI_SEXT_W = 0x0000001b, ++#endif ++ ++ /* --- RVM --- */ ++ RISCVI_MUL = 0x02000033, ++ RISCVI_MULH = 0x02001033, ++ RISCVI_MULHSU = 0x02002033, ++ RISCVI_MULHU = 0x02003033, ++ RISCVI_DIV = 0x02004033, ++ RISCVI_DIVU = 0x02005033, ++ RISCVI_REM = 0x02006033, ++ RISCVI_REMU = 0x02007033, ++#if LJ_TARGET_RISCV64 ++ RISCVI_MULW = 0x0200003b, ++ RISCVI_DIVW = 0x0200403b, ++ RISCVI_DIVUW = 0x0200503b, ++ RISCVI_REMW = 0x0200603b, ++ RISCVI_REMUW = 0x0200703b, ++#endif ++ ++ /* --- RVF --- */ ++ RISCVI_FLW = 0x00002007, ++ RISCVI_FSW = 0x00002027, ++ ++ RISCVI_FMADD_S = 0x00000043, ++ RISCVI_FMSUB_S = 0x00000047, ++ RISCVI_FNMSUB_S = 0x0000004b, ++ RISCVI_FNMADD_S = 0x0000004f, ++ ++ RISCVI_FADD_S = 0x00000053, ++ RISCVI_FSUB_S = 0x08000053, ++ RISCVI_FMUL_S = 0x10000053, ++ RISCVI_FDIV_S = 0x18000053, ++ RISCVI_FSQRT_S = 0x58000053, ++ ++ RISCVI_FSGNJ_S = 0x20000053, ++ RISCVI_FSGNJN_S = 0x20001053, ++ RISCVI_FSGNJX_S = 0x20002053, ++ ++ RISCVI_FMIN_S = 0x28000053, ++ RISCVI_FMAX_S = 0x28001053, ++ ++ RISCVI_FCVT_W_S = 0xc0000053, ++ RISCVI_FCVT_WU_S = 0xc0100053, ++ ++ RISCVI_FMV_X_W = 0xe0000053, ++ ++ RISCVI_FEQ_S = 0xa0002053, ++ RISCVI_FLT_S = 0xa0001053, ++ RISCVI_FLE_S = 0xa0000053, ++ ++ RISCVI_FCLASS_S = 0xe0001053, ++ ++ RISCVI_FCVT_S_W = 0xd0000053, ++ RISCVI_FCVT_S_WU = 0xd0100053, ++ RISCVI_FMV_W_X = 0xf0000053, ++ ++ RISCVI_FMV_S = 0x20000053, ++ RISCVI_FNEG_S = 0x20001053, ++ RISCVI_FABS_S = 0x20002053, ++#if LJ_TARGET_RISCV64 ++ RISCVI_FCVT_L_S = 0xc0200053, ++ RISCVI_FCVT_LU_S = 0xc0300053, ++ RISCVI_FCVT_S_L = 0xd0200053, ++ 
RISCVI_FCVT_S_LU = 0xd0300053, ++#endif ++ ++ /* --- RVD --- */ ++ RISCVI_FLD = 0x00003007, ++ RISCVI_FSD = 0x00003027, ++ ++ RISCVI_FMADD_D = 0x02000043, ++ RISCVI_FMSUB_D = 0x02000047, ++ RISCVI_FNMSUB_D = 0x0200004b, ++ RISCVI_FNMADD_D = 0x0200004f, ++ ++ RISCVI_FADD_D = 0x02000053, ++ RISCVI_FSUB_D = 0x0a000053, ++ RISCVI_FMUL_D = 0x12000053, ++ RISCVI_FDIV_D = 0x1a000053, ++ RISCVI_FSQRT_D = 0x5a000053, ++ ++ RISCVI_FSGNJ_D = 0x22000053, ++ RISCVI_FSGNJN_D = 0x22001053, ++ RISCVI_FSGNJX_D = 0x22002053, ++ ++ RISCVI_FMIN_D = 0x2a000053, ++ RISCVI_FMAX_D = 0x2a001053, ++ ++ RISCVI_FCVT_S_D = 0x40100053, ++ RISCVI_FCVT_D_S = 0x42000053, ++ ++ RISCVI_FEQ_D = 0xa2002053, ++ RISCVI_FLT_D = 0xa2001053, ++ RISCVI_FLE_D = 0xa2000053, ++ ++ RISCVI_FCLASS_D = 0xe2001053, ++ ++ RISCVI_FCVT_W_D = 0xc2000053, ++ RISCVI_FCVT_WU_D = 0xc2100053, ++ RISCVI_FCVT_D_W = 0xd2000053, ++ RISCVI_FCVT_D_WU = 0xd2100053, ++ ++ RISCVI_FMV_D = 0x22000053, ++ RISCVI_FNEG_D = 0x22001053, ++ RISCVI_FABS_D = 0x22002053, ++#if LJ_TARGET_RISCV64 ++ RISCVI_FCVT_L_D = 0xc2200053, ++ RISCVI_FCVT_LU_D = 0xc2300053, ++ RISCVI_FMV_X_D = 0xe2000053, ++ RISCVI_FCVT_D_L = 0xd2200053, ++ RISCVI_FCVT_D_LU = 0xd2300053, ++ RISCVI_FMV_D_X = 0xf2000053, ++#endif ++ ++ /* --- Zifencei --- */ ++ RISCVI_FENCE = 0x0000000f, ++ RISCVI_FENCE_I = 0x0000100f, ++ ++ /* --- Zicsr --- */ ++ RISCVI_CSRRW = 0x00001073, ++ RISCVI_CSRRS = 0x00002073, ++ RISCVI_CSRRC = 0x00003073, ++ RISCVI_CSRRWI = 0x00005073, ++ RISCVI_CSRRSI = 0x00006073, ++ RISCVI_CSRRCI = 0x00007073, ++ ++ /* --- RVB --- */ ++ /* Zba */ ++ RISCVI_SH1ADD = 0x20002033, ++ RISCVI_SH2ADD = 0x20004033, ++ RISCVI_SH3ADD = 0x20006033, ++#if LJ_TARGET_RISCV64 ++ RISCVI_ADD_UW = 0x0800003b, ++ ++ RISCVI_SH1ADD_UW = 0x2000203b, ++ RISCVI_SH2ADD_UW = 0x2000403b, ++ RISCVI_SH3ADD_UW = 0x2000603b, ++ ++ RISCVI_SLLI_UW = 0x0800101b, ++ ++ RISCVI_ZEXT_W = 0x0800003b, ++#endif ++ /* Zbb */ ++ RISCVI_ANDN = 0x40007033, ++ RISCVI_ORN = 0x40006033, ++ RISCVI_XNOR = 
0x40004033, ++ ++ RISCVI_CLZ = 0x60001013, ++ RISCVI_CTZ = 0x60101013, ++ ++ RISCVI_CPOP = 0x60201013, ++ ++ RISCVI_MAX = 0x0a006033, ++ RISCVI_MAXU = 0x0a007033, ++ RISCVI_MIN = 0x0a004033, ++ RISCVI_MINU = 0x0a005033, ++ ++ RISCVI_SEXT_B = 0x60401013, ++ RISCVI_SEXT_H = 0x60501013, ++#if LJ_TARGET_RISCV64 ++ RISCVI_ZEXT_H = 0x0800403b, ++#endif ++ ++ RISCVI_ROL = 0x60001033, ++ RISCVI_ROR = 0x60005033, ++ RISCVI_RORI = 0x60005013, ++ ++ RISCVI_ORC_B = 0x28705013, ++ ++#if LJ_TARGET_RISCV64 ++ RISCVI_REV8 = 0x6b805013, ++ ++ RISCVI_CLZW = 0x6000101b, ++ RISCVI_CTZW = 0x6010101b, ++ ++ RISCVI_CPOPW = 0x6020101b, ++ ++ RISCVI_ROLW = 0x6000103b, ++ RISCVI_RORIW = 0x6000501b, ++ RISCVI_RORW = 0x6000503b, ++#endif ++ /* NYI: Zbc, Zbs */ ++ ++ /* --- Zicond --- */ ++ RISCVI_CZERO_EQZ = 0x0e005033, ++ RISCVI_CZERO_NEZ = 0x0e007033, ++ ++ /* --- Zfa --- */ ++ RISCVI_FLI_S = 0xf0100053, ++ RISCVI_FMINM_S = 0x28002053, ++ RISCVI_FMAXM_S = 0x28003053, ++ RISCVI_FROUND_S = 0x40400053, ++ RISCVI_FROUNDNX_S = 0x40500053, ++ RISCVI_FCVTMOD_W_D = 0xc2801053, ++ RISCVI_FLEQ_S = 0xa0004053, ++ RISCVI_FLTQ_S = 0xa0005053, ++ RISCVI_FLI_D = 0xf2100053, ++ RISCVI_FMINM_D = 0x2a002053, ++ RISCVI_FMAXM_D = 0x2a003053, ++ RISCVI_FROUND_D = 0x42400053, ++ RISCVI_FROUNDNX_D = 0x42500053, ++ RISCVI_FLEQ_D = 0xa2004053, ++ RISCVI_FLTQ_D = 0xa2005053, ++ ++ RISCVI_FROUND_S_RTZ = 0x40401053, ++ RISCVI_FROUND_S_RDN = 0x40402053, ++ RISCVI_FROUND_S_RUP = 0x40403053, ++ RISCVI_FROUNDNX_S_RTZ = 0x40501053, ++ RISCVI_FROUNDNX_S_RDN = 0x40502053, ++ RISCVI_FROUNDNX_S_RUP = 0x40503053, ++ RISCVI_FROUND_D_RTZ = 0x42401053, ++ RISCVI_FROUND_D_RDN = 0x42402053, ++ RISCVI_FROUND_D_RUP = 0x42403053, ++ RISCVI_FROUNDNX_D_RTZ = 0x42501053, ++ RISCVI_FROUNDNX_D_RDN = 0x42502053, ++ RISCVI_FROUNDNX_D_RUP = 0x42503053, ++ ++ /* TBD: RVV?, RVP?, RVJ? 
*/ ++ ++ /* --- XThead* --- */ ++ /* XTHeadBa */ ++ RISCVI_TH_ADDSL = 0x0000100b, ++ ++ /* XTHeadBb */ ++ RISCVI_TH_SRRI = 0x1000100b, ++#if LJ_TARGET_RISCV64 ++ RISCVI_TH_SRRIW = 0x1400100b, ++#endif ++ RISCVI_TH_EXT = 0x0000200b, ++ RISCVI_TH_EXTU = 0x0000300b, ++ RISCVI_TH_FF0 = 0x8400100b, ++ RISCVI_TH_FF1 = 0x8600100b, ++ RISCVI_TH_REV = 0x8200100b, ++#if LJ_TARGET_RISCV64 ++ RISCVI_TH_REVW = 0x9000100b, ++#endif ++ RISCVI_TH_TSTNBZ = 0x8000100b, ++ ++ /* XTHeadBs */ ++ RISCVI_TH_TST = 0x8800100b, ++ ++ /* XTHeadCondMov */ ++ RISCVI_TH_MVEQZ = 0x4000100b, ++ RISCVI_TH_MVNEZ = 0x4200100b, ++ ++ /* XTHeadMac */ ++ RISCVI_TH_MULA = 0x2000100b, ++ RISCVI_TH_MULAH = 0x2800100b, ++#if LJ_TARGET_RISCV64 ++ RISCVI_TH_MULAW = 0x2400100b, ++#endif ++ RISCVI_TH_MULS = 0x2200100b, ++ RISCVI_TH_MULSH = 0x2a00100b, ++ RISCVI_TH_MULSW = 0x2600100b, ++ ++ /* NYI: XTHeadMemIdx, XTHeadFMemIdx, XTHeadMemPair */ ++} RISCVIns; ++ ++typedef enum RISCVRM { ++ RISCVRM_RNE = 0, ++ RISCVRM_RTZ = 1, ++ RISCVRM_RDN = 2, ++ RISCVRM_RUP = 3, ++ RISCVRM_RMM = 4, ++ RISCVRM_DYN = 7, ++} RISCVRM; ++ ++static const uint16_t riscv_fli_map_hi16[32] = { ++ 0xbff0u, // -1 ++ 0x0010u, // min ++ 0x3ef0u, // 2^-16 ++ 0x3f00u, // 2^-15 ++ 0x3f70u, // 2^-8 ++ 0x3f80u, // 2^-7 ++ 0x3fb0u, // 2^-4 ++ 0x3fc0u, // 2^-3, 0.125 ++ 0x3fd0u, // 2^-2, 0.25 ++ 0x3fd4u, // 0.3125 ++ 0x3fd8u, // 0.375 ++ 0x3fdcu, // 0.4375 ++ 0x3fe0u, // 0.5 ++ 0x3fe4u, // 0.625 ++ 0x3fe8u, // 0.75 ++ 0x3fecu, // 0.875 ++ 0x3ff0u, // 1 ++ 0x3ff4u, // 1.25 ++ 0x3ff8u, // 1.5 ++ 0x3ffcu, // 1.75 ++ 0x4000u, // 2 ++ 0x4004u, // 2.5 ++ 0x4008u, // 3 ++ 0x4010u, // 4 ++ 0x4020u, // 8 ++ 0x4030u, // 16 ++ 0x4060u, // 128 ++ 0x4070u, // 256 ++ 0x40e0u, // 2^15, 32768 ++ 0x40f0u, // 2^16, 65536 ++ 0x7ff0u, // inf ++ 0x7ff8u, // canonical nan ++}; ++ ++#endif + +From c4ea4529a1202d7eb5b5859a66c98272f700df02 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:21:02 +0800 +Subject: [PATCH 08/22] riscv(ffi): add call convention and 
support framework + +--- + src/lj_ccall.c | 156 +++++++++++++++++++++++++++++++++- + src/lj_ccall.h | 17 +++- + src/lj_ccallback.c | 64 +++++++++++++- + src/vm_riscv64.dasc | 202 +++++++++++++++++++++++++++++++++++++++++++- + 4 files changed, 434 insertions(+), 5 deletions(-) + +diff --git a/src/lj_ccall.c b/src/lj_ccall.c +index c2b8f0c7f..892d2d287 100644 +--- a/src/lj_ccall.c ++++ b/src/lj_ccall.c +@@ -687,6 +687,97 @@ + if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ + } + ++#elif LJ_TARGET_RISCV64 ++/* -- RISC-V lp64d calling conventions ------------------------------------ */ ++ ++#define CCALL_HANDLE_STRUCTRET \ ++ /* Return structs of size > 16 by reference. */ \ ++ cc->retref = !(sz <= 16); \ ++ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; ++ ++#define CCALL_HANDLE_STRUCTRET2 \ ++ unsigned int cl = ccall_classify_struct(cts, ctr); \ ++ if ((cl & 4) && (cl >> 8) <= 2) { \ ++ CTSize i = (cl >> 8) - 1; \ ++ do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \ ++ } else { \ ++ if (cl > 1) { \ ++ sp = (uint8_t *)&cc->fpr[0]; \ ++ if ((cl >> 8) > 2) \ ++ sp = (uint8_t *)&cc->gpr[0]; \ ++ } \ ++ memcpy(dp, sp, ctr->size); \ ++ } \ ++ ++#define CCALL_HANDLE_COMPLEXRET \ ++ /* Complex values are returned in 1 or 2 FPRs. */ \ ++ cc->retref = 0; ++ ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ++ ((float *)dp)[0] = cc->fpr[0].f; \ ++ ((float *)dp)[1] = cc->fpr[1].f; \ ++ } else { /* Copy complex double from FPRs. */ \ ++ ((double *)dp)[0] = cc->fpr[0].d; \ ++ ((double *)dp)[1] = cc->fpr[1].d; \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ /* Pass long double complex by reference. */ \ ++ if (sz == 2*sizeof(long double)) { \ ++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ ++ sz = CTSIZE_PTR; \ ++ } \ ++ /* Pass complex in two FPRs or on stack. 
*/ \ ++ else if (sz == 2*sizeof(float)) { \ ++ isfp = 2; \ ++ sz = 2*CTSIZE_PTR; \ ++ } else { \ ++ isfp = 1; \ ++ sz = 2*CTSIZE_PTR; \ ++ } ++ ++#define CCALL_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ sp = (uint8_t *)&cc->fpr[0].f; ++ ++#define CCALL_HANDLE_STRUCTARG \ ++ /* Pass structs of size >16 by reference. */ \ ++ unsigned int cl = ccall_classify_struct(cts, d); \ ++ nff = cl >> 8; \ ++ if (sz > 16) { \ ++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ ++ sz = CTSIZE_PTR; \ ++ } \ ++ /* Pass struct in FPRs. */ \ ++ if (cl > 1) { \ ++ isfp = (cl & 4) ? 2 : 1; \ ++ } ++ ++ ++#define CCALL_HANDLE_REGARG \ ++ if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \ ++ int n2 = ctype_isvector(d->info) ? 1 : \ ++ isfp == 1 ? n : 2; \ ++ if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \ ++ dp = &cc->fpr[nfpr]; \ ++ nfpr += n2; \ ++ goto done; \ ++ } else { \ ++ if (ngpr + n2 <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n2; \ ++ goto done; \ ++ } \ ++ } \ ++ } else { /* Try to pass argument in GPRs. 
*/ \ ++ if (ngpr + n <= maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -1047,6 +1138,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, + + #endif + ++/* -- RISC-V ABI struct classification ---------------------------- */ ++ ++#if LJ_TARGET_RISCV64 ++ ++static unsigned int ccall_classify_struct(CTState *cts, CType *ct) ++{ ++ CTSize sz = ct->size; ++ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); ++ while (ct->sib) { ++ CType *sct; ++ ct = ctype_get(cts, ct->sib); ++ if (ctype_isfield(ct->info)) { ++ sct = ctype_rawchild(cts, ct); ++ if (ctype_isfp(sct->info)) { ++ r |= sct->size; ++ if (!isu) n++; else if (n == 0) n = 1; ++ } else if (ctype_iscomplex(sct->info)) { ++ r |= (sct->size >> 1); ++ if (!isu) n += 2; else if (n < 2) n = 2; ++ } else if (ctype_isstruct(sct->info)) { ++ goto substruct; ++ } else { ++ goto noth; ++ } ++ } else if (ctype_isbitfield(ct->info)) { ++ goto noth; ++ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { ++ sct = ctype_rawchild(cts, ct); ++ substruct: ++ if (sct->size > 0) { ++ unsigned int s = ccall_classify_struct(cts, sct); ++ if (s <= 1) goto noth; ++ r |= (s & 255); ++ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); ++ } ++ } ++ } ++ if ((r == 4 || r == 8) && n <= 4) ++ return r + (n << 8); ++noth: /* Not a homogeneous float/double aggregate. */ ++ return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ ++} ++ ++#endif ++ + /* -- Common C call handling ---------------------------------------------- */ + + /* Infer the destination CTypeID for a vararg argument. */ +@@ -1093,6 +1229,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + #endif + #endif + ++#if LJ_TARGET_RISCV64 ++ int nff = 0; ++#endif ++ + /* Clear unused regs to get some determinism in case of misdeclaration. 
*/ + memset(cc->gpr, 0, sizeof(cc->gpr)); + #if CCALL_NUM_FPR +@@ -1282,7 +1422,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } + #endif +-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) ++#if LJ_TARGET_RISCV64 ++ if (isfp && d->size == sizeof(float)) ++ ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */ ++#endif ++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) + #if LJ_TARGET_MIPS64 + || (isfp && nsp == 0) +@@ -1322,6 +1466,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + CTSize i = (sz >> 2) - 1; + do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); + } ++#elif LJ_TARGET_RISCV64 ++ if (isfp == 2 && nff <= 2) { ++ /* Split complex float into separate registers. */ ++ CTSize i = (sz >> 2) - 1; ++ do { ++ ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i]; ++ } while (i--); ++ } + #else + UNUSED(isfp); + #endif +@@ -1331,7 +1483,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + if ((int32_t)nsp < 0) nsp = 0; + #endif + +-#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) ++#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 + cc->nfpr = nfpr; /* Required for vararg functions. 
*/ + #endif + cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); +diff --git a/src/lj_ccall.h b/src/lj_ccall.h +index c830e300a..5aa0274e0 100644 +--- a/src/lj_ccall.h ++++ b/src/lj_ccall.h +@@ -157,6 +157,21 @@ typedef union FPRArg { + float f; + } FPRArg; + ++#elif LJ_TARGET_RISCV64 ++ ++#define CCALL_NARG_GPR 8 ++#define CCALL_NARG_FPR 8 ++#define CCALL_NRET_GPR 2 ++#define CCALL_NRET_FPR 2 ++#define CCALL_SPS_EXTRA 3 ++#define CCALL_SPS_FREE 1 ++ ++typedef intptr_t GPRArg; ++typedef union FPRArg { ++ double d; ++ struct { LJ_ENDIAN_LOHI(float f; , float g;) }; ++} FPRArg; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -204,7 +219,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { + uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ + #elif LJ_TARGET_ARM64 + void *retp; /* Aggregate return pointer in x8. */ +-#elif LJ_TARGET_PPC ++#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 + uint8_t nfpr; /* Number of arguments in FPRs. */ + #endif + #if LJ_32 +diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c +index 1fdb13a2f..8496ae847 100644 +--- a/src/lj_ccallback.c ++++ b/src/lj_ccallback.c +@@ -91,6 +91,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) + + #define CALLBACK_MCODE_HEAD 52 + ++#elif LJ_TARGET_RISCV64 ++ ++#define CALLBACK_MCODE_HEAD 68 ++ + #else + + /* Missing support for this architecture. 
*/ +@@ -293,6 +297,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) + } + return p; + } ++#elif LJ_TARGET_RISCV64 ++static void *callback_mcode_init(global_State *g, uint32_t *page) ++{ ++ uint32_t *p = page; ++ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; ++ uintptr_t ug = (uintptr_t)(void *)g; ++ uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL; ++ uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL; ++ MSize slot; ++ *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi)); ++ *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi)); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi)); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi)); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10); ++ *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff); ++ *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff); ++ *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0); ++ for (slot = 0; slot < 
CALLBACK_MAX_SLOT; slot++) { ++ *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot); ++ *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p)); ++ p++; ++ } ++ return p; ++} + #else + /* Missing support for this architecture. */ + #define callback_mcode_init(g, p) (p) +@@ -595,6 +632,31 @@ void lj_ccallback_mcode_free(CTState *cts) + if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ + } + ++#elif LJ_TARGET_RISCV64 ++ ++#define CALLBACK_HANDLE_REGARG \ ++ if (isfp) { \ ++ if (nfpr + n <= CCALL_NARG_FPR) { \ ++ sp = &cts->cb.fpr[nfpr]; \ ++ nfpr += n; \ ++ goto done; \ ++ } else if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } else { \ ++ if (ngpr + n <= maxgpr) { \ ++ sp = &cts->cb.gpr[ngpr]; \ ++ ngpr += n; \ ++ goto done; \ ++ } \ ++ } ++ ++#define CALLBACK_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ ((float *)dp)[1] = *(float *)dp; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -750,7 +812,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) + *(int64_t *)dp = (int64_t)*(int32_t *)dp; + } + #endif +-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) ++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 + /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ + if (ctr->size <= 4 && + (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) +diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc +index 87327c798..4c8189b54 100644 +--- a/src/vm_riscv64.dasc ++++ b/src/vm_riscv64.dasc +@@ -811,14 +811,29 @@ static void build_subroutines(BuildCtx *ctx) + | mv BASE, TMP2 // Restore caller BASE. + | ld LFUNC:TMP1, FRAME_FUNC(TMP2) + | ld PC, -24(RB) // Restore PC from [cont|PC]. ++ |.if FFI ++ | sltiu TMP3, TMP0, 2 ++ |.endif + | cleartp LFUNC:TMP1 + | add TMP2, RA, RD + | ld TMP1, LFUNC:TMP1->pc + | sd TISNIL, -8(TMP2) // Ensure one valid arg. 
++ |.if FFI ++ | bnez TMP3, >1 ++ |.endif + | // BASE = base, RA = resultptr, RB = meta base + | ld KBASE, PC2PROTO(k)(TMP1) + | jr TMP0 // Jump to continuation. + | ++ |.if FFI ++ |1: ++ | addi TMP1, RB, -32 ++ | bxnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. ++ | // cont = 0: tailcall from C function. ++ | sub RC, TMP1, BASE ++ | j ->vm_call_tail ++ |.endif ++ | + |->cont_cat: // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | addi CARG2, RB, -32 +@@ -1018,6 +1033,18 @@ static void build_subroutines(BuildCtx *ctx) + | // Returns 0/1 or TValue * (metamethod). + | j <3 + | ++ |->vmeta_equal_cd: ++ |.if FFI ++ | addi PC, PC, -4 ++ | mv CARG1, L ++ | mv CARG2, INS ++ | sd BASE, L->base ++ | sd PC, SAVE_PC(sp) ++ | call_intern vmeta_equal_cd, lj_meta_equal_cd // (lua_State *L, BCIns op) ++ | // Returns 0/1 or TValue * (metamethod). ++ | j <3 ++ |.endif ++ | + |->vmeta_istype: + | addi PC, PC, -4 + | sd BASE, L->base +@@ -2161,7 +2188,6 @@ static void build_subroutines(BuildCtx *ctx) + | + | + |->vm_callhook: // Dispatch target for call hooks. +- | mv CARG2, PC + | + |->cont_stitch: // Trace stitching. + | +@@ -2218,6 +2244,133 @@ static void build_subroutines(BuildCtx *ctx) + |.endif + | + |//----------------------------------------------------------------------- ++ |//-- FFI helper functions ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Handler for callback functions. Callback slot number in x5, g in x7. 
++ |->vm_ffi_callback: ++ |.if FFI ++ |.type CTSTATE, CTState, PC ++ | saveregs ++ | ld CTSTATE, GL:x7->ctype_state ++ | mv GL, x7 ++ | addxi DISPATCH, x7, GG_G2DISP ++ | srli x5, x5, 12 ++ | sw x5, CTSTATE->cb.slot ++ | sd CARG1, CTSTATE->cb.gpr[0] ++ | fsd FARG1, CTSTATE->cb.fpr[0] ++ | sd CARG2, CTSTATE->cb.gpr[1] ++ | fsd FARG2, CTSTATE->cb.fpr[1] ++ | sd CARG3, CTSTATE->cb.gpr[2] ++ | fsd FARG3, CTSTATE->cb.fpr[2] ++ | sd CARG4, CTSTATE->cb.gpr[3] ++ | fsd FARG4, CTSTATE->cb.fpr[3] ++ | sd CARG5, CTSTATE->cb.gpr[4] ++ | fsd FARG5, CTSTATE->cb.fpr[4] ++ | sd CARG6, CTSTATE->cb.gpr[5] ++ | fsd FARG6, CTSTATE->cb.fpr[5] ++ | sd CARG7, CTSTATE->cb.gpr[6] ++ | fsd FARG7, CTSTATE->cb.fpr[6] ++ | sd CARG8, CTSTATE->cb.gpr[7] ++ | fsd FARG8, CTSTATE->cb.fpr[7] ++ | addi TMP0, sp, CFRAME_SPACE ++ | sd TMP0, CTSTATE->cb.stack ++ | sd x0, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | mv CARG1, CTSTATE ++ | mv CARG2, sp ++ | call_intern vm_ffi_callback, lj_ccallback_enter // (CTState *cts, void *cf) ++ | // Returns lua_State *. ++ | ld BASE, L:CRET1->base ++ | ld RC, L:CRET1->top ++ | mv L, CRET1 ++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). ++ | ld LFUNC:RB, FRAME_FUNC(BASE) ++ | li TISNIL, LJ_TNIL ++ | li TISNUM, LJ_TISNUM ++ | slli TMP3, TMP3, 32 ++ | li_vmstate INTERP ++ | subw RC, RC, BASE ++ | cleartp LFUNC:RB ++ | st_vmstate ++ | fmv.d.x TOBIT, TMP3 ++ | ins_callt ++ |.endif ++ | ++ |->cont_ffi_callback: // Return from FFI callback. ++ |.if FFI ++ | ld CTSTATE, GL->ctype_state ++ | sd BASE, L->base ++ | sd RB, L->top ++ | sd L, CTSTATE->L ++ | mv CARG1, CTSTATE ++ | mv CARG2, RA ++ | // (CTState *cts, TValue *o) ++ | call_intern cont_ffi_callback, lj_ccallback_leave ++ | fld FRET1, CTSTATE->cb.fpr[0] ++ | ld CRET1, CTSTATE->cb.gpr[0] ++ | fld FRET2, CTSTATE->cb.fpr[1] ++ | ld CRET2, CTSTATE->cb.gpr[1] ++ | j ->vm_leave_unw ++ |.endif ++ | ++ |->vm_ffi_call: // Call C function via FFI. 
++ | // Caveat: needs special frame unwinding, see below. ++ |.if FFI ++ | .type CCSTATE, CCallState, CARG1 ++ | lw TMP1, CCSTATE->spadj ++ | lbu CARG2, CCSTATE->nsp ++ | lbu CARG3, CCSTATE->nfpr ++ | mv TMP2, sp ++ | sub sp, sp, TMP1 ++ | sd ra, -8(TMP2) ++ | sd x18, -16(TMP2) ++ | sd CCSTATE, -24(TMP2) ++ | mv x18, TMP2 ++ | addi TMP1, CCSTATE, offsetof(CCallState, stack) ++ | mv TMP2, sp ++ | add TMP3, TMP1, CARG2 ++ | beqz CARG2, >2 ++ |1: ++ | ld TMP0, 0(TMP1) ++ | addi TMP1, TMP1, 8 ++ | sd TMP0, 0(TMP2) ++ | addi TMP2, TMP2, 8 ++ | bltu TMP1, TMP3, <1 ++ |2: ++ | beqz CARG3, >3 ++ | fld FARG1, CCSTATE->fpr[0] ++ | fld FARG2, CCSTATE->fpr[1] ++ | fld FARG3, CCSTATE->fpr[2] ++ | fld FARG4, CCSTATE->fpr[3] ++ | fld FARG5, CCSTATE->fpr[4] ++ | fld FARG6, CCSTATE->fpr[5] ++ | fld FARG7, CCSTATE->fpr[6] ++ | fld FARG8, CCSTATE->fpr[7] ++ |3: ++ | ld CFUNCADDR, CCSTATE->func ++ | ld CARG2, CCSTATE->gpr[1] ++ | ld CARG3, CCSTATE->gpr[2] ++ | ld CARG4, CCSTATE->gpr[3] ++ | ld CARG5, CCSTATE->gpr[4] ++ | ld CARG6, CCSTATE->gpr[5] ++ | ld CARG7, CCSTATE->gpr[6] ++ | ld CARG8, CCSTATE->gpr[7] ++ | ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. ++ | jalr CFUNCADDR ++ | ld CCSTATE:TMP1, -24(x18) ++ | ld TMP0, -16(x18) ++ | ld ra, -8(x18) ++ | sd CRET1, CCSTATE:TMP1->gpr[0] ++ | sd CRET2, CCSTATE:TMP1->gpr[1] ++ | fsd FRET1, CCSTATE:TMP1->fpr[0] ++ | fsd FRET2, CCSTATE:TMP1->fpr[1] ++ | mv sp, x18 ++ | mv x18, TMP0 ++ | ret ++ |.endif ++ |// Note: vm_ffi_call must be the last function in this object file! ++ | ++ |//----------------------------------------------------------------------- + } + + /* Generate the code for a single instruction. */ +@@ -2342,6 +2495,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | beqz TMP0, ->BC_ISNEN_Z + } + |// Either or both types are not numbers. ++ |.if FFI ++ | // Check if RA or RD is a cdata. 
++ | xori TMP0, CARG3, LJ_TCDATA ++ | xori TMP1, CARG4, LJ_TCDATA ++ | and TMP0, TMP0, TMP1 ++ | bxeqz TMP0, ->vmeta_equal_cd ++ |.endif + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | decode_BC4b TMP2 + | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 +@@ -2394,10 +2554,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | sub RD, KBASE, RD + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, -8(RD) // KBASE-8-str_const*8 ++ |.if FFI ++ | gettp CARG3, CARG1 ++ | li TMP1, LJ_TCDATA ++ |.endif + | li TMP0, LJ_TSTR + | decode_BC4b TMP2 + | settp CARG2, TMP0 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 ++ |.if FFI ++ | bxeq CARG3, TMP1, ->vmeta_equal_cd ++ |.endif + | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D + | addw TMP2, TMP2, TMP3 + if (vk) { +@@ -2452,7 +2619,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | + |4: // RA is not an integer. + | addw TMP2, TMP2, TMP3 ++ |.if FFI ++ | bgeu CARG3, TISNUM, >7 ++ |.else + | bgeu CARG3, TISNUM, <2 ++ |.endif + | fmv.d.x FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + | bne CARG4, TISNUM, >5 +@@ -2465,11 +2636,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | j <1 + | + |6: // RA is an integer, RD is a number. 
++ |.if FFI ++ | bgeu CARG4, TISNUM, >8 ++ |.else + | bgeu CARG4, TISNUM, <2 ++ |.endif + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + | j <5 + | ++ |.if FFI ++ |7: // RA not int, not number ++ | li TMP0, LJ_TCDATA ++ | bne CARG3, TMP0, <2 ++ | j ->vmeta_equal_cd ++ | ++ |8: // RD not int, not number ++ | li TMP0, LJ_TCDATA ++ | bne CARG4, TMP0, <2 ++ | j ->vmeta_equal_cd ++ |.endif + break; + + case BC_ISEQP: case BC_ISNEP: +@@ -2483,6 +2669,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | gettp TMP1, TMP1 + | addi PC, PC, 4 + | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D ++ |.if FFI ++ | li TMP3, LJ_TCDATA ++ | bxeq TMP1, TMP3, ->vmeta_equal_cd ++ |.endif + | decode_BC4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 +@@ -2823,6 +3013,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ins_next + break; + case BC_KCDATA: ++ |.if FFI ++ | // RA = dst*8, RD = cdata_const*8 (~) ++ | sub TMP1, KBASE, RD ++ | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 ++ | li TMP2, LJ_TCDATA ++ | add RA, BASE, RA ++ | settp TMP0, TMP2 ++ | sd TMP0, 0(RA) ++ | ins_next ++ |.endif + break; + case BC_KSHORT: + | // RA = dst*8, RD = int16_literal*8 + +From 1518305dfa456172644a6f9f78e74fd0d44f9ceb Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:27:26 +0800 +Subject: [PATCH 09/22] riscv(support): add extension detection + +--- + src/lib_jit.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++ + src/lj_jit.h | 11 +++++ + 2 files changed, 135 insertions(+) + +diff --git a/src/lib_jit.c b/src/lib_jit.c +index 122b7cfe1..df25905b1 100644 +--- a/src/lib_jit.c ++++ b/src/lib_jit.c +@@ -697,6 +697,111 @@ JIT_PARAMDEF(JIT_PARAMINIT) + #include + #endif + ++#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX ++#include ++#include ++static sigjmp_buf sigbuf = {0}; ++static void detect_sigill(int sig) ++{ ++ siglongjmp(sigbuf, 1); ++} ++ ++static int riscv_compressed() ++{ ++#if 
defined(__riscv_c) || defined(__riscv_compressed) ++ /* Don't bother checking for RVC -- would crash before getting here. */ ++ return 1; ++#elif defined(__GNUC__) ++ /* c.nop; c.nop; */ ++ __asm__(".4byte 0x00010001"); ++ return 1; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zba() ++{ ++#if defined(__riscv_b) || defined(__riscv_zba) ++ /* Don't bother checking for Zba -- would crash before getting here. */ ++ return 1; ++#elif defined(__GNUC__) ++ /* Don't bother verifying the result, just check if the instruction exists. */ ++ /* add.uw zero, zero, zero */ ++ __asm__(".4byte 0x0800003b"); ++ return 1; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zbb() ++{ ++#if defined(__riscv_b) || defined(__riscv_zbb) ++ /* Don't bother checking for Zbb -- would crash before getting here. */ ++ return 1; ++#elif defined(__GNUC__) ++ register int t asm ("a0"); ++ /* addi a0, zero, 255; sext.b a0, a0; */ ++ __asm__("addi a0, zero, 255\n\t.4byte 0x60451513"); ++ return t < 0; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zicond() ++{ ++#if defined(__riscv_zicond) ++ /* Don't bother checking for Zicond -- would crash before getting here. */ ++ return 1; ++#elif defined(__GNUC__) ++ /* czero.eqz zero, zero, zero; */ ++ __asm__(".4byte 0x0e005033"); ++ return 1; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_zfa() ++{ ++#if defined(__riscv_zfa) ++ /* Don't bother checking for Zfa -- would crash before getting here. */ ++ return 1; ++#else ++ return 0; ++#endif ++} ++ ++static int riscv_xthead() ++{ ++#if (defined(__riscv_xtheadba) \ ++ && defined(__riscv_xtheadbb) \ ++ && defined(__riscv_xtheadcondmov) \ ++ && defined(__riscv_xtheadmac)) ++ /* Don't bother checking for XThead -- would crash before getting here. */ ++ return 1; ++#elif defined(__GNUC__) ++ register int t asm ("a0"); ++ /* C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". */ ++ /* Therefore assume XThead* are present if XTheadBb is present. 
*/ ++ /* addi a0, zero, 255; th.ext a0, a0, 7, 0; */ ++ __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b"); ++ return t == -1; /* In case of collision with other vendor extensions. */ ++#else ++ return 0; ++#endif ++} ++ ++static uint32_t riscv_probe(int (*func)(void), uint32_t flag) ++{ ++ if (sigsetjmp(sigbuf, 1) == 0) { ++ return func() ? flag : 0; ++ } else return 0; ++} ++#endif ++ + /* Arch-dependent CPU feature detection. */ + static uint32_t jit_cpudetect(void) + { +@@ -769,6 +874,25 @@ static uint32_t jit_cpudetect(void) + #endif + #elif LJ_TARGET_S390X + /* No optional CPU features to detect (for now). */ ++ ++#elif LJ_TARGET_RISCV64 ++#if LJ_HASJIT ++ /* SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. */ ++ struct sigaction old = {0}, act = {0}; ++ act.sa_handler = detect_sigill; ++ sigaction(SIGILL, &act, &old); ++ flags |= riscv_probe(riscv_compressed, JIT_F_RVC); ++ flags |= riscv_probe(riscv_zba, JIT_F_RVZba); ++ flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); ++ flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond); ++ flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa); ++ flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); ++ sigaction(SIGILL, &old, NULL); ++ ++ /* Detect V/P? */ ++ /* V have no hardware available, P not ratified yet. 
*/ ++#endif ++ + #else + #error "Missing CPU detection for this architecture" + #endif +diff --git a/src/lj_jit.h b/src/lj_jit.h +index c622b7ed0..b6aaf21dc 100644 +--- a/src/lj_jit.h ++++ b/src/lj_jit.h +@@ -68,6 +68,17 @@ + #endif + #endif + ++#elif LJ_TARGET_RISCV64 ++ ++#define JIT_F_RVC (JIT_F_CPU << 0) ++#define JIT_F_RVZba (JIT_F_CPU << 1) ++#define JIT_F_RVZbb (JIT_F_CPU << 2) ++#define JIT_F_RVZicond (JIT_F_CPU << 3) ++#define JIT_F_RVZfa (JIT_F_CPU << 4) ++#define JIT_F_RVXThead (JIT_F_CPU << 5) ++ ++#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead" ++ + #else + + #define JIT_F_CPUSTRING "" + +From 784d8effa0e9bf7cd94c512248c9710857713a96 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:29:41 +0800 +Subject: [PATCH 10/22] riscv(jit): add insn emitter + +--- + src/lj_emit_riscv.h | 574 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 574 insertions(+) + create mode 100644 src/lj_emit_riscv.h + +diff --git a/src/lj_emit_riscv.h b/src/lj_emit_riscv.h +new file mode 100644 +index 000000000..d4160663e +--- /dev/null ++++ b/src/lj_emit_riscv.h +@@ -0,0 +1,574 @@ ++/* ++** RISC-V instruction emitter. ++** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ++** ++** Contributed by gns from PLCT Lab, ISCAS. ++*/ ++ ++static intptr_t get_k64val(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_KINT64) { ++ return (intptr_t)ir_kint64(ir)->u64; ++ } else if (ir->o == IR_KGC) { ++ return (intptr_t)ir_kgc(ir); ++ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { ++ return (intptr_t)ir_kptr(ir); ++ } else { ++ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, ++ "bad 64 bit const IR op %d", ir->o); ++ return ir->i; /* Sign-extended. 
*/ ++ } ++} ++ ++#define get_kval(as, ref) get_k64val(as, ref) ++ ++/* -- Emit basic instructions --------------------------------------------- */ ++ ++static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2) ++{ ++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2); ++} ++ ++#define emit_ds(as, riscvi, rd, rs1) emit_r(as, riscvi, rd, rs1, 0) ++#define emit_ds2(as, riscvi, rd, rs2) emit_r(as, riscvi, rd, 0, rs2) ++#define emit_ds1s2(as, riscvi, rd, rs1, rs2) emit_r(as, riscvi, rd, rs1, rs2) ++ ++static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3) ++{ ++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_S3(rs3); ++} ++ ++#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3) emit_r4(as, riscvi, rd, rs1, rs2, rs3) ++ ++static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i) ++{ ++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI((uint32_t)i & 0xfff); ++} ++ ++#define emit_di(as, riscvi, rd, i) emit_i(as, riscvi, rd, 0, i) ++#define emit_dsi(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i) ++#define emit_dsshamt(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i&0x3f) ++ ++static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) ++{ ++ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS((uint32_t)i & 0xfff); ++} ++ ++#define emit_s1s2i(as, riscvi, rs1, rs2, i) emit_s(as, riscvi, rs1, rs2, i) ++ ++/* ++static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) ++{ ++ *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe); ++} ++*/ ++ ++static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i) ++{ ++ *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMU(i & 0xfffff); ++} ++ ++#define emit_du(as, riscvi, rd, i) emit_u(as, riscvi, rd, i) ++ ++/* ++static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i) ++{ ++ *--as->mcp 
= riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe); ++} ++*/ ++ ++static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); ++static void ra_allockreg(ASMState *as, intptr_t k, Reg r); ++static Reg ra_scratch(ASMState *as, RegSet allow); ++ ++static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs) ++{ ++ lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs); ++ switch (riscvi) { ++ case RISCVI_LD: case RISCVI_LW: case RISCVI_LH: case RISCVI_LB: ++ case RISCVI_LWU: case RISCVI_LHU: case RISCVI_LBU: ++ case RISCVI_FLW: case RISCVI_FLD: ++ emit_dsi(as, riscvi, data, base, ofs); ++ break; ++ case RISCVI_SD: case RISCVI_SW: case RISCVI_SH: case RISCVI_SB: ++ case RISCVI_FSW: case RISCVI_FSD: ++ emit_s1s2i(as, riscvi, base, data, ofs); ++ break; ++ default: lj_assertA(0, "invalid lso"); break; ++ } ++} ++ ++static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp, ++ int32_t shamt) ++{ ++ if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) { ++ if (!(as->flags & JIT_F_RVZbb)) switch (riscvi) { ++ case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break; ++ case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break; ++ default: lj_assertA(0, "invalid roti op"); break; ++ } ++ emit_dsshamt(as, riscvi, rd, rs1, shamt); ++ } else { ++ RISCVIns ai, bi; ++ int32_t shwid, shmsk; ++ switch (riscvi) { ++ case RISCVI_RORI: ++ ai = RISCVI_SRLI, bi = RISCVI_SLLI; ++ shwid = 64, shmsk = 63; ++ break; ++ case RISCVI_RORIW: ++ ai = RISCVI_SRLIW, bi = RISCVI_SLLIW; ++ shwid = 32, shmsk = 31; ++ break; ++ default: ++ lj_assertA(0, "invalid roti op"); ++ return; ++ } ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); ++ emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk); ++ emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk); ++ } ++} ++ ++static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp) ++{ ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, riscvi, rd, rs1, rs2); ++ } else { ++ RISCVIns sai, 
sbi; ++ switch (riscvi) { ++ case RISCVI_ROL: ++ sai = RISCVI_SLL, sbi = RISCVI_SRL; ++ break; ++ case RISCVI_ROR: ++ sai = RISCVI_SRL, sbi = RISCVI_SLL; ++ break; ++ case RISCVI_ROLW: ++ sai = RISCVI_SLLW, sbi = RISCVI_SRLW; ++ break; ++ case RISCVI_RORW: ++ sai = RISCVI_SRLW, sbi = RISCVI_SLLW; ++ break; ++ default: ++ lj_assertA(0, "invalid rot op"); ++ return; ++ } ++ if (rd == rs2) { ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); ++ emit_ds1s2(as, sbi, tmp, rs1, tmp); ++ emit_ds1s2(as, sai, rd, rs1, rs2); ++ emit_ds2(as, RISCVI_NEG, tmp, rs2); ++ } else { ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); ++ emit_ds1s2(as, sai, rd, rs1, rs2); ++ emit_ds1s2(as, sbi, tmp, rs1, tmp); ++ emit_ds2(as, RISCVI_NEG, tmp, rs2); ++ } ++ } ++} ++ ++static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1) ++{ ++ if ((riscvi != RISCVI_ZEXT_W && as->flags & JIT_F_RVZbb) || ++ (riscvi == RISCVI_ZEXT_W && as->flags & JIT_F_RVZba)) { ++ emit_ds(as, riscvi, rd, rs1); ++ } else if (as->flags & JIT_F_RVXThead) { ++ uint32_t hi, sext; ++ switch (riscvi) { ++ case RISCVI_ZEXT_B: ++ case RISCVI_SEXT_W: ++ emit_ds(as, riscvi, rd, rs1); ++ return; ++ case RISCVI_ZEXT_H: ++ hi = 15, sext = 0; ++ break; ++ case RISCVI_ZEXT_W: ++ hi = 31, sext = 0; ++ break; ++ case RISCVI_SEXT_B: ++ hi = 7, sext = 1; ++ break; ++ case RISCVI_SEXT_H: ++ hi = 15, sext = 1; ++ break; ++ default: ++ lj_assertA(0, "invalid ext op"); ++ return; ++ } ++ emit_dsi(as, sext ? 
RISCVI_TH_EXT : RISCVI_TH_EXTU, ++ rd, rs1, hi << 6); ++ } else { ++ RISCVIns sli, sri; ++ int32_t shamt; ++ switch (riscvi) { ++ case RISCVI_ZEXT_B: ++ case RISCVI_SEXT_W: ++ emit_ds(as, riscvi, rd, rs1); ++ return; ++ case RISCVI_ZEXT_H: ++ sli = RISCVI_SLLI, sri = RISCVI_SRLI; ++ shamt = 48; ++ break; ++ case RISCVI_ZEXT_W: ++ sli = RISCVI_SLLI, sri = RISCVI_SRLI; ++ shamt = 32; ++ break; ++ case RISCVI_SEXT_B: ++ sli = RISCVI_SLLI, sri = RISCVI_SRAI; ++ shamt = 56; ++ break; ++ case RISCVI_SEXT_H: ++ sli = RISCVI_SLLI, sri = RISCVI_SRAI; ++ shamt = 48; ++ break; ++ default: ++ lj_assertA(0, "invalid ext op"); ++ return; ++ } ++ emit_dsshamt(as, sri, rd, rd, shamt); ++ emit_dsshamt(as, sli, rd, rs1, shamt); ++ } ++} ++ ++static void emit_cleartp(ASMState *as, Reg rd, Reg rs1) ++{ ++ if (as->flags & JIT_F_RVXThead) { ++ emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6); ++ } else { ++ emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17); ++ emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17); ++ } ++} ++ ++/* ++static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) ++{ ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2); ++ } else { ++ emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp); ++ emit_ds(as, RISCVI_NOT, tmp, rs2); ++ } ++} ++*/ ++ ++/* ++static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) ++{ ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2); ++ } else { ++ emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp); ++ emit_ds(as, RISCVI_NOT, tmp, rs2); ++ } ++} ++*/ ++ ++static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2) ++{ ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2); ++ } else { ++ emit_ds(as, RISCVI_NOT, rd, rd); ++ emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2); ++ } ++} ++ ++static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt) ++{ ++ if (as->flags & JIT_F_RVZba) { ++ switch (shamt) { ++ case 1: emit_ds1s2(as, RISCVI_SH1ADD, rd, 
rs2, rs1); break; ++ case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break; ++ case 3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break; ++ default: return; ++ } ++ } else if (as->flags & JIT_F_RVXThead) { ++ emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2); ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp); ++ emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt); ++ } ++} ++ ++#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1) ++#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2) ++#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3) ++ ++static void emit_loadk12(ASMState *as, Reg rd, int32_t i) ++{ ++ emit_di(as, RISCVI_ADDI, rd, i); ++} ++ ++static void emit_loadk32(ASMState *as, Reg rd, int32_t i) ++{ ++ if (checki12((int64_t)i)) { ++ emit_loadk12(as, rd, i); ++ } else { ++ if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0)) ++ emit_dsi(as, RISCVI_XORI, rd, rd, RISCVF_LO(i)); ++ else ++ emit_dsi(as, RISCVI_ADDI, rd, rd, RISCVF_LO(i)); ++ emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i)); ++ } ++} ++ ++/* -- Emit loads/stores --------------------------------------------------- */ ++ ++/* Prefer rematerialization of BASE/L from global_State over spills. */ ++#define emit_canremat(ref) ((ref) <= REF_BASE) ++ ++ ++/* Load a 32 bit constant into a GPR. */ ++#define emit_loadi(as, r, i) emit_loadk32(as, r, i); ++ ++/* Load a 64 bit constant into a GPR. 
*/ ++static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) ++{ ++ int64_t u64_delta = (int64_t)((intptr_t)u64 - (intptr_t)(as->mcp - 2)); ++ if (checki32((int64_t)u64)) { ++ emit_loadk32(as, r, (int32_t)u64); ++ } else if (checki32auipc(u64_delta)) { ++ emit_dsi(as, RISCVI_ADDI, r, r, RISCVF_LO(u64_delta)); ++ emit_du(as, RISCVI_AUIPC, r, RISCVF_HI(u64_delta)); ++ } else { ++ uint32_t lo32 = u64 & 0xfffffffful; ++ if (checku11(lo32)) { ++ if (lo32 > 0) emit_dsi(as, RISCVI_ADDI, r, r, lo32); ++ emit_dsshamt(as, RISCVI_SLLI, r, r, 32); ++ } else { ++ RISCVIns li_insn[7] = {0}; ++ int shamt = 0, step = 0; ++ for(int bit = 0; bit < 32; bit++) { ++ if (lo32 & (1u << bit)) { ++ if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); ++ int inc = bit+10 > 31 ? 31-bit : 10; ++ bit += inc, shamt = inc+1; ++ uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1); ++ uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0); ++ li_insn[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload); ++ } else shamt++; ++ } ++ if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); ++ ++ if (step < 6) { ++ for(int i = 0; i < step; i++) ++ *--as->mcp = li_insn[i]; ++ } else { ++ emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff); ++ emit_dsshamt(as, RISCVI_SLLI, r, r, 10); ++ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff); ++ emit_dsshamt(as, RISCVI_SLLI, r, r, 11); ++ emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff); ++ emit_dsshamt(as, RISCVI_SLLI, r, r, 11); ++ } ++ } ++ ++ uint32_t hi32 = u64 >> 32; ++ if (hi32 & 0xfff) emit_loadk32(as, r, hi32); ++ else emit_du(as, RISCVI_LUI, r, hi32 >> 12); ++ } ++} ++ ++#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) ++ ++/* Get/set from constant pointer. 
*/ ++static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow) ++{ ++ emit_lso(as, riscvi, r, ra_allock(as, igcptr(p), allow), 0); ++} ++ ++/* Load 64 bit IR constant into register. */ ++static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) ++{ ++ const uint64_t *k = &ir_k64(ir)->u64; ++ Reg r64 = r; ++ if (rset_test(RSET_FPR, r)) { ++ if (as->flags & JIT_F_RVZfa) { ++ uint8_t sign = (*k >> 63) & 1; ++ uint16_t k_hi16 = (*k >> 48) & 0xffff; ++ uint64_t k_lo48 = *k & 0xffffffffffff; ++ uint16_t mk_hi16 = k_hi16 & 0x7fff; ++ if (!k_lo48) { ++ if (riscv_fli_map_hi16[0] == k_hi16) { ++ emit_ds(as, RISCVI_FLI_D, r, 0); ++ return; ++ } ++ for (int i = 1; i < 32; i++) { ++ if (riscv_fli_map_hi16[i] == mk_hi16) { ++ if (sign) ++ emit_ds1s2(as, RISCVI_FNEG_D, r, r, r); ++ emit_ds(as, RISCVI_FLI_D, r, i); ++ return; ++ } ++ } ++ } ++ } ++ r64 = RID_TMP; ++ emit_ds(as, RISCVI_FMV_D_X, r, r64); ++ } ++ emit_loadu64(as, r64, *k); ++} ++ ++/* Get/set global_State fields. */ ++static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs) ++{ ++ emit_lso(as, riscvi, r, RID_GL, ofs); ++} ++ ++#define emit_getgl(as, r, field) \ ++ emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field)) ++#define emit_setgl(as, r, field) \ ++ emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field)) ++ ++/* Trace number is determined from per-trace exit stubs. */ ++#define emit_setvmstate(as, i) UNUSED(i) ++ ++/* -- Emit control-flow instructions -------------------------------------- */ ++ ++/* Label for internal jumps. */ ++typedef MCode *MCLabel; ++ ++/* Return label pointing to current PC. 
*/ ++#define emit_label(as) ((as)->mcp) ++ ++static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = (char *)target - (char *)(p - 1); ++ switch (jump) { ++ case -1: ++ lj_assertA(((delta + 0x10000) >> 13) == 0, "branch target out of range"); /* B */ ++ *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta); ++ break; ++ case 0: case 1: ++ lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range"); /* ^B+J */ ++ if (checki13(delta) && !jump) { ++ *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta); ++ *--p = RISCVI_NOP; ++ } else { ++ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); /* Poorman's trampoline */ ++ *--p = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); ++ } ++ break; ++ default: ++ lj_assertA(0, "invalid jump type"); ++ break; ++ } ++ as->mcp = p; ++} ++ ++static void emit_jump(ASMState *as, MCode *target, int jump) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta; ++ switch(jump) { ++ case -1: ++ delta = (char *)target - (char *)(p - 1); ++ lj_assertA(((delta + 0x100000) >> 21) == 0, "jump target out of range"); /* J */ ++ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); ++ break; ++ case 0: case 1: ++ delta = (char *)target - (char *)(p - 2); ++ lj_assertA(checki32auipc(delta), "jump target out of range"); /* AUIPC+JALR */ ++ if (checki21(delta) && !jump) { ++ *--p = RISCVI_NOP; ++ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); ++ } else { ++ *--p = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); ++ *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); ++ } ++ break; ++ default: ++ lj_assertA(0, "invalid jump type"); ++ break; ++ } ++ as->mcp = p; ++} ++ ++#define emit_jmp(as, target) emit_jump(as, target, 0) ++ ++#define emit_mv(as, dst, src) \ ++ emit_ds(as, RISCVI_MV, (dst), (src)) ++ ++static void emit_call(ASMState *as, void *target, int needcfa) ++{ ++ MCode *p = as->mcp; ++ 
ptrdiff_t delta = (char *)target - (char *)(p - 2); ++ if (checki21(delta)) { ++ *--p = RISCVI_NOP; ++ *--p = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta); ++ } else if (checki32(delta)) { ++ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); ++ *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); ++ needcfa = 1; ++ } else { ++ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0); ++ needcfa = 2; ++ } ++ as->mcp = p; ++ if (needcfa > 1) ++ ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); ++} ++ ++/* -- Emit generic operations --------------------------------------------- */ ++ ++/* Generic move between two regs. */ ++static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) ++{ ++ if (src < RID_MAX_GPR && dst < RID_MAX_GPR) ++ emit_mv(as, dst, src); ++ else if (src < RID_MAX_GPR) ++ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src); ++ else if (dst < RID_MAX_GPR) ++ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src); ++ else ++ emit_ds1s2(as, irt_isnum(ir->t) ? RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src); ++} ++ ++/* Emit an arithmetic operation with a constant operand. */ ++static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src, ++ Reg tmp, intptr_t k) ++{ ++ if (checki12(k)) emit_dsi(as, riscvi, dest, src, k); ++ else { ++ switch (riscvi) { ++ case RISCVI_ADDI: riscvi = RISCVI_ADD; break; ++ case RISCVI_XORI: riscvi = RISCVI_XOR; break; ++ case RISCVI_ORI: riscvi = RISCVI_OR; break; ++ case RISCVI_ANDI: riscvi = RISCVI_AND; break; ++ default: lj_assertA(0, "NYI arithmetic RISCVIns"); return; ++ } ++ emit_ds1s2(as, riscvi, dest, src, tmp); ++ emit_loadu64(as, tmp, (uintptr_t)k); ++ } ++} ++ ++/* Generic load of register with base and (small) offset address. 
*/ ++static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) ++ emit_lso(as, irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW, r, base, ofs); ++ else ++ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FLD : RISCVI_FLW, r, base, ofs); ++} ++ ++/* Generic store of register with base and (small) offset address. */ ++static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) ++ emit_lso(as, irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW, r, base, ofs); ++ else ++ emit_lso(as, irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW, r, base, ofs); ++} ++ ++/* Add offset to pointer. */ ++static void emit_addptr(ASMState *as, Reg r, int32_t ofs) ++{ ++ if (ofs) ++ emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs); ++} ++ ++ ++#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) + +From 52b408b3547d17322a09681c8bc73d66fdc54652 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:32:53 +0800 +Subject: [PATCH 11/22] riscv(jit): add IR assembler + +--- + src/lj_asm.c | 4 + + src/lj_asm_riscv64.h | 2037 ++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 2041 insertions(+) + create mode 100644 src/lj_asm_riscv64.h + +diff --git a/src/lj_asm.c b/src/lj_asm.c +index 20e1091dd..a0341e858 100644 +--- a/src/lj_asm.c ++++ b/src/lj_asm.c +@@ -227,6 +227,8 @@ static Reg rset_pickrandom(ASMState *as, RegSet rs) + #include "lj_emit_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_emit_mips.h" ++#elif LJ_TARGET_RISCV64 ++#include "lj_emit_riscv.h" + #else + #error "Missing instruction emitter for target CPU" + #endif +@@ -1710,6 +1712,8 @@ static void asm_loop(ASMState *as) + #include "lj_asm_mips.h" + #elif LJ_TARGET_S390X + #include "lj_asm_s390x.h" ++#elif LJ_TARGET_RISCV64 ++#include "lj_asm_riscv64.h" + #else + #error "Missing assembler for target CPU" + #endif +diff --git a/src/lj_asm_riscv64.h b/src/lj_asm_riscv64.h +new file mode 100644 +index 000000000..2ee63fa10 +--- /dev/null ++++ 
b/src/lj_asm_riscv64.h +@@ -0,0 +1,2037 @@ ++/* ++** RISC-V IR assembler (SSA IR -> machine code). ++** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h ++** ++** Contributed by gns from PLCT Lab, ISCAS. ++*/ ++ ++/* -- Register allocator extensions --------------------------------------- */ ++ ++/* Allocate a register with a hint. */ ++static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!ra_hashint(r) && !iscrossref(as, ref)) ++ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ ++ r = ra_allocref(as, ref, allow); ++ } ++ ra_noweak(as, r); ++ return r; ++} ++ ++/* Allocate a register or RID_ZERO. */ ++static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) ++ return RID_ZERO; ++ r = ra_allocref(as, ref, allow); ++ } else { ++ ra_noweak(as, r); ++ } ++ return r; ++} ++ ++/* Allocate two source registers for three-operand instructions. 
*/ ++static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) ++{ ++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); ++ Reg left = irl->r, right = irr->r; ++ if (ra_hasreg(left)) { ++ ra_noweak(as, left); ++ if (ra_noreg(right)) ++ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); ++ else ++ ra_noweak(as, right); ++ } else if (ra_hasreg(right)) { ++ ra_noweak(as, right); ++ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); ++ } else if (ra_hashint(right)) { ++ right = ra_alloc1z(as, ir->op2, allow); ++ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); ++ } else { ++ left = ra_alloc1z(as, ir->op1, allow); ++ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); ++ } ++ return left | (right << 8); ++} ++ ++/* -- Guard handling ------------------------------------------------------ */ ++ ++/* Copied from MIPS, AUIPC+JALR is expensive to setup in-place */ ++#define RISCV_SPAREJUMP 4 ++ ++/* Setup spare long-range jump (trampoline?) slots per mcarea. */ ++ ++static void asm_sparejump_setup(ASMState *as) ++{ ++ MCode *mxp = as->mctop; ++ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { ++ for (int i = RISCV_SPAREJUMP*2; i--; ) ++ *--mxp = RISCVI_EBREAK; ++ as->mctop = mxp; ++ } ++} ++ ++static MCode *asm_sparejump_use(MCode *mcarea, MCode *target) ++{ ++ MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); ++ int slot = RISCV_SPAREJUMP; ++ RISCVIns tslot = RISCVI_EBREAK, tauipc, tjalr; ++ while (slot--) { ++ mxp -= 2; ++ ptrdiff_t delta = (char *)target - (char *)mxp; ++ tauipc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)), ++ tjalr = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); ++ if (mxp[0] == tauipc && mxp[1] == tjalr) { ++ return mxp; ++ } else if (mxp[0] == tslot) { ++ mxp[0] = tauipc, mxp[1] = tjalr; ++ return mxp; ++ } ++ } ++ return NULL; ++} ++ ++/* Setup exit stub after the end of each trace. 
*/ ++static void asm_exitstub_setup(ASMState *as, ExitNo nexits) ++{ ++ ExitNo i; ++ MCode *mxp = as->mctop; ++ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) ++ asm_mclimit(as); ++ for (i = nexits-1; (int32_t)i >= 0; i--) ++ *--mxp = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ((uintptr_t)(4*(-4-i))); ++ ptrdiff_t delta = (char *)lj_vm_exit_handler - (char *)(mxp-3); ++ /* 1: sw ra, 0(sp); auipc+jalr ->vm_exit_handler; lui x0, traceno; jal <1; jal <1; ... */ ++ *--mxp = RISCVI_LUI | RISCVF_IMMU(as->T->traceno); ++ *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) ++ | RISCVF_IMMI(RISCVF_LO((uintptr_t)(void *)delta)); ++ *--mxp = RISCVI_AUIPC | RISCVF_D(RID_TMP) ++ | RISCVF_IMMU(RISCVF_HI((uintptr_t)(void *)delta)); ++ *--mxp = RISCVI_SD | RISCVF_S2(RID_RA) | RISCVF_S1(RID_SP); ++ as->mctop = mxp; ++} ++ ++static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) ++{ ++ /* Keep this in-sync with exitstub_trace_addr(). */ ++ return as->mctop + exitno + 4; ++} ++ ++/* Emit conditional branch to exit for guard. */ ++static void asm_guard(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2) ++{ ++ MCode *target = asm_exitstub_addr(as, as->snapno); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->loopinv = 1; ++ as->mcp = ++p; ++ *p = RISCVI_JAL | RISCVF_IMMJ((char *)target - (char *)p); ++ riscvi = riscvi^RISCVF_FUNCT3(1); /* Invert cond. */ ++ target = p - 1; /* Patch target later in asm_loop_fixup. */ ++ } ++ ptrdiff_t delta = (char *)target - (char *)(p - 1); ++ *--p = RISCVI_JAL | RISCVF_IMMJ(delta); ++ *--p = (riscvi^RISCVF_FUNCT3(1)) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); ++ as->mcp = p; ++} ++ ++/* -- Operand fusion ------------------------------------------------------ */ ++ ++/* Limit linear search to this distance. Avoids O(n^2) behavior. */ ++#define CONFLICT_SEARCH_LIM 31 ++ ++/* Check if there's no conflicting instruction between curins and ref. 
*/ ++static int noconflict(ASMState *as, IRRef ref, IROp conflict) ++{ ++ IRIns *ir = as->ir; ++ IRRef i = as->curins; ++ if (i > ref + CONFLICT_SEARCH_LIM) ++ return 0; /* Give up, ref is too far away. */ ++ while (--i > ref) ++ if (ir[i].o == conflict) ++ return 0; /* Conflict found. */ ++ return 1; /* Ok, no conflict. */ ++} ++ ++/* Fuse the array base of colocated arrays. */ ++static int32_t asm_fuseabase(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && ++ !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) ++ return (int32_t)sizeof(GCtab); ++ return 0; ++} ++ ++/* Fuse array/hash/upvalue reference into register+offset operand. */ ++static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) ++{ ++ IRIns *ir = IR(ref); ++ if (ra_noreg(ir->r)) { ++ if (ir->o == IR_AREF) { ++ if (mayfuse(as, ref)) { ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki12(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, refa, allow); ++ } ++ } ++ } ++ } else if (ir->o == IR_HREFK) { ++ if (mayfuse(as, ref)) { ++ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); ++ if (checki12(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, ir->op1, allow); ++ } ++ } ++ } else if (ir->o == IR_UREFC) { ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; ++ intptr_t ofs = ((intptr_t)((uintptr_t)(&uv->tv) - (uintptr_t)&J2GG(as->J)->g)); ++ if (checki12(ofs)) { ++ *ofsp = (int32_t)ofs; ++ return RID_GL; ++ } ++ } ++ } else if (ir->o == IR_TMPREF) { ++ *ofsp = (int32_t)offsetof(global_State, tmptv); ++ return RID_GL; ++ } ++ } ++ *ofsp = 0; ++ return ra_alloc1(as, ref, allow); ++} ++ ++/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ ++static void asm_fusexref(ASMState *as, RISCVIns riscvi, Reg rd, IRRef ref, ++ RegSet allow, int32_t ofs) ++{ ++ IRIns *ir = IR(ref); ++ Reg base; ++ if (ra_noreg(ir->r) && canfuse(as, ir)) { ++ intptr_t ofs2; ++ if (ir->o == IR_ADD) { ++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), ++ checki12(ofs2))) { ++ ref = ir->op1; ++ ofs = (int32_t)ofs2; ++ } ++ } else if (ir->o == IR_STRREF) { ++ ofs2 = 4096; ++ lj_assertA(ofs == 0, "bad usage"); ++ ofs = (int32_t)sizeof(GCstr); ++ if (irref_isk(ir->op2)) { ++ ofs2 = ofs + get_kval(as, ir->op2); ++ ref = ir->op1; ++ } else if (irref_isk(ir->op1)) { ++ ofs2 = ofs + get_kval(as, ir->op1); ++ ref = ir->op2; ++ } ++ if (!checki12(ofs2)) { ++ /* NYI: Fuse ADD with constant. */ ++ Reg right, left = ra_alloc2(as, ir, allow); ++ right = (left >> 8); left &= 255; ++ emit_lso(as, riscvi, rd, RID_TMP, ofs); ++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, left, right); ++ return; ++ } ++ ofs = ofs2; ++ } ++ } ++ base = ra_alloc1(as, ref, allow); ++ emit_lso(as, riscvi, rd, base, ofs); ++} ++ ++/* Fuse Integer multiply-accumulate. */ ++ ++static int asm_fusemac(ASMState *as, IRIns *ir, RISCVIns riscvi) ++{ ++ IRRef lref = ir->op1, rref = ir->op2; ++ IRIns *irm; ++ if (lref != rref && ++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && ++ ra_noreg(irm->r)) || ++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && ++ (rref = lref, ra_noreg(irm->r))))) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg add = ra_hintalloc(as, rref, dest, RSET_GPR); ++ Reg left = ra_alloc2(as, irm, ++ rset_exclude(rset_exclude(RSET_GPR, dest), add)); ++ Reg right = (left >> 8); left &= 255; ++ emit_ds1s2(as, riscvi, dest, left, right); ++ if (dest != add) emit_mv(as, dest, add); ++ return 1; ++ } ++ return 0; ++} ++ ++/* Fuse FP multiply-add/sub. 
*/ ++ ++static int asm_fusemadd(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvir) ++{ ++ IRRef lref = ir->op1, rref = ir->op2; ++ IRIns *irm; ++ if ((as->flags & JIT_F_OPT_FMA) && ++ lref != rref && ++ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && ++ ra_noreg(irm->r)) || ++ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && ++ (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); ++ Reg left = ra_alloc2(as, irm, ++ rset_exclude(rset_exclude(RSET_FPR, dest), add)); ++ Reg right = (left >> 8); left &= 255; ++ emit_ds1s2s3(as, riscvi, dest, left, right, add); ++ return 1; ++ } ++ return 0; ++} ++/* -- Calls --------------------------------------------------------------- */ ++ ++/* Generate a call to a C function. */ ++static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) ++{ ++ uint32_t n, nargs = CCI_XNARGS(ci); ++ int32_t ofs = 0; ++ Reg gpr, fpr = REGARG_FIRSTFPR; ++ if ((void *)ci->func) ++ emit_call(as, (void *)ci->func, 1); ++ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) ++ as->cost[gpr] = REGCOST(~0u, ASMREF_L); ++ gpr = REGARG_FIRSTGPR; ++ for (n = 0; n < nargs; n++) { /* Setup args. */ ++ IRRef ref = args[n]; ++ IRIns *ir = IR(ref); ++ if (ref) { ++ if (irt_isfp(ir->t)) { ++ if (fpr <= REGARG_LASTFPR) { ++ lj_assertA(rset_test(as->freeset, fpr), ++ "reg %d not free", fpr); /* Must have been evicted. */ ++ ra_leftov(as, fpr, ref); ++ fpr++; if(ci->flags & CCI_VARARG) gpr++; ++ } else if (!(ci->flags & CCI_VARARG) && gpr <= REGARG_LASTGPR) { ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. 
*/ ++ ra_leftov(as, gpr, ref); ++ gpr++; ++ } else { ++ Reg r = ra_alloc1(as, ref, RSET_FPR); ++ emit_spstore(as, ir, r, ofs); ++ ofs += 8; ++ } ++ } else { ++ if (gpr <= REGARG_LASTGPR) { ++ lj_assertA(rset_test(as->freeset, gpr), ++ "reg %d not free", gpr); /* Must have been evicted. */ ++ ra_leftov(as, gpr, ref); ++ gpr++; if(ci->flags & CCI_VARARG) fpr++; ++ } else { ++ Reg r = ra_alloc1z(as, ref, RSET_GPR); ++ emit_spstore(as, ir, r, ofs); ++ ofs += 8; ++ } ++ } ++ } ++ } ++} ++ ++/* Setup result reg/sp for call. Evict scratch regs. */ ++static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ RegSet drop = RSET_SCRATCH; ++ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ if (hiop && ra_hasreg((ir+1)->r)) ++ rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); /* Evictions must be performed first. */ ++ if (ra_used(ir)) { ++ lj_assertA(!irt_ispri(ir->t), "PRI dest"); ++ if (irt_isfp(ir->t)) { ++ if ((ci->flags & CCI_CASTU64)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, ++ dest, RID_RET); ++ } else { ++ ra_destreg(as, ir, RID_FPRET); ++ } ++ } else if (hiop) { ++ ra_destpair(as, ir); ++ } else { ++ ra_destreg(as, ir, RID_RET); ++ } ++ } ++} ++ ++static void asm_callx(ASMState *as, IRIns *ir) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ CCallInfo ci; ++ IRRef func; ++ IRIns *irf; ++ ci.flags = asm_callx_flags(as, ir); ++ asm_collectargs(as, ir, &ci, args); ++ asm_setupresult(as, ir, &ci); ++ func = ir->op2; irf = IR(func); ++ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } ++ if (irref_isk(func)) { /* Call to constant address. */ ++ ci.func = (ASMFunction)(void *)get_kval(as, func); ++ } else { /* Need specific register for indirect calls. 
*/ ++ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); ++ MCode *p = as->mcp; ++ *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(r); ++ if (r == RID_CFUNCADDR) ++ *--p = RISCVI_ADDI | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); ++ else ++ *--p = RISCVI_MV | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); ++ as->mcp = p; ++ ci.func = (ASMFunction)(void *)0; ++ } ++ asm_gencall(as, &ci, args); ++} ++ ++/* -- Returns ------------------------------------------------------------- */ ++ ++/* Return to lower frame. Guard that it goes to the right spot. */ ++static void asm_retf(ASMState *as, IRIns *ir) ++{ ++ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); ++ void *pc = ir_kptr(IR(ir->op2)); ++ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); ++ as->topslot -= (BCReg)delta; ++ if ((int32_t)as->topslot < 0) as->topslot = 0; ++ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ ++ emit_setgl(as, base, jit_base); ++ emit_addptr(as, base, -8*delta); ++ asm_guard(as, RISCVI_BNE, RID_TMP, ++ ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); ++ emit_lso(as, RISCVI_LD, RID_TMP, base, -8); ++} ++ ++/* -- Buffer operations --------------------------------------------------- */ ++ ++#if LJ_HASBUFFER ++static void asm_bufhdr_write(ASMState *as, Reg sb) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); ++ IRIns irgc; ++ irgc.ot = IRT(0, IRT_PGC); /* GC type. 
*/ ++ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); ++ emit_ds1s2(as, RISCVI_OR, RID_TMP, RID_TMP, tmp); ++ emit_dsi(as, RISCVI_ANDI, tmp, tmp, SBUF_MASK_FLAG); ++ emit_getgl(as, RID_TMP, cur_L); ++ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); ++} ++#endif ++ ++/* -- Type conversions ---------------------------------------------------- */ ++ ++static void asm_tointg(ASMState *as, IRIns *ir, Reg left) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ Reg dest = ra_dest(as, ir, RSET_GPR), cmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); ++ asm_guard(as, RISCVI_BEQ, cmp, RID_ZERO); ++ emit_ds1s2(as, RISCVI_FEQ_D, cmp, tmp, left); ++ emit_ds(as, RISCVI_FCVT_D_W, tmp, dest); ++ emit_ds(as, RISCVI_FCVT_W_D, dest, left); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_FPR; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, allow); ++ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); ++ Reg tmp = ra_scratch(as, rset_clear(allow, right)); ++ emit_ds(as, RISCVI_FMV_X_W, dest, tmp); ++ emit_ds1s2(as, RISCVI_FADD_D, tmp, left, right); ++} ++ ++static void asm_conv(ASMState *as, IRIns *ir) ++{ ++ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); ++ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); ++ int stfp = (st == IRT_NUM || st == IRT_FLOAT); ++ IRRef lref = ir->op1; ++ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); ++ /* Use GPR to pass floating-point arguments */ ++ if (irt_isfp(ir->t) && ir->r >= RID_X10 && ir->r <= RID_X17) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg ftmp = ra_scratch(as, RSET_FPR); ++ if (stfp) { /* FP to FP conversion. */ ++ emit_ds(as, st == IRT_NUM ? RISCVI_FMV_X_W : RISCVI_FMV_X_D, dest, ftmp); ++ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, ++ ftmp, ra_alloc1(as, lref, RSET_FPR)); ++ } else { /* Integer to FP conversion. 
*/ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ RISCVIns riscvi = irt_isfloat(ir->t) ? ++ (((IRT_IS64 >> st) & 1) ? ++ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : ++ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : ++ (((IRT_IS64 >> st) & 1) ? ++ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : ++ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); ++ emit_ds(as, st64 ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dest, ftmp); ++ emit_ds(as, riscvi, ftmp, left); ++ } ++ } else if (irt_isfp(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ if (stfp) { /* FP to FP conversion. */ ++ emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, ++ dest, ra_alloc1(as, lref, RSET_FPR)); ++ } else { /* Integer to FP conversion. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ RISCVIns riscvi = irt_isfloat(ir->t) ? ++ (((IRT_IS64 >> st) & 1) ? ++ (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : ++ (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : ++ (((IRT_IS64 >> st) & 1) ? ++ (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : ++ (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); ++ emit_ds(as, riscvi, dest, left); ++ } ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lj_assertA(irt_isint(ir->t) && st == IRT_NUM, ++ "bad type for checked CONV"); ++ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); ++ } else { ++ Reg left = ra_alloc1(as, lref, RSET_FPR); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ RISCVIns riscvi = irt_is64(ir->t) ? ++ (st == IRT_NUM ? ++ (irt_isi64(ir->t) ? RISCVI_FCVT_L_D : RISCVI_FCVT_LU_D) : ++ (irt_isi64(ir->t) ? RISCVI_FCVT_L_S : RISCVI_FCVT_LU_S)) : ++ (st == IRT_NUM ? ++ (irt_isint(ir->t) ? RISCVI_FCVT_W_D : RISCVI_FCVT_WU_D) : ++ (irt_isint(ir->t) ? 
RISCVI_FCVT_W_S : RISCVI_FCVT_WU_S)); ++ emit_ds(as, riscvi|RISCVF_RM(RISCVRM_RTZ), dest, left); ++ } ++ } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ RISCVIns riscvi = st == IRT_I8 ? RISCVI_SEXT_B : ++ st == IRT_U8 ? RISCVI_ZEXT_B : ++ st == IRT_I16 ? RISCVI_SEXT_H : RISCVI_ZEXT_H; ++ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); ++ emit_ext(as, riscvi, dest, left); ++ } else { /* 32/64 bit integer conversions. */ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irt_is64(ir->t)) { ++ if (st64) { ++ /* 64/64 bit no-op (cast)*/ ++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ ++ } else { /* 32 to 64 bit sign extension. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ ++ emit_ext(as, RISCVI_SEXT_W, dest, left); ++ } else { /* 32 to 64 bit zero extension. */ ++ emit_ext(as, RISCVI_ZEXT_W, dest, left); ++ } ++ } ++ } else { ++ if (st64 && !(ir->op2 & IRCONV_NONE)) { ++ /* This is either a 32 bit reg/reg mov which zeroes the hiword ++ ** or a load of the loword from a 64 bit address. ++ */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_ext(as, RISCVI_ZEXT_W, dest, left); ++ } else { /* 32/32 bit no-op (cast). */ ++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ ++ } ++ } ++ } ++} ++ ++static void asm_strto(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; ++ IRRef args[2]; ++ int32_t ofs = SPOFS_TMP; ++ RegSet drop = RSET_SCRATCH; ++ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ++ ra_evictset(as, drop); ++ if (ir->s) ofs = sps_scale(ir->s); ++ asm_guard(as, RISCVI_BEQ, RID_RET, RID_ZERO); /* Test return status. 
*/ ++ args[0] = ir->op1; /* GCstr *str */ ++ args[1] = ASMREF_TMP1; /* TValue *n */ ++ asm_gencall(as, ci, args); ++ /* Store the result to the spill slot or temp slots. */ ++ Reg tmp = ra_releasetmp(as, ASMREF_TMP1); ++ emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs); ++} ++ ++/* -- Memory references --------------------------------------------------- */ ++ ++/* Store tagged value for ref at base+ofs. */ ++static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) ++{ ++ RegSet allow = rset_exclude(RSET_GPR, base); ++ IRIns *ir = IR(ref); ++ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), ++ "store of IR type %d", irt_type(ir->t)); ++ if (irref_isk(ref)) { ++ TValue k; ++ lj_ir_kvalue(as->J->L, &k, ir); ++ emit_lso(as, RISCVI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); ++ } else { ++ Reg src = ra_alloc1(as, ref, allow); ++ rset_clear(allow, src); ++ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); ++ emit_lso(as, RISCVI_SD, RID_TMP, base, ofs); ++ if (irt_isinteger(ir->t)) { ++ if (as->flags & JIT_F_RVZba) { ++ emit_ds1s2(as, RISCVI_ADD_UW, RID_TMP, src, type); ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, RID_TMP, type); ++ emit_ext(as, RISCVI_ZEXT_W, RID_TMP, src); ++ } ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, type); ++ } ++ } ++} ++ ++/* Get pointer to TValue. */ ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) // todo-new ++{ ++ if ((mode & IRTMPREF_IN1)) { ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { ++ /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, igcptr(ir_knum(ir)), dest); ++ return; ++ } ++ emit_lso(as, RISCVI_FSD, ra_alloc1(as, ref, RSET_FPR), dest, 0); ++ } else { ++ asm_tvstore64(as, dest, 0, ref); ++ } ++ } ++ /* g->tmptv holds the TValue(s). 
*/ ++ emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv)); ++} ++ ++static void asm_aref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg idx, base; ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki12(ofs)) { ++ base = ra_alloc1(as, refa, RSET_GPR); ++ emit_dsi(as, RISCVI_ADDI, dest, base, ofs); ++ return; ++ } ++ } ++ base = ra_alloc1(as, ir->op1, RSET_GPR); ++ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); ++ emit_sh3add(as, dest, base, idx, RID_TMP); ++} ++ ++/* Inlined hash lookup. Specialized for key type and for const keys. ++** The equivalent C code is: ++** Node *n = hashkey(t, key); ++** do { ++** if (lj_obj_equal(&n->key, key)) return &n->val; ++** } while ((n = nextnode(n))); ++** return niltv(L); ++*/ ++static void asm_href(ASMState *as, IRIns *ir, IROp merge) ++{ ++ RegSet allow = RSET_GPR; ++ int destused = ra_used(ir); ++ Reg dest = ra_dest(as, ir, allow); ++ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); ++ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2; ++ Reg cmp64 = RID_NONE; ++ IRRef refkey = ir->op2; ++ IRIns *irkey = IR(refkey); ++ int isk = irref_isk(refkey); ++ IRType1 kt = irkey->t; ++ uint32_t khash; ++ MCLabel l_end, l_loop, l_next; ++ rset_clear(allow, tab); ++ tmp1 = ra_scratch(as, allow); ++ rset_clear(allow, tmp1); ++ tmp2 = ra_scratch(as, allow); ++ rset_clear(allow, tmp2); ++ ++ if (irt_isnum(kt)) { ++ key = ra_alloc1(as, refkey, RSET_FPR); ++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); ++ } else { ++ /* Allocate cmp64 register used for 64-bit comparisons */ ++ if (!isk && irt_isaddr(kt)) { ++ cmp64 = tmp2; ++ } else { ++ int64_t k; ++ if (isk && irt_isaddr(kt)) { ++ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; ++ } else { ++ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key 
type"); ++ k = ~((int64_t)~irt_toitype(kt) << 47); ++ } ++ cmp64 = ra_allock(as, k, allow); ++ rset_clear(allow, cmp64); ++ } ++ if (!irt_ispri(kt)) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ } ++ } ++ ++ /* Key not found in chain: jump to exit (if merged) or load niltv. */ ++ l_end = emit_label(as); ++ int is_lend_exit = 0; ++ as->invmcp = NULL; ++ if (merge == IR_NE) ++ asm_guard(as, RISCVI_BEQ, RID_ZERO, RID_ZERO); ++ else if (destused) ++ emit_loada(as, dest, niltvg(J2G(as->J))); ++ ++ /* Follow hash chain until the end. */ ++ l_loop = --as->mcp; ++ emit_mv(as, dest, tmp1); ++ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, next)); ++ l_next = emit_label(as); ++ ++ /* Type and value comparison. */ ++ if (merge == IR_EQ) { /* Must match asm_guard(). */ ++ l_end = asm_exitstub_addr(as, as->snapno); ++ is_lend_exit = 1; ++ } ++ if (irt_isnum(kt)) { ++ emit_branch(as, RISCVI_BNE, tmp1, RID_ZERO, l_end, is_lend_exit); ++ emit_ds1s2(as, RISCVI_FEQ_D, tmp1, tmpnum, key); ++ emit_branch(as, RISCVI_BEQ, tmp1, RID_ZERO, l_next, -1); ++ emit_dsi(as, RISCVI_SLTIU, tmp1, tmp1, ((int32_t)LJ_TISNUM)); ++ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 47); ++ emit_ds(as, RISCVI_FMV_D_X, tmpnum, tmp1); ++ } else { ++ emit_branch(as, RISCVI_BEQ, tmp1, cmp64, l_end, is_lend_exit); ++ } ++ emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); ++ *l_loop = RISCVI_BNE | RISCVF_S1(tmp1) | RISCVF_S2(RID_ZERO) ++ | RISCVF_IMMB((char *)as->mcp-(char *)l_loop); ++ if (!isk && irt_isaddr(kt)) { ++ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); ++ emit_ds1s2(as, RISCVI_ADD, tmp2, key, type); ++ rset_clear(allow, type); ++ } ++ ++ /* Load main position relative to tab->node into dest. */ ++ khash = isk ? 
ir_khash(as, irkey) : 1; ++ if (khash == 0) { ++ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); ++ } else { ++ Reg tmphash = tmp1; ++ if (isk) ++ tmphash = ra_allock(as, khash, allow); ++ /* node = tab->node + (idx*32-idx*8) */ ++ emit_ds1s2(as, RISCVI_ADD, dest, dest, tmp1); ++ lj_assertA(sizeof(Node) == 24, "bad Node size"); ++ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp2, tmp1); ++ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 3); ++ emit_dsshamt(as, RISCVI_SLLIW, tmp2, tmp1, 5); ++ emit_ds1s2(as, RISCVI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask ++ emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); ++ emit_lso(as, RISCVI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); ++ if (isk) { ++ /* Nothing to do. */ ++ } else if (irt_isstr(kt)) { ++ emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); ++ } else { /* Must match with hash*() in lj_tab.c. */ ++ emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2); ++ emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f); ++ emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2); ++ emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f); ++ emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest); ++ emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1); ++ emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f); ++ if (irt_isnum(kt)) { ++ emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1); ++ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi ++ emit_ext(as, RISCVI_SEXT_W, tmp2, tmp1); // lo ++ emit_ds(as, RISCVI_FMV_X_D, tmp1, key); ++ } else { ++ checkmclim(as); ++ emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi ++ emit_ext(as, RISCVI_SEXT_W, tmp2, key); // lo ++ emit_ds1s2(as, RISCVI_ADD, tmp1, key, type); ++ } ++ } ++ } ++} ++ ++static void asm_hrefk(ASMState *as, IRIns *ir) ++{ ++ IRIns *kslot = IR(ir->op2); ++ IRIns *irkey = IR(kslot->op1); ++ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); ++ int32_t kofs = ofs + (int32_t)offsetof(Node, key); ++ int 
bigofs = !checki12(kofs); ++ Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; ++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); ++ RegSet allow = rset_exclude(RSET_GPR, node); ++ Reg idx = node; ++ int64_t k; ++ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); ++ if (bigofs) { ++ idx = dest; ++ rset_clear(allow, dest); ++ kofs = (int32_t)offsetof(Node, key); ++ } else if (ra_hasreg(dest)) { ++ emit_dsi(as, RISCVI_ADDI, dest, node, ofs); ++ } ++ if (irt_ispri(irkey->t)) { ++ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); ++ k = ~((int64_t)~irt_toitype(irkey->t) << 47); ++ } else if (irt_isnum(irkey->t)) { ++ k = (int64_t)ir_knum(irkey)->u64; ++ } else { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); ++ } ++ asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow)); ++ emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs); ++ if (bigofs) ++ emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow)); ++} ++ ++static void asm_uref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); ++ if (irref_isk(ir->op1) && !guarded) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; ++ emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR); ++ } else { ++ if (guarded) ++ asm_guard(as, ir->o == IR_UREFC ? 
RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO); ++ if (ir->o == IR_UREFC) ++ emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); ++ else ++ emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v)); ++ if (guarded) ++ emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); ++ emit_loada(as, dest, o); ++ } else { ++ emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR), ++ (int32_t)offsetof(GCfuncL, uvptr) + ++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); ++ } ++ } ++} ++ ++static void asm_fref(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); UNUSED(ir); ++ lj_assertA(!ra_used(ir), "unfused FREF"); ++} ++ ++static void asm_strref(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg dest = ra_dest(as, ir, allow); ++ Reg base = ra_alloc1(as, ir->op1, allow); ++ IRIns *irr = IR(ir->op2); ++ int32_t ofs = sizeof(GCstr); ++ rset_clear(allow, base); ++ if (irref_isk(ir->op2) && checki12(ofs + irr->i)) { ++ emit_dsi(as, RISCVI_ADDI, dest, base, ofs + irr->i); ++ } else { ++ emit_dsi(as, RISCVI_ADDI, dest, dest, ofs); ++ emit_ds1s2(as, RISCVI_ADD, dest, base, ra_alloc1(as, ir->op2, allow)); ++ } ++} ++ ++/* -- Loads and stores ---------------------------------------------------- */ ++ ++static RISCVIns asm_fxloadins(IRIns *ir) ++{ ++ switch (irt_type(ir->t)) { ++ case IRT_I8: return RISCVI_LB; ++ case IRT_U8: return RISCVI_LBU; ++ case IRT_I16: return RISCVI_LH; ++ case IRT_U16: return RISCVI_LHU; ++ case IRT_NUM: return RISCVI_FLD; ++ case IRT_FLOAT: return RISCVI_FLW; ++ default: return irt_is64(ir->t) ? 
RISCVI_LD : RISCVI_LW; ++ } ++} ++ ++static RISCVIns asm_fxstoreins(IRIns *ir) ++{ ++ switch (irt_type(ir->t)) { ++ case IRT_I8: case IRT_U8: return RISCVI_SB; ++ case IRT_I16: case IRT_U16: return RISCVI_SH; ++ case IRT_NUM: return RISCVI_FSD; ++ case IRT_FLOAT: return RISCVI_FSW; ++ default: return irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW; ++ } ++} ++ ++static void asm_fload(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg idx, dest = ra_dest(as, ir, allow); ++ rset_clear(allow, dest); ++ RISCVIns riscvi = asm_fxloadins(ir); ++ int32_t ofs; ++ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ ++ idx = RID_GL; ++ ofs = (ir->op2 << 2) - GG_OFS(g); ++ } else { ++ idx = ra_alloc1(as, ir->op1, allow); ++ if (ir->op2 == IRFL_TAB_ARRAY) { ++ ofs = asm_fuseabase(as, ir->op1); ++ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ ++ emit_dsi(as, RISCVI_ADDI, dest, idx, ofs); ++ return; ++ } ++ } ++ ofs = field_ofs[ir->op2]; ++ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); ++ } ++ rset_clear(allow, idx); ++ emit_lso(as, riscvi, dest, idx, ofs); ++} ++ ++static void asm_fstore(ASMState *as, IRIns *ir) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); ++ IRIns *irf = IR(ir->op1); ++ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); ++ int32_t ofs = field_ofs[irf->op2]; ++ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); ++ emit_lso(as, asm_fxstoreins(ir), src, idx, ofs); ++ } ++} ++ ++static void asm_xload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); ++ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), ++ "unaligned XLOAD"); ++ asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); ++} ++ ++static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); ++ asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, ++ rset_exclude(RSET_GPR, src), ofs); ++ } ++} ++ ++#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) ++ ++static void asm_ahuvload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = RID_TMP, idx; ++ RegSet allow = RSET_GPR; ++ int32_t ofs = 0; ++ IRType1 t = ir->t; ++ if (ra_used(ir)) { ++ lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t), ++ "bad load type %d", irt_type(ir->t)); ++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ if (irt_isaddr(t)) { ++ emit_cleartp(as, dest, dest); ++ } else if (irt_isint(t)) ++ emit_ext(as, RISCVI_SEXT_W, dest, dest); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; ++ rset_clear(allow, idx); ++ if (irt_isnum(t)) { ++ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); ++ emit_dsi(as, RISCVI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); ++ } else { ++ asm_guard(as, RISCVI_BNE, type, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ if (ra_hasreg(dest)) { ++ if (irt_isnum(t)) { ++ emit_lso(as, RISCVI_FLD, dest, idx, ofs); ++ dest = type; ++ } ++ } else { ++ dest = type; ++ } ++ emit_dsshamt(as, RISCVI_SRAI, type, dest, 47); ++ emit_lso(as, RISCVI_LD, dest, idx, ofs); ++} ++ ++static void asm_ahustore(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg idx, src = RID_NONE, type = RID_NONE; ++ int32_t ofs = 0; ++ if (ir->r == RID_SINK) ++ return; ++ if (irt_isnum(ir->t)) { ++ src = ra_alloc1(as, ir->op2, RSET_FPR); ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ emit_lso(as, RISCVI_FSD, src, idx, ofs); ++ } else { ++ Reg tmp = RID_TMP; ++ if (irt_ispri(ir->t)) { ++ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); ++ rset_clear(allow, tmp); ++ } else { ++ src = ra_alloc1(as, ir->op2, allow); ++ rset_clear(allow, src); ++ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); ++ 
rset_clear(allow, type); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ emit_lso(as, RISCVI_SD, tmp, idx, ofs); ++ if (ra_hasreg(src)) { ++ if (irt_isinteger(ir->t)) { ++ if (as->flags & JIT_F_RVZba) { ++ emit_ds1s2(as, RISCVI_ADD_UW, tmp, src, type); ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, tmp, tmp, type); ++ emit_ext(as, RISCVI_ZEXT_W, tmp, src); ++ } ++ } else { ++ emit_ds1s2(as, RISCVI_ADD, tmp, src, type); ++ } ++ } ++ } ++} ++ ++static void asm_sload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = RID_NONE, base; ++ RegSet allow = RSET_GPR; ++ IRType1 t = ir->t; ++ int32_t ofs = 8*((int32_t)ir->op1-2); ++ lj_assertA(checki12(ofs), "sload IR operand out of range"); ++ lj_assertA(!(ir->op2 & IRSLOAD_PARENT), ++ "bad parent SLOAD"); /* Handled by asm_head_side(). */ ++ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), ++ "inconsistent SLOAD variant"); ++ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { ++ dest = ra_scratch(as, RSET_FPR); ++ asm_tointg(as, ir, dest); ++ t.irt = IRT_NUM; /* Continue with a regular number type check. */ ++ } else if (ra_used(ir)) { ++ Reg tmp = RID_NONE; ++ if ((ir->op2 & IRSLOAD_CONVERT)) ++ tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); ++ lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), ++ "bad SLOAD type %d", irt_type(t)); ++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++ if (irt_isaddr(t)) { /* Clear type from pointers. */ ++ emit_cleartp(as, dest, dest); ++ } else if (ir->op2 & IRSLOAD_CONVERT) { ++ if (irt_isint(t)) { ++ emit_ds(as, RISCVI_FCVT_W_D|RISCVF_RM(RISCVRM_RTZ), dest, tmp); ++ /* If value is already loaded for type check, move it to FPR. */ ++ if ((ir->op2 & IRSLOAD_TYPECHECK)) ++ emit_ds(as, RISCVI_FMV_D_X, tmp, dest); ++ else ++ dest = tmp; ++ t.irt = IRT_NUM; /* Check for original type. 
*/ ++ } else { ++ emit_ds(as, RISCVI_FCVT_D_W, dest, tmp); ++ dest = tmp; ++ t.irt = IRT_INT; /* Check for original type. */ ++ } ++ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { ++ /* Sign-extend integers. */ ++ emit_ext(as, RISCVI_SEXT_W, dest, dest); ++ } ++ goto dotypecheck; ++ } ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++dotypecheck: ++ if ((ir->op2 & IRSLOAD_TYPECHECK)) { ++ type = dest < RID_MAX_GPR ? dest : RID_TMP; ++ if (irt_ispri(t)) { ++ asm_guard(as, RISCVI_BNE, type, ++ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); ++ } else if ((ir->op2 & IRSLOAD_KEYINDEX)) { ++ asm_guard(as, RISCVI_BNE, RID_TMP, ++ ra_allock(as, (int32_t)LJ_KEYINDEX, allow)); ++ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 32); ++ } else { ++ if (irt_isnum(t)) { ++ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); ++ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, LJ_TISNUM); ++ if (ra_hasreg(dest)) { ++ emit_lso(as, RISCVI_FLD, dest, base, ofs); ++ } ++ } else { ++ asm_guard(as, RISCVI_BNE, RID_TMP, ++ ra_allock(as, (int32_t)irt_toitype(t), allow)); ++ } ++ emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 47); ++ } ++ emit_lso(as, RISCVI_LD, type, base, ofs); ++ } else if (ra_hasreg(dest)) { ++ emit_lso(as, irt_isnum(t) ? RISCVI_FLD : ++ irt_isint(t) ? RISCVI_LW : RISCVI_LD, ++ dest, base, ofs); ++ } ++} ++ ++/* -- Allocations --------------------------------------------------------- */ ++ ++#if LJ_HASFFI ++static void asm_cnew(ASMState *as, IRIns *ir) ++{ ++ CTState *cts = ctype_ctsG(J2G(as->J)); ++ CTypeID id = (CTypeID)IR(ir->op1)->i; ++ CTSize sz; ++ CTInfo info = lj_ctype_info(cts, id, &sz); ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; ++ IRRef args[4]; ++ RegSet drop = RSET_SCRATCH; ++ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), ++ "bad CNEW/CNEWI operands"); ++ ++ as->gcsteps++; ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. 
*/ ++ ra_evictset(as, drop); ++ if (ra_used(ir)) ++ ra_destreg(as, ir, RID_RET); /* GCcdata * */ ++ ++ /* Initialize immutable cdata object. */ ++ if (ir->o == IR_CNEWI) { ++ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); ++ emit_lso(as, sz == 8 ? RISCVI_SD : RISCVI_SW, ra_alloc1(as, ir->op2, allow), ++ RID_RET, (sizeof(GCcdata))); ++ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); ++ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ++ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ir->op1; /* CTypeID id */ ++ args[2] = ir->op2; /* CTSize sz */ ++ args[3] = ASMREF_TMP1; /* CTSize align */ ++ asm_gencall(as, ci, args); ++ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); ++ return; ++ } ++ ++ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ ++ emit_lso(as, RISCVI_SB, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))); ++ emit_lso(as, RISCVI_SH, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))); ++ emit_loadk12(as, RID_RET+1, ~LJ_TCDATA); ++ emit_loadk32(as, RID_TMP, id); ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ASMREF_TMP1; /* MSize size */ ++ asm_gencall(as, ci, args); ++ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ++ ra_releasetmp(as, ASMREF_TMP1)); ++} ++#endif ++ ++/* -- Write barriers ------------------------------------------------------ */ ++ ++static void asm_tbar(ASMState *as, IRIns *ir) ++{ ++ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); ++ Reg link = RID_TMP; ++ MCLabel l_end = emit_label(as); ++ emit_lso(as, RISCVI_SD, link, tab, (int32_t)offsetof(GCtab, gclist)); ++ emit_lso(as, RISCVI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); ++ emit_setgl(as, tab, gc.grayagain); // make tab gray again ++ emit_getgl(as, link, gc.grayagain); ++ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: not jump ++ emit_ds1s2(as, RISCVI_XOR, mark, mark, RID_TMP); 
// mark=0: gray ++ emit_dsi(as, RISCVI_ANDI, RID_TMP, mark, LJ_GC_BLACK); ++ emit_lso(as, RISCVI_LBU, mark, tab, ((int32_t)offsetof(GCtab, marked))); ++} ++ ++static void asm_obar(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg obj, val, tmp; ++ /* No need for other object barriers (yet). */ ++ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); // Closed upvalue ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ir->op1; /* TValue *tv */ ++ asm_gencall(as, ci, args); ++ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); ++ obj = IR(ir->op1)->r; ++ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); ++ emit_branch(as, RISCVI_BEQ, tmp, RID_ZERO, l_end, -1); ++ emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: jump ++ emit_dsi(as, RISCVI_ANDI, tmp, tmp, LJ_GC_BLACK); ++ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); ++ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); ++ emit_lso(as, RISCVI_LBU, tmp, obj, ++ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))); ++ emit_lso(as, RISCVI_LBU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))); ++} ++ ++/* -- Arithmetic and logic operations ------------------------------------- */ ++ ++static void asm_fparith(ASMState *as, IRIns *ir, RISCVIns riscvi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_ds1s2(as, riscvi, dest, left, right); ++} ++ ++static void asm_fpunary(ASMState *as, IRIns *ir, RISCVIns riscvi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ switch(riscvi) { ++ case RISCVI_FROUND_S_RTZ: case RISCVI_FROUND_S_RDN: case RISCVI_FROUND_S_RUP: ++ case RISCVI_FROUND_D_RTZ: case RISCVI_FROUND_D_RDN: case RISCVI_FROUND_D_RUP: ++ case 
RISCVI_FSQRT_S: case RISCVI_FSQRT_D: ++ emit_ds(as, riscvi, dest, left); ++ break; ++ case RISCVI_FMV_S: case RISCVI_FMV_D: ++ case RISCVI_FABS_S: case RISCVI_FABS_D: ++ case RISCVI_FNEG_S: case RISCVI_FNEG_D: ++ emit_ds1s2(as, riscvi, dest, left, left); ++ break; ++ default: ++ lj_assertA(0, "bad fp unary instruction"); ++ return; ++ } ++} ++ ++static void asm_fpround(ASMState *as, IRIns *ir, RISCVIns riscvi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ MCLabel l_end = emit_label(as); ++ ++ if (dest != left) { ++ emit_ds1s2(as, RISCVI_FSGNJ_D, dest, dest, left); ++ emit_ds(as, RISCVI_FCVT_D_L, dest, RID_TMP); ++ } else { ++ Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); ++ emit_ds1s2(as, RISCVI_FSGNJ_D, dest, ftmp, left); ++ emit_ds(as, RISCVI_FCVT_D_L, ftmp, RID_TMP); ++ } ++ emit_ds(as, riscvi, RID_TMP, left); ++ emit_branch(as, RISCVI_BLT, RID_ZERO, RID_TMP, l_end, 0); ++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1075); ++ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, 0x7ff); ++ emit_dsi(as, RISCVI_SRLI, RID_TMP, RID_TMP, 52); ++ if (dest != left) ++ emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); ++ emit_ds(as, RISCVI_FMV_X_D, RID_TMP, left); ++} ++ ++static void asm_fpmath(ASMState *as, IRIns *ir) ++{ ++ IRFPMathOp fpm = (IRFPMathOp)ir->op2; ++ if (fpm <= IRFPM_TRUNC) ++ if (as->flags & JIT_F_RVZfa) { ++ asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FROUND_D_RDN : ++ fpm == IRFPM_CEIL ? RISCVI_FROUND_D_RUP : RISCVI_FROUND_D_RTZ); ++ } else { ++ asm_fpround(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RDN) : ++ fpm == IRFPM_CEIL ? 
RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RUP) : ++ RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RTZ)); ++ } ++ else if (fpm == IRFPM_SQRT) ++ asm_fpunary(as, ir, RISCVI_FSQRT_D); ++ else ++ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); ++} ++ ++static void asm_add(ASMState *as, IRIns *ir) ++{ ++ IRType1 t = ir->t; ++ if (irt_isnum(t)) { ++ if (!asm_fusemadd(as, ir, RISCVI_FMADD_D, RISCVI_FMADD_D)) ++ asm_fparith(as, ir, RISCVI_FADD_D); ++ return; ++ } else { ++ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULA)) ++ return; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (checki12(k)) { ++ if (irt_is64(t)) { ++ emit_dsi(as, RISCVI_ADDI, dest, left, k); ++ } else { ++ emit_dsi(as, RISCVI_ADDIW, dest, left, k); ++ } ++ return; ++ } ++ } ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_ds1s2(as, irt_is64(t) ? RISCVI_ADD : RISCVI_ADDW, dest, ++ left, right); ++ } ++} ++ ++static void asm_sub(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ if (!asm_fusemadd(as, ir, RISCVI_FMSUB_D, RISCVI_FNMSUB_D)) ++ asm_fparith(as, ir, RISCVI_FSUB_D); ++ return; ++ } else { ++ if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULS)) ++ return; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, ++ left, right); ++ } ++} ++ ++static void asm_mul(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fparith(as, ir, RISCVI_FMUL_D); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ds1s2(as, irt_is64(ir->t) ? 
RISCVI_MUL : RISCVI_MULW, dest, ++ left, right); ++ } ++} ++ ++static void asm_fpdiv(ASMState *as, IRIns *ir) ++{ ++ asm_fparith(as, ir, RISCVI_FDIV_D); ++} ++ ++static void asm_neg(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpunary(as, ir, RISCVI_FNEG_D); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, ++ RID_ZERO, left); ++ } ++} ++ ++#define asm_abs(as, ir) asm_fpunary(as, ir, RISCVI_FABS_D) ++ ++static void asm_arithov(ASMState *as, IRIns *ir) ++{ ++ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); ++ lj_assertA(!irt_is64(ir->t), "bad usage"); ++ if (irref_isk(ir->op2)) { ++ int k = IR(ir->op2)->i; ++ if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); ++ if (checki12(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ ++ left = ra_alloc1(as, ir->op1, RSET_GPR); ++ asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left); ++ emit_dsi(as, RISCVI_ADDI, dest, left, k); ++ if (dest == left) emit_mv(as, RID_TMP, left); ++ return; ++ } ++ } ++ left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), ++ right), dest)); ++ asm_guard(as, RISCVI_BLT, RID_TMP, RID_ZERO); ++ emit_ds1s2(as, RISCVI_AND, RID_TMP, RID_TMP, tmp); ++ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ ++ emit_ds1s2(as, RISCVI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); ++ } else { /* ((dest^left) & (dest^~right)) < 0 */ ++ emit_xnor(as, RID_TMP, dest, dest == right ? RID_TMP : right); ++ } ++ emit_ds1s2(as, RISCVI_XOR, tmp, dest, dest == left ? RID_TMP : left); ++ emit_ds1s2(as, ir->o == IR_ADDOV ? RISCVI_ADDW : RISCVI_SUBW, dest, left, right); ++ if (dest == left || dest == right) ++ emit_mv(as, RID_TMP, dest == left ? 
left : right); ++} ++ ++#define asm_addov(as, ir) asm_arithov(as, ir) ++#define asm_subov(as, ir) asm_arithov(as, ir) ++ ++static void asm_mulov(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ asm_guard(as, RISCVI_BNE, dest, RID_TMP); ++ emit_ext(as, RISCVI_SEXT_W, dest, RID_TMP); // dest: [31:0]+signextend ++ emit_ds1s2(as, RISCVI_MUL, RID_TMP, left, right); // RID_TMP: [63:0] ++} ++ ++static void asm_bnot(ASMState *as, IRIns *ir) ++{ ++ Reg left, right, dest = ra_dest(as, ir, RSET_GPR); ++ IRIns *irl = IR(ir->op1); ++ if (as->flags & JIT_F_RVZbb && mayfuse(as, ir->op1) && irl->o == IR_BXOR) { ++ left = ra_alloc2(as, irl, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ds1s2(as, RISCVI_XNOR, dest, left, right); ++ } else { ++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_ds(as, RISCVI_NOT, dest, left); ++ } ++} ++ ++static void asm_bswap(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left); ++ if (as->flags & JIT_F_RVZbb) { ++ if (!irt_is64(ir->t)) ++ emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); ++ emit_ds(as, RISCVI_REV8, dest, left); ++ } else if (as->flags & JIT_F_RVXThead) { ++ emit_ds(as, irt_is64(ir->t) ? 
RISCVI_TH_REV : RISCVI_TH_REVW, ++ dest, left); ++ } else if (irt_is64(ir->t)) { ++ Reg tmp1, tmp2, tmp3, tmp4; ++ tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); ++ tmp2 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp2); ++ tmp3 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp3); ++ tmp4 = ra_scratch(as, allow); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp4); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp3); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); ++ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 40); ++ emit_dsshamt(as, RISCVI_SLLI, dest, left, 56); ++ emit_ds1s2(as, RISCVI_OR, tmp3, tmp1, tmp3); ++ emit_ds1s2(as, RISCVI_AND, tmp4, left, RID_TMP); ++ emit_dsshamt(as, RISCVI_SLLI, tmp3, tmp3, 32); ++ emit_dsshamt(as, RISCVI_SLLI, tmp1, tmp1, 24); ++ emit_dsshamt(as, RISCVI_SRLIW, tmp3, left, 24); ++ emit_ds1s2(as, RISCVI_OR, tmp2, tmp3, tmp2); ++ emit_ds1s2(as, RISCVI_AND, tmp1, left, tmp1); ++ emit_ds1s2(as, RISCVI_OR, tmp3, tmp4, tmp3); ++ emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 24); ++ emit_dsshamt(as, RISCVI_SRLIW, tmp4, tmp4, 24); ++ emit_ds1s2(as, RISCVI_AND, tmp3, tmp3, tmp1); ++ emit_dsshamt(as, RISCVI_SRLI, tmp4, left, 8); ++ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 24); ++ emit_ds1s2(as, RISCVI_OR, tmp2, tmp2, tmp3); ++ emit_du(as, RISCVI_LUI, tmp1, RISCVF_HI(0xff0000u)); ++ emit_ds1s2(as, RISCVI_AND, tmp2, tmp2, RID_TMP); ++ emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 56); ++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); ++ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); ++ emit_dsshamt(as, RISCVI_SRLI, tmp2, left, 40); ++ } else { ++ Reg tmp1, tmp2; ++ tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); ++ tmp2 = ra_scratch(as, allow); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); ++ emit_ds1s2(as, RISCVI_OR, dest, dest, tmp1); ++ emit_dsshamt(as, RISCVI_SLLI, tmp2, RID_TMP, 8); ++ emit_dsshamt(as, RISCVI_SLLIW, dest, left, 24); ++ emit_ds1s2(as, RISCVI_OR, tmp1, tmp1, tmp2); ++ 
emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP); ++ emit_ds1s2(as, RISCVI_AND, tmp1, tmp1, RID_TMP); ++ emit_dsshamt(as, RISCVI_SRLIW, tmp2, left, 24); ++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); ++ emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); ++ emit_dsshamt(as, RISCVI_SRLI, tmp1, left, 8); ++ } ++} ++ ++static void asm_bitop(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik, RISCVIns riscvin) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left, right; ++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if (checki12(k)) { ++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_dsi(as, riscvik, dest, left, k); ++ return; ++ } ++ } else if (as->flags & JIT_F_RVZbb) { ++ if (mayfuse(as, ir->op1) && irl->o == IR_BNOT) { ++ left = ra_alloc1(as, irl->op1, RSET_GPR); ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_ds1s2(as, riscvin, dest, right, left); ++ return; ++ } else if (mayfuse(as, ir->op2) && irr->o == IR_BNOT) { ++ left = ra_alloc1(as, ir->op1, RSET_GPR); ++ right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); ++ emit_ds1s2(as, riscvin, dest, left, right); ++ return; ++ } ++ } ++ left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_ds1s2(as, riscvi, dest, left, right); ++} ++ ++#define asm_band(as, ir) asm_bitop(as, ir, RISCVI_AND, RISCVI_ANDI, RISCVI_ANDN) ++#define asm_bor(as, ir) asm_bitop(as, ir, RISCVI_OR, RISCVI_ORI, RISCVI_ORN) ++#define asm_bxor(as, ir) asm_bitop(as, ir, RISCVI_XOR, RISCVI_XORI, RISCVI_XNOR) ++ ++static void asm_bitshift(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); ++ uint32_t shmsk = irt_is64(ir->t) ? 63 : 31; ++ if (irref_isk(ir->op2)) { /* Constant shifts. 
*/ ++ uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmsk); ++ switch (riscvik) { ++ case RISCVI_SRAI: case RISCVI_SRLI: case RISCVI_SLLI: ++ case RISCVI_SRAIW: case RISCVI_SLLIW: case RISCVI_SRLIW: ++ emit_dsshamt(as, riscvik, dest, left, shift); ++ break; ++ case RISCVI_ADDI: shift = (-shift) & shmsk; ++ case RISCVI_RORI: ++ emit_roti(as, RISCVI_RORI, dest, left, RID_TMP, shift); ++ break; ++ case RISCVI_ADDIW: shift = (-shift) & shmsk; ++ case RISCVI_RORIW: ++ emit_roti(as, RISCVI_RORIW, dest, left, RID_TMP, shift); ++ break; ++ default: ++ lj_assertA(0, "bad shift instruction"); ++ return; ++ } ++ } else { ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ switch (riscvi) { ++ case RISCVI_SRA: case RISCVI_SRL: case RISCVI_SLL: ++ case RISCVI_SRAW: case RISCVI_SRLW: case RISCVI_SLLW: ++ emit_ds1s2(as, riscvi, dest, left, right); ++ break; ++ case RISCVI_ROR: case RISCVI_ROL: ++ case RISCVI_RORW: case RISCVI_ROLW: ++ emit_rot(as, riscvi, dest, left, right, RID_TMP); ++ break; ++ default: ++ lj_assertA(0, "bad shift instruction"); ++ return; ++ } ++ } ++} ++ ++#define asm_bshl(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, RISCVI_SLL, RISCVI_SLLI) : \ ++ asm_bitshift(as, ir, RISCVI_SLLW, RISCVI_SLLIW)) ++#define asm_bshr(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, RISCVI_SRL, RISCVI_SRLI) : \ ++ asm_bitshift(as, ir, RISCVI_SRLW, RISCVI_SRLIW)) ++#define asm_bsar(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, RISCVI_SRA, RISCVI_SRAI) : \ ++ asm_bitshift(as, ir, RISCVI_SRAW, RISCVI_SRAIW)) ++#define asm_brol(as, ir) (irt_is64(ir->t) ? \ ++ asm_bitshift(as, ir, RISCVI_ROL, RISCVI_ADDI) : \ ++ asm_bitshift(as, ir, RISCVI_ROLW, RISCVI_ADDIW)) ++ // ROLI -> ADDI, ROLIW -> ADDIW; Hacky but works. ++#define asm_bror(as, ir) (irt_is64(ir->t) ? 
\ ++ asm_bitshift(as, ir, RISCVI_ROR, RISCVI_RORI) : \ ++ asm_bitshift(as, ir, RISCVI_RORW, RISCVI_RORIW)) ++ ++static void asm_min_max(ASMState *as, IRIns *ir, int ismax) ++{ ++ if (irt_isnum(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ MCLabel l_ret_left, l_end; ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ l_end = emit_label(as); ++ ++ if (dest != left) ++ emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); ++ l_ret_left = emit_label(as); ++ ++ if (dest != left) ++ emit_jump(as, l_end, -1); ++ if (dest != right) ++ emit_ds1s2(as, RISCVI_FMV_D, dest, right, right); ++ ++ emit_branch(as, RISCVI_BNE, RID_TMP, RID_ZERO, l_ret_left, -1); ++ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, ismax ? right : left, ++ ismax ? left : right); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ if (as->flags & JIT_F_RVZbb) { ++ emit_ds1s2(as, ismax ? 
RISCVI_MAX : RISCVI_MIN, dest, left, right); ++ } else { ++ if (as->flags & JIT_F_RVXThead) { ++ if (left == right) { ++ if (dest != left) emit_mv(as, dest, left); ++ } else { ++ if (dest == left) { ++ emit_ds1s2(as, RISCVI_TH_MVNEZ, dest, right, RID_TMP); ++ } else { ++ emit_ds1s2(as, RISCVI_TH_MVEQZ, dest, left, RID_TMP); ++ if (dest != right) emit_mv(as, dest, right); ++ } ++ } ++ } else if (as->flags & JIT_F_RVZicond) { ++ emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP); ++ if (dest != right) { ++ emit_ds1s2(as, RISCVI_CZERO_EQZ, RID_TMP, right, RID_TMP); ++ emit_ds1s2(as, RISCVI_CZERO_NEZ, dest, left, RID_TMP); ++ } else { ++ emit_ds1s2(as, RISCVI_CZERO_NEZ, RID_TMP, left, RID_TMP); ++ emit_ds1s2(as, RISCVI_CZERO_EQZ, dest, right, RID_TMP); ++ } ++ } else { ++ if (dest != right) { ++ emit_ds1s2(as, RISCVI_XOR, dest, right, dest); ++ emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); ++ emit_ds1s2(as, RISCVI_XOR, dest, right, left); ++ emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1); ++ } else { ++ emit_ds1s2(as, RISCVI_XOR, dest, left, dest); ++ emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); ++ emit_ds1s2(as, RISCVI_XOR, dest, left, right); ++ emit_ds1s2(as, RISCVI_SUB, RID_TMP, RID_ZERO, RID_TMP); ++ } ++ } ++ emit_ds1s2(as, RISCVI_SLT, RID_TMP, ++ ismax ? left : right, ismax ? right : left); ++ } ++ } ++} ++ ++#define asm_min(as, ir) asm_min_max(as, ir, 0) ++#define asm_max(as, ir) asm_min_max(as, ir, 1) ++ ++/* -- Comparisons --------------------------------------------------------- */ ++ ++/* FP comparisons. */ ++static void asm_fpcomp(ASMState *as, IRIns *ir) ++{ ++ IROp op = ir->o; ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ asm_guard(as, (op < IR_EQ ? (op&4) : (op&1)) ++ ? 
RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); ++ switch (op) { ++ case IR_LT: case IR_UGE: ++ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, left, right); ++ break; ++ case IR_LE: case IR_UGT: case IR_ABC: ++ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, left, right); ++ break; ++ case IR_GT: case IR_ULE: ++ emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, right, left); ++ break; ++ case IR_GE: case IR_ULT: ++ emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, right, left); ++ break; ++ case IR_EQ: case IR_NE: ++ emit_ds1s2(as, RISCVI_FEQ_D, RID_TMP, left, right); ++ break; ++ default: ++ break; ++ } ++} ++ ++/* Integer comparisons. */ ++static void asm_intcomp(ASMState *as, IRIns *ir) ++{ ++ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ ++ /* 00 01 10 11 100 101 110 111 */ ++ IROp op = ir->o; ++ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (op == IR_ABC) op = IR_UGT; ++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { ++ switch (op) { ++ case IR_LT: asm_guard(as, RISCVI_BGE, left, RID_ZERO); break; ++ case IR_GE: asm_guard(as, RISCVI_BLT, left, RID_ZERO); break; ++ case IR_LE: asm_guard(as, RISCVI_BLT, RID_ZERO, left); break; ++ case IR_GT: asm_guard(as, RISCVI_BGE, RID_ZERO, left); break; ++ default: break; ++ } ++ return; ++ } ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(as, ir->op2); ++ if ((op&2)) k++; ++ if (checki12(k)) { ++ asm_guard(as, (op&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); ++ emit_dsi(as, (op&4) ? RISCVI_SLTIU : RISCVI_SLTI, RID_TMP, left, k); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), ++ (op&2) ? right : left, (op&2) ? 
left : right); ++} ++ ++static void asm_comp(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) ++ asm_fpcomp(as, ir); ++ else ++ asm_intcomp(as, ir); ++} ++ ++static void asm_equal(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpcomp(as, ir); ++ } else { ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ asm_guard(as, (ir->o & 1) ? RISCVI_BEQ : RISCVI_BNE, left, right); ++ } ++} ++ ++/* -- Split register ops -------------------------------------------------- */ ++ ++/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++static void asm_hiop(ASMState *as, IRIns *ir) ++{ ++ /* HIOP is marked as a store because it needs its own DCE logic. */ ++ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ ++ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; ++ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ ++ switch ((ir-1)->o) { ++ case IR_CALLN: ++ case IR_CALLL: ++ case IR_CALLS: ++ case IR_CALLXS: ++ if (!uselo) ++ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ ++ break; ++ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; ++ } ++} ++ ++/* -- Profiling ----------------------------------------------------------- */ ++ ++static void asm_prof(ASMState *as, IRIns *ir) ++{ ++ UNUSED(ir); ++ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); ++ emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); ++ emit_lsglptr(as, RISCVI_LBU, RID_TMP, ++ (int32_t)offsetof(global_State, hookmask)); ++} ++ ++/* -- Stack handling ------------------------------------------------------ */ ++ ++/* Check Lua stack size for overflow. Use exit handler as fallback. */ ++static void asm_stack_check(ASMState *as, BCReg topslot, ++ IRIns *irp, RegSet allow, ExitNo exitno) ++{ ++ /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */ ++ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? 
irp->r : RID_TMP) : RID_BASE; ++ ExitNo oldsnap = as->snapno; ++ rset_clear(allow, pbase); ++ as->snapno = exitno; ++ asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); ++ as->snapno = oldsnap; ++ if (allow) { ++ tmp = rset_pickbot(allow); ++ ra_modified(as, tmp); ++ } else { // allow == RSET_EMPTY ++ tmp = RID_RET; ++ emit_lso(as, RISCVI_LD, tmp, RID_SP, 0); /* Restore tmp1 register. */ ++ } ++ emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); ++ emit_ds1s2(as, RISCVI_SUB, RID_TMP, tmp, pbase); ++ emit_lso(as, RISCVI_LD, tmp, tmp, offsetof(lua_State, maxstack)); ++ if (pbase == RID_TMP) ++ emit_getgl(as, RID_TMP, jit_base); ++ emit_getgl(as, tmp, cur_L); ++ if (allow == RSET_EMPTY) /* Spill temp register. */ ++ emit_lso(as, RISCVI_SD, tmp, RID_SP, 0); ++} ++ ++/* Restore Lua stack from on-trace state. */ ++static void asm_stack_restore(ASMState *as, SnapShot *snap) ++{ ++ SnapEntry *map = &as->T->snapmap[snap->mapofs]; ++#ifdef LUA_USE_ASSERT ++ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; ++#endif ++ MSize n, nent = snap->nent; ++ /* Store the value of all modified slots to the Lua stack. 
*/ ++ for (n = 0; n < nent; n++) { ++ SnapEntry sn = map[n]; ++ BCReg s = snap_slot(sn); ++ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); ++ IRRef ref = snap_ref(sn); ++ IRIns *ir = IR(ref); ++ if ((sn & SNAP_NORESTORE)) ++ continue; ++ if (irt_isnum(ir->t)) { ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_lso(as, RISCVI_FSD, src, RID_BASE, ofs); ++ } else { ++ if ((sn & SNAP_KEYINDEX)) { ++ RegSet allow = rset_exclude(RSET_GPR, RID_BASE); ++ int64_t kki = (int64_t)LJ_KEYINDEX << 32; ++ if (irref_isk(ref)) { ++ emit_lso(as, RISCVI_SD, ++ ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow), ++ RID_BASE, ofs); ++ } else { ++ Reg src = ra_alloc1(as, ref, allow); ++ Reg rki = ra_allock(as, kki, rset_exclude(allow, src)); ++ emit_lso(as, RISCVI_SD, RID_TMP, RID_BASE, ofs); ++ emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, rki); ++ } ++ } else { ++ asm_tvstore64(as, RID_BASE, ofs, ref); ++ } ++ } ++ checkmclim(as); ++ } ++ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); ++} ++ ++/* -- GC handling --------------------------------------------------------- */ ++ ++/* Marker to prevent patching the GC check exit. */ ++#define RISCV_NOPATCH_GC_CHECK \ ++ (RISCVI_OR|RISCVF_D(RID_TMP)|RISCVF_S1(RID_TMP)|RISCVF_S2(RID_TMP)) ++ ++/* Check GC threshold and do one or more GC steps. */ ++static void asm_gc_check(ASMState *as) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg tmp; ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ ++ asm_guard(as, RISCVI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. 
*/ ++ *--as->mcp = RISCV_NOPATCH_GC_CHECK; ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ASMREF_TMP2; /* MSize steps */ ++ asm_gencall(as, ci, args); ++ emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); ++ tmp = ra_releasetmp(as, ASMREF_TMP2); ++ emit_loadi(as, tmp, as->gcsteps); ++ /* Jump around GC step if GC total < GC threshold. */ ++ emit_branch(as, RISCVI_BLTU, RID_TMP, tmp, l_end, -1); ++ emit_getgl(as, tmp, gc.threshold); ++ emit_getgl(as, RID_TMP, gc.total); ++ as->gcsteps = 0; ++ checkmclim(as); ++} ++ ++/* -- Loop handling ------------------------------------------------------- */ ++ ++/* Fixup the loop branch. */ ++static void asm_loop_fixup(ASMState *as) ++{ ++ MCode *p = as->mctop; ++ MCode *target = as->mcp; ++ ptrdiff_t delta; ++ if (as->loopinv) { /* Inverted loop branch? */ ++ delta = (char *)target - (char *)(p - 2); ++ /* asm_guard* already inverted the branch, and patched the final b. */ ++ lj_assertA(checki21(delta), "branch target out of range"); ++ p[-2] = (p[-2]&0x00000fff) | RISCVF_IMMJ(delta); ++ } else { ++ /* J */ ++ delta = (char *)target - (char *)(p - 1); ++ p[-1] = RISCVI_JAL | RISCVF_IMMJ(delta); ++ } ++} ++ ++/* Fixup the tail of the loop. */ ++static void asm_loop_tail_fixup(ASMState *as) ++{ ++ UNUSED(as); /* Nothing to do(?) */ ++} ++ ++/* -- Head of trace ------------------------------------------------------- */ ++ ++/* Coalesce BASE register for a root trace. */ ++static void asm_head_root_base(ASMState *as) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (r != RID_BASE) ++ emit_mv(as, r, RID_BASE); ++ } ++} ++ ++/* Coalesce BASE register for a side trace. 
*/ ++static Reg asm_head_side_base(ASMState *as, IRIns *irp) ++{ ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (irp->r == r) { ++ return r; /* Same BASE register already coalesced. */ ++ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { ++ emit_mv(as, r, irp->r); /* Move from coalesced parent reg. */ ++ return irp->r; ++ } else { ++ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ ++ } ++ } ++ return RID_NONE; ++} ++ ++/* -- Tail of trace ------------------------------------------------------- */ ++ ++/* Fixup the tail code. */ ++static void asm_tail_fixup(ASMState *as, TraceNo lnk) ++{ ++ MCode *p = as->mctop; ++ MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; ++ int32_t spadj = as->T->spadjust; ++ if (spadj == 0) { ++ p[-3] = RISCVI_NOP; ++ // as->mctop = p-2; ++ } else { ++ /* Patch stack adjustment. */ ++ p[-3] = RISCVI_ADDI | RISCVF_D(RID_SP) | RISCVF_S1(RID_SP) | RISCVF_IMMI(spadj); ++ } ++ /* Patch exit jump. */ ++ ptrdiff_t delta = (char *)target - (char *)(p - 2); ++ p[-2] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); ++ p[-1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); ++} ++ ++/* Prepare tail of code. */ ++static void asm_tail_prep(ASMState *as) ++{ ++ MCode *p = as->mctop - 2; /* Leave room for exitstub. */ ++ if (as->loopref) { ++ as->invmcp = as->mcp = p; ++ } else { ++ as->mcp = p-1; /* Leave room for stack pointer adjustment. */ ++ as->invmcp = NULL; ++ } ++ p[0] = p[1] = RISCVI_NOP; /* Prevent load/store merging. */ ++} ++ ++/* -- Trace setup --------------------------------------------------------- */ ++ ++/* Ensure there are enough stack slots for call arguments. 
*/ ++static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ uint32_t i, nargs = CCI_XNARGS(ci); ++ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; ++ asm_collectargs(as, ir, ci, args); ++ for (i = 0; i < nargs; i++) { ++ if (args[i] && irt_isfp(IR(args[i])->t)) { ++ if (nfpr > 0) { ++ nfpr--; if(ci->flags & CCI_VARARG) ngpr--; ++ } else if (!(ci->flags & CCI_VARARG) && ngpr > 0) ngpr--; ++ else nslots += 2; ++ } else { ++ if (ngpr > 0) { ++ ngpr--; if(ci->flags & CCI_VARARG) nfpr--; ++ } else nslots += 2; ++ } ++ } ++ if (nslots > as->evenspill) /* Leave room for args in stack slots. */ ++ as->evenspill = nslots; ++ return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); ++} ++ ++static void asm_setup_target(ASMState *as) ++{ ++ asm_sparejump_setup(as); ++ asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); ++} ++ ++/* -- Trace patching ------------------------------------------------------ */ ++ ++/* Patch exit jumps of existing machine code to a new target. */ ++void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) ++{ ++ MCode *p = T->mcode; ++ MCode *pe = (MCode *)((char *)p + T->szmcode); ++ MCode *px = exitstub_trace_addr(T, exitno); ++ MCode *cstart = NULL; ++ MCode *mcarea = lj_mcode_patch(J, p, 0); ++ ++ for (; p < pe; p++) { ++ /* Look for exitstub branch, replace with branch to target. */ ++ ptrdiff_t odelta = (char *)px - (char *)(p+1), ++ ndelta = (char *)target - (char *)(p+1); ++ if ((((p[0] ^ RISCVF_IMMB(8)) & 0xfe000f80u) == 0 && ++ ((p[0] & 0x0000007fu) == 0x63u) && ++ ((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && ++ ((p[1] & 0x0000007fu) == 0x6fu) && p[-1] != RISCV_NOPATCH_GC_CHECK) || ++ (((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && ++ ((p[1] & 0x0000007fu) == 0x6fu) && p[0] != RISCV_NOPATCH_GC_CHECK)) { ++ lj_assertJ(checki32(ndelta), "branch target out of range"); ++ /* Patch jump, if within range. 
*/ ++ patchbranch: ++ if (checki21(ndelta)) { /* Patch jump */ ++ p[1] = RISCVI_JAL | RISCVF_IMMJ(ndelta); ++ if (!cstart) cstart = p + 1; ++ } else { /* Branch out of range. Use spare jump slot in mcarea. */ ++ MCode *mcjump = asm_sparejump_use(mcarea, target); ++ if (mcjump) { ++ lj_mcode_sync(mcjump, mcjump+2); ++ ndelta = (char *)mcjump - (char *)(p+1); ++ if (checki21(ndelta)) { ++ goto patchbranch; ++ } else { ++ lj_assertJ(0, "spare jump out of range: -Osizemcode too big"); ++ } ++ } ++ /* Ignore jump slot overflow. Child trace is simply not attached. */ ++ } ++ } else if (p+2 == pe) { ++ if (p[0] == RISCVI_NOP && p[1] == RISCVI_NOP) { ++ ptrdiff_t delta = (char *)target - (char *)p; ++ lj_assertJ(checki32(delta), "jump target out of range"); ++ p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); ++ p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); ++ if (!cstart) cstart = p; ++ } ++ } ++ } ++ if (cstart) lj_mcode_sync(cstart, px+1); ++ lj_mcode_patch(J, mcarea, 1); ++} + +From 9d784c566630b3682252c0414f9485325c47cb82 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:36:58 +0800 +Subject: [PATCH 12/22] riscv(interp): add VM builder support + +--- + src/host/buildvm.c | 2 ++ + src/host/buildvm_asm.c | 31 +++++++++++++++++++++++++++++++ + 2 files changed, 33 insertions(+) + +diff --git a/src/host/buildvm.c b/src/host/buildvm.c +index 3bfa039a7..6c45c4b93 100644 +--- a/src/host/buildvm.c ++++ b/src/host/buildvm.c +@@ -69,6 +69,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); + #include "../dynasm/dasm_mips.h" + #elif LJ_TARGET_S390X + #include "../dynasm/dasm_s390x.h" ++#elif LJ_TARGET_RISCV64 ++#include "../dynasm/dasm_riscv.h" + #else + #error "No support for this architecture (yet)" + #endif +diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c +index 4affc153b..8153ddc98 100644 +--- a/src/host/buildvm_asm.c ++++ b/src/host/buildvm_asm.c +@@ -208,6 +208,34 @@ 
static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); ++#elif LJ_TARGET_RISCV64 ++ if ((ins & 0x7f) == 0x17u) { ++ fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym); ++ } else if ((ins & 0x7f) == 0x67u) { ++ fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym); ++ } else if ((ins & 0x7f) == 0x6fu) { ++ fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym); ++ } else if ((ins & 0x7f) == 0x03u) { ++ uint8_t funct3 = (ins >> 12) & 7; ++ uint8_t rd = (ins >> 7) & 31, rs1 = (ins >> 15) & 31; ++ switch (funct3) { ++ case 0: fprintf(ctx->fp, "\tlb"); break; ++ case 1: fprintf(ctx->fp, "\tlh"); break; ++ case 2: fprintf(ctx->fp, "\tlw"); break; ++ case 3: fprintf(ctx->fp, "\tld"); break; ++ case 4: fprintf(ctx->fp, "\tlbu"); break; ++ case 5: fprintf(ctx->fp, "\tlhu"); break; ++ case 6: fprintf(ctx->fp, "\tlwu"); break; ++ default: goto rv_reloc_err; ++ } ++ fprintf(ctx->fp, " x%d, %s(x%d)\n", rd, sym, rs1); ++ } else { ++rv_reloc_err: ++ fprintf(stderr, ++ "Error: unsupported opcode %08x for %s symbol relocation.\n", ++ ins, sym); ++ exit(1); ++ } + #else + #error "missing relocation support for this architecture" + #endif +@@ -303,6 +331,9 @@ void emit_asm(BuildCtx *ctx) + #endif + #if LJ_TARGET_MIPS + fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n"); ++#endif ++#if LJ_TARGET_RISCV64 ++ fprintf(ctx->fp, ".option arch, -c\n.option norelax\n"); + #endif + emit_asm_align(ctx, 4); + + +From 03607274d4e5d1d2c900c0c4d25a11a1e9afc071 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:38:50 +0800 +Subject: [PATCH 13/22] riscv(misc): add bytecode listing support + +--- + src/jit/bcsave.lua | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua +index 5084ab3e2..74bcaf553 100644 +--- a/src/jit/bcsave.lua ++++ b/src/jit/bcsave.lua +@@ -103,6 +103,7 @@ local 
map_arch = { + mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, + mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, + s390x = { e = "be", b = 64, m = 22, }, ++ riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, }, + } + + local map_os = { + +From 8caf6a56cf49030f0d132ccaeb5d0256a4eb9e29 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:40:51 +0800 +Subject: [PATCH 14/22] riscv(jit): add hooks in interpreter + +--- + src/vm_riscv64.dasc | 388 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 388 insertions(+) + +diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc +index 4c8189b54..db91430fa 100644 +--- a/src/vm_riscv64.dasc ++++ b/src/vm_riscv64.dasc +@@ -448,6 +448,24 @@ + |.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro + |.macro st_vmstate; sw TMP0, GL->vmstate; .endmacro + | ++|.macro hotcheck, delta, target ++| srli TMP1, PC, 1 ++| andi TMP1, TMP1, 126 ++| add TMP1, TMP1, DISPATCH ++| lhu TMP2, GG_DISP2HOT(TMP1) ++| addiw TMP2, TMP2, -delta ++| sh TMP2, GG_DISP2HOT(TMP1) ++| bxltz TMP2, target ++|.endmacro ++| ++|.macro hotloop ++| hotcheck HOTCOUNT_LOOP, ->vm_hotloop ++|.endmacro ++| ++|.macro hotcall ++| hotcheck HOTCOUNT_CALL, ->vm_hotcall ++|.endmacro ++| + |// Move table write barrier back. Overwrites mark and tmp. 
+ |.macro barrierback, tab, mark, tmp, target + | ld tmp, GL->gc.grayagain +@@ -1145,8 +1163,15 @@ static void build_subroutines(BuildCtx *ctx) + | sd PC, SAVE_PC(sp) + | mv MULTRES, INS + | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base) ++ |.if JIT ++ | decode_OP1 TMP0, MULTRES ++ | li TMP1, BC_JFORI ++ |.endif + | decode_RA8 RA, MULTRES + | decode_RD8 RD, MULTRES ++ |.if JIT ++ | bxeq TMP0, TMP1, =>BC_JFORI ++ |.endif + | j =>BC_FORI + | + |//----------------------------------------------------------------------- +@@ -2141,6 +2166,20 @@ static void build_subroutines(BuildCtx *ctx) + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. ++ |.if JIT ++ | lbu TMP3, GL->hookmask ++ | andi TMP1, TMP3, HOOK_VMEVENT // No recording while in vmevent. ++ | bnez TMP1, >5 ++ | // Decrement the hookcount for consistency, but always do the call. ++ | lw TMP2, GL->hookcount ++ | andi TMP1, TMP3, HOOK_ACTIVE ++ | bnez TMP1, >1 ++ | addiw TMP2, TMP2, -1 ++ | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT ++ | beqz TMP1, >1 ++ | sw TMP2, GL->hookcount ++ | j >1 ++ |.endif + | + |->vm_rethook: // Dispatch target for return hooks. + | lbu TMP3, GL->hookmask +@@ -2186,10 +2225,103 @@ static void build_subroutines(BuildCtx *ctx) + | lw MULTRES, -24(RB) // Restore MULTRES for *M ins. + | j <4 + | ++ |->vm_hotloop: // Hot loop counter underflow. ++ |.if JIT ++ | ld LFUNC:TMP1, FRAME_FUNC(BASE) ++ | addi CARG1, GL, GG_G2J ++ | cleartp LFUNC:TMP1 ++ | sd PC, SAVE_PC(sp) ++ | ld TMP1, LFUNC:TMP1->pc ++ | mv CARG2, PC ++ | sd L, (offsetof(jit_State, L))(CARG1) ++ | lbu TMP1, PC2PROTO(framesize)(TMP1) ++ | sd BASE, L->base ++ | slli TMP1, TMP1, 3 ++ | add TMP1, BASE, TMP1 ++ | sd TMP1, L->top ++ | call_intern vm_hotloop, lj_trace_hot // (jit_State *J, const BCIns *pc) ++ | j <3 ++ |.endif ++ | + | + |->vm_callhook: // Dispatch target for call hooks. 
++ | mv CARG2, PC ++ |.if JIT ++ | j >1 ++ |.endif ++ | ++ |->vm_hotcall: // Hot call counter underflow. ++ |.if JIT ++ | ori CARG2, PC, 1 ++ |1: ++ |.endif ++ | add TMP0, BASE, RC ++ | sd PC, SAVE_PC(sp) ++ | sd BASE, L->base ++ | sub RA, RA, BASE ++ | sd TMP0, L->top ++ | mv CARG1, L ++ | call_intern vm_hotcall, lj_dispatch_call // (lua_State *L, const BCIns *pc) ++ | // Returns ASMFunction. ++ | ld BASE, L->base ++ | ld TMP0, L->top ++ | sd x0, SAVE_PC(sp) // Invalidate for subsequent line hook. ++ | add RA, BASE, RA ++ | sub NARGS8:RC, TMP0, BASE ++ | ld LFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp LFUNC:RB ++ | lw INS, -4(PC) ++ | jr CRET1 + | + |->cont_stitch: // Trace stitching. ++ |.if JIT ++ | // RA = resultptr, RB = meta base ++ | lw INS, -4(PC) ++ | ld TRACE:TMP2, -40(RB) // Save previous trace. ++ | decode_RA8 RC, INS ++ | addi TMP1, MULTRES, -8 ++ | cleartp TRACE:TMP2 ++ | add RC, BASE, RC // Call base. ++ | beqz TMP1, >2 ++ |1: // Move results down. ++ | ld CARG1, 0(RA) ++ | addi TMP1, TMP1, -8 ++ | addi RA, RA, 8 ++ | sd CARG1, 0(RC) ++ | addi RC, RC, 8 ++ | bnez TMP1, <1 ++ |2: ++ | decode_RA8 RA, INS ++ | decode_RB8 RB, INS ++ | add RA, RA, RB ++ | add RA, BASE, RA ++ |3: ++ | bltu RC, RA, >8 // More results wanted? ++ | ++ | lhu TMP3, TRACE:TMP2->traceno ++ | lhu RD, TRACE:TMP2->link ++ | bxeq RD, TMP3, ->cont_nop // Blacklisted. ++ | slliw RD, RD, 3 ++ | bxnez RD, =>BC_JLOOP // Jump to stitched trace. ++ | ++ | // Stitch a new trace to the previous trace. ++ | addi CARG1, GL, GG_G2J ++ | // addi CARG2, CARG1, 1 // We don't care what's on the verge. ++ | addi CARG2, CARG1, 2047 // jit_State too large. 
++ | sw TMP3, (offsetof(jit_State, exitno)-2047)(CARG2) ++ | sd L, (offsetof(jit_State, L)-2047)(CARG2) ++ | sd BASE, L->base ++ | mv CARG2, PC ++ | // (jit_State *J, const BCIns *pc) ++ | call_intern cont_stitch, lj_dispatch_stitch ++ | ld BASE, L->base ++ | j ->cont_nop ++ | ++ |8: ++ | sd TISNIL, 0(RC) ++ | addi RC, RC, 8 ++ | j <3 ++ |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. + #if LJ_HASPROFILE +@@ -2204,6 +2336,149 @@ static void build_subroutines(BuildCtx *ctx) + | ld BASE, L->base + | j ->cont_nop + #endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Trace exit handler ------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro savex_, a, b ++ | fsd f..a, a*8(sp) ++ | fsd f..b, b*8(sp) ++ | sd x..a, 32*8+a*8(sp) ++ | sd x..b, 32*8+b*8(sp) ++ |.endmacro ++ | ++ |->vm_exit_handler: ++ |.if JIT ++ | addi sp, sp, -(32*8+32*8) ++ | savex_ 0, 5 ++ | savex_ 6, 7 ++ | savex_ 8, 9 ++ | savex_ 10, 11 ++ | savex_ 12, 13 ++ | savex_ 14, 15 ++ | savex_ 16, 17 ++ | savex_ 18, 19 ++ | savex_ 20, 21 ++ | savex_ 22, 23 ++ | savex_ 24, 25 ++ | savex_ 26, 27 ++ | savex_ 28, 29 ++ | savex_ 30, 31 ++ | fsd f1, 1*8(sp) ++ | fsd f2, 2*8(sp) ++ | fsd f3, 3*8(sp) ++ | fsd f4, 4*8(sp) ++ | sd x0, 32*8+1*8(sp) // Clear RID_TMP. ++ | ld TMP1, 32*8+32*8(sp) // Load exit pc. ++ | addi TMP2, sp, 32*8+32*8 // Recompute original value of sp. ++ | addxi DISPATCH, GL, GG_G2DISP ++ | sd TMP2, 32*8+2*8(sp) // Store sp in RID_SP ++ | addi CARG1, GL, GG_G2J ++ | li_vmstate EXIT ++ | // addi CARG2, CARG1, 1 // We don't care what's on the verge. ++ | addi CARG2, CARG1, 2047 // jit_State too large. ++ | sub TMP1, TMP1, ra ++ | lw TMP2, 0(ra) // Load trace number. 
++ | st_vmstate ++ | srli TMP1, TMP1, 2 ++ | ld L, GL->cur_L ++ | ld BASE, GL->jit_base ++ | srli TMP2, TMP2, 12 ++ | addi TMP1, TMP1, -2 ++ | sd L, (offsetof(jit_State, L)-2047)(CARG2) ++ | sw TMP2, (offsetof(jit_State, parent)-2047)(CARG2) // Store trace number. ++ | sd BASE, L->base ++ | sw TMP1, (offsetof(jit_State, exitno)-2047)(CARG2) // Store exit number. ++ | sd x0, GL->jit_base ++ | mv CARG2, sp ++ | call_intern vm_exit_handler, lj_trace_exit // (jit_State *J, ExitState *ex) ++ | // Returns MULTRES (unscaled) or negated error code. ++ | ld TMP1, L->cframe ++ | ld BASE, L->base ++ | andi sp, TMP1, CFRAME_RAWMASK ++ | ld PC, SAVE_PC(sp) // Get SAVE_PC. ++ | sd L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield). ++ | j >1 ++ |.endif ++ | ++ |->vm_exit_interp: ++ |.if JIT ++ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. ++ | ld L, SAVE_L(sp) ++ | addxi DISPATCH, GL, GG_G2DISP ++ | sd BASE, L->base ++ |1: ++ | ld LFUNC:RB, FRAME_FUNC(BASE) ++ | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit. ++ | beqz TMP0, >9 ++ | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). ++ | slli MULTRES, CRET1, 3 ++ | cleartp LFUNC:RB ++ | sw MULTRES, TMPD(sp) ++ | li TISNIL, LJ_TNIL ++ | li TISNUM, LJ_TISNUM // Setup type comparison constants. ++ | slli TMP3, TMP3, 32 ++ | ld TMP1, LFUNC:RB->pc ++ | sd x0, GL->jit_base ++ | ld KBASE, PC2PROTO(k)(TMP1) ++ | fmv.d.x TOBIT, TMP3 ++ | // Modified copy of ins_next which handles function header dispatch, too. ++ | lw INS, 0(PC) ++ | addi PC, PC, 4 ++ | addiw CRET1, CRET1, 17 // Static dispatch? ++ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 ++ | sw TISNIL, GL->vmstate ++ | decode_RD8a RD, INS ++ | beqz CRET1, >5 ++ | decode_OP8 TMP1, INS ++ | add TMP0, DISPATCH, TMP1 ++ | sltiu TMP2, TMP1, BC_FUNCF*8 ++ | ld TMP3, 0(TMP0) ++ | decode_RA8 RA, INS ++ | beqz TMP2, >2 ++ | decode_RD8b RD ++ | jr TMP3 ++ |2: ++ | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? 
++ | ld TMP1, FRAME_PC(BASE) ++ | bnez TMP2, >3 ++ | // Check frame below fast function. ++ | andi TMP0, TMP1, FRAME_TYPE ++ | bnez TMP0, >3 // Trace stitching continuation? ++ | // Otherwise set KBASE for Lua function below fast function. ++ | lw TMP2, -4(TMP1) ++ | decode_RA8 TMP0, TMP2 ++ | sub TMP1, BASE, TMP0 ++ | ld LFUNC:TMP2, -32(TMP1) ++ | cleartp LFUNC:TMP2 ++ | ld TMP1, LFUNC:TMP2->pc ++ | ld KBASE, PC2PROTO(k)(TMP1) ++ |3: ++ | addi RC, MULTRES, -8 ++ | add RA, RA, BASE ++ | jr TMP3 ++ | ++ |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. ++ | ld TMP0, GL_J(trace)(GL) ++ | decode_RD8b RD ++ | add TMP0, TMP0, RD ++ | ld TRACE:TMP2, 0(TMP0) ++ | lw INS, TRACE:TMP2->startins ++ | decode_OP8 TMP1, INS ++ | add TMP0, DISPATCH, TMP1 ++ | decode_RD8a RD, INS ++ | ld TMP3, GG_DISP2STATIC(TMP0) ++ | decode_RA8a RA, INS ++ | decode_RD8b RD ++ | decode_RA8b RA ++ | jr TMP3 ++ | ++ |9: // Rethrow error from the right C frame. ++ | negw CARG2, CRET1 ++ | mv CARG1, L ++ | call_intern vm_exit_interp, lj_err_trace // (lua_State *L, int errcode) ++ |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- +@@ -2230,6 +2505,10 @@ static void build_subroutines(BuildCtx *ctx) + | vm_round rdn + |->vm_ceil: + | vm_round rup ++ |->vm_trunc: ++ |.if JIT ++ | vm_round rtz ++ |.endif + | + | + |//----------------------------------------------------------------------- +@@ -2243,6 +2522,67 @@ static void build_subroutines(BuildCtx *ctx) + | ret + |.endif + | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_IDX, CARG2 ++ |.define NEXT_ASIZE, CARG3 ++ |.define NEXT_NIL, CARG4 ++ |.define NEXT_TMP0, TMP0 ++ |.define NEXT_TMP1, TMP1 ++ |.define NEXT_TMP2, TMP2 ++ |.define NEXT_RES_VK, CRET1 ++ |.define NEXT_RES_IDX, CRET2 ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL, 0(sp) ++ |.define NEXT_RES_KEY, 8(sp) ++ | ++ |// TValue *lj_vm_next(GCtab *t, 
uint32_t idx) ++ |// Next idx returned in CRET2. ++ |->vm_next: ++ |.if JIT ++ | lw NEXT_ASIZE, NEXT_TAB->asize ++ | ld NEXT_TMP0, NEXT_TAB->array ++ | li NEXT_NIL, LJ_TNIL ++ |1: // Traverse array part. ++ | bgeu NEXT_IDX, NEXT_ASIZE, >5 ++ | slliw NEXT_TMP1, NEXT_IDX, 3 ++ | add NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 ++ | li TMP3, LJ_TISNUM ++ | ld NEXT_TMP2, 0(NEXT_TMP1) ++ | slli TMP3, TMP3, 47 ++ | or NEXT_TMP1, NEXT_IDX, TMP3 ++ | addiw NEXT_IDX, NEXT_IDX, 1 ++ | beq NEXT_TMP2, NEXT_NIL, <1 ++ | sd NEXT_TMP2, NEXT_RES_VAL ++ | sd NEXT_TMP1, NEXT_RES_KEY ++ | mv NEXT_RES_VK, NEXT_RES_PTR ++ | mv NEXT_RES_IDX, NEXT_IDX ++ | ret ++ | ++ |5: // Traverse hash part. ++ | subw NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE ++ | lw NEXT_TMP0, NEXT_TAB->hmask ++ | ld NODE:NEXT_RES_VK, NEXT_TAB->node ++ | slliw NEXT_TMP2, NEXT_RES_IDX, 5 ++ | slliw TMP3, NEXT_RES_IDX, 3 ++ | subw TMP3, NEXT_TMP2, TMP3 ++ | add NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, TMP3 ++ |6: ++ | bltu NEXT_TMP0, NEXT_RES_IDX, >8 ++ | ld NEXT_TMP2, NODE:NEXT_RES_VK->val ++ | addiw NEXT_RES_IDX, NEXT_RES_IDX, 1 ++ | bne NEXT_TMP2, NEXT_NIL, >9 ++ | // Skip holes in hash part. ++ | addi NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) ++ | j <6 ++ | ++ |8: // End of iteration. Set the key to nil (not the value). 
++ | sd NEXT_NIL, NEXT_RES_KEY ++ | mv NEXT_RES_VK, NEXT_RES_PTR ++ |9: ++ | addw NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE ++ | ret ++ |.endif ++ | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- +@@ -3733,6 +4073,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + + case BC_ITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) ++ |.if JIT ++ | hotloop ++ |.endif + |->vm_IITERN: + | add RA, BASE, RA + | ld TAB:RB, -16(RA) +@@ -3817,8 +4160,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | li TMP1, BC_ITERC + | sb TMP3, -4+OFS_OP(PC) + | add PC, TMP0, TMP2 ++ |.if JIT ++ | lb TMP0, OFS_OP(PC) ++ | li TMP3, BC_ITERN ++ | lhu TMP2, OFS_RD(PC) ++ | bne TMP0, TMP3, >6 ++ |.endif + | sb TMP1, OFS_OP(PC) + | j <1 ++ |.if JIT ++ |6: // Unpatch JLOOP. ++ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. ++ | slliw TMP2, TMP2, 3 ++ | add TMP0, TMP0, TMP2 ++ | ld TRACE:TMP2, 0(TMP0) ++ | lw TMP0, TRACE:TMP2->startins ++ | andi TMP0, TMP0, -256 ++ | or TMP0, TMP0, TMP1 ++ | sw TMP0, 0(PC) ++ | j <1 ++ |.endif + break; + + case BC_VARG: +@@ -3984,6 +4345,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + /* -- Loops and branches ------------------------------------------------ */ + + case BC_FORL: ++ |.if JIT ++ | hotloop ++ |.endif + | // Fall through. Assumes BC_IFORL follows. + break; + +@@ -4104,6 +4468,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + + case BC_ITERL: ++ |.if JIT ++ | hotloop ++ |.endif + | // Fall through. Assumes BC_IITERL follows. 
+ break; + +@@ -4128,6 +4495,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + + case BC_LOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | // Note: RA/RD is only used by trace recorder to determine scope/extent ++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop. ++ |.if JIT ++ | hotloop ++ |.endif + | // Fall through. Assumes BC_ILOOP follows. + break; + +@@ -4137,6 +4510,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + break; + + case BC_JLOOP: ++ |.if JIT ++ | // RA = base*8 (ignored), RD = traceno*8 ++ | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. ++ | add TMP0, TMP0, RD ++ | // Traces on RISC-V don't store the trace number, so use 0. ++ | sd x0, GL->vmstate ++ | ld TRACE:TMP1, 0(TMP0) ++ | sd BASE, GL->jit_base // store Current JIT code L->base ++ | ld TMP1, TRACE:TMP1->mcode ++ | sd L, GL->tmpbuf.L ++ | jr TMP1 ++ |.endif + break; + + case BC_JMP: +@@ -4148,6 +4533,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: ++ |.if JIT ++ | hotcall ++ |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. + break; + +From 540be3090b14c99eda1aa0687336df1015ef16f9 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:42:05 +0800 +Subject: [PATCH 15/22] riscv(interp): add DWARF info + +--- + src/vm_riscv64.dasc | 132 +++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 131 insertions(+), 1 deletion(-) + +diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc +index db91430fa..09ac0cf05 100644 +--- a/src/vm_riscv64.dasc ++++ b/src/vm_riscv64.dasc +@@ -4679,5 +4679,135 @@ static int build_backend(BuildCtx *ctx) + /* Emit pseudo frame-info for all assembler functions. 
*/ + static void emit_asm_debug(BuildCtx *ctx) + { +- ++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); ++ int i; ++ switch (ctx->mode) { ++ case BUILD_elfasm: ++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe0:\n" ++ "\t.4byte .LECIE0-.LSCIE0\n" ++ ".LSCIE0:\n" ++ "\t.4byte 0xffffffff\n" ++ "\t.byte 0x1\n" ++ "\t.string \"\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. */ ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 3\n" ++ ".LECIE0:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE0:\n" ++ "\t.4byte .LEFDE0-.LASFDE0\n" ++ ".LASFDE0:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte .Lbegin\n" ++ "\t.8byte %d\n" ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*6\n" /* offset ra */, ++ fcofs, CFRAME_SIZE); ++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ ++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); ++ for (i = 27; i >= 18; i--) /* offset f31-f18 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ ++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ ++ "\t.align 3\n" ++ ".LEFDE0:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".LSFDE1:\n" ++ "\t.4byte .LEFDE1-.LASFDE1\n" ++ ".LASFDE1:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.4byte lj_vm_ffi_call\n" ++ "\t.4byte %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ ++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ ++ "\t.byte 0xd\n\t.uleb128 0x12\n" ++ "\t.align 3\n" ++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#if !LJ_NO_UNWIND ++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe1:\n" ++ "\t.4byte .LECIE1-.LSCIE1\n" ++ 
".LSCIE1:\n" ++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zPR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. */ ++ "\t.uleb128 6\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.4byte lj_err_unwind_dwarf-.\n" ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE1:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE2:\n" ++ "\t.4byte .LEFDE2-.LASFDE2\n" ++ ".LASFDE2:\n" ++ "\t.4byte .LASFDE2-.Lframe1\n" ++ "\t.4byte .Lbegin-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*6\n", /* offset ra */ ++ fcofs, CFRAME_SIZE); ++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ ++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); ++ for (i = 27; i >= 18; i--) /* offset f31-f18 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ ++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ ++ "\t.align 2\n" ++ ".LEFDE2:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".Lframe2:\n" ++ "\t.4byte .LECIE2-.LSCIE2\n" ++ ".LSCIE2:\n" ++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. 
*/ ++ "\t.uleb128 1\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE2:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE3:\n" ++ "\t.4byte .LEFDE3-.LASFDE3\n" ++ ".LASFDE3:\n" ++ "\t.4byte .LASFDE3- .Lframe2\n" ++ "\t.4byte lj_vm_ffi_call-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ ++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ ++ "\t.byte 0xd\n\t.uleb128 0x12\n" ++ "\t.align 2\n" ++ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#endif ++ break; ++ default: ++ break; ++ } + } + +From 1d9374aec455253800737aca85e45b8c1e14c87f Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:42:34 +0800 +Subject: [PATCH 16/22] riscv(jit): add GDBJIT support + +--- + src/lj_gdbjit.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c +index 9e68932a8..7d677c7c7 100644 +--- a/src/lj_gdbjit.c ++++ b/src/lj_gdbjit.c +@@ -306,6 +306,9 @@ enum { + #elif LJ_TARGET_MIPS + DW_REG_SP = 29, + DW_REG_RA = 31, ++#elif LJ_TARGET_RISCV64 ++ DW_REG_SP = 2, ++ DW_REG_RA = 1, + #else + #error "Unsupported target architecture" + #endif +@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = { + .machine = 20, + #elif LJ_TARGET_MIPS + .machine = 8, ++#elif LJ_TARGET_RISCV64 ++ .machine = 243, + #else + #error "Unsupported target architecture" + #endif +@@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) + for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } + for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } + } ++#elif LJ_TARGET_RISCV64 ++ { ++ int i; ++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); } ++ DB(DW_CFA_offset|9); DUV(17); ++ DB(DW_CFA_offset|8); DUV(18); ++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); } ++ DB(DW_CFA_offset|32|9); DUV(29); ++ 
DB(DW_CFA_offset|32|8); DUV(30); ++ } + #else + #error "Unsupported target architecture" + #endif + +From 8863447ac976568e7ed96c81c7b1dcac97c6df6b Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:43:46 +0800 +Subject: [PATCH 17/22] riscv(support,linux): add Linux specfic icache sync + codepath + +--- + src/lj_mcode.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/src/lj_mcode.c b/src/lj_mcode.c +index 0a9ced697..295c64af5 100644 +--- a/src/lj_mcode.c ++++ b/src/lj_mcode.c +@@ -38,6 +38,12 @@ + void sys_icache_invalidate(void *start, size_t len); + #endif + ++#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX ++#include ++#include ++#include ++#endif ++ + /* Synchronize data/instruction cache. */ + void lj_mcode_sync(void *start, void *end) + { +@@ -52,6 +58,17 @@ void lj_mcode_sync(void *start, void *end) + sys_icache_invalidate(start, (char *)end-(char *)start); + #elif LJ_TARGET_PPC + lj_vm_cachesync(start, end); ++#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX ++#if (defined(__GNUC__) || defined(__clang__)) ++ __asm__ volatile("fence rw, rw"); ++#else ++ lj_vm_fence_rw_rw(); ++#endif ++#ifdef __GLIBC__ ++ __riscv_flush_icache(start, end, 0); ++#else ++ syscall(__NR_riscv_flush_icache, start, end, 0UL); ++#endif + #elif defined(__GNUC__) || defined(__clang__) + __clear_cache(start, end); + #else + +From 526e8aafa24e5fe051490f69b2271ce1e4dd717e Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:47:58 +0800 +Subject: [PATCH 18/22] riscv(support,linux): make mremap() non-moving due to + VA space woes + +--- + src/lj_alloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/lj_alloc.c b/src/lj_alloc.c +index cb704f7b3..9039d8053 100644 +--- a/src/lj_alloc.c ++++ b/src/lj_alloc.c +@@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) + #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) + #define CALL_MREMAP_NOMOVE 0 + #define 
CALL_MREMAP_MAYMOVE 1 +-#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) ++#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64) + #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE + #else + #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE + +From 8cc27d379441ff8c2ed057d9353854bd6f073e83 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:48:43 +0800 +Subject: [PATCH 19/22] riscv(misc): add disassmbler support + +--- + src/jit/dis_riscv.lua | 979 ++++++++++++++++++++++++++++++++++++++++ + src/jit/dis_riscv64.lua | 16 + + 2 files changed, 995 insertions(+) + create mode 100644 src/jit/dis_riscv.lua + create mode 100644 src/jit/dis_riscv64.lua + +diff --git a/src/jit/dis_riscv.lua b/src/jit/dis_riscv.lua +new file mode 100644 +index 000000000..8de563a72 +--- /dev/null ++++ b/src/jit/dis_riscv.lua +@@ -0,0 +1,979 @@ ++------------------------------------------------------------------------------ ++-- LuaJIT RISC-V disassembler module. ++-- ++-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++-- Released under the MIT license. See Copyright Notice in luajit.h ++-- ++-- Contributed by Milos Poletanovic from Syrmia.com. ++-- Contributed by gns from PLCT Lab, ISCAS. ++------------------------------------------------------------------------------ ++-- This is a helper module used by the LuaJIT machine code dumper module. ++-- ++-- It disassembles most standard RISC-V instructions. 
++-- Mode is little-endian ++------------------------------------------------------------------------------ ++ ++local type = type ++local byte, format = string.byte, string.format ++local match, gmatch = string.match, string.gmatch ++local concat = table.concat ++local bit = require("bit") ++local band, bor, tohex = bit.band, bit.bor, bit.tohex ++local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift ++local jit = require("jit") ++ ++local jstat = { jit.status() } ++local function is_opt_enabled(opt) ++ for _, v in ipairs(jstat) do ++ if v == opt then ++ return true ++ end ++ end ++ return false ++end ++local xthead = is_opt_enabled("XThead") ++ ++------------------------------------------------------------------------------ ++-- Opcode maps ++------------------------------------------------------------------------------ ++ ++--RVC32 extension ++ ++local map_quad0 = { ++ shift = 13, mask = 7, ++ [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn", ++ false, "c.fsdNMh", "c.swZMn", "c.fswNMn" ++} ++ ++local map_sub2quad1 = { ++ shift = 5, mask = 3, ++ [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ" ++} ++ ++local map_sub1quad1 = { ++ shift = 10, mask = 3, ++ [0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1 ++} ++ ++local map_quad1 = { ++ shift = 13, mask = 7, ++ [0] = { ++ shift = 7, mask = 31, ++ [0] = "c.nop", _ = "c.addiDx" ++ }, ++ [1] = "c.jalT", [2] = "c.liDx", ++ [3] = { ++ shift = 7, mask = 31, ++ [0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX", ++ _ = "c.luiDK" ++ }, ++ [4] = map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq" ++} ++ ++local map_sub1quad2 = { ++ shift = 12, mask = 1, ++ [0] = { ++ shift = 2, mask = 31, ++ [0] = "c.jrD", _ = "c.mvDE" ++ }, ++ [1] = { ++ shift = 2, mask = 31, ++ [0] = { ++ shift = 7, mask = 31, ++ [0] = "c.ebreak", _ = "c.jalrD" ++ }, ++ _ = "c.addDE" ++ } ++} ++ ++local map_quad2 = { ++ shift = 13, mask = 7, ++ [0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY", 
++ [4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = "c.swspEu", [7] = "c.fswspVu" ++} ++ ++local map_compr = { ++ [0] = map_quad0, map_quad1, map_quad2 ++} ++ ++--RV32M ++local map_mext = { ++ shift = 12, mask = 7, ++ [0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr", ++ "divDRr", "divuDRr", "remDRr", "remuDRr" ++} ++ ++--RV64M ++local map_mext64 = { ++ shift = 12, mask = 7, ++ [0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr", ++ [7] = "remuwDRr" ++} ++ ++--RV32F, RV64F, RV32D, RV64D ++local map_fload = { ++ shift = 12, mask = 7, ++ [2] = "flwFL", [3] = "fldFL" ++} ++ ++local map_fstore = { ++ shift = 12, mask = 7, ++ [2] = "fswSg", [3] = "fsdSg" ++} ++ ++local map_fmadd = { ++ shift = 25, mask = 3, ++ [0] = "fmadd.sFGgHo", "fmadd.dFGgHo" ++} ++ ++local map_fmsub = { ++ shift = 25, mask = 3, ++ [0] = "fmsub.sFGgHo", "fmsub.dFGgHo" ++} ++ ++local map_fnmsub = { ++ shift = 25, mask = 3, ++ [0] = "fnmsub.sFGgHo", "fnmsub.dFGgHo" ++} ++ ++local map_fnmadd = { ++ shift = 25, mask = 3, ++ [0] = "fnmadd.sFGgHo", "fnmadd.dFGgHo" ++} ++ ++local map_fsgnjs = { ++ shift = 12, mask = 7, ++ [0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6" ++} ++ ++local map_fsgnjd = { ++ shift = 12, mask = 7, ++ [0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6" ++} ++ ++local map_fms = { ++ shift = 12, mask = 7, ++ [0] = "fmin.sFGg", "fmax.sFGg", "fminm.sFGg", "fmaxm.sFGg" ++} ++ ++local map_fmd = { ++ shift = 12, mask = 7, ++ [0] = "fmin.dFGg", "fmax.dFGg", "fminm.dFGg", "fmaxm.dFGg" ++} ++ ++local map_fcomps = { ++ shift = 12, mask = 7, ++ [0] = "fle.sDGg", "flt.sDGg", "feq.sDGg", ++ [4] = "fleq.sDGg", "fltq.sDGg" ++} ++ ++local map_fcompd = { ++ shift = 12, mask = 7, ++ [0] = "fle.dDGg", "flt.dDGg", "feq.dDGg", ++ [4] = "fleq.dDGg", "fltq.dDGg" ++} ++ ++local map_fcvtwls = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.w.sDGo", "fcvt.wu.sDGo", "fcvt.l.sDGo", "fcvt.lu.sDGo" ++} ++ ++local map_fcvtwld = { ++ shift = 20, mask = 31, ++ 
[0] = "fcvt.w.dDGo", "fcvt.wu.dDGo", "fcvt.l.dDGo", "fcvt.lu.dDGo", ++ [8] = { ++ shift = 12, mask = 7, ++ [1] = "fcvtmodw.dDG" ++ } ++} ++ ++local map_fcvts = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.s.wFRo", "fcvt.s.wuFRo", "fcvt.s.lFRo", "fcvt.s.luFRo" ++} ++ ++local map_fcvtd = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.d.wFRo", "fcvt.d.wuFRo", "fcvt.d.lFRo", "fcvt.d.luFRo" ++} ++ ++local map_fcvtsd = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.s.dFGo", ++ [4] = "fround.sFGo", [5] = "froundnx.sFGo" ++} ++ ++local map_fcvtds = { ++ shift = 20, mask = 31, ++ [0] = "fcvt.d.sFGo", ++ [4] = "fround.dFGo", [5] = "froundnx.dFGo" ++} ++ ++local map_fmvwx = { ++ shift = 20, mask = 31, ++ [0] = "fmv.w.xFR", [1] = "fli.sFy" ++} ++ ++local map_fmvdx = { ++ shift = 20, mask = 31, ++ [0] = "fmv.d.xFR", [1] = "fli.dFy" ++} ++ ++local map_fext = { ++ shift = 25, mask = 127, ++ [0] = "fadd.sFGgo", [1] = "fadd.dFGgo", [4] = "fsub.sFGgo", [5] = "fsub.dFGgo", ++ [8] = "fmul.sFGgo", [9] = "fmul.dFGgo", [12] = "fdiv.sFGgo", [13] = "fdiv.dFGgo", ++ [16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd, ++ [32] = map_fcvtsd, [33] = map_fcvtds,[44] = "fsqrt.sFGo", [45] = "fsqrt.dFGo", ++ [80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld, ++ [104] = map_fcvts, [105] = map_fcvtd, ++ [112] = { ++ shift = 12, mask = 7, ++ [0] = "fmv.x.wDG", "fclass.sDG" ++ }, ++ [113] = { ++ shift = 12, mask = 7, ++ [0] = "fmv.x.dDG", "fclass.dDG" ++ }, ++ [120] = map_fmvwx, [121] = map_fmvdx ++} ++ ++--RV32A, RV64A ++local map_aext = { ++ shift = 27, mask = 31, ++ [0] = { ++ shift = 12, mask = 7, ++ [2] = "amoadd.wDrO", [3] = "amoadd.dDrO" ++ }, ++ { ++ shift = 12, mask = 7, ++ [2] = "amoswap.wDrO", [3] = "amoswap.dDrO" ++ }, ++ { ++ shift = 12, mask = 7, ++ [2] = "lr.wDO", [3] = "lr.dDO" ++ }, ++ { ++ shift = 12, mask = 7, ++ [2] = "sc.wDrO", [3] = "sc.dDrO" ++ }, ++ { ++ shift = 12, mask = 7, ++ [2] = "amoxor.wDrO", [3] = "amoxor.dDrO" ++ }, ++ [8] = { ++ shift 
= 12, mask = 7, ++ [2] = "amoor.wDrO", [3] = "amoor.dDrO" ++ }, ++ [12] = { ++ shift = 12, mask = 7, ++ [2] = "amoand.wDrO", [3] = "amoand.dDrO" ++ }, ++ [16] = { ++ shift = 12, mask = 7, ++ [2] = "amomin.wDrO", [3] = "amomin.dDrO" ++ }, ++ [20] = { ++ shift = 12, mask = 7, ++ [2] = "amomax.wDrO", [3] = "amomax.dDrO" ++ }, ++ [24] = { ++ shift = 12, mask = 7, ++ [2] = "amominu.wDrO", [3] = "amominu.dDrO" ++ }, ++ [28] = { ++ shift = 12, mask = 7, ++ [2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO" ++ }, ++} ++ ++-- RV32I, RV64I ++local map_load = { ++ shift = 12, mask = 7, ++ [0] = "lbDL", "lhDL", "lwDL", "ldDL", ++ "lbuDL", "lhuDL", "lwuDL" ++} ++ ++local map_opimm = { ++ shift = 12, mask = 7, ++ [0] = { ++ shift = 7, mask = 0x1ffffff, ++ [0] = "nop", _ = "addi|li|mvDR0I2" ++ }, ++ { ++ shift = 25, mask = 127, ++ [48] = { ++ shift = 20, mask = 31, ++ [4] = "sext.bDR", [5] = "sext.hDR" ++ }, ++ _ = "slliDRi", ++ }, "sltiDRI", "sltiu|seqzDRI5", ++ "xori|notDRI4", ++ { ++ shift = 26, mask = 63, ++ [0] = "srliDRi", [16] = "sraiDRi", [24] = "roriDRi", ++ [26] = { ++ shift = 20, mask = 63, ++ [56] = "rev8DR" ++ } ++ }, ++ "oriDRI", "andiDRI" ++} ++ ++local map_branch = { ++ shift = 12, mask = 7, ++ [0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false, ++ "blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", "bltuRrB", "bgeuRrB" ++} ++ ++local map_store = { ++ shift = 12, mask = 7, ++ [0] = "sbSr", "shSr", "swSr", "sdSr" ++} ++ ++local map_op = { ++ shift = 25, mask = 127, ++ [0] = { ++ shift = 12, mask = 7, ++ [0] = "addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r", ++ "xorDRr", "srlDRr", "orDRr", "andDRr" ++ }, ++ [1] = map_mext, ++ [4] = { ++ ++ }, ++ [5] = { -- Zbb ++ shift = 12, mask = 7, ++ [4] = "minDRr", [5] = "minuDRr", [6] = "maxDRr", [7] = "maxuDRr" ++ }, ++ [7] = { -- Zicond ++ shift = 12, mask = 7, ++ [5] = "czero.eqzDRr", [7] = "czero.nezDRr" ++ }, ++ [16] = { -- Zba ++ shift = 12, mask = 7, ++ [2] = "sh1addDRr", [4] = "sh2addDRr", [6] = "sh3addDRr" ++ }, ++ [32] = 
{ -- Zbb ++ shift = 12, mask = 7, ++ [0] = "sub|negDR0r", [4] = "xnorDRr", [5] = "sraDRr", [6] = "ornDRr", [7] = "andnDRr" ++ }, ++ [48] = { -- Zbb ++ shift = 12, mask = 7, ++ [1] = "rolDRr", [5] = "rorDRr" ++ } ++} ++ ++--- 64I ++local map_opimm32 = { ++ shift = 12, mask = 7, ++ [0] = "addiw|sext.wDRI0", "slliwDRi", ++ [2] = { -- Zba ++ shift = 25, mask = 127, ++ [1] = "slli.uwDRi" ++ }, ++ [5] = { -- 64I ++ shift = 25, mask = 127, ++ [0] = "srliwDRi", [32] = "sraiwDRi", [48] = "roriwDRi" ++ }, ++ [48] = { -- Zbb ++ shift = 25, mask = 127, ++ [5] = "roriwDRi" ++ } ++} ++ ++local map_op32 = { ++ shift = 25, mask = 127, ++ [0] = { -- 64I ++ shift = 12, mask = 7, ++ [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr" ++ }, ++ [1] = map_mext64, ++ [4] = { -- Zba & Zbb ++ shift = 12, mask = 7, ++ [0] = "add.uw|zext.w|DRr0", [4] = "zext.hDRr" ++ }, ++ [16] = { -- Zba ++ shift = 12, mask = 7, ++ [2] = "sh1add.uw", [4] = "sh2add.uw", [6] = "sh3add.uw" ++ }, ++ [32] = { -- 64I ++ shift = 12, mask = 7, ++ [0] = "subw|negwDR0r", [5] = "srawDRr" ++ }, ++ [48] = { -- Zbb ++ shift = 12, mask = 7, ++ [1] = "rolwDRr", [5] = "rorwDRr" ++ } ++} ++ ++local map_ecabre = { ++ shift = 12, mask = 7, ++ [0] = { ++ shift = 20, mask = 4095, ++ [0] = "ecall", "ebreak" ++ } ++} ++ ++local map_fence = { ++ shift = 12, mask = 1, ++ [0] = "fence", --"fence.i" ZIFENCEI EXTENSION ++} ++ ++local map_jalr = { ++ shift = 7, mask = 0x1ffffff, ++ _ = "jalr|jrDRI7", [256] = "ret" ++} ++ ++local map_xthead_custom0 = { ++ shift = 12, mask = 7, ++ [1] = { -- Arithmetic ++ shift = 27, mask = 31, ++ [0] = "th.addslDRrv", ++ [2] = { ++ shift = 26, mask = 63, ++ [4] = "th.srriDRi", ++ [5] = { ++ shift = 25, mask = 127, ++ [10] = "th.srriwDRi" ++ } ++ }, ++ [4] = { -- XTheadMac ++ shift = 25, mask = 3, ++ [0] = "th.mulaDRr", "th.mulsDRr", "th.mulawDRr", "th.mulswDRr" ++ }, ++ [5] = { -- XTheadMac ++ shift = 25, mask = 3, ++ [0] = "th.mulahDRr", "th.mulshDRr" ++ }, ++ [8] = { -- XTheadCondMov ++ shift = 25, mask 
= 3, ++ [0] = "th.mveqzDRr", "th.mvnezDRr" ++ }, ++ [16] = { -- XTheadBb ++ shift = 20, mask = 31, ++ [0] = { ++ shift = 25, mask = 3, ++ [0] = "th.tstnbzDRi", "th.revDR", "th.ff0DR", "th.ff1DR" ++ } ++ }, ++ [17] = { -- XTheadBb ++ shift = 26, mask = 1, ++ [0] = "th.tstDRi" ++ }, ++ [18] = { -- XTheadBb ++ shift = 20, mask = 31, ++ [0] = { ++ shift = 25, mask = 3, ++ [0] = "th.revwDR" ++ } ++ } ++ }, ++ [2] = "th.extDRji", [3] = "th.extuDRji", ++ { -- MemLoad ++ shift = 29, mask = 7, ++ [7] = { -- XTheadMemPair ++ shift = 25, mask = 3, ++ [0] = "th.lwdDrP", [2] = "th.lwudDrP", "th.lddDrP" ++ } ++ }, ++ { -- MemStore ++ shift = 29, mask = 7, ++ [7] = { -- XTheadMemPair ++ shift = 25, mask = 3, ++ [0] = "th.swdDrP", [3] = "th.sddDrP" ++ } ++ } ++} ++ ++local map_custom0 = xthead and map_xthead_custom0 or nil ++ ++local map_pri = { ++ [3] = map_load, [7] = map_fload, [11] = map_custom0, [15] = map_fence, [19] = map_opimm, ++ [23] = "auipcDA", [27] = map_opimm32, ++ [35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_op, ++ [55] = "luiDU", [59] = map_op32, [67] = map_fmadd, [71] = map_fmsub, ++ [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext, ++ [103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre ++} ++ ++------------------------------------------------------------------------------ ++ ++local map_gpr = { ++ [0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7", ++ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", ++ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", ++ "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31", ++} ++ ++local map_fgpr = { ++ [0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++} ++ ++local map_rm = { ++ [0] = "rne", "rtz", "rdn", "rup", "rmm", [7] = "dyn" ++} ++ ++local map_fli = { ++ [0] = "-1.0", ++ "min", 
++ "0x1p-16", "0x1p-15", "0x1p-8", "0x1p-7", ++ "0.0625", "0.125", ++ "0.25", "0.3125", "0.375", "0.4375", ++ "0.5", "0.625", "0.75", "0.875", ++ "1.0", "1.25", "1.5", "1.75", ++ "2.0", "2.5", "3.0", ++ "4.0", "8.0", "16.0", "128.0", "256.0", ++ "32768.0", "65536.0", "inf", "nan" ++} ++ ++------------------------------------------------------------------------------ ++ ++-- Output a nicely formatted line with an opcode and operands. ++local function putop(ctx, text, operands) ++ local pos = ctx.pos ++ local extra = "" ++ if ctx.rel then ++ local sym = ctx.symtab[ctx.rel] ++ if sym then extra = "\t->"..sym end ++ end ++ if ctx.hexdump > 0 then ++ ctx.out:write((format("%08x %s %-7s %s%s\n", ++ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra))) ++ else ++ ctx.out(format("%08x %-7s %s%s\n", ++ ctx.addr+pos, text, concat(operands, ", "), extra)) ++ end ++ local pos = ctx.pos ++ local first_byte = byte(ctx.code, ctx.pos+1) ++ --Examine if the next instruction is 16-bits or 32-bits ++ if(band(first_byte, 3) < 3) then ++ ctx.pos = pos + 2 ++ else ++ ctx.pos = pos + 4 ++ end ++end ++ ++-- Fallback for unknown opcodes. 
++local function unknown(ctx) ++ return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) ++end ++ ++local function get_le(ctx) ++ local pos = ctx.pos ++ --Examine if the next instruction is 16-bits or 32-bits ++ local first_byte = byte(ctx.code, pos+1) ++ if(band(first_byte, 3) < 3) then --checking first two bits of opcode ++ local b0, b1 = byte(ctx.code, pos+1, pos+2) ++ return bor(lshift(b1, 8), b0) ++ else ++ local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) ++ return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) ++ end ++end ++ ++local function parse_W(opcode) ++ local part1 = band(rshift(opcode, 7), 15) --9:6 ++ local part2 = band(rshift(opcode, 11), 3) --5:4 ++ local part3 = band(rshift(opcode, 5), 1)--3 ++ local part4 = band(rshift(opcode, 6), 1)--2 ++ return bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 4), ++ lshift(part3, 3), lshift(part4, 2)) ++end ++ ++local function parse_x(opcode) ++ local part1 = band(rshift(opcode, 12), 1) --5 ++ local part2 = band(rshift(opcode, 2), 31) --4:0 ++ if(part1 == 1) then ++ return bor(lshift(1, 31), lshift(0x1ffffff, 6), lshift(part1, 5), part2) ++ else ++ return bor(lshift(0, 31), lshift(part1, 5), part2) ++ end ++end ++ ++local function parse_X(opcode) ++ local part1 = band(rshift(opcode, 12), 1) --12 ++ local part2 = band(rshift(opcode, 3), 3) --8:7 ++ local part3 = band(rshift(opcode, 5), 1) --6 ++ local part4 = band(rshift(opcode, 2), 1) --5 ++ local part5 = band(rshift(opcode, 6), 1) --4 ++ if(part1 == 1) then ++ return bor(lshift(1, 31), lshift(0x3fffff, 9), lshift(part2, 7), ++ lshift(part3, 6), lshift(part4, 5), lshift(part5, 4)) ++ else ++ return bor(lshift(0, 31), lshift(part2, 7), lshift(part3, 6), ++ lshift(part4, 5), lshift(part5, 4)) ++ end ++end ++ ++local function parse_S(opcode) ++ local part1 = band(rshift(opcode, 25), 127) --11:5 ++ local sign = band(rshift(part1, 6), 1) ++ local part2 = band(rshift(opcode, 7), 31) --4:0 ++ if (sign == 1) then ++ return bor(lshift(1, 31), lshift(0x7ffff, 
12), lshift(part1, 5), part2) ++ else ++ return bor(lshift(0, 31), lshift(part1, 5), part2) ++ end ++end ++ ++local function parse_B(opcode) ++ local part1 = band(rshift(opcode, 7), 1) --11 ++ local part2 = band(rshift(opcode, 25), 63) --10:5 ++ local part3 = band(rshift(opcode, 8), 15) -- 4 : 1 ++ if (part1 == 1) then ++ return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11), ++ lshift(part2, 5), lshift(part3, 1), 0) ++ else ++ return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 5), ++ lshift(part3, 1), 0) ++ end ++end ++ ++local function parse_q(opcode) ++ local part1 = band(rshift(opcode, 12), 1) --8 ++ local part2 = band(rshift(opcode, 5), 3) --7:6 ++ local part3 = band(rshift(opcode, 2), 1) --5 ++ local part4 = band(rshift(opcode, 10), 3) --4:3 ++ local part5 = band(rshift(opcode, 3), 3) --2:1 ++ if(part1 == 1) then ++ return bor(lshift(1, 31), lshift(0x7fffff, 8), lshift(part2, 6), ++ lshift(part3, 5), lshift(part4, 3), lshift(part5, 1)) ++ else ++ return bor(lshift(0, 31), lshift(part2, 6), lshift(part3, 5), ++ lshift(part4, 3), lshift(part5, 1)) ++ end ++end ++ ++local function parse_J(opcode) ++ local part1 = band(rshift(opcode, 31), 1) --20 ++ local part2 = band(rshift(opcode, 12), 255) -- 19:12 ++ local part3 = band(rshift(opcode, 20), 1) --11 ++ local part4 = band(rshift(opcode, 21), 1023) --10:1 ++ if(part1 == 1) then ++ return bor(lshift(1, 31), lshift(0x7ff, 20), lshift(part2, 12), ++ lshift(part3, 11), lshift(part4, 1)) ++ else ++ return bor(lshift(0, 31), lshift(0, 20), lshift(part2, 12), ++ lshift(part3, 11), lshift(part4, 1)) ++ end ++end ++ ++local function parse_T(opcode) ++ local part1 = band(rshift(opcode, 12), 1) --11 ++ local part2 = band(rshift(opcode, 8), 1) --10 ++ local part3 = band(rshift(opcode, 9), 3)--9:8 ++ local part4 = band(rshift(opcode, 6), 1) --7 ++ local part5 = band(rshift(opcode, 7), 1) -- 6 ++ local part6 = band(rshift(opcode, 2), 1) --5 ++ local part7 = band(rshift(opcode, 11), 1) --4 ++ local part8 = 
band(rshift(opcode, 3), 7) --3:1 ++ if(part1 == 1) then ++ return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11), ++ lshift(part2, 10), lshift(part3, 8), lshift(part4, 7), ++ lshift(part5, 6), lshift(part6, 5), lshift(part7, 4), ++ lshift(part8, 1)) ++ else ++ return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 10), ++ lshift(part3, 8), lshift(part4, 7), lshift(part5, 6), ++ lshift(part6, 5), lshift(part7, 4), lshift(part8, 1)) ++ end ++end ++ ++local function parse_K(opcode) ++ local part1 = band(rshift(opcode, 12), 1) --5 17 ++ local part2 = band(rshift(opcode, 2), 31) --4:0 16:12 ++ if(part1 == 1) then ++ return bor(lshift(0, 31), lshift(0x7fff, 5), part2) ++ else ++ return bor(lshift(0, 31), lshift(part1, 5), part2) ++ end ++end ++ ++-- Disassemble a single instruction. ++local function disass_ins(ctx) ++ local op = ctx:get() ++ local operands = {} ++ local last = nil ++ ctx.op = op ++ ctx.rel =nil ++ ++ local opat = 0 ++ --for compressed instructions ++ if(band(op, 3) < 3) then ++ opat = ctx.map_compr[band(op, 3)] ++ while type(opat) ~= "string" do ++ if not opat then return unknown(ctx) end ++ local test = band(rshift(op, opat.shift), opat.mask) ++ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ ++ end ++ else ++ opat = ctx.map_pri[band(op,127)] ++ while type(opat) ~= "string" do ++ if not opat then return unknown(ctx) end ++ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ ++ end ++ end ++ local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") ++ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") ++ local a1, a2 = 0 ++ if altname then ++ pat = pat2 ++ end ++ ++ local alias_done = false --variable for the case of 2 pseudoinstructions, if both parameters are x0, 0 ++ ++ for p in gmatch(pat, ".") do ++ local x = nil ++ if p == "D" then ++ x = map_gpr[band(rshift(op, 7), 31)] ++ elseif p == "F" then ++ x = map_fgpr[band(rshift(op, 7), 31)] ++ elseif p == "R" then ++ x = map_gpr[band(rshift(op, 15), 31)] ++ elseif 
p == "G" then ++ x = map_fgpr[band(rshift(op, 15), 31)] ++ elseif p == "r" then ++ x = map_gpr[band(rshift(op, 20), 31)] ++ if(name == "sb" or name == "sh" or name == "sw" or name == "sd") then ++ local temp = last --because of the diffrent order of the characters ++ operands[#operands] = x ++ x = temp ++ end ++ elseif p == "g" then ++ x = map_fgpr[band(rshift(op, 20), 31)] ++ if(name == "fsw" or name == "fsd") then ++ local temp = last ++ operands[#operands] = x ++ x = temp ++ end ++ elseif p == "Z" then ++ x = map_gpr[8 + band(rshift(op, 2), 7)] ++ elseif p == "N" then ++ x = map_fgpr[8 + band(rshift(op, 2), 7)] ++ elseif p == "M" then ++ x = map_gpr[8 + band(rshift(op, 7), 7)] ++ elseif p == "E" then ++ x = map_gpr[band(rshift(op, 2), 31)] ++ elseif p == "W" then ++ local uimm = parse_W(op) ++ x = format("%s,%d", "sp", uimm) ++ elseif p == "x" then ++ x = parse_x(op) ++ elseif p == "h" then ++ local part1 = band(rshift(op, 5), 3) --7:6 ++ local part2 = band(rshift(op, 10), 7) --5:3 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 3)) ++ operands[#operands] = format("%d(%s)", uimm, last) ++ elseif p == "X" then ++ local imm = parse_X(op) ++ x = format("%s,%d", "sp", imm) ++ elseif p == "O" then ++ x = format("(%s)", map_gpr[band(rshift(op, 15), 31)]) ++ elseif p == "H" then ++ x = map_fgpr[band(rshift(op, 27), 31)] ++ elseif p == "L" then ++ local register = map_gpr[band(rshift(op, 15), 31)] ++ local disp = arshift(op, 20) ++ x = format("%d(%s)", disp, register) ++ elseif p == "P" then -- XTheadMemPair ++ local register = map_gpr[band(rshift(op, 15), 31)] ++ local disp = band(arshift(op, 25), 3) ++ local isword = bxor(band(arshift(op, 26), 1), 1) ++ x = format("(%s), %d, %d", register, disp, isword and 3 or 4) ++ elseif p == "I" then ++ x = arshift(op, 20) ++ --different for jalr ++ if(name == "jalr") then ++ local reg = map_gpr[band(rshift(op, 15), 31)] ++ if(ctx.reltab[reg] == nil) then ++ operands[#operands] = format("%d(%s)", x, last) ++ 
else ++ local target = ctx.reltab[reg] + x ++ operands[#operands] = format("%d(%s) #0x%08x", x, last, target) ++ ctx.rel = target ++ ctx.reltab[reg] = nil --assume no reuses of the register ++ end ++ x = nil --not to add additional operand ++ end ++ elseif p == "i" then ++ --both for RV32I AND RV64I ++ local value = band(arshift(op, 20), 63) ++ x = string.format("%d", value) ++ elseif p == "j" then -- XThead imm1[31..26] ++ local value = band(rshift(op, 26), 63) ++ x = string.format("%d", value) ++ elseif p == "v" then --XThead imm[2][26..25] ++ local value = band(rshift(op, 25), 3) ++ x = string.format("%d", value) ++ elseif p == "S" then ++ local register = map_gpr[band(rshift(op, 15), 31)] --register ++ local imm = parse_S(op) ++ x = format("%d(%s)", imm, register) ++ elseif p == "n" then ++ local part1 = band(rshift(op, 5), 1) --6 ++ local part2 = band(rshift(op, 10), 7) --5:3 ++ local part3 = band(rshift(op, 6), 1) --2 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3), ++ lshift(part3, 2)) ++ operands[#operands] = format("%d(%s)", uimm, last) ++ elseif p == "A" then ++ local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)] ++ ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12) ++ x = format("0x%x", value) ++ elseif p == "B" then ++ x = ctx.addr + ctx.pos + parse_B(op) ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "U" then ++ local value = band(rshift(op, 12), 0xfffff) ++ x = string.format("0x%x", value) ++ elseif p == "Q" then ++ local part1 = band(rshift(op, 2), 7) --8:6 ++ local part2 = band(rshift(op, 12), 1) --5 ++ local part3 = band(rshift(op, 5), 3) --4:3 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), ++ lshift(part3, 3)) ++ x = format("%d(%s)", uimm, "sp") ++ elseif p == "q" then ++ x = ctx.addr + ctx.pos + parse_q(op) ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "J" then ++ x = ctx.addr + ctx.pos + parse_J(op) ++ ctx.rel = x ++ x = format("0x%08x", x) 
++ elseif p == "K" then ++ local value = parse_K(op) ++ x = string.format("0x%x", value) ++ elseif p == "Y" then ++ local part1 = band(rshift(op, 2), 3) --7:6 ++ local part2 = band(rshift(op, 12), 1) --5 ++ local part3 = band(rshift(op, 4), 7) --4:2 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), ++ lshift(part3, 2)) ++ x = format("%d(%s)", uimm, "sp") ++ elseif p == "o" then -- rounding mode ++ x = map_rm[band(rshift(op, 12), 7)] ++ elseif p == "y" then -- fli lut ++ x = map_fli[band(rshift(op, 15), 31)] ++ elseif p == "1" then ++ local part1 = band(rshift(op, 12), 1) --5 ++ local part2 = band(rshift(op, 2), 31) --4:0 ++ local uimm = bor(lshift(0, 31), lshift(part1, 5), part2) ++ x = string.format("0x%x", uimm) ++ elseif p == "T" then ++ x = ctx.addr + ctx.pos + parse_T(op) ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "t" then ++ local part1 = band(rshift(op, 7), 7) --8:6 ++ local part2 = band(rshift(op, 10), 7) --5:3 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3)) ++ x = format("%d(%s)", uimm, "sp") ++ elseif p == "u" then ++ local part1 = band(rshift(op, 7), 3) --7:6 ++ local part2 = band(rshift(op, 9), 15) --5:2 ++ local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2)) ++ x = format("%d(%s)", uimm, "sp") ++ elseif p == "V" then ++ x = map_fgpr[band(rshift(op, 2), 31)] ++ elseif p == "0" then --PSEUDOINSTRUCTIONS ++ if (last == "zero" or last == 0) then ++ local n = #operands ++ operands[n] = nil ++ last = operands[n-1] ++ local a1, a2 = match(altname, "([^|]*)|(.*)") ++ if a1 then name, altname = a1, a2 ++ else name = altname end ++ alias_done = true ++ end ++ elseif (p == "4") then ++ if(last == -1) then ++ name = altname ++ operands[#operands] = nil ++ end ++ elseif (p == "5") then ++ if(last == 1) then ++ name = altname ++ operands[#operands] = nil ++ end ++ elseif (p == "6") then ++ if(last == operands[#operands - 1]) then ++ name = altname ++ operands[#operands] = nil ++ end ++ elseif 
(p == "7") then --jalr rs ++ local value = string.sub(operands[#operands], 1, 1) ++ local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1) ++ if(value == "0" and ++ (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero")) then ++ if(operands[#operands - 1] == "zero") then ++ name = altname ++ end ++ operands[#operands] = nil ++ operands[#operands] = reg ++ end ++ elseif (p == "2" and alias_done == false) then ++ if (last == "zero" or last == 0) then ++ local a1, a2 = match(altname, "([^|]*)|(.*)") ++ name = a2 ++ operands[#operands] = nil ++ end ++ end ++ if x then operands[#operands+1] = x; last = x end ++ end ++ return putop(ctx, name, operands) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Disassemble a block of code. ++local function disass_block(ctx, ofs, len) ++ if not ofs then ++ ofs = 0 ++ end ++ local stop = len and ofs+len or #ctx.code ++ --instructions can be both 32 and 16 bits ++ stop = stop - stop % 2 ++ ctx.pos = ofs - ofs % 2 ++ ctx.rel = nil ++ while ctx.pos < stop do disass_ins(ctx) end ++end ++ ++-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). ++local function create(code, addr, out) ++ local ctx = {} ++ ctx.code = code ++ ctx.addr = addr or 0 ++ ctx.out = out or io.write ++ ctx.symtab = {} ++ ctx.disass = disass_block ++ ctx.hexdump = 8 ++ ctx.get = get_le ++ ctx.map_pri = map_pri ++ ctx.map_compr = map_compr ++ ctx.reltab = {} ++ return ctx ++end ++ ++-- Simple API: disassemble code (a string) at address and output via out. ++local function disass(code, addr, out) ++ create(code, addr, out):disass(addr) ++end ++ ++-- Return register name for RID. ++local function regname(r) ++ if r < 32 then return map_gpr[r] end ++ return "f"..(r-32) ++end ++ ++-- Public module functions. 
++return { ++ create = create, ++ disass = disass, ++ regname = regname ++} +diff --git a/src/jit/dis_riscv64.lua b/src/jit/dis_riscv64.lua +new file mode 100644 +index 000000000..fd6ce2768 +--- /dev/null ++++ b/src/jit/dis_riscv64.lua +@@ -0,0 +1,16 @@ ++---------------------------------------------------------------------------- ++-- LuaJIT RISC-V 64 disassembler wrapper module. ++-- ++-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. ++-- Released under the MIT license. See Copyright Notice in luajit.h ++---------------------------------------------------------------------------- ++-- This module just exports the default riscv little-endian functions from the ++-- RISC-V disassembler module. All the interesting stuff is there. ++------------------------------------------------------------------------------ ++ ++local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv") ++return { ++ create = dis_riscv.create, ++ disass = dis_riscv.disass, ++ regname = dis_riscv.regname ++} +\ No newline at end of file + +From 1ccc3317a5f395fab6800642b357437e9c90cc10 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 6 Mar 2024 09:50:08 +0800 +Subject: [PATCH 20/22] riscv(misc): add support in Makefile + +--- + Makefile | 1 + + src/Makefile | 8 ++++++++ + 2 files changed, 9 insertions(+) + +diff --git a/Makefile b/Makefile +index c41b3345d..dd33fedcb 100644 +--- a/Makefile ++++ b/Makefile +@@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ + dis_mips64.lua dis_mips64el.lua \ + dis_mips64r6.lua dis_mips64r6el.lua \ ++ dis_riscv.lua dis_riscv64.lua \ + vmdef.lua + + ifeq (,$(findstring Windows,$(OS))) +diff --git a/src/Makefile b/src/Makefile +index 8e544f700..c6b53bc2f 100644 +--- a/src/Makefile ++++ b/src/Makefile +@@ -52,6 +52,7 @@ CCOPT_arm= + CCOPT_arm64= + CCOPT_ppc= + CCOPT_mips= ++CCOPT_riscv64= + # + #CCDEBUG= + # Uncomment the next line to generate debug 
information: +@@ -269,6 +270,9 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) + else + TARGET_LJARCH= mips + endif ++else ++ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) ++ TARGET_LJARCH= riscv64 + else + $(error Unsupported target architecture) + endif +@@ -278,6 +282,7 @@ endif + endif + endif + endif ++endif + + ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) + TARGET_SYS= PS3 +@@ -484,6 +489,9 @@ ifeq (ppc,$(TARGET_LJARCH)) + DASM_AFLAGS+= -D ELFV2 + endif + endif ++ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D RISCV64 ++endif + endif + endif + + +From 5c50adf95206cbb9a68ee6ea2ffd6a1a4b84f5b8 Mon Sep 17 00:00:00 2001 +From: gns +Date: Wed, 21 Aug 2024 16:39:26 +0800 +Subject: [PATCH 21/22] riscv(support,linux): use HWPROBE for ISE detection + +Current SIGILL handler appears to have weird issues with libluajit on +some platform. Considering 6.6 kernel is becoming more common, switch +to HWPROBE for better compatibility. +--- + src/lib_jit.c | 79 +++++++++++++++++++++++++-------------------------- + src/lj_jit.h | 29 +++++++++++++++++++ + 2 files changed, 67 insertions(+), 41 deletions(-) + +diff --git a/src/lib_jit.c b/src/lib_jit.c +index df25905b1..ee892c838 100644 +--- a/src/lib_jit.c ++++ b/src/lib_jit.c +@@ -698,23 +698,26 @@ JIT_PARAMDEF(JIT_PARAMINIT) + #endif + + #if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX +-#include +-#include +-static sigjmp_buf sigbuf = {0}; +-static void detect_sigill(int sig) +-{ +- siglongjmp(sigbuf, 1); +-} ++ ++#if LJ_TARGET_LINUX ++#include ++ ++struct riscv_hwprobe hwprobe_requests[] = { ++ {RISCV_HWPROBE_KEY_IMA_EXT_0} ++}; ++ ++const uint64_t *hwprobe_ext = &hwprobe_requests[0].value; ++ ++int hwprobe_ret = 0; ++#endif + + static int riscv_compressed() + { + #if defined(__riscv_c) || defined(__riscv_compressed) + /* Don't bother checking for RVC -- would crash before getting here. 
*/ + return 1; +-#elif defined(__GNUC__) +- /* c.nop; c.nop; */ +- __asm__(".4byte 0x00010001"); +- return 1; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_IMA_C)) ? 1 : 0; + #else + return 0; + #endif +@@ -725,11 +728,8 @@ static int riscv_zba() + #if defined(__riscv_b) || defined(__riscv_zba) + /* Don't bother checking for Zba -- would crash before getting here. */ + return 1; +-#elif defined(__GNUC__) +- /* Don't bother verifying the result, just check if the instruction exists. */ +- /* add.uw zero, zero, zero */ +- __asm__(".4byte 0x0800003b"); +- return 1; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBA)) ? 1 : 0; + #else + return 0; + #endif +@@ -740,11 +740,8 @@ static int riscv_zbb() + #if defined(__riscv_b) || defined(__riscv_zbb) + /* Don't bother checking for Zbb -- would crash before getting here. */ + return 1; +-#elif defined(__GNUC__) +- register int t asm ("a0"); +- /* addi a0, zero, 255; sext.b a0, a0; */ +- __asm__("addi a0, zero, 255\n\t.4byte 0x60451513"); +- return t < 0; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBB)) ? 1 : 0; + #else + return 0; + #endif +@@ -755,10 +752,8 @@ static int riscv_zicond() + #if defined(__riscv_zicond) + /* Don't bother checking for Zicond -- would crash before getting here. */ + return 1; +-#elif defined(__GNUC__) +- /* czero.eqz zero, zero, zero; */ +- __asm__(".4byte 0x0e005033"); +- return 1; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZICOND)) ? 1 : 0; + #else + return 0; + #endif +@@ -769,6 +764,8 @@ static int riscv_zfa() + #if defined(__riscv_zfa) + /* Don't bother checking for Zfa -- would crash before getting here. */ + return 1; ++#elif LJ_TARGET_LINUX ++ return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZFA)) ? 
1 : 0; + #else + return 0; + #endif +@@ -782,23 +779,19 @@ static int riscv_xthead() + && defined(__riscv_xtheadmac)) + /* Don't bother checking for XThead -- would crash before getting here. */ + return 1; +-#elif defined(__GNUC__) +- register int t asm ("a0"); +- /* C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". */ +- /* Therefore assume XThead* are present if XTheadBb is present. */ +- /* addi a0, zero, 255; th.ext a0, a0, 7, 0; */ +- __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b"); +- return t == -1; /* In case of collision with other vendor extensions. */ + #else +- return 0; ++/* ++** Hardcoded as there's no easy way of detection: ++** - SIGILL have some trouble with libluajit as we speak ++** - Checking mvendorid looks good, but might not be reliable. ++*/ ++ return 0; + #endif + } + + static uint32_t riscv_probe(int (*func)(void), uint32_t flag) + { +- if (sigsetjmp(sigbuf, 1) == 0) { +- return func() ? flag : 0; +- } else return 0; ++ return func() ? flag : 0; + } + #endif + +@@ -877,17 +870,21 @@ static uint32_t jit_cpudetect(void) + + #elif LJ_TARGET_RISCV64 + #if LJ_HASJIT +- /* SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. */ +- struct sigaction old = {0}, act = {0}; +- act.sa_handler = detect_sigill; +- sigaction(SIGILL, &act, &old); ++ ++#if LJ_TARGET_LINUX ++ /* HWPROBE-based detection of RVC, Zba, Zbb and Zicond. */ ++ hwprobe_ret = syscall(__NR_riscv_hwprobe, &hwprobe_requests, ++ sizeof(hwprobe_requests) / sizeof(struct riscv_hwprobe), 0, ++ NULL, 0); ++ + flags |= riscv_probe(riscv_compressed, JIT_F_RVC); + flags |= riscv_probe(riscv_zba, JIT_F_RVZba); + flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); + flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond); + flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa); + flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); +- sigaction(SIGILL, &old, NULL); ++ ++#endif + + /* Detect V/P? */ + /* V have no hardware available, P not ratified yet. 
*/ +diff --git a/src/lj_jit.h b/src/lj_jit.h +index b6aaf21dc..9948e3c07 100644 +--- a/src/lj_jit.h ++++ b/src/lj_jit.h +@@ -79,6 +79,35 @@ + + #define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead" + ++#if LJ_TARGET_LINUX ++#include ++ ++#ifndef __NR_riscv_hwprobe ++#ifndef __NR_arch_specific_syscall ++#define __NR_arch_specific_syscall 244 ++#endif ++#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14) ++#endif ++ ++struct riscv_hwprobe { ++ int64_t key; ++ uint64_t value; ++}; ++ ++#define RISCV_HWPROBE_KEY_MVENDORID 0 ++#define RISCV_HWPROBE_KEY_MARCHID 1 ++#define RISCV_HWPROBE_KEY_MIMPID 2 ++#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3 ++#define RISCV_HWPROBE_KEY_IMA_EXT_0 4 ++ ++#define RISCV_HWPROBE_IMA_C (1 << 1) ++#define RISCV_HWPROBE_EXT_ZBA (1 << 3) ++#define RISCV_HWPROBE_EXT_ZBB (1 << 4) ++#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32) ++#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35) ++ ++#endif ++ + #else + + #define JIT_F_CPUSTRING "" + +From d55712e1183d0ccaa1132456165342d431433e37 Mon Sep 17 00:00:00 2001 +From: gns +Date: Thu, 16 Jan 2025 01:02:19 +0800 +Subject: [PATCH 22/22] riscv(interp): strip excessive extended branch (^B+J) + +--- + src/vm_riscv64.dasc | 104 ++++++++++++++++++++++---------------------- + 1 file changed, 52 insertions(+), 52 deletions(-) + +diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc +index 09ac0cf05..67f8f2c0a 100644 +--- a/src/vm_riscv64.dasc ++++ b/src/vm_riscv64.dasc +@@ -552,7 +552,7 @@ static void build_subroutines(BuildCtx *ctx) + | + | // Return from pcall or xpcall fast func. + | mov_true TMP1 +- | bxeqz TMP0, ->cont_dispatch ++ | beqz TMP0, ->cont_dispatch + | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. + | mv BASE, TMP2 // Restore caller base. + | // Prepending may overwrite the pcall frame, so do it at the end. +@@ -563,9 +563,9 @@ static void build_subroutines(BuildCtx *ctx) + | addiw RD, RD, 8 // RD = (nresults+1)*8. 
+ | andi TMP0, PC, FRAME_TYPE + | li CRET1, LUA_YIELD +- | bxeqz RD, ->vm_unwind_c_eh ++ | beqz RD, ->vm_unwind_c_eh + | mv MULTRES, RD +- | bxeqz TMP0, ->BC_RET_Z // Handle regular return to Lua. ++ | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return +@@ -573,7 +573,7 @@ static void build_subroutines(BuildCtx *ctx) + | andi TMP2, PC, ~FRAME_TYPEP + | xori TMP0, TMP0, FRAME_C + | sub TMP2, BASE, TMP2 // TMP2 = previous base. +- | bxnez TMP0, ->vm_returnp ++ | bnez TMP0, ->vm_returnp + | + | addiw TMP1, RD, -8 + | sd TMP2, L->base +@@ -742,7 +742,7 @@ static void build_subroutines(BuildCtx *ctx) + | andi TMP0, PC, FRAME_TYPE + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM +- | bxeqz TMP0, ->BC_RET_Z ++ | beqz TMP0, ->BC_RET_Z + | j ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. +@@ -1002,7 +1002,7 @@ static void build_subroutines(BuildCtx *ctx) + | // Returns 0/1 or TValue * (metamethod). + |3: + | sltiu TMP1, CRET1, 2 +- | bxeqz TMP1, ->vmeta_binop ++ | beqz TMP1, ->vmeta_binop + | negw TMP2, CRET1 + |4: + | lhu RD, OFS_RD(PC) +@@ -1090,7 +1090,7 @@ static void build_subroutines(BuildCtx *ctx) + | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | call_intern vmeta_arith, lj_meta_arith + | // Returns NULL (finished) or TValue * (metamethod). +- | bxeqz CRET1, ->cont_nop ++ | beqz CRET1, ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: +@@ -1114,7 +1114,7 @@ static void build_subroutines(BuildCtx *ctx) + | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). + #if LJ_52 +- | bxnez CRET1, ->vmeta_binop // Binop call for compatibility. ++ | bnez CRET1, ->vmeta_binop // Binop call for compatibility. + | mv CARG1, MULTRES + | j ->BC_LEN_Z + #else +@@ -1200,7 +1200,7 @@ static void build_subroutines(BuildCtx *ctx) + |->ff_ .. 
name: + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) +- | bxeqz NARGS8:RC, ->fff_fallback ++ | beqz NARGS8:RC, ->fff_fallback + | checknum CARG1, ->fff_fallback + |.endmacro + | +@@ -1209,7 +1209,7 @@ static void build_subroutines(BuildCtx *ctx) + | ld CARG1, 0(BASE) + | sltiu TMP0, NARGS8:RC, 16 + | ld CARG2, 8(BASE) +- | bxnez TMP0, ->fff_fallback ++ | bnez TMP0, ->fff_fallback + | gettp TMP1, CARG1 + | gettp TMP2, CARG2 + | sltiu TMP1, TMP1, LJ_TISNUM +@@ -1217,7 +1217,7 @@ static void build_subroutines(BuildCtx *ctx) + | fld FARG1, 0(BASE) + | and TMP1, TMP1, TMP2 + | fld FARG2, 8(BASE) +- | bxeqz TMP1, ->fff_fallback ++ | beqz TMP1, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. +@@ -1277,7 +1277,7 @@ static void build_subroutines(BuildCtx *ctx) + |2: + | ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] + | li CARG1, LJ_TNIL +- | bxeqz TAB:RB, ->fff_restv ++ | beqz TAB:RB, ->fff_restv + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node +@@ -1299,7 +1299,7 @@ static void build_subroutines(BuildCtx *ctx) + | settp CARG1, RB, TMP3 + | j ->fff_restv // Not found, keep default result. + |5: +- | bxne CARG1, TISNIL, ->fff_restv ++ | bne CARG1, TISNIL, ->fff_restv + | j <4 // Ditto for nil value. + | + |6: +@@ -1325,7 +1325,7 @@ static void build_subroutines(BuildCtx *ctx) + | bxnez TMP3, ->fff_fallback + | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table) + | sd TAB:CARG2, TAB:TMP1->metatable +- | bxeqz TMP3, ->fff_restv ++ | beqz TMP3, ->fff_restv + | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv + | + |.ffunc rawget +@@ -1360,7 +1360,7 @@ static void build_subroutines(BuildCtx *ctx) + | gettp TMP0, CARG1 + | addi TMP1, TMP0, -LJ_TSTR + | // A __tostring method in the string base metatable is ignored. +- | bxeqz TMP1, ->fff_restv // String key? ++ | beqz TMP1, ->fff_restv // String key? + | // Handle numbers inline, unless a number base metatable is present. 
+ | ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM] + | sltu TMP0, TISNUM, TMP0 +@@ -1390,10 +1390,10 @@ static void build_subroutines(BuildCtx *ctx) + | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | li RD, (2+1)*8 +- | bxgtz CRET1, ->fff_res // Found key/value. ++ | bgtz CRET1, ->fff_res // Found key/value. + | mv TMP1, CRET1 + | mv CARG1, TISNIL +- | bxeqz TMP1, ->fff_restv // End of traversal: return nil. ++ | beqz TMP1, ->fff_restv // End of traversal: return nil. + | ld CFUNC:RB, FRAME_FUNC(BASE) + | li RC, 2*8 + | cleartp CFUNC:RB +@@ -1433,19 +1433,19 @@ static void build_subroutines(BuildCtx *ctx) + | ld TMP1, 0(TMP3) + |1: + | li RD, (0+1)*8 +- | bxeq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. ++ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. + | sd TMP1, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | lw TMP0, TAB:CARG1->hmask + | li RD, (0+1)*8 +- | bxeqz TMP0, ->fff_res ++ | beqz TMP0, ->fff_res + | mv CARG2, TMP2 + | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | li RD, (0+1)*8 +- | bxeqz CRET1, ->fff_res ++ | beqz CRET1, ->fff_res + | ld TMP1, 0(CRET1) + | j <1 + | +@@ -1482,7 +1482,7 @@ static void build_subroutines(BuildCtx *ctx) + | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT + | andi TMP3, TMP3, 1 + | addi PC, TMP3, 16+FRAME_PCALL +- | bxeqz NARGS8:RC, ->vm_call_dispatch ++ | beqz NARGS8:RC, ->vm_call_dispatch + |1: + | add TMP0, BASE, NARGS8:RC + |2: +@@ -1539,17 +1539,17 @@ static void build_subroutines(BuildCtx *ctx) + | xor CARG2, CARG2, TMP3 // CARG2 = TMP4 ? CARG2 : TMP3 + | and CARG2, CARG2, TMP4 + | xor CARG2, TMP3, CARG2 +- | bxgtz CARG4, ->fff_fallback // st > LUA_YIELD? ++ | bgtz CARG4, ->fff_fallback // st > LUA_YIELD? 
+ | xor TMP2, TMP2, CARG3 + | or CARG4, TMP2, TMP0 +- | bxnez TMP1, ->fff_fallback // cframe != 0? ++ | bnez TMP1, ->fff_fallback // cframe != 0? + | ld TMP0, L:CARG1->maxstack + | ld PC, FRAME_PC(BASE) +- | bxeqz CARG4, ->fff_fallback // base == top && st == 0? ++ | beqz CARG4, ->fff_fallback // base == top && st == 0? + | add TMP2, CARG2, NARGS8:RC + | sd BASE, L->base + | sd PC, SAVE_PC(sp) +- | bxltu TMP0, TMP2, ->fff_fallback // Stack overflow? ++ | bltu TMP0, TMP2, ->fff_fallback // Stack overflow? + |1: + |.if resume + | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. +@@ -1654,7 +1654,7 @@ static void build_subroutines(BuildCtx *ctx) + | sd BASE, L->base + | andi TMP0, TMP0, CFRAME_RESUME + | sd TMP1, L->top +- | bxeqz TMP0, ->fff_fallback ++ | beqz TMP0, ->fff_fallback + | sd x0, L->cframe + | sb CRET1, L->status + | j ->vm_leave_unw +@@ -1665,14 +1665,14 @@ static void build_subroutines(BuildCtx *ctx) + |->ff_math_ .. func: + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 +- | bxeqz NARGS8:RC, ->fff_fallback ++ | beqz NARGS8:RC, ->fff_fallback + | fmv.d.x FARG1, CARG1 +- | bxeq TMP0, TISNUM, ->fff_restv ++ | beq TMP0, TISNUM, ->fff_restv + | srli TMP1, CARG1, 52 // Extract exponent (and sign). +- | bxgeu TMP0, TISNUM, ->fff_fallback ++ | bgeu TMP0, TISNUM, ->fff_fallback + | andi TMP1, TMP1, 0x7ff // Extract exponent. + | slti TMP2, TMP1, 1023 + 52 + 1 // 1023: Bias, 52: Max fraction +- | bxeqz TMP2, ->fff_resn // Less than 2^52 / Not NaN? ++ | beqz TMP2, ->fff_resn // Less than 2^52 / Not NaN? + | fcvt.l.d TMP3, FARG1, rm + | fcvt.d.l FTMP1, TMP3 + | fsgnj.d FRET1, FTMP1, FARG1 +@@ -1692,7 +1692,7 @@ static void build_subroutines(BuildCtx *ctx) + | sub CARG1, TMP1, TMP0 + | slli TMP3, CARG1, 32 + | settp CARG1, TISNUM +- | bxgez TMP3, ->fff_restv ++ | bgez TMP3, ->fff_restv + | lui CARG1, 0x41e00 // 2^31 as a double. 
+ | slli CARG1, CARG1, 32 + | j ->fff_restv +@@ -1700,7 +1700,7 @@ static void build_subroutines(BuildCtx *ctx) + | sltiu TMP2, CARG2, LJ_TISNUM + | slli CARG1, CARG1, 1 + | srli CARG1, CARG1, 1 +- | bxeqz TMP2, ->fff_fallback // int ++ | beqz TMP2, ->fff_fallback // int + |// fallthrough + | + |->fff_restv: +@@ -1756,7 +1756,7 @@ static void build_subroutines(BuildCtx *ctx) + | li TMP1, 8 + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) +- | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. ++ | bne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. + | checknum CARG1, ->fff_fallback + | call_extern ff_math_log, log + | j ->fff_resn +@@ -1810,7 +1810,7 @@ static void build_subroutines(BuildCtx *ctx) + | checkint CARG1, >4 + |1: // Handle integers. + | ld CARG2, 0(RA) +- | bxeq RA, RB, ->fff_restv ++ | beq RA, RB, ->fff_restv + | sext.w CARG1, CARG1 + | checkint CARG2, >3 + | sext.w CARG2, CARG2 +@@ -1839,7 +1839,7 @@ static void build_subroutines(BuildCtx *ctx) + |5: // Handle numbers. + | ld CARG2, 0(RA) + | fld FARG2, 0(RA) +- | bxgeu RA, RB, ->fff_resn ++ | bgeu RA, RB, ->fff_resn + | checknum CARG2, >7 + |6: + |.if ismax +@@ -1870,7 +1870,7 @@ static void build_subroutines(BuildCtx *ctx) + | addi TMP0, TMP0, -LJ_TSTR + | or TMP1, TMP1, TMP0 + | cleartp STR:CARG1 +- | bxnez TMP1, ->fff_fallback // Need exactly 1 string argument. ++ | bnez TMP1, ->fff_fallback // Need exactly 1 string argument. + | lw TMP0, STR:CARG1->len + | ld PC, FRAME_PC(BASE) + | snez RD, TMP0 +@@ -1893,7 +1893,7 @@ static void build_subroutines(BuildCtx *ctx) + | sltu TMP2, TMP2, CARG1 // !(255 < n). 
+ | or TMP1, TMP1, TMP2 + | li CARG3, 1 +- | bxnez TMP1, ->fff_fallback ++ | bnez TMP1, ->fff_fallback + | addi CARG2, sp, TMPD_OFS + | sb CARG1, TMPD(sp) + |->fff_newstr: +@@ -1916,7 +1916,7 @@ static void build_subroutines(BuildCtx *ctx) + | ld CARG3, 16(BASE) + | addi TMP0, NARGS8:RC, -16 + | gettp TMP1, CARG1 +- | bxltz TMP0, ->fff_fallback ++ | bltz TMP0, ->fff_fallback + | cleartp STR:CARG1, CARG1 + | li CARG4, -1 + | beqz TMP0, >1 +@@ -1926,7 +1926,7 @@ static void build_subroutines(BuildCtx *ctx) + | checkint CARG2, ->fff_fallback + | addi TMP0, TMP1, -LJ_TSTR + | sext.w CARG3, CARG2 +- | bxnez TMP0, ->fff_fallback ++ | bnez TMP0, ->fff_fallback + | lw CARG2, STR:CARG1->len + | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end + | addiw TMP0, CARG2, 1 +@@ -1949,7 +1949,7 @@ static void build_subroutines(BuildCtx *ctx) + | sub CARG3, CARG4, CARG3 // len = end - start + | addi CARG2, CARG2, sizeof(GCstr)-1 + | addiw CARG3, CARG3, 1 // len += 1 +- | bxgez CARG3, ->fff_newstr ++ | bgez CARG3, ->fff_newstr + |->fff_emptystr: // Return empty string. + | li TMP1, LJ_TSTR + | addi STR:CARG1, GL, offsetof(global_State, strempty) +@@ -1960,7 +1960,7 @@ static void build_subroutines(BuildCtx *ctx) + | .ffunc string_ .. 
name + | ffgccheck + | ld CARG2, 0(BASE) +- | bxeqz NARGS8:RC, ->fff_fallback ++ | beqz NARGS8:RC, ->fff_fallback + | checkstr STR:CARG2, ->fff_fallback + | addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf) + | ld TMP0, SBUF:CARG1->b +@@ -1982,7 +1982,7 @@ static void build_subroutines(BuildCtx *ctx) + | + |->vm_tobit_fb: + | fld FARG1, 0(BASE) +- | bxeqz TMP1, ->fff_fallback ++ | beqz TMP1, ->fff_fallback + | fadd.d FARG1, FARG1, TOBIT + | fmv.x.w CRET1, FARG1 + | zext.w CRET1, CRET1 +@@ -2004,7 +2004,7 @@ static void build_subroutines(BuildCtx *ctx) + | add TMP3, BASE, NARGS8:RC + |1: + | ld TMP1, 0(TMP2) +- | bxeq TMP2, TMP3, ->fff_resi ++ | beq TMP2, TMP3, ->fff_resi + | gettp TMP0, TMP1 + | addi TMP2, TMP2, 8 + | bne TMP0, TISNUM, >2 +@@ -2015,7 +2015,7 @@ static void build_subroutines(BuildCtx *ctx) + | fld FARG1, -8(TMP2) + | sltiu TMP0, TMP0, LJ_TISNUM + | fadd.d FARG1, FARG1, TOBIT +- | bxeqz TMP0, ->fff_fallback ++ | beqz TMP0, ->fff_fallback + | fmv.x.w TMP1, FARG1 + | zext.w TMP1, TMP1 + | bins CRET1, CRET1, TMP1 +@@ -2063,7 +2063,7 @@ static void build_subroutines(BuildCtx *ctx) + |1: + | gettp TMP0, CARG2 + | zext.w CARG2, CARG2 +- | bxne TMP0, TISNUM, ->fff_fallback ++ | bne TMP0, TISNUM, ->fff_fallback + | sext.w CARG1, CARG1 + | shins CRET1, CARG1, CARG2 + | zext.w CRET1, CRET1 +@@ -2084,7 +2084,7 @@ static void build_subroutines(BuildCtx *ctx) + |1: + | gettp TMP0, CARG2 + | zext.w CARG2, CARG2 +- | bxne TMP0, TISNUM, ->fff_fallback ++ | bne TMP0, TISNUM, ->fff_fallback + | sext.w CARG1, CARG1 + | neg TMP2, CARG2 + | rotinsa TMP1, CARG1, CARG2 +@@ -2115,13 +2115,13 @@ static void build_subroutines(BuildCtx *ctx) + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ld BASE, L->base + | slliw RD, CRET1, 3 +- | bxgtz CRET1, ->fff_res // Returned nresults+1? ++ | bgtz CRET1, ->fff_res // Returned nresults+1? + |1: // Returned 0 or -1: retry fast path. 
+ | ld LFUNC:RB, FRAME_FUNC(BASE) + | ld TMP0, L->top + | sub NARGS8:RC, TMP0, BASE + | cleartp LFUNC:RB +- | bxnez CRET1, ->vm_call_tail // Returned -1? ++ | bnez CRET1, ->vm_call_tail // Returned -1? + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. +@@ -4296,7 +4296,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | + |->BC_RETV_Z: // Non-standard return case. + | andi TMP2, TMP1, FRAME_TYPEP +- | bxnez TMP2, ->vm_return ++ | bnez TMP2, ->vm_return + | // Return from vararg function: relocate BASE down. + | sub BASE, BASE, TMP1 + | ld PC, FRAME_PC(BASE) +@@ -4549,7 +4549,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | ld TMP2, L->maxstack + | lbu TMP1, -4+PC2PROTO(numparams)(PC) + | ld KBASE, -4+PC2PROTO(k)(PC) +- | bxltu TMP2, RA, ->vm_growstack_l ++ | bltu TMP2, RA, ->vm_growstack_l + | slliw TMP1, TMP1, 3 // numparams*8 + |2: + | bltu NARGS8:RC, TMP1, >3 // Check for missing parameters. +@@ -4586,7 +4586,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | addi TMP3, RC, 16+FRAME_VARG + | ld KBASE, -4+PC2PROTO(k)(PC) + | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. +- | bxgeu TMP0, TMP2, ->vm_growstack_l ++ | bgeu TMP0, TMP2, ->vm_growstack_l + | lbu TMP2, -4+PC2PROTO(numparams)(PC) + | mv RA, BASE + | mv RC, TMP1 +@@ -4632,7 +4632,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) + | add RC, BASE, NARGS8:RC + | sd BASE, L->base // base of currently excuting function + | sd RC, L->top +- | bxgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. ++ | bgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. + | li_vmstate C // li TMP0, ~LJ_VMST_C + if (op == BC_FUNCCW) { + | ld CARG2, CFUNC:RB->f -- 2.51.1