aboutsummaryrefslogtreecommitdiff
path: root/3rdparty/tracy/tracy/common
diff options
context:
space:
mode:
Diffstat (limited to '3rdparty/tracy/tracy/common')
-rw-r--r--3rdparty/tracy/tracy/common/TracyAlign.hpp27
-rw-r--r--3rdparty/tracy/tracy/common/TracyAlloc.hpp69
-rw-r--r--3rdparty/tracy/tracy/common/TracyApi.h16
-rw-r--r--3rdparty/tracy/tracy/common/TracyColor.hpp690
-rw-r--r--3rdparty/tracy/tracy/common/TracyForceInline.hpp20
-rw-r--r--3rdparty/tracy/tracy/common/TracyMutex.hpp24
-rw-r--r--3rdparty/tracy/tracy/common/TracyProtocol.hpp139
-rw-r--r--3rdparty/tracy/tracy/common/TracyQueue.hpp850
-rw-r--r--3rdparty/tracy/tracy/common/TracySocket.cpp749
-rw-r--r--3rdparty/tracy/tracy/common/TracySocket.hpp156
-rw-r--r--3rdparty/tracy/tracy/common/TracyStackFrames.cpp122
-rw-r--r--3rdparty/tracy/tracy/common/TracyStackFrames.hpp22
-rw-r--r--3rdparty/tracy/tracy/common/TracySystem.cpp304
-rw-r--r--3rdparty/tracy/tracy/common/TracySystem.hpp32
-rw-r--r--3rdparty/tracy/tracy/common/TracyUwp.hpp11
-rw-r--r--3rdparty/tracy/tracy/common/TracyYield.hpp26
-rw-r--r--3rdparty/tracy/tracy/common/src-from-vcxproj.mk21
-rw-r--r--3rdparty/tracy/tracy/common/tracy_lz4.cpp2492
-rw-r--r--3rdparty/tracy/tracy/common/tracy_lz4.hpp777
-rw-r--r--3rdparty/tracy/tracy/common/tracy_lz4hc.cpp1620
-rw-r--r--3rdparty/tracy/tracy/common/tracy_lz4hc.hpp405
-rw-r--r--3rdparty/tracy/tracy/common/unix-release.mk13
-rw-r--r--3rdparty/tracy/tracy/common/unix.mk82
23 files changed, 8667 insertions, 0 deletions
diff --git a/3rdparty/tracy/tracy/common/TracyAlign.hpp b/3rdparty/tracy/tracy/common/TracyAlign.hpp
new file mode 100644
index 0000000..c3531ba
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyAlign.hpp
@@ -0,0 +1,27 @@
+#ifndef __TRACYALIGN_HPP__
+#define __TRACYALIGN_HPP__
+
+#include <string.h>
+
+#include "TracyForceInline.hpp"
+
+namespace tracy
+{
+
+template<typename T>
+tracy_force_inline T MemRead( const void* ptr )
+{
+ T val;
+ memcpy( &val, ptr, sizeof( T ) );
+ return val;
+}
+
+template<typename T>
+tracy_force_inline void MemWrite( void* ptr, T val )
+{
+ memcpy( ptr, &val, sizeof( T ) );
+}
+
+}
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracyAlloc.hpp b/3rdparty/tracy/tracy/common/TracyAlloc.hpp
new file mode 100644
index 0000000..d3dec12
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyAlloc.hpp
@@ -0,0 +1,69 @@
+#ifndef __TRACYALLOC_HPP__
+#define __TRACYALLOC_HPP__
+
+#include <stdlib.h>
+
+#ifdef TRACY_ENABLE
+# include "TracyApi.h"
+# include "TracyForceInline.hpp"
+# include "../client/tracy_rpmalloc.hpp"
+#endif
+
+namespace tracy
+{
+
+#ifdef TRACY_ENABLE
+TRACY_API void InitRpmalloc();
+#endif
+
+static inline void* tracy_malloc( size_t size )
+{
+#ifdef TRACY_ENABLE
+ InitRpmalloc();
+ return rpmalloc( size );
+#else
+ return malloc( size );
+#endif
+}
+
+static inline void* tracy_malloc_fast( size_t size )
+{
+#ifdef TRACY_ENABLE
+ return rpmalloc( size );
+#else
+ return malloc( size );
+#endif
+}
+
+static inline void tracy_free( void* ptr )
+{
+#ifdef TRACY_ENABLE
+ InitRpmalloc();
+ rpfree( ptr );
+#else
+ free( ptr );
+#endif
+}
+
+static inline void tracy_free_fast( void* ptr )
+{
+#ifdef TRACY_ENABLE
+ rpfree( ptr );
+#else
+ free( ptr );
+#endif
+}
+
+static inline void* tracy_realloc( void* ptr, size_t size )
+{
+#ifdef TRACY_ENABLE
+ InitRpmalloc();
+ return rprealloc( ptr, size );
+#else
+ return realloc( ptr, size );
+#endif
+}
+
+}
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracyApi.h b/3rdparty/tracy/tracy/common/TracyApi.h
new file mode 100644
index 0000000..f396ce0
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyApi.h
@@ -0,0 +1,16 @@
+#ifndef __TRACYAPI_H__
+#define __TRACYAPI_H__
+
+#if defined _WIN32
+# if defined TRACY_EXPORTS
+# define TRACY_API __declspec(dllexport)
+# elif defined TRACY_IMPORTS
+# define TRACY_API __declspec(dllimport)
+# else
+# define TRACY_API
+# endif
+#else
+# define TRACY_API __attribute__((visibility("default")))
+#endif
+
+#endif // __TRACYAPI_H__
diff --git a/3rdparty/tracy/tracy/common/TracyColor.hpp b/3rdparty/tracy/tracy/common/TracyColor.hpp
new file mode 100644
index 0000000..4825c0f
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyColor.hpp
@@ -0,0 +1,690 @@
+#ifndef __TRACYCOLOR_HPP__
+#define __TRACYCOLOR_HPP__
+
+namespace tracy
+{
+struct Color
+{
+enum ColorType
+{
+ Snow = 0xfffafa,
+ GhostWhite = 0xf8f8ff,
+ WhiteSmoke = 0xf5f5f5,
+ Gainsboro = 0xdcdcdc,
+ FloralWhite = 0xfffaf0,
+ OldLace = 0xfdf5e6,
+ Linen = 0xfaf0e6,
+ AntiqueWhite = 0xfaebd7,
+ PapayaWhip = 0xffefd5,
+ BlanchedAlmond = 0xffebcd,
+ Bisque = 0xffe4c4,
+ PeachPuff = 0xffdab9,
+ NavajoWhite = 0xffdead,
+ Moccasin = 0xffe4b5,
+ Cornsilk = 0xfff8dc,
+ Ivory = 0xfffff0,
+ LemonChiffon = 0xfffacd,
+ Seashell = 0xfff5ee,
+ Honeydew = 0xf0fff0,
+ MintCream = 0xf5fffa,
+ Azure = 0xf0ffff,
+ AliceBlue = 0xf0f8ff,
+ Lavender = 0xe6e6fa,
+ LavenderBlush = 0xfff0f5,
+ MistyRose = 0xffe4e1,
+ White = 0xffffff,
+ Black = 0x000000,
+ DarkSlateGray = 0x2f4f4f,
+ DarkSlateGrey = 0x2f4f4f,
+ DimGray = 0x696969,
+ DimGrey = 0x696969,
+ SlateGray = 0x708090,
+ SlateGrey = 0x708090,
+ LightSlateGray = 0x778899,
+ LightSlateGrey = 0x778899,
+ Gray = 0xbebebe,
+ Grey = 0xbebebe,
+ X11Gray = 0xbebebe,
+ X11Grey = 0xbebebe,
+ WebGray = 0x808080,
+ WebGrey = 0x808080,
+ LightGrey = 0xd3d3d3,
+ LightGray = 0xd3d3d3,
+ MidnightBlue = 0x191970,
+ Navy = 0x000080,
+ NavyBlue = 0x000080,
+ CornflowerBlue = 0x6495ed,
+ DarkSlateBlue = 0x483d8b,
+ SlateBlue = 0x6a5acd,
+ MediumSlateBlue = 0x7b68ee,
+ LightSlateBlue = 0x8470ff,
+ MediumBlue = 0x0000cd,
+ RoyalBlue = 0x4169e1,
+ Blue = 0x0000ff,
+ DodgerBlue = 0x1e90ff,
+ DeepSkyBlue = 0x00bfff,
+ SkyBlue = 0x87ceeb,
+ LightSkyBlue = 0x87cefa,
+ SteelBlue = 0x4682b4,
+ LightSteelBlue = 0xb0c4de,
+ LightBlue = 0xadd8e6,
+ PowderBlue = 0xb0e0e6,
+ PaleTurquoise = 0xafeeee,
+ DarkTurquoise = 0x00ced1,
+ MediumTurquoise = 0x48d1cc,
+ Turquoise = 0x40e0d0,
+ Cyan = 0x00ffff,
+ Aqua = 0x00ffff,
+ LightCyan = 0xe0ffff,
+ CadetBlue = 0x5f9ea0,
+ MediumAquamarine = 0x66cdaa,
+ Aquamarine = 0x7fffd4,
+ DarkGreen = 0x006400,
+ DarkOliveGreen = 0x556b2f,
+ DarkSeaGreen = 0x8fbc8f,
+ SeaGreen = 0x2e8b57,
+ MediumSeaGreen = 0x3cb371,
+ LightSeaGreen = 0x20b2aa,
+ PaleGreen = 0x98fb98,
+ SpringGreen = 0x00ff7f,
+ LawnGreen = 0x7cfc00,
+ Green = 0x00ff00,
+ Lime = 0x00ff00,
+ X11Green = 0x00ff00,
+ WebGreen = 0x008000,
+ Chartreuse = 0x7fff00,
+ MediumSpringGreen = 0x00fa9a,
+ GreenYellow = 0xadff2f,
+ LimeGreen = 0x32cd32,
+ YellowGreen = 0x9acd32,
+ ForestGreen = 0x228b22,
+ OliveDrab = 0x6b8e23,
+ DarkKhaki = 0xbdb76b,
+ Khaki = 0xf0e68c,
+ PaleGoldenrod = 0xeee8aa,
+ LightGoldenrodYellow = 0xfafad2,
+ LightYellow = 0xffffe0,
+ Yellow = 0xffff00,
+ Gold = 0xffd700,
+ LightGoldenrod = 0xeedd82,
+ Goldenrod = 0xdaa520,
+ DarkGoldenrod = 0xb8860b,
+ RosyBrown = 0xbc8f8f,
+ IndianRed = 0xcd5c5c,
+ SaddleBrown = 0x8b4513,
+ Sienna = 0xa0522d,
+ Peru = 0xcd853f,
+ Burlywood = 0xdeb887,
+ Beige = 0xf5f5dc,
+ Wheat = 0xf5deb3,
+ SandyBrown = 0xf4a460,
+ Tan = 0xd2b48c,
+ Chocolate = 0xd2691e,
+ Firebrick = 0xb22222,
+ Brown = 0xa52a2a,
+ DarkSalmon = 0xe9967a,
+ Salmon = 0xfa8072,
+ LightSalmon = 0xffa07a,
+ Orange = 0xffa500,
+ DarkOrange = 0xff8c00,
+ Coral = 0xff7f50,
+ LightCoral = 0xf08080,
+ Tomato = 0xff6347,
+ OrangeRed = 0xff4500,
+ Red = 0xff0000,
+ HotPink = 0xff69b4,
+ DeepPink = 0xff1493,
+ Pink = 0xffc0cb,
+ LightPink = 0xffb6c1,
+ PaleVioletRed = 0xdb7093,
+ Maroon = 0xb03060,
+ X11Maroon = 0xb03060,
+ WebMaroon = 0x800000,
+ MediumVioletRed = 0xc71585,
+ VioletRed = 0xd02090,
+ Magenta = 0xff00ff,
+ Fuchsia = 0xff00ff,
+ Violet = 0xee82ee,
+ Plum = 0xdda0dd,
+ Orchid = 0xda70d6,
+ MediumOrchid = 0xba55d3,
+ DarkOrchid = 0x9932cc,
+ DarkViolet = 0x9400d3,
+ BlueViolet = 0x8a2be2,
+ Purple = 0xa020f0,
+ X11Purple = 0xa020f0,
+ WebPurple = 0x800080,
+ MediumPurple = 0x9370db,
+ Thistle = 0xd8bfd8,
+ Snow1 = 0xfffafa,
+ Snow2 = 0xeee9e9,
+ Snow3 = 0xcdc9c9,
+ Snow4 = 0x8b8989,
+ Seashell1 = 0xfff5ee,
+ Seashell2 = 0xeee5de,
+ Seashell3 = 0xcdc5bf,
+ Seashell4 = 0x8b8682,
+ AntiqueWhite1 = 0xffefdb,
+ AntiqueWhite2 = 0xeedfcc,
+ AntiqueWhite3 = 0xcdc0b0,
+ AntiqueWhite4 = 0x8b8378,
+ Bisque1 = 0xffe4c4,
+ Bisque2 = 0xeed5b7,
+ Bisque3 = 0xcdb79e,
+ Bisque4 = 0x8b7d6b,
+ PeachPuff1 = 0xffdab9,
+ PeachPuff2 = 0xeecbad,
+ PeachPuff3 = 0xcdaf95,
+ PeachPuff4 = 0x8b7765,
+ NavajoWhite1 = 0xffdead,
+ NavajoWhite2 = 0xeecfa1,
+ NavajoWhite3 = 0xcdb38b,
+ NavajoWhite4 = 0x8b795e,
+ LemonChiffon1 = 0xfffacd,
+ LemonChiffon2 = 0xeee9bf,
+ LemonChiffon3 = 0xcdc9a5,
+ LemonChiffon4 = 0x8b8970,
+ Cornsilk1 = 0xfff8dc,
+ Cornsilk2 = 0xeee8cd,
+ Cornsilk3 = 0xcdc8b1,
+ Cornsilk4 = 0x8b8878,
+ Ivory1 = 0xfffff0,
+ Ivory2 = 0xeeeee0,
+ Ivory3 = 0xcdcdc1,
+ Ivory4 = 0x8b8b83,
+ Honeydew1 = 0xf0fff0,
+ Honeydew2 = 0xe0eee0,
+ Honeydew3 = 0xc1cdc1,
+ Honeydew4 = 0x838b83,
+ LavenderBlush1 = 0xfff0f5,
+ LavenderBlush2 = 0xeee0e5,
+ LavenderBlush3 = 0xcdc1c5,
+ LavenderBlush4 = 0x8b8386,
+ MistyRose1 = 0xffe4e1,
+ MistyRose2 = 0xeed5d2,
+ MistyRose3 = 0xcdb7b5,
+ MistyRose4 = 0x8b7d7b,
+ Azure1 = 0xf0ffff,
+ Azure2 = 0xe0eeee,
+ Azure3 = 0xc1cdcd,
+ Azure4 = 0x838b8b,
+ SlateBlue1 = 0x836fff,
+ SlateBlue2 = 0x7a67ee,
+ SlateBlue3 = 0x6959cd,
+ SlateBlue4 = 0x473c8b,
+ RoyalBlue1 = 0x4876ff,
+ RoyalBlue2 = 0x436eee,
+ RoyalBlue3 = 0x3a5fcd,
+ RoyalBlue4 = 0x27408b,
+ Blue1 = 0x0000ff,
+ Blue2 = 0x0000ee,
+ Blue3 = 0x0000cd,
+ Blue4 = 0x00008b,
+ DodgerBlue1 = 0x1e90ff,
+ DodgerBlue2 = 0x1c86ee,
+ DodgerBlue3 = 0x1874cd,
+ DodgerBlue4 = 0x104e8b,
+ SteelBlue1 = 0x63b8ff,
+ SteelBlue2 = 0x5cacee,
+ SteelBlue3 = 0x4f94cd,
+ SteelBlue4 = 0x36648b,
+ DeepSkyBlue1 = 0x00bfff,
+ DeepSkyBlue2 = 0x00b2ee,
+ DeepSkyBlue3 = 0x009acd,
+ DeepSkyBlue4 = 0x00688b,
+ SkyBlue1 = 0x87ceff,
+ SkyBlue2 = 0x7ec0ee,
+ SkyBlue3 = 0x6ca6cd,
+ SkyBlue4 = 0x4a708b,
+ LightSkyBlue1 = 0xb0e2ff,
+ LightSkyBlue2 = 0xa4d3ee,
+ LightSkyBlue3 = 0x8db6cd,
+ LightSkyBlue4 = 0x607b8b,
+ SlateGray1 = 0xc6e2ff,
+ SlateGray2 = 0xb9d3ee,
+ SlateGray3 = 0x9fb6cd,
+ SlateGray4 = 0x6c7b8b,
+ LightSteelBlue1 = 0xcae1ff,
+ LightSteelBlue2 = 0xbcd2ee,
+ LightSteelBlue3 = 0xa2b5cd,
+ LightSteelBlue4 = 0x6e7b8b,
+ LightBlue1 = 0xbfefff,
+ LightBlue2 = 0xb2dfee,
+ LightBlue3 = 0x9ac0cd,
+ LightBlue4 = 0x68838b,
+ LightCyan1 = 0xe0ffff,
+ LightCyan2 = 0xd1eeee,
+ LightCyan3 = 0xb4cdcd,
+ LightCyan4 = 0x7a8b8b,
+ PaleTurquoise1 = 0xbbffff,
+ PaleTurquoise2 = 0xaeeeee,
+ PaleTurquoise3 = 0x96cdcd,
+ PaleTurquoise4 = 0x668b8b,
+ CadetBlue1 = 0x98f5ff,
+ CadetBlue2 = 0x8ee5ee,
+ CadetBlue3 = 0x7ac5cd,
+ CadetBlue4 = 0x53868b,
+ Turquoise1 = 0x00f5ff,
+ Turquoise2 = 0x00e5ee,
+ Turquoise3 = 0x00c5cd,
+ Turquoise4 = 0x00868b,
+ Cyan1 = 0x00ffff,
+ Cyan2 = 0x00eeee,
+ Cyan3 = 0x00cdcd,
+ Cyan4 = 0x008b8b,
+ DarkSlateGray1 = 0x97ffff,
+ DarkSlateGray2 = 0x8deeee,
+ DarkSlateGray3 = 0x79cdcd,
+ DarkSlateGray4 = 0x528b8b,
+ Aquamarine1 = 0x7fffd4,
+ Aquamarine2 = 0x76eec6,
+ Aquamarine3 = 0x66cdaa,
+ Aquamarine4 = 0x458b74,
+ DarkSeaGreen1 = 0xc1ffc1,
+ DarkSeaGreen2 = 0xb4eeb4,
+ DarkSeaGreen3 = 0x9bcd9b,
+ DarkSeaGreen4 = 0x698b69,
+ SeaGreen1 = 0x54ff9f,
+ SeaGreen2 = 0x4eee94,
+ SeaGreen3 = 0x43cd80,
+ SeaGreen4 = 0x2e8b57,
+ PaleGreen1 = 0x9aff9a,
+ PaleGreen2 = 0x90ee90,
+ PaleGreen3 = 0x7ccd7c,
+ PaleGreen4 = 0x548b54,
+ SpringGreen1 = 0x00ff7f,
+ SpringGreen2 = 0x00ee76,
+ SpringGreen3 = 0x00cd66,
+ SpringGreen4 = 0x008b45,
+ Green1 = 0x00ff00,
+ Green2 = 0x00ee00,
+ Green3 = 0x00cd00,
+ Green4 = 0x008b00,
+ Chartreuse1 = 0x7fff00,
+ Chartreuse2 = 0x76ee00,
+ Chartreuse3 = 0x66cd00,
+ Chartreuse4 = 0x458b00,
+ OliveDrab1 = 0xc0ff3e,
+ OliveDrab2 = 0xb3ee3a,
+ OliveDrab3 = 0x9acd32,
+ OliveDrab4 = 0x698b22,
+ DarkOliveGreen1 = 0xcaff70,
+ DarkOliveGreen2 = 0xbcee68,
+ DarkOliveGreen3 = 0xa2cd5a,
+ DarkOliveGreen4 = 0x6e8b3d,
+ Khaki1 = 0xfff68f,
+ Khaki2 = 0xeee685,
+ Khaki3 = 0xcdc673,
+ Khaki4 = 0x8b864e,
+ LightGoldenrod1 = 0xffec8b,
+ LightGoldenrod2 = 0xeedc82,
+ LightGoldenrod3 = 0xcdbe70,
+ LightGoldenrod4 = 0x8b814c,
+ LightYellow1 = 0xffffe0,
+ LightYellow2 = 0xeeeed1,
+ LightYellow3 = 0xcdcdb4,
+ LightYellow4 = 0x8b8b7a,
+ Yellow1 = 0xffff00,
+ Yellow2 = 0xeeee00,
+ Yellow3 = 0xcdcd00,
+ Yellow4 = 0x8b8b00,
+ Gold1 = 0xffd700,
+ Gold2 = 0xeec900,
+ Gold3 = 0xcdad00,
+ Gold4 = 0x8b7500,
+ Goldenrod1 = 0xffc125,
+ Goldenrod2 = 0xeeb422,
+ Goldenrod3 = 0xcd9b1d,
+ Goldenrod4 = 0x8b6914,
+ DarkGoldenrod1 = 0xffb90f,
+ DarkGoldenrod2 = 0xeead0e,
+ DarkGoldenrod3 = 0xcd950c,
+ DarkGoldenrod4 = 0x8b6508,
+ RosyBrown1 = 0xffc1c1,
+ RosyBrown2 = 0xeeb4b4,
+ RosyBrown3 = 0xcd9b9b,
+ RosyBrown4 = 0x8b6969,
+ IndianRed1 = 0xff6a6a,
+ IndianRed2 = 0xee6363,
+ IndianRed3 = 0xcd5555,
+ IndianRed4 = 0x8b3a3a,
+ Sienna1 = 0xff8247,
+ Sienna2 = 0xee7942,
+ Sienna3 = 0xcd6839,
+ Sienna4 = 0x8b4726,
+ Burlywood1 = 0xffd39b,
+ Burlywood2 = 0xeec591,
+ Burlywood3 = 0xcdaa7d,
+ Burlywood4 = 0x8b7355,
+ Wheat1 = 0xffe7ba,
+ Wheat2 = 0xeed8ae,
+ Wheat3 = 0xcdba96,
+ Wheat4 = 0x8b7e66,
+ Tan1 = 0xffa54f,
+ Tan2 = 0xee9a49,
+ Tan3 = 0xcd853f,
+ Tan4 = 0x8b5a2b,
+ Chocolate1 = 0xff7f24,
+ Chocolate2 = 0xee7621,
+ Chocolate3 = 0xcd661d,
+ Chocolate4 = 0x8b4513,
+ Firebrick1 = 0xff3030,
+ Firebrick2 = 0xee2c2c,
+ Firebrick3 = 0xcd2626,
+ Firebrick4 = 0x8b1a1a,
+ Brown1 = 0xff4040,
+ Brown2 = 0xee3b3b,
+ Brown3 = 0xcd3333,
+ Brown4 = 0x8b2323,
+ Salmon1 = 0xff8c69,
+ Salmon2 = 0xee8262,
+ Salmon3 = 0xcd7054,
+ Salmon4 = 0x8b4c39,
+ LightSalmon1 = 0xffa07a,
+ LightSalmon2 = 0xee9572,
+ LightSalmon3 = 0xcd8162,
+ LightSalmon4 = 0x8b5742,
+ Orange1 = 0xffa500,
+ Orange2 = 0xee9a00,
+ Orange3 = 0xcd8500,
+ Orange4 = 0x8b5a00,
+ DarkOrange1 = 0xff7f00,
+ DarkOrange2 = 0xee7600,
+ DarkOrange3 = 0xcd6600,
+ DarkOrange4 = 0x8b4500,
+ Coral1 = 0xff7256,
+ Coral2 = 0xee6a50,
+ Coral3 = 0xcd5b45,
+ Coral4 = 0x8b3e2f,
+ Tomato1 = 0xff6347,
+ Tomato2 = 0xee5c42,
+ Tomato3 = 0xcd4f39,
+ Tomato4 = 0x8b3626,
+ OrangeRed1 = 0xff4500,
+ OrangeRed2 = 0xee4000,
+ OrangeRed3 = 0xcd3700,
+ OrangeRed4 = 0x8b2500,
+ Red1 = 0xff0000,
+ Red2 = 0xee0000,
+ Red3 = 0xcd0000,
+ Red4 = 0x8b0000,
+ DeepPink1 = 0xff1493,
+ DeepPink2 = 0xee1289,
+ DeepPink3 = 0xcd1076,
+ DeepPink4 = 0x8b0a50,
+ HotPink1 = 0xff6eb4,
+ HotPink2 = 0xee6aa7,
+ HotPink3 = 0xcd6090,
+ HotPink4 = 0x8b3a62,
+ Pink1 = 0xffb5c5,
+ Pink2 = 0xeea9b8,
+ Pink3 = 0xcd919e,
+ Pink4 = 0x8b636c,
+ LightPink1 = 0xffaeb9,
+ LightPink2 = 0xeea2ad,
+ LightPink3 = 0xcd8c95,
+ LightPink4 = 0x8b5f65,
+ PaleVioletRed1 = 0xff82ab,
+ PaleVioletRed2 = 0xee799f,
+ PaleVioletRed3 = 0xcd6889,
+ PaleVioletRed4 = 0x8b475d,
+ Maroon1 = 0xff34b3,
+ Maroon2 = 0xee30a7,
+ Maroon3 = 0xcd2990,
+ Maroon4 = 0x8b1c62,
+ VioletRed1 = 0xff3e96,
+ VioletRed2 = 0xee3a8c,
+ VioletRed3 = 0xcd3278,
+ VioletRed4 = 0x8b2252,
+ Magenta1 = 0xff00ff,
+ Magenta2 = 0xee00ee,
+ Magenta3 = 0xcd00cd,
+ Magenta4 = 0x8b008b,
+ Orchid1 = 0xff83fa,
+ Orchid2 = 0xee7ae9,
+ Orchid3 = 0xcd69c9,
+ Orchid4 = 0x8b4789,
+ Plum1 = 0xffbbff,
+ Plum2 = 0xeeaeee,
+ Plum3 = 0xcd96cd,
+ Plum4 = 0x8b668b,
+ MediumOrchid1 = 0xe066ff,
+ MediumOrchid2 = 0xd15fee,
+ MediumOrchid3 = 0xb452cd,
+ MediumOrchid4 = 0x7a378b,
+ DarkOrchid1 = 0xbf3eff,
+ DarkOrchid2 = 0xb23aee,
+ DarkOrchid3 = 0x9a32cd,
+ DarkOrchid4 = 0x68228b,
+ Purple1 = 0x9b30ff,
+ Purple2 = 0x912cee,
+ Purple3 = 0x7d26cd,
+ Purple4 = 0x551a8b,
+ MediumPurple1 = 0xab82ff,
+ MediumPurple2 = 0x9f79ee,
+ MediumPurple3 = 0x8968cd,
+ MediumPurple4 = 0x5d478b,
+ Thistle1 = 0xffe1ff,
+ Thistle2 = 0xeed2ee,
+ Thistle3 = 0xcdb5cd,
+ Thistle4 = 0x8b7b8b,
+ Gray0 = 0x000000,
+ Grey0 = 0x000000,
+ Gray1 = 0x030303,
+ Grey1 = 0x030303,
+ Gray2 = 0x050505,
+ Grey2 = 0x050505,
+ Gray3 = 0x080808,
+ Grey3 = 0x080808,
+ Gray4 = 0x0a0a0a,
+ Grey4 = 0x0a0a0a,
+ Gray5 = 0x0d0d0d,
+ Grey5 = 0x0d0d0d,
+ Gray6 = 0x0f0f0f,
+ Grey6 = 0x0f0f0f,
+ Gray7 = 0x121212,
+ Grey7 = 0x121212,
+ Gray8 = 0x141414,
+ Grey8 = 0x141414,
+ Gray9 = 0x171717,
+ Grey9 = 0x171717,
+ Gray10 = 0x1a1a1a,
+ Grey10 = 0x1a1a1a,
+ Gray11 = 0x1c1c1c,
+ Grey11 = 0x1c1c1c,
+ Gray12 = 0x1f1f1f,
+ Grey12 = 0x1f1f1f,
+ Gray13 = 0x212121,
+ Grey13 = 0x212121,
+ Gray14 = 0x242424,
+ Grey14 = 0x242424,
+ Gray15 = 0x262626,
+ Grey15 = 0x262626,
+ Gray16 = 0x292929,
+ Grey16 = 0x292929,
+ Gray17 = 0x2b2b2b,
+ Grey17 = 0x2b2b2b,
+ Gray18 = 0x2e2e2e,
+ Grey18 = 0x2e2e2e,
+ Gray19 = 0x303030,
+ Grey19 = 0x303030,
+ Gray20 = 0x333333,
+ Grey20 = 0x333333,
+ Gray21 = 0x363636,
+ Grey21 = 0x363636,
+ Gray22 = 0x383838,
+ Grey22 = 0x383838,
+ Gray23 = 0x3b3b3b,
+ Grey23 = 0x3b3b3b,
+ Gray24 = 0x3d3d3d,
+ Grey24 = 0x3d3d3d,
+ Gray25 = 0x404040,
+ Grey25 = 0x404040,
+ Gray26 = 0x424242,
+ Grey26 = 0x424242,
+ Gray27 = 0x454545,
+ Grey27 = 0x454545,
+ Gray28 = 0x474747,
+ Grey28 = 0x474747,
+ Gray29 = 0x4a4a4a,
+ Grey29 = 0x4a4a4a,
+ Gray30 = 0x4d4d4d,
+ Grey30 = 0x4d4d4d,
+ Gray31 = 0x4f4f4f,
+ Grey31 = 0x4f4f4f,
+ Gray32 = 0x525252,
+ Grey32 = 0x525252,
+ Gray33 = 0x545454,
+ Grey33 = 0x545454,
+ Gray34 = 0x575757,
+ Grey34 = 0x575757,
+ Gray35 = 0x595959,
+ Grey35 = 0x595959,
+ Gray36 = 0x5c5c5c,
+ Grey36 = 0x5c5c5c,
+ Gray37 = 0x5e5e5e,
+ Grey37 = 0x5e5e5e,
+ Gray38 = 0x616161,
+ Grey38 = 0x616161,
+ Gray39 = 0x636363,
+ Grey39 = 0x636363,
+ Gray40 = 0x666666,
+ Grey40 = 0x666666,
+ Gray41 = 0x696969,
+ Grey41 = 0x696969,
+ Gray42 = 0x6b6b6b,
+ Grey42 = 0x6b6b6b,
+ Gray43 = 0x6e6e6e,
+ Grey43 = 0x6e6e6e,
+ Gray44 = 0x707070,
+ Grey44 = 0x707070,
+ Gray45 = 0x737373,
+ Grey45 = 0x737373,
+ Gray46 = 0x757575,
+ Grey46 = 0x757575,
+ Gray47 = 0x787878,
+ Grey47 = 0x787878,
+ Gray48 = 0x7a7a7a,
+ Grey48 = 0x7a7a7a,
+ Gray49 = 0x7d7d7d,
+ Grey49 = 0x7d7d7d,
+ Gray50 = 0x7f7f7f,
+ Grey50 = 0x7f7f7f,
+ Gray51 = 0x828282,
+ Grey51 = 0x828282,
+ Gray52 = 0x858585,
+ Grey52 = 0x858585,
+ Gray53 = 0x878787,
+ Grey53 = 0x878787,
+ Gray54 = 0x8a8a8a,
+ Grey54 = 0x8a8a8a,
+ Gray55 = 0x8c8c8c,
+ Grey55 = 0x8c8c8c,
+ Gray56 = 0x8f8f8f,
+ Grey56 = 0x8f8f8f,
+ Gray57 = 0x919191,
+ Grey57 = 0x919191,
+ Gray58 = 0x949494,
+ Grey58 = 0x949494,
+ Gray59 = 0x969696,
+ Grey59 = 0x969696,
+ Gray60 = 0x999999,
+ Grey60 = 0x999999,
+ Gray61 = 0x9c9c9c,
+ Grey61 = 0x9c9c9c,
+ Gray62 = 0x9e9e9e,
+ Grey62 = 0x9e9e9e,
+ Gray63 = 0xa1a1a1,
+ Grey63 = 0xa1a1a1,
+ Gray64 = 0xa3a3a3,
+ Grey64 = 0xa3a3a3,
+ Gray65 = 0xa6a6a6,
+ Grey65 = 0xa6a6a6,
+ Gray66 = 0xa8a8a8,
+ Grey66 = 0xa8a8a8,
+ Gray67 = 0xababab,
+ Grey67 = 0xababab,
+ Gray68 = 0xadadad,
+ Grey68 = 0xadadad,
+ Gray69 = 0xb0b0b0,
+ Grey69 = 0xb0b0b0,
+ Gray70 = 0xb3b3b3,
+ Grey70 = 0xb3b3b3,
+ Gray71 = 0xb5b5b5,
+ Grey71 = 0xb5b5b5,
+ Gray72 = 0xb8b8b8,
+ Grey72 = 0xb8b8b8,
+ Gray73 = 0xbababa,
+ Grey73 = 0xbababa,
+ Gray74 = 0xbdbdbd,
+ Grey74 = 0xbdbdbd,
+ Gray75 = 0xbfbfbf,
+ Grey75 = 0xbfbfbf,
+ Gray76 = 0xc2c2c2,
+ Grey76 = 0xc2c2c2,
+ Gray77 = 0xc4c4c4,
+ Grey77 = 0xc4c4c4,
+ Gray78 = 0xc7c7c7,
+ Grey78 = 0xc7c7c7,
+ Gray79 = 0xc9c9c9,
+ Grey79 = 0xc9c9c9,
+ Gray80 = 0xcccccc,
+ Grey80 = 0xcccccc,
+ Gray81 = 0xcfcfcf,
+ Grey81 = 0xcfcfcf,
+ Gray82 = 0xd1d1d1,
+ Grey82 = 0xd1d1d1,
+ Gray83 = 0xd4d4d4,
+ Grey83 = 0xd4d4d4,
+ Gray84 = 0xd6d6d6,
+ Grey84 = 0xd6d6d6,
+ Gray85 = 0xd9d9d9,
+ Grey85 = 0xd9d9d9,
+ Gray86 = 0xdbdbdb,
+ Grey86 = 0xdbdbdb,
+ Gray87 = 0xdedede,
+ Grey87 = 0xdedede,
+ Gray88 = 0xe0e0e0,
+ Grey88 = 0xe0e0e0,
+ Gray89 = 0xe3e3e3,
+ Grey89 = 0xe3e3e3,
+ Gray90 = 0xe5e5e5,
+ Grey90 = 0xe5e5e5,
+ Gray91 = 0xe8e8e8,
+ Grey91 = 0xe8e8e8,
+ Gray92 = 0xebebeb,
+ Grey92 = 0xebebeb,
+ Gray93 = 0xededed,
+ Grey93 = 0xededed,
+ Gray94 = 0xf0f0f0,
+ Grey94 = 0xf0f0f0,
+ Gray95 = 0xf2f2f2,
+ Grey95 = 0xf2f2f2,
+ Gray96 = 0xf5f5f5,
+ Grey96 = 0xf5f5f5,
+ Gray97 = 0xf7f7f7,
+ Grey97 = 0xf7f7f7,
+ Gray98 = 0xfafafa,
+ Grey98 = 0xfafafa,
+ Gray99 = 0xfcfcfc,
+ Grey99 = 0xfcfcfc,
+ Gray100 = 0xffffff,
+ Grey100 = 0xffffff,
+ DarkGrey = 0xa9a9a9,
+ DarkGray = 0xa9a9a9,
+ DarkBlue = 0x00008b,
+ DarkCyan = 0x008b8b,
+ DarkMagenta = 0x8b008b,
+ DarkRed = 0x8b0000,
+ LightGreen = 0x90ee90,
+ Crimson = 0xdc143c,
+ Indigo = 0x4b0082,
+ Olive = 0x808000,
+ RebeccaPurple = 0x663399,
+ Silver = 0xc0c0c0,
+ Teal = 0x008080,
+};
+};
+}
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracyForceInline.hpp b/3rdparty/tracy/tracy/common/TracyForceInline.hpp
new file mode 100644
index 0000000..b6a5833
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyForceInline.hpp
@@ -0,0 +1,20 @@
+#ifndef __TRACYFORCEINLINE_HPP__
+#define __TRACYFORCEINLINE_HPP__
+
+#if defined(__GNUC__)
+# define tracy_force_inline __attribute__((always_inline)) inline
+#elif defined(_MSC_VER)
+# define tracy_force_inline __forceinline
+#else
+# define tracy_force_inline inline
+#endif
+
+#if defined(__GNUC__)
+# define tracy_no_inline __attribute__((noinline))
+#elif defined(_MSC_VER)
+# define tracy_no_inline __declspec(noinline)
+#else
+# define tracy_no_inline
+#endif
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracyMutex.hpp b/3rdparty/tracy/tracy/common/TracyMutex.hpp
new file mode 100644
index 0000000..57fb01a
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyMutex.hpp
@@ -0,0 +1,24 @@
+#ifndef __TRACYMUTEX_HPP__
+#define __TRACYMUTEX_HPP__
+
+#if defined _MSC_VER
+
+# include <shared_mutex>
+
+namespace tracy
+{
+using TracyMutex = std::shared_mutex;
+}
+
+#else
+
+#include <mutex>
+
+namespace tracy
+{
+using TracyMutex = std::mutex;
+}
+
+#endif
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracyProtocol.hpp b/3rdparty/tracy/tracy/common/TracyProtocol.hpp
new file mode 100644
index 0000000..a647b03
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyProtocol.hpp
@@ -0,0 +1,139 @@
+#ifndef __TRACYPROTOCOL_HPP__
+#define __TRACYPROTOCOL_HPP__
+
+#include <limits>
+#include <stdint.h>
+
+namespace tracy
+{
+
+constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
+
+enum : uint32_t { ProtocolVersion = 55 };
+enum : uint16_t { BroadcastVersion = 2 };
+
+using lz4sz_t = uint32_t;
+
+enum { TargetFrameSize = 256 * 1024 };
+enum { LZ4Size = Lz4CompressBound( TargetFrameSize ) };
+static_assert( LZ4Size <= std::numeric_limits<lz4sz_t>::max(), "LZ4Size greater than lz4sz_t" );
+static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" );
+
+enum { HandshakeShibbolethSize = 8 };
+static const char HandshakeShibboleth[HandshakeShibbolethSize] = { 'T', 'r', 'a', 'c', 'y', 'P', 'r', 'f' };
+
+enum HandshakeStatus : uint8_t
+{
+ HandshakePending,
+ HandshakeWelcome,
+ HandshakeProtocolMismatch,
+ HandshakeNotAvailable,
+ HandshakeDropped
+};
+
+enum { WelcomeMessageProgramNameSize = 64 };
+enum { WelcomeMessageHostInfoSize = 1024 };
+
+#pragma pack( 1 )
+
+// Must increase left query space after handling!
+enum ServerQuery : uint8_t
+{
+ ServerQueryTerminate,
+ ServerQueryString,
+ ServerQueryThreadString,
+ ServerQuerySourceLocation,
+ ServerQueryPlotName,
+ ServerQueryCallstackFrame,
+ ServerQueryFrameName,
+ ServerQueryDisconnect,
+ ServerQueryExternalName,
+ ServerQueryParameter,
+ ServerQuerySymbol,
+ ServerQuerySymbolCode,
+ ServerQueryCodeLocation,
+ ServerQuerySourceCode,
+ ServerQueryDataTransfer,
+ ServerQueryDataTransferPart,
+ ServerQueryFiberName
+};
+
+struct ServerQueryPacket
+{
+ ServerQuery type;
+ uint64_t ptr;
+ uint32_t extra;
+};
+
+enum { ServerQueryPacketSize = sizeof( ServerQueryPacket ) };
+
+
+enum CpuArchitecture : uint8_t
+{
+ CpuArchUnknown,
+ CpuArchX86,
+ CpuArchX64,
+ CpuArchArm32,
+ CpuArchArm64
+};
+
+
+struct WelcomeFlag
+{
+ enum _t : uint8_t
+ {
+ OnDemand = 1 << 0,
+ IsApple = 1 << 1,
+ CodeTransfer = 1 << 2,
+ CombineSamples = 1 << 3,
+ IdentifySamples = 1 << 4,
+ };
+};
+
+struct WelcomeMessage
+{
+ double timerMul;
+ int64_t initBegin;
+ int64_t initEnd;
+ uint64_t delay;
+ uint64_t resolution;
+ uint64_t epoch;
+ uint64_t exectime;
+ uint64_t pid;
+ int64_t samplingPeriod;
+ uint8_t flags;
+ uint8_t cpuArch;
+ char cpuManufacturer[12];
+ uint32_t cpuId;
+ char programName[WelcomeMessageProgramNameSize];
+ char hostInfo[WelcomeMessageHostInfoSize];
+};
+
+enum { WelcomeMessageSize = sizeof( WelcomeMessage ) };
+
+
+struct OnDemandPayloadMessage
+{
+ uint64_t frames;
+ uint64_t currentTime;
+};
+
+enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) };
+
+
+struct BroadcastMessage
+{
+ uint16_t broadcastVersion;
+ uint16_t listenPort;
+ uint32_t protocolVersion;
+ int32_t activeTime; // in seconds
+ char programName[WelcomeMessageProgramNameSize];
+};
+
+enum { BroadcastMessageSize = sizeof( BroadcastMessage ) };
+
+#pragma pack()
+
+}
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracyQueue.hpp b/3rdparty/tracy/tracy/common/TracyQueue.hpp
new file mode 100644
index 0000000..4deb191
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyQueue.hpp
@@ -0,0 +1,850 @@
+#ifndef __TRACYQUEUE_HPP__
+#define __TRACYQUEUE_HPP__
+
+#include <stdint.h>
+
+namespace tracy
+{
+
+enum class QueueType : uint8_t
+{
+ ZoneText,
+ ZoneName,
+ Message,
+ MessageColor,
+ MessageCallstack,
+ MessageColorCallstack,
+ MessageAppInfo,
+ ZoneBeginAllocSrcLoc,
+ ZoneBeginAllocSrcLocCallstack,
+ CallstackSerial,
+ Callstack,
+ CallstackAlloc,
+ CallstackSample,
+ CallstackSampleContextSwitch,
+ FrameImage,
+ ZoneBegin,
+ ZoneBeginCallstack,
+ ZoneEnd,
+ LockWait,
+ LockObtain,
+ LockRelease,
+ LockSharedWait,
+ LockSharedObtain,
+ LockSharedRelease,
+ LockName,
+ MemAlloc,
+ MemAllocNamed,
+ MemFree,
+ MemFreeNamed,
+ MemAllocCallstack,
+ MemAllocCallstackNamed,
+ MemFreeCallstack,
+ MemFreeCallstackNamed,
+ GpuZoneBegin,
+ GpuZoneBeginCallstack,
+ GpuZoneBeginAllocSrcLoc,
+ GpuZoneBeginAllocSrcLocCallstack,
+ GpuZoneEnd,
+ GpuZoneBeginSerial,
+ GpuZoneBeginCallstackSerial,
+ GpuZoneBeginAllocSrcLocSerial,
+ GpuZoneBeginAllocSrcLocCallstackSerial,
+ GpuZoneEndSerial,
+ PlotData,
+ ContextSwitch,
+ ThreadWakeup,
+ GpuTime,
+ GpuContextName,
+ CallstackFrameSize,
+ SymbolInformation,
+ CodeInformation,
+ ExternalNameMetadata,
+ SymbolCodeMetadata,
+ FiberEnter,
+ FiberLeave,
+ Terminate,
+ KeepAlive,
+ ThreadContext,
+ GpuCalibration,
+ Crash,
+ CrashReport,
+ ZoneValidation,
+ ZoneColor,
+ ZoneValue,
+ FrameMarkMsg,
+ FrameMarkMsgStart,
+ FrameMarkMsgEnd,
+ SourceLocation,
+ LockAnnounce,
+ LockTerminate,
+ LockMark,
+ MessageLiteral,
+ MessageLiteralColor,
+ MessageLiteralCallstack,
+ MessageLiteralColorCallstack,
+ GpuNewContext,
+ CallstackFrame,
+ SysTimeReport,
+ TidToPid,
+ HwSampleCpuCycle,
+ HwSampleInstructionRetired,
+ HwSampleCacheReference,
+ HwSampleCacheMiss,
+ HwSampleBranchRetired,
+ HwSampleBranchMiss,
+ PlotConfig,
+ ParamSetup,
+ AckServerQueryNoop,
+ AckSourceCodeNotAvailable,
+ AckSymbolCodeNotAvailable,
+ CpuTopology,
+ SingleStringData,
+ SecondStringData,
+ MemNamePayload,
+ StringData,
+ ThreadName,
+ PlotName,
+ SourceLocationPayload,
+ CallstackPayload,
+ CallstackAllocPayload,
+ FrameName,
+ FrameImageData,
+ ExternalName,
+ ExternalThreadName,
+ SymbolCode,
+ SourceCode,
+ FiberName,
+ NUM_TYPES
+};
+
+#pragma pack( 1 )
+
+struct QueueThreadContext
+{
+ uint32_t thread;
+};
+
+struct QueueZoneBeginLean
+{
+ int64_t time;
+};
+
+struct QueueZoneBegin : public QueueZoneBeginLean
+{
+ uint64_t srcloc; // ptr
+};
+
+struct QueueZoneBeginThread : public QueueZoneBegin
+{
+ uint32_t thread;
+};
+
+struct QueueZoneEnd
+{
+ int64_t time;
+};
+
+struct QueueZoneEndThread : public QueueZoneEnd
+{
+ uint32_t thread;
+};
+
+struct QueueZoneValidation
+{
+ uint32_t id;
+};
+
+struct QueueZoneValidationThread : public QueueZoneValidation
+{
+ uint32_t thread;
+};
+
+struct QueueZoneColor
+{
+ uint8_t r;
+ uint8_t g;
+ uint8_t b;
+};
+
+struct QueueZoneColorThread : public QueueZoneColor
+{
+ uint32_t thread;
+};
+
+struct QueueZoneValue
+{
+ uint64_t value;
+};
+
+struct QueueZoneValueThread : public QueueZoneValue
+{
+ uint32_t thread;
+};
+
+struct QueueStringTransfer
+{
+ uint64_t ptr;
+};
+
+struct QueueFrameMark
+{
+ int64_t time;
+ uint64_t name; // ptr
+};
+
+struct QueueFrameImage
+{
+ uint32_t frame;
+ uint16_t w;
+ uint16_t h;
+ uint8_t flip;
+};
+
+struct QueueFrameImageFat : public QueueFrameImage
+{
+ uint64_t image; // ptr
+};
+
+struct QueueSourceLocation
+{
+ uint64_t name;
+ uint64_t function; // ptr
+ uint64_t file; // ptr
+ uint32_t line;
+ uint8_t r;
+ uint8_t g;
+ uint8_t b;
+};
+
+struct QueueZoneTextFat
+{
+ uint64_t text; // ptr
+ uint16_t size;
+};
+
+struct QueueZoneTextFatThread : public QueueZoneTextFat
+{
+ uint32_t thread;
+};
+
+enum class LockType : uint8_t
+{
+ Lockable,
+ SharedLockable
+};
+
+struct QueueLockAnnounce
+{
+ uint32_t id;
+ int64_t time;
+ uint64_t lckloc; // ptr
+ LockType type;
+};
+
+struct QueueFiberEnter
+{
+ int64_t time;
+ uint64_t fiber; // ptr
+ uint32_t thread;
+};
+
+struct QueueFiberLeave
+{
+ int64_t time;
+ uint32_t thread;
+};
+
+struct QueueLockTerminate
+{
+ uint32_t id;
+ int64_t time;
+};
+
+struct QueueLockWait
+{
+ uint32_t thread;
+ uint32_t id;
+ int64_t time;
+};
+
+struct QueueLockObtain
+{
+ uint32_t thread;
+ uint32_t id;
+ int64_t time;
+};
+
+struct QueueLockRelease
+{
+ uint32_t thread;
+ uint32_t id;
+ int64_t time;
+};
+
+struct QueueLockMark
+{
+ uint32_t thread;
+ uint32_t id;
+ uint64_t srcloc; // ptr
+};
+
+struct QueueLockName
+{
+ uint32_t id;
+};
+
+struct QueueLockNameFat : public QueueLockName
+{
+ uint64_t name; // ptr
+ uint16_t size;
+};
+
+enum class PlotDataType : uint8_t
+{
+ Float,
+ Double,
+ Int
+};
+
+struct QueuePlotData
+{
+ uint64_t name; // ptr
+ int64_t time;
+ PlotDataType type;
+ union
+ {
+ double d;
+ float f;
+ int64_t i;
+ } data;
+};
+
+struct QueueMessage
+{
+ int64_t time;
+};
+
+struct QueueMessageColor : public QueueMessage
+{
+ uint8_t r;
+ uint8_t g;
+ uint8_t b;
+};
+
+struct QueueMessageLiteral : public QueueMessage
+{
+ uint64_t text; // ptr
+};
+
+struct QueueMessageLiteralThread : public QueueMessageLiteral
+{
+ uint32_t thread;
+};
+
+struct QueueMessageColorLiteral : public QueueMessageColor
+{
+ uint64_t text; // ptr
+};
+
+struct QueueMessageColorLiteralThread : public QueueMessageColorLiteral
+{
+ uint32_t thread;
+};
+
+struct QueueMessageFat : public QueueMessage
+{
+ uint64_t text; // ptr
+ uint16_t size;
+};
+
+struct QueueMessageFatThread : public QueueMessageFat
+{
+ uint32_t thread;
+};
+
+struct QueueMessageColorFat : public QueueMessageColor
+{
+ uint64_t text; // ptr
+ uint16_t size;
+};
+
+struct QueueMessageColorFatThread : public QueueMessageColorFat
+{
+ uint32_t thread;
+};
+
+// Don't change order, only add new entries at the end, this is also used on trace dumps!
+enum class GpuContextType : uint8_t
+{
+ Invalid,
+ OpenGl,
+ Vulkan,
+ OpenCL,
+ Direct3D12,
+ Direct3D11
+};
+
+enum GpuContextFlags : uint8_t
+{
+ GpuContextCalibration = 1 << 0
+};
+
+struct QueueGpuNewContext
+{
+ int64_t cpuTime;
+ int64_t gpuTime;
+ uint32_t thread;
+ float period;
+ uint8_t context;
+ GpuContextFlags flags;
+ GpuContextType type;
+};
+
+struct QueueGpuZoneBeginLean
+{
+ int64_t cpuTime;
+ uint32_t thread;
+ uint16_t queryId;
+ uint8_t context;
+};
+
+struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean
+{
+ uint64_t srcloc;
+};
+
+struct QueueGpuZoneEnd
+{
+ int64_t cpuTime;
+ uint32_t thread;
+ uint16_t queryId;
+ uint8_t context;
+};
+
+struct QueueGpuTime
+{
+ int64_t gpuTime;
+ uint16_t queryId;
+ uint8_t context;
+};
+
+struct QueueGpuCalibration
+{
+ int64_t gpuTime;
+ int64_t cpuTime;
+ int64_t cpuDelta;
+ uint8_t context;
+};
+
+struct QueueGpuContextName
+{
+ uint8_t context;
+};
+
+struct QueueGpuContextNameFat : public QueueGpuContextName
+{
+ uint64_t ptr;
+ uint16_t size;
+};
+
+struct QueueMemNamePayload
+{
+ uint64_t name;
+};
+
+struct QueueMemAlloc
+{
+ int64_t time;
+ uint32_t thread;
+ uint64_t ptr;
+ char size[6];
+};
+
+struct QueueMemFree
+{
+ int64_t time;
+ uint32_t thread;
+ uint64_t ptr;
+};
+
+struct QueueCallstackFat
+{
+ uint64_t ptr;
+};
+
+struct QueueCallstackFatThread : public QueueCallstackFat
+{
+ uint32_t thread;
+};
+
+struct QueueCallstackAllocFat
+{
+ uint64_t ptr;
+ uint64_t nativePtr;
+};
+
+struct QueueCallstackAllocFatThread : public QueueCallstackAllocFat
+{
+ uint32_t thread;
+};
+
+struct QueueCallstackSample
+{
+ int64_t time;
+ uint32_t thread;
+};
+
+struct QueueCallstackSampleFat : public QueueCallstackSample
+{
+ uint64_t ptr;
+};
+
+struct QueueCallstackFrameSize
+{
+ uint64_t ptr;
+ uint8_t size;
+};
+
+struct QueueCallstackFrameSizeFat : public QueueCallstackFrameSize
+{
+ uint64_t data;
+ uint64_t imageName;
+};
+
+struct QueueCallstackFrame
+{
+ uint32_t line;
+ uint64_t symAddr;
+ uint32_t symLen;
+};
+
+struct QueueSymbolInformation
+{
+ uint32_t line;
+ uint64_t symAddr;
+};
+
+struct QueueSymbolInformationFat : public QueueSymbolInformation
+{
+ uint64_t fileString;
+ uint8_t needFree;
+};
+
+struct QueueCodeInformation
+{
+ uint64_t symAddr;
+ uint32_t line;
+ uint64_t ptrOffset;
+};
+
+struct QueueCodeInformationFat : public QueueCodeInformation
+{
+ uint64_t fileString;
+ uint8_t needFree;
+};
+
+struct QueueCrashReport
+{
+ int64_t time;
+ uint64_t text; // ptr
+};
+
+struct QueueCrashReportThread
+{
+ uint32_t thread;
+};
+
+struct QueueSysTime
+{
+ int64_t time;
+ float sysTime;
+};
+
+struct QueueContextSwitch
+{
+ int64_t time;
+ uint32_t oldThread;
+ uint32_t newThread;
+ uint8_t cpu;
+ uint8_t reason;
+ uint8_t state;
+};
+
+struct QueueThreadWakeup
+{
+ int64_t time;
+ uint32_t thread;
+};
+
+struct QueueTidToPid
+{
+ uint64_t tid;
+ uint64_t pid;
+};
+
+struct QueueHwSample
+{
+ uint64_t ip;
+ int64_t time;
+};
+
+enum class PlotFormatType : uint8_t
+{
+ Number,
+ Memory,
+ Percentage
+};
+
+struct QueuePlotConfig
+{
+ uint64_t name; // ptr
+ uint8_t type;
+};
+
+struct QueueParamSetup
+{
+ uint32_t idx;
+ uint64_t name; // ptr
+ uint8_t isBool;
+ int32_t val;
+};
+
+struct QueueCpuTopology
+{
+ uint32_t package;
+ uint32_t core;
+ uint32_t thread;
+};
+
+struct QueueExternalNameMetadata
+{
+ uint64_t thread;
+ uint64_t name;
+ uint64_t threadName;
+};
+
+struct QueueSymbolCodeMetadata
+{
+ uint64_t symbol;
+ uint64_t ptr;
+ uint32_t size;
+};
+
+struct QueueHeader
+{
+ union
+ {
+ QueueType type;
+ uint8_t idx;
+ };
+};
+
+struct QueueItem
+{
+ QueueHeader hdr;
+ union
+ {
+ QueueThreadContext threadCtx;
+ QueueZoneBegin zoneBegin;
+ QueueZoneBeginLean zoneBeginLean;
+ QueueZoneBeginThread zoneBeginThread;
+ QueueZoneEnd zoneEnd;
+ QueueZoneEndThread zoneEndThread;
+ QueueZoneValidation zoneValidation;
+ QueueZoneValidationThread zoneValidationThread;
+ QueueZoneColor zoneColor;
+ QueueZoneColorThread zoneColorThread;
+ QueueZoneValue zoneValue;
+ QueueZoneValueThread zoneValueThread;
+ QueueStringTransfer stringTransfer;
+ QueueFrameMark frameMark;
+ QueueFrameImage frameImage;
+ QueueFrameImageFat frameImageFat;
+ QueueSourceLocation srcloc;
+ QueueZoneTextFat zoneTextFat;
+ QueueZoneTextFatThread zoneTextFatThread;
+ QueueLockAnnounce lockAnnounce;
+ QueueLockTerminate lockTerminate;
+ QueueLockWait lockWait;
+ QueueLockObtain lockObtain;
+ QueueLockRelease lockRelease;
+ QueueLockMark lockMark;
+ QueueLockName lockName;
+ QueueLockNameFat lockNameFat;
+ QueuePlotData plotData;
+ QueueMessage message;
+ QueueMessageColor messageColor;
+ QueueMessageLiteral messageLiteral;
+ QueueMessageLiteralThread messageLiteralThread;
+ QueueMessageColorLiteral messageColorLiteral;
+ QueueMessageColorLiteralThread messageColorLiteralThread;
+ QueueMessageFat messageFat;
+ QueueMessageFatThread messageFatThread;
+ QueueMessageColorFat messageColorFat;
+ QueueMessageColorFatThread messageColorFatThread;
+ QueueGpuNewContext gpuNewContext;
+ QueueGpuZoneBegin gpuZoneBegin;
+ QueueGpuZoneBeginLean gpuZoneBeginLean;
+ QueueGpuZoneEnd gpuZoneEnd;
+ QueueGpuTime gpuTime;
+ QueueGpuCalibration gpuCalibration;
+ QueueGpuContextName gpuContextName;
+ QueueGpuContextNameFat gpuContextNameFat;
+ QueueMemAlloc memAlloc;
+ QueueMemFree memFree;
+ QueueMemNamePayload memName;
+ QueueCallstackFat callstackFat;
+ QueueCallstackFatThread callstackFatThread;
+ QueueCallstackAllocFat callstackAllocFat;
+ QueueCallstackAllocFatThread callstackAllocFatThread;
+ QueueCallstackSample callstackSample;
+ QueueCallstackSampleFat callstackSampleFat;
+ QueueCallstackFrameSize callstackFrameSize;
+ QueueCallstackFrameSizeFat callstackFrameSizeFat;
+ QueueCallstackFrame callstackFrame;
+ QueueSymbolInformation symbolInformation;
+ QueueSymbolInformationFat symbolInformationFat;
+ QueueCodeInformation codeInformation;
+ QueueCodeInformationFat codeInformationFat;
+ QueueCrashReport crashReport;
+ QueueCrashReportThread crashReportThread;
+ QueueSysTime sysTime;
+ QueueContextSwitch contextSwitch;
+ QueueThreadWakeup threadWakeup;
+ QueueTidToPid tidToPid;
+ QueueHwSample hwSample;
+ QueuePlotConfig plotConfig;
+ QueueParamSetup paramSetup;
+ QueueCpuTopology cpuTopology;
+ QueueExternalNameMetadata externalNameMetadata;
+ QueueSymbolCodeMetadata symbolCodeMetadata;
+ QueueFiberEnter fiberEnter;
+ QueueFiberLeave fiberLeave;
+ };
+};
+#pragma pack()
+
+
+enum { QueueItemSize = sizeof( QueueItem ) };
+
+static constexpr size_t QueueDataSize[] = {
+ sizeof( QueueHeader ), // zone text
+ sizeof( QueueHeader ), // zone name
+ sizeof( QueueHeader ) + sizeof( QueueMessage ),
+ sizeof( QueueHeader ) + sizeof( QueueMessageColor ),
+ sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack
+ sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack
+ sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info
+ sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location
+ sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack
+ sizeof( QueueHeader ), // callstack memory
+ sizeof( QueueHeader ), // callstack
+ sizeof( QueueHeader ), // callstack alloc
+ sizeof( QueueHeader ) + sizeof( QueueCallstackSample ),
+ sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), // context switch
+ sizeof( QueueHeader ) + sizeof( QueueFrameImage ),
+ sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
+ sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack
+ sizeof( QueueHeader ) + sizeof( QueueZoneEnd ),
+ sizeof( QueueHeader ) + sizeof( QueueLockWait ),
+ sizeof( QueueHeader ) + sizeof( QueueLockObtain ),
+ sizeof( QueueHeader ) + sizeof( QueueLockRelease ),
+ sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared
+ sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared
+ sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared
+ sizeof( QueueHeader ) + sizeof( QueueLockName ),
+ sizeof( QueueHeader ) + sizeof( QueueMemAlloc ),
+ sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // named
+ sizeof( QueueHeader ) + sizeof( QueueMemFree ),
+ sizeof( QueueHeader ) + sizeof( QueueMemFree ), // named
+ sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack
+ sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named
+ sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
+ sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location, callstack
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location, callstack
+ sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial
+ sizeof( QueueHeader ) + sizeof( QueuePlotData ),
+ sizeof( QueueHeader ) + sizeof( QueueContextSwitch ),
+ sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ),
+ sizeof( QueueHeader ) + sizeof( QueueGpuTime ),
+ sizeof( QueueHeader ) + sizeof( QueueGpuContextName ),
+ sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
+ sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ),
+ sizeof( QueueHeader ) + sizeof( QueueCodeInformation ),
+ sizeof( QueueHeader ), // ExternalNameMetadata - not for wire transfer
+ sizeof( QueueHeader ), // SymbolCodeMetadata - not for wire transfer
+ sizeof( QueueHeader ) + sizeof( QueueFiberEnter ),
+ sizeof( QueueHeader ) + sizeof( QueueFiberLeave ),
+ // above items must be first
+ sizeof( QueueHeader ), // terminate
+ sizeof( QueueHeader ), // keep alive
+ sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
+ sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
+ sizeof( QueueHeader ), // crash
+ sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
+ sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
+ sizeof( QueueHeader ) + sizeof( QueueZoneColor ),
+ sizeof( QueueHeader ) + sizeof( QueueZoneValue ),
+ sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // continuous frames
+ sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // start
+ sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // end
+ sizeof( QueueHeader ) + sizeof( QueueSourceLocation ),
+ sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ),
+ sizeof( QueueHeader ) + sizeof( QueueLockTerminate ),
+ sizeof( QueueHeader ) + sizeof( QueueLockMark ),
+ sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ),
+ sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ),
+ sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack
+ sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack
+ sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
+ sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ),
+ sizeof( QueueHeader ) + sizeof( QueueSysTime ),
+ sizeof( QueueHeader ) + sizeof( QueueTidToPid ),
+ sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle
+ sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired
+ sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache reference
+ sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache miss
+ sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch retired
+ sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch miss
+ sizeof( QueueHeader ) + sizeof( QueuePlotConfig ),
+ sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
+ sizeof( QueueHeader ), // server query acknowledgement
+ sizeof( QueueHeader ), // source code not available
+ sizeof( QueueHeader ), // symbol code not available
+ sizeof( QueueHeader ) + sizeof( QueueCpuTopology ),
+ sizeof( QueueHeader ), // single string data
+ sizeof( QueueHeader ), // second string data
+ sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ),
+ // keep all QueueStringTransfer below
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack alloc payload
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame name
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame image data
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external name
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external thread name
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // symbol code
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // source code
+ sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // fiber name
+};
+
+static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" );
+static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType::NUM_TYPES, "QueueDataSize mismatch" );
+static_assert( sizeof( void* ) <= sizeof( uint64_t ), "Pointer size > 8 bytes" );
+static_assert( sizeof( void* ) == sizeof( uintptr_t ), "Pointer size != uintptr_t" );
+
+}
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracySocket.cpp b/3rdparty/tracy/tracy/common/TracySocket.cpp
new file mode 100644
index 0000000..176bbc7
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracySocket.cpp
@@ -0,0 +1,749 @@
+#include <assert.h>
+#include <inttypes.h>
+#include <new>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "TracyAlloc.hpp"
+#include "TracySocket.hpp"
+#include "TracySystem.hpp"
+
+#ifdef _WIN32
+# ifndef NOMINMAX
+# define NOMINMAX
+# endif
+# include <winsock2.h>
+# include <ws2tcpip.h>
+# ifdef _MSC_VER
+# pragma warning(disable:4244)
+# pragma warning(disable:4267)
+# endif
+# define poll WSAPoll
+#else
+# include <arpa/inet.h>
+# include <sys/socket.h>
+# include <sys/param.h>
+# include <errno.h>
+# include <fcntl.h>
+# include <netinet/in.h>
+# include <netdb.h>
+# include <unistd.h>
+# include <poll.h>
+#endif
+
+#ifndef MSG_NOSIGNAL
+# define MSG_NOSIGNAL 0
+#endif
+
+namespace tracy
+{
+
+#ifdef _WIN32
+typedef SOCKET socket_t;
+#else
+typedef int socket_t;
+#endif
+
+#ifdef _WIN32
+struct __wsinit
+{
+ __wsinit()
+ {
+ WSADATA wsaData;
+ if( WSAStartup( MAKEWORD( 2, 2 ), &wsaData ) != 0 )
+ {
+ fprintf( stderr, "Cannot init winsock.\n" );
+ exit( 1 );
+ }
+ }
+};
+
+void InitWinSock()
+{
+ static __wsinit init;
+}
+#endif
+
+
+enum { BufSize = 128 * 1024 };
+
+Socket::Socket()
+ : m_buf( (char*)tracy_malloc( BufSize ) )
+ , m_bufPtr( nullptr )
+ , m_sock( -1 )
+ , m_bufLeft( 0 )
+ , m_ptr( nullptr )
+{
+#ifdef _WIN32
+ InitWinSock();
+#endif
+}
+
+Socket::Socket( int sock )
+ : m_buf( (char*)tracy_malloc( BufSize ) )
+ , m_bufPtr( nullptr )
+ , m_sock( sock )
+ , m_bufLeft( 0 )
+ , m_ptr( nullptr )
+{
+}
+
+Socket::~Socket()
+{
+ tracy_free( m_buf );
+ if( m_sock.load( std::memory_order_relaxed ) != -1 )
+ {
+ Close();
+ }
+ if( m_ptr )
+ {
+ freeaddrinfo( m_res );
+#ifdef _WIN32
+ closesocket( m_connSock );
+#else
+ close( m_connSock );
+#endif
+ }
+}
+
+bool Socket::Connect( const char* addr, uint16_t port )
+{
+ assert( !IsValid() );
+
+ if( m_ptr )
+ {
+ const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen );
+ if( c == -1 )
+ {
+#if defined _WIN32
+ const auto err = WSAGetLastError();
+ if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false;
+ if( err != WSAEISCONN )
+ {
+ freeaddrinfo( m_res );
+ closesocket( m_connSock );
+ m_ptr = nullptr;
+ return false;
+ }
+#else
+ const auto err = errno;
+ if( err == EALREADY || err == EINPROGRESS ) return false;
+ if( err != EISCONN )
+ {
+ freeaddrinfo( m_res );
+ close( m_connSock );
+ m_ptr = nullptr;
+ return false;
+ }
+#endif
+ }
+
+#if defined _WIN32
+ u_long nonblocking = 0;
+ ioctlsocket( m_connSock, FIONBIO, &nonblocking );
+#else
+ int flags = fcntl( m_connSock, F_GETFL, 0 );
+ fcntl( m_connSock, F_SETFL, flags & ~O_NONBLOCK );
+#endif
+ m_sock.store( m_connSock, std::memory_order_relaxed );
+ freeaddrinfo( m_res );
+ m_ptr = nullptr;
+ return true;
+ }
+
+ struct addrinfo hints;
+ struct addrinfo *res, *ptr;
+
+ memset( &hints, 0, sizeof( hints ) );
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+
+ char portbuf[32];
+ sprintf( portbuf, "%" PRIu16, port );
+
+ if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
+ int sock = 0;
+ for( ptr = res; ptr; ptr = ptr->ai_next )
+ {
+ if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
+#if defined __APPLE__
+ int val = 1;
+ setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
+#endif
+#if defined _WIN32
+ u_long nonblocking = 1;
+ ioctlsocket( sock, FIONBIO, &nonblocking );
+#else
+ int flags = fcntl( sock, F_GETFL, 0 );
+ fcntl( sock, F_SETFL, flags | O_NONBLOCK );
+#endif
+ if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == 0 )
+ {
+ break;
+ }
+ else
+ {
+#if defined _WIN32
+ const auto err = WSAGetLastError();
+ if( err != WSAEWOULDBLOCK )
+ {
+ closesocket( sock );
+ continue;
+ }
+#else
+ if( errno != EINPROGRESS )
+ {
+ close( sock );
+ continue;
+ }
+#endif
+ }
+ m_res = res;
+ m_ptr = ptr;
+ m_connSock = sock;
+ return false;
+ }
+ freeaddrinfo( res );
+ if( !ptr ) return false;
+
+#if defined _WIN32
+ u_long nonblocking = 0;
+ ioctlsocket( sock, FIONBIO, &nonblocking );
+#else
+ int flags = fcntl( sock, F_GETFL, 0 );
+ fcntl( sock, F_SETFL, flags & ~O_NONBLOCK );
+#endif
+
+ m_sock.store( sock, std::memory_order_relaxed );
+ return true;
+}
+
+bool Socket::ConnectBlocking( const char* addr, uint16_t port )
+{
+ assert( !IsValid() );
+ assert( !m_ptr );
+
+ struct addrinfo hints;
+ struct addrinfo *res, *ptr;
+
+ memset( &hints, 0, sizeof( hints ) );
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+
+ char portbuf[32];
+ sprintf( portbuf, "%" PRIu16, port );
+
+ if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
+ int sock = 0;
+ for( ptr = res; ptr; ptr = ptr->ai_next )
+ {
+ if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
+#if defined __APPLE__
+ int val = 1;
+ setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
+#endif
+ if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == -1 )
+ {
+#ifdef _WIN32
+ closesocket( sock );
+#else
+ close( sock );
+#endif
+ continue;
+ }
+ break;
+ }
+ freeaddrinfo( res );
+ if( !ptr ) return false;
+
+ m_sock.store( sock, std::memory_order_relaxed );
+ return true;
+}
+
+void Socket::Close()
+{
+ const auto sock = m_sock.load( std::memory_order_relaxed );
+ assert( sock != -1 );
+#ifdef _WIN32
+ closesocket( sock );
+#else
+ close( sock );
+#endif
+ m_sock.store( -1, std::memory_order_relaxed );
+}
+
+int Socket::Send( const void* _buf, int len )
+{
+ const auto sock = m_sock.load( std::memory_order_relaxed );
+ auto buf = (const char*)_buf;
+ assert( sock != -1 );
+ auto start = buf;
+ while( len > 0 )
+ {
+ auto ret = send( sock, buf, len, MSG_NOSIGNAL );
+ if( ret == -1 ) return -1;
+ len -= ret;
+ buf += ret;
+ }
+ return int( buf - start );
+}
+
+int Socket::GetSendBufSize()
+{
+ const auto sock = m_sock.load( std::memory_order_relaxed );
+ int bufSize;
+#if defined _WIN32
+ int sz = sizeof( bufSize );
+ getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
+#else
+ socklen_t sz = sizeof( bufSize );
+ getsockopt( sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz );
+#endif
+ return bufSize;
+}
+
+int Socket::RecvBuffered( void* buf, int len, int timeout )
+{
+ if( len <= m_bufLeft )
+ {
+ memcpy( buf, m_bufPtr, len );
+ m_bufPtr += len;
+ m_bufLeft -= len;
+ return len;
+ }
+
+ if( m_bufLeft > 0 )
+ {
+ memcpy( buf, m_bufPtr, m_bufLeft );
+ const auto ret = m_bufLeft;
+ m_bufLeft = 0;
+ return ret;
+ }
+
+ if( len >= BufSize ) return Recv( buf, len, timeout );
+
+ m_bufLeft = Recv( m_buf, BufSize, timeout );
+ if( m_bufLeft <= 0 ) return m_bufLeft;
+
+ const auto sz = len < m_bufLeft ? len : m_bufLeft;
+ memcpy( buf, m_buf, sz );
+ m_bufPtr = m_buf + sz;
+ m_bufLeft -= sz;
+ return sz;
+}
+
+int Socket::Recv( void* _buf, int len, int timeout )
+{
+ const auto sock = m_sock.load( std::memory_order_relaxed );
+ auto buf = (char*)_buf;
+
+ struct pollfd fd;
+ fd.fd = (socket_t)sock;
+ fd.events = POLLIN;
+
+ if( poll( &fd, 1, timeout ) > 0 )
+ {
+ return recv( sock, buf, len, 0 );
+ }
+ else
+ {
+ return -1;
+ }
+}
+
+int Socket::ReadUpTo( void* _buf, int len, int timeout )
+{
+ const auto sock = m_sock.load( std::memory_order_relaxed );
+ auto buf = (char*)_buf;
+
+ int rd = 0;
+ while( len > 0 )
+ {
+ const auto res = recv( sock, buf, len, 0 );
+ if( res == 0 ) break;
+ if( res == -1 ) return -1;
+ len -= res;
+ rd += res;
+ buf += res;
+ }
+ return rd;
+}
+
+bool Socket::Read( void* buf, int len, int timeout )
+{
+ auto cbuf = (char*)buf;
+ while( len > 0 )
+ {
+ if( !ReadImpl( cbuf, len, timeout ) ) return false;
+ }
+ return true;
+}
+
+bool Socket::ReadImpl( char*& buf, int& len, int timeout )
+{
+ const auto sz = RecvBuffered( buf, len, timeout );
+ switch( sz )
+ {
+ case 0:
+ return false;
+ case -1:
+#ifdef _WIN32
+ {
+ auto err = WSAGetLastError();
+ if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false;
+ }
+#endif
+ break;
+ default:
+ len -= sz;
+ buf += sz;
+ break;
+ }
+ return true;
+}
+
+bool Socket::ReadRaw( void* _buf, int len, int timeout )
+{
+ auto buf = (char*)_buf;
+ while( len > 0 )
+ {
+ const auto sz = Recv( buf, len, timeout );
+ if( sz <= 0 ) return false;
+ len -= sz;
+ buf += sz;
+ }
+ return true;
+}
+
+bool Socket::HasData()
+{
+ const auto sock = m_sock.load( std::memory_order_relaxed );
+ if( m_bufLeft > 0 ) return true;
+
+ struct pollfd fd;
+ fd.fd = (socket_t)sock;
+ fd.events = POLLIN;
+
+ return poll( &fd, 1, 0 ) > 0;
+}
+
+bool Socket::IsValid() const
+{
+ return m_sock.load( std::memory_order_relaxed ) >= 0;
+}
+
+
+ListenSocket::ListenSocket()
+ : m_sock( -1 )
+{
+#ifdef _WIN32
+ InitWinSock();
+#endif
+}
+
+ListenSocket::~ListenSocket()
+{
+ if( m_sock != -1 ) Close();
+}
+
+static int addrinfo_and_socket_for_family( uint16_t port, int ai_family, struct addrinfo** res )
+{
+ struct addrinfo hints;
+ memset( &hints, 0, sizeof( hints ) );
+ hints.ai_family = ai_family;
+ hints.ai_socktype = SOCK_STREAM;
+#ifndef TRACY_ONLY_LOCALHOST
+ const char* onlyLocalhost = GetEnvVar( "TRACY_ONLY_LOCALHOST" );
+ if( !onlyLocalhost || onlyLocalhost[0] != '1' )
+ {
+ hints.ai_flags = AI_PASSIVE;
+ }
+#endif
+ char portbuf[32];
+ sprintf( portbuf, "%" PRIu16, port );
+ if( getaddrinfo( nullptr, portbuf, &hints, res ) != 0 ) return -1;
+ int sock = socket( (*res)->ai_family, (*res)->ai_socktype, (*res)->ai_protocol );
+ if (sock == -1) freeaddrinfo( *res );
+ return sock;
+}
+
+bool ListenSocket::Listen( uint16_t port, int backlog )
+{
+ assert( m_sock == -1 );
+
+ struct addrinfo* res = nullptr;
+
+#if !defined TRACY_ONLY_IPV4 && !defined TRACY_ONLY_LOCALHOST
+ const char* onlyIPv4 = GetEnvVar( "TRACY_ONLY_IPV4" );
+ if( !onlyIPv4 || onlyIPv4[0] != '1' )
+ {
+ m_sock = addrinfo_and_socket_for_family( port, AF_INET6, &res );
+ }
+#endif
+ if (m_sock == -1)
+ {
+ // IPV6 protocol may not be available/is disabled. Try to create a socket
+ // with the IPV4 protocol
+ m_sock = addrinfo_and_socket_for_family( port, AF_INET, &res );
+ if( m_sock == -1 ) return false;
+ }
+#if defined _WIN32
+ unsigned long val = 0;
+ setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
+#elif defined BSD
+ int val = 0;
+ setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) );
+ val = 1;
+ setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
+#else
+ int val = 1;
+ setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
+#endif
+ if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
+ if( listen( m_sock, backlog ) == -1 ) { freeaddrinfo( res ); Close(); return false; }
+ freeaddrinfo( res );
+ return true;
+}
+
+Socket* ListenSocket::Accept()
+{
+ struct sockaddr_storage remote;
+ socklen_t sz = sizeof( remote );
+
+ struct pollfd fd;
+ fd.fd = (socket_t)m_sock;
+ fd.events = POLLIN;
+
+ if( poll( &fd, 1, 10 ) > 0 )
+ {
+ int sock = accept( m_sock, (sockaddr*)&remote, &sz);
+ if( sock == -1 ) return nullptr;
+
+#if defined __APPLE__
+ int val = 1;
+ setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
+#endif
+
+ auto ptr = (Socket*)tracy_malloc( sizeof( Socket ) );
+ new(ptr) Socket( sock );
+ return ptr;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+void ListenSocket::Close()
+{
+ assert( m_sock != -1 );
+#ifdef _WIN32
+ closesocket( m_sock );
+#else
+ close( m_sock );
+#endif
+ m_sock = -1;
+}
+
+UdpBroadcast::UdpBroadcast()
+ : m_sock( -1 )
+{
+#ifdef _WIN32
+ InitWinSock();
+#endif
+}
+
+UdpBroadcast::~UdpBroadcast()
+{
+ if( m_sock != -1 ) Close();
+}
+
+bool UdpBroadcast::Open( const char* addr, uint16_t port )
+{
+ assert( m_sock == -1 );
+
+ struct addrinfo hints;
+ struct addrinfo *res, *ptr;
+
+ memset( &hints, 0, sizeof( hints ) );
+ hints.ai_family = AF_INET;
+ hints.ai_socktype = SOCK_DGRAM;
+
+ char portbuf[32];
+ sprintf( portbuf, "%" PRIu16, port );
+
+ if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
+ int sock = 0;
+ for( ptr = res; ptr; ptr = ptr->ai_next )
+ {
+ if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
+#if defined __APPLE__
+ int val = 1;
+ setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
+#endif
+#if defined _WIN32
+ unsigned long broadcast = 1;
+ if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
+#else
+ int broadcast = 1;
+ if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof( broadcast ) ) == -1 )
+#endif
+ {
+#ifdef _WIN32
+ closesocket( sock );
+#else
+ close( sock );
+#endif
+ continue;
+ }
+ break;
+ }
+ freeaddrinfo( res );
+ if( !ptr ) return false;
+
+ m_sock = sock;
+ inet_pton( AF_INET, addr, &m_addr );
+ return true;
+}
+
+void UdpBroadcast::Close()
+{
+ assert( m_sock != -1 );
+#ifdef _WIN32
+ closesocket( m_sock );
+#else
+ close( m_sock );
+#endif
+ m_sock = -1;
+}
+
+int UdpBroadcast::Send( uint16_t port, const void* data, int len )
+{
+ assert( m_sock != -1 );
+ struct sockaddr_in addr;
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons( port );
+ addr.sin_addr.s_addr = m_addr;
+ return sendto( m_sock, (const char*)data, len, MSG_NOSIGNAL, (sockaddr*)&addr, sizeof( addr ) );
+}
+
+IpAddress::IpAddress()
+ : m_number( 0 )
+{
+ *m_text = '\0';
+}
+
+IpAddress::~IpAddress()
+{
+}
+
+void IpAddress::Set( const struct sockaddr& addr )
+{
+#if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 )
+ struct sockaddr_in tmp;
+ memcpy( &tmp, &addr, sizeof( tmp ) );
+ auto ai = &tmp;
+#else
+ auto ai = (const struct sockaddr_in*)&addr;
+#endif
+ inet_ntop( AF_INET, &ai->sin_addr, m_text, 17 );
+ m_number = ai->sin_addr.s_addr;
+}
+
+UdpListen::UdpListen()
+ : m_sock( -1 )
+{
+#ifdef _WIN32
+ InitWinSock();
+#endif
+}
+
+UdpListen::~UdpListen()
+{
+ if( m_sock != -1 ) Close();
+}
+
+bool UdpListen::Listen( uint16_t port )
+{
+ assert( m_sock == -1 );
+
+ int sock;
+ if( ( sock = socket( AF_INET, SOCK_DGRAM, 0 ) ) == -1 ) return false;
+
+#if defined __APPLE__
+ int val = 1;
+ setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
+#endif
+#if defined _WIN32
+ unsigned long reuse = 1;
+ setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) );
+#else
+ int reuse = 1;
+ setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) );
+#endif
+#if defined _WIN32
+ unsigned long broadcast = 1;
+ if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
+#else
+ int broadcast = 1;
+ if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof( broadcast ) ) == -1 )
+#endif
+ {
+#ifdef _WIN32
+ closesocket( sock );
+#else
+ close( sock );
+#endif
+ return false;
+ }
+
+ struct sockaddr_in addr;
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons( port );
+ addr.sin_addr.s_addr = INADDR_ANY;
+
+ if( bind( sock, (sockaddr*)&addr, sizeof( addr ) ) == -1 )
+ {
+#ifdef _WIN32
+ closesocket( sock );
+#else
+ close( sock );
+#endif
+ return false;
+ }
+
+ m_sock = sock;
+ return true;
+}
+
+void UdpListen::Close()
+{
+ assert( m_sock != -1 );
+#ifdef _WIN32
+ closesocket( m_sock );
+#else
+ close( m_sock );
+#endif
+ m_sock = -1;
+}
+
+const char* UdpListen::Read( size_t& len, IpAddress& addr, int timeout )
+{
+ static char buf[2048];
+
+ struct pollfd fd;
+ fd.fd = (socket_t)m_sock;
+ fd.events = POLLIN;
+ if( poll( &fd, 1, timeout ) <= 0 ) return nullptr;
+
+ sockaddr sa;
+ socklen_t salen = sizeof( struct sockaddr );
+ len = (size_t)recvfrom( m_sock, buf, 2048, 0, &sa, &salen );
+ addr.Set( sa );
+
+ return buf;
+}
+
+}
diff --git a/3rdparty/tracy/tracy/common/TracySocket.hpp b/3rdparty/tracy/tracy/common/TracySocket.hpp
new file mode 100644
index 0000000..4de4cca
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracySocket.hpp
@@ -0,0 +1,156 @@
+#ifndef __TRACYSOCKET_HPP__
+#define __TRACYSOCKET_HPP__
+
+#include <atomic>
+#include <stdint.h>
+
+#include "TracyForceInline.hpp"
+
+struct addrinfo;
+struct sockaddr;
+
+namespace tracy
+{
+
+#ifdef _WIN32
+void InitWinSock();
+#endif
+
+class Socket
+{
+public:
+ Socket();
+ Socket( int sock );
+ ~Socket();
+
+ bool Connect( const char* addr, uint16_t port );
+ bool ConnectBlocking( const char* addr, uint16_t port );
+ void Close();
+
+ int Send( const void* buf, int len );
+ int GetSendBufSize();
+
+ int ReadUpTo( void* buf, int len, int timeout );
+ bool Read( void* buf, int len, int timeout );
+
+ template<typename ShouldExit>
+ bool Read( void* buf, int len, int timeout, ShouldExit exitCb )
+ {
+ auto cbuf = (char*)buf;
+ while( len > 0 )
+ {
+ if( exitCb() ) return false;
+ if( !ReadImpl( cbuf, len, timeout ) ) return false;
+ }
+ return true;
+ }
+
+ bool ReadRaw( void* buf, int len, int timeout );
+ bool HasData();
+ bool IsValid() const;
+
+ Socket( const Socket& ) = delete;
+ Socket( Socket&& ) = delete;
+ Socket& operator=( const Socket& ) = delete;
+ Socket& operator=( Socket&& ) = delete;
+
+private:
+ int RecvBuffered( void* buf, int len, int timeout );
+ int Recv( void* buf, int len, int timeout );
+
+ bool ReadImpl( char*& buf, int& len, int timeout );
+
+ char* m_buf;
+ char* m_bufPtr;
+ std::atomic<int> m_sock;
+ int m_bufLeft;
+
+ struct addrinfo *m_res;
+ struct addrinfo *m_ptr;
+ int m_connSock;
+};
+
+class ListenSocket
+{
+public:
+ ListenSocket();
+ ~ListenSocket();
+
+ bool Listen( uint16_t port, int backlog );
+ Socket* Accept();
+ void Close();
+
+ ListenSocket( const ListenSocket& ) = delete;
+ ListenSocket( ListenSocket&& ) = delete;
+ ListenSocket& operator=( const ListenSocket& ) = delete;
+ ListenSocket& operator=( ListenSocket&& ) = delete;
+
+private:
+ int m_sock;
+};
+
+class UdpBroadcast
+{
+public:
+ UdpBroadcast();
+ ~UdpBroadcast();
+
+ bool Open( const char* addr, uint16_t port );
+ void Close();
+
+ int Send( uint16_t port, const void* data, int len );
+
+ UdpBroadcast( const UdpBroadcast& ) = delete;
+ UdpBroadcast( UdpBroadcast&& ) = delete;
+ UdpBroadcast& operator=( const UdpBroadcast& ) = delete;
+ UdpBroadcast& operator=( UdpBroadcast&& ) = delete;
+
+private:
+ int m_sock;
+ uint32_t m_addr;
+};
+
+class IpAddress
+{
+public:
+ IpAddress();
+ ~IpAddress();
+
+ void Set( const struct sockaddr& addr );
+
+ uint32_t GetNumber() const { return m_number; }
+ const char* GetText() const { return m_text; }
+
+ IpAddress( const IpAddress& ) = delete;
+ IpAddress( IpAddress&& ) = delete;
+ IpAddress& operator=( const IpAddress& ) = delete;
+ IpAddress& operator=( IpAddress&& ) = delete;
+
+private:
+ uint32_t m_number;
+ char m_text[17];
+};
+
+class UdpListen
+{
+public:
+ UdpListen();
+ ~UdpListen();
+
+ bool Listen( uint16_t port );
+ void Close();
+
+ const char* Read( size_t& len, IpAddress& addr, int timeout );
+
+ UdpListen( const UdpListen& ) = delete;
+ UdpListen( UdpListen&& ) = delete;
+ UdpListen& operator=( const UdpListen& ) = delete;
+ UdpListen& operator=( UdpListen&& ) = delete;
+
+private:
+ int m_sock;
+};
+
+}
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracyStackFrames.cpp b/3rdparty/tracy/tracy/common/TracyStackFrames.cpp
new file mode 100644
index 0000000..7b0abac
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyStackFrames.cpp
@@ -0,0 +1,122 @@
+#include "TracyStackFrames.hpp"
+
+namespace tracy
+{
+
+const char* s_tracyStackFrames_[] = {
+ "tracy::Callstack",
+ "tracy::Callstack(int)",
+ "tracy::GpuCtxScope::{ctor}",
+ "tracy::Profiler::SendCallstack",
+ "tracy::Profiler::SendCallstack(int)",
+ "tracy::Profiler::SendCallstack(int, unsigned long)",
+ "tracy::Profiler::MemAllocCallstack",
+ "tracy::Profiler::MemAllocCallstack(void const*, unsigned long, int)",
+ "tracy::Profiler::MemFreeCallstack",
+ "tracy::Profiler::MemFreeCallstack(void const*, int)",
+ "tracy::ScopedZone::{ctor}",
+ "tracy::ScopedZone::ScopedZone(tracy::SourceLocationData const*, int, bool)",
+ "tracy::Profiler::Message",
+ nullptr
+};
+
+const char** s_tracyStackFrames = s_tracyStackFrames_;
+
+const StringMatch s_tracySkipSubframes_[] = {
+ { "/include/arm_neon.h", 19 },
+ { "/include/adxintrin.h", 20 },
+ { "/include/ammintrin.h", 20 },
+ { "/include/amxbf16intrin.h", 24 },
+ { "/include/amxint8intrin.h", 24 },
+ { "/include/amxtileintrin.h", 24 },
+ { "/include/avx2intrin.h", 21 },
+ { "/include/avx5124fmapsintrin.h", 29 },
+ { "/include/avx5124vnniwintrin.h", 29 },
+ { "/include/avx512bf16intrin.h", 27 },
+ { "/include/avx512bf16vlintrin.h", 29 },
+ { "/include/avx512bitalgintrin.h", 29 },
+ { "/include/avx512bwintrin.h", 25 },
+ { "/include/avx512cdintrin.h", 25 },
+ { "/include/avx512dqintrin.h", 25 },
+ { "/include/avx512erintrin.h", 25 },
+ { "/include/avx512fintrin.h", 24 },
+ { "/include/avx512ifmaintrin.h", 27 },
+ { "/include/avx512ifmavlintrin.h", 29 },
+ { "/include/avx512pfintrin.h", 25 },
+ { "/include/avx512vbmi2intrin.h", 28 },
+ { "/include/avx512vbmi2vlintrin.h", 30 },
+ { "/include/avx512vbmiintrin.h", 27 },
+ { "/include/avx512vbmivlintrin.h", 29 },
+ { "/include/avx512vlbwintrin.h", 27 },
+ { "/include/avx512vldqintrin.h", 27 },
+ { "/include/avx512vlintrin.h", 25 },
+ { "/include/avx512vnniintrin.h", 27 },
+ { "/include/avx512vnnivlintrin.h", 29 },
+ { "/include/avx512vp2intersectintrin.h", 35 },
+ { "/include/avx512vp2intersectvlintrin.h", 37 },
+ { "/include/avx512vpopcntdqintrin.h", 32 },
+ { "/include/avx512vpopcntdqvlintrin.h", 34 },
+ { "/include/avxintrin.h", 20 },
+ { "/include/avxvnniintrin.h", 24 },
+ { "/include/bmi2intrin.h", 21 },
+ { "/include/bmiintrin.h", 20 },
+ { "/include/bmmintrin.h", 20 },
+ { "/include/cetintrin.h", 20 },
+ { "/include/cldemoteintrin.h", 25 },
+ { "/include/clflushoptintrin.h", 27 },
+ { "/include/clwbintrin.h", 21 },
+ { "/include/clzerointrin.h", 23 },
+ { "/include/emmintrin.h", 20 },
+ { "/include/enqcmdintrin.h", 23 },
+ { "/include/f16cintrin.h", 21 },
+ { "/include/fma4intrin.h", 21 },
+ { "/include/fmaintrin.h", 20 },
+ { "/include/fxsrintrin.h", 21 },
+ { "/include/gfniintrin.h", 21 },
+ { "/include/hresetintrin.h", 23 },
+ { "/include/ia32intrin.h", 21 },
+ { "/include/immintrin.h", 20 },
+ { "/include/keylockerintrin.h", 26 },
+ { "/include/lwpintrin.h", 20 },
+ { "/include/lzcntintrin.h", 22 },
+ { "/include/mmintrin.h", 19 },
+ { "/include/movdirintrin.h", 23 },
+ { "/include/mwaitxintrin.h", 23 },
+ { "/include/nmmintrin.h", 20 },
+ { "/include/pconfigintrin.h", 24 },
+ { "/include/pkuintrin.h", 20 },
+ { "/include/pmmintrin.h", 20 },
+ { "/include/popcntintrin.h", 23 },
+ { "/include/prfchwintrin.h", 23 },
+ { "/include/rdseedintrin.h", 23 },
+ { "/include/rtmintrin.h", 20 },
+ { "/include/serializeintrin.h", 26 },
+ { "/include/sgxintrin.h", 20 },
+ { "/include/shaintrin.h", 20 },
+ { "/include/smmintrin.h", 20 },
+ { "/include/tbmintrin.h", 20 },
+ { "/include/tmmintrin.h", 20 },
+ { "/include/tsxldtrkintrin.h", 25 },
+ { "/include/uintrintrin.h", 22 },
+ { "/include/vaesintrin.h", 21 },
+ { "/include/vpclmulqdqintrin.h", 27 },
+ { "/include/waitpkgintrin.h", 24 },
+ { "/include/wbnoinvdintrin.h", 25 },
+ { "/include/wmmintrin.h", 20 },
+ { "/include/x86gprintrin.h", 23 },
+ { "/include/x86intrin.h", 20 },
+ { "/include/xmmintrin.h", 20 },
+ { "/include/xopintrin.h", 20 },
+ { "/include/xsavecintrin.h", 23 },
+ { "/include/xsaveintrin.h", 22 },
+ { "/include/xsaveoptintrin.h", 25 },
+ { "/include/xsavesintrin.h", 23 },
+ { "/include/xtestintrin.h", 22 },
+ { "/bits/atomic_base.h", 19 },
+ { "/atomic", 7 },
+ {}
+};
+
+const StringMatch* s_tracySkipSubframes = s_tracySkipSubframes_;
+
+}
diff --git a/3rdparty/tracy/tracy/common/TracyStackFrames.hpp b/3rdparty/tracy/tracy/common/TracyStackFrames.hpp
new file mode 100644
index 0000000..9d4262c
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyStackFrames.hpp
@@ -0,0 +1,22 @@
+#ifndef __TRACYSTACKFRAMES_HPP__
+#define __TRACYSTACKFRAMES_HPP__
+
+#include <stddef.h>
+
+namespace tracy
+{
+
+struct StringMatch
+{
+ const char* str;
+ size_t len;
+};
+
+extern const char** s_tracyStackFrames;
+extern const StringMatch* s_tracySkipSubframes;
+
+static constexpr int s_tracySkipSubframesMinLen = 7;
+
+}
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracySystem.cpp b/3rdparty/tracy/tracy/common/TracySystem.cpp
new file mode 100644
index 0000000..1248fde
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracySystem.cpp
@@ -0,0 +1,304 @@
+#ifdef _MSC_VER
+# pragma warning(disable:4996)
+#endif
+#if defined _WIN32
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# endif
+# ifndef NOMINMAX
+# define NOMINMAX
+# endif
+# include <windows.h>
+# include <malloc.h>
+# include "TracyUwp.hpp"
+#else
+# include <pthread.h>
+# include <string.h>
+# include <unistd.h>
+#endif
+
+#ifdef __linux__
+# ifdef __ANDROID__
+# include <sys/types.h>
+# else
+# include <sys/syscall.h>
+# endif
+# include <fcntl.h>
+#elif defined __FreeBSD__
+# include <sys/thr.h>
+#elif defined __NetBSD__ || defined __DragonFly__
+# include <sys/lwp.h>
+#endif
+
+#ifdef __MINGW32__
+# define __STDC_FORMAT_MACROS
+#endif
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "TracySystem.hpp"
+
+#if defined _WIN32
+extern "C" typedef HRESULT (WINAPI *t_SetThreadDescription)( HANDLE, PCWSTR );
+extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* );
+#endif
+
+#ifdef TRACY_ENABLE
+# include <atomic>
+# include "TracyAlloc.hpp"
+#endif
+
+namespace tracy
+{
+
+namespace detail
+{
+
+TRACY_API uint32_t GetThreadHandleImpl()
+{
+#if defined _WIN32
+ static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint32_t ), "Thread handle too big to fit in protocol" );
+ return uint32_t( GetCurrentThreadId() );
+#elif defined __APPLE__
+ uint64_t id;
+ pthread_threadid_np( pthread_self(), &id );
+ return uint32_t( id );
+#elif defined __ANDROID__
+ return (uint32_t)gettid();
+#elif defined __linux__
+ return (uint32_t)syscall( SYS_gettid );
+#elif defined __FreeBSD__
+ long id;
+ thr_self( &id );
+ return id;
+#elif defined __NetBSD__
+ return _lwp_self();
+#elif defined __DragonFly__
+ return lwp_gettid();
+#elif defined __OpenBSD__
+ return getthrid();
+#else
+ // To add support for a platform, retrieve and return the kernel thread identifier here.
+ //
+ // Note that pthread_t (as for example returned by pthread_self()) is *not* a kernel
+ // thread identifier. It is a pointer to a library-allocated data structure instead.
+ // Such pointers will be reused heavily, making the pthread_t non-unique. Additionally
+ // a 64-bit pointer cannot be reliably truncated to 32 bits.
+ #error "Unsupported platform!"
+#endif
+
+}
+
+}
+
+#ifdef TRACY_ENABLE
+struct ThreadNameData
+{
+ uint32_t id;
+ const char* name;
+ ThreadNameData* next;
+};
+std::atomic<ThreadNameData*>& GetThreadNameData();
+#endif
+
+#ifdef _MSC_VER
+# pragma pack( push, 8 )
+struct THREADNAME_INFO
+{
+ DWORD dwType;
+ LPCSTR szName;
+ DWORD dwThreadID;
+ DWORD dwFlags;
+};
+# pragma pack(pop)
+
+void ThreadNameMsvcMagic( const THREADNAME_INFO& info )
+{
+ __try
+ {
+ RaiseException( 0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info );
+ }
+ __except(EXCEPTION_EXECUTE_HANDLER)
+ {
+ }
+}
+#endif
+
+TRACY_API void SetThreadName( const char* name )
+{
+#if defined _WIN32
+# ifdef TRACY_UWP
+ static auto _SetThreadDescription = &::SetThreadDescription;
+# else
+ static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" );
+# endif
+ if( _SetThreadDescription )
+ {
+ wchar_t buf[256];
+ mbstowcs( buf, name, 256 );
+ _SetThreadDescription( GetCurrentThread(), buf );
+ }
+ else
+ {
+# if defined _MSC_VER
+ THREADNAME_INFO info;
+ info.dwType = 0x1000;
+ info.szName = name;
+ info.dwThreadID = GetCurrentThreadId();
+ info.dwFlags = 0;
+ ThreadNameMsvcMagic( info );
+# endif
+ }
+#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__
+ {
+ const auto sz = strlen( name );
+ if( sz <= 15 )
+ {
+#if defined __APPLE__
+ pthread_setname_np( name );
+#else
+ pthread_setname_np( pthread_self(), name );
+#endif
+ }
+ else
+ {
+ char buf[16];
+ memcpy( buf, name, 15 );
+ buf[15] = '\0';
+#if defined __APPLE__
+ pthread_setname_np( buf );
+#else
+ pthread_setname_np( pthread_self(), buf );
+#endif
+ }
+ }
+#endif
+#ifdef TRACY_ENABLE
+ {
+ const auto sz = strlen( name );
+ char* buf = (char*)tracy_malloc( sz+1 );
+ memcpy( buf, name, sz );
+ buf[sz] = '\0';
+ auto data = (ThreadNameData*)tracy_malloc_fast( sizeof( ThreadNameData ) );
+ data->id = detail::GetThreadHandleImpl();
+ data->name = buf;
+ data->next = GetThreadNameData().load( std::memory_order_relaxed );
+ while( !GetThreadNameData().compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {}
+ }
+#endif
+}
+
+TRACY_API const char* GetThreadName( uint32_t id )
+{
+ static char buf[256];
+#ifdef TRACY_ENABLE
+ auto ptr = GetThreadNameData().load( std::memory_order_relaxed );
+ while( ptr )
+ {
+ if( ptr->id == id )
+ {
+ return ptr->name;
+ }
+ ptr = ptr->next;
+ }
+#else
+# if defined _WIN32
+# ifdef TRACY_UWP
+ static auto _GetThreadDescription = &::GetThreadDescription;
+# else
+ static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
+# endif
+ if( _GetThreadDescription )
+ {
+ auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id );
+ if( hnd != 0 )
+ {
+ PWSTR tmp;
+ _GetThreadDescription( hnd, &tmp );
+ auto ret = wcstombs( buf, tmp, 256 );
+ CloseHandle( hnd );
+ if( ret != 0 )
+ {
+ return buf;
+ }
+ }
+ }
+# elif defined __linux__
+ int cs, fd;
+ char path[32];
+# ifdef __ANDROID__
+ int tid = gettid();
+# else
+ int tid = (int) syscall( SYS_gettid );
+# endif
+ snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", tid );
+ sprintf( buf, "%" PRIu32, id );
+# ifndef __ANDROID__
+ pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs );
+# endif
+ if ( ( fd = open( path, O_RDONLY ) ) > 0) {
+ int len = read( fd, buf, 255 );
+ if( len > 0 )
+ {
+ buf[len] = 0;
+ if( len > 1 && buf[len-1] == '\n' )
+ {
+ buf[len-1] = 0;
+ }
+ }
+ close( fd );
+ }
+# ifndef __ANDROID__
+ pthread_setcancelstate( cs, 0 );
+# endif
+ return buf;
+# endif
+#endif
+ sprintf( buf, "%" PRIu32, id );
+ return buf;
+}
+
+TRACY_API const char* GetEnvVar( const char* name )
+{
+#if defined _WIN32
+ // unfortunately getenv() on Windows is just fundamentally broken. It caches the entire
+ // environment block once on startup, then never refreshes it again. If any environment
+ // strings are added or modified after startup of the CRT, those changes will not be
+ // seen by getenv(). This removes the possibility of an app using this SDK from
+ // programmatically setting any of the behaviour controlling envvars here.
+ //
+ // To work around this, we'll instead go directly to the Win32 environment strings APIs
+ // to get the current value.
+ static char buffer[1024];
+ DWORD const kBufferSize = DWORD(sizeof(buffer) / sizeof(buffer[0]));
+ DWORD count = GetEnvironmentVariableA(name, buffer, kBufferSize);
+
+ if( count == 0 )
+ return nullptr;
+
+ if( count >= kBufferSize )
+ {
+ char* buf = reinterpret_cast<char*>(_alloca(count + 1));
+ count = GetEnvironmentVariableA(name, buf, count + 1);
+ memcpy(buffer, buf, kBufferSize);
+ buffer[kBufferSize - 1] = 0;
+ }
+
+ return buffer;
+#else
+ return getenv(name);
+#endif
+}
+
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); }
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracySystem.hpp b/3rdparty/tracy/tracy/common/TracySystem.hpp
new file mode 100644
index 0000000..e0040e9
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracySystem.hpp
@@ -0,0 +1,32 @@
+#ifndef __TRACYSYSTEM_HPP__
+#define __TRACYSYSTEM_HPP__
+
+#include <stdint.h>
+
+#include "TracyApi.h"
+
+namespace tracy
+{
+
+namespace detail
+{
+TRACY_API uint32_t GetThreadHandleImpl();
+}
+
+#ifdef TRACY_ENABLE
+TRACY_API uint32_t GetThreadHandle();
+#else
+static inline uint32_t GetThreadHandle()
+{
+ return detail::GetThreadHandleImpl();
+}
+#endif
+
+TRACY_API void SetThreadName( const char* name );
+TRACY_API const char* GetThreadName( uint32_t id );
+
+TRACY_API const char* GetEnvVar(const char* name);
+
+}
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracyUwp.hpp b/3rdparty/tracy/tracy/common/TracyUwp.hpp
new file mode 100644
index 0000000..7dce96b
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyUwp.hpp
@@ -0,0 +1,11 @@
+#ifndef __TRACYUWP_HPP__
+#define __TRACYUWP_HPP__
+
+#ifdef _WIN32
+# include <winapifamily.h>
+# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+# define TRACY_UWP
+# endif
+#endif
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/TracyYield.hpp b/3rdparty/tracy/tracy/common/TracyYield.hpp
new file mode 100644
index 0000000..403ca29
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/TracyYield.hpp
@@ -0,0 +1,26 @@
+#ifndef __TRACYYIELD_HPP__
+#define __TRACYYIELD_HPP__
+
+#if defined __SSE2__ || defined _M_AMD64 || _M_IX86_FP == 2
+# include <emmintrin.h>
+#else
+# include <thread>
+#endif
+
+#include "TracyForceInline.hpp"
+
+namespace tracy
+{
+
+static tracy_force_inline void YieldThread()
+{
+#if defined __SSE2__ || defined _M_AMD64 || _M_IX86_FP == 2
+ _mm_pause();
+#else
+ std::this_thread::yield();
+#endif
+}
+
+}
+
+#endif
diff --git a/3rdparty/tracy/tracy/common/src-from-vcxproj.mk b/3rdparty/tracy/tracy/common/src-from-vcxproj.mk
new file mode 100644
index 0000000..3a16b19
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/src-from-vcxproj.mk
@@ -0,0 +1,21 @@
+# Extract the actual list of source files from a sibling Visual Studio project.
+
+# Ensure these are simply-substituted variables, without changing their values.
+SRC := $(SRC)
+SRC2 := $(SRC2)
+SRC3 := $(SRC3)
+SRC4 := $(SRC4)
+
+# Paths here are relative to the directory in which make was invoked, not to
+# this file, so ../win32/$(PROJECT).vcxproj refers to the Visual Studio project
+# of whichever tool is including this makefile fragment.
+
+BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
+BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
+BASE4 := $(shell egrep 'None.*S"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g')
+
+# The tool-specific makefile may request that certain files be omitted.
+SRC += $(filter-out $(FILTER),$(BASE))
+SRC2 += $(filter-out $(FILTER),$(BASE2))
+SRC3 += $(filter-out $(FILTER),$(BASE3))
+SRC4 += $(filter-out $(FILTER),$(BASE4))
diff --git a/3rdparty/tracy/tracy/common/tracy_lz4.cpp b/3rdparty/tracy/tracy/common/tracy_lz4.cpp
new file mode 100644
index 0000000..5a31aa7
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/tracy_lz4.cpp
@@ -0,0 +1,2492 @@
+/*
+ LZ4 - Fast LZ compression algorithm
+ Copyright (C) 2011-present, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://www.lz4.org
+ - LZ4 source repository : https://github.com/lz4/lz4
+*/
+
+/*-************************************
+* Tuning parameters
+**************************************/
+/*
+ * LZ4_HEAPMODE :
+ * Select how default compression functions will allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
+ */
+#ifndef LZ4_HEAPMODE
+# define LZ4_HEAPMODE 0
+#endif
+
+/*
+ * LZ4_ACCELERATION_DEFAULT :
+ * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
+ */
+#define LZ4_ACCELERATION_DEFAULT 1
+/*
+ * LZ4_ACCELERATION_MAX :
+ * Any "acceleration" value higher than this threshold
+ * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
+ */
+#define LZ4_ACCELERATION_MAX 65537
+
+
+/*-************************************
+* CPU Feature Detection
+**************************************/
+/* LZ4_FORCE_MEMORY_ACCESS
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violate C standard.
+ * It can generate buggy code on targets which assembly generation depends on alignment.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */
+# if defined(__GNUC__) && \
+ ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
+ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define LZ4_FORCE_MEMORY_ACCESS 2
+# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
+# define LZ4_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*
+ * LZ4_FORCE_SW_BITCOUNT
+ * Define this parameter if your target system or compiler does not support hardware bit count
+ */
+#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */
+# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */
+# define LZ4_FORCE_SW_BITCOUNT
+#endif
+
+
+
+/*-************************************
+* Dependency
+**************************************/
+/*
+ * LZ4_SRC_INCLUDED:
+ * Amalgamation flag, whether lz4.c is included
+ */
+#ifndef LZ4_SRC_INCLUDED
+# define LZ4_SRC_INCLUDED 1
+#endif
+
+#ifndef LZ4_STATIC_LINKING_ONLY
+#define LZ4_STATIC_LINKING_ONLY
+#endif
+
+#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
+#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
+#endif
+
+#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */
+#include "tracy_lz4.hpp"
+/* see also "memory routines" below */
+
+
+/*-************************************
+* Compiler Options
+**************************************/
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */
+# include <intrin.h> /* only present in VS2005+ */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif /* _MSC_VER */
+
+#ifndef LZ4_FORCE_INLINE
+# ifdef _MSC_VER /* Visual Studio */
+# define LZ4_FORCE_INLINE static __forceinline
+# else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define LZ4_FORCE_INLINE static inline
+# endif
+# else
+# define LZ4_FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+# endif /* _MSC_VER */
+#endif /* LZ4_FORCE_INLINE */
+
+/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
+ * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
+ * together with a simple 8-byte copy loop as a fall-back path.
+ * However, this optimization hurts the decompression speed by >30%,
+ * because the execution does not go to the optimized loop
+ * for typical compressible data, and all of the preamble checks
+ * before going to the fall-back path become useless overhead.
+ * This optimization happens only with the -O3 flag, and -O2 generates
+ * a simple 8-byte copy loop.
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
+ * functions are annotated with __attribute__((optimize("O2"))),
+ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy8 does not affect the compression speed.
+ */
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
+# define LZ4_FORCE_O2 __attribute__((optimize("O2")))
+# undef LZ4_FORCE_INLINE
+# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
+#else
+# define LZ4_FORCE_O2
+#endif
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
+# define expect(expr,value) (__builtin_expect ((expr),(value)) )
+#else
+# define expect(expr,value) (expr)
+#endif
+
+#ifndef likely
+#define likely(expr) expect((expr) != 0, 1)
+#endif
+#ifndef unlikely
+#define unlikely(expr) expect((expr) != 0, 0)
+#endif
+
+/* Should the alignment test prove unreliable, for some reason,
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
+#ifndef LZ4_ALIGN_TEST /* can be externally provided */
+# define LZ4_ALIGN_TEST 1
+#endif
+
+
+/*-************************************
+* Memory routines
+**************************************/
+#ifdef LZ4_USER_MEMORY_FUNCTIONS
+/* memory management functions can be customized by user project.
+ * Below functions must exist somewhere in the Project
+ * and be available at link time */
+void* LZ4_malloc(size_t s);
+void* LZ4_calloc(size_t n, size_t s);
+void LZ4_free(void* p);
+# define ALLOC(s) LZ4_malloc(s)
+# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
+# define FREEMEM(p) LZ4_free(p)
+#else
+# include <stdlib.h> /* malloc, calloc, free */
+# define ALLOC(s) malloc(s)
+# define ALLOC_AND_ZERO(s) calloc(1,s)
+# define FREEMEM(p) free(p)
+#endif
+
+#include <string.h> /* memset, memcpy */
+#define MEM_INIT(p,v,s) memset((p),(v),(s))
+
+
+/*-************************************
+* Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */
+# error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS 4
+#define ML_MASK ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/*-************************************
+* Error detection
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+# include <assert.h>
+#else
+# ifndef assert
+# define assert(condition) ((void)0)
+# endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+# include <stdio.h>
+ static int g_debuglog_enable = 1;
+# define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
+#else
+# define DEBUGLOG(l, ...) {} /* disabled */
+#endif
+
+static int LZ4_isAligned(const void* ptr, size_t alignment)
+{
+ return ((size_t)ptr & (alignment -1)) == 0;
+}
+
+
+/*-************************************
+* Types
+**************************************/
+#include <limits.h>
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef uintptr_t uptrval;
+#else
+# if UINT_MAX != 4294967295UL
+# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+# endif
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+ typedef size_t uptrval; /* generally true, except OpenVMS-64 */
+#endif
+
+#if defined(__x86_64__)
+ typedef U64 reg_t; /* 64-bits in x32 mode */
+#else
+ typedef size_t reg_t; /* 32-bits in x32 mode */
+#endif
+
+typedef enum {
+ notLimited = 0,
+ limitedOutput = 1,
+ fillOutput = 2
+} limitedOutput_directive;
+
+namespace tracy
+{
+
+/*-************************************
+* Reading and writing into memory
+**************************************/
+
+/**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+#else
+#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
+#endif
+
+static unsigned LZ4_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+
+#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
+/* lie to the compiler about data alignment; use with caution */
+
+static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
+static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
+static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
+
+static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+
+#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;
+
+static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
+static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }
+
+static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
+static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
+
+#else /* safe and portable access using memcpy() */
+
+static U16 LZ4_read16(const void* memPtr)
+{
+ U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 LZ4_read32(const void* memPtr)
+{
+ U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static reg_t LZ4_read_ARCH(const void* memPtr)
+{
+ reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static void LZ4_write16(void* memPtr, U16 value)
+{
+ LZ4_memcpy(memPtr, &value, sizeof(value));
+}
+
+static void LZ4_write32(void* memPtr, U32 value)
+{
+ LZ4_memcpy(memPtr, &value, sizeof(value));
+}
+
+#endif /* LZ4_FORCE_MEMORY_ACCESS */
+
+
+static U16 LZ4_readLE16(const void* memPtr)
+{
+ if (LZ4_isLittleEndian()) {
+ return LZ4_read16(memPtr);
+ } else {
+ const BYTE* p = (const BYTE*)memPtr;
+ return (U16)((U16)p[0] + (p[1]<<8));
+ }
+}
+
+static void LZ4_writeLE16(void* memPtr, U16 value)
+{
+ if (LZ4_isLittleEndian()) {
+ LZ4_write16(memPtr, value);
+ } else {
+ BYTE* p = (BYTE*)memPtr;
+ p[0] = (BYTE) value;
+ p[1] = (BYTE)(value>>8);
+ }
+}
+
+/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
+LZ4_FORCE_INLINE
+void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+ BYTE* d = (BYTE*)dstPtr;
+ const BYTE* s = (const BYTE*)srcPtr;
+ BYTE* const e = (BYTE*)dstEnd;
+
+ do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
+}
+
+static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
+static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
+
+
+#ifndef LZ4_FAST_DEC_LOOP
+# if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
+# define LZ4_FAST_DEC_LOOP 1
+# elif defined(__aarch64__) && !defined(__clang__)
+ /* On aarch64, we disable this optimization for clang because on certain
+ * mobile chipsets, performance is reduced with clang. For information
+ * refer to https://github.com/lz4/lz4/pull/707 */
+# define LZ4_FAST_DEC_LOOP 1
+# else
+# define LZ4_FAST_DEC_LOOP 0
+# endif
+#endif
+
+#if LZ4_FAST_DEC_LOOP
+
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+ assert(srcPtr + offset == dstPtr);
+ if (offset < 8) {
+ LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
+ dstPtr[0] = srcPtr[0];
+ dstPtr[1] = srcPtr[1];
+ dstPtr[2] = srcPtr[2];
+ dstPtr[3] = srcPtr[3];
+ srcPtr += inc32table[offset];
+ LZ4_memcpy(dstPtr+4, srcPtr, 4);
+ srcPtr -= dec64table[offset];
+ dstPtr += 8;
+ } else {
+ LZ4_memcpy(dstPtr, srcPtr, 8);
+ dstPtr += 8;
+ srcPtr += 8;
+ }
+
+ LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
+}
+
+/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
+ * this version copies two times 16 bytes (instead of one time 32 bytes)
+ * because it must be compatible with offsets >= 16. */
+LZ4_FORCE_INLINE void
+LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
+{
+ BYTE* d = (BYTE*)dstPtr;
+ const BYTE* s = (const BYTE*)srcPtr;
+ BYTE* const e = (BYTE*)dstEnd;
+
+ do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
+}
+
+/* LZ4_memcpy_using_offset() presumes :
+ * - dstEnd >= dstPtr + MINMATCH
+ * - there is at least 8 bytes available to write after dstEnd */
+LZ4_FORCE_INLINE void
+LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
+{
+ BYTE v[8];
+
+ assert(dstEnd >= dstPtr + MINMATCH);
+
+ switch(offset) {
+ case 1:
+ MEM_INIT(v, *srcPtr, 8);
+ break;
+ case 2:
+ LZ4_memcpy(v, srcPtr, 2);
+ LZ4_memcpy(&v[2], srcPtr, 2);
+ LZ4_memcpy(&v[4], v, 4);
+ break;
+ case 4:
+ LZ4_memcpy(v, srcPtr, 4);
+ LZ4_memcpy(&v[4], srcPtr, 4);
+ break;
+ default:
+ LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
+ return;
+ }
+
+ LZ4_memcpy(dstPtr, v, 8);
+ dstPtr += 8;
+ while (dstPtr < dstEnd) {
+ LZ4_memcpy(dstPtr, v, 8);
+ dstPtr += 8;
+ }
+}
+#endif
+
+
+/*-************************************
+* Common functions
+**************************************/
+LZ4_FORCE_INLINE unsigned LZ4_NbCommonBytes (reg_t val)
+{
+ assert(val != 0);
+ if (LZ4_isLittleEndian()) {
+ if (sizeof(val) == 8) {
+# if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
+ return (unsigned)_tzcnt_u64(val) >> 3;
+# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r = 0;
+ _BitScanForward64(&r, (U64)val);
+ return (unsigned)r >> 3;
+# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_ctzll((U64)val) >> 3;
+# else
+ const U64 m = 0x0101010101010101ULL;
+ val ^= val - 1;
+ return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
+# endif
+ } else /* 32 bits */ {
+# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ unsigned long r;
+ _BitScanForward(&r, (U32)val);
+ return (unsigned)r >> 3;
+# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_ctz((U32)val) >> 3;
+# else
+ const U32 m = 0x01010101;
+ return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
+# endif
+ }
+ } else /* Big Endian CPU */ {
+ if (sizeof(val)==8) {
+# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_clzll((U64)val) >> 3;
+# else
+#if 1
+ /* this method is probably faster,
+ * but adds a 128 bytes lookup table */
+ static const unsigned char ctz7_tab[128] = {
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ };
+ U64 const mask = 0x0101010101010101ULL;
+ U64 const t = (((val >> 8) - mask) | val) & mask;
+ return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
+#else
+ /* this method doesn't consume memory space like the previous one,
+ * but it contains several branches,
+ * that may end up slowing execution */
+ static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
+ Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+ Note that this code path is never triggered in 32-bits mode. */
+ unsigned r;
+ if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+ r += (!val);
+ return r;
+#endif
+# endif
+ } else /* 32 bits */ {
+# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(LZ4_FORCE_SW_BITCOUNT)
+ return (unsigned)__builtin_clz((U32)val) >> 3;
+# else
+ val >>= 8;
+ val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+ (val + 0x00FF0000)) >> 24;
+ return (unsigned)val ^ 3;
+# endif
+ }
+ }
+}
+
+
+#define STEPSIZE sizeof(reg_t)
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+ const BYTE* const pStart = pIn;
+
+ if (likely(pIn < pInLimit-(STEPSIZE-1))) {
+ reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+ if (!diff) {
+ pIn+=STEPSIZE; pMatch+=STEPSIZE;
+ } else {
+ return LZ4_NbCommonBytes(diff);
+ } }
+
+ while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+ reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+ if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+ pIn += LZ4_NbCommonBytes(diff);
+ return (unsigned)(pIn - pStart);
+ }
+
+ if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
+ if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
+ if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+ return (unsigned)(pIn - pStart);
+}
+
+
+#ifndef LZ4_COMMONDEFS_ONLY
+/*-************************************
+* Local Constants
+**************************************/
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6; /* Increase this value ==> compression run slower on incompressible data */
+
+
+/*-************************************
+* Local Structures and types
+**************************************/
+typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
+
+/**
+ * This enum distinguishes several different modes of accessing previous
+ * content in the stream.
+ *
+ * - noDict : There is no preceding content.
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
+ * blob being compressed are valid and refer to the preceding
+ * content (of length ctx->dictSize), which is available
+ * contiguously preceding in memory the content currently
+ * being compressed.
+ * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere
+ * else in memory, starting at ctx->dictionary with length
+ * ctx->dictSize.
+ * - usingDictCtx : Like usingExtDict, but everything concerning the preceding
+ * content is in a separate context, pointed to by
+ * ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
+ * entries in the current context that refer to positions
+ * preceding the beginning of the current compression are
+ * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
+ * ->dictSize describe the location and size of the preceding
+ * content, and matches are found by looking in the ctx
+ * ->dictCtx->hashTable.
+ */
+typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+
+/*-************************************
+* Local Utils
+**************************************/
+int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
+const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
+int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
+int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; }
+
+
+/*-************************************
+* Internal Definitions used in Tests
+**************************************/
+
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
+
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+ int compressedSize, int maxOutputSize,
+ const void* dictStart, size_t dictSize);
+
+/*-******************************
+* Compression functions
+********************************/
+LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+{
+ if (tableType == byU16)
+ return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
+ else
+ return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
+}
+
+LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+ const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
+ if (LZ4_isLittleEndian()) {
+ const U64 prime5bytes = 889523592379ULL;
+ return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
+ } else {
+ const U64 prime8bytes = 11400714785074694791ULL;
+ return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
+ }
+}
+
+LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
+{
+ if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+ return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
+{
+ switch (tableType)
+ {
+ default: /* fallthrough */
+ case clearedTable: { /* illegal! */ assert(0); return; }
+ case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
+ case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
+ case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
+ }
+}
+
+LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
+{
+ switch (tableType)
+ {
+ default: /* fallthrough */
+ case clearedTable: /* fallthrough */
+ case byPtr: { /* illegal! */ assert(0); return; }
+ case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
+ case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
+ }
+}
+
+LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
+ void* tableBase, tableType_t const tableType,
+ const BYTE* srcBase)
+{
+ switch (tableType)
+ {
+ case clearedTable: { /* illegal! */ assert(0); return; }
+ case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
+ case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
+ case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
+ }
+}
+
+LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+ U32 const h = LZ4_hashPosition(p, tableType);
+ LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+/* LZ4_getIndexOnHash() :
+ * Index of match position registered in hash table.
+ * hash position must be calculated by using base+index, or dictBase+index.
+ * Assumption 1 : only valid if tableType == byU32 or byU16.
+ * Assumption 2 : h is presumed valid (within limits of hash table)
+ */
+LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
+{
+ LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
+ if (tableType == byU32) {
+ const U32* const hashTable = (const U32*) tableBase;
+ assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
+ return hashTable[h];
+ }
+ if (tableType == byU16) {
+ const U16* const hashTable = (const U16*) tableBase;
+ assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
+ return hashTable[h];
+ }
+ assert(0); return 0; /* forbidden case */
+}
+
+static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
+{
+ if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
+ if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
+ { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */
+}
+
+LZ4_FORCE_INLINE const BYTE*
+LZ4_getPosition(const BYTE* p,
+ const void* tableBase, tableType_t tableType,
+ const BYTE* srcBase)
+{
+ U32 const h = LZ4_hashPosition(p, tableType);
+ return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+LZ4_FORCE_INLINE void
+LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
+ const int inputSize,
+ const tableType_t tableType) {
+ /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
+ * therefore safe to use no matter what mode we're in. Otherwise, we figure
+ * out if it's safe to leave as is or whether it needs to be reset.
+ */
+ if ((tableType_t)cctx->tableType != clearedTable) {
+ assert(inputSize >= 0);
+ if ((tableType_t)cctx->tableType != tableType
+ || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
+ || ((tableType == byU32) && cctx->currentOffset > 1 GB)
+ || tableType == byPtr
+ || inputSize >= 4 KB)
+ {
+ DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
+ MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
+ cctx->currentOffset = 0;
+ cctx->tableType = (U32)clearedTable;
+ } else {
+ DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
+ }
+ }
+
+ /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster
+ * than compressing without a gap. However, compressing with
+ * currentOffset == 0 is faster still, so we preserve that case.
+ */
+ if (cctx->currentOffset != 0 && tableType == byU32) {
+ DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
+ cctx->currentOffset += 64 KB;
+ }
+
+ /* Finally, clear history */
+ cctx->dictCtx = NULL;
+ cctx->dictionary = NULL;
+ cctx->dictSize = 0;
+}
+
+/** LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time.
+ * Presumed already validated at this stage:
+ * - source != NULL
+ * - inputSize > 0
+ */
+LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
+ LZ4_stream_t_internal* const cctx,
+ const char* const source,
+ char* const dest,
+ const int inputSize,
+ int *inputConsumed, /* only written when outputDirective == fillOutput */
+ const int maxOutputSize,
+ const limitedOutput_directive outputDirective,
+ const tableType_t tableType,
+ const dict_directive dictDirective,
+ const dictIssue_directive dictIssue,
+ const int acceleration)
+{
+ int result;
+ const BYTE* ip = (const BYTE*) source;
+
+ U32 const startIndex = cctx->currentOffset;
+ const BYTE* base = (const BYTE*) source - startIndex;
+ const BYTE* lowLimit;
+
+ const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
+ const BYTE* const dictionary =
+ dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
+ const U32 dictSize =
+ dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
+ const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */
+
+ int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
+ U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */
+ const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
+ const BYTE* anchor = (const BYTE*) source;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
+ const BYTE* const matchlimit = iend - LASTLITERALS;
+
+ /* the dictCtx currentOffset is indexed on the start of the dictionary,
+ * while a dictionary in the current context precedes the currentOffset */
+ const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ?
+ dictionary + dictSize - dictCtx->currentOffset :
+ dictionary + dictSize - startIndex;
+
+ BYTE* op = (BYTE*) dest;
+ BYTE* const olimit = op + maxOutputSize;
+
+ U32 offset = 0;
+ U32 forwardH;
+
+ DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
+ assert(ip != NULL);
+ /* If init conditions are not met, we don't have to mark stream
+ * as having dirty context, since no action was taken yet */
+ if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
+ if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */
+ if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */
+ assert(acceleration >= 1);
+
+ lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
+
+ /* Update context state */
+ if (dictDirective == usingDictCtx) {
+ /* Subsequent linked blocks can't use the dictionary. */
+ /* Instead, they use the block we just compressed. */
+ cctx->dictCtx = NULL;
+ cctx->dictSize = (U32)inputSize;
+ } else {
+ cctx->dictSize += (U32)inputSize;
+ }
+ cctx->currentOffset += (U32)inputSize;
+ cctx->tableType = (U32)tableType;
+
+ if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
+
+ /* First Byte */
+ LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+ ip++; forwardH = LZ4_hashPosition(ip, tableType);
+
+ /* Main Loop */
+ for ( ; ; ) {
+ const BYTE* match;
+ BYTE* token;
+ const BYTE* filledIp;
+
+ /* Find a match */
+ if (tableType == byPtr) {
+ const BYTE* forwardIp = ip;
+ int step = 1;
+ int searchMatchNb = acceleration << LZ4_skipTrigger;
+ do {
+ U32 const h = forwardH;
+ ip = forwardIp;
+ forwardIp += step;
+ step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+ if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+ assert(ip < mflimitPlusOne);
+
+ match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
+ forwardH = LZ4_hashPosition(forwardIp, tableType);
+ LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
+
+ } while ( (match+LZ4_DISTANCE_MAX < ip)
+ || (LZ4_read32(match) != LZ4_read32(ip)) );
+
+ } else { /* byU32, byU16 */
+
+ const BYTE* forwardIp = ip;
+ int step = 1;
+ int searchMatchNb = acceleration << LZ4_skipTrigger;
+ do {
+ U32 const h = forwardH;
+ U32 const current = (U32)(forwardIp - base);
+ U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+ assert(matchIndex <= current);
+ assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
+ ip = forwardIp;
+ forwardIp += step;
+ step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+ if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
+ assert(ip < mflimitPlusOne);
+
+ if (dictDirective == usingDictCtx) {
+ if (matchIndex < startIndex) {
+ /* there was no match, try the dictionary */
+ assert(tableType == byU32);
+ matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+ match = dictBase + matchIndex;
+ matchIndex += dictDelta; /* make dictCtx index comparable with current context */
+ lowLimit = dictionary;
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source;
+ }
+ } else if (dictDirective==usingExtDict) {
+ if (matchIndex < startIndex) {
+ DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex);
+ assert(startIndex - matchIndex >= MINMATCH);
+ match = dictBase + matchIndex;
+ lowLimit = dictionary;
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source;
+ }
+ } else { /* single continuous memory segment */
+ match = base + matchIndex;
+ }
+ forwardH = LZ4_hashPosition(forwardIp, tableType);
+ LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+
+ DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex);
+ if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */
+ assert(matchIndex < current);
+ if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
+ && (matchIndex+LZ4_DISTANCE_MAX < current)) {
+ continue;
+ } /* too far */
+ assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */
+
+ if (LZ4_read32(match) == LZ4_read32(ip)) {
+ if (maybe_extMem) offset = current - matchIndex;
+ break; /* match found */
+ }
+
+ } while(1);
+ }
+
+ /* Catch up */
+ filledIp = ip;
+ while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
+
+ /* Encode Literals */
+ { unsigned const litLength = (unsigned)(ip - anchor);
+ token = op++;
+ if ((outputDirective == limitedOutput) && /* Check output buffer overflow */
+ (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
+ return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+ }
+ if ((outputDirective == fillOutput) &&
+ (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
+ op--;
+ goto _last_literals;
+ }
+ if (litLength >= RUN_MASK) {
+ int len = (int)(litLength - RUN_MASK);
+ *token = (RUN_MASK<<ML_BITS);
+ for(; len >= 255 ; len-=255) *op++ = 255;
+ *op++ = (BYTE)len;
+ }
+ else *token = (BYTE)(litLength<<ML_BITS);
+
+ /* Copy Literals */
+ LZ4_wildCopy8(op, anchor, op+litLength);
+ op+=litLength;
+ DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+ (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
+ }
+
+_next_match:
+ /* at this stage, the following variables must be correctly set :
+ * - ip : at start of LZ operation
+ * - match : at start of previous pattern occurence; can be within current prefix, or within extDict
+ * - offset : if maybe_ext_memSegment==1 (constant)
+ * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
+ * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
+ */
+
+ if ((outputDirective == fillOutput) &&
+ (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
+ /* the match was too close to the end, rewind and go to last literals */
+ op = token;
+ goto _last_literals;
+ }
+
+ /* Encode Offset */
+ if (maybe_extMem) { /* static test */
+ DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
+ assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
+ LZ4_writeLE16(op, (U16)offset); op+=2;
+ } else {
+ DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match));
+ assert(ip-match <= LZ4_DISTANCE_MAX);
+ LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
+ }
+
+ /* Encode MatchLength */
+ { unsigned matchCode;
+
+ if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
+ && (lowLimit==dictionary) /* match within extDict */ ) {
+ const BYTE* limit = ip + (dictEnd-match);
+ assert(dictEnd > match);
+ if (limit > matchlimit) limit = matchlimit;
+ matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
+ ip += (size_t)matchCode + MINMATCH;
+ if (ip==limit) {
+ unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
+ matchCode += more;
+ ip += more;
+ }
+ DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH);
+ } else {
+ matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
+ ip += (size_t)matchCode + MINMATCH;
+ DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH);
+ }
+
+ if ((outputDirective) && /* Check output buffer overflow */
+ (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
+ if (outputDirective == fillOutput) {
+ /* Match description too long : reduce it */
+ U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
+ ip -= matchCode - newMatchCode;
+ assert(newMatchCode < matchCode);
+ matchCode = newMatchCode;
+ if (unlikely(ip <= filledIp)) {
+ /* We have already filled up to filledIp so if ip ends up less than filledIp
+ * we have positions in the hash table beyond the current position. This is
+ * a problem if we reuse the hash table. So we have to remove these positions
+ * from the hash table.
+ */
+ const BYTE* ptr;
+ DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
+ for (ptr = ip; ptr <= filledIp; ++ptr) {
+ U32 const h = LZ4_hashPosition(ptr, tableType);
+ LZ4_clearHash(h, cctx->hashTable, tableType);
+ }
+ }
+ } else {
+ assert(outputDirective == limitedOutput);
+ return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+ }
+ }
+ if (matchCode >= ML_MASK) {
+ *token += ML_MASK;
+ matchCode -= ML_MASK;
+ LZ4_write32(op, 0xFFFFFFFF);
+ while (matchCode >= 4*255) {
+ op+=4;
+ LZ4_write32(op, 0xFFFFFFFF);
+ matchCode -= 4*255;
+ }
+ op += matchCode / 255;
+ *op++ = (BYTE)(matchCode % 255);
+ } else
+ *token += (BYTE)(matchCode);
+ }
+ /* Ensure we have enough space for the last literals. */
+ assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
+
+ anchor = ip;
+
+ /* Test end of chunk */
+ if (ip >= mflimitPlusOne) break;
+
+ /* Fill table */
+ LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
+
+ /* Test next position */
+ if (tableType == byPtr) {
+
+ match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
+ LZ4_putPosition(ip, cctx->hashTable, tableType, base);
+ if ( (match+LZ4_DISTANCE_MAX >= ip)
+ && (LZ4_read32(match) == LZ4_read32(ip)) )
+ { token=op++; *token=0; goto _next_match; }
+
+ } else { /* byU32, byU16 */
+
+ U32 const h = LZ4_hashPosition(ip, tableType);
+ U32 const current = (U32)(ip-base);
+ U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
+ assert(matchIndex < current);
+ if (dictDirective == usingDictCtx) {
+ if (matchIndex < startIndex) {
+ /* there was no match, try the dictionary */
+ matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
+ match = dictBase + matchIndex;
+ lowLimit = dictionary; /* required for match length counter */
+ matchIndex += dictDelta;
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source; /* required for match length counter */
+ }
+ } else if (dictDirective==usingExtDict) {
+ if (matchIndex < startIndex) {
+ match = dictBase + matchIndex;
+ lowLimit = dictionary; /* required for match length counter */
+ } else {
+ match = base + matchIndex;
+ lowLimit = (const BYTE*)source; /* required for match length counter */
+ }
+ } else { /* single memory segment */
+ match = base + matchIndex;
+ }
+ LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
+ assert(matchIndex < current);
+ if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
+ && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
+ && (LZ4_read32(match) == LZ4_read32(ip)) ) {
+ token=op++;
+ *token=0;
+ if (maybe_extMem) offset = current - matchIndex;
+ DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
+ (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
+ goto _next_match;
+ }
+ }
+
+ /* Prepare next loop */
+ forwardH = LZ4_hashPosition(++ip, tableType);
+
+ }
+
+_last_literals:
+ /* Encode Last Literals */
+ { size_t lastRun = (size_t)(iend - anchor);
+ if ( (outputDirective) && /* Check output buffer overflow */
+ (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
+ if (outputDirective == fillOutput) {
+ /* adapt lastRun to fill 'dst' */
+ assert(olimit >= op);
+ lastRun = (size_t)(olimit-op) - 1/*token*/;
+ lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/
+ } else {
+ assert(outputDirective == limitedOutput);
+ return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
+ }
+ }
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
+ if (lastRun >= RUN_MASK) {
+ size_t accumulator = lastRun - RUN_MASK;
+ *op++ = RUN_MASK << ML_BITS;
+ for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRun<<ML_BITS);
+ }
+ LZ4_memcpy(op, anchor, lastRun);
+ ip = anchor + lastRun;
+ op += lastRun;
+ }
+
+ if (outputDirective == fillOutput) {
+ *inputConsumed = (int) (((const char*)ip)-source);
+ }
+ result = (int)(((char*)op) - dest);
+ assert(result > 0);
+ DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
+ return result;
+}
+
+/** LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time;
+ * takes care of src == (NULL, 0)
+ * and forward the rest to LZ4_compress_generic_validated */
+LZ4_FORCE_INLINE int LZ4_compress_generic(
+ LZ4_stream_t_internal* const cctx,
+ const char* const src,
+ char* const dst,
+ const int srcSize,
+ int *inputConsumed, /* only written when outputDirective == fillOutput */
+ const int dstCapacity,
+ const limitedOutput_directive outputDirective,
+ const tableType_t tableType,
+ const dict_directive dictDirective,
+ const dictIssue_directive dictIssue,
+ const int acceleration)
+{
+ DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
+ srcSize, dstCapacity);
+
+ if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */
+ if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */
+ if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */
+ DEBUGLOG(5, "Generating an empty block");
+ assert(outputDirective == notLimited || dstCapacity >= 1);
+ assert(dst != NULL);
+ dst[0] = 0;
+ if (outputDirective == fillOutput) {
+ assert (inputConsumed != NULL);
+ *inputConsumed = 0;
+ }
+ return 1;
+ }
+ assert(src != NULL);
+
+ return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
+ inputConsumed, /* only written into if outputDirective == fillOutput */
+ dstCapacity, outputDirective,
+ tableType, dictDirective, dictIssue, acceleration);
+}
+
+
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+ LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
+ assert(ctx != NULL);
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+ if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+ if (inputSize < LZ4_64Klimit) {
+ return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+ } else {
+ const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+ return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+ }
+ } else {
+ if (inputSize < LZ4_64Klimit) {
+ return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+ } else {
+ const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+ return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+ }
+ }
+}
+
+/**
+ * LZ4_compress_fast_extState_fastReset() :
+ * A variant of LZ4_compress_fast_extState().
+ *
+ * Using this variant avoids an expensive initialization step. It is only safe
+ * to call if the state buffer is known to be correctly initialized already
+ * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
+ * "correctly initialized").
+ */
+int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
+{
+ LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+ if (dstCapacity >= LZ4_compressBound(srcSize)) {
+ if (srcSize < LZ4_64Klimit) {
+ const tableType_t tableType = byU16;
+ LZ4_prepareTable(ctx, srcSize, tableType);
+ if (ctx->currentOffset) {
+ return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
+ } else {
+ return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+ }
+ } else {
+ const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+ LZ4_prepareTable(ctx, srcSize, tableType);
+ return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+ }
+ } else {
+ if (srcSize < LZ4_64Klimit) {
+ const tableType_t tableType = byU16;
+ LZ4_prepareTable(ctx, srcSize, tableType);
+ if (ctx->currentOffset) {
+ return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
+ } else {
+ return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+ }
+ } else {
+ const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+ LZ4_prepareTable(ctx, srcSize, tableType);
+ return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+ }
+ }
+}
+
+
+int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+ int result;
+#if (LZ4_HEAPMODE)
+ LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
+ if (ctxPtr == NULL) return 0;
+#else
+ LZ4_stream_t ctx;
+ LZ4_stream_t* const ctxPtr = &ctx;
+#endif
+ result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
+
+#if (LZ4_HEAPMODE)
+ FREEMEM(ctxPtr);
+#endif
+ return result;
+}
+
+
+int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
+{
+ return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1);
+}
+
+
+/* Note!: This function leaves the stream in an unclean/broken state!
+ * It is not safe to subsequently use the same state with a _fastReset() or
+ * _continue() call without resetting it. */
+static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+ void* const s = LZ4_initStream(state, sizeof (*state));
+ assert(s != NULL); (void)s;
+
+ if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */
+ return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
+ } else {
+ if (*srcSizePtr < LZ4_64Klimit) {
+ return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
+ } else {
+ tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+ return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
+ } }
+}
+
+
+int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
+{
+#if (LZ4_HEAPMODE)
+ LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
+ if (ctx == NULL) return 0;
+#else
+ LZ4_stream_t ctxBody;
+ LZ4_stream_t* ctx = &ctxBody;
+#endif
+
+ int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
+
+#if (LZ4_HEAPMODE)
+ FREEMEM(ctx);
+#endif
+ return result;
+}
+
+
+
+/*-******************************
+* Streaming functions
+********************************/
+
+LZ4_stream_t* LZ4_createStream(void)
+{
+ LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
+ LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */
+ DEBUGLOG(4, "LZ4_createStream %p", lz4s);
+ if (lz4s == NULL) return NULL;
+ LZ4_initStream(lz4s, sizeof(*lz4s));
+ return lz4s;
+}
+
+static size_t LZ4_stream_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+ typedef struct { char c; LZ4_stream_t t; } t_a;
+ return sizeof(t_a) - sizeof(LZ4_stream_t);
+#else
+ return 1; /* effectively disabled */
+#endif
+}
+
+LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
+{
+ DEBUGLOG(5, "LZ4_initStream");
+ if (buffer == NULL) { return NULL; }
+ if (size < sizeof(LZ4_stream_t)) { return NULL; }
+ if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
+ MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
+ return (LZ4_stream_t*)buffer;
+}
+
+/* resetStream is now deprecated,
+ * prefer initStream() which is more general */
+void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
+{
+ DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
+ MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
+}
+
+void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
+ LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
+}
+
+int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
+{
+ if (!LZ4_stream) return 0; /* support free on NULL */
+ DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
+ FREEMEM(LZ4_stream);
+ return (0);
+}
+
+
+#define HASH_UNIT sizeof(reg_t)
+int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
+{
+ LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
+ const tableType_t tableType = byU32;
+ const BYTE* p = (const BYTE*)dictionary;
+ const BYTE* const dictEnd = p + dictSize;
+ const BYTE* base;
+
+ DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
+
+ /* It's necessary to reset the context,
+ * and not just continue it with prepareTable()
+ * to avoid any risk of generating overflowing matchIndex
+ * when compressing using this dictionary */
+ LZ4_resetStream(LZ4_dict);
+
+ /* We always increment the offset by 64 KB, since, if the dict is longer,
+ * we truncate it to the last 64k, and if it's shorter, we still want to
+ * advance by a whole window length so we can provide the guarantee that
+ * there are only valid offsets in the window, which allows an optimization
+ * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
+ * dictionary isn't a full 64k. */
+ dict->currentOffset += 64 KB;
+
+ if (dictSize < (int)HASH_UNIT) {
+ return 0;
+ }
+
+ if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
+ base = dictEnd - dict->currentOffset;
+ dict->dictionary = p;
+ dict->dictSize = (U32)(dictEnd - p);
+ dict->tableType = (U32)tableType;
+
+ while (p <= dictEnd-HASH_UNIT) {
+ LZ4_putPosition(p, dict->hashTable, tableType, base);
+ p+=3;
+ }
+
+ return (int)dict->dictSize;
+}
+
+void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) {
+ const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL :
+ &(dictionaryStream->internal_donotuse);
+
+ DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
+ workingStream, dictionaryStream,
+ dictCtx != NULL ? dictCtx->dictSize : 0);
+
+ if (dictCtx != NULL) {
+ /* If the current offset is zero, we will never look in the
+ * external dictionary context, since there is no value a table
+ * entry can take that indicate a miss. In that case, we need
+ * to bump the offset to something non-zero.
+ */
+ if (workingStream->internal_donotuse.currentOffset == 0) {
+ workingStream->internal_donotuse.currentOffset = 64 KB;
+ }
+
+ /* Don't actually attach an empty dictionary.
+ */
+ if (dictCtx->dictSize == 0) {
+ dictCtx = NULL;
+ }
+ }
+ workingStream->internal_donotuse.dictCtx = dictCtx;
+}
+
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
+{
+ assert(nextSize >= 0);
+ if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */
+ /* rescale hash table */
+ U32 const delta = LZ4_dict->currentOffset - 64 KB;
+ const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+ int i;
+ DEBUGLOG(4, "LZ4_renormDictT");
+ for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
+ if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
+ else LZ4_dict->hashTable[i] -= delta;
+ }
+ LZ4_dict->currentOffset = 64 KB;
+ if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+ LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+ }
+}
+
+
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
+ const char* source, char* dest,
+ int inputSize, int maxOutputSize,
+ int acceleration)
+{
+ const tableType_t tableType = byU32;
+ LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
+ const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+
+ DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
+
+ LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+
+ /* invalidate tiny dictionaries */
+ if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */
+ && (dictEnd != (const BYTE*)source) ) {
+ DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
+ streamPtr->dictSize = 0;
+ streamPtr->dictionary = (const BYTE*)source;
+ dictEnd = (const BYTE*)source;
+ }
+
+ /* Check overlapping input/dictionary space */
+ { const BYTE* sourceEnd = (const BYTE*) source + inputSize;
+ if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
+ streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+ if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
+ if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
+ streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+ }
+ }
+
+ /* prefix mode : source data follows dictionary */
+ if (dictEnd == (const BYTE*)source) {
+ if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
+ return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
+ else
+ return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
+ }
+
+ /* external dictionary mode */
+ { int result;
+ if (streamPtr->dictCtx) {
+ /* We depend here on the fact that dictCtx'es (produced by
+ * LZ4_loadDict) guarantee that their tables contain no references
+ * to offsets between dictCtx->currentOffset - 64 KB and
+ * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
+ * to use noDictIssue even when the dict isn't a full 64 KB.
+ */
+ if (inputSize > 4 KB) {
+ /* For compressing large blobs, it is faster to pay the setup
+ * cost to copy the dictionary's tables into the active context,
+ * so that the compression loop is only looking into one table.
+ */
+ LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
+ result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+ } else {
+ result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
+ }
+ } else {
+ if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
+ result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
+ } else {
+ result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
+ }
+ }
+ streamPtr->dictionary = (const BYTE*)source;
+ streamPtr->dictSize = (U32)inputSize;
+ return result;
+ }
+}
+
+
+/* Hidden debug function, to force-test external dictionary mode */
+int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
+{
+ LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse;
+ int result;
+
+ LZ4_renormDictT(streamPtr, srcSize);
+
+ if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
+ result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
+ } else {
+ result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
+ }
+
+ streamPtr->dictionary = (const BYTE*)source;
+ streamPtr->dictSize = (U32)srcSize;
+
+ return result;
+}
+
+
+/*! LZ4_saveDict() :
+ * If previously compressed data block is not guaranteed to remain available at its memory location,
+ * save it into a safer place (char* safeBuffer).
+ * Note : you don't need to call LZ4_loadDict() afterwards,
+ * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue().
+ * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ */
+int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
+{
+ LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
+ const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
+
+ if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
+ if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
+
+ if (safeBuffer == NULL) assert(dictSize == 0);
+ if (dictSize > 0)
+ memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+ dict->dictionary = (const BYTE*)safeBuffer;
+ dict->dictSize = (U32)dictSize;
+
+ return dictSize;
+}
+
+
+
+/*-*******************************
+ * Decompression functions
+ ********************************/
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
+
+#undef MIN
+#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
+
+/* Read the variable-length literal or match length.
+ *
+ * ip - pointer to use as input.
+ * lencheck - end ip. Return an error if ip advances >= lencheck.
+ * loop_check - check ip >= lencheck in body of loop. Returns loop_error if so.
+ * initial_check - check ip >= lencheck before start of loop. Returns initial_error if so.
+ * error (output) - error code. Should be set to 0 before call.
+ */
+typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
+LZ4_FORCE_INLINE unsigned
+read_variable_length(const BYTE**ip, const BYTE* lencheck,
+ int loop_check, int initial_check,
+ variable_length_error* error)
+{
+ U32 length = 0;
+ U32 s;
+ if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
+ *error = initial_error;
+ return length;
+ }
+ do {
+ s = **ip;
+ (*ip)++;
+ length += s;
+ if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
+ *error = loop_error;
+ return length;
+ }
+ } while (s==255);
+
+ return length;
+}
+
+/*! LZ4_decompress_generic() :
+ * This generic decompression function covers all use cases.
+ * It shall be instantiated several times, using different sets of directives.
+ * Note that it is important for performance that this function really get inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+LZ4_FORCE_INLINE int
+LZ4_decompress_generic(
+ const char* const src,
+ char* const dst,
+ int srcSize,
+ int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
+
+ endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */
+ earlyEnd_directive partialDecoding, /* full, partial */
+ dict_directive dict, /* noDict, withPrefix64k, usingExtDict */
+ const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
+ const BYTE* const dictStart, /* only if dict==usingExtDict */
+ const size_t dictSize /* note : = 0 if noDict */
+ )
+{
+ if (src == NULL) { return -1; }
+
+ { const BYTE* ip = (const BYTE*) src;
+ const BYTE* const iend = ip + srcSize;
+
+ BYTE* op = (BYTE*) dst;
+ BYTE* const oend = op + outputSize;
+ BYTE* cpy;
+
+ const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
+
+ const int safeDecode = (endOnInput==endOnInputSize);
+ const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
+
+
+ /* Set up the "end" pointers for the shortcut. */
+ const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
+ const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
+
+ const BYTE* match;
+ size_t offset;
+ unsigned token;
+ size_t length;
+
+
+ DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
+
+ /* Special cases */
+ assert(lowPrefix <= op);
+ if ((endOnInput) && (unlikely(outputSize==0))) {
+ /* Empty output buffer */
+ if (partialDecoding) return 0;
+ return ((srcSize==1) && (*ip==0)) ? 0 : -1;
+ }
+ if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
+ if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
+
+ /* Currently the fast loop shows a regression on qualcomm arm chips. */
+#if LZ4_FAST_DEC_LOOP
+ if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
+ DEBUGLOG(6, "skip fast decode loop");
+ goto safe_decode;
+ }
+
+ /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
+ while (1) {
+ /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
+ assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
+ if (endOnInput) { assert(ip < iend); }
+ token = *ip++;
+ length = token >> ML_BITS; /* literal length */
+
+ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+ /* decode literal length */
+ if (length == RUN_MASK) {
+ variable_length_error error = ok;
+ length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+ if (error == initial_error) { goto _output_error; }
+ if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+
+ /* copy literals */
+ cpy = op+length;
+ LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+ if (endOnInput) { /* LZ4_decompress_safe() */
+ if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
+ LZ4_wildCopy32(op, ip, cpy);
+ } else { /* LZ4_decompress_fast() */
+ if (cpy>oend-8) { goto safe_literal_copy; }
+ LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+ * it doesn't know input length, and only relies on end-of-block properties */
+ }
+ ip += length; op = cpy;
+ } else {
+ cpy = op+length;
+ if (endOnInput) { /* LZ4_decompress_safe() */
+ DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
+ /* We don't need to check oend, since we check it once for each loop below */
+ if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
+ /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
+ LZ4_memcpy(op, ip, 16);
+ } else { /* LZ4_decompress_fast() */
+ /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
+ * it doesn't know input length, and relies on end-of-block properties */
+ LZ4_memcpy(op, ip, 8);
+ if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
+ }
+ ip += length; op = cpy;
+ }
+
+ /* get offset */
+ offset = LZ4_readLE16(ip); ip+=2;
+ match = op - offset;
+ assert(match <= op);
+
+ /* get matchlength */
+ length = token & ML_MASK;
+
+ if (length == ML_MASK) {
+ variable_length_error error = ok;
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+ length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+ if (error != ok) { goto _output_error; }
+ if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
+ length += MINMATCH;
+ if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+ goto safe_match_copy;
+ }
+ } else {
+ length += MINMATCH;
+ if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+ goto safe_match_copy;
+ }
+
+ /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
+ if ((dict == withPrefix64k) || (match >= lowPrefix)) {
+ if (offset >= 8) {
+ assert(match >= lowPrefix);
+ assert(match <= op);
+ assert(op + 18 <= oend);
+
+ LZ4_memcpy(op, match, 8);
+ LZ4_memcpy(op+8, match+8, 8);
+ LZ4_memcpy(op+16, match+16, 2);
+ op += length;
+ continue;
+ } } }
+
+ if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
+ /* match starting within external dictionary */
+ if ((dict==usingExtDict) && (match < lowPrefix)) {
+ if (unlikely(op+length > oend-LASTLITERALS)) {
+ if (partialDecoding) {
+ DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
+ length = MIN(length, (size_t)(oend-op));
+ } else {
+ goto _output_error; /* end-of-block condition violated */
+ } }
+
+ if (length <= (size_t)(lowPrefix-match)) {
+ /* match fits entirely within external dictionary : just copy */
+ memmove(op, dictEnd - (lowPrefix-match), length);
+ op += length;
+ } else {
+ /* match stretches into both external dictionary and current block */
+ size_t const copySize = (size_t)(lowPrefix - match);
+ size_t const restSize = length - copySize;
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
+ op += copySize;
+ if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
+ BYTE* const endOfMatch = op + restSize;
+ const BYTE* copyFrom = lowPrefix;
+ while (op < endOfMatch) { *op++ = *copyFrom++; }
+ } else {
+ LZ4_memcpy(op, lowPrefix, restSize);
+ op += restSize;
+ } }
+ continue;
+ }
+
+ /* copy match within block */
+ cpy = op + length;
+
+ assert((op <= oend) && (oend-op >= 32));
+ if (unlikely(offset<16)) {
+ LZ4_memcpy_using_offset(op, match, cpy, offset);
+ } else {
+ LZ4_wildCopy32(op, match, cpy);
+ }
+
+ op = cpy; /* wildcopy correction */
+ }
+ safe_decode:
+#endif
+
+ /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
+ while (1) {
+ token = *ip++;
+ length = token >> ML_BITS; /* literal length */
+
+ assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+
+ /* A two-stage shortcut for the most common case:
+ * 1) If the literal length is 0..14, and there is enough space,
+ * enter the shortcut and copy 16 bytes on behalf of the literals
+ * (in the fast mode, only 8 bytes can be safely copied this way).
+ * 2) Further if the match length is 4..18, copy 18 bytes in a similar
+ * manner; but we ensure that there's enough space in the output for
+ * those 18 bytes earlier, upon entering the shortcut (in other words,
+ * there is a combined check for both stages).
+ */
+ if ( (endOnInput ? length != RUN_MASK : length <= 8)
+ /* strictly "less than" on input, to re-enter the loop with at least one byte */
+ && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
+ /* Copy the literals */
+ LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
+ op += length; ip += length;
+
+ /* The second stage: prepare for match copying, decode full info.
+ * If it doesn't work out, the info won't be wasted. */
+ length = token & ML_MASK; /* match length */
+ offset = LZ4_readLE16(ip); ip += 2;
+ match = op - offset;
+ assert(match <= op); /* check overflow */
+
+ /* Do not deal with overlapping matches. */
+ if ( (length != ML_MASK)
+ && (offset >= 8)
+ && (dict==withPrefix64k || match >= lowPrefix) ) {
+ /* Copy the match. */
+ LZ4_memcpy(op + 0, match + 0, 8);
+ LZ4_memcpy(op + 8, match + 8, 8);
+ LZ4_memcpy(op +16, match +16, 2);
+ op += length + MINMATCH;
+ /* Both stages worked, load the next token. */
+ continue;
+ }
+
+ /* The second stage didn't work out, but the info is ready.
+ * Propel it right to the point of match copying. */
+ goto _copy_match;
+ }
+
+ /* decode literal length */
+ if (length == RUN_MASK) {
+ variable_length_error error = ok;
+ length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
+ if (error == initial_error) { goto _output_error; }
+ if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
+ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
+ }
+
+ /* copy literals */
+ cpy = op+length;
+#if LZ4_FAST_DEC_LOOP
+ safe_literal_copy:
+#endif
+ LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+ if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
+ || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
+ {
+ /* We've either hit the input parsing restriction or the output parsing restriction.
+ * In the normal scenario, decoding a full block, it must be the last sequence,
+ * otherwise it's an error (invalid input or dimensions).
+ * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
+ */
+ if (partialDecoding) {
+ /* Since we are partial decoding we may be in this block because of the output parsing
+ * restriction, which is not valid since the output buffer is allowed to be undersized.
+ */
+ assert(endOnInput);
+ DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+ DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+ DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+ DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+ /* Finishing in the middle of a literals segment,
+ * due to lack of input.
+ */
+ if (ip+length > iend) {
+ length = (size_t)(iend-ip);
+ cpy = op + length;
+ }
+ /* Finishing in the middle of a literals segment,
+ * due to lack of output space.
+ */
+ if (cpy > oend) {
+ cpy = oend;
+ assert(op<=oend);
+ length = (size_t)(oend-op);
+ }
+ } else {
+ /* We must be on the last sequence because of the parsing limitations so check
+ * that we exactly regenerate the original size (must be exact when !endOnInput).
+ */
+ if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
+ /* We must be on the last sequence (or invalid) because of the parsing limitations
+ * so check that we exactly consume the input and don't overrun the output buffer.
+ */
+ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
+ DEBUGLOG(6, "should have been last run of literals")
+ DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
+ DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
+ goto _output_error;
+ }
+ }
+ memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
+ ip += length;
+ op += length;
+ /* Necessarily EOF when !partialDecoding.
+ * When partialDecoding, it is EOF if we've either
+ * filled the output buffer or
+ * can't proceed with reading an offset for following match.
+ */
+ if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
+ break;
+ }
+ } else {
+ LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */
+ ip += length; op = cpy;
+ }
+
+ /* get offset */
+ offset = LZ4_readLE16(ip); ip+=2;
+ match = op - offset;
+
+ /* get matchlength */
+ length = token & ML_MASK;
+
+ _copy_match:
+ if (length == ML_MASK) {
+ variable_length_error error = ok;
+ length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
+ if (error != ok) goto _output_error;
+ if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
+ }
+ length += MINMATCH;
+
+#if LZ4_FAST_DEC_LOOP
+ safe_match_copy:
+#endif
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
+ /* match starting within external dictionary */
+ if ((dict==usingExtDict) && (match < lowPrefix)) {
+ if (unlikely(op+length > oend-LASTLITERALS)) {
+ if (partialDecoding) length = MIN(length, (size_t)(oend-op));
+ else goto _output_error; /* doesn't respect parsing restriction */
+ }
+
+ if (length <= (size_t)(lowPrefix-match)) {
+ /* match fits entirely within external dictionary : just copy */
+ memmove(op, dictEnd - (lowPrefix-match), length);
+ op += length;
+ } else {
+ /* match stretches into both external dictionary and current block */
+ size_t const copySize = (size_t)(lowPrefix - match);
+ size_t const restSize = length - copySize;
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
+ op += copySize;
+ if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
+ BYTE* const endOfMatch = op + restSize;
+ const BYTE* copyFrom = lowPrefix;
+ while (op < endOfMatch) *op++ = *copyFrom++;
+ } else {
+ LZ4_memcpy(op, lowPrefix, restSize);
+ op += restSize;
+ } }
+ continue;
+ }
+ assert(match >= lowPrefix);
+
+ /* copy match within block */
+ cpy = op + length;
+
+ /* partialDecoding : may end anywhere within the block */
+ assert(op<=oend);
+ if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+ size_t const mlen = MIN(length, (size_t)(oend-op));
+ const BYTE* const matchEnd = match + mlen;
+ BYTE* const copyEnd = op + mlen;
+ if (matchEnd > op) { /* overlap copy */
+ while (op < copyEnd) { *op++ = *match++; }
+ } else {
+ LZ4_memcpy(op, match, mlen);
+ }
+ op = copyEnd;
+ if (op == oend) { break; }
+ continue;
+ }
+
+ if (unlikely(offset<8)) {
+ LZ4_write32(op, 0); /* silence msan warning when offset==0 */
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += inc32table[offset];
+ LZ4_memcpy(op+4, match, 4);
+ match -= dec64table[offset];
+ } else {
+ LZ4_memcpy(op, match, 8);
+ match += 8;
+ }
+ op += 8;
+
+ if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
+ BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
+ if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
+ if (op < oCopyLimit) {
+ LZ4_wildCopy8(op, match, oCopyLimit);
+ match += oCopyLimit - op;
+ op = oCopyLimit;
+ }
+ while (op < cpy) { *op++ = *match++; }
+ } else {
+ LZ4_memcpy(op, match, 8);
+ if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
+ }
+ op = cpy; /* wildcopy correction */
+ }
+
+ /* end of decoding */
+ if (endOnInput) {
+ DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
+ return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
+ } else {
+ return (int) (((const char*)ip)-src); /* Nb of input bytes read */
+ }
+
+ /* Overflow error detected */
+ _output_error:
+ return (int) (-(((const char*)ip)-src))-1;
+ }
+}
+
+
+/*===== Instantiate the API decoding functions. =====*/
+
+LZ4_FORCE_O2
+int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
+{
+ return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
+ endOnInputSize, decode_full_block, noDict,
+ (BYTE*)dest, NULL, 0);
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
+{
+ dstCapacity = MIN(targetOutputSize, dstCapacity);
+ return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
+ endOnInputSize, partial_decode,
+ noDict, (BYTE*)dst, NULL, 0);
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
+{
+ return LZ4_decompress_generic(source, dest, 0, originalSize,
+ endOnOutputSize, decode_full_block, withPrefix64k,
+ (BYTE*)dest - 64 KB, NULL, 0);
+}
+
+/*===== Instantiate a few more decoding cases, used more than once. =====*/
+
+LZ4_FORCE_O2 /* Exported, an obsolete API function. */
+int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+ return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+ endOnInputSize, decode_full_block, withPrefix64k,
+ (BYTE*)dest - 64 KB, NULL, 0);
+}
+
+/* Another obsolete API function, paired with the previous one. */
+int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
+{
+ /* LZ4_decompress_fast doesn't validate match offsets,
+ * and thus serves well with any prefixed dictionary. */
+ return LZ4_decompress_fast(source, dest, originalSize);
+}
+
+LZ4_FORCE_O2
+static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
+ size_t prefixSize)
+{
+ return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+ endOnInputSize, decode_full_block, noDict,
+ (BYTE*)dest-prefixSize, NULL, 0);
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
+ int compressedSize, int maxOutputSize,
+ const void* dictStart, size_t dictSize)
+{
+ return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+ endOnInputSize, decode_full_block, usingExtDict,
+ (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+LZ4_FORCE_O2
+static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
+ const void* dictStart, size_t dictSize)
+{
+ return LZ4_decompress_generic(source, dest, 0, originalSize,
+ endOnOutputSize, decode_full_block, usingExtDict,
+ (BYTE*)dest, (const BYTE*)dictStart, dictSize);
+}
+
+/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
+ * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
+ * These routines are used only once, in LZ4_decompress_*_continue().
+ */
+LZ4_FORCE_INLINE
+int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
+ size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+ return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+ endOnInputSize, decode_full_block, usingExtDict,
+ (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
+}
+
+LZ4_FORCE_INLINE
+int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize,
+ size_t prefixSize, const void* dictStart, size_t dictSize)
+{
+ return LZ4_decompress_generic(source, dest, 0, originalSize,
+ endOnOutputSize, decode_full_block, usingExtDict,
+ (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
+}
+
+/*===== streaming decompression functions =====*/
+
+LZ4_streamDecode_t* LZ4_createStreamDecode(void)
+{
+ LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
+ LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal)); /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */
+ return lz4s;
+}
+
+int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
+{
+ if (LZ4_stream == NULL) { return 0; } /* support free on NULL */
+ FREEMEM(LZ4_stream);
+ return 0;
+}
+
+/*! LZ4_setStreamDecode() :
+ * Use this function to instruct where to find the dictionary.
+ * This function is not necessary if previous data is still available where it was decoded.
+ * Loading a size of 0 is allowed (same effect as no dictionary).
+ * @return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
+{
+ LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+ lz4sd->prefixSize = (size_t) dictSize;
+ lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
+ lz4sd->externalDict = NULL;
+ lz4sd->extDictSize = 0;
+ return 1;
+}
+
+/*! LZ4_decoderRingBufferSize() :
+ * when setting a ring buffer for streaming decompression (optional scenario),
+ * provides the minimum size of this ring buffer
+ * to be compatible with any source respecting maxBlockSize condition.
+ * Note : in a ring buffer scenario,
+ * blocks are presumed decompressed next to each other.
+ * When not enough space remains for next block (remainingSize < maxBlockSize),
+ * decoding resumes from beginning of ring buffer.
+ * @return : minimum ring buffer size,
+ * or 0 if there is an error (invalid maxBlockSize).
+ */
+int LZ4_decoderRingBufferSize(int maxBlockSize)
+{
+ if (maxBlockSize < 0) return 0;
+ if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
+ if (maxBlockSize < 16) maxBlockSize = 16;
+ return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
+}
+
+/*
+*_continue() :
+ These decoding functions allow decompression of multiple blocks in "streaming" mode.
+ Previously decoded blocks must still be available at the memory position where they were decoded.
+ If it's not possible, save the relevant part of decoded data into a safe buffer,
+ and indicate where it stands using LZ4_setStreamDecode()
+*/
+LZ4_FORCE_O2
+int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
+{
+ LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+ int result;
+
+ if (lz4sd->prefixSize == 0) {
+ /* The first call, no dictionary yet. */
+ assert(lz4sd->extDictSize == 0);
+ result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+ if (result <= 0) return result;
+ lz4sd->prefixSize = (size_t)result;
+ lz4sd->prefixEnd = (BYTE*)dest + result;
+ } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+ /* They're rolling the current segment. */
+ if (lz4sd->prefixSize >= 64 KB - 1)
+ result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+ else if (lz4sd->extDictSize == 0)
+ result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
+ lz4sd->prefixSize);
+ else
+ result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
+ lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+ if (result <= 0) return result;
+ lz4sd->prefixSize += (size_t)result;
+ lz4sd->prefixEnd += result;
+ } else {
+ /* The buffer wraps around, or they're switching to another buffer. */
+ lz4sd->extDictSize = lz4sd->prefixSize;
+ lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+ result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
+ lz4sd->externalDict, lz4sd->extDictSize);
+ if (result <= 0) return result;
+ lz4sd->prefixSize = (size_t)result;
+ lz4sd->prefixEnd = (BYTE*)dest + result;
+ }
+
+ return result;
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
+{
+ LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
+ int result;
+ assert(originalSize >= 0);
+
+ if (lz4sd->prefixSize == 0) {
+ assert(lz4sd->extDictSize == 0);
+ result = LZ4_decompress_fast(source, dest, originalSize);
+ if (result <= 0) return result;
+ lz4sd->prefixSize = (size_t)originalSize;
+ lz4sd->prefixEnd = (BYTE*)dest + originalSize;
+ } else if (lz4sd->prefixEnd == (BYTE*)dest) {
+ if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
+ result = LZ4_decompress_fast(source, dest, originalSize);
+ else
+ result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
+ lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
+ if (result <= 0) return result;
+ lz4sd->prefixSize += (size_t)originalSize;
+ lz4sd->prefixEnd += originalSize;
+ } else {
+ lz4sd->extDictSize = lz4sd->prefixSize;
+ lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+ result = LZ4_decompress_fast_extDict(source, dest, originalSize,
+ lz4sd->externalDict, lz4sd->extDictSize);
+ if (result <= 0) return result;
+ lz4sd->prefixSize = (size_t)originalSize;
+ lz4sd->prefixEnd = (BYTE*)dest + originalSize;
+ }
+
+ return result;
+}
+
+
+/*
+Advanced decoding functions :
+*_usingDict() :
+ These decoding functions work the same as "_continue" ones,
+ the dictionary must be explicitly provided within parameters
+*/
+
+int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
+{
+ if (dictSize==0)
+ return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
+ if (dictStart+dictSize == dest) {
+ if (dictSize >= 64 KB - 1) {
+ return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
+ }
+ assert(dictSize >= 0);
+ return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
+ }
+ assert(dictSize >= 0);
+ return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
+}
+
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
+{
+ if (dictSize==0 || dictStart+dictSize == dest)
+ return LZ4_decompress_fast(source, dest, originalSize);
+ assert(dictSize >= 0);
+ return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
+}
+
+
+/*=*************************************************
+* Obsolete Functions
+***************************************************/
+/* obsolete compression functions */
+int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+ return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
+}
+int LZ4_compress(const char* src, char* dest, int srcSize)
+{
+ return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
+}
+int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
+{
+ return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
+}
+int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
+{
+ return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
+}
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+ return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
+}
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
+{
+ return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
+}
+
+/*
+These decompression functions are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
+int LZ4_uncompress (const char* source, char* dest, int outputSize)
+{
+ return LZ4_decompress_fast(source, dest, outputSize);
+}
+int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
+{
+ return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
+}
+
+/* Obsolete Streaming functions */
+
+int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }
+
+int LZ4_resetStreamState(void* state, char* inputBuffer)
+{
+ (void)inputBuffer;
+ LZ4_resetStream((LZ4_stream_t*)state);
+ return 0;
+}
+
+void* LZ4_create (char* inputBuffer)
+{
+ (void)inputBuffer;
+ return LZ4_createStream();
+}
+
+char* LZ4_slideInputBuffer (void* state)
+{
+ /* avoid const char * -> char * conversion warning */
+ return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
+}
+
+#endif /* LZ4_COMMONDEFS_ONLY */
+
+}
diff --git a/3rdparty/tracy/tracy/common/tracy_lz4.hpp b/3rdparty/tracy/tracy/common/tracy_lz4.hpp
new file mode 100644
index 0000000..1ccdcff
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/tracy_lz4.hpp
@@ -0,0 +1,777 @@
+/*
+ * LZ4 - Fast LZ compression algorithm
+ * Header File
+ * Copyright (C) 2011-present, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://www.lz4.org
+ - LZ4 source repository : https://github.com/lz4/lz4
+*/
+
+#ifndef TRACY_LZ4_H_2983827168210
+#define TRACY_LZ4_H_2983827168210
+
+/* --- Dependency --- */
+#include <stddef.h> /* size_t */
+#include <stdint.h>
+
+
+/**
+ Introduction
+
+ LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
+ scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
+ multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
+
+ The LZ4 compression library provides in-memory compression and decompression functions.
+ It gives full buffer control to user.
+ Compression can be done in:
+ - a single step (described as Simple Functions)
+ - a single step, reusing a context (described in Advanced Functions)
+ - unbounded multiple steps (described as Streaming compression)
+
+ lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+ Decompressing such a compressed block requires additional metadata.
+ Exact metadata depends on exact decompression function.
+ For the typical case of LZ4_decompress_safe(),
+ metadata includes block's compressed size, and maximum bound of decompressed size.
+ Each application is free to encode and pass such metadata in whichever way it wants.
+
+ lz4.h only handle blocks, it can not generate Frames.
+
+ Blocks are different from Frames (doc/lz4_Frame_format.md).
+ Frames bundle both blocks and metadata in a specified manner.
+ Embedding metadata is required for compressed data to be self-contained and portable.
+ Frame format is delivered through a companion API, declared in lz4frame.h.
+ The `lz4` CLI can only manage frames.
+*/
+
+/*^***************************************************************
+* Export parameters
+*****************************************************************/
+/*
+* LZ4_DLL_EXPORT :
+* Enable exporting of functions when building a Windows DLL
+* LZ4LIB_VISIBILITY :
+* Control library symbols visibility.
+*/
+#ifndef LZ4LIB_VISIBILITY
+# if defined(__GNUC__) && (__GNUC__ >= 4)
+# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
+# else
+# define LZ4LIB_VISIBILITY
+# endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+# define LZ4LIB_API LZ4LIB_VISIBILITY
+#endif
+
+/*------ Version ------*/
+#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
+#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
+
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+
+#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
+#define LZ4_QUOTE(str) #str
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
+#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
+
+namespace tracy
+{
+
+LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */
+LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */
+
+
+/*-************************************
+* Tuning parameter
+**************************************/
+/*!
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio.
+ * Reduced memory usage may improve speed, thanks to better cache locality.
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE 14
+#endif
+
+
+/*-************************************
+* Simple Functions
+**************************************/
+/*! LZ4_compress_default() :
+ * Compresses 'srcSize' bytes from buffer 'src'
+ * into already allocated 'dst' buffer of size 'dstCapacity'.
+ * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'src' into a more limited 'dst' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * In which case, 'dst' content is undefined (invalid).
+ * srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+ * dstCapacity : size of buffer 'dst' (which must be already allocated)
+ * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+ * or 0 if compression fails
+ * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+ */
+LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
+
+/*! LZ4_decompress_safe() :
+ * compressedSize : is the exact complete size of the compressed block.
+ * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ * If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ * If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ * Note 1 : This function is protected against malicious data packets :
+ * it will never writes outside 'dst' buffer, nor read outside 'source' buffer,
+ * even if the compressed block is maliciously modified to order the decoder to do these actions.
+ * In such case, the decoder stops immediately, and considers the compressed block malformed.
+ * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
+ * The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
+
+
+/*-************************************
+* Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*! LZ4_compressBound() :
+ Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+ This function is primarily useful for memory allocation purposes (destination buffer size).
+ Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+ Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
+ inputSize : max supported value is LZ4_MAX_INPUT_SIZE
+ return : maximum output size in a "worst case" scenario
+ or 0, if input size is incorrect (too large or negative)
+*/
+LZ4LIB_API int LZ4_compressBound(int inputSize);
+
+/*! LZ4_compress_fast() :
+ Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
+ The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+ It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+ An acceleration value of "1" is the same as regular LZ4_compress_default()
+ Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+ Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
+*/
+LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_fast_extState() :
+ * Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+ * Use LZ4_sizeofState() to know how much memory must be allocated,
+ * and allocate it on 8-bytes boundaries (using `malloc()` typically).
+ * Then, provide this buffer as `void* state` to compression function.
+ */
+LZ4LIB_API int LZ4_sizeofState(void);
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_destSize() :
+ * Reverse the logic : compresses as much data as possible from 'src' buffer
+ * into already allocated buffer 'dst', of size >= 'targetDestSize'.
+ * This function either compresses the entire 'src' content into 'dst' if it's large enough,
+ * or fill 'dst' buffer completely with as much data as possible from 'src'.
+ * note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'.
+ * New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
+ * or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed un v1.9.2+):
+ * the produced compressed content could, in specific circumstances,
+ * require to be decompressed into a destination buffer larger
+ * by at least 1 byte than the content to decompress.
+ * If an application uses `LZ4_compress_destSize()`,
+ * it's highly recommended to update liblz4 to v1.9.2 or better.
+ * If this can't be done or ensured,
+ * the receiving decompression function should provide
+ * a dstCapacity which is > decompressedSize, by at least 1 byte.
+ * See https://github.com/lz4/lz4/issues/859 for details
+ */
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+
+
+/*! LZ4_decompress_safe_partial() :
+ * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ * into destination buffer 'dst' of size 'dstCapacity'.
+ * Up to 'targetOutputSize' bytes will be decoded.
+ * The function stops decoding on reaching this objective.
+ * This can be useful to boost performance
+ * whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+ * If source stream is detected malformed, function returns a negative result.
+ *
+ * Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
+ *
+ * Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ * Note 3 : this function effectively stops decoding on reaching targetOutputSize,
+ * so dstCapacity is kind of redundant.
+ * This is because in older versions of this function,
+ * decoding operation would still write complete sequences.
+ * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
+ * it could write more bytes, though only up to dstCapacity.
+ * Some "margin" used to be required for this operation to work properly.
+ * Thankfully, this is no longer necessary.
+ * The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ * Note 4 : If srcSize is the exact size of the block,
+ * then targetOutputSize can be any value,
+ * including larger than the block's decompressed size.
+ * The function will, at most, generate block's decompressed size.
+ *
+ * Note 5 : If srcSize is _larger_ than block's compressed size,
+ * then targetOutputSize **MUST** be <= block's decompressed size.
+ * Otherwise, *silent corruption will occur*.
+ */
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
+
+
+/*-*********************************************
+* Streaming Compression Functions
+***********************************************/
+typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
+
+LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
+
+/*! LZ4_resetStream_fast() : v1.9.0+
+ * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ * (e.g., LZ4_compress_fast_continue()).
+ *
+ * An LZ4_stream_t must be initialized once before usage.
+ * This is automatically done when created by LZ4_createStream().
+ * However, should the LZ4_stream_t be simply declared on stack (for example),
+ * it's necessary to initialize it first, using LZ4_initStream().
+ *
+ * After init, start any new stream with LZ4_resetStream_fast().
+ * A same LZ4_stream_t can be re-used multiple times consecutively
+ * and compress multiple streams,
+ * provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ * LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ * but is not compatible with memory regions containing garbage data.
+ *
+ * Note: it's only useful to call LZ4_resetStream_fast()
+ * in the context of streaming compression.
+ * The *extState* functions perform their own resets.
+ * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
+ */
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+
+/*! LZ4_loadDict() :
+ * Use this function to reference a static dictionary into LZ4_stream_t.
+ * The dictionary must remain available during compression.
+ * LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ * The same dictionary will have to be loaded on decompression side for successful decoding.
+ * Dictionary are useful for better compression of small data (KB range).
+ * While LZ4 accept any input as dictionary,
+ * results are generally better when using Zstandard's Dictionary Builder.
+ * Loading a size of 0 is allowed, and is the same as reset.
+ * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
+ */
+LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*! LZ4_compress_fast_continue() :
+ * Compress 'src' content using data from previously compressed blocks, for better compression ratio.
+ * 'dst' buffer must be already allocated.
+ * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ *
+ * @return : size of compressed block
+ * or 0 if there is an error (typically, cannot fit into 'dst').
+ *
+ * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
+ * Each block has precise boundaries.
+ * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
+ * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
+ *
+ * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
+ *
+ * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
+ * Make sure that buffers are separated, by at least one byte.
+ * This construction ensures that each block only depends on previous block.
+ *
+ * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *
+ * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
+ */
+LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_saveDict() :
+ * If last 64KB data cannot be guaranteed to remain available at its current memory location,
+ * save it into a safer place (char* safeBuffer).
+ * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
+ */
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
+
+
+/*-**********************************************
+* Streaming Decompression Functions
+* Bufferless synchronous API
+************************************************/
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */
+
+/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+ * creation / destruction of streaming decompression tracking context.
+ * A tracking context can be re-used multiple times.
+ */
+LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
+/*! LZ4_setStreamDecode() :
+ * An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
+ * Use this function to start decompression of a new stream of blocks.
+ * A dictionary can optionally be set. Use NULL or size 0 for a reset order.
+ * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
+ * @return : 1 if OK, 0 if error
+ */
+LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*! LZ4_decoderRingBufferSize() : v1.8.2+
+ * Note : in a ring buffer scenario (optional),
+ * blocks are presumed decompressed next to each other
+ * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
+ * at which stage it resumes from beginning of ring buffer.
+ * When setting such a ring buffer for streaming decompression,
+ * provides the minimum size of this ring buffer
+ * to be compatible with any source respecting maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ * or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */
+
+/*! LZ4_decompress_*_continue() :
+ * These decoding functions allow decompression of consecutive blocks in "streaming" mode.
+ * A block is an unsplittable entity, it must be presented entirely to a decompression function.
+ * Decompression functions only accepts one block at a time.
+ * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+ * If less than 64KB of data has been decoded, all the data must be present.
+ *
+ * Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ * In which case, encoding and decoding buffers do not need to be synchronized.
+ * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ * - Synchronized mode :
+ * Decompression buffer size is _exactly_ the same as compression buffer size,
+ * and follows exactly same update rule (block boundaries at same positions),
+ * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
+ * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
+ * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ * In which case, encoding and decoding buffers do not need to be synchronized,
+ * and encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *
+ * Whenever these conditions are not possible,
+ * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
+*/
+LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
+
+
+/*! LZ4_decompress_*_usingDict() :
+ * These decoding functions work the same as
+ * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
+ * They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ * Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
+ * Performance tip : Decompression speed can be substantially increased
+ * when dst == dictStart + dictSize.
+ */
+LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
+
+}
+
+#endif /* LZ4_H_2983827168210 */
+
+
+/*^*************************************
+ * !!!!!! STATIC LINKING ONLY !!!!!!
+ ***************************************/
+
+/*-****************************************************************************
+ * Experimental section
+ *
+ * Symbols declared in this section must be considered unstable. Their
+ * signatures or semantics may change, or they may be removed altogether in the
+ * future. They are therefore only safe to depend on when the caller is
+ * statically linked against the library.
+ *
+ * To protect against unsafe usage, not only are the declarations guarded,
+ * the definitions are hidden by default
+ * when building LZ4 as a shared/dynamic library.
+ *
+ * In order to access these declarations,
+ * define LZ4_STATIC_LINKING_ONLY in your application
+ * before including LZ4's headers.
+ *
+ * In order to make their implementations accessible dynamically, you must
+ * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
+ ******************************************************************************/
+
+#ifdef LZ4_STATIC_LINKING_ONLY
+
+#ifndef TRACY_LZ4_STATIC_3504398509
+#define TRACY_LZ4_STATIC_3504398509
+
+#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
+#define LZ4LIB_STATIC_API LZ4LIB_API
+#else
+#define LZ4LIB_STATIC_API
+#endif
+
+namespace tracy
+{
+
+/*! LZ4_compress_fast_extState_fastReset() :
+ * A variant of LZ4_compress_fast_extState().
+ *
+ * Using this variant avoids an expensive initialization step.
+ * It is only safe to call if the state buffer is known to be correctly initialized already
+ * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
+ * From a high level, the difference is that
+ * this function initializes the provided state with a call to something like LZ4_resetStream_fast()
+ * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_attach_dictionary() :
+ * This is an experimental API that allows
+ * efficient use of a static dictionary many times.
+ *
+ * Rather than re-loading the dictionary buffer into a working context before
+ * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ * working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ * in which the working stream references the dictionary stream in-place.
+ *
+ * Several assumptions are made about the state of the dictionary stream.
+ * Currently, only streams which have been prepared by LZ4_loadDict() should
+ * be expected to work.
+ *
+ * Alternatively, the provided dictionaryStream may be NULL,
+ * in which case any existing dictionary stream is unset.
+ *
+ * If a dictionary is provided, it replaces any pre-existing stream history.
+ * The dictionary contents are the only history that can be referenced and
+ * logically immediately precede the data compressed in the first subsequent
+ * compression call.
+ *
+ * The dictionary will only remain attached to the working stream through the
+ * first compression call, at the end of which it is cleared. The dictionary
+ * stream (and source buffer) must remain in-place / accessible / unchanged
+ * through the completion of the first compression call on the stream.
+ */
+LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
+
+
+/*! In-place compression and decompression
+ *
+ * It's possible to have input and output sharing the same buffer,
+ * for highly contrained memory environments.
+ * In both cases, it requires input to lay at the end of the buffer,
+ * and decompression to start at beginning of the buffer.
+ * Buffer size must feature some margin, hence be larger than final size.
+ *
+ * |<------------------------buffer--------------------------------->|
+ * |<-----------compressed data--------->|
+ * |<-----------decompressed size------------------>|
+ * |<----margin---->|
+ *
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is short.
+ *
+ * In-place decompression will work inside any buffer
+ * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ * This presumes that decompressedSize > compressedSize.
+ * Otherwise, it means compression actually expanded data,
+ * and it would be more efficient to store such data with a flag indicating it's not compressed.
+ * This can happen when data is not compressible (already compressed, or encrypted).
+ *
+ * For in-place compression, margin is larger, as it must be able to cope with both
+ * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ * and data expansion, which can happen when input is not compressible.
+ * As a consequence, buffer size requirements are much higher,
+ * and memory savings offered by in-place compression are more limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ * Note that it is a compile-time constant, so all compressions will apply this limit.
+ * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ * so it's a reasonable trick when inputs are known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ * This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ * in which case, the return code will be 0 (zero).
+ * The caller must be ready for these cases to happen,
+ * and typically design a backup scheme to send data uncompressed.
+ * The combination of both techniques can significantly reduce
+ * the amount of margin required for in-place compression.
+ *
+ * In-place compression can work in any buffer
+ * which size is >= (maxCompressedSize)
+ * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
+ * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
+ * so it's possible to reduce memory requirements by playing with them.
+ */
+
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32)
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
+
+#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
+# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
+#endif
+
+#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+
+}
+
+#endif /* LZ4_STATIC_3504398509 */
+#endif /* LZ4_STATIC_LINKING_ONLY */
+
+#ifndef TRACY_LZ4_H_98237428734687
+#define TRACY_LZ4_H_98237428734687
+
+namespace tracy
+{
+
+/*-************************************************************
+ * Private Definitions
+ **************************************************************
+ * Do not use these definitions directly.
+ * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library.
+ **************************************************************/
+#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+ typedef int8_t LZ4_i8;
+ typedef uint8_t LZ4_byte;
+ typedef uint16_t LZ4_u16;
+ typedef uint32_t LZ4_u32;
+#else
+ typedef signed char LZ4_i8;
+ typedef unsigned char LZ4_byte;
+ typedef unsigned short LZ4_u16;
+ typedef unsigned int LZ4_u32;
+#endif
+
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
+ LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
+ LZ4_u32 currentOffset;
+ LZ4_u32 tableType;
+ const LZ4_byte* dictionary;
+ const LZ4_stream_t_internal* dictCtx;
+ LZ4_u32 dictSize;
+};
+
+typedef struct {
+ const LZ4_byte* externalDict;
+ size_t extDictSize;
+ const LZ4_byte* prefixEnd;
+ size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+
+/*! LZ4_stream_t :
+ * Do not use below internal definitions directly !
+ * Declare or allocate an LZ4_stream_t instead.
+ * LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
+ * The structure definition can be convenient for static allocation
+ * (on stack, or as part of larger structure).
+ * Init this structure with LZ4_initStream() before first use.
+ * note : only use this definition in association with static linking !
+ * this definition is not API/ABI safe, and may change in future versions.
+ */
+#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */
+#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))
+union LZ4_stream_u {
+ void* table[LZ4_STREAMSIZE_VOIDP];
+ LZ4_stream_t_internal internal_donotuse;
+}; /* previously typedef'd to LZ4_stream_t */
+
+
+/*! LZ4_initStream() : v1.9.0+
+ * An LZ4_stream_t structure must be initialized at least once.
+ * This is automatically done when invoking LZ4_createStream(),
+ * but it's not when the structure is simply declared on stack (for example).
+ *
+ * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ * It can also initialize any arbitrary buffer of sufficient size,
+ * and will @return a pointer of proper type upon initialization.
+ *
+ * Note : initialization fails if size and alignment conditions are not respected.
+ * In which case, the function will @return NULL.
+ * Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+ * Note3: Before v1.9.0, use LZ4_resetStream() instead
+ */
+LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
+
+
+/*! LZ4_streamDecode_t :
+ * information structure to track an LZ4 stream during decompression.
+ * init this structure using LZ4_setStreamDecode() before first use.
+ * note : only use in association with static linking !
+ * this definition is not API/ABI safe,
+ * and may change in a future version !
+ */
+#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ )
+#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+union LZ4_streamDecode_u {
+ unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+ LZ4_streamDecode_t_internal internal_donotuse;
+} ; /* previously typedef'd to LZ4_streamDecode_t */
+
+
+
+/*-************************************
+* Obsolete Functions
+**************************************/
+
+/*! Deprecation warnings
+ *
+ * Deprecated functions make the compiler generate a warning when invoked.
+ * This is meant to invite users to update their source code.
+ * Should deprecation warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc
+ * or _CRT_SECURE_NO_WARNINGS in Visual.
+ *
+ * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+ * before including the header file.
+ */
+#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
+# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
+#else
+# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+# define LZ4_DEPRECATED(message) [[deprecated(message)]]
+# elif defined(_MSC_VER)
+# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
+# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
+# define LZ4_DEPRECATED(message) __attribute__((deprecated))
+# else
+# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+# define LZ4_DEPRECATED(message) /* disabled */
+# endif
+#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
+
+/*! Obsolete compression functions (since v1.7.3) */
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize);
+LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/*! Obsolete decompression functions (since v1.8.0) */
+LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
+
+/* Obsolete streaming functions (since v1.7.0)
+ * degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, they don't
+ * actually retain any history between compression calls. The compression ratio
+ * achieved will therefore be no better than compressing each chunk
+ * independently.
+ */
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
+
+/*! Obsolete streaming decoding functions (since v1.7.0) */
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+
+/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
+ * These functions used to be faster than LZ4_decompress_safe(),
+ * but this is no longer the case. They are now slower.
+ * This is because LZ4_decompress_fast() doesn't know the input size,
+ * and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
+ * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
+ * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
+ *
+ * The last remaining LZ4_decompress_fast() specificity is that
+ * it can decompress a block without knowing its compressed size.
+ * Such functionality can be achieved in a more secure manner
+ * by employing LZ4_decompress_safe_partial().
+ *
+ * Parameters:
+ * originalSize : is the uncompressed size to regenerate.
+ * `dst` must be already allocated, its size must be >= 'originalSize' bytes.
+ * @return : number of bytes read from source buffer (== compressed size).
+ * The function expects to finish at block's end exactly.
+ * If the source stream is detected malformed, the function stops decoding and returns a negative result.
+ * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
+ * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
+ * Also, since match offsets are not validated, match reads from 'src' may underflow too.
+ * These issues never happen if input (compressed) data is correct.
+ * But they may happen if input data is invalid (error or intentional tampering).
+ * As a consequence, use these functions in trusted environments with trusted data **only**.
+ */
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
+LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
+LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
+LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
+
+/*! LZ4_resetStream() :
+ * An LZ4_stream_t structure must be initialized at least once.
+ * This is done with LZ4_initStream(), or LZ4_resetStream().
+ * Consider switching to LZ4_initStream(),
+ * invoking LZ4_resetStream() will trigger deprecation warnings in the future.
+ */
+LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
+
+}
+
+#endif /* LZ4_H_98237428734687 */
diff --git a/3rdparty/tracy/tracy/common/tracy_lz4hc.cpp b/3rdparty/tracy/tracy/common/tracy_lz4hc.cpp
new file mode 100644
index 0000000..9c899b8
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/tracy_lz4hc.cpp
@@ -0,0 +1,1620 @@
+/*
+ LZ4 HC - High Compression Mode of LZ4
+ Copyright (C) 2011-2017, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
+
+
+/* *************************************
+* Tuning Parameter
+***************************************/
+
+/*! HEAPMODE :
+ * Select how default compression function will allocate workplace memory,
+ * in stack (0:fastest), or in heap (1:requires malloc()).
+ * Since workplace is rather large, heap mode is recommended.
+ */
+#ifndef LZ4HC_HEAPMODE
+# define LZ4HC_HEAPMODE 1
+#endif
+
+
+/*=== Dependency ===*/
+#define LZ4_HC_STATIC_LINKING_ONLY
+#include "tracy_lz4hc.hpp"
+
+
+/*=== Common definitions ===*/
+#if defined(__GNUC__)
+# pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+#if defined (__clang__)
+# pragma clang diagnostic ignored "-Wunused-function"
+#endif
+
+#define LZ4_COMMONDEFS_ONLY
+#ifndef LZ4_SRC_INCLUDED
+#include "tracy_lz4.cpp" /* LZ4_count, constants, mem */
+#endif
+
+
+/*=== Enums ===*/
+typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
+
+
+/*=== Constants ===*/
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+#define LZ4_OPT_NUM (1<<12)
+
+
+/*=== Macros ===*/
+#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
+#define MAX(a,b) ( (a) > (b) ? (a) : (b) )
+#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG))
+#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */
+#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */
+/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
+#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
+
+namespace tracy
+{
+
+static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); }
+
+
+/**************************************
+* HC Compression
+**************************************/
+static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4)
+{
+ MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable));
+ MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+}
+
+static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
+{
+ uptrval startingOffset = (uptrval)(hc4->end - hc4->base);
+ if (startingOffset > 1 GB) {
+ LZ4HC_clearTables(hc4);
+ startingOffset = 0;
+ }
+ startingOffset += 64 KB;
+ hc4->nextToUpdate = (U32) startingOffset;
+ hc4->base = start - startingOffset;
+ hc4->end = start;
+ hc4->dictBase = start - startingOffset;
+ hc4->dictLimit = (U32) startingOffset;
+ hc4->lowLimit = (U32) startingOffset;
+}
+
+
+/* Update chains up to ip (excluded) */
+LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip)
+{
+ U16* const chainTable = hc4->chainTable;
+ U32* const hashTable = hc4->hashTable;
+ const BYTE* const base = hc4->base;
+ U32 const target = (U32)(ip - base);
+ U32 idx = hc4->nextToUpdate;
+
+ while (idx < target) {
+ U32 const h = LZ4HC_hashPtr(base+idx);
+ size_t delta = idx - hashTable[h];
+ if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX;
+ DELTANEXTU16(chainTable, idx) = (U16)delta;
+ hashTable[h] = idx;
+ idx++;
+ }
+
+ hc4->nextToUpdate = target;
+}
+
+/** LZ4HC_countBack() :
+ * @return : negative value, nb of common bytes before ip/match */
+LZ4_FORCE_INLINE
+int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
+ const BYTE* const iMin, const BYTE* const mMin)
+{
+ int back = 0;
+ int const min = (int)MAX(iMin - ip, mMin - match);
+ assert(min <= 0);
+ assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31));
+ assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31));
+ while ( (back > min)
+ && (ip[back-1] == match[back-1]) )
+ back--;
+ return back;
+}
+
+#if defined(_MSC_VER)
+# define LZ4HC_rotl32(x,r) _rotl(x,r)
+#else
+# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#endif
+
+
+static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
+{
+ size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
+ if (bitsToRotate == 0) return pattern;
+ return LZ4HC_rotl32(pattern, (int)bitsToRotate);
+}
+
+/* LZ4HC_countPattern() :
+ * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
+static unsigned
+LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+{
+ const BYTE* const iStart = ip;
+ reg_t const pattern = (sizeof(pattern)==8) ?
+ (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32;
+
+ while (likely(ip < iEnd-(sizeof(pattern)-1))) {
+ reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
+ if (!diff) { ip+=sizeof(pattern); continue; }
+ ip += LZ4_NbCommonBytes(diff);
+ return (unsigned)(ip - iStart);
+ }
+
+ if (LZ4_isLittleEndian()) {
+ reg_t patternByte = pattern;
+ while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
+ ip++; patternByte >>= 8;
+ }
+ } else { /* big endian */
+ U32 bitOffset = (sizeof(pattern)*8) - 8;
+ while (ip < iEnd) {
+ BYTE const byte = (BYTE)(pattern >> bitOffset);
+ if (*ip != byte) break;
+ ip ++; bitOffset -= 8;
+ }
+ }
+
+ return (unsigned)(ip - iStart);
+}
+
+/* LZ4HC_reverseCountPattern() :
+ * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
+ * read using natural platform endianess */
+static unsigned
+LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
+{
+ const BYTE* const iStart = ip;
+
+ while (likely(ip >= iLow+4)) {
+ if (LZ4_read32(ip-4) != pattern) break;
+ ip -= 4;
+ }
+ { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */
+ while (likely(ip>iLow)) {
+ if (ip[-1] != *bytePtr) break;
+ ip--; bytePtr--;
+ } }
+ return (unsigned)(iStart - ip);
+}
+
+/* LZ4HC_protectDictEnd() :
+ * Checks if the match is in the last 3 bytes of the dictionary, so reading the
+ * 4 byte MINMATCH would overflow.
+ * @returns true if the match index is okay.
+ */
+static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
+{
+ return ((U32)((dictLimit - 1) - matchIndex) >= 3);
+}
+
+typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
+typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
+
+LZ4_FORCE_INLINE int
+LZ4HC_InsertAndGetWiderMatch (
+ LZ4HC_CCtx_internal* hc4,
+ const BYTE* const ip,
+ const BYTE* const iLowLimit,
+ const BYTE* const iHighLimit,
+ int longest,
+ const BYTE** matchpos,
+ const BYTE** startpos,
+ const int maxNbAttempts,
+ const int patternAnalysis,
+ const int chainSwap,
+ const dictCtx_directive dict,
+ const HCfavor_e favorDecSpeed)
+{
+ U16* const chainTable = hc4->chainTable;
+ U32* const HashTable = hc4->hashTable;
+ const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx;
+ const BYTE* const base = hc4->base;
+ const U32 dictLimit = hc4->dictLimit;
+ const BYTE* const lowPrefixPtr = base + dictLimit;
+ const U32 ipIndex = (U32)(ip - base);
+ const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
+ const BYTE* const dictBase = hc4->dictBase;
+ int const lookBackLength = (int)(ip-iLowLimit);
+ int nbAttempts = maxNbAttempts;
+ U32 matchChainPos = 0;
+ U32 const pattern = LZ4_read32(ip);
+ U32 matchIndex;
+ repeat_state_e repeat = rep_untested;
+ size_t srcPatternLength = 0;
+
+ DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch");
+ /* First Match */
+ LZ4HC_Insert(hc4, ip);
+ matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+ DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
+ matchIndex, lowestMatchIndex);
+
+ while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
+ int matchLength=0;
+ nbAttempts--;
+ assert(matchIndex < ipIndex);
+ if (favorDecSpeed && (ipIndex - matchIndex < 8)) {
+ /* do nothing */
+ } else if (matchIndex >= dictLimit) { /* within current Prefix */
+ const BYTE* const matchPtr = base + matchIndex;
+ assert(matchPtr >= lowPrefixPtr);
+ assert(matchPtr < ip);
+ assert(longest >= 1);
+ if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) {
+ if (LZ4_read32(matchPtr) == pattern) {
+ int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0;
+ matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
+ matchLength -= back;
+ if (matchLength > longest) {
+ longest = matchLength;
+ *matchpos = matchPtr + back;
+ *startpos = ip + back;
+ } } }
+ } else { /* lowestMatchIndex <= matchIndex < dictLimit */
+ const BYTE* const matchPtr = dictBase + matchIndex;
+ if (LZ4_read32(matchPtr) == pattern) {
+ const BYTE* const dictStart = dictBase + hc4->lowLimit;
+ int back = 0;
+ const BYTE* vLimit = ip + (dictLimit - matchIndex);
+ if (vLimit > iHighLimit) vLimit = iHighLimit;
+ matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+ if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
+ matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit);
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0;
+ matchLength -= back;
+ if (matchLength > longest) {
+ longest = matchLength;
+ *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */
+ *startpos = ip + back;
+ } } }
+
+ if (chainSwap && matchLength==longest) { /* better match => select a better chain */
+ assert(lookBackLength==0); /* search forward only */
+ if (matchIndex + (U32)longest <= ipIndex) {
+ int const kTrigger = 4;
+ U32 distanceToNextMatch = 1;
+ int const end = longest - MINMATCH + 1;
+ int step = 1;
+ int accel = 1 << kTrigger;
+ int pos;
+ for (pos = 0; pos < end; pos += step) {
+ U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
+ step = (accel++ >> kTrigger);
+ if (candidateDist > distanceToNextMatch) {
+ distanceToNextMatch = candidateDist;
+ matchChainPos = (U32)pos;
+ accel = 1 << kTrigger;
+ }
+ }
+ if (distanceToNextMatch > 1) {
+ if (distanceToNextMatch > matchIndex) break; /* avoid overflow */
+ matchIndex -= distanceToNextMatch;
+ continue;
+ } } }
+
+ { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex);
+ if (patternAnalysis && distNextMatch==1 && matchChainPos==0) {
+ U32 const matchCandidateIdx = matchIndex-1;
+ /* may be a repeated pattern */
+ if (repeat == rep_untested) {
+ if ( ((pattern & 0xFFFF) == (pattern >> 16))
+ & ((pattern & 0xFF) == (pattern >> 24)) ) {
+ repeat = rep_confirmed;
+ srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
+ } else {
+ repeat = rep_not;
+ } }
+ if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
+ && LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) {
+ const int extDict = matchCandidateIdx < dictLimit;
+ const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx;
+ if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
+ const BYTE* const dictStart = dictBase + hc4->lowLimit;
+ const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit;
+ size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
+ if (extDict && matchPtr + forwardPatternLength == iLimit) {
+ U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern);
+ forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern);
+ }
+ { const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr;
+ size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
+ size_t currentSegmentLength;
+ if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) {
+ U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
+ backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern);
+ }
+ /* Limit backLength not go further than lowestMatchIndex */
+ backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
+ assert(matchCandidateIdx - backLength >= lowestMatchIndex);
+ currentSegmentLength = backLength + forwardPatternLength;
+ /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */
+ if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
+ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
+ U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
+ if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex))
+ matchIndex = newMatchIndex;
+ else {
+ /* Can only happen if started in the prefix */
+ assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
+ matchIndex = dictLimit;
+ }
+ } else {
+ U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
+ if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) {
+ assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
+ matchIndex = dictLimit;
+ } else {
+ matchIndex = newMatchIndex;
+ if (lookBackLength==0) { /* no back possible */
+ size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
+ if ((size_t)longest < maxML) {
+ assert(base + matchIndex != ip);
+ if ((size_t)(ip - base) - matchIndex > LZ4_DISTANCE_MAX) break;
+ assert(maxML < 2 GB);
+ longest = (int)maxML;
+ *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
+ *startpos = ip;
+ }
+ { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
+ if (distToNextPattern > matchIndex) break; /* avoid overflow */
+ matchIndex -= distToNextPattern;
+ } } } } }
+ continue;
+ } }
+ } } /* PA optimization */
+
+ /* follow current chain */
+ matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos);
+
+ } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */
+
+ if ( dict == usingDictCtxHc
+ && nbAttempts > 0
+ && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) {
+ size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base);
+ U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
+ assert(dictEndOffset <= 1 GB);
+ matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset;
+ while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
+ const BYTE* const matchPtr = dictCtx->base + dictMatchIndex;
+
+ if (LZ4_read32(matchPtr) == pattern) {
+ int mlt;
+ int back = 0;
+ const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex);
+ if (vLimit > iHighLimit) vLimit = iHighLimit;
+ mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+ back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0;
+ mlt -= back;
+ if (mlt > longest) {
+ longest = mlt;
+ *matchpos = base + matchIndex + back;
+ *startpos = ip + back;
+ } }
+
+ { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex);
+ dictMatchIndex -= nextOffset;
+ matchIndex -= nextOffset;
+ } } }
+
+ return longest;
+}
+
+LZ4_FORCE_INLINE
+int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */
+ const BYTE* const ip, const BYTE* const iLimit,
+ const BYTE** matchpos,
+ const int maxNbAttempts,
+ const int patternAnalysis,
+ const dictCtx_directive dict)
+{
+ const BYTE* uselessPtr = ip;
+ /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
+ * but this won't be the case here, as we define iLowLimit==ip,
+ * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+ return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio);
+}
+
+/* LZ4HC_encodeSequence() :
+ * @return : 0 if ok,
+ * 1 if buffer issue detected */
+LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
+ const BYTE** _ip,
+ BYTE** _op,
+ const BYTE** _anchor,
+ int matchLength,
+ const BYTE* const match,
+ limitedOutput_directive limit,
+ BYTE* oend)
+{
+#define ip (*_ip)
+#define op (*_op)
+#define anchor (*_anchor)
+
+ size_t length;
+ BYTE* const token = op++;
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
+ static const BYTE* start = NULL;
+ static U32 totalCost = 0;
+ U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
+ U32 const ll = (U32)(ip - anchor);
+ U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
+ U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
+ U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
+ if (start==NULL) start = anchor; /* only works for single segment */
+ /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
+ DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
+ pos,
+ (U32)(ip - anchor), matchLength, (U32)(ip-match),
+ cost, totalCost);
+ totalCost += cost;
+#endif
+
+ /* Encode Literal length */
+ length = (size_t)(ip - anchor);
+ LZ4_STATIC_ASSERT(notLimited == 0);
+ /* Check output limit */
+ if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
+ DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
+ (int)length, (int)(oend - op));
+ return 1;
+ }
+ if (length >= RUN_MASK) {
+ size_t len = length - RUN_MASK;
+ *token = (RUN_MASK << ML_BITS);
+ for(; len >= 255 ; len -= 255) *op++ = 255;
+ *op++ = (BYTE)len;
+ } else {
+ *token = (BYTE)(length << ML_BITS);
+ }
+
+ /* Copy Literals */
+ LZ4_wildCopy8(op, anchor, op + length);
+ op += length;
+
+ /* Encode Offset */
+ assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */
+ LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
+
+ /* Encode MatchLength */
+ assert(matchLength >= MINMATCH);
+ length = (size_t)matchLength - MINMATCH;
+ if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
+ DEBUGLOG(6, "Not enough room to write match length");
+ return 1; /* Check output limit */
+ }
+ if (length >= ML_MASK) {
+ *token += ML_MASK;
+ length -= ML_MASK;
+ for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
+ if (length >= 255) { length -= 255; *op++ = 255; }
+ *op++ = (BYTE)length;
+ } else {
+ *token += (BYTE)(length);
+ }
+
+ /* Prepare next loop */
+ ip += matchLength;
+ anchor = ip;
+
+ return 0;
+}
+#undef ip
+#undef op
+#undef anchor
+
+LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
+ LZ4HC_CCtx_internal* const ctx,
+ const char* const source,
+ char* const dest,
+ int* srcSizePtr,
+ int const maxOutputSize,
+ int maxNbAttempts,
+ const limitedOutput_directive limit,
+ const dictCtx_directive dict
+ )
+{
+ const int inputSize = *srcSizePtr;
+ const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */
+
+ const BYTE* ip = (const BYTE*) source;
+ const BYTE* anchor = ip;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimit = iend - MFLIMIT;
+ const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+ BYTE* optr = (BYTE*) dest;
+ BYTE* op = (BYTE*) dest;
+ BYTE* oend = op + maxOutputSize;
+
+ int ml0, ml, ml2, ml3;
+ const BYTE* start0;
+ const BYTE* ref0;
+ const BYTE* ref = NULL;
+ const BYTE* start2 = NULL;
+ const BYTE* ref2 = NULL;
+ const BYTE* start3 = NULL;
+ const BYTE* ref3 = NULL;
+
+ /* init */
+ *srcSizePtr = 0;
+ if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
+ if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
+
+ /* Main Loop */
+ while (ip <= mflimit) {
+ ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
+ if (ml<MINMATCH) { ip++; continue; }
+
+ /* saved, in case we would skip too much */
+ start0 = ip; ref0 = ref; ml0 = ml;
+
+_Search2:
+ if (ip+ml <= mflimit) {
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+ maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+ } else {
+ ml2 = ml;
+ }
+
+ if (ml2 == ml) { /* No better match => encode ML1 */
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+ continue;
+ }
+
+ if (start0 < ip) { /* first match was skipped at least once */
+ if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */
+ ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */
+ } }
+
+ /* Here, start0==ip */
+ if ((start2 - ip) < 3) { /* First Match too small : removed */
+ ml = ml2;
+ ip = start2;
+ ref =ref2;
+ goto _Search2;
+ }
+
+_Search3:
+ /* At this stage, we have :
+ * ml2 > ml1, and
+ * ip1+3 <= ip2 (usually < ip1+ml1) */
+ if ((start2 - ip) < OPTIMAL_ML) {
+ int correction;
+ int new_ml = ml;
+ if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+ if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = new_ml - (int)(start2 - ip);
+ if (correction > 0) {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
+
+ if (start2 + ml2 <= mflimit) {
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+ start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+ maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+ } else {
+ ml3 = ml2;
+ }
+
+ if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */
+ /* ip & ref are known; Now for ml */
+ if (start2 < ip+ml) ml = (int)(start2 - ip);
+ /* Now, encode 2 sequences */
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+ ip = start2;
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) {
+ ml = ml2;
+ ref = ref2;
+ goto _dest_overflow;
+ }
+ continue;
+ }
+
+ if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */
+ if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
+ if (start2 < ip+ml) {
+ int correction = (int)(ip+ml - start2);
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ if (ml2 < MINMATCH) {
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ }
+ }
+
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+ ip = start3;
+ ref = ref3;
+ ml = ml3;
+
+ start0 = start2;
+ ref0 = ref2;
+ ml0 = ml2;
+ goto _Search2;
+ }
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ goto _Search3;
+ }
+
+ /*
+ * OK, now we have 3 ascending matches;
+ * let's write the first one ML1.
+ * ip & ref are known; Now decide ml.
+ */
+ if (start2 < ip+ml) {
+ if ((start2 - ip) < OPTIMAL_ML) {
+ int correction;
+ if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+ if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = ml - (int)(start2 - ip);
+ if (correction > 0) {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ } else {
+ ml = (int)(start2 - ip);
+ }
+ }
+ optr = op;
+ if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+
+ /* ML2 becomes ML1 */
+ ip = start2; ref = ref2; ml = ml2;
+
+ /* ML3 becomes ML2 */
+ start2 = start3; ref2 = ref3; ml2 = ml3;
+
+ /* let's find a new ML3 */
+ goto _Search3;
+ }
+
+_last_literals:
+ /* Encode Last Literals */
+ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */
+ size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
+ size_t const totalSize = 1 + llAdd + lastRunSize;
+ if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
+ if (limit && (op + totalSize > oend)) {
+ if (limit == limitedOutput) return 0;
+ /* adapt lastRunSize to fill 'dest' */
+ lastRunSize = (size_t)(oend - op) - 1 /*token*/;
+ llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+ lastRunSize -= llAdd;
+ }
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+ ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
+
+ if (lastRunSize >= RUN_MASK) {
+ size_t accumulator = lastRunSize - RUN_MASK;
+ *op++ = (RUN_MASK << ML_BITS);
+ for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRunSize << ML_BITS);
+ }
+ memcpy(op, anchor, lastRunSize);
+ op += lastRunSize;
+ }
+
+ /* End */
+ *srcSizePtr = (int) (((const char*)ip) - source);
+ return (int) (((char*)op)-dest);
+
+_dest_overflow:
+ if (limit == fillOutput) {
+ /* Assumption : ip, anchor, ml and ref must be set correctly */
+ size_t const ll = (size_t)(ip - anchor);
+ size_t const ll_addbytes = (ll + 240) / 255;
+ size_t const ll_totalCost = 1 + ll_addbytes + ll;
+ BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+ DEBUGLOG(6, "Last sequence overflowing");
+ op = optr; /* restore correct out pointer */
+ if (op + ll_totalCost <= maxLitPos) {
+ /* ll validated; now adjust match length */
+ size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+ size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+ assert(maxMlSize < INT_MAX); assert(ml >= 0);
+ if ((size_t)ml > maxMlSize) ml = (int)maxMlSize;
+ if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) {
+ LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend);
+ } }
+ goto _last_literals;
+ }
+ /* compression failed */
+ return 0;
+}
+
+
+static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
+ const char* const source, char* dst,
+ int* srcSizePtr, int dstCapacity,
+ int const nbSearches, size_t sufficient_len,
+ const limitedOutput_directive limit, int const fullUpdate,
+ const dictCtx_directive dict,
+ const HCfavor_e favorDecSpeed);
+
+
+LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal (
+ LZ4HC_CCtx_internal* const ctx,
+ const char* const src,
+ char* const dst,
+ int* const srcSizePtr,
+ int const dstCapacity,
+ int cLevel,
+ const limitedOutput_directive limit,
+ const dictCtx_directive dict
+ )
+{
+ typedef enum { lz4hc, lz4opt } lz4hc_strat_e;
+ typedef struct {
+ lz4hc_strat_e strat;
+ int nbSearches;
+ U32 targetLength;
+ } cParams_t;
+ static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = {
+ { lz4hc, 2, 16 }, /* 0, unused */
+ { lz4hc, 2, 16 }, /* 1, unused */
+ { lz4hc, 2, 16 }, /* 2, unused */
+ { lz4hc, 4, 16 }, /* 3 */
+ { lz4hc, 8, 16 }, /* 4 */
+ { lz4hc, 16, 16 }, /* 5 */
+ { lz4hc, 32, 16 }, /* 6 */
+ { lz4hc, 64, 16 }, /* 7 */
+ { lz4hc, 128, 16 }, /* 8 */
+ { lz4hc, 256, 16 }, /* 9 */
+ { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/
+ { lz4opt, 512,128 }, /*11 */
+ { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */
+ };
+
+ DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+ ctx, src, *srcSizePtr, limit);
+
+ if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */
+ if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
+
+ ctx->end += *srcSizePtr;
+ if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */
+ cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
+ { cParams_t const cParam = clTable[cLevel];
+ HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio;
+ int result;
+
+ if (cParam.strat == lz4hc) {
+ result = LZ4HC_compress_hashChain(ctx,
+ src, dst, srcSizePtr, dstCapacity,
+ cParam.nbSearches, limit, dict);
+ } else {
+ assert(cParam.strat == lz4opt);
+ result = LZ4HC_compress_optimal(ctx,
+ src, dst, srcSizePtr, dstCapacity,
+ cParam.nbSearches, cParam.targetLength, limit,
+ cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */
+ dict, favor);
+ }
+ if (result <= 0) ctx->dirty = 1;
+ return result;
+ }
+}
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock);
+
+static int
+LZ4HC_compress_generic_noDictCtx (
+ LZ4HC_CCtx_internal* const ctx,
+ const char* const src,
+ char* const dst,
+ int* const srcSizePtr,
+ int const dstCapacity,
+ int cLevel,
+ limitedOutput_directive limit
+ )
+{
+ assert(ctx->dictCtx == NULL);
+ return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx);
+}
+
+static int
+LZ4HC_compress_generic_dictCtx (
+ LZ4HC_CCtx_internal* const ctx,
+ const char* const src,
+ char* const dst,
+ int* const srcSizePtr,
+ int const dstCapacity,
+ int cLevel,
+ limitedOutput_directive limit
+ )
+{
+ const size_t position = (size_t)(ctx->end - ctx->base) - ctx->lowLimit;
+ assert(ctx->dictCtx != NULL);
+ if (position >= 64 KB) {
+ ctx->dictCtx = NULL;
+ return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+ } else if (position == 0 && *srcSizePtr > 4 KB) {
+ memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));
+ LZ4HC_setExternalDict(ctx, (const BYTE *)src);
+ ctx->compressionLevel = (short)cLevel;
+ return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+ } else {
+ return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc);
+ }
+}
+
+static int
+LZ4HC_compress_generic (
+ LZ4HC_CCtx_internal* const ctx,
+ const char* const src,
+ char* const dst,
+ int* const srcSizePtr,
+ int const dstCapacity,
+ int cLevel,
+ limitedOutput_directive limit
+ )
+{
+ if (ctx->dictCtx == NULL) {
+ return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+ } else {
+ return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
+ }
+}
+
+
+int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); }
+
+static size_t LZ4_streamHC_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+ typedef struct { char c; LZ4_streamHC_t t; } t_a;
+ return sizeof(t_a) - sizeof(LZ4_streamHC_t);
+#else
+ return 1; /* effectively disabled */
+#endif
+}
+
+/* state is presumed correctly initialized,
+ * in which case its size and alignment have already been validate */
+int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+ LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
+ if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0;
+ LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel);
+ LZ4HC_init_internal (ctx, (const BYTE*)src);
+ if (dstCapacity < LZ4_compressBound(srcSize))
+ return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput);
+ else
+ return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited);
+}
+
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+ LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
+ if (ctx==NULL) return 0; /* init failure */
+ return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel);
+}
+
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+ LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
+#else
+ LZ4_streamHC_t state;
+ LZ4_streamHC_t* const statePtr = &state;
+#endif
+ int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel);
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+ FREEMEM(statePtr);
+#endif
+ return cSize;
+}
+
+/* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */
+int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
+{
+ LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
+ if (ctx==NULL) return 0; /* init failure */
+ LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source);
+ LZ4_setCompressionLevel(ctx, cLevel);
+ return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput);
+}
+
+
+
+/**************************************
+* Streaming Functions
+**************************************/
+/* allocation */
+LZ4_streamHC_t* LZ4_createStreamHC(void)
+{
+ LZ4_streamHC_t* const state =
+ (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t));
+ if (state == NULL) return NULL;
+ LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT);
+ return state;
+}
+
+int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
+{
+ DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr);
+ if (!LZ4_streamHCPtr) return 0; /* support free on NULL */
+ FREEMEM(LZ4_streamHCPtr);
+ return 0;
+}
+
+
+LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size)
+{
+ LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer;
+ /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
+ LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE);
+ DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size);
+ /* check conditions */
+ if (buffer == NULL) return NULL;
+ if (size < sizeof(LZ4_streamHC_t)) return NULL;
+ if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL;
+ /* init */
+ { LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse);
+ MEM_INIT(hcstate, 0, sizeof(*hcstate)); }
+ LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT);
+ return LZ4_streamHCPtr;
+}
+
+/* just a stub */
+void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+ LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+ LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+ DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+ if (LZ4_streamHCPtr->internal_donotuse.dirty) {
+ LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+ } else {
+ /* preserve end - base : can trigger clearTable's threshold */
+ LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.base;
+ LZ4_streamHCPtr->internal_donotuse.base = NULL;
+ LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL;
+ }
+ LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel);
+}
+
+void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
+{
+ DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel);
+ if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT;
+ if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX;
+ LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel;
+}
+
+void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor)
+{
+ LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0);
+}
+
+/* LZ4_loadDictHC() :
+ * LZ4_streamHCPtr is presumed properly initialized */
+int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
+ const char* dictionary, int dictSize)
+{
+ LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+ DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize);
+ assert(LZ4_streamHCPtr != NULL);
+ if (dictSize > 64 KB) {
+ dictionary += (size_t)dictSize - 64 KB;
+ dictSize = 64 KB;
+ }
+ /* need a full initialization, there are bad side-effects when using resetFast() */
+ { int const cLevel = ctxPtr->compressionLevel;
+ LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr));
+ LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel);
+ }
+ LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary);
+ ctxPtr->end = (const BYTE*)dictionary + dictSize;
+ if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3);
+ return dictSize;
+}
+
+void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) {
+ working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? &(dictionary_stream->internal_donotuse) : NULL;
+}
+
+/* compression */
+
+static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
+{
+ DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
+ if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4)
+ LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
+
+ /* Only one memory segment for extDict, so any previous extDict is lost at this stage */
+ ctxPtr->lowLimit = ctxPtr->dictLimit;
+ ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+ ctxPtr->dictBase = ctxPtr->base;
+ ctxPtr->base = newBlock - ctxPtr->dictLimit;
+ ctxPtr->end = newBlock;
+ ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
+
+ /* cannot reference an extDict and a dictCtx at the same time */
+ ctxPtr->dictCtx = NULL;
+}
+
+static int
+LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
+ const char* src, char* dst,
+ int* srcSizePtr, int dstCapacity,
+ limitedOutput_directive limit)
+{
+ LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+ DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)",
+ LZ4_streamHCPtr, src, *srcSizePtr, limit);
+ assert(ctxPtr != NULL);
+ /* auto-init if forgotten */
+ if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
+
+ /* Check overflow */
+ if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) {
+ size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit;
+ if (dictSize > 64 KB) dictSize = 64 KB;
+ LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize);
+ }
+
+ /* Check if blocks follow each other */
+ if ((const BYTE*)src != ctxPtr->end)
+ LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src);
+
+ /* Check overlapping input/dictionary space */
+ { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr;
+ const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+ const BYTE* const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+ if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) {
+ if (sourceEnd > dictEnd) sourceEnd = dictEnd;
+ ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+ if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit;
+ } }
+
+ return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity)
+{
+ if (dstCapacity < LZ4_compressBound(srcSize))
+ return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput);
+ else
+ return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited);
+}
+
+int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize)
+{
+ return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput);
+}
+
+
+
+/* LZ4_saveDictHC :
+ * save history content
+ * into a user-provided buffer
+ * which is then used to continue compression
+ */
+int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
+{
+ LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+ int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
+ DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize);
+ assert(prefixSize >= 0);
+ if (dictSize > 64 KB) dictSize = 64 KB;
+ if (dictSize < 4) dictSize = 0;
+ if (dictSize > prefixSize) dictSize = prefixSize;
+ if (safeBuffer == NULL) assert(dictSize == 0);
+ if (dictSize > 0)
+ memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+ { U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+ streamPtr->end = (const BYTE*)safeBuffer + dictSize;
+ streamPtr->base = streamPtr->end - endIndex;
+ streamPtr->dictLimit = endIndex - (U32)dictSize;
+ streamPtr->lowLimit = endIndex - (U32)dictSize;
+ if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+ streamPtr->nextToUpdate = streamPtr->dictLimit;
+ }
+ return dictSize;
+}
+
+
+/***************************************************
+* Deprecated Functions
+***************************************************/
+
+/* These functions currently generate deprecation warnings */
+
+/* Wrappers for deprecated compression functions */
+int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); }
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); }
+int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); }
+int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); }
+int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); }
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); }
+
+
+/* Deprecated streaming functions */
+int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }
+
+/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
+ * @return : 0 on success, !=0 if error */
+int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
+{
+ LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
+ if (hc4 == NULL) return 1; /* init failed */
+ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+ return 0;
+}
+
+void* LZ4_createHC (const char* inputBuffer)
+{
+ LZ4_streamHC_t* const hc4 = LZ4_createStreamHC();
+ if (hc4 == NULL) return NULL; /* not enough memory */
+ LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
+ return hc4;
+}
+
+int LZ4_freeHC (void* LZ4HC_Data)
+{
+ if (!LZ4HC_Data) return 0; /* support free on NULL */
+ FREEMEM(LZ4HC_Data);
+ return 0;
+}
+
+int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
+{
+ return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited);
+}
+
+int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
+{
+ return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
+}
+
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
+{
+ LZ4_streamHC_t *ctx = (LZ4_streamHC_t*)LZ4HC_Data;
+ const BYTE *bufferStart = ctx->internal_donotuse.base + ctx->internal_donotuse.lowLimit;
+ LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel);
+ /* avoid const char * -> char * conversion warning :( */
+ return (char *)(uptrval)bufferStart;
+}
+
+
+/* ================================================
+ * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX])
+ * ===============================================*/
+typedef struct {
+ int price;
+ int off;
+ int mlen;
+ int litlen;
+} LZ4HC_optimal_t;
+
+/* price in bytes */
+LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
+{
+ int price = litlen;
+ assert(litlen >= 0);
+ if (litlen >= (int)RUN_MASK)
+ price += 1 + ((litlen-(int)RUN_MASK) / 255);
+ return price;
+}
+
+
+/* requires mlen >= MINMATCH */
+LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
+{
+ int price = 1 + 2 ; /* token + 16-bit offset */
+ assert(litlen >= 0);
+ assert(mlen >= MINMATCH);
+
+ price += LZ4HC_literalsPrice(litlen);
+
+ if (mlen >= (int)(ML_MASK+MINMATCH))
+ price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255);
+
+ return price;
+}
+
+
+typedef struct {
+ int off;
+ int len;
+} LZ4HC_match_t;
+
+LZ4_FORCE_INLINE LZ4HC_match_t
+LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
+ const BYTE* ip, const BYTE* const iHighLimit,
+ int minLen, int nbSearches,
+ const dictCtx_directive dict,
+ const HCfavor_e favorDecSpeed)
+{
+ LZ4HC_match_t match = { 0 , 0 };
+ const BYTE* matchPtr = NULL;
+ /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
+ * but this won't be the case here, as we define iLowLimit==ip,
+ * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
+ int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
+ if (matchLength <= minLen) return match;
+ if (favorDecSpeed) {
+ if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */
+ }
+ match.len = matchLength;
+ match.off = (int)(ip-matchPtr);
+ return match;
+}
+
+
+static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
+ const char* const source,
+ char* dst,
+ int* srcSizePtr,
+ int dstCapacity,
+ int const nbSearches,
+ size_t sufficient_len,
+ const limitedOutput_directive limit,
+ int const fullUpdate,
+ const dictCtx_directive dict,
+ const HCfavor_e favorDecSpeed)
+{
+ int retval = 0;
+#define TRAILING_LITERALS 3
+#ifdef LZ4HC_HEAPMODE
+ LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
+#else
+ LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */
+#endif
+
+ const BYTE* ip = (const BYTE*) source;
+ const BYTE* anchor = ip;
+ const BYTE* const iend = ip + *srcSizePtr;
+ const BYTE* const mflimit = iend - MFLIMIT;
+ const BYTE* const matchlimit = iend - LASTLITERALS;
+ BYTE* op = (BYTE*) dst;
+ BYTE* opSaved = (BYTE*) dst;
+ BYTE* oend = op + dstCapacity;
+ int ovml = MINMATCH; /* overflow - last sequence */
+ const BYTE* ovref = NULL;
+
+ /* init */
+#ifdef LZ4HC_HEAPMODE
+ if (opt == NULL) goto _return_label;
+#endif
+ DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
+ *srcSizePtr = 0;
+ if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
+ if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
+
+ /* Main Loop */
+ while (ip <= mflimit) {
+ int const llen = (int)(ip - anchor);
+ int best_mlen, best_off;
+ int cur, last_match_pos = 0;
+
+ LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+ if (firstMatch.len==0) { ip++; continue; }
+
+ if ((size_t)firstMatch.len > sufficient_len) {
+ /* good enough solution : immediate encoding */
+ int const firstML = firstMatch.len;
+ const BYTE* const matchPos = ip - firstMatch.off;
+ opSaved = op;
+ if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */
+ ovml = firstML;
+ ovref = matchPos;
+ goto _dest_overflow;
+ }
+ continue;
+ }
+
+ /* set prices for first positions (literals) */
+ { int rPos;
+ for (rPos = 0 ; rPos < MINMATCH ; rPos++) {
+ int const cost = LZ4HC_literalsPrice(llen + rPos);
+ opt[rPos].mlen = 1;
+ opt[rPos].off = 0;
+ opt[rPos].litlen = llen + rPos;
+ opt[rPos].price = cost;
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+ rPos, cost, opt[rPos].litlen);
+ } }
+ /* set prices using initial match */
+ { int mlen = MINMATCH;
+ int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
+ int const offset = firstMatch.off;
+ assert(matchML < LZ4_OPT_NUM);
+ for ( ; mlen <= matchML ; mlen++) {
+ int const cost = LZ4HC_sequencePrice(llen, mlen);
+ opt[mlen].mlen = mlen;
+ opt[mlen].off = offset;
+ opt[mlen].litlen = llen;
+ opt[mlen].price = cost;
+ DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup",
+ mlen, cost, mlen);
+ } }
+ last_match_pos = firstMatch.len;
+ { int addLit;
+ for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+ opt[last_match_pos+addLit].mlen = 1; /* literal */
+ opt[last_match_pos+addLit].off = 0;
+ opt[last_match_pos+addLit].litlen = addLit;
+ opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup",
+ last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+ } }
+
+ /* check further positions */
+ for (cur = 1; cur < last_match_pos; cur++) {
+ const BYTE* const curPtr = ip + cur;
+ LZ4HC_match_t newMatch;
+
+ if (curPtr > mflimit) break;
+ DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u",
+ cur, opt[cur].price, opt[cur+1].price, cur+1);
+ if (fullUpdate) {
+ /* not useful to search here if next position has same (or lower) cost */
+ if ( (opt[cur+1].price <= opt[cur].price)
+ /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */
+ && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) )
+ continue;
+ } else {
+ /* not useful to search here if next position has same (or lower) cost */
+ if (opt[cur+1].price <= opt[cur].price) continue;
+ }
+
+ DEBUGLOG(7, "search at rPos:%u", cur);
+ if (fullUpdate)
+ newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed);
+ else
+ /* only test matches of minimum length; slightly faster, but misses a few bytes */
+ newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed);
+ if (!newMatch.len) continue;
+
+ if ( ((size_t)newMatch.len > sufficient_len)
+ || (newMatch.len + cur >= LZ4_OPT_NUM) ) {
+ /* immediate encoding */
+ best_mlen = newMatch.len;
+ best_off = newMatch.off;
+ last_match_pos = cur + 1;
+ goto encode;
+ }
+
+ /* before match : set price with literals at beginning */
+ { int const baseLitlen = opt[cur].litlen;
+ int litlen;
+ for (litlen = 1; litlen < MINMATCH; litlen++) {
+ int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen);
+ int const pos = cur + litlen;
+ if (price < opt[pos].price) {
+ opt[pos].mlen = 1; /* literal */
+ opt[pos].off = 0;
+ opt[pos].litlen = baseLitlen+litlen;
+ opt[pos].price = price;
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)",
+ pos, price, opt[pos].litlen);
+ } } }
+
+ /* set prices using match at position = cur */
+ { int const matchML = newMatch.len;
+ int ml = MINMATCH;
+
+ assert(cur + newMatch.len < LZ4_OPT_NUM);
+ for ( ; ml <= matchML ; ml++) {
+ int const pos = cur + ml;
+ int const offset = newMatch.off;
+ int price;
+ int ll;
+ DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)",
+ pos, last_match_pos);
+ if (opt[cur].mlen == 1) {
+ ll = opt[cur].litlen;
+ price = ((cur > ll) ? opt[cur - ll].price : 0)
+ + LZ4HC_sequencePrice(ll, ml);
+ } else {
+ ll = 0;
+ price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
+ }
+
+ assert((U32)favorDecSpeed <= 1);
+ if (pos > last_match_pos+TRAILING_LITERALS
+ || price <= opt[pos].price - (int)favorDecSpeed) {
+ DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)",
+ pos, price, ml);
+ assert(pos < LZ4_OPT_NUM);
+ if ( (ml == matchML) /* last pos of last match */
+ && (last_match_pos < pos) )
+ last_match_pos = pos;
+ opt[pos].mlen = ml;
+ opt[pos].off = offset;
+ opt[pos].litlen = ll;
+ opt[pos].price = price;
+ } } }
+ /* complete following positions with literals */
+ { int addLit;
+ for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) {
+ opt[last_match_pos+addLit].mlen = 1; /* literal */
+ opt[last_match_pos+addLit].off = 0;
+ opt[last_match_pos+addLit].litlen = addLit;
+ opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit);
+ DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit);
+ } }
+ } /* for (cur = 1; cur <= last_match_pos; cur++) */
+
+ assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS);
+ best_mlen = opt[last_match_pos].mlen;
+ best_off = opt[last_match_pos].off;
+ cur = last_match_pos - best_mlen;
+
+encode: /* cur, last_match_pos, best_mlen, best_off must be set */
+ assert(cur < LZ4_OPT_NUM);
+ assert(last_match_pos >= 1); /* == 1 when only one candidate */
+ DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos);
+ { int candidate_pos = cur;
+ int selected_matchLength = best_mlen;
+ int selected_offset = best_off;
+ while (1) { /* from end to beginning */
+ int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */
+ int const next_offset = opt[candidate_pos].off;
+ DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength);
+ opt[candidate_pos].mlen = selected_matchLength;
+ opt[candidate_pos].off = selected_offset;
+ selected_matchLength = next_matchLength;
+ selected_offset = next_offset;
+ if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */
+ assert(next_matchLength > 0); /* can be 1, means literal */
+ candidate_pos -= next_matchLength;
+ } }
+
+ /* encode all recorded sequences in order */
+ { int rPos = 0; /* relative position (to ip) */
+ while (rPos < last_match_pos) {
+ int const ml = opt[rPos].mlen;
+ int const offset = opt[rPos].off;
+ if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */
+ rPos += ml;
+ assert(ml >= MINMATCH);
+ assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX));
+ opSaved = op;
+ if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */
+ ovml = ml;
+ ovref = ip - offset;
+ goto _dest_overflow;
+ } } }
+ } /* while (ip <= mflimit) */
+
+_last_literals:
+ /* Encode Last Literals */
+ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */
+ size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255;
+ size_t const totalSize = 1 + llAdd + lastRunSize;
+ if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */
+ if (limit && (op + totalSize > oend)) {
+ if (limit == limitedOutput) { /* Check output limit */
+ retval = 0;
+ goto _return_label;
+ }
+ /* adapt lastRunSize to fill 'dst' */
+ lastRunSize = (size_t)(oend - op) - 1 /*token*/;
+ llAdd = (lastRunSize + 256 - RUN_MASK) / 256;
+ lastRunSize -= llAdd;
+ }
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize);
+ ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */
+
+ if (lastRunSize >= RUN_MASK) {
+ size_t accumulator = lastRunSize - RUN_MASK;
+ *op++ = (RUN_MASK << ML_BITS);
+ for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+ *op++ = (BYTE) accumulator;
+ } else {
+ *op++ = (BYTE)(lastRunSize << ML_BITS);
+ }
+ memcpy(op, anchor, lastRunSize);
+ op += lastRunSize;
+ }
+
+ /* End */
+ *srcSizePtr = (int) (((const char*)ip) - source);
+ retval = (int) ((char*)op-dst);
+ goto _return_label;
+
+_dest_overflow:
+if (limit == fillOutput) {
+ /* Assumption : ip, anchor, ovml and ovref must be set correctly */
+ size_t const ll = (size_t)(ip - anchor);
+ size_t const ll_addbytes = (ll + 240) / 255;
+ size_t const ll_totalCost = 1 + ll_addbytes + ll;
+ BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */
+ DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved));
+ op = opSaved; /* restore correct out pointer */
+ if (op + ll_totalCost <= maxLitPos) {
+ /* ll validated; now adjust match length */
+ size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost));
+ size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255);
+ assert(maxMlSize < INT_MAX); assert(ovml >= 0);
+ if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize;
+ if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) {
+ DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml);
+ DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor);
+ LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend);
+ DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor);
+ } }
+ goto _last_literals;
+}
+_return_label:
+#ifdef LZ4HC_HEAPMODE
+ FREEMEM(opt);
+#endif
+ return retval;
+}
+
+}
diff --git a/3rdparty/tracy/tracy/common/tracy_lz4hc.hpp b/3rdparty/tracy/tracy/common/tracy_lz4hc.hpp
new file mode 100644
index 0000000..18bf30d
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/tracy_lz4hc.hpp
@@ -0,0 +1,405 @@
+/*
+ LZ4 HC - High Compression Mode of LZ4
+ Header File
+ Copyright (C) 2011-2017, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 source repository : https://github.com/lz4/lz4
+ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef TRACY_LZ4_HC_H_19834876238432
+#define TRACY_LZ4_HC_H_19834876238432
+
+/* --- Dependency --- */
+/* note : lz4hc requires lz4.h/lz4.c for compilation */
+#include "tracy_lz4.hpp" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
+
+
+/* --- Useful constants --- */
+#define LZ4HC_CLEVEL_MIN 3
+#define LZ4HC_CLEVEL_DEFAULT 9
+#define LZ4HC_CLEVEL_OPT_MIN 10
+#define LZ4HC_CLEVEL_MAX 12
+
+namespace tracy
+{
+
+/*-************************************
+ * Block Compression
+ **************************************/
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
+ * `dst` must be already allocated.
+ * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+ * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
+ * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
+ * @return : the number of bytes written into 'dst'
+ * or 0 if compression fails.
+ */
+LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
+
+
+/* Note :
+ * Decompression functions are provided within "lz4.h" (BSD license)
+ */
+
+
+/*! LZ4_compress_HC_extStateHC() :
+ * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
+ * `state` size is provided by LZ4_sizeofStateHC().
+ * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly).
+ */
+LZ4LIB_API int LZ4_sizeofStateHC(void);
+LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+
+
+/*! LZ4_compress_HC_destSize() : v1.9.0+
+ * Will compress as much data as possible from `src`
+ * to fit into `targetDstSize` budget.
+ * Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ * or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src`
+ */
+LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
+ const char* src, char* dst,
+ int* srcSizePtr, int targetDstSize,
+ int compressionLevel);
+
+
+/*-************************************
+ * Streaming Compression
+ * Bufferless synchronous API
+ **************************************/
+ typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
+
+/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
+ * These functions create and release memory for LZ4 HC streaming state.
+ * Newly created states are automatically initialized.
+ * A same state can be used multiple times consecutively,
+ * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
+LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
+
+/*
+ These functions compress data in successive blocks of any size,
+ using previous blocks as dictionary, to improve compression ratio.
+ One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
+ There is an exception for ring buffers, which can be smaller than 64 KB.
+ Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().
+
+ Before starting compression, state must be allocated and properly initialized.
+ LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.
+
+ Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
+ or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
+ LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
+ which is automatically the case when state is created using LZ4_createStreamHC().
+
+ After reset, a first "fictional block" can be designated as initial dictionary,
+ using LZ4_loadDictHC() (Optional).
+
+ Invoke LZ4_compress_HC_continue() to compress each successive block.
+ The number of blocks is unlimited.
+ Previous input blocks, including initial dictionary when present,
+ must remain accessible and unmodified during compression.
+
+ It's allowed to update compression level anytime between blocks,
+ using LZ4_setCompressionLevel() (experimental).
+
+ 'dst' buffer should be sized to handle worst case scenarios
+ (see LZ4_compressBound(), it ensures compression success).
+ In case of failure, the API does not guarantee recovery,
+ so the state _must_ be reset.
+ To ensure compression success
+ whenever `dst` buffer size cannot be made >= LZ4_compressBound(),
+ consider using LZ4_compress_HC_continue_destSize().
+
+ Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
+ it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
+ Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)
+
+ After completing a streaming compression,
+ it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
+ just by resetting it, using LZ4_resetStreamHC_fast().
+*/
+
+LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */
+LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
+
+LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr,
+ const char* src, char* dst,
+ int srcSize, int maxDstSize);
+
+/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
+ * Similar to LZ4_compress_HC_continue(),
+ * but will read as much data as possible from `src`
+ * to fit into `targetDstSize` budget.
+ * Result is provided into 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ * or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`.
+ * Note that this function may not consume the entire input.
+ */
+LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
+ const char* src, char* dst,
+ int* srcSizePtr, int targetDstSize);
+
+LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
+
+
+
+/*^**********************************************
+ * !!!!!! STATIC LINKING ONLY !!!!!!
+ ***********************************************/
+
+/*-******************************************************************
+ * PRIVATE DEFINITIONS :
+ * Do not use these definitions directly.
+ * They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
+ * Declare an `LZ4_streamHC_t` directly, rather than any type below.
+ * Even then, only do so in the context of static linking, as definitions may change between versions.
+ ********************************************************************/
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+
+#define LZ4HC_HASH_LOG 15
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+
+typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
+struct LZ4HC_CCtx_internal
+{
+ LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE];
+ LZ4_u16 chainTable[LZ4HC_MAXD];
+ const LZ4_byte* end; /* next block here to continue on current prefix */
+ const LZ4_byte* base; /* All index relative to this position */
+ const LZ4_byte* dictBase; /* alternate base for extDict */
+ LZ4_u32 dictLimit; /* below that point, need extDict */
+ LZ4_u32 lowLimit; /* below that point, no more dict */
+ LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */
+ short compressionLevel;
+ LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set,
+ otherwise, favor compression ratio */
+ LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */
+ const LZ4HC_CCtx_internal* dictCtx;
+};
+
+
+/* Do not use these definitions directly !
+ * Declare or allocate an LZ4_streamHC_t instead.
+ */
+#define LZ4_STREAMHCSIZE 262200 /* static size, for inter-version compatibility */
+#define LZ4_STREAMHCSIZE_VOIDP (LZ4_STREAMHCSIZE / sizeof(void*))
+union LZ4_streamHC_u {
+ void* table[LZ4_STREAMHCSIZE_VOIDP];
+ LZ4HC_CCtx_internal internal_donotuse;
+}; /* previously typedef'd to LZ4_streamHC_t */
+
+/* LZ4_streamHC_t :
+ * This structure allows static allocation of LZ4 HC streaming state.
+ * This can be used to allocate statically, on state, or as part of a larger structure.
+ *
+ * Such state **must** be initialized using LZ4_initStreamHC() before first use.
+ *
+ * Note that invoking LZ4_initStreamHC() is not required when
+ * the state was created using LZ4_createStreamHC() (which is recommended).
+ * Using the normal builder, a newly created state is automatically initialized.
+ *
+ * Static allocation shall only be used in combination with static linking.
+ */
+
+/* LZ4_initStreamHC() : v1.9.0+
+ * Required before first use of a statically allocated LZ4_streamHC_t.
+ * Before v1.9.0 : use LZ4_resetStreamHC() instead
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size);
+
+
+/*-************************************
+* Deprecated Functions
+**************************************/
+/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
+
+/* deprecated compression functions */
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC (const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete streaming functions; degraded functionality; do not use!
+ *
+ * In order to perform streaming compression, these functions depended on data
+ * that is no longer tracked in the state. They have been preserved as well as
+ * possible: using them will still produce a correct output. However, use of
+ * LZ4_slideInputBufferHC() will truncate the history of the stream, rather
+ * than preserve a window-sized chunk of history.
+ */
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API int LZ4_freeHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int LZ4_sizeofStreamStateHC(void);
+LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+
+
+/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC().
+ * The intention is to emphasize the difference with LZ4_resetStreamHC_fast(),
+ * which is now the recommended function to start a new stream of blocks,
+ * but cannot be used to initialize a memory segment containing arbitrary garbage data.
+ *
+ * It is recommended to switch to LZ4_initStreamHC().
+ * LZ4_resetStreamHC() will generate deprecation warnings in a future version.
+ */
+LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
+
+}
+
+#endif /* LZ4_HC_H_19834876238432 */
+
+
+/*-**************************************************
+ * !!!!! STATIC LINKING ONLY !!!!!
+ * Following definitions are considered experimental.
+ * They should not be linked from DLL,
+ * as there is no guarantee of API stability yet.
+ * Prototypes will be promoted to "stable" status
+ * after successfull usage in real-life scenarios.
+ ***************************************************/
+#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
+#ifndef TRACY_LZ4_HC_SLO_098092834
+#define TRACY_LZ4_HC_SLO_098092834
+
+#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */
+#include "tracy_lz4.hpp"
+
+namespace tracy
+{
+
+/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
+ * It's possible to change compression level
+ * between successive invocations of LZ4_compress_HC_continue*()
+ * for dynamic adaptation.
+ */
+LZ4LIB_STATIC_API void LZ4_setCompressionLevel(
+ LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
+ * Opt. Parser will favor decompression speed over compression ratio.
+ * Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
+ */
+LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
+ LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
+
+/*! LZ4_resetStreamHC_fast() : v1.9.0+
+ * When an LZ4_streamHC_t is known to be in a internally coherent state,
+ * it can often be prepared for a new compression with almost no work, only
+ * sometimes falling back to the full, expensive reset that is always required
+ * when the stream is in an indeterminate state (i.e., the reset performed by
+ * LZ4_resetStreamHC()).
+ *
+ * LZ4_streamHCs are guaranteed to be in a valid state when:
+ * - returned from LZ4_createStreamHC()
+ * - reset by LZ4_resetStreamHC()
+ * - memset(stream, 0, sizeof(LZ4_streamHC_t))
+ * - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
+ * - the stream was in a valid state and was then used in any compression call
+ * that returned success
+ * - the stream was in an indeterminate state and was used in a compression
+ * call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
+ * returned success
+ *
+ * Note:
+ * A stream that was last used in a compression call that returned an error
+ * may be passed to this function. However, it will be fully reset, which will
+ * clear any existing history and settings from the context.
+ */
+LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
+ LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_compress_HC_extStateHC_fastReset() :
+ * A variant of LZ4_compress_HC_extStateHC().
+ *
+ * Using this variant avoids an expensive initialization step. It is only safe
+ * to call if the state buffer is known to be correctly initialized already
+ * (see above comment on LZ4_resetStreamHC_fast() for a definition of
+ * "correctly initialized"). From a high level, the difference is that this
+ * function initializes the provided state with a call to
+ * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
+ * call to LZ4_resetStreamHC().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset (
+ void* state,
+ const char* src, char* dst,
+ int srcSize, int dstCapacity,
+ int compressionLevel);
+
+/*! LZ4_attach_HC_dictionary() :
+ * This is an experimental API that allows for the efficient use of a
+ * static dictionary many times.
+ *
+ * Rather than re-loading the dictionary buffer into a working context before
+ * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a
+ * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism,
+ * in which the working stream references the dictionary stream in-place.
+ *
+ * Several assumptions are made about the state of the dictionary stream.
+ * Currently, only streams which have been prepared by LZ4_loadDictHC() should
+ * be expected to work.
+ *
+ * Alternatively, the provided dictionary stream pointer may be NULL, in which
+ * case any existing dictionary stream is unset.
+ *
+ * A dictionary should only be attached to a stream without any history (i.e.,
+ * a stream that has just been reset).
+ *
+ * The dictionary will remain attached to the working stream only for the
+ * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the
+ * dictionary context association from the working stream. The dictionary
+ * stream (and source buffer) must remain in-place / accessible / unchanged
+ * through the lifetime of the stream session.
+ */
+LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary(
+ LZ4_streamHC_t *working_stream,
+ const LZ4_streamHC_t *dictionary_stream);
+
+}
+
+#endif /* LZ4_HC_SLO_098092834 */
+#endif /* LZ4_HC_STATIC_LINKING_ONLY */
diff --git a/3rdparty/tracy/tracy/common/unix-release.mk b/3rdparty/tracy/tracy/common/unix-release.mk
new file mode 100644
index 0000000..07ac7d6
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/unix-release.mk
@@ -0,0 +1,13 @@
+ARCH := $(shell uname -m)
+
+ifeq (0,$(shell $(CC) --version | grep clang && echo 1 || echo 0))
+CFLAGS += -s
+else
+LDFLAGS := -s
+endif
+
+ifneq (,$(filter $(ARCH),aarch64 arm64))
+CFLAGS += -mcpu=native
+else
+CFLAGS += -march=native
+endif
diff --git a/3rdparty/tracy/tracy/common/unix.mk b/3rdparty/tracy/tracy/common/unix.mk
new file mode 100644
index 0000000..0105093
--- /dev/null
+++ b/3rdparty/tracy/tracy/common/unix.mk
@@ -0,0 +1,82 @@
+# Common code needed by most Tracy Unix Makefiles.
+
+# Ensure these are simply-substituted variables, without changing their values.
+LIBS := $(LIBS)
+
+# Tracy does not use TBB directly, but the implementation of parallel algorithms
+# in some versions of libstdc++ depends on TBB. When it does, you must
+# explicitly link against -ltbb.
+#
+# Some distributions have pgk-config files for TBB, others don't.
+ifeq (0,$(shell pkg-config --libs tbb >/dev/null 2>&1; echo $$?))
+ LIBS += $(shell pkg-config --libs tbb)
+else ifeq (0,$(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?))
+ LIBS += -ltbb
+endif
+
+OBJDIRBASE := obj/$(BUILD)
+OBJDIR := $(OBJDIRBASE)/o/o/o
+
+OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o))
+OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o))
+OBJ3 := $(addprefix $(OBJDIR)/,$(SRC3:%.m=%.o))
+OBJ4 := $(addprefix $(OBJDIR)/,$(SRC4:%.S=%.o))
+
+all: $(IMAGE)
+
+$(OBJDIR)/%.o: %.cpp
+ $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@
+
+$(OBJDIR)/%.d : %.cpp
+ @echo Resolving dependencies of $<
+ @mkdir -p $(@D)
+ @$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \
+ sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \
+ rm -f $@.$$$$
+
+$(OBJDIR)/%.o: %.c
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+$(OBJDIR)/%.d : %.c
+ @echo Resolving dependencies of $<
+ @mkdir -p $(@D)
+ @$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
+ sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \
+ rm -f $@.$$$$
+
+$(OBJDIR)/%.o: %.m
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+$(OBJDIR)/%.d : %.m
+ @echo Resolving dependencies of $<
+ @mkdir -p $(@D)
+ @$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
+ sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.m=.o) $@ : ,g' < $@.$$$$ > $@; \
+ rm -f $@.$$$$
+
+$(OBJDIR)/%.o: %.S
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+$(OBJDIR)/%.d : %.S
+ @echo Resolving dependencies of $<
+ @mkdir -p $(@D)
+ @$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \
+ sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.m=.o) $@ : ,g' < $@.$$$$ > $@; \
+ rm -f $@.$$$$
+
+ifeq (yes,$(SHARED_LIBRARY))
+$(IMAGE): $(OBJ) $(OBJ2) $(OBJ4)
+ $(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ4) $(LIBS) -shared -o $@
+else
+$(IMAGE): $(OBJ) $(OBJ2) $(OBJ3) $(OBJ4)
+ $(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ3) $(OBJ4) $(LIBS) -o $@
+endif
+
+ifneq "$(MAKECMDGOALS)" "clean"
+-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) $(addprefix $(OBJDIR)/,$(SRC2:.c=.d)) $(addprefix $(OBJDIR)/,$(SRC3:.m=.d)) $(addprefix $(OBJDIR)/,$(SRC4:.S=.d))
+endif
+
+clean:
+ rm -rf $(OBJDIRBASE) $(IMAGE)*
+
+.PHONY: clean all