diff options
Diffstat (limited to '3rdparty/tracy/tracy/common')
23 files changed, 8667 insertions, 0 deletions
diff --git a/3rdparty/tracy/tracy/common/TracyAlign.hpp b/3rdparty/tracy/tracy/common/TracyAlign.hpp new file mode 100644 index 0000000..c3531ba --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyAlign.hpp @@ -0,0 +1,27 @@ +#ifndef __TRACYALIGN_HPP__ +#define __TRACYALIGN_HPP__ + +#include <string.h> + +#include "TracyForceInline.hpp" + +namespace tracy +{ + +template<typename T> +tracy_force_inline T MemRead( const void* ptr ) +{ + T val; + memcpy( &val, ptr, sizeof( T ) ); + return val; +} + +template<typename T> +tracy_force_inline void MemWrite( void* ptr, T val ) +{ + memcpy( ptr, &val, sizeof( T ) ); +} + +} + +#endif diff --git a/3rdparty/tracy/tracy/common/TracyAlloc.hpp b/3rdparty/tracy/tracy/common/TracyAlloc.hpp new file mode 100644 index 0000000..d3dec12 --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyAlloc.hpp @@ -0,0 +1,69 @@ +#ifndef __TRACYALLOC_HPP__ +#define __TRACYALLOC_HPP__ + +#include <stdlib.h> + +#ifdef TRACY_ENABLE +# include "TracyApi.h" +# include "TracyForceInline.hpp" +# include "../client/tracy_rpmalloc.hpp" +#endif + +namespace tracy +{ + +#ifdef TRACY_ENABLE +TRACY_API void InitRpmalloc(); +#endif + +static inline void* tracy_malloc( size_t size ) +{ +#ifdef TRACY_ENABLE + InitRpmalloc(); + return rpmalloc( size ); +#else + return malloc( size ); +#endif +} + +static inline void* tracy_malloc_fast( size_t size ) +{ +#ifdef TRACY_ENABLE + return rpmalloc( size ); +#else + return malloc( size ); +#endif +} + +static inline void tracy_free( void* ptr ) +{ +#ifdef TRACY_ENABLE + InitRpmalloc(); + rpfree( ptr ); +#else + free( ptr ); +#endif +} + +static inline void tracy_free_fast( void* ptr ) +{ +#ifdef TRACY_ENABLE + rpfree( ptr ); +#else + free( ptr ); +#endif +} + +static inline void* tracy_realloc( void* ptr, size_t size ) +{ +#ifdef TRACY_ENABLE + InitRpmalloc(); + return rprealloc( ptr, size ); +#else + return realloc( ptr, size ); +#endif +} + +} + +#endif diff --git a/3rdparty/tracy/tracy/common/TracyApi.h b/3rdparty/tracy/tracy/common/TracyApi.h new file mode 100644 index 0000000..f396ce0 --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyApi.h @@ -0,0 +1,16 @@ +#ifndef __TRACYAPI_H__ +#define __TRACYAPI_H__ + +#if defined _WIN32 +# if defined TRACY_EXPORTS +# define TRACY_API __declspec(dllexport) +# elif defined TRACY_IMPORTS +# define TRACY_API __declspec(dllimport) +# else +# define TRACY_API +# endif +#else +# define TRACY_API __attribute__((visibility("default"))) +#endif + +#endif // __TRACYAPI_H__ diff --git a/3rdparty/tracy/tracy/common/TracyColor.hpp b/3rdparty/tracy/tracy/common/TracyColor.hpp new file mode 100644 index 0000000..4825c0f --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyColor.hpp @@ -0,0 +1,690 @@ +#ifndef __TRACYCOLOR_HPP__ +#define __TRACYCOLOR_HPP__ + +namespace tracy +{ +struct Color +{ +enum ColorType +{ + Snow = 0xfffafa, + GhostWhite = 0xf8f8ff, + WhiteSmoke = 0xf5f5f5, + Gainsboro = 0xdcdcdc, + FloralWhite = 0xfffaf0, + OldLace = 0xfdf5e6, + Linen = 0xfaf0e6, + AntiqueWhite = 0xfaebd7, + PapayaWhip = 0xffefd5, + BlanchedAlmond = 0xffebcd, + Bisque = 0xffe4c4, + PeachPuff = 0xffdab9, + NavajoWhite = 0xffdead, + Moccasin = 0xffe4b5, + Cornsilk = 0xfff8dc, + Ivory = 0xfffff0, + LemonChiffon = 0xfffacd, + Seashell = 0xfff5ee, + Honeydew = 0xf0fff0, + MintCream = 0xf5fffa, + Azure = 0xf0ffff, + AliceBlue = 0xf0f8ff, + Lavender = 0xe6e6fa, + LavenderBlush = 0xfff0f5, + MistyRose = 0xffe4e1, + White = 0xffffff, + Black = 0x000000, + DarkSlateGray = 0x2f4f4f, + DarkSlateGrey = 0x2f4f4f, + DimGray = 0x696969, + DimGrey = 0x696969, + SlateGray = 0x708090, + SlateGrey = 0x708090, + LightSlateGray = 0x778899, + LightSlateGrey = 0x778899, + Gray = 0xbebebe, + Grey = 0xbebebe, + X11Gray = 0xbebebe, + X11Grey = 0xbebebe, + WebGray = 0x808080, + WebGrey = 0x808080, + LightGrey = 0xd3d3d3, + LightGray = 0xd3d3d3, + MidnightBlue = 0x191970, + Navy = 0x000080, + NavyBlue = 0x000080, + CornflowerBlue = 0x6495ed, + DarkSlateBlue = 0x483d8b, + SlateBlue = 0x6a5acd, + MediumSlateBlue = 0x7b68ee, + LightSlateBlue = 0x8470ff, + MediumBlue = 0x0000cd, + RoyalBlue = 0x4169e1, + Blue = 0x0000ff, + DodgerBlue = 0x1e90ff, + DeepSkyBlue = 0x00bfff, + SkyBlue = 0x87ceeb, + LightSkyBlue = 0x87cefa, + SteelBlue = 0x4682b4, + LightSteelBlue = 0xb0c4de, + LightBlue = 0xadd8e6, + PowderBlue = 0xb0e0e6, + PaleTurquoise = 0xafeeee, + DarkTurquoise = 0x00ced1, + MediumTurquoise = 0x48d1cc, + Turquoise = 0x40e0d0, + Cyan = 0x00ffff, + Aqua = 0x00ffff, + LightCyan = 0xe0ffff, + CadetBlue = 0x5f9ea0, + MediumAquamarine = 0x66cdaa, + Aquamarine = 0x7fffd4, + DarkGreen = 0x006400, + DarkOliveGreen = 0x556b2f, + DarkSeaGreen = 0x8fbc8f, + SeaGreen = 0x2e8b57, + MediumSeaGreen = 0x3cb371, + LightSeaGreen = 0x20b2aa, + PaleGreen = 0x98fb98, + SpringGreen = 0x00ff7f, + LawnGreen = 0x7cfc00, + Green = 0x00ff00, + Lime = 0x00ff00, + X11Green = 0x00ff00, + WebGreen = 0x008000, + Chartreuse = 0x7fff00, + MediumSpringGreen = 0x00fa9a, + GreenYellow = 0xadff2f, + LimeGreen = 0x32cd32, + YellowGreen = 0x9acd32, + ForestGreen = 0x228b22, + OliveDrab = 0x6b8e23, + DarkKhaki = 0xbdb76b, + Khaki = 0xf0e68c, + PaleGoldenrod = 0xeee8aa, + LightGoldenrodYellow = 0xfafad2, + LightYellow = 0xffffe0, + Yellow = 0xffff00, + Gold = 0xffd700, + LightGoldenrod = 0xeedd82, + Goldenrod = 0xdaa520, + DarkGoldenrod = 0xb8860b, + RosyBrown = 0xbc8f8f, + IndianRed = 0xcd5c5c, + SaddleBrown = 0x8b4513, + Sienna = 0xa0522d, + Peru = 0xcd853f, + Burlywood = 0xdeb887, + Beige = 0xf5f5dc, + Wheat = 0xf5deb3, + SandyBrown = 0xf4a460, + Tan = 0xd2b48c, + Chocolate = 0xd2691e, + Firebrick = 0xb22222, + Brown = 0xa52a2a, + DarkSalmon = 0xe9967a, + Salmon = 0xfa8072, + LightSalmon = 0xffa07a, + Orange = 0xffa500, + DarkOrange = 0xff8c00, + Coral = 0xff7f50, + LightCoral = 0xf08080, + Tomato = 0xff6347, + OrangeRed = 0xff4500, + Red = 0xff0000, + HotPink = 0xff69b4, + DeepPink = 0xff1493, + Pink = 0xffc0cb, + LightPink = 0xffb6c1, + PaleVioletRed = 0xdb7093, + Maroon = 0xb03060, + X11Maroon = 0xb03060, + WebMaroon = 0x800000, + MediumVioletRed = 0xc71585, + VioletRed = 0xd02090, + Magenta = 0xff00ff, + Fuchsia = 0xff00ff, + Violet = 0xee82ee, + Plum = 0xdda0dd, + Orchid = 0xda70d6, + MediumOrchid = 0xba55d3, + DarkOrchid = 0x9932cc, + DarkViolet = 0x9400d3, + BlueViolet = 0x8a2be2, + Purple = 0xa020f0, + X11Purple = 0xa020f0, + WebPurple = 0x800080, + MediumPurple = 0x9370db, + Thistle = 0xd8bfd8, + Snow1 = 0xfffafa, + Snow2 = 0xeee9e9, + Snow3 = 0xcdc9c9, + Snow4 = 0x8b8989, + Seashell1 = 0xfff5ee, + Seashell2 = 0xeee5de, + Seashell3 = 0xcdc5bf, + Seashell4 = 0x8b8682, + AntiqueWhite1 = 0xffefdb, + AntiqueWhite2 = 0xeedfcc, + AntiqueWhite3 = 0xcdc0b0, + AntiqueWhite4 = 0x8b8378, + Bisque1 = 0xffe4c4, + Bisque2 = 0xeed5b7, + Bisque3 = 0xcdb79e, + Bisque4 = 0x8b7d6b, + PeachPuff1 = 0xffdab9, + PeachPuff2 = 0xeecbad, + PeachPuff3 = 0xcdaf95, + PeachPuff4 = 0x8b7765, + NavajoWhite1 = 0xffdead, + NavajoWhite2 = 0xeecfa1, + NavajoWhite3 = 0xcdb38b, + NavajoWhite4 = 0x8b795e, + LemonChiffon1 = 0xfffacd, + LemonChiffon2 = 0xeee9bf, + LemonChiffon3 = 0xcdc9a5, + LemonChiffon4 = 0x8b8970, + Cornsilk1 = 0xfff8dc, + Cornsilk2 = 0xeee8cd, + Cornsilk3 = 0xcdc8b1, + Cornsilk4 = 0x8b8878, + Ivory1 = 0xfffff0, + Ivory2 = 0xeeeee0, + Ivory3 = 0xcdcdc1, + Ivory4 = 0x8b8b83, + Honeydew1 = 0xf0fff0, + Honeydew2 = 0xe0eee0, + Honeydew3 = 0xc1cdc1, + Honeydew4 = 0x838b83, + LavenderBlush1 = 0xfff0f5, + LavenderBlush2 = 0xeee0e5, + LavenderBlush3 = 0xcdc1c5, + LavenderBlush4 = 0x8b8386, + MistyRose1 = 0xffe4e1, + MistyRose2 = 0xeed5d2, + MistyRose3 = 0xcdb7b5, + MistyRose4 = 0x8b7d7b, + Azure1 = 0xf0ffff, + Azure2 = 0xe0eeee, + Azure3 = 0xc1cdcd, + Azure4 = 0x838b8b, + SlateBlue1 = 0x836fff, + SlateBlue2 = 0x7a67ee, + SlateBlue3 = 0x6959cd, + SlateBlue4 = 0x473c8b, + RoyalBlue1 = 0x4876ff, + RoyalBlue2 = 0x436eee, + RoyalBlue3 = 0x3a5fcd, + RoyalBlue4 = 0x27408b, + Blue1 = 0x0000ff, + Blue2 = 0x0000ee, + Blue3 = 0x0000cd, + Blue4 = 0x00008b, + DodgerBlue1 = 0x1e90ff, + DodgerBlue2 = 0x1c86ee, + DodgerBlue3 = 0x1874cd, + DodgerBlue4 = 0x104e8b, + SteelBlue1 = 0x63b8ff, + SteelBlue2 = 0x5cacee, + SteelBlue3 = 0x4f94cd, + SteelBlue4 = 0x36648b, + DeepSkyBlue1 = 0x00bfff, + DeepSkyBlue2 = 0x00b2ee, + DeepSkyBlue3 = 0x009acd, + DeepSkyBlue4 = 0x00688b, + SkyBlue1 = 0x87ceff, + SkyBlue2 = 0x7ec0ee, + SkyBlue3 = 0x6ca6cd, + SkyBlue4 = 0x4a708b, + LightSkyBlue1 = 0xb0e2ff, + LightSkyBlue2 = 0xa4d3ee, + LightSkyBlue3 = 0x8db6cd, + LightSkyBlue4 = 0x607b8b, + SlateGray1 = 0xc6e2ff, + SlateGray2 = 0xb9d3ee, + SlateGray3 = 0x9fb6cd, + SlateGray4 = 0x6c7b8b, + LightSteelBlue1 = 0xcae1ff, + LightSteelBlue2 = 0xbcd2ee, + LightSteelBlue3 = 0xa2b5cd, + LightSteelBlue4 = 0x6e7b8b, + LightBlue1 = 0xbfefff, + LightBlue2 = 0xb2dfee, + LightBlue3 = 0x9ac0cd, + LightBlue4 = 0x68838b, + LightCyan1 = 0xe0ffff, + LightCyan2 = 0xd1eeee, + LightCyan3 = 0xb4cdcd, + LightCyan4 = 0x7a8b8b, + PaleTurquoise1 = 0xbbffff, + PaleTurquoise2 = 0xaeeeee, + PaleTurquoise3 = 0x96cdcd, + PaleTurquoise4 = 0x668b8b, + CadetBlue1 = 0x98f5ff, + CadetBlue2 = 0x8ee5ee, + CadetBlue3 = 0x7ac5cd, + CadetBlue4 = 0x53868b, + Turquoise1 = 0x00f5ff, + Turquoise2 = 0x00e5ee, + Turquoise3 = 0x00c5cd, + Turquoise4 = 0x00868b, + Cyan1 = 0x00ffff, + Cyan2 = 0x00eeee, + Cyan3 = 0x00cdcd, + Cyan4 = 0x008b8b, + DarkSlateGray1 = 0x97ffff, + DarkSlateGray2 = 0x8deeee, + DarkSlateGray3 = 0x79cdcd, + DarkSlateGray4 = 0x528b8b, + Aquamarine1 = 0x7fffd4, + Aquamarine2 = 0x76eec6, + Aquamarine3 = 0x66cdaa, + Aquamarine4 = 0x458b74, + DarkSeaGreen1 = 0xc1ffc1, + DarkSeaGreen2 = 0xb4eeb4, + DarkSeaGreen3 = 0x9bcd9b, + DarkSeaGreen4 = 0x698b69, + SeaGreen1 = 0x54ff9f, + SeaGreen2 = 0x4eee94, + SeaGreen3 = 0x43cd80, + SeaGreen4 = 0x2e8b57, + PaleGreen1 = 0x9aff9a, + PaleGreen2 = 0x90ee90, + PaleGreen3 = 0x7ccd7c, + PaleGreen4 = 0x548b54, + SpringGreen1 = 0x00ff7f, + SpringGreen2 = 0x00ee76, + SpringGreen3 = 0x00cd66, + SpringGreen4 = 0x008b45, + Green1 = 0x00ff00, + Green2 = 0x00ee00, + Green3 = 0x00cd00, + Green4 = 0x008b00, + Chartreuse1 = 0x7fff00, + Chartreuse2 = 0x76ee00, + Chartreuse3 = 0x66cd00, + Chartreuse4 = 0x458b00, + OliveDrab1 = 0xc0ff3e, + OliveDrab2 = 0xb3ee3a, + OliveDrab3 = 0x9acd32, + OliveDrab4 = 0x698b22, + DarkOliveGreen1 = 0xcaff70, + DarkOliveGreen2 = 0xbcee68, + DarkOliveGreen3 = 0xa2cd5a, + DarkOliveGreen4 = 0x6e8b3d, + Khaki1 = 0xfff68f, + Khaki2 = 0xeee685, + Khaki3 = 0xcdc673, + Khaki4 = 0x8b864e, + LightGoldenrod1 = 0xffec8b, + LightGoldenrod2 = 0xeedc82, + LightGoldenrod3 = 0xcdbe70, + LightGoldenrod4 = 0x8b814c, + LightYellow1 = 0xffffe0, + LightYellow2 = 0xeeeed1, + LightYellow3 = 0xcdcdb4, + LightYellow4 = 0x8b8b7a, + Yellow1 = 0xffff00, + Yellow2 = 0xeeee00, + Yellow3 = 0xcdcd00, + Yellow4 = 0x8b8b00, + Gold1 = 0xffd700, + Gold2 = 0xeec900, + Gold3 = 0xcdad00, + Gold4 = 0x8b7500, + Goldenrod1 = 0xffc125, + Goldenrod2 = 0xeeb422, + Goldenrod3 = 0xcd9b1d, + Goldenrod4 = 0x8b6914, + DarkGoldenrod1 = 0xffb90f, + DarkGoldenrod2 = 0xeead0e, + DarkGoldenrod3 = 0xcd950c, + DarkGoldenrod4 = 0x8b6508, + RosyBrown1 = 0xffc1c1, + RosyBrown2 = 0xeeb4b4, + RosyBrown3 = 0xcd9b9b, + RosyBrown4 = 0x8b6969, + IndianRed1 = 0xff6a6a, + IndianRed2 = 0xee6363, + IndianRed3 = 0xcd5555, + IndianRed4 = 0x8b3a3a, + Sienna1 = 0xff8247, + Sienna2 = 0xee7942, + Sienna3 = 0xcd6839, + Sienna4 = 0x8b4726, + Burlywood1 = 0xffd39b, + Burlywood2 = 0xeec591, + Burlywood3 = 0xcdaa7d, + Burlywood4 = 0x8b7355, + Wheat1 = 0xffe7ba, + Wheat2 = 0xeed8ae, + Wheat3 = 0xcdba96, + Wheat4 = 0x8b7e66, + Tan1 = 0xffa54f, + Tan2 = 0xee9a49, + Tan3 = 0xcd853f, + Tan4 = 0x8b5a2b, + Chocolate1 = 0xff7f24, + Chocolate2 = 0xee7621, + Chocolate3 = 0xcd661d, + Chocolate4 = 0x8b4513, + Firebrick1 = 0xff3030, + Firebrick2 = 0xee2c2c, + Firebrick3 = 0xcd2626, + Firebrick4 = 0x8b1a1a, + Brown1 = 0xff4040, + Brown2 = 0xee3b3b, + Brown3 = 0xcd3333, + Brown4 = 0x8b2323, + Salmon1 = 0xff8c69, + Salmon2 = 0xee8262, + Salmon3 = 0xcd7054, + Salmon4 = 0x8b4c39, + LightSalmon1 = 0xffa07a, + LightSalmon2 = 0xee9572, + LightSalmon3 = 0xcd8162, + LightSalmon4 = 0x8b5742, + Orange1 = 0xffa500, + Orange2 = 0xee9a00, + Orange3 = 0xcd8500, + Orange4 = 0x8b5a00, + DarkOrange1 = 0xff7f00, + DarkOrange2 = 0xee7600, + DarkOrange3 = 0xcd6600, + DarkOrange4 = 0x8b4500, + Coral1 = 0xff7256, + Coral2 = 0xee6a50, + Coral3 = 0xcd5b45, + Coral4 = 0x8b3e2f, + Tomato1 = 0xff6347, + Tomato2 = 0xee5c42, + Tomato3 = 0xcd4f39, + Tomato4 = 0x8b3626, + OrangeRed1 = 0xff4500, + OrangeRed2 = 0xee4000, + OrangeRed3 = 0xcd3700, + OrangeRed4 = 0x8b2500, + Red1 = 0xff0000, + Red2 = 0xee0000, + Red3 = 0xcd0000, + Red4 = 0x8b0000, + DeepPink1 = 0xff1493, + DeepPink2 = 0xee1289, + DeepPink3 = 0xcd1076, + DeepPink4 = 0x8b0a50, + HotPink1 = 0xff6eb4, + HotPink2 = 0xee6aa7, + HotPink3 = 0xcd6090, + HotPink4 = 0x8b3a62, + Pink1 = 0xffb5c5, + Pink2 = 0xeea9b8, + Pink3 = 0xcd919e, + Pink4 = 0x8b636c, + LightPink1 = 0xffaeb9, + LightPink2 = 0xeea2ad, + LightPink3 = 0xcd8c95, + LightPink4 = 0x8b5f65, + PaleVioletRed1 = 0xff82ab, + PaleVioletRed2 = 0xee799f, + PaleVioletRed3 = 0xcd6889, + PaleVioletRed4 = 0x8b475d, + Maroon1 = 0xff34b3, + Maroon2 = 0xee30a7, + Maroon3 = 0xcd2990, + Maroon4 = 0x8b1c62, + VioletRed1 = 0xff3e96, + VioletRed2 = 0xee3a8c, + VioletRed3 = 0xcd3278, + VioletRed4 = 0x8b2252, + Magenta1 = 0xff00ff, + Magenta2 = 0xee00ee, + Magenta3 = 0xcd00cd, + Magenta4 = 0x8b008b, + Orchid1 = 0xff83fa, + Orchid2 = 0xee7ae9, + Orchid3 = 0xcd69c9, + Orchid4 = 0x8b4789, + Plum1 = 0xffbbff, + Plum2 = 0xeeaeee, + Plum3 = 0xcd96cd, + Plum4 = 0x8b668b, + MediumOrchid1 = 0xe066ff, + MediumOrchid2 = 0xd15fee, + MediumOrchid3 = 0xb452cd, + MediumOrchid4 = 0x7a378b, + DarkOrchid1 = 0xbf3eff, + DarkOrchid2 = 0xb23aee, + DarkOrchid3 = 0x9a32cd, + DarkOrchid4 = 0x68228b, + Purple1 = 0x9b30ff, + Purple2 = 0x912cee, + Purple3 = 0x7d26cd, + Purple4 = 0x551a8b, + MediumPurple1 = 0xab82ff, + MediumPurple2 = 0x9f79ee, + MediumPurple3 = 0x8968cd, + MediumPurple4 = 0x5d478b, + Thistle1 = 0xffe1ff, + Thistle2 = 0xeed2ee, + Thistle3 = 0xcdb5cd, + Thistle4 = 0x8b7b8b, + Gray0 = 0x000000, + Grey0 = 0x000000, + Gray1 = 0x030303, + Grey1 = 0x030303, + Gray2 = 0x050505, + Grey2 = 0x050505, + Gray3 = 0x080808, + Grey3 = 0x080808, + Gray4 = 0x0a0a0a, + Grey4 = 0x0a0a0a, + Gray5 = 0x0d0d0d, + Grey5 = 0x0d0d0d, + Gray6 = 0x0f0f0f, + Grey6 = 0x0f0f0f, + Gray7 = 0x121212, + Grey7 = 0x121212, + Gray8 = 0x141414, + Grey8 = 0x141414, + Gray9 = 0x171717, + Grey9 = 0x171717, + Gray10 = 0x1a1a1a, + Grey10 = 0x1a1a1a, + Gray11 = 0x1c1c1c, + Grey11 = 0x1c1c1c, + Gray12 = 0x1f1f1f, + Grey12 = 0x1f1f1f, + Gray13 = 0x212121, + Grey13 = 0x212121, + Gray14 = 0x242424, + Grey14 = 0x242424, + Gray15 = 0x262626, + Grey15 = 0x262626, + Gray16 = 0x292929, + Grey16 = 0x292929, + Gray17 = 0x2b2b2b, + Grey17 = 0x2b2b2b, + Gray18 = 0x2e2e2e, + Grey18 = 0x2e2e2e, + Gray19 = 0x303030, + Grey19 = 0x303030, + Gray20 = 0x333333, + Grey20 = 0x333333, + Gray21 = 0x363636, + Grey21 = 0x363636, + Gray22 = 0x383838, + Grey22 = 0x383838, + Gray23 = 0x3b3b3b, + Grey23 = 0x3b3b3b, + Gray24 = 0x3d3d3d, + Grey24 = 0x3d3d3d, + Gray25 = 0x404040, + Grey25 = 0x404040, + Gray26 = 0x424242, + Grey26 = 0x424242, + Gray27 = 0x454545, + Grey27 = 0x454545, + Gray28 = 0x474747, + Grey28 = 0x474747, + Gray29 = 0x4a4a4a, + Grey29 = 0x4a4a4a, + Gray30 = 0x4d4d4d, + Grey30 = 0x4d4d4d, + Gray31 = 0x4f4f4f, + Grey31 = 0x4f4f4f, + Gray32 = 0x525252, + Grey32 = 0x525252, + Gray33 = 0x545454, + Grey33 = 0x545454, + Gray34 = 0x575757, + Grey34 = 0x575757, + Gray35 = 0x595959, + Grey35 = 0x595959, + Gray36 = 0x5c5c5c, + Grey36 = 0x5c5c5c, + Gray37 = 0x5e5e5e, + Grey37 = 0x5e5e5e, + Gray38 = 0x616161, + Grey38 = 0x616161, + Gray39 = 0x636363, + Grey39 = 0x636363, + Gray40 = 0x666666, + Grey40 = 0x666666, + Gray41 = 0x696969, + Grey41 = 0x696969, + Gray42 = 0x6b6b6b, + Grey42 = 0x6b6b6b, + Gray43 = 0x6e6e6e, + Grey43 = 0x6e6e6e, + Gray44 = 0x707070, + Grey44 = 0x707070, + Gray45 = 0x737373, + Grey45 = 0x737373, + Gray46 = 0x757575, + Grey46 = 0x757575, + Gray47 = 0x787878, + Grey47 = 0x787878, + Gray48 = 0x7a7a7a, + Grey48 = 0x7a7a7a, + Gray49 = 0x7d7d7d, + Grey49 = 0x7d7d7d, + Gray50 = 0x7f7f7f, + Grey50 = 0x7f7f7f, + Gray51 = 0x828282, + Grey51 = 0x828282, + Gray52 = 0x858585, + Grey52 = 0x858585, + Gray53 = 0x878787, + Grey53 = 0x878787, + Gray54 = 0x8a8a8a, + Grey54 = 0x8a8a8a, + Gray55 = 0x8c8c8c, + Grey55 = 0x8c8c8c, + Gray56 = 0x8f8f8f, + Grey56 = 0x8f8f8f, + Gray57 = 0x919191, + Grey57 = 0x919191, + Gray58 = 0x949494, + Grey58 = 0x949494, + Gray59 = 0x969696, + Grey59 = 0x969696, + Gray60 = 0x999999, + Grey60 = 0x999999, + Gray61 = 0x9c9c9c, + Grey61 = 0x9c9c9c, + Gray62 = 0x9e9e9e, + Grey62 = 0x9e9e9e, + Gray63 = 0xa1a1a1, + Grey63 = 0xa1a1a1, + Gray64 = 0xa3a3a3, + Grey64 = 0xa3a3a3, + Gray65 = 0xa6a6a6, + Grey65 = 0xa6a6a6, + Gray66 = 0xa8a8a8, + Grey66 = 0xa8a8a8, + Gray67 = 0xababab, + Grey67 = 0xababab, + Gray68 = 0xadadad, + Grey68 = 0xadadad, + Gray69 = 0xb0b0b0, + Grey69 = 0xb0b0b0, + Gray70 = 0xb3b3b3, + Grey70 = 0xb3b3b3, + Gray71 = 0xb5b5b5, + Grey71 = 0xb5b5b5, + Gray72 = 0xb8b8b8, + Grey72 = 0xb8b8b8, + Gray73 = 0xbababa, + Grey73 = 0xbababa, + Gray74 = 0xbdbdbd, + Grey74 = 0xbdbdbd, + Gray75 = 0xbfbfbf, + Grey75 = 0xbfbfbf, + Gray76 = 0xc2c2c2, + Grey76 = 0xc2c2c2, + Gray77 = 0xc4c4c4, + Grey77 = 0xc4c4c4, + Gray78 = 0xc7c7c7, + Grey78 = 0xc7c7c7, + Gray79 = 0xc9c9c9, + Grey79 = 0xc9c9c9, + Gray80 = 0xcccccc, + Grey80 = 0xcccccc, + Gray81 = 0xcfcfcf, + Grey81 = 0xcfcfcf, + Gray82 = 0xd1d1d1, + Grey82 = 0xd1d1d1, + Gray83 = 0xd4d4d4, + Grey83 = 0xd4d4d4, + Gray84 = 0xd6d6d6, + Grey84 = 0xd6d6d6, + Gray85 = 0xd9d9d9, + Grey85 = 0xd9d9d9, + Gray86 = 0xdbdbdb, + Grey86 = 0xdbdbdb, + Gray87 = 0xdedede, + Grey87 = 0xdedede, + Gray88 = 0xe0e0e0, + Grey88 = 0xe0e0e0, + Gray89 = 0xe3e3e3, + Grey89 = 0xe3e3e3, + Gray90 = 0xe5e5e5, + Grey90 = 0xe5e5e5, + Gray91 = 0xe8e8e8, + Grey91 = 0xe8e8e8, + Gray92 = 0xebebeb, + Grey92 = 0xebebeb, + Gray93 = 0xededed, + Grey93 = 0xededed, + Gray94 = 0xf0f0f0, + Grey94 = 0xf0f0f0, + Gray95 = 0xf2f2f2, + Grey95 = 0xf2f2f2, + Gray96 = 0xf5f5f5, + Grey96 = 0xf5f5f5, + Gray97 = 0xf7f7f7, + Grey97 = 0xf7f7f7, + Gray98 = 0xfafafa, + Grey98 = 0xfafafa, + Gray99 = 0xfcfcfc, + Grey99 = 0xfcfcfc, + Gray100 = 0xffffff, + Grey100 = 0xffffff, + DarkGrey = 0xa9a9a9, + DarkGray = 0xa9a9a9, + DarkBlue = 0x00008b, + DarkCyan = 0x008b8b, + DarkMagenta = 0x8b008b, + DarkRed = 0x8b0000, + LightGreen = 0x90ee90, + Crimson = 0xdc143c, + Indigo = 0x4b0082, + Olive = 0x808000, + RebeccaPurple = 0x663399, + Silver = 0xc0c0c0, + Teal = 0x008080, +}; +}; +} + +#endif diff --git a/3rdparty/tracy/tracy/common/TracyForceInline.hpp b/3rdparty/tracy/tracy/common/TracyForceInline.hpp new file mode 100644 index 0000000..b6a5833 --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyForceInline.hpp @@ -0,0 +1,20 @@ +#ifndef __TRACYFORCEINLINE_HPP__ +#define __TRACYFORCEINLINE_HPP__ + +#if defined(__GNUC__) +# define tracy_force_inline __attribute__((always_inline)) inline +#elif defined(_MSC_VER) +# define tracy_force_inline __forceinline +#else +# define tracy_force_inline inline +#endif + +#if defined(__GNUC__) +# define tracy_no_inline __attribute__((noinline)) +#elif defined(_MSC_VER) +# define tracy_no_inline __declspec(noinline) +#else +# define tracy_no_inline +#endif + +#endif diff --git a/3rdparty/tracy/tracy/common/TracyMutex.hpp b/3rdparty/tracy/tracy/common/TracyMutex.hpp new file mode 100644 index 0000000..57fb01a --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyMutex.hpp @@ -0,0 +1,24 @@ +#ifndef __TRACYMUTEX_HPP__ +#define __TRACYMUTEX_HPP__ + +#if defined _MSC_VER + +# include <shared_mutex> + +namespace tracy +{ +using TracyMutex = std::shared_mutex; +} + +#else + +#include <mutex> + +namespace tracy +{ +using TracyMutex = std::mutex; +} + +#endif + +#endif diff --git a/3rdparty/tracy/tracy/common/TracyProtocol.hpp b/3rdparty/tracy/tracy/common/TracyProtocol.hpp new file mode 100644 index 0000000..a647b03 --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyProtocol.hpp @@ -0,0 +1,139 @@ +#ifndef __TRACYPROTOCOL_HPP__ +#define __TRACYPROTOCOL_HPP__ + +#include <limits> +#include <stdint.h> + +namespace tracy +{ + +constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } + +enum : uint32_t { ProtocolVersion = 55 }; +enum : uint16_t { BroadcastVersion = 2 }; + +using lz4sz_t = uint32_t; + +enum { TargetFrameSize = 256 * 1024 }; +enum { LZ4Size = Lz4CompressBound( TargetFrameSize ) }; +static_assert( LZ4Size <= std::numeric_limits<lz4sz_t>::max(), "LZ4Size greater than lz4sz_t" ); +static_assert( TargetFrameSize * 2 >= 64 * 1024, "Not enough space for LZ4 stream buffer" ); + +enum { HandshakeShibbolethSize = 8 }; +static const char HandshakeShibboleth[HandshakeShibbolethSize] = { 'T', 'r', 'a', 'c', 'y', 'P', 'r', 'f' }; + +enum HandshakeStatus : uint8_t +{ + HandshakePending, + HandshakeWelcome, + HandshakeProtocolMismatch, + HandshakeNotAvailable, + HandshakeDropped +}; + +enum { WelcomeMessageProgramNameSize = 64 }; +enum { WelcomeMessageHostInfoSize = 1024 }; + +#pragma pack( 1 ) + +// Must increase left query space after handling! +enum ServerQuery : uint8_t +{ + ServerQueryTerminate, + ServerQueryString, + ServerQueryThreadString, + ServerQuerySourceLocation, + ServerQueryPlotName, + ServerQueryCallstackFrame, + ServerQueryFrameName, + ServerQueryDisconnect, + ServerQueryExternalName, + ServerQueryParameter, + ServerQuerySymbol, + ServerQuerySymbolCode, + ServerQueryCodeLocation, + ServerQuerySourceCode, + ServerQueryDataTransfer, + ServerQueryDataTransferPart, + ServerQueryFiberName +}; + +struct ServerQueryPacket +{ + ServerQuery type; + uint64_t ptr; + uint32_t extra; +}; + +enum { ServerQueryPacketSize = sizeof( ServerQueryPacket ) }; + + +enum CpuArchitecture : uint8_t +{ + CpuArchUnknown, + CpuArchX86, + CpuArchX64, + CpuArchArm32, + CpuArchArm64 +}; + + +struct WelcomeFlag +{ + enum _t : uint8_t + { + OnDemand = 1 << 0, + IsApple = 1 << 1, + CodeTransfer = 1 << 2, + CombineSamples = 1 << 3, + IdentifySamples = 1 << 4, + }; +}; + +struct WelcomeMessage +{ + double timerMul; + int64_t initBegin; + int64_t initEnd; + uint64_t delay; + uint64_t resolution; + uint64_t epoch; + uint64_t exectime; + uint64_t pid; + int64_t samplingPeriod; + uint8_t flags; + uint8_t cpuArch; + char cpuManufacturer[12]; + uint32_t cpuId; + char programName[WelcomeMessageProgramNameSize]; + char hostInfo[WelcomeMessageHostInfoSize]; +}; + +enum { WelcomeMessageSize = sizeof( WelcomeMessage ) }; + + +struct OnDemandPayloadMessage +{ + uint64_t frames; + uint64_t currentTime; +}; + +enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) }; + + +struct BroadcastMessage +{ + uint16_t broadcastVersion; + uint16_t listenPort; + uint32_t protocolVersion; + int32_t activeTime; // in seconds + char programName[WelcomeMessageProgramNameSize]; +}; + +enum { BroadcastMessageSize = sizeof( BroadcastMessage ) }; + +#pragma pack() + +} + +#endif diff --git a/3rdparty/tracy/tracy/common/TracyQueue.hpp b/3rdparty/tracy/tracy/common/TracyQueue.hpp new file mode 100644 index 0000000..4deb191 --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyQueue.hpp @@ -0,0 +1,850 @@ +#ifndef __TRACYQUEUE_HPP__ +#define __TRACYQUEUE_HPP__ + +#include <stdint.h> + +namespace tracy +{ + +enum class QueueType : uint8_t +{ + ZoneText, + ZoneName, + Message, + MessageColor, + MessageCallstack, + MessageColorCallstack, + MessageAppInfo, + ZoneBeginAllocSrcLoc, + ZoneBeginAllocSrcLocCallstack, + CallstackSerial, + Callstack, + CallstackAlloc, + CallstackSample, + CallstackSampleContextSwitch, + FrameImage, + ZoneBegin, + ZoneBeginCallstack, + ZoneEnd, + LockWait, + LockObtain, + LockRelease, + LockSharedWait, + LockSharedObtain, + LockSharedRelease, + LockName, + MemAlloc, + MemAllocNamed, + MemFree, + MemFreeNamed, + MemAllocCallstack, + MemAllocCallstackNamed, + MemFreeCallstack, + MemFreeCallstackNamed, + GpuZoneBegin, + GpuZoneBeginCallstack, + GpuZoneBeginAllocSrcLoc, + GpuZoneBeginAllocSrcLocCallstack, + GpuZoneEnd, + GpuZoneBeginSerial, + GpuZoneBeginCallstackSerial, + GpuZoneBeginAllocSrcLocSerial, + GpuZoneBeginAllocSrcLocCallstackSerial, + GpuZoneEndSerial, + PlotData, + ContextSwitch, + ThreadWakeup, + GpuTime, + GpuContextName, + CallstackFrameSize, + SymbolInformation, + CodeInformation, + ExternalNameMetadata, + SymbolCodeMetadata, + FiberEnter, + FiberLeave, + Terminate, + KeepAlive, + ThreadContext, + GpuCalibration, + Crash, + CrashReport, + ZoneValidation, + ZoneColor, + ZoneValue, + FrameMarkMsg, + FrameMarkMsgStart, + FrameMarkMsgEnd, + SourceLocation, + LockAnnounce, + LockTerminate, + LockMark, + MessageLiteral, + MessageLiteralColor, + MessageLiteralCallstack, + MessageLiteralColorCallstack, + GpuNewContext, + CallstackFrame, + SysTimeReport, + TidToPid, + HwSampleCpuCycle, + HwSampleInstructionRetired, + HwSampleCacheReference, + HwSampleCacheMiss, + HwSampleBranchRetired, + HwSampleBranchMiss, + PlotConfig, + ParamSetup, + AckServerQueryNoop, + AckSourceCodeNotAvailable, + AckSymbolCodeNotAvailable, + CpuTopology, + SingleStringData, + SecondStringData, + MemNamePayload, + StringData, + ThreadName, + PlotName, + SourceLocationPayload, + CallstackPayload, + CallstackAllocPayload, + FrameName, + FrameImageData, + ExternalName, + ExternalThreadName, + SymbolCode, + SourceCode, + FiberName, + NUM_TYPES +}; + +#pragma pack( 1 ) + +struct QueueThreadContext +{ + uint32_t thread; +}; + +struct QueueZoneBeginLean +{ + int64_t time; +}; + +struct QueueZoneBegin : public QueueZoneBeginLean +{ + uint64_t srcloc; // ptr +}; + +struct QueueZoneBeginThread : public QueueZoneBegin +{ + uint32_t thread; +}; + +struct QueueZoneEnd +{ + int64_t time; +}; + +struct QueueZoneEndThread : public QueueZoneEnd +{ + uint32_t thread; +}; + +struct QueueZoneValidation +{ + uint32_t id; +}; + +struct QueueZoneValidationThread : public QueueZoneValidation +{ + uint32_t thread; +}; + +struct QueueZoneColor +{ + uint8_t r; + uint8_t g; + uint8_t b; +}; + +struct QueueZoneColorThread : public QueueZoneColor +{ + uint32_t thread; +}; + +struct QueueZoneValue +{ + uint64_t value; +}; + +struct QueueZoneValueThread : public QueueZoneValue +{ + uint32_t thread; +}; + +struct QueueStringTransfer +{ + uint64_t ptr; +}; + +struct QueueFrameMark +{ + int64_t time; + uint64_t name; // ptr +}; + +struct QueueFrameImage +{ + uint32_t frame; + uint16_t w; + uint16_t h; + uint8_t flip; +}; + +struct QueueFrameImageFat : public QueueFrameImage +{ + uint64_t image; // ptr +}; + +struct QueueSourceLocation +{ + uint64_t name; + uint64_t function; // ptr + uint64_t file; // ptr + uint32_t line; + uint8_t r; + uint8_t g; + uint8_t b; +}; + +struct QueueZoneTextFat +{ + uint64_t text; // ptr + uint16_t size; +}; + +struct QueueZoneTextFatThread : public QueueZoneTextFat +{ + uint32_t thread; +}; + +enum class LockType : uint8_t +{ + Lockable, + SharedLockable +}; + +struct QueueLockAnnounce +{ + uint32_t id; + int64_t time; + uint64_t lckloc; // ptr + LockType type; +}; + +struct QueueFiberEnter +{ + int64_t time; + uint64_t fiber; // ptr + uint32_t thread; +}; + +struct QueueFiberLeave +{ + int64_t time; + uint32_t thread; +}; + +struct QueueLockTerminate +{ + uint32_t id; + int64_t time; +}; + +struct QueueLockWait +{ + uint32_t thread; + uint32_t id; + int64_t time; +}; + +struct QueueLockObtain +{ + uint32_t thread; + uint32_t id; + int64_t time; +}; + +struct QueueLockRelease +{ + uint32_t thread; + uint32_t id; + int64_t time; +}; + +struct QueueLockMark +{ + uint32_t thread; + uint32_t id; + uint64_t srcloc; // ptr +}; + +struct QueueLockName +{ + uint32_t id; +}; + +struct QueueLockNameFat : public QueueLockName +{ + uint64_t name; // ptr + uint16_t size; +}; + +enum class PlotDataType : uint8_t +{ + Float, + Double, + Int +}; + +struct QueuePlotData +{ + uint64_t name; // ptr + int64_t time; + PlotDataType type; + union + { + double d; + float f; + int64_t i; + } data; +}; + +struct QueueMessage +{ + int64_t time; +}; + +struct QueueMessageColor : public QueueMessage +{ + uint8_t r; + uint8_t g; + uint8_t b; +}; + +struct QueueMessageLiteral : public QueueMessage +{ + uint64_t text; // ptr +}; + +struct QueueMessageLiteralThread : public QueueMessageLiteral +{ + uint32_t thread; +}; + +struct QueueMessageColorLiteral : public QueueMessageColor +{ + uint64_t text; // ptr +}; + +struct QueueMessageColorLiteralThread : public QueueMessageColorLiteral +{ + uint32_t thread; +}; + +struct QueueMessageFat : public QueueMessage +{ + uint64_t text; // ptr + uint16_t size; +}; + +struct QueueMessageFatThread : public QueueMessageFat +{ + uint32_t thread; +}; + +struct QueueMessageColorFat : public QueueMessageColor +{ + uint64_t text; // ptr + uint16_t size; +}; + +struct QueueMessageColorFatThread : public QueueMessageColorFat +{ + uint32_t thread; +}; + +// Don't change order, only add new entries at the end, this is also used on trace dumps! +enum class GpuContextType : uint8_t +{ + Invalid, + OpenGl, + Vulkan, + OpenCL, + Direct3D12, + Direct3D11 +}; + +enum GpuContextFlags : uint8_t +{ + GpuContextCalibration = 1 << 0 +}; + +struct QueueGpuNewContext +{ + int64_t cpuTime; + int64_t gpuTime; + uint32_t thread; + float period; + uint8_t context; + GpuContextFlags flags; + GpuContextType type; +}; + +struct QueueGpuZoneBeginLean +{ + int64_t cpuTime; + uint32_t thread; + uint16_t queryId; + uint8_t context; +}; + +struct QueueGpuZoneBegin : public QueueGpuZoneBeginLean +{ + uint64_t srcloc; +}; + +struct QueueGpuZoneEnd +{ + int64_t cpuTime; + uint32_t thread; + uint16_t queryId; + uint8_t context; +}; + +struct QueueGpuTime +{ + int64_t gpuTime; + uint16_t queryId; + uint8_t context; +}; + +struct QueueGpuCalibration +{ + int64_t gpuTime; + int64_t cpuTime; + int64_t cpuDelta; + uint8_t context; +}; + +struct QueueGpuContextName +{ + uint8_t context; +}; + +struct QueueGpuContextNameFat : public QueueGpuContextName +{ + uint64_t ptr; + uint16_t size; +}; + +struct QueueMemNamePayload +{ + uint64_t name; +}; + +struct QueueMemAlloc +{ + int64_t time; + uint32_t thread; + uint64_t ptr; + char size[6]; +}; + +struct QueueMemFree +{ + int64_t time; + uint32_t thread; + uint64_t ptr; +}; + +struct QueueCallstackFat +{ + uint64_t ptr; +}; + +struct QueueCallstackFatThread : public QueueCallstackFat +{ + uint32_t thread; +}; + +struct QueueCallstackAllocFat +{ + uint64_t ptr; + uint64_t nativePtr; +}; + +struct QueueCallstackAllocFatThread : public QueueCallstackAllocFat +{ + uint32_t thread; +}; + +struct QueueCallstackSample +{ + int64_t time; + uint32_t thread; +}; + +struct QueueCallstackSampleFat : public QueueCallstackSample +{ + uint64_t ptr; +}; + +struct QueueCallstackFrameSize +{ + uint64_t ptr; + uint8_t size; +}; + +struct QueueCallstackFrameSizeFat : public QueueCallstackFrameSize +{ + uint64_t data; + uint64_t imageName; +}; + +struct QueueCallstackFrame +{ + uint32_t line; + uint64_t symAddr; + uint32_t symLen; +}; + +struct QueueSymbolInformation +{ + uint32_t line; + uint64_t symAddr; +}; + +struct QueueSymbolInformationFat : public QueueSymbolInformation +{ + uint64_t fileString; + uint8_t needFree; +}; + +struct QueueCodeInformation +{ + uint64_t symAddr; + uint32_t line; + uint64_t ptrOffset; +}; + +struct QueueCodeInformationFat : public QueueCodeInformation +{ + uint64_t fileString; + uint8_t needFree; +}; + +struct QueueCrashReport +{ + int64_t time; + uint64_t text; // ptr +}; + +struct QueueCrashReportThread +{ + uint32_t thread; +}; + +struct QueueSysTime +{ + int64_t time; + float sysTime; +}; + +struct QueueContextSwitch +{ + int64_t time; + uint32_t oldThread; + uint32_t newThread; + uint8_t cpu; + uint8_t reason; + uint8_t state; +}; + +struct QueueThreadWakeup +{ + int64_t time; + uint32_t thread; +}; + +struct QueueTidToPid +{ + uint64_t tid; + uint64_t pid; +}; + +struct QueueHwSample +{ + uint64_t ip; + int64_t time; +}; + +enum class PlotFormatType : uint8_t +{ + Number, + Memory, + Percentage +}; + +struct QueuePlotConfig +{ + uint64_t name; // ptr + uint8_t type; +}; + +struct QueueParamSetup +{ + uint32_t idx; + uint64_t name; // ptr + uint8_t isBool; + int32_t val; +}; + +struct QueueCpuTopology +{ + uint32_t package; + uint32_t core; + uint32_t thread; +}; + +struct QueueExternalNameMetadata +{ + uint64_t thread; + uint64_t name; + uint64_t threadName; +}; + +struct QueueSymbolCodeMetadata +{ + uint64_t symbol; + uint64_t ptr; + uint32_t size; +}; + +struct QueueHeader +{ + union + { + QueueType type; + uint8_t idx; + }; +}; + +struct QueueItem +{ + QueueHeader hdr; + union + { + QueueThreadContext threadCtx; + QueueZoneBegin zoneBegin; + QueueZoneBeginLean zoneBeginLean; + QueueZoneBeginThread zoneBeginThread; + QueueZoneEnd zoneEnd; + QueueZoneEndThread zoneEndThread; + QueueZoneValidation zoneValidation; + QueueZoneValidationThread zoneValidationThread; + QueueZoneColor zoneColor; + QueueZoneColorThread zoneColorThread; + QueueZoneValue zoneValue; + QueueZoneValueThread zoneValueThread; + QueueStringTransfer stringTransfer; + QueueFrameMark frameMark; + QueueFrameImage frameImage; + QueueFrameImageFat frameImageFat; + QueueSourceLocation srcloc; + QueueZoneTextFat zoneTextFat; + QueueZoneTextFatThread zoneTextFatThread; + QueueLockAnnounce lockAnnounce; + QueueLockTerminate lockTerminate; + QueueLockWait lockWait; + QueueLockObtain lockObtain; + QueueLockRelease lockRelease; + QueueLockMark lockMark; + QueueLockName lockName; + QueueLockNameFat lockNameFat; + QueuePlotData plotData; + QueueMessage message; + QueueMessageColor messageColor; + QueueMessageLiteral messageLiteral; + QueueMessageLiteralThread messageLiteralThread; + QueueMessageColorLiteral messageColorLiteral; + QueueMessageColorLiteralThread messageColorLiteralThread; + QueueMessageFat messageFat; + QueueMessageFatThread messageFatThread; + QueueMessageColorFat messageColorFat; + QueueMessageColorFatThread messageColorFatThread; + QueueGpuNewContext gpuNewContext; + QueueGpuZoneBegin gpuZoneBegin; + QueueGpuZoneBeginLean gpuZoneBeginLean; + QueueGpuZoneEnd gpuZoneEnd; + QueueGpuTime gpuTime; + QueueGpuCalibration gpuCalibration; + QueueGpuContextName gpuContextName; + QueueGpuContextNameFat gpuContextNameFat; + QueueMemAlloc memAlloc; + QueueMemFree memFree; + QueueMemNamePayload memName; + QueueCallstackFat callstackFat; + QueueCallstackFatThread callstackFatThread; + QueueCallstackAllocFat callstackAllocFat; + QueueCallstackAllocFatThread callstackAllocFatThread; + QueueCallstackSample callstackSample; + QueueCallstackSampleFat callstackSampleFat; + QueueCallstackFrameSize callstackFrameSize; + QueueCallstackFrameSizeFat callstackFrameSizeFat; + QueueCallstackFrame callstackFrame; + QueueSymbolInformation symbolInformation; + QueueSymbolInformationFat symbolInformationFat; + QueueCodeInformation codeInformation; + QueueCodeInformationFat codeInformationFat; + QueueCrashReport crashReport; + QueueCrashReportThread crashReportThread; + QueueSysTime sysTime; + QueueContextSwitch contextSwitch; + QueueThreadWakeup threadWakeup; + QueueTidToPid tidToPid; + QueueHwSample hwSample; + QueuePlotConfig plotConfig; + QueueParamSetup paramSetup; + QueueCpuTopology cpuTopology; + QueueExternalNameMetadata externalNameMetadata; + QueueSymbolCodeMetadata symbolCodeMetadata; + QueueFiberEnter fiberEnter; + QueueFiberLeave fiberLeave; + }; +}; +#pragma pack() + + +enum { QueueItemSize = sizeof( QueueItem ) }; + +static constexpr size_t QueueDataSize[] = { + sizeof( QueueHeader ), // zone text + sizeof( QueueHeader ), // zone name + sizeof( QueueHeader ) + sizeof( QueueMessage ), + sizeof( QueueHeader ) + sizeof( QueueMessageColor ), + sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack + sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack + sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info + sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location + sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack + sizeof( QueueHeader ), // callstack memory + sizeof( QueueHeader ), // callstack + sizeof( QueueHeader ), // callstack alloc + sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), + sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), // context switch + sizeof( QueueHeader ) + sizeof( QueueFrameImage ), + sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), + sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack + sizeof( QueueHeader ) + sizeof( QueueZoneEnd ), + sizeof( QueueHeader ) + sizeof( QueueLockWait ), + sizeof( QueueHeader ) + sizeof( QueueLockObtain ), + sizeof( QueueHeader ) + sizeof( QueueLockRelease ), + sizeof( QueueHeader ) + sizeof( QueueLockWait ), // shared + sizeof( QueueHeader ) + sizeof( QueueLockObtain ), // shared + sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared + sizeof( QueueHeader ) + sizeof( QueueLockName ), + sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), + sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // named + sizeof( QueueHeader ) + sizeof( QueueMemFree ), + sizeof( QueueHeader ) + sizeof( QueueMemFree ), // named + sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack + sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named + sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack + sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location, callstack + sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // serial, callstack + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location + sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// serial, allocated source location, callstack + sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), // serial + sizeof( QueueHeader ) + sizeof( QueuePlotData ), + sizeof( QueueHeader ) + sizeof( QueueContextSwitch ), + sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ), + sizeof( QueueHeader ) + sizeof( QueueGpuTime ), + sizeof( QueueHeader ) + sizeof( QueueGpuContextName ), + sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ), + sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ), + sizeof( QueueHeader ) + sizeof( QueueCodeInformation ), + sizeof( QueueHeader ), // ExternalNameMetadata - not for wire transfer + sizeof( QueueHeader ), // SymbolCodeMetadata - not for wire transfer + sizeof( QueueHeader ) + sizeof( QueueFiberEnter ), + sizeof( QueueHeader ) + sizeof( QueueFiberLeave ), + // above items must be first + sizeof( QueueHeader ), // terminate + sizeof( QueueHeader ), // keep alive + sizeof( QueueHeader ) + sizeof( QueueThreadContext ), + sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), + sizeof( QueueHeader ), // crash + sizeof( QueueHeader ) + sizeof( QueueCrashReport ), + sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), + sizeof( QueueHeader ) + sizeof( QueueZoneColor ), + sizeof( QueueHeader ) + sizeof( QueueZoneValue ), + sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // continuous frames + sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // start + sizeof( QueueHeader ) + sizeof( QueueFrameMark ), // end + sizeof( QueueHeader ) + sizeof( QueueSourceLocation ), + sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ), + sizeof( QueueHeader ) + sizeof( QueueLockTerminate ), + sizeof( QueueHeader ) + sizeof( QueueLockMark ), + sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), + sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), + sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack + sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack + sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ), + sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ), + sizeof( QueueHeader ) + sizeof( QueueSysTime ), + sizeof( QueueHeader ) + sizeof( QueueTidToPid ), + sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cpu cycle + sizeof( QueueHeader ) + sizeof( QueueHwSample ), // instruction retired + sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache reference + sizeof( QueueHeader ) + sizeof( QueueHwSample ), // cache miss + sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch retired + sizeof( QueueHeader ) + sizeof( QueueHwSample ), // branch miss + sizeof( QueueHeader ) + sizeof( QueuePlotConfig ), + sizeof( QueueHeader ) + sizeof( QueueParamSetup ), + sizeof( QueueHeader ), // server query acknowledgement + sizeof( QueueHeader ), // source code not available + sizeof( QueueHeader ), // symbol code not available + sizeof( QueueHeader ) + sizeof( QueueCpuTopology ), + sizeof( QueueHeader ), // single string data + sizeof( QueueHeader ), // second string data + sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ), + // keep all QueueStringTransfer below + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack alloc payload + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame name + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // frame image data + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external name + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // external thread name + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // symbol code + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // source code + sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // fiber name +}; + +static_assert( QueueItemSize == 32, "Queue item size not 32 bytes" ); +static_assert( sizeof( QueueDataSize ) / sizeof( size_t ) == (uint8_t)QueueType::NUM_TYPES, "QueueDataSize mismatch" ); +static_assert( sizeof( void* ) <= sizeof( uint64_t ), "Pointer size > 8 bytes" ); +static_assert( sizeof( void* ) == sizeof( uintptr_t ), "Pointer size != uintptr_t" ); + +} + +#endif diff --git a/3rdparty/tracy/tracy/common/TracySocket.cpp b/3rdparty/tracy/tracy/common/TracySocket.cpp new file mode 100644 index 0000000..176bbc7 --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracySocket.cpp @@ -0,0 +1,749 @@ +#include <assert.h> +#include <inttypes.h> +#include <new> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#include "TracyAlloc.hpp" +#include "TracySocket.hpp" +#include "TracySystem.hpp" + +#ifdef _WIN32 +# ifndef NOMINMAX +# define NOMINMAX +# endif +# include <winsock2.h> +# include <ws2tcpip.h> +# ifdef _MSC_VER +# pragma warning(disable:4244) +# pragma warning(disable:4267) +# endif +# define poll WSAPoll +#else +# include <arpa/inet.h> +# include <sys/socket.h> +# include <sys/param.h> +# include <errno.h> +# include <fcntl.h> +# include <netinet/in.h> +# include <netdb.h> +# include <unistd.h> +# include <poll.h> +#endif + +#ifndef MSG_NOSIGNAL +# define MSG_NOSIGNAL 0 +#endif + +namespace tracy +{ + +#ifdef _WIN32 +typedef SOCKET socket_t; +#else +typedef int socket_t; +#endif + +#ifdef _WIN32 +struct __wsinit +{ + __wsinit() + { + WSADATA wsaData; + if( WSAStartup( MAKEWORD( 2, 2 ), &wsaData ) != 0 ) + { + fprintf( stderr, "Cannot init winsock.\n" ); + exit( 1 ); + } + } +}; + +void InitWinSock() +{ + static __wsinit init; +} +#endif + + +enum { BufSize = 128 * 1024 }; + +Socket::Socket() + : m_buf( (char*)tracy_malloc( BufSize ) ) + , m_bufPtr( nullptr ) + , m_sock( -1 ) + , m_bufLeft( 0 ) + , m_ptr( nullptr ) +{ +#ifdef _WIN32 + InitWinSock(); +#endif +} + +Socket::Socket( int sock ) + : m_buf( (char*)tracy_malloc( BufSize ) ) + , m_bufPtr( nullptr ) + , m_sock( sock ) + , m_bufLeft( 0 ) + , m_ptr( nullptr ) +{ +} + +Socket::~Socket() +{ + tracy_free( m_buf ); + if( m_sock.load( std::memory_order_relaxed ) != -1 ) + { + Close(); + } + if( m_ptr ) + { + freeaddrinfo( m_res ); +#ifdef _WIN32 + closesocket( m_connSock ); +#else + close( m_connSock ); +#endif + } +} + +bool Socket::Connect( const char* addr, uint16_t port ) +{ + assert( !IsValid() ); + + if( m_ptr ) + { + const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen ); + if( c == -1 ) + { +#if defined _WIN32 + const auto err = WSAGetLastError(); + if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false; + if( err != WSAEISCONN ) + { + freeaddrinfo( m_res ); + closesocket( m_connSock ); + m_ptr = nullptr; + return false; + } +#else + const auto err = errno; + if( err == EALREADY || err == EINPROGRESS ) return false; + if( err != EISCONN ) + { + freeaddrinfo( m_res ); + close( m_connSock ); + m_ptr = nullptr; + return false; + } +#endif + } + +#if defined _WIN32 + u_long nonblocking = 0; + ioctlsocket( m_connSock, FIONBIO, &nonblocking ); +#else + int flags = fcntl( m_connSock, F_GETFL, 0 ); + fcntl( m_connSock, F_SETFL, flags & ~O_NONBLOCK ); +#endif + m_sock.store( m_connSock, std::memory_order_relaxed ); + freeaddrinfo( m_res ); + m_ptr = nullptr; + return true; + } + + struct addrinfo hints; + struct addrinfo *res, *ptr; + + memset( &hints, 0, sizeof( hints ) ); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + char portbuf[32]; + sprintf( portbuf, "%" PRIu16, port ); + + if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; + int sock = 0; + for( ptr = res; ptr; ptr = ptr->ai_next ) + { + if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue; +#if defined __APPLE__ + int val = 1; + setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); +#endif +#if defined _WIN32 + u_long nonblocking = 1; + ioctlsocket( sock, FIONBIO, &nonblocking ); +#else + int flags = fcntl( sock, F_GETFL, 0 ); + fcntl( sock, F_SETFL, flags | O_NONBLOCK ); +#endif + if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == 0 ) + { + break; + } + else + { +#if defined _WIN32 + const auto err = WSAGetLastError(); + if( err != WSAEWOULDBLOCK ) + { + closesocket( sock ); + continue; + } +#else + if( errno != EINPROGRESS ) + { + close( sock ); + continue; + } +#endif + } + m_res = res; + m_ptr = ptr; + m_connSock = sock; + return false; + } + freeaddrinfo( res ); + if( !ptr ) return false; + +#if defined _WIN32 + u_long nonblocking = 0; + ioctlsocket( sock, FIONBIO, &nonblocking ); +#else + int flags = fcntl( sock, F_GETFL, 0 ); + fcntl( sock, F_SETFL, flags & ~O_NONBLOCK ); +#endif + + m_sock.store( sock, std::memory_order_relaxed ); + return true; +} + +bool Socket::ConnectBlocking( const char* addr, uint16_t port ) +{ + assert( !IsValid() ); + assert( !m_ptr ); + + struct addrinfo hints; + struct addrinfo *res, *ptr; + + memset( &hints, 0, sizeof( hints ) ); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + char portbuf[32]; + sprintf( portbuf, "%" PRIu16, port ); + + if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; + int sock = 0; + for( ptr = res; ptr; ptr = ptr->ai_next ) + { + if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue; +#if defined __APPLE__ + int val = 1; + setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); +#endif + if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == -1 ) + { +#ifdef _WIN32 + closesocket( sock ); +#else + close( sock ); +#endif + continue; + } + break; + } + freeaddrinfo( res ); + if( !ptr ) return false; + + m_sock.store( sock, std::memory_order_relaxed ); + return true; +} + +void Socket::Close() +{ + const auto sock = m_sock.load( std::memory_order_relaxed ); + assert( sock != -1 ); +#ifdef _WIN32 + closesocket( sock ); +#else + close( sock ); +#endif + m_sock.store( -1, std::memory_order_relaxed ); +} + +int Socket::Send( const void* _buf, int len ) +{ + const auto sock = m_sock.load( std::memory_order_relaxed ); + auto buf = (const char*)_buf; + assert( sock != -1 ); + auto start = buf; + while( len > 0 ) + { + auto ret = send( sock, buf, len, MSG_NOSIGNAL ); + if( ret == -1 ) return -1; + len -= ret; + buf += ret; + } + return int( buf - start ); +} + +int Socket::GetSendBufSize() +{ + const auto sock = m_sock.load( std::memory_order_relaxed ); + int bufSize; +#if defined _WIN32 + int sz = sizeof( bufSize ); + getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz ); +#else + socklen_t sz = sizeof( bufSize ); + getsockopt( sock, SOL_SOCKET, SO_SNDBUF, &bufSize, &sz ); +#endif + return bufSize; +} + +int Socket::RecvBuffered( void* buf, int len, int timeout ) +{ + if( len <= m_bufLeft ) + { + memcpy( buf, m_bufPtr, len ); + m_bufPtr += len; + m_bufLeft -= len; + return len; + } + + if( m_bufLeft > 0 ) + { + memcpy( buf, m_bufPtr, m_bufLeft ); + const auto ret = m_bufLeft; + m_bufLeft = 0; + return ret; + } + + if( len >= BufSize ) return Recv( buf, len, timeout ); + + m_bufLeft = Recv( m_buf, BufSize, timeout ); + if( m_bufLeft <= 0 ) return m_bufLeft; + + const auto sz = len < m_bufLeft ? len : m_bufLeft; + memcpy( buf, m_buf, sz ); + m_bufPtr = m_buf + sz; + m_bufLeft -= sz; + return sz; +} + +int Socket::Recv( void* _buf, int len, int timeout ) +{ + const auto sock = m_sock.load( std::memory_order_relaxed ); + auto buf = (char*)_buf; + + struct pollfd fd; + fd.fd = (socket_t)sock; + fd.events = POLLIN; + + if( poll( &fd, 1, timeout ) > 0 ) + { + return recv( sock, buf, len, 0 ); + } + else + { + return -1; + } +} + +int Socket::ReadUpTo( void* _buf, int len, int timeout ) +{ + const auto sock = m_sock.load( std::memory_order_relaxed ); + auto buf = (char*)_buf; + + int rd = 0; + while( len > 0 ) + { + const auto res = recv( sock, buf, len, 0 ); + if( res == 0 ) break; + if( res == -1 ) return -1; + len -= res; + rd += res; + buf += res; + } + return rd; +} + +bool Socket::Read( void* buf, int len, int timeout ) +{ + auto cbuf = (char*)buf; + while( len > 0 ) + { + if( !ReadImpl( cbuf, len, timeout ) ) return false; + } + return true; +} + +bool Socket::ReadImpl( char*& buf, int& len, int timeout ) +{ + const auto sz = RecvBuffered( buf, len, timeout ); + switch( sz ) + { + case 0: + return false; + case -1: +#ifdef _WIN32 + { + auto err = WSAGetLastError(); + if( err == WSAECONNABORTED || err == WSAECONNRESET ) return false; + } +#endif + break; + default: + len -= sz; + buf += sz; + break; + } + return true; +} + +bool Socket::ReadRaw( void* _buf, int len, int timeout ) +{ + auto buf = (char*)_buf; + while( len > 0 ) + { + const auto sz = Recv( buf, len, timeout ); + if( sz <= 0 ) return false; + len -= sz; + buf += sz; + } + return true; +} + +bool Socket::HasData() +{ + const auto sock = m_sock.load( std::memory_order_relaxed ); + if( m_bufLeft > 0 ) return true; + + struct pollfd fd; + fd.fd = (socket_t)sock; + fd.events = POLLIN; + + return poll( &fd, 1, 0 ) > 0; +} + +bool Socket::IsValid() const +{ + return m_sock.load( std::memory_order_relaxed ) >= 0; +} + + +ListenSocket::ListenSocket() + : m_sock( -1 ) +{ +#ifdef _WIN32 + InitWinSock(); +#endif +} + +ListenSocket::~ListenSocket() +{ + if( m_sock != -1 ) Close(); +} + +static int addrinfo_and_socket_for_family( uint16_t port, int ai_family, struct addrinfo** res ) +{ + struct addrinfo hints; + memset( &hints, 0, sizeof( hints ) ); + hints.ai_family = ai_family; + hints.ai_socktype = SOCK_STREAM; +#ifndef TRACY_ONLY_LOCALHOST + const char* onlyLocalhost = GetEnvVar( "TRACY_ONLY_LOCALHOST" ); + if( !onlyLocalhost || onlyLocalhost[0] != '1' ) + { + hints.ai_flags = AI_PASSIVE; + } +#endif + char portbuf[32]; + sprintf( portbuf, "%" PRIu16, port ); + if( getaddrinfo( nullptr, portbuf, &hints, res ) != 0 ) return -1; + int sock = socket( (*res)->ai_family, (*res)->ai_socktype, (*res)->ai_protocol ); + if (sock == -1) freeaddrinfo( *res ); + return sock; +} + +bool ListenSocket::Listen( uint16_t port, int backlog ) +{ + assert( m_sock == -1 ); + + struct addrinfo* res = nullptr; + +#if !defined TRACY_ONLY_IPV4 && !defined TRACY_ONLY_LOCALHOST + const char* onlyIPv4 = GetEnvVar( "TRACY_ONLY_IPV4" ); + if( !onlyIPv4 || onlyIPv4[0] != '1' ) + { + m_sock = addrinfo_and_socket_for_family( port, AF_INET6, &res ); + } +#endif + if (m_sock == -1) + { + // IPV6 protocol may not be available/is disabled. Try to create a socket + // with the IPV4 protocol + m_sock = addrinfo_and_socket_for_family( port, AF_INET, &res ); + if( m_sock == -1 ) return false; + } +#if defined _WIN32 + unsigned long val = 0; + setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) ); +#elif defined BSD + int val = 0; + setsockopt( m_sock, IPPROTO_IPV6, IPV6_V6ONLY, (const char*)&val, sizeof( val ) ); + val = 1; + setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) ); +#else + int val = 1; + setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) ); +#endif + if( bind( m_sock, res->ai_addr, res->ai_addrlen ) == -1 ) { freeaddrinfo( res ); Close(); return false; } + if( listen( m_sock, backlog ) == -1 ) { freeaddrinfo( res ); Close(); return false; } + freeaddrinfo( res ); + return true; +} + +Socket* ListenSocket::Accept() +{ + struct sockaddr_storage remote; + socklen_t sz = sizeof( remote ); + + struct pollfd fd; + fd.fd = (socket_t)m_sock; + fd.events = POLLIN; + + if( poll( &fd, 1, 10 ) > 0 ) + { + int sock = accept( m_sock, (sockaddr*)&remote, &sz); + if( sock == -1 ) return nullptr; + +#if defined __APPLE__ + int val = 1; + setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); +#endif + + auto ptr = (Socket*)tracy_malloc( sizeof( Socket ) ); + new(ptr) Socket( sock ); + return ptr; + } + else + { + return nullptr; + } +} + +void ListenSocket::Close() +{ + assert( m_sock != -1 ); +#ifdef _WIN32 + closesocket( m_sock ); +#else + close( m_sock ); +#endif + m_sock = -1; +} + +UdpBroadcast::UdpBroadcast() + : m_sock( -1 ) +{ +#ifdef _WIN32 + InitWinSock(); +#endif +} + +UdpBroadcast::~UdpBroadcast() +{ + if( m_sock != -1 ) Close(); +} + +bool UdpBroadcast::Open( const char* addr, uint16_t port ) +{ + assert( m_sock == -1 ); + + struct addrinfo hints; + struct addrinfo *res, *ptr; + + memset( &hints, 0, sizeof( hints ) ); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_DGRAM; + + char portbuf[32]; + sprintf( portbuf, "%" PRIu16, port ); + + if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; + int sock = 0; + for( ptr = res; ptr; ptr = ptr->ai_next ) + { + if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue; +#if defined __APPLE__ + int val = 1; + setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); +#endif +#if defined _WIN32 + unsigned long broadcast = 1; + if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 ) +#else + int broadcast = 1; + if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof( broadcast ) ) == -1 ) +#endif + { +#ifdef _WIN32 + closesocket( sock ); +#else + close( sock ); +#endif + continue; + } + break; + } + freeaddrinfo( res ); + if( !ptr ) return false; + + m_sock = sock; + inet_pton( AF_INET, addr, &m_addr ); + return true; +} + +void UdpBroadcast::Close() +{ + assert( m_sock != -1 ); +#ifdef _WIN32 + closesocket( m_sock ); +#else + close( m_sock ); +#endif + m_sock = -1; +} + +int UdpBroadcast::Send( uint16_t port, const void* data, int len ) +{ + assert( m_sock != -1 ); + struct sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = htons( port ); + addr.sin_addr.s_addr = m_addr; + return sendto( m_sock, (const char*)data, len, MSG_NOSIGNAL, (sockaddr*)&addr, sizeof( addr ) ); +} + +IpAddress::IpAddress() + : m_number( 0 ) +{ + *m_text = '\0'; +} + +IpAddress::~IpAddress() +{ +} + +void IpAddress::Set( const struct sockaddr& addr ) +{ +#if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 ) + struct sockaddr_in tmp; + memcpy( &tmp, &addr, sizeof( tmp ) ); + auto ai = &tmp; +#else + auto ai = (const struct sockaddr_in*)&addr; +#endif + inet_ntop( AF_INET, &ai->sin_addr, m_text, 17 ); + m_number = ai->sin_addr.s_addr; +} + +UdpListen::UdpListen() + : m_sock( -1 ) +{ +#ifdef _WIN32 + InitWinSock(); +#endif +} + +UdpListen::~UdpListen() +{ + if( m_sock != -1 ) Close(); +} + +bool UdpListen::Listen( uint16_t port ) +{ + assert( m_sock == -1 ); + + int sock; + if( ( sock = socket( AF_INET, SOCK_DGRAM, 0 ) ) == -1 ) return false; + +#if defined __APPLE__ + int val = 1; + setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); +#endif +#if defined _WIN32 + unsigned long reuse = 1; + setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) ); +#else + int reuse = 1; + setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) ); +#endif +#if defined _WIN32 + unsigned long broadcast = 1; + if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 ) +#else + int broadcast = 1; + if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, &broadcast, sizeof( broadcast ) ) == -1 ) +#endif + { +#ifdef _WIN32 + closesocket( sock ); +#else + close( sock ); +#endif + return false; + } + + struct sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = htons( port ); + addr.sin_addr.s_addr = INADDR_ANY; + + if( bind( sock, (sockaddr*)&addr, sizeof( addr ) ) == -1 ) + { +#ifdef _WIN32 + closesocket( sock ); +#else + close( sock ); +#endif + return false; + } + + m_sock = sock; + return true; +} + +void UdpListen::Close() +{ + assert( m_sock != -1 ); +#ifdef _WIN32 + closesocket( m_sock ); +#else + close( m_sock ); +#endif + m_sock = -1; +} + +const char* UdpListen::Read( size_t& len, IpAddress& addr, int timeout ) +{ + static char buf[2048]; + + struct pollfd fd; + fd.fd = (socket_t)m_sock; + fd.events = POLLIN; + if( poll( &fd, 1, timeout ) <= 0 ) return nullptr; + + sockaddr sa; + socklen_t salen = sizeof( struct sockaddr ); + len = (size_t)recvfrom( m_sock, buf, 2048, 0, &sa, &salen ); + addr.Set( sa ); + + return buf; +} + +} diff --git a/3rdparty/tracy/tracy/common/TracySocket.hpp b/3rdparty/tracy/tracy/common/TracySocket.hpp new file mode 100644 index 0000000..4de4cca --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracySocket.hpp @@ -0,0 +1,156 @@ +#ifndef __TRACYSOCKET_HPP__ +#define __TRACYSOCKET_HPP__ + +#include <atomic> +#include <stdint.h> + +#include "TracyForceInline.hpp" + +struct addrinfo; +struct sockaddr; + +namespace tracy +{ + +#ifdef _WIN32 +void InitWinSock(); +#endif + +class Socket +{ +public: + Socket(); + Socket( int sock ); + ~Socket(); + + bool Connect( const char* addr, uint16_t port ); + bool ConnectBlocking( const char* addr, uint16_t port ); + void Close(); + + int Send( const void* buf, int len ); + int GetSendBufSize(); + + int ReadUpTo( void* buf, int len, int timeout ); + bool Read( void* buf, int len, int timeout ); + + template<typename ShouldExit> + bool Read( void* buf, int len, int timeout, ShouldExit exitCb ) + { + auto cbuf = (char*)buf; + while( len > 0 ) + { + if( exitCb() ) return false; + if( !ReadImpl( cbuf, len, timeout ) ) return false; + } + return true; + } + + bool ReadRaw( void* buf, int len, int timeout ); + bool HasData(); + bool IsValid() const; + + Socket( const Socket& ) = delete; + Socket( Socket&& ) = delete; + Socket& operator=( const Socket& ) = delete; + Socket& operator=( Socket&& ) = delete; + +private: + int RecvBuffered( void* buf, int len, int timeout ); + int Recv( void* buf, int len, int timeout ); + + bool ReadImpl( char*& buf, int& len, int timeout ); + + char* m_buf; + char* m_bufPtr; + std::atomic<int> m_sock; + int m_bufLeft; + + struct addrinfo *m_res; + struct addrinfo *m_ptr; + int m_connSock; +}; + +class ListenSocket +{ +public: + ListenSocket(); + ~ListenSocket(); + + bool Listen( uint16_t port, int backlog ); + Socket* Accept(); + void Close(); + + ListenSocket( const ListenSocket& ) = delete; + ListenSocket( ListenSocket&& ) = delete; + ListenSocket& operator=( const ListenSocket& ) = delete; + ListenSocket& operator=( ListenSocket&& ) = delete; + +private: + int m_sock; +}; + +class UdpBroadcast +{ +public: + UdpBroadcast(); + ~UdpBroadcast(); + + bool Open( const char* addr, uint16_t port ); + void Close(); + + int Send( uint16_t port, const void* data, int len ); + + UdpBroadcast( const UdpBroadcast& ) = delete; + UdpBroadcast( UdpBroadcast&& ) = delete; + UdpBroadcast& operator=( const UdpBroadcast& ) = delete; + UdpBroadcast& operator=( UdpBroadcast&& ) = delete; + +private: + int m_sock; + uint32_t m_addr; +}; + +class IpAddress +{ +public: + IpAddress(); + ~IpAddress(); + + void Set( const struct sockaddr& addr ); + + uint32_t GetNumber() const { return m_number; } + const char* GetText() const { return m_text; } + + IpAddress( const IpAddress& ) = delete; + IpAddress( IpAddress&& ) = delete; + IpAddress& operator=( const IpAddress& ) = delete; + IpAddress& operator=( IpAddress&& ) = delete; + +private: + uint32_t m_number; + char m_text[17]; +}; + +class UdpListen +{ +public: + UdpListen(); + ~UdpListen(); + + bool Listen( uint16_t port ); + void Close(); + + const char* Read( size_t& len, IpAddress& addr, int timeout ); + + UdpListen( const UdpListen& ) = delete; + UdpListen( UdpListen&& ) = delete; + UdpListen& operator=( const UdpListen& ) = delete; + UdpListen& operator=( UdpListen&& ) = delete; + +private: + int m_sock; +}; + +} + +#endif diff --git a/3rdparty/tracy/tracy/common/TracyStackFrames.cpp b/3rdparty/tracy/tracy/common/TracyStackFrames.cpp new file mode 100644 index 0000000..7b0abac --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyStackFrames.cpp @@ -0,0 +1,122 @@ +#include "TracyStackFrames.hpp" + +namespace tracy +{ + +const char* s_tracyStackFrames_[] = { + "tracy::Callstack", + "tracy::Callstack(int)", + "tracy::GpuCtxScope::{ctor}", + "tracy::Profiler::SendCallstack", + "tracy::Profiler::SendCallstack(int)", + "tracy::Profiler::SendCallstack(int, unsigned long)", + "tracy::Profiler::MemAllocCallstack", + "tracy::Profiler::MemAllocCallstack(void const*, unsigned long, int)", + "tracy::Profiler::MemFreeCallstack", + "tracy::Profiler::MemFreeCallstack(void const*, int)", + "tracy::ScopedZone::{ctor}", + "tracy::ScopedZone::ScopedZone(tracy::SourceLocationData const*, int, bool)", + "tracy::Profiler::Message", + nullptr +}; + +const char** s_tracyStackFrames = s_tracyStackFrames_; + +const StringMatch s_tracySkipSubframes_[] = { + { "/include/arm_neon.h", 19 }, + { "/include/adxintrin.h", 20 }, + { "/include/ammintrin.h", 20 }, + { "/include/amxbf16intrin.h", 24 }, + { "/include/amxint8intrin.h", 24 }, + { "/include/amxtileintrin.h", 24 }, + { "/include/avx2intrin.h", 21 }, + { "/include/avx5124fmapsintrin.h", 29 }, + { "/include/avx5124vnniwintrin.h", 29 }, + { "/include/avx512bf16intrin.h", 27 }, + { "/include/avx512bf16vlintrin.h", 29 }, + { "/include/avx512bitalgintrin.h", 29 }, + { "/include/avx512bwintrin.h", 25 }, + { "/include/avx512cdintrin.h", 25 }, + { "/include/avx512dqintrin.h", 25 }, + { "/include/avx512erintrin.h", 25 }, + { "/include/avx512fintrin.h", 24 }, + { "/include/avx512ifmaintrin.h", 27 }, + { "/include/avx512ifmavlintrin.h", 29 }, + { "/include/avx512pfintrin.h", 25 }, + { "/include/avx512vbmi2intrin.h", 28 }, + { "/include/avx512vbmi2vlintrin.h", 30 }, + { "/include/avx512vbmiintrin.h", 27 }, + { "/include/avx512vbmivlintrin.h", 29 }, + { "/include/avx512vlbwintrin.h", 27 }, + { "/include/avx512vldqintrin.h", 27 }, + { "/include/avx512vlintrin.h", 25 }, + { "/include/avx512vnniintrin.h", 27 }, + { "/include/avx512vnnivlintrin.h", 29 }, + { "/include/avx512vp2intersectintrin.h", 35 }, + { "/include/avx512vp2intersectvlintrin.h", 37 }, + { "/include/avx512vpopcntdqintrin.h", 32 }, + { "/include/avx512vpopcntdqvlintrin.h", 34 }, + { "/include/avxintrin.h", 20 }, + { "/include/avxvnniintrin.h", 24 }, + { "/include/bmi2intrin.h", 21 }, + { "/include/bmiintrin.h", 20 }, + { "/include/bmmintrin.h", 20 }, + { "/include/cetintrin.h", 20 }, + { "/include/cldemoteintrin.h", 25 }, + { "/include/clflushoptintrin.h", 27 }, + { "/include/clwbintrin.h", 21 }, + { "/include/clzerointrin.h", 23 }, + { "/include/emmintrin.h", 20 }, + { "/include/enqcmdintrin.h", 23 }, + { "/include/f16cintrin.h", 21 }, + { "/include/fma4intrin.h", 21 }, + { "/include/fmaintrin.h", 20 }, + { "/include/fxsrintrin.h", 21 }, + { "/include/gfniintrin.h", 21 }, + { "/include/hresetintrin.h", 23 }, + { "/include/ia32intrin.h", 21 }, + { "/include/immintrin.h", 20 }, + { "/include/keylockerintrin.h", 26 }, + { "/include/lwpintrin.h", 20 }, + { "/include/lzcntintrin.h", 22 }, + { "/include/mmintrin.h", 19 }, + { "/include/movdirintrin.h", 23 }, + { "/include/mwaitxintrin.h", 23 }, + { "/include/nmmintrin.h", 20 }, + { "/include/pconfigintrin.h", 24 }, + { "/include/pkuintrin.h", 20 }, + { "/include/pmmintrin.h", 20 }, + { "/include/popcntintrin.h", 23 }, + { "/include/prfchwintrin.h", 23 }, + { "/include/rdseedintrin.h", 23 }, + { "/include/rtmintrin.h", 20 }, + { "/include/serializeintrin.h", 26 }, + { "/include/sgxintrin.h", 20 }, + { "/include/shaintrin.h", 20 }, + { "/include/smmintrin.h", 20 }, + { "/include/tbmintrin.h", 20 }, + { "/include/tmmintrin.h", 20 }, + { "/include/tsxldtrkintrin.h", 25 }, + { "/include/uintrintrin.h", 22 }, + { "/include/vaesintrin.h", 21 }, + { "/include/vpclmulqdqintrin.h", 27 }, + { "/include/waitpkgintrin.h", 24 }, + { "/include/wbnoinvdintrin.h", 25 }, + { "/include/wmmintrin.h", 20 }, + { "/include/x86gprintrin.h", 23 }, + { "/include/x86intrin.h", 20 }, + { "/include/xmmintrin.h", 20 }, + { "/include/xopintrin.h", 20 }, + { "/include/xsavecintrin.h", 23 }, + { "/include/xsaveintrin.h", 22 }, + { "/include/xsaveoptintrin.h", 25 }, + { "/include/xsavesintrin.h", 23 }, + { "/include/xtestintrin.h", 22 }, + { "/bits/atomic_base.h", 19 }, + { "/atomic", 7 }, + {} +}; + +const StringMatch* s_tracySkipSubframes = s_tracySkipSubframes_; + +} diff --git a/3rdparty/tracy/tracy/common/TracyStackFrames.hpp b/3rdparty/tracy/tracy/common/TracyStackFrames.hpp new file mode 100644 index 0000000..9d4262c --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyStackFrames.hpp @@ -0,0 +1,22 @@ +#ifndef __TRACYSTACKFRAMES_HPP__ +#define __TRACYSTACKFRAMES_HPP__ + +#include <stddef.h> + +namespace tracy +{ + +struct StringMatch +{ + const char* str; + size_t len; +}; + +extern const char** s_tracyStackFrames; +extern const StringMatch* s_tracySkipSubframes; + +static constexpr int s_tracySkipSubframesMinLen = 7; + +} + +#endif diff --git a/3rdparty/tracy/tracy/common/TracySystem.cpp b/3rdparty/tracy/tracy/common/TracySystem.cpp new file mode 100644 index 0000000..1248fde --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracySystem.cpp @@ -0,0 +1,304 @@ +#ifdef _MSC_VER +# pragma warning(disable:4996) +#endif +#if defined _WIN32 +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# ifndef NOMINMAX +# define NOMINMAX +# endif +# include <windows.h> +# include <malloc.h> +# include "TracyUwp.hpp" +#else +# include <pthread.h> +# include <string.h> +# include <unistd.h> +#endif + +#ifdef __linux__ +# ifdef __ANDROID__ +# include <sys/types.h> +# else +# include <sys/syscall.h> +# endif +# include <fcntl.h> +#elif defined __FreeBSD__ +# include <sys/thr.h> +#elif defined __NetBSD__ || defined __DragonFly__ +# include <sys/lwp.h> +#endif + +#ifdef __MINGW32__ +# define __STDC_FORMAT_MACROS +#endif +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> + +#include "TracySystem.hpp" + +#if defined _WIN32 +extern "C" typedef HRESULT (WINAPI *t_SetThreadDescription)( HANDLE, PCWSTR ); +extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* ); +#endif + +#ifdef TRACY_ENABLE +# include <atomic> +# include "TracyAlloc.hpp" +#endif + +namespace tracy +{ + +namespace detail +{ + +TRACY_API uint32_t GetThreadHandleImpl() +{ +#if defined _WIN32 + static_assert( sizeof( decltype( GetCurrentThreadId() ) ) <= sizeof( uint32_t ), "Thread handle too big to fit in protocol" ); + return uint32_t( GetCurrentThreadId() ); +#elif defined __APPLE__ + uint64_t id; + pthread_threadid_np( pthread_self(), &id ); + return uint32_t( id ); +#elif defined __ANDROID__ + return (uint32_t)gettid(); +#elif defined __linux__ + return (uint32_t)syscall( SYS_gettid ); +#elif defined __FreeBSD__ + long id; + thr_self( &id ); + return id; +#elif defined __NetBSD__ + return _lwp_self(); +#elif defined __DragonFly__ + return lwp_gettid(); +#elif defined __OpenBSD__ + return getthrid(); +#else + // To add support for a platform, retrieve and return the kernel thread identifier here. + // + // Note that pthread_t (as for example returned by pthread_self()) is *not* a kernel + // thread identifier. It is a pointer to a library-allocated data structure instead. + // Such pointers will be reused heavily, making the pthread_t non-unique. Additionally + // a 64-bit pointer cannot be reliably truncated to 32 bits. + #error "Unsupported platform!" +#endif + +} + +} + +#ifdef TRACY_ENABLE +struct ThreadNameData +{ + uint32_t id; + const char* name; + ThreadNameData* next; +}; +std::atomic<ThreadNameData*>& GetThreadNameData(); +#endif + +#ifdef _MSC_VER +# pragma pack( push, 8 ) +struct THREADNAME_INFO +{ + DWORD dwType; + LPCSTR szName; + DWORD dwThreadID; + DWORD dwFlags; +}; +# pragma pack(pop) + +void ThreadNameMsvcMagic( const THREADNAME_INFO& info ) +{ + __try + { + RaiseException( 0x406D1388, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info ); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + } +} +#endif + +TRACY_API void SetThreadName( const char* name ) +{ +#if defined _WIN32 +# ifdef TRACY_UWP + static auto _SetThreadDescription = &::SetThreadDescription; +# else + static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" ); +# endif + if( _SetThreadDescription ) + { + wchar_t buf[256]; + mbstowcs( buf, name, 256 ); + _SetThreadDescription( GetCurrentThread(), buf ); + } + else + { +# if defined _MSC_VER + THREADNAME_INFO info; + info.dwType = 0x1000; + info.szName = name; + info.dwThreadID = GetCurrentThreadId(); + info.dwFlags = 0; + ThreadNameMsvcMagic( info ); +# endif + } +#elif defined _GNU_SOURCE && !defined __EMSCRIPTEN__ + { + const auto sz = strlen( name ); + if( sz <= 15 ) + { +#if defined __APPLE__ + pthread_setname_np( name ); +#else + pthread_setname_np( pthread_self(), name ); +#endif + } + else + { + char buf[16]; + memcpy( buf, name, 15 ); + buf[15] = '\0'; +#if defined __APPLE__ + pthread_setname_np( buf ); +#else + pthread_setname_np( pthread_self(), buf ); +#endif + } + } +#endif +#ifdef TRACY_ENABLE + { + const auto sz = strlen( name ); + char* buf = (char*)tracy_malloc( sz+1 ); + memcpy( buf, name, sz ); + buf[sz] = '\0'; + auto data = (ThreadNameData*)tracy_malloc_fast( sizeof( ThreadNameData ) ); + data->id = detail::GetThreadHandleImpl(); + data->name = buf; + data->next = GetThreadNameData().load( std::memory_order_relaxed ); + while( !GetThreadNameData().compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {} + } +#endif +} + +TRACY_API const char* GetThreadName( uint32_t id ) +{ + static char buf[256]; +#ifdef TRACY_ENABLE + auto ptr = GetThreadNameData().load( std::memory_order_relaxed ); + while( ptr ) + { + if( ptr->id == id ) + { + return ptr->name; + } + ptr = ptr->next; + } +#else +# if defined _WIN32 +# ifdef TRACY_UWP + static auto _GetThreadDescription = &::GetThreadDescription; +# else + static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); +# endif + if( _GetThreadDescription ) + { + auto hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, (DWORD)id ); + if( hnd != 0 ) + { + PWSTR tmp; + _GetThreadDescription( hnd, &tmp ); + auto ret = wcstombs( buf, tmp, 256 ); + CloseHandle( hnd ); + if( ret != 0 ) + { + return buf; + } + } + } +# elif defined __linux__ + int cs, fd; + char path[32]; +# ifdef __ANDROID__ + int tid = gettid(); +# else + int tid = (int) syscall( SYS_gettid ); +# endif + snprintf( path, sizeof( path ), "/proc/self/task/%d/comm", tid ); + sprintf( buf, "%" PRIu32, id ); +# ifndef __ANDROID__ + pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, &cs ); +# endif + if ( ( fd = open( path, O_RDONLY ) ) > 0) { + int len = read( fd, buf, 255 ); + if( len > 0 ) + { + buf[len] = 0; + if( len > 1 && buf[len-1] == '\n' ) + { + buf[len-1] = 0; + } + } + close( fd ); + } +# ifndef __ANDROID__ + pthread_setcancelstate( cs, 0 ); +# endif + return buf; +# endif +#endif + sprintf( buf, "%" PRIu32, id ); + return buf; +} + +TRACY_API const char* GetEnvVar( const char* name ) +{ +#if defined _WIN32 + // unfortunately getenv() on Windows is just fundamentally broken. It caches the entire + // environment block once on startup, then never refreshes it again. If any environment + // strings are added or modified after startup of the CRT, those changes will not be + // seen by getenv(). This removes the possibility of an app using this SDK from + // programmatically setting any of the behaviour controlling envvars here. + // + // To work around this, we'll instead go directly to the Win32 environment strings APIs + // to get the current value. + static char buffer[1024]; + DWORD const kBufferSize = DWORD(sizeof(buffer) / sizeof(buffer[0])); + DWORD count = GetEnvironmentVariableA(name, buffer, kBufferSize); + + if( count == 0 ) + return nullptr; + + if( count >= kBufferSize ) + { + char* buf = reinterpret_cast<char*>(_alloca(count + 1)); + count = GetEnvironmentVariableA(name, buf, count + 1); + memcpy(buffer, buf, kBufferSize); + buffer[kBufferSize - 1] = 0; + } + + return buffer; +#else + return getenv(name); +#endif +} + +} + +#ifdef __cplusplus +extern "C" { +#endif + +TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); } + +#ifdef __cplusplus +} +#endif diff --git a/3rdparty/tracy/tracy/common/TracySystem.hpp b/3rdparty/tracy/tracy/common/TracySystem.hpp new file mode 100644 index 0000000..e0040e9 --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracySystem.hpp @@ -0,0 +1,32 @@ +#ifndef __TRACYSYSTEM_HPP__ +#define __TRACYSYSTEM_HPP__ + +#include <stdint.h> + +#include "TracyApi.h" + +namespace tracy +{ + +namespace detail +{ +TRACY_API uint32_t GetThreadHandleImpl(); +} + +#ifdef TRACY_ENABLE +TRACY_API uint32_t GetThreadHandle(); +#else +static inline uint32_t GetThreadHandle() +{ + return detail::GetThreadHandleImpl(); +} +#endif + +TRACY_API void SetThreadName( const char* name ); +TRACY_API const char* GetThreadName( uint32_t id ); + +TRACY_API const char* GetEnvVar(const char* name); + +} + +#endif diff --git a/3rdparty/tracy/tracy/common/TracyUwp.hpp b/3rdparty/tracy/tracy/common/TracyUwp.hpp new file mode 100644 index 0000000..7dce96b --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyUwp.hpp @@ -0,0 +1,11 @@ +#ifndef __TRACYUWP_HPP__ +#define __TRACYUWP_HPP__ + +#ifdef _WIN32 +# include <winapifamily.h> +# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +# define TRACY_UWP +# endif +#endif + +#endif diff --git a/3rdparty/tracy/tracy/common/TracyYield.hpp b/3rdparty/tracy/tracy/common/TracyYield.hpp new file mode 100644 index 0000000..403ca29 --- /dev/null +++ b/3rdparty/tracy/tracy/common/TracyYield.hpp @@ -0,0 +1,26 @@ +#ifndef __TRACYYIELD_HPP__ +#define __TRACYYIELD_HPP__ + +#if defined __SSE2__ || defined _M_AMD64 || _M_IX86_FP == 2 +# include <emmintrin.h> +#else +# include <thread> +#endif + +#include "TracyForceInline.hpp" + +namespace tracy +{ + +static tracy_force_inline void YieldThread() +{ +#if defined __SSE2__ || defined _M_AMD64 || _M_IX86_FP == 2 + _mm_pause(); +#else + std::this_thread::yield(); +#endif +} + +} + +#endif diff --git a/3rdparty/tracy/tracy/common/src-from-vcxproj.mk b/3rdparty/tracy/tracy/common/src-from-vcxproj.mk new file mode 100644 index 0000000..3a16b19 --- /dev/null +++ b/3rdparty/tracy/tracy/common/src-from-vcxproj.mk @@ -0,0 +1,21 @@ +# Extract the actual list of source files from a sibling Visual Studio project. + +# Ensure these are simply-substituted variables, without changing their values. +SRC := $(SRC) +SRC2 := $(SRC2) +SRC3 := $(SRC3) +SRC4 := $(SRC4) + +# Paths here are relative to the directory in which make was invoked, not to +# this file, so ../win32/$(PROJECT).vcxproj refers to the Visual Studio project +# of whichever tool is including this makefile fragment. + +BASE := $(shell egrep 'ClCompile.*cpp"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g') +BASE2 := $(shell egrep 'ClCompile.*c"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g') +BASE4 := $(shell egrep 'None.*S"' ../win32/$(PROJECT).vcxproj | sed -e 's/.*\"\(.*\)\".*/\1/' | sed -e 's@\\@/@g') + +# The tool-specific makefile may request that certain files be omitted. +SRC += $(filter-out $(FILTER),$(BASE)) +SRC2 += $(filter-out $(FILTER),$(BASE2)) +SRC3 += $(filter-out $(FILTER),$(BASE3)) +SRC4 += $(filter-out $(FILTER),$(BASE4)) diff --git a/3rdparty/tracy/tracy/common/tracy_lz4.cpp b/3rdparty/tracy/tracy/common/tracy_lz4.cpp new file mode 100644 index 0000000..5a31aa7 --- /dev/null +++ b/3rdparty/tracy/tracy/common/tracy_lz4.cpp @@ -0,0 +1,2492 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Tuning parameters +**************************************/ +/* + * LZ4_HEAPMODE : + * Select how default compression functions will allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). + */ +#ifndef LZ4_HEAPMODE +# define LZ4_HEAPMODE 0 +#endif + +/* + * LZ4_ACCELERATION_DEFAULT : + * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 + */ +#define LZ4_ACCELERATION_DEFAULT 1 +/* + * LZ4_ACCELERATION_MAX : + * Any "acceleration" value higher than this threshold + * get treated as LZ4_ACCELERATION_MAX instead (fix #876) + */ +#define LZ4_ACCELERATION_MAX 65537 + + +/*-************************************ +* CPU Feature Detection +**************************************/ +/* LZ4_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which assembly generation depends on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +# if defined(__GNUC__) && \ + ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define LZ4_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) +# define LZ4_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ +# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + + + +/*-************************************ +* Dependency +**************************************/ +/* + * LZ4_SRC_INCLUDED: + * Amalgamation flag, whether lz4.c is included + */ +#ifndef LZ4_SRC_INCLUDED +# define LZ4_SRC_INCLUDED 1 +#endif + +#ifndef LZ4_STATIC_LINKING_ONLY +#define LZ4_STATIC_LINKING_ONLY +#endif + +#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ +#endif + +#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */ +#include "tracy_lz4.hpp" +/* see also "memory routines" below */ + + +/*-************************************ +* Compiler Options +**************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ +# include <intrin.h> /* only present in VS2005+ */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif /* _MSC_VER */ + +#ifndef LZ4_FORCE_INLINE +# ifdef _MSC_VER /* Visual Studio */ +# define LZ4_FORCE_INLINE static __forceinline +# else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define LZ4_FORCE_INLINE static inline +# endif +# else +# define LZ4_FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +# endif /* _MSC_VER */ +#endif /* LZ4_FORCE_INLINE */ + +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE + * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, + * together with a simple 8-byte copy loop as a fall-back path. + * However, this optimization hurts the decompression speed by >30%, + * because the execution does not go to the optimized loop + * for typical compressible data, and all of the preamble checks + * before going to the fall-back path become useless overhead. + * This optimization happens only with the -O3 flag, and -O2 generates + * a simple 8-byte copy loop. + * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 + * functions are annotated with __attribute__((optimize("O2"))), + * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute + * of LZ4_wildCopy8 does not affect the compression speed. + */ +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) +#else +# define LZ4_FORCE_O2 +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#ifndef likely +#define likely(expr) expect((expr) != 0, 1) +#endif +#ifndef unlikely +#define unlikely(expr) expect((expr) != 0, 0) +#endif + +/* Should the alignment test prove unreliable, for some reason, + * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ +#ifndef LZ4_ALIGN_TEST /* can be externally provided */ +# define LZ4_ALIGN_TEST 1 +#endif + + +/*-************************************ +* Memory routines +**************************************/ +#ifdef LZ4_USER_MEMORY_FUNCTIONS +/* memory management functions can be customized by user project. + * Below functions must exist somewhere in the Project + * and be available at link time */ +void* LZ4_malloc(size_t s); +void* LZ4_calloc(size_t n, size_t s); +void LZ4_free(void* p); +# define ALLOC(s) LZ4_malloc(s) +# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) +# define FREEMEM(p) LZ4_free(p) +#else +# include <stdlib.h> /* malloc, calloc, free */ +# define ALLOC(s) malloc(s) +# define ALLOC_AND_ZERO(s) calloc(1,s) +# define FREEMEM(p) free(p) +#endif + +#include <string.h> /* memset, memcpy */ +#define MEM_INIT(p,v,s) memset((p),(v),(s)) + + +/*-************************************ +* Common Constants +**************************************/ +#define MINMATCH 4 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ +#define FASTLOOP_SAFE_DISTANCE 64 +static const int LZ4_minLength = (MFLIMIT+1); + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define LZ4_DISTANCE_ABSOLUTE_MAX 65535 +#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ +# error "LZ4_DISTANCE_MAX is too big : must be <= 65535" +#endif + +#define ML_BITS 4 +#define ML_MASK ((1U<<ML_BITS)-1) +#define RUN_BITS (8-ML_BITS) +#define RUN_MASK ((1U<<RUN_BITS)-1) + + +/*-************************************ +* Error detection +**************************************/ +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1) +# include <assert.h> +#else +# ifndef assert +# define assert(condition) ((void)0) +# endif +#endif + +#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) +# include <stdio.h> + static int g_debuglog_enable = 1; +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ ": "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define DEBUGLOG(l, ...) {} /* disabled */ +#endif + +static int LZ4_isAligned(const void* ptr, size_t alignment) +{ + return ((size_t)ptr & (alignment -1)) == 0; +} + + +/*-************************************ +* Types +**************************************/ +#include <limits.h> +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef uintptr_t uptrval; +#else +# if UINT_MAX != 4294967295UL +# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" +# endif + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef size_t uptrval; /* generally true, except OpenVMS-64 */ +#endif + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ +#endif + +typedef enum { + notLimited = 0, + limitedOutput = 1, + fillOutput = 2 +} limitedOutput_directive; + +namespace tracy +{ + +/*-************************************ +* Reading and writing into memory +**************************************/ + +/** + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so it can't apply its specialized memcpy() inlining + * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze + * memcpy() as if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. + */ +#if defined(__GNUC__) && (__GNUC__ >= 4) +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +#else +#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) +#endif + +static unsigned LZ4_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + + +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ + +static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } +static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } + +static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign; + +static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } + +static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } + +#else /* safe and portable access using memcpy() */ + +static U16 LZ4_read16(const void* memPtr) +{ + U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 LZ4_read32(const void* memPtr) +{ + U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static reg_t LZ4_read_ARCH(const void* memPtr) +{ + reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static void LZ4_write16(void* memPtr, U16 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +static void LZ4_write32(void* memPtr, U32 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* LZ4_FORCE_MEMORY_ACCESS */ + + +static U16 LZ4_readLE16(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)((U16)p[0] + (p[1]<<8)); + } +} + +static void LZ4_writeLE16(void* memPtr, U16 value) +{ + if (LZ4_isLittleEndian()) { + LZ4_write16(memPtr, value); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + } +} + +/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ +LZ4_FORCE_INLINE +void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e); +} + +static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4}; +static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3}; + + +#ifndef LZ4_FAST_DEC_LOOP +# if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64 +# define LZ4_FAST_DEC_LOOP 1 +# elif defined(__aarch64__) && !defined(__clang__) + /* On aarch64, we disable this optimization for clang because on certain + * mobile chipsets, performance is reduced with clang. For information + * refer to https://github.com/lz4/lz4/pull/707 */ +# define LZ4_FAST_DEC_LOOP 1 +# else +# define LZ4_FAST_DEC_LOOP 0 +# endif +#endif + +#if LZ4_FAST_DEC_LOOP + +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +{ + assert(srcPtr + offset == dstPtr); + if (offset < 8) { + LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */ + dstPtr[0] = srcPtr[0]; + dstPtr[1] = srcPtr[1]; + dstPtr[2] = srcPtr[2]; + dstPtr[3] = srcPtr[3]; + srcPtr += inc32table[offset]; + LZ4_memcpy(dstPtr+4, srcPtr, 4); + srcPtr -= dec64table[offset]; + dstPtr += 8; + } else { + LZ4_memcpy(dstPtr, srcPtr, 8); + dstPtr += 8; + srcPtr += 8; + } + + LZ4_wildCopy8(dstPtr, srcPtr, dstEnd); +} + +/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd + * this version copies two times 16 bytes (instead of one time 32 bytes) + * because it must be compatible with offsets >= 16. */ +LZ4_FORCE_INLINE void +LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e); +} + +/* LZ4_memcpy_using_offset() presumes : + * - dstEnd >= dstPtr + MINMATCH + * - there is at least 8 bytes available to write after dstEnd */ +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +{ + BYTE v[8]; + + assert(dstEnd >= dstPtr + MINMATCH); + + switch(offset) { + case 1: + MEM_INIT(v, *srcPtr, 8); + break; + case 2: + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); + LZ4_memcpy(&v[4], v, 4); + break; + case 4: + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); + break; + default: + LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); + return; + } + + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + while (dstPtr < dstEnd) { + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + } +} +#endif + + +/*-************************************ +* Common functions +**************************************/ +LZ4_FORCE_INLINE unsigned LZ4_NbCommonBytes (reg_t val) +{ + assert(val != 0); + if (LZ4_isLittleEndian()) { + if (sizeof(val) == 8) { +# if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT) + /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ + return (unsigned)_tzcnt_u64(val) >> 3; +# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64(&r, (U64)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctzll((U64)val) >> 3; +# else + const U64 m = 0x0101010101010101ULL; + val ^= val - 1; + return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); +# endif + } else /* 32 bits */ { +# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz((U32)val) >> 3; +# else + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; +# endif + } + } else /* Big Endian CPU */ { + if (sizeof(val)==8) { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clzll((U64)val) >> 3; +# else +#if 1 + /* this method is probably faster, + * but adds a 128 bytes lookup table */ + static const unsigned char ctz7_tab[128] = { + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + }; + U64 const mask = 0x0101010101010101ULL; + U64 const t = (((val >> 8) - mask) | val) & mask; + return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; +#else + /* this method doesn't consume memory space like the previous one, + * but it contains several branches, + * that may end up slowing execution */ + static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. + Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. + Note that this code path is never triggered in 32-bits mode. */ + unsigned r; + if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +#endif +# endif + } else /* 32 bits */ { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clz((U32)val) >> 3; +# else + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> 24; + return (unsigned)val ^ 3; +# endif + } + } +} + + +#define STEPSIZE sizeof(reg_t) +LZ4_FORCE_INLINE +unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + if (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn+=STEPSIZE; pMatch+=STEPSIZE; + } else { + return LZ4_NbCommonBytes(diff); + } } + + while (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } + pIn += LZ4_NbCommonBytes(diff); + return (unsigned)(pIn - pStart); + } + + if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++; + return (unsigned)(pIn - pStart); +} + + +#ifndef LZ4_COMMONDEFS_ONLY +/*-************************************ +* Local Constants +**************************************/ +static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1)); +static const U32 LZ4_skipTrigger = 6; /* Increase this value ==> compression run slower on incompressible data */ + + +/*-************************************ +* Local Structures and types +**************************************/ +typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; + +/** + * This enum distinguishes several different modes of accessing previous + * content in the stream. + * + * - noDict : There is no preceding content. + * - withPrefix64k : Table entries up to ctx->dictSize before the current blob + * blob being compressed are valid and refer to the preceding + * content (of length ctx->dictSize), which is available + * contiguously preceding in memory the content currently + * being compressed. + * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere + * else in memory, starting at ctx->dictionary with length + * ctx->dictSize. + * - usingDictCtx : Like usingExtDict, but everything concerning the preceding + * content is in a separate context, pointed to by + * ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table + * entries in the current context that refer to positions + * preceding the beginning of the current compression are + * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx + * ->dictSize describe the location and size of the preceding + * content, and matches are found by looking in the ctx + * ->dictCtx->hashTable. + */ +typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + + +/*-************************************ +* Local Utils +**************************************/ +int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } +const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } +int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; } + + +/*-************************************ +* Internal Definitions used in Tests +**************************************/ + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); + +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize); + +/*-****************************** +* Compression functions +********************************/ +LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) +{ + if (tableType == byU16) + return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +{ + const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; + if (LZ4_isLittleEndian()) { + const U64 prime5bytes = 889523592379ULL; + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); + } else { + const U64 prime8bytes = 11400714785074694791ULL; + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); + } +} + +LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) +{ + if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); + return LZ4_hash4(LZ4_read32(p), tableType); +} + +LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: /* fallthrough */ + case byPtr: { /* illegal! */ assert(0); return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } + case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, + void* tableBase, tableType_t const tableType, + const BYTE* srcBase) +{ + switch (tableType) + { + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +/* LZ4_getIndexOnHash() : + * Index of match position registered in hash table. + * hash position must be calculated by using base+index, or dictBase+index. + * Assumption 1 : only valid if tableType == byU32 or byU16. + * Assumption 2 : h is presumed valid (within limits of hash table) + */ +LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) +{ + LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); + if (tableType == byU32) { + const U32* const hashTable = (const U32*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-2))); + return hashTable[h]; + } + if (tableType == byU16) { + const U16* const hashTable = (const U16*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-1))); + return hashTable[h]; + } + assert(0); return 0; /* forbidden case */ +} + +static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } + if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; } + { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ +} + +LZ4_FORCE_INLINE const BYTE* +LZ4_getPosition(const BYTE* p, + const void* tableBase, tableType_t tableType, + const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + +LZ4_FORCE_INLINE void +LZ4_prepareTable(LZ4_stream_t_internal* const cctx, + const int inputSize, + const tableType_t tableType) { + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is + * therefore safe to use no matter what mode we're in. Otherwise, we figure + * out if it's safe to leave as is or whether it needs to be reset. + */ + if ((tableType_t)cctx->tableType != clearedTable) { + assert(inputSize >= 0); + if ((tableType_t)cctx->tableType != tableType + || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) + || ((tableType == byU32) && cctx->currentOffset > 1 GB) + || tableType == byPtr + || inputSize >= 4 KB) + { + DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); + MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); + cctx->currentOffset = 0; + cctx->tableType = (U32)clearedTable; + } else { + DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); + } + } + + /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster + * than compressing without a gap. However, compressing with + * currentOffset == 0 is faster still, so we preserve that case. + */ + if (cctx->currentOffset != 0 && tableType == byU32) { + DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); + cctx->currentOffset += 64 KB; + } + + /* Finally, clear history */ + cctx->dictCtx = NULL; + cctx->dictionary = NULL; + cctx->dictSize = 0; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time. + * Presumed already validated at this stage: + * - source != NULL + * - inputSize > 0 + */ +LZ4_FORCE_INLINE int LZ4_compress_generic_validated( + LZ4_stream_t_internal* const cctx, + const char* const source, + char* const dest, + const int inputSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int maxOutputSize, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + int result; + const BYTE* ip = (const BYTE*) source; + + U32 const startIndex = cctx->currentOffset; + const BYTE* base = (const BYTE*) source - startIndex; + const BYTE* lowLimit; + + const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; + const BYTE* const dictionary = + dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; + const U32 dictSize = + dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; + const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */ + + int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); + U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ + const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; + const BYTE* const matchlimit = iend - LASTLITERALS; + + /* the dictCtx currentOffset is indexed on the start of the dictionary, + * while a dictionary in the current context precedes the currentOffset */ + const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ? + dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; + + BYTE* op = (BYTE*) dest; + BYTE* const olimit = op + maxOutputSize; + + U32 offset = 0; + U32 forwardH; + + DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); + assert(ip != NULL); + /* If init conditions are not met, we don't have to mark stream + * as having dirty context, since no action was taken yet */ + if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */ + if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */ + if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ + assert(acceleration >= 1); + + lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); + + /* Update context state */ + if (dictDirective == usingDictCtx) { + /* Subsequent linked blocks can't use the dictionary. */ + /* Instead, they use the block we just compressed. */ + cctx->dictCtx = NULL; + cctx->dictSize = (U32)inputSize; + } else { + cctx->dictSize += (U32)inputSize; + } + cctx->currentOffset += (U32)inputSize; + cctx->tableType = (U32)tableType; + + if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ + + /* First Byte */ + LZ4_putPosition(ip, cctx->hashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); + + /* Main Loop */ + for ( ; ; ) { + const BYTE* match; + BYTE* token; + const BYTE* filledIp; + + /* Find a match */ + if (tableType == byPtr) { + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base); + + } while ( (match+LZ4_DISTANCE_MAX < ip) + || (LZ4_read32(match) != LZ4_read32(ip)) ); + + } else { /* byU32, byU16 */ + + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + U32 const current = (U32)(forwardIp - base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex <= current); + assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + matchIndex += dictDelta; /* make dictCtx index comparable with current context */ + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); + assert(startIndex - matchIndex >= MINMATCH); + match = dictBase + matchIndex; + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else { /* single continuous memory segment */ + match = base + matchIndex; + } + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + + DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); + if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ + assert(matchIndex < current); + if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) + && (matchIndex+LZ4_DISTANCE_MAX < current)) { + continue; + } /* too far */ + assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ + + if (LZ4_read32(match) == LZ4_read32(ip)) { + if (maybe_extMem) offset = current - matchIndex; + break; /* match found */ + } + + } while(1); + } + + /* Catch up */ + filledIp = ip; + while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } + + /* Encode Literals */ + { unsigned const litLength = (unsigned)(ip - anchor); + token = op++; + if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ + (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + if ((outputDirective == fillOutput) && + (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + int len = (int)(litLength - RUN_MASK); + *token = (RUN_MASK<<ML_BITS); + for(; len >= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(litLength<<ML_BITS); + + /* Copy Literals */ + LZ4_wildCopy8(op, anchor, op+litLength); + op+=litLength; + DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source)); + } + +_next_match: + /* at this stage, the following variables must be correctly set : + * - ip : at start of LZ operation + * - match : at start of previous pattern occurence; can be within current prefix, or within extDict + * - offset : if maybe_ext_memSegment==1 (constant) + * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise + * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written + */ + + if ((outputDirective == fillOutput) && + (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) { + /* the match was too close to the end, rewind and go to last literals */ + op = token; + goto _last_literals; + } + + /* Encode Offset */ + if (maybe_extMem) { /* static test */ + DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); + assert(offset <= LZ4_DISTANCE_MAX && offset > 0); + LZ4_writeLE16(op, (U16)offset); op+=2; + } else { + DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); + assert(ip-match <= LZ4_DISTANCE_MAX); + LZ4_writeLE16(op, (U16)(ip - match)); op+=2; + } + + /* Encode MatchLength */ + { unsigned matchCode; + + if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) + && (lowLimit==dictionary) /* match within extDict */ ) { + const BYTE* limit = ip + (dictEnd-match); + assert(dictEnd > match); + if (limit > matchlimit) limit = matchlimit; + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); + ip += (size_t)matchCode + MINMATCH; + if (ip==limit) { + unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); + matchCode += more; + ip += more; + } + DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); + } else { + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); + ip += (size_t)matchCode + MINMATCH; + DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); + } + + if ((outputDirective) && /* Check output buffer overflow */ + (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { + if (outputDirective == fillOutput) { + /* Match description too long : reduce it */ + U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; + ip -= matchCode - newMatchCode; + assert(newMatchCode < matchCode); + matchCode = newMatchCode; + if (unlikely(ip <= filledIp)) { + /* We have already filled up to filledIp so if ip ends up less than filledIp + * we have positions in the hash table beyond the current position. This is + * a problem if we reuse the hash table. So we have to remove these positions + * from the hash table. + */ + const BYTE* ptr; + DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); + for (ptr = ip; ptr <= filledIp; ++ptr) { + U32 const h = LZ4_hashPosition(ptr, tableType); + LZ4_clearHash(h, cctx->hashTable, tableType); + } + } + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4*255) { + op+=4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4*255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + /* Ensure we have enough space for the last literals. */ + assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); + + anchor = ip; + + /* Test end of chunk */ + if (ip >= mflimitPlusOne) break; + + /* Fill table */ + LZ4_putPosition(ip-2, cctx->hashTable, tableType, base); + + /* Test next position */ + if (tableType == byPtr) { + + match = LZ4_getPosition(ip, cctx->hashTable, tableType, base); + LZ4_putPosition(ip, cctx->hashTable, tableType, base); + if ( (match+LZ4_DISTANCE_MAX >= ip) + && (LZ4_read32(match) == LZ4_read32(ip)) ) + { token=op++; *token=0; goto _next_match; } + + } else { /* byU32, byU16 */ + + U32 const h = LZ4_hashPosition(ip, tableType); + U32 const current = (U32)(ip-base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex < current); + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + matchIndex += dictDelta; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else { /* single memory segment */ + match = base + matchIndex; + } + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + assert(matchIndex < current); + if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) + && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) + && (LZ4_read32(match) == LZ4_read32(ip)) ) { + token=op++; + *token=0; + if (maybe_extMem) offset = current - matchIndex; + DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); + goto _next_match; + } + } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRun = (size_t)(iend - anchor); + if ( (outputDirective) && /* Check output buffer overflow */ + (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { + if (outputDirective == fillOutput) { + /* adapt lastRun to fill 'dst' */ + assert(olimit >= op); + lastRun = (size_t)(olimit-op) - 1/*token*/; + lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRun<<ML_BITS); + } + LZ4_memcpy(op, anchor, lastRun); + ip = anchor + lastRun; + op += lastRun; + } + + if (outputDirective == fillOutput) { + *inputConsumed = (int) (((const char*)ip)-source); + } + result = (int)(((char*)op) - dest); + assert(result > 0); + DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); + return result; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time; + * takes care of src == (NULL, 0) + * and forward the rest to LZ4_compress_generic_validated */ +LZ4_FORCE_INLINE int LZ4_compress_generic( + LZ4_stream_t_internal* const cctx, + const char* const src, + char* const dst, + const int srcSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int dstCapacity, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", + srcSize, dstCapacity); + + if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ + if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ + if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ + DEBUGLOG(5, "Generating an empty block"); + assert(outputDirective == notLimited || dstCapacity >= 1); + assert(dst != NULL); + dst[0] = 0; + if (outputDirective == fillOutput) { + assert (inputConsumed != NULL); + *inputConsumed = 0; + } + return 1; + } + assert(src != NULL); + + return LZ4_compress_generic_validated(cctx, src, dst, srcSize, + inputConsumed, /* only written into if outputDirective == fillOutput */ + dstCapacity, outputDirective, + tableType, dictDirective, dictIssue, acceleration); +} + + +int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; + assert(ctx != NULL); + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + if (maxOutputSize >= LZ4_compressBound(inputSize)) { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + +/** + * LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of + * "correctly initialized"). + */ +int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) +{ + LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + if (dstCapacity >= LZ4_compressBound(srcSize)) { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + + +int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + int result; +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctxPtr == NULL) return 0; +#else + LZ4_stream_t ctx; + LZ4_stream_t* const ctxPtr = &ctx; +#endif + result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); + +#if (LZ4_HEAPMODE) + FREEMEM(ctxPtr); +#endif + return result; +} + + +int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize) +{ + return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1); +} + + +/* Note!: This function leaves the stream in an unclean/broken state! + * It is not safe to subsequently use the same state with a _fastReset() or + * _continue() call without resetting it. */ +static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ + void* const s = LZ4_initStream(state, sizeof (*state)); + assert(s != NULL); (void)s; + + if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ + return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); + } else { + if (*srcSizePtr < LZ4_64Klimit) { + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1); + } else { + tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1); + } } +} + + +int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctx == NULL) return 0; +#else + LZ4_stream_t ctxBody; + LZ4_stream_t* ctx = &ctxBody; +#endif + + int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); + +#if (LZ4_HEAPMODE) + FREEMEM(ctx); +#endif + return result; +} + + + +/*-****************************** +* Streaming functions +********************************/ + +LZ4_stream_t* LZ4_createStream(void) +{ + LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); + LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ + DEBUGLOG(4, "LZ4_createStream %p", lz4s); + if (lz4s == NULL) return NULL; + LZ4_initStream(lz4s, sizeof(*lz4s)); + return lz4s; +} + +static size_t LZ4_stream_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_stream_t t; } t_a; + return sizeof(t_a) - sizeof(LZ4_stream_t); +#else + return 1; /* effectively disabled */ +#endif +} + +LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) +{ + DEBUGLOG(5, "LZ4_initStream"); + if (buffer == NULL) { return NULL; } + if (size < sizeof(LZ4_stream_t)) { return NULL; } + if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; + MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); + return (LZ4_stream_t*)buffer; +} + +/* resetStream is now deprecated, + * prefer initStream() which is more general */ +void LZ4_resetStream (LZ4_stream_t* LZ4_stream) +{ + DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); +} + +void LZ4_resetStream_fast(LZ4_stream_t* ctx) { + LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); +} + +int LZ4_freeStream (LZ4_stream_t* LZ4_stream) +{ + if (!LZ4_stream) return 0; /* support free on NULL */ + DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); + FREEMEM(LZ4_stream); + return (0); +} + + +#define HASH_UNIT sizeof(reg_t) +int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +{ + LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse; + const tableType_t tableType = byU32; + const BYTE* p = (const BYTE*)dictionary; + const BYTE* const dictEnd = p + dictSize; + const BYTE* base; + + DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); + + /* It's necessary to reset the context, + * and not just continue it with prepareTable() + * to avoid any risk of generating overflowing matchIndex + * when compressing using this dictionary */ + LZ4_resetStream(LZ4_dict); + + /* We always increment the offset by 64 KB, since, if the dict is longer, + * we truncate it to the last 64k, and if it's shorter, we still want to + * advance by a whole window length so we can provide the guarantee that + * there are only valid offsets in the window, which allows an optimization + * in LZ4_compress_fast_continue() where it uses noDictIssue even when the + * dictionary isn't a full 64k. */ + dict->currentOffset += 64 KB; + + if (dictSize < (int)HASH_UNIT) { + return 0; + } + + if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; + base = dictEnd - dict->currentOffset; + dict->dictionary = p; + dict->dictSize = (U32)(dictEnd - p); + dict->tableType = (U32)tableType; + + while (p <= dictEnd-HASH_UNIT) { + LZ4_putPosition(p, dict->hashTable, tableType, base); + p+=3; + } + + return (int)dict->dictSize; +} + +void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) { + const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL : + &(dictionaryStream->internal_donotuse); + + DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", + workingStream, dictionaryStream, + dictCtx != NULL ? dictCtx->dictSize : 0); + + if (dictCtx != NULL) { + /* If the current offset is zero, we will never look in the + * external dictionary context, since there is no value a table + * entry can take that indicate a miss. In that case, we need + * to bump the offset to something non-zero. + */ + if (workingStream->internal_donotuse.currentOffset == 0) { + workingStream->internal_donotuse.currentOffset = 64 KB; + } + + /* Don't actually attach an empty dictionary. + */ + if (dictCtx->dictSize == 0) { + dictCtx = NULL; + } + } + workingStream->internal_donotuse.dictCtx = dictCtx; +} + + +static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) +{ + assert(nextSize >= 0); + if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ + /* rescale hash table */ + U32 const delta = LZ4_dict->currentOffset - 64 KB; + const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; + int i; + DEBUGLOG(4, "LZ4_renormDictT"); + for (i=0; i<LZ4_HASH_SIZE_U32; i++) { + if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0; + else LZ4_dict->hashTable[i] -= delta; + } + LZ4_dict->currentOffset = 64 KB; + if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; + LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; + } +} + + +int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, + const char* source, char* dest, + int inputSize, int maxOutputSize, + int acceleration) +{ + const tableType_t tableType = byU32; + LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse; + const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize; + + DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize); + + LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */ + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + /* invalidate tiny dictionaries */ + if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */ + && (dictEnd != (const BYTE*)source) ) { + DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary); + streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)source; + dictEnd = (const BYTE*)source; + } + + /* Check overlapping input/dictionary space */ + { const BYTE* sourceEnd = (const BYTE*) source + inputSize; + if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) { + streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + streamPtr->dictionary = dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == (const BYTE*)source) { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); + else + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); + } + + /* external dictionary mode */ + { int result; + if (streamPtr->dictCtx) { + /* We depend here on the fact that dictCtx'es (produced by + * LZ4_loadDict) guarantee that their tables contain no references + * to offsets between dictCtx->currentOffset - 64 KB and + * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe + * to use noDictIssue even when the dict isn't a full 64 KB. + */ + if (inputSize > 4 KB) { + /* For compressing large blobs, it is faster to pay the setup + * cost to copy the dictionary's tables into the active context, + * so that the compression loop is only looking into one table. + */ + LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); + } + } else { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } + } + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)inputSize; + return result; + } +} + + +/* Hidden debug function, to force-test external dictionary mode */ +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) +{ + LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; + int result; + + LZ4_renormDictT(streamPtr, srcSize); + + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); + } + + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)srcSize; + + return result; +} + + +/*! LZ4_saveDict() : + * If previously compressed data block is not guaranteed to remain available at its memory location, + * save it into a safer place (char* safeBuffer). + * Note : you don't need to call LZ4_loadDict() afterwards, + * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue(). + * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. + */ +int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; + + if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ + if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } + + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + memmove(safeBuffer, previousDictEnd - dictSize, dictSize); + + dict->dictionary = (const BYTE*)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} + + + +/*-******************************* + * Decompression functions + ********************************/ + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +/* Read the variable-length literal or match length. + * + * ip - pointer to use as input. + * lencheck - end ip. Return an error if ip advances >= lencheck. + * loop_check - check ip >= lencheck in body of loop. Returns loop_error if so. + * initial_check - check ip >= lencheck before start of loop. Returns initial_error if so. + * error (output) - error code. Should be set to 0 before call. + */ +typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error; +LZ4_FORCE_INLINE unsigned +read_variable_length(const BYTE**ip, const BYTE* lencheck, + int loop_check, int initial_check, + variable_length_error* error) +{ + U32 length = 0; + U32 s; + if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = initial_error; + return length; + } + do { + s = **ip; + (*ip)++; + length += s; + if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = loop_error; + return length; + } + } while (s==255); + + return length; +} + +/*! LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, + * in order to remove useless branches during compilation optimization. + */ +LZ4_FORCE_INLINE int +LZ4_decompress_generic( + const char* const src, + char* const dst, + int srcSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ + + endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ + const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note : = 0 if noDict */ + ) +{ + if (src == NULL) { return -1; } + + { const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + outputSize; + BYTE* cpy; + + const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; + + const int safeDecode = (endOnInput==endOnInputSize); + const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); + + + /* Set up the "end" pointers for the shortcut. */ + const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; + const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; + + const BYTE* match; + size_t offset; + unsigned token; + size_t length; + + + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); + + /* Special cases */ + assert(lowPrefix <= op); + if ((endOnInput) && (unlikely(outputSize==0))) { + /* Empty output buffer */ + if (partialDecoding) return 0; + return ((srcSize==1) && (*ip==0)) ? 0 : -1; + } + if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); } + if ((endOnInput) && unlikely(srcSize==0)) { return -1; } + + /* Currently the fast loop shows a regression on qualcomm arm chips. */ +#if LZ4_FAST_DEC_LOOP + if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(6, "skip fast decode loop"); + goto safe_decode; + } + + /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */ + while (1) { + /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ + assert(oend - op >= FASTLOOP_SAFE_DISTANCE); + if (endOnInput) { assert(ip < iend); } + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + + /* copy literals */ + cpy = op+length; + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if (endOnInput) { /* LZ4_decompress_safe() */ + if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } + LZ4_wildCopy32(op, ip, cpy); + } else { /* LZ4_decompress_fast() */ + if (cpy>oend-8) { goto safe_literal_copy; } + LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and only relies on end-of-block properties */ + } + ip += length; op = cpy; + } else { + cpy = op+length; + if (endOnInput) { /* LZ4_decompress_safe() */ + DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); + /* We don't need to check oend, since we check it once for each loop below */ + if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } + /* Literals can only be 14, but hope compilers optimize if we copy by a register size */ + LZ4_memcpy(op, ip, 16); + } else { /* LZ4_decompress_fast() */ + /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and relies on end-of-block properties */ + LZ4_memcpy(op, ip, 8); + if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); } + } + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + assert(match <= op); + + /* get matchlength */ + length = token & ML_MASK; + + if (length == ML_MASK) { + variable_length_error error = ok; + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + } else { + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + + /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */ + if ((dict == withPrefix64k) || (match >= lowPrefix)) { + if (offset >= 8) { + assert(match >= lowPrefix); + assert(match <= op); + assert(op + 18 <= oend); + + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op+8, match+8, 8); + LZ4_memcpy(op+16, match+16, 2); + op += length; + continue; + } } } + + if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) { + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); + } else { + goto _output_error; /* end-of-block condition violated */ + } } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) { *op++ = *copyFrom++; } + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + + /* copy match within block */ + cpy = op + length; + + assert((op <= oend) && (oend-op >= 32)); + if (unlikely(offset<16)) { + LZ4_memcpy_using_offset(op, match, cpy, offset); + } else { + LZ4_wildCopy32(op, match, cpy); + } + + op = cpy; /* wildcopy correction */ + } + safe_decode: +#endif + + /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ + while (1) { + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough space, + * enter the shortcut and copy 16 bytes on behalf of the literals + * (in the fast mode, only 8 bytes can be safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes in a similar + * manner; but we ensure that there's enough space in the output for + * those 18 bytes earlier, upon entering the shortcut (in other words, + * there is a combined check for both stages). + */ + if ( (endOnInput ? length != RUN_MASK : length <= 8) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { + /* Copy the literals */ + LZ4_memcpy(op, ip, endOnInput ? 16 : 8); + op += length; ip += length; + + /* The second stage: prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ( (length != ML_MASK) + && (offset >= 8) + && (dict==withPrefix64k || match >= lowPrefix) ) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op +16, match +16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* The second stage didn't work out, but the info is ready. + * Propel it right to the point of match copying. */ + goto _copy_match; + } + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + } + + /* copy literals */ + cpy = op+length; +#if LZ4_FAST_DEC_LOOP + safe_literal_copy: +#endif + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) + { + /* We've either hit the input parsing restriction or the output parsing restriction. + * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. + */ + if (partialDecoding) { + /* Since we are partial decoding we may be in this block because of the output parsing + * restriction, which is not valid since the output buffer is allowed to be undersized. + */ + assert(endOnInput); + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. + */ + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. + */ + if (cpy > oend) { + cpy = oend; + assert(op<=oend); + length = (size_t)(oend-op); + } + } else { + /* We must be on the last sequence because of the parsing limitations so check + * that we exactly regenerate the original size (must be exact when !endOnInput). + */ + if ((!endOnInput) && (cpy != oend)) { goto _output_error; } + /* We must be on the last sequence (or invalid) because of the parsing limitations + * so check that we exactly consume the input and don't overrun the output buffer. + */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { + DEBUGLOG(6, "should have been last run of literals") + DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); + DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend); + goto _output_error; + } + } + memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */ + ip += length; + op += length; + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. + */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { + break; + } + } else { + LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + + /* get matchlength */ + length = token & ML_MASK; + + _copy_match: + if (length == ML_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) goto _output_error; + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ + } + length += MINMATCH; + +#if LZ4_FAST_DEC_LOOP + safe_match_copy: +#endif + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, (size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) *op++ = *copyFrom++; + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + assert(match >= lowPrefix); + + /* copy match within block */ + cpy = op + length; + + /* partialDecoding : may end anywhere within the block */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) { break; } + continue; + } + + if (unlikely(offset<8)) { + LZ4_write32(op, 0); /* silence msan warning when offset==0 */ + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += inc32table[offset]; + LZ4_memcpy(op+4, match, 4); + match -= dec64table[offset]; + } else { + LZ4_memcpy(op, match, 8); + match += 8; + } + op += 8; + + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); + if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { + LZ4_wildCopy8(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + while (op < cpy) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, 8); + if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } + } + op = cpy; /* wildcopy correction */ + } + + /* end of decoding */ + if (endOnInput) { + DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); + return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ + } else { + return (int) (((const char*)ip)-src); /* Nb of input bytes read */ + } + + /* Overflow error detected */ + _output_error: + return (int) (-(((const char*)ip)-src))-1; + } +} + + +/*===== Instantiate the API decoding functions. =====*/ + +LZ4_FORCE_O2 +int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, + endOnInputSize, decode_full_block, noDict, + (BYTE*)dest, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + endOnInputSize, partial_decode, + noDict, (BYTE*)dst, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_fast(const char* source, char* dest, int originalSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/*===== Instantiate a few more decoding cases, used more than once. =====*/ + +LZ4_FORCE_O2 /* Exported, an obsolete API function. */ +int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/* Another obsolete API function, paired with the previous one. */ +int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) +{ + /* LZ4_decompress_fast doesn't validate match offsets, + * and thus serves well with any prefixed dictionary. */ + return LZ4_decompress_fast(source, dest, originalSize); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, noDict, + (BYTE*)dest-prefixSize, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +/* The "double dictionary" mode, for use with e.g. ring buffers: the first part + * of the dictionary is passed as prefix, and the second via dictStart + dictSize. + * These routines are used only once, in LZ4_decompress_*_continue(). + */ +LZ4_FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_INLINE +int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +/*===== streaming decompression functions =====*/ + +LZ4_streamDecode_t* LZ4_createStreamDecode(void) +{ + LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); + LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal)); /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */ + return lz4s; +} + +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +{ + if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ + FREEMEM(LZ4_stream); + return 0; +} + +/*! LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). + * @return : 1 if OK, 0 if error + */ +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + lz4sd->prefixSize = (size_t) dictSize; + lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} + +/*! LZ4_decoderRingBufferSize() : + * when setting a ring buffer for streaming decompression (optional scenario), + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * Note : in a ring buffer scenario, + * blocks are presumed decompressed next to each other. + * When not enough space remains for next block (remainingSize < maxBlockSize), + * decoding resumes from beginning of ring buffer. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +int LZ4_decoderRingBufferSize(int maxBlockSize) +{ + if (maxBlockSize < 0) return 0; + if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; + if (maxBlockSize < 16) maxBlockSize = 16; + return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); +} + +/* +*_continue() : + These decoding functions allow decompression of multiple blocks in "streaming" mode. + Previously decoded blocks must still be available at the memory position where they were decoded. + If it's not possible, save the relevant part of decoded data into a safe buffer, + and indicate where it stands using LZ4_setStreamDecode() +*/ +LZ4_FORCE_O2 +int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. */ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + /* They're rolling the current segment. */ + if (lz4sd->prefixSize >= 64 KB - 1) + result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)result; + lz4sd->prefixEnd += result; + } else { + /* The buffer wraps around, or they're switching to another buffer. */ + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } + + return result; +} + +LZ4_FORCE_O2 +int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + assert(originalSize >= 0); + + if (lz4sd->prefixSize == 0) { + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0) + result = LZ4_decompress_fast(source, dest, originalSize); + else + result = LZ4_decompress_fast_doubleDict(source, dest, originalSize, + lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)originalSize; + lz4sd->prefixEnd += originalSize; + } else { + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_fast_extDict(source, dest, originalSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } + + return result; +} + + +/* +Advanced decoding functions : +*_usingDict() : + These decoding functions work the same as "_continue" ones, + the dictionary must be explicitly provided within parameters +*/ + +int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); +} + +int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) +{ + if (dictSize==0 || dictStart+dictSize == dest) + return LZ4_decompress_fast(source, dest, originalSize); + assert(dictSize >= 0); + return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); +} + + +/*=************************************************* +* Obsolete Functions +***************************************************/ +/* obsolete compression functions */ +int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_default(source, dest, inputSize, maxOutputSize); +} +int LZ4_compress(const char* src, char* dest, int srcSize) +{ + return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); +} +int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); +} +int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); +} +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) +{ + return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); +} +int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) +{ + return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); +} + +/* +These decompression functions are deprecated and should no longer be used. +They are only provided here for compatibility with older user programs. +- LZ4_uncompress is totally equivalent to LZ4_decompress_fast +- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe +*/ +int LZ4_uncompress (const char* source, char* dest, int outputSize) +{ + return LZ4_decompress_fast(source, dest, outputSize); +} +int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) +{ + return LZ4_decompress_safe(source, dest, isize, maxOutputSize); +} + +/* Obsolete Streaming functions */ + +int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; } + +int LZ4_resetStreamState(void* state, char* inputBuffer) +{ + (void)inputBuffer; + LZ4_resetStream((LZ4_stream_t*)state); + return 0; +} + +void* LZ4_create (char* inputBuffer) +{ + (void)inputBuffer; + return LZ4_createStream(); +} + +char* LZ4_slideInputBuffer (void* state) +{ + /* avoid const char * -> char * conversion warning */ + return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; +} + +#endif /* LZ4_COMMONDEFS_ONLY */ + +} diff --git a/3rdparty/tracy/tracy/common/tracy_lz4.hpp b/3rdparty/tracy/tracy/common/tracy_lz4.hpp new file mode 100644 index 0000000..1ccdcff --- /dev/null +++ b/3rdparty/tracy/tracy/common/tracy_lz4.hpp @@ -0,0 +1,777 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +#ifndef TRACY_LZ4_H_2983827168210 +#define TRACY_LZ4_H_2983827168210 + +/* --- Dependency --- */ +#include <stddef.h> /* size_t */ +#include <stdint.h> + + +/** + Introduction + + LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core, + scalable with multi-cores CPU. It features an extremely fast decoder, with speed in + multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. + + The LZ4 compression library provides in-memory compression and decompression functions. + It gives full buffer control to user. + Compression can be done in: + - a single step (described as Simple Functions) + - a single step, reusing a context (described in Advanced Functions) + - unbounded multiple steps (described as Streaming compression) + + lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md). + Decompressing such a compressed block requires additional metadata. + Exact metadata depends on exact decompression function. + For the typical case of LZ4_decompress_safe(), + metadata includes block's compressed size, and maximum bound of decompressed size. + Each application is free to encode and pass such metadata in whichever way it wants. + + lz4.h only handle blocks, it can not generate Frames. + + Blocks are different from Frames (doc/lz4_Frame_format.md). + Frames bundle both blocks and metadata in a specified manner. + Embedding metadata is required for compressed data to be self-contained and portable. + Frame format is delivered through a companion API, declared in lz4frame.h. + The `lz4` CLI can only manage frames. +*/ + +/*^*************************************************************** +* Export parameters +*****************************************************************/ +/* +* LZ4_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +* LZ4LIB_VISIBILITY : +* Control library symbols visibility. +*/ +#ifndef LZ4LIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define LZ4LIB_VISIBILITY +# endif +#endif +#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) +# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY +#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) +# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define LZ4LIB_API LZ4LIB_VISIBILITY +#endif + +/*------ Version ------*/ +#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ +#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */ +#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ + +#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) + +#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE +#define LZ4_QUOTE(str) #str +#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) +#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) + +namespace tracy +{ + +LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */ +LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */ + + +/*-************************************ +* Tuning parameter +**************************************/ +/*! + * LZ4_MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio. + * Reduced memory usage may improve speed, thanks to better cache locality. + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache + */ +#ifndef LZ4_MEMORY_USAGE +# define LZ4_MEMORY_USAGE 14 +#endif + + +/*-************************************ +* Simple Functions +**************************************/ +/*! LZ4_compress_default() : + * Compresses 'srcSize' bytes from buffer 'src' + * into already allocated 'dst' buffer of size 'dstCapacity'. + * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize). + * It also runs faster, so it's a recommended setting. + * If the function cannot compress 'src' into a more limited 'dst' budget, + * compression stops *immediately*, and the function result is zero. + * In which case, 'dst' content is undefined (invalid). + * srcSize : max supported value is LZ4_MAX_INPUT_SIZE. + * dstCapacity : size of buffer 'dst' (which must be already allocated) + * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity) + * or 0 if compression fails + * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer). + */ +LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity); + +/*! LZ4_decompress_safe() : + * compressedSize : is the exact complete size of the compressed block. + * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size. + * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity) + * If destination buffer is not large enough, decoding will stop and output an error code (negative value). + * If the source stream is detected malformed, the function will stop decoding and return a negative result. + * Note 1 : This function is protected against malicious data packets : + * it will never writes outside 'dst' buffer, nor read outside 'source' buffer, + * even if the compressed block is maliciously modified to order the decoder to do these actions. + * In such case, the decoder stops immediately, and considers the compressed block malformed. + * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them. + * The implementation is free to send / store / derive this information in whichever way is most beneficial. + * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead. + */ +LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity); + + +/*-************************************ +* Advanced Functions +**************************************/ +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) + +/*! LZ4_compressBound() : + Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) + This function is primarily useful for memory allocation purposes (destination buffer size). + Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). + Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize) + inputSize : max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is incorrect (too large or negative) +*/ +LZ4LIB_API int LZ4_compressBound(int inputSize); + +/*! LZ4_compress_fast() : + Same as LZ4_compress_default(), but allows selection of "acceleration" factor. + The larger the acceleration value, the faster the algorithm, but also the lesser the compression. + It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. + An acceleration value of "1" is the same as regular LZ4_compress_default() + Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). + Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). +*/ +LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_fast_extState() : + * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. + * Use LZ4_sizeofState() to know how much memory must be allocated, + * and allocate it on 8-bytes boundaries (using `malloc()` typically). + * Then, provide this buffer as `void* state` to compression function. + */ +LZ4LIB_API int LZ4_sizeofState(void); +LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_destSize() : + * Reverse the logic : compresses as much data as possible from 'src' buffer + * into already allocated buffer 'dst', of size >= 'targetDestSize'. + * This function either compresses the entire 'src' content into 'dst' if it's large enough, + * or fill 'dst' buffer completely with as much data as possible from 'src'. + * note: acceleration parameter is fixed to "default". + * + * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. + * New value is necessarily <= input value. + * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) + * or 0 if compression fails. + * + * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed un v1.9.2+): + * the produced compressed content could, in specific circumstances, + * require to be decompressed into a destination buffer larger + * by at least 1 byte than the content to decompress. + * If an application uses `LZ4_compress_destSize()`, + * it's highly recommended to update liblz4 to v1.9.2 or better. + * If this can't be done or ensured, + * the receiving decompression function should provide + * a dstCapacity which is > decompressedSize, by at least 1 byte. + * See https://github.com/lz4/lz4/issues/859 for details + */ +LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); + + +/*! LZ4_decompress_safe_partial() : + * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', + * into destination buffer 'dst' of size 'dstCapacity'. + * Up to 'targetOutputSize' bytes will be decoded. + * The function stops decoding on reaching this objective. + * This can be useful to boost performance + * whenever only the beginning of a block is required. + * + * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize) + * If source stream is detected malformed, function returns a negative result. + * + * Note 1 : @return can be < targetOutputSize, if compressed block contains less data. + * + * Note 2 : targetOutputSize must be <= dstCapacity + * + * Note 3 : this function effectively stops decoding on reaching targetOutputSize, + * so dstCapacity is kind of redundant. + * This is because in older versions of this function, + * decoding operation would still write complete sequences. + * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, + * it could write more bytes, though only up to dstCapacity. + * Some "margin" used to be required for this operation to work properly. + * Thankfully, this is no longer necessary. + * The function nonetheless keeps the same signature, in an effort to preserve API compatibility. + * + * Note 4 : If srcSize is the exact size of the block, + * then targetOutputSize can be any value, + * including larger than the block's decompressed size. + * The function will, at most, generate block's decompressed size. + * + * Note 5 : If srcSize is _larger_ than block's compressed size, + * then targetOutputSize **MUST** be <= block's decompressed size. + * Otherwise, *silent corruption will occur*. + */ +LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); + + +/*-********************************************* +* Streaming Compression Functions +***********************************************/ +typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ + +LZ4LIB_API LZ4_stream_t* LZ4_createStream(void); +LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr); + +/*! LZ4_resetStream_fast() : v1.9.0+ + * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks + * (e.g., LZ4_compress_fast_continue()). + * + * An LZ4_stream_t must be initialized once before usage. + * This is automatically done when created by LZ4_createStream(). + * However, should the LZ4_stream_t be simply declared on stack (for example), + * it's necessary to initialize it first, using LZ4_initStream(). + * + * After init, start any new stream with LZ4_resetStream_fast(). + * A same LZ4_stream_t can be re-used multiple times consecutively + * and compress multiple streams, + * provided that it starts each new stream with LZ4_resetStream_fast(). + * + * LZ4_resetStream_fast() is much faster than LZ4_initStream(), + * but is not compatible with memory regions containing garbage data. + * + * Note: it's only useful to call LZ4_resetStream_fast() + * in the context of streaming compression. + * The *extState* functions perform their own resets. + * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive. + */ +LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); + +/*! LZ4_loadDict() : + * Use this function to reference a static dictionary into LZ4_stream_t. + * The dictionary must remain available during compression. + * LZ4_loadDict() triggers a reset, so any previous data will be forgotten. + * The same dictionary will have to be loaded on decompression side for successful decoding. + * Dictionary are useful for better compression of small data (KB range). + * While LZ4 accept any input as dictionary, + * results are generally better when using Zstandard's Dictionary Builder. + * Loading a size of 0 is allowed, and is the same as reset. + * @return : loaded dictionary size, in bytes (necessarily <= 64 KB) + */ +LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); + +/*! LZ4_compress_fast_continue() : + * Compress 'src' content using data from previously compressed blocks, for better compression ratio. + * 'dst' buffer must be already allocated. + * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. + * + * @return : size of compressed block + * or 0 if there is an error (typically, cannot fit into 'dst'). + * + * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. + * Each block has precise boundaries. + * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata. + * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. + * + * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory ! + * + * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + * Make sure that buffers are separated, by at least one byte. + * This construction ensures that each block only depends on previous block. + * + * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + * + * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed. + */ +LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_saveDict() : + * If last 64KB data cannot be guaranteed to remain available at its current memory location, + * save it into a safer place (char* safeBuffer). + * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(), + * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables. + * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error. + */ +LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize); + + +/*-********************************************** +* Streaming Decompression Functions +* Bufferless synchronous API +************************************************/ +typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ + +/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : + * creation / destruction of streaming decompression tracking context. + * A tracking context can be re-used multiple times. + */ +LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); +LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); + +/*! LZ4_setStreamDecode() : + * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. + * Use this function to start decompression of a new stream of blocks. + * A dictionary can optionally be set. Use NULL or size 0 for a reset order. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. + * @return : 1 if OK, 0 if error + */ +LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); + +/*! LZ4_decoderRingBufferSize() : v1.8.2+ + * Note : in a ring buffer scenario (optional), + * blocks are presumed decompressed next to each other + * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), + * at which stage it resumes from beginning of ring buffer. + * When setting such a ring buffer for streaming decompression, + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); +#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ + +/*! LZ4_decompress_*_continue() : + * These decoding functions allow decompression of consecutive blocks in "streaming" mode. + * A block is an unsplittable entity, it must be presented entirely to a decompression function. + * Decompression functions only accepts one block at a time. + * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded. + * If less than 64KB of data has been decoded, all the data must be present. + * + * Special : if decompression side sets a ring buffer, it must respect one of the following conditions : + * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). + * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. + * In which case, encoding and decoding buffers do not need to be synchronized. + * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. + * - Synchronized mode : + * Decompression buffer size is _exactly_ the same as compression buffer size, + * and follows exactly same update rule (block boundaries at same positions), + * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), + * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). + * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * In which case, encoding and decoding buffers do not need to be synchronized, + * and encoding ring buffer can have any size, including small ones ( < 64 KB). + * + * Whenever these conditions are not possible, + * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, + * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. +*/ +LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity); + + +/*! LZ4_decompress_*_usingDict() : + * These decoding functions work the same as + * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue() + * They are stand-alone, and don't need an LZ4_streamDecode_t structure. + * Dictionary is presumed stable : it must remain accessible and unmodified during decompression. + * Performance tip : Decompression speed can be substantially increased + * when dst == dictStart + dictSize. + */ +LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize); + +} + +#endif /* LZ4_H_2983827168210 */ + + +/*^************************************* + * !!!!!! STATIC LINKING ONLY !!!!!! + ***************************************/ + +/*-**************************************************************************** + * Experimental section + * + * Symbols declared in this section must be considered unstable. Their + * signatures or semantics may change, or they may be removed altogether in the + * future. They are therefore only safe to depend on when the caller is + * statically linked against the library. + * + * To protect against unsafe usage, not only are the declarations guarded, + * the definitions are hidden by default + * when building LZ4 as a shared/dynamic library. + * + * In order to access these declarations, + * define LZ4_STATIC_LINKING_ONLY in your application + * before including LZ4's headers. + * + * In order to make their implementations accessible dynamically, you must + * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. + ******************************************************************************/ + +#ifdef LZ4_STATIC_LINKING_ONLY + +#ifndef TRACY_LZ4_STATIC_3504398509 +#define TRACY_LZ4_STATIC_3504398509 + +#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS +#define LZ4LIB_STATIC_API LZ4LIB_API +#else +#define LZ4LIB_STATIC_API +#endif + +namespace tracy +{ + +/*! LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. + * It is only safe to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). + * From a high level, the difference is that + * this function initializes the provided state with a call to something like LZ4_resetStream_fast() + * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). + */ +LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_attach_dictionary() : + * This is an experimental API that allows + * efficient use of a static dictionary many times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a + * working LZ4_stream_t, this function introduces a no-copy setup mechanism, + * in which the working stream references the dictionary stream in-place. + * + * Several assumptions are made about the state of the dictionary stream. + * Currently, only streams which have been prepared by LZ4_loadDict() should + * be expected to work. + * + * Alternatively, the provided dictionaryStream may be NULL, + * in which case any existing dictionary stream is unset. + * + * If a dictionary is provided, it replaces any pre-existing stream history. + * The dictionary contents are the only history that can be referenced and + * logically immediately precede the data compressed in the first subsequent + * compression call. + * + * The dictionary will only remain attached to the working stream through the + * first compression call, at the end of which it is cleared. The dictionary + * stream (and source buffer) must remain in-place / accessible / unchanged + * through the completion of the first compression call on the stream. + */ +LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream); + + +/*! In-place compression and decompression + * + * It's possible to have input and output sharing the same buffer, + * for highly contrained memory environments. + * In both cases, it requires input to lay at the end of the buffer, + * and decompression to start at beginning of the buffer. + * Buffer size must feature some margin, hence be larger than final size. + * + * |<------------------------buffer--------------------------------->| + * |<-----------compressed data--------->| + * |<-----------decompressed size------------------>| + * |<----margin---->| + * + * This technique is more useful for decompression, + * since decompressed size is typically larger, + * and margin is short. + * + * In-place decompression will work inside any buffer + * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize). + * This presumes that decompressedSize > compressedSize. + * Otherwise, it means compression actually expanded data, + * and it would be more efficient to store such data with a flag indicating it's not compressed. + * This can happen when data is not compressible (already compressed, or encrypted). + * + * For in-place compression, margin is larger, as it must be able to cope with both + * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, + * and data expansion, which can happen when input is not compressible. + * As a consequence, buffer size requirements are much higher, + * and memory savings offered by in-place compression are more limited. + * + * There are ways to limit this cost for compression : + * - Reduce history size, by modifying LZ4_DISTANCE_MAX. + * Note that it is a compile-time constant, so all compressions will apply this limit. + * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, + * so it's a reasonable trick when inputs are known to be small. + * - Require the compressor to deliver a "maximum compressed size". + * This is the `dstCapacity` parameter in `LZ4_compress*()`. + * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, + * in which case, the return code will be 0 (zero). + * The caller must be ready for these cases to happen, + * and typically design a backup scheme to send data uncompressed. + * The combination of both techniques can significantly reduce + * the amount of margin required for in-place compression. + * + * In-place compression can work in any buffer + * which size is >= (maxCompressedSize) + * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. + * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, + * so it's possible to reduce memory requirements by playing with them. + */ + +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) +#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */ + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + +#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ +#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ + +} + +#endif /* LZ4_STATIC_3504398509 */ +#endif /* LZ4_STATIC_LINKING_ONLY */ + +#ifndef TRACY_LZ4_H_98237428734687 +#define TRACY_LZ4_H_98237428734687 + +namespace tracy +{ + +/*-************************************************************ + * Private Definitions + ************************************************************** + * Do not use these definitions directly. + * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. + * Accessing members will expose user code to API and/or ABI break in future versions of the library. + **************************************************************/ +#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) +#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) +#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ + +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) + typedef int8_t LZ4_i8; + typedef uint8_t LZ4_byte; + typedef uint16_t LZ4_u16; + typedef uint32_t LZ4_u32; +#else + typedef signed char LZ4_i8; + typedef unsigned char LZ4_byte; + typedef unsigned short LZ4_u16; + typedef unsigned int LZ4_u32; +#endif + +typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; +struct LZ4_stream_t_internal { + LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; + LZ4_u32 currentOffset; + LZ4_u32 tableType; + const LZ4_byte* dictionary; + const LZ4_stream_t_internal* dictCtx; + LZ4_u32 dictSize; +}; + +typedef struct { + const LZ4_byte* externalDict; + size_t extDictSize; + const LZ4_byte* prefixEnd; + size_t prefixSize; +} LZ4_streamDecode_t_internal; + + +/*! LZ4_stream_t : + * Do not use below internal definitions directly ! + * Declare or allocate an LZ4_stream_t instead. + * LZ4_stream_t can also be created using LZ4_createStream(), which is recommended. + * The structure definition can be convenient for static allocation + * (on stack, or as part of larger structure). + * Init this structure with LZ4_initStream() before first use. + * note : only use this definition in association with static linking ! + * this definition is not API/ABI safe, and may change in future versions. + */ +#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */ +#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*)) +union LZ4_stream_u { + void* table[LZ4_STREAMSIZE_VOIDP]; + LZ4_stream_t_internal internal_donotuse; +}; /* previously typedef'd to LZ4_stream_t */ + + +/*! LZ4_initStream() : v1.9.0+ + * An LZ4_stream_t structure must be initialized at least once. + * This is automatically done when invoking LZ4_createStream(), + * but it's not when the structure is simply declared on stack (for example). + * + * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t. + * It can also initialize any arbitrary buffer of sufficient size, + * and will @return a pointer of proper type upon initialization. + * + * Note : initialization fails if size and alignment conditions are not respected. + * In which case, the function will @return NULL. + * Note2: An LZ4_stream_t structure guarantees correct alignment and size. + * Note3: Before v1.9.0, use LZ4_resetStream() instead + */ +LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size); + + +/*! LZ4_streamDecode_t : + * information structure to track an LZ4 stream during decompression. + * init this structure using LZ4_setStreamDecode() before first use. + * note : only use in association with static linking ! + * this definition is not API/ABI safe, + * and may change in a future version ! + */ +#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ ) +#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) +union LZ4_streamDecode_u { + unsigned long long table[LZ4_STREAMDECODESIZE_U64]; + LZ4_streamDecode_t_internal internal_donotuse; +} ; /* previously typedef'd to LZ4_streamDecode_t */ + + + +/*-************************************ +* Obsolete Functions +**************************************/ + +/*! Deprecation warnings + * + * Deprecated functions make the compiler generate a warning when invoked. + * This is meant to invite users to update their source code. + * Should deprecation warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * + * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS + * before including the header file. + */ +#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define LZ4_DEPRECATED(message) [[deprecated(message)]] +# elif defined(_MSC_VER) +# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ +# endif +#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ + +/*! Obsolete compression functions (since v1.7.3) */ +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/*! Obsolete decompression functions (since v1.8.0) */ +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + +/* Obsolete streaming functions (since v1.7.0) + * degraded functionality; do not use! + * + * In order to perform streaming compression, these functions depended on data + * that is no longer tracked in the state. They have been preserved as well as + * possible: using them will still produce a correct output. However, they don't + * actually retain any history between compression calls. The compression ratio + * achieved will therefore be no better than compressing each chunk + * independently. + */ +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); +LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); + +/*! Obsolete streaming decoding functions (since v1.7.0) */ +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); + +/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : + * These functions used to be faster than LZ4_decompress_safe(), + * but this is no longer the case. They are now slower. + * This is because LZ4_decompress_fast() doesn't know the input size, + * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. + * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. + * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. + * + * The last remaining LZ4_decompress_fast() specificity is that + * it can decompress a block without knowing its compressed size. + * Such functionality can be achieved in a more secure manner + * by employing LZ4_decompress_safe_partial(). + * + * Parameters: + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * The function expects to finish at block's end exactly. + * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. + * Also, since match offsets are not validated, match reads from 'src' may underflow too. + * These issues never happen if input (compressed) data is correct. + * But they may happen if input data is invalid (error or intentional tampering). + * As a consequence, use these functions in trusted environments with trusted data **only**. + */ +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") +LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") +LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); + +/*! LZ4_resetStream() : + * An LZ4_stream_t structure must be initialized at least once. + * This is done with LZ4_initStream(), or LZ4_resetStream(). + * Consider switching to LZ4_initStream(), + * invoking LZ4_resetStream() will trigger deprecation warnings in the future. + */ +LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); + +} + +#endif /* LZ4_H_98237428734687 */ diff --git a/3rdparty/tracy/tracy/common/tracy_lz4hc.cpp b/3rdparty/tracy/tracy/common/tracy_lz4hc.cpp new file mode 100644 index 0000000..9c899b8 --- /dev/null +++ b/3rdparty/tracy/tracy/common/tracy_lz4hc.cpp @@ -0,0 +1,1620 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Copyright (C) 2011-2017, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ + + +/* ************************************* +* Tuning Parameter +***************************************/ + +/*! HEAPMODE : + * Select how default compression function will allocate workplace memory, + * in stack (0:fastest), or in heap (1:requires malloc()). + * Since workplace is rather large, heap mode is recommended. + */ +#ifndef LZ4HC_HEAPMODE +# define LZ4HC_HEAPMODE 1 +#endif + + +/*=== Dependency ===*/ +#define LZ4_HC_STATIC_LINKING_ONLY +#include "tracy_lz4hc.hpp" + + +/*=== Common definitions ===*/ +#if defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +#endif +#if defined (__clang__) +# pragma clang diagnostic ignored "-Wunused-function" +#endif + +#define LZ4_COMMONDEFS_ONLY +#ifndef LZ4_SRC_INCLUDED +#include "tracy_lz4.cpp" /* LZ4_count, constants, mem */ +#endif + + +/*=== Enums ===*/ +typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; + + +/*=== Constants ===*/ +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_OPT_NUM (1<<12) + + +/*=== Macros ===*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) +#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) +#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */ +#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ +/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */ +#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor + +namespace tracy +{ + +static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } + + +/************************************** +* HC Compression +**************************************/ +static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) +{ + MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable)); + MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); +} + +static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start) +{ + uptrval startingOffset = (uptrval)(hc4->end - hc4->base); + if (startingOffset > 1 GB) { + LZ4HC_clearTables(hc4); + startingOffset = 0; + } + startingOffset += 64 KB; + hc4->nextToUpdate = (U32) startingOffset; + hc4->base = start - startingOffset; + hc4->end = start; + hc4->dictBase = start - startingOffset; + hc4->dictLimit = (U32) startingOffset; + hc4->lowLimit = (U32) startingOffset; +} + + +/* Update chains up to ip (excluded) */ +LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) +{ + U16* const chainTable = hc4->chainTable; + U32* const hashTable = hc4->hashTable; + const BYTE* const base = hc4->base; + U32 const target = (U32)(ip - base); + U32 idx = hc4->nextToUpdate; + + while (idx < target) { + U32 const h = LZ4HC_hashPtr(base+idx); + size_t delta = idx - hashTable[h]; + if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX; + DELTANEXTU16(chainTable, idx) = (U16)delta; + hashTable[h] = idx; + idx++; + } + + hc4->nextToUpdate = target; +} + +/** LZ4HC_countBack() : + * @return : negative value, nb of common bytes before ip/match */ +LZ4_FORCE_INLINE +int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, + const BYTE* const iMin, const BYTE* const mMin) +{ + int back = 0; + int const min = (int)MAX(iMin - ip, mMin - match); + assert(min <= 0); + assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31)); + assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); + while ( (back > min) + && (ip[back-1] == match[back-1]) ) + back--; + return back; +} + +#if defined(_MSC_VER) +# define LZ4HC_rotl32(x,r) _rotl(x,r) +#else +# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif + + +static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) +{ + size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; + if (bitsToRotate == 0) return pattern; + return LZ4HC_rotl32(pattern, (int)bitsToRotate); +} + +/* LZ4HC_countPattern() : + * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */ +static unsigned +LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32) +{ + const BYTE* const iStart = ip; + reg_t const pattern = (sizeof(pattern)==8) ? + (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32; + + while (likely(ip < iEnd-(sizeof(pattern)-1))) { + reg_t const diff = LZ4_read_ARCH(ip) ^ pattern; + if (!diff) { ip+=sizeof(pattern); continue; } + ip += LZ4_NbCommonBytes(diff); + return (unsigned)(ip - iStart); + } + + if (LZ4_isLittleEndian()) { + reg_t patternByte = pattern; + while ((ip<iEnd) && (*ip == (BYTE)patternByte)) { + ip++; patternByte >>= 8; + } + } else { /* big endian */ + U32 bitOffset = (sizeof(pattern)*8) - 8; + while (ip < iEnd) { + BYTE const byte = (BYTE)(pattern >> bitOffset); + if (*ip != byte) break; + ip ++; bitOffset -= 8; + } + } + + return (unsigned)(ip - iStart); +} + +/* LZ4HC_reverseCountPattern() : + * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) + * read using natural platform endianess */ +static unsigned +LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) +{ + const BYTE* const iStart = ip; + + while (likely(ip >= iLow+4)) { + if (LZ4_read32(ip-4) != pattern) break; + ip -= 4; + } + { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianess */ + while (likely(ip>iLow)) { + if (ip[-1] != *bytePtr) break; + ip--; bytePtr--; + } } + return (unsigned)(iStart - ip); +} + +/* LZ4HC_protectDictEnd() : + * Checks if the match is in the last 3 bytes of the dictionary, so reading the + * 4 byte MINMATCH would overflow. + * @returns true if the match index is okay. + */ +static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) +{ + return ((U32)((dictLimit - 1) - matchIndex) >= 3); +} + +typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; +typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; + +LZ4_FORCE_INLINE int +LZ4HC_InsertAndGetWiderMatch ( + LZ4HC_CCtx_internal* hc4, + const BYTE* const ip, + const BYTE* const iLowLimit, + const BYTE* const iHighLimit, + int longest, + const BYTE** matchpos, + const BYTE** startpos, + const int maxNbAttempts, + const int patternAnalysis, + const int chainSwap, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + U16* const chainTable = hc4->chainTable; + U32* const HashTable = hc4->hashTable; + const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx; + const BYTE* const base = hc4->base; + const U32 dictLimit = hc4->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const U32 ipIndex = (U32)(ip - base); + const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX; + const BYTE* const dictBase = hc4->dictBase; + int const lookBackLength = (int)(ip-iLowLimit); + int nbAttempts = maxNbAttempts; + U32 matchChainPos = 0; + U32 const pattern = LZ4_read32(ip); + U32 matchIndex; + repeat_state_e repeat = rep_untested; + size_t srcPatternLength = 0; + + DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); + /* First Match */ + LZ4HC_Insert(hc4, ip); + matchIndex = HashTable[LZ4HC_hashPtr(ip)]; + DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", + matchIndex, lowestMatchIndex); + + while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) { + int matchLength=0; + nbAttempts--; + assert(matchIndex < ipIndex); + if (favorDecSpeed && (ipIndex - matchIndex < 8)) { + /* do nothing */ + } else if (matchIndex >= dictLimit) { /* within current Prefix */ + const BYTE* const matchPtr = base + matchIndex; + assert(matchPtr >= lowPrefixPtr); + assert(matchPtr < ip); + assert(longest >= 1); + if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { + if (LZ4_read32(matchPtr) == pattern) { + int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, lowPrefixPtr) : 0; + matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + *matchpos = matchPtr + back; + *startpos = ip + back; + } } } + } else { /* lowestMatchIndex <= matchIndex < dictLimit */ + const BYTE* const matchPtr = dictBase + matchIndex; + if (LZ4_read32(matchPtr) == pattern) { + const BYTE* const dictStart = dictBase + hc4->lowLimit; + int back = 0; + const BYTE* vLimit = ip + (dictLimit - matchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) + matchLength += LZ4_count(ip+matchLength, lowPrefixPtr, iHighLimit); + back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + *matchpos = base + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ + *startpos = ip + back; + } } } + + if (chainSwap && matchLength==longest) { /* better match => select a better chain */ + assert(lookBackLength==0); /* search forward only */ + if (matchIndex + (U32)longest <= ipIndex) { + int const kTrigger = 4; + U32 distanceToNextMatch = 1; + int const end = longest - MINMATCH + 1; + int step = 1; + int accel = 1 << kTrigger; + int pos; + for (pos = 0; pos < end; pos += step) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); + step = (accel++ >> kTrigger); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + matchChainPos = (U32)pos; + accel = 1 << kTrigger; + } + } + if (distanceToNextMatch > 1) { + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } } } + + { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); + if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { + U32 const matchCandidateIdx = matchIndex-1; + /* may be a repeated pattern */ + if (repeat == rep_untested) { + if ( ((pattern & 0xFFFF) == (pattern >> 16)) + & ((pattern & 0xFF) == (pattern >> 24)) ) { + repeat = rep_confirmed; + srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); + } else { + repeat = rep_not; + } } + if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) + && LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) { + const int extDict = matchCandidateIdx < dictLimit; + const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx; + if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ + const BYTE* const dictStart = dictBase + hc4->lowLimit; + const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit; + size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); + if (extDict && matchPtr + forwardPatternLength == iLimit) { + U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern); + forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern); + } + { const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr; + size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); + size_t currentSegmentLength; + if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) { + U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern); + backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern); + } + /* Limit backLength not go further than lowestMatchIndex */ + backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); + assert(matchCandidateIdx - backLength >= lowestMatchIndex); + currentSegmentLength = backLength + forwardPatternLength; + /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ + if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ + && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ + U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) + matchIndex = newMatchIndex; + else { + /* Can only happen if started in the prefix */ + assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict); + matchIndex = dictLimit; + } + } else { + U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ + if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) { + assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict); + matchIndex = dictLimit; + } else { + matchIndex = newMatchIndex; + if (lookBackLength==0) { /* no back possible */ + size_t const maxML = MIN(currentSegmentLength, srcPatternLength); + if ((size_t)longest < maxML) { + assert(base + matchIndex != ip); + if ((size_t)(ip - base) - matchIndex > LZ4_DISTANCE_MAX) break; + assert(maxML < 2 GB); + longest = (int)maxML; + *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ + *startpos = ip; + } + { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); + if (distToNextPattern > matchIndex) break; /* avoid overflow */ + matchIndex -= distToNextPattern; + } } } } } + continue; + } } + } } /* PA optimization */ + + /* follow current chain */ + matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos); + + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ + + if ( dict == usingDictCtxHc + && nbAttempts > 0 + && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) { + size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base); + U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; + assert(dictEndOffset <= 1 GB); + matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; + while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { + const BYTE* const matchPtr = dictCtx->base + dictMatchIndex; + + if (LZ4_read32(matchPtr) == pattern) { + int mlt; + int back = 0; + const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->base + dictCtx->dictLimit) : 0; + mlt -= back; + if (mlt > longest) { + longest = mlt; + *matchpos = base + matchIndex + back; + *startpos = ip + back; + } } + + { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); + dictMatchIndex -= nextOffset; + matchIndex -= nextOffset; + } } } + + return longest; +} + +LZ4_FORCE_INLINE +int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + const BYTE** matchpos, + const int maxNbAttempts, + const int patternAnalysis, + const dictCtx_directive dict) +{ + const BYTE* uselessPtr = ip; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); +} + +/* LZ4HC_encodeSequence() : + * @return : 0 if ok, + * 1 if buffer issue detected */ +LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( + const BYTE** _ip, + BYTE** _op, + const BYTE** _anchor, + int matchLength, + const BYTE* const match, + limitedOutput_directive limit, + BYTE* oend) +{ +#define ip (*_ip) +#define op (*_op) +#define anchor (*_anchor) + + size_t length; + BYTE* const token = op++; + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) + static const BYTE* start = NULL; + static U32 totalCost = 0; + U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); + U32 const ll = (U32)(ip - anchor); + U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; + U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; + U32 const cost = 1 + llAdd + ll + 2 + mlAdd; + if (start==NULL) start = anchor; /* only works for single segment */ + /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ + DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u", + pos, + (U32)(ip - anchor), matchLength, (U32)(ip-match), + cost, totalCost); + totalCost += cost; +#endif + + /* Encode Literal length */ + length = (size_t)(ip - anchor); + LZ4_STATIC_ASSERT(notLimited == 0); + /* Check output limit */ + if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) { + DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)", + (int)length, (int)(oend - op)); + return 1; + } + if (length >= RUN_MASK) { + size_t len = length - RUN_MASK; + *token = (RUN_MASK << ML_BITS); + for(; len >= 255 ; len -= 255) *op++ = 255; + *op++ = (BYTE)len; + } else { + *token = (BYTE)(length << ML_BITS); + } + + /* Copy Literals */ + LZ4_wildCopy8(op, anchor, op + length); + op += length; + + /* Encode Offset */ + assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ + LZ4_writeLE16(op, (U16)(ip - match)); op += 2; + + /* Encode MatchLength */ + assert(matchLength >= MINMATCH); + length = (size_t)matchLength - MINMATCH; + if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) { + DEBUGLOG(6, "Not enough room to write match length"); + return 1; /* Check output limit */ + } + if (length >= ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length -= 255; *op++ = 255; } + *op++ = (BYTE)length; + } else { + *token += (BYTE)(length); + } + + /* Prepare next loop */ + ip += matchLength; + anchor = ip; + + return 0; +} +#undef ip +#undef op +#undef anchor + +LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( + LZ4HC_CCtx_internal* const ctx, + const char* const source, + char* const dest, + int* srcSizePtr, + int const maxOutputSize, + int maxNbAttempts, + const limitedOutput_directive limit, + const dictCtx_directive dict + ) +{ + const int inputSize = *srcSizePtr; + const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */ + + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* optr = (BYTE*) dest; + BYTE* op = (BYTE*) dest; + BYTE* oend = op + maxOutputSize; + + int ml0, ml, ml2, ml3; + const BYTE* start0; + const BYTE* ref0; + const BYTE* ref = NULL; + const BYTE* start2 = NULL; + const BYTE* ref2 = NULL; + const BYTE* start3 = NULL; + const BYTE* ref3 = NULL; + + /* init */ + *srcSizePtr = 0; + if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ + + /* Main Loop */ + while (ip <= mflimit) { + ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict); + if (ml<MINMATCH) { ip++; continue; } + + /* saved, in case we would skip too much */ + start0 = ip; ref0 = ref; ml0 = ml; + +_Search2: + if (ip+ml <= mflimit) { + ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, + ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, + maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); + } else { + ml2 = ml; + } + + if (ml2 == ml) { /* No better match => encode ML1 */ + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + continue; + } + + if (start0 < ip) { /* first match was skipped at least once */ + if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */ + ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */ + } } + + /* Here, start0==ip */ + if ((start2 - ip) < 3) { /* First Match too small : removed */ + ml = ml2; + ip = start2; + ref =ref2; + goto _Search2; + } + +_Search3: + /* At this stage, we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; + if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ + + if (start2 + ml2 <= mflimit) { + ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, + start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, + maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); + } else { + ml3 = ml2; + } + + if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */ + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) ml = (int)(start2 - ip); + /* Now, encode 2 sequences */ + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + ip = start2; + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) { + ml = ml2; + ref = ref2; + goto _dest_overflow; + } + continue; + } + + if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */ + if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ + if (start2 < ip+ml) { + int correction = (int)(ip+ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _Search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _Search3; + } + + /* + * OK, now we have 3 ascending matches; + * let's write the first one ML1. + * ip & ref are known; Now decide ml. + */ + if (start2 < ip+ml) { + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else { + ml = (int)(start2 - ip); + } + } + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + + /* ML2 becomes ML1 */ + ip = start2; ref = ref2; ml = ml2; + + /* ML3 becomes ML2 */ + start2 = start3; ref2 = ref3; ml2 = ml3; + + /* let's find a new ML3 */ + goto _Search3; + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) return 0; + /* adapt lastRunSize to fill 'dest' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + + /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); + return (int) (((char*)op)-dest); + +_dest_overflow: + if (limit == fillOutput) { + /* Assumption : ip, anchor, ml and ref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing"); + op = optr; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(ml >= 0); + if ((size_t)ml > maxMlSize) ml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) { + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend); + } } + goto _last_literals; + } + /* compression failed */ + return 0; +} + + +static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, + const char* const source, char* dst, + int* srcSizePtr, int dstCapacity, + int const nbSearches, size_t sufficient_len, + const limitedOutput_directive limit, int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed); + + +LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + const limitedOutput_directive limit, + const dictCtx_directive dict + ) +{ + typedef enum { lz4hc, lz4opt } lz4hc_strat_e; + typedef struct { + lz4hc_strat_e strat; + int nbSearches; + U32 targetLength; + } cParams_t; + static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = { + { lz4hc, 2, 16 }, /* 0, unused */ + { lz4hc, 2, 16 }, /* 1, unused */ + { lz4hc, 2, 16 }, /* 2, unused */ + { lz4hc, 4, 16 }, /* 3 */ + { lz4hc, 8, 16 }, /* 4 */ + { lz4hc, 16, 16 }, /* 5 */ + { lz4hc, 32, 16 }, /* 6 */ + { lz4hc, 64, 16 }, /* 7 */ + { lz4hc, 128, 16 }, /* 8 */ + { lz4hc, 256, 16 }, /* 9 */ + { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ + { lz4opt, 512,128 }, /*11 */ + { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ + }; + + DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + ctx, src, *srcSizePtr, limit); + + if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */ + if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ + + ctx->end += *srcSizePtr; + if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */ + cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); + { cParams_t const cParam = clTable[cLevel]; + HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio; + int result; + + if (cParam.strat == lz4hc) { + result = LZ4HC_compress_hashChain(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, limit, dict); + } else { + assert(cParam.strat == lz4opt); + result = LZ4HC_compress_optimal(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, cParam.targetLength, limit, + cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */ + dict, favor); + } + if (result <= 0) ctx->dirty = 1; + return result; + } +} + +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock); + +static int +LZ4HC_compress_generic_noDictCtx ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + assert(ctx->dictCtx == NULL); + return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx); +} + +static int +LZ4HC_compress_generic_dictCtx ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + const size_t position = (size_t)(ctx->end - ctx->base) - ctx->lowLimit; + assert(ctx->dictCtx != NULL); + if (position >= 64 KB) { + ctx->dictCtx = NULL; + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else if (position == 0 && *srcSizePtr > 4 KB) { + memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal)); + LZ4HC_setExternalDict(ctx, (const BYTE *)src); + ctx->compressionLevel = (short)cLevel; + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc); + } +} + +static int +LZ4HC_compress_generic ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + if (ctx->dictCtx == NULL) { + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } +} + + +int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); } + +static size_t LZ4_streamHC_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_streamHC_t t; } t_a; + return sizeof(t_a) - sizeof(LZ4_streamHC_t); +#else + return 1; /* effectively disabled */ +#endif +} + +/* state is presumed correctly initialized, + * in which case its size and alignment have already been validate */ +int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ + LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; + if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0; + LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel); + LZ4HC_init_internal (ctx, (const BYTE*)src); + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput); + else + return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited); +} + +int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ + LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); + if (ctx==NULL) return 0; /* init failure */ + return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel); +} + +int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); +#else + LZ4_streamHC_t state; + LZ4_streamHC_t* const statePtr = &state; +#endif + int const cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel); +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + FREEMEM(statePtr); +#endif + return cSize; +} + +/* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */ +int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) +{ + LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); + if (ctx==NULL) return 0; /* init failure */ + LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source); + LZ4_setCompressionLevel(ctx, cLevel); + return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput); +} + + + +/************************************** +* Streaming Functions +**************************************/ +/* allocation */ +LZ4_streamHC_t* LZ4_createStreamHC(void) +{ + LZ4_streamHC_t* const state = + (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t)); + if (state == NULL) return NULL; + LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT); + return state; +} + +int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) +{ + DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr); + if (!LZ4_streamHCPtr) return 0; /* support free on NULL */ + FREEMEM(LZ4_streamHCPtr); + return 0; +} + + +LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size) +{ + LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer; + /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */ + LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE); + DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size); + /* check conditions */ + if (buffer == NULL) return NULL; + if (size < sizeof(LZ4_streamHC_t)) return NULL; + if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL; + /* init */ + { LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse); + MEM_INIT(hcstate, 0, sizeof(*hcstate)); } + LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); + return LZ4_streamHCPtr; +} + +/* just a stub */ +void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); +} + +void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel); + if (LZ4_streamHCPtr->internal_donotuse.dirty) { + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + } else { + /* preserve end - base : can trigger clearTable's threshold */ + LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.base; + LZ4_streamHCPtr->internal_donotuse.base = NULL; + LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL; + } + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); +} + +void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel); + if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT; + if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; + LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel; +} + +void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor) +{ + LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0); +} + +/* LZ4_loadDictHC() : + * LZ4_streamHCPtr is presumed properly initialized */ +int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, + const char* dictionary, int dictSize) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize); + assert(LZ4_streamHCPtr != NULL); + if (dictSize > 64 KB) { + dictionary += (size_t)dictSize - 64 KB; + dictSize = 64 KB; + } + /* need a full initialization, there are bad side-effects when using resetFast() */ + { int const cLevel = ctxPtr->compressionLevel; + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel); + } + LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary); + ctxPtr->end = (const BYTE*)dictionary + dictSize; + if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); + return dictSize; +} + +void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) { + working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? &(dictionary_stream->internal_donotuse) : NULL; +} + +/* compression */ + +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) +{ + DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); + if (ctxPtr->end >= ctxPtr->base + ctxPtr->dictLimit + 4) + LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ + + /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base); + ctxPtr->dictBase = ctxPtr->base; + ctxPtr->base = newBlock - ctxPtr->dictLimit; + ctxPtr->end = newBlock; + ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ + + /* cannot reference an extDict and a dictCtx at the same time */ + ctxPtr->dictCtx = NULL; +} + +static int +LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, + const char* src, char* dst, + int* srcSizePtr, int dstCapacity, + limitedOutput_directive limit) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + LZ4_streamHCPtr, src, *srcSizePtr, limit); + assert(ctxPtr != NULL); + /* auto-init if forgotten */ + if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src); + + /* Check overflow */ + if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 GB) { + size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - ctxPtr->dictLimit; + if (dictSize > 64 KB) dictSize = 64 KB; + LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); + } + + /* Check if blocks follow each other */ + if ((const BYTE*)src != ctxPtr->end) + LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); + + /* Check overlapping input/dictionary space */ + { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr; + const BYTE* const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit; + const BYTE* const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit; + if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) { + if (sourceEnd > dictEnd) sourceEnd = dictEnd; + ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase); + if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit; + } } + + return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); +} + +int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity) +{ + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput); + else + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited); +} + +int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) +{ + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput); +} + + + +/* LZ4_saveDictHC : + * save history content + * into a user-provided buffer + * which is then used to continue compression + */ +int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) +{ + LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; + int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit)); + DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); + assert(prefixSize >= 0); + if (dictSize > 64 KB) dictSize = 64 KB; + if (dictSize < 4) dictSize = 0; + if (dictSize > prefixSize) dictSize = prefixSize; + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + memmove(safeBuffer, streamPtr->end - dictSize, dictSize); + { U32 const endIndex = (U32)(streamPtr->end - streamPtr->base); + streamPtr->end = (const BYTE*)safeBuffer + dictSize; + streamPtr->base = streamPtr->end - endIndex; + streamPtr->dictLimit = endIndex - (U32)dictSize; + streamPtr->lowLimit = endIndex - (U32)dictSize; + if (streamPtr->nextToUpdate < streamPtr->dictLimit) + streamPtr->nextToUpdate = streamPtr->dictLimit; + } + return dictSize; +} + + +/*************************************************** +* Deprecated Functions +***************************************************/ + +/* These functions currently generate deprecation warnings */ + +/* Wrappers for deprecated compression functions */ +int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } +int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } + + +/* Deprecated streaming functions */ +int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; } + +/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t) + * @return : 0 on success, !=0 if error */ +int LZ4_resetStreamStateHC(void* state, char* inputBuffer) +{ + LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4)); + if (hc4 == NULL) return 1; /* init failed */ + LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); + return 0; +} + +void* LZ4_createHC (const char* inputBuffer) +{ + LZ4_streamHC_t* const hc4 = LZ4_createStreamHC(); + if (hc4 == NULL) return NULL; /* not enough memory */ + LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); + return hc4; +} + +int LZ4_freeHC (void* LZ4HC_Data) +{ + if (!LZ4HC_Data) return 0; /* support free on NULL */ + FREEMEM(LZ4HC_Data); + return 0; +} + +int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); +} + +int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); +} + +char* LZ4_slideInputBufferHC(void* LZ4HC_Data) +{ + LZ4_streamHC_t *ctx = (LZ4_streamHC_t*)LZ4HC_Data; + const BYTE *bufferStart = ctx->internal_donotuse.base + ctx->internal_donotuse.lowLimit; + LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel); + /* avoid const char * -> char * conversion warning :( */ + return (char *)(uptrval)bufferStart; +} + + +/* ================================================ + * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX]) + * ===============================================*/ +typedef struct { + int price; + int off; + int mlen; + int litlen; +} LZ4HC_optimal_t; + +/* price in bytes */ +LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) +{ + int price = litlen; + assert(litlen >= 0); + if (litlen >= (int)RUN_MASK) + price += 1 + ((litlen-(int)RUN_MASK) / 255); + return price; +} + + +/* requires mlen >= MINMATCH */ +LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) +{ + int price = 1 + 2 ; /* token + 16-bit offset */ + assert(litlen >= 0); + assert(mlen >= MINMATCH); + + price += LZ4HC_literalsPrice(litlen); + + if (mlen >= (int)(ML_MASK+MINMATCH)) + price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255); + + return price; +} + + +typedef struct { + int off; + int len; +} LZ4HC_match_t; + +LZ4_FORCE_INLINE LZ4HC_match_t +LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, + const BYTE* ip, const BYTE* const iHighLimit, + int minLen, int nbSearches, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + LZ4HC_match_t match = { 0 , 0 }; + const BYTE* matchPtr = NULL; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); + if (matchLength <= minLen) return match; + if (favorDecSpeed) { + if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ + } + match.len = matchLength; + match.off = (int)(ip-matchPtr); + return match; +} + + +static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, + const char* const source, + char* dst, + int* srcSizePtr, + int dstCapacity, + int const nbSearches, + size_t sufficient_len, + const limitedOutput_directive limit, + int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + int retval = 0; +#define TRAILING_LITERALS 3 +#ifdef LZ4HC_HEAPMODE + LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS)); +#else + LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */ +#endif + + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + *srcSizePtr; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + BYTE* op = (BYTE*) dst; + BYTE* opSaved = (BYTE*) dst; + BYTE* oend = op + dstCapacity; + int ovml = MINMATCH; /* overflow - last sequence */ + const BYTE* ovref = NULL; + + /* init */ +#ifdef LZ4HC_HEAPMODE + if (opt == NULL) goto _return_label; +#endif + DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity); + *srcSizePtr = 0; + if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; + + /* Main Loop */ + while (ip <= mflimit) { + int const llen = (int)(ip - anchor); + int best_mlen, best_off; + int cur, last_match_pos = 0; + + LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); + if (firstMatch.len==0) { ip++; continue; } + + if ((size_t)firstMatch.len > sufficient_len) { + /* good enough solution : immediate encoding */ + int const firstML = firstMatch.len; + const BYTE* const matchPos = ip - firstMatch.off; + opSaved = op; + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */ + ovml = firstML; + ovref = matchPos; + goto _dest_overflow; + } + continue; + } + + /* set prices for first positions (literals) */ + { int rPos; + for (rPos = 0 ; rPos < MINMATCH ; rPos++) { + int const cost = LZ4HC_literalsPrice(llen + rPos); + opt[rPos].mlen = 1; + opt[rPos].off = 0; + opt[rPos].litlen = llen + rPos; + opt[rPos].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + rPos, cost, opt[rPos].litlen); + } } + /* set prices using initial match */ + { int mlen = MINMATCH; + int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + int const offset = firstMatch.off; + assert(matchML < LZ4_OPT_NUM); + for ( ; mlen <= matchML ; mlen++) { + int const cost = LZ4HC_sequencePrice(llen, mlen); + opt[mlen].mlen = mlen; + opt[mlen].off = offset; + opt[mlen].litlen = llen; + opt[mlen].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", + mlen, cost, mlen); + } } + last_match_pos = firstMatch.len; + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + + /* check further positions */ + for (cur = 1; cur < last_match_pos; cur++) { + const BYTE* const curPtr = ip + cur; + LZ4HC_match_t newMatch; + + if (curPtr > mflimit) break; + DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", + cur, opt[cur].price, opt[cur+1].price, cur+1); + if (fullUpdate) { + /* not useful to search here if next position has same (or lower) cost */ + if ( (opt[cur+1].price <= opt[cur].price) + /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ + && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) + continue; + } else { + /* not useful to search here if next position has same (or lower) cost */ + if (opt[cur+1].price <= opt[cur].price) continue; + } + + DEBUGLOG(7, "search at rPos:%u", cur); + if (fullUpdate) + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); + else + /* only test matches of minimum length; slightly faster, but misses a few bytes */ + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed); + if (!newMatch.len) continue; + + if ( ((size_t)newMatch.len > sufficient_len) + || (newMatch.len + cur >= LZ4_OPT_NUM) ) { + /* immediate encoding */ + best_mlen = newMatch.len; + best_off = newMatch.off; + last_match_pos = cur + 1; + goto encode; + } + + /* before match : set price with literals at beginning */ + { int const baseLitlen = opt[cur].litlen; + int litlen; + for (litlen = 1; litlen < MINMATCH; litlen++) { + int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); + int const pos = cur + litlen; + if (price < opt[pos].price) { + opt[pos].mlen = 1; /* literal */ + opt[pos].off = 0; + opt[pos].litlen = baseLitlen+litlen; + opt[pos].price = price; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", + pos, price, opt[pos].litlen); + } } } + + /* set prices using match at position = cur */ + { int const matchML = newMatch.len; + int ml = MINMATCH; + + assert(cur + newMatch.len < LZ4_OPT_NUM); + for ( ; ml <= matchML ; ml++) { + int const pos = cur + ml; + int const offset = newMatch.off; + int price; + int ll; + DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", + pos, last_match_pos); + if (opt[cur].mlen == 1) { + ll = opt[cur].litlen; + price = ((cur > ll) ? opt[cur - ll].price : 0) + + LZ4HC_sequencePrice(ll, ml); + } else { + ll = 0; + price = opt[cur].price + LZ4HC_sequencePrice(0, ml); + } + + assert((U32)favorDecSpeed <= 1); + if (pos > last_match_pos+TRAILING_LITERALS + || price <= opt[pos].price - (int)favorDecSpeed) { + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", + pos, price, ml); + assert(pos < LZ4_OPT_NUM); + if ( (ml == matchML) /* last pos of last match */ + && (last_match_pos < pos) ) + last_match_pos = pos; + opt[pos].mlen = ml; + opt[pos].off = offset; + opt[pos].litlen = ll; + opt[pos].price = price; + } } } + /* complete following positions with literals */ + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + } /* for (cur = 1; cur <= last_match_pos; cur++) */ + + assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS); + best_mlen = opt[last_match_pos].mlen; + best_off = opt[last_match_pos].off; + cur = last_match_pos - best_mlen; + +encode: /* cur, last_match_pos, best_mlen, best_off must be set */ + assert(cur < LZ4_OPT_NUM); + assert(last_match_pos >= 1); /* == 1 when only one candidate */ + DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos); + { int candidate_pos = cur; + int selected_matchLength = best_mlen; + int selected_offset = best_off; + while (1) { /* from end to beginning */ + int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ + int const next_offset = opt[candidate_pos].off; + DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength); + opt[candidate_pos].mlen = selected_matchLength; + opt[candidate_pos].off = selected_offset; + selected_matchLength = next_matchLength; + selected_offset = next_offset; + if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ + assert(next_matchLength > 0); /* can be 1, means literal */ + candidate_pos -= next_matchLength; + } } + + /* encode all recorded sequences in order */ + { int rPos = 0; /* relative position (to ip) */ + while (rPos < last_match_pos) { + int const ml = opt[rPos].mlen; + int const offset = opt[rPos].off; + if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ + rPos += ml; + assert(ml >= MINMATCH); + assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); + opSaved = op; + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */ + ovml = ml; + ovref = ip - offset; + goto _dest_overflow; + } } } + } /* while (ip <= mflimit) */ + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) { /* Check output limit */ + retval = 0; + goto _return_label; + } + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + + /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); + retval = (int) ((char*)op-dst); + goto _return_label; + +_dest_overflow: +if (limit == fillOutput) { + /* Assumption : ip, anchor, ovml and ovref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved)); + op = opSaved; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(ovml >= 0); + if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) { + DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml); + DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend); + DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor); + } } + goto _last_literals; +} +_return_label: +#ifdef LZ4HC_HEAPMODE + FREEMEM(opt); +#endif + return retval; +} + +} diff --git a/3rdparty/tracy/tracy/common/tracy_lz4hc.hpp b/3rdparty/tracy/tracy/common/tracy_lz4hc.hpp new file mode 100644 index 0000000..18bf30d --- /dev/null +++ b/3rdparty/tracy/tracy/common/tracy_lz4hc.hpp @@ -0,0 +1,405 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Header File + Copyright (C) 2011-2017, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +#ifndef TRACY_LZ4_HC_H_19834876238432 +#define TRACY_LZ4_HC_H_19834876238432 + +/* --- Dependency --- */ +/* note : lz4hc requires lz4.h/lz4.c for compilation */ +#include "tracy_lz4.hpp" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */ + + +/* --- Useful constants --- */ +#define LZ4HC_CLEVEL_MIN 3 +#define LZ4HC_CLEVEL_DEFAULT 9 +#define LZ4HC_CLEVEL_OPT_MIN 10 +#define LZ4HC_CLEVEL_MAX 12 + +namespace tracy +{ + +/*-************************************ + * Block Compression + **************************************/ +/*! LZ4_compress_HC() : + * Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm. + * `dst` must be already allocated. + * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h") + * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h") + * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work. + * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX. + * @return : the number of bytes written into 'dst' + * or 0 if compression fails. + */ +LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel); + + +/* Note : + * Decompression functions are provided within "lz4.h" (BSD license) + */ + + +/*! LZ4_compress_HC_extStateHC() : + * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`. + * `state` size is provided by LZ4_sizeofStateHC(). + * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly). + */ +LZ4LIB_API int LZ4_sizeofStateHC(void); +LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); + + +/*! LZ4_compress_HC_destSize() : v1.9.0+ + * Will compress as much data as possible from `src` + * to fit into `targetDstSize` budget. + * Result is provided in 2 parts : + * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) + * or 0 if compression fails. + * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src` + */ +LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC, + const char* src, char* dst, + int* srcSizePtr, int targetDstSize, + int compressionLevel); + + +/*-************************************ + * Streaming Compression + * Bufferless synchronous API + **************************************/ + typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */ + +/*! LZ4_createStreamHC() and LZ4_freeStreamHC() : + * These functions create and release memory for LZ4 HC streaming state. + * Newly created states are automatically initialized. + * A same state can be used multiple times consecutively, + * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks. + */ +LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void); +LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr); + +/* + These functions compress data in successive blocks of any size, + using previous blocks as dictionary, to improve compression ratio. + One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks. + There is an exception for ring buffers, which can be smaller than 64 KB. + Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue(). + + Before starting compression, state must be allocated and properly initialized. + LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT. + + Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream) + or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental). + LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once, + which is automatically the case when state is created using LZ4_createStreamHC(). + + After reset, a first "fictional block" can be designated as initial dictionary, + using LZ4_loadDictHC() (Optional). + + Invoke LZ4_compress_HC_continue() to compress each successive block. + The number of blocks is unlimited. + Previous input blocks, including initial dictionary when present, + must remain accessible and unmodified during compression. + + It's allowed to update compression level anytime between blocks, + using LZ4_setCompressionLevel() (experimental). + + 'dst' buffer should be sized to handle worst case scenarios + (see LZ4_compressBound(), it ensures compression success). + In case of failure, the API does not guarantee recovery, + so the state _must_ be reset. + To ensure compression success + whenever `dst` buffer size cannot be made >= LZ4_compressBound(), + consider using LZ4_compress_HC_continue_destSize(). + + Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks, + it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC(). + Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB) + + After completing a streaming compression, + it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state, + just by resetting it, using LZ4_resetStreamHC_fast(). +*/ + +LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */ +LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); + +LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, + const char* src, char* dst, + int srcSize, int maxDstSize); + +/*! LZ4_compress_HC_continue_destSize() : v1.9.0+ + * Similar to LZ4_compress_HC_continue(), + * but will read as much data as possible from `src` + * to fit into `targetDstSize` budget. + * Result is provided into 2 parts : + * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) + * or 0 if compression fails. + * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`. + * Note that this function may not consume the entire input. + */ +LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, + const char* src, char* dst, + int* srcSizePtr, int targetDstSize); + +LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize); + + + +/*^********************************************** + * !!!!!! STATIC LINKING ONLY !!!!!! + ***********************************************/ + +/*-****************************************************************** + * PRIVATE DEFINITIONS : + * Do not use these definitions directly. + * They are merely exposed to allow static allocation of `LZ4_streamHC_t`. + * Declare an `LZ4_streamHC_t` directly, rather than any type below. + * Even then, only do so in the context of static linking, as definitions may change between versions. + ********************************************************************/ + +#define LZ4HC_DICTIONARY_LOGSIZE 16 +#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE) +#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1) + +#define LZ4HC_HASH_LOG 15 +#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG) +#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1) + + +typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal; +struct LZ4HC_CCtx_internal +{ + LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE]; + LZ4_u16 chainTable[LZ4HC_MAXD]; + const LZ4_byte* end; /* next block here to continue on current prefix */ + const LZ4_byte* base; /* All index relative to this position */ + const LZ4_byte* dictBase; /* alternate base for extDict */ + LZ4_u32 dictLimit; /* below that point, need extDict */ + LZ4_u32 lowLimit; /* below that point, no more dict */ + LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */ + short compressionLevel; + LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set, + otherwise, favor compression ratio */ + LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */ + const LZ4HC_CCtx_internal* dictCtx; +}; + + +/* Do not use these definitions directly ! + * Declare or allocate an LZ4_streamHC_t instead. + */ +#define LZ4_STREAMHCSIZE 262200 /* static size, for inter-version compatibility */ +#define LZ4_STREAMHCSIZE_VOIDP (LZ4_STREAMHCSIZE / sizeof(void*)) +union LZ4_streamHC_u { + void* table[LZ4_STREAMHCSIZE_VOIDP]; + LZ4HC_CCtx_internal internal_donotuse; +}; /* previously typedef'd to LZ4_streamHC_t */ + +/* LZ4_streamHC_t : + * This structure allows static allocation of LZ4 HC streaming state. + * This can be used to allocate statically, on state, or as part of a larger structure. + * + * Such state **must** be initialized using LZ4_initStreamHC() before first use. + * + * Note that invoking LZ4_initStreamHC() is not required when + * the state was created using LZ4_createStreamHC() (which is recommended). + * Using the normal builder, a newly created state is automatically initialized. + * + * Static allocation shall only be used in combination with static linking. + */ + +/* LZ4_initStreamHC() : v1.9.0+ + * Required before first use of a statically allocated LZ4_streamHC_t. + * Before v1.9.0 : use LZ4_resetStreamHC() instead + */ +LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size); + + +/*-************************************ +* Deprecated Functions +**************************************/ +/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */ + +/* deprecated compression functions */ +LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC (const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel); +LZ4_DEPRECATED("use LZ4_compress_HC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); +LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel); +LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); +LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/* Obsolete streaming functions; degraded functionality; do not use! + * + * In order to perform streaming compression, these functions depended on data + * that is no longer tracked in the state. They have been preserved as well as + * possible: using them will still produce a correct output. However, use of + * LZ4_slideInputBufferHC() will truncate the history of the stream, rather + * than preserve a window-sized chunk of history. + */ +LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API void* LZ4_createHC (const char* inputBuffer); +LZ4_DEPRECATED("use LZ4_saveDictHC() instead") LZ4LIB_API char* LZ4_slideInputBufferHC (void* LZ4HC_Data); +LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") LZ4LIB_API int LZ4_freeHC (void* LZ4HC_Data); +LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel); +LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel); +LZ4_DEPRECATED("use LZ4_createStreamHC() instead") LZ4LIB_API int LZ4_sizeofStreamStateHC(void); +LZ4_DEPRECATED("use LZ4_initStreamHC() instead") LZ4LIB_API int LZ4_resetStreamStateHC(void* state, char* inputBuffer); + + +/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC(). + * The intention is to emphasize the difference with LZ4_resetStreamHC_fast(), + * which is now the recommended function to start a new stream of blocks, + * but cannot be used to initialize a memory segment containing arbitrary garbage data. + * + * It is recommended to switch to LZ4_initStreamHC(). + * LZ4_resetStreamHC() will generate deprecation warnings in a future version. + */ +LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel); + +} + +#endif /* LZ4_HC_H_19834876238432 */ + + +/*-************************************************** + * !!!!! STATIC LINKING ONLY !!!!! + * Following definitions are considered experimental. + * They should not be linked from DLL, + * as there is no guarantee of API stability yet. + * Prototypes will be promoted to "stable" status + * after successfull usage in real-life scenarios. + ***************************************************/ +#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */ +#ifndef TRACY_LZ4_HC_SLO_098092834 +#define TRACY_LZ4_HC_SLO_098092834 + +#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */ +#include "tracy_lz4.hpp" + +namespace tracy +{ + +/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental) + * It's possible to change compression level + * between successive invocations of LZ4_compress_HC_continue*() + * for dynamic adaptation. + */ +LZ4LIB_STATIC_API void LZ4_setCompressionLevel( + LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); + +/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental) + * Opt. Parser will favor decompression speed over compression ratio. + * Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN. + */ +LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed( + LZ4_streamHC_t* LZ4_streamHCPtr, int favor); + +/*! LZ4_resetStreamHC_fast() : v1.9.0+ + * When an LZ4_streamHC_t is known to be in a internally coherent state, + * it can often be prepared for a new compression with almost no work, only + * sometimes falling back to the full, expensive reset that is always required + * when the stream is in an indeterminate state (i.e., the reset performed by + * LZ4_resetStreamHC()). + * + * LZ4_streamHCs are guaranteed to be in a valid state when: + * - returned from LZ4_createStreamHC() + * - reset by LZ4_resetStreamHC() + * - memset(stream, 0, sizeof(LZ4_streamHC_t)) + * - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast() + * - the stream was in a valid state and was then used in any compression call + * that returned success + * - the stream was in an indeterminate state and was used in a compression + * call that fully reset the state (LZ4_compress_HC_extStateHC()) and that + * returned success + * + * Note: + * A stream that was last used in a compression call that returned an error + * may be passed to this function. However, it will be fully reset, which will + * clear any existing history and settings from the context. + */ +LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast( + LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); + +/*! LZ4_compress_HC_extStateHC_fastReset() : + * A variant of LZ4_compress_HC_extStateHC(). + * + * Using this variant avoids an expensive initialization step. It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStreamHC_fast() for a definition of + * "correctly initialized"). From a high level, the difference is that this + * function initializes the provided state with a call to + * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a + * call to LZ4_resetStreamHC(). + */ +LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset ( + void* state, + const char* src, char* dst, + int srcSize, int dstCapacity, + int compressionLevel); + +/*! LZ4_attach_HC_dictionary() : + * This is an experimental API that allows for the efficient use of a + * static dictionary many times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a + * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism, + * in which the working stream references the dictionary stream in-place. + * + * Several assumptions are made about the state of the dictionary stream. + * Currently, only streams which have been prepared by LZ4_loadDictHC() should + * be expected to work. + * + * Alternatively, the provided dictionary stream pointer may be NULL, in which + * case any existing dictionary stream is unset. + * + * A dictionary should only be attached to a stream without any history (i.e., + * a stream that has just been reset). + * + * The dictionary will remain attached to the working stream only for the + * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the + * dictionary context association from the working stream. The dictionary + * stream (and source buffer) must remain in-place / accessible / unchanged + * through the lifetime of the stream session. + */ +LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary( + LZ4_streamHC_t *working_stream, + const LZ4_streamHC_t *dictionary_stream); + +} + +#endif /* LZ4_HC_SLO_098092834 */ +#endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/3rdparty/tracy/tracy/common/unix-release.mk b/3rdparty/tracy/tracy/common/unix-release.mk new file mode 100644 index 0000000..07ac7d6 --- /dev/null +++ b/3rdparty/tracy/tracy/common/unix-release.mk @@ -0,0 +1,13 @@ +ARCH := $(shell uname -m) + +ifeq (0,$(shell $(CC) --version | grep clang && echo 1 || echo 0)) +CFLAGS += -s +else +LDFLAGS := -s +endif + +ifneq (,$(filter $(ARCH),aarch64 arm64)) +CFLAGS += -mcpu=native +else +CFLAGS += -march=native +endif diff --git a/3rdparty/tracy/tracy/common/unix.mk b/3rdparty/tracy/tracy/common/unix.mk new file mode 100644 index 0000000..0105093 --- /dev/null +++ b/3rdparty/tracy/tracy/common/unix.mk @@ -0,0 +1,82 @@ +# Common code needed by most Tracy Unix Makefiles. + +# Ensure these are simply-substituted variables, without changing their values. +LIBS := $(LIBS) + +# Tracy does not use TBB directly, but the implementation of parallel algorithms +# in some versions of libstdc++ depends on TBB. When it does, you must +# explicitly link against -ltbb. +# +# Some distributions have pgk-config files for TBB, others don't. +ifeq (0,$(shell pkg-config --libs tbb >/dev/null 2>&1; echo $$?)) + LIBS += $(shell pkg-config --libs tbb) +else ifeq (0,$(shell ld -ltbb -o /dev/null 2>/dev/null; echo $$?)) + LIBS += -ltbb +endif + +OBJDIRBASE := obj/$(BUILD) +OBJDIR := $(OBJDIRBASE)/o/o/o + +OBJ := $(addprefix $(OBJDIR)/,$(SRC:%.cpp=%.o)) +OBJ2 := $(addprefix $(OBJDIR)/,$(SRC2:%.c=%.o)) +OBJ3 := $(addprefix $(OBJDIR)/,$(SRC3:%.m=%.o)) +OBJ4 := $(addprefix $(OBJDIR)/,$(SRC4:%.S=%.o)) + +all: $(IMAGE) + +$(OBJDIR)/%.o: %.cpp + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< -o $@ + +$(OBJDIR)/%.d : %.cpp + @echo Resolving dependencies of $< + @mkdir -p $(@D) + @$(CXX) -MM $(INCLUDES) $(CXXFLAGS) $(DEFINES) $< > $@.$$$$; \ + sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.cpp=.o) $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +$(OBJDIR)/%.o: %.c + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + +$(OBJDIR)/%.d : %.c + @echo Resolving dependencies of $< + @mkdir -p $(@D) + @$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \ + sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.c=.o) $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +$(OBJDIR)/%.o: %.m + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + +$(OBJDIR)/%.d : %.m + @echo Resolving dependencies of $< + @mkdir -p $(@D) + @$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \ + sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.m=.o) $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +$(OBJDIR)/%.o: %.S + $(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@ + +$(OBJDIR)/%.d : %.S + @echo Resolving dependencies of $< + @mkdir -p $(@D) + @$(CC) -MM $(INCLUDES) $(CFLAGS) $(DEFINES) $< > $@.$$$$; \ + sed 's,.*\.o[ :]*,$(OBJDIR)/$(<:.m=.o) $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +ifeq (yes,$(SHARED_LIBRARY)) +$(IMAGE): $(OBJ) $(OBJ2) $(OBJ4) + $(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ4) $(LIBS) -shared -o $@ +else +$(IMAGE): $(OBJ) $(OBJ2) $(OBJ3) $(OBJ4) + $(CXX) $(CXXFLAGS) $(LDFLAGS) $(DEFINES) $(OBJ) $(OBJ2) $(OBJ3) $(OBJ4) $(LIBS) -o $@ +endif + +ifneq "$(MAKECMDGOALS)" "clean" +-include $(addprefix $(OBJDIR)/,$(SRC:.cpp=.d)) $(addprefix $(OBJDIR)/,$(SRC2:.c=.d)) $(addprefix $(OBJDIR)/,$(SRC3:.m=.d)) $(addprefix $(OBJDIR)/,$(SRC4:.S=.d)) +endif + +clean: + rm -rf $(OBJDIRBASE) $(IMAGE)* + +.PHONY: clean all |