diff --git a/CMakeLists.txt b/CMakeLists.txt index e37ed34..3310e97 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,7 +75,13 @@ else() message(STATUS "Unsupported platform") endif() -target_include_directories(${PROJECT_NAME} PRIVATE "vendor") +target_include_directories(${PROJECT_NAME} PRIVATE vendor) +target_sources(${PROJECT_NAME} PRIVATE "vendor/SDL3_gfx/SDL3_framerate.c") +target_sources(${PROJECT_NAME} PRIVATE "vendor/SDL3_gfx/SDL3_gfxPrimitives.c") +target_sources(${PROJECT_NAME} PRIVATE "vendor/SDL3_gfx/SDL3_imageFilter.c") +target_sources(${PROJECT_NAME} PRIVATE "vendor/SDL3_gfx/SDL3_rotozoom.c") +target_sources(${PROJECT_NAME} PRIVATE "vendor/clay/clay_renderer_SDL3.c") + target_link_libraries(${PROJECT_NAME} PRIVATE SDL3::SDL3-static SDL3_ttf::SDL3_ttf-static diff --git a/main.c b/main.c index b39fbcb..9a0b3eb 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #define CLAY_IMPLEMENTATION #include -#include +#include #include "ui/clay_video_demo.c" diff --git a/vendor/SDL3_gfx/SDL3_framerate.c b/vendor/SDL3_gfx/SDL3_framerate.c new file mode 100644 index 0000000..5bff7e2 --- /dev/null +++ b/vendor/SDL3_gfx/SDL3_framerate.c @@ -0,0 +1,189 @@ +/* + +SDL3_framerate.c: framerate manager + +Copyright (C) 2012-2014 Andreas Schiffler + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not +claim that you wrote the original software. If you use this software +in a product, an acknowledgment in the product documentation would be +appreciated but is not required. + +2. 
Altered source versions must be plainly marked as such, and must not be +misrepresented as being the original software. + +3. This notice may not be removed or altered from any source +distribution. + +Andreas Schiffler -- aschiffler at ferzkopp dot net + +*/ + +#include "SDL3_framerate.h" + +/*! +\brief Internal wrapper to SDL_GetTicks that ensures a non-zero return value. + +\return The tick count. +*/ +Uint64 _getTicks() +{ + Uint64 ticks = SDL_GetTicks(); + + /* + * Since baseticks!=0 is used to track initialization + * we need to ensure that the tick count is always >0 + * since SDL_GetTicks may not have incremented yet and + * return 0 depending on the timing of the calls. + */ + if (ticks == 0) { + return 1; + } else { + return ticks; + } +} + +/*! +\brief Initialize the framerate manager. + +Initialize the framerate manager, set default framerate of 30Hz and +reset delay interpolation. + +\param manager Pointer to the framerate manager. +*/ +void SDL_initFramerate(FPSmanager * manager) +{ + /* + * Store some sane values + */ + manager->framecount = 0; + manager->rate = FPS_DEFAULT; + manager->rateticks = (1000.0f / (float) FPS_DEFAULT); + manager->baseticks = _getTicks(); + manager->lastticks = manager->baseticks; + +} + +/*! +\brief Set the framerate in Hz + +Sets a new framerate for the manager and reset delay interpolation. +Rate values must be between FPS_LOWER_LIMIT and FPS_UPPER_LIMIT inclusive to be accepted. + +\param manager Pointer to the framerate manager. +\param rate The new framerate in Hz (frames per second). + +\return 0 for sucess and -1 for error. +*/ +int SDL_setFramerate(FPSmanager * manager, Uint32 rate) +{ + if ((rate >= FPS_LOWER_LIMIT) && (rate <= FPS_UPPER_LIMIT)) { + manager->framecount = 0; + manager->rate = rate; + manager->rateticks = (1000.0f / (float) rate); + return (0); + } else { + return (-1); + } +} + +/*! +\brief Return the current target framerate in Hz + +Get the currently set framerate of the manager. 
+
+\param manager Pointer to the framerate manager.
+
+\return Current framerate in Hz or -1 for error.
+*/
+int SDL_getFramerate(FPSmanager * manager)
+{
+    if (manager == NULL) {
+        return (-1); /* the only error case: no manager */
+    } else {
+        return ((int)manager->rate);
+    }
+}
+
+/*!
+\brief Return the current framecount.
+
+Get the current framecount from the framerate manager.
+A frame is counted each time SDL_framerateDelay is called; the count restarts at 0 when a frame misses its target tick or SDL_setFramerate accepts a new rate.
+
+\param manager Pointer to the framerate manager.
+
+\return Current frame count, or -1 if manager is NULL.
+*/
+int SDL_getFramecount(FPSmanager * manager)
+{
+    if (manager == NULL) {
+        return (-1);
+    } else {
+        return ((int)manager->framecount);
+    }
+}
+
+/*!
+\brief Delay execution to maintain a constant framerate and calculate fps.
+
+Generate a delay to accommodate the currently set framerate. Call once in the
+graphics/rendering loop. If the computer cannot keep up with the rate (i.e.
+drawing too slow), the delay is zero and the delay interpolation is reset.
+
+\param manager Pointer to the framerate manager. If NULL, nothing is done and 0 is returned.
+
+\return The time that passed since the last call to the function in ms. May return 0.
+*/
+Uint64 SDL_framerateDelay(FPSmanager * manager)
+{
+    Uint64 current_ticks;
+    Uint64 target_ticks;
+    Uint64 the_delay;
+    Uint64 time_passed = 0;
+
+    /*
+     * No manager, no delay
+     */
+    if (manager == NULL) {
+        return 0;
+    }
+
+    /*
+     * Initialize uninitialized manager (baseticks == 0 is the "never initialized" marker)
+     */
+    if (manager->baseticks == 0) {
+        SDL_initFramerate(manager);
+    }
+
+    /*
+     * Next frame
+     */
+    manager->framecount++;
+
+    /*
+     * Get/calc ticks: time since the previous call, and the tick at which frame number framecount is due
+     */
+    current_ticks = _getTicks();
+    time_passed = current_ticks - manager->lastticks;
+    manager->lastticks = current_ticks;
+    target_ticks = manager->baseticks + (Uint64) ((float) manager->framecount * manager->rateticks);
+
+    if (current_ticks <= target_ticks) {
+        the_delay = target_ticks - current_ticks; /* on or ahead of schedule: wait out the remainder */
+        SDL_Delay(the_delay); /* NOTE(review): the_delay is 64-bit; confirm SDL_Delay's parameter width makes this narrowing harmless */
+    } else {
+        manager->framecount = 0; /* behind schedule: no delay, restart the schedule from the current tick */
+        manager->baseticks = _getTicks();
+    }
+
+    return time_passed;
+}
diff --git a/vendor/SDL3_gfx/SDL3_framerate.h b/vendor/SDL3_gfx/SDL3_framerate.h
new file mode 100644
index 0000000..43bf8fc
--- /dev/null
+++ b/vendor/SDL3_gfx/SDL3_framerate.h
@@ -0,0 +1,100 @@
+/*
+
+SDL3_framerate.h: framerate manager
+
+Copyright (C) 2012-2014 Andreas Schiffler
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+claim that you wrote the original software. If you use this software
+in a product, an acknowledgment in the product documentation would be
+appreciated but is not required.
+
+2. Altered source versions must be plainly marked as such, and must not be
+misrepresented as being the original software.
+
+3. This notice may not be removed or altered from any source
+distribution.
+
+Andreas Schiffler -- aschiffler at ferzkopp dot net
+
+*/
+
+#ifndef _SDL3_framerate_h
+#define _SDL3_framerate_h
+
+/* Set up for C function definitions, even when using C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    /* --- */
+
+#include
+
+    /* --------- Definitions */
+
+    /*!
+    \brief Highest possible rate supported by framerate controller in Hz (1/s).
+    */
+#define FPS_UPPER_LIMIT 200
+
+    /*!
+    \brief Lowest possible rate supported by framerate controller in Hz (1/s).
+    */
+#define FPS_LOWER_LIMIT 1
+
+    /*!
+    \brief Default rate of framerate controller in Hz (1/s).
+    */
+#define FPS_DEFAULT 30
+
+    /*!
+    \brief Structure holding the state and timing information of the framerate controller.
+    */
+    typedef struct {
+        Uint32 framecount; /* frames counted by SDL_framerateDelay since the schedule was last restarted */
+        float rateticks;   /* ticks per frame, stored as 1000.0f / rate */
+        Uint64 baseticks;  /* tick count the frame schedule is measured from; 0 means "not initialized" */
+        Uint64 lastticks;  /* tick count sampled by the previous SDL_framerateDelay call */
+        Uint32 rate;       /* target framerate in Hz */
+    } FPSmanager;
+
+    /* ---- Function Prototypes */
+
+#ifdef _MSC_VER
+#  if defined(DLL_EXPORT) && !defined(LIBSDL3_GFX_DLL_IMPORT)
+#    define SDL3_FRAMERATE_SCOPE __declspec(dllexport)
+#  else
+#    ifdef LIBSDL3_GFX_DLL_IMPORT
+#      define SDL3_FRAMERATE_SCOPE __declspec(dllimport)
+#    endif
+#  endif
+#endif
+#ifndef SDL3_FRAMERATE_SCOPE
+#  define SDL3_FRAMERATE_SCOPE extern
+#endif
+
+    /* Functions return 0 or a value for success and -1 for error (SDL_framerateDelay returns 0 when given no manager) */
+
+    SDL3_FRAMERATE_SCOPE void SDL_initFramerate(FPSmanager * manager);
+    SDL3_FRAMERATE_SCOPE int SDL_setFramerate(FPSmanager * manager, Uint32 rate);
+    SDL3_FRAMERATE_SCOPE int SDL_getFramerate(FPSmanager * manager);
+    SDL3_FRAMERATE_SCOPE int SDL_getFramecount(FPSmanager * manager);
+    SDL3_FRAMERATE_SCOPE Uint64 SDL_framerateDelay(FPSmanager * manager);
+
+    /* --- */
+
+    /* Ends C function definitions when using C++ */
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SDL3_framerate_h */
diff --git a/vendor/SDL3_gfx/SDL3_gfxPrimitives.c b/vendor/SDL3_gfx/SDL3_gfxPrimitives.c
new file mode 100644
index 0000000..c67d806
--- /dev/null
+++ b/vendor/SDL3_gfx/SDL3_gfxPrimitives.c
@@ -0,0 +1,3784 @@
+/*
+
+SDL3_gfxPrimitives.c: graphics primitives for SDL3 renderers
+
+Copyright (C) 2012-2014 Andreas Schiffler
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+claim that you wrote the original software. If you use this software
+in a product, an acknowledgment in the product documentation would be
+appreciated but is not required.
+
+2. Altered source versions must be plainly marked as such, and must not be
+misrepresented as being the original software.
+
+3. This notice may not be removed or altered from any source
+distribution.
+
+Andreas Schiffler -- aschiffler at ferzkopp dot net
+
+*/
+
+#include
+#include
+#include
+#include
+
+#include "SDL3_gfxPrimitives.h"
+#include "SDL3_rotozoom.h"
+#include "SDL3_gfxPrimitives_font.h"
+
+/* ---- Pixel */
+
+/*!
+\brief Draw pixel in currently set color. The renderer's draw color and blend mode are used as they are.
+
+\param renderer The renderer to draw on.
+\param x X (horizontal) coordinate of the pixel.
+\param y Y (vertical) coordinate of the pixel.
+
+\returns Returns true on success, false on failure.
+*/
+bool pixel(SDL_Renderer *renderer, Sint16 x, Sint16 y)
+{
+    return SDL_RenderPoint(renderer, x, y);
+}
+
+/*!
+\brief Draw pixel with blending enabled if a<255.
+
+\param renderer The renderer to draw on.
+\param x X (horizontal) coordinate of the pixel.
+\param y Y (vertical) coordinate of the pixel.
+\param color The color value of the pixel to draw; its bytes are read in memory order as R, G, B, A (0xRRGGBBAA on big-endian hosts only).
+
+\returns Returns true on success, false on failure.
+*/
+bool pixelColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Uint32 color)
+{
+    Uint8 *c = (Uint8 *)&color; /* bytes in memory order: c[0]=r, c[1]=g, c[2]=b, c[3]=a (host-endian dependent) */
+    return pixelRGBA(renderer, x, y, c[0], c[1], c[2], c[3]);
+}
+
+/*!
+\brief Draw pixel with blending enabled if a<255. The renderer's draw blend mode and draw color are left set to the values used.
+
+\param renderer The renderer to draw on.
+\param x X (horizontal) coordinate of the pixel.
+\param y Y (vertical) coordinate of the pixel.
+\param r The red color value of the pixel to draw.
+\param g The green color value of the pixel to draw.
+\param b The blue color value of the pixel to draw.
+\param a The alpha value of the pixel to draw.
+
+\returns Returns true only if all three renderer calls succeed, false otherwise.
+*/
+bool pixelRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Uint8 r, Uint8 g, Uint8 b, Uint8 a)
+{
+    bool result = true;
+    result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND);
+    result &= SDL_SetRenderDrawColor(renderer, r, g, b, a);
+    result &= SDL_RenderPoint(renderer, x, y);
+    return result;
+}
+
+/*!
+\brief Draw pixel with blending enabled and using alpha weight on color.
+
+\param renderer The renderer to draw on.
+\param x The horizontal coordinate of the pixel.
+\param y The vertical position of the pixel.
+\param r The red color value of the pixel to draw.
+\param g The green color value of the pixel to draw.
+\param b The blue color value of the pixel to draw.
+\param a The alpha value of the pixel to draw.
+\param weight The weight multiplied into the alpha value of the pixel; the product is divided by 256 and clamped to 255.
+
+\returns Returns true on success, false on failure.
+*/
+bool pixelRGBAWeight(SDL_Renderer * renderer, Sint16 x, Sint16 y, Uint8 r, Uint8 g, Uint8 b, Uint8 a, Uint32 weight)
+{
+    /*
+     * Modify Alpha by weight
+     */
+    Uint32 ax = a;
+    ax = ((ax * weight) >> 8);
+    if (ax > 255) {
+        a = 255;
+    } else {
+        a = (Uint8)(ax & 0x000000ff);
+    }
+
+    return pixelRGBA(renderer, x, y, r, g, b, a);
+}
+
+/* ---- Hline */
+
+/*!
+\brief Draw horizontal line in currently set color
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point (i.e. left) of the line.
+\param x2 X coordinate of the second point (i.e. right) of the line.
+\param y Y coordinate of the points of the line.
+
+\returns Returns true on success, false on failure.
+*/
+bool hline(SDL_Renderer * renderer, Sint16 x1, Sint16 x2, Sint16 y)
+{
+    return SDL_RenderLine(renderer, x1, y, x2, y);;
+}
+
+
+/*!
+\brief Draw horizontal line with blending.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point (i.e. left) of the line.
+\param x2 X coordinate of the second point (i.e. right) of the line.
+\param y Y coordinate of the points of the line.
+\param color The color value of the line to draw; its bytes are read in memory order as R, G, B, A (0xRRGGBBAA on big-endian hosts only).
+
+\returns Returns true on success, false on failure.
+*/
+bool hlineColor(SDL_Renderer * renderer, Sint16 x1, Sint16 x2, Sint16 y, Uint32 color)
+{
+    Uint8 *c = (Uint8 *)&color; /* bytes in memory order: c[0]=r, c[1]=g, c[2]=b, c[3]=a (host-endian dependent) */
+    return hlineRGBA(renderer, x1, x2, y, c[0], c[1], c[2], c[3]);
+}
+
+/*!
+\brief Draw horizontal line with blending (enabled if a<255). The renderer's draw blend mode and draw color are left set to the values used.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point (i.e. left) of the line.
+\param x2 X coordinate of the second point (i.e. right) of the line.
+\param y Y coordinate of the points of the line.
+\param r The red value of the line to draw.
+\param g The green value of the line to draw.
+\param b The blue value of the line to draw.
+\param a The alpha value of the line to draw.
+
+\returns Returns true only if all three renderer calls succeed, false otherwise.
+*/
+bool hlineRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 x2, Sint16 y, Uint8 r, Uint8 g, Uint8 b, Uint8 a)
+{
+    bool result = true;
+    result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND);
+    result &= SDL_SetRenderDrawColor(renderer, r, g, b, a);
+    result &= SDL_RenderLine(renderer, x1, y, x2, y);
+    return result;
+}
+
+/* ---- Vline */
+
+/*!
+\brief Draw vertical line in currently set color
+
+\param renderer The renderer to draw on.
+\param x X coordinate of points of the line.
+\param y1 Y coordinate of the first point (i.e. top) of the line.
+\param y2 Y coordinate of the second point (i.e. bottom) of the line.
+
+\returns Returns true on success, false on failure.
+*/
+bool vline(SDL_Renderer * renderer, Sint16 x, Sint16 y1, Sint16 y2)
+{
+    return SDL_RenderLine(renderer, x, y1, x, y2);;
+}
+
+/*!
+\brief Draw vertical line with blending.
+
+\param renderer The renderer to draw on.
+\param x X coordinate of the points of the line.
+\param y1 Y coordinate of the first point (i.e. top) of the line.
+\param y2 Y coordinate of the second point (i.e. bottom) of the line.
+\param color The color value of the line to draw; its bytes are read in memory order as R, G, B, A (0xRRGGBBAA on big-endian hosts only).
+
+\returns Returns true on success, false on failure.
+*/
+bool vlineColor(SDL_Renderer * renderer, Sint16 x, Sint16 y1, Sint16 y2, Uint32 color)
+{
+    Uint8 *c = (Uint8 *)&color; /* bytes in memory order: c[0]=r, c[1]=g, c[2]=b, c[3]=a (host-endian dependent) */
+    return vlineRGBA(renderer, x, y1, y2, c[0], c[1], c[2], c[3]);
+}
+
+/*!
+\brief Draw vertical line with blending (enabled if a<255). The renderer's draw blend mode and draw color are left set to the values used.
+
+\param renderer The renderer to draw on.
+\param x X coordinate of the points of the line.
+\param y1 Y coordinate of the first point (i.e. top) of the line.
+\param y2 Y coordinate of the second point (i.e. bottom) of the line.
+\param r The red value of the line to draw.
+\param g The green value of the line to draw.
+\param b The blue value of the line to draw.
+\param a The alpha value of the line to draw.
+
+\returns Returns true only if all three renderer calls succeed, false otherwise.
+*/
+bool vlineRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y1, Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a)
+{
+    bool result = true;
+    result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND);
+    result &= SDL_SetRenderDrawColor(renderer, r, g, b, a);
+    result &= SDL_RenderLine(renderer, x, y1, x, y2);
+    return result;
+}
+
+/* ---- Rectangle */
+
+/*!
+\brief Draw rectangle with blending.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point (i.e. top right) of the rectangle.
+\param y1 Y coordinate of the first point (i.e. top right) of the rectangle.
+\param x2 X coordinate of the second point (i.e. bottom left) of the rectangle.
+\param y2 Y coordinate of the second point (i.e. bottom left) of the rectangle.
+\param color The color value of the rectangle to draw (0xRRGGBBAA).
+
+\returns Returns true on success, false on failure.
+*/
+bool rectangleColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint32 color)
+{
+    Uint8 *c = (Uint8 *)&color; /* bytes in memory order: c[0]=r, c[1]=g, c[2]=b, c[3]=a, so the literal layout depends on host endianness */
+    return rectangleRGBA(renderer, x1, y1, x2, y2, c[0], c[1], c[2], c[3]);
+}
+
+/*!
+\brief Draw rectangle with blending (drawn with SDL_RenderRect; blending is enabled if a<255).
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point (i.e. top right) of the rectangle; either corner order is accepted.
+\param y1 Y coordinate of the first point (i.e. top right) of the rectangle.
+\param x2 X coordinate of the second point (i.e. bottom left) of the rectangle.
+\param y2 Y coordinate of the second point (i.e. bottom left) of the rectangle.
+\param r The red value of the rectangle to draw.
+\param g The green value of the rectangle to draw.
+\param b The blue value of the rectangle to draw.
+\param a The alpha value of the rectangle to draw.
+
+\returns Returns true on success, false on failure.
+*/
+bool rectangleRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a)
+{
+    bool result;
+    Sint16 tmp;
+    SDL_FRect rect;
+
+    /*
+     * Test for special cases of straight lines or single point
+     */
+    if (x1 == x2) {
+        if (y1 == y2) {
+            return (pixelRGBA(renderer, x1, y1, r, g, b, a));
+        } else {
+            return (vlineRGBA(renderer, x1, y1, y2, r, g, b, a));
+        }
+    } else {
+        if (y1 == y2) {
+            return (hlineRGBA(renderer, x1, x2, y1, r, g, b, a));
+        }
+    }
+
+    /*
+     * Swap x1, x2 if required
+     */
+    if (x1 > x2) {
+        tmp = x1;
+        x1 = x2;
+        x2 = tmp;
+    }
+
+    /*
+     * Swap y1, y2 if required
+     */
+    if (y1 > y2) {
+        tmp = y1;
+        y1 = y2;
+        y2 = tmp;
+    }
+
+    /*
+     * Create destination rect
+     */
+    rect.x = x1;
+    rect.y = y1;
+    rect.w = x2 - x1; /* NOTE(review): no +1 here, unlike boxRGBA; confirm the outline is meant to be one unit narrower than the filled box */
+    rect.h = y2 - y1;
+
+    /*
+     * Draw
+     */
+    result = true;
+    result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND);
+    result &= SDL_SetRenderDrawColor(renderer, r, g, b, a);
+    result &= SDL_RenderRect(renderer, &rect);
+    return result;
+}
+
+/* ---- Rounded Rectangle */
+
+/*!
+\brief Draw rounded-corner rectangle with blending.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point (i.e. top right) of the rectangle.
+\param y1 Y coordinate of the first point (i.e. top right) of the rectangle.
+\param x2 X coordinate of the second point (i.e. bottom left) of the rectangle.
+\param y2 Y coordinate of the second point (i.e. bottom left) of the rectangle.
+\param rad The radius of the corner arc.
+\param color The color value of the rectangle to draw; its bytes are read in memory order as R, G, B, A (0xRRGGBBAA on big-endian hosts only).
+
+\returns Returns true on success, false on failure.
+*/
+bool roundedRectangleColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 rad, Uint32 color)
+{
+    Uint8 *c = (Uint8 *)&color; /* bytes in memory order: c[0]=r, c[1]=g, c[2]=b, c[3]=a (host-endian dependent) */
+    return roundedRectangleRGBA(renderer, x1, y1, x2, y2, rad, c[0], c[1], c[2], c[3]);
+}
+
+/*!
+\brief Draw rounded-corner rectangle with blending. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point (i.e. top right) of the rectangle. +\param y1 Y coordinate of the first point (i.e. top right) of the rectangle. +\param x2 X coordinate of the second point (i.e. bottom left) of the rectangle. +\param y2 Y coordinate of the second point (i.e. bottom left) of the rectangle. +\param rad The radius of the corner arc. +\param r The red value of the rectangle to draw. +\param g The green value of the rectangle to draw. +\param b The blue value of the rectangle to draw. +\param a The alpha value of the rectangle to draw. + +\returns Returns true on success, false on failure. +*/ +bool roundedRectangleRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + int result = 0; + Sint16 tmp; + Sint16 w, h; + Sint16 xx1, xx2; + Sint16 yy1, yy2; + + /* + * Check renderer + */ + if (renderer == NULL) + { + return false; + } + + /* + * Check radius vor valid range + */ + if (rad < 0) { + return false; + } + + /* + * Special case - no rounding + */ + if (rad <= 1) { + return rectangleRGBA(renderer, x1, y1, x2, y2, r, g, b, a); + } + + /* + * Test for special cases of straight lines or single point + */ + if (x1 == x2) { + if (y1 == y2) { + return (pixelRGBA(renderer, x1, y1, r, g, b, a)); + } else { + return (vlineRGBA(renderer, x1, y1, y2, r, g, b, a)); + } + } else { + if (y1 == y2) { + return (hlineRGBA(renderer, x1, x2, y1, r, g, b, a)); + } + } + + /* + * Swap x1, x2 if required + */ + if (x1 > x2) { + tmp = x1; + x1 = x2; + x2 = tmp; + } + + /* + * Swap y1, y2 if required + */ + if (y1 > y2) { + tmp = y1; + y1 = y2; + y2 = tmp; + } + + /* + * Calculate width&height + */ + w = x2 - x1; + h = y2 - y1; + + /* + * Maybe adjust radius + */ + if ((rad * 2) > w) + { + rad = w / 2; + } + if ((rad * 2) > h) + { + rad = h / 2; + } + + /* + * Draw corners + */ + xx1 = x1 + rad; + 
xx2 = x2 - rad; + yy1 = y1 + rad; + yy2 = y2 - rad; + result &= arcRGBA(renderer, xx1, yy1, rad, 180, 270, r, g, b, a); + result &= arcRGBA(renderer, xx2, yy1, rad, 270, 360, r, g, b, a); + result &= arcRGBA(renderer, xx1, yy2, rad, 90, 180, r, g, b, a); + result &= arcRGBA(renderer, xx2, yy2, rad, 0, 90, r, g, b, a); + + /* + * Draw lines + */ + if (xx1 <= xx2) { + result &= hlineRGBA(renderer, xx1, xx2, y1, r, g, b, a); + result &= hlineRGBA(renderer, xx1, xx2, y2, r, g, b, a); + } + if (yy1 <= yy2) { + result &= vlineRGBA(renderer, x1, yy1, yy2, r, g, b, a); + result &= vlineRGBA(renderer, x2, yy1, yy2, r, g, b, a); + } + + return result; +} + +/* ---- Rounded Box */ + +/*! +\brief Draw rounded-corner box (filled rectangle) with blending. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point (i.e. top right) of the box. +\param y1 Y coordinate of the first point (i.e. top right) of the box. +\param x2 X coordinate of the second point (i.e. bottom left) of the box. +\param y2 Y coordinate of the second point (i.e. bottom left) of the box. +\param rad The radius of the corner arcs of the box. +\param color The color value of the box to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool roundedBoxColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 rad, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return roundedBoxRGBA(renderer, x1, y1, x2, y2, rad, c[0], c[1], c[2], c[3]); +} + +/*! +\brief Draw rounded-corner box (filled rectangle) with blending. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point (i.e. top right) of the box. +\param y1 Y coordinate of the first point (i.e. top right) of the box. +\param x2 X coordinate of the second point (i.e. bottom left) of the box. +\param y2 Y coordinate of the second point (i.e. bottom left) of the box. +\param rad The radius of the corner arcs of the box. 
+\param r The red value of the box to draw. +\param g The green value of the box to draw. +\param b The blue value of the box to draw. +\param a The alpha value of the box to draw. + +\returns Returns true on success, false on failure. +*/ +bool roundedBoxRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, + Sint16 y2, Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + bool result; + Sint16 w, h, r2, tmp; + Sint16 cx = 0; + Sint16 cy = rad; + Sint16 ocx = (Sint16) 0xffff; + Sint16 ocy = (Sint16) 0xffff; + Sint16 df = 1 - rad; + Sint16 d_e = 3; + Sint16 d_se = -2 * rad + 5; + Sint16 xpcx, xmcx, xpcy, xmcy; + Sint16 ypcy, ymcy, ypcx, ymcx; + Sint16 x, y, dx, dy; + + /* + * Check destination renderer + */ + if (renderer == NULL) + { + return false; + } + + /* + * Check radius vor valid range + */ + if (rad < 0) { + return false; + } + + /* + * Special case - no rounding + */ + if (rad <= 1) { + return boxRGBA(renderer, x1, y1, x2, y2, r, g, b, a); + } + + /* + * Test for special cases of straight lines or single point + */ + if (x1 == x2) { + if (y1 == y2) { + return (pixelRGBA(renderer, x1, y1, r, g, b, a)); + } else { + return (vlineRGBA(renderer, x1, y1, y2, r, g, b, a)); + } + } else { + if (y1 == y2) { + return (hlineRGBA(renderer, x1, x2, y1, r, g, b, a)); + } + } + + /* + * Swap x1, x2 if required + */ + if (x1 > x2) { + tmp = x1; + x1 = x2; + x2 = tmp; + } + + /* + * Swap y1, y2 if required + */ + if (y1 > y2) { + tmp = y1; + y1 = y2; + y2 = tmp; + } + + /* + * Calculate width&height + */ + w = x2 - x1 + 1; + h = y2 - y1 + 1; + + /* + * Maybe adjust radius + */ + r2 = rad + rad; + if (r2 > w) + { + rad = w / 2; + r2 = rad + rad; + } + if (r2 > h) + { + rad = h / 2; + } + + /* Setup filled circle drawing for corners */ + x = x1 + rad; + y = y1 + rad; + dx = x2 - x1 - rad - rad; + dy = y2 - y1 - rad - rad; + + /* + * Set color + */ + result = true; + result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? 
SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND); + result &= SDL_SetRenderDrawColor(renderer, r, g, b, a); + + /* + * Draw corners + */ + do { + xpcx = x + cx; + xmcx = x - cx; + xpcy = x + cy; + xmcy = x - cy; + if (ocy != cy) { + if (cy > 0) { + ypcy = y + cy; + ymcy = y - cy; + result &= hline(renderer, xmcx, xpcx + dx, ypcy + dy); + result &= hline(renderer, xmcx, xpcx + dx, ymcy); + } else { + result &= hline(renderer, xmcx, xpcx + dx, y); + } + ocy = cy; + } + if (ocx != cx) { + if (cx != cy) { + if (cx > 0) { + ypcx = y + cx; + ymcx = y - cx; + result &= hline(renderer, xmcy, xpcy + dx, ymcx); + result &= hline(renderer, xmcy, xpcy + dx, ypcx + dy); + } else { + result &= hline(renderer, xmcy, xpcy + dx, y); + } + } + ocx = cx; + } + + /* + * Update + */ + if (df < 0) { + df += d_e; + d_e += 2; + d_se += 2; + } else { + df += d_se; + d_e += 2; + d_se += 4; + cy--; + } + cx++; + } while (cx <= cy); + + /* Inside */ + if (dx > 0 && dy > 0) { + result &= boxRGBA(renderer, x1, y1 + rad + 1, x2, y2 - rad, r, g, b, a); + } + + return (result); +} + +/* ---- Box */ + +/*! +\brief Draw box (filled rectangle) with blending. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point (i.e. top right) of the box. +\param y1 Y coordinate of the first point (i.e. top right) of the box. +\param x2 X coordinate of the second point (i.e. bottom left) of the box. +\param y2 Y coordinate of the second point (i.e. bottom left) of the box. +\param color The color value of the box to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool boxColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return boxRGBA(renderer, x1, y1, x2, y2, c[0], c[1], c[2], c[3]); +} + +/*! +\brief Draw box (filled rectangle) with blending. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point (i.e. top right) of the box. 
+\param y1 Y coordinate of the first point (i.e. top right) of the box.
+\param x2 X coordinate of the second point (i.e. bottom left) of the box.
+\param y2 Y coordinate of the second point (i.e. bottom left) of the box.
+\param r The red value of the box to draw.
+\param g The green value of the box to draw.
+\param b The blue value of the box to draw.
+\param a The alpha value of the box to draw.
+
+\returns Returns true on success, false on failure.
+*/
+bool boxRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a)
+{
+    bool result;
+    Sint16 tmp;
+    SDL_FRect rect;
+
+    /*
+     * Test for special cases of straight lines or single point
+     */
+    if (x1 == x2) {
+        if (y1 == y2) {
+            return (pixelRGBA(renderer, x1, y1, r, g, b, a));
+        } else {
+            return (vlineRGBA(renderer, x1, y1, y2, r, g, b, a));
+        }
+    } else {
+        if (y1 == y2) {
+            return (hlineRGBA(renderer, x1, x2, y1, r, g, b, a));
+        }
+    }
+
+    /*
+     * Swap x1, x2 if required (the two points may be given in either order)
+     */
+    if (x1 > x2) {
+        tmp = x1;
+        x1 = x2;
+        x2 = tmp;
+    }
+
+    /*
+     * Swap y1, y2 if required
+     */
+    if (y1 > y2) {
+        tmp = y1;
+        y1 = y2;
+        y2 = tmp;
+    }
+
+    /*
+     * Create destination rect
+     */
+    rect.x = x1;
+    rect.y = y1;
+    rect.w = x2 - x1 + 1; /* +1: both x1 and x2 are part of the box */
+    rect.h = y2 - y1 + 1; /* +1: both y1 and y2 are part of the box */
+
+    /*
+     * Draw
+     */
+    result = true;
+    result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND);
+    result &= SDL_SetRenderDrawColor(renderer, r, g, b, a);
+    result &= SDL_RenderFillRect(renderer, &rect);
+    return result;
+}
+
+/* ----- Line */
+
+/*!
+\brief Draw line using the currently set color; the renderer's draw color and blend mode are used as they are.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point of the line.
+\param y1 Y coordinate of the first point of the line.
+\param x2 X coordinate of the second point of the line.
+\param y2 Y coordinate of the second point of the line.
+
+\returns Returns true on success, false on failure.
+*/
+bool line(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2)
+{
+    /*
+     * Draw
+     */
+    return SDL_RenderLine(renderer, x1, y1, x2, y2);
+}
+
+/*!
+\brief Draw line with alpha blending.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point of the line.
+\param y1 Y coordinate of the first point of the line.
+\param x2 X coordinate of the second point of the line.
+\param y2 Y coordinate of the second point of the line.
+\param color The color value of the line to draw; its bytes are read in memory order as R, G, B, A (0xRRGGBBAA on big-endian hosts only).
+
+\returns Returns true on success, false on failure.
+*/
+bool lineColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint32 color)
+{
+    Uint8 *c = (Uint8 *)&color; /* bytes in memory order: c[0]=r, c[1]=g, c[2]=b, c[3]=a (host-endian dependent) */
+    return lineRGBA(renderer, x1, y1, x2, y2, c[0], c[1], c[2], c[3]);
+}
+
+/*!
+\brief Draw line with alpha blending (enabled if a<255). The renderer's draw blend mode and draw color are left set to the values used.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point of the line.
+\param y1 Y coordinate of the first point of the line.
+\param x2 X coordinate of the second point of the line.
+\param y2 Y coordinate of the second point of the line.
+\param r The red value of the line to draw.
+\param g The green value of the line to draw.
+\param b The blue value of the line to draw.
+\param a The alpha value of the line to draw.
+
+\returns Returns true only if all three renderer calls succeed, false otherwise.
+*/
+bool lineRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a)
+{
+    /*
+     * Draw
+     */
+    bool result = true;
+    result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND);
+    result &= SDL_SetRenderDrawColor(renderer, r, g, b, a);
+    result &= SDL_RenderLine(renderer, x1, y1, x2, y2);
+    return result;
+}
+
+/* ---- AA Line */
+
+#define AAlevels 256 /* number of intensity levels; AAlevels - 1 is used as the weighting mask */
+#define AAbits 8 /* bits per intensity level; the error accumulator is shifted by 32 - AAbits */
+
+/*!
+\brief Internal function to draw anti-aliased line with alpha blending and endpoint control.
+
+This implementation of the Wu antialiasing code is based on Mike Abrash's
+DDJ article which was reprinted as Chapter 42 of his Graphics Programming
+Black Book, but has been optimized to work with SDL and utilizes 32-bit
+fixed-point arithmetic by A. Schiffler. The endpoint control allows
+suppressing the last pixel, which is useful for rendering continuous aa-lines
+with alpha<255.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point of the aa-line.
+\param y1 Y coordinate of the first point of the aa-line.
+\param x2 X coordinate of the second point of the aa-line.
+\param y2 Y coordinate of the second point of the aa-line.
+\param r The red value of the aa-line to draw.
+\param g The green value of the aa-line to draw.
+\param b The blue value of the aa-line to draw.
+\param a The alpha value of the aa-line to draw.
+\param draw_endpoint Flag indicating if the endpoint should be drawn; draw if non-zero.
+
+\returns Returns true on success, false on failure.
+*/ +int _aalineRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a, int draw_endpoint) +{ + Sint32 xx0, yy0, xx1, yy1; + bool result; + Uint32 intshift, erracc, erradj; + Uint32 erracctmp, wgt, wgtcompmask; + int dx, dy, tmp, xdir, y0p1, x0pxdir; + + /* + * Keep on working with 32bit numbers + */ + xx0 = x1; + yy0 = y1; + xx1 = x2; + yy1 = y2; + + /* + * Reorder points to make dy positive + */ + if (yy0 > yy1) { + tmp = yy0; + yy0 = yy1; + yy1 = tmp; + tmp = xx0; + xx0 = xx1; + xx1 = tmp; + } + + /* + * Calculate distance + */ + dx = xx1 - xx0; + dy = yy1 - yy0; + + /* + * Adjust for negative dx and set xdir + */ + if (dx >= 0) { + xdir = 1; + } else { + xdir = -1; + dx = (-dx); + } + + /* + * Check for special cases + */ + if (dx == 0) { + /* + * Vertical line + */ + if (draw_endpoint) + { + return (vlineRGBA(renderer, x1, y1, y2, r, g, b, a)); + } else { + if (dy > 0) { + return (vlineRGBA(renderer, x1, yy0, yy0+dy, r, g, b, a)); + } else { + return (pixelRGBA(renderer, x1, y1, r, g, b, a)); + } + } + } else if (dy == 0) { + /* + * Horizontal line + */ + if (draw_endpoint) + { + return (hlineRGBA(renderer, x1, x2, y1, r, g, b, a)); + } else { + if (dx > 0) { + return (hlineRGBA(renderer, xx0, xx0+(xdir*dx), y1, r, g, b, a)); + } else { + return (pixelRGBA(renderer, x1, y1, r, g, b, a)); + } + } + } else if ((dx == dy) && (draw_endpoint)) { + /* + * Diagonal line (with endpoint) + */ + return (lineRGBA(renderer, x1, y1, x2, y2, r, g, b, a)); + } + + + /* + * Line is not horizontal, vertical or diagonal (with endpoint) + */ + result = true; + + /* + * Zero accumulator + */ + erracc = 0; + + /* + * # of bits by which to shift erracc to get intensity level + */ + intshift = 32 - AAbits; + + /* + * Mask used to flip all bits in an intensity weighting + */ + wgtcompmask = AAlevels - 1; + + /* + * Draw the initial pixel in the foreground color + */ + result &= pixelRGBA(renderer, x1, y1, r, g, b, a); + + /* + * 
x-major or y-major? + */ + if (dy > dx) { + + /* + * y-major. Calculate 16-bit fixed point fractional part of a pixel that + * X advances every time Y advances 1 pixel, truncating the result so that + * we won't overrun the endpoint along the X axis + */ + /* + * Not-so-portable version: erradj = ((Uint64)dx << 32) / (Uint64)dy; + */ + erradj = ((dx << 16) / dy) << 16; + + /* + * draw all pixels other than the first and last + */ + x0pxdir = xx0 + xdir; + while (--dy) { + erracctmp = erracc; + erracc += erradj; + if (erracc <= erracctmp) { + /* + * rollover in error accumulator, x coord advances + */ + xx0 = x0pxdir; + x0pxdir += xdir; + } + yy0++; /* y-major so always advance Y */ + + /* + * the AAbits most significant bits of erracc give us the intensity + * weighting for this pixel, and the complement of the weighting for + * the paired pixel. + */ + wgt = (erracc >> intshift) & 255; + result &= pixelRGBAWeight (renderer, xx0, yy0, r, g, b, a, 255 - wgt); + result &= pixelRGBAWeight (renderer, x0pxdir, yy0, r, g, b, a, wgt); + } + + } else { + + /* + * x-major line. Calculate 16-bit fixed-point fractional part of a pixel + * that Y advances each time X advances 1 pixel, truncating the result so + * that we won't overrun the endpoint along the X axis. + */ + /* + * Not-so-portable version: erradj = ((Uint64)dy << 32) / (Uint64)dx; + */ + erradj = ((dy << 16) / dx) << 16; + + /* + * draw all pixels other than the first and last + */ + y0p1 = yy0 + 1; + while (--dx) { + + erracctmp = erracc; + erracc += erradj; + if (erracc <= erracctmp) { + /* + * Accumulator turned over, advance y + */ + yy0 = y0p1; + y0p1++; + } + xx0 += xdir; /* x-major so always advance X */ + /* + * the AAbits most significant bits of erracc give us the intensity + * weighting for this pixel, and the complement of the weighting for + * the paired pixel. 
+ */ + wgt = (erracc >> intshift) & 255; + result &= pixelRGBAWeight (renderer, xx0, yy0, r, g, b, a, 255 - wgt); + result &= pixelRGBAWeight (renderer, xx0, y0p1, r, g, b, a, wgt); + } + } + + /* + * Do we have to draw the endpoint + */ + if (draw_endpoint) { + /* + * Draw final pixel, always exactly intersected by the line and doesn't + * need to be weighted. + */ + result &= pixelRGBA (renderer, x2, y2, r, g, b, a); + } + + return (result); +} + +/*! +\brief Draw anti-aliased line with alpha blending. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point of the aa-line. +\param y1 Y coordinate of the first point of the aa-line. +\param x2 X coordinate of the second point of the aa-line. +\param y2 Y coordinate of the second point of the aa-line. +\param color The color value of the aa-line to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool aalineColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return _aalineRGBA(renderer, x1, y1, x2, y2, c[0], c[1], c[2], c[3], 1); +} + +/*! +\brief Draw anti-aliased line with alpha blending. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point of the aa-line. +\param y1 Y coordinate of the first point of the aa-line. +\param x2 X coordinate of the second point of the aa-line. +\param y2 Y coordinate of the second point of the aa-line. +\param r The red value of the aa-line to draw. +\param g The green value of the aa-line to draw. +\param b The blue value of the aa-line to draw. +\param a The alpha value of the aa-line to draw. + +\returns Returns true on success, false on failure. +*/ +bool aalineRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + return _aalineRGBA(renderer, x1, y1, x2, y2, r, g, b, a, 1); +} + +/* ----- Circle */ + +/*! +\brief Draw circle with blending. 
+ +\param renderer The renderer to draw on. +\param x X coordinate of the center of the circle. +\param y Y coordinate of the center of the circle. +\param rad Radius in pixels of the circle. +\param color The color value of the circle to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool circleColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return ellipseRGBA(renderer, x, y, rad, rad, c[0], c[1], c[2], c[3]); +} + +/*! +\brief Draw circle with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the circle. +\param y Y coordinate of the center of the circle. +\param rad Radius in pixels of the circle. +\param r The red value of the circle to draw. +\param g The green value of the circle to draw. +\param b The blue value of the circle to draw. +\param a The alpha value of the circle to draw. + +\returns Returns true on success, false on failure. +*/ +bool circleRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + return ellipseRGBA(renderer, x, y, rad, rad, r, g, b, a); +} + +/* ----- Arc */ + +/*! +\brief Arc with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the arc. +\param y Y coordinate of the center of the arc. +\param rad Radius in pixels of the arc. +\param start Starting radius in degrees of the arc. 0 degrees is down, increasing counterclockwise. +\param end Ending radius in degrees of the arc. 0 degrees is down, increasing counterclockwise. +\param color The color value of the arc to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool arcColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Sint16 start, Sint16 end, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return arcRGBA(renderer, x, y, rad, start, end, c[0], c[1], c[2], c[3]); +} + +/*! +\brief Arc with blending. 
+ +\param renderer The renderer to draw on. +\param x X coordinate of the center of the arc. +\param y Y coordinate of the center of the arc. +\param rad Radius in pixels of the arc. +\param start Starting radius in degrees of the arc. 0 degrees is down, increasing counterclockwise. +\param end Ending radius in degrees of the arc. 0 degrees is down, increasing counterclockwise. +\param r The red value of the arc to draw. +\param g The green value of the arc to draw. +\param b The blue value of the arc to draw. +\param a The alpha value of the arc to draw. + +\returns Returns true on success, false on failure. +*/ +/* TODO: rewrite algorithm; arc endpoints are not always drawn */ +bool arcRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Sint16 start, Sint16 end, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + bool result; + Sint16 cx = 0; + Sint16 cy = rad; + Sint16 df = 1 - rad; + Sint16 d_e = 3; + Sint16 d_se = -2 * rad + 5; + Sint16 xpcx, xmcx, xpcy, xmcy; + Sint16 ypcy, ymcy, ypcx, ymcx; + Uint8 drawoct; + int startoct, endoct, oct, stopval_start = 0, stopval_end = 0; + double dstart, dend, temp = 0.; + + /* + * Sanity check radius + */ + if (rad < 0) { + return (false); + } + + /* + * Special case for rad=0 - draw a point + */ + if (rad == 0) { + return (pixelRGBA(renderer, x, y, r, g, b, a)); + } + + /* + Octant labeling + + \ 5 | 6 / + \ | / + 4 \ | / 7 + \|/ + ------+------ +x + /|\ + 3 / | \ 0 + / | \ + / 2 | 1 \ + +y + + Initially reset bitmask to 0x00000000 + the set whether or not to keep drawing a given octant. + For example: 0x00111100 means we're drawing in octants 2-5 + */ + drawoct = 0; + + /* + * Fixup angles + */ + start %= 360; + end %= 360; + /* 0 <= start & end < 360; note that sometimes start > end - if so, arc goes back through 0. */ + while (start < 0) start += 360; + while (end < 0) end += 360; + start %= 360; + end %= 360; + + /* now, we find which octants we're drawing in. 
*/ + startoct = start / 45; + endoct = end / 45; + oct = startoct - 1; + + /* stopval_start, stopval_end; what values of cx to stop at. */ + do { + oct = (oct + 1) % 8; + + if (oct == startoct) { + /* need to compute stopval_start for this octant. Look at picture above if this is unclear */ + dstart = (double)start; + switch (oct) + { + case 0: + case 3: + temp = sin(dstart * M_PI / 180.); + break; + case 1: + case 6: + temp = cos(dstart * M_PI / 180.); + break; + case 2: + case 5: + temp = -cos(dstart * M_PI / 180.); + break; + case 4: + case 7: + temp = -sin(dstart * M_PI / 180.); + break; + } + temp *= rad; + stopval_start = (int)temp; + + /* + This isn't arbitrary, but requires graph paper to explain well. + The basic idea is that we're always changing drawoct after we draw, so we + stop immediately after we render the last sensible pixel at x = ((int)temp). + and whether to draw in this octant initially + */ + if (oct % 2) drawoct |= (1 << oct); /* this is basically like saying drawoct[oct] = true, if drawoct were a bool array */ + else drawoct &= 255 - (1 << oct); /* this is basically like saying drawoct[oct] = false */ + } + if (oct == endoct) { + /* need to compute stopval_end for this octant */ + dend = (double)end; + switch (oct) + { + case 0: + case 3: + temp = sin(dend * M_PI / 180); + break; + case 1: + case 6: + temp = cos(dend * M_PI / 180); + break; + case 2: + case 5: + temp = -cos(dend * M_PI / 180); + break; + case 4: + case 7: + temp = -sin(dend * M_PI / 180); + break; + } + temp *= rad; + stopval_end = (int)temp; + + /* and whether to draw in this octant initially */ + if (startoct == endoct) { + /* note: we start drawing, stop, then start again in this case */ + /* otherwise: we only draw in this octant, so initialize it to false, it will get set back to true */ + if (start > end) { + /* unfortunately, if we're in the same octant and need to draw over the whole circle, */ + /* we need to set the rest to true, because the while loop will end at 
the bottom. */ + drawoct = 255; + } else { + drawoct &= 255 - (1 << oct); + } + } + else if (oct % 2) drawoct &= 255 - (1 << oct); + else drawoct |= (1 << oct); + } else if (oct != startoct) { /* already verified that it's != endoct */ + drawoct |= (1 << oct); /* draw this entire segment */ + } + } while (oct != endoct); + + /* so now we have what octants to draw and when to draw them. all that's left is the actual raster code. */ + + /* + * Set color + */ + result = true; + result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND); + result &= SDL_SetRenderDrawColor(renderer, r, g, b, a); + + /* + * Draw arc + */ + do { + ypcy = y + cy; + ymcy = y - cy; + if (cx > 0) { + xpcx = x + cx; + xmcx = x - cx; + + /* always check if we're drawing a certain octant before adding a pixel to that octant. */ + if (drawoct & 4) result &= pixel(renderer, xmcx, ypcy); + if (drawoct & 2) result &= pixel(renderer, xpcx, ypcy); + if (drawoct & 32) result &= pixel(renderer, xmcx, ymcy); + if (drawoct & 64) result &= pixel(renderer, xpcx, ymcy); + } else { + if (drawoct & 96) result &= pixel(renderer, x, ymcy); + if (drawoct & 6) result &= pixel(renderer, x, ypcy); + } + + xpcy = x + cy; + xmcy = x - cy; + if (cx > 0 && cx != cy) { + ypcx = y + cx; + ymcx = y - cx; + if (drawoct & 8) result &= pixel(renderer, xmcy, ypcx); + if (drawoct & 1) result &= pixel(renderer, xpcy, ypcx); + if (drawoct & 16) result &= pixel(renderer, xmcy, ymcx); + if (drawoct & 128) result &= pixel(renderer, xpcy, ymcx); + } else if (cx == 0) { + if (drawoct & 24) result &= pixel(renderer, xmcy, y); + if (drawoct & 129) result &= pixel(renderer, xpcy, y); + } + + /* + * Update whether we're drawing an octant + */ + if (stopval_start == cx) { + /* works like an on-off switch. */ + /* This is just in case start & end are in the same octant. 
*/ + if (drawoct & (1 << startoct)) drawoct &= 255 - (1 << startoct); + else drawoct |= (1 << startoct); + } + if (stopval_end == cx) { + if (drawoct & (1 << endoct)) drawoct &= 255 - (1 << endoct); + else drawoct |= (1 << endoct); + } + + /* + * Update pixels + */ + if (df < 0) { + df += d_e; + d_e += 2; + d_se += 2; + } else { + df += d_se; + d_e += 2; + d_se += 4; + cy--; + } + cx++; + } while (cx <= cy); + + return (result); +} + +/* ----- AA Circle */ + +/*! +\brief Draw anti-aliased circle with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the aa-circle. +\param y Y coordinate of the center of the aa-circle. +\param rad Radius in pixels of the aa-circle. +\param color The color value of the aa-circle to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool aacircleColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return aaellipseRGBA(renderer, x, y, rad, rad, c[0], c[1], c[2], c[3]); +} + +/*! +\brief Draw anti-aliased circle with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the aa-circle. +\param y Y coordinate of the center of the aa-circle. +\param rad Radius in pixels of the aa-circle. +\param r The red value of the aa-circle to draw. +\param g The green value of the aa-circle to draw. +\param b The blue value of the aa-circle to draw. +\param a The alpha value of the aa-circle to draw. + +\returns Returns true on success, false on failure. +*/ +bool aacircleRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + /* + * Draw + */ + return aaellipseRGBA(renderer, x, y, rad, rad, r, g, b, a); +} + +/* ----- Ellipse */ + +/*! +\brief Internal function to draw pixels or lines in 4 quadrants. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the quadrant. 
+\param y Y coordinate of the center of the quadrant. +\param dx X offset in pixels of the corners of the quadrant. +\param dy Y offset in pixels of the corners of the quadrant. +\param f Flag indicating if the quadrant should be filled (1) or not (0). + +\returns Returns true on success, false on failure. +*/ +int _drawQuadrants(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 dx, Sint16 dy, Sint32 f) +{ + bool result = true; + Sint16 xpdx, xmdx; + Sint16 ypdy, ymdy; + + if (dx == 0) { + if (dy == 0) { + result &= pixel(renderer, x, y); + } else { + ypdy = y + dy; + ymdy = y - dy; + if (f) { + result &= vline(renderer, x, ymdy, ypdy); + } else { + result &= pixel(renderer, x, ypdy); + result &= pixel(renderer, x, ymdy); + } + } + } else { + xpdx = x + dx; + xmdx = x - dx; + ypdy = y + dy; + ymdy = y - dy; + if (f) { + result &= vline(renderer, xpdx, ymdy, ypdy); + result &= vline(renderer, xmdx, ymdy, ypdy); + } else { + result &= pixel(renderer, xpdx, ypdy); + result &= pixel(renderer, xmdx, ypdy); + result &= pixel(renderer, xpdx, ymdy); + result &= pixel(renderer, xmdx, ymdy); + } + } + + return result; +} + +/*! +\brief Internal function to draw ellipse or filled ellipse with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the ellipse. +\param y Y coordinate of the center of the ellipse. +\param rx Horizontal radius in pixels of the ellipse. +\param ry Vertical radius in pixels of the ellipse. +\param r The red value of the ellipse to draw. +\param g The green value of the ellipse to draw. +\param b The blue value of the ellipse to draw. +\param a The alpha value of the ellipse to draw. +\param f Flag indicating if the ellipse should be filled (1) or not (0). + +\returns Returns true on success, false on failure. 
+*/ +#define DEFAULT_ELLIPSE_OVERSCAN 4 +bool _ellipseRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint8 r, Uint8 g, Uint8 b, Uint8 a, Sint32 f) +{ + bool result; + Sint32 rxi, ryi; + Sint32 rx2, ry2, rx22, ry22; + Sint32 error; + Sint32 curX, curY, curXp1, curYm1; + Sint32 scrX, scrY, oldX, oldY; + Sint32 deltaX, deltaY; + Sint32 ellipseOverscan; + + /* + * Sanity check radii + */ + if ((rx < 0) || (ry < 0)) { + return (false); + } + + /* + * Set color + */ + result = true; + result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND); + result &= SDL_SetRenderDrawColor(renderer, r, g, b, a); + + /* + * Special cases for rx=0 and/or ry=0: draw a hline/vline/pixel + */ + if (rx == 0) { + if (ry == 0) { + return (pixel(renderer, x, y)); + } else { + return (vline(renderer, x, y - ry, y + ry)); + } + } else { + if (ry == 0) { + return (hline(renderer, x - rx, x + rx, y)); + } + } + + /* + * Adjust overscan + */ + rxi = rx; + ryi = ry; + if (rxi >= 512 || ryi >= 512) + { + ellipseOverscan = DEFAULT_ELLIPSE_OVERSCAN / 4; + } + else if (rxi >= 256 || ryi >= 256) + { + ellipseOverscan = DEFAULT_ELLIPSE_OVERSCAN / 2; + } + else + { + ellipseOverscan = DEFAULT_ELLIPSE_OVERSCAN / 1; + } + + /* + * Top/bottom center points. 
+ */ + oldX = scrX = 0; + oldY = scrY = ryi; + result &= _drawQuadrants(renderer, x, y, 0, ry, f); + + /* Midpoint ellipse algorithm with overdraw */ + rxi *= ellipseOverscan; + ryi *= ellipseOverscan; + rx2 = rxi * rxi; + rx22 = rx2 + rx2; + ry2 = ryi * ryi; + ry22 = ry2 + ry2; + curX = 0; + curY = ryi; + deltaX = 0; + deltaY = rx22 * curY; + + /* Points in segment 1 */ + error = ry2 - rx2 * ryi + rx2 / 4; + while (deltaX <= deltaY) + { + curX++; + deltaX += ry22; + + error += deltaX + ry2; + if (error >= 0) + { + curY--; + deltaY -= rx22; + error -= deltaY; + } + + scrX = curX / ellipseOverscan; + scrY = curY / ellipseOverscan; + if ((scrX != oldX && scrY == oldY) || (scrX != oldX && scrY != oldY)) { + result &= _drawQuadrants(renderer, x, y, scrX, scrY, f); + oldX = scrX; + oldY = scrY; + } + } + + /* Points in segment 2 */ + if (curY > 0) + { + curXp1 = curX + 1; + curYm1 = curY - 1; + error = ry2 * curX * curXp1 + ((ry2 + 3) / 4) + rx2 * curYm1 * curYm1 - rx2 * ry2; + while (curY > 0) + { + curY--; + deltaY -= rx22; + + error += rx2; + error -= deltaY; + + if (error <= 0) + { + curX++; + deltaX += ry22; + error += deltaX; + } + + scrX = curX / ellipseOverscan; + scrY = curY / ellipseOverscan; + if ((scrX != oldX && scrY == oldY) || (scrX != oldX && scrY != oldY)) { + oldY--; + for (;oldY >= scrY; oldY--) { + result &= _drawQuadrants(renderer, x, y, scrX, oldY, f); + /* prevent overdraw */ + if (f) { + oldY = scrY - 1; + } + } + oldX = scrX; + oldY = scrY; + } + } + + /* Remaining points in vertical */ + if (!f) { + oldY--; + for (;oldY >= 0; oldY--) { + result &= _drawQuadrants(renderer, x, y, scrX, oldY, f); + } + } + } + + return (result); +} + +/*! +\brief Draw ellipse with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the ellipse. +\param y Y coordinate of the center of the ellipse. +\param rx Horizontal radius in pixels of the ellipse. +\param ry Vertical radius in pixels of the ellipse. 
+\param color The color value of the ellipse to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool ellipseColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return _ellipseRGBA(renderer, x, y, rx, ry, c[0], c[1], c[2], c[3], 0); +} + +/*! +\brief Draw ellipse with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the ellipse. +\param y Y coordinate of the center of the ellipse. +\param rx Horizontal radius in pixels of the ellipse. +\param ry Vertical radius in pixels of the ellipse. +\param r The red value of the ellipse to draw. +\param g The green value of the ellipse to draw. +\param b The blue value of the ellipse to draw. +\param a The alpha value of the ellipse to draw. + +\returns Returns true on success, false on failure. +*/ +bool ellipseRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + return _ellipseRGBA(renderer, x, y, rx, ry, r, g, b, a, 0); +} + +/* ----- Filled Circle */ + +/*! +\brief Draw filled circle with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the filled circle. +\param y Y coordinate of the center of the filled circle. +\param rad Radius in pixels of the filled circle. +\param color The color value of the filled circle to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool filledCircleColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return filledEllipseRGBA(renderer, x, y, rad, rad, c[0], c[1], c[2], c[3]); +} + +/*! +\brief Draw filled circle with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the filled circle. +\param y Y coordinate of the center of the filled circle. +\param rad Radius in pixels of the filled circle. 
+\param r The red value of the filled circle to draw. +\param g The green value of the filled circle to draw. +\param b The blue value of the filled circle to draw. +\param a The alpha value of the filled circle to draw. + +\returns Returns true on success, false on failure. +*/ +bool filledCircleRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + return _ellipseRGBA(renderer, x, y, rad, rad, r, g ,b, a, 1); +} + + +/* ----- AA Ellipse */ + +/* Windows targets do not have lrint, so provide a local inline version */ +#if defined(_MSC_VER) && _MSC_VER < 1920 +/* Detect 64bit and use intrinsic version */ +#ifdef _M_X64 +#include +static __inline long + lrint(float f) +{ + return _mm_cvtss_si32(_mm_load_ss(&f)); +} +#elif defined(_M_IX86) +__inline long int + lrint (double flt) +{ + int intgr; + _asm + { + fld flt + fistp intgr + }; + return intgr; +} +#elif defined(_M_ARM) +#include +#pragma warning(push) +#pragma warning(disable: 4716) +__declspec(naked) long int + lrint (double flt) +{ + __emit(0xEC410B10); // fmdrr d0, r0, r1 + __emit(0xEEBD0B40); // ftosid s0, d0 + __emit(0xEE100A10); // fmrs r0, s0 + __emit(0xE12FFF1E); // bx lr +} +#pragma warning(pop) +#else +#error lrint needed for MSVC on non X86/AMD64/ARM targets. +#endif +#endif + +/*! +\brief Draw anti-aliased ellipse with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the aa-ellipse. +\param y Y coordinate of the center of the aa-ellipse. +\param rx Horizontal radius in pixels of the aa-ellipse. +\param ry Vertical radius in pixels of the aa-ellipse. +\param color The color value of the aa-ellipse to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool aaellipseColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return aaellipseRGBA(renderer, x, y, rx, ry, c[0], c[1], c[2], c[3]); +} + +/*! 
+\brief Draw anti-aliased ellipse with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the aa-ellipse. +\param y Y coordinate of the center of the aa-ellipse. +\param rx Horizontal radius in pixels of the aa-ellipse. +\param ry Vertical radius in pixels of the aa-ellipse. +\param r The red value of the aa-ellipse to draw. +\param g The green value of the aa-ellipse to draw. +\param b The blue value of the aa-ellipse to draw. +\param a The alpha value of the aa-ellipse to draw. + +\returns Returns true on success, false on failure. +*/ +bool aaellipseRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + bool result; + int i; + int a2, b2, ds, dt, dxt, t, s, d; + Sint16 xp, yp, xs, ys, dyt, od, xx, yy, xc2, yc2; + float cp; + double sab; + Uint8 weight, iweight; + + /* + * Sanity check radii + */ + if ((rx < 0) || (ry < 0)) { + return (false); + } + + /* + * Special cases for rx=0 and/or ry=0: draw a hline/vline/pixel + */ + if (rx == 0) { + if (ry == 0) { + return (pixelRGBA(renderer, x, y, r, g, b, a)); + } else { + return (vlineRGBA(renderer, x, y - ry, y + ry, r, g, b, a)); + } + } else { + if (ry == 0) { + return (hlineRGBA(renderer, x - rx, x + rx, y, r, g, b, a)); + } + } + + /* Variable setup */ + a2 = rx * rx; + b2 = ry * ry; + + ds = 2 * a2; + dt = 2 * b2; + + xc2 = 2 * x; + yc2 = 2 * y; + + sab = sqrt((double)(a2 + b2)); + od = (Sint16)lrint(sab*0.01) + 1; /* introduce some overdraw */ + dxt = (Sint16)lrint((double)a2 / sab) + od; + + t = 0; + s = -2 * a2 * ry; + d = 0; + + xp = x; + yp = y - ry; + + /* Draw */ + result = true; + result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? 
SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND); + + /* "End points" */ + result &= pixelRGBA(renderer, xp, yp, r, g, b, a); + result &= pixelRGBA(renderer, xc2 - xp, yp, r, g, b, a); + result &= pixelRGBA(renderer, xp, yc2 - yp, r, g, b, a); + result &= pixelRGBA(renderer, xc2 - xp, yc2 - yp, r, g, b, a); + + for (i = 1; i <= dxt; i++) { + xp--; + d += t - b2; + + if (d >= 0) + ys = yp - 1; + else if ((d - s - a2) > 0) { + if ((2 * d - s - a2) >= 0) + ys = yp + 1; + else { + ys = yp; + yp++; + d -= s + a2; + s += ds; + } + } else { + yp++; + ys = yp + 1; + d -= s + a2; + s += ds; + } + + t -= dt; + + /* Calculate alpha */ + if (s != 0) { + cp = (float) abs(d) / (float) abs(s); + if (cp > 1.0) { + cp = 1.0; + } + } else { + cp = 1.0; + } + + /* Calculate weights */ + weight = (Uint8) (cp * 255); + iweight = 255 - weight; + + /* Upper half */ + xx = xc2 - xp; + result &= pixelRGBAWeight(renderer, xp, yp, r, g, b, a, iweight); + result &= pixelRGBAWeight(renderer, xx, yp, r, g, b, a, iweight); + + result &= pixelRGBAWeight(renderer, xp, ys, r, g, b, a, weight); + result &= pixelRGBAWeight(renderer, xx, ys, r, g, b, a, weight); + + /* Lower half */ + yy = yc2 - yp; + result &= pixelRGBAWeight(renderer, xp, yy, r, g, b, a, iweight); + result &= pixelRGBAWeight(renderer, xx, yy, r, g, b, a, iweight); + + yy = yc2 - ys; + result &= pixelRGBAWeight(renderer, xp, yy, r, g, b, a, weight); + result &= pixelRGBAWeight(renderer, xx, yy, r, g, b, a, weight); + } + + /* Replaces original approximation code dyt = abs(yp - yc); */ + dyt = (Sint16)lrint((double)b2 / sab ) + od; + + for (i = 1; i <= dyt; i++) { + yp++; + d -= s + a2; + + if (d <= 0) + xs = xp + 1; + else if ((d + t - b2) < 0) { + if ((2 * d + t - b2) <= 0) + xs = xp - 1; + else { + xs = xp; + xp--; + d += t - b2; + t -= dt; + } + } else { + xp--; + xs = xp - 1; + d += t - b2; + t -= dt; + } + + s += ds; + + /* Calculate alpha */ + if (t != 0) { + cp = (float) abs(d) / (float) abs(t); + if (cp > 1.0) { + cp = 1.0; + } + } 
else { + cp = 1.0; + } + + /* Calculate weight */ + weight = (Uint8) (cp * 255); + iweight = 255 - weight; + + /* Left half */ + xx = xc2 - xp; + yy = yc2 - yp; + result &= pixelRGBAWeight(renderer, xp, yp, r, g, b, a, iweight); + result &= pixelRGBAWeight(renderer, xx, yp, r, g, b, a, iweight); + + result &= pixelRGBAWeight(renderer, xp, yy, r, g, b, a, iweight); + result &= pixelRGBAWeight(renderer, xx, yy, r, g, b, a, iweight); + + /* Right half */ + xx = xc2 - xs; + result &= pixelRGBAWeight(renderer, xs, yp, r, g, b, a, weight); + result &= pixelRGBAWeight(renderer, xx, yp, r, g, b, a, weight); + + result &= pixelRGBAWeight(renderer, xs, yy, r, g, b, a, weight); + result &= pixelRGBAWeight(renderer, xx, yy, r, g, b, a, weight); + } + + return (result); +} + +/* ---- Filled Ellipse */ + +/*! +\brief Draw filled ellipse with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the filled ellipse. +\param y Y coordinate of the center of the filled ellipse. +\param rx Horizontal radius in pixels of the filled ellipse. +\param ry Vertical radius in pixels of the filled ellipse. +\param color The color value of the filled ellipse to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool filledEllipseColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return _ellipseRGBA(renderer, x, y, rx, ry, c[0], c[1], c[2], c[3], 1); +} + +/*! +\brief Draw filled ellipse with blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the filled ellipse. +\param y Y coordinate of the center of the filled ellipse. +\param rx Horizontal radius in pixels of the filled ellipse. +\param ry Vertical radius in pixels of the filled ellipse. +\param r The red value of the filled ellipse to draw. +\param g The green value of the filled ellipse to draw. +\param b The blue value of the filled ellipse to draw. 
+\param a The alpha value of the filled ellipse to draw. + +\returns Returns true on success, false on failure. +*/ +bool filledEllipseRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + return _ellipseRGBA(renderer, x, y, rx, ry, r, g, b, a, 1); +} + +/* ----- Pie */ + +/*! +\brief Internal float (low-speed) pie-calc implementation by drawing polygons. + +Note: Determines vertex array and uses polygon or filledPolygon drawing routines to render. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the pie. +\param y Y coordinate of the center of the pie. +\param rad Radius in pixels of the pie. +\param start Starting radius in degrees of the pie. +\param end Ending radius in degrees of the pie. +\param r The red value of the pie to draw. +\param g The green value of the pie to draw. +\param b The blue value of the pie to draw. +\param a The alpha value of the pie to draw. +\param filled Flag indicating if the pie should be filled (=1) or not (=0). + +\returns Returns true on success, false on failure. 
+*/ +/* TODO: rewrite algorithm; pie is not always accurate */ +bool _pieRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Sint16 start, Sint16 end, Uint8 r, Uint8 g, Uint8 b, Uint8 a, Uint8 filled) +{ + bool result; + double angle, start_angle, end_angle; + double deltaAngle; + double dr; + int numpoints, i; + Sint16 *vx, *vy; + + /* + * Sanity check radii + */ + if (rad < 0) { + return (false); + } + + /* + * Fixup angles + */ + start = start % 360; + end = end % 360; + + /* + * Special case for rad=0 - draw a point + */ + if (rad == 0) { + return (pixelRGBA(renderer, x, y, r, g, b, a)); + } + + /* + * Variable setup + */ + dr = (double) rad; + deltaAngle = 3.0 / dr; + start_angle = (double) start *(2.0 * M_PI / 360.0); + end_angle = (double) end *(2.0 * M_PI / 360.0); + if (start > end) { + end_angle += (2.0 * M_PI); + } + + /* We will always have at least 2 points */ + numpoints = 2; + + /* Count points (rather than calculating it) */ + angle = start_angle; + while (angle < end_angle) { + angle += deltaAngle; + numpoints++; + } + + /* Allocate combined vertex array */ + vx = vy = (Sint16 *) malloc(2 * sizeof(Uint16) * numpoints); + if (vx == NULL) { + return (false); + } + + /* Update point to start of vy */ + vy += numpoints; + + /* Center */ + vx[0] = x; + vy[0] = y; + + /* First vertex */ + angle = start_angle; + vx[1] = x + (int) (dr * cos(angle)); + vy[1] = y + (int) (dr * sin(angle)); + + if (numpoints<3) + { + result = lineRGBA(renderer, vx[0], vy[0], vx[1], vy[1], r, g, b, a); + } + else + { + /* Calculate other vertices */ + i = 2; + angle = start_angle; + while (angle < end_angle) { + angle += deltaAngle; + if (angle>end_angle) + { + angle = end_angle; + } + vx[i] = x + (int) (dr * cos(angle)); + vy[i] = y + (int) (dr * sin(angle)); + i++; + } + + /* Draw */ + if (filled) { + result = filledPolygonRGBA(renderer, vx, vy, numpoints, r, g, b, a); + } else { + result = polygonRGBA(renderer, vx, vy, numpoints, r, g, b, a); + } + } + + /* Free 
combined vertex array */ + free(vx); + + return (result); +} + +/*! +\brief Draw pie (outline) with alpha blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the pie. +\param y Y coordinate of the center of the pie. +\param rad Radius in pixels of the pie. +\param start Starting radius in degrees of the pie. +\param end Ending radius in degrees of the pie. +\param color The color value of the pie to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool pieColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, + Sint16 start, Sint16 end, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return _pieRGBA(renderer, x, y, rad, start, end, c[0], c[1], c[2], c[3], 0); +} + +/*! +\brief Draw pie (outline) with alpha blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the pie. +\param y Y coordinate of the center of the pie. +\param rad Radius in pixels of the pie. +\param start Starting radius in degrees of the pie. +\param end Ending radius in degrees of the pie. +\param r The red value of the pie to draw. +\param g The green value of the pie to draw. +\param b The blue value of the pie to draw. +\param a The alpha value of the pie to draw. + +\returns Returns true on success, false on failure. +*/ +bool pieRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, + Sint16 start, Sint16 end, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + return _pieRGBA(renderer, x, y, rad, start, end, r, g, b, a, 0); +} + +/*! +\brief Draw filled pie with alpha blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the filled pie. +\param y Y coordinate of the center of the filled pie. +\param rad Radius in pixels of the filled pie. +\param start Starting radius in degrees of the filled pie. +\param end Ending radius in degrees of the filled pie. +\param color The color value of the filled pie to draw (0xRRGGBBAA). 
+ +\returns Returns true on success, false on failure. +*/ +bool filledPieColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Sint16 start, Sint16 end, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return _pieRGBA(renderer, x, y, rad, start, end, c[0], c[1], c[2], c[3], 1); +} + +/*! +\brief Draw filled pie with alpha blending. + +\param renderer The renderer to draw on. +\param x X coordinate of the center of the filled pie. +\param y Y coordinate of the center of the filled pie. +\param rad Radius in pixels of the filled pie. +\param start Starting radius in degrees of the filled pie. +\param end Ending radius in degrees of the filled pie. +\param r The red value of the filled pie to draw. +\param g The green value of the filled pie to draw. +\param b The blue value of the filled pie to draw. +\param a The alpha value of the filled pie to draw. + +\returns Returns true on success, false on failure. +*/ +bool filledPieRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, + Sint16 start, Sint16 end, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + return _pieRGBA(renderer, x, y, rad, start, end, r, g, b, a, 1); +} + +/* ------ Trigon */ + +/*! +\brief Draw trigon (triangle outline) with alpha blending. + +Note: Creates vertex array and uses polygon routine to render. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point of the trigon. +\param y1 Y coordinate of the first point of the trigon. +\param x2 X coordinate of the second point of the trigon. +\param y2 Y coordinate of the second point of the trigon. +\param x3 X coordinate of the third point of the trigon. +\param y3 Y coordinate of the third point of the trigon. +\param color The color value of the trigon to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. 
+*/
+bool trigonColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, Uint32 color)
+{
+    /* Pack the three corners into the parallel X/Y arrays the polygon routine expects. */
+    Sint16 xs[3] = { x1, x2, x3 };
+    Sint16 ys[3] = { y1, y2, y3 };
+
+    return polygonColor(renderer, xs, ys, 3, color);
+}
+
+/*!
+\brief Draw trigon (triangle outline) with alpha blending.
+
+Note: Creates vertex array and uses polygon routine to render.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point of the trigon.
+\param y1 Y coordinate of the first point of the trigon.
+\param x2 X coordinate of the second point of the trigon.
+\param y2 Y coordinate of the second point of the trigon.
+\param x3 X coordinate of the third point of the trigon.
+\param y3 Y coordinate of the third point of the trigon.
+\param r The red value of the trigon to draw.
+\param g The green value of the trigon to draw.
+\param b The blue value of the trigon to draw.
+\param a The alpha value of the trigon to draw.
+
+\returns Returns true on success, false on failure.
+*/
+bool trigonRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3,
+    Uint8 r, Uint8 g, Uint8 b, Uint8 a)
+{
+    /* Pack the three corners into the parallel X/Y arrays the polygon routine expects. */
+    Sint16 xs[3] = { x1, x2, x3 };
+    Sint16 ys[3] = { y1, y2, y3 };
+
+    return polygonRGBA(renderer, xs, ys, 3, r, g, b, a);
+}
+
+/* ------ AA-Trigon */
+
+/*!
+\brief Draw anti-aliased trigon (triangle outline) with alpha blending.
+
+Note: Creates vertex array and uses aapolygon routine to render.
+
+\param renderer The renderer to draw on.
+\param x1 X coordinate of the first point of the aa-trigon.
+\param y1 Y coordinate of the first point of the aa-trigon.
+\param x2 X coordinate of the second point of the aa-trigon.
+\param y2 Y coordinate of the second point of the aa-trigon.
+\param x3 X coordinate of the third point of the aa-trigon.
+\param y3 Y coordinate of the third point of the aa-trigon.
+\param color The color value of the aa-trigon to draw (0xRRGGBBAA).
+ +\returns Returns true on success, false on failure. +*/ +bool aatrigonColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, Uint32 color) +{ + Sint16 vx[3]; + Sint16 vy[3]; + + vx[0]=x1; + vx[1]=x2; + vx[2]=x3; + vy[0]=y1; + vy[1]=y2; + vy[2]=y3; + + return(aapolygonColor(renderer,vx,vy,3,color)); +} + +/*! +\brief Draw anti-aliased trigon (triangle outline) with alpha blending. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point of the aa-trigon. +\param y1 Y coordinate of the first point of the aa-trigon. +\param x2 X coordinate of the second point of the aa-trigon. +\param y2 Y coordinate of the second point of the aa-trigon. +\param x3 X coordinate of the third point of the aa-trigon. +\param y3 Y coordinate of the third point of the aa-trigon. +\param r The red value of the aa-trigon to draw. +\param g The green value of the aa-trigon to draw. +\param b The blue value of the aa-trigon to draw. +\param a The alpha value of the aa-trigon to draw. + +\returns Returns true on success, false on failure. +*/ +bool aatrigonRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, + Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + Sint16 vx[3]; + Sint16 vy[3]; + + vx[0]=x1; + vx[1]=x2; + vx[2]=x3; + vy[0]=y1; + vy[1]=y2; + vy[2]=y3; + + return(aapolygonRGBA(renderer,vx,vy,3,r,g,b,a)); +} + +/* ------ Filled Trigon */ + +/*! +\brief Draw filled trigon (triangle) with alpha blending. + +Note: Creates vertex array and uses aapolygon routine to render. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point of the filled trigon. +\param y1 Y coordinate of the first point of the filled trigon. +\param x2 X coordinate of the second point of the filled trigon. +\param y2 Y coordinate of the second point of the filled trigon. +\param x3 X coordinate of the third point of the filled trigon. 
+\param y3 Y coordinate of the third point of the filled trigon. +\param color The color value of the filled trigon to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool filledTrigonColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, Uint32 color) +{ + Sint16 vx[3]; + Sint16 vy[3]; + + vx[0]=x1; + vx[1]=x2; + vx[2]=x3; + vy[0]=y1; + vy[1]=y2; + vy[2]=y3; + + return(filledPolygonColor(renderer,vx,vy,3,color)); +} + +/*! +\brief Draw filled trigon (triangle) with alpha blending. + +Note: Creates vertex array and uses aapolygon routine to render. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point of the filled trigon. +\param y1 Y coordinate of the first point of the filled trigon. +\param x2 X coordinate of the second point of the filled trigon. +\param y2 Y coordinate of the second point of the filled trigon. +\param x3 X coordinate of the third point of the filled trigon. +\param y3 Y coordinate of the third point of the filled trigon. +\param r The red value of the filled trigon to draw. +\param g The green value of the filled trigon to draw. +\param b The blue value of the filled trigon to draw. +\param a The alpha value of the filled trigon to draw. + +\returns Returns true on success, false on failure. +*/ +bool filledTrigonRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, + Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + Sint16 vx[3]; + Sint16 vy[3]; + + vx[0]=x1; + vx[1]=x2; + vx[2]=x3; + vy[0]=y1; + vy[1]=y2; + vy[2]=y3; + + return(filledPolygonRGBA(renderer,vx,vy,3,r,g,b,a)); +} + +/* ---- Polygon */ + +/*! +\brief Draw polygon with alpha blending. + +\param renderer The renderer to draw on. +\param vx Vertex array containing X coordinates of the points of the polygon. +\param vy Vertex array containing Y coordinates of the points of the polygon. +\param n Number of points in the vertex array. 
Minimum number is 3.
+\param color The color value of the polygon to draw (0xRRGGBBAA).
+
+\returns Returns true on success, false on failure.
+*/
+bool polygonColor(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, Uint32 color)
+{
+	/* Bytes 0..3 of the packed value (in memory order) are passed on as r, g, b, a. */
+	Uint8 *c = (Uint8 *)&color;
+	return polygonRGBA(renderer, vx, vy, n, c[0], c[1], c[2], c[3]);
+}
+
+/*!
+\brief Draw polygon with the currently set color and blend mode.
+
+\param renderer The renderer to draw on.
+\param vx Vertex array containing X coordinates of the points of the polygon.
+\param vy Vertex array containing Y coordinates of the points of the polygon.
+\param n Number of points in the vertex array. Minimum number is 3.
+
+\returns Returns true on success, false on failure.
+*/
+bool polygon(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n)
+{
+	/*
+	* Working state: overall result, loop index, point count and point buffer
+	*/
+	bool result = true;
+	int i, nn;
+	SDL_FPoint* points;
+
+	/*
+	* Both vertex arrays are required
+	*/
+	if (vx == NULL) {
+		return (false);
+	}
+	if (vy == NULL) {
+		return (false);
+	}
+
+	/*
+	* Fewer than 3 vertices is rejected
+	*/
+	if (n < 3) {
+		return (false);
+	}
+
+	/*
+	* Allocate room for n + 1 points (one more than the vertex count)
+	*/
+	nn = n + 1;
+	points = (SDL_FPoint*)malloc(sizeof(SDL_FPoint) * nn);
+	if (points == NULL)
+	{
+		return false;
+	}
+	/* NOTE(review): the text of this patch is damaged from the next line on. The
+	   loop header breaks off after "i" and what follows is the tail of a doc comment
+	   whose opening is missing, so the remainder of polygon() and any definitions
+	   that stood between it and _gfxPrimitivesCompareInt are not present here.
+	   Restore this span from the original SDL3_gfxPrimitives.c before applying. */
+	for (i=0; ib.
+*/
+/* qsort-style comparison callback: both arguments are dereferenced as int and the
+   difference of the two values is returned. NOTE(review): the plain subtraction
+   can overflow int when the operands have opposite signs and large magnitude. */
+int _gfxPrimitivesCompareInt(const void *a, const void *b)
+{
+	return (*(const int *) a) - (*(const int *) b);
+}
+
+/*!
+\brief Global vertex array to use if optional parameters are not given in filledPolygonMT calls.
+
+Note: Used for non-multithreaded (default) operation of filledPolygonMT.
+*/
+static int *gfxPrimitivesPolyIntsGlobal = NULL;
+
+/*!
+\brief Number of ints allocated for the global vertex array; 0 means not allocated yet.
+
+Note: Used for non-multithreaded (default) operation of filledPolygonMT.
+*/
+static int gfxPrimitivesPolyAllocatedGlobal = 0;
+
+/*!
+\brief Draw filled polygon with alpha blending (multi-threaded capable).
+ +Note: The last two parameters are optional; but are required for multithreaded operation. + +\param renderer The renderer to draw on. +\param vx Vertex array containing X coordinates of the points of the filled polygon. +\param vy Vertex array containing Y coordinates of the points of the filled polygon. +\param n Number of points in the vertex array. Minimum number is 3. +\param r The red value of the filled polygon to draw. +\param g The green value of the filled polygon to draw. +\param b The blue value of the filled polygon to draw. +\param a The alpha value of the filled polygon to draw. +\param polyInts Preallocated, temporary vertex array used for sorting vertices. Required for multithreaded operation; set to NULL otherwise. +\param polyAllocated Flag indicating if temporary vertex array was allocated. Required for multithreaded operation; set to NULL otherwise. + +\returns Returns true on success, false on failure. +*/ +int filledPolygonRGBAMT(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, Uint8 r, Uint8 g, Uint8 b, Uint8 a, int **polyInts, int *polyAllocated) +{ + bool result; + int i; + int y, xa, xb; + int miny, maxy; + int x1, y1; + int x2, y2; + int ind1, ind2; + int ints; + int *gfxPrimitivesPolyInts = NULL; + int *gfxPrimitivesPolyIntsNew = NULL; + int gfxPrimitivesPolyAllocated = 0; + + /* + * Vertex array NULL check + */ + if (vx == NULL) { + return (false); + } + if (vy == NULL) { + return (false); + } + + /* + * Sanity check number of edges + */ + if (n < 3) { + return false; + } + + /* + * Map polygon cache + */ + if ((polyInts==NULL) || (polyAllocated==NULL)) { + /* Use global cache */ + gfxPrimitivesPolyInts = gfxPrimitivesPolyIntsGlobal; + gfxPrimitivesPolyAllocated = gfxPrimitivesPolyAllocatedGlobal; + } else { + /* Use local cache */ + gfxPrimitivesPolyInts = *polyInts; + gfxPrimitivesPolyAllocated = *polyAllocated; + } + + /* + * Allocate temp array, only grow array + */ + if (!gfxPrimitivesPolyAllocated) { + 
gfxPrimitivesPolyInts = (int *) malloc(sizeof(int) * n); + gfxPrimitivesPolyAllocated = n; + } else { + if (gfxPrimitivesPolyAllocated < n) { + gfxPrimitivesPolyIntsNew = (int *) realloc(gfxPrimitivesPolyInts, sizeof(int) * n); + if (!gfxPrimitivesPolyIntsNew) { + if (!gfxPrimitivesPolyInts) { + free(gfxPrimitivesPolyInts); + gfxPrimitivesPolyInts = NULL; + } + gfxPrimitivesPolyAllocated = 0; + } else { + gfxPrimitivesPolyInts = gfxPrimitivesPolyIntsNew; + gfxPrimitivesPolyAllocated = n; + } + } + } + + /* + * Check temp array + */ + if (gfxPrimitivesPolyInts==NULL) { + gfxPrimitivesPolyAllocated = 0; + } + + /* + * Update cache variables + */ + if ((polyInts==NULL) || (polyAllocated==NULL)) { + gfxPrimitivesPolyIntsGlobal = gfxPrimitivesPolyInts; + gfxPrimitivesPolyAllocatedGlobal = gfxPrimitivesPolyAllocated; + } else { + *polyInts = gfxPrimitivesPolyInts; + *polyAllocated = gfxPrimitivesPolyAllocated; + } + + /* + * Check temp array again + */ + if (gfxPrimitivesPolyInts==NULL) { + return(false); + } + + /* + * Determine Y maxima + */ + miny = vy[0]; + maxy = vy[0]; + for (i = 1; (i < n); i++) { + if (vy[i] < miny) { + miny = vy[i]; + } else if (vy[i] > maxy) { + maxy = vy[i]; + } + } + + /* + * Draw, scanning y + */ + for (y = miny; (y <= maxy); y++) { + ints = 0; + for (i = 0; (i < n); i++) { + if (!i) { + ind1 = n - 1; + ind2 = 0; + } else { + ind1 = i - 1; + ind2 = i; + } + y1 = vy[ind1]; + y2 = vy[ind2]; + if (y1 < y2) { + x1 = vx[ind1]; + x2 = vx[ind2]; + } else if (y1 > y2) { + y2 = vy[ind1]; + y1 = vy[ind2]; + x2 = vx[ind1]; + x1 = vx[ind2]; + } else { + continue; + } + if ( ((y >= y1) && (y < y2)) || ((y == maxy) && (y > y1) && (y <= y2)) ) { + gfxPrimitivesPolyInts[ints++] = ((65536 * (y - y1)) / (y2 - y1)) * (x2 - x1) + (65536 * x1); + } + } + + qsort(gfxPrimitivesPolyInts, ints, sizeof(int), _gfxPrimitivesCompareInt); + + /* + * Set color + */ + result = true; + result &= SDL_SetRenderDrawBlendMode(renderer, (a == 255) ? 
SDL_BLENDMODE_NONE : SDL_BLENDMODE_BLEND); + result &= SDL_SetRenderDrawColor(renderer, r, g, b, a); + + for (i = 0; (i < ints); i += 2) { + xa = gfxPrimitivesPolyInts[i] + 1; + xa = (xa >> 16) + ((xa & 32768) >> 15); + xb = gfxPrimitivesPolyInts[i+1] - 1; + xb = (xb >> 16) + ((xb & 32768) >> 15); + result &= hline(renderer, xa, xb, y); + } + } + + return (result); +} + +/*! +\brief Draw filled polygon with alpha blending. + +\param renderer The renderer to draw on. +\param vx Vertex array containing X coordinates of the points of the filled polygon. +\param vy Vertex array containing Y coordinates of the points of the filled polygon. +\param n Number of points in the vertex array. Minimum number is 3. +\param color The color value of the filled polygon to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool filledPolygonColor(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; + return filledPolygonRGBAMT(renderer, vx, vy, n, c[0], c[1], c[2], c[3], NULL, NULL); +} + +/*! +\brief Draw filled polygon with alpha blending. + +\param renderer The renderer to draw on. +\param vx Vertex array containing X coordinates of the points of the filled polygon. +\param vy Vertex array containing Y coordinates of the points of the filled polygon. +\param n Number of points in the vertex array. Minimum number is 3. +\param r The red value of the filled polygon to draw. +\param g The green value of the filled polygon to draw. +\param b The blue value of the filed polygon to draw. +\param a The alpha value of the filled polygon to draw. + +\returns Returns true on success, false on failure. +*/ +bool filledPolygonRGBA(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + return filledPolygonRGBAMT(renderer, vx, vy, n, r, g, b, a, NULL, NULL); +} + +/* ---- Textured Polygon */ + +/*! 
+\brief Internal function to draw a textured horizontal line. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point (i.e. left) of the line. +\param x2 X coordinate of the second point (i.e. right) of the line. +\param y Y coordinate of the points of the line. +\param texture The texture to retrieve color information from. +\param texture_w The width of the texture. +\param texture_h The height of the texture. +\param texture_dx The X offset for the texture lookup. +\param texture_dy The Y offset for the textured lookup. + +\returns Returns true on success, false on failure. +*/ +bool _HLineTextured(SDL_Renderer *renderer, Sint16 x1, Sint16 x2, Sint16 y, SDL_Texture *texture, int texture_w, int texture_h, int texture_dx, int texture_dy) +{ + Sint16 w; + Sint16 xtmp; + bool result = true; + int texture_x_walker; + int texture_y_start; + SDL_FRect source_rect,dst_rect; + int pixels_written,write_width; + + /* + * Swap x1, x2 if required to ensure x1<=x2 + */ + if (x1 > x2) { + xtmp = x1; + x1 = x2; + x2 = xtmp; + } + + /* + * Calculate width to draw + */ + w = x2 - x1 + 1; + + /* + * Determine where in the texture we start drawing + */ + texture_x_walker = (x1 - texture_dx) % texture_w; + if (texture_x_walker < 0){ + texture_x_walker = texture_w + texture_x_walker ; + } + + texture_y_start = (y + texture_dy) % texture_h; + if (texture_y_start < 0){ + texture_y_start = texture_h + texture_y_start; + } + + /* setup the source rectangle; we are only drawing one horizontal line */ + source_rect.y = texture_y_start; + source_rect.x = texture_x_walker; + source_rect.h = 1; + + /* we will draw to the current y */ + dst_rect.y = y; + dst_rect.h = 1; + + /* if there are enough pixels left in the current row of the texture */ + /* draw it all at once */ + if (w <= texture_w -texture_x_walker){ + source_rect.w = w; + source_rect.x = texture_x_walker; + dst_rect.x= x1; + dst_rect.w = source_rect.w; + result = (SDL_RenderTexture(renderer, texture, 
&source_rect, &dst_rect) == 0); + } else { + /* we need to draw multiple times */ + /* draw the first segment */ + pixels_written = texture_w - texture_x_walker; + source_rect.w = pixels_written; + source_rect.x = texture_x_walker; + dst_rect.x= x1; + dst_rect.w = source_rect.w; + result &= (SDL_RenderTexture(renderer, texture, &source_rect, &dst_rect) == 0); + write_width = texture_w; + + /* now draw the rest */ + /* set the source x to 0 */ + source_rect.x = 0; + while (pixels_written < w){ + if (write_width >= w - pixels_written) { + write_width = w - pixels_written; + } + source_rect.w = write_width; + dst_rect.x = x1 + pixels_written; + dst_rect.w = source_rect.w; + result &= (SDL_RenderTexture(renderer, texture, &source_rect, &dst_rect) == 0); + pixels_written += write_width; + } + } + + return result; +} + +/*! +\brief Draws a polygon filled with the given texture (Multi-Threading Capable). + +\param renderer The renderer to draw on. +\param vx array of x vector components +\param vy array of x vector components +\param n the amount of vectors in the vx and vy array +\param texture the sdl surface to use to fill the polygon +\param texture_dx the offset of the texture relative to the screeen. If you move the polygon 10 pixels +to the left and want the texture to apear the same you need to increase the texture_dx value +\param texture_dy see texture_dx +\param polyInts Preallocated temp array storage for vertex sorting (used for multi-threaded operation) +\param polyAllocated Flag indicating oif the temp array was allocated (used for multi-threaded operation) + +\returns Returns true on success, false on failure. 
+*/
+bool texturedPolygonMT(SDL_Renderer *renderer, const Sint16 * vx, const Sint16 * vy, int n,
+    SDL_Surface * texture, int texture_dx, int texture_dy, int **polyInts, int *polyAllocated)
+{
+    bool result;
+    int i;
+    int y, xa, xb;
+    int miny, maxy;
+    int x1, y1;
+    int x2, y2;
+    int ind1, ind2;
+    int ints;
+    int useGlobalCache;
+    int *gfxPrimitivesPolyInts = NULL;
+    int *gfxPrimitivesPolyIntsTemp = NULL;
+    int gfxPrimitivesPolyAllocated = 0;
+    SDL_Texture *textureAsTexture = NULL;
+
+    /*
+     * Vertex arrays and source surface are dereferenced below; reject NULL
+     * up front (the same check filledPolygonRGBAMT performs on its arrays)
+     */
+    if ((vx == NULL) || (vy == NULL) || (texture == NULL)) {
+        return false;
+    }
+
+    /*
+     * Sanity check number of edges
+     */
+    if (n < 3) {
+        return false;
+    }
+
+    /*
+     * Map polygon cache: caller-supplied when both pointers are given, global otherwise
+     */
+    useGlobalCache = (polyInts == NULL) || (polyAllocated == NULL);
+    if (useGlobalCache) {
+        gfxPrimitivesPolyInts = gfxPrimitivesPolyIntsGlobal;
+        gfxPrimitivesPolyAllocated = gfxPrimitivesPolyAllocatedGlobal;
+    } else {
+        gfxPrimitivesPolyInts = *polyInts;
+        gfxPrimitivesPolyAllocated = *polyAllocated;
+    }
+
+    /*
+     * Allocate temp array, only grow array
+     */
+    if (!gfxPrimitivesPolyAllocated) {
+        gfxPrimitivesPolyInts = (int *) malloc(sizeof(int) * n);
+        gfxPrimitivesPolyAllocated = n;
+    } else {
+        if (gfxPrimitivesPolyAllocated < n) {
+            gfxPrimitivesPolyIntsTemp = (int *) realloc(gfxPrimitivesPolyInts, sizeof(int) * n);
+            if (gfxPrimitivesPolyIntsTemp == NULL) {
+                /* Realloc failed - keeps original memory block, but fails this operation */
+                return(false);
+            }
+            gfxPrimitivesPolyInts = gfxPrimitivesPolyIntsTemp;
+            gfxPrimitivesPolyAllocated = n;
+        }
+    }
+
+    /*
+     * A missing buffer always goes together with a recorded size of 0
+     */
+    if (gfxPrimitivesPolyInts==NULL) {
+        gfxPrimitivesPolyAllocated = 0;
+    }
+
+    /*
+     * Update cache variables
+     */
+    if (useGlobalCache) {
+        gfxPrimitivesPolyIntsGlobal = gfxPrimitivesPolyInts;
+        gfxPrimitivesPolyAllocatedGlobal = gfxPrimitivesPolyAllocated;
+    } else {
+        *polyInts = gfxPrimitivesPolyInts;
+        *polyAllocated = gfxPrimitivesPolyAllocated;
+    }
+
+    /*
+     * Without a buffer nothing can be drawn
+     */
+    if (gfxPrimitivesPolyInts==NULL) {
+        return(false);
+    }
+
+    /*
+     * Determine Y minimum and maximum (the scanline range)
+     */
+    miny = vy[0];
+    maxy = vy[0];
+    for (i = 1; (i < n); i++) {
+        if (vy[i] < miny) {
+            miny = vy[i];
+        } else if (vy[i] > maxy) {
+            maxy = vy[i];
+        }
+    }
+
+    /* Create texture for drawing; it is destroyed again before returning */
+    textureAsTexture = SDL_CreateTextureFromSurface(renderer, texture);
+    if (textureAsTexture == NULL)
+    {
+        return false;
+    }
+    SDL_SetTextureBlendMode(textureAsTexture, SDL_BLENDMODE_BLEND);
+
+    /*
+     * Draw, scanning y
+     */
+    result = true;
+    for (y = miny; (y <= maxy); y++) {
+        ints = 0;
+        for (i = 0; (i < n); i++) {
+            /* Edge i runs from the previous vertex (wrapping around) to vertex i */
+            if (!i) {
+                ind1 = n - 1;
+                ind2 = 0;
+            } else {
+                ind1 = i - 1;
+                ind2 = i;
+            }
+            y1 = vy[ind1];
+            y2 = vy[ind2];
+            if (y1 < y2) {
+                x1 = vx[ind1];
+                x2 = vx[ind2];
+            } else if (y1 > y2) {
+                /* Orient the edge so that y1 < y2 */
+                y2 = vy[ind1];
+                y1 = vy[ind2];
+                x2 = vx[ind1];
+                x1 = vx[ind2];
+            } else {
+                /* Horizontal edges contribute no intersection */
+                continue;
+            }
+            if ( ((y >= y1) && (y < y2)) || ((y == maxy) && (y > y1) && (y <= y2)) ) {
+                /*
+                 * 16.16 fixed-point X of the intersection. The intermediate
+                 * products can exceed 32 bits for very large polygons, so they
+                 * are computed in long long; the final value lies between
+                 * 65536*x1 and 65536*x2 and therefore fits an int.
+                 */
+                gfxPrimitivesPolyInts[ints++] = (int) ((((long long) 65536 * (y - y1)) / (y2 - y1)) * (x2 - x1) + ((long long) 65536 * x1));
+            }
+        }
+
+        qsort(gfxPrimitivesPolyInts, ints, sizeof(int), _gfxPrimitivesCompareInt);
+
+        /* Fill between intersection pairs; an unpaired trailing entry is skipped */
+        for (i = 0; (i + 1 < ints); i += 2) {
+            xa = gfxPrimitivesPolyInts[i] + 1;
+            xa = (xa >> 16) + ((xa & 32768) >> 15);
+            xb = gfxPrimitivesPolyInts[i+1] - 1;
+            xb = (xb >> 16) + ((xb & 32768) >> 15);
+            result &= _HLineTextured(renderer, xa, xb, y, textureAsTexture, texture->w, texture->h, texture_dx, texture_dy);
+        }
+    }
+
+    SDL_DestroyTexture(textureAsTexture);
+
+    return (result);
+}
+
+/*!
+\brief Draws a polygon filled with the given texture.
+
+This standard version is calling multithreaded versions with NULL cache parameters.
+
+\param renderer The renderer to draw on.
+\param vx array of x vector components +\param vy array of x vector components +\param n the amount of vectors in the vx and vy array +\param texture the sdl surface to use to fill the polygon +\param texture_dx the offset of the texture relative to the screeen. if you move the polygon 10 pixels +to the left and want the texture to apear the same you need to increase the texture_dx value +\param texture_dy see texture_dx + +\returns Returns true on success, false on failure. +*/ +bool texturedPolygon(SDL_Renderer *renderer, const Sint16 * vx, const Sint16 * vy, int n, SDL_Surface *texture, int texture_dx, int texture_dy) +{ + /* + * Draw + */ + return (texturedPolygonMT(renderer, vx, vy, n, texture, texture_dx, texture_dy, NULL, NULL)); +} + +/* ---- Character */ + +/*! +\brief Global cache for NxM pixel font textures created at runtime. +*/ +static SDL_Texture *gfxPrimitivesFont[256]; + +/*! +\brief Pointer to the current font data. Default is a 8x8 pixel internal font. +*/ +static const unsigned char *currentFontdata = gfxPrimitivesFontdata; + +/*! +\brief Width of the current font. Default is 8. +*/ +static Uint32 charWidth = 8; + +/*! +\brief Height of the current font. Default is 8. +*/ +static Uint32 charHeight = 8; + +/*! +\brief Width for rendering. Autocalculated. +*/ +static Uint32 charWidthLocal = 8; + +/*! +\brief Height for rendering. Autocalculated. +*/ +static Uint32 charHeightLocal = 8; + +/*! +\brief Pitch of the current font in bytes. Default is 1. +*/ +static Uint32 charPitch = 1; + +/*! +\brief Characters 90deg clockwise rotations. Default is 0. Max is 3. +*/ +static Uint32 charRotation = 0; + +/*! +\brief Character data size in bytes of the current font. Default is 8. +*/ +static Uint32 charSize = 8; + +/*! +\brief Sets or resets the current global font data. 
+ +The font data array is organized in follows: +[fontdata] = [character 0][character 1]...[character 255] where +[character n] = [byte 1 row 1][byte 2 row 1]...[byte {pitch} row 1][byte 1 row 2] ...[byte {pitch} row height] where +[byte n] = [bit 0]...[bit 7] where +[bit n] = [0 for transparent pixel|1 for colored pixel] + +\param fontdata Pointer to array of font data. Set to NULL, to reset global font to the default 8x8 font. +\param cw Width of character in bytes. Ignored if fontdata==NULL. +\param ch Height of character in bytes. Ignored if fontdata==NULL. +*/ +void gfxPrimitivesSetFont(const void *fontdata, Uint32 cw, Uint32 ch) +{ + int i; + + if ((fontdata) && (cw) && (ch)) { + currentFontdata = (unsigned char *)fontdata; + charWidth = cw; + charHeight = ch; + } else { + currentFontdata = gfxPrimitivesFontdata; + charWidth = 8; + charHeight = 8; + } + + charPitch = (charWidth+7)/8; + charSize = charPitch * charHeight; + + /* Maybe flip width/height for rendering */ + if ((charRotation==1) || (charRotation==3)) + { + charWidthLocal = charHeight; + charHeightLocal = charWidth; + } + else + { + charWidthLocal = charWidth; + charHeightLocal = charHeight; + } + + /* Clear character cache */ + for (i = 0; i < 256; i++) { + if (gfxPrimitivesFont[i]) { + SDL_DestroyTexture(gfxPrimitivesFont[i]); + gfxPrimitivesFont[i] = NULL; + } + } +} + +/*! +\brief Sets current global font character rotation steps. + +Default is 0 (no rotation). 1 = 90deg clockwise. 2 = 180deg clockwise. 3 = 270deg clockwise. +Changing the rotation, will reset the character cache. 
+ +\param rotation Number of 90deg clockwise steps to rotate +*/ +void gfxPrimitivesSetFontRotation(Uint32 rotation) +{ + int i; + + rotation = rotation & 3; + if (charRotation != rotation) + { + /* Store rotation */ + charRotation = rotation; + + /* Maybe flip width/height for rendering */ + if ((charRotation==1) || (charRotation==3)) + { + charWidthLocal = charHeight; + charHeightLocal = charWidth; + } + else + { + charWidthLocal = charWidth; + charHeightLocal = charHeight; + } + + /* Clear character cache */ + for (i = 0; i < 256; i++) { + if (gfxPrimitivesFont[i]) { + SDL_DestroyTexture(gfxPrimitivesFont[i]); + gfxPrimitivesFont[i] = NULL; + } + } + } +} + +/*! +\brief Draw a character of the currently set font. + +\param renderer The Renderer to draw on. +\param x X (horizontal) coordinate of the upper left corner of the character. +\param y Y (vertical) coordinate of the upper left corner of the character. +\param c The character to draw. +\param r The red value of the character to draw. +\param g The green value of the character to draw. +\param b The blue value of the character to draw. +\param a The alpha value of the character to draw. + +\returns Returns true on success, false on failure. +*/ +bool characterRGBA(SDL_Renderer *renderer, Sint16 x, Sint16 y, char c, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + SDL_FRect srect; + SDL_FRect drect; + bool result; + Uint32 ix, iy; + const unsigned char *charpos; + Uint8 *curpos; + Uint8 patt, mask; + Uint8 *linepos; + Uint32 pitch; + SDL_Surface *character; + SDL_Surface *rotatedCharacter; + Uint32 ci; + + /* + * Setup source rectangle + */ + srect.x = 0; + srect.y = 0; + srect.w = charWidthLocal; + srect.h = charHeightLocal; + + /* + * Setup destination rectangle + */ + drect.x = x; + drect.y = y; + drect.w = charWidthLocal; + drect.h = charHeightLocal; + + /* Character index in cache */ + ci = (unsigned char) c; + + /* + * Create new charWidth x charHeight bitmap surface if not already present. 
+ * Might get rotated later. + */ + if (gfxPrimitivesFont[ci] == NULL) { + /* + * Redraw character into surface + */ + character = SDL_CreateSurface( + charWidth, charHeight, SDL_PIXELFORMAT_RGBA8888); + if (character == NULL) { + return (false); + } + + charpos = currentFontdata + ci * charSize; + linepos = (Uint8 *)character->pixels; + pitch = character->pitch; + + /* + * Drawing loop + */ + patt = 0; + for (iy = 0; iy < charHeight; iy++) { + mask = 0x00; + curpos = linepos; + for (ix = 0; ix < charWidth; ix++) { + if (!(mask >>= 1)) { + patt = *charpos++; + mask = 0x80; + } + if (patt & mask) { + *(Uint32 *)curpos = 0xffffffff; + } else { + *(Uint32 *)curpos = 0; + } + curpos += 4; + } + linepos += pitch; + } + + /* Maybe rotate and replace cached image */ + if (charRotation>0) + { + rotatedCharacter = rotateSurface90Degrees(character, charRotation); + SDL_DestroySurface(character); + character = rotatedCharacter; + } + + /* Convert temp surface into texture */ + gfxPrimitivesFont[ci] = SDL_CreateTextureFromSurface(renderer, character); + SDL_DestroySurface(character); + + /* + * Check pointer + */ + if (gfxPrimitivesFont[ci] == NULL) { + return (false); + } + } + + /* + * Set color + */ + result = true; + result &= SDL_SetTextureColorMod(gfxPrimitivesFont[ci], r, g, b); + result &= SDL_SetTextureAlphaMod(gfxPrimitivesFont[ci], a); + + /* + * Draw texture onto destination + */ + result &= SDL_RenderTexture(renderer, gfxPrimitivesFont[ci], &srect, &drect); + + return (result); +} + + +/*! +\brief Draw a character of the currently set font. + +\param renderer The renderer to draw on. +\param x X (horizontal) coordinate of the upper left corner of the character. +\param y Y (vertical) coordinate of the upper left corner of the character. +\param c The character to draw. +\param color The color value of the character to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. 
+*/ +bool characterColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, char c, Uint32 color) +{ + Uint8 *co = (Uint8 *)&color; /* NOTE(review): the four bytes are read in memory order, so which part of the 32-bit value becomes r, g, b and a depends on host byte order - confirm against the documented 0xRRGGBBAA layout */ + return characterRGBA(renderer, x, y, c, co[0], co[1], co[2], co[3]); +} + + +/*! +\brief Draw a string in the currently set font. + +The spacing between consecutive characters in the string is the fixed number of pixels +of the character width of the current global font. + +\param renderer The renderer to draw on. +\param x X (horizontal) coordinate of the upper left corner of the string. +\param y Y (vertical) coordinate of the upper left corner of the string. +\param s The string to draw. +\param color The color value of the string to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool stringColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, const char *s, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; /* NOTE(review): bytes are taken in memory order (c[0]..c[3]), so the r, g, b, a mapping depends on host byte order - same caveat as characterColor above */ + return stringRGBA(renderer, x, y, s, c[0], c[1], c[2], c[3]); +} + +/*! +\brief Draw a string in the currently set font. + +\param renderer The renderer to draw on. +\param x X (horizontal) coordinate of the upper left corner of the string. +\param y Y (vertical) coordinate of the upper left corner of the string. +\param s The string to draw. +\param r The red value of the string to draw. +\param g The green value of the string to draw. +\param b The blue value of the string to draw. +\param a The alpha value of the string to draw. + +\returns Returns true on success, false on failure. 
+*/ +bool stringRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, const char *s, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + bool result = true; + Sint16 curx = x; + Sint16 cury = y; + const char *curchar = s; + + while (*curchar && result) { /* stops at the string terminator or after the first character that fails to draw */ + result &= characterRGBA(renderer, curx, cury, *curchar, r, g, b, a); + switch (charRotation) /* step the pen in the direction that matches the current font rotation */ + { + case 0: + curx += charWidthLocal; + break; + case 2: + curx -= charWidthLocal; + break; + case 1: + cury += charHeightLocal; + break; + case 3: + cury -= charHeightLocal; + break; + } + curchar++; + } + + return (result); +} + +/* ---- Bezier curve */ + +/*! +\brief Internal function to calculate bezier interpolator of data array with ndata values at position 't'. + +\param data Array of values. +\param ndata Size of array. +\param t Position for which to calculate interpolated value. t should be in the range [0, ndata). + +\returns Interpolated value at position t, data[0] when t<0, data[ndata-1] when t>=ndata. 
+*/ +double _evaluateBezier (double *data, int ndata, double t) +{ + double mu, result; + int n,k,kn,nn,nkn; + double blend,muk,munk; + + /* Sanity check bounds */ + if (t<0.0) { + return(data[0]); + } + if (t>=(double)ndata) { + return(data[ndata-1]); + } + + /* Adjust t to the range 0.0 to 1.0 */ + mu=t/(double)ndata; + + /* Calculate interpolate: the sum over k of data[k] * C(n,k) * mu^k * (1-mu)^(n-k), i.e. the Bernstein-weighted sum of the values */ + n=ndata-1; + result=0.0; + muk = 1; + munk = pow(1-mu,(double)n); + for (k=0;k<=n;k++) { + nn = n; + kn = k; + nkn = n - k; + blend = muk * munk; /* mu^k * (1-mu)^(n-k) for the current k */ + muk *= mu; + munk /= (1-mu); /* 1-mu is non-zero here: t < ndata was checked above, so mu < 1 */ + while (nn >= 1) { /* scale blend by n! / (k! * (n-k)!), interleaving the multiplications and divisions */ + blend *= nn; + nn--; + if (kn > 1) { + blend /= (double)kn; + kn--; + } + if (nkn > 1) { + blend /= (double)nkn; + nkn--; + } + } + result += data[k] * blend; + } + + return (result); +} + +/*! +\brief Draw a bezier curve with alpha blending. + +\param renderer The renderer to draw on. +\param vx Vertex array containing X coordinates of the points of the bezier curve. +\param vy Vertex array containing Y coordinates of the points of the bezier curve. 
+\param n Number of points in the vertex array. Minimum number is 3. +\param s Number of steps for the interpolation. Minimum number is 2. +\param color The color value of the bezier curve to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. +*/ +bool bezierColor(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, int s, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; /* NOTE(review): bytes are taken in memory order (c[0]..c[3]), so the r, g, b, a mapping depends on host byte order - confirm against the documented 0xRRGGBBAA layout */ + return bezierRGBA(renderer, vx, vy, n, s, c[0], c[1], c[2], c[3]); +} + +/*! +\brief Draw a bezier curve with alpha blending. + +\param renderer The renderer to draw on. +\param vx Vertex array containing X coordinates of the points of the bezier curve. +\param vy Vertex array containing Y coordinates of the points of the bezier curve. +\param n Number of points in the vertex array. Minimum number is 3. +\param s Number of steps for the interpolation. Minimum number is 2. +\param r The red value of the bezier curve to draw. +\param g The green value of the bezier curve to draw. +\param b The blue value of the bezier curve to draw. +\param a The alpha value of the bezier curve to draw. + +\returns Returns true on success, false on failure. 
+*/ +bool bezierRGBA(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, int s, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + bool result; + int i; + double *x, *y, t, stepsize; + Sint16 x1, y1, x2, y2; + + /* + * Sanity check: fewer than 3 points or fewer than 2 steps is rejected before anything is allocated + */ + if (n < 3) { + return (false); + } + if (s < 2) { + return (false); + } + + /* + * Variable setup + */ + stepsize=(double)1.0/(double)s; + + /* Transfer vertices into double arrays of n+1 elements each; x is released again if the allocation of y fails */ + if ((x=(double *)malloc(sizeof(double)*(n+1)))==NULL) { + return(false); + } + if ((y=(double *)malloc(sizeof(double)*(n+1)))==NULL) { + free(x); + return(false); + } /* NOTE(review): in this copy of the patch the text that follows is cut off inside the loop header below and resumes inside the documentation of the next function - the rest of this function is not visible here */ + for (i=0; i0. +\param color The color value of the line to draw (0xRRGGBBAA). + +\returns Returns true on success, false on failure. 
+*/ +bool thickLineColor(SDL_Renderer *renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint8 width, Uint32 color) +{ + Uint8 *c = (Uint8 *)&color; /* NOTE(review): bytes are taken in memory order (c[0]..c[3]), so the r, g, b, a mapping depends on host byte order - confirm against the documented 0xRRGGBBAA layout */ + return thickLineRGBA(renderer, x1, y1, x2, y2, width, c[0], c[1], c[2], c[3]); +} + +/*! +\brief Draw a thick line with alpha blending. + +\param renderer The renderer to draw on. +\param x1 X coordinate of the first point of the line. +\param y1 Y coordinate of the first point of the line. +\param x2 X coordinate of the second point of the line. +\param y2 Y coordinate of the second point of the line. +\param width Width of the line in pixels. Must be >0. +\param r The red value of the line to draw. +\param g The green value of the line to draw. +\param b The blue value of the line to draw. +\param a The alpha value of the line to draw. + +\returns Returns true on success, false on failure. 
+*/ +bool thickLineRGBA(SDL_Renderer *renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint8 width, Uint8 r, Uint8 g, Uint8 b, Uint8 a) +{ + int wh; + double dx, dy, dx1, dy1, dx2, dy2; + double l, wl2, nx, ny, ang, adj; + Sint16 px[4], py[4]; + + if (renderer == NULL) { + return false; + } + + if (width < 1) { + return false; + } + + /* Special case: thick "point". NOTE(review): the box spans x1 - width/2 .. x2 + width (and the same in y), so it is larger than width and not centred on the point - confirm this extent is intended. */ + if ((x1 == x2) && (y1 == y2)) { + wh = width / 2; + return boxRGBA(renderer, x1 - wh, y1 - wh, x2 + width, y2 + width, r, g, b, a); + } + + /* Special case: width == 1 is drawn as a plain line */ + if (width == 1) { + return lineRGBA(renderer, x1, y1, x2, y2, r, g, b, a); + } + + /* Calculate offsets for sides: (nx, ny) is the line direction scaled to half of (width - adj), where adj lies between 0.1 and 1.0 depending on the line angle. l is non-zero here because the equal-endpoints case returned above. Presumably adj compensates for how diagonal edges rasterise - TODO confirm. */ + dx = (double)(x2 - x1); + dy = (double)(y2 - y1); + l = SDL_sqrt(dx*dx + dy*dy); + ang = SDL_atan2(dx, dy); + adj = 0.1 + 0.9 * SDL_fabs(SDL_cos(2.0 * ang)); + wl2 = ((double)width - adj)/(2.0 * l); + nx = dx * wl2; + ny = dy * wl2; + + /* Build polygon: each endpoint is offset by (+ny, -nx) and by (-ny, +nx), i.e. 
perpendicular to the line, giving the four corners in drawing order */ + dx1 = (double)x1; + dy1 = (double)y1; + dx2 = (double)x2; + dy2 = (double)y2; + px[0] = (Sint16)(dx1 + ny); + px[1] = (Sint16)(dx1 - ny); + px[2] = (Sint16)(dx2 - ny); + px[3] = (Sint16)(dx2 + 
ny); + py[0] = (Sint16)(dy1 - nx); + py[1] = (Sint16)(dy1 + nx); + py[2] = (Sint16)(dy2 + nx); + py[3] = (Sint16)(dy2 - nx); + + /* Draw polygon */ + return filledPolygonRGBA(renderer, px, py, 4, r, g, b, a); +} diff --git a/vendor/SDL3_gfx/SDL3_gfxPrimitives.h b/vendor/SDL3_gfx/SDL3_gfxPrimitives.h new file mode 100644 index 0000000..2d13b62 --- /dev/null +++ b/vendor/SDL3_gfx/SDL3_gfxPrimitives.h @@ -0,0 +1,241 @@ +/* + +SDL3_gfxPrimitives.h: graphics primitives for SDL + +Copyright (C) 2012-2014 Andreas Schiffler + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not +claim that you wrote the original software. If you use this software +in a product, an acknowledgment in the product documentation would be +appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be +misrepresented as being the original software. + +3. This notice may not be removed or altered from any source +distribution. 
+ +Andreas Schiffler -- aschiffler at ferzkopp dot net + +*/ + +#ifndef _SDL3_gfxPrimitives_h +#define _SDL3_gfxPrimitives_h + +#include +#ifndef M_PI +#define M_PI 3.1415926535897932384626433832795 +#endif + +#include + +/* Set up for C function definitions, even when using C++ */ +#ifdef __cplusplus +extern "C" { +#endif + + /* ----- Versioning */ + +#define SDL3_GFXPRIMITIVES_MAJOR 1 +#define SDL3_GFXPRIMITIVES_MINOR 0 +#define SDL3_GFXPRIMITIVES_MICRO 0 + + + /* ---- Function Prototypes */ + +#ifdef _MSC_VER +# if defined(DLL_EXPORT) && !defined(LIBSDL3_GFX_DLL_IMPORT) +# define SDL3_GFXPRIMITIVES_SCOPE __declspec(dllexport) +# else +# ifdef LIBSDL3_GFX_DLL_IMPORT +# define SDL3_GFXPRIMITIVES_SCOPE __declspec(dllimport) +# endif +# endif +#endif +#ifndef SDL3_GFXPRIMITIVES_SCOPE +# define SDL3_GFXPRIMITIVES_SCOPE extern +#endif + + /* Note: all ___Color routines expect the color to be in format 0xRRGGBBAA */ + + /* Pixel */ + + SDL3_GFXPRIMITIVES_SCOPE bool pixelColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool pixelRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Horizontal line */ + + SDL3_GFXPRIMITIVES_SCOPE bool hlineColor(SDL_Renderer * renderer, Sint16 x1, Sint16 x2, Sint16 y, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool hlineRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 x2, Sint16 y, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Vertical line */ + + SDL3_GFXPRIMITIVES_SCOPE bool vlineColor(SDL_Renderer * renderer, Sint16 x, Sint16 y1, Sint16 y2, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool vlineRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y1, Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Rectangle */ + + SDL3_GFXPRIMITIVES_SCOPE bool rectangleColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool rectangleRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, + Sint16 x2, Sint16 y2, 
Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Rounded-Corner Rectangle */ + + SDL3_GFXPRIMITIVES_SCOPE bool roundedRectangleColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 rad, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool roundedRectangleRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, + Sint16 x2, Sint16 y2, Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Filled rectangle (Box) */ + + SDL3_GFXPRIMITIVES_SCOPE bool boxColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool boxRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, + Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Rounded-Corner Filled rectangle (Box) */ + + SDL3_GFXPRIMITIVES_SCOPE bool roundedBoxColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 rad, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool roundedBoxRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, + Sint16 y2, Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Line */ + + SDL3_GFXPRIMITIVES_SCOPE bool lineColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool lineRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, + Sint16 x2, Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* AA Line */ + + SDL3_GFXPRIMITIVES_SCOPE bool aalineColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool aalineRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, + Sint16 x2, Sint16 y2, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Thick Line */ + SDL3_GFXPRIMITIVES_SCOPE bool thickLineColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, + Uint8 width, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool thickLineRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, + Uint8 width, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* 
Circle */ + + SDL3_GFXPRIMITIVES_SCOPE bool circleColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool circleRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Arc */ + + SDL3_GFXPRIMITIVES_SCOPE bool arcColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Sint16 start, Sint16 end, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool arcRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Sint16 start, Sint16 end, + Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* AA Circle */ + + SDL3_GFXPRIMITIVES_SCOPE bool aacircleColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool aacircleRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, + Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Filled Circle */ + + SDL3_GFXPRIMITIVES_SCOPE bool filledCircleColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 r, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool filledCircleRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, + Sint16 rad, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Ellipse */ + + SDL3_GFXPRIMITIVES_SCOPE bool ellipseColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool ellipseRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, + Sint16 rx, Sint16 ry, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* AA Ellipse */ + + SDL3_GFXPRIMITIVES_SCOPE bool aaellipseColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool aaellipseRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, + Sint16 rx, Sint16 ry, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Filled Ellipse */ + + SDL3_GFXPRIMITIVES_SCOPE bool filledEllipseColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rx, Sint16 ry, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool filledEllipseRGBA(SDL_Renderer * renderer, Sint16 x, 
Sint16 y, + Sint16 rx, Sint16 ry, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Pie */ + + SDL3_GFXPRIMITIVES_SCOPE bool pieColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, + Sint16 start, Sint16 end, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool pieRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, + Sint16 start, Sint16 end, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Filled Pie */ + + SDL3_GFXPRIMITIVES_SCOPE bool filledPieColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, + Sint16 start, Sint16 end, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool filledPieRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, Sint16 rad, + Sint16 start, Sint16 end, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Trigon */ + + SDL3_GFXPRIMITIVES_SCOPE bool trigonColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool trigonRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, + Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* AA-Trigon */ + + SDL3_GFXPRIMITIVES_SCOPE bool aatrigonColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool aatrigonRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, + Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Filled Trigon */ + + SDL3_GFXPRIMITIVES_SCOPE bool filledTrigonColor(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool filledTrigonRGBA(SDL_Renderer * renderer, Sint16 x1, Sint16 y1, Sint16 x2, Sint16 y2, Sint16 x3, Sint16 y3, + Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Polygon */ + + SDL3_GFXPRIMITIVES_SCOPE bool polygonColor(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool polygonRGBA(SDL_Renderer * renderer, 
const Sint16 * vx, const Sint16 * vy, + int n, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* AA-Polygon */ + + SDL3_GFXPRIMITIVES_SCOPE bool aapolygonColor(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool aapolygonRGBA(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, + int n, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Filled Polygon */ + + SDL3_GFXPRIMITIVES_SCOPE bool filledPolygonColor(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool filledPolygonRGBA(SDL_Renderer * renderer, const Sint16 * vx, + const Sint16 * vy, int n, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Textured Polygon */ + + SDL3_GFXPRIMITIVES_SCOPE bool texturedPolygon(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, SDL_Surface * texture,int texture_dx,int texture_dy); + + /* Bezier */ + + SDL3_GFXPRIMITIVES_SCOPE bool bezierColor(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, int n, int s, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool bezierRGBA(SDL_Renderer * renderer, const Sint16 * vx, const Sint16 * vy, + int n, int s, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Characters/Strings */ + + SDL3_GFXPRIMITIVES_SCOPE void gfxPrimitivesSetFont(const void *fontdata, Uint32 cw, Uint32 ch); + SDL3_GFXPRIMITIVES_SCOPE void gfxPrimitivesSetFontRotation(Uint32 rotation); + SDL3_GFXPRIMITIVES_SCOPE bool characterColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, char c, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool characterRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, char c, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + SDL3_GFXPRIMITIVES_SCOPE bool stringColor(SDL_Renderer * renderer, Sint16 x, Sint16 y, const char *s, Uint32 color); + SDL3_GFXPRIMITIVES_SCOPE bool stringRGBA(SDL_Renderer * renderer, Sint16 x, Sint16 y, const char *s, Uint8 r, Uint8 g, Uint8 b, Uint8 a); + + /* Ends C function definitions when using C++ 
*/ +#ifdef __cplusplus +} +#endif + +#endif /* _SDL3_gfxPrimitives_h */ diff --git a/vendor/SDL3_gfx/SDL3_gfxPrimitives_font.h b/vendor/SDL3_gfx/SDL3_gfxPrimitives_font.h new file mode 100644 index 0000000..41cb552 --- /dev/null +++ b/vendor/SDL3_gfx/SDL3_gfxPrimitives_font.h @@ -0,0 +1,3106 @@ +/* + +SDL3_gfxPrimitives_font.h: 8x8 font definition + +Copyright (C) 2012-2014 Andreas Schiffler + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not +claim that you wrote the original software. If you use this software +in a product, an acknowledgment in the product documentation would be +appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be +misrepresented as being the original software. + +3. This notice may not be removed or altered from any source +distribution. 
+ +Andreas Schiffler -- aschiffler at ferzkopp dot net + +*/ + +#define GFX_FONTDATAMAX (8*256) + +static unsigned char gfxPrimitivesFontdata[GFX_FONTDATAMAX] = { + + /* + * 0 0x00 '^@' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 1 0x01 '^A' + */ + 0x7e, /* 01111110 */ + 0x81, /* 10000001 */ + 0xa5, /* 10100101 */ + 0x81, /* 10000001 */ + 0xbd, /* 10111101 */ + 0x99, /* 10011001 */ + 0x81, /* 10000001 */ + 0x7e, /* 01111110 */ + + /* + * 2 0x02 '^B' + */ + 0x7e, /* 01111110 */ + 0xff, /* 11111111 */ + 0xdb, /* 11011011 */ + 0xff, /* 11111111 */ + 0xc3, /* 11000011 */ + 0xe7, /* 11100111 */ + 0xff, /* 11111111 */ + 0x7e, /* 01111110 */ + + /* + * 3 0x03 '^C' + */ + 0x6c, /* 01101100 */ + 0xfe, /* 11111110 */ + 0xfe, /* 11111110 */ + 0xfe, /* 11111110 */ + 0x7c, /* 01111100 */ + 0x38, /* 00111000 */ + 0x10, /* 00010000 */ + 0x00, /* 00000000 */ + + /* + * 4 0x04 '^D' + */ + 0x10, /* 00010000 */ + 0x38, /* 00111000 */ + 0x7c, /* 01111100 */ + 0xfe, /* 11111110 */ + 0x7c, /* 01111100 */ + 0x38, /* 00111000 */ + 0x10, /* 00010000 */ + 0x00, /* 00000000 */ + + /* + * 5 0x05 '^E' + */ + 0x38, /* 00111000 */ + 0x7c, /* 01111100 */ + 0x38, /* 00111000 */ + 0xfe, /* 11111110 */ + 0xfe, /* 11111110 */ + 0xd6, /* 11010110 */ + 0x10, /* 00010000 */ + 0x38, /* 00111000 */ + + /* + * 6 0x06 '^F' + */ + 0x10, /* 00010000 */ + 0x38, /* 00111000 */ + 0x7c, /* 01111100 */ + 0xfe, /* 11111110 */ + 0xfe, /* 11111110 */ + 0x7c, /* 01111100 */ + 0x10, /* 00010000 */ + 0x38, /* 00111000 */ + + /* + * 7 0x07 '^G' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 8 0x08 '^H' + */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xe7, /* 11100111 */ + 0xc3, /* 11000011 */ + 0xc3, /* 11000011 
*/ + 0xe7, /* 11100111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + + /* + * 9 0x09 '^I' + */ + 0x00, /* 00000000 */ + 0x3c, /* 00111100 */ + 0x66, /* 01100110 */ + 0x42, /* 01000010 */ + 0x42, /* 01000010 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 10 0x0a '^J' + */ + 0xff, /* 11111111 */ + 0xc3, /* 11000011 */ + 0x99, /* 10011001 */ + 0xbd, /* 10111101 */ + 0xbd, /* 10111101 */ + 0x99, /* 10011001 */ + 0xc3, /* 11000011 */ + 0xff, /* 11111111 */ + + /* + * 11 0x0b '^K' + */ + 0x0f, /* 00001111 */ + 0x07, /* 00000111 */ + 0x0f, /* 00001111 */ + 0x7d, /* 01111101 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x78, /* 01111000 */ + + /* + * 12 0x0c '^L' + */ + 0x3c, /* 00111100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + + /* + * 13 0x0d '^M' + */ + 0x3f, /* 00111111 */ + 0x33, /* 00110011 */ + 0x3f, /* 00111111 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x70, /* 01110000 */ + 0xf0, /* 11110000 */ + 0xe0, /* 11100000 */ + + /* + * 14 0x0e '^N' + */ + 0x7f, /* 01111111 */ + 0x63, /* 01100011 */ + 0x7f, /* 01111111 */ + 0x63, /* 01100011 */ + 0x63, /* 01100011 */ + 0x67, /* 01100111 */ + 0xe6, /* 11100110 */ + 0xc0, /* 11000000 */ + + /* + * 15 0x0f '^O' + */ + 0x18, /* 00011000 */ + 0xdb, /* 11011011 */ + 0x3c, /* 00111100 */ + 0xe7, /* 11100111 */ + 0xe7, /* 11100111 */ + 0x3c, /* 00111100 */ + 0xdb, /* 11011011 */ + 0x18, /* 00011000 */ + + /* + * 16 0x10 '^P' + */ + 0x80, /* 10000000 */ + 0xe0, /* 11100000 */ + 0xf8, /* 11111000 */ + 0xfe, /* 11111110 */ + 0xf8, /* 11111000 */ + 0xe0, /* 11100000 */ + 0x80, /* 10000000 */ + 0x00, /* 00000000 */ + + /* + * 17 0x11 '^Q' + */ + 0x02, /* 00000010 */ + 0x0e, /* 00001110 */ + 0x3e, /* 00111110 */ + 0xfe, /* 11111110 */ + 0x3e, /* 00111110 */ + 0x0e, /* 00001110 */ + 0x02, /* 00000010 */ + 0x00, /* 00000000 */ + + /* + * 
18 0x12 '^R' + */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + + /* + * 19 0x13 '^S' + */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x00, /* 00000000 */ + 0x66, /* 01100110 */ + 0x00, /* 00000000 */ + + /* + * 20 0x14 '^T' + */ + 0x7f, /* 01111111 */ + 0xdb, /* 11011011 */ + 0xdb, /* 11011011 */ + 0x7b, /* 01111011 */ + 0x1b, /* 00011011 */ + 0x1b, /* 00011011 */ + 0x1b, /* 00011011 */ + 0x00, /* 00000000 */ + + /* + * 21 0x15 '^U' + */ + 0x3e, /* 00111110 */ + 0x61, /* 01100001 */ + 0x3c, /* 00111100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0x86, /* 10000110 */ + 0x7c, /* 01111100 */ + + /* + * 22 0x16 '^V' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x7e, /* 01111110 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + + /* + * 23 0x17 '^W' + */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0xff, /* 11111111 */ + + /* + * 24 0x18 '^X' + */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 25 0x19 '^Y' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 26 0x1a '^Z' + */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0xfe, /* 11111110 */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 27 0x1b '^[' + */ + 0x00, /* 00000000 */ + 0x30, /* 00110000 */ + 0x60, /* 
01100000 */ + 0xfe, /* 11111110 */ + 0x60, /* 01100000 */ + 0x30, /* 00110000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 28 0x1c '^\' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 29 0x1d '^]' + */ + 0x00, /* 00000000 */ + 0x24, /* 00100100 */ + 0x66, /* 01100110 */ + 0xff, /* 11111111 */ + 0x66, /* 01100110 */ + 0x24, /* 00100100 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 30 0x1e '^^' + */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x7e, /* 01111110 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 31 0x1f '^_' + */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0x7e, /* 01111110 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 32 0x20 ' ' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 33 0x21 '!' 
+ */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 34 0x22 '"' + */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x24, /* 00100100 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 35 0x23 '#' + */ + 0x6c, /* 01101100 */ + 0x6c, /* 01101100 */ + 0xfe, /* 11111110 */ + 0x6c, /* 01101100 */ + 0xfe, /* 11111110 */ + 0x6c, /* 01101100 */ + 0x6c, /* 01101100 */ + 0x00, /* 00000000 */ + + /* + * 36 0x24 '$' + */ + 0x18, /* 00011000 */ + 0x3e, /* 00111110 */ + 0x60, /* 01100000 */ + 0x3c, /* 00111100 */ + 0x06, /* 00000110 */ + 0x7c, /* 01111100 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 37 0x25 '%' + */ + 0x00, /* 00000000 */ + 0xc6, /* 11000110 */ + 0xcc, /* 11001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x66, /* 01100110 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 38 0x26 '&' + */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x76, /* 01110110 */ + 0xdc, /* 11011100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 39 0x27 ''' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 40 0x28 '(' + */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x00, /* 00000000 */ + + /* + * 41 0x29 ')' + */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x00, /* 00000000 */ + + /* + * 42 0x2a '*' + */ + 0x00, /* 00000000 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0xff, /* 
11111111 */ + 0x3c, /* 00111100 */ + 0x66, /* 01100110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 43 0x2b '+' + */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 44 0x2c ',' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + + /* + * 45 0x2d '-' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 46 0x2e '.' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 47 0x2f '/' + */ + 0x06, /* 00000110 */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x60, /* 01100000 */ + 0xc0, /* 11000000 */ + 0x80, /* 10000000 */ + 0x00, /* 00000000 */ + + /* + * 48 0x30 '0' + */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0xd6, /* 11010110 */ + 0xc6, /* 11000110 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x00, /* 00000000 */ + + /* + * 49 0x31 '1' + */ + 0x18, /* 00011000 */ + 0x38, /* 00111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + + /* + * 50 0x32 '2' + */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0x06, /* 00000110 */ + 0x1c, /* 00011100 */ + 0x30, /* 00110000 */ + 0x66, /* 01100110 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + + /* + * 51 0x33 '3' + */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0x06, /* 00000110 */ + 0x3c, /* 00111100 */ + 0x06, /* 00000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 
00000000 */ + + /* + * 52 0x34 '4' + */ + 0x1c, /* 00011100 */ + 0x3c, /* 00111100 */ + 0x6c, /* 01101100 */ + 0xcc, /* 11001100 */ + 0xfe, /* 11111110 */ + 0x0c, /* 00001100 */ + 0x1e, /* 00011110 */ + 0x00, /* 00000000 */ + + /* + * 53 0x35 '5' + */ + 0xfe, /* 11111110 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0xfc, /* 11111100 */ + 0x06, /* 00000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 54 0x36 '6' + */ + 0x38, /* 00111000 */ + 0x60, /* 01100000 */ + 0xc0, /* 11000000 */ + 0xfc, /* 11111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 55 0x37 '7' + */ + 0xfe, /* 11111110 */ + 0xc6, /* 11000110 */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x00, /* 00000000 */ + + /* + * 56 0x38 '8' + */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 57 0x39 '9' + */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7e, /* 01111110 */ + 0x06, /* 00000110 */ + 0x0c, /* 00001100 */ + 0x78, /* 01111000 */ + 0x00, /* 00000000 */ + + /* + * 58 0x3a ':' + */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 59 0x3b ';' + */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + + /* + * 60 0x3c '<' + */ + 0x06, /* 00000110 */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x06, /* 00000110 */ + 0x00, /* 00000000 */ + + /* + * 61 0x3d '=' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 
0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 62 0x3e '>' + */ + 0x60, /* 01100000 */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x60, /* 01100000 */ + 0x00, /* 00000000 */ + + /* + * 63 0x3f '?' + */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 64 0x40 '@' + */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xde, /* 11011110 */ + 0xde, /* 11011110 */ + 0xde, /* 11011110 */ + 0xc0, /* 11000000 */ + 0x78, /* 01111000 */ + 0x00, /* 00000000 */ + + /* + * 65 0x41 'A' + */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 66 0x42 'B' + */ + 0xfc, /* 11111100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x7c, /* 01111100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0xfc, /* 11111100 */ + 0x00, /* 00000000 */ + + /* + * 67 0x43 'C' + */ + 0x3c, /* 00111100 */ + 0x66, /* 01100110 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 68 0x44 'D' + */ + 0xf8, /* 11111000 */ + 0x6c, /* 01101100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x6c, /* 01101100 */ + 0xf8, /* 11111000 */ + 0x00, /* 00000000 */ + + /* + * 69 0x45 'E' + */ + 0xfe, /* 11111110 */ + 0x62, /* 01100010 */ + 0x68, /* 01101000 */ + 0x78, /* 01111000 */ + 0x68, /* 01101000 */ + 0x62, /* 01100010 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + + /* + * 70 0x46 'F' + */ + 0xfe, /* 11111110 */ + 0x62, /* 01100010 */ + 0x68, /* 01101000 */ + 0x78, /* 01111000 */ + 0x68, /* 01101000 */ + 0x60, /* 01100000 */ + 
0xf0, /* 11110000 */ + 0x00, /* 00000000 */ + + /* + * 71 0x47 'G' + */ + 0x3c, /* 00111100 */ + 0x66, /* 01100110 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0xce, /* 11001110 */ + 0x66, /* 01100110 */ + 0x3a, /* 00111010 */ + 0x00, /* 00000000 */ + + /* + * 72 0x48 'H' + */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 73 0x49 'I' + */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 74 0x4a 'J' + */ + 0x1e, /* 00011110 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x78, /* 01111000 */ + 0x00, /* 00000000 */ + + /* + * 75 0x4b 'K' + */ + 0xe6, /* 11100110 */ + 0x66, /* 01100110 */ + 0x6c, /* 01101100 */ + 0x78, /* 01111000 */ + 0x6c, /* 01101100 */ + 0x66, /* 01100110 */ + 0xe6, /* 11100110 */ + 0x00, /* 00000000 */ + + /* + * 76 0x4c 'L' + */ + 0xf0, /* 11110000 */ + 0x60, /* 01100000 */ + 0x60, /* 01100000 */ + 0x60, /* 01100000 */ + 0x62, /* 01100010 */ + 0x66, /* 01100110 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + + /* + * 77 0x4d 'M' + */ + 0xc6, /* 11000110 */ + 0xee, /* 11101110 */ + 0xfe, /* 11111110 */ + 0xfe, /* 11111110 */ + 0xd6, /* 11010110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 78 0x4e 'N' + */ + 0xc6, /* 11000110 */ + 0xe6, /* 11100110 */ + 0xf6, /* 11110110 */ + 0xde, /* 11011110 */ + 0xce, /* 11001110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 79 0x4f 'O' + */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 80 0x50 'P' + */ + 0xfc, /* 11111100 
*/ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x7c, /* 01111100 */ + 0x60, /* 01100000 */ + 0x60, /* 01100000 */ + 0xf0, /* 11110000 */ + 0x00, /* 00000000 */ + + /* + * 81 0x51 'Q' + */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xce, /* 11001110 */ + 0x7c, /* 01111100 */ + 0x0e, /* 00001110 */ + + /* + * 82 0x52 'R' + */ + 0xfc, /* 11111100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x7c, /* 01111100 */ + 0x6c, /* 01101100 */ + 0x66, /* 01100110 */ + 0xe6, /* 11100110 */ + 0x00, /* 00000000 */ + + /* + * 83 0x53 'S' + */ + 0x3c, /* 00111100 */ + 0x66, /* 01100110 */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 84 0x54 'T' + */ + 0x7e, /* 01111110 */ + 0x7e, /* 01111110 */ + 0x5a, /* 01011010 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 85 0x55 'U' + */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 86 0x56 'V' + */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x00, /* 00000000 */ + + /* + * 87 0x57 'W' + */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xd6, /* 11010110 */ + 0xd6, /* 11010110 */ + 0xfe, /* 11111110 */ + 0x6c, /* 01101100 */ + 0x00, /* 00000000 */ + + /* + * 88 0x58 'X' + */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 89 0x59 'Y' + */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 
*/ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 90 0x5a 'Z' + */ + 0xfe, /* 11111110 */ + 0xc6, /* 11000110 */ + 0x8c, /* 10001100 */ + 0x18, /* 00011000 */ + 0x32, /* 00110010 */ + 0x66, /* 01100110 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + + /* + * 91 0x5b '[' + */ + 0x3c, /* 00111100 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 92 0x5c '\' + */ + 0xc0, /* 11000000 */ + 0x60, /* 01100000 */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x06, /* 00000110 */ + 0x02, /* 00000010 */ + 0x00, /* 00000000 */ + + /* + * 93 0x5d ']' + */ + 0x3c, /* 00111100 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 94 0x5e '^' + */ + 0x10, /* 00010000 */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 95 0x5f '_' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + + /* + * 96 0x60 '`' + */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 97 0x61 'a' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x78, /* 01111000 */ + 0x0c, /* 00001100 */ + 0x7c, /* 01111100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 98 0x62 'b' + */ + 0xe0, /* 11100000 */ + 0x60, /* 01100000 */ + 0x7c, /* 01111100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0xdc, /* 11011100 */ + 0x00, /* 00000000 */ + + /* + * 99 0x63 
'c' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc0, /* 11000000 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 100 0x64 'd' + */ + 0x1c, /* 00011100 */ + 0x0c, /* 00001100 */ + 0x7c, /* 01111100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 101 0x65 'e' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0xc0, /* 11000000 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 102 0x66 'f' + */ + 0x3c, /* 00111100 */ + 0x66, /* 01100110 */ + 0x60, /* 01100000 */ + 0xf8, /* 11111000 */ + 0x60, /* 01100000 */ + 0x60, /* 01100000 */ + 0xf0, /* 11110000 */ + 0x00, /* 00000000 */ + + /* + * 103 0x67 'g' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x76, /* 01110110 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x7c, /* 01111100 */ + 0x0c, /* 00001100 */ + 0xf8, /* 11111000 */ + + /* + * 104 0x68 'h' + */ + 0xe0, /* 11100000 */ + 0x60, /* 01100000 */ + 0x6c, /* 01101100 */ + 0x76, /* 01110110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0xe6, /* 11100110 */ + 0x00, /* 00000000 */ + + /* + * 105 0x69 'i' + */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x38, /* 00111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 106 0x6a 'j' + */ + 0x06, /* 00000110 */ + 0x00, /* 00000000 */ + 0x06, /* 00000110 */ + 0x06, /* 00000110 */ + 0x06, /* 00000110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + + /* + * 107 0x6b 'k' + */ + 0xe0, /* 11100000 */ + 0x60, /* 01100000 */ + 0x66, /* 01100110 */ + 0x6c, /* 01101100 */ + 0x78, /* 01111000 */ + 0x6c, /* 01101100 */ + 0xe6, /* 11100110 */ + 0x00, /* 00000000 */ + + /* + * 108 0x6c 'l' + */ + 0x38, /* 00111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 
0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 109 0x6d 'm' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xec, /* 11101100 */ + 0xfe, /* 11111110 */ + 0xd6, /* 11010110 */ + 0xd6, /* 11010110 */ + 0xd6, /* 11010110 */ + 0x00, /* 00000000 */ + + /* + * 110 0x6e 'n' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xdc, /* 11011100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x00, /* 00000000 */ + + /* + * 111 0x6f 'o' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 112 0x70 'p' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xdc, /* 11011100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x7c, /* 01111100 */ + 0x60, /* 01100000 */ + 0xf0, /* 11110000 */ + + /* + * 113 0x71 'q' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x76, /* 01110110 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x7c, /* 01111100 */ + 0x0c, /* 00001100 */ + 0x1e, /* 00011110 */ + + /* + * 114 0x72 'r' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xdc, /* 11011100 */ + 0x76, /* 01110110 */ + 0x60, /* 01100000 */ + 0x60, /* 01100000 */ + 0xf0, /* 11110000 */ + 0x00, /* 00000000 */ + + /* + * 115 0x73 's' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0xc0, /* 11000000 */ + 0x7c, /* 01111100 */ + 0x06, /* 00000110 */ + 0xfc, /* 11111100 */ + 0x00, /* 00000000 */ + + /* + * 116 0x74 't' + */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0xfc, /* 11111100 */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x36, /* 00110110 */ + 0x1c, /* 00011100 */ + 0x00, /* 00000000 */ + + /* + * 117 0x75 'u' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x76, /* 
01110110 */ + 0x00, /* 00000000 */ + + /* + * 118 0x76 'v' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x00, /* 00000000 */ + + /* + * 119 0x77 'w' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xc6, /* 11000110 */ + 0xd6, /* 11010110 */ + 0xd6, /* 11010110 */ + 0xfe, /* 11111110 */ + 0x6c, /* 01101100 */ + 0x00, /* 00000000 */ + + /* + * 120 0x78 'x' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xc6, /* 11000110 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 121 0x79 'y' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7e, /* 01111110 */ + 0x06, /* 00000110 */ + 0xfc, /* 11111100 */ + + /* + * 122 0x7a 'z' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x4c, /* 01001100 */ + 0x18, /* 00011000 */ + 0x32, /* 00110010 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + + /* + * 123 0x7b '{' + */ + 0x0e, /* 00001110 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x70, /* 01110000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x0e, /* 00001110 */ + 0x00, /* 00000000 */ + + /* + * 124 0x7c '|' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 125 0x7d '}' + */ + 0x70, /* 01110000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x0e, /* 00001110 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x70, /* 01110000 */ + 0x00, /* 00000000 */ + + /* + * 126 0x7e '~' + */ + 0x76, /* 01110110 */ + 0xdc, /* 11011100 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 127 0x7f '' + */ + 0x00, /* 00000000 
*/ + 0x10, /* 00010000 */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + + /* + * 128 0x80 '�' + */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x0c, /* 00001100 */ + 0x78, /* 01111000 */ + + /* + * 129 0x81 '�' + */ + 0xcc, /* 11001100 */ + 0x00, /* 00000000 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 130 0x82 '�' + */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0xc0, /* 11000000 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 131 0x83 '�' + */ + 0x7c, /* 01111100 */ + 0x82, /* 10000010 */ + 0x78, /* 01111000 */ + 0x0c, /* 00001100 */ + 0x7c, /* 01111100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 132 0x84 '�' + */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + 0x78, /* 01111000 */ + 0x0c, /* 00001100 */ + 0x7c, /* 01111100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 133 0x85 '�' + */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x78, /* 01111000 */ + 0x0c, /* 00001100 */ + 0x7c, /* 01111100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 134 0x86 '�' + */ + 0x30, /* 00110000 */ + 0x30, /* 00110000 */ + 0x78, /* 01111000 */ + 0x0c, /* 00001100 */ + 0x7c, /* 01111100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 135 0x87 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0x7e, /* 01111110 */ + 0x0c, /* 00001100 */ + 0x38, /* 00111000 */ + + /* + * 136 0x88 '�' + */ + 0x7c, /* 01111100 */ + 0x82, /* 10000010 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xfe, /* 
11111110 */ + 0xc0, /* 11000000 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 137 0x89 '�' + */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0xc0, /* 11000000 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 138 0x8a '�' + */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0xc0, /* 11000000 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 139 0x8b '�' + */ + 0x66, /* 01100110 */ + 0x00, /* 00000000 */ + 0x38, /* 00111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 140 0x8c '�' + */ + 0x7c, /* 01111100 */ + 0x82, /* 10000010 */ + 0x38, /* 00111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 141 0x8d '�' + */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x38, /* 00111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 142 0x8e '�' + */ + 0xc6, /* 11000110 */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 143 0x8f '�' + */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 144 0x90 '�' + */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0xfe, /* 11111110 */ + 0xc0, /* 11000000 */ + 0xf8, /* 11111000 */ + 0xc0, /* 11000000 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + + /* + * 145 0x91 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + 0xd8, /* 11011000 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + 
+ /* + * 146 0x92 '�' + */ + 0x3e, /* 00111110 */ + 0x6c, /* 01101100 */ + 0xcc, /* 11001100 */ + 0xfe, /* 11111110 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xce, /* 11001110 */ + 0x00, /* 00000000 */ + + /* + * 147 0x93 '�' + */ + 0x7c, /* 01111100 */ + 0x82, /* 10000010 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 148 0x94 '�' + */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 149 0x95 '�' + */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 150 0x96 '�' + */ + 0x78, /* 01111000 */ + 0x84, /* 10000100 */ + 0x00, /* 00000000 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 151 0x97 '�' + */ + 0x60, /* 01100000 */ + 0x30, /* 00110000 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 152 0x98 '�' + */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7e, /* 01111110 */ + 0x06, /* 00000110 */ + 0xfc, /* 11111100 */ + + /* + * 153 0x99 '�' + */ + 0xc6, /* 11000110 */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x00, /* 00000000 */ + + /* + * 154 0x9a '�' + */ + 0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 155 0x9b '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x7e, 
/* 01111110 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 156 0x9c '�' + */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0x64, /* 01100100 */ + 0xf0, /* 11110000 */ + 0x60, /* 01100000 */ + 0x66, /* 01100110 */ + 0xfc, /* 11111100 */ + 0x00, /* 00000000 */ + + /* + * 157 0x9d '�' + */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 158 0x9e '�' + */ + 0xf8, /* 11111000 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xfa, /* 11111010 */ + 0xc6, /* 11000110 */ + 0xcf, /* 11001111 */ + 0xc6, /* 11000110 */ + 0xc7, /* 11000111 */ + + /* + * 159 0x9f '�' + */ + 0x0e, /* 00001110 */ + 0x1b, /* 00011011 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0xd8, /* 11011000 */ + 0x70, /* 01110000 */ + 0x00, /* 00000000 */ + + /* + * 160 0xa0 '�' + */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x78, /* 01111000 */ + 0x0c, /* 00001100 */ + 0x7c, /* 01111100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 161 0xa1 '�' + */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x38, /* 00111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 162 0xa2 '�' + */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + + /* + * 163 0xa3 '�' + */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 164 0xa4 '�' + */ + 0x76, /* 01110110 */ + 0xdc, /* 11011100 */ + 0x00, /* 00000000 */ + 0xdc, /* 11011100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 
*/ + 0x66, /* 01100110 */ + 0x00, /* 00000000 */ + + /* + * 165 0xa5 '�' + */ + 0x76, /* 01110110 */ + 0xdc, /* 11011100 */ + 0x00, /* 00000000 */ + 0xe6, /* 11100110 */ + 0xf6, /* 11110110 */ + 0xde, /* 11011110 */ + 0xce, /* 11001110 */ + 0x00, /* 00000000 */ + + /* + * 166 0xa6 '�' + */ + 0x3c, /* 00111100 */ + 0x6c, /* 01101100 */ + 0x6c, /* 01101100 */ + 0x3e, /* 00111110 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 167 0xa7 '�' + */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x00, /* 00000000 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 168 0xa8 '�' + */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x63, /* 01100011 */ + 0x3e, /* 00111110 */ + 0x00, /* 00000000 */ + + /* + * 169 0xa9 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xfe, /* 11111110 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 170 0xaa '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xfe, /* 11111110 */ + 0x06, /* 00000110 */ + 0x06, /* 00000110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 171 0xab '�' + */ + 0x63, /* 01100011 */ + 0xe6, /* 11100110 */ + 0x6c, /* 01101100 */ + 0x7e, /* 01111110 */ + 0x33, /* 00110011 */ + 0x66, /* 01100110 */ + 0xcc, /* 11001100 */ + 0x0f, /* 00001111 */ + + /* + * 172 0xac '�' + */ + 0x63, /* 01100011 */ + 0xe6, /* 11100110 */ + 0x6c, /* 01101100 */ + 0x7a, /* 01111010 */ + 0x36, /* 00110110 */ + 0x6a, /* 01101010 */ + 0xdf, /* 11011111 */ + 0x06, /* 00000110 */ + + /* + * 173 0xad '�' + */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 174 0xae '�' + */ + 
0x00, /* 00000000 */ + 0x33, /* 00110011 */ + 0x66, /* 01100110 */ + 0xcc, /* 11001100 */ + 0x66, /* 01100110 */ + 0x33, /* 00110011 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 175 0xaf '�' + */ + 0x00, /* 00000000 */ + 0xcc, /* 11001100 */ + 0x66, /* 01100110 */ + 0x33, /* 00110011 */ + 0x66, /* 01100110 */ + 0xcc, /* 11001100 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 176 0xb0 '�' + */ + 0x22, /* 00100010 */ + 0x88, /* 10001000 */ + 0x22, /* 00100010 */ + 0x88, /* 10001000 */ + 0x22, /* 00100010 */ + 0x88, /* 10001000 */ + 0x22, /* 00100010 */ + 0x88, /* 10001000 */ + + /* + * 177 0xb1 '�' + */ + 0x55, /* 01010101 */ + 0xaa, /* 10101010 */ + 0x55, /* 01010101 */ + 0xaa, /* 10101010 */ + 0x55, /* 01010101 */ + 0xaa, /* 10101010 */ + 0x55, /* 01010101 */ + 0xaa, /* 10101010 */ + + /* + * 178 0xb2 '�' + */ + 0x77, /* 01110111 */ + 0xdd, /* 11011101 */ + 0x77, /* 01110111 */ + 0xdd, /* 11011101 */ + 0x77, /* 01110111 */ + 0xdd, /* 11011101 */ + 0x77, /* 01110111 */ + 0xdd, /* 11011101 */ + + /* + * 179 0xb3 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 180 0xb4 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0xf8, /* 11111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 181 0xb5 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0xf8, /* 11111000 */ + 0x18, /* 00011000 */ + 0xf8, /* 11111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 182 0xb6 '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0xf6, /* 11110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 183 0xb7 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 
00000000 */ + 0xfe, /* 11111110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 184 0xb8 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xf8, /* 11111000 */ + 0x18, /* 00011000 */ + 0xf8, /* 11111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 185 0xb9 '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0xf6, /* 11110110 */ + 0x06, /* 00000110 */ + 0xf6, /* 11110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 186 0xba '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 187 0xbb '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xfe, /* 11111110 */ + 0x06, /* 00000110 */ + 0xf6, /* 11110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 188 0xbc '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0xf6, /* 11110110 */ + 0x06, /* 00000110 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 189 0xbd '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 190 0xbe '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0xf8, /* 11111000 */ + 0x18, /* 00011000 */ + 0xf8, /* 11111000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 191 0xbf '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xf8, /* 11111000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 192 0xc0 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x1f, /* 00011111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 
0x00, /* 00000000 */ + + /* + * 193 0xc1 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 194 0xc2 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 195 0xc3 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x1f, /* 00011111 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 196 0xc4 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 197 0xc5 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0xff, /* 11111111 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 198 0xc6 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x1f, /* 00011111 */ + 0x18, /* 00011000 */ + 0x1f, /* 00011111 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 199 0xc7 '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x37, /* 00110111 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 200 0xc8 '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x37, /* 00110111 */ + 0x30, /* 00110000 */ + 0x3f, /* 00111111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 201 0xc9 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x3f, /* 00111111 */ + 0x30, /* 00110000 */ + 0x37, /* 00110111 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 202 0xca '�' + */ + 0x36, /* 00110110 */ + 0x36, 
/* 00110110 */ + 0xf7, /* 11110111 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 203 0xcb '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0xf7, /* 11110111 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 204 0xcc '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x37, /* 00110111 */ + 0x30, /* 00110000 */ + 0x37, /* 00110111 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 205 0xcd '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 206 0xce '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0xf7, /* 11110111 */ + 0x00, /* 00000000 */ + 0xf7, /* 11110111 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 207 0xcf '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 208 0xd0 '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 209 0xd1 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 210 0xd2 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 211 0xd3 '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x3f, /* 00111111 
*/ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 212 0xd4 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x1f, /* 00011111 */ + 0x18, /* 00011000 */ + 0x1f, /* 00011111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 213 0xd5 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x1f, /* 00011111 */ + 0x18, /* 00011000 */ + 0x1f, /* 00011111 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 214 0xd6 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x3f, /* 00111111 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 215 0xd7 '�' + */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0xff, /* 11111111 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + + /* + * 216 0xd8 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0xff, /* 11111111 */ + 0x18, /* 00011000 */ + 0xff, /* 11111111 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 217 0xd9 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0xf8, /* 11111000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 218 0xda '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x1f, /* 00011111 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 219 0xdb '�' + */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + + /* + * 220 0xdc '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + + /* + * 
221 0xdd '�' + */ + 0xf0, /* 11110000 */ + 0xf0, /* 11110000 */ + 0xf0, /* 11110000 */ + 0xf0, /* 11110000 */ + 0xf0, /* 11110000 */ + 0xf0, /* 11110000 */ + 0xf0, /* 11110000 */ + 0xf0, /* 11110000 */ + + /* + * 222 0xde '�' + */ + 0x0f, /* 00001111 */ + 0x0f, /* 00001111 */ + 0x0f, /* 00001111 */ + 0x0f, /* 00001111 */ + 0x0f, /* 00001111 */ + 0x0f, /* 00001111 */ + 0x0f, /* 00001111 */ + 0x0f, /* 00001111 */ + + /* + * 223 0xdf '�' + */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0xff, /* 11111111 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 224 0xe0 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x76, /* 01110110 */ + 0xdc, /* 11011100 */ + 0xc8, /* 11001000 */ + 0xdc, /* 11011100 */ + 0x76, /* 01110110 */ + 0x00, /* 00000000 */ + + /* + * 225 0xe1 '�' + */ + 0x78, /* 01111000 */ + 0xcc, /* 11001100 */ + 0xcc, /* 11001100 */ + 0xd8, /* 11011000 */ + 0xcc, /* 11001100 */ + 0xc6, /* 11000110 */ + 0xcc, /* 11001100 */ + 0x00, /* 00000000 */ + + /* + * 226 0xe2 '�' + */ + 0xfe, /* 11111110 */ + 0xc6, /* 11000110 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0xc0, /* 11000000 */ + 0x00, /* 00000000 */ + + /* + * 227 0xe3 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0xfe, /* 11111110 */ + 0x6c, /* 01101100 */ + 0x6c, /* 01101100 */ + 0x6c, /* 01101100 */ + 0x6c, /* 01101100 */ + 0x00, /* 00000000 */ + + /* + * 228 0xe4 '�' + */ + 0xfe, /* 11111110 */ + 0xc6, /* 11000110 */ + 0x60, /* 01100000 */ + 0x30, /* 00110000 */ + 0x60, /* 01100000 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + + /* + * 229 0xe5 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0xd8, /* 11011000 */ + 0xd8, /* 11011000 */ + 0xd8, /* 11011000 */ + 0x70, /* 01110000 */ + 0x00, /* 00000000 */ + + /* + * 230 0xe6 '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x66, /* 
01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x7c, /* 01111100 */ + 0xc0, /* 11000000 */ + + /* + * 231 0xe7 '�' + */ + 0x00, /* 00000000 */ + 0x76, /* 01110110 */ + 0xdc, /* 11011100 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + + /* + * 232 0xe8 '�' + */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x3c, /* 00111100 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + + /* + * 233 0xe9 '�' + */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0xfe, /* 11111110 */ + 0xc6, /* 11000110 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x00, /* 00000000 */ + + /* + * 234 0xea '�' + */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0x6c, /* 01101100 */ + 0x6c, /* 01101100 */ + 0xee, /* 11101110 */ + 0x00, /* 00000000 */ + + /* + * 235 0xeb '�' + */ + 0x0e, /* 00001110 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x3e, /* 00111110 */ + 0x66, /* 01100110 */ + 0x66, /* 01100110 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + + /* + * 236 0xec '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0xdb, /* 11011011 */ + 0xdb, /* 11011011 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 237 0xed '�' + */ + 0x06, /* 00000110 */ + 0x0c, /* 00001100 */ + 0x7e, /* 01111110 */ + 0xdb, /* 11011011 */ + 0xdb, /* 11011011 */ + 0x7e, /* 01111110 */ + 0x60, /* 01100000 */ + 0xc0, /* 11000000 */ + + /* + * 238 0xee '�' + */ + 0x1e, /* 00011110 */ + 0x30, /* 00110000 */ + 0x60, /* 01100000 */ + 0x7e, /* 01111110 */ + 0x60, /* 01100000 */ + 0x30, /* 00110000 */ + 0x1e, /* 00011110 */ + 0x00, /* 00000000 */ + + /* + * 239 0xef '�' + */ + 0x00, /* 00000000 */ + 0x7c, /* 01111100 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 0xc6, /* 11000110 */ + 
0xc6, /* 11000110 */ + 0x00, /* 00000000 */ + + /* + * 240 0xf0 '�' + */ + 0x00, /* 00000000 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + 0xfe, /* 11111110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 241 0xf1 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x7e, /* 01111110 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + + /* + * 242 0xf2 '�' + */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + + /* + * 243 0xf3 '�' + */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x18, /* 00011000 */ + 0x0c, /* 00001100 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + + /* + * 244 0xf4 '�' + */ + 0x0e, /* 00001110 */ + 0x1b, /* 00011011 */ + 0x1b, /* 00011011 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + + /* + * 245 0xf5 '�' + */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0xd8, /* 11011000 */ + 0xd8, /* 11011000 */ + 0x70, /* 01110000 */ + + /* + * 246 0xf6 '�' + */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x7e, /* 01111110 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 247 0xf7 '�' + */ + 0x00, /* 00000000 */ + 0x76, /* 01110110 */ + 0xdc, /* 11011100 */ + 0x00, /* 00000000 */ + 0x76, /* 01110110 */ + 0xdc, /* 11011100 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 248 0xf8 '�' + */ + 0x38, /* 00111000 */ + 0x6c, /* 01101100 */ + 0x6c, /* 01101100 */ + 0x38, /* 00111000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 249 0xf9 '�' + */ + 0x00, 
/* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 250 0xfa '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x18, /* 00011000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 251 0xfb '�' + */ + 0x0f, /* 00001111 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0x0c, /* 00001100 */ + 0xec, /* 11101100 */ + 0x6c, /* 01101100 */ + 0x3c, /* 00111100 */ + 0x1c, /* 00011100 */ + + /* + * 252 0xfc '�' + */ + 0x6c, /* 01101100 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x36, /* 00110110 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 253 0xfd '�' + */ + 0x78, /* 01111000 */ + 0x0c, /* 00001100 */ + 0x18, /* 00011000 */ + 0x30, /* 00110000 */ + 0x7c, /* 01111100 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 254 0xfe '�' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x3c, /* 00111100 */ + 0x3c, /* 00111100 */ + 0x3c, /* 00111100 */ + 0x3c, /* 00111100 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + + /* + * 255 0xff ' ' + */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + 0x00, /* 00000000 */ + +}; diff --git a/vendor/SDL3_gfx/SDL3_imageFilter.c b/vendor/SDL3_gfx/SDL3_imageFilter.c new file mode 100644 index 0000000..7417de3 --- /dev/null +++ b/vendor/SDL3_gfx/SDL3_imageFilter.c @@ -0,0 +1,7371 @@ +/* + +SDL3_imageFilter.c: byte-image "filter" routines + +Copyright (C) 2012-2014 Andreas Schiffler +Copyright (C) 2013 Sylvain Beucler + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. 
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+
+ 3. This notice may not be removed or altered from any source
+ distribution.
+
+Andreas Schiffler -- aschiffler at ferzkopp dot net
+
+*/
+
+/*
+
+Note: Uses inline x86 MMX or ASM optimizations if available and enabled.
+
+Note: Most of the MMX code is based on published routines
+by Vladimir Kravtchenko at vk@cs.ubc.ca - credits go to
+him for his work.
+Note: Altered source - SDL_imageFilterMMXdetect() reports 0 when built without USE_MMX.
+*/
+
+#include
+#include
+#include
+
+#include
+
+/* Use GCC intrinsics if available: they support both i386 and x86_64,
+ provide ASM-grade performances, and lift the PUSHA/POPA issues. */
+#ifdef __GNUC__
+# ifdef USE_MMX
+# include
+# endif
+# include
+#endif
+
+#include "SDL3_imageFilter.h"
+
+/*!
+\brief Swaps the byte order in a 32bit integer (LSB becomes MSB, etc.). The argument is evaluated four times.
+*/
+#define SWAP_32(x) (((x) >> 24) | (((x) & 0x00ff0000) >> 8) | (((x) & 0x0000ff00) << 8) | ((x) << 24))
+
+/* ------ Static variables ----- */
+
+/*!
+\brief Static state which enables the use of the MMX routines (0 = disabled). Enabled by default.
+*/
+static int SDL_imageFilterUseMMX = 1;
+
+/* Detect GCC */
+#if defined(__GNUC__)
+#define GCC__
+#endif
+
+/*!
+\brief MMX detection routine (with override flag and build-time check).
+
+\returns 1 if the MMX routines may be used (built with USE_MMX, override flag set, SDL_HasMMX() true), 0 otherwise.
+*/
+int SDL_imageFilterMMXdetect(void)
+{
+#ifdef USE_MMX
+    /* The override flag wins over the CPU query */
+    return ((SDL_imageFilterUseMMX != 0) && SDL_HasMMX());
+#else
+    return (0); /* MMX routines are compiled out: they return -1 without processing anything */
+#endif
+}
+
+/*!
+\brief Disable the MMX check for filter functions and force the use of the non-MMX, C-based code.
+*/
+void SDL_imageFilterMMXoff(void)
+{
+    SDL_imageFilterUseMMX = 0;
+}
+
+/*!
+\brief Enable the MMX check for filter functions and use MMX code if available.
+*/
+void SDL_imageFilterMMXon(void)
+{
+    SDL_imageFilterUseMMX = 1;
+}
+
+/* ------------------------------------------------------------------------------------ */
+
+/*!
+\brief Internal MMX Filter using Add: D = saturation255(S1 + S2). Only whole groups of 8 bytes are processed.
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays; the inline-asm loop tests its counter only after the first pass, so pass at least 8.
+
+\return Returns 0 when built with USE_MMX, or -1 (nothing processed) when built without it.
+*/
+static int SDL_imageFilterAddMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        mov eax, Src1 /* load Src1 address into eax */
+        mov ebx, Src2 /* load Src2 address into ebx */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16 /* 16 byte alignment of the loop entry */
+L1010:
+        movq mm1, [eax] /* load 8 bytes from Src1 into mm1 */
+        paddusb mm1, [ebx] /* mm1=Src1+Src2 (add 8 bytes with saturation) */
+        movq [edi], mm1 /* store result in Dest */
+        add eax, 8 /* increase Src1, Src2 and Dest */
+        add ebx, 8 /* register pointers by 8 */
+        add edi, 8
+        dec ecx /* decrease loop counter */
+        jnz L1010 /* check loop termination, proceed if required */
+        emms /* exit MMX state */
+        popa
+    }
+#else
+    /* GCC builds (i386 and x86_64): the same loop written with MMX intrinsics */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mSrc2 = (__m64*)Src2;
+    __m64 *mDest = (__m64*)Dest;
+    int i;
+    for (i = 0; i < SrcLength/8; i++) {
+        *mDest = _m_paddusb(*mSrc1, *mSrc2); /* Src1+Src2 (add 8 bytes with saturation) */
+        mSrc1++;
+        mSrc2++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using Add: D = saturation255(S1 + S2)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success (including length 0) or -1 if any of the three pointers is NULL.
+*/
+int SDL_imageFilterAdd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int i, istart;
+    unsigned char *cursrc1, *cursrc2, *curdst;
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+        return(-1);
+    if (length == 0)
+        return(0);
+
+    /* Try the MMX routine first. It returns -1, leaving Dest untouched, when this
+       file was built without USE_MMX; in that case the C loop handles everything. */
+    if ((SDL_imageFilterMMXdetect()) && (length > 7) &&
+        (SDL_imageFilterAddMMX(Src1, Src2, Dest, length) == 0)) {
+
+        /* The MMX routine handled the leading multiple of 8 bytes; check for leftover bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process the trailing (length % 8) bytes in C */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            cursrc2 = &Src2[istart];
+            curdst = &Dest[istart];
+        } else {
+            /* No leftover bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* MMX not used (disabled, not compiled in, or fewer than 8 bytes): process the whole image in C */
+        istart = 0;
+        cursrc1 = Src1;
+        cursrc2 = Src2;
+        curdst = Dest;
+    }
+
+    /* C routine to process the remaining bytes, clamping sums to 255 */
+    for (i = istart; i < length; i++) {
+        result = (int) *cursrc1 + (int) *cursrc2;
+        if (result > 255)
+            result = 255;
+        *curdst = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        cursrc2++;
+        curdst++;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using Mean: D = S1/2 + S2/2
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+\param Mask Mask array containing 8 bytes with 0x7F value.
+
+\return Returns 0 when built with USE_MMX, or -1 (nothing processed) when built without it.
+*/
+static int SDL_imageFilterMeanMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength,
+                                  unsigned char *Mask)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        mov edx, Mask /* load Mask address into edx */
+        movq mm0, [edx] /* load Mask into mm0 */
+        mov eax, Src1 /* load Src1 address into eax */
+        mov ebx, Src2 /* load Src2 address into ebx */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16 /* 16 byte alignment of the loop entry */
+L21011:
+        movq mm1, [eax] /* load 8 bytes from Src1 into mm1 */
+        movq mm2, [ebx] /* load 8 bytes from Src2 into mm2 */
+        /* --- Byte shift via Word shift: the mask below clears the bit shifted in from the neighbouring byte --- */
+        psrlw mm1, 1 /* shift 4 WORDS of mm1 1 bit to the right */
+        psrlw mm2, 1 /* shift 4 WORDS of mm2 1 bit to the right */
+        pand mm1, mm0 /* apply Mask to 8 BYTES of mm1 */
+        /* byte 0x0f, 0xdb, 0xc8 */
+        pand mm2, mm0 /* apply Mask to 8 BYTES of mm2 */
+        /* byte 0x0f, 0xdb, 0xd0 */
+        paddusb mm1, mm2 /* mm1=mm1+mm2 (add 8 bytes with saturation) */
+        movq [edi], mm1 /* store result in Dest */
+        add eax, 8 /* increase Src1, Src2 and Dest */
+        add ebx, 8 /* register pointers by 8 */
+        add edi, 8
+        dec ecx /* decrease loop counter */
+        jnz L21011 /* check loop termination, proceed if required */
+        emms /* exit MMX state */
+        popa
+    }
+#else
+    /* GCC builds (i386 and x86_64): the same loop written with MMX intrinsics */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mSrc2 = (__m64*)Src2;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 *mMask = (__m64*)Mask;
+    int i;
+    for (i = 0; i < SrcLength/8; i++) {
+        __m64 mm1 = *mSrc1,
+              mm2 = *mSrc2;
+        mm1 = _m_psrlwi(mm1, 1); /* shift 4 WORDS of mm1 1 bit to the right */
+        mm2 = _m_psrlwi(mm2, 1); /* shift 4 WORDS of mm2 1 bit to the right */
+        mm1 = _m_pand(mm1, *mMask); /* apply Mask to 8 BYTES of mm1 */
+        mm2 = _m_pand(mm2, *mMask); /* apply Mask to 8 BYTES of mm2 */
+        *mDest = _m_paddusb(mm1, mm2); /* mm1+mm2 (add 8 bytes with saturation) */
+        mSrc1++;
+        mSrc2++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using Mean: D = S1/2 + S2/2 (each operand is halved before the addition)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success (including length 0) or -1 if any of the three pointers is NULL.
+*/
+int SDL_imageFilterMean(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
+    unsigned int i, istart;
+    unsigned char *cursrc1, *cursrc2, *curdst;
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+        return(-1);
+    if (length == 0)
+        return(0);
+
+    /* Use the MMX routine only if it actually ran: it returns -1 without touching Dest when built without USE_MMX */
+    if ((SDL_imageFilterMMXdetect()) && (length > 7) &&
+        (SDL_imageFilterMeanMMX(Src1, Src2, Dest, length, Mask) == 0)) {
+
+        /* The MMX routine handled the leading multiple of 8 bytes; check for leftover bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process the trailing (length % 8) bytes in C */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            cursrc2 = &Src2[istart];
+            curdst = &Dest[istart];
+        } else {
+            /* No leftover bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* MMX not used (disabled, not compiled in, or fewer than 8 bytes): process the whole image in C */
+        istart = 0;
+        cursrc1 = Src1;
+        cursrc2 = Src2;
+        curdst = Dest;
+    }
+
+    /* C routine to process the remaining bytes */
+    for (i = istart; i < length; i++) {
+        result = (int) *cursrc1 / 2 + (int) *cursrc2 / 2;
+        *curdst = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        cursrc2++;
+        curdst++;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using Sub: D = saturation0(S1 - S2)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterSubMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX /* the MMX code below is only compiled when the build defines USE_MMX */
+#if !defined(GCC__) /* non-GCC compilers: inline assembly block using 32-bit registers */
+    __asm
+    {
+        pusha /* save the general-purpose registers used below */
+        mov eax, Src1 /* load Src1 address into eax */
+        mov ebx, Src2 /* load Src2 address into ebx */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time); the SrcLength%8 trailing bytes are not processed here */
+        align 16 /* 16 byte alignment of the loop entry */
+L1012:
+        movq mm1, [eax] /* load 8 bytes from Src1 into mm1 */
+        psubusb mm1, [ebx] /* mm1=Src1-Src2 (sub 8 bytes with saturation) */
+        movq [edi], mm1 /* store result in Dest */
+        add eax, 8 /* increase Src1, Src2 and Dest */
+        add ebx, 8 /* register pointers by 8 */
+        add edi, 8
+        dec ecx /* decrease loop counter */
+        jnz L1012 /* check loop termination, proceed if required; the counter is only tested after a pass, so it must start non-zero */
+        emms /* exit MMX state */
+        popa /* restore the registers saved by pusha */
+    }
+#else
+    /* GCC builds (i386 and x86_64): the same loop written with MMX intrinsics */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mSrc2 = (__m64*)Src2;
+    __m64 *mDest = (__m64*)Dest;
+    int i;
+    for (i = 0; i < SrcLength/8; i++) { /* one pass per complete group of 8 bytes */
+        *mDest = _m_psubusb(*mSrc1, *mSrc2); /* Src1-Src2 (sub 8 bytes with saturation) */
+        mSrc1++;
+        mSrc2++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0); /* an MMX path was compiled in and has run */
+#else
+    return (-1); /* built without USE_MMX: nothing was processed */
+#endif
+}
+
+/*!
+\brief Filter using Sub: D = saturation0(S1 - S2)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterSub(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int i, istart;
+    unsigned char *cursrc1, *cursrc2, *curdst;
+    int result;
+
+    /* Validate input parameters: a NULL array is an error (-1), an empty one is a successful no-op (0) */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+        return(-1);
+    if (length == 0)
+        return(0);
+
+    /* Use the MMX routine only if it actually ran: it returns -1 without touching Dest when built without USE_MMX */
+    if ((SDL_imageFilterMMXdetect()) && (length > 7) &&
+        (SDL_imageFilterSubMMX(Src1, Src2, Dest, length) == 0)) {
+
+        /* The MMX routine handled the leading multiple of 8 bytes; check for leftover bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process the trailing (length % 8) bytes in C */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            cursrc2 = &Src2[istart];
+            curdst = &Dest[istart];
+        } else {
+            /* No leftover bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* MMX not used (disabled, not compiled in, or fewer than 8 bytes): process the whole image in C */
+        istart = 0;
+        cursrc1 = Src1;
+        cursrc2 = Src2;
+        curdst = Dest;
+    }
+
+    /* C routine to process the remaining bytes, clamping negative differences to 0 */
+    for (i = istart; i < length; i++) {
+        result = (int) *cursrc1 - (int) *cursrc2;
+        if (result < 0)
+            result = 0;
+        *curdst = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        cursrc2++;
+        curdst++;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using AbsDiff: D = | S1 - S2 |
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterAbsDiffMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX /* the MMX code below is only compiled when the build defines USE_MMX */
+#if !defined(GCC__) /* non-GCC compilers: inline assembly block using 32-bit registers */
+    __asm
+    {
+        pusha /* save the general-purpose registers used below */
+        mov eax, Src1 /* load Src1 address into eax */
+        mov ebx, Src2 /* load Src2 address into ebx */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time); the SrcLength%8 trailing bytes are not processed here */
+        align 16 /* 16 byte alignment of the loop entry */
+L1013:
+        movq mm1, [eax] /* load 8 bytes from Src1 into mm1 */
+        movq mm2, [ebx] /* load 8 bytes from Src2 into mm2 */
+        psubusb mm1, [ebx] /* mm1=Src1-Src2 (sub 8 bytes with saturation) */
+        psubusb mm2, [eax] /* mm2=Src2-Src1 (sub 8 bytes with saturation) */
+        por mm1, mm2 /* combine both results: per byte one saturated difference is 0, so OR gives |Src1-Src2| */
+        movq [edi], mm1 /* store result in Dest */
+        add eax, 8 /* increase Src1, Src2 and Dest */
+        add ebx, 8 /* register pointers by 8 */
+        add edi, 8
+        dec ecx /* decrease loop counter */
+        jnz L1013 /* check loop termination, proceed if required; the counter is only tested after a pass, so it must start non-zero */
+        emms /* exit MMX state */
+        popa /* restore the registers saved by pusha */
+    }
+#else
+    /* GCC builds (i386 and x86_64): the same loop written with MMX intrinsics */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mSrc2 = (__m64*)Src2;
+    __m64 *mDest = (__m64*)Dest;
+    int i;
+    for (i = 0; i < SrcLength/8; i++) { /* one pass per complete group of 8 bytes */
+        __m64 mm1 = _m_psubusb(*mSrc2, *mSrc1); /* Src2-Src1 (sub 8 bytes with saturation) */
+        __m64 mm2 = _m_psubusb(*mSrc1, *mSrc2); /* Src1-Src2 (sub 8 bytes with saturation) */
+        *mDest = _m_por(mm1, mm2); /* combine both results: per byte one of them is 0, so OR gives |Src1-Src2| */
+        mSrc1++;
+        mSrc2++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0); /* an MMX path was compiled in and has run */
+#else
+    return (-1); /* built without USE_MMX: nothing was processed */
+#endif
+}
+
+/*!
+\brief Filter using AbsDiff: D = | S1 - S2 |
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterAbsDiff(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int i, istart;
+    unsigned char *cursrc1, *cursrc2, *curdst;
+    int result;
+
+    /* Validate input parameters: a NULL array is an error (-1), an empty one is a successful no-op (0) */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+        return(-1);
+    if (length == 0)
+        return(0);
+
+    /* Use the MMX routine only if it actually ran: it returns -1 without touching Dest when built without USE_MMX */
+    if ((SDL_imageFilterMMXdetect()) && (length > 7) &&
+        (SDL_imageFilterAbsDiffMMX(Src1, Src2, Dest, length) == 0)) {
+
+        /* The MMX routine handled the leading multiple of 8 bytes; check for leftover bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process the trailing (length % 8) bytes in C */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            cursrc2 = &Src2[istart];
+            curdst = &Dest[istart];
+        } else {
+            /* No leftover bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* MMX not used (disabled, not compiled in, or fewer than 8 bytes): process the whole image in C */
+        istart = 0;
+        cursrc1 = Src1;
+        cursrc2 = Src2;
+        curdst = Dest;
+    }
+
+    /* C routine to process the remaining bytes */
+    for (i = istart; i < length; i++) {
+        result = abs((int) *cursrc1 - (int) *cursrc2);
+        *curdst = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        cursrc2++;
+        curdst++;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using Mult: D = saturation255(S1 * S2)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterMultMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        mov eax, Src1 /* load Src1 address into eax */
+        mov ebx, Src2 /* load Src2 address into ebx */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time) */
+        pxor mm0, mm0 /* zero mm0 register */
+        align 16 /* 16 byte alignment of the loop entry */
+L1014:
+        movq mm1, [eax] /* load 8 bytes from Src1 into mm1 */
+        movq mm3, [ebx] /* load 8 bytes from Src2 into mm3 */
+        movq mm2, mm1 /* copy mm1 into mm2 */
+        movq mm4, mm3 /* copy mm3 into mm4 */
+        punpcklbw mm1, mm0 /* unpack low bytes of Src1 into words */
+        punpckhbw mm2, mm0 /* unpack high bytes of Src1 into words */
+        punpcklbw mm3, mm0 /* unpack low bytes of Src2 into words */
+        punpckhbw mm4, mm0 /* unpack high bytes of Src2 into words */
+        pmullw mm1, mm3 /* mul low bytes of Src1 and Src2 */
+        pmullw mm2, mm4 /* mul high bytes of Src1 and Src2 */
+        /* Take abs value of the results (signed words) */
+        movq mm5, mm1 /* copy mm1 into mm5 */
+        movq mm6, mm2 /* copy mm2 into mm6 */
+        psraw mm5, 15 /* fill mm5 words with word sign bit */
+        psraw mm6, 15 /* fill mm6 words with word sign bit */
+        pxor mm1, mm5 /* take 1's compliment of only neg. words */
+        pxor mm2, mm6 /* take 1's compliment of only neg. words */
+        psubsw mm1, mm5 /* add 1 to only neg. words, W-(-1) or W-0 */
+        psubsw mm2, mm6 /* add 1 to only neg. words, W-(-1) or W-0 */
+        packuswb mm1, mm2 /* pack words back into bytes with saturation */
+        movq [edi], mm1 /* store result in Dest */
+        add eax, 8 /* increase Src1, Src2 and Dest */
+        add ebx, 8 /* register pointers by 8 */
+        add edi, 8
+        dec ecx /* decrease loop counter */
+        jnz L1014 /* check loop termination, proceed if required */
+        emms /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 ASM with constraints: */
+    /* asm volatile ( */
+    /* "shr $3, %%ecx \n\t" /\* counter/8 (MMX loads 8 bytes at a time) *\/ */
+    /* "pxor %%mm0, %%mm0 \n\t" /\* zero mm0 register *\/ */
+    /* ".align 16 \n\t" /\* 16 byte alignment of the loop entry *\/ */
+    /* "1: movq (%%eax), %%mm1 \n\t" /\* load 8 bytes from Src1 into mm1 *\/ */
+    /* "movq (%%ebx), %%mm3 \n\t" /\* load 8 bytes from Src2 into mm3 *\/ */
+    /* "movq %%mm1, %%mm2 \n\t" /\* copy mm1 into mm2 *\/ */
+    /* "movq %%mm3, %%mm4 \n\t" /\* copy mm3 into mm4 *\/ */
+    /* "punpcklbw %%mm0, %%mm1 \n\t" /\* unpack low bytes of Src1 into words *\/ */
+    /* "punpckhbw %%mm0, %%mm2 \n\t" /\* unpack high bytes of Src1 into words *\/ */
+    /* "punpcklbw %%mm0, %%mm3 \n\t" /\* unpack low bytes of Src2 into words *\/ */
+    /* "punpckhbw %%mm0, %%mm4 \n\t" /\* unpack high bytes of Src2 into words *\/ */
+    /* "pmullw %%mm3, %%mm1 \n\t" /\* mul low bytes of Src1 and Src2 *\/ */
+    /* "pmullw %%mm4, %%mm2 \n\t" /\* mul high bytes of Src1 and Src2 *\/ */
+    /* /\* Take abs value of the results (signed words) *\/ */
+    /* "movq %%mm1, %%mm5 \n\t" /\* copy mm1 into mm5 *\/ */
+    /* "movq %%mm2, %%mm6 \n\t" /\* copy mm2 into mm6 *\/ */
+    /* "psraw $15, %%mm5 \n\t" /\* fill mm5 words with word sign bit *\/ */
+    /* "psraw $15, %%mm6 \n\t" /\* fill mm6 words with word sign bit *\/ */
+    /* "pxor %%mm5, %%mm1 \n\t" /\* take 1's compliment of only neg. words *\/ */
+    /* "pxor %%mm6, %%mm2 \n\t" /\* take 1's compliment of only neg. words *\/ */
+    /* "psubsw %%mm5, %%mm1 \n\t" /\* add 1 to only neg. words, W-(-1) or W-0 *\/ */
+    /* "psubsw %%mm6, %%mm2 \n\t" /\* add 1 to only neg. words, W-(-1) or W-0 *\/ */
+    /* "packuswb %%mm2, %%mm1 \n\t" /\* pack words back into bytes with saturation *\/ */
+    /* "movq %%mm1, (%%edi) \n\t" /\* store result in Dest *\/ */
+    /* "add $8, %%eax \n\t" /\* increase Src1, Src2 and Dest *\/ */
+    /* "add $8, %%ebx \n\t" /\* register pointers by 8 *\/ */
+    /* "add $8, %%edi \n\t" */
+    /* "dec %%ecx \n\t" /\* decrease loop counter *\/ */
+    /* "jnz 1b \n\t" /\* check loop termination, proceed if required *\/ */
+    /* "emms \n\t" /\* exit MMX state *\/ */
+    /* : "+a" (Src1), /\* load Src1 address into rax, modified by the loop *\/ */
+    /* "+b" (Src2), /\* load Src2 address into rbx, modified by the loop *\/ */
+    /* "+c" (SrcLength), /\* load loop counter (SIZE) into rcx, modified by the loop *\/ */
+    /* "+D" (Dest) /\* load Dest address into rdi, modified by the loop *\/ */
+    /* : */
+    /* : "memory", /\* *Dest is modified *\/ */
+    /* "mm0","mm1","mm2","mm3","mm4","mm5","mm6" /\* registers modified *\/ */
+    /* ); */
+
+    /* GCC builds (i386 and x86_64): the same loop written with MMX intrinsics */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mSrc2 = (__m64*)Src2;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 mm0 = _m_from_int(0); /* zero mm0 register */
+    int i;
+    for (i = 0; i < SrcLength/8; i++) {
+        __m64 mm1, mm2, mm3, mm4, mm5, mm6;
+        mm1 = _m_punpcklbw(*mSrc1, mm0); /* unpack low bytes of Src1 into words */
+        mm2 = _m_punpckhbw(*mSrc1, mm0); /* unpack high bytes of Src1 into words */
+        mm3 = _m_punpcklbw(*mSrc2, mm0); /* unpack low bytes of Src2 into words */
+        mm4 = _m_punpckhbw(*mSrc2, mm0); /* unpack high bytes of Src2 into words */
+        mm1 = _m_pmullw(mm1, mm3); /* mul low bytes of Src1 and Src2 */
+        mm2 = _m_pmullw(mm2, mm4); /* mul high bytes of Src1 and Src2 */
+        mm5 = _m_psrawi(mm1, 15); /* fill mm5 words with word sign bit */
+        mm6 = _m_psrawi(mm2, 15); /* fill mm6 words with word sign bit */
+        mm1 = _m_pxor(mm1, mm5); /* take 1's compliment of only neg. words */
+        mm2 = _m_pxor(mm2, mm6); /* take 1's compliment of only neg. words */
+        mm1 = _m_psubsw(mm1, mm5); /* add 1 to only neg. words, W-(-1) or W-0 */
+        mm2 = _m_psubsw(mm2, mm6); /* add 1 to only neg. words, W-(-1) or W-0 */
+        *mDest = _m_packuswb(mm1, mm2); /* pack words back into bytes with saturation */
+        mSrc1++;
+        mSrc2++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using Mult: D = saturation255(S1 * S2)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success (including length 0) or -1 if any of the three pointers is NULL.
+*/
+int SDL_imageFilterMult(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int i, istart;
+    unsigned char *cursrc1, *cursrc2, *curdst;
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+        return(-1);
+    if (length == 0)
+        return(0);
+
+    /* Use the MMX routine only if it actually ran: it returns -1 without touching Dest when built without USE_MMX */
+    if ((SDL_imageFilterMMXdetect()) && (length > 7) &&
+        (SDL_imageFilterMultMMX(Src1, Src2, Dest, length) == 0)) {
+
+        /* The MMX routine handled the leading multiple of 8 bytes; check for leftover bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process the trailing (length % 8) bytes in C */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            cursrc2 = &Src2[istart];
+            curdst = &Dest[istart];
+        } else {
+            /* No leftover bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* MMX not used (disabled, not compiled in, or fewer than 8 bytes): process the whole image in C */
+        istart = 0;
+        cursrc1 = Src1;
+        cursrc2 = Src2;
+        curdst = Dest;
+    }
+
+    /* C routine to process the remaining bytes, clamping products to 255 */
+    for (i = istart; i < length; i++) {
+
+        /* NOTE(review): the MMX path (word multiply, absolute value, unsigned-saturating pack) appears to clamp to 255 as well - confirm */
+
+        result = (int) *cursrc1 * (int) *cursrc2;
+        if (result > 255)
+            result = 255;
+        *curdst = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        cursrc2++;
+        curdst++;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal ASM Filter using MultNor: D = S1 * S2 (only the low 8 bits of each product are stored)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays; must be non-zero because the counter is decremented before it is tested.
+
+\return Returns 0 when built with USE_MMX, or -1 (nothing processed) when built without it.
+*/
+int SDL_imageFilterMultNorASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        mov edx, Src1 /* load Src1 address into edx */
+        mov esi, Src2 /* load Src2 address into esi */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        align 16 /* 16 byte alignment of the loop entry */
+L10141:
+        mov al, [edx] /* load a byte from Src1 */
+        mul [esi] /* mul with a byte from Src2 */
+        mov [edi], al /* move a byte result to Dest */
+        inc edx /* increment Src1, Src2, Dest */
+        inc esi /* pointer registers by one */
+        inc edi
+        dec ecx /* decrease loop counter */
+        jnz L10141 /* check loop termination, proceed if required */
+        popa
+    }
+#else
+    /* Note: ~5% gain on i386, less efficient than C on x86_64 */
+    /* Also depends on whether this function is static (?!) */
+    asm volatile (
+        ".align 16 \n\t" /* 16 byte alignment of the loop entry */
+# if defined(i386) || defined(__i386__) /* strict ISO modes only predefine __i386__ */
+        "1:mov (%%edx), %%al \n\t" /* load a byte from Src1 */
+        "mulb (%%esi) \n\t" /* mul with a byte from Src2 */
+        "mov %%al, (%%edi) \n\t" /* move a byte result to Dest */
+        "inc %%edx \n\t" /* increment Src1, Src2, Dest */
+        "inc %%esi \n\t" /* pointer registers by one */
+        "inc %%edi \n\t"
+        "dec %%ecx \n\t" /* decrease loop counter */
+# elif defined(__x86_64__)
+        "1:mov (%%rdx), %%al \n\t" /* load a byte from Src1 */
+        "mulb (%%rsi) \n\t" /* mul with a byte from Src2 */
+        "mov %%al, (%%rdi) \n\t" /* move a byte result to Dest */
+        "inc %%rdx \n\t" /* increment Src1, Src2, Dest */
+        "inc %%rsi \n\t" /* pointer registers by one */
+        "inc %%rdi \n\t"
+        "dec %%ecx \n\t" /* decrease loop counter: 32-bit register, because the SrcLength operand is a 32-bit unsigned int */
+# endif
+        "jnz 1b \n\t" /* check loop termination, proceed if required */
+        : "+d" (Src1), /* load Src1 address into edx */
+          "+S" (Src2), /* load Src2 address into esi */
+          "+c" (SrcLength), /* load loop counter (SIZE) into ecx */
+          "+D" (Dest) /* load Dest address into edi */
+        :
+        : "memory", "rax"
+    );
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using MultNor: D = S1 * S2
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int i;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /*
+     * The ASM routine steps through the arrays one byte at a time, so a
+     * successful call has already produced every output byte. The tail must
+     * not be multiplied a second time: when Dest aliases Src1 or Src2 that
+     * would store S1*S2*S2. If the routine reports -1 (it was not compiled
+     * in), fall through and let the C loop process the whole image.
+     */
+    if (SDL_imageFilterMMXdetect() &&
+        (SDL_imageFilterMultNorASM(Src1, Src2, Dest, length) == 0)) {
+        return (0);
+    }
+
+    /* C routine to process image; only the low 8 bits of each product are kept */
+    for (i = 0; i < length; i++) {
+        Dest[i] = (unsigned char) ((int) Src1[i] * (int) Src2[i]);
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using MultDivby2: D = saturation255(S1/2 * S2)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterMultDivby2MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha /* save the general purpose registers used below */
+        mov eax, Src1 /* load Src1 address into eax */
+        mov ebx, Src2 /* load Src2 address into ebx */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time) */
+        pxor mm0, mm0 /* zero mm0 register, used as the high byte when unpacking */
+        align 16 /* 16 byte alignment of the loop entry */
+L1015:
+        movq mm1, [eax] /* load 8 bytes from Src1 into mm1 */
+        movq mm3, [ebx] /* load 8 bytes from Src2 into mm3 */
+        movq mm2, mm1 /* copy mm1 into mm2 */
+        movq mm4, mm3 /* copy mm3 into mm4 */
+        punpcklbw mm1, mm0 /* unpack low bytes of Src1 into words */
+        punpckhbw mm2, mm0 /* unpack high bytes of Src1 into words */
+        punpcklbw mm3, mm0 /* unpack low bytes of Src2 into words */
+        punpckhbw mm4, mm0 /* unpack high bytes of Src2 into words */
+        psrlw mm1, 1 /* divide mm1 words by 2, Src1 low bytes */
+        psrlw mm2, 1 /* divide mm2 words by 2, Src1 high bytes */
+        pmullw mm1, mm3 /* mm1 = (Src1/2) * Src2 for the low 4 bytes (word products) */
+        pmullw mm2, mm4 /* mm2 = (Src1/2) * Src2 for the high 4 bytes (word products) */
+        packuswb mm1, mm2 /* pack words back into bytes with saturation */
+        movq [edi], mm1 /* store result in Dest */
+        add eax, 8 /* increase Src1, Src2 and Dest */
+        add ebx, 8 /* register pointers by 8 */
+        add edi, 8 /* (edi is the Dest pointer) */
+        dec ecx /* decrease loop counter */
+        jnz L1015 /* check loop termination, proceed if required */
+        emms /* exit MMX state */
+        popa /* restore the registers saved by pusha */
+    }
+#else
+    /* i386 and x86_64: same algorithm as the asm above, written with MMX intrinsics */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mSrc2 = (__m64*)Src2;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 mm0 = _m_from_int(0); /* zero mm0 register */
+    int i;
+    for (i = 0; i < SrcLength/8; i++) { /* one pass per complete group of 8 bytes; a tail of SrcLength%8 bytes is not touched here */
+        __m64 mm1, mm2, mm3, mm4, mm5, mm6; /* mm5 and mm6 are declared but not used in this loop */
+        mm1 = _m_punpcklbw(*mSrc1, mm0); /* unpack low bytes of Src1 into words */
+        mm2 = _m_punpckhbw(*mSrc1, mm0); /* unpack high bytes of Src1 into words */
+        mm3 = _m_punpcklbw(*mSrc2, mm0); /* unpack low bytes of Src2 into words */
+        mm4 = _m_punpckhbw(*mSrc2, mm0); /* unpack high bytes of Src2 into words */
+        mm1 = _m_psrlwi(mm1, 1); /* divide mm1 words by 2, Src1 low bytes */
+        mm2 = _m_psrlwi(mm2, 1); /* divide mm2 words by 2, Src1 high bytes */
+        mm1 = _m_pmullw(mm1, mm3); /* mm1 = (Src1/2) * Src2 for the low 4 bytes */
+        mm2 = _m_pmullw(mm2, mm4); /* mm2 = (Src1/2) * Src2 for the high 4 bytes */
+        *mDest = _m_packuswb(mm1, mm2); /* pack words back into bytes with saturation */
+        mSrc1++;
+        mSrc2++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1); /* built without USE_MMX: nothing was processed */
+#endif
+}
+
+/*!
+\brief Filter using MultDivby2: D = saturation255(S1/2 * S2)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterMultDivby2(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int pos = 0;    /* first byte still to be handled by the C loop */
+    int product;
+
+    /* Reject missing buffers; an empty image is trivially done */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL)) {
+        return (-1);
+    }
+    if (length == 0) {
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+        /* MMX routine takes the complete groups of 8 bytes */
+        SDL_imageFilterMultDivby2MMX(Src1, Src2, Dest, length);
+        /* Only the unaligned tail (if any) is left for the C loop */
+        pos = length & 0xfffffff8;
+    }
+
+    /* C routine: the tail after the MMX pass, or the whole image without MMX */
+    for (; pos < length; pos++) {
+        product = ((int) Src1[pos] / 2) * (int) Src2[pos];
+        Dest[pos] = (unsigned char) ((product > 255) ? 255 : product);
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using MultDivby4: D = saturation255(S1/2 * S2/2)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterMultDivby4MMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha /* save the general purpose registers used below */
+        mov eax, Src1 /* load Src1 address into eax */
+        mov ebx, Src2 /* load Src2 address into ebx */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time) */
+        pxor mm0, mm0 /* zero mm0 register, used as the high byte when unpacking */
+        align 16 /* 16 byte alignment of the loop entry */
+L1016:
+        movq mm1, [eax] /* load 8 bytes from Src1 into mm1 */
+        movq mm3, [ebx] /* load 8 bytes from Src2 into mm3 */
+        movq mm2, mm1 /* copy mm1 into mm2 */
+        movq mm4, mm3 /* copy mm3 into mm4 */
+        punpcklbw mm1, mm0 /* unpack low bytes of Src1 into words */
+        punpckhbw mm2, mm0 /* unpack high bytes of Src1 into words */
+        punpcklbw mm3, mm0 /* unpack low bytes of Src2 into words */
+        punpckhbw mm4, mm0 /* unpack high bytes of Src2 into words */
+        psrlw mm1, 1 /* divide mm1 words by 2, Src1 low bytes */
+        psrlw mm2, 1 /* divide mm2 words by 2, Src1 high bytes */
+        psrlw mm3, 1 /* divide mm3 words by 2, Src2 low bytes */
+        psrlw mm4, 1 /* divide mm4 words by 2, Src2 high bytes */
+        pmullw mm1, mm3 /* mm1 = (Src1/2) * (Src2/2) for the low 4 bytes (word products) */
+        pmullw mm2, mm4 /* mm2 = (Src1/2) * (Src2/2) for the high 4 bytes (word products) */
+        packuswb mm1, mm2 /* pack words back into bytes with saturation */
+        movq [edi], mm1 /* store result in Dest */
+        add eax, 8 /* increase Src1, Src2 and Dest */
+        add ebx, 8 /* register pointers by 8 */
+        add edi, 8 /* (edi is the Dest pointer) */
+        dec ecx /* decrease loop counter */
+        jnz L1016 /* check loop termination, proceed if required */
+        emms /* exit MMX state */
+        popa /* restore the registers saved by pusha */
+    }
+#else
+    /* i386 and x86_64: same algorithm as the asm above, written with MMX intrinsics */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mSrc2 = (__m64*)Src2;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 mm0 = _m_from_int(0); /* zero mm0 register */
+    int i;
+    for (i = 0; i < SrcLength/8; i++) { /* one pass per complete group of 8 bytes; a tail of SrcLength%8 bytes is not touched here */
+        __m64 mm1, mm2, mm3, mm4, mm5, mm6; /* mm5 and mm6 are declared but not used in this loop */
+        mm1 = _m_punpcklbw(*mSrc1, mm0); /* unpack low bytes of Src1 into words */
+        mm2 = _m_punpckhbw(*mSrc1, mm0); /* unpack high bytes of Src1 into words */
+        mm3 = _m_punpcklbw(*mSrc2, mm0); /* unpack low bytes of Src2 into words */
+        mm4 = _m_punpckhbw(*mSrc2, mm0); /* unpack high bytes of Src2 into words */
+        mm1 = _m_psrlwi(mm1, 1); /* divide mm1 words by 2, Src1 low bytes */
+        mm2 = _m_psrlwi(mm2, 1); /* divide mm2 words by 2, Src1 high bytes */
+        mm3 = _m_psrlwi(mm3, 1); /* divide mm3 words by 2, Src2 low bytes */
+        mm4 = _m_psrlwi(mm4, 1); /* divide mm4 words by 2, Src2 high bytes */
+        mm1 = _m_pmullw(mm1, mm3); /* mm1 = (Src1/2) * (Src2/2) for the low 4 bytes */
+        mm2 = _m_pmullw(mm2, mm4); /* mm2 = (Src1/2) * (Src2/2) for the high 4 bytes */
+        *mDest = _m_packuswb(mm1, mm2); /* pack words back into bytes with saturation */
+        mSrc1++;
+        mSrc2++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1); /* built without USE_MMX: nothing was processed */
+#endif
+}
+
+/*!
+\brief Filter using MultDivby4: D = saturation255(S1/2 * S2/2)
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterMultDivby4(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int pos = 0;    /* first byte still to be handled by the C loop */
+    int product;
+
+    /* Reject missing buffers; an empty image is trivially done */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL)) {
+        return (-1);
+    }
+    if (length == 0) {
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+        /* MMX routine takes the complete groups of 8 bytes */
+        SDL_imageFilterMultDivby4MMX(Src1, Src2, Dest, length);
+        /* Only the unaligned tail (if any) is left for the C loop */
+        pos = length & 0xfffffff8;
+    }
+
+    /* C routine: the tail after the MMX pass, or the whole image without MMX */
+    for (; pos < length; pos++) {
+        product = ((int) Src1[pos] / 2) * ((int) Src2[pos] / 2);
+        Dest[pos] = (unsigned char) ((product > 255) ? 255 : product);
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using BitAnd: D = S1 & S2
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterBitAndMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha /* save the general purpose registers used below */
+        mov eax, Src1 /* load Src1 address into eax */
+        mov ebx, Src2 /* load Src2 address into ebx */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16 /* 16 byte alignment of the loop entry */
+L1017:
+        movq mm1, [eax] /* load 8 bytes from Src1 into mm1 */
+        pand mm1, [ebx] /* mm1=Src1&Src2 */
+        movq [edi], mm1 /* store result in Dest */
+        add eax, 8 /* increase Src1, Src2 and Dest */
+        add ebx, 8 /* register pointers by 8 */
+        add edi, 8 /* (edi is the Dest pointer) */
+        dec ecx /* decrease loop counter */
+        jnz L1017 /* check loop termination, proceed if required */
+        emms /* exit MMX state */
+        popa /* restore the registers saved by pusha */
+    }
+#else
+    /* x86_64 ASM with constraints (disabled; the intrinsics loop below is what is compiled): */
+    /* asm volatile ( */
+    /* "shr $3, %%rcx \n\t" /\* counter/8 (MMX loads 8 bytes at a time) *\/ */
+    /* ".align 16 \n\t" /\* 16 byte alignment of the loop entry *\/ */
+    /* "1: movq (%%rax), %%mm1 \n\t" /\* load 8 bytes from Src1 into mm1 *\/ */
+    /* "pand (%%rbx), %%mm1 \n\t" /\* mm1=Src1&Src2 *\/ */
+    /* "movq %%mm1, (%%rdi) \n\t" /\* store result in Dest *\/ */
+    /* "add $8, %%rax \n\t" /\* increase Src1, Src2 and Dest *\/ */
+    /* "add $8, %%rbx \n\t" /\* register pointers by 8 *\/ */
+    /* "add $8, %%rdi \n\t" */
+    /* "dec %%rcx \n\t" /\* decrease loop counter *\/ */
+    /* "jnz 1b \n\t" /\* check loop termination, proceed if required *\/ */
+    /* "emms \n\t" /\* exit MMX state *\/ */
+    /* : "+a" (Src1), /\* load Src1 address into rax, modified by the loop *\/ */
+    /* "+b" (Src2), /\* load Src2 address into rbx, modified by the loop *\/ */
+    /* "+c" (SrcLength), /\* load loop counter (SIZE) into rcx, modified by the loop *\/ */
+    /* "+D" (Dest) /\* load Dest address into rdi, modified by the loop *\/ */
+    /* : */
+    /* : "memory", /\* *Dest is modified *\/ */
+    /* "mm1" /\* register mm1 modified *\/ */
+    /* ); */
+
+    /* i386 and x86_64: MMX intrinsics version of the loop */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mSrc2 = (__m64*)Src2;
+    __m64 *mDest = (__m64*)Dest;
+    int i;
+    for (i = 0; i < SrcLength/8; i++) { /* one pass per complete group of 8 bytes; a tail of SrcLength%8 bytes is not touched here */
+        *mDest = _m_pand(*mSrc1, *mSrc2); /* Src1&Src2 */
+        mSrc1++;
+        mSrc2++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1); /* built without USE_MMX: nothing was processed */
+#endif
+}
+
+/*!
+\brief Filter using BitAnd: D = S1 & S2
+
+Uses the MMX routine for the complete groups of 8 bytes when MMX is detected
+and the arrays hold more than 7 bytes; remaining bytes are ANDed in C.
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterBitAnd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int i, istart;
+    unsigned char *cursrc1, *cursrc2, *curdst;
+
+    /* Validate input parameters: NULL buffers are an error, zero length is a no-op */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+        return(-1);
+    if (length == 0)
+        return(0);
+
+    if ((SDL_imageFilterMMXdetect()>0) && (length>7)) {
+        /* if (length > 7) { */
+        /* Call MMX routine */
+
+        SDL_imageFilterBitAndMMX(Src1, Src2, Dest, length);
+
+        /* Check for unaligned bytes */
+        if ((length & 7) > 0) {
+
+            /* Setup to process unaligned bytes: start at the last multiple of 8 */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            cursrc2 = &Src2[istart];
+            curdst = &Dest[istart];
+        } else {
+            /* No unaligned bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* Setup to process whole image */
+        istart = 0;
+        cursrc1 = Src1;
+        cursrc2 = Src2;
+        curdst = Dest;
+    }
+
+    /* C routine to process image */
+    for (i = istart; i < length; i++) {
+        *curdst = (*cursrc1) & (*cursrc2);
+        /* Advance pointers */
+        cursrc1++;
+        cursrc2++;
+        curdst++;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using BitOr: D = S1 | S2
+
+Only complete groups of 8 bytes are processed; a tail of SrcLength%8 bytes is left untouched.
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterBitOrMMX(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha /* save the general purpose registers used below */
+        mov eax, Src1 /* load Src1 address into eax */
+        mov ebx, Src2 /* load Src2 address into ebx */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16 /* 16 byte alignment of the loop entry */
+L91017:
+        movq mm1, [eax] /* load 8 bytes from Src1 into mm1 */
+        por mm1, [ebx] /* mm1=Src1|Src2 */
+        movq [edi], mm1 /* store result in Dest */
+        add eax, 8 /* increase Src1, Src2 and Dest */
+        add ebx, 8 /* register pointers by 8 */
+        add edi, 8 /* (edi is the Dest pointer) */
+        dec ecx /* decrease loop counter */
+        jnz L91017 /* check loop termination, proceed if required */
+        emms /* exit MMX state */
+        popa /* restore the registers saved by pusha */
+    }
+#else
+    /* i386 and x86_64: MMX intrinsics version of the loop */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mSrc2 = (__m64*)Src2;
+    __m64 *mDest = (__m64*)Dest;
+    int i;
+    for (i = 0; i < SrcLength/8; i++) { /* one pass per complete group of 8 bytes */
+        *mDest = _m_por(*mSrc1, *mSrc2); /* Src1|Src2 */
+        mSrc1++;
+        mSrc2++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1); /* built without USE_MMX: nothing was processed */
+#endif
+}
+
+/*!
+\brief Filter using BitOr: D = S1 | S2
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterBitOr(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int pos = 0;    /* first byte still to be handled by the C loop */
+
+    /* Reject missing buffers; an empty image is trivially done */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL)) {
+        return (-1);
+    }
+    if (length == 0) {
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+        /* MMX routine takes the complete groups of 8 bytes */
+        SDL_imageFilterBitOrMMX(Src1, Src2, Dest, length);
+        /* Only the unaligned tail (if any) is left for the C loop */
+        pos = length & 0xfffffff8;
+    }
+
+    /* C routine: the tail after the MMX pass, or the whole image without MMX */
+    for (; pos < length; pos++) {
+        Dest[pos] = Src1[pos] | Src2[pos];
+    }
+    return (0);
+}
+
+/*!
+\brief Internal ASM Filter using Div: D = S1 / S2
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterDivASM(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha /* save the general purpose registers used below */
+        mov edx, Src1 /* load Src1 address into edx */
+        mov esi, Src2 /* load Src2 address into esi */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx; one iteration per byte */
+        align 16 /* 16 byte alignment of the loop entry */
+L10191:
+        mov bl, [esi] /* load a byte from Src2 */
+        cmp bl, 0 /* check if it is zero */
+        jnz L10192 /* non-zero divisor: do the division */
+        mov [edi], 255 /* division by zero = 255 !!! */
+        jmp L10193 /* skip the division */
+L10192:
+        xor ah, ah /* prepare AX, zero AH register */
+        mov al, [edx] /* load a byte from Src1 into AL */
+        div bl /* divide AL by BL */
+        mov [edi], al /* move a byte result to Dest */
+L10193:
+        inc edx /* increment Src1, Src2, Dest */
+        inc esi /* pointer registers by one */
+        inc edi /* (edi is the Dest pointer) */
+        dec ecx /* decrease loop counter */
+        jnz L10191 /* check loop termination, proceed if required */
+        popa /* restore the registers saved by pusha */
+    }
+#else
+    /* Note: ~15% gain on i386, less efficient than C on x86_64 */
+    /* Also depends on whether the function is static (?!) */
+    /* Also depends on whether we work on malloc() or static char[] */
+    asm volatile (
+# if defined(i386)
+        "pushl %%ebx \n\t" /* %ebx may be the PIC register. */
+        ".align 16 \n\t" /* 16 byte alignment of the loop entry */
+        "1: mov (%%esi), %%bl \n\t" /* load a byte from Src2 */
+        "cmp $0, %%bl \n\t" /* check if it is zero */
+        "jnz 2f \n\t" /* non-zero divisor: do the division */
+        "movb $255, (%%edi) \n\t" /* division by zero = 255 !!! */
+        "jmp 3f \n\t" /* skip the division */
+        "2: xor %%ah, %%ah \n\t" /* prepare AX, zero AH register */
+        "mov (%%edx), %%al \n\t" /* load a byte from Src1 into AL */
+        "div %%bl \n\t" /* divide AL by BL */
+        "mov %%al, (%%edi) \n\t" /* move a byte result to Dest */
+        "3: inc %%edx \n\t" /* increment Src1, Src2, Dest */
+        "inc %%esi \n\t" /* pointer registers by one */
+        "inc %%edi \n\t" /* (edi is the Dest pointer) */
+        "dec %%ecx \n\t" /* decrease loop counter */
+        "jnz 1b \n\t" /* check loop termination, proceed if required */
+        "popl %%ebx \n\t" /* restore %ebx */
+        : "+d" (Src1), /* load Src1 address into edx */
+        "+S" (Src2), /* load Src2 address into esi */
+        "+c" (SrcLength), /* load loop counter (SIZE) into ecx */
+        "+D" (Dest) /* load Dest address into edi */
+        :
+        : "memory", "rax" /* NOTE(review): "rax" is a 64-bit register name used in the i386 branch - confirm the compiler accepts this clobber */
+# elif defined(__x86_64__)
+        ".align 16 \n\t" /* 16 byte alignment of the loop entry */
+        "1: mov (%%rsi), %%bl \n\t" /* load a byte from Src2 */
+        "cmp $0, %%bl \n\t" /* check if it is zero */
+        "jnz 2f \n\t" /* non-zero divisor: do the division */
+        "movb $255, (%%rdi) \n\t" /* division by zero = 255 !!! */
+        "jmp 3f \n\t" /* skip the division */
+        "2: xor %%ah, %%ah \n\t" /* prepare AX, zero AH register */
+        "mov (%%rdx), %%al \n\t" /* load a byte from Src1 into AL */
+        "div %%bl \n\t" /* divide AL by BL */
+        "mov %%al, (%%rdi) \n\t" /* move a byte result to Dest */
+        "3: inc %%rdx \n\t" /* increment Src1, Src2, Dest */
+        "inc %%rsi \n\t" /* pointer registers by one */
+        "inc %%rdi \n\t" /* (rdi is the Dest pointer) */
+        "dec %%rcx \n\t" /* decrease loop counter; NOTE(review): SrcLength is a 32-bit unsigned int but the full rcx is decremented - confirm the upper half is zero */
+        "jnz 1b \n\t" /* check loop termination, proceed if required */
+        : "+d" (Src1), /* load Src1 address into rdx */
+        "+S" (Src2), /* load Src2 address into rsi */
+        "+c" (SrcLength), /* load loop counter (SIZE) into rcx */
+        "+D" (Dest) /* load Dest address into rdi */
+        :
+        : "memory", "rax", "rbx" /* al/ah and bl are overwritten by the loop */
+# endif
+    );
+#endif
+    return (0);
+#else
+    return (-1); /* built without USE_MMX: nothing was processed */
+#endif
+}
+
+/*!
+\brief Filter using Div: D = S1 / S2
+
+\param Src1 Pointer to the start of the first source byte array (S1).
+\param Src2 Pointer to the start of the second source byte array (S2).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+
+A zero byte in S2 does not divide; the corresponding byte of D is set to 255.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length)
+{
+    unsigned int i;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Src2 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /*
+     * The ASM routine works byte by byte, so a successful call leaves no
+     * unaligned tail behind. If it reports -1 (it was not compiled in),
+     * nothing was written, so fall through to the C loop instead of
+     * reporting success with an untouched Dest.
+     */
+    if (SDL_imageFilterMMXdetect() &&
+        (SDL_imageFilterDivASM(Src1, Src2, Dest, length) == 0)) {
+        return (0);
+    }
+
+    /* C routine to process image */
+    for (i = 0; i < length; i++) {
+        if (Src2[i] == 0) {
+            Dest[i] = 255;    /* division by zero = 255 */
+        } else {
+            Dest[i] = (unsigned char) ((int) Src1[i] / (int) Src2[i]);
+        }
+    }
+
+    return (0);
+}
+
+/* ------------------------------------------------------------------------------------ */
+
+/*!
+\brief Internal MMX Filter using BitNegation: D = !S
+
+\param Src1 Pointer to the start of the source byte array (S1).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterBitNegationMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha /* save the general purpose registers used below */
+        pcmpeqb mm1, mm1 /* generate all 1's in mm1 */
+        mov eax, Src1 /* load Src1 address into eax */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16 /* 16 byte alignment of the loop entry */
+L91117:
+        movq mm0, [eax] /* load 8 bytes from Src1 into mm0 */
+        pxor mm0, mm1 /* negate mm0 by xoring with mm1 */
+        movq [edi], mm0 /* store result in Dest */
+        add eax, 8 /* increase Src1 and Dest register pointers by 8 */
+        add edi, 8 /* (edi is the Dest pointer) */
+        dec ecx /* decrease loop counter */
+        jnz L91117 /* check loop termination, proceed if required */
+        emms /* exit MMX state */
+        popa /* restore the registers saved by pusha */
+    }
+#else
+    /* i386 and x86_64: MMX intrinsics version of the loop */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 mm1; /* NOTE(review): read below before being assigned; comparing a value with itself gives all 1's whatever it holds, but compilers may warn */
+    mm1 = _m_pcmpeqb(mm1, mm1); /* generate all 1's in mm1 */
+    int i;
+    for (i = 0; i < SrcLength/8; i++) { /* one pass per complete group of 8 bytes; a tail of SrcLength%8 bytes is not touched here */
+        *mDest = _m_pxor(*mSrc1, mm1); /* invert every bit of the 8 source bytes by xoring with mm1 */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+
+#endif
+    return (0);
+#else
+    return (-1); /* built without USE_MMX: nothing was processed */
+#endif
+}
+
+/*!
+\brief Filter using BitNegation: D = ~S (bitwise complement of every byte)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterBitNegation(unsigned char *Src1, unsigned char *Dest, unsigned int length)
+{
+    unsigned int pos = 0;    /* first byte still to be handled by the C loop */
+
+    /* Reject missing buffers; an empty image is trivially done */
+    if ((Src1 == NULL) || (Dest == NULL)) {
+        return (-1);
+    }
+    if (length == 0) {
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+        /* MMX routine takes the complete groups of 8 bytes */
+        SDL_imageFilterBitNegationMMX(Src1, Dest, length);
+        /* Only the unaligned tail (if any) is left for the C loop */
+        pos = length & 0xfffffff8;
+    }
+
+    /* C routine: the tail after the MMX pass, or the whole image without MMX */
+    for (; pos < length; pos++) {
+        Dest[pos] = ~Src1[pos];
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using AddByte: D = saturation255(S + C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param C Constant value to add (C).
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterAddByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha /* save the general purpose registers used below */
+        /* ** Duplicate C in 8 bytes of MM1 ** */
+        mov al, C /* load C into AL */
+        mov ah, al /* copy AL into AH */
+        mov bx, ax /* copy AX into BX */
+        shl eax, 16 /* shift 2 bytes of EAX left */
+        mov ax, bx /* copy BX into AX */
+        movd mm1, eax /* copy EAX into MM1 */
+        movd mm2, eax /* copy EAX into MM2 */
+        punpckldq mm1, mm2 /* fill higher bytes of MM1 with C */
+        mov eax, Src1 /* load Src1 address into eax */
+        mov edi, Dest /* load Dest address into edi */
+        mov ecx, SrcLength /* load loop counter (SIZE) into ecx */
+        shr ecx, 3 /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16 /* 16 byte alignment of the loop entry */
+L1021:
+        movq mm0, [eax] /* load 8 bytes from Src1 into MM0 */
+        paddusb mm0, mm1 /* MM0=Src1+C (add 8 bytes with saturation) */
+        movq [edi], mm0 /* store result in Dest */
+        add eax, 8 /* increase Src1 register pointer by 8 */
+        add edi, 8 /* increase Dest register pointer by 8 */
+        dec ecx /* decrease loop counter */
+        jnz L1021 /* check loop termination, proceed if required */
+        emms /* exit MMX state */
+        popa /* restore the registers saved by pusha */
+    }
+#else
+    /* i386 and x86_64: MMX intrinsics version of the loop */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    /* Duplicate C in 8 bytes of MM1 */
+    int i; /* first used as a 4-byte scratch value, then as the loop counter */
+    memset(&i, C, 4); /* fill the four bytes of i with C */
+    __m64 mm1 = _m_from_int(i);
+    __m64 mm2 = _m_from_int(i);
+    mm1 = _m_punpckldq(mm1, mm2); /* fill higher bytes of MM1 with C */
+    //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+    for (i = 0; i < SrcLength/8; i++) { /* one pass per complete group of 8 bytes; a tail of SrcLength%8 bytes is not touched here */
+        *mDest = _m_paddusb(*mSrc1, mm1); /* Src1+C (add 8 bytes with saturation) */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1); /* built without USE_MMX: nothing was processed */
+#endif
+}
+
+/*!
+\brief Filter using AddByte: D = saturation255(S + C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param C Constant value to add (C).
+
+With C == 0 the source bytes are copied to Dest unchanged; Src1 is never written.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterAddByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
+{
+    unsigned int i, istart;
+    int iC;
+    unsigned char *cursrc1, *curdest;
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /*
+     * Special case: C==0 means D = S. The copy must go from Src1 to Dest
+     * (the arguments used to be swapped, which clobbered the source).
+     * memmove is used because callers may filter in place (Dest == Src1).
+     */
+    if (C == 0) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+
+        /* MMX routine takes the complete groups of 8 bytes */
+        SDL_imageFilterAddByteMMX(Src1, Dest, length, C);
+
+        /* Check for unaligned bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process unaligned bytes */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            curdest = &Dest[istart];
+        } else {
+            /* No unaligned bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* Setup to process whole image */
+        istart = 0;
+        cursrc1 = Src1;
+        curdest = Dest;
+    }
+
+    /* C routine to process image: add with saturation at 255 */
+    iC = (int) C;
+    for (i = istart; i < length; i++) {
+        result = (int) *cursrc1 + iC;
+        if (result > 255)
+            result = 255;
+        *curdest = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        curdest++;
+    }
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param C Constant to add (C).
+\param D Byteorder-swapped constant to add (Cs).
+
+\return Returns 0 for success or -1 for error.
+
+\note Only the SrcLength/8 complete 8-byte groups are processed. Returns -1
+without touching the buffers when USE_MMX is not defined. NOTE(review): the
+__asm branch places C in the low and D in the high half of MM1, while the
+intrinsics branch uses C for both halves and never reads D - confirm which
+layout is intended.
+*/
+static int SDL_imageFilterAddUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        /* ** Build the 8-byte addend in MM1 ** */
+        mov eax, C              /* load C into EAX */
+        movd mm1, eax           /* copy EAX into MM1 */
+        mov eax, D              /* load D into EAX */
+        movd mm2, eax           /* copy EAX into MM2 */
+        punpckldq mm1, mm2      /* MM1 = C in the low dword, D in the high dword */
+        mov eax, Src1           /* load Src1 address into eax */
+        mov edi, Dest           /* load Dest address into edi */
+        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
+        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16                /* 16 byte alignment of the loop entry */
+L11023:
+        movq mm0, [eax]         /* load 8 bytes from Src1 into MM0 */
+        paddusb mm0, mm1        /* MM0=Src1+C (add 8 bytes with saturation) */
+        movq [edi], mm0         /* store result in Dest */
+        add eax, 8              /* increase Src1 register pointer by 8 */
+        add edi, 8              /* increase Dest register pointer by 8 */
+        dec ecx                 /* decrease loop counter */
+        jnz L11023              /* check loop termination, proceed if required */
+        emms                    /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    /* Duplicate (int)C in 8 bytes of MM1 */
+    __m64 mm1 = _m_from_int(C);
+    __m64 mm2 = _m_from_int(C);
+    mm1 = _m_punpckldq(mm1, mm2);           /* fill higher bytes of MM1 with C */
+    //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+    int i;
+    for (i = 0; i < SrcLength/8; i++) {
+        *mDest = _m_paddusb(*mSrc1, mm1);   /* Src1+C (add 8 bytes with saturation) */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty();                             /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using AddUint: D = saturation255((S[i] + Cs[i % 4]), Cs=Swap32((uint)C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param C Constant to add (C).
+
+\note In the C fallback, byte i receives S[i] + ((C >> 8*(i % 4)) & 0xff),
+clamped to 255. Src1 and Dest may refer to the same buffer. When C is 0 the
+source bytes are copied to Dest unchanged.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterAddUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
+{
+    unsigned int i, istart, D;
+    int iC[4];
+    unsigned char *cursrc1;
+    unsigned char *curdest;
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /* Special case: C==0, so D = S. Destination comes first; memmove keeps
+       the in-place (Src1 == Dest) call well defined. */
+    if (C == 0) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+
+        /* MMX routine */
+        D = SWAP_32(C);
+        SDL_imageFilterAddUintMMX(Src1, Dest, length, C, D);
+
+        /* Check for unaligned bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process unaligned bytes */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            curdest = &Dest[istart];
+        } else {
+            /* No unaligned bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* Setup to process whole image */
+        istart = 0;
+        cursrc1 = Src1;
+        curdest = Dest;
+    }
+
+    /* C routine to process bytes: split C into its four bytes, low byte first */
+    iC[3] = (int) ((C >> 24) & 0xff);
+    iC[2] = (int) ((C >> 16) & 0xff);
+    iC[1] = (int) ((C >> 8) & 0xff);
+    iC[0] = (int) ((C >> 0) & 0xff);
+    /* istart is 0 or a multiple of 8, so (i & 3) is the byte's position
+       within its 4-byte group. */
+    for (i = istart; i < length; i++) {
+        result = (int) *cursrc1 + iC[i & 3];
+        if (result > 255)
+            result = 255;
+        *curdest = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        curdest++;
+    }
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using AddByteToHalf: D = saturation255(S/2 + C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param C Constant to add (C).
+\param Mask Pointer to 8 mask bytes of value 0x7F.
+
+\return Returns 0 for success or -1 for error.
+
+\note Only the SrcLength/8 complete 8-byte groups are processed. Each group is
+shifted right by one bit as 16-bit words and then ANDed with Mask, which
+clears the bit that crossed over from the neighbouring byte. Returns -1
+without touching the buffers when USE_MMX is not defined.
+*/
+static int SDL_imageFilterAddByteToHalfMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C,
+                                           unsigned char *Mask)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        /* ** Duplicate C in 8 bytes of MM1 ** */
+        mov al, C               /* load C into AL */
+        mov ah, al              /* copy AL into AH */
+        mov bx, ax              /* copy AX into BX */
+        shl eax, 16             /* shift 2 bytes of EAX left */
+        mov ax, bx              /* copy BX into AX */
+        movd mm1, eax           /* copy EAX into MM1 */
+        movd mm2, eax           /* copy EAX into MM2 */
+        punpckldq mm1, mm2      /* fill higher bytes of MM1 with C */
+        mov edx, Mask           /* load Mask address into edx */
+        movq mm0, [edx]         /* load Mask into mm0 */
+        mov eax, Src1           /* load Src1 address into eax */
+        mov edi, Dest           /* load Dest address into edi */
+        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
+        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16                /* 16 byte alignment of the loop entry */
+L1022:
+        movq mm2, [eax]         /* load 8 bytes from Src1 into MM2 */
+        psrlw mm2, 1            /* shift 4 WORDS of MM2 1 bit to the right */
+        pand mm2, mm0           /* apply Mask to 8 BYTES of MM2 */
+        paddusb mm2, mm1        /* MM2=Src1/2+C (add 8 bytes with saturation) */
+        movq [edi], mm2         /* store result in Dest */
+        add eax, 8              /* increase Src1 register pointer by 8 */
+        add edi, 8              /* increase Dest register pointer by 8 */
+        dec ecx                 /* decrease loop counter */
+        jnz L1022               /* check loop termination, proceed if required */
+        emms                    /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 *mMask = (__m64*)Mask;
+    /* Duplicate C in 8 bytes of MM1 */
+    int i;
+    memset(&i, C, 4);                       /* i = C replicated into 4 bytes */
+    __m64 mm1 = _m_from_int(i);
+    __m64 mm2 = _m_from_int(i);
+    mm1 = _m_punpckldq(mm1, mm2);           /* fill higher bytes of MM1 with C */
+    //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+    for (i = 0; i < SrcLength/8; i++) {
+        __m64 mm2 = _m_psrlwi(*mSrc1, 1);   /* shift 4 WORDS of MM2 1 bit to the right (shadows the outer mm2) */
+        mm2 = _m_pand(mm2, *mMask);         /* apply Mask to 8 BYTES of MM2 */
+        /* byte 0x0f, 0xdb, 0xd0 */
+        *mDest = _m_paddusb(mm1, mm2);      /* Src1/2+C (add 8 bytes with saturation) */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty();                             /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using AddByteToHalf: D = saturation255(S/2 + C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param C Constant to add (C).
+
+\note When MMX is detected and length > 7, the MMX worker handles the complete
+8-byte groups and the C loop finishes the remaining length%8 bytes; otherwise
+the C loop processes the whole array. The worker's return value is not checked.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterAddByteToHalf(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
+{
+    static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
+    unsigned int i, istart;
+    int iC;
+    unsigned char *cursrc1;
+    unsigned char *curdest;
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return(-1);
+    if (length == 0)
+        return(0);
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+
+        /* MMX routine */
+        SDL_imageFilterAddByteToHalfMMX(Src1, Dest, length, C, Mask);
+
+        /* Check for unaligned bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process unaligned bytes */
+            istart = length & 0xfffffff8;   /* round length down to a multiple of 8 */
+            cursrc1 = &Src1[istart];
+            curdest = &Dest[istart];
+        } else {
+            /* No unaligned bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* Setup to process whole image */
+        istart = 0;
+        cursrc1 = Src1;
+        curdest = Dest;
+    }
+
+    /* C routine to process image */
+    iC = (int) C;
+    for (i = istart; i < length; i++) {
+        result = (int) (*cursrc1 / 2) + iC;
+        if (result > 255)
+            result = 255;
+        *curdest = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        curdest++;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using SubByte: D = saturation0(S - C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param C Constant to subtract (C).
+
+\note Only the SrcLength/8 complete 8-byte groups are processed. Returns -1
+without touching the buffers when USE_MMX is not defined. NOTE(review): unlike
+the other MMX workers visible nearby, this one is not declared static.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterSubByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        /* ** Duplicate C in 8 bytes of MM1 ** */
+        mov al, C               /* load C into AL */
+        mov ah, al              /* copy AL into AH */
+        mov bx, ax              /* copy AX into BX */
+        shl eax, 16             /* shift 2 bytes of EAX left */
+        mov ax, bx              /* copy BX into AX */
+        movd mm1, eax           /* copy EAX into MM1 */
+        movd mm2, eax           /* copy EAX into MM2 */
+        punpckldq mm1, mm2      /* fill higher bytes of MM1 with C */
+        mov eax, Src1           /* load Src1 address into eax */
+        mov edi, Dest           /* load Dest address into edi */
+        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
+        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16                /* 16 byte alignment of the loop entry */
+L1023:
+        movq mm0, [eax]         /* load 8 bytes from Src1 into MM0 */
+        psubusb mm0, mm1        /* MM0=Src1-C (sub 8 bytes with saturation) */
+        movq [edi], mm0         /* store result in Dest */
+        add eax, 8              /* increase Src1 register pointer by 8 */
+        add edi, 8              /* increase Dest register pointer by 8 */
+        dec ecx                 /* decrease loop counter */
+        jnz L1023               /* check loop termination, proceed if required */
+        emms                    /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    /* Duplicate C in 8 bytes of MM1 */
+    int i;
+    memset(&i, C, 4);                       /* i = C replicated into 4 bytes */
+    __m64 mm1 = _m_from_int(i);
+    __m64 mm2 = _m_from_int(i);
+    mm1 = _m_punpckldq(mm1, mm2);           /* fill higher bytes of MM1 with C */
+    //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+    for (i = 0; i < SrcLength/8; i++) {
+        *mDest = _m_psubusb(*mSrc1, mm1);   /* Src1-C (sub 8 bytes with saturation) */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty();                             /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using SubByte: D = saturation0(S - C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+\param C Constant to subtract (C).
+
+\note Src1 and Dest may refer to the same buffer. When C is 0 the source bytes
+are copied to Dest unchanged.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterSubByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
+{
+    unsigned int i, istart;
+    int iC;
+    unsigned char *cursrc1;
+    unsigned char *curdest;
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /* Special case: C==0, so D = S. Destination comes first; memmove keeps
+       the in-place (Src1 == Dest) call well defined. */
+    if (C == 0) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+
+        /* MMX routine */
+        SDL_imageFilterSubByteMMX(Src1, Dest, length, C);
+
+        /* Check for unaligned bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process unaligned bytes */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            curdest = &Dest[istart];
+        } else {
+            /* No unaligned bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* Setup to process whole image */
+        istart = 0;
+        cursrc1 = Src1;
+        curdest = Dest;
+    }
+
+    /* C routine to process image */
+    iC = (int) C;
+    for (i = istart; i < length; i++) {
+        result = (int) *cursrc1 - iC;
+        if (result < 0)
+            result = 0;
+        *curdest = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        curdest++;
+    }
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param C Constant to subtract (C).
+\param D Byteorder-swapped constant to subtract (Cs).
+
+\note Only the SrcLength/8 complete 8-byte groups are processed. Returns -1
+without touching the buffers when USE_MMX is not defined. NOTE(review): the
+__asm branch places C in the low and D in the high half of MM1, while the
+intrinsics branch uses C for both halves and never reads D - confirm which
+layout is intended.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterSubUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned int C, unsigned int D)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        /* ** Build the 8-byte subtrahend in MM1 ** */
+        mov eax, C              /* load C into EAX */
+        movd mm1, eax           /* copy EAX into MM1 */
+        mov eax, D              /* load D into EAX */
+        movd mm2, eax           /* copy EAX into MM2 */
+        punpckldq mm1, mm2      /* MM1 = C in the low dword, D in the high dword */
+        mov eax, Src1           /* load Src1 address into eax */
+        mov edi, Dest           /* load Dest address into edi */
+        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
+        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16                /* 16 byte alignment of the loop entry */
+L11024:
+        movq mm0, [eax]         /* load 8 bytes from Src1 into MM0 */
+        psubusb mm0, mm1        /* MM0=Src1-C (sub 8 bytes with saturation) */
+        movq [edi], mm0         /* store result in Dest */
+        add eax, 8              /* increase Src1 register pointer by 8 */
+        add edi, 8              /* increase Dest register pointer by 8 */
+        dec ecx                 /* decrease loop counter */
+        jnz L11024              /* check loop termination, proceed if required */
+        emms                    /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    /* Duplicate (int)C in 8 bytes of MM1 */
+    __m64 mm1 = _m_from_int(C);
+    __m64 mm2 = _m_from_int(C);
+    mm1 = _m_punpckldq(mm1, mm2);           /* fill higher bytes of MM1 with C */
+    //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+    int i;
+    for (i = 0; i < SrcLength/8; i++) {
+        *mDest = _m_psubusb(*mSrc1, mm1);   /* Src1-C (sub 8 bytes with saturation) */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty();                             /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using SubUint: D = saturation0(S[i] - Cs[i % 4]), Cs=Swap32((uint)C)
+
+\param Src1 Pointer to the start of the source byte array (S1).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param C Constant to subtract (C).
+
+\note In the C fallback, byte i receives S[i] - ((C >> 8*(i % 4)) & 0xff),
+clamped to 0. Src1 and Dest may refer to the same buffer. When C is 0 the
+source bytes are copied to Dest unchanged.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterSubUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C)
+{
+    unsigned int i, istart, D;
+    int iC[4];
+    unsigned char *cursrc1;
+    unsigned char *curdest;
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /* Special case: C==0, so D = S. Destination comes first; memmove keeps
+       the in-place (Src1 == Dest) call well defined. */
+    if (C == 0) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+
+        /* MMX routine */
+        D = SWAP_32(C);
+        SDL_imageFilterSubUintMMX(Src1, Dest, length, C, D);
+
+        /* Check for unaligned bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process unaligned bytes */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            curdest = &Dest[istart];
+        } else {
+            /* No unaligned bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* Setup to process whole image */
+        istart = 0;
+        cursrc1 = Src1;
+        curdest = Dest;
+    }
+
+    /* C routine to process image: split C into its four bytes, low byte first */
+    iC[3] = (int) ((C >> 24) & 0xff);
+    iC[2] = (int) ((C >> 16) & 0xff);
+    iC[1] = (int) ((C >> 8) & 0xff);
+    iC[0] = (int) ((C >> 0) & 0xff);
+    /* istart is 0 or a multiple of 8, so (i & 3) is the byte's position
+       within its 4-byte group. */
+    for (i = istart; i < length; i++) {
+        result = (int) *cursrc1 - iC[i & 3];
+        if (result < 0)
+            result = 0;
+        *curdest = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        curdest++;
+    }
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using ShiftRight: D = saturation0(S >> N)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param N Number of bit-positions to shift (N). Valid range is 0 to 8.
+\param Mask Byte array containing 8 bytes with 0x7F value.
+
+\return Returns 0 for success or -1 for error.
+
+\note Only the SrcLength/8 complete 8-byte groups are processed. The data is
+shifted as 16-bit words, so a per-byte mask (built by shifting an all-ones
+register N times through Mask) clears the bits that crossed over from the
+neighbouring byte. The __asm mask loop tests its counter only after the first
+pass, so it relies on N >= 1. Returns -1 without touching the buffers when
+USE_MMX is not defined.
+*/
+static int SDL_imageFilterShiftRightMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
+                                        unsigned char *Mask)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        mov edx, Mask           /* load Mask address into edx */
+        movq mm0, [edx]         /* load Mask into mm0 */
+        xor ecx, ecx            /* zero ECX */
+        mov cl, N               /* load loop counter (N) into CL */
+        movd mm3, ecx           /* copy (N) into MM3 */
+        pcmpeqb mm1, mm1        /* generate all 1's in mm1 */
+L10240:                         /* ** Prepare proper bit-Mask in MM1 ** */
+        psrlw mm1, 1            /* shift 4 WORDS of MM1 1 bit to the right */
+        pand mm1, mm0           /* apply Mask to 8 BYTES of MM1 */
+        /* byte 0x0f, 0xdb, 0xc8 */
+        dec cl                  /* decrease loop counter */
+        jnz L10240              /* check loop termination, proceed if required */
+        /* ** Shift all bytes of the image ** */
+        mov eax, Src1           /* load Src1 address into eax */
+        mov edi, Dest           /* load Dest address into edi */
+        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
+        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16                /* 16 byte alignment of the loop entry */
+L10241:
+        movq mm0, [eax]         /* load 8 bytes from Src1 into MM0 */
+        psrlw mm0, mm3          /* shift 4 WORDS of MM0 (N) bits to the right */
+        pand mm0, mm1           /* apply proper bit-Mask to 8 BYTES of MM0 */
+        /* byte 0x0f, 0xdb, 0xc1 */
+        movq [edi], mm0         /* store result in Dest */
+        add eax, 8              /* increase Src1 register pointer by 8 */
+        add edi, 8              /* increase Dest register pointer by 8 */
+        dec ecx                 /* decrease loop counter */
+        jnz L10241              /* check loop termination, proceed if required */
+        emms                    /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 *mMask = (__m64*)Mask;
+    /* Start from a defined value: comparing an uninitialized variable with
+       itself reads indeterminate storage. */
+    __m64 mm1 = _m_from_int(0);
+    int i;
+    mm1 = _m_pcmpeqb(mm1, mm1);             /* generate all 1's in mm1 */
+    /* Prepare proper bit-Mask in MM1 */
+    for (i = 0; i < N; i++) {
+        mm1 = _m_psrlwi(mm1, 1);            /* shift 4 WORDS of MM1 1 bit to the right */
+        mm1 = _m_pand(mm1, *mMask);         /* apply Mask to 8 BYTES of MM1 */
+    }
+    /* Shift all bytes of the image */
+    for (i = 0; i < SrcLength/8; i++) {
+        __m64 mm0 = _m_psrlwi(*mSrc1, N);   /* shift 4 WORDS of MM0 (N) bits to the right */
+        *mDest = _m_pand(mm0, mm1);         /* apply proper bit-Mask to 8 BYTES of MM0 */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty();                             /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using ShiftRight: D = saturation0(S >> N)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param N Number of bit-positions to shift (N). Valid range is 0 to 8.
+
+\note Src1 and Dest may refer to the same buffer. When N is 0 the source bytes
+are copied to Dest unchanged; N greater than 8 is rejected with -1.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterShiftRight(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
+{
+    static unsigned char Mask[8] = { 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F };
+    unsigned int i, istart;
+    unsigned char *cursrc1;
+    unsigned char *curdest;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /* Check shift */
+    if (N > 8) {
+        return (-1);
+    }
+
+    /* Special case: N==0, so D = S. Destination comes first; memmove keeps
+       the in-place (Src1 == Dest) call well defined. */
+    if (N == 0) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+
+        /* MMX routine */
+        SDL_imageFilterShiftRightMMX(Src1, Dest, length, N, Mask);
+
+        /* Check for unaligned bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process unaligned bytes */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            curdest = &Dest[istart];
+        } else {
+            /* No unaligned bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* Setup to process whole image */
+        istart = 0;
+        cursrc1 = Src1;
+        curdest = Dest;
+    }
+
+    /* C routine to process image */
+    for (i = istart; i < length; i++) {
+        *curdest = (unsigned char) (*cursrc1 >> N);
+        /* Advance pointers */
+        cursrc1++;
+        curdest++;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)
+
+\param Src1 Pointer to the start of the source byte array (S1).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param N Number of bit-positions to shift (N).
+
+\note Only the SrcLength/8 complete 8-byte groups are processed; each group is
+shifted as two 32-bit values (psrld). Returns -1 without touching the buffers
+when USE_MMX is not defined.
+
+\return Returns 0 for success or -1 for error.
+*/
+static int SDL_imageFilterShiftRightUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        mov eax, Src1           /* load Src1 address into eax */
+        mov edi, Dest           /* load Dest address into edi */
+        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
+        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16                /* 16 byte alignment of the loop entry */
+L13023:
+        movq mm0, [eax]         /* load 8 bytes from Src1 into MM0 */
+        psrld mm0, N            /* shift 2 DWORDS of MM0 (N) bits to the right */
+        movq [edi], mm0         /* store result in Dest */
+        add eax, 8              /* increase Src1 register pointer by 8 */
+        add edi, 8              /* increase Dest register pointer by 8 */
+        dec ecx                 /* decrease loop counter */
+        jnz L13023              /* check loop termination, proceed if required */
+        emms                    /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    int i;
+    for (i = 0; i < SrcLength/8; i++) {
+        *mDest = _m_psrldi(*mSrc1, N);      /* shift 2 DWORDS (N) bits to the right */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty();                             /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using ShiftRightUint: D = saturation0((uint)S[i] >> N)
+
+\param Src1 Pointer to the start of the source byte array (S1).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param N Number of bit-positions to shift (N). Valid range is 0 to 32.
+
+\return Returns 0 for success or -1 for error.
+
+\note The array is treated as a sequence of native unsigned int words. Src1
+and Dest may refer to the same buffer. When N is 0 the source bytes are copied
+to Dest unchanged. A shift by the full word width stores 0. In the C fallback,
+trailing bytes that do not fill a complete word are left unmodified in Dest.
+*/
+int SDL_imageFilterShiftRightUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
+{
+    unsigned int i, istart;
+    unsigned char *cursrc1, *curdest;
+    unsigned int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    if (N > 32) {
+        return (-1);
+    }
+
+    /* Special case: N==0, so D = S. Destination comes first; memmove keeps
+       the in-place (Src1 == Dest) call well defined. */
+    if (N == 0) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+
+        SDL_imageFilterShiftRightUintMMX(Src1, Dest, length, N);
+
+        /* Check for unaligned bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process unaligned bytes */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            curdest = &Dest[istart];
+        } else {
+            /* No unaligned bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* Setup to process whole image */
+        istart = 0;
+        cursrc1 = Src1;
+        curdest = Dest;
+    }
+
+    /* C routine to process image, one complete word at a time. The loop
+       condition includes the final complete word and cannot overflow i. */
+    for (i = istart; (length - i) >= (unsigned int) sizeof(result); i += (unsigned int) sizeof(result)) {
+        /* memcpy avoids unaligned and type-punned access to the byte buffers */
+        memcpy(&result, cursrc1, sizeof(result));
+        /* Shifting by the full width of the type is undefined in C */
+        result = (N < 8 * sizeof(result)) ? (result >> N) : 0;
+        memcpy(curdest, &result, sizeof(result));
+        /* Advance pointers */
+        cursrc1 += sizeof(result);
+        curdest += sizeof(result);
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using MultByByte: D = saturation255(S * C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param C Constant to multiply with (C).
+
+\return Returns 0 for success or -1 for error.
+
+\note Only the SrcLength/8 complete 8-byte groups are processed. Source bytes
+are widened to 16-bit words, multiplied by C and packed back to bytes with
+unsigned saturation. For the larger-C path the products are first replaced
+by their absolute value as signed words. Returns -1 without touching the
+buffers when USE_MMX is not defined.
+*/
+static int SDL_imageFilterMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char C)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        /* ** Duplicate C in 4 words of MM1 ** */
+        mov al, C               /* load C into AL */
+        xor ah, ah              /* zero AH */
+        mov bx, ax              /* copy AX into BX */
+        shl eax, 16             /* shift 2 bytes of EAX left */
+        mov ax, bx              /* copy BX into AX */
+        movd mm1, eax           /* copy EAX into MM1 */
+        movd mm2, eax           /* copy EAX into MM2 */
+        punpckldq mm1, mm2      /* fill higher words of MM1 with C */
+        pxor mm0, mm0           /* zero MM0 register */
+        mov eax, Src1           /* load Src1 address into eax */
+        mov edi, Dest           /* load Dest address into edi */
+        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
+        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
+        cmp al, 128             /* if (C <= 128) execute more efficient code. NOTE(review): EAX was reloaded with Src1 above, so AL no longer holds C here - confirm */
+        jg L10251
+        align 16                /* 16 byte alignment of the loop entry */
+L10250:
+        movq mm3, [eax]         /* load 8 bytes from Src1 into MM3 */
+        movq mm4, mm3           /* copy MM3 into MM4 */
+        punpcklbw mm3, mm0      /* unpack low bytes of Src1 into words */
+        punpckhbw mm4, mm0      /* unpack high bytes of Src1 into words */
+        pmullw mm3, mm1         /* mul low bytes of Src1 and MM1 */
+        pmullw mm4, mm1         /* mul high bytes of Src1 and MM1 */
+        packuswb mm3, mm4       /* pack words back into bytes with saturation */
+        movq [edi], mm3         /* store result in Dest */
+        add eax, 8              /* increase Src1 register pointer by 8 */
+        add edi, 8              /* increase Dest register pointer by 8 */
+        dec ecx                 /* decrease loop counter */
+        jnz L10250              /* check loop termination, proceed if required */
+        jmp L10252
+        align 16                /* 16 byte alignment of the loop entry */
+L10251:
+        movq mm3, [eax]         /* load 8 bytes from Src1 into MM3 */
+        movq mm4, mm3           /* copy MM3 into MM4 */
+        punpcklbw mm3, mm0      /* unpack low bytes of Src1 into words */
+        punpckhbw mm4, mm0      /* unpack high bytes of Src1 into words */
+        pmullw mm3, mm1         /* mul low bytes of Src1 and MM1 */
+        pmullw mm4, mm1         /* mul high bytes of Src1 and MM1 */
+        /* ** Take abs value of the results (signed words) ** */
+        movq mm5, mm3           /* copy mm3 into mm5 */
+        movq mm6, mm4           /* copy mm4 into mm6 */
+        psraw mm5, 15           /* fill mm5 words with word sign bit */
+        psraw mm6, 15           /* fill mm6 words with word sign bit */
+        pxor mm3, mm5           /* take 1's complement of only neg words */
+        pxor mm4, mm6           /* take 1's complement of only neg words */
+        psubsw mm3, mm5         /* add 1 to only neg words, W-(-1) or W-0 */
+        psubsw mm4, mm6         /* add 1 to only neg words, W-(-1) or W-0 */
+        packuswb mm3, mm4       /* pack words back into bytes with saturation */
+        movq [edi], mm3         /* store result in Dest */
+        add eax, 8              /* increase Src1 register pointer by 8 */
+        add edi, 8              /* increase Dest register pointer by 8 */
+        dec ecx                 /* decrease loop counter */
+        jnz L10251              /* check loop termination, proceed if required */
+L10252:
+        emms                    /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 mm0 = _m_from_int(0);             /* zero mm0 register */
+    /* Duplicate C in 4 words of MM1 */
+    int i;
+    i = C | C<<16;
+    __m64 mm1 = _m_from_int(i);
+    __m64 mm2 = _m_from_int(i);
+    mm1 = _m_punpckldq(mm1, mm2);           /* fill higher words of MM1 with C */
+    // long long lli = C | C<<16 | (long long)C<<32 | (long long)C<<48;
+    //__m64 mm1 = _m_from_int64(lli); // x86_64 only
+    if (C <= 128) {                         /* if (C <= 128) execute more efficient code */
+        for (i = 0; i < SrcLength/8; i++) {
+            __m64 mm3, mm4;
+            mm3 = _m_punpcklbw(*mSrc1, mm0);    /* unpack low bytes of Src1 into words */
+            mm4 = _m_punpckhbw(*mSrc1, mm0);    /* unpack high bytes of Src1 into words */
+            mm3 = _m_pmullw(mm3, mm1);          /* mul low bytes of Src1 and MM1 */
+            mm4 = _m_pmullw(mm4, mm1);          /* mul high bytes of Src1 and MM1 */
+            *mDest = _m_packuswb(mm3, mm4);     /* pack words back into bytes with saturation */
+            mSrc1++;
+            mDest++;
+        }
+    } else {
+        for (i = 0; i < SrcLength/8; i++) {
+            __m64 mm3, mm4, mm5, mm6;
+            mm3 = _m_punpcklbw(*mSrc1, mm0);    /* unpack low bytes of Src1 into words */
+            mm4 = _m_punpckhbw(*mSrc1, mm0);    /* unpack high bytes of Src1 into words */
+            mm3 = _m_pmullw(mm3, mm1);          /* mul low bytes of Src1 and MM1 */
+            mm4 = _m_pmullw(mm4, mm1);          /* mul high bytes of Src1 and MM1 */
+            /* Take abs value of the results (signed words) */
+            mm5 = _m_psrawi(mm3, 15);           /* fill mm5 words with word sign bit */
+            mm6 = _m_psrawi(mm4, 15);           /* fill mm6 words with word sign bit */
+            mm3 = _m_pxor(mm3, mm5);            /* take 1's complement of only neg. words */
+            mm4 = _m_pxor(mm4, mm6);            /* take 1's complement of only neg. words */
+            mm3 = _m_psubsw(mm3, mm5);          /* add 1 to only neg. words, W-(-1) or W-0 */
+            mm4 = _m_psubsw(mm4, mm6);          /* add 1 to only neg. words, W-(-1) or W-0 */
+            *mDest = _m_packuswb(mm3, mm4);     /* pack words back into bytes with saturation */
+            mSrc1++;
+            mDest++;
+        }
+    }
+    _m_empty();                             /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using MultByByte: D = saturation255(S * C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+\param C Constant to multiply with (C).
+
+\return Returns 0 for success or -1 for error.
+
+\note Src1 and Dest may refer to the same buffer. When C is 1 the source bytes
+are copied to Dest unchanged.
+*/
+int SDL_imageFilterMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C)
+{
+    unsigned int i, istart;
+    int iC;
+    unsigned char *cursrc1;
+    unsigned char *curdest;
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /* Special case: C==1, so D = S. Destination comes first; memmove keeps
+       the in-place (Src1 == Dest) call well defined. */
+    if (C == 1) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+
+        SDL_imageFilterMultByByteMMX(Src1, Dest, length, C);
+
+        /* Check for unaligned bytes */
+        if ((length & 7) > 0) {
+            /* Setup to process unaligned bytes */
+            istart = length & 0xfffffff8;
+            cursrc1 = &Src1[istart];
+            curdest = &Dest[istart];
+        } else {
+            /* No unaligned bytes - we are done */
+            return (0);
+        }
+    } else {
+        /* Setup to process whole image */
+        istart = 0;
+        cursrc1 = Src1;
+        curdest = Dest;
+    }
+
+    /* C routine to process image */
+    iC = (int) C;
+    for (i = istart; i < length; i++) {
+        result = (int) *cursrc1 * iC;
+        if (result > 255)
+            result = 255;
+        *curdest = (unsigned char) result;
+        /* Advance pointers */
+        cursrc1++;
+        curdest++;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using ShiftRightAndMultByByteMMX: D = saturation255((S >> N) * C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param N Number of bit-positions to shift (N). Valid range is 0 to 8.
+\param C Constant to multiply with (C).
+
+\return Returns 0 for success or -1 for error.
+
+\note Only the SrcLength/8 complete 8-byte groups are processed. Source bytes
+are widened to 16-bit words, shifted right by N, multiplied by C and packed
+back to bytes with unsigned saturation. Returns -1 without touching the buffers
+when USE_MMX is not defined.
+*/
+static int SDL_imageFilterShiftRightAndMultByByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
+                                                     unsigned char C)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        /* ** Duplicate C in 4 words of MM1 ** */
+        mov al, C               /* load C into AL */
+        xor ah, ah              /* zero AH */
+        mov bx, ax              /* copy AX into BX */
+        shl eax, 16             /* shift 2 bytes of EAX left */
+        mov ax, bx              /* copy BX into AX */
+        movd mm1, eax           /* copy EAX into MM1 */
+        movd mm2, eax           /* copy EAX into MM2 */
+        punpckldq mm1, mm2      /* fill higher words of MM1 with C */
+        xor ecx, ecx            /* zero ECX */
+        mov cl, N               /* load N into CL */
+        movd mm7, ecx           /* copy N into MM7 */
+        pxor mm0, mm0           /* zero MM0 register */
+        mov eax, Src1           /* load Src1 address into eax */
+        mov edi, Dest           /* load Dest address into edi */
+        mov ecx, SrcLength      /* load loop counter (SIZE) into ecx */
+        shr ecx, 3              /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16                /* 16 byte alignment of the loop entry */
+L1026:
+        movq mm3, [eax]         /* load 8 bytes from Src1 into MM3 */
+        movq mm4, mm3           /* copy MM3 into MM4 */
+        punpcklbw mm3, mm0      /* unpack low bytes of Src1 into words */
+        punpckhbw mm4, mm0      /* unpack high bytes of Src1 into words */
+        psrlw mm3, mm7          /* shift 4 WORDS of MM3 (N) bits to the right */
+        psrlw mm4, mm7          /* shift 4 WORDS of MM4 (N) bits to the right */
+        pmullw mm3, mm1         /* mul low bytes of Src1 by MM1 */
+        pmullw mm4, mm1         /* mul high bytes of Src1 by MM1 */
+        packuswb mm3, mm4       /* pack words back into bytes with saturation */
+        movq [edi], mm3         /* store result in Dest */
+        add eax, 8              /* increase Src1 register pointer by 8 */
+        add edi, 8              /* increase Dest register pointer by 8 */
+        dec ecx                 /* decrease loop counter */
+        jnz L1026               /* check loop termination, proceed if required */
+        emms                    /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 mm0 = _m_from_int(0);             /* zero mm0 register */
+    /* Duplicate C in 4 words of MM1 */
+    int i;
+    i = (C<<16)|C;
+    __m64 mm1 = _m_from_int(i);
+    __m64 mm2 = _m_from_int(i);
+    mm1 = _m_punpckldq(mm1, mm2);           /* fill higher words of MM1 with C */
+    for (i = 0; i < SrcLength/8; i++) {
+        __m64 mm3, mm4, mm5, mm6;           /* mm5 and mm6 are declared but not used in this loop */
+        mm3 = _m_punpcklbw(*mSrc1, mm0);    /* unpack low bytes of Src1 into words */
+        mm4 = _m_punpckhbw(*mSrc1, mm0);    /* unpack high bytes of Src1 into words */
+        mm3 = _m_psrlwi(mm3, N);            /* shift 4 WORDS of MM3 (N) bits to the right */
+        mm4 = _m_psrlwi(mm4, N);            /* shift 4 WORDS of MM4 (N) bits to the right */
+        mm3 = _m_pmullw(mm3, mm1);          /* mul low bytes of Src1 and MM1 */
+        mm4 = _m_pmullw(mm4, mm1);          /* mul high bytes of Src1 and MM1 */
+        *mDest = _m_packuswb(mm3, mm4);     /* pack words back into bytes with saturation */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty();                             /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using ShiftRightAndMultByByte: D = saturation255((S >> N) * C)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param N Number of bit-positions to shift (N). Valid range is 0 to 8.
+\param C Constant to multiply with (C).
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterShiftRightAndMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N,
+                                           unsigned char C)
+{
+    /*
+     * Writes Dest[i] = min(255, (Src1[i] >> N) * C) for every i < length.
+     * Returns -1 when Src1 or Dest is NULL or N > 8, otherwise 0
+     * (length == 0 is a successful no-op).
+     */
+    unsigned int i;
+    unsigned int istart = 0;    /* first byte the C loop still has to process */
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /* Check shift */
+    if (N > 8) {
+        return (-1);
+    }
+
+    /*
+     * Special case: N==0 && C==1 is the identity filter, so copy source to
+     * destination. The arguments are (destination, source, count); the
+     * previous code had them swapped and overwrote the source instead.
+     * memmove keeps in-place calls (Src1 == Dest) well defined.
+     */
+    if ((N == 0) && (C == 1)) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+        /*
+         * The MMX helper handles the leading multiple-of-8 bytes. Only skip
+         * them in the C loop when the helper reports success; it returns -1
+         * when MMX support is not compiled in.
+         */
+        if (SDL_imageFilterShiftRightAndMultByByteMMX(Src1, Dest, length, N, C) == 0) {
+            istart = length & 0xfffffff8;
+        }
+    }
+
+    /* C routine to process the bytes not handled above (possibly none) */
+    for (i = istart; i < length; i++) {
+        result = (int) (Src1[i] >> N) * (int) C;
+        if (result > 255)
+            result = 255;
+        Dest[i] = (unsigned char) result;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using ShiftLeftByte: D = (S << N)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source arrays.
+\param N Number of bit-positions to shift (N). Valid range is 0 to 8.
+\param Mask Byte array containing 8 bytes of 0xFE value.
+
+\return Returns 0 for success or -1 for error.
+*/
+/*
+ * Processes SrcLength/8 complete 8-byte groups; the SrcLength%8 trailing bytes
+ * are left to the caller. Returns -1 without touching Dest when USE_MMX is not
+ * defined.
+ * NOTE(review): both asm loops test their counters only after the first pass,
+ * so they presumably rely on N >= 1 and SrcLength >= 8 (the public wrapper
+ * below guarantees both).
+ */
+static int SDL_imageFilterShiftLeftByteMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N,
+                                           unsigned char *Mask)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        mov edx, Mask  /* load Mask address into edx */
+        movq mm0, [edx]  /* load Mask into mm0 */
+        xor ecx, ecx  /* zero ECX */
+        mov cl, N  /* load loop counter (N) into CL */
+        movd mm3, ecx  /* copy (N) into MM3 */
+        pcmpeqb mm1, mm1  /* generate all 1's in mm1 */
+L10270:  /* ** Prepare proper bit-Mask in MM1 ** */
+        psllw mm1, 1  /* shift 4 WORDS of MM1 1 bit to the left */
+        pand mm1, mm0  /* apply Mask to 8 BYTES of MM1 */
+        /* byte 0x0f, 0xdb, 0xc8 */
+        dec cl  /* decrease loop counter */
+        jnz L10270  /* check loop termination, proceed if required */
+        /* ** Shift all bytes of the image ** */
+        mov eax, Src1  /* load Src1 address into eax */
+        mov edi, Dest  /* load Dest address into edi */
+        mov ecx, SrcLength  /* load loop counter (SIZE) into ecx */
+        shr ecx, 3  /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16  /* 16 byte alignment of the loop entry */
+L10271:
+        movq mm0, [eax]  /* load 8 bytes from Src1 into MM0 */
+        psllw mm0, mm3  /* shift 4 WORDS of MM0 (N) bits to the left */
+        pand mm0, mm1  /* apply proper bit-Mask to 8 BYTES of MM0 */
+        /* byte 0x0f, 0xdb, 0xc1 */
+        movq [edi], mm0  /* store result in Dest */
+        add eax, 8  /* increase Src1 register pointer by 8 */
+        add edi, 8  /* increase Dest register pointer by 8 */
+        dec ecx  /* decrease loop counter */
+        jnz L10271  /* check loop termination, proceed if required */
+        emms  /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 *mMask = (__m64*)Mask;
+    __m64 mm1;
+    int i;
+    mm1 = _m_pcmpeqb(mm1, mm1); /* generate all 1's in mm1 (reads mm1 before it is assigned) */
+    /* Prepare proper bit-Mask in MM1 */
+    for (i = 0; i < N; i++) {
+        mm1 = _m_psllwi(mm1, 1); /* shift 4 WORDS of MM1 1 bit to the left */
+        mm1 = _m_pand(mm1, *mMask); /* apply Mask to 8 BYTES of MM1 */
+    }
+    /* ** Shift all bytes of the image ** */
+    for (i = 0; i < SrcLength/8; i++) {
+        __m64 mm0 = _m_psllwi(*mSrc1, N); /* shift 4 WORDS of MM0 (N) bits to the left */
+        *mDest = _m_pand(mm0, mm1); /* apply proper bit-Mask to 8 BYTES of MM0 */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using ShiftLeftByte: D = (S << N)
+
+Each byte is shifted on its own; bits shifted past bit 7 are discarded.
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source arrays.
+\param N Number of bit-positions to shift (N). Valid range is 0 to 8.
+
+\return Returns 0 for success or -1 for error (NULL array or N > 8).
+*/
+int SDL_imageFilterShiftLeftByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N)
+{
+    static unsigned char Mask[8] = { 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE };
+    unsigned int i;
+    unsigned int istart = 0;    /* first byte the C loop still has to process */
+    int result;
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    if (N > 8) {
+        return (-1);
+    }
+
+    /*
+     * Special case: N==0 is the identity filter, so copy source to
+     * destination. The arguments are (destination, source, count); the
+     * previous code had them swapped and overwrote the source instead.
+     * memmove keeps in-place calls (Src1 == Dest) well defined.
+     */
+    if (N == 0) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+        /* Skip the MMX-processed prefix only if the helper really ran. */
+        if (SDL_imageFilterShiftLeftByteMMX(Src1, Dest, length, N, Mask) == 0) {
+            istart = length & 0xfffffff8;
+        }
+    }
+
+    /* C routine to process the bytes not handled above (possibly none) */
+    for (i = istart; i < length; i++) {
+        result = ((int) Src1[i] << N) & 0xff;
+        Dest[i] = (unsigned char) result;
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using ShiftLeftUint: D = ((uint)S << N)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param N Number of bit-positions to shift (N). Valid range is 0 to 32.
+
+\return Returns 0 for success or -1 for error.
+*/ /* Review notes: only SrcLength/8 complete 8-byte groups are processed; the
+ * SrcLength%8 trailing bytes are left untouched. When USE_MMX is not defined
+ * the whole body reduces to `return (-1)`.
+ * NOTE(review): the inline-asm loop tests its counter only after the first
+ * pass, so it presumably relies on SrcLength >= 8 - confirm with callers. */
+static int SDL_imageFilterShiftLeftUintMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char N)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        mov eax, Src1  /* load Src1 address into eax */
+        mov edi, Dest  /* load Dest address into edi */
+        mov ecx, SrcLength  /* load loop counter (SIZE) into ecx */
+        shr ecx, 3  /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16  /* 16 byte alignment of the loop entry */
+L12023:
+        movq mm0, [eax]  /* load 8 bytes from Src1 into MM0 */
+        pslld mm0, N  /* shift both DWORDS of MM0 (N) bits to the left */
+        movq [edi], mm0  /* store result in Dest */
+        add eax, 8  /* increase Src1 register pointer by 8 */
+        add edi, 8  /* increase Dest register pointer by 8 */
+        dec ecx  /* decrease loop counter */
+        jnz L12023  /* check loop termination, proceed if required */
+        emms  /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    int i;
+    for (i = 0; i < SrcLength/8; i++) { /* one iteration per 8 source bytes */
+        *mDest = _m_pslldi(*mSrc1, N); /* shift both DWORDS of Src1 (N) bits to the left */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using ShiftLeftUint: D = ((uint)S << N)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param N Number of bit-positions to shift (N). Valid range is 0 to 32.
+
+\return Returns 0 for success or -1 for error.
+*/ +int SDL_imageFilterShiftLeftUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N) +{ + unsigned int i, istart; + unsigned char *cursrc1, *curdest; + unsigned int *icursrc1, *icurdest; + unsigned int result; + + /* Validate input parameters */ + if ((Src1 == NULL) || (Dest == NULL)) + return(-1); + if (length == 0) + return(0); + + if (N > 32) { + return (-1); + } + + /* Special case: N==0 */ + if (N == 0) { + memcpy(Src1, Dest, length); + return (0); + } + + if ((SDL_imageFilterMMXdetect()) && (length > 7)) { + + SDL_imageFilterShiftLeftUintMMX(Src1, Dest, length, N); + + /* Check for unaligned bytes */ + if ((length & 7) > 0) { + /* Setup to process unaligned bytes */ + istart = length & 0xfffffff8; + cursrc1 = &Src1[istart]; + curdest = &Dest[istart]; + } else { + /* No unaligned bytes - we are done */ + return (0); + } + } else { + /* Setup to process whole image */ + istart = 0; + cursrc1 = Src1; + curdest = Dest; + } + + /* C routine to process image */ + icursrc1=(unsigned int *)cursrc1; + icurdest=(unsigned int *)curdest; + for (i = istart; i < length; i += 4) { + if ((i+4) 8) { + return (-1); + } + + /* Special case: N==0 */ + if (N == 0) { + memcpy(Src1, Dest, length); + return (0); + } + + if ((SDL_imageFilterMMXdetect()) && (length > 7)) { + + SDL_imageFilterShiftLeftMMX(Src1, Dest, length, N); + + /* Check for unaligned bytes */ + if ((length & 7) > 0) { + /* Setup to process unaligned bytes */ + istart = length & 0xfffffff8; + cursrc1 = &Src1[istart]; + curdest = &Dest[istart]; + } else { + /* No unaligned bytes - we are done */ + return (0); + } + } else { + /* Setup to process whole image */ + istart = 0; + cursrc1 = Src1; + curdest = Dest; + } + + /* C routine to process image */ + for (i = istart; i < length; i++) { + result = (int) *cursrc1 << N; + if (result > 255) + result = 255; + *curdest = (unsigned char) result; + /* Advance pointers */ + cursrc1++; + curdest++; + } + + return (0); +} + +/*! 
+\brief MMX BinarizeUsingThreshold: D = (S >= T) ? 255:0
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param T The threshold boundary (inclusive).
+
+\return Returns 0 for success or -1 for error.
+*/ /* Review notes: a saturated add of (0xFF - T) pushes every byte >= T to
+ * exactly 0xFF, and the compare-equal against all-ones then yields 255 or 0.
+ * Only SrcLength/8 complete 8-byte groups are processed; without USE_MMX the
+ * body reduces to `return (-1)`.
+ * NOTE(review): the inline-asm loop tests its counter only after the first
+ * pass, so it presumably relies on SrcLength >= 8 - confirm with callers. */
+static int SDL_imageFilterBinarizeUsingThresholdMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char T)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        /* ** Duplicate T in 8 bytes of MM3 ** */
+        pcmpeqb mm1, mm1  /* generate all 1's in mm1 */
+        pcmpeqb mm2, mm2  /* generate all 1's in mm2 */
+        mov al, T  /* load T into AL */
+        mov ah, al  /* copy AL into AH */
+        mov bx, ax  /* copy AX into BX */
+        shl eax, 16  /* shift 2 bytes of EAX left */
+        mov ax, bx  /* copy BX into AX */
+        movd mm3, eax  /* copy EAX into MM3 */
+        movd mm4, eax  /* copy EAX into MM4 */
+        punpckldq mm3, mm4  /* fill higher bytes of MM3 with T */
+        psubusb mm2, mm3  /* store 0xFF - T in MM2 */
+        mov eax, Src1  /* load Src1 address into eax */
+        mov edi, Dest  /* load Dest address into edi */
+        mov ecx, SrcLength  /* load loop counter (SIZE) into ecx */
+        shr ecx, 3  /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16  /* 16 byte alignment of the loop entry */
+L1029:
+        movq mm0, [eax]  /* load 8 bytes from Src1 into MM0 */
+        paddusb mm0, mm2  /* MM0=Src1+(0xFF-T) (add 8 bytes with saturation) */
+        pcmpeqb mm0, mm1  /* binarize 255:0, comparing to 255 */
+        movq [edi], mm0  /* store result in Dest */
+        add eax, 8  /* increase Src1 register pointer by 8 */
+        add edi, 8  /* increase Dest register pointer by 8 */
+        dec ecx  /* decrease loop counter */
+        jnz L1029  /* check loop termination, proceed if required */
+        emms  /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    /* Duplicate T in 8 bytes of MM3 */
+    __m64 mm1 = _m_pcmpeqb(mm1, mm1); /* generate all 1's in mm1 (reads mm1 inside its own initializer) */
+    __m64 mm2 = _m_pcmpeqb(mm2, mm2); /* generate all 1's in mm2 (reads mm2 inside its own initializer) */
+    int i;
+    memset(&i, T, 4); /* fill the low 4 bytes of i with T */
+    __m64 mm3 = _m_from_int(i);
+    __m64 mm4 = _m_from_int(i);
+    mm3 = _m_punpckldq(mm3, mm4); /* fill higher bytes of MM3 with T */
+    mm2 = _m_psubusb(mm2, mm3); /* store 0xFF - T in MM2 */
+    //__m64 mm3 = _m_from_int64(lli); // x86_64 only
+    for (i = 0; i < SrcLength/8; i++) { /* one iteration per 8 source bytes */
+        __m64 mm0 = _m_paddusb(*mSrc1, mm2); /* Src1+(0xFF-T) (add 8 bytes with saturation) */
+        *mDest = _m_pcmpeqb(mm0, mm1); /* binarize 255:0, comparing to 255 */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using BinarizeUsingThreshold: D = (S >= T) ? 255:0
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param T The threshold boundary (inclusive).
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterBinarizeUsingThreshold(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char T)
+{
+    /*
+     * Writes 255 to Dest[k] when Src1[k] >= T and 0 otherwise, for k < length.
+     * Returns -1 when either array pointer is NULL, 0 in every other case.
+     */
+    unsigned int pos;
+    unsigned int first = 0;    /* index where the scalar loop starts */
+
+    /* Reject missing buffers; an empty image is a successful no-op */
+    if ((Src1 == NULL) || (Dest == NULL)) {
+        return (-1);
+    }
+    if (length == 0) {
+        return (0);
+    }
+
+    /* With T==0 every byte passes the threshold, so the output is all 255 */
+    if (T == 0) {
+        memset(Dest, 255, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+        SDL_imageFilterBinarizeUsingThresholdMMX(Src1, Dest, length, T);
+        /* Resume after the last complete 8-byte group; equals length when there is no tail */
+        first = length & 0xfffffff8;
+    }
+
+    /* Scalar pass over the remaining bytes (the whole image when MMX was not used) */
+    for (pos = first; pos < length; pos++) {
+        if (Src1[pos] >= T) {
+            Dest[pos] = 255;
+        } else {
+            Dest[pos] = 0;
+        }
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param Tmin Lower (inclusive) boundary of the clipping range.
+\param Tmax Upper (inclusive) boundary of the clipping range.
+
+\return Returns 0 for success or -1 for error.
+*/ /* Review notes: clipping uses three saturated byte operations per group:
+ * add (0xFF - Tmax), subtract (0xFF - Tmax + Tmin), add Tmin. Only
+ * SrcLength/8 complete 8-byte groups are processed; without USE_MMX the body
+ * reduces to `return (-1)`.
+ * NOTE(review): the inline-asm loop tests its counter only after the first
+ * pass, so it presumably relies on SrcLength >= 8 - confirm with callers. */
+static int SDL_imageFilterClipToRangeMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, unsigned char Tmin,
+                                         unsigned char Tmax)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        pcmpeqb mm1, mm1  /* generate all 1's in mm1 */
+        /* ** Duplicate Tmax in 8 bytes of MM3 ** */
+        mov al, Tmax  /* load Tmax into AL */
+        mov ah, al  /* copy AL into AH */
+        mov bx, ax  /* copy AX into BX */
+        shl eax, 16  /* shift 2 bytes of EAX left */
+        mov ax, bx  /* copy BX into AX */
+        movd mm3, eax  /* copy EAX into MM3 */
+        movd mm4, eax  /* copy EAX into MM4 */
+        punpckldq mm3, mm4  /* fill higher bytes of MM3 with Tmax */
+        psubusb mm1, mm3  /* store 0xFF - Tmax in MM1 */
+        /* ** Duplicate Tmin in 8 bytes of MM5 ** */
+        mov al, Tmin  /* load Tmin into AL */
+        mov ah, al  /* copy AL into AH */
+        mov bx, ax  /* copy AX into BX */
+        shl eax, 16  /* shift 2 bytes of EAX left */
+        mov ax, bx  /* copy BX into AX */
+        movd mm5, eax  /* copy EAX into MM5 */
+        movd mm4, eax  /* copy EAX into MM4 */
+        punpckldq mm5, mm4  /* fill higher bytes of MM5 with Tmin */
+        movq mm7, mm5  /* copy MM5 into MM7 */
+        paddusb mm7, mm1  /* store 0xFF - Tmax + Tmin in MM7 */
+        mov eax, Src1  /* load Src1 address into eax */
+        mov edi, Dest  /* load Dest address into edi */
+        mov ecx, SrcLength  /* load loop counter (SIZE) into ecx */
+        shr ecx, 3  /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16  /* 16 byte alignment of the loop entry */
+L1030:
+        movq mm0, [eax]  /* load 8 bytes from Src1 into MM0 */
+        paddusb mm0, mm1  /* MM0=Src1+(0xFF-Tmax) */
+        psubusb mm0, mm7  /* MM0=MM0-(0xFF-Tmax+Tmin) */
+        paddusb mm0, mm5  /* MM0=MM0+Tmin */
+        movq [edi], mm0  /* store result in Dest */
+        add eax, 8  /* increase Src1 register pointer by 8 */
+        add edi, 8  /* increase Dest register pointer by 8 */
+        dec ecx  /* decrease loop counter */
+        jnz L1030  /* check loop termination, proceed if required */
+        emms  /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 mm1 = _m_pcmpeqb(mm1, mm1); /* generate all 1's in mm1 (reads mm1 inside its own initializer) */
+    int i;
+    /* Duplicate Tmax in 8 bytes of MM3 */
+    __m64 mm3, mm4;
+    memset(&i, Tmax, 4); /* fill the low 4 bytes of i with Tmax */
+    mm3 = _m_from_int(i);
+    mm4 = _m_from_int(i);
+    mm3 = _m_punpckldq(mm3, mm4); /* fill higher bytes of MM3 with Tmax */
+    mm1 = _m_psubusb(mm1, mm3); /* store 0xFF - Tmax in MM1 */
+    //__m64 mm3 = _m_from_int64(lli); // x86_64 only
+    /* Duplicate Tmin in 8 bytes of MM5 */
+    __m64 mm5, mm7;
+    memset(&i, Tmin, 4); /* fill the low 4 bytes of i with Tmin */
+    mm5 = _m_from_int(i);
+    mm4 = _m_from_int(i);
+    mm5 = _m_punpckldq(mm5, mm4); /* fill higher bytes of MM5 with Tmin */
+    mm7 = _m_paddusb(mm5, mm1); /* store 0xFF - Tmax + Tmin in MM7 */
+    for (i = 0; i < SrcLength/8; i++) { /* one iteration per 8 source bytes */
+        __m64 mm0;
+        mm0 = _m_paddusb(*mSrc1, mm1); /* MM0=Src1+(0xFF-Tmax) */
+        mm0 = _m_psubusb(mm0, mm7); /* MM0=MM0-(0xFF-Tmax+Tmin) */
+        *mDest = _m_paddusb(mm0, mm5); /* MM0+Tmin */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using ClipToRange: D = (S >= Tmin) & (S <= Tmax) S:Tmin | Tmax
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param Tmin Lower (inclusive) boundary of the clipping range.
+\param Tmax Upper (inclusive) boundary of the clipping range.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterClipToRange(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char Tmin,
+                               unsigned char Tmax)
+{
+    /*
+     * Writes Dest[i] = Tmin when Src1[i] < Tmin, Tmax when Src1[i] > Tmax and
+     * Src1[i] otherwise, for every i < length. Returns -1 when either array
+     * pointer is NULL, otherwise 0 (length == 0 is a successful no-op).
+     */
+    unsigned int i;
+    unsigned int istart = 0;    /* first byte the C loop still has to process */
+
+    /* Validate input parameters */
+    if ((Src1 == NULL) || (Dest == NULL))
+        return (-1);
+    if (length == 0)
+        return (0);
+
+    /*
+     * Special case: Tmin==0 && Tmax==255 spans the whole byte range, so
+     * nothing is clipped and the source is copied to the destination.
+     * The previous code compared Tmax against 25, which skipped real
+     * clipping to [0,25], and had the memcpy arguments swapped so the source
+     * was overwritten. memmove keeps in-place calls (Src1 == Dest) well
+     * defined.
+     */
+    if ((Tmin == 0) && (Tmax == 255)) {
+        memmove(Dest, Src1, length);
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+        /*
+         * The MMX helper handles the leading multiple-of-8 bytes. Only skip
+         * them in the C loop when the helper reports success; it returns -1
+         * when MMX support is not compiled in.
+         */
+        if (SDL_imageFilterClipToRangeMMX(Src1, Dest, length, Tmin, Tmax) == 0) {
+            istart = length & 0xfffffff8;
+        }
+    }
+
+    /* C routine to process the bytes not handled above (possibly none) */
+    for (i = istart; i < length; i++) {
+        if (Src1[i] < Tmin) {
+            Dest[i] = Tmin;
+        } else if (Src1[i] > Tmax) {
+            Dest[i] = Tmax;
+        } else {
+            Dest[i] = Src1[i];
+        }
+    }
+
+    return (0);
+}
+
+/*!
+\brief Internal MMX Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin)
+
+\param Src1 Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param SrcLength The number of bytes in the source array.
+\param Cmin Normalization constant (Cmin).
+\param Cmax Normalization constant (Cmax).
+\param Nmin Normalization constant (Nmin).
+\param Nmax Normalization constant (Nmax).
+
+\return Returns 0 for success or -1 for error.
+*/ /* Review notes: the scale factor is an integer quotient computed on
+ * 16-bit values, and a zero (Cmax - Cmin) selects a factor of 255 instead of
+ * dividing. Only SrcLength/8 complete 8-byte groups are processed; without
+ * USE_MMX the body reduces to `return (-1)`.
+ * NOTE(review): the subtract/add steps use byte-wise saturating operations on
+ * data that was unpacked to words - confirm this matches the intended maths.
+ * NOTE(review): the inline-asm loop tests its counter only after the first
+ * pass, so it presumably relies on SrcLength >= 8 - confirm with callers. */
+static int SDL_imageFilterNormalizeLinearMMX(unsigned char *Src1, unsigned char *Dest, unsigned int SrcLength, int Cmin, int Cmax,
+                                             int Nmin, int Nmax)
+{
+#ifdef USE_MMX
+#if !defined(GCC__)
+    __asm
+    {
+        pusha
+        mov ax, WORD PTR Nmax  /* load Nmax in AX */
+        mov bx, WORD PTR Cmax  /* load Cmax in BX */
+        sub ax, WORD PTR Nmin  /* AX = Nmax - Nmin */
+        sub bx, WORD PTR Cmin  /* BX = Cmax - Cmin */
+        jz L10311  /* check division by zero */
+        xor dx, dx  /* prepare for division, zero DX */
+        div bx  /* AX = AX/BX */
+        jmp L10312
+L10311:
+        mov ax, 255  /* if div by zero, assume result max byte value */
+L10312:  /* ** Duplicate AX in 4 words of MM0 ** */
+        mov bx, ax  /* copy AX into BX */
+        shl eax, 16  /* shift 2 bytes of EAX left */
+        mov ax, bx  /* copy BX into AX */
+        movd mm0, eax  /* copy EAX into MM0 */
+        movd mm1, eax  /* copy EAX into MM1 */
+        punpckldq mm0, mm1  /* fill higher words of MM0 with AX */
+        /* ** Duplicate Cmin in 4 words of MM1 ** */
+        mov ax, WORD PTR Cmin  /* load Cmin into AX */
+        mov bx, ax  /* copy AX into BX */
+        shl eax, 16  /* shift 2 bytes of EAX left */
+        mov ax, bx  /* copy BX into AX */
+        movd mm1, eax  /* copy EAX into MM1 */
+        movd mm2, eax  /* copy EAX into MM2 */
+        punpckldq mm1, mm2  /* fill higher words of MM1 with Cmin */
+        /* ** Duplicate Nmin in 4 words of MM2 ** */
+        mov ax, WORD PTR Nmin  /* load Nmin into AX */
+        mov bx, ax  /* copy AX into BX */
+        shl eax, 16  /* shift 2 bytes of EAX left */
+        mov ax, bx  /* copy BX into AX */
+        movd mm2, eax  /* copy EAX into MM2 */
+        movd mm3, eax  /* copy EAX into MM3 */
+        punpckldq mm2, mm3  /* fill higher words of MM2 with Nmin */
+        pxor mm7, mm7  /* zero MM7 register */
+        mov eax, Src1  /* load Src1 address into eax */
+        mov edi, Dest  /* load Dest address into edi */
+        mov ecx, SrcLength  /* load loop counter (SIZE) into ecx */
+        shr ecx, 3  /* counter/8 (MMX loads 8 bytes at a time) */
+        align 16  /* 16 byte alignment of the loop entry */
+L1031:
+        movq mm3, [eax]  /* load 8 bytes from Src1 into MM3 */
+        movq mm4, mm3  /* copy MM3 into MM4 */
+        punpcklbw mm3, mm7  /* unpack low 4 source bytes into words */
+        punpckhbw mm4, mm7  /* unpack high 4 source bytes into words */
+        psubusb mm3, mm1  /* S-Cmin, low bytes */
+        psubusb mm4, mm1  /* S-Cmin, high bytes */
+        pmullw mm3, mm0  /* MM0*(S-Cmin), low bytes */
+        pmullw mm4, mm0  /* MM0*(S-Cmin), high bytes */
+        paddusb mm3, mm2  /* MM0*(S-Cmin)+Nmin, low bytes */
+        paddusb mm4, mm2  /* MM0*(S-Cmin)+Nmin, high bytes */
+        /* ** Take abs value of the signed words ** */
+        movq mm5, mm3  /* copy mm3 into mm5 */
+        movq mm6, mm4  /* copy mm4 into mm6 */
+        psraw mm5, 15  /* fill mm5 words with word sign bit */
+        psraw mm6, 15  /* fill mm6 words with word sign bit */
+        pxor mm3, mm5  /* take 1's complement of only neg words */
+        pxor mm4, mm6  /* take 1's complement of only neg words */
+        psubsw mm3, mm5  /* add 1 to only neg words, W-(-1) or W-0 */
+        psubsw mm4, mm6  /* add 1 to only neg words, W-(-1) or W-0 */
+        packuswb mm3, mm4  /* pack words back into bytes with saturation */
+        movq [edi], mm3  /* store result in Dest */
+        add eax, 8  /* increase Src1 register pointer by 8 */
+        add edi, 8  /* increase Dest register pointer by 8 */
+        dec ecx  /* decrease loop counter */
+        jnz L1031  /* check loop termination, proceed if required */
+        emms  /* exit MMX state */
+        popa
+    }
+#else
+    /* i386 and x86_64 */
+    __m64 *mSrc1 = (__m64*)Src1;
+    __m64 *mDest = (__m64*)Dest;
+    __m64 mm0, mm1, mm2, mm3;
+
+    int i;
+    /* Duplicate (Nmax-Nmin)/(Cmax-Cmin) in 4 words of MM0 */
+    unsigned short a = Nmax - Nmin; /* difference truncated to 16 bits */
+    unsigned short b = Cmax - Cmin; /* difference truncated to 16 bits */
+    if (b == 0) {
+        a = 255; /* avoid the division; use 255 as the factor */
+    } else {
+        a /= b; /* integer quotient, remainder dropped */
+    }
+    i = (a<<16)|a;
+    mm0 = _m_from_int(i);
+    mm1 = _m_from_int(i);
+    mm0 = _m_punpckldq(mm0, mm1); /* fill higher words of MM0 with the factor */
+    /* Duplicate Cmin in 4 words of MM1 */
+    i = (Cmin<<16)|(short)Cmin;
+    mm1 = _m_from_int(i);
+    mm2 = _m_from_int(i);
+    mm1 = _m_punpckldq(mm1, mm2); /* fill higher words of MM1 with Cmin */
+    /* Duplicate Nmin in 4 words of MM2 */
+    i = (Nmin<<16)|(short)Nmin;
+    mm2 = _m_from_int(i);
+    mm3 = _m_from_int(i);
+    mm2 = _m_punpckldq(mm2, mm3); /* fill higher words of MM2 with Nmin */
+    __m64 mm7 = _m_from_int(0); /* zero mm7 register */
+    for (i = 0; i < SrcLength/8; i++) { /* one iteration per 8 source bytes */
+        __m64 mm3, mm4, mm5, mm6; /* this mm3 shadows the outer mm3 declared above */
+        mm3 = _m_punpcklbw(*mSrc1, mm7); /* unpack low bytes of Src1 into words */
+        mm4 = _m_punpckhbw(*mSrc1, mm7); /* unpack high bytes of Src1 into words */
+        mm3 = _m_psubusb(mm3, mm1); /* S-Cmin, low bytes */
+        mm4 = _m_psubusb(mm4, mm1); /* S-Cmin, high bytes */
+        mm3 = _m_pmullw(mm3, mm0); /* MM0*(S-Cmin), low bytes */
+        mm4 = _m_pmullw(mm4, mm0); /* MM0*(S-Cmin), high bytes */
+        mm3 = _m_paddusb(mm3, mm2); /* MM0*(S-Cmin)+Nmin, low bytes */
+        mm4 = _m_paddusb(mm4, mm2); /* MM0*(S-Cmin)+Nmin, high bytes */
+        /* Take abs value of the signed words */
+        mm5 = _m_psrawi(mm3, 15); /* fill mm5 words with word sign bit */
+        mm6 = _m_psrawi(mm4, 15); /* fill mm6 words with word sign bit */
+        mm3 = _m_pxor(mm3, mm5); /* take 1's complement of only neg. words */
+        mm4 = _m_pxor(mm4, mm6); /* take 1's complement of only neg. words */
+        mm3 = _m_psubsw(mm3, mm5); /* add 1 to only neg. words, W-(-1) or W-0 */
+        mm4 = _m_psubsw(mm4, mm6); /* add 1 to only neg. words, W-(-1) or W-0 */
+        *mDest = _m_packuswb(mm3, mm4); /* pack words back into bytes with saturation */
+        mSrc1++;
+        mDest++;
+    }
+    _m_empty(); /* clean MMX state */
+#endif
+    return (0);
+#else
+    return (-1);
+#endif
+}
+
+/*!
+\brief Filter using NormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin)
+
+\param Src Pointer to the start of the source byte array (S).
+\param Dest Pointer to the start of the destination byte array (D).
+\param length The number of bytes in the source array.
+\param Cmin Normalization constant.
+\param Cmax Normalization constant.
+\param Nmin Normalization constant.
+\param Nmax Normalization constant.
+
+\return Returns 0 for success or -1 for error.
+*/
+int SDL_imageFilterNormalizeLinear(unsigned char *Src, unsigned char *Dest, unsigned int length, int Cmin, int Cmax, int Nmin,
+                                   int Nmax)
+{
+    /*
+     * Scalar path: Dest[k] = factor * (Src[k] - Cmin) + Nmin, where factor is
+     * the integer quotient (Nmax - Nmin) / (Cmax - Cmin). Values above 255 are
+     * clamped to 255; there is no lower clamp, so a negative value is reduced
+     * by the cast to unsigned char. Returns -1 for a NULL array, else 0.
+     */
+    unsigned int pos;
+    unsigned int first = 0;    /* index where the scalar loop starts */
+    int rangeC;
+    int scale;
+    int value;
+
+    /* Reject missing buffers; an empty image is a successful no-op */
+    if ((Src == NULL) || (Dest == NULL)) {
+        return (-1);
+    }
+    if (length == 0) {
+        return (0);
+    }
+
+    if ((SDL_imageFilterMMXdetect()) && (length > 7)) {
+        SDL_imageFilterNormalizeLinearMMX(Src, Dest, length, Cmin, Cmax, Nmin, Nmax);
+        if ((length & 7) == 0) {
+            /* Length is a multiple of 8: there is no tail for the scalar loop */
+            return (0);
+        }
+        /* Resume after the last complete 8-byte group */
+        first = length & 0xfffffff8;
+    }
+
+    /* A zero input range would divide by zero; the remaining bytes are left unwritten */
+    rangeC = Cmax - Cmin;
+    if (rangeC == 0) {
+        return (0);
+    }
+    scale = (Nmax - Nmin) / rangeC;
+
+    for (pos = first; pos < length; pos++) {
+        value = scale * ((int) Src[pos] - Cmin) + Nmin;
+        Dest[pos] = (unsigned char) ((value > 255) ? 255 : value);
+    }
+
+    return (0);
+}
+
+/* ------------------------------------------------------------------------------------ */
+
+/*!
+\brief Filter using ConvolveKernel3x3Divide: Dij = saturation0and255( ... )
+
+\param Src The source 2D byte array to convolve. Should be different from destination.
+\param Dest The destination 2D byte array to store the result in. Should be different from source.
+\param rows Number of rows in source/destination array. Must be >2.
+\param columns Number of columns in source/destination array. Must be >2.
+\param Kernel The 2D convolution kernel of size 3x3.
+\param Divisor The divisor of the convolution sum. Must be >0.
+
+Note: Non-MMX implementation not available for this function.
+
+\return Returns 0 when the MMX code path was taken, -1 for invalid parameters or when MMX is not detected.
+*/ /* Review notes: returns -1 for a NULL pointer, rows or columns below 3, a
+ * zero Divisor, or when SDL_imageFilterMMXdetect() reports false. In every
+ * other case it returns 0 - including builds where USE_MMX or i386 is not
+ * defined, in which no filtering code is compiled in and Dest is left
+ * untouched.
+ * NOTE(review): the asm loads three 8-byte rows from Kernel (12 signed
+ * shorts), so each kernel row is presumably padded to 4 entries - confirm
+ * against callers.
+ * NOTE(review): the asm reads 8 source bytes per output pixel, which looks
+ * like it can read past the end of Src near the last rows - confirm. */
+int SDL_imageFilterConvolveKernel3x3Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns,
+                                           signed short *Kernel, unsigned char Divisor)
+{
+    /* Validate input parameters */
+    if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL))
+        return(-1);
+
+    if ((columns < 3) || (rows < 3) || (Divisor == 0))
+        return (-1);
+
+    if ((SDL_imageFilterMMXdetect())) {
+//#ifdef USE_MMX
+#if defined(USE_MMX) && defined(i386)
+#if !defined(GCC__)
+        __asm
+        {
+            pusha
+            pxor mm0, mm0  /* zero MM0 */
+            xor ebx, ebx  /* zero EBX */
+            mov bl, Divisor  /* load Divisor into BL */
+            mov edx, Kernel  /* load Kernel address into EDX */
+            movq mm5, [edx]  /* MM5 = {0,K2,K1,K0} */
+            add edx, 8  /* second row |K0 K1 K2 0| */
+            movq mm6, [edx]  /* MM6 = {0,K5,K4,K3} K = |K3 K4 K5 0| */
+            add edx, 8  /* third row |K6 K7 K8 0| */
+            movq mm7, [edx]  /* MM7 = {0,K8,K7,K6} */
+            /* ---, */
+            mov eax, columns  /* load columns into EAX */
+            mov esi, Src  /* ESI = Src row 0 address */
+            mov edi, Dest  /* load Dest address to EDI */
+            add edi, eax  /* EDI = EDI + columns */
+            inc edi  /* 1 byte offset from the left edge */
+            mov edx, rows  /* initialize ROWS counter */
+            sub edx, 2  /* do not use first and last row */
+            /* ---, */
+L10320:
+            mov ecx, eax  /* initialize COLUMNS counter */
+            sub ecx, 2  /* do not use first and last column */
+            align 16  /* 16 byte alignment of the loop entry */
+L10322:
+            /* ---, */
+            movq mm1, [esi]  /* load 8 bytes of the image first row */
+            add esi, eax  /* move one row below */
+            movq mm2, [esi]  /* load 8 bytes of the image second row */
+            add esi, eax  /* move one row below */
+            movq mm3, [esi]  /* load 8 bytes of the image third row */
+            punpcklbw mm1, mm0  /* unpack first 4 bytes into words */
+            punpcklbw mm2, mm0  /* unpack first 4 bytes into words */
+            punpcklbw mm3, mm0  /* unpack first 4 bytes into words */
+            pmullw mm1, mm5  /* multiply words first row image*Kernel */
+            pmullw mm2, mm6  /* multiply words second row image*Kernel */
+            pmullw mm3, mm7  /* multiply words third row image*Kernel */
+            paddsw mm1, mm2  /* add 4 words of the first and second rows */
+            paddsw mm1, mm3  /* add 4 words of the third row and result */
+            movq mm2, mm1  /* copy MM1 into MM2 */
+            psrlq mm1, 32  /* shift 2 left words to the right */
+            paddsw mm1, mm2  /* add 2 left and 2 right result words */
+            movq mm3, mm1  /* copy MM1 into MM3 */
+            psrlq mm1, 16  /* shift 1 left word to the right */
+            paddsw mm1, mm3  /* add 1 left and 1 right result words */
+            /* --, */
+            movd mm2, eax  /* save EAX in MM2 */
+            movd mm3, edx  /* save EDX in MM3 */
+            movd eax, mm1  /* copy MM1 into EAX */
+            psraw mm1, 15  /* spread sign bit of the result */
+            movd edx, mm1  /* fill EDX with a sign bit */
+            idiv bx  /* IDIV - VERY EXPENSIVE */
+            movd mm1, eax  /* move result of division into MM1 */
+            packuswb mm1, mm0  /* pack division result with saturation */
+            movd eax, mm1  /* copy saturated result into EAX */
+            mov [edi], al  /* copy a byte result into Dest */
+            movd edx, mm3  /* restore saved EDX */
+            movd eax, mm2  /* restore saved EAX */
+            /* --, */
+            sub esi, eax  /* move two rows up */
+            sub esi, eax  /* */
+            inc esi  /* move Src pointer to the next pixel */
+            inc edi  /* move Dest pointer to the next pixel */
+            /* ---, */
+            dec ecx  /* decrease loop counter COLUMNS */
+            jnz L10322  /* check loop termination, proceed if required */
+            add esi, 2  /* move to the next row in Src */
+            add edi, 2  /* move to the next row in Dest */
+            dec edx  /* decrease loop counter ROWS */
+            jnz L10320  /* check loop termination, proceed if required */
+            /* ---, */
+            emms  /* exit MMX state */
+            popa
+        }
+#else
+        asm volatile
+            ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t"  /* zero MM0 */
+            "xor %%ebx, %%ebx \n\t"  /* zero EBX */
+            "mov %5, %%bl \n\t"  /* load Divisor into BL */
+            "mov %4, %%edx \n\t"  /* load Kernel address into EDX */
+            "movq (%%edx), %%mm5 \n\t"  /* MM5 = {0,K2,K1,K0} */
+            "add $8, %%edx \n\t"  /* second row |K0 K1 K2 0| */
+            "movq (%%edx), %%mm6 \n\t"  /* MM6 = {0,K5,K4,K3} K = |K3 K4 K5 0| */
+            "add $8, %%edx \n\t"  /* third row |K6 K7 K8 0| */
+            "movq (%%edx), %%mm7 \n\t"  /* MM7 = {0,K8,K7,K6} */
+            /* --- */
+            "mov %3, %%eax \n\t"  /* load columns into EAX */
+            "mov %1, %%esi \n\t"  /* ESI = Src row 0 address */
+            "mov %0, %%edi \n\t"  /* load Dest address to EDI */
+            "add %%eax, %%edi \n\t"  /* EDI = EDI + columns */
+            "inc %%edi \n\t"  /* 1 byte offset from the left edge */
+            "mov %2, %%edx \n\t"  /* initialize ROWS counter */
+            "sub $2, %%edx \n\t"  /* do not use first and last row */
+            /* --- */
+            ".L10320: \n\t" "mov %%eax, %%ecx \n\t"  /* initialize COLUMNS counter */
+            "sub $2, %%ecx \n\t"  /* do not use first and last column */
+            ".align 16 \n\t"  /* 16 byte alignment of the loop entry */
+            ".L10322: \n\t"
+            /* --- */
+            "movq (%%esi), %%mm1 \n\t"  /* load 8 bytes of the image first row */
+            "add %%eax, %%esi \n\t"  /* move one row below */
+            "movq (%%esi), %%mm2 \n\t"  /* load 8 bytes of the image second row */
+            "add %%eax, %%esi \n\t"  /* move one row below */
+            "movq (%%esi), %%mm3 \n\t"  /* load 8 bytes of the image third row */
+            "punpcklbw %%mm0, %%mm1 \n\t"  /* unpack first 4 bytes into words */
+            "punpcklbw %%mm0, %%mm2 \n\t"  /* unpack first 4 bytes into words */
+            "punpcklbw %%mm0, %%mm3 \n\t"  /* unpack first 4 bytes into words */
+            "pmullw %%mm5, %%mm1 \n\t"  /* multiply words first row image*Kernel */
+            "pmullw %%mm6, %%mm2 \n\t"  /* multiply words second row image*Kernel */
+            "pmullw %%mm7, %%mm3 \n\t"  /* multiply words third row image*Kernel */
+            "paddsw %%mm2, %%mm1 \n\t"  /* add 4 words of the first and second rows */
+            "paddsw %%mm3, %%mm1 \n\t"  /* add 4 words of the third row and result */
+            "movq %%mm1, %%mm2 \n\t"  /* copy MM1 into MM2 */
+            "psrlq $32, %%mm1 \n\t"  /* shift 2 left words to the right */
+            "paddsw %%mm2, %%mm1 \n\t"  /* add 2 left and 2 right result words */
+            "movq %%mm1, %%mm3 \n\t"  /* copy MM1 into MM3 */
+            "psrlq $16, %%mm1 \n\t"  /* shift 1 left word to the right */
+            "paddsw %%mm3, %%mm1 \n\t"  /* add 1 left and 1 right result words */
+            /* -- */
+            "movd %%eax, %%mm2 \n\t"  /* save EAX in MM2 */
+            "movd %%edx, %%mm3 \n\t"  /* save EDX in MM3 */
+            "movd %%mm1, %%eax \n\t"  /* copy MM1 into EAX */
+            "psraw $15, %%mm1 \n\t"  /* spread sign bit of the result */
+            "movd %%mm1, %%edx \n\t"  /* fill EDX with a sign bit */
+            "idivw %%bx \n\t"  /* IDIV - VERY EXPENSIVE */
+            "movd %%eax, %%mm1 \n\t"  /* move result of division into MM1 */
+            "packuswb %%mm0, %%mm1 \n\t"  /* pack division result with saturation */
+            "movd %%mm1, %%eax \n\t"  /* copy saturated result into EAX */
+            "mov %%al, (%%edi) \n\t"  /* copy a byte result into Dest */
+            "movd %%mm3, %%edx \n\t"  /* restore saved EDX */
+            "movd %%mm2, %%eax \n\t"  /* restore saved EAX */
+            /* -- */
+            "sub %%eax, %%esi \n\t"  /* move two rows up */
+            "sub %%eax, %%esi \n\t"  /* */
+            "inc %%esi \n\t"  /* move Src pointer to the next pixel */
+            "inc %%edi \n\t"  /* move Dest pointer to the next pixel */
+            /* --- */
+            "dec %%ecx \n\t"  /* decrease loop counter COLUMNS */
+            "jnz .L10322 \n\t"  /* check loop termination, proceed if required */
+            "add $2, %%esi \n\t"  /* move to the next row in Src */
+            "add $2, %%edi \n\t"  /* move to the next row in Dest */
+            "dec %%edx \n\t"  /* decrease loop counter ROWS */
+            "jnz .L10320 \n\t"  /* check loop termination, proceed if required */
+            /* --- */
+            "emms \n\t"  /* exit MMX state */
+            "popa \n\t":"=m" (Dest)  /* %0 */
+            :"m"(Src),  /* %1 */
+            "m"(rows),  /* %2 */
+            "m"(columns),  /* %3 */
+            "m"(Kernel),  /* %4 */
+            "m"(Divisor)  /* %5 */
+            );
+#endif
+#endif
+        return (0); /* reached whenever MMX was detected, whether or not an asm body was compiled above */
+    } else {
+        /* No non-MMX implementation yet */
+        return (-1);
+    }
+}
+
+/*!
+\brief Filter using ConvolveKernel5x5Divide: Dij = saturation0and255( ... )
+
+\param Src The source 2D byte array to convolve. Should be different from destination.
+\param Dest The destination 2D byte array to store the result in. Should be different from source.
+\param rows Number of rows in source/destination array. Must be >4.
+\param columns Number of columns in source/destination array. Must be >4.
+\param Kernel The 2D convolution kernel of size 5x5. +\param Divisor The divisor of the convolution sum. Must be >0. + +Note: Non-MMX implementation not available for this function. + +\return Returns 1 if filter was applied, 0 otherwise. +*/ +int SDL_imageFilterConvolveKernel5x5Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, + signed short *Kernel, unsigned char Divisor) +{ + /* Validate input parameters */ + if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL)) + return(-1); + + if ((columns < 5) || (rows < 5) || (Divisor == 0)) + return (-1); + + if ((SDL_imageFilterMMXdetect())) { +//#ifdef USE_MMX +#if defined(USE_MMX) && defined(i386) +#if !defined(GCC__) + __asm + { + pusha + pxor mm0, mm0 /* zero MM0 */ + xor ebx, ebx /* zero EBX */ + mov bl, Divisor /* load Divisor into BL */ + movd mm5, ebx /* copy Divisor into MM5 */ + mov edx, Kernel /* load Kernel address into EDX */ + mov esi, Src /* load Src address to ESI */ + mov edi, Dest /* load Dest address to EDI */ + add edi, 2 /* 2 column offset from the left edge */ + mov eax, columns /* load columns into EAX */ + shl eax, 1 /* EAX = columns * 2 */ + add edi, eax /* 2 row offset from the top edge */ + shr eax, 1 /* EAX = columns */ + mov ebx, rows /* initialize ROWS counter */ + sub ebx, 4 /* do not use first 2 and last 2 rows */ + /* ---, */ +L10330: + mov ecx, eax /* initialize COLUMNS counter */ + sub ecx, 4 /* do not use first 2 and last 2 columns */ + align 16 /* 16 byte alignment of the loop entry */ +L10332: + pxor mm7, mm7 /* zero MM7 (accumulator) */ + movd mm6, esi /* save ESI in MM6 */ + /* --- 1 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes 
into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 2 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 3 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 4 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add 
edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 5 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* ---, */ + movq mm3, mm7 /* copy MM7 into MM3 */ + psrlq mm7, 32 /* shift 2 left words to the right */ + paddsw mm7, mm3 /* add 2 left and 2 right result words */ + movq mm2, mm7 /* copy MM7 into MM2 */ + psrlq mm7, 16 /* shift 1 left word to the right */ + paddsw mm7, mm2 /* add 1 left and 1 right result words */ + /* ---, */ + movd mm1, eax /* save EDX in MM1 */ + movd mm2, ebx /* save EDX in MM2 */ + movd mm3, edx /* save EDX in MM3 */ + movd eax, mm7 /* load summation result into EAX */ + psraw mm7, 15 /* spread sign bit of the result */ + movd ebx, mm5 /* load Divisor into EBX */ + movd edx, mm7 /* fill EDX with a sign bit */ + idiv bx /* IDIV - VERY EXPENSIVE */ + movd mm7, eax /* move result of division into MM7 */ + packuswb mm7, mm0 /* pack division result with saturation */ + movd eax, mm7 /* copy saturated result into EAX */ + mov [edi], al /* copy a byte result into Dest */ + movd edx, mm3 /* restore saved EDX */ + movd ebx, mm2 /* restore saved EBX */ + movd eax, mm1 /* restore 
saved EAX */ + /* --, */ + movd esi, mm6 /* move Src pointer to the top pixel */ + sub edx, 72 /* EDX = Kernel address */ + inc esi /* move Src pointer to the next pixel */ + inc edi /* move Dest pointer to the next pixel */ + /* ---, */ + dec ecx /* decrease loop counter COLUMNS */ + jnz L10332 /* check loop termination, proceed if required */ + add esi, 4 /* move to the next row in Src */ + add edi, 4 /* move to the next row in Dest */ + dec ebx /* decrease loop counter ROWS */ + jnz L10330 /* check loop termination, proceed if required */ + /* ---, */ + emms /* exit MMX state */ + popa + } +#else + asm volatile + ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t" /* zero MM0 */ + "xor %%ebx, %%ebx \n\t" /* zero EBX */ + "mov %5, %%bl \n\t" /* load Divisor into BL */ + "movd %%ebx, %%mm5 \n\t" /* copy Divisor into MM5 */ + "mov %4, %%edx \n\t" /* load Kernel address into EDX */ + "mov %1, %%esi \n\t" /* load Src address to ESI */ + "mov %0, %%edi \n\t" /* load Dest address to EDI */ + "add $2, %%edi \n\t" /* 2 column offset from the left edge */ + "mov %3, %%eax \n\t" /* load columns into EAX */ + "shl $1, %%eax \n\t" /* EAX = columns * 2 */ + "add %%eax, %%edi \n\t" /* 2 row offset from the top edge */ + "shr $1, %%eax \n\t" /* EAX = columns */ + "mov %2, %%ebx \n\t" /* initialize ROWS counter */ + "sub $4, %%ebx \n\t" /* do not use first 2 and last 2 rows */ + /* --- */ + ".L10330: \n\t" "mov %%eax, %%ecx \n\t" /* initialize COLUMNS counter */ + "sub $4, %%ecx \n\t" /* do not use first 2 and last 2 columns */ + ".align 16 \n\t" /* 16 byte alignment of the loop entry */ + ".L10332: \n\t" "pxor %%mm7, %%mm7 \n\t" /* zero MM7 (accumulator) */ + "movd %%esi, %%mm6 \n\t" /* save ESI in MM6 */ + /* --- 1 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 
4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 2 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 3 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 
4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 4 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 5 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- */ + "movq %%mm7, %%mm3 \n\t" /* copy MM7 into MM3 */ + "psrlq $32, %%mm7 \n\t" /* shift 2 left words to the right */ + "paddsw %%mm3, %%mm7 \n\t" /* add 2 left and 2 right result words */ + "movq %%mm7, %%mm2 \n\t" /* copy MM7 into MM2 */ + "psrlq $16, %%mm7 \n\t" /* shift 1 left word to the right */ + "paddsw %%mm2, %%mm7 \n\t" /* add 1 left and 1 right result words */ + /* --- */ + "movd %%eax, %%mm1 \n\t" /* save EDX in MM1 */ + "movd %%ebx, %%mm2 \n\t" /* save EDX in MM2 */ + "movd %%edx, %%mm3 \n\t" /* save EDX in MM3 */ + "movd %%mm7, %%eax \n\t" /* load summation result into EAX */ + "psraw $15, %%mm7 \n\t" /* spread sign bit of the result */ + "movd %%mm5, %%ebx \n\t" /* load Divisor into EBX */ + "movd %%mm7, %%edx \n\t" /* fill EDX with a sign bit */ + "idivw %%bx \n\t" /* IDIV - VERY EXPENSIVE */ + "movd %%eax, %%mm7 \n\t" /* move result of division into MM7 */ + "packuswb %%mm0, %%mm7 \n\t" /* pack division result with saturation */ + "movd %%mm7, %%eax \n\t" /* copy saturated result into EAX */ + "mov %%al, (%%edi) \n\t" /* copy a byte result into Dest */ + "movd %%mm3, %%edx \n\t" /* restore saved EDX */ + "movd %%mm2, %%ebx \n\t" /* restore saved EBX */ + "movd %%mm1, %%eax \n\t" /* restore saved EAX */ + /* -- */ + "movd %%mm6, %%esi \n\t" /* move Src pointer to the top pixel */ + "sub $72, %%edx \n\t" /* EDX = Kernel address */ + "inc %%esi \n\t" /* move Src pointer to the next pixel */ + "inc %%edi \n\t" /* move Dest pointer to the next pixel */ + /* --- */ + "dec %%ecx \n\t" /* decrease loop counter COLUMNS */ + "jnz .L10332 \n\t" /* check loop termination, proceed if required */ + "add $4, %%esi \n\t" /* move to the next row in Src */ + "add $4, %%edi \n\t" /* move to the next row in Dest */ + "dec %%ebx \n\t" /* decrease loop counter ROWS */ + "jnz .L10330 \n\t" /* 
check loop termination, proceed if required */ + /* --- */ + "emms \n\t" /* exit MMX state */ + "popa \n\t":"=m" (Dest) /* %0 */ + :"m"(Src), /* %1 */ + "m"(rows), /* %2 */ + "m"(columns), /* %3 */ + "m"(Kernel), /* %4 */ + "m"(Divisor) /* %5 */ + ); +#endif +#endif + return (0); + } else { + /* No non-MMX implementation yet */ + return (-1); + } +} + +/*! +\brief Filter using ConvolveKernel7x7Divide: Dij = saturation0and255( ... ) + +\param Src The source 2D byte array to convolve. Should be different from destination. +\param Dest The destination 2D byte array to store the result in. Should be different from source. +\param rows Number of rows in source/destination array. Must be >6. +\param columns Number of columns in source/destination array. Must be >6. +\param Kernel The 2D convolution kernel of size 7x7. +\param Divisor The divisor of the convolution sum. Must be >0. + +Note: Non-MMX implementation not available for this function. + +\return Returns 1 if filter was applied, 0 otherwise. 
+*/ +int SDL_imageFilterConvolveKernel7x7Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, + signed short *Kernel, unsigned char Divisor) +{ + /* Validate input parameters */ + if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL)) + return(-1); + + if ((columns < 7) || (rows < 7) || (Divisor == 0)) + return (-1); + + if ((SDL_imageFilterMMXdetect())) { +//#ifdef USE_MMX +#if defined(USE_MMX) && defined(i386) +#if !defined(GCC__) + __asm + { + pusha + pxor mm0, mm0 /* zero MM0 */ + xor ebx, ebx /* zero EBX */ + mov bl, Divisor /* load Divisor into BL */ + movd mm5, ebx /* copy Divisor into MM5 */ + mov edx, Kernel /* load Kernel address into EDX */ + mov esi, Src /* load Src address to ESI */ + mov edi, Dest /* load Dest address to EDI */ + add edi, 3 /* 3 column offset from the left edge */ + mov eax, columns /* load columns into EAX */ + add edi, eax /* 3 row offset from the top edge */ + add edi, eax + add edi, eax + mov ebx, rows /* initialize ROWS counter */ + sub ebx, 6 /* do not use first 3 and last 3 rows */ + /* ---, */ +L10340: + mov ecx, eax /* initialize COLUMNS counter */ + sub ecx, 6 /* do not use first 3 and last 3 columns */ + align 16 /* 16 byte alignment of the loop entry */ +L10342: + pxor mm7, mm7 /* zero MM7 (accumulator) */ + movd mm6, esi /* save ESI in MM6 */ + /* --- 1 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 
to accumulator MM7 */ + /* --- 2 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 3 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 4 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + 
paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 5 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 6 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, 
mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* ---, */ + movq mm3, mm7 /* copy MM7 into MM3 */ + psrlq mm7, 32 /* shift 2 left words to the right */ + paddsw mm7, mm3 /* add 2 left and 2 right result words */ + movq mm2, mm7 /* copy MM7 into MM2 */ + psrlq mm7, 16 /* shift 1 left word to the right */ + paddsw mm7, mm2 /* add 1 left and 1 right result words */ + /* ---, */ + movd mm1, eax /* save EDX in MM1 */ + movd mm2, ebx /* save EDX in MM2 */ + movd mm3, edx /* save EDX in MM3 */ + movd eax, mm7 /* load summation result into EAX */ + psraw mm7, 15 /* spread sign bit of the result */ + movd ebx, mm5 /* load Divisor into EBX */ + movd edx, mm7 /* fill EDX with a sign bit */ + idiv bx /* IDIV - VERY EXPENSIVE */ + movd mm7, eax /* move result of division into MM7 */ + packuswb mm7, mm0 /* pack division result with saturation */ + movd eax, mm7 /* copy saturated result into EAX */ + mov [edi], al /* copy a byte result into Dest */ + movd edx, mm3 /* restore saved EDX */ + movd ebx, mm2 /* restore saved EBX */ + movd eax, mm1 /* restore saved EAX */ + /* --, */ + movd esi, mm6 /* move Src pointer to the top pixel */ + sub edx, 104 /* EDX = Kernel address */ + inc esi /* move Src pointer to the next pixel */ + inc edi /* move Dest pointer to the next pixel */ + /* ---, */ + dec ecx /* decrease loop counter COLUMNS */ + jnz L10342 /* check loop termination, proceed if required */ + add esi, 6 /* move to the next row in Src */ + add edi, 6 /* move to the next row in Dest */ + dec ebx /* decrease loop counter ROWS */ + jnz L10340 /* check loop termination, proceed if required */ + /* ---, */ + emms /* exit MMX state */ + popa + } +#else + asm volatile + ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t" /* zero MM0 */ + "xor %%ebx, %%ebx \n\t" /* zero EBX */ + "mov %5, %%bl \n\t" /* load Divisor into BL */ + "movd %%ebx, %%mm5 \n\t" /* copy Divisor into MM5 */ + "mov %4, %%edx \n\t" /* load Kernel address into EDX */ + "mov 
%1, %%esi \n\t" /* load Src address to ESI */ + "mov %0, %%edi \n\t" /* load Dest address to EDI */ + "add $3, %%edi \n\t" /* 3 column offset from the left edge */ + "mov %3, %%eax \n\t" /* load columns into EAX */ + "add %%eax, %%edi \n\t" /* 3 row offset from the top edge */ + "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t" /* initialize ROWS counter */ + "sub $6, %%ebx \n\t" /* do not use first 3 and last 3 rows */ + /* --- */ + ".L10340: \n\t" "mov %%eax, %%ecx \n\t" /* initialize COLUMNS counter */ + "sub $6, %%ecx \n\t" /* do not use first 3 and last 3 columns */ + ".align 16 \n\t" /* 16 byte alignment of the loop entry */ + ".L10342: \n\t" "pxor %%mm7, %%mm7 \n\t" /* zero MM7 (accumulator) */ + "movd %%esi, %%mm6 \n\t" /* save ESI in MM6 */ + /* --- 1 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 2 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 3 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 4 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 5 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 6 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- */ + "movq %%mm7, %%mm3 \n\t" /* copy MM7 into MM3 */ + "psrlq $32, %%mm7 \n\t" /* shift 2 left words to the right */ + "paddsw %%mm3, %%mm7 \n\t" /* add 2 left and 2 right result words */ + "movq %%mm7, %%mm2 \n\t" /* copy MM7 into MM2 */ + "psrlq $16, %%mm7 \n\t" /* shift 1 left word to the right */ + "paddsw %%mm2, %%mm7 \n\t" /* add 1 left and 1 right result words */ + /* --- */ + "movd %%eax, %%mm1 \n\t" /* save EDX in MM1 */ + "movd %%ebx, %%mm2 \n\t" /* save EDX in MM2 */ + "movd %%edx, %%mm3 \n\t" /* save EDX in MM3 */ + "movd %%mm7, %%eax \n\t" /* load summation result into EAX */ + "psraw $15, %%mm7 \n\t" /* spread sign bit of the result */ + "movd %%mm5, %%ebx \n\t" /* load Divisor into EBX */ + "movd %%mm7, %%edx \n\t" /* fill EDX with a sign bit */ + "idivw %%bx \n\t" /* IDIV - VERY EXPENSIVE */ + "movd %%eax, %%mm7 \n\t" /* move result of division into MM7 */ + "packuswb %%mm0, %%mm7 \n\t" /* pack division result with saturation */ + "movd %%mm7, %%eax \n\t" /* copy saturated result into EAX */ + "mov %%al, (%%edi) \n\t" /* copy a byte result into Dest */ + "movd %%mm3, %%edx \n\t" /* restore saved EDX */ + "movd %%mm2, %%ebx \n\t" /* restore saved EBX */ + "movd %%mm1, %%eax \n\t" /* restore saved EAX */ + /* -- */ + "movd %%mm6, %%esi \n\t" /* move Src pointer to the top pixel */ + "sub $104, %%edx \n\t" /* EDX = Kernel address */ + "inc %%esi \n\t" /* move Src pointer to the next pixel */ + "inc %%edi \n\t" /* move Dest pointer to the next pixel */ + /* --- */ + "dec %%ecx \n\t" /* decrease loop counter COLUMNS */ + "jnz .L10342 \n\t" /* check loop termination, proceed if required */ + "add $6, %%esi \n\t" /* move to the next row in Src */ + "add $6, %%edi \n\t" /* move to the next row in Dest */ + "dec %%ebx \n\t" /* decrease loop counter ROWS */ + "jnz .L10340 \n\t" /* 
check loop termination, proceed if required */ + /* --- */ + "emms \n\t" /* exit MMX state */ + "popa \n\t":"=m" (Dest) /* %0 */ + :"m"(Src), /* %1 */ + "m"(rows), /* %2 */ + "m"(columns), /* %3 */ + "m"(Kernel), /* %4 */ + "m"(Divisor) /* %5 */ + ); +#endif +#endif + return (0); + } else { + /* No non-MMX implementation yet */ + return (-1); + } +} + +/*! +\brief Filter using ConvolveKernel9x9Divide: Dij = saturation0and255( ... ) + +\param Src The source 2D byte array to convolve. Should be different from destination. +\param Dest The destination 2D byte array to store the result in. Should be different from source. +\param rows Number of rows in source/destination array. Must be >8. +\param columns Number of columns in source/destination array. Must be >8. +\param Kernel The 2D convolution kernel of size 9x9. NOTE(review): the assembly advances the kernel pointer by 24 bytes per kernel row (208 bytes are rewound per pixel), so it appears to read 12 shorts per row rather than a packed 9x9 array - confirm the expected layout. +\param Divisor The divisor of the convolution sum. Must be >0. + +Note: Non-MMX implementation not available for this function. When SDL_imageFilterMMXdetect() returns non-zero but the assembly is compiled out (USE_MMX or i386 not defined), 0 is returned and Dest is not written. + +\return Returns 0 if the MMX branch was taken, -1 if a parameter is invalid or SDL_imageFilterMMXdetect() returns 0. 
+*/ +int SDL_imageFilterConvolveKernel9x9Divide(unsigned char *Src, unsigned char *Dest, int rows, int columns, + signed short *Kernel, unsigned char Divisor) +{ + /* Validate input parameters */ + if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL)) + return(-1); + + if ((columns < 9) || (rows < 9) || (Divisor == 0)) + return (-1); + + if ((SDL_imageFilterMMXdetect())) { +//#ifdef USE_MMX +#if defined(USE_MMX) && defined(i386) +#if !defined(GCC__) + __asm + { + pusha + pxor mm0, mm0 /* zero MM0 */ + xor ebx, ebx /* zero EBX */ + mov bl, Divisor /* load Divisor into BL */ + movd mm5, ebx /* copy Divisor into MM5 */ + mov edx, Kernel /* load Kernel address into EDX */ + mov esi, Src /* load Src address to ESI */ + mov edi, Dest /* load Dest address to EDI */ + add edi, 4 /* 4 column offset from the left edge */ + mov eax, columns /* load columns into EAX */ + add edi, eax /* 4 row offset from the top edge */ + add edi, eax + add edi, eax + add edi, eax + mov ebx, rows /* initialize ROWS counter */ + sub ebx, 8 /* do not use first 4 and last 4 rows */ + /* ---, */ +L10350: + mov ecx, eax /* initialize COLUMNS counter */ + sub ecx, 8 /* do not use first 4 and last 4 columns */ + align 16 /* 16 byte alignment of the loop entry */ +L10352: + pxor mm7, mm7 /* zero MM7 (accumulator) */ + movd mm6, esi /* save ESI in MM6 */ + /* --- 1 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* move pointer to the next 8 bytes of Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult. 
4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 2 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* move pointer to the next 8 bytes of Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult. 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + pmullw mm1, mm3 /* mult. 
4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 3 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* move pointer to the next 8 bytes of Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult. 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 4 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* move pointer to the next 8 bytes of Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult. 
4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 5 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* move pointer to the next 8 bytes of Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult. 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + pmullw mm1, mm3 /* mult. 
4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 6 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* move pointer to the next 8 bytes of Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult. 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* move pointer to the next 8 bytes of Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult. 
4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 8 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* move pointer to the next 8 bytes of Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult. 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + pmullw mm1, mm3 /* mult. 
4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 9 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* move pointer to the next 8 bytes of Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult. 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + pmullw mm1, mm3 /* mult. 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* ---, */ + movq mm3, mm7 /* copy MM7 into MM3 */ + psrlq mm7, 32 /* shift 2 left words to the right */ + paddsw mm7, mm3 /* add 2 left and 2 right result words */ + movq mm2, mm7 /* copy MM7 into MM2 */ + psrlq mm7, 16 /* shift 1 left word to the right */ + paddsw mm7, mm2 /* add 1 left and 1 right result words */ + /* ---, */ + movd mm1, eax /* save EAX in MM1 */ + movd mm2, ebx /* save EBX in MM2 */ + movd mm3, edx /* save EDX in MM3 */ + movd eax, mm7 /* load summation result into EAX */ + psraw mm7, 15 /* spread sign bit of the result */ + movd ebx, mm5 /* load Divisor into EBX */ + movd edx, mm7 /* fill EDX with a sign bit */ + idiv bx /* IDIV - VERY EXPENSIVE */ + movd mm7, eax /* move result of division into MM7 */ + packuswb mm7, mm0 /* pack division result with saturation */ + movd eax, mm7 /* copy saturated result into EAX */ + mov [edi], al /* copy a byte result into Dest */ + movd edx, mm3 /* restore saved EDX */ + 
movd ebx, mm2 /* restore saved EBX */ + movd eax, mm1 /* restore saved EAX */ + /* --, */ + movd esi, mm6 /* move Src pointer to the top pixel */ + sub edx, 208 /* EDX = Kernel address */ + inc esi /* move Src pointer to the next pixel */ + inc edi /* move Dest pointer to the next pixel */ + /* ---, */ + dec ecx /* decrease loop counter COLUMNS */ + jnz L10352 /* check loop termination, proceed if required */ + add esi, 8 /* move to the next row in Src */ + add edi, 8 /* move to the next row in Dest */ + dec ebx /* decrease loop counter ROWS */ + jnz L10350 /* check loop termination, proceed if required */ + /* ---, */ + emms /* exit MMX state */ + popa + } +#else + asm volatile + ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t" /* zero MM0 */ + "xor %%ebx, %%ebx \n\t" /* zero EBX */ + "mov %5, %%bl \n\t" /* load Divisor into BL */ + "movd %%ebx, %%mm5 \n\t" /* copy Divisor into MM5 */ + "mov %4, %%edx \n\t" /* load Kernel address into EDX */ + "mov %1, %%esi \n\t" /* load Src address to ESI */ + "mov %0, %%edi \n\t" /* load Dest address to EDI */ + "add $4, %%edi \n\t" /* 4 column offset from the left edge */ + "mov %3, %%eax \n\t" /* load columns into EAX */ + "add %%eax, %%edi \n\t" /* 4 row offset from the top edge */ + "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t" /* initialize ROWS counter */ + "sub $8, %%ebx \n\t" /* do not use first 4 and last 4 rows */ + /* --- */ + ".L10350: \n\t" "mov %%eax, %%ecx \n\t" /* initialize COLUMNS counter */ + "sub $8, %%ecx \n\t" /* do not use first 4 and last 4 columns */ + ".align 16 \n\t" /* 16 byte alignment of the loop entry */ + ".L10352: \n\t" "pxor %%mm7, %%mm7 \n\t" /* zero MM7 (accumulator) */ + "movd %%esi, %%mm6 \n\t" /* save ESI in MM6 */ + /* --- 1 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* move pointer to the next 8 bytes of Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of 
Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 2 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* move pointer to the next 8 bytes of Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 3 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* move pointer to the next 8 bytes of Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 
4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 4 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* move pointer to the next 8 bytes of Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 5 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* move pointer to the next 8 bytes of Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 
4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 6 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* move pointer to the next 8 bytes of Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 
4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* move pointer to the next 8 bytes of Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 8 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* move pointer to the next 8 bytes of Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 
4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 9 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* move pointer to the next 8 bytes of Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 
4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- */ + "movq %%mm7, %%mm3 \n\t" /* copy MM7 into MM3 */ + "psrlq $32, %%mm7 \n\t" /* shift 2 left words to the right */ + "paddsw %%mm3, %%mm7 \n\t" /* add 2 left and 2 right result words */ + "movq %%mm7, %%mm2 \n\t" /* copy MM7 into MM2 */ + "psrlq $16, %%mm7 \n\t" /* shift 1 left word to the right */ + "paddsw %%mm2, %%mm7 \n\t" /* add 1 left and 1 right result words */ + /* --- */ + "movd %%eax, %%mm1 \n\t" /* save EAX in MM1 */ + "movd %%ebx, %%mm2 \n\t" /* save EBX in MM2 */ + "movd %%edx, %%mm3 \n\t" /* save EDX in MM3 */ + "movd %%mm7, %%eax \n\t" /* load summation result into EAX */ + "psraw $15, %%mm7 \n\t" /* spread sign bit of the result */ + "movd %%mm5, %%ebx \n\t" /* load Divisor into EBX */ + "movd %%mm7, %%edx \n\t" /* fill EDX with a sign bit */ + "idivw %%bx \n\t" /* IDIV - VERY EXPENSIVE */ + "movd %%eax, %%mm7 \n\t" /* move result of division into MM7 */ + "packuswb %%mm0, %%mm7 \n\t" /* pack division result with saturation */ + "movd %%mm7, %%eax \n\t" /* copy saturated result into EAX */ + "mov %%al, (%%edi) \n\t" /* copy a byte result into Dest */ + "movd %%mm3, %%edx \n\t" /* restore saved EDX */ + "movd %%mm2, %%ebx \n\t" /* restore saved EBX */ + "movd %%mm1, %%eax \n\t" /* restore saved EAX */ + /* -- */ + "movd %%mm6, %%esi \n\t" /* move Src pointer to the top pixel */ + "sub $208, %%edx \n\t" /* EDX = Kernel address */ + "inc %%esi \n\t" /* move Src pointer to the next pixel */ + "inc %%edi \n\t" /* move Dest pointer to the next pixel */ + /* --- */ + "dec %%ecx \n\t" /* decrease loop counter COLUMNS */ + "jnz .L10352 \n\t" /* check loop termination, proceed if required */ + "add $8, %%esi \n\t" /* move to the next row in Src */ + "add $8, %%edi \n\t" /* move to the next row in Dest */ + "dec %%ebx \n\t" /* decrease loop counter ROWS */ + "jnz .L10350 \n\t" /* check loop termination, proceed if required */ + /* --- */ + "emms \n\t" /* 
exit MMX state */ + "popa \n\t":"=m" (Dest) /* %0 */ + :"m"(Src), /* %1 */ + "m"(rows), /* %2 */ + "m"(columns), /* %3 */ + "m"(Kernel), /* %4 */ + "m"(Divisor) /* %5 */ + ); +#endif +#endif + return (0); + } else { + /* No non-MMX implementation yet */ + return (-1); + } +} + +/*! +\brief Filter using ConvolveKernel3x3ShiftRight: Dij = saturation0and255( ... ) + +\param Src The source 2D byte array to convolve. Should be different from destination. +\param Dest The destination 2D byte array to store the result in. Should be different from source. +\param rows Number of rows in source/destination array. Must be >2. +\param columns Number of columns in source/destination array. Must be >2. +\param Kernel The 2D convolution kernel of size 3x3. +\param NRightShift The number of right bit shifts to apply to the convolution sum. Must be <7. + +Note: Non-MMX implementation not available for this function. + +\return Returns 1 if filter was applied, 0 otherwise. +*/ +int SDL_imageFilterConvolveKernel3x3ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, + signed short *Kernel, unsigned char NRightShift) +{ + /* Validate input parameters */ + if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL)) + return(-1); + + if ((columns < 3) || (rows < 3) || (NRightShift > 7)) + return (-1); + + if ((SDL_imageFilterMMXdetect())) { +//#ifdef USE_MMX +#if defined(USE_MMX) && defined(i386) +#if !defined(GCC__) + __asm + { + pusha + pxor mm0, mm0 /* zero MM0 */ + xor ebx, ebx /* zero EBX */ + mov bl, NRightShift /* load NRightShift into BL */ + movd mm4, ebx /* copy NRightShift into MM4 */ + mov edx, Kernel /* load Kernel address into EDX */ + movq mm5, [edx] /* MM5 = {0,K2,K1,K0} */ + add edx, 8 /* second row |K0 K1 K2 0| */ + movq mm6, [edx] /* MM6 = {0,K5,K4,K3} K = |K3 K4 K5 0| */ + add edx, 8 /* third row |K6 K7 K8 0| */ + movq mm7, [edx] /* MM7 = {0,K8,K7,K6} */ + /* ---, */ + mov eax, columns /* load columns into EAX */ + mov esi, Src /* ESI = Src row 0 
address */ + mov edi, Dest /* load Dest address to EDI */ + add edi, eax /* EDI = EDI + columns */ + inc edi /* 1 byte offset from the left edge */ + mov edx, rows /* initialize ROWS counter */ + sub edx, 2 /* do not use first and last row */ + /* ---, */ +L10360: + mov ecx, eax /* initialize COLUMS counter */ + sub ecx, 2 /* do not use first and last column */ + align 16 /* 16 byte alignment of the loop entry */ +L10362: + /* ---, */ + movq mm1, [esi] /* load 8 bytes of the image first row */ + add esi, eax /* move one row below */ + movq mm2, [esi] /* load 8 bytes of the image second row */ + add esi, eax /* move one row below */ + movq mm3, [esi] /* load 8 bytes of the image third row */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpcklbw mm2, mm0 /* unpack first 4 bytes into words */ + punpcklbw mm3, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm4 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm4 /* shift right each pixel NshiftRight times */ + psrlw mm3, mm4 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm5 /* multiply words first row image*Kernel */ + pmullw mm2, mm6 /* multiply words second row image*Kernel */ + pmullw mm3, mm7 /* multiply words third row image*Kernel */ + paddsw mm1, mm2 /* add 4 words of the first and second rows */ + paddsw mm1, mm3 /* add 4 words of the third row and result */ + movq mm2, mm1 /* copy MM1 into MM2 */ + psrlq mm1, 32 /* shift 2 left words to the right */ + paddsw mm1, mm2 /* add 2 left and 2 right result words */ + movq mm3, mm1 /* copy MM1 into MM3 */ + psrlq mm1, 16 /* shift 1 left word to the right */ + paddsw mm1, mm3 /* add 1 left and 1 right result words */ + packuswb mm1, mm0 /* pack shift result with saturation */ + movd ebx, mm1 /* copy saturated result into EBX */ + mov [edi], bl /* copy a byte result into Dest */ + /* --, */ + sub esi, eax /* move two rows up */ + sub esi, eax + inc esi /* move Src pointer to the next pixel */ + inc edi /* move Dest 
pointer to the next pixel */ + /* ---, */ + dec ecx /* decrease loop counter COLUMNS */ + jnz L10362 /* check loop termination, proceed if required */ + add esi, 2 /* move to the next row in Src */ + add edi, 2 /* move to the next row in Dest */ + dec edx /* decrease loop counter ROWS */ + jnz L10360 /* check loop termination, proceed if required */ + /* ---, */ + emms /* exit MMX state */ + popa + } +#else + asm volatile + ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t" /* zero MM0 */ + "xor %%ebx, %%ebx \n\t" /* zero EBX */ + "mov %5, %%bl \n\t" /* load NRightShift into BL */ + "movd %%ebx, %%mm4 \n\t" /* copy NRightShift into MM4 */ + "mov %4, %%edx \n\t" /* load Kernel address into EDX */ + "movq (%%edx), %%mm5 \n\t" /* MM5 = {0,K2,K1,K0} */ + "add $8, %%edx \n\t" /* second row |K0 K1 K2 0| */ + "movq (%%edx), %%mm6 \n\t" /* MM6 = {0,K5,K4,K3} K = |K3 K4 K5 0| */ + "add $8, %%edx \n\t" /* third row |K6 K7 K8 0| */ + "movq (%%edx), %%mm7 \n\t" /* MM7 = {0,K8,K7,K6} */ + /* --- */ + "mov %3, %%eax \n\t" /* load columns into EAX */ + "mov %1, %%esi \n\t" /* ESI = Src row 0 address */ + "mov %0, %%edi \n\t" /* load Dest address to EDI */ + "add %%eax, %%edi \n\t" /* EDI = EDI + columns */ + "inc %%edi \n\t" /* 1 byte offset from the left edge */ + "mov %2, %%edx \n\t" /* initialize ROWS counter */ + "sub $2, %%edx \n\t" /* do not use first and last row */ + /* --- */ + ".L10360: \n\t" "mov %%eax, %%ecx \n\t" /* initialize COLUMS counter */ + "sub $2, %%ecx \n\t" /* do not use first and last column */ + ".align 16 \n\t" /* 16 byte alignment of the loop entry */ + ".L10362: \n\t" + /* --- */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the image first row */ + "add %%eax, %%esi \n\t" /* move one row below */ + "movq (%%esi), %%mm2 \n\t" /* load 8 bytes of the image second row */ + "add %%eax, %%esi \n\t" /* move one row below */ + "movq (%%esi), %%mm3 \n\t" /* load 8 bytes of the image third row */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + 
"punpcklbw %%mm0, %%mm2 \n\t" /* unpack first 4 bytes into words */ + "punpcklbw %%mm0, %%mm3 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm4, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm4, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm4, %%mm3 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm5, %%mm1 \n\t" /* multiply words first row image*Kernel */ + "pmullw %%mm6, %%mm2 \n\t" /* multiply words second row image*Kernel */ + "pmullw %%mm7, %%mm3 \n\t" /* multiply words third row image*Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the first and second rows */ + "paddsw %%mm3, %%mm1 \n\t" /* add 4 words of the third row and result */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "psrlq $32, %%mm1 \n\t" /* shift 2 left words to the right */ + "paddsw %%mm2, %%mm1 \n\t" /* add 2 left and 2 right result words */ + "movq %%mm1, %%mm3 \n\t" /* copy MM1 into MM3 */ + "psrlq $16, %%mm1 \n\t" /* shift 1 left word to the right */ + "paddsw %%mm3, %%mm1 \n\t" /* add 1 left and 1 right result words */ + "packuswb %%mm0, %%mm1 \n\t" /* pack shift result with saturation */ + "movd %%mm1, %%ebx \n\t" /* copy saturated result into EBX */ + "mov %%bl, (%%edi) \n\t" /* copy a byte result into Dest */ + /* -- */ + "sub %%eax, %%esi \n\t" /* move two rows up */ + "sub %%eax, %%esi \n\t" "inc %%esi \n\t" /* move Src pointer to the next pixel */ + "inc %%edi \n\t" /* move Dest pointer to the next pixel */ + /* --- */ + "dec %%ecx \n\t" /* decrease loop counter COLUMNS */ + "jnz .L10362 \n\t" /* check loop termination, proceed if required */ + "add $2, %%esi \n\t" /* move to the next row in Src */ + "add $2, %%edi \n\t" /* move to the next row in Dest */ + "dec %%edx \n\t" /* decrease loop counter ROWS */ + "jnz .L10360 \n\t" /* check loop termination, proceed if required */ + /* --- */ + "emms \n\t" /* exit MMX state */ + "popa \n\t":"=m" (Dest) /* %0 */ + :"m"(Src), /* %1 */ + "m"(rows), 
/* %2 */ + "m"(columns), /* %3 */ + "m"(Kernel), /* %4 */ + "m"(NRightShift) /* %5 */ + ); +#endif +#endif + return (0); + } else { + /* No non-MMX implementation yet */ + return (-1); + } +} + +/*! +\brief Filter using ConvolveKernel5x5ShiftRight: Dij = saturation0and255( ... ) + +\param Src The source 2D byte array to convolve. Should be different from destination. +\param Dest The destination 2D byte array to store the result in. Should be different from source. +\param rows Number of rows in source/destination array. Must be >4. +\param columns Number of columns in source/destination array. Must be >4. +\param Kernel The 2D convolution kernel of size 5x5. +\param NRightShift The number of right bit shifts applied to each source pixel before it is multiplied by the kernel. Must be <=7. + +Note: Non-MMX implementation not available for this function. + +\return Returns 0 when the MMX branch is taken, -1 if a parameter is invalid or MMX is not detected. +*/ +int SDL_imageFilterConvolveKernel5x5ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, + signed short *Kernel, unsigned char NRightShift) +{ + /* Validate input parameters */ + if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL)) + return(-1); + + if ((columns < 5) || (rows < 5) || (NRightShift > 7)) + return (-1); + + if ((SDL_imageFilterMMXdetect())) { +//#ifdef USE_MMX +#if defined(USE_MMX) && defined(i386) +#if !defined(GCC__) + __asm + { + pusha + pxor mm0, mm0 /* zero MM0 */ + xor ebx, ebx /* zero EBX */ + mov bl, NRightShift /* load NRightShift into BL */ + movd mm5, ebx /* copy NRightShift into MM5 */ + mov edx, Kernel /* load Kernel address into EDX */ + mov esi, Src /* load Src address to ESI */ + mov edi, Dest /* load Dest address to EDI */ + add edi, 2 /* 2 column offset from the left edge */ + mov eax, columns /* load columns into EAX */ + shl eax, 1 /* EAX = columns * 2 */ + add edi, eax /* 2 row offset from the top edge */ + shr eax, 1 /* EAX = columns */ + mov ebx, rows /* initialize ROWS counter */ + sub ebx, 4 /* do not use first 2 and last 2 
rows */ + /* ---, */ +L10370: + mov ecx, eax /* initialize COLUMNS counter */ + sub ecx, 4 /* do not use first 2 and last 2 columns */ + align 16 /* 16 byte alignment of the loop entry */ +L10372: + pxor mm7, mm7 /* zero MM7 (accumulator) */ + movd mm6, esi /* save ESI in MM6 */ + /* --- 1 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 2 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 3 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, 
mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 4 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 5 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ 
+ psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* ---, */ + movq mm3, mm7 /* copy MM7 into MM3 */ + psrlq mm7, 32 /* shift 2 left words to the right */ + paddsw mm7, mm3 /* add 2 left and 2 right result words */ + movq mm2, mm7 /* copy MM7 into MM2 */ + psrlq mm7, 16 /* shift 1 left word to the right */ + paddsw mm7, mm2 /* add 1 left and 1 right result words */ + movd mm1, eax /* save EAX in MM1 */ + packuswb mm7, mm0 /* pack division result with saturation */ + movd eax, mm7 /* copy saturated result into EAX */ + mov [edi], al /* copy a byte result into Dest */ + movd eax, mm1 /* restore saved EAX */ + /* --, */ + movd esi, mm6 /* move Src pointer to the top pixel */ + sub edx, 72 /* EDX = Kernel address */ + inc esi /* move Src pointer to the next pixel */ + inc edi /* move Dest pointer to the next pixel */ + /* ---, */ + dec ecx /* decrease loop counter COLUMNS */ + jnz L10372 /* check loop termination, proceed if required */ + add esi, 4 /* move to the next row in Src */ + add edi, 4 /* move to the next row in Dest */ + dec ebx /* decrease loop counter ROWS */ + jnz L10370 /* check loop termination, proceed if required */ + /* ---, */ + emms /* exit MMX state */ + popa + } +#else + asm volatile + ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t" /* zero MM0 */ + "xor %%ebx, %%ebx \n\t" /* zero EBX */ + "mov %5, %%bl \n\t" /* load NRightShift into BL */ + "movd %%ebx, %%mm5 \n\t" /* copy NRightShift into MM5 */ + "mov %4, %%edx \n\t" /* load Kernel address into EDX */ + "mov %1, %%esi \n\t" /* load Src address to ESI */ + "mov %0, %%edi \n\t" /* load Dest address to EDI */ + "add $2, %%edi \n\t" /* 2 column offset from the left edge */ + "mov %3, %%eax \n\t" /* load columns into EAX */ + "shl $1, %%eax \n\t" /* EAX = columns 
* 2 */ + "add %%eax, %%edi \n\t" /* 2 row offset from the top edge */ + "shr $1, %%eax \n\t" /* EAX = columns */ + "mov %2, %%ebx \n\t" /* initialize ROWS counter */ + "sub $4, %%ebx \n\t" /* do not use first 2 and last 2 rows */ + /* --- */ + ".L10370: \n\t" "mov %%eax, %%ecx \n\t" /* initialize COLUMNS counter */ + "sub $4, %%ecx \n\t" /* do not use first 2 and last 2 columns */ + ".align 16 \n\t" /* 16 byte alignment of the loop entry */ + ".L10372: \n\t" "pxor %%mm7, %%mm7 \n\t" /* zero MM7 (accumulator) */ + "movd %%esi, %%mm6 \n\t" /* save ESI in MM6 */ + /* --- 1 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 2 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 3 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 4 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 5 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- */ + "movq %%mm7, %%mm3 \n\t" /* copy MM7 into MM3 */ + "psrlq $32, %%mm7 \n\t" /* shift 2 left words to the right */ + "paddsw %%mm3, %%mm7 \n\t" /* add 2 left and 2 right result words */ + "movq %%mm7, %%mm2 \n\t" /* copy MM7 into MM2 */ + "psrlq $16, %%mm7 \n\t" /* shift 1 left word to the right */ + "paddsw %%mm2, %%mm7 \n\t" /* add 1 left and 1 right result words */ + "movd %%eax, %%mm1 \n\t" /* save EAX in MM1 */ + "packuswb %%mm0, %%mm7 \n\t" /* pack division result with saturation */ + "movd %%mm7, %%eax \n\t" /* copy saturated result into EAX */ + "mov %%al, (%%edi) \n\t" /* copy a byte result into Dest */ + "movd %%mm1, %%eax \n\t" /* restore saved EAX */ + /* -- */ + "movd %%mm6, %%esi \n\t" /* move Src pointer to the top pixel */ + "sub $72, %%edx \n\t" /* EDX = Kernel address */ + "inc %%esi \n\t" /* move Src pointer to the next pixel */ + "inc %%edi \n\t" /* move Dest pointer to the next pixel */ + /* --- */ + "dec %%ecx \n\t" /* decrease loop counter COLUMNS */ + "jnz .L10372 \n\t" /* check loop termination, proceed if required */ + "add $4, %%esi \n\t" /* move to the next row in Src */ + "add $4, %%edi \n\t" /* move to the next row in Dest */ + "dec %%ebx \n\t" /* decrease loop counter ROWS */ + "jnz .L10370 \n\t" /* check loop termination, proceed if required */ + /* --- */ + "emms \n\t" /* exit MMX state */ + "popa \n\t":"=m" (Dest) /* %0 */ + :"m"(Src), /* %1 */ + "m"(rows), /* %2 */ + "m"(columns), /* %3 */ + "m"(Kernel), /* %4 */ + "m"(NRightShift) /* %5 */ + ); +#endif +#endif + return (0); + } else { + /* No non-MMX implementation yet */ + return (-1); + } +} + +/*! +\brief Filter using ConvolveKernel7x7ShiftRight: Dij = saturation0and255( ... ) + +\param Src The source 2D byte array to convolve. Should be different from destination. 
+\param Dest The destination 2D byte array to store the result in. Should be different from source. +\param rows Number of rows in source/destination array. Must be >6. +\param columns Number of columns in source/destination array. Must be >6. +\param Kernel The 2D convolution kernel of size 7x7. +\param NRightShift The number of right bit shifts to apply to the convolution sum. Must be <7. + +Note: Non-MMX implementation not available for this function. + +\return Returns 1 if filter was applied, 0 otherwise. +*/ +int SDL_imageFilterConvolveKernel7x7ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, + signed short *Kernel, unsigned char NRightShift) +{ + /* Validate input parameters */ + if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL)) + return(-1); + + if ((columns < 7) || (rows < 7) || (NRightShift > 7)) + return (-1); + + if ((SDL_imageFilterMMXdetect())) { +//#ifdef USE_MMX +#if defined(USE_MMX) && defined(i386) +#if !defined(GCC__) + __asm + { + pusha + pxor mm0, mm0 /* zero MM0 */ + xor ebx, ebx /* zero EBX */ + mov bl, NRightShift /* load NRightShift into BL */ + movd mm5, ebx /* copy NRightShift into MM5 */ + mov edx, Kernel /* load Kernel address into EDX */ + mov esi, Src /* load Src address to ESI */ + mov edi, Dest /* load Dest address to EDI */ + add edi, 3 /* 3 column offset from the left edge */ + mov eax, columns /* load columns into EAX */ + add edi, eax /* 3 row offset from the top edge */ + add edi, eax + add edi, eax + mov ebx, rows /* initialize ROWS counter */ + sub ebx, 6 /* do not use first 3 and last 3 rows */ + /* ---, */ +L10380: + mov ecx, eax /* initialize COLUMNS counter */ + sub ecx, 6 /* do not use first 3 and last 3 columns */ + align 16 /* 16 byte alignment of the loop entry */ +L10382: + pxor mm7, mm7 /* zero MM7 (accumulator) */ + movd mm6, esi /* save ESI in MM6 */ + /* --- 1 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src 
pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 2 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 3 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* 
shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 4 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 5 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 6 */ 
+ movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* ---, */ + movq mm3, mm7 /* copy MM7 into MM3 */ + psrlq mm7, 32 /* shift 2 left words to the right */ + paddsw mm7, mm3 /* add 2 left and 2 right result words */ + movq mm2, mm7 /* copy MM7 into MM2 */ + psrlq mm7, 16 /* shift 1 left word to the right */ + paddsw mm7, mm2 /* add 1 left and 1 right result words */ + movd mm1, eax /* save EAX in MM1 */ + packuswb mm7, mm0 /* pack division result with saturation */ + movd eax, mm7 /* copy saturated result into 
EAX */ + mov [edi], al /* copy a byte result into Dest */ + movd eax, mm1 /* restore saved EAX */ + /* --, */ + movd esi, mm6 /* move Src pointer to the top pixel */ + sub edx, 104 /* EDX = Kernel address */ + inc esi /* move Src pointer to the next pixel */ + inc edi /* move Dest pointer to the next pixel */ + /* ---, */ + dec ecx /* decrease loop counter COLUMNS */ + jnz L10382 /* check loop termination, proceed if required */ + add esi, 6 /* move to the next row in Src */ + add edi, 6 /* move to the next row in Dest */ + dec ebx /* decrease loop counter ROWS */ + jnz L10380 /* check loop termination, proceed if required */ + /* ---, */ + emms /* exit MMX state */ + popa + } +#else + asm volatile + ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t" /* zero MM0 */ + "xor %%ebx, %%ebx \n\t" /* zero EBX */ + "mov %5, %%bl \n\t" /* load NRightShift into BL */ + "movd %%ebx, %%mm5 \n\t" /* copy NRightShift into MM5 */ + "mov %4, %%edx \n\t" /* load Kernel address into EDX */ + "mov %1, %%esi \n\t" /* load Src address to ESI */ + "mov %0, %%edi \n\t" /* load Dest address to EDI */ + "add $3, %%edi \n\t" /* 3 column offset from the left edge */ + "mov %3, %%eax \n\t" /* load columns into EAX */ + "add %%eax, %%edi \n\t" /* 3 row offset from the top edge */ + "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t" /* initialize ROWS counter */ + "sub $6, %%ebx \n\t" /* do not use first 3 and last 3 rows */ + /* --- */ + ".L10380: \n\t" "mov %%eax, %%ecx \n\t" /* initialize COLUMNS counter */ + "sub $6, %%ecx \n\t" /* do not use first 3 and last 3 columns */ + ".align 16 \n\t" /* 16 byte alignment of the loop entry */ + ".L10382: \n\t" "pxor %%mm7, %%mm7 \n\t" /* zero MM7 (accumulator) */ + "movd %%esi, %%mm6 \n\t" /* save ESI in MM6 */ + /* --- 1 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of 
Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 2 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 3 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 4 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 5 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 6 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- */ + "movq %%mm7, %%mm3 \n\t" /* copy MM7 into MM3 */ + "psrlq $32, %%mm7 \n\t" /* shift 2 left words to the right */ + "paddsw %%mm3, %%mm7 \n\t" /* add 2 left and 2 right result words */ + "movq %%mm7, %%mm2 \n\t" /* copy MM7 into MM2 */ + "psrlq $16, %%mm7 \n\t" /* shift 1 left word to the right */ + "paddsw %%mm2, %%mm7 \n\t" /* add 1 left and 1 right result words */ + "movd %%eax, %%mm1 \n\t" /* save EAX in MM1 */ + "packuswb %%mm0, %%mm7 \n\t" /* pack division result with saturation */ + "movd %%mm7, %%eax \n\t" /* copy saturated result into EAX */ + "mov %%al, (%%edi) \n\t" /* copy a byte result into Dest */ + "movd %%mm1, %%eax \n\t" /* restore saved EAX */ + /* -- */ + "movd %%mm6, %%esi \n\t" /* move Src pointer to the top pixel */ + "sub $104, %%edx \n\t" /* EDX = Kernel address */ + "inc %%esi \n\t" /* move Src pointer to the next pixel */ + "inc %%edi \n\t" /* move Dest pointer to the next pixel */ + /* --- */ + "dec %%ecx 
\n\t" /* decrease loop counter COLUMNS */ + "jnz .L10382 \n\t" /* check loop termination, proceed if required */ + "add $6, %%esi \n\t" /* move to the next row in Src */ + "add $6, %%edi \n\t" /* move to the next row in Dest */ + "dec %%ebx \n\t" /* decrease loop counter ROWS */ + "jnz .L10380 \n\t" /* check loop termination, proceed if required */ + /* --- */ + "emms \n\t" /* exit MMX state */ + "popa \n\t":"=m" (Dest) /* %0 */ + :"m"(Src), /* %1 */ + "m"(rows), /* %2 */ + "m"(columns), /* %3 */ + "m"(Kernel), /* %4 */ + "m"(NRightShift) /* %5 */ + ); +#endif +#endif + return (0); + } else { + /* No non-MMX implementation yet */ + return (-1); + } +} + +/*! +\brief Filter using ConvolveKernel9x9ShiftRight: Dij = saturation255( ... ) + +\param Src The source 2D byte array to convolve. Should be different from destination. +\param Dest The destination 2D byte array to store the result in. Should be different from source. +\param rows Number of rows in source/destination array. Must be >8. +\param columns Number of columns in source/destination array. Must be >8. +\param Kernel The 2D convolution kernel of size 9x9. +\param NRightShift The number of right bit shifts to apply to each source pixel before it is multiplied with the kernel. Must be <=7. + +Note: Non-MMX implementation not available for this function. + +\return Returns 0 for success or -1 for error (invalid parameters or MMX not available).
+*/ +int SDL_imageFilterConvolveKernel9x9ShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, + signed short *Kernel, unsigned char NRightShift) +{ + /* Validate input parameters: non-NULL buffers, image of at least 9x9, shift of at most 7 */ + if ((Src == NULL) || (Dest == NULL) || (Kernel == NULL)) + return(-1); + + if ((columns < 9) || (rows < 9) || (NRightShift > 7)) + return (-1); + + if ((SDL_imageFilterMMXdetect())) { +//#ifdef USE_MMX +#if defined(USE_MMX) && defined(i386) +#if !defined(GCC__) + __asm + { + pusha + pxor mm0, mm0 /* zero MM0 */ + xor ebx, ebx /* zero EBX */ + mov bl, NRightShift /* load NRightShift into BL */ + movd mm5, ebx /* copy NRightShift into MM5 */ + mov edx, Kernel /* load Kernel address into EDX */ + mov esi, Src /* load Src address to ESI */ + mov edi, Dest /* load Dest address to EDI */ + add edi, 4 /* 4 column offset from the left edge */ + mov eax, columns /* load columns into EAX */ + add edi, eax /* 4 row offset from the top edge */ + add edi, eax + add edi, eax + add edi, eax + mov ebx, rows /* initialize ROWS counter */ + sub ebx, 8 /* do not use first 4 and last 4 rows */ + /* ---, */ +L10390: + mov ecx, eax /* initialize COLUMNS counter */ + sub ecx, 8 /* do not use first 4 and last 4 columns */ + align 16 /* 16 byte alignment of the loop entry */ +L10392: + pxor mm7, mm7 /* zero MM7 (accumulator) */ + movd mm6, esi /* save ESI in MM6 */ + /* --- 1 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* advance Src pointer by 1 byte */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words
of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes starting at Src+1; only the low 4 are unpacked below. NOTE(review): the 9th pixel of the row would be at Src+8, confirm this offset is intended (same pattern in every row) */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 2 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* advance Src pointer by 1 byte */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 3 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */
+ inc esi /* advance Src pointer by 1 byte */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 4 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* advance Src pointer by 1 byte */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq
mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 5 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* advance Src pointer by 1 byte */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 6 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* advance Src pointer by 1 byte */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ +
add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* advance Src pointer by 1 byte */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ +
punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 8 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* advance Src pointer by 1 byte */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + dec esi + add esi, eax /* move Src pointer 1 row below */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* --- 9 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm2, mm1 /* copy MM1 into MM2 */ + inc esi /* advance Src pointer by 1 byte */ + movq mm3, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + movq mm4, [edx] /* load 4 words of Kernel */ + add edx, 8 /* move pointer to other 4 words */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + punpckhbw mm2, mm0 /* unpack second 4 bytes into words */ + psrlw mm1, mm5 /* shift right
each pixel NshiftRight times */ + psrlw mm2, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + pmullw mm2, mm4 /* mult 4 high words of Src and Kernel */ + paddsw mm1, mm2 /* add 4 words of the high and low bytes */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + movq mm1, [esi] /* load 8 bytes of the Src */ + movq mm3, [edx] /* load 4 words of Kernel */ + punpcklbw mm1, mm0 /* unpack first 4 bytes into words */ + psrlw mm1, mm5 /* shift right each pixel NshiftRight times */ + pmullw mm1, mm3 /* mult 4 low words of Src and Kernel */ + paddsw mm7, mm1 /* add MM1 to accumulator MM7 */ + /* ---, */ + movq mm3, mm7 /* copy MM7 into MM3 */ + psrlq mm7, 32 /* shift 2 left words to the right */ + paddsw mm7, mm3 /* add 2 left and 2 right result words */ + movq mm2, mm7 /* copy MM7 into MM2 */ + psrlq mm7, 16 /* shift 1 left word to the right */ + paddsw mm7, mm2 /* add 1 left and 1 right result words */ + movd mm1, eax /* save EAX in MM1 */ + packuswb mm7, mm0 /* pack summed words to bytes with unsigned saturation */ + movd eax, mm7 /* copy saturated result into EAX */ + mov [edi], al /* copy a byte result into Dest */ + movd eax, mm1 /* restore saved EAX */ + /* --, */ + movd esi, mm6 /* move Src pointer to the top pixel */ + sub edx, 208 /* rewind EDX to the Kernel start (8 rows * 24 bytes + 16 bytes advanced above) */ + inc esi /* move Src pointer to the next pixel */ + inc edi /* move Dest pointer to the next pixel */ + /* ---, */ + dec ecx /* decrease loop counter COLUMNS */ + jnz L10392 /* check loop termination, proceed if required */ + add esi, 8 /* move to the next row in Src */ + add edi, 8 /* move to the next row in Dest */ + dec ebx /* decrease loop counter ROWS */ + jnz L10390 /* check loop termination, proceed if required */ + /* ---, */ + emms /* exit MMX state */ + popa + } +#else + asm volatile + ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t" /* zero MM0 */ + "xor %%ebx, %%ebx \n\t" /* zero EBX */ + "mov %5, %%bl \n\t" /* load NRightShift into BL */ + "movd %%ebx,
%%mm5 \n\t" /* copy NRightShift into MM5 */ + "mov %4, %%edx \n\t" /* load Kernel address into EDX */ + "mov %1, %%esi \n\t" /* load Src address to ESI */ + "mov %0, %%edi \n\t" /* load Dest address to EDI */ + "add $4, %%edi \n\t" /* 4 column offset from the left edge */ + "mov %3, %%eax \n\t" /* load columns into EAX */ + "add %%eax, %%edi \n\t" /* 4 row offset from the top edge */ + "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t" /* initialize ROWS counter */ + "sub $8, %%ebx \n\t" /* do not use first 4 and last 4 rows */ + /* --- */ + ".L10390: \n\t" "mov %%eax, %%ecx \n\t" /* initialize COLUMNS counter */ + "sub $8, %%ecx \n\t" /* do not use first 4 and last 4 columns */ + ".align 16 \n\t" /* 16 byte alignment of the loop entry */ + ".L10392: \n\t" "pxor %%mm7, %%mm7 \n\t" /* zero MM7 (accumulator) */ + "movd %%esi, %%mm6 \n\t" /* save ESI in MM6 */ + /* --- 1 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* advance Src pointer by 1 byte */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult.
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 2 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* advance Src pointer by 1 byte */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult.
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 3 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* advance Src pointer by 1 byte */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult.
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 4 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* advance Src pointer by 1 byte */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult.
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 5 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* advance Src pointer by 1 byte */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult.
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 6 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* advance Src pointer by 1 byte */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult.
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* advance Src pointer by 1 byte */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult.
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 8 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* advance Src pointer by 1 byte */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult.
4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "dec %%esi \n\t" "add %%eax, %%esi \n\t" /* move Src pointer 1 row below */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- 9 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq %%mm1, %%mm2 \n\t" /* copy MM1 into MM2 */ + "inc %%esi \n\t" /* advance Src pointer by 1 byte */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "movq (%%edx), %%mm4 \n\t" /* load 4 words of Kernel */ + "add $8, %%edx \n\t" /* move pointer to other 4 words */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "punpckhbw %%mm0, %%mm2 \n\t" /* unpack second 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm5, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult. 4 low words of Src and Kernel */ + "pmullw %%mm4, %%mm2 \n\t" /* mult. 4 high words of Src and Kernel */ + "paddsw %%mm2, %%mm1 \n\t" /* add 4 words of the high and low bytes */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + "movq (%%esi), %%mm1 \n\t" /* load 8 bytes of the Src */ + "movq (%%edx), %%mm3 \n\t" /* load 4 words of Kernel */ + "punpcklbw %%mm0, %%mm1 \n\t" /* unpack first 4 bytes into words */ + "psrlw %%mm5, %%mm1 \n\t" /* shift right each pixel NshiftRight times */ + "pmullw %%mm3, %%mm1 \n\t" /* mult.
4 low words of Src and Kernel */ + "paddsw %%mm1, %%mm7 \n\t" /* add MM1 to accumulator MM7 */ + /* --- */ + "movq %%mm7, %%mm3 \n\t" /* copy MM7 into MM3 */ + "psrlq $32, %%mm7 \n\t" /* shift 2 left words to the right */ + "paddsw %%mm3, %%mm7 \n\t" /* add 2 left and 2 right result words */ + "movq %%mm7, %%mm2 \n\t" /* copy MM7 into MM2 */ + "psrlq $16, %%mm7 \n\t" /* shift 1 left word to the right */ + "paddsw %%mm2, %%mm7 \n\t" /* add 1 left and 1 right result words */ + "movd %%eax, %%mm1 \n\t" /* save EAX in MM1 */ + "packuswb %%mm0, %%mm7 \n\t" /* pack summed words to bytes with unsigned saturation */ + "movd %%mm7, %%eax \n\t" /* copy saturated result into EAX */ + "mov %%al, (%%edi) \n\t" /* copy a byte result into Dest */ + "movd %%mm1, %%eax \n\t" /* restore saved EAX */ + /* -- */ + "movd %%mm6, %%esi \n\t" /* move Src pointer to the top pixel */ + "sub $208, %%edx \n\t" /* rewind EDX to the Kernel start (8 rows * 24 bytes + 16 bytes advanced above) */ + "inc %%esi \n\t" /* move Src pointer to the next pixel */ + "inc %%edi \n\t" /* move Dest pointer to the next pixel */ + /* --- */ + "dec %%ecx \n\t" /* decrease loop counter COLUMNS */ + "jnz .L10392 \n\t" /* check loop termination, proceed if required */ + "add $8, %%esi \n\t" /* move to the next row in Src */ + "add $8, %%edi \n\t" /* move to the next row in Dest */ + "dec %%ebx \n\t" /* decrease loop counter ROWS */ + "jnz .L10390 \n\t" /* check loop termination, proceed if required */ + /* --- */ + "emms \n\t" /* exit MMX state */ + "popa \n\t":"=m" (Dest) /* %0 -- NOTE(review): declared as an output, but the asm only reads this pointer value */ + :"m"(Src), /* %1 */ + "m"(rows), /* %2 */ + "m"(columns), /* %3 */ + "m"(Kernel), /* %4 */ + "m"(NRightShift) /* %5 */ + ); +#endif +#endif + return (0); /* NOTE(review): also reached when the asm above is compiled out (USE_MMX or i386 not defined), so 0 is returned without writing Dest */ + } else { + /* No non-MMX implementation yet */ + return (-1); + } +} + +/* ------------------------------------------------------------------------------------ */ + +/*! +\brief Filter using SobelX: Dij = saturation255( ... ) + +\param Src The source 2D byte array to sobel-filter. Should be different from destination.
+\param Dest The destination 2D byte array to store the result in. Should be different from source. +\param rows Number of rows in source/destination array. Must be >2. +\param columns Number of columns in source/destination array. Must be >7. + +Note: Non-MMX implementation not available for this function. The MMX code is only compiled in when USE_MMX and i386 are defined; otherwise Dest is left untouched but 0 is still returned. + +\return Returns 0 if SDL_imageFilterMMXdetect() is non-zero, -1 on invalid parameters or if it is zero. +*/ +int SDL_imageFilterSobelX(unsigned char *Src, unsigned char *Dest, int rows, int columns) +{ + /* Validate input parameters */ + if ((Src == NULL) || (Dest == NULL)) + return(-1); + + if ((columns < 8) || (rows < 3)) + return (-1); + + if ((SDL_imageFilterMMXdetect())) { +//#ifdef USE_MMX +#if defined(USE_MMX) && defined(i386) +#if !defined(GCC__) + __asm + { + pusha + pxor mm0, mm0 /* zero MM0 */ + mov eax, columns /* load columns into EAX */ + /* ---, */ + mov esi, Src /* ESI = Src row 0 address */ + mov edi, Dest /* load Dest address to EDI */ + add edi, eax /* EDI = EDI + columns */ + inc edi /* 1 byte offset from the left edge */ + mov edx, rows /* initialize ROWS counter */ + sub edx, 2 /* do not use first and last rows */ + /* ---, */ +L10400: + mov ecx, eax /* initialize COLUMNS counter */ + shr ecx, 3 /* ECX/8 (MMX loads 8 bytes at a time) */ + mov ebx, esi /* save ESI in EBX */ + movd mm1, edi /* save EDI in MM1 */ + align 16 /* 16 byte alignment of the loop entry */ +L10402: + /* ---, */ + movq mm4, [esi] /* load 8 bytes from Src */ + movq mm5, mm4 /* save MM4 in MM5 */ + add esi, 2 /* move ESI pointer 2 bytes right */ + punpcklbw mm4, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm5, mm0 /* unpack 4 high bytes into words */ + movq mm6, [esi] /* load 8 bytes from Src */ + movq mm7, mm6 /* save MM6 in MM7 */ + sub esi, 2 /* move ESI pointer back 2 bytes left */ + punpcklbw mm6, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm7, mm0 /* unpack 4 high bytes into words */ + add esi, eax /* move to the next row of Src */ + movq mm2, [esi] /* load 8 bytes from Src */ + movq mm3, mm2 /*
save MM2 in MM3 */ + add esi, 2 /* move ESI pointer 2 bytes right */ + punpcklbw mm2, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm3, mm0 /* unpack 4 high bytes into words */ + paddw mm4, mm2 /* add 4 low bytes to accumulator MM4 */ + paddw mm5, mm3 /* add 4 high bytes to accumulator MM5 */ + paddw mm4, mm2 /* add 4 low bytes to accumulator MM4 */ + paddw mm5, mm3 /* add 4 high bytes to accumulator MM5 */ + movq mm2, [esi] /* load 8 bytes from Src */ + movq mm3, mm2 /* save MM2 in MM3 */ + sub esi, 2 /* move ESI pointer back 2 bytes left */ + punpcklbw mm2, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm3, mm0 /* unpack 4 high bytes into words */ + paddw mm6, mm2 /* add 4 low bytes to accumulator MM6 */ + paddw mm7, mm3 /* add 4 high bytes to accumulator MM7 */ + paddw mm6, mm2 /* add 4 low bytes to accumulator MM6 */ + paddw mm7, mm3 /* add 4 high bytes to accumulator MM7 */ + add esi, eax /* move to the next row of Src */ + movq mm2, [esi] /* load 8 bytes from Src */ + movq mm3, mm2 /* save MM2 in MM3 */ + add esi, 2 /* move ESI pointer 2 bytes right */ + punpcklbw mm2, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm3, mm0 /* unpack 4 high bytes into words */ + paddw mm4, mm2 /* add 4 low bytes to accumulator MM4 */ + paddw mm5, mm3 /* add 4 high bytes to accumulator MM5 */ + movq mm2, [esi] /* load 8 bytes from Src */ + movq mm3, mm2 /* save MM2 in MM3 */ + sub esi, 2 /* move ESI pointer back 2 bytes left */ + punpcklbw mm2, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm3, mm0 /* unpack 4 high bytes into words */ + paddw mm6, mm2 /* add 4 low bytes to accumulator MM6 */ + paddw mm7, mm3 /* add 4 high bytes to accumulator MM7 */ + /* ---, */ + movq mm2, mm4 /* copy MM4 into MM2 */ + psrlq mm4, 32 /* shift 2 left words to the right */ + psubw mm4, mm2 /* MM4 = MM4 - MM2 */ + movq mm3, mm6 /* copy MM6 into MM3 */ + psrlq mm6, 32 /* shift 2 left words to the right */ + psubw mm6, mm3 /* MM6 = MM6 - MM3 */ + punpckldq mm4, mm6 /*
combine 2 words of MM6 and 2 words of MM4 */ + movq mm2, mm5 /* copy MM5 into MM2 */ + psrlq mm5, 32 /* shift 2 left words to the right */ + psubw mm5, mm2 /* MM5 = MM5 - MM2 */ + movq mm3, mm7 /* copy MM7 into MM3 */ + psrlq mm7, 32 /* shift 2 left words to the right */ + psubw mm7, mm3 /* MM7 = MM7 - MM3 */ + punpckldq mm5, mm7 /* combine 2 words of MM7 and 2 words of MM5 */ + /* Take abs values of MM4 and MM5 */ + movq mm6, mm4 /* copy MM4 into MM6 */ + movq mm7, mm5 /* copy MM5 into MM7 */ + psraw mm6, 15 /* fill MM6 words with word sign bit */ + psraw mm7, 15 /* fill MM7 words with word sign bit */ + pxor mm4, mm6 /* take 1's complement of only neg words */ + pxor mm5, mm7 /* take 1's complement of only neg words */ + psubsw mm4, mm6 /* add 1 to only neg words, W-(-1) or W-0 */ + psubsw mm5, mm7 /* add 1 to only neg words, W-(-1) or W-0 */ + packuswb mm4, mm5 /* combine and pack/saturate MM5 and MM4 */ + movq [edi], mm4 /* store result in Dest */ + /* ---, */ + sub esi, eax /* move to the current top row in Src */ + sub esi, eax + add esi, 8 /* move Src pointer to the next 8 pixels */ + add edi, 8 /* move Dest pointer to the next 8 pixels */ + /* ---, */ + dec ecx /* decrease loop counter COLUMNS */ + jnz L10402 /* check loop termination, proceed if required */ + mov esi, ebx /* restore most left current row Src address */ + movd edi, mm1 /* restore most left current row Dest address */ + add esi, eax /* move to the next row in Src */ + add edi, eax /* move to the next row in Dest */ + dec edx /* decrease loop counter ROWS */ + jnz L10400 /* check loop termination, proceed if required */ + /* ---, */ + emms /* exit MMX state */ + popa + } +#else + asm volatile + ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t" /* zero MM0 */ + "mov %3, %%eax \n\t" /* load columns into EAX */ + /* --- */ + "mov %1, %%esi \n\t" /* ESI = Src row 0 address */ + "mov %0, %%edi \n\t" /* load Dest address to EDI */ + "add %%eax, %%edi \n\t" /* EDI = EDI + columns */ + "inc %%edi \n\t" /* 1
byte offset from the left edge */ + "mov %2, %%edx \n\t" /* initialize ROWS counter */ + "sub $2, %%edx \n\t" /* do not use first and last rows */ + /* --- */ + ".L10400: \n\t" "mov %%eax, %%ecx \n\t" /* initialize COLUMNS counter */ + "shr $3, %%ecx \n\t" /* ECX/8 (MMX loads 8 bytes at a time) */ + "mov %%esi, %%ebx \n\t" /* save ESI in EBX */ + "movd %%edi, %%mm1 \n\t" /* save EDI in MM1 */ + ".align 16 \n\t" /* 16 byte alignment of the loop entry */ + ".L10402: \n\t" + /* --- */ + "movq (%%esi), %%mm4 \n\t" /* load 8 bytes from Src */ + "movq %%mm4, %%mm5 \n\t" /* save MM4 in MM5 */ + "add $2, %%esi \n\t" /* move ESI pointer 2 bytes right */ + "punpcklbw %%mm0, %%mm4 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm5 \n\t" /* unpack 4 high bytes into words */ + "movq (%%esi), %%mm6 \n\t" /* load 8 bytes from Src */ + "movq %%mm6, %%mm7 \n\t" /* save MM6 in MM7 */ + "sub $2, %%esi \n\t" /* move ESI pointer back 2 bytes left */ + "punpcklbw %%mm0, %%mm6 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm7 \n\t" /* unpack 4 high bytes into words */ + "add %%eax, %%esi \n\t" /* move to the next row of Src */ + "movq (%%esi), %%mm2 \n\t" /* load 8 bytes from Src */ + "movq %%mm2, %%mm3 \n\t" /* save MM2 in MM3 */ + "add $2, %%esi \n\t" /* move ESI pointer 2 bytes right */ + "punpcklbw %%mm0, %%mm2 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm3 \n\t" /* unpack 4 high bytes into words */ + "paddw %%mm2, %%mm4 \n\t" /* add 4 low bytes to accumulator MM4 */ + "paddw %%mm3, %%mm5 \n\t" /* add 4 high bytes to accumulator MM5 */ + "paddw %%mm2, %%mm4 \n\t" /* add 4 low bytes to accumulator MM4 */ + "paddw %%mm3, %%mm5 \n\t" /* add 4 high bytes to accumulator MM5 */ + "movq (%%esi), %%mm2 \n\t" /* load 8 bytes from Src */ + "movq %%mm2, %%mm3 \n\t" /* save MM2 in MM3 */ + "sub $2, %%esi \n\t" /* move ESI pointer back 2 bytes left */ + "punpcklbw %%mm0, %%mm2 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0,
%%mm3 \n\t" /* unpack 4 high bytes into words */ + "paddw %%mm2, %%mm6 \n\t" /* add 4 low bytes to accumulator MM6 */ + "paddw %%mm3, %%mm7 \n\t" /* add 4 high bytes to accumulator MM7 */ + "paddw %%mm2, %%mm6 \n\t" /* add 4 low bytes to accumulator MM6 */ + "paddw %%mm3, %%mm7 \n\t" /* add 4 high bytes to accumulator MM7 */ + "add %%eax, %%esi \n\t" /* move to the next row of Src */ + "movq (%%esi), %%mm2 \n\t" /* load 8 bytes from Src */ + "movq %%mm2, %%mm3 \n\t" /* save MM2 in MM3 */ + "add $2, %%esi \n\t" /* move ESI pointer 2 bytes right */ + "punpcklbw %%mm0, %%mm2 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm3 \n\t" /* unpack 4 high bytes into words */ + "paddw %%mm2, %%mm4 \n\t" /* add 4 low bytes to accumulator MM4 */ + "paddw %%mm3, %%mm5 \n\t" /* add 4 high bytes to accumulator MM5 */ + "movq (%%esi), %%mm2 \n\t" /* load 8 bytes from Src */ + "movq %%mm2, %%mm3 \n\t" /* save MM2 in MM3 */ + "sub $2, %%esi \n\t" /* move ESI pointer back 2 bytes left */ + "punpcklbw %%mm0, %%mm2 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm3 \n\t" /* unpack 4 high bytes into words */ + "paddw %%mm2, %%mm6 \n\t" /* add 4 low bytes to accumulator MM6 */ + "paddw %%mm3, %%mm7 \n\t" /* add 4 high bytes to accumulator MM7 */ + /* --- */ + "movq %%mm4, %%mm2 \n\t" /* copy MM4 into MM2 */ + "psrlq $32, %%mm4 \n\t" /* shift 2 left words to the right */ + "psubw %%mm2, %%mm4 \n\t" /* MM4 = MM4 - MM2 */ + "movq %%mm6, %%mm3 \n\t" /* copy MM6 into MM3 */ + "psrlq $32, %%mm6 \n\t" /* shift 2 left words to the right */ + "psubw %%mm3, %%mm6 \n\t" /* MM6 = MM6 - MM3 */ + "punpckldq %%mm6, %%mm4 \n\t" /* combine 2 words of MM6 and 2 words of MM4 */ + "movq %%mm5, %%mm2 \n\t" /* copy MM5 into MM2 */ + "psrlq $32, %%mm5 \n\t" /* shift 2 left words to the right */ + "psubw %%mm2, %%mm5 \n\t" /* MM5 = MM5 - MM2 */ + "movq %%mm7, %%mm3 \n\t" /* copy MM7 into MM3 */ + "psrlq $32, %%mm7 \n\t" /* shift 2 left words to the right */ + "psubw %%mm3, %%mm7
\n\t" /* MM7 = MM7 - MM3 */ + "punpckldq %%mm7, %%mm5 \n\t" /* combine 2 words of MM7 and 2 words of MM5 */ + /* Take abs values of MM4 and MM5 */ + "movq %%mm4, %%mm6 \n\t" /* copy MM4 into MM6 */ + "movq %%mm5, %%mm7 \n\t" /* copy MM5 into MM7 */ + "psraw $15, %%mm6 \n\t" /* fill MM6 words with word sign bit */ + "psraw $15, %%mm7 \n\t" /* fill MM7 words with word sign bit */ + "pxor %%mm6, %%mm4 \n\t" /* take 1's complement of only neg. words */ + "pxor %%mm7, %%mm5 \n\t" /* take 1's complement of only neg. words */ + "psubsw %%mm6, %%mm4 \n\t" /* add 1 to only neg. words, W-(-1) or W-0 */ + "psubsw %%mm7, %%mm5 \n\t" /* add 1 to only neg. words, W-(-1) or W-0 */ + "packuswb %%mm5, %%mm4 \n\t" /* combine and pack/saturate MM5 and MM4 */ + "movq %%mm4, (%%edi) \n\t" /* store result in Dest */ + /* --- */ + "sub %%eax, %%esi \n\t" /* move to the current top row in Src */ + "sub %%eax, %%esi \n\t" "add $8, %%esi \n\t" /* move Src pointer to the next 8 pixels */ + "add $8, %%edi \n\t" /* move Dest pointer to the next 8 pixels */ + /* --- */ + "dec %%ecx \n\t" /* decrease loop counter COLUMNS */ + "jnz .L10402 \n\t" /* check loop termination, proceed if required */ + "mov %%ebx, %%esi \n\t" /* restore most left current row Src address */ + "movd %%mm1, %%edi \n\t" /* restore most left current row Dest address */ + "add %%eax, %%esi \n\t" /* move to the next row in Src */ + "add %%eax, %%edi \n\t" /* move to the next row in Dest */ + "dec %%edx \n\t" /* decrease loop counter ROWS */ + "jnz .L10400 \n\t" /* check loop termination, proceed if required */ + /* --- */ + "emms \n\t" /* exit MMX state */ + "popa \n\t":"=m" (Dest) /* %0 */ + :"m"(Src), /* %1 */ + "m"(rows), /* %2 */ + "m"(columns) /* %3 */ + ); +#endif +#endif + return (0); + } else { + /* No non-MMX implementation yet */ + return (-1); + } +} + +/*! +\brief Filter using SobelXShiftRight: Dij = saturation255( ... ) + +\param Src The source 2D byte array to sobel-filter. Should be different from destination.
+\param Dest The destination 2D byte array to store the result in. Should be different from source. +\param rows Number of rows in source/destination array. Must be >2. +\param columns Number of columns in source/destination array. Must be >7. +\param NRightShift The number of right bit shifts applied to each unpacked source pixel before summing. Must not exceed 7. + +Note: Non-MMX implementation not available for this function. The MMX code is only compiled in when USE_MMX and i386 are defined; otherwise Dest is left untouched but 0 is still returned. + +\return Returns 0 if SDL_imageFilterMMXdetect() is non-zero, -1 on invalid parameters or if it is zero. +*/ +int SDL_imageFilterSobelXShiftRight(unsigned char *Src, unsigned char *Dest, int rows, int columns, + unsigned char NRightShift) +{ + /* Validate input parameters */ + if ((Src == NULL) || (Dest == NULL)) + return(-1); + if ((columns < 8) || (rows < 3) || (NRightShift > 7)) + return (-1); + + if ((SDL_imageFilterMMXdetect())) { +//#ifdef USE_MMX +#if defined(USE_MMX) && defined(i386) +#if !defined(GCC__) + __asm + { + pusha + pxor mm0, mm0 /* zero MM0 */ + mov eax, columns /* load columns into EAX */ + xor ebx, ebx /* zero EBX */ + mov bl, NRightShift /* load NRightShift into BL */ + movd mm1, ebx /* copy NRightShift into MM1 */ + /* ---, */ + mov esi, Src /* ESI = Src row 0 address */ + mov edi, Dest /* load Dest address to EDI */ + add edi, eax /* EDI = EDI + columns */ + inc edi /* 1 byte offset from the left edge */ + /* initialize ROWS counter */ + sub rows, 2 /* do not use first and last rows */ + /* ---, */ +L10410: + mov ecx, eax /* initialize COLUMNS counter */ + shr ecx, 3 /* ECX/8 (MMX loads 8 bytes at a time) */ + mov ebx, esi /* save ESI in EBX */ + mov edx, edi /* save EDI in EDX */ + align 16 /* 16 byte alignment of the loop entry */ +L10412: + /* ---, */ + movq mm4, [esi] /* load 8 bytes from Src */ + movq mm5, mm4 /* save MM4 in MM5 */ + add esi, 2 /* move ESI pointer 2 bytes right */ + punpcklbw mm4, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm5, mm0 /* unpack 4 high bytes into words */ + psrlw mm4, mm1 /* shift right each pixel NshiftRight times */ + psrlw mm5, mm1 /* shift right each
pixel NshiftRight times */ + movq mm6, [esi] /* load 8 bytes from Src */ + movq mm7, mm6 /* save MM6 in MM7 */ + sub esi, 2 /* move ESI pointer back 2 bytes left */ + punpcklbw mm6, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm7, mm0 /* unpack 4 high bytes into words */ + psrlw mm6, mm1 /* shift right each pixel NshiftRight times */ + psrlw mm7, mm1 /* shift right each pixel NshiftRight times */ + add esi, eax /* move to the next row of Src */ + movq mm2, [esi] /* load 8 bytes from Src */ + movq mm3, mm2 /* save MM2 in MM3 */ + add esi, 2 /* move ESI pointer 2 bytes right */ + punpcklbw mm2, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm3, mm0 /* unpack 4 high bytes into words */ + psrlw mm2, mm1 /* shift right each pixel NshiftRight times */ + psrlw mm3, mm1 /* shift right each pixel NshiftRight times */ + paddw mm4, mm2 /* add 4 low bytes to accumulator MM4 */ + paddw mm5, mm3 /* add 4 high bytes to accumulator MM5 */ + paddw mm4, mm2 /* add 4 low bytes to accumulator MM4 */ + paddw mm5, mm3 /* add 4 high bytes to accumulator MM5 */ + movq mm2, [esi] /* load 8 bytes from Src */ + movq mm3, mm2 /* save MM2 in MM3 */ + sub esi, 2 /* move ESI pointer back 2 bytes left */ + punpcklbw mm2, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm3, mm0 /* unpack 4 high bytes into words */ + psrlw mm2, mm1 /* shift right each pixel NshiftRight times */ + psrlw mm3, mm1 /* shift right each pixel NshiftRight times */ + paddw mm6, mm2 /* add 4 low bytes to accumulator MM6 */ + paddw mm7, mm3 /* add 4 high bytes to accumulator MM7 */ + paddw mm6, mm2 /* add 4 low bytes to accumulator MM6 */ + paddw mm7, mm3 /* add 4 high bytes to accumulator MM7 */ + add esi, eax /* move to the next row of Src */ + movq mm2, [esi] /* load 8 bytes from Src */ + movq mm3, mm2 /* save MM2 in MM3 */ + add esi, 2 /* move ESI pointer 2 bytes right */ + punpcklbw mm2, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm3, mm0 /* unpack 4 high bytes into words */ + psrlw mm2, mm1
/* shift right each pixel NshiftRight times */ + psrlw mm3, mm1 /* shift right each pixel NshiftRight times */ + paddw mm4, mm2 /* add 4 low bytes to accumulator MM4 */ + paddw mm5, mm3 /* add 4 high bytes to accumulator MM5 */ + movq mm2, [esi] /* load 8 bytes from Src */ + movq mm3, mm2 /* save MM2 in MM3 */ + sub esi, 2 /* move ESI pointer back 2 bytes left */ + punpcklbw mm2, mm0 /* unpack 4 low bytes into words */ + punpckhbw mm3, mm0 /* unpack 4 high bytes into words */ + psrlw mm2, mm1 /* shift right each pixel NshiftRight times */ + psrlw mm3, mm1 /* shift right each pixel NshiftRight times */ + paddw mm6, mm2 /* add 4 low bytes to accumulator MM6 */ + paddw mm7, mm3 /* add 4 high bytes to accumulator MM7 */ + /* ---, */ + movq mm2, mm4 /* copy MM4 into MM2 */ + psrlq mm4, 32 /* shift 2 left words to the right */ + psubw mm4, mm2 /* MM4 = MM4 - MM2 */ + movq mm3, mm6 /* copy MM6 into MM3 */ + psrlq mm6, 32 /* shift 2 left words to the right */ + psubw mm6, mm3 /* MM6 = MM6 - MM3 */ + punpckldq mm4, mm6 /* combine 2 words of MM6 and 2 words of MM4 */ + movq mm2, mm5 /* copy MM5 into MM2 */ + psrlq mm5, 32 /* shift 2 left words to the right */ + psubw mm5, mm2 /* MM5 = MM5 - MM2 */ + movq mm3, mm7 /* copy MM7 into MM3 */ + psrlq mm7, 32 /* shift 2 left words to the right */ + psubw mm7, mm3 /* MM7 = MM7 - MM3 */ + punpckldq mm5, mm7 /* combine 2 words of MM7 and 2 words of MM5 */ + /* Take abs values of MM4 and MM5 */ + movq mm6, mm4 /* copy MM4 into MM6 */ + movq mm7, mm5 /* copy MM5 into MM7 */ + psraw mm6, 15 /* fill MM6 words with word sign bit */ + psraw mm7, 15 /* fill MM7 words with word sign bit */ + pxor mm4, mm6 /* take 1's complement of only neg words */ + pxor mm5, mm7 /* take 1's complement of only neg words */ + psubsw mm4, mm6 /* add 1 to only neg words, W-(-1) or W-0 */ + psubsw mm5, mm7 /* add 1 to only neg words, W-(-1) or W-0 */ + packuswb mm4, mm5 /* combine and pack/saturate MM5 and MM4 */ + movq [edi], mm4 /* store result in Dest */ + /*
---, */ + sub esi, eax /* move to the current top row in Src */ + sub esi, eax + add esi, 8 /* move Src pointer to the next 8 pixels */ + add edi, 8 /* move Dest pointer to the next 8 pixels */ + /* ---, */ + dec ecx /* decrease loop counter COLUMNS */ + jnz L10412 /* check loop termination, proceed if required */ + mov esi, ebx /* restore most left current row Src address */ + mov edi, edx /* restore most left current row Dest address */ + add esi, eax /* move to the next row in Src */ + add edi, eax /* move to the next row in Dest */ + dec rows /* decrease loop counter ROWS */ + jnz L10410 /* check loop termination, proceed if required */ + /* ---, */ + emms /* exit MMX state */ + popa + } +#else + asm volatile + ("pusha \n\t" "pxor %%mm0, %%mm0 \n\t" /* zero MM0 */ + "mov %3, %%eax \n\t" /* load columns into EAX */ + "xor %%ebx, %%ebx \n\t" /* zero EBX */ + "mov %4, %%bl \n\t" /* load NRightShift into BL */ + "movd %%ebx, %%mm1 \n\t" /* copy NRightShift into MM1 */ + /* --- */ + "mov %1, %%esi \n\t" /* ESI = Src row 0 address */ + "mov %0, %%edi \n\t" /* load Dest address to EDI */ + "add %%eax, %%edi \n\t" /* EDI = EDI + columns */ + "inc %%edi \n\t" /* 1 byte offset from the left edge */ + /* initialize ROWS counter */ + "subl $2, %2 \n\t" /* do not use first and last rows */ + /* --- */ + ".L10410: \n\t" "mov %%eax, %%ecx \n\t" /* initialize COLUMNS counter */ + "shr $3, %%ecx \n\t" /* ECX/8 (MMX loads 8 bytes at a time) */ + "mov %%esi, %%ebx \n\t" /* save ESI in EBX */ + "mov %%edi, %%edx \n\t" /* save EDI in EDX */ + ".align 16 \n\t" /* 16 byte alignment of the loop entry */ + ".L10412: \n\t" + /* --- */ + "movq (%%esi), %%mm4 \n\t" /* load 8 bytes from Src */ + "movq %%mm4, %%mm5 \n\t" /* save MM4 in MM5 */ + "add $2, %%esi \n\t" /* move ESI pointer 2 bytes right */ + "punpcklbw %%mm0, %%mm4 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm5 \n\t" /* unpack 4 high bytes into words */ + "psrlw %%mm1, %%mm4 \n\t" /* shift right each pixel
NshiftRight times */ + "psrlw %%mm1, %%mm5 \n\t" /* shift right each pixel NshiftRight times */ + "movq (%%esi), %%mm6 \n\t" /* load 8 bytes from Src */ + "movq %%mm6, %%mm7 \n\t" /* save MM6 in MM7 */ + "sub $2, %%esi \n\t" /* move ESI pointer back 2 bytes left */ + "punpcklbw %%mm0, %%mm6 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm7 \n\t" /* unpack 4 high bytes into words */ + "psrlw %%mm1, %%mm6 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm1, %%mm7 \n\t" /* shift right each pixel NshiftRight times */ + "add %%eax, %%esi \n\t" /* move to the next row of Src */ + "movq (%%esi), %%mm2 \n\t" /* load 8 bytes from Src */ + "movq %%mm2, %%mm3 \n\t" /* save MM2 in MM3 */ + "add $2, %%esi \n\t" /* move ESI pointer 2 bytes right */ + "punpcklbw %%mm0, %%mm2 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm3 \n\t" /* unpack 4 high bytes into words */ + "psrlw %%mm1, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm1, %%mm3 \n\t" /* shift right each pixel NshiftRight times */ + "paddw %%mm2, %%mm4 \n\t" /* add 4 low bytes to accumulator MM4 */ + "paddw %%mm3, %%mm5 \n\t" /* add 4 high bytes to accumulator MM5 */ + "paddw %%mm2, %%mm4 \n\t" /* add 4 low bytes to accumulator MM4 */ + "paddw %%mm3, %%mm5 \n\t" /* add 4 high bytes to accumulator MM5 */ + "movq (%%esi), %%mm2 \n\t" /* load 8 bytes from Src */ + "movq %%mm2, %%mm3 \n\t" /* save MM2 in MM3 */ + "sub $2, %%esi \n\t" /* move ESI pointer back 2 bytes left */ + "punpcklbw %%mm0, %%mm2 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm3 \n\t" /* unpack 4 high bytes into words */ + "psrlw %%mm1, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm1, %%mm3 \n\t" /* shift right each pixel NshiftRight times */ + "paddw %%mm2, %%mm6 \n\t" /* add 4 low bytes to accumulator MM6 */ + "paddw %%mm3, %%mm7 \n\t" /* add 4 high bytes to accumulator MM7 */ + "paddw %%mm2, %%mm6 \n\t" /* add 4 low bytes to
accumulator MM6 */ + "paddw %%mm3, %%mm7 \n\t" /* add 4 high bytes to accumulator MM7 */ + "add %%eax, %%esi \n\t" /* move to the next row of Src */ + "movq (%%esi), %%mm2 \n\t" /* load 8 bytes from Src */ + "movq %%mm2, %%mm3 \n\t" /* save MM2 in MM3 */ + "add $2, %%esi \n\t" /* move ESI pointer 2 bytes right */ + "punpcklbw %%mm0, %%mm2 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm3 \n\t" /* unpack 4 high bytes into words */ + "psrlw %%mm1, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm1, %%mm3 \n\t" /* shift right each pixel NshiftRight times */ + "paddw %%mm2, %%mm4 \n\t" /* add 4 low bytes to accumulator MM4 */ + "paddw %%mm3, %%mm5 \n\t" /* add 4 high bytes to accumulator MM5 */ + "movq (%%esi), %%mm2 \n\t" /* load 8 bytes from Src */ + "movq %%mm2, %%mm3 \n\t" /* save MM2 in MM3 */ + "sub $2, %%esi \n\t" /* move ESI pointer back 2 bytes left */ + "punpcklbw %%mm0, %%mm2 \n\t" /* unpack 4 low bytes into words */ + "punpckhbw %%mm0, %%mm3 \n\t" /* unpack 4 high bytes into words */ + "psrlw %%mm1, %%mm2 \n\t" /* shift right each pixel NshiftRight times */ + "psrlw %%mm1, %%mm3 \n\t" /* shift right each pixel NshiftRight times */ + "paddw %%mm2, %%mm6 \n\t" /* add 4 low bytes to accumulator MM6 */ + "paddw %%mm3, %%mm7 \n\t" /* add 4 high bytes to accumulator MM7 */ + /* --- */ + "movq %%mm4, %%mm2 \n\t" /* copy MM4 into MM2 */ + "psrlq $32, %%mm4 \n\t" /* shift 2 left words to the right */ + "psubw %%mm2, %%mm4 \n\t" /* MM4 = MM4 - MM2 */ + "movq %%mm6, %%mm3 \n\t" /* copy MM6 into MM3 */ + "psrlq $32, %%mm6 \n\t" /* shift 2 left words to the right */ + "psubw %%mm3, %%mm6 \n\t" /* MM6 = MM6 - MM3 */ + "punpckldq %%mm6, %%mm4 \n\t" /* combine 2 words of MM6 and 2 words of MM4 */ + "movq %%mm5, %%mm2 \n\t" /* copy MM5 into MM2 */ + "psrlq $32, %%mm5 \n\t" /* shift 2 left words to the right */ + "psubw %%mm2, %%mm5 \n\t" /* MM5 = MM5 - MM2 */ + "movq %%mm7, %%mm3 \n\t" /* copy MM7 into MM3 */ + "psrlq $32, %%mm7
\n\t" /* shift 2 left words to the right */ + "psubw %%mm3, %%mm7 \n\t" /* MM7 = MM7 - MM3 */ + "punpckldq %%mm7, %%mm5 \n\t" /* combine 2 words of MM7 and 2 words of MM5 */ + /* Take abs values of MM4 and MM5 */ + "movq %%mm4, %%mm6 \n\t" /* copy MM4 into MM6 */ + "movq %%mm5, %%mm7 \n\t" /* copy MM5 into MM7 */ + "psraw $15, %%mm6 \n\t" /* fill MM6 words with word sign bit */ + "psraw $15, %%mm7 \n\t" /* fill MM7 words with word sign bit */ + "pxor %%mm6, %%mm4 \n\t" /* take 1's complement of only neg. words */ + "pxor %%mm7, %%mm5 \n\t" /* take 1's complement of only neg. words */ + "psubsw %%mm6, %%mm4 \n\t" /* add 1 to only neg. words, W-(-1) or W-0 */ + "psubsw %%mm7, %%mm5 \n\t" /* add 1 to only neg. words, W-(-1) or W-0 */ + "packuswb %%mm5, %%mm4 \n\t" /* combine and pack/saturate MM5 and MM4 */ + "movq %%mm4, (%%edi) \n\t" /* store result in Dest */ + /* --- */ + "sub %%eax, %%esi \n\t" /* move to the current top row in Src */ + "sub %%eax, %%esi \n\t" "add $8, %%esi \n\t" /* move Src pointer to the next 8 pixels */ + "add $8, %%edi \n\t" /* move Dest pointer to the next 8 pixels */ + /* --- */ + "dec %%ecx \n\t" /* decrease loop counter COLUMNS */ + "jnz .L10412 \n\t" /* check loop termination, proceed if required */ + "mov %%ebx, %%esi \n\t" /* restore most left current row Src address */ + "mov %%edx, %%edi \n\t" /* restore most left current row Dest address */ + "add %%eax, %%esi \n\t" /* move to the next row in Src */ + "add %%eax, %%edi \n\t" /* move to the next row in Dest */ + "decl %2 \n\t" /* decrease loop counter ROWS */ + "jnz .L10410 \n\t" /* check loop termination, proceed if required */ + /* --- */ + "emms \n\t" /* exit MMX state */ + "popa \n\t":"=m" (Dest) /* %0 */ + :"m"(Src), /* %1 */ + "m"(rows), /* %2 */ + "m"(columns), /* %3 */ + "m"(NRightShift) /* %4 */ + ); +#endif +#endif + return (0); + } else { + /* No non-MMX implementation yet */ + return (-1); + } +} + +/*!
+\brief Align stack to 32 byte boundary. Stores the previous ESP at the new, aligned stack top so that SDL_imageFilterRestoreStack can reload it; compiles to an empty function unless USE_MMX is defined. NOTE(review): guarded by USE_MMX only, whereas the Sobel filters in this file also require i386 -- confirm this is intended. +*/ +void SDL_imageFilterAlignStack(void) +{ +#ifdef USE_MMX +#if !defined(GCC__) + __asm + { /* --- stack alignment --- */ + mov ebx, esp /* load ESP into EBX */ + sub ebx, 4 /* reserve space on stack for old value of ESP */ + and ebx, -32 /* align EBX along a 32 byte boundary */ + mov [ebx], esp /* save old value of ESP at the new aligned stack top */ + mov esp, ebx /* align ESP along a 32 byte boundary */ + } +#else + asm volatile + ( /* --- stack alignment --- */ + "mov %%esp, %%ebx \n\t" /* load ESP into EBX */ + "sub $4, %%ebx \n\t" /* reserve space on stack for old value of ESP */ + "and $-32, %%ebx \n\t" /* align EBX along a 32 byte boundary */ + "mov %%esp, (%%ebx) \n\t" /* save old value of ESP at the new aligned stack top */ + "mov %%ebx, %%esp \n\t" /* align ESP along a 32 byte boundary */ + ::); +#endif +#endif +} + +/*! +\brief Restore previously aligned stack by reloading ESP from the value stored at the current stack top. Compiles to an empty function unless USE_MMX is defined. +*/ +void SDL_imageFilterRestoreStack(void) +{ +#ifdef USE_MMX +#if !defined(GCC__) + __asm + { /* --- restoring old stack --- */ + mov ebx, [esp] /* load old value of ESP */ + mov esp, ebx /* restore old value of ESP */ + } +#else + asm volatile + ( /* --- restoring old stack --- */ + "mov (%%esp), %%ebx \n\t" /* load old value of ESP */ + "mov %%ebx, %%esp \n\t" /* restore old value of ESP */ + ::); +#endif +#endif +} diff --git a/vendor/SDL3_gfx/SDL3_imageFilter.h b/vendor/SDL3_gfx/SDL3_imageFilter.h new file mode 100644 index 0000000..3a89c37 --- /dev/null +++ b/vendor/SDL3_gfx/SDL3_imageFilter.h @@ -0,0 +1,166 @@ +/* + +SDL3_imageFilter.h: byte-image "filter" routines + +Copyright (C) 2012-2014 Andreas Schiffler + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software.
+ +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not +claim that you wrote the original software. If you use this software +in a product, an acknowledgment in the product documentation would be +appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be +misrepresented as being the original software. + +3. This notice may not be removed or altered from any source +distribution. + +Andreas Schiffler -- aschiffler at ferzkopp dot net + +*/ + +#ifndef _SDL3_imageFilter_h +#define _SDL3_imageFilter_h + +/* Set up for C function definitions, even when using C++ */ +#ifdef __cplusplus +extern "C" { +#endif + + /* ---- Function Prototypes */ + +#ifdef _MSC_VER +# if defined(DLL_EXPORT) && !defined(LIBSDL3_GFX_DLL_IMPORT) +# define SDL3_IMAGEFILTER_SCOPE __declspec(dllexport) +# else +# ifdef LIBSDL3_GFX_DLL_IMPORT +# define SDL3_IMAGEFILTER_SCOPE __declspec(dllimport) +# endif +# endif +#endif +#ifndef SDL3_IMAGEFILTER_SCOPE +# define SDL3_IMAGEFILTER_SCOPE extern +#endif + + /* Comments: */ + /* 1.) MMX functions work best if all data blocks are aligned on a 32 bytes boundary. */ + /* 2.) Data that is not within an 8 byte boundary is processed using the C routine. */ + /* 3.) Convolution routines do not have C routines at this time. 
*/ + + // Detect MMX capability in CPU + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterMMXdetect(void); + + // Force use of MMX off (or turn possible use back on) + SDL3_IMAGEFILTER_SCOPE void SDL_imageFilterMMXoff(void); + SDL3_IMAGEFILTER_SCOPE void SDL_imageFilterMMXon(void); + + // + // All routines return: + // 0 OK + // -1 Error (internal error, parameter error) + // + + // SDL_imageFilterAdd: D = saturation255(S1 + S2) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterAdd(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterMean: D = S1/2 + S2/2 + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterMean(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterSub: D = saturation0(S1 - S2) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterSub(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterAbsDiff: D = | S1 - S2 | + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterAbsDiff(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterMult: D = saturation(S1 * S2) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterMult(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterMultNor: D = S1 * S2 (non-MMX) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterMultNor(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterMultDivby2: D = saturation255(S1/2 * S2) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterMultDivby2(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, + unsigned int length); + + // SDL_imageFilterMultDivby4: D = saturation255(S1/2 * S2/2) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterMultDivby4(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, + unsigned int length); + + // SDL_imageFilterBitAnd: D = S1 & S2 + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterBitAnd(unsigned 
char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterBitOr: D = S1 | S2 + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterBitOr(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterDiv: D = S1 / S2 (non-MMX) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterDiv(unsigned char *Src1, unsigned char *Src2, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterBitNegation: D = !S + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterBitNegation(unsigned char *Src1, unsigned char *Dest, unsigned int length); + + // SDL_imageFilterAddByte: D = saturation255(S + C) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterAddByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C); + + // SDL_imageFilterAddUint: D = saturation255(S + (uint)C) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterAddUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C); + + // SDL_imageFilterAddByteToHalf: D = saturation255(S/2 + C) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterAddByteToHalf(unsigned char *Src1, unsigned char *Dest, unsigned int length, + unsigned char C); + + // SDL_imageFilterSubByte: D = saturation0(S - C) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterSubByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C); + + // SDL_imageFilterSubUint: D = saturation0(S - (uint)C) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterSubUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned int C); + + // SDL_imageFilterShiftRight: D = saturation0(S >> N) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterShiftRight(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N); + + // SDL_imageFilterShiftRightUint: D = saturation0((uint)S >> N) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterShiftRightUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N); + + // SDL_imageFilterMultByByte: 
D = saturation255(S * C) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char C); + + // SDL_imageFilterShiftRightAndMultByByte: D = saturation255((S >> N) * C) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterShiftRightAndMultByByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, + unsigned char N, unsigned char C); + + // SDL_imageFilterShiftLeftByte: D = (S << N) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterShiftLeftByte(unsigned char *Src1, unsigned char *Dest, unsigned int length, + unsigned char N); + + // SDL_imageFilterShiftLeftUint: D = ((uint)S << N) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterShiftLeftUint(unsigned char *Src1, unsigned char *Dest, unsigned int length, + unsigned char N); + + // SDL_imageFilterShiftLeft: D = saturation255(S << N) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterShiftLeft(unsigned char *Src1, unsigned char *Dest, unsigned int length, unsigned char N); + + // SDL_imageFilterBinarizeUsingThreshold: D = S >= T ? 
255:0 + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterBinarizeUsingThreshold(unsigned char *Src1, unsigned char *Dest, unsigned int length, + unsigned char T); + + // SDL_imageFilterClipToRange: D = (S >= Tmin) & (S <= Tmax) 255:0 + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterClipToRange(unsigned char *Src1, unsigned char *Dest, unsigned int length, + unsigned char Tmin, unsigned char Tmax); + + // SDL_imageFilterNormalizeLinear: D = saturation255((Nmax - Nmin)/(Cmax - Cmin)*(S - Cmin) + Nmin) + SDL3_IMAGEFILTER_SCOPE int SDL_imageFilterNormalizeLinear(unsigned char *Src, unsigned char *Dest, unsigned int length, int Cmin, + int Cmax, int Nmin, int Nmax); + + /* Ends C function definitions when using C++ */ +#ifdef __cplusplus +} +#endif + +#endif /* _SDL3_imageFilter_h */ diff --git a/vendor/SDL3_gfx/SDL3_rotozoom.c b/vendor/SDL3_gfx/SDL3_rotozoom.c new file mode 100644 index 0000000..0ba72ac --- /dev/null +++ b/vendor/SDL3_gfx/SDL3_rotozoom.c @@ -0,0 +1,1636 @@ +/* + +SDL3_rotozoom.c: rotozoomer, zoomer and shrinker for 32bit or 8bit surfaces + +Copyright (C) 2012-2014 Andreas Schiffler + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not +claim that you wrote the original software. If you use this software +in a product, an acknowledgment in the product documentation would be +appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be +misrepresented as being the original software. + +3. This notice may not be removed or altered from any source +distribution. 
Andreas Schiffler -- aschiffler at ferzkopp dot net

*/

#ifdef WIN32
/* NOTE(review): the header name after this #include is missing in this view
   (presumably stripped during extraction) -- confirm against the vendored file. */
#include
#endif

/* NOTE(review): the header names of the next two #include directives are
   missing in this view as well -- confirm against the vendored file. */
#include
#include

#include "SDL3_rotozoom.h"

/*!
\brief Returns maximum of two numbers a and b.

Function-like macro: the selected argument is evaluated twice, so arguments
must be free of side effects.
*/
#define MAX(a,b)    (((a) > (b)) ? (a) : (b))

/*!
\brief Number of guard rows added to destination surfaces.

This is a simple but effective workaround for observed issues.
These rows allocate extra memory and are then hidden from the surface.
Rows are added to the end of destination surfaces when they are allocated.
This catches any potential overflows which seem to happen with
just the right src image dimensions and scale/rotation and can lead
to a situation where the program can segfault.
*/
#define GUARD_ROWS (2)

/*!
\brief Lower limit of absolute zoom factor or rotation degrees.
*/
#define VALUE_LIMIT 0.001

/*!
\brief Returns colorkey info for a surface

Calls SDL_GetSurfaceColorKey() and returns the key value it stores. The
result of that call is not checked; 'key' starts out as 0, so 0 is returned
whenever SDL leaves it untouched.

\param src The surface to query.

\return The color key stored by SDL_GetSurfaceColorKey(), or the initial 0.
*/
Uint32 _colorkey(SDL_Surface *src)
{
    Uint32 key = 0;
    SDL_GetSurfaceColorKey(src, &key);
    return key;
}


/*!
\brief Internal 32 bit integer-factor averaging Shrinker.

Shrinks 32 bit RGBA/ABGR 'src' surface to 'dst' surface.
Averages color and alpha values of src pixels to calculate dst pixels.
Assumes src and dst surfaces are of 32 bit depth.
Assumes dst surface was allocated with the correct dimensions.

\param src The surface to shrink (input).
\param dst The shrunken surface (output).
\param factorx The horizontal shrinking ratio.
\param factory The vertical shrinking ratio.

\return 0 for success or -1 for error.
+*/ +int _shrinkSurfaceRGBA(SDL_Surface * src, SDL_Surface * dst, int factorx, int factory) +{ + int x, y, dx, dy, dgap, ra, ga, ba, aa; + int n_average; + SDL_Color *sp, *osp, *oosp; + SDL_Color *dp; + + /* + * Averaging integer shrink + */ + + /* Precalculate division factor */ + n_average = factorx*factory; + + /* + * Scan destination + */ + sp = (SDL_Color *) src->pixels; + + dp = (SDL_Color *) dst->pixels; + dgap = dst->pitch - dst->w * 4; + + for (y = 0; y < dst->h; y++) { + + osp=sp; + for (x = 0; x < dst->w; x++) { + + /* Trace out source box and accumulate */ + oosp=sp; + ra=ga=ba=aa=0; + for (dy=0; dy < factory; dy++) { + for (dx=0; dx < factorx; dx++) { + ra += sp->r; + ga += sp->g; + ba += sp->b; + aa += sp->a; + + sp++; + } + /* src dx loop */ + sp = (SDL_Color *)((Uint8*)sp + (src->pitch - 4*factorx)); // next y + } + /* src dy loop */ + + /* next box-x */ + sp = (SDL_Color *)((Uint8*)oosp + 4*factorx); + + /* Store result in destination */ + dp->r = ra/n_average; + dp->g = ga/n_average; + dp->b = ba/n_average; + dp->a = aa/n_average; + + /* + * Advance destination pointer + */ + dp++; + } + /* dst x loop */ + + /* next box-y */ + sp = (SDL_Color *)((Uint8*)osp + src->pitch*factory); + + /* + * Advance destination pointers + */ + dp = (SDL_Color *) ((Uint8 *) dp + dgap); + } + /* dst y loop */ + + return (0); +} + +/*! +\brief Internal 8 bit integer-factor averaging shrinker. + +Shrinks 8bit Y 'src' surface to 'dst' surface. +Averages color (brightness) values values of src pixels to calculate dst pixels. +Assumes src and dst surfaces are of 8 bit depth. +Assumes dst surface was allocated with the correct dimensions. + +\param src The surface to shrink (input). +\param dst The shrunken surface (output). +\param factorx The horizontal shrinking ratio. +\param factory The vertical shrinking ratio. + +\return 0 for success or -1 for error. 
+*/ +int _shrinkSurfaceY(SDL_Surface * src, SDL_Surface * dst, int factorx, int factory) +{ + int x, y, dx, dy, dgap, a; + int n_average; + Uint8 *sp, *osp, *oosp; + Uint8 *dp; + + /* + * Averaging integer shrink + */ + + /* Precalculate division factor */ + n_average = factorx*factory; + + /* + * Scan destination + */ + sp = (Uint8 *) src->pixels; + + dp = (Uint8 *) dst->pixels; + dgap = dst->pitch - dst->w; + + for (y = 0; y < dst->h; y++) { + + osp=sp; + for (x = 0; x < dst->w; x++) { + + /* Trace out source box and accumulate */ + oosp=sp; + a=0; + for (dy=0; dy < factory; dy++) { + for (dx=0; dx < factorx; dx++) { + a += (*sp); + /* next x */ + sp++; + } + /* end src dx loop */ + /* next y */ + sp = (Uint8 *)((Uint8*)sp + (src->pitch - factorx)); + } + /* end src dy loop */ + + /* next box-x */ + sp = (Uint8 *)((Uint8*)oosp + factorx); + + /* Store result in destination */ + *dp = a/n_average; + + /* + * Advance destination pointer + */ + dp++; + } + /* end dst x loop */ + + /* next box-y */ + sp = (Uint8 *)((Uint8*)osp + src->pitch*factory); + + /* + * Advance destination pointers + */ + dp = (Uint8 *)((Uint8 *)dp + dgap); + } + /* end dst y loop */ + + return (0); +} + +/*! +\brief Internal 32 bit Zoomer with optional anti-aliasing by bilinear interpolation. + +Zooms 32 bit RGBA/ABGR 'src' surface to 'dst' surface. +Assumes src and dst surfaces are of 32 bit depth. +Assumes dst surface was allocated with the correct dimensions. + +\param src The surface to zoom (input). +\param dst The zoomed surface (output). +\param flipx Flag indicating if the image should be horizontally flipped. +\param flipy Flag indicating if the image should be vertically flipped. +\param smooth Antialiasing flag; set to SMOOTHING_ON to enable. + +\return 0 for success or -1 for error. 
+*/ +int _zoomSurfaceRGBA(SDL_Surface * src, SDL_Surface * dst, int flipx, int flipy, int smooth) +{ + int x, y, sx, sy, ssx, ssy, *sax, *say, *csax, *csay, *salast, csx, csy, ex, ey, cx, cy, sstep, sstepx, sstepy; + SDL_Color *c00, *c01, *c10, *c11; + SDL_Color *sp, *csp, *dp; + int spixelgap, spixelw, spixelh, dgap, t1, t2; + + /* + * Allocate memory for row/column increments + */ + if ((sax = (int *) malloc((dst->w + 1) * sizeof(Uint32))) == NULL) { + return (-1); + } + if ((say = (int *) malloc((dst->h + 1) * sizeof(Uint32))) == NULL) { + free(sax); + return (-1); + } + + /* + * Precalculate row increments + */ + spixelw = (src->w - 1); + spixelh = (src->h - 1); + if (smooth) { + sx = (int) (65536.0 * (float) spixelw / (float) (dst->w - 1)); + sy = (int) (65536.0 * (float) spixelh / (float) (dst->h - 1)); + } else { + sx = (int) (65536.0 * (float) (src->w) / (float) (dst->w)); + sy = (int) (65536.0 * (float) (src->h) / (float) (dst->h)); + } + + /* Maximum scaled source size */ + ssx = (src->w << 16) - 1; + ssy = (src->h << 16) - 1; + + /* Precalculate horizontal row increments */ + csx = 0; + csax = sax; + for (x = 0; x <= dst->w; x++) { + *csax = csx; + csax++; + csx += sx; + + /* Guard from overflows */ + if (csx > ssx) { + csx = ssx; + } + } + + /* Precalculate vertical row increments */ + csy = 0; + csay = say; + for (y = 0; y <= dst->h; y++) { + *csay = csy; + csay++; + csy += sy; + + /* Guard from overflows */ + if (csy > ssy) { + csy = ssy; + } + } + + sp = (SDL_Color *) src->pixels; + dp = (SDL_Color *) dst->pixels; + dgap = dst->pitch - dst->w * 4; + spixelgap = src->pitch/4; + + if (flipx) sp += spixelw; + if (flipy) sp += (spixelgap * spixelh); + + /* + * Switch between interpolating and non-interpolating code + */ + if (smooth) { + + /* + * Interpolating Zoom + */ + csay = say; + for (y = 0; y < dst->h; y++) { + csp = sp; + csax = sax; + for (x = 0; x < dst->w; x++) { + /* + * Setup color source pointers + */ + ex = (*csax & 0xffff); + ey = (*csay 
& 0xffff); + cx = (*csax >> 16); + cy = (*csay >> 16); + sstepx = cx < spixelw; + sstepy = cy < spixelh; + c00 = sp; + c01 = sp; + c10 = sp; + if (sstepy) { + if (flipy) { + c10 -= spixelgap; + } else { + c10 += spixelgap; + } + } + c11 = c10; + if (sstepx) { + if (flipx) { + c01--; + c11--; + } else { + c01++; + c11++; + } + } + + /* + * Draw and interpolate colors + */ + t1 = ((((c01->r - c00->r) * ex) >> 16) + c00->r) & 0xff; + t2 = ((((c11->r - c10->r) * ex) >> 16) + c10->r) & 0xff; + dp->r = (((t2 - t1) * ey) >> 16) + t1; + t1 = ((((c01->g - c00->g) * ex) >> 16) + c00->g) & 0xff; + t2 = ((((c11->g - c10->g) * ex) >> 16) + c10->g) & 0xff; + dp->g = (((t2 - t1) * ey) >> 16) + t1; + t1 = ((((c01->b - c00->b) * ex) >> 16) + c00->b) & 0xff; + t2 = ((((c11->b - c10->b) * ex) >> 16) + c10->b) & 0xff; + dp->b = (((t2 - t1) * ey) >> 16) + t1; + t1 = ((((c01->a - c00->a) * ex) >> 16) + c00->a) & 0xff; + t2 = ((((c11->a - c10->a) * ex) >> 16) + c10->a) & 0xff; + dp->a = (((t2 - t1) * ey) >> 16) + t1; + /* + * Advance source pointer x + */ + salast = csax; + csax++; + sstep = (*csax >> 16) - (*salast >> 16); + if (flipx) { + sp -= sstep; + } else { + sp += sstep; + } + + /* + * Advance destination pointer x + */ + dp++; + } + /* + * Advance source pointer y + */ + salast = csay; + csay++; + sstep = (*csay >> 16) - (*salast >> 16); + sstep *= spixelgap; + if (flipy) { + sp = csp - sstep; + } else { + sp = csp + sstep; + } + + /* + * Advance destination pointer y + */ + dp = (SDL_Color *) ((Uint8 *) dp + dgap); + } + } else { + /* + * Non-Interpolating Zoom + */ + csay = say; + for (y = 0; y < dst->h; y++) { + csp = sp; + csax = sax; + for (x = 0; x < dst->w; x++) { + /* + * Draw + */ + *dp = *sp; + + /* + * Advance source pointer x + */ + salast = csax; + csax++; + sstep = (*csax >> 16) - (*salast >> 16); + if (flipx) sstep = -sstep; + sp += sstep; + + /* + * Advance destination pointer x + */ + dp++; + } + /* + * Advance source pointer y + */ + salast = csay; + csay++; + 
sstep = (*csay >> 16) - (*salast >> 16); + sstep *= spixelgap; + if (flipy) sstep = -sstep; + sp = csp + sstep; + + /* + * Advance destination pointer y + */ + dp = (SDL_Color *) ((Uint8 *) dp + dgap); + } + } + + /* + * Remove temp arrays + */ + free(sax); + free(say); + + return (0); +} + +/*! + +\brief Internal 8 bit Zoomer without smoothing. + +Zooms 8bit palette/Y 'src' surface to 'dst' surface. +Assumes src and dst surfaces are of 8 bit depth. +Assumes dst surface was allocated with the correct dimensions. + +\param src The surface to zoom (input). +\param dst The zoomed surface (output). +\param flipx Flag indicating if the image should be horizontally flipped. +\param flipy Flag indicating if the image should be vertically flipped. + +\return 0 for success or -1 for error. +*/ +int _zoomSurfaceY(SDL_Surface * src, SDL_Surface * dst, int flipx, int flipy) +{ + int x, y; + Uint32 *sax, *say, *csax, *csay; + int csx, csy; + Uint8 *sp, *dp, *csp; + int dgap; + + /* + * Allocate memory for row increments + */ + if ((sax = (Uint32 *) malloc((dst->w + 1) * sizeof(Uint32))) == NULL) { + return (-1); + } + if ((say = (Uint32 *) malloc((dst->h + 1) * sizeof(Uint32))) == NULL) { + free(sax); + return (-1); + } + + /* + * Pointer setup + */ + sp = csp = (Uint8 *) src->pixels; + dp = (Uint8 *) dst->pixels; + dgap = dst->pitch - dst->w; + + if (flipx) csp += (src->w-1); + if (flipy) csp = ( (Uint8*)csp + src->pitch*(src->h-1) ); + + /* + * Precalculate row increments + */ + csx = 0; + csax = sax; + for (x = 0; x < dst->w; x++) { + csx += src->w; + *csax = 0; + while (csx >= dst->w) { + csx -= dst->w; + (*csax)++; + } + (*csax) = (*csax) * (flipx ? -1 : 1); + csax++; + } + csy = 0; + csay = say; + for (y = 0; y < dst->h; y++) { + csy += src->h; + *csay = 0; + while (csy >= dst->h) { + csy -= dst->h; + (*csay)++; + } + (*csay) = (*csay) * (flipy ? 
-1 : 1); + csay++; + } + + /* + * Draw + */ + csay = say; + for (y = 0; y < dst->h; y++) { + csax = sax; + sp = csp; + for (x = 0; x < dst->w; x++) { + /* + * Draw + */ + *dp = *sp; + /* + * Advance source pointers + */ + sp += (*csax); + csax++; + /* + * Advance destination pointer + */ + dp++; + } + /* + * Advance source pointer (for row) + */ + csp += ((*csay) * src->pitch); + csay++; + + /* + * Advance destination pointers + */ + dp += dgap; + } + + /* + * Remove temp arrays + */ + free(sax); + free(say); + + return (0); +} + +/*! +\brief Internal 32 bit rotozoomer with optional anti-aliasing. + +Rotates and zooms 32 bit RGBA/ABGR 'src' surface to 'dst' surface based on the control +parameters by scanning the destination surface and applying optionally anti-aliasing +by bilinear interpolation. +Assumes src and dst surfaces are of 32 bit depth. +Assumes dst surface was allocated with the correct dimensions. + +\param src Source surface. +\param dst Destination surface. +\param cx Horizontal center coordinate. +\param cy Vertical center coordinate. +\param isin Integer version of sine of angle. +\param icos Integer version of cosine of angle. +\param flipx Flag indicating horizontal mirroring should be applied. +\param flipy Flag indicating vertical mirroring should be applied. +\param smooth Flag indicating anti-aliasing should be used. 
+*/ +void _transformSurfaceRGBA(SDL_Surface * src, SDL_Surface * dst, int cx, int cy, int isin, int icos, int flipx, int flipy, int smooth) +{ + int x, y, t1, t2, dx, dy, xd, yd, sdx, sdy, ax, ay, ex, ey, sw, sh; + SDL_Color c00, c01, c10, c11, cswap; + SDL_Color *pc, *sp; + int gap; + + /* + * Variable setup + */ + xd = ((src->w - dst->w) << 15); + yd = ((src->h - dst->h) << 15); + ax = (cx << 16) - (icos * cx); + ay = (cy << 16) - (isin * cx); + sw = src->w - 1; + sh = src->h - 1; + pc = (SDL_Color *) dst->pixels; + gap = dst->pitch - dst->w * 4; + + /* + * Switch between interpolating and non-interpolating code + */ + if (smooth) { + for (y = 0; y < dst->h; y++) { + dy = cy - y; + sdx = (ax + (isin * dy)) + xd; + sdy = (ay - (icos * dy)) + yd; + for (x = 0; x < dst->w; x++) { + dx = (sdx >> 16); + dy = (sdy >> 16); + if (flipx) dx = sw - dx; + if (flipy) dy = sh - dy; + if ((dx > -1) && (dy > -1) && (dx < (src->w-1)) && (dy < (src->h-1))) { + sp = (SDL_Color *)src->pixels;; + sp += ((src->pitch/4) * dy); + sp += dx; + c00 = *sp; + sp += 1; + c01 = *sp; + sp += (src->pitch/4); + c11 = *sp; + sp -= 1; + c10 = *sp; + if (flipx) { + cswap = c00; c00=c01; c01=cswap; + cswap = c10; c10=c11; c11=cswap; + } + if (flipy) { + cswap = c00; c00=c10; c10=cswap; + cswap = c01; c01=c11; c11=cswap; + } + /* + * Interpolate colors + */ + ex = (sdx & 0xffff); + ey = (sdy & 0xffff); + t1 = ((((c01.r - c00.r) * ex) >> 16) + c00.r) & 0xff; + t2 = ((((c11.r - c10.r) * ex) >> 16) + c10.r) & 0xff; + pc->r = (((t2 - t1) * ey) >> 16) + t1; + t1 = ((((c01.g - c00.g) * ex) >> 16) + c00.g) & 0xff; + t2 = ((((c11.g - c10.g) * ex) >> 16) + c10.g) & 0xff; + pc->g = (((t2 - t1) * ey) >> 16) + t1; + t1 = ((((c01.b - c00.b) * ex) >> 16) + c00.b) & 0xff; + t2 = ((((c11.b - c10.b) * ex) >> 16) + c10.b) & 0xff; + pc->b = (((t2 - t1) * ey) >> 16) + t1; + t1 = ((((c01.a - c00.a) * ex) >> 16) + c00.a) & 0xff; + t2 = ((((c11.a - c10.a) * ex) >> 16) + c10.a) & 0xff; + pc->a = (((t2 - t1) * ey) >> 16) + 
t1; + } + sdx += icos; + sdy += isin; + pc++; + } + pc = (SDL_Color *) ((Uint8 *) pc + gap); + } + } else { + for (y = 0; y < dst->h; y++) { + dy = cy - y; + sdx = (ax + (isin * dy)) + xd; + sdy = (ay - (icos * dy)) + yd; + for (x = 0; x < dst->w; x++) { + dx = (short) (sdx >> 16); + dy = (short) (sdy >> 16); + if (flipx) dx = (src->w-1)-dx; + if (flipy) dy = (src->h-1)-dy; + if ((dx >= 0) && (dy >= 0) && (dx < src->w) && (dy < src->h)) { + sp = (SDL_Color *) ((Uint8 *) src->pixels + src->pitch * dy); + sp += dx; + *pc = *sp; + } + sdx += icos; + sdy += isin; + pc++; + } + pc = (SDL_Color *) ((Uint8 *) pc + gap); + } + } +} + +/*! + +\brief Rotates and zooms 8 bit palette/Y 'src' surface to 'dst' surface without smoothing. + +Rotates and zooms 8 bit RGBA/ABGR 'src' surface to 'dst' surface based on the control +parameters by scanning the destination surface. +Assumes src and dst surfaces are of 8 bit depth. +Assumes dst surface was allocated with the correct dimensions. + +\param src Source surface. +\param dst Destination surface. +\param cx Horizontal center coordinate. +\param cy Vertical center coordinate. +\param isin Integer version of sine of angle. +\param icos Integer version of cosine of angle. +\param flipx Flag indicating horizontal mirroring should be applied. +\param flipy Flag indicating vertical mirroring should be applied. 
+*/ +void transformSurfaceY(SDL_Surface * src, SDL_Surface * dst, int cx, int cy, int isin, int icos, int flipx, int flipy) +{ + int x, y, dx, dy, xd, yd, sdx, sdy, ax, ay; + Uint8 *pc, *sp; + int gap; + + /* + * Variable setup + */ + xd = ((src->w - dst->w) << 15); + yd = ((src->h - dst->h) << 15); + ax = (cx << 16) - (icos * cx); + ay = (cy << 16) - (isin * cx); + pc = (Uint8 *) dst->pixels; + gap = dst->pitch - dst->w; + /* + * Clear surface to colorkey + */ + memset(pc, (int)(_colorkey(src) & 0xff), dst->pitch * dst->h); + /* + * Iterate through destination surface + */ + for (y = 0; y < dst->h; y++) { + dy = cy - y; + sdx = (ax + (isin * dy)) + xd; + sdy = (ay - (icos * dy)) + yd; + for (x = 0; x < dst->w; x++) { + dx = (short) (sdx >> 16); + dy = (short) (sdy >> 16); + if (flipx) dx = (src->w-1)-dx; + if (flipy) dy = (src->h-1)-dy; + if ((dx >= 0) && (dy >= 0) && (dx < src->w) && (dy < src->h)) { + sp = (Uint8 *) (src->pixels); + sp += (src->pitch * dy + dx); + *pc = *sp; + } + sdx += icos; + sdy += isin; + pc++; + } + pc += gap; + } +} + +/*! +\brief Rotates a 8/16/24/32 bit surface in increments of 90 degrees. + +Specialized 90 degree rotator which rotates a 'src' surface in 90 degree +increments clockwise returning a new surface. Faster than rotozoomer since +no scanning or interpolation takes place. Input surface must be 8/16/24/32 bit. +(code contributed by J. Schiller, improved by C. Allport and A. Schiffler) + +\param src Source surface to rotate. +\param numClockwiseTurns Number of clockwise 90 degree turns to apply to the source. + +\returns The new, rotated surface; or NULL for surfaces with incorrect input format. 
+*/ +SDL_Surface* rotateSurface90Degrees(SDL_Surface* src, int numClockwiseTurns) +{ + int row, col, newWidth, newHeight; + int bpp, bpr; + SDL_Surface* dst; + Uint8* srcBuf; + Uint8* dstBuf; + int normalizedClockwiseTurns; + const SDL_PixelFormatDetails* details; + + /* Has to be a valid surface pointer and be a Nbit surface where n is divisible by 8 */ + if (!src || + !src->format) { + SDL_SetError("NULL source surface or source surface format"); + return NULL; + } + + details = SDL_GetPixelFormatDetails(src->format); + if ((details->bits_per_pixel % 8) != 0) { + SDL_SetError("Invalid source surface bit depth"); + return NULL; + } + + /* normalize numClockwiseTurns */ + normalizedClockwiseTurns = (numClockwiseTurns % 4); + if (normalizedClockwiseTurns < 0) { + normalizedClockwiseTurns += 4; + } + + /* If turns are even, our new width/height will be the same as the source surface */ + if (normalizedClockwiseTurns % 2) { + newWidth = src->h; + newHeight = src->w; + } else { + newWidth = src->w; + newHeight = src->h; + } + + dst = SDL_CreateSurface(newWidth, newHeight, src->format); + if(!dst) { + SDL_SetError("Could not create destination surface"); + return NULL; + } + + if (SDL_MUSTLOCK(src)) { + SDL_LockSurface(src); + } + if (SDL_MUSTLOCK(dst)) { + SDL_LockSurface(dst); + } + + /* Calculate byte-per-pixel */ + bpp = details->bits_per_pixel / 8; + + switch(normalizedClockwiseTurns) { + case 0: /* Make a copy of the surface */ + { + /* Unfortunately SDL_BlitSurface cannot be used to make a copy of the surface + since it does not preserve alpha. */ + + if (src->pitch == dst->pitch) { + /* If the pitch is the same for both surfaces, the memory can be copied all at once. 
*/ + memcpy(dst->pixels, src->pixels, (src->h * src->pitch)); + } + else + { + /* If the pitch differs, copy each row separately */ + srcBuf = (Uint8*)(src->pixels); + dstBuf = (Uint8*)(dst->pixels); + bpr = src->w * bpp; + for (row = 0; row < src->h; row++) { + memcpy(dstBuf, srcBuf, bpr); + srcBuf += src->pitch; + dstBuf += dst->pitch; + } + } + } + break; + + /* rotate clockwise */ + case 1: /* rotated 90 degrees clockwise */ + { + for (row = 0; row < src->h; ++row) { + srcBuf = (Uint8*)(src->pixels) + (row * src->pitch); + dstBuf = (Uint8*)(dst->pixels) + (dst->w - row - 1) * bpp; + for (col = 0; col < src->w; ++col) { + memcpy (dstBuf, srcBuf, bpp); + srcBuf += bpp; + dstBuf += dst->pitch; + } + } + } + break; + + case 2: /* rotated 180 degrees clockwise */ + { + for (row = 0; row < src->h; ++row) { + srcBuf = (Uint8*)(src->pixels) + (row * src->pitch); + dstBuf = (Uint8*)(dst->pixels) + ((dst->h - row - 1) * dst->pitch) + (dst->w - 1) * bpp; + for (col = 0; col < src->w; ++col) { + memcpy (dstBuf, srcBuf, bpp); + srcBuf += bpp; + dstBuf -= bpp; + } + } + } + break; + + case 3: /* rotated 270 degrees clockwise */ + { + for (row = 0; row < src->h; ++row) { + srcBuf = (Uint8*)(src->pixels) + (row * src->pitch); + dstBuf = (Uint8*)(dst->pixels) + (row * bpp) + ((dst->h - 1) * dst->pitch); + for (col = 0; col < src->w; ++col) { + memcpy (dstBuf, srcBuf, bpp); + srcBuf += bpp; + dstBuf -= dst->pitch; + } + } + } + break; + } + /* end switch */ + + if (SDL_MUSTLOCK(src)) { + SDL_UnlockSurface(src); + } + if (SDL_MUSTLOCK(dst)) { + SDL_UnlockSurface(dst); + } + + return dst; +} + + +/*! +\brief Internal target surface sizing function for rotozooms with trig result return. + +\param width The source surface width. +\param height The source surface height. +\param angle The angle to rotate in degrees. +\param zoomx The horizontal scaling factor. +\param zoomy The vertical scaling factor. +\param dstwidth The calculated width of the destination surface. 
+\param dstheight The calculated height of the destination surface. +\param canglezoom The sine of the angle adjusted by the zoom factor. +\param sanglezoom The cosine of the angle adjusted by the zoom factor. + +*/ +void _rotozoomSurfaceSizeTrig(int width, int height, double angle, double zoomx, double zoomy, + int *dstwidth, int *dstheight, + double *canglezoom, double *sanglezoom) +{ + double x, y, cx, cy, sx, sy; + double radangle; + int dstwidthhalf, dstheighthalf; + + /* + * Determine destination width and height by rotating a centered source box + */ + radangle = angle * (M_PI / 180.0); + *sanglezoom = sin(radangle); + *canglezoom = cos(radangle); + *sanglezoom *= zoomx; + *canglezoom *= zoomy; + x = (double)(width / 2); + y = (double)(height / 2); + cx = *canglezoom * x; + cy = *canglezoom * y; + sx = *sanglezoom * x; + sy = *sanglezoom * y; + + dstwidthhalf = MAX((int) + ceil(MAX(MAX(MAX(fabs(cx + sy), fabs(cx - sy)), fabs(-cx + sy)), fabs(-cx - sy))), 1); + dstheighthalf = MAX((int) + ceil(MAX(MAX(MAX(fabs(sx + cy), fabs(sx - cy)), fabs(-sx + cy)), fabs(-sx - cy))), 1); + *dstwidth = 2 * dstwidthhalf; + *dstheight = 2 * dstheighthalf; +} + +/*! +\brief Returns the size of the resulting target surface for a rotozoomSurfaceXY() call. + +\param width The source surface width. +\param height The source surface height. +\param angle The angle to rotate in degrees. +\param zoomx The horizontal scaling factor. +\param zoomy The vertical scaling factor. +\param dstwidth The calculated width of the rotozoomed destination surface. +\param dstheight The calculated height of the rotozoomed destination surface. +*/ +void rotozoomSurfaceSizeXY(int width, int height, double angle, double zoomx, double zoomy, int *dstwidth, int *dstheight) +{ + double dummy_sanglezoom, dummy_canglezoom; + + _rotozoomSurfaceSizeTrig(width, height, angle, zoomx, zoomy, dstwidth, dstheight, &dummy_sanglezoom, &dummy_canglezoom); +} + +/*! 
+\brief Returns the size of the resulting target surface for a rotozoomSurface() call. + +\param width The source surface width. +\param height The source surface height. +\param angle The angle to rotate in degrees. +\param zoom The scaling factor. +\param dstwidth The calculated width of the rotozoomed destination surface. +\param dstheight The calculated height of the rotozoomed destination surface. +*/ +void rotozoomSurfaceSize(int width, int height, double angle, double zoom, int *dstwidth, int *dstheight) +{ + double dummy_sanglezoom, dummy_canglezoom; + + _rotozoomSurfaceSizeTrig(width, height, angle, zoom, zoom, dstwidth, dstheight, &dummy_sanglezoom, &dummy_canglezoom); +} + +/*! +\brief Rotates and zooms a surface and optional anti-aliasing. + +Rotates and zoomes a 32bit or 8bit 'src' surface to newly created 'dst' surface. +'angle' is the rotation in degrees and 'zoom' a scaling factor. If 'smooth' is set +then the destination 32bit surface is anti-aliased. If the surface is not 8bit +or 32bit RGBA/ABGR it will be converted into a 32bit RGBA format on the fly. + +\param src The surface to rotozoom. +\param angle The angle to rotate in degrees. +\param zoom The scaling factor. +\param smooth Antialiasing flag; set to SMOOTHING_ON to enable. + +\return The new rotozoomed surface. +*/ +SDL_Surface *rotozoomSurface(SDL_Surface * src, double angle, double zoom, int smooth) +{ + return rotozoomSurfaceXY(src, angle, zoom, zoom, smooth); +} + +/*! +\brief Rotates and zooms a surface with different horizontal and vertival scaling factors and optional anti-aliasing. + +Rotates and zooms a 32bit or 8bit 'src' surface to newly created 'dst' surface. +'angle' is the rotation in degrees, 'zoomx and 'zoomy' scaling factors. If 'smooth' is set +then the destination 32bit surface is anti-aliased. If the surface is not 8bit +or 32bit RGBA/ABGR it will be converted into a 32bit RGBA format on the fly. + +\param src The surface to rotozoom. 
+\param angle The angle to rotate in degrees.
+\param zoomx The horizontal scaling factor.
+\param zoomy The vertical scaling factor.
+\param smooth Antialiasing flag; set to SMOOTHING_ON to enable.
+
+\return The new rotozoomed surface, or NULL if 'src' is NULL, its pixel format
+details cannot be queried, a surface cannot be created, or the source surface
+cannot be locked. The returned surface is newly created and is not destroyed here.
+*/
+SDL_Surface *rotozoomSurfaceXY(SDL_Surface * src, double angle, double zoomx, double zoomy, int smooth)
+{
+    SDL_Surface *rz_src;
+    SDL_Surface *rz_dst = NULL;
+    SDL_Palette *pal_src;
+    SDL_Palette *pal_dst = NULL;
+    const SDL_PixelFormatDetails *details;
+    double zoominv;
+    double sanglezoom = 0.0, canglezoom = 0.0;
+    double sanglezoominv = 0.0, canglezoominv = 0.0;
+    int dstwidth = 0, dstheight = 0;
+    int dstwidthhalf = 0, dstheighthalf = 0;
+    int is32bit;
+    int src_converted = 0;
+    int flipx, flipy;
+    int rotate;
+    int i, ncolors;
+
+    /*
+     * Sanity check
+     */
+    if (src == NULL) {
+        return (NULL);
+    }
+
+    /*
+     * Determine if source surface is 32bit or 8bit
+     */
+    details = SDL_GetPixelFormatDetails(src->format);
+    if (details == NULL) {
+        return (NULL);
+    }
+    is32bit = (details->bits_per_pixel == 32);
+    if ((is32bit) || (details->bits_per_pixel == 8)) {
+        /*
+         * Use source surface 'as is'
+         */
+        rz_src = src;
+    } else {
+        /*
+         * New source surface is 32bit with a defined RGBA ordering
+         */
+        rz_src = SDL_CreateSurface(src->w, src->h, SDL_PIXELFORMAT_RGBA32);
+        if (rz_src == NULL) {
+            return (NULL);
+        }
+        SDL_BlitSurface(src, NULL, rz_src, NULL);
+        src_converted = 1;
+        is32bit = 1;
+    }
+
+    /*
+     * Sanity check zoom factor: a negative factor is made positive and
+     * remembered as a flip flag that is handed to the transform routine.
+     */
+    flipx = (zoomx < 0.0);
+    if (flipx) zoomx = -zoomx;
+    flipy = (zoomy < 0.0);
+    if (flipy) zoomy = -zoomy;
+    if (zoomx < VALUE_LIMIT) zoomx = VALUE_LIMIT;
+    if (zoomy < VALUE_LIMIT) zoomy = VALUE_LIMIT;
+    /*
+     * NOTE(review): the inverse zoom uses zoomx squared and does not involve
+     * zoomy; kept unchanged from the original -- confirm this is intended.
+     */
+    zoominv = 65536.0 / (zoomx * zoomx);
+
+    /*
+     * Check if we have a rotozoom or just a zoom, and determine target size
+     */
+    rotate = (fabs(angle) > VALUE_LIMIT);
+    if (rotate) {
+        /*
+         * Angle!=0: full rotozoom
+         */
+        _rotozoomSurfaceSizeTrig(rz_src->w, rz_src->h, angle, zoomx, zoomy, &dstwidth, &dstheight, &canglezoom, &sanglezoom);
+
+        /*
+         * Calculate target factors from sin/cos and zoom
+         */
+        sanglezoominv = sanglezoom * zoominv;
+        canglezoominv = canglezoom * zoominv;
+
+        /* Calculate half size */
+        dstwidthhalf = dstwidth / 2;
+        dstheighthalf = dstheight / 2;
+    } else {
+        /*
+         * Angle=0: Just a zoom
+         */
+        zoomSurfaceSize(rz_src->w, rz_src->h, zoomx, zoomy, &dstwidth, &dstheight);
+    }
+
+    /*
+     * Alloc space to completely contain the transformed surface
+     */
+    if (is32bit) {
+        /*
+         * Target surface is 32bit with source RGBA/ABGR ordering
+         */
+        rz_dst = SDL_CreateSurface(dstwidth, dstheight + GUARD_ROWS, rz_src->format);
+    } else {
+        /*
+         * Target surface is 8bit; only ask for a palette once the surface exists
+         */
+        rz_dst = SDL_CreateSurface(dstwidth, dstheight + GUARD_ROWS, SDL_PIXELFORMAT_INDEX8);
+        if (rz_dst != NULL) {
+            pal_dst = SDL_CreateSurfacePalette(rz_dst);
+        }
+    }
+
+    /* Check target; the converted source (if any) is released at the exit label */
+    if (rz_dst == NULL) {
+        goto exitRotozoomSurfaceXY;
+    }
+
+    /*
+     * Adjust for guard rows: the surface was allocated with GUARD_ROWS extra
+     * rows, which stay allocated but fall outside the reported height.
+     */
+    rz_dst->h = dstheight;
+
+    /*
+     * Lock source surface
+     */
+    if (SDL_MUSTLOCK(rz_src)) {
+        if (!SDL_LockSurface(rz_src)) {
+            SDL_DestroySurface(rz_dst);
+            rz_dst = NULL;
+            goto exitRotozoomSurfaceXY;
+        }
+    }
+
+    /*
+     * Check which kind of surface we have
+     */
+    if (is32bit) {
+        /*
+         * Call the 32bit transformation routine (using alpha)
+         */
+        if (rotate) {
+            _transformSurfaceRGBA(rz_src, rz_dst, dstwidthhalf, dstheighthalf,
+                (int) (sanglezoominv), (int) (canglezoominv),
+                flipx, flipy,
+                smooth);
+        } else {
+            _zoomSurfaceRGBA(rz_src, rz_dst, flipx, flipy, smooth);
+        }
+    } else {
+        /*
+         * Copy palette info; either palette may be missing (an 8bit source
+         * is not guaranteed to carry one), in which case the copy is skipped.
+         */
+        pal_src = SDL_GetSurfacePalette(rz_src);
+        if ((pal_src != NULL) && (pal_dst != NULL)) {
+            ncolors = pal_src->ncolors;
+            if (ncolors > pal_dst->ncolors) {
+                ncolors = pal_dst->ncolors;
+            }
+            for (i = 0; i < ncolors; i++) {
+                pal_dst->colors[i] = pal_src->colors[i];
+            }
+            pal_dst->ncolors = ncolors;
+        }
+        /*
+         * Call the 8bit transformation routine
+         */
+        if (rotate) {
+            transformSurfaceY(rz_src, rz_dst, dstwidthhalf, dstheighthalf,
+                (int) (sanglezoominv), (int) (canglezoominv),
+                flipx, flipy);
+        } else {
+            _zoomSurfaceY(rz_src, rz_dst, flipx, flipy);
+        }
+    }
+
+    /*
+     * Unlock source surface
+     */
+    if (SDL_MUSTLOCK(rz_src)) {
+        SDL_UnlockSurface(rz_src);
+    }
+
+exitRotozoomSurfaceXY:
+    /*
+     * Cleanup temp surface
+     */
+    if (src_converted) {
+        SDL_DestroySurface(rz_src);
+    }
+
+    /*
+     * Return destination surface (NULL on failure)
+     */
+    return (rz_dst);
+}
+
+/*!
+\brief Calculates the size of the target surface for a zoomSurface() call.
+
+The minimum size of the target surface is 1. The input factors can be positive or negative.
+
+\param width The width of the source surface to zoom.
+\param height The height of the source surface to zoom.
+\param zoomx The horizontal zoom factor.
+\param zoomy The vertical zoom factor.
+\param dstwidth Pointer to an integer to store the calculated width of the zoomed target surface.
+\param dstheight Pointer to an integer to store the calculated height of the zoomed target surface.
+*/
+void zoomSurfaceSize(int width, int height, double zoomx, double zoomy, int *dstwidth, int *dstheight)
+{
+    /* Work on the magnitude of each factor; the sign does not affect the size. */
+    double scale_x = (zoomx < 0.0) ? -zoomx : zoomx;
+    double scale_y = (zoomy < 0.0) ? -zoomy : zoomy;
+    int out_w, out_h;
+
+    /* Keep the factors from dropping below the smallest supported value. */
+    scale_x = (scale_x < VALUE_LIMIT) ? VALUE_LIMIT : scale_x;
+    scale_y = (scale_y < VALUE_LIMIT) ? VALUE_LIMIT : scale_y;
+
+    /* Scale and round to the nearest integer. */
+    out_w = (int) floor(((double) width * scale_x) + 0.5);
+    out_h = (int) floor(((double) height * scale_y) + 0.5);
+
+    /* The target is never smaller than 1x1. */
+    *dstwidth = (out_w < 1) ? 1 : out_w;
+    *dstheight = (out_h < 1) ? 1 : out_h;
+}
+
+/*!
+\brief Zoom a surface by independent horizontal and vertical factors with optional smoothing.
+
+Zooms a 32bit or 8bit 'src' surface to newly created 'dst' surface.
+'zoomx' and 'zoomy' are scaling factors for width and height. If 'smooth' is on
+then the destination 32bit surface is anti-aliased. If the surface is not 8bit
+or 32bit RGBA/ABGR it will be converted into a 32bit RGBA format on the fly.
+If zoom factors are negative, the image is flipped on the axes.
+
+\param src The surface to zoom.
+\param zoomx The horizontal zoom factor.
+\param zoomy The vertical zoom factor.
+\param smooth Antialiasing flag; set to SMOOTHING_ON to enable.
+
+\return The new, zoomed surface.
+*/
+/*
+ * Failure behaviour of zoomSurface(): NULL is returned when 'src' is NULL,
+ * when the pixel format details of 'src' cannot be queried, when a surface
+ * cannot be created, or when the source surface cannot be locked. A
+ * temporary converted source surface is always destroyed before returning.
+ */
+SDL_Surface *zoomSurface(SDL_Surface * src, double zoomx, double zoomy, int smooth)
+{
+    SDL_Surface *rz_src;
+    SDL_Surface *rz_dst = NULL;
+    SDL_Palette *pal_src;
+    SDL_Palette *pal_dst = NULL;
+    const SDL_PixelFormatDetails *details;
+    int dstwidth, dstheight;
+    int is32bit;
+    int src_converted = 0;
+    int flipx, flipy;
+    int i, ncolors;
+
+    /*
+     * Sanity check
+     */
+    if (src == NULL) {
+        return (NULL);
+    }
+
+    /*
+     * Determine if source surface is 32bit or 8bit
+     */
+    details = SDL_GetPixelFormatDetails(src->format);
+    if (details == NULL) {
+        return (NULL);
+    }
+    is32bit = (details->bits_per_pixel == 32);
+    if ((is32bit) || (details->bits_per_pixel == 8)) {
+        /*
+         * Use source surface 'as is'
+         */
+        rz_src = src;
+    } else {
+        /*
+         * New source surface is 32bit with a defined RGBA ordering
+         */
+        rz_src = SDL_CreateSurface(src->w, src->h, SDL_PIXELFORMAT_RGBA32);
+        if (rz_src == NULL) {
+            return (NULL);
+        }
+        SDL_BlitSurface(src, NULL, rz_src, NULL);
+        src_converted = 1;
+        is32bit = 1;
+    }
+
+    /*
+     * A negative factor is made positive and remembered as a flip flag
+     * that is handed to the zoom routine.
+     */
+    flipx = (zoomx < 0.0);
+    if (flipx) zoomx = -zoomx;
+    flipy = (zoomy < 0.0);
+    if (flipy) zoomy = -zoomy;
+
+    /* Get size of target */
+    zoomSurfaceSize(rz_src->w, rz_src->h, zoomx, zoomy, &dstwidth, &dstheight);
+
+    /*
+     * Alloc space to completely contain the zoomed surface
+     */
+    if (is32bit) {
+        /*
+         * Target surface is 32bit with source RGBA/ABGR ordering
+         */
+        rz_dst = SDL_CreateSurface(dstwidth, dstheight + GUARD_ROWS, rz_src->format);
+    } else {
+        /*
+         * Target surface is 8bit; only ask for a palette once the surface exists
+         */
+        rz_dst = SDL_CreateSurface(dstwidth, dstheight + GUARD_ROWS, SDL_PIXELFORMAT_INDEX8);
+        if (rz_dst != NULL) {
+            pal_dst = SDL_CreateSurfacePalette(rz_dst);
+        }
+    }
+
+    /* Check target; the converted source (if any) is released at the exit label */
+    if (rz_dst == NULL) {
+        goto exitZoomSurface;
+    }
+
+    /*
+     * Adjust for guard rows: the surface was allocated with GUARD_ROWS extra
+     * rows, which stay allocated but fall outside the reported height.
+     */
+    rz_dst->h = dstheight;
+
+    /*
+     * Lock source surface
+     */
+    if (SDL_MUSTLOCK(rz_src)) {
+        if (!SDL_LockSurface(rz_src)) {
+            SDL_DestroySurface(rz_dst);
+            rz_dst = NULL;
+            goto exitZoomSurface;
+        }
+    }
+
+    /*
+     * Check which kind of surface we have
+     */
+    if (is32bit) {
+        /*
+         * Call the 32bit transformation routine to do the zooming (using alpha)
+         */
+        _zoomSurfaceRGBA(rz_src, rz_dst, flipx, flipy, smooth);
+    } else {
+        /*
+         * Copy palette info; either palette may be missing (an 8bit source
+         * is not guaranteed to carry one), in which case the copy is skipped.
+         */
+        pal_src = SDL_GetSurfacePalette(rz_src);
+        if ((pal_src != NULL) && (pal_dst != NULL)) {
+            ncolors = pal_src->ncolors;
+            if (ncolors > pal_dst->ncolors) {
+                ncolors = pal_dst->ncolors;
+            }
+            for (i = 0; i < ncolors; i++) {
+                pal_dst->colors[i] = pal_src->colors[i];
+            }
+            pal_dst->ncolors = ncolors;
+        }
+        /*
+         * Call the 8bit transformation routine to do the zooming
+         */
+        _zoomSurfaceY(rz_src, rz_dst, flipx, flipy);
+    }
+
+    /*
+     * Unlock source surface
+     */
+    if (SDL_MUSTLOCK(rz_src)) {
+        SDL_UnlockSurface(rz_src);
+    }
+
+exitZoomSurface:
+    /*
+     * Cleanup temp surface
+     */
+    if (src_converted) {
+        SDL_DestroySurface(rz_src);
+    }
+
+    /*
+     * Return destination surface (NULL on failure)
+     */
+    return (rz_dst);
+}
+
+/*!
+\brief Shrink a surface by an integer ratio using averaging.
+
+Shrinks a 32bit or 8bit 'src' surface to a newly created 'dst' surface.
+'factorx' and 'factory' are the shrinking ratios (i.e. 2=1/2 the size,
+3=1/3 the size, etc.) The destination surface is antialiased by averaging
+the source box RGBA or Y information. If the surface is not 8bit
+or 32bit RGBA/ABGR it will be converted into a 32bit RGBA format on the fly.
+The input surface is not modified. The output surface is newly allocated.
+
+\param src The surface to shrink.
+\param factorx The horizontal shrinking ratio.
+\param factory The vertical shrinking ratio.
+
+\return The new, shrunken surface.
+*/
+/*
+ * Failure behaviour of shrinkSurface(): NULL is returned when 'src' is NULL,
+ * when 'factorx' or 'factory' is smaller than 1 (a ratio of 0 would divide
+ * by zero), when the pixel format details of 'src' cannot be queried, when a
+ * surface cannot be created, when the source surface cannot be locked, or
+ * when the shrink routine reports an error. A temporary converted source
+ * surface is always destroyed before returning.
+ */
+/*@null@*/
+SDL_Surface *shrinkSurface(SDL_Surface *src, int factorx, int factory)
+{
+    int result;
+    SDL_Surface *rz_src = NULL;
+    SDL_Surface *rz_dst = NULL;
+    SDL_Palette *pal_src;
+    SDL_Palette *pal_dst = NULL;
+    const SDL_PixelFormatDetails *details;
+    int dstwidth, dstheight;
+    int is32bit;
+    int src_converted = 0;
+    int src_locked = 0;
+    int haveError = 0;
+    int i, ncolors;
+
+    /*
+     * Sanity checks
+     */
+    if (src == NULL) {
+        return (NULL);
+    }
+    if ((factorx < 1) || (factory < 1)) {
+        return (NULL);
+    }
+
+    /*
+     * Determine if source surface is 32bit or 8bit
+     */
+    details = SDL_GetPixelFormatDetails(src->format);
+    if (details == NULL) {
+        return (NULL);
+    }
+    is32bit = (details->bits_per_pixel == 32);
+    if ((is32bit) || (details->bits_per_pixel == 8)) {
+        /*
+         * Use source surface 'as is'
+         */
+        rz_src = src;
+    } else {
+        /*
+         * New source surface is 32bit with a defined RGBA ordering
+         */
+        rz_src = SDL_CreateSurface(src->w, src->h, SDL_PIXELFORMAT_RGBA32);
+        if (rz_src == NULL) {
+            haveError = 1;
+            goto exitShrinkSurface;
+        }
+
+        SDL_BlitSurface(src, NULL, rz_src, NULL);
+        src_converted = 1;
+        is32bit = 1;
+    }
+
+    /*
+     * Lock the surface; remember whether the lock was actually taken so the
+     * exit path only unlocks what was locked.
+     */
+    if (SDL_MUSTLOCK(rz_src)) {
+        if (!SDL_LockSurface(rz_src)) {
+            haveError = 1;
+            goto exitShrinkSurface;
+        }
+        src_locked = 1;
+    }
+
+    /*
+     * Get size for target. The factors are at least 1 here, so the integer
+     * division already rounds down and no further correction is needed.
+     */
+    dstwidth = rz_src->w / factorx;
+    dstheight = rz_src->h / factory;
+
+    /*
+     * Alloc space to completely contain the shrunken surface
+     * (with added guard rows)
+     */
+    if (is32bit == 1) {
+        /*
+         * Target surface is 32bit with source RGBA/ABGR ordering
+         */
+        rz_dst = SDL_CreateSurface(dstwidth, dstheight + GUARD_ROWS, rz_src->format);
+    } else {
+        /*
+         * Target surface is 8bit; only ask for a palette once the surface exists
+         */
+        rz_dst = SDL_CreateSurface(dstwidth, dstheight + GUARD_ROWS, SDL_PIXELFORMAT_INDEX8);
+        if (rz_dst != NULL) {
+            pal_dst = SDL_CreateSurfacePalette(rz_dst);
+        }
+    }
+
+    /* Check target */
+    if (rz_dst == NULL) {
+        haveError = 1;
+        goto exitShrinkSurface;
+    }
+
+    /*
+     * Adjust for guard rows: the surface was allocated with GUARD_ROWS extra
+     * rows, which stay allocated but fall outside the reported height.
+     */
+    rz_dst->h = dstheight;
+
+    /*
+     * Check which kind of surface we have
+     */
+    if (is32bit == 1) {
+        /*
+         * Call the 32bit transformation routine to do the shrinking (using alpha)
+         */
+        result = _shrinkSurfaceRGBA(rz_src, rz_dst, factorx, factory);
+        if (result != 0) {
+            haveError = 1;
+            goto exitShrinkSurface;
+        }
+    } else {
+        /*
+         * Copy palette info; either palette may be missing (an 8bit source
+         * is not guaranteed to carry one), in which case the copy is skipped.
+         */
+        pal_src = SDL_GetSurfacePalette(rz_src);
+        if ((pal_src != NULL) && (pal_dst != NULL)) {
+            ncolors = pal_src->ncolors;
+            if (ncolors > pal_dst->ncolors) {
+                ncolors = pal_dst->ncolors;
+            }
+            for (i = 0; i < ncolors; i++) {
+                pal_dst->colors[i] = pal_src->colors[i];
+            }
+            pal_dst->ncolors = ncolors;
+        }
+        /*
+         * Call the 8bit transformation routine to do the shrinking
+         */
+        result = _shrinkSurfaceY(rz_src, rz_dst, factorx, factory);
+        if (result != 0) {
+            haveError = 1;
+            goto exitShrinkSurface;
+        }
+    }
+
+exitShrinkSurface:
+    if (rz_src != NULL) {
+        /*
+         * Unlock source surface
+         */
+        if (src_locked) {
+            SDL_UnlockSurface(rz_src);
+        }
+
+        /*
+         * Cleanup temp surface
+         */
+        if (src_converted == 1) {
+            SDL_DestroySurface(rz_src);
+        }
+    }
+
+    /* Check error state; maybe need to cleanup destination */
+    if (haveError == 1) {
+        if (rz_dst != NULL) {
+            SDL_DestroySurface(rz_dst);
+        }
+        rz_dst = NULL;
+    }
+
+    /*
+     * Return destination surface
+     */
+    return (rz_dst);
+}
diff --git a/vendor/SDL3_gfx/SDL3_rotozoom.h b/vendor/SDL3_gfx/SDL3_rotozoom.h
new file mode 100644
index 0000000..015e119
--- /dev/null
+++ b/vendor/SDL3_gfx/SDL3_rotozoom.h
@@ -0,0 +1,123 @@
+/*
+
+SDL3_rotozoom.c: rotozoomer, zoomer and shrinker for 32bit or 8bit surfaces
+
+Copyright (C) 2012-2014 Andreas Schiffler
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+claim that you wrote the original software.
If you use this software +in a product, an acknowledgment in the product documentation would be +appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be +misrepresented as being the original software. + +3. This notice may not be removed or altered from any source +distribution. + +Andreas Schiffler -- aschiffler at ferzkopp dot net + +*/ + +#ifndef _SDL3_rotozoom_h +#define _SDL3_rotozoom_h + +#include + +/* Set up for C function definitions, even when using C++ */ +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef M_PI +#define M_PI 3.1415926535897932384626433832795 +#endif + +#include + + /* ---- Defines */ + + /*! + \brief Disable anti-aliasing (no smoothing). + */ +#define SMOOTHING_OFF 0 + + /*! + \brief Enable anti-aliasing (smoothing). + */ +#define SMOOTHING_ON 1 + + /* ---- Function Prototypes */ + +#ifdef _MSC_VER +# if defined(DLL_EXPORT) && !defined(LIBSDL3_GFX_DLL_IMPORT) +# define SDL3_ROTOZOOM_SCOPE __declspec(dllexport) +# else +# ifdef LIBSDL3_GFX_DLL_IMPORT +# define SDL3_ROTOZOOM_SCOPE __declspec(dllimport) +# endif +# endif +#endif +#ifndef SDL3_ROTOZOOM_SCOPE +# define SDL3_ROTOZOOM_SCOPE extern +#endif + + /* + + Rotozoom functions + + */ + + SDL3_ROTOZOOM_SCOPE SDL_Surface *rotozoomSurface(SDL_Surface * src, double angle, double zoom, int smooth); + + SDL3_ROTOZOOM_SCOPE SDL_Surface *rotozoomSurfaceXY + (SDL_Surface * src, double angle, double zoomx, double zoomy, int smooth); + + + SDL3_ROTOZOOM_SCOPE void rotozoomSurfaceSize(int width, int height, double angle, double zoom, int *dstwidth, + int *dstheight); + + SDL3_ROTOZOOM_SCOPE void rotozoomSurfaceSizeXY + (int width, int height, double angle, double zoomx, double zoomy, + int *dstwidth, int *dstheight); + + /* + + Zooming functions + + */ + + SDL3_ROTOZOOM_SCOPE SDL_Surface *zoomSurface(SDL_Surface * src, double zoomx, double zoomy, int smooth); + + SDL3_ROTOZOOM_SCOPE void zoomSurfaceSize(int width, int height, double zoomx, 
double zoomy, int *dstwidth, int *dstheight); + + /* + + Shrinking functions + + */ + + SDL3_ROTOZOOM_SCOPE SDL_Surface *shrinkSurface(SDL_Surface * src, int factorx, int factory); + + /* + + Specialized rotation functions + + */ + + SDL3_ROTOZOOM_SCOPE SDL_Surface* rotateSurface90Degrees(SDL_Surface* src, int numClockwiseTurns); + + /* Ends C function definitions when using C++ */ +#ifdef __cplusplus +} +#endif + +#endif /* _SDL3_rotozoom_h */ diff --git a/vendor/clay/clay_renderer_SDL3.c b/vendor/clay/clay_renderer_SDL3.c index 3739726..5ccec48 100644 --- a/vendor/clay/clay_renderer_SDL3.c +++ b/vendor/clay/clay_renderer_SDL3.c @@ -1,247 +1,37 @@ -#include "clay.h" -#include -#include -#include -#include +#include "clay_renderer_SDL3.h" -typedef struct { - SDL_Renderer *renderer; - TTF_TextEngine *textEngine; - TTF_Font **fonts; -} Clay_SDL3RendererData; - -/* Global for convenience. Even in 4K this is enough for smooth curves (low radius or rect size coupled with - * no AA or low resolution might make it appear as jagged curves) */ -static int NUM_CIRCLE_SEGMENTS = 16; - -//all rendering is performed by a single SDL call, avoiding multiple RenderRect + plumbing choice for circles. 
-static void SDL_Clay_RenderFillRoundedRect(Clay_SDL3RendererData *rendererData, const SDL_FRect rect, const float cornerRadius, const Clay_Color _color) { - const SDL_FColor color = { _color.r/255, _color.g/255, _color.b/255, _color.a/255 }; - - int indexCount = 0, vertexCount = 0; - - const float minRadius = SDL_min(rect.w, rect.h) / 2.0f; - const float clampedRadius = SDL_min(cornerRadius, minRadius); - - const int numCircleSegments = SDL_max(NUM_CIRCLE_SEGMENTS, (int) clampedRadius * 0.5f); - - int totalVertices = 4 + (4 * (numCircleSegments * 2)) + 2*4; - int totalIndices = 6 + (4 * (numCircleSegments * 3)) + 6*4; - - SDL_Vertex vertices[totalVertices]; - int indices[totalIndices]; - - //define center rectangle - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + clampedRadius, rect.y + clampedRadius}, color, {0, 0} }; //0 center TL - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + rect.w - clampedRadius, rect.y + clampedRadius}, color, {1, 0} }; //1 center TR - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + rect.w - clampedRadius, rect.y + rect.h - clampedRadius}, color, {1, 1} }; //2 center BR - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + clampedRadius, rect.y + rect.h - clampedRadius}, color, {0, 1} }; //3 center BL - - indices[indexCount++] = 0; - indices[indexCount++] = 1; - indices[indexCount++] = 3; - indices[indexCount++] = 1; - indices[indexCount++] = 2; - indices[indexCount++] = 3; - - //define rounded corners as triangle fans - const float step = (SDL_PI_F/2) / numCircleSegments; - for (int i = 0; i < numCircleSegments; i++) { - const float angle1 = (float)i * step; - const float angle2 = ((float)i + 1.0f) * step; - - for (int j = 0; j < 4; j++) { // Iterate over four corners - float cx, cy, signX, signY; - - switch (j) { - case 0: cx = rect.x + clampedRadius; cy = rect.y + clampedRadius; signX = -1; signY = -1; break; // Top-left - case 1: cx = rect.x + rect.w - clampedRadius; cy = rect.y + clampedRadius; signX = 1; signY = -1; break; // 
Top-right - case 2: cx = rect.x + rect.w - clampedRadius; cy = rect.y + rect.h - clampedRadius; signX = 1; signY = 1; break; // Bottom-right - case 3: cx = rect.x + clampedRadius; cy = rect.y + rect.h - clampedRadius; signX = -1; signY = 1; break; // Bottom-left - default: return; - } - - vertices[vertexCount++] = (SDL_Vertex){ {cx + SDL_cosf(angle1) * clampedRadius * signX, cy + SDL_sinf(angle1) * clampedRadius * signY}, color, {0, 0} }; - vertices[vertexCount++] = (SDL_Vertex){ {cx + SDL_cosf(angle2) * clampedRadius * signX, cy + SDL_sinf(angle2) * clampedRadius * signY}, color, {0, 0} }; - - indices[indexCount++] = j; // Connect to corresponding central rectangle vertex - indices[indexCount++] = vertexCount - 2; - indices[indexCount++] = vertexCount - 1; - } - } - - //Define edge rectangles - // Top edge - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + clampedRadius, rect.y}, color, {0, 0} }; //TL - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + rect.w - clampedRadius, rect.y}, color, {1, 0} }; //TR - - indices[indexCount++] = 0; - indices[indexCount++] = vertexCount - 2; //TL - indices[indexCount++] = vertexCount - 1; //TR - indices[indexCount++] = 1; - indices[indexCount++] = 0; - indices[indexCount++] = vertexCount - 1; //TR - // Right edge - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + rect.w, rect.y + clampedRadius}, color, {1, 0} }; //RT - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + rect.w, rect.y + rect.h - clampedRadius}, color, {1, 1} }; //RB - - indices[indexCount++] = 1; - indices[indexCount++] = vertexCount - 2; //RT - indices[indexCount++] = vertexCount - 1; //RB - indices[indexCount++] = 2; - indices[indexCount++] = 1; - indices[indexCount++] = vertexCount - 1; //RB - // Bottom edge - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + rect.w - clampedRadius, rect.y + rect.h}, color, {1, 1} }; //BR - vertices[vertexCount++] = (SDL_Vertex){ {rect.x + clampedRadius, rect.y + rect.h}, color, {0, 1} }; //BL - - indices[indexCount++] = 2; 
- indices[indexCount++] = vertexCount - 2; //BR - indices[indexCount++] = vertexCount - 1; //BL - indices[indexCount++] = 3; - indices[indexCount++] = 2; - indices[indexCount++] = vertexCount - 1; //BL - // Left edge - vertices[vertexCount++] = (SDL_Vertex){ {rect.x, rect.y + rect.h - clampedRadius}, color, {0, 1} }; //LB - vertices[vertexCount++] = (SDL_Vertex){ {rect.x, rect.y + clampedRadius}, color, {0, 0} }; //LT - - indices[indexCount++] = 3; - indices[indexCount++] = vertexCount - 2; //LB - indices[indexCount++] = vertexCount - 1; //LT - indices[indexCount++] = 0; - indices[indexCount++] = 3; - indices[indexCount++] = vertexCount - 1; //LT - - // Render everything - SDL_RenderGeometry(rendererData->renderer, NULL, vertices, vertexCount, indices, indexCount); -} - -static void SDL_Clay_RenderArc(Clay_SDL3RendererData *rendererData, const SDL_FPoint center, const float radius, const float startAngle, const float endAngle, const float thickness, const Clay_Color color) { - SDL_SetRenderDrawColor(rendererData->renderer, color.r, color.g, color.b, color.a); - - const float radStart = startAngle * (SDL_PI_F / 180.0f); - const float radEnd = endAngle * (SDL_PI_F / 180.0f); - - const int numCircleSegments = SDL_max(NUM_CIRCLE_SEGMENTS, (int)(radius * 1.5f)); //increase circle segments for larger circles, 1.5 is arbitrary. - - const float angleStep = (radEnd - radStart) / (float)numCircleSegments; - const float thicknessStep = 0.4f; //arbitrary value to avoid overlapping lines. Changing THICKNESS_STEP or numCircleSegments might cause artifacts. 
- - for (float t = thicknessStep; t < thickness - thicknessStep; t += thicknessStep) { - SDL_FPoint points[numCircleSegments + 1]; - const float clampedRadius = SDL_max(radius - t, 1.0f); - - for (int i = 0; i <= numCircleSegments; i++) { - const float angle = radStart + i * angleStep; - points[i] = (SDL_FPoint){ - SDL_roundf(center.x + SDL_cosf(angle) * clampedRadius), - SDL_roundf(center.y + SDL_sinf(angle) * clampedRadius) }; - } - SDL_RenderLines(rendererData->renderer, points, numCircleSegments + 1); - } -} - -SDL_Rect currentClippingRectangle; - -static void SDL_Clay_RenderClayCommands(Clay_SDL3RendererData *rendererData, Clay_RenderCommandArray *rcommands) -{ +void SDL_Clay_RenderClayCommands(Clay_SDL3RendererData *rendererData, Clay_RenderCommandArray *rcommands) { for (size_t i = 0; i < rcommands->length; i++) { Clay_RenderCommand *rcmd = Clay_RenderCommandArray_Get(rcommands, i); const Clay_BoundingBox bounding_box = rcmd->boundingBox; - const SDL_FRect rect = { (int)bounding_box.x, (int)bounding_box.y, (int)bounding_box.width, (int)bounding_box.height }; switch (rcmd->commandType) { case CLAY_RENDER_COMMAND_TYPE_RECTANGLE: { Clay_RectangleRenderData *config = &rcmd->renderData.rectangle; - SDL_SetRenderDrawBlendMode(rendererData->renderer, SDL_BLENDMODE_BLEND); - SDL_SetRenderDrawColor(rendererData->renderer, config->backgroundColor.r, config->backgroundColor.g, config->backgroundColor.b, config->backgroundColor.a); - if (config->cornerRadius.topLeft > 0) { - SDL_Clay_RenderFillRoundedRect(rendererData, rect, config->cornerRadius.topLeft, config->backgroundColor); - } else { - SDL_RenderFillRect(rendererData->renderer, &rect); - } + roundedBoxRGBA(rendererData->renderer, bounding_box.x, bounding_box.y, + bounding_box.x + bounding_box.width, bounding_box.y + bounding_box.height, config->cornerRadius.topLeft, + config->backgroundColor.r, config->backgroundColor.g, config->backgroundColor.b, config->backgroundColor.a); } break; case 
CLAY_RENDER_COMMAND_TYPE_TEXT: { Clay_TextRenderData *config = &rcmd->renderData.text; TTF_Font *font = rendererData->fonts[config->fontId]; TTF_Text *text = TTF_CreateText(rendererData->textEngine, font, config->stringContents.chars, config->stringContents.length); TTF_SetTextColor(text, config->textColor.r, config->textColor.g, config->textColor.b, config->textColor.a); - TTF_DrawRendererText(text, rect.x, rect.y); + TTF_DrawRendererText(text, bounding_box.x, bounding_box.y); TTF_DestroyText(text); } break; case CLAY_RENDER_COMMAND_TYPE_BORDER: { Clay_BorderRenderData *config = &rcmd->renderData.border; - - const float minRadius = SDL_min(rect.w, rect.h) / 2.0f; - const Clay_CornerRadius clampedRadii = { - .topLeft = SDL_min(config->cornerRadius.topLeft, minRadius), - .topRight = SDL_min(config->cornerRadius.topRight, minRadius), - .bottomLeft = SDL_min(config->cornerRadius.bottomLeft, minRadius), - .bottomRight = SDL_min(config->cornerRadius.bottomRight, minRadius) - }; - //edges - SDL_SetRenderDrawColor(rendererData->renderer, config->color.r, config->color.g, config->color.b, config->color.a); - if (config->width.left > 0) { - const float starting_y = rect.y + clampedRadii.topLeft; - const float length = rect.h - clampedRadii.topLeft - clampedRadii.bottomLeft; - SDL_FRect line = { rect.x, starting_y, config->width.left, length }; - SDL_RenderFillRect(rendererData->renderer, &line); - } - if (config->width.right > 0) { - const float starting_x = rect.x + rect.w - (float)config->width.right; - const float starting_y = rect.y + clampedRadii.topRight; - const float length = rect.h - clampedRadii.topRight - clampedRadii.bottomRight; - SDL_FRect line = { starting_x, starting_y, config->width.right, length }; - SDL_RenderFillRect(rendererData->renderer, &line); - } - if (config->width.top > 0) { - const float starting_x = rect.x + clampedRadii.topLeft; - const float length = rect.w - clampedRadii.topLeft - clampedRadii.topRight; - SDL_FRect line = { starting_x, 
rect.y, length, config->width.top }; - SDL_RenderFillRect(rendererData->renderer, &line); - } - if (config->width.bottom > 0) { - const float starting_x = rect.x + clampedRadii.bottomLeft; - const float starting_y = rect.y + rect.h - (float)config->width.bottom; - const float length = rect.w - clampedRadii.bottomLeft - clampedRadii.bottomRight; - SDL_FRect line = { starting_x, starting_y, length, config->width.bottom }; - SDL_SetRenderDrawColor(rendererData->renderer, config->color.r, config->color.g, config->color.b, config->color.a); - SDL_RenderFillRect(rendererData->renderer, &line); - } - //corners - if (config->cornerRadius.topLeft > 0) { - const float centerX = rect.x + clampedRadii.topLeft -1; - const float centerY = rect.y + clampedRadii.topLeft; - SDL_Clay_RenderArc(rendererData, (SDL_FPoint){centerX, centerY}, clampedRadii.topLeft, - 180.0f, 270.0f, config->width.top, config->color); - } - if (config->cornerRadius.topRight > 0) { - const float centerX = rect.x + rect.w - clampedRadii.topRight -1; - const float centerY = rect.y + clampedRadii.topRight; - SDL_Clay_RenderArc(rendererData, (SDL_FPoint){centerX, centerY}, clampedRadii.topRight, - 270.0f, 360.0f, config->width.top, config->color); - } - if (config->cornerRadius.bottomLeft > 0) { - const float centerX = rect.x + clampedRadii.bottomLeft -1; - const float centerY = rect.y + rect.h - clampedRadii.bottomLeft -1; - SDL_Clay_RenderArc(rendererData, (SDL_FPoint){centerX, centerY}, clampedRadii.bottomLeft, - 90.0f, 180.0f, config->width.bottom, config->color); - } - if (config->cornerRadius.bottomRight > 0) { - const float centerX = rect.x + rect.w - clampedRadii.bottomRight -1; //TODO: why need to -1 in all calculations??? 
- const float centerY = rect.y + rect.h - clampedRadii.bottomRight -1; - SDL_Clay_RenderArc(rendererData, (SDL_FPoint){centerX, centerY}, clampedRadii.bottomRight, - 0.0f, 90.0f, config->width.bottom, config->color); - } - + roundedRectangleRGBA(rendererData->renderer, bounding_box.x, bounding_box.y, + bounding_box.x + bounding_box.width, bounding_box.y + bounding_box.height, config->cornerRadius.topLeft, + config->color.r, config->color.g, config->color.b, config->color.a); } break; case CLAY_RENDER_COMMAND_TYPE_SCISSOR_START: { - Clay_BoundingBox boundingBox = rcmd->boundingBox; - currentClippingRectangle = (SDL_Rect) { - .x = boundingBox.x, - .y = boundingBox.y, - .w = boundingBox.width, - .h = boundingBox.height, + const SDL_Rect currentClippingRectangle = (SDL_Rect) { + .x = bounding_box.x, + .y = bounding_box.y, + .w = bounding_box.width, + .h = bounding_box.height, }; SDL_SetRenderClipRect(rendererData->renderer, &currentClippingRectangle); break; @@ -253,8 +43,12 @@ static void SDL_Clay_RenderClayCommands(Clay_SDL3RendererData *rendererData, Cla case CLAY_RENDER_COMMAND_TYPE_IMAGE: { SDL_Surface *image = (SDL_Surface *)rcmd->renderData.image.imageData; SDL_Texture *texture = SDL_CreateTextureFromSurface(rendererData->renderer, image); - const SDL_FRect dest = { rect.x, rect.y, rect.w, rect.h }; - + const SDL_FRect dest = (SDL_FRect) { + .x = bounding_box.x, + .y = bounding_box.y, + .w = bounding_box.width, + .h = bounding_box.height, + }; SDL_RenderTexture(rendererData->renderer, texture, NULL, &dest); SDL_DestroyTexture(texture); break; diff --git a/vendor/clay/clay_renderer_SDL3.h b/vendor/clay/clay_renderer_SDL3.h new file mode 100644 index 0000000..8bb5366 --- /dev/null +++ b/vendor/clay/clay_renderer_SDL3.h @@ -0,0 +1,20 @@ +#ifndef _clay_renderer_SDL3_h +#define _clay_renderer_SDL3_h + +#include +#include +#include +#include +#include + +#include "clay.h" + +typedef struct { + SDL_Renderer *renderer; + TTF_TextEngine *textEngine; + TTF_Font **fonts; +}
Clay_SDL3RendererData; + +void SDL_Clay_RenderClayCommands(Clay_SDL3RendererData *rendererData, Clay_RenderCommandArray *rcommands); + +#endif