@ -81,57 +81,65 @@ dense_line_to( const FT_Vector* to, dense_worker* worker )
void
dense_render_line ( dense_worker * worker , FT_Pos tox , FT_Pos toy )
{
float from_x = worker - > prev_x ;
float from_y = worker - > prev_y ;
if ( from_y = = toy )
return ;
FT26D6 fx = worker - > prev_x > > 2 ;
FT26D6 fy = worker - > prev_y > > 2 ;
FT26D6 from_x = fx ;
FT26D6 from_y = fy ;
FT26D6 tx = tox > > 2 ;
FT26D6 ty = toy > > 2 ;
from_x / = 256.0 ;
from_y / = 256.0 ;
float to_x = tox / 256.0 ;
float to_y = toy / 256.0 ;
if ( fy = = ty )
return ;
FT26D6 to_x = tx ;
FT26D6 to_y = ty ;
float dir ;
if ( from_y < to_y )
dir = 1 ;
else
int dir = 1 ;
if ( from_y > = to_y )
{
dir = - 1 ;
FT_SWAP ( from_x , to_x ) ;
FT_SWAP ( from_y , to_y ) ;
FT_SWAP ( from_x , to_x ) ;
FT_SWAP ( from_y , to_y ) ;
}
// Clip to the height.
if ( from_y > = worker - > m_h | | to_y < = 0 )
if ( from_y > = worker - > m_h < < 6 | | to_y < = 0 )
return ;
float dxdy = ( to_x - from_x ) / ( float ) ( to_y - from_y ) ;
FT26D6 deltax , deltay ;
deltax = to_x - from_x ;
deltay = to_y - from_y ;
if ( from_y < 0 )
{
from_x - = from_y * dxdy ;
from_x - = from_y * delta x / delta y ;
from_y = 0 ;
}
if ( to_y > worker - > m_h )
if ( to_y > worker - > m_h < < 6 )
{
to_x - = ( to_y - worker - > m_h ) * dxdy ;
to_y = ( float ) worker - > m_h ;
to_x - = ( ( to_y - worker - > m_h < < 6 ) * delta x / delta y ) ;
to_y = worker - > m_h < < 6 ;
}
float x = from_x ;
int y0 = ( int ) from_y ;
int y_limit = ( int ) ceil ( to_y ) ;
float * m_a = worker - > m_a ;
int x = from_x ;
int y0 = from_y > > 6 ;
int y_limit = ( to_y + 0x3f ) > > 6 ;
FT20D12 * m_a = worker - > m_a ;
for ( int y = y0 ; y < y_limit ; y + + )
{
int linestart = y * worker - > m_w ;
float dy = fmin ( y + 1.0f , to_y ) - fmax ( ( float ) y , from_y ) ;
float xnext = x + dxd y * dy ;
float d = dy * dir ;
FT26D6 dy = FT_MIN ( ( y + 1 ) < < 6 , to_y ) - FT_MAX ( y < < 6 , from_y ) ;
FT26D6 xnext = x + dy * deltax / delta y ;
FT26D6 d = dy * dir ;
float x0 , x1 ;
FT26D6 x0 , x1 ;
if ( x < xnext )
{
x0 = x ;
@ -143,40 +151,48 @@ dense_render_line( dense_worker* worker, FT_Pos tox, FT_Pos toy )
x1 = x ;
}
/*
It ' s possible for x0 to be negative on the last scanline because of
floating - point inaccuracy That would cause an out - of - bounds array access at
index - 1.
*/
float x0floor = x0 < = 0.0f ? 0.0f : ( float ) floor ( x0 ) ;
int x0i = ( int ) x0floor ;
float x1ceil = ( float ) ceil ( x1 ) ;
int x1i = ( int ) x1ceil ;
int x0i = x0 > > 6 ;
FT26D6 x0floor = x0i < < 6 ;
int x1i = ( x1 + 0x3f ) > > 6 ;
FT26D6 x1ceil = x1i < < 6 ;
if ( x1i < = x0i + 1 )
{
float xmf = 0.5f * ( x + xnext ) - x0floor ;
m_a [ linestart + x0i ] + = d - d * xmf ;
FT26D6 xmf = ( ( x + xnext ) > > 1 ) - x0floor ;
m_a [ linestart + x0i ] + = d * ( ( 1 < < 6 ) - xmf ) ;
m_a [ linestart + ( x0i + 1 ) ] + = d * xmf ;
}
else
{
float s = 1.0f / ( x1 - x0 ) ;
float x0f = x0 - x0floor ;
float a0 = 0.5f * s * ( 1.0f - x0f ) * ( 1.0f - x0f ) ;
float x1f = x1 - x1ceil + 1.0f ;
float am = 0.5f * s * x1f * x1f ;
FT26D6 oneOverS = x1 - x0 ;
FT26D6 x0f = x0 - x0floor ;
FT26D6 oneMinusX0f = ( 1 < < 6 ) - x0f ;
FT26D6 a0 = ( ( oneMinusX0f * oneMinusX0f ) > > 1 ) / oneOverS ;
FT26D6 x1f = x1 - x1ceil + ( 1 < < 6 ) ;
FT26D6 am = ( ( x1f * x1f ) > > 1 ) / oneOverS ;
m_a [ linestart + x0i ] + = d * a0 ;
if ( x1i = = x0i + 2 )
m_a [ linestart + ( x0i + 1 ) ] + = d * ( 1.0f - a0 - am ) ;
m_a [ linestart + ( x0i + 1 ) ] + = d * ( ( 1 < < 6 ) - a0 - am ) ;
else
{
float a1 = s * ( 1.5f - x0f ) ;
FT26D6 a1 = ( ( ( 1 < < 6 ) + ( 1 < < 5 ) - x0f ) < < 6 ) / oneOverS ;
m_a [ linestart + ( x0i + 1 ) ] + = d * ( a1 - a0 ) ;
for ( int xi = x0i + 2 ; xi < x1i - 1 ; xi + + )
m_a [ linestart + xi ] + = d * s ;
float a2 = a1 + ( x1i - x0i - 3 ) * s ;
m_a [ linestart + ( x1i - 1 ) ] + = d * ( 1.0f - a2 - am ) ;
FT26D6 dTimesS = ( d < < 12 ) / oneOverS ;
for ( FT26D6 xi = x0i + 2 ; xi < x1i - 1 ; xi + + )
m_a [ linestart + xi ] + = dTimesS ;
FT26D6 a2 = a1 + ( ( x1i - x0i - 3 ) < < 12 ) / oneOverS ;
m_a [ linestart + ( x1i - 1 ) ] + = d * ( ( 1 < < 6 ) - a2 - am ) ;
}
m_a [ linestart + x1i ] + = d * am ;
}
@ -364,48 +380,49 @@ dense_render_glyph( dense_worker* worker, const FT_Bitmap* target )
FT_Error error = FT_Outline_Decompose ( & ( worker - > outline ) ,
& dense_decompose_funcs , worker ) ;
// Render into bitmap
const float * source = worker - > m_a ;
const FT20D12 * source = worker - > m_a ;
unsigned char * dest = target - > buffer ;
unsigned char * dest_end = target - > buffer + worker - > m_w * worker - > m_h ;
# if FT_SSE4_1
__m128 offset = _mm_setzero_ps ( ) ;
__m128i mask = _mm_set1_epi32 ( 0x0c080400 ) ;
__m128 sign_mask = _mm_set1_ps ( - 0.f ) ;
for ( int i = 0 ; i < worker - > m_h * worker - > m_w ; i + = 4 ) {
__m128 x = _mm_load_ps ( & source [ i ] ) ;
x = _mm_add_ps ( x , _mm_castsi128_ps ( _mm_slli_si128 ( _mm_castps_si128 ( x ) , 4 ) ) ) ;
x = _mm_add_ps ( x , _mm_shuffle_ps ( _mm_setzero_ps ( ) , x , 0x40 ) ) ;
x = _mm_add_ps ( x , offset ) ;
__m128 y = _mm_andnot_ps ( sign_mask , x ) ; // fabs(x)
y = _mm_min_ps ( y , _mm_set1_ps ( 1.0f ) ) ;
y = _mm_mul_ps ( y , _mm_set1_ps ( 255.0f ) ) ;
__m128i z = _mm_cvtps_epi32 ( y ) ;
z = _mm_shuffle_epi8 ( z , mask ) ;
_mm_store_ss ( ( float * ) & dest [ i ] , ( __m128 ) z ) ;
offset = _mm_shuffle_ps ( x , x , _MM_SHUFFLE ( 3 , 3 , 3 , 3 ) ) ;
}
# else /* FT_SSE4_1 */
//#if FT_SSE4_1
// __m128 offset = _mm_setzero_ps();
// __m128i mask = _mm_set1_epi32(0x0c080400);
// __m128 sign_mask = _mm_set1_ps(-0.f);
// for (int i = 0; i < worker->m_h*worker->m_w; i += 4) {
// __m128 x = _mm_load_ps(&source[i]);
// x = _mm_add_ps(x, _mm_castsi128_ps(_mm_slli_si128(_mm_castps_si128(x), 4)));
// x = _mm_add_ps(x, _mm_shuffle_ps(_mm_setzero_ps(), x, 0x40));
// x = _mm_add_ps(x, offset);
// __m128 y = _mm_andnot_ps(sign_mask, x); // fabs(x)
// y = _mm_min_ps(y, _mm_set1_ps(1.0f));
// y = _mm_mul_ps(y, _mm_set1_ps(255.0f));
// __m128i z = _mm_cvtps_epi32(y);
// z = _mm_shuffle_epi8(z, mask);
// _mm_store_ss((float *)&dest[i], (__m128)z);
// offset = _mm_shuffle_ps(x, x, _MM_SHUFFLE(3, 3, 3, 3));
// }
//#else /* FT_SSE4_1 */
FT20D12 value = 0 ;
float value = 0.0f ;
while ( dest < dest_end )
{
value + = * source + + ;
if ( value > 0.0f )
{
int n = ( int ) ( fabs ( value ) * 255.0f + 0.5f ) ;
if ( n > 255 )
n = 255 ;
if ( value > 0 ) {
int n = value > > 4 ;
if ( n > 255 ) n = 255 ;
* dest = ( unsigned char ) n ;
}
else
} else {
* dest = 0 ;
}
dest + + ;
}
# endif /* FT_SSE4_1 */
//#endif /* FT_SSE4_1 */
free ( worker - > m_a ) ;
return error ;
@ -444,10 +461,10 @@ dense_raster_render( FT_Raster raster, const FT_Raster_Params* params )
int size = worker - > m_w * worker - > m_h + 4 ;
worker - > m_a = malloc ( sizeof ( float ) * size ) ;
worker - > m_a = malloc ( sizeof ( FT20D12 ) * size ) ;
worker - > m_a_size = size ;
memset ( worker - > m_a , 0 , ( sizeof ( float ) * size ) ) ;
memset ( worker - > m_a , 0 , ( sizeof ( FT20D12 ) * size ) ) ;
/* exit if nothing to do */
if ( worker - > m_w < = worker - > m_origin_x | | worker - > m_h < = worker - > m_origin_y )
{