// Home | Resume | Blog | Teikitu


// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= //
//  »Project«   Teikitu Gaming System (TgS) (∂)
//  »File«      TgS (WIN) Common - Math API [Matrix] [M] [S].inl
//  »Author«    Andrew Aye (EMail: mailto:andrew.aye@gmail.com, Web: http://www.andrewaye.com)
//  »Version«   4.0
// ------------------------------------------------------------------------------------------------------------------------------ //
//  Copyright: © 2002-2010, Andrew Aye.  All Rights Reserved.
//  This software is free for non-commercial use. Redistribution and use in source and binary forms, with or without modification,
//  are permitted provided that the following conditions are met: 
//    Redistributions of source code must retain this copyright notice, this list of conditions and the following disclaimers. 
//    Redistributions in binary form must reproduce this copyright notice, this list of conditions and the following
//      disclaimers in the documentation and other materials provided with the distribution. 
//  Neither the names of the copyright owner nor the names of its contributors may be used to endorse or promote products derived
//  from this software without specific prior written permission. 
//  The intellectual property rights of the algorithms used reside with Andrew Aye.  You may not use this software, in whole or
//  in part, in support of any commercial product without the express written consent of the author.
//  There is no warranty or other guarantee of fitness of this software for any purpose. It is provided solely "as is".
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= //
#if !defined(_TGS_WIN_COMMON___MATH_API_MATRIX_M_S_INL_)
#define _TGS_WIN_COMMON___MATH_API_MATRIX_M_S_INL_
#pragma once


// ---- HOMOGENEOUS 3X4 SPECIFIC FUNCTIONS -------------------------------------------------------------------------------------- //

// Overwrites the three rows of *ptmM0.  Row N becomes the matching KTgV_UNIT_{X,Y,Z} constant plus a vector whose two low lanes
// are taken from KTgV_ZERO_F32_04 and whose two high lanes are picked out of the masked copy of vS0.
//   ptmM0 - [out] 3x4 matrix to initialise; its previous contents are not read.
//   vS0   - source vector; it is ANDed with KTgV_FFF0 before use.
// NOTE(review): presumably KTgV_FFF0 clears the W lane and _MM_PERM lists its lane selectors in x,y,z,w order, which would make
// each row "unit axis with one translation component in W" - confirm against the definitions of both.
TgINLINE TgVOID M_INIT_T_04_F32_34( PCU_TgMAT_F32_34 ptmM0, C_TgVEC_M_F32_04 vS0 )
{
    const __m128                        vZero = KTgV_ZERO_F32_04.m_mData;
    const __m128                        vMasked = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, vS0 );

    // _mm_shuffle_ps takes its two low result lanes from the first operand and its two high result lanes from the second.
    ptmM0->m_avRow[0].m_mData = _mm_add_ps( KTgV_UNIT_X_F32_04.m_mData,
                                            _mm_shuffle_ps( vZero, vMasked, _MM_PERM( 0, 0, 3, 0 ) ) );
    ptmM0->m_avRow[1].m_mData = _mm_add_ps( KTgV_UNIT_Y_F32_04.m_mData,
                                            _mm_shuffle_ps( vZero, vMasked, _MM_PERM( 0, 0, 3, 1 ) ) );
    ptmM0->m_avRow[2].m_mData = _mm_add_ps( KTgV_UNIT_Z_F32_04.m_mData,
                                            _mm_shuffle_ps( vZero, vMasked, _MM_PERM( 0, 0, 3, 2 ) ) );
}


// Rewrites each of the three rows of *ptmM0 in place as (row AND KTgV_FFF0) + (two zero lanes, two lanes picked from the masked vS0).
//   ptmM0 - [in/out] 3x4 matrix; every row is read, masked and stored back.
//   vS0   - source vector; it is ANDed with KTgV_FFF0 before use.
// NOTE(review): presumably KTgV_FFF0 keeps X,Y,Z and clears W, and _MM_PERM lists lane selectors in x,y,z,w order, so the net
// effect would be "replace the W lane of each row with one component of vS0" - confirm against the definitions of both.
TgINLINE TgVOID M_SET_T_04_F32_34( PCU_TgMAT_F32_34 ptmM0, C_TgVEC_M_F32_04 vS0 )
{
    const __m128                        vZero = KTgV_ZERO_F32_04.m_mData;
    const __m128                        vMask = KTgV_FFF0.m_f32_v04.m_mData;
    const __m128                        vMasked = _mm_and_ps( vMask, vS0 );

    // Each row depends only on its own previous value and on vS0, so the rows can be updated one at a time.
    ptmM0->m_avRow[0].m_mData = _mm_add_ps( _mm_and_ps( vMask, ptmM0->m_avRow[0].m_mData ),
                                            _mm_shuffle_ps( vZero, vMasked, _MM_PERM( 0, 0, 3, 0 ) ) );
    ptmM0->m_avRow[1].m_mData = _mm_add_ps( _mm_and_ps( vMask, ptmM0->m_avRow[1].m_mData ),
                                            _mm_shuffle_ps( vZero, vMasked, _MM_PERM( 0, 0, 3, 1 ) ) );
    ptmM0->m_avRow[2].m_mData = _mm_add_ps( _mm_and_ps( vMask, ptmM0->m_avRow[2].m_mData ),
                                            _mm_shuffle_ps( vZero, vMasked, _MM_PERM( 0, 0, 3, 2 ) ) );
}




// ---- TRANSFORMATION ---------------------------------------------------------------------------------------------------------- //

// Returns X*C0 + Y*C1 + Z*C2 + C3, where X,Y,Z are the first three lanes of tvX0 and C0..C3 are the columns of the 4x4 matrix
// formed by the three rows of *ptmM0 followed by KTgV_UNIT_W_F32_04 as the fourth row.  The W lane of tvX0 is not used.
//   ptmM0 - [in] 3x4 matrix.
//   tvX0  - [in] vector to transform.
// NOTE(review): presumably KTgV_UNIT_W_F32_04 is (0,0,0,1), which makes this a point transform (matrix W column added in full,
// result W lane equal to 1 for finite inputs) - confirm against the constant.
TgINLINE TgVEC_M_F32_04 M_TX_P_34_F32_04( CPCU_TgMAT_F32_34 ptmM0, C_TgVEC_M_F32_04 tvX0 )
{
    const __m128                        vRow0 = ptmM0->m_avRow[0].m_mData;
    const __m128                        vRow1 = ptmM0->m_avRow[1].m_mData;
    const __m128                        vRow2 = ptmM0->m_avRow[2].m_mData;
    const __m128                        vRow3 = KTgV_UNIT_W_F32_04.m_mData;

    // Interleave pairs of rows, then recombine the halves: a 4x4 transpose that yields the columns.
    const __m128                        vLo01 = _mm_unpacklo_ps( vRow0, vRow1 );   // r0.x r1.x r0.y r1.y
    const __m128                        vLo23 = _mm_unpacklo_ps( vRow2, vRow3 );   // r2.x r3.x r2.y r3.y
    const __m128                        vHi01 = _mm_unpackhi_ps( vRow0, vRow1 );   // r0.z r1.z r0.w r1.w
    const __m128                        vHi23 = _mm_unpackhi_ps( vRow2, vRow3 );   // r2.z r3.z r2.w r3.w

    const __m128                        vCol0 = _mm_movelh_ps( vLo01, vLo23 );
    const __m128                        vCol1 = _mm_movehl_ps( vLo23, vLo01 );
    const __m128                        vCol2 = _mm_movelh_ps( vHi01, vHi23 );
    const __m128                        vCol3 = _mm_movehl_ps( vHi23, vHi01 );

    // Broadcast each input component across all lanes and scale the matching column.
    const __m128                        vTermX = _mm_mul_ps( _mm_shuffle_ps( tvX0, tvX0, _MM_SHUFFLE( 0, 0, 0, 0 ) ), vCol0 );
    const __m128                        vTermY = _mm_mul_ps( _mm_shuffle_ps( tvX0, tvX0, _MM_SHUFFLE( 1, 1, 1, 1 ) ), vCol1 );
    const __m128                        vTermZ = _mm_mul_ps( _mm_shuffle_ps( tvX0, tvX0, _MM_SHUFFLE( 2, 2, 2, 2 ) ), vCol2 );

    const __m128                        vSumXY = _mm_add_ps( vTermX, vTermY );
    const __m128                        vSumZW = _mm_add_ps( vTermZ, vCol3 );

    return (_mm_add_ps( vSumXY, vSumZW ));
}


// Returns X*C0 + Y*C1 + Z*C2 (+ KTgV_ZERO_F32_04), where X,Y,Z are the first three lanes of tvX0 and C0..C2 are the first three
// columns of the 4x4 matrix formed by the three rows of *ptmM0 followed by KTgV_UNIT_W_F32_04.  The fourth column (the W lanes
// of the rows) and the W lane of tvX0 do not contribute.
//   ptmM0 - [in] 3x4 matrix.
//   tvX0  - [in] vector to transform.
// NOTE(review): presumably KTgV_UNIT_W_F32_04 is (0,0,0,1), which makes this a direction transform whose result W lane is 0 for
// finite inputs - confirm against the constant.
TgINLINE TgVEC_M_F32_04 M_TX_V_34_F32_04( CPCU_TgMAT_F32_34 ptmM0, C_TgVEC_M_F32_04 tvX0 )
{
    const __m128                        vRow0 = ptmM0->m_avRow[0].m_mData;
    const __m128                        vRow1 = ptmM0->m_avRow[1].m_mData;
    const __m128                        vRow2 = ptmM0->m_avRow[2].m_mData;
    const __m128                        vRow3 = KTgV_UNIT_W_F32_04.m_mData;

    // Interleave pairs of rows, then recombine the halves to obtain the first three columns.
    const __m128                        vLo01 = _mm_unpacklo_ps( vRow0, vRow1 );   // r0.x r1.x r0.y r1.y
    const __m128                        vLo23 = _mm_unpacklo_ps( vRow2, vRow3 );   // r2.x r3.x r2.y r3.y
    const __m128                        vHi01 = _mm_unpackhi_ps( vRow0, vRow1 );   // r0.z r1.z r0.w r1.w
    const __m128                        vHi23 = _mm_unpackhi_ps( vRow2, vRow3 );   // r2.z r3.z r2.w r3.w

    const __m128                        vCol0 = _mm_movelh_ps( vLo01, vLo23 );
    const __m128                        vCol1 = _mm_movehl_ps( vLo23, vLo01 );
    const __m128                        vCol2 = _mm_movelh_ps( vHi01, vHi23 );

    // Broadcast each input component across all lanes and scale the matching column.
    const __m128                        vTermX = _mm_mul_ps( _mm_shuffle_ps( tvX0, tvX0, _MM_SHUFFLE( 0, 0, 0, 0 ) ), vCol0 );
    const __m128                        vTermY = _mm_mul_ps( _mm_shuffle_ps( tvX0, tvX0, _MM_SHUFFLE( 1, 1, 1, 1 ) ), vCol1 );
    const __m128                        vTermZ = _mm_mul_ps( _mm_shuffle_ps( tvX0, tvX0, _MM_SHUFFLE( 2, 2, 2, 2 ) ), vCol2 );

    // The zero vector stands in for the fourth column so the summation has the same shape as the point transform.
    const __m128                        vSumXY = _mm_add_ps( vTermX, vTermY );
    const __m128                        vSumZ0 = _mm_add_ps( vTermZ, KTgV_ZERO_F32_04.m_mData );

    return (_mm_add_ps( vSumXY, vSumZ0 ));
}




// ---- TRANSPOSE --------------------------------------------------------------------------------------------------------------- //

// Writes into *ptmM0 the transpose of the 4x4 matrix formed by the three rows of *ptmM1 followed by KTgV_UNIT_W_F32_04 as the
// fourth row.
//   ptmM0 - [out] 4x4 matrix; all four rows are overwritten.
//   ptmM1 - [in]  3x4 matrix to transpose.
// Row 3 of the destination (the W lanes of the source rows plus the W lane of KTgV_UNIT_W_F32_04) was previously never stored,
// leaving the last row of the 4x4 result with whatever the caller's matrix already held.  It is now written, matching
// M_TR_F32_44.
TgINLINE TgVOID M_TR_F32_34( PCU_TgMAT_F32_44 ptmM0, CPCU_TgMAT_F32_34 ptmM1 )
{
    const __m128                        vRow0 = ptmM1->m_avRow[0].m_mData;
    const __m128                        vRow1 = ptmM1->m_avRow[1].m_mData;
    const __m128                        vRow2 = ptmM1->m_avRow[2].m_mData;
    const __m128                        vRow3 = KTgV_UNIT_W_F32_04.m_mData;

    // Gather the X,Y halves and the Z,W halves of each pair of rows.
    const __m128                        vXY01 = _mm_shuffle_ps( vRow0, vRow1, 0x44 );   // r0.x r0.y r1.x r1.y
    const __m128                        vZW01 = _mm_shuffle_ps( vRow0, vRow1, 0xEE );   // r0.z r0.w r1.z r1.w
    const __m128                        vXY23 = _mm_shuffle_ps( vRow2, vRow3, 0x44 );   // r2.x r2.y r3.x r3.y
    const __m128                        vZW23 = _mm_shuffle_ps( vRow2, vRow3, 0xEE );   // r2.z r2.w r3.z r3.w

    // 0x88 picks the even lanes of both operands, 0xDD picks the odd lanes.
    ptmM0->m_avRow[0].m_mData = _mm_shuffle_ps( vXY01, vXY23, 0x88 );
    ptmM0->m_avRow[1].m_mData = _mm_shuffle_ps( vXY01, vXY23, 0xDD );
    ptmM0->m_avRow[2].m_mData = _mm_shuffle_ps( vZW01, vZW23, 0x88 );
    ptmM0->m_avRow[3].m_mData = _mm_shuffle_ps( vZW01, vZW23, 0xDD );
}


// Writes the transpose of the 4x4 matrix *ptmM1 into *ptmM0.
//   ptmM0 - [out] 4x4 matrix; all four rows are overwritten.
//   ptmM1 - [in]  4x4 matrix to transpose.
// Every source row is consumed into a temporary before the first destination row is stored.
TgINLINE TgVOID M_TR_F32_44( PCU_TgMAT_F32_44 ptmM0, CPCU_TgMAT_F32_44 ptmM1 )
{
    // Gather the X,Y halves and the Z,W halves of each pair of rows.
    const __m128                        vXY01 = _mm_shuffle_ps( ptmM1->m_avRow[0].m_mData, ptmM1->m_avRow[1].m_mData,
                                                                _MM_SHUFFLE( 1, 0, 1, 0 ) );   // r0.x r0.y r1.x r1.y
    const __m128                        vZW01 = _mm_shuffle_ps( ptmM1->m_avRow[0].m_mData, ptmM1->m_avRow[1].m_mData,
                                                                _MM_SHUFFLE( 3, 2, 3, 2 ) );   // r0.z r0.w r1.z r1.w
    const __m128                        vXY23 = _mm_shuffle_ps( ptmM1->m_avRow[2].m_mData, ptmM1->m_avRow[3].m_mData,
                                                                _MM_SHUFFLE( 1, 0, 1, 0 ) );   // r2.x r2.y r3.x r3.y
    const __m128                        vZW23 = _mm_shuffle_ps( ptmM1->m_avRow[2].m_mData, ptmM1->m_avRow[3].m_mData,
                                                                _MM_SHUFFLE( 3, 2, 3, 2 ) );   // r2.z r2.w r3.z r3.w

    // Even lanes of each pair form one output row, odd lanes the next.
    ptmM0->m_avRow[0].m_mData = _mm_shuffle_ps( vXY01, vXY23, _MM_SHUFFLE( 2, 0, 2, 0 ) );
    ptmM0->m_avRow[1].m_mData = _mm_shuffle_ps( vXY01, vXY23, _MM_SHUFFLE( 3, 1, 3, 1 ) );
    ptmM0->m_avRow[2].m_mData = _mm_shuffle_ps( vZW01, vZW23, _MM_SHUFFLE( 2, 0, 2, 0 ) );
    ptmM0->m_avRow[3].m_mData = _mm_shuffle_ps( vZW01, vZW23, _MM_SHUFFLE( 3, 1, 3, 1 ) );
}


#endif //  END  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////