Home

Resume

Blog

Teikitu


// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= //
//  »Project«   Teikitu Gaming System (TgS) (∂)
//  »File«      TgS (WIN) Common - Math API [Matrix] [M] [F] [34].c
//  »Author«    Andrew Aye (EMail: mailto:andrew.aye@gmail.com, Web: http://www.andrewaye.com)
//  »Version«   4.0
// ------------------------------------------------------------------------------------------------------------------------------ //
//  Copyright: © 2002-2010, Andrew Aye.  All Rights Reserved.
//  This software is free for non-commercial use. Redistribution and use in source and binary forms, with or without modification,
//  are permitted provided that the following conditions are met: 
//    Redistributions of source code must retain this copyright notice, this list of conditions and the following disclaimers. 
//    Redistributions in binary form must reproduce this copyright notice, this list of conditions and the following
//      disclaimers in the documentation and other materials provided with the distribution. 
//  Neither the names of the copyright owner nor the names of its contributors may be used to endorse or promote products derived
//  from this software without specific prior written permission. 
//  The intellectual property rights of the algorithms used reside with Andrew Aye.  You may not use this software, in whole or
//  in part, in support of any commercial product without the express written consent of the author.
//  There is no warranty or other guarantee of fitness of this software for any purpose. It is provided solely "as is".
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= //


// -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-. //
//  Public Functions
// -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-. //

// ---- MATRIX ARITHMETIC OPERATIONS -------------------------------------------------------------------------------------------- //

//  Concatenate two 3x4 matrices and store the product in ptmRet.
//
//  For each output row i (0..2):
//      ptmRet.row[i] =   ( lane0 of ptmM0.row[i] ) * ptmM1.row[0]
//                      + ( lane1 of ptmM0.row[i] ) * ptmM1.row[1]
//                      + ( lane2 of ptmM0.row[i] ) * ptmM1.row[2]
//                      + ( lane3 of ptmM0.row[i], bitwise-ANDed with KTgV_000F )
//
//  »ptmRet«    Destination matrix; all three rows are overwritten.
//  »ptmM0«     Left-hand matrix; its lanes supply the broadcast scale factors.
//  »ptmM1«     Right-hand matrix; its rows are the vectors being combined.
//
//  NOTE(review): KTgV_000F is presumably a bit mask that keeps only lane 3, which would make the last term
//  ( 0, 0, 0, lane3 ) - i.e. the implicit fourth row ( 0, 0, 0, 1 ) of an affine transform.  Confirm against
//  the constant's definition.
TgVOID M_CAT_F32_34( PCU_TgMAT_F32_34 ptmRet, CPCU_TgMAT_F32_34 ptmM0, CPCU_TgMAT_F32_34 ptmM1 )
{
    //  Read every input up front; nothing is stored until all three result rows have been formed.
    const __m128                        mvLhs0 = ptmM0->m_avRow[0].m_mData;
    const __m128                        mvLhs1 = ptmM0->m_avRow[1].m_mData;
    const __m128                        mvLhs2 = ptmM0->m_avRow[2].m_mData;
    const __m128                        mvRhs0 = ptmM1->m_avRow[0].m_mData;
    const __m128                        mvRhs1 = ptmM1->m_avRow[1].m_mData;
    const __m128                        mvRhs2 = ptmM1->m_avRow[2].m_mData;
    const __m128                        mvMask = KTgV_000F.m_f32_v04.m_mData;

    //  Shuffle immediates 0x00 / 0x55 / 0xAA / 0xFF broadcast lane 0 / 1 / 2 / 3 across the whole register.

    //  Terms of output row 0
    const __m128                        mvT0_0 = _mm_mul_ps( _mm_shuffle_ps( mvLhs0, mvLhs0, 0x00 ), mvRhs0 );
    const __m128                        mvT0_1 = _mm_mul_ps( _mm_shuffle_ps( mvLhs0, mvLhs0, 0x55 ), mvRhs1 );
    const __m128                        mvT0_2 = _mm_mul_ps( _mm_shuffle_ps( mvLhs0, mvLhs0, 0xAA ), mvRhs2 );
    const __m128                        mvT0_3 = _mm_and_ps( _mm_shuffle_ps( mvLhs0, mvLhs0, 0xFF ), mvMask );

    //  Terms of output row 1
    const __m128                        mvT1_0 = _mm_mul_ps( _mm_shuffle_ps( mvLhs1, mvLhs1, 0x00 ), mvRhs0 );
    const __m128                        mvT1_1 = _mm_mul_ps( _mm_shuffle_ps( mvLhs1, mvLhs1, 0x55 ), mvRhs1 );
    const __m128                        mvT1_2 = _mm_mul_ps( _mm_shuffle_ps( mvLhs1, mvLhs1, 0xAA ), mvRhs2 );
    const __m128                        mvT1_3 = _mm_and_ps( _mm_shuffle_ps( mvLhs1, mvLhs1, 0xFF ), mvMask );

    //  Terms of output row 2
    const __m128                        mvT2_0 = _mm_mul_ps( _mm_shuffle_ps( mvLhs2, mvLhs2, 0x00 ), mvRhs0 );
    const __m128                        mvT2_1 = _mm_mul_ps( _mm_shuffle_ps( mvLhs2, mvLhs2, 0x55 ), mvRhs1 );
    const __m128                        mvT2_2 = _mm_mul_ps( _mm_shuffle_ps( mvLhs2, mvLhs2, 0xAA ), mvRhs2 );
    const __m128                        mvT2_3 = _mm_and_ps( _mm_shuffle_ps( mvLhs2, mvLhs2, 0xFF ), mvMask );

    //  Pairwise summation: ( term0 + term1 ) + ( term2 + term3 ).
    ptmRet->m_avRow[0].m_mData = _mm_add_ps( _mm_add_ps( mvT0_0, mvT0_1 ), _mm_add_ps( mvT0_2, mvT0_3 ) );
    ptmRet->m_avRow[1].m_mData = _mm_add_ps( _mm_add_ps( mvT1_0, mvT1_1 ), _mm_add_ps( mvT1_2, mvT1_3 ) );
    ptmRet->m_avRow[2].m_mData = _mm_add_ps( _mm_add_ps( mvT2_0, mvT2_1 ), _mm_add_ps( mvT2_2, mvT2_3 ) );
}




// ---- INVERSE FUNCTIONS ------------------------------------------------------------------------------------------------------- //

//  Invert a 3x4 matrix given a caller-supplied determinant, storing the result in ptmRet.
//
//  »ptmRet«    Destination matrix; all three rows are overwritten after every input has been read.
//  »tvDet«     Divisor applied to the three numerator rows.  It is not validated here, so a zero value goes
//              straight into _mm_div_ps.
//  »ptmM1«     Matrix to invert.
//
//  Outline of the computation:
//    1. Three numerator vectors are built from 2x2 products of the row lanes and divided by the determinant.
//    2. Those three quotient rows are regrouped column-wise, scaled by lane 3 of each input row, summed and negated.
//    3. Each negated sum is placed in lane 3 of the matching quotient row and stored.
//
//  NOTE(review): the lane descriptions below assume _MM_PERM( a, b, c, d ) takes lanes a, b of the first operand
//  for result lanes 0, 1 and lanes c, d of the second operand for result lanes 2, 3.  Under that reading the
//  numerators are the adjugate rows of the upper-left 3x3 and step 2 yields -( inverse3x3 * lane-3 column ), i.e. the
//  usual affine inverse.  Confirm against the macro definition.
//  NOTE(review): KTgV_FFF0 is presumably a mask that clears lane 3, KTgV_UNIT_W_F32_04 presumably ( 0, 0, 0, 1 ) and
//  KTgV_ZERO_F32_04 presumably all zeros - verify against their definitions.
TgVOID M_INV_DET_F32_34( PCU_TgMAT_F32_34 ptmRet, C_TgVEC_M_F32_04 tvDet, CPCU_TgMAT_F32_34 ptmM1 )
{
    const __m128                        mvRow0 = ptmM1->m_avRow[0].m_mData;
    const __m128                        mvRow1 = ptmM1->m_avRow[1].m_mData;
    const __m128                        mvRow2 = ptmM1->m_avRow[2].m_mData;
    const __m128                        mvMask = KTgV_FFF0.m_f32_v04.m_mData;

    //  Divisor: the masked determinant plus the unit-W constant (presumably so lane 3 divides by one, not zero).
    const __m128                        mvDetMasked = _mm_and_ps( mvMask, tvDet );
    const __m128                        mvDivisor = _mm_add_ps( KTgV_UNIT_W_F32_04.m_mData, mvDetMasked );

    //  ---- Quotient row 0: built from lanes 1 and 2 of the input rows ---------------------------------------------- //
    const __m128                        mvA_Pair = _mm_shuffle_ps( mvRow0, mvRow1, _MM_PERM( 1, 2, 1, 2 ) );
    const __m128                        mvA_Last = _mm_shuffle_ps( mvRow2, mvRow2, _MM_PERM( 1, 2, 1, 2 ) );
    const __m128                        mvA_Pos0 = _mm_shuffle_ps( mvA_Pair, mvA_Pair, _MM_PERM( 2, 1, 0, 3 ) );
    const __m128                        mvA_Pos1 = _mm_shuffle_ps( mvA_Last, mvA_Pair, _MM_PERM( 1, 0, 3, 3 ) );
    const __m128                        mvA_Neg0 = _mm_shuffle_ps( mvA_Pair, mvA_Pair, _MM_PERM( 3, 0, 1, 3 ) );
    const __m128                        mvA_Neg1 = _mm_shuffle_ps( mvA_Last, mvA_Pair, _MM_PERM( 0, 1, 2, 3 ) );
    const __m128                        mvA_Num = _mm_sub_ps( _mm_mul_ps( mvA_Pos0, mvA_Pos1 ), _mm_mul_ps( mvA_Neg0, mvA_Neg1 ) );
    const __m128                        mvInv0 = _mm_and_ps( mvMask, _mm_div_ps( mvA_Num, mvDivisor ) );

    //  ---- Quotient row 1: built from lanes 0 and 2; the two products swap roles relative to rows 0 and 2 ---------- //
    const __m128                        mvB_Pair = _mm_shuffle_ps( mvRow0, mvRow1, _MM_PERM( 0, 2, 0, 2 ) );
    const __m128                        mvB_Last = _mm_shuffle_ps( mvRow2, mvRow2, _MM_PERM( 0, 2, 0, 2 ) );
    const __m128                        mvB_Pos0 = _mm_shuffle_ps( mvB_Pair, mvB_Pair, _MM_PERM( 3, 0, 1, 3 ) );
    const __m128                        mvB_Pos1 = _mm_shuffle_ps( mvB_Last, mvB_Pair, _MM_PERM( 0, 1, 2, 3 ) );
    const __m128                        mvB_Neg0 = _mm_shuffle_ps( mvB_Pair, mvB_Pair, _MM_PERM( 2, 1, 0, 3 ) );
    const __m128                        mvB_Neg1 = _mm_shuffle_ps( mvB_Last, mvB_Pair, _MM_PERM( 1, 0, 3, 3 ) );
    const __m128                        mvB_Num = _mm_sub_ps( _mm_mul_ps( mvB_Pos0, mvB_Pos1 ), _mm_mul_ps( mvB_Neg0, mvB_Neg1 ) );
    const __m128                        mvInv1 = _mm_and_ps( mvMask, _mm_div_ps( mvB_Num, mvDivisor ) );

    //  ---- Quotient row 2: built from lanes 0 and 1 of the input rows ---------------------------------------------- //
    const __m128                        mvC_Pair = _mm_shuffle_ps( mvRow0, mvRow1, _MM_PERM( 0, 1, 0, 1 ) );
    const __m128                        mvC_Last = _mm_shuffle_ps( mvRow2, mvRow2, _MM_PERM( 0, 1, 0, 1 ) );
    const __m128                        mvC_Pos0 = _mm_shuffle_ps( mvC_Pair, mvC_Pair, _MM_PERM( 2, 1, 0, 3 ) );
    const __m128                        mvC_Pos1 = _mm_shuffle_ps( mvC_Last, mvC_Pair, _MM_PERM( 1, 0, 3, 3 ) );
    const __m128                        mvC_Neg0 = _mm_shuffle_ps( mvC_Pair, mvC_Pair, _MM_PERM( 3, 0, 1, 3 ) );
    const __m128                        mvC_Neg1 = _mm_shuffle_ps( mvC_Last, mvC_Pair, _MM_PERM( 0, 1, 2, 3 ) );
    const __m128                        mvC_Num = _mm_sub_ps( _mm_mul_ps( mvC_Pos0, mvC_Pos1 ), _mm_mul_ps( mvC_Neg0, mvC_Neg1 ) );
    const __m128                        mvInv2 = _mm_and_ps( mvMask, _mm_div_ps( mvC_Num, mvDivisor ) );

    //  ---- Regroup the quotient rows column-wise (element ij = lane j of quotient row i) --------------------------- //
    const __m128                        mvLo01 = _mm_shuffle_ps( mvInv0, mvInv1, _MM_PERM( 0, 1, 0, 1 ) ); // 00, 01, 10, 11
    const __m128                        mvHi01 = _mm_shuffle_ps( mvInv0, mvInv1, _MM_PERM( 2, 3, 2, 3 ) ); // 02, 03, 12, 13
    const __m128                        mvCol0 = _mm_shuffle_ps( mvLo01, mvInv2, _MM_PERM( 0, 2, 0, 0 ) ); // 00, 10, 20, 20
    const __m128                        mvCol1 = _mm_shuffle_ps( mvLo01, mvInv2, _MM_PERM( 1, 3, 1, 1 ) ); // 01, 11, 21, 21
    const __m128                        mvCol2 = _mm_shuffle_ps( mvHi01, mvInv2, _MM_PERM( 0, 2, 2, 2 ) ); // 02, 12, 22, 22

    //  ---- Scale each column by the broadcast lane 3 of the matching input row ------------------------------------- //
    const __m128                        mvScaled0 = _mm_mul_ps( _mm_shuffle_ps( mvRow0, mvRow0, _MM_PERM( 3, 3, 3, 3 ) ), mvCol0 );
    const __m128                        mvScaled1 = _mm_mul_ps( _mm_shuffle_ps( mvRow1, mvRow1, _MM_PERM( 3, 3, 3, 3 ) ), mvCol1 );
    const __m128                        mvScaled2 = _mm_mul_ps( _mm_shuffle_ps( mvRow2, mvRow2, _MM_PERM( 3, 3, 3, 3 ) ), mvCol2 );

    //  Negated sum, evaluated as ( 0 - s0 ) - ( s1 + s2 ), then masked.
    const __m128                        mvNegFirst = _mm_sub_ps( KTgV_ZERO_F32_04.m_mData, mvScaled0 );
    const __m128                        mvSumRest = _mm_add_ps( mvScaled1, mvScaled2 );
    const __m128                        mvLast = _mm_and_ps( mvMask, _mm_sub_ps( mvNegFirst, mvSumRest ) );

    //  ---- Move lane 0 / 1 / 2 of the negated sum into lane 3, fill the rest from its lane 3, and merge ------------ //
    const __m128                        mvLast0 = _mm_shuffle_ps( mvLast, mvLast, _MM_PERM( 3, 3, 3, 0 ) );
    const __m128                        mvLast1 = _mm_shuffle_ps( mvLast, mvLast, _MM_PERM( 3, 3, 3, 1 ) );
    const __m128                        mvLast2 = _mm_shuffle_ps( mvLast, mvLast, _MM_PERM( 3, 3, 3, 2 ) );

    ptmRet->m_avRow[0].m_mData = _mm_add_ps( mvLast0, mvInv0 );
    ptmRet->m_avRow[1].m_mData = _mm_add_ps( mvLast1, mvInv1 );
    ptmRet->m_avRow[2].m_mData = _mm_add_ps( mvLast2, mvInv2 );
}


//  Compute a determinant-style scalar from the three rows of a 3x4 matrix and return it replicated in every lane.
//
//  »ptmM1«     Matrix to evaluate.  Only values that end up in lanes 0-2 of the intermediate products reach the
//              result; the lane-3 product is computed but never summed.
//  »return«    A vector whose four lanes all hold the same sum.
//
//  NOTE(review): assuming _MM_PERM( a, b, c, d ) selects source lanes a, b, c, d for result lanes 0, 1, 2, 3, the two
//  permuted products form the cross product row1 x row2 and the result is row0 . ( row1 x row2 ), the determinant of
//  the upper-left 3x3.  Confirm against the macro definition.
TgVEC_M_F32_04 M_DET_F32_34( CPCU_TgMAT_F32_34 ptmM1 )
{
    const __m128                        mvRow0 = ptmM1->m_avRow[0].m_mData;
    const __m128                        mvRow1 = ptmM1->m_avRow[1].m_mData;
    const __m128                        mvRow2 = ptmM1->m_avRow[2].m_mData;

    //  Two rotations of rows 1 and 2; lane 3 stays in place in all four.
    const __m128                        mvRow1_120 = _mm_shuffle_ps( mvRow1, mvRow1, _MM_PERM( 1, 2, 0, 3 ) );
    const __m128                        mvRow2_201 = _mm_shuffle_ps( mvRow2, mvRow2, _MM_PERM( 2, 0, 1, 3 ) );
    const __m128                        mvRow1_201 = _mm_shuffle_ps( mvRow1, mvRow1, _MM_PERM( 2, 0, 1, 3 ) );
    const __m128                        mvRow2_120 = _mm_shuffle_ps( mvRow2, mvRow2, _MM_PERM( 1, 2, 0, 3 ) );

    //  Difference of the two lane-wise products, then weighted lane-wise by row 0.
    const __m128                        mvMinor = _mm_sub_ps( _mm_mul_ps( mvRow1_120, mvRow2_201 ), _mm_mul_ps( mvRow1_201, mvRow2_120 ) );
    const __m128                        mvTerms = _mm_mul_ps( mvRow0, mvMinor );

    //  Horizontal sum into lane 0, evaluated as term2 + ( term0 + term1 ).
    const __m128                        mvTerm1 = _mm_shuffle_ps( mvTerms, mvTerms, _MM_PERM( 1, 1, 1, 1 ) );
    const __m128                        mvTerm2 = _mm_shuffle_ps( mvTerms, mvTerms, _MM_PERM( 2, 2, 2, 2 ) );
    const __m128                        mvSum01 = _mm_add_ss( mvTerms, mvTerm1 );
    const __m128                        mvSum = _mm_add_ss( mvTerm2, mvSum01 );

    //  Broadcast lane 0 to all four lanes.
    return _mm_shuffle_ps( mvSum, mvSum, 0x00 );
}