Home

Resume

Blog

Teikitu


// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= //
//  »Project«   Teikitu Gaming System (TgS) (∂)
//  »File«      TgS (WIN) Common - Base - API - Platform.c
//  »Author«    Andrew Aye (EMail: mailto:andrew.aye@gmail.com, Web: http://www.andrewaye.com)
//  »Version«   4.0
// ------------------------------------------------------------------------------------------------------------------------------ //
//  Copyright: © 2002-2010, Andrew Aye.  All Rights Reserved.
//  This software is free for non-commercial use. Redistribution and use in source and binary forms, with or without modification,
//  are permitted provided that the following conditions are met: 
//    Redistributions of source code must retain this copyright notice, this list of conditions and the following disclaimers. 
//    Redistributions in binary form must reproduce this copyright notice, this list of conditions and the following
//      disclaimers in the documentation and other materials provided with the distribution. 
//  Neither the names of the copyright owner nor the names of its contributors may be used to endorse or promote products derived
//  from this software without specific prior written permission. 
//  The intellectual property rights of the algorithms used reside with Andrew Aye.  You may not use this software, in whole or
//  in part, in support of any commercial product without the express written consent of the author.
//  There is no warranty or other guarantee of fitness of this software for any purpose. It is provided solely "as is".
// =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= //


// -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-. //
//  Public Functions
// -.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-. //

// ---- MIXED MATRIX CONCATENATION ---------------------------------------------------------------------------------------------- //

// Concatenates the 3x4 matrix ptmM1 with the 4x4 matrix ptmM2 and writes the 4x4 result into ptmM0.
//   ptmM0  [out] receives the four result rows.
//   ptmM1  [in]  left operand; only rows 0..2 are read.
//   ptmM2  [in]  right operand; all four rows are read.
// For i in 0..2, result row i is the sum over k in 0..3 of (element k of ptmM1 row i) * (ptmM2 row k).  Result row 3 is a plain
// copy of ptmM2 row 3, which is what the same sum yields if ptmM1 is treated as carrying a fourth row of (0,0,0,1).
TgVOID M_CAT_34_44_F32_44( PCU_TgMAT_F32_44 ptmM0, CPCU_TgMAT_F32_34 ptmM1, CPCU_TgMAT_F32_44 ptmM2 )
{
    // Every operand row used in the arithmetic is read before the first store to ptmM0.
    const __m128                        vLhs0 = ptmM1->m_avRow[0].m_mData;
    const __m128                        vLhs1 = ptmM1->m_avRow[1].m_mData;
    const __m128                        vLhs2 = ptmM1->m_avRow[2].m_mData;

    const __m128                        vRhs0 = ptmM2->m_avRow[0].m_mData;
    const __m128                        vRhs1 = ptmM2->m_avRow[1].m_mData;
    const __m128                        vRhs2 = ptmM2->m_avRow[2].m_mData;
    const __m128                        vRhs3 = ptmM2->m_avRow[3].m_mData;

    // The shuffle masks 0x00 / 0x55 / 0xAA / 0xFF replicate lane 0 / 1 / 2 / 3 of the source across all four lanes.
    // Each result row is accumulated as (term0 + term1) + (term2 + term3).
    const __m128                        vOut0 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xAA ), vRhs2 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xFF ), vRhs3 ) ) );

    const __m128                        vOut1 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xAA ), vRhs2 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xFF ), vRhs3 ) ) );

    const __m128                        vOut2 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xAA ), vRhs2 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xFF ), vRhs3 ) ) );

    ptmM0->m_avRow[0].m_mData = vOut0;
    ptmM0->m_avRow[1].m_mData = vOut1;
    ptmM0->m_avRow[2].m_mData = vOut2;

    // The last row needs no arithmetic: it is taken directly from the right operand.
    ptmM0->m_avRow[3].m_mData = ptmM2->m_avRow[3].m_mData;
}


// Concatenates the 4x4 matrix ptmM1 with the 3x4 matrix ptmM2 and writes the 4x4 result into ptmM0.
//   ptmM0  [out] receives the four result rows.
//   ptmM1  [in]  left operand; all four rows are read.
//   ptmM2  [in]  right operand; only rows 0..2 are read.
// For i in 0..3, result row i is the sum over k in 0..2 of (element k of ptmM1 row i) * (ptmM2 row k), plus a fourth term built
// by bitwise-ANDing the broadcast of element 3 of ptmM1 row i with the constant KTgV_000F.
// NOTE(review): KTgV_000F is defined elsewhere; presumably it is a mask that keeps only the last lane, which would make the
// fourth term equal to element 3 times an implicit ptmM2 row of (0,0,0,1) - confirm against the constant's definition.
TgVOID M_CAT_44_34_F32_44( PCU_TgMAT_F32_44 ptmM0, CPCU_TgMAT_F32_44 ptmM1, CPCU_TgMAT_F32_34 ptmM2 )
{
    // Every operand is read before the first store to ptmM0.
    const __m128                        vLhs0 = ptmM1->m_avRow[0].m_mData;
    const __m128                        vLhs1 = ptmM1->m_avRow[1].m_mData;
    const __m128                        vLhs2 = ptmM1->m_avRow[2].m_mData;
    const __m128                        vLhs3 = ptmM1->m_avRow[3].m_mData;

    const __m128                        vRhs0 = ptmM2->m_avRow[0].m_mData;
    const __m128                        vRhs1 = ptmM2->m_avRow[1].m_mData;
    const __m128                        vRhs2 = ptmM2->m_avRow[2].m_mData;

    const __m128                        vMask = KTgV_000F.m_f32_v04.m_mData;

    // The shuffle masks 0x00 / 0x55 / 0xAA / 0xFF replicate lane 0 / 1 / 2 / 3 of the source across all four lanes.
    // Each result row is accumulated as (term0 + term1) + (term2 + masked term3).
    const __m128                        vOut0 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xAA ), vRhs2 ),
                    _mm_and_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xFF ), vMask ) ) );

    const __m128                        vOut1 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xAA ), vRhs2 ),
                    _mm_and_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xFF ), vMask ) ) );

    const __m128                        vOut2 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xAA ), vRhs2 ),
                    _mm_and_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xFF ), vMask ) ) );

    const __m128                        vOut3 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs3, vLhs3, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs3, vLhs3, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs3, vLhs3, 0xAA ), vRhs2 ),
                    _mm_and_ps( _mm_shuffle_ps( vLhs3, vLhs3, 0xFF ), vMask ) ) );

    ptmM0->m_avRow[0].m_mData = vOut0;
    ptmM0->m_avRow[1].m_mData = vOut1;
    ptmM0->m_avRow[2].m_mData = vOut2;
    ptmM0->m_avRow[3].m_mData = vOut3;
}