/*
 * Combines the 3x4 matrices ptmM0 and ptmM1 into ptmRet (a matrix concatenation, judging by the name).
 *
 * For every row r of ptmM0, the matching output row is
 *     splat(r[0]) * M1.row0 + splat(r[1]) * M1.row1 + splat(r[2]) * M1.row2 + ( splat(r[3]) & KTgV_000F )
 * where splat(r[k]) is lane k of r broadcast to all four lanes.
 *
 * NOTE(review): KTgV_000F is presumably a bit mask that keeps only lane 3, which would make the last
 * term contribute just the fourth element of the ptmM0 row - confirm against its definition.
 *
 * ptmRet : receives the three result rows.
 * ptmM0  : matrix whose rows supply the per-lane scale factors.
 * ptmM1  : matrix whose rows are scaled and summed.
 */
TgVOID M_CAT_F32_34( PCU_TgMAT_F32_34 ptmRet, CPCU_TgMAT_F32_34 ptmM0, CPCU_TgMAT_F32_34 ptmM1 )
{
    /* Every input row is copied to a local before the first store to ptmRet. */
    const __m128 vLhs0 = ptmM0->m_avRow[0].m_mData;
    const __m128 vLhs1 = ptmM0->m_avRow[1].m_mData;
    const __m128 vLhs2 = ptmM0->m_avRow[2].m_mData;

    const __m128 vRhs0 = ptmM1->m_avRow[0].m_mData;
    const __m128 vRhs1 = ptmM1->m_avRow[1].m_mData;
    const __m128 vRhs2 = ptmM1->m_avRow[2].m_mData;

    const __m128 vMask = KTgV_000F.m_f32_v04.m_mData;

    /* Shuffle immediates 0x00 / 0x55 / 0xAA / 0xFF broadcast lane 0 / 1 / 2 / 3 respectively. */

    /* Output row 0: partial sums are paired as (lane0 + lane1) + (lane2 + lane3). */
    const __m128 vRow0_Lo = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x00 ), vRhs0 ),
                                        _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x55 ), vRhs1 ) );
    const __m128 vRow0_Hi = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xAA ), vRhs2 ),
                                        _mm_and_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xFF ), vMask ) );

    /* Output row 1. */
    const __m128 vRow1_Lo = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x00 ), vRhs0 ),
                                        _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x55 ), vRhs1 ) );
    const __m128 vRow1_Hi = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xAA ), vRhs2 ),
                                        _mm_and_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xFF ), vMask ) );

    /* Output row 2. */
    const __m128 vRow2_Lo = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x00 ), vRhs0 ),
                                        _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x55 ), vRhs1 ) );
    const __m128 vRow2_Hi = _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xAA ), vRhs2 ),
                                        _mm_and_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xFF ), vMask ) );

    ptmRet->m_avRow[0].m_mData = _mm_add_ps( vRow0_Lo, vRow0_Hi );
    ptmRet->m_avRow[1].m_mData = _mm_add_ps( vRow1_Lo, vRow1_Hi );
    ptmRet->m_avRow[2].m_mData = _mm_add_ps( vRow2_Lo, vRow2_Hi );
}
/*
 * Writes into ptmRet what appears to be the inverse of the 3x4 matrix ptmM1, using the determinant
 * supplied by the caller in tvDet (no determinant is computed here).
 *
 * Outline of the visible computation:
 *   1. Three vectors of 2x2 cofactor differences are formed from the input rows, divided by the
 *      determinant vector and masked with KTgV_FFF0 (mi10, mi20, mi30).
 *   2. Those three vectors are regrouped by lane, scaled by lane 3 of each input row, summed and
 *      negated (mi45).
 *   3. One lane of mi45 is moved into lane 3 of each output row and added to the masked cofactor row.
 *
 * NOTE(review): the lane annotations below assume _MM_PERM( a, b, c, d ) picks lanes a and b of the
 * first operand for output lanes 0 and 1, and lanes c and d of the second operand for output lanes
 * 2 and 3. The macro is defined outside this block - confirm.
 * NOTE(review): KTgV_FFF0 is presumably a mask keeping lanes 0..2 and clearing lane 3,
 * KTgV_UNIT_W_F32_04 presumably (0, 0, 0, 1) and KTgV_ZERO_F32_04 presumably all zero - confirm
 * against their definitions.
 *
 * The quotients come straight from _mm_div_ps; this function performs no check for a zero determinant.
 *
 * ptmRet : receives the three result rows.
 * tvDet  : determinant vector; only the lanes that survive the KTgV_FFF0 mask are used.
 * ptmM1  : matrix to invert. Rx.y below means lane 1 of row x, Rx.z lane 2, and so on.
 */
TgVOID M_INV_DET_F32_34( PCU_TgMAT_F32_34 ptmRet, C_TgVEC_M_F32_04 tvDet, CPCU_TgMAT_F32_34 ptmM1 )
{
/* All three input rows are read into locals before anything is stored to ptmRet. */
const register __m128 miR0 = ptmM1->m_avRow[0].m_mData;
const register __m128 miR1 = ptmM1->m_avRow[1].m_mData;
const register __m128 miR2 = ptmM1->m_avRow[2].m_mData;
/* Divisor: masked tvDet plus KTgV_UNIT_W_F32_04 - presumably (det, det, det, 1), so lane 3 does not divide by zero. */
const register __m128 mi00 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, tvDet );
const register __m128 miDet = _mm_add_ps( KTgV_UNIT_W_F32_04.m_mData, mi00 );
/* ---- First cofactor vector: uses lanes 1 and 2 of the input rows. ---- */
/* mi01 = (R0.y, R0.z, R1.y, R1.z), mi02 = (R2.y, R2.z, R2.y, R2.z) under the assumed _MM_PERM order. */
const register __m128 mi01 = _mm_shuffle_ps( miR0, miR1, _MM_PERM( 1, 2, 1, 2 ) );
const register __m128 mi02 = _mm_shuffle_ps( miR2, miR2, _MM_PERM( 1, 2, 1, 2 ) );
/* mi03 = (R1.y, R0.z, R0.y, R1.z), mi04 = (R2.z, R2.y, R1.z, R1.z): operands of the minuend products. */
const register __m128 mi03 = _mm_shuffle_ps( mi01, mi01, _MM_PERM( 2, 1, 0, 3 ) );
const register __m128 mi04 = _mm_shuffle_ps( mi02, mi01, _MM_PERM( 1, 0, 3, 3 ) );
/* mi05 = (R1.z, R0.y, R0.z, R1.z), mi06 = (R2.y, R2.z, R1.y, R1.z): operands of the subtrahend products. */
const register __m128 mi05 = _mm_shuffle_ps( mi01, mi01, _MM_PERM( 3, 0, 1, 3 ) );
const register __m128 mi06 = _mm_shuffle_ps( mi02, mi01, _MM_PERM( 0, 1, 2, 3 ) );
const register __m128 mi07 = _mm_mul_ps( mi03, mi04 );
const register __m128 mi08 = _mm_mul_ps( mi05, mi06 );
/* mi09 = (R1.y*R2.z - R1.z*R2.y, R0.z*R2.y - R0.y*R2.z, R0.y*R1.z - R0.z*R1.y, lane 3 unused). */
const register __m128 mi09 = _mm_sub_ps( mi07, mi08 );
/* Divide by the determinant vector, then mask with KTgV_FFF0. */
const register __m128 mi10 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, _mm_div_ps( mi09, miDet ) );
/* ---- Second cofactor vector: uses lanes 0 and 2 of the input rows. ---- */
/* mi11 = (R0.x, R0.z, R1.x, R1.z), mi12 = (R2.x, R2.z, R2.x, R2.z). */
const register __m128 mi11 = _mm_shuffle_ps( miR0, miR1, _MM_PERM( 0, 2, 0, 2 ) );
const register __m128 mi12 = _mm_shuffle_ps( miR2, miR2, _MM_PERM( 0, 2, 0, 2 ) );
/* Same shuffle patterns as above, but the minuend/subtrahend roles are swapped, which flips the sign. */
const register __m128 mi13 = _mm_shuffle_ps( mi11, mi11, _MM_PERM( 3, 0, 1, 3 ) );
const register __m128 mi14 = _mm_shuffle_ps( mi12, mi11, _MM_PERM( 0, 1, 2, 3 ) );
const register __m128 mi15 = _mm_shuffle_ps( mi11, mi11, _MM_PERM( 2, 1, 0, 3 ) );
const register __m128 mi16 = _mm_shuffle_ps( mi12, mi11, _MM_PERM( 1, 0, 3, 3 ) );
const register __m128 mi17 = _mm_mul_ps( mi13, mi14 );
const register __m128 mi18 = _mm_mul_ps( mi15, mi16 );
/* mi19 = (R1.z*R2.x - R1.x*R2.z, R0.x*R2.z - R0.z*R2.x, R0.z*R1.x - R0.x*R1.z, lane 3 unused). */
const register __m128 mi19 = _mm_sub_ps( mi17, mi18 );
const register __m128 mi20 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, _mm_div_ps( mi19, miDet ) );
/* ---- Third cofactor vector: uses lanes 0 and 1 of the input rows. ---- */
/* mi21 = (R0.x, R0.y, R1.x, R1.y), mi22 = (R2.x, R2.y, R2.x, R2.y). */
const register __m128 mi21 = _mm_shuffle_ps( miR0, miR1, _MM_PERM( 0, 1, 0, 1 ) );
const register __m128 mi22 = _mm_shuffle_ps( miR2, miR2, _MM_PERM( 0, 1, 0, 1 ) );
const register __m128 mi23 = _mm_shuffle_ps( mi21, mi21, _MM_PERM( 2, 1, 0, 3 ) );
const register __m128 mi24 = _mm_shuffle_ps( mi22, mi21, _MM_PERM( 1, 0, 3, 3 ) );
const register __m128 mi25 = _mm_shuffle_ps( mi21, mi21, _MM_PERM( 3, 0, 1, 3 ) );
const register __m128 mi26 = _mm_shuffle_ps( mi22, mi21, _MM_PERM( 0, 1, 2, 3 ) );
const register __m128 mi27 = _mm_mul_ps( mi23, mi24 );
const register __m128 mi28 = _mm_mul_ps( mi25, mi26 );
/* mi29 = (R1.x*R2.y - R1.y*R2.x, R0.y*R2.x - R0.x*R2.y, R0.x*R1.y - R0.y*R1.x, lane 3 unused). */
const register __m128 mi29 = _mm_sub_ps( mi27, mi28 );
const register __m128 mi30 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, _mm_div_ps( mi29, miDet ) );
/* ---- Lane-3 terms of the output rows. ---- */
/* Regroup mi10 / mi20 / mi30 by lane: mi34, mi35 and mi36 gather lane 0, 1 and 2 of the three vectors. */
/* Their lane 3 repeats the mi30 element and is discarded by the mask applied in mi45. */
const register __m128 mi31 = _mm_shuffle_ps( mi10, mi20, _MM_PERM( 0, 1, 0, 1 ) );
const register __m128 mi32 = _mm_shuffle_ps( mi10, mi20, _MM_PERM( 2, 3, 2, 3 ) );
const register __m128 mi34 = _mm_shuffle_ps( mi31, mi30, _MM_PERM( 0, 2, 0, 0 ) );
const register __m128 mi35 = _mm_shuffle_ps( mi31, mi30, _MM_PERM( 1, 3, 1, 1 ) );
const register __m128 mi36 = _mm_shuffle_ps( mi32, mi30, _MM_PERM( 0, 2, 2, 2 ) );
/* Broadcast lane 3 of each input row (presumably the translation components - confirm). */
const register __m128 mi37 = _mm_shuffle_ps( miR0, miR0, _MM_PERM( 3, 3, 3, 3 ) );
const register __m128 mi38 = _mm_shuffle_ps( miR1, miR1, _MM_PERM( 3, 3, 3, 3 ) );
const register __m128 mi39 = _mm_shuffle_ps( miR2, miR2, _MM_PERM( 3, 3, 3, 3 ) );
const register __m128 mi40 = _mm_mul_ps( mi37, mi34 );
const register __m128 mi41 = _mm_mul_ps( mi38, mi35 );
const register __m128 mi42 = _mm_mul_ps( mi39, mi36 );
/* mi45 = ( KTgV_ZERO_F32_04 - mi40 ) - ( mi41 + mi42 ), masked with KTgV_FFF0. */
const register __m128 mi43 = _mm_sub_ps( KTgV_ZERO_F32_04.m_mData, mi40 );
const register __m128 mi44 = _mm_add_ps( mi41, mi42 );
const register __m128 mi45 = _mm_and_ps( KTgV_FFF0.m_f32_v04.m_mData, _mm_sub_ps( mi43, mi44 ) );
/* Move lane 0 / 1 / 2 of mi45 into lane 3; lanes 0..2 are filled from lane 3 of mi45 (zero if the mask cleared it, as assumed). */
const register __m128 mi46 = _mm_shuffle_ps( mi45, mi45, _MM_PERM( 3, 3, 3, 0 ) );
const register __m128 mi47 = _mm_shuffle_ps( mi45, mi45, _MM_PERM( 3, 3, 3, 1 ) );
const register __m128 mi48 = _mm_shuffle_ps( mi45, mi45, _MM_PERM( 3, 3, 3, 2 ) );
/* Each output row = masked cofactor vector + its lane-3 term. */
ptmRet->m_avRow[0].m_mData = _mm_add_ps( mi46, mi10 );
ptmRet->m_avRow[1].m_mData = _mm_add_ps( mi47, mi20 );
ptmRet->m_avRow[2].m_mData = _mm_add_ps( mi48, mi30 );
}
/*
 * Returns a vector with one scalar broadcast to all four lanes: the sum of lanes 0..2 of
 * row0 * ( A - B ), where A and B are lane-wise products of shuffled copies of row 1 and row 2.
 *
 * NOTE(review): if _MM_PERM( a, b, c, d ) lists source lanes in output-lane order (the macro is
 * defined outside this block - confirm), A - B is the cross product of rows 1 and 2 and the result
 * is the scalar triple product row0 . ( row1 x row2 ), i.e. the determinant of the 3x3 part.
 *
 * ptmM1 : matrix whose three rows are read; lane 3 of each row does not reach the result.
 */
TgVEC_M_F32_04 M_DET_F32_34( CPCU_TgMAT_F32_34 ptmM1 )
{
    const __m128 vRow0 = ptmM1->m_avRow[0].m_mData;
    const __m128 vRow1 = ptmM1->m_avRow[1].m_mData;
    const __m128 vRow2 = ptmM1->m_avRow[2].m_mData;

    /* Lane-wise products of the two oppositely shuffled copies of row 1 and row 2. */
    const __m128 vProdA = _mm_mul_ps( _mm_shuffle_ps( vRow1, vRow1, _MM_PERM( 1, 2, 0, 3 ) ),
                                      _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 2, 0, 1, 3 ) ) );
    const __m128 vProdB = _mm_mul_ps( _mm_shuffle_ps( vRow1, vRow1, _MM_PERM( 2, 0, 1, 3 ) ),
                                      _mm_shuffle_ps( vRow2, vRow2, _MM_PERM( 1, 2, 0, 3 ) ) );

    /* Scale the difference lane-wise by row 0. */
    const __m128 vTerms = _mm_mul_ps( vRow0, _mm_sub_ps( vProdA, vProdB ) );

    /* Horizontal sum into lane 0: ( lane0 + lane1 ) first, then lane2 is added to that. */
    const __m128 vLane1 = _mm_shuffle_ps( vTerms, vTerms, _MM_PERM( 1, 1, 1, 1 ) );
    const __m128 vLane2 = _mm_shuffle_ps( vTerms, vTerms, _MM_PERM( 2, 2, 2, 2 ) );
    const __m128 vSum01 = _mm_add_ss( vTerms, vLane1 );
    const __m128 vTotal = _mm_add_ss( vLane2, vSum01 );

    /* Immediate 0x00 broadcasts lane 0 to every lane of the returned vector. */
    return _mm_shuffle_ps( vTotal, vTotal, 0x00 );
}