/* Matrix concatenation, 3x4 times 4x4: writes the product (*ptmM1) * (*ptmM2) into *ptmM0.
 *
 *   ptmM0  destination 4x4 matrix; all four rows are written.
 *   ptmM1  left operand; only rows 0..2 are read.
 *   ptmM2  right operand; rows 0..3 are read.
 *
 * Result rows 0..2 are  sum over k of  M1[r][k] * M2.row[k].  Result row 3 is a straight copy of row 3 of ptmM2, which is
 * what the product gives when the absent fourth row of the left operand is taken to be (0, 0, 0, 1).
 */
TgVOID M_CAT_34_44_F32_44( PCU_TgMAT_F32_44 ptmM0, CPCU_TgMAT_F32_34 ptmM1, CPCU_TgMAT_F32_44 ptmM2 )
{
    /* Fetch every operand row up front; nothing is stored to ptmM0 until all three result rows are known. */
    const __m128 vLhs0 = ptmM1->m_avRow[0].m_mData;
    const __m128 vLhs1 = ptmM1->m_avRow[1].m_mData;
    const __m128 vLhs2 = ptmM1->m_avRow[2].m_mData;

    const __m128 vRhs0 = ptmM2->m_avRow[0].m_mData;
    const __m128 vRhs1 = ptmM2->m_avRow[1].m_mData;
    const __m128 vRhs2 = ptmM2->m_avRow[2].m_mData;
    const __m128 vRhs3 = ptmM2->m_avRow[3].m_mData;

    /* Shuffle immediates 0x00 / 0x55 / 0xAA / 0xFF replicate element 0 / 1 / 2 / 3 of a row across all four lanes.
     * The four products of each row are summed pairwise, (t0 + t1) + (t2 + t3), so rounding order stays fixed. */
    const __m128 vOut0 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xAA ), vRhs2 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xFF ), vRhs3 ) ) );

    const __m128 vOut1 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xAA ), vRhs2 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xFF ), vRhs3 ) ) );

    const __m128 vOut2 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xAA ), vRhs2 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xFF ), vRhs3 ) ) );

    ptmM0->m_avRow[0].m_mData = vOut0;
    ptmM0->m_avRow[1].m_mData = vOut1;
    ptmM0->m_avRow[2].m_mData = vOut2;

    /* Last row passes through from the right operand unchanged (read from ptmM2 at this point, after the stores above). */
    ptmM0->m_avRow[3].m_mData = ptmM2->m_avRow[3].m_mData;
}
/* Matrix concatenation, 4x4 times 3x4: writes the product (*ptmM1) * (*ptmM2) into *ptmM0.
 *
 *   ptmM0  destination 4x4 matrix; all four rows are written.
 *   ptmM1  left operand; rows 0..3 are read.
 *   ptmM2  right operand; only rows 0..2 are read.
 *
 * Each result row r is
 *     M1[r][0] * M2.row[0]  +  M1[r][1] * M2.row[1]  +  M1[r][2] * M2.row[2]  +  ( splat(M1[r][3]) AND KTgV_000F )
 * NOTE(review): KTgV_000F is defined elsewhere; presumably it is a bit mask that keeps only the last lane, which would make
 * the final term M1[r][3] * (0, 0, 0, 1), i.e. the contribution of an implicit fourth row of ptmM2 - confirm against its
 * definition.
 */
TgVOID M_CAT_44_34_F32_44( PCU_TgMAT_F32_44 ptmM0, CPCU_TgMAT_F32_44 ptmM1, CPCU_TgMAT_F32_34 ptmM2 )
{
    /* Fetch every operand up front; nothing is stored to ptmM0 until all four result rows are known. */
    const __m128 vLhs0 = ptmM1->m_avRow[0].m_mData;
    const __m128 vLhs1 = ptmM1->m_avRow[1].m_mData;
    const __m128 vLhs2 = ptmM1->m_avRow[2].m_mData;
    const __m128 vLhs3 = ptmM1->m_avRow[3].m_mData;

    const __m128 vRhs0 = ptmM2->m_avRow[0].m_mData;
    const __m128 vRhs1 = ptmM2->m_avRow[1].m_mData;
    const __m128 vRhs2 = ptmM2->m_avRow[2].m_mData;

    const __m128 vLaneMask = KTgV_000F.m_f32_v04.m_mData;

    /* Shuffle immediates 0x00 / 0x55 / 0xAA / 0xFF replicate element 0 / 1 / 2 / 3 of a row across all four lanes.
     * The four terms of each row are summed pairwise, (t0 + t1) + (t2 + t3), so rounding order stays fixed. */
    const __m128 vOut0 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xAA ), vRhs2 ),
                    _mm_and_ps( _mm_shuffle_ps( vLhs0, vLhs0, 0xFF ), vLaneMask ) ) );

    const __m128 vOut1 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xAA ), vRhs2 ),
                    _mm_and_ps( _mm_shuffle_ps( vLhs1, vLhs1, 0xFF ), vLaneMask ) ) );

    const __m128 vOut2 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xAA ), vRhs2 ),
                    _mm_and_ps( _mm_shuffle_ps( vLhs2, vLhs2, 0xFF ), vLaneMask ) ) );

    const __m128 vOut3 = _mm_add_ps(
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs3, vLhs3, 0x00 ), vRhs0 ),
                    _mm_mul_ps( _mm_shuffle_ps( vLhs3, vLhs3, 0x55 ), vRhs1 ) ),
        _mm_add_ps( _mm_mul_ps( _mm_shuffle_ps( vLhs3, vLhs3, 0xAA ), vRhs2 ),
                    _mm_and_ps( _mm_shuffle_ps( vLhs3, vLhs3, 0xFF ), vLaneMask ) ) );

    ptmM0->m_avRow[0].m_mData = vOut0;
    ptmM0->m_avRow[1].m_mData = vOut1;
    ptmM0->m_avRow[2].m_mData = vOut2;
    ptmM0->m_avRow[3].m_mData = vOut3;
}