各種SIMD拡張命令(SSE2、SSSE3、AVX2)に対応
This commit is contained in:
parent
767a55d293
commit
4544996900
7
MEMO.txt
Normal file
7
MEMO.txt
Normal file
@ -0,0 +1,7 @@
|
||||
勉強用に復号処理をSIMD拡張命令で実装。
|
||||
既存のコードや資料などを参考に、SSE2、SSSE3、AVX2に対応した。
|
||||
初期化時には、AVX2、SSSE3、SSE2、拡張命令なしの順で利用可能なものを選択する。
|
||||
|
||||
ラウンド関数のあと、最後のXOR演算はもっとよい方法があればよかったが、思いつかなかった。
|
||||
Windows環境(x86-64)でのみ動作確認。開発環境は Visual Studio 2017 Community (15.9.7)。
|
||||
あくまで勉強用なので、安定的な動作の保証はない。
|
@ -1,10 +1,11 @@
|
||||
// IB25Decoder.h: IB25Decoder クラスのインターフェイス
|
||||
// IB25Decoder.h: IB25Decoder クラスのインターフェイス
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// 定数定義
|
||||
@ -47,6 +48,7 @@ public:
|
||||
virtual void DiscardScramblePacket(const bool bEnable = true) = 0;
|
||||
virtual void EnableEmmProcess(const bool bEnable = true) = 0;
|
||||
virtual void SetMulti2Round(const int32_t round = 4) = 0; // オリジナルに追加
|
||||
virtual void SetSimdMode(const int32_t instruction = 3) = 0; // オリジナルに追加
|
||||
|
||||
virtual const DWORD GetDescramblingState(const WORD wProgramID) = 0;
|
||||
|
||||
|
@ -17,7 +17,7 @@ CFLAGS = -O2 -fPIC -Wall $(PCSC_CFLAGS) -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=
|
||||
LIBS = $(PCSC_LDLIBS)
|
||||
LDFLAGS =
|
||||
|
||||
OBJS = arib_std_b25.o b_cas_card.o multi2.o ts_section_parser.o
|
||||
OBJS = arib_std_b25.o b_cas_card.o multi2.o multi2_simd.o ts_section_parser.o
|
||||
HEADERS = arib_std_b25.h arib_std_b25_error_code.h b_cas_card.h portable.h
|
||||
TARGET_APP = b25
|
||||
TARGET_LIB = libaribb25.so
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "arib_std_b25.h"
|
||||
#include "arib_std_b25_error_code.h"
|
||||
#include "multi2.h"
|
||||
#include "multi2_simd.h"
|
||||
#include "ts_common_types.h"
|
||||
#include "ts_section_parser.h"
|
||||
|
||||
@ -88,6 +89,9 @@ typedef struct {
|
||||
int32_t multi2_round;
|
||||
int32_t strip;
|
||||
int32_t emm_proc_on;
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
int32_t simd_instruction;
|
||||
#endif
|
||||
|
||||
int32_t unit_size;
|
||||
|
||||
@ -315,6 +319,7 @@ static void release_arib_std_b25(void *std_b25);
|
||||
static int set_multi2_round_arib_std_b25(void *std_b25, int32_t round);
|
||||
static int set_strip_arib_std_b25(void *std_b25, int32_t strip);
|
||||
static int set_emm_proc_arib_std_b25(void *std_b25, int32_t on);
|
||||
static int set_simd_mode_arib_std_b25(void *std_b25, int32_t instruction);
|
||||
static int set_b_cas_card_arib_std_b25(void *std_b25, B_CAS_CARD *bcas);
|
||||
static int set_unit_size_arib_std_b25(void *std_b25, int size);
|
||||
static int reset_arib_std_b25(void *std_b25);
|
||||
@ -344,6 +349,7 @@ ARIB_STD_B25 *create_arib_std_b25(void)
|
||||
}
|
||||
|
||||
prv->multi2_round = 4;
|
||||
prv->simd_instruction = (int32_t)get_supported_simd_instruction();
|
||||
|
||||
r = (ARIB_STD_B25 *)(prv+1);
|
||||
r->private_data = prv;
|
||||
@ -352,6 +358,7 @@ ARIB_STD_B25 *create_arib_std_b25(void)
|
||||
r->set_multi2_round = set_multi2_round_arib_std_b25;
|
||||
r->set_strip = set_strip_arib_std_b25;
|
||||
r->set_emm_proc = set_emm_proc_arib_std_b25;
|
||||
r->set_simd_mode = set_simd_mode_arib_std_b25;
|
||||
r->set_b_cas_card = set_b_cas_card_arib_std_b25;
|
||||
r->set_unit_size = set_unit_size_arib_std_b25;
|
||||
r->reset = reset_arib_std_b25;
|
||||
@ -380,7 +387,11 @@ static int32_t find_ca_descriptor_pid(uint8_t *head, uint8_t *tail, int32_t ca_s
|
||||
static int32_t add_ecm_stream(ARIB_STD_B25_PRIVATE_DATA *prv, TS_STREAM_LIST *list, int32_t ecm_pid);
|
||||
static int check_ecm_complete(ARIB_STD_B25_PRIVATE_DATA *prv);
|
||||
static int find_ecm(ARIB_STD_B25_PRIVATE_DATA *prv);
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
static int proc_ecm(DECRYPTOR_ELEM *dec, B_CAS_CARD *bcas, int32_t multi2_round, int32_t simd_instruction);
|
||||
#else
|
||||
static int proc_ecm(DECRYPTOR_ELEM *dec, B_CAS_CARD *bcas, int32_t multi2_round);
|
||||
#endif
|
||||
static int proc_arib_std_b25(ARIB_STD_B25_PRIVATE_DATA *prv);
|
||||
|
||||
static int proc_cat(ARIB_STD_B25_PRIVATE_DATA *prv);
|
||||
@ -471,6 +482,21 @@ static int set_emm_proc_arib_std_b25(void *std_b25, int32_t on)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_simd_mode_arib_std_b25(void * std_b25, int32_t instruction)
|
||||
{
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
ARIB_STD_B25_PRIVATE_DATA *prv;
|
||||
|
||||
prv = private_data(std_b25);
|
||||
if(prv == NULL){
|
||||
return ARIB_STD_B25_ERROR_INVALID_PARAM;
|
||||
}
|
||||
|
||||
prv->simd_instruction = instruction;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_b_cas_card_arib_std_b25(void *std_b25, B_CAS_CARD *bcas)
|
||||
{
|
||||
int n;
|
||||
@ -694,7 +720,11 @@ static int flush_arib_std_b25(void *std_b25)
|
||||
if(m == 0){
|
||||
goto NEXT;
|
||||
}
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
r = proc_ecm(dec, prv->bcas, prv->multi2_round, prv->simd_instruction);
|
||||
#else
|
||||
r = proc_ecm(dec, prv->bcas, prv->multi2_round);
|
||||
#endif
|
||||
if(r < 0){
|
||||
if((curr+unit) <= tail)
|
||||
l = unit;
|
||||
@ -1906,7 +1936,11 @@ static int find_ecm(ARIB_STD_B25_PRIVATE_DATA *prv)
|
||||
goto NEXT;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
r = proc_ecm(dec, prv->bcas, prv->multi2_round, prv->simd_instruction);
|
||||
#else
|
||||
r = proc_ecm(dec, prv->bcas, prv->multi2_round);
|
||||
#endif
|
||||
if(r < 0){
|
||||
curr += unit;
|
||||
goto LAST;
|
||||
@ -1938,7 +1972,11 @@ LAST:
|
||||
return r;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
static int proc_ecm(DECRYPTOR_ELEM *dec, B_CAS_CARD *bcas, int32_t multi2_round, int32_t simd_instruction)
|
||||
#else
|
||||
static int proc_ecm(DECRYPTOR_ELEM *dec, B_CAS_CARD *bcas, int32_t multi2_round)
|
||||
#endif
|
||||
{
|
||||
int r,n;
|
||||
uint32_t len;
|
||||
@ -2005,6 +2043,9 @@ static int proc_ecm(DECRYPTOR_ELEM *dec, B_CAS_CARD *bcas, int32_t multi2_round)
|
||||
|
||||
if(dec->m2 == NULL){
|
||||
dec->m2 = create_multi2();
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
dec->m2->set_simd(dec->m2, (enum INSTRUCTION_TYPE)simd_instruction);
|
||||
#endif
|
||||
if(dec->m2 == NULL){
|
||||
return ARIB_STD_B25_ERROR_NO_ENOUGH_MEMORY;
|
||||
}
|
||||
@ -2156,7 +2197,11 @@ static int proc_arib_std_b25(ARIB_STD_B25_PRIVATE_DATA *prv)
|
||||
if(m == 0){
|
||||
goto NEXT;
|
||||
}
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
r = proc_ecm(dec, prv->bcas, prv->multi2_round, prv->simd_instruction);
|
||||
#else
|
||||
r = proc_ecm(dec, prv->bcas, prv->multi2_round);
|
||||
#endif
|
||||
if(r < 0){
|
||||
return r;
|
||||
}
|
||||
@ -2708,7 +2753,8 @@ static int reserve_work_buffer(TS_WORK_BUFFER *buf, intptr_t size)
|
||||
n += n;
|
||||
}
|
||||
|
||||
p = (uint8_t *)malloc(n);
|
||||
//p = (uint8_t *)malloc(n);
|
||||
p = (uint8_t *)mem_aligned_alloc(n);
|
||||
if(p == NULL){
|
||||
return 0;
|
||||
}
|
||||
@ -2719,7 +2765,8 @@ static int reserve_work_buffer(TS_WORK_BUFFER *buf, intptr_t size)
|
||||
if(m > 0){
|
||||
memcpy(p, buf->head, m);
|
||||
}
|
||||
free(buf->pool);
|
||||
//free(buf->pool);
|
||||
mem_aligned_free(buf->pool);
|
||||
buf->pool = NULL;
|
||||
}
|
||||
|
||||
@ -2763,7 +2810,8 @@ static void reset_work_buffer(TS_WORK_BUFFER *buf)
|
||||
static void release_work_buffer(TS_WORK_BUFFER *buf)
|
||||
{
|
||||
if(buf->pool != NULL){
|
||||
free(buf->pool);
|
||||
//free(buf->pool);
|
||||
mem_aligned_free(buf->pool);
|
||||
}
|
||||
buf->pool = NULL;
|
||||
buf->head = NULL;
|
||||
|
@ -32,6 +32,7 @@ typedef struct {
|
||||
int (* set_multi2_round)(void *std_b25, int32_t round);
|
||||
int (* set_strip)(void *std_b25, int32_t strip);
|
||||
int (* set_emm_proc)(void *std_b25, int32_t on);
|
||||
int (* set_simd_mode)(void *std_b25, int32_t instructin);
|
||||
|
||||
int (* set_b_cas_card)(void *std_b25, B_CAS_CARD *bcas);
|
||||
|
||||
|
@ -22,7 +22,7 @@
|
||||
<ProjectGuid>{6E77C1AC-A31A-49B9-9A52-9FE1E03B8FEC}</ProjectGuid>
|
||||
<RootNamespace>arib_std_b25</RootNamespace>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
@ -133,7 +133,7 @@
|
||||
<PrecompiledHeader />
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
<Optimization>Full</Optimization>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
@ -161,7 +161,7 @@
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
<Optimization>Full</Optimization>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
@ -184,6 +184,7 @@
|
||||
<ClCompile Include="arib_std_b25.c" />
|
||||
<ClCompile Include="b_cas_card.c" />
|
||||
<ClCompile Include="multi2.c" />
|
||||
<ClCompile Include="multi2_simd.c" />
|
||||
<ClCompile Include="td.c" />
|
||||
<ClCompile Include="ts_section_parser.c" />
|
||||
</ItemGroup>
|
||||
@ -194,7 +195,9 @@
|
||||
<ClInclude Include="b_cas_card_error_code.h" />
|
||||
<ClInclude Include="multi2.h" />
|
||||
<ClInclude Include="multi2_error_code.h" />
|
||||
<ClInclude Include="multi2_simd.h" />
|
||||
<ClInclude Include="portable.h" />
|
||||
<ClInclude Include="simd_instruction_type.h" />
|
||||
<ClInclude Include="ts_common_types.h" />
|
||||
<ClInclude Include="ts_section_parser.h" />
|
||||
<ClInclude Include="ts_section_parser_error_code.h" />
|
||||
|
@ -30,6 +30,9 @@
|
||||
<ClCompile Include="b_cas_card.c">
|
||||
<Filter>ソース ファイル</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="multi2_simd.c">
|
||||
<Filter>ソース ファイル</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="arib_std_b25.h">
|
||||
@ -62,5 +65,11 @@
|
||||
<ClInclude Include="ts_section_parser_error_code.h">
|
||||
<Filter>ヘッダー ファイル</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="simd_instruction_type.h">
|
||||
<Filter>ヘッダー ファイル</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="multi2_simd.h">
|
||||
<Filter>ヘッダー ファイル</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -1,4 +1,4 @@
|
||||
// libaribb25.cpp: CB25Decoder クラスのインプリメンテーション
|
||||
// libaribb25.cpp: CB25Decoder クラスのインプリメンテーション
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
#include "libaribb25.h"
|
||||
@ -248,6 +248,11 @@ void CB25Decoder::SetMulti2Round(const int32_t round)
|
||||
_b25->set_multi2_round(_b25, round);
|
||||
}
|
||||
|
||||
// Forwards the requested SIMD instruction level to the underlying
// ARIB_STD_B25 object through its set_simd_mode callback.
// The callback's int return value is discarded.
void CB25Decoder::SetSimdMode(const int32_t instruction)
|
||||
{
|
||||
_b25->set_simd_mode(_b25, instruction);
|
||||
}
|
||||
|
||||
const DWORD CB25Decoder::GetDescramblingState(const WORD wProgramID)
|
||||
{
|
||||
// 指定したプログラムIDの復号状態を返す
|
||||
|
@ -27,6 +27,7 @@ public:
|
||||
virtual void DiscardScramblePacket(const bool bEnable = true);
|
||||
virtual void EnableEmmProcess(const bool bEnable = true);
|
||||
virtual void SetMulti2Round(const int32_t round = 4);
|
||||
virtual void SetSimdMode(const int32_t instruction = 2);
|
||||
virtual const DWORD GetDescramblingState(const WORD wProgramID);
|
||||
virtual void ResetStatistics(void);
|
||||
virtual const DWORD GetPacketStride(void);
|
||||
|
@ -21,7 +21,7 @@
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{32FCD075-2C1D-4796-926B-A0009ECCD1E8}</ProjectGuid>
|
||||
<RootNamespace>libaribb25</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
@ -109,7 +109,7 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Full</Optimization>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>
|
||||
@ -132,7 +132,7 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Full</Optimization>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>
|
||||
@ -157,6 +157,7 @@
|
||||
<ClCompile Include="b_cas_card.c" />
|
||||
<ClCompile Include="libaribb25.cpp" />
|
||||
<ClCompile Include="multi2.c" />
|
||||
<ClCompile Include="multi2_simd.c" />
|
||||
<ClCompile Include="ts_section_parser.c" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
@ -168,7 +169,9 @@
|
||||
<ClInclude Include="libaribb25.h" />
|
||||
<ClInclude Include="multi2.h" />
|
||||
<ClInclude Include="multi2_error_code.h" />
|
||||
<ClInclude Include="multi2_simd.h" />
|
||||
<ClInclude Include="portable.h" />
|
||||
<ClInclude Include="simd_instruction_type.h" />
|
||||
<ClInclude Include="ts_common_types.h" />
|
||||
<ClInclude Include="ts_section_parser.h" />
|
||||
<ClInclude Include="ts_section_parser_error_code.h" />
|
||||
|
@ -30,6 +30,9 @@
|
||||
<ClCompile Include="libaribb25.cpp">
|
||||
<Filter>ソース ファイル</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="multi2_simd.c">
|
||||
<Filter>ソース ファイル</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="multi2.h">
|
||||
@ -68,5 +71,11 @@
|
||||
<ClInclude Include="libaribb25.h">
|
||||
<Filter>ヘッダー ファイル</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="multi2_simd.h">
|
||||
<Filter>ヘッダー ファイル</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="simd_instruction_type.h">
|
||||
<Filter>ヘッダー ファイル</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
139
aribb25/multi2.c
139
aribb25/multi2.c
@ -2,6 +2,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "multi2.h"
|
||||
#include "multi2_simd.h"
|
||||
#include "multi2_error_code.h"
|
||||
|
||||
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
@ -35,8 +36,9 @@ typedef struct {
|
||||
} CORE_PARAM;
|
||||
|
||||
typedef struct {
|
||||
uint32_t l;
|
||||
// change for 64bit bswap
|
||||
uint32_t r;
|
||||
uint32_t l;
|
||||
} CORE_DATA;
|
||||
|
||||
typedef struct {
|
||||
@ -52,6 +54,8 @@ typedef struct {
|
||||
uint32_t round;
|
||||
uint32_t state;
|
||||
|
||||
MULTI2_SIMD_DATA *simd;
|
||||
|
||||
} MULTI2_PRIVATE_DATA;
|
||||
|
||||
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
@ -67,12 +71,14 @@ typedef struct {
|
||||
static void release_multi2(void *m2);
|
||||
static int add_ref_multi2(void *m2);
|
||||
static int set_round_multi2(void *m2, int32_t val);
|
||||
static int set_simd_multi2(void *m2, enum INSTRUCTION_TYPE);
|
||||
static int set_system_key_multi2(void *m2, uint8_t *val);
|
||||
static int set_init_cbc_multi2(void *m2, uint8_t *val);
|
||||
static int set_scramble_key_multi2(void *m2, uint8_t *val);
|
||||
static int clear_scramble_key_multi2(void *m2);
|
||||
static int encrypt_multi2(void *m2, int32_t type, uint8_t *buf, int32_t size);
|
||||
static int decrypt_multi2(void *m2, int32_t type, uint8_t *buf, intptr_t size);
|
||||
static int decrypt_with_simd_multi2(void *m2, int32_t type, uint8_t *buf, intptr_t size);
|
||||
|
||||
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
global function implementation
|
||||
@ -97,10 +103,12 @@ MULTI2 *create_multi2(void)
|
||||
|
||||
prv->ref_count = 1;
|
||||
prv->round = 4;
|
||||
prv->simd = NULL;
|
||||
|
||||
r->release = release_multi2;
|
||||
r->add_ref = add_ref_multi2;
|
||||
r->set_round = set_round_multi2;
|
||||
r->set_simd = set_simd_multi2;
|
||||
r->set_system_key = set_system_key_multi2;
|
||||
r->set_init_cbc = set_init_cbc_multi2;
|
||||
r->set_scramble_key = set_scramble_key_multi2;
|
||||
@ -126,6 +134,9 @@ static void core_pi2(CORE_DATA *dst, CORE_DATA *src, uint32_t a);
|
||||
static void core_pi3(CORE_DATA *dst, CORE_DATA *src, uint32_t a, uint32_t b);
|
||||
static void core_pi4(CORE_DATA *dst, CORE_DATA *src, uint32_t a);
|
||||
|
||||
static void alloc_data_for_simd(MULTI2_PRIVATE_DATA *prv);
|
||||
static void release_data_for_simd(MULTI2_PRIVATE_DATA *prv);
|
||||
|
||||
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
interface method implementation
|
||||
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
|
||||
@ -141,6 +152,7 @@ static void release_multi2(void *m2)
|
||||
|
||||
prv->ref_count -= 1;
|
||||
if(prv->ref_count == 0){
|
||||
release_data_for_simd(prv);
|
||||
free(prv);
|
||||
}
|
||||
}
|
||||
@ -170,14 +182,57 @@ static int set_round_multi2(void *m2, int32_t val)
|
||||
}
|
||||
|
||||
prv->round = val;
|
||||
set_round_for_simd(val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_simd_multi2(void *m2, enum INSTRUCTION_TYPE instruction)
|
||||
{
|
||||
MULTI2_PRIVATE_DATA *prv;
|
||||
MULTI2 *r;
|
||||
MULTI2_SIMD_DATA *simd;
|
||||
|
||||
prv = private_data(m2);
|
||||
simd = prv->simd;
|
||||
|
||||
if( instruction == get_simd_instruction() ){
|
||||
if( (simd != NULL) || (instruction == INSTRUCTION_NORMAL) ){
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
r = (MULTI2 *)(prv+1);
|
||||
if( initialize_multi2_simd(instruction, m2) ){
|
||||
r->decrypt = decrypt_with_simd_multi2;
|
||||
if(simd == NULL){
|
||||
alloc_data_for_simd(prv);
|
||||
simd = prv->simd;
|
||||
}
|
||||
instruction = get_simd_instruction();
|
||||
if(instruction == INSTRUCTION_AVX2){
|
||||
simd->decrypt = decrypt_multi2_with_avx2;
|
||||
}else if(instruction == INSTRUCTION_SSSE3){
|
||||
simd->decrypt = decrypt_multi2_with_ssse3;
|
||||
}else if(instruction == INSTRUCTION_SSE2){
|
||||
simd->decrypt = decrypt_multi2_with_sse2;
|
||||
}else{
|
||||
simd->decrypt = decrypt_multi2_without_simd;
|
||||
}
|
||||
return 0;
|
||||
}else{
|
||||
r->decrypt = decrypt_multi2;
|
||||
release_data_for_simd(prv);
|
||||
return MULTI2_ERROR_INVALID_PARAMETER;
|
||||
}
|
||||
}
|
||||
|
||||
static int set_system_key_multi2(void *m2, uint8_t *val)
|
||||
{
|
||||
#ifndef USE_MULTI2_INTRINSIC
|
||||
int i;
|
||||
uint8_t *p;
|
||||
#endif
|
||||
|
||||
MULTI2_PRIVATE_DATA *prv;
|
||||
|
||||
@ -186,10 +241,14 @@ static int set_system_key_multi2(void *m2, uint8_t *val)
|
||||
return MULTI2_ERROR_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
#ifdef USE_MULTI2_INTRINSIC
|
||||
set_system_key_with_bswap((MULTI2_SIMD_SYS_KEY *)&(prv->sys), val);
|
||||
#else
|
||||
p = val;
|
||||
for(i=0;i<8;i++){
|
||||
p = load_be_uint32(prv->sys.key+i, p);
|
||||
}
|
||||
#endif
|
||||
|
||||
prv->state |= MULTI2_STATE_SYSTEM_KEY_SET;
|
||||
|
||||
@ -209,8 +268,12 @@ static int set_init_cbc_multi2(void *m2, uint8_t *val)
|
||||
|
||||
p = val;
|
||||
|
||||
#ifdef USE_MULTI2_INTRINSIC
|
||||
set_data_key_with_bswap((MULTI2_SIMD_DATA_KEY *)&(prv->cbc_init), p);
|
||||
#else
|
||||
p = load_be_uint32(&(prv->cbc_init.l), p);
|
||||
p = load_be_uint32(&(prv->cbc_init.r), p);
|
||||
#endif
|
||||
|
||||
prv->state |= MULTI2_STATE_CBC_INIT_SET;
|
||||
|
||||
@ -222,6 +285,9 @@ static int set_scramble_key_multi2(void *m2, uint8_t *val)
|
||||
uint8_t *p;
|
||||
|
||||
MULTI2_PRIVATE_DATA *prv;
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
MULTI2_SIMD_DATA *simd;
|
||||
#endif
|
||||
|
||||
prv = private_data(m2);
|
||||
if( (prv == NULL) || (val == NULL) ){
|
||||
@ -230,14 +296,32 @@ static int set_scramble_key_multi2(void *m2, uint8_t *val)
|
||||
|
||||
p = val;
|
||||
|
||||
#ifdef USE_MULTI2_INTRINSIC
|
||||
set_data_key_with_bswap((MULTI2_SIMD_DATA_KEY *)&(prv->scr[0]), p);
|
||||
set_data_key_with_bswap((MULTI2_SIMD_DATA_KEY *)&(prv->scr[1]), p+8);
|
||||
#else
|
||||
p = load_be_uint32(&(prv->scr[0].l), p);
|
||||
p = load_be_uint32(&(prv->scr[0].r), p);
|
||||
p = load_be_uint32(&(prv->scr[1].l), p);
|
||||
p = load_be_uint32(&(prv->scr[1].r), p);
|
||||
#endif
|
||||
|
||||
core_schedule(prv->wrk+0, &(prv->sys), prv->scr+0);
|
||||
core_schedule(prv->wrk+1, &(prv->sys), prv->scr+1);
|
||||
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
simd = prv->simd;
|
||||
if(simd != NULL){
|
||||
if(get_simd_instruction() == INSTRUCTION_AVX2){
|
||||
set_work_key_for_avx2(simd->wrk+0, (MULTI2_SIMD_SYS_KEY *)(prv->wrk+0));
|
||||
set_work_key_for_avx2(simd->wrk+1, (MULTI2_SIMD_SYS_KEY *)(prv->wrk+1));
|
||||
}else{
|
||||
set_work_key_for_simd(simd->wrk+0, (MULTI2_SIMD_SYS_KEY *)(prv->wrk+0));
|
||||
set_work_key_for_simd(simd->wrk+1, (MULTI2_SIMD_SYS_KEY *)(prv->wrk+1));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
prv->state |= MULTI2_STATE_SCRAMBLE_KEY_SET;
|
||||
|
||||
return 0;
|
||||
@ -390,6 +474,45 @@ static int decrypt_multi2(void *m2, int32_t type, uint8_t *buf, intptr_t size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int decrypt_with_simd_multi2(void *m2, int32_t type, uint8_t *buf, intptr_t size)
|
||||
{
|
||||
MULTI2_SIMD_DATA *simd;
|
||||
MULTI2_SIMD_SYS_KEY *prm;
|
||||
MULTI2_SIMD_WORK_KEY *pck_wrk_key;
|
||||
|
||||
MULTI2_PRIVATE_DATA *prv;
|
||||
|
||||
prv = private_data(m2);
|
||||
if( (prv == NULL) || (buf == NULL) || (size < 1) ){
|
||||
return MULTI2_ERROR_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
if(prv->state != (MULTI2_STATE_CBC_INIT_SET|MULTI2_STATE_SYSTEM_KEY_SET|MULTI2_STATE_SCRAMBLE_KEY_SET)){
|
||||
if( (prv->state & MULTI2_STATE_CBC_INIT_SET) == 0 ){
|
||||
return MULTI2_ERROR_UNSET_CBC_INIT;
|
||||
}
|
||||
if( (prv->state & MULTI2_STATE_SYSTEM_KEY_SET) == 0 ){
|
||||
return MULTI2_ERROR_UNSET_SYSTEM_KEY;
|
||||
}
|
||||
if( (prv->state & MULTI2_STATE_SCRAMBLE_KEY_SET) == 0 ){
|
||||
return MULTI2_ERROR_UNSET_SCRAMBLE_KEY;
|
||||
}
|
||||
}
|
||||
|
||||
simd = prv->simd;
|
||||
if(type == 0x02){
|
||||
prm = (MULTI2_SIMD_SYS_KEY *)(prv->wrk+1);
|
||||
pck_wrk_key = simd->wrk+1;
|
||||
}else{
|
||||
prm = (MULTI2_SIMD_SYS_KEY *)(prv->wrk+0);
|
||||
pck_wrk_key = simd->wrk+0;
|
||||
}
|
||||
|
||||
simd->decrypt(buf, (uint32_t)size, prm, pck_wrk_key, (MULTI2_SIMD_DATA_KEY *)(&prv->cbc_init));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
private method implementation
|
||||
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
|
||||
@ -525,3 +648,17 @@ static void core_pi4(CORE_DATA *dst, CORE_DATA *src, uint32_t a)
|
||||
dst->l = src->l ^ t1;
|
||||
dst->r = src->r;
|
||||
}
|
||||
|
||||
/*
 * (Re)allocate the SIMD work area of a MULTI2 instance.
 *
 * Any existing work area is released first.  On allocation failure
 * prv->simd is left NULL; callers must check it before use.  The new block
 * is zero-filled so the decrypt pointer and packed work keys start from a
 * defined state.
 *
 * Defined `static` to match the prototype earlier in this file.
 */
static void alloc_data_for_simd(MULTI2_PRIVATE_DATA *prv)
{
	release_data_for_simd(prv);
	prv->simd = (MULTI2_SIMD_DATA *)mem_aligned_alloc(sizeof(MULTI2_SIMD_DATA));
	if(prv->simd != NULL){
		memset(prv->simd, 0, sizeof(MULTI2_SIMD_DATA));
	}
}
|
||||
|
||||
/*
 * Free the SIMD work area attached to a MULTI2 instance, if there is one,
 * and clear the pointer so the instance no longer refers to it.
 */
void release_data_for_simd(MULTI2_PRIVATE_DATA *prv)
{
	if(prv->simd == NULL){
		return;
	}

	mem_aligned_free(prv->simd);
	prv->simd = NULL;
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define MULTI2_H
|
||||
|
||||
#include "portable.h"
|
||||
#include "simd_instruction_type.h"
|
||||
|
||||
typedef struct {
|
||||
|
||||
@ -11,6 +12,7 @@ typedef struct {
|
||||
int (* add_ref)(void *m2);
|
||||
|
||||
int (* set_round)(void *m2, int32_t val);
|
||||
int (* set_simd)(void *m2, enum INSTRUCTION_TYPE);
|
||||
|
||||
int (* set_system_key)(void *m2, uint8_t *val);
|
||||
int (* set_init_cbc)(void *m2, uint8_t *val);
|
||||
|
1879
aribb25/multi2_simd.c
Normal file
1879
aribb25/multi2_simd.c
Normal file
File diff suppressed because it is too large
Load Diff
136
aribb25/multi2_simd.h
Normal file
136
aribb25/multi2_simd.h
Normal file
@ -0,0 +1,136 @@
|
||||
#ifndef MULTI2_SIMD_H
|
||||
#define MULTI2_SIMD_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#include "portable.h"
|
||||
#include "simd_instruction_type.h"
|
||||
|
||||
#define USE_MULTI2_INTRINSIC // use intrinsic functions
|
||||
#define ENABLE_MULTI2_SIMD // enable SIMD instructions
|
||||
|
||||
#ifdef ENABLE_MULTI2_SIMD
|
||||
|
||||
#define ENABLE_MULTI2_SSE2 // enable SSE2 instructions
|
||||
#define ENABLE_MULTI2_SSSE3 // enable SSSE3 instructions
|
||||
|
||||
#ifdef ENABLE_MULTI2_SSSE3
|
||||
#define ENABLE_MULTI2_AVX2 // enable AVX2 instructions
|
||||
#endif
|
||||
|
||||
//#define USE_MULTI2_SIMD_ICC // use Intel C++ Compiler
|
||||
|
||||
#endif // ENABLE_MULTI2_SIMD
|
||||
|
||||
|
||||
#ifdef ENABLE_MULTI2_AVX2
|
||||
|
||||
typedef union {
|
||||
__m256i key256[8];
|
||||
__m128i key[8];
|
||||
} MULTI2_SIMD_WORK_KEY;
|
||||
|
||||
#else
|
||||
|
||||
typedef struct {
|
||||
__m128i key[8];
|
||||
} MULTI2_SIMD_WORK_KEY;
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * 256-bit key block, viewable either as eight named 32-bit words or as
 * 32 raw bytes.
 *
 * NOTE(review): the #if condition below contains both defined(_M_X64) and
 * !defined(_M_X64), so it is always true.  The #else branch (word-swapped
 * key order plus the data64[4] view) is therefore never compiled.  The
 * commented-out line above the #if shows the earlier condition -- confirm
 * whether forcing the first layout on every target is intended.
 */
typedef struct {
|
||||
union {
|
||||
//#if !defined(USE_MULTI2_INTRINSIC) || !defined(_M_X64)
|
||||
#if defined(_M_X64) || !defined(USE_MULTI2_INTRINSIC) || !defined(_M_X64)
|
||||
struct {
|
||||
uint32_t key1, key2, key3, key4, key5, key6, key7, key8;
|
||||
};
|
||||
#else
|
||||
struct {
|
||||
uint32_t key2, key1, key4, key3, key6, key5, key8, key7;
|
||||
};
|
||||
uint64_t data64[4];
|
||||
#endif
|
||||
uint8_t data[32];
|
||||
};
|
||||
} MULTI2_SIMD_SYS_KEY /* system key(Sk), expanded key(Wk) 256bit */;
|
||||
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
uint32_t right, left;
|
||||
};
|
||||
uint64_t data64;
|
||||
uint8_t data[8];
|
||||
};
|
||||
} MULTI2_SIMD_DATA_KEY /* data key(Dk) 64bit */;
|
||||
|
||||
typedef struct {
|
||||
|
||||
MULTI2_SIMD_WORK_KEY wrk[2]; /* 0: odd, 1: even */
|
||||
void (* decrypt)(uint8_t * __restrict data, const uint32_t size,
|
||||
const MULTI2_SIMD_SYS_KEY * __restrict work_key,
|
||||
const MULTI2_SIMD_WORK_KEY * __restrict packed_work_key,
|
||||
const MULTI2_SIMD_DATA_KEY * __restrict cbc_init);
|
||||
|
||||
} MULTI2_SIMD_DATA /* data set for SIMD */;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern bool is_simd_enabled();
|
||||
extern bool is_sse2_available();
|
||||
extern bool is_ssse3_available();
|
||||
extern bool is_avx2_available();
|
||||
extern bool initialize_multi2_simd(enum INSTRUCTION_TYPE instruction, void* m2);
|
||||
|
||||
extern void set_simd_instruction(enum INSTRUCTION_TYPE instruction);
|
||||
extern enum INSTRUCTION_TYPE get_simd_instruction();
|
||||
extern enum INSTRUCTION_TYPE get_supported_simd_instruction();
|
||||
|
||||
extern void alloc_work_key_for_simd(MULTI2_SIMD_WORK_KEY **work_key_odd, MULTI2_SIMD_WORK_KEY **work_key_even);
|
||||
extern void free_work_key_for_simd(MULTI2_SIMD_WORK_KEY **work_key_odd, MULTI2_SIMD_WORK_KEY **work_key_even);
|
||||
extern void set_work_key_for_simd(MULTI2_SIMD_WORK_KEY *work_key, const MULTI2_SIMD_SYS_KEY *src_key);
|
||||
extern void set_work_key_for_avx2(MULTI2_SIMD_WORK_KEY *work_key, const MULTI2_SIMD_SYS_KEY *src_key);
|
||||
extern void set_round_for_simd(const uint32_t round);
|
||||
extern void set_system_key_with_bswap(MULTI2_SIMD_SYS_KEY *sys_key, const uint8_t *hex_data);
|
||||
extern void get_system_key_with_bswap(const MULTI2_SIMD_SYS_KEY *sys_key, uint8_t *hex_data);
|
||||
extern void set_data_key_with_bswap(MULTI2_SIMD_DATA_KEY *data_key, const uint8_t *hex_data);
|
||||
extern void get_data_key_with_bswap(const MULTI2_SIMD_DATA_KEY *data_key, uint8_t *hex_data);
|
||||
|
||||
extern void decrypt_multi2_without_simd(uint8_t * __restrict data, const uint32_t size,
|
||||
const MULTI2_SIMD_SYS_KEY * __restrict work_key,
|
||||
const MULTI2_SIMD_WORK_KEY * __restrict packed_work_key,
|
||||
const MULTI2_SIMD_DATA_KEY * __restrict cbc_init);
|
||||
#ifdef ENABLE_MULTI2_SSE2
|
||||
extern void decrypt_multi2_with_sse2(uint8_t * __restrict data, const uint32_t size,
|
||||
const MULTI2_SIMD_SYS_KEY * __restrict work_key,
|
||||
const MULTI2_SIMD_WORK_KEY * __restrict packed_work_key,
|
||||
const MULTI2_SIMD_DATA_KEY * __restrict cbc_init);
|
||||
#endif
|
||||
#ifdef ENABLE_MULTI2_SSSE3
|
||||
extern void decrypt_multi2_with_ssse3(uint8_t * __restrict data, const uint32_t size,
|
||||
const MULTI2_SIMD_SYS_KEY * __restrict work_key,
|
||||
const MULTI2_SIMD_WORK_KEY * __restrict packed_work_key,
|
||||
const MULTI2_SIMD_DATA_KEY * __restrict cbc_init);
|
||||
#endif
|
||||
#ifdef ENABLE_MULTI2_AVX2
|
||||
extern void decrypt_multi2_with_avx2(uint8_t * __restrict data, const uint32_t size,
|
||||
const MULTI2_SIMD_SYS_KEY * __restrict work_key,
|
||||
const MULTI2_SIMD_WORK_KEY * __restrict packed_work_key,
|
||||
const MULTI2_SIMD_DATA_KEY * __restrict cbc_init);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MULTI2_SIMD_H */
|
@ -20,6 +20,76 @@
|
||||
#define _S_IREAD (S_IRUSR|S_IRGRP|S_IROTH)
|
||||
#define _S_IWRITE (S_IWUSR|S_IWGRP|S_IWOTH)
|
||||
|
||||
#ifndef __forceinline
|
||||
#define __forceinline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
#ifndef __restrict
|
||||
#define __restrict __restrict__
|
||||
#endif
|
||||
|
||||
#ifdef __i386__
|
||||
#define _M_IX86 __i386__
|
||||
#endif
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define _M_X64 __x86_64__
|
||||
#define _M_AMD64 __x86_64__
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
|
||||
#include <libkern/OSByteOrder.h>
|
||||
#define _byteswap_ulong(x) OSSwapInt32(x)
|
||||
#define _byteswap_uint64(x) OSSwapInt64(x)
|
||||
|
||||
#elif defined(__sun) || defined(sun)
|
||||
|
||||
#include <sys/byteorder.h>
|
||||
#define _byteswap_ulong(x) BSWAP_32(x)
|
||||
#define _byteswap_uint64(x) BSWAP_64(x)
|
||||
|
||||
#elif defined(__FreeBSD__)
|
||||
|
||||
#include <sys/endian.h>
|
||||
#define _byteswap_ulong(x) bswap32(x)
|
||||
#define _byteswap_uint64(x) bswap64(x)
|
||||
|
||||
#elif defined(__OpenBSD__)
|
||||
|
||||
#include <sys/types.h>
|
||||
#define _byteswap_ulong(x) swap32(x)
|
||||
#define _byteswap_uint64(x) swap64(x)
|
||||
|
||||
#elif defined(__NetBSD__)
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <machine/bswap.h>
|
||||
#if defined(__BSWAP_RENAME) && !defined(_byteswap_ulong)
|
||||
#define _byteswap_ulong(x) bswap32(x)
|
||||
#define _byteswap_uint64(x) bswap64(x)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#include <byteswap.h>
|
||||
#define _byteswap_ulong(x) bswap_32(x)
|
||||
#define _byteswap_uint64(x) bswap_64(x)
|
||||
|
||||
#endif /* defined(__APPLE__) */
|
||||
|
||||
/* C11 aligned_alloc() takes (alignment, size) and expects size to be a multiple of the alignment: request 32-byte alignment and round the size up to the next multiple of 32. */
#define mem_aligned_alloc(s) aligned_alloc(32, (((size_t)(s)) + (size_t)31) & ~(size_t)31)
|
||||
#define mem_aligned_free free
|
||||
|
||||
#define ALIGNAS(s) __attribute__((aligned(s)))
|
||||
|
||||
#else /* !defined(_WIN32) */
|
||||
|
||||
#define mem_aligned_alloc(s) _aligned_malloc(s, 32)
|
||||
#define mem_aligned_free _aligned_free
|
||||
|
||||
#define ALIGNAS(s) __declspec(align(s))
|
||||
|
||||
#endif /* !defined(_WIN32) */
|
||||
|
||||
#endif /* PORTABLE_H */
|
||||
|
12
aribb25/simd_instruction_type.h
Normal file
12
aribb25/simd_instruction_type.h
Normal file
@ -0,0 +1,12 @@
|
||||
#ifndef SIMD_INSTRUCTION_TYPE_H
|
||||
#define SIMD_INSTRUCTION_TYPE_H
|
||||
|
||||
/*
 * Instruction set used for MULTI2 decryption, ordered from least to most
 * capable.  The numeric values are spelled out because they cross the
 * public API as plain int32_t (set_simd_mode / SetSimdMode).
 */
enum INSTRUCTION_TYPE
{
	INSTRUCTION_NORMAL = 0,	/* no SIMD extension */
	INSTRUCTION_SSE2   = 1,
	INSTRUCTION_SSSE3  = 2,
	INSTRUCTION_AVX2   = 3
};
|
||||
|
||||
#endif /* SIMD_INSTRUCTION_TYPE_H */
|
Loading…
x
Reference in New Issue
Block a user