Merge pull request #21 from pinterior/avx2-multi2

Support AVX2 MULTI2 Decoding(Thx pinterior)
This commit is contained in:
stz2012 2019-02-02 11:53:36 +09:00 committed by GitHub
commit 5e0dea81d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1172 additions and 1057 deletions

View File

@ -8,7 +8,7 @@ environment:
MSYS2_DIR: msys64
CYGWIN_MIRROR: http://cygwin.mirror.constant.com
CYGWIN_PACKAGES: mpfr,mpc,gcc-core,make,cmake
CYGWIN_PACKAGES: mpfr,mpc,gcc-core,gcc-g++,make,cmake
matrix:
# Latest version of VisualStudio

View File

@ -1,12 +1,13 @@
cmake_minimum_required(VERSION 2.8.5)
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE INTERNAL "limit build types" FORCE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "default build type")
set(CMAKE_INSTALL_SO_NO_EXE False)
project(arib_std_b25 C)
enable_language(CXX)
include(GitRevision)
include(GenerateExportHeader)
@ -22,6 +23,7 @@ endif()
if(WIN32)
option(USE_UNICODE "enable unicode support" ON)
endif()
option(USE_AVX2 "enable AVX2" OFF)
set(ARIB25_LIB_NAME "arib25")
set(ARIB25_CMD_NAME "b25")
@ -42,11 +44,15 @@ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
set(CMAKE_C_FLAGS "-Wall")
set(CMAKE_C_FLAGS_DEBUG "-O2 -g")
set(CMAKE_C_FLAGS_RELEASE "-O2")
set(CMAKE_CXX_FLAGS "-Wall")
set(CMAKE_CXX_FLAGS_DEBUG "-O3 -g")
set(CMAKE_CXX_FLAGS_RELEASE "-O3")
set(CMAKE_SHARED_LINKER_FLAGS "-fvisibility=hidden")
if(UNIX AND NOT CYGWIN)
include(ElfInterp)
set(CMAKE_C_FLAGS "-fPIC ${CMAKE_C_FLAGS}")
set(CMAKE_C_FLAGS "-fPIC ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-fPIC ${CMAKE_CXX_FLAGS}")
if(NOT APPLE)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-eshow_version")
endif()
@ -55,13 +61,24 @@ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
endif()
if(MINGW AND USE_UNICODE)
set(CMAKE_C_FLAGS "-municode ${CMAKE_C_FLAGS}")
set(CMAKE_C_FLAGS "-municode ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-municode ${CMAKE_CXX_FLAGS}")
endif()
if(USE_AVX2)
set(CMAKE_C_FLAGS "-mavx2 ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-mavx2 ${CMAKE_CXX_FLAGS}")
endif()
elseif(CMAKE_C_COMPILER_ID MATCHES "(MSVC)")
add_definitions("-D_CRT_SECURE_NO_WARNINGS")
set(CMAKE_STATIC_LIBRARY_PREFIX lib)
set(CMAKE_SHARED_LIBRARY_PREFIX lib)
set(CMAKE_SUPPRESS_REGENERATION TRUE)
if(USE_AVX2)
set(CMAKE_C_FLAGS "/arch:AVX2 ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "/arch:AVX2 ${CMAKE_CXX_FLAGS}")
endif()
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
@ -79,7 +96,7 @@ if(PCSC_INCLUDE_DIRS)
endif()
link_directories(${PCSC_LIBRARY_DIRS})
add_library(arib25-objlib OBJECT src/arib_std_b25.c src/b_cas_card.c src/multi2.c src/ts_section_parser.c src/version.c)
add_library(arib25-objlib OBJECT src/arib_std_b25.c src/b_cas_card.c src/multi2.cc src/ts_section_parser.c src/version.c)
set_target_properties(arib25-objlib PROPERTIES COMPILE_DEFINITIONS ARIB25_DLL)
add_library(arib25-static STATIC $<TARGET_OBJECTS:arib25-objlib>)

36
NOTICE
View File

@ -6,39 +6,3 @@ This product include software from MARUMO Manufacturing libarib25 project.
* Copyright (c)2007-2012 MOGI, Kazuhiro <kazhiro@marumo.ne.jp>; All rights reserved.
MARUMO Manufacturing (https://www.marumo.ne.jp/)
Special Thanks: 2ch NoNames, eternalharvest, eru.
This product includes software from the CMake project.
* Copyright (c)2000-2018 Kitware, Inc. and Contributors; All rights reserved.
CMake Project (https://cmake.org/)
Files:
- cmake/GenerateExportHeader.cmake
- cmake/exportheader.cmake.in
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Kitware, Inc. nor the names of Contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,436 +0,0 @@
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#.rst:
# GenerateExportHeader
# --------------------
#
# Function for generation of export macros for libraries
#
# This module provides the function GENERATE_EXPORT_HEADER().
#
# The ``GENERATE_EXPORT_HEADER`` function can be used to generate a file
# suitable for preprocessor inclusion which contains EXPORT macros to be
# used in library classes::
#
# GENERATE_EXPORT_HEADER( LIBRARY_TARGET
# [BASE_NAME <base_name>]
# [EXPORT_MACRO_NAME <export_macro_name>]
# [EXPORT_FILE_NAME <export_file_name>]
# [DEPRECATED_MACRO_NAME <deprecated_macro_name>]
# [NO_EXPORT_MACRO_NAME <no_export_macro_name>]
# [STATIC_DEFINE <static_define>]
# [NO_DEPRECATED_MACRO_NAME <no_deprecated_macro_name>]
# [DEFINE_NO_DEPRECATED]
# [PREFIX_NAME <prefix_name>]
# [CUSTOM_CONTENT_FROM_VARIABLE <variable>]
# )
#
#
# The target properties :prop_tgt:`CXX_VISIBILITY_PRESET <<LANG>_VISIBILITY_PRESET>`
# and :prop_tgt:`VISIBILITY_INLINES_HIDDEN` can be used to add the appropriate
# compile flags for targets. See the documentation of those target properties,
# and the convenience variables
# :variable:`CMAKE_CXX_VISIBILITY_PRESET <CMAKE_<LANG>_VISIBILITY_PRESET>` and
# :variable:`CMAKE_VISIBILITY_INLINES_HIDDEN`.
#
# By default ``GENERATE_EXPORT_HEADER()`` generates macro names in a file
# name determined by the name of the library. This means that in the
# simplest case, users of ``GenerateExportHeader`` will be equivalent to:
#
# .. code-block:: cmake
#
# set(CMAKE_CXX_VISIBILITY_PRESET hidden)
# set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
# add_library(somelib someclass.cpp)
# generate_export_header(somelib)
# install(TARGETS somelib DESTINATION ${LIBRARY_INSTALL_DIR})
# install(FILES
# someclass.h
# ${PROJECT_BINARY_DIR}/somelib_export.h DESTINATION ${INCLUDE_INSTALL_DIR}
# )
#
#
# And in the ABI header files:
#
# .. code-block:: c++
#
# #include "somelib_export.h"
# class SOMELIB_EXPORT SomeClass {
# ...
# };
#
#
# The CMake fragment will generate a file in the
# ``${CMAKE_CURRENT_BINARY_DIR}`` called ``somelib_export.h`` containing the
# macros ``SOMELIB_EXPORT``, ``SOMELIB_NO_EXPORT``, ``SOMELIB_DEPRECATED``,
# ``SOMELIB_DEPRECATED_EXPORT`` and ``SOMELIB_DEPRECATED_NO_EXPORT``.
# They will be followed by content taken from the variable specified by
# the ``CUSTOM_CONTENT_FROM_VARIABLE`` option, if any.
# The resulting file should be installed with other headers in the library.
#
# The ``BASE_NAME`` argument can be used to override the file name and the
# names used for the macros:
#
# .. code-block:: cmake
#
# add_library(somelib someclass.cpp)
# generate_export_header(somelib
# BASE_NAME other_name
# )
#
#
# Generates a file called ``other_name_export.h`` containing the macros
# ``OTHER_NAME_EXPORT``, ``OTHER_NAME_NO_EXPORT`` and ``OTHER_NAME_DEPRECATED``
# etc.
#
# The ``BASE_NAME`` may be overridden by specifying other options in the
# function. For example:
#
# .. code-block:: cmake
#
# add_library(somelib someclass.cpp)
# generate_export_header(somelib
# EXPORT_MACRO_NAME OTHER_NAME_EXPORT
# )
#
#
# creates the macro ``OTHER_NAME_EXPORT`` instead of ``SOMELIB_EXPORT``, but
# other macros and the generated file name are as default:
#
# .. code-block:: cmake
#
# add_library(somelib someclass.cpp)
# generate_export_header(somelib
# DEPRECATED_MACRO_NAME KDE_DEPRECATED
# )
#
#
# creates the macro ``KDE_DEPRECATED`` instead of ``SOMELIB_DEPRECATED``.
#
# If ``LIBRARY_TARGET`` is a static library, macros are defined without
# values.
#
# If the same sources are used to create both a shared and a static
# library, the uppercased symbol ``${BASE_NAME}_STATIC_DEFINE`` should be
# used when building the static library:
#
# .. code-block:: cmake
#
# add_library(shared_variant SHARED ${lib_SRCS})
# add_library(static_variant ${lib_SRCS})
# generate_export_header(shared_variant BASE_NAME libshared_and_static)
# set_target_properties(static_variant PROPERTIES
# COMPILE_FLAGS -DLIBSHARED_AND_STATIC_STATIC_DEFINE)
#
# This will cause the export macros to expand to nothing when building
# the static library.
#
# If ``DEFINE_NO_DEPRECATED`` is specified, then a macro
# ``${BASE_NAME}_NO_DEPRECATED`` will be defined. This macro can be used to
# remove deprecated code from preprocessor output:
#
# .. code-block:: cmake
#
# option(EXCLUDE_DEPRECATED "Exclude deprecated parts of the library" FALSE)
# if (EXCLUDE_DEPRECATED)
# set(NO_BUILD_DEPRECATED DEFINE_NO_DEPRECATED)
# endif()
# generate_export_header(somelib ${NO_BUILD_DEPRECATED})
#
#
# And then in somelib:
#
# .. code-block:: c++
#
# class SOMELIB_EXPORT SomeClass
# {
# public:
# #ifndef SOMELIB_NO_DEPRECATED
# SOMELIB_DEPRECATED void oldMethod();
# #endif
# };
#
# .. code-block:: c++
#
# #ifndef SOMELIB_NO_DEPRECATED
# void SomeClass::oldMethod() { }
# #endif
#
#
# If ``PREFIX_NAME`` is specified, the argument will be used as a prefix to
# all generated macros.
#
# For example:
#
# .. code-block:: cmake
#
# generate_export_header(somelib PREFIX_NAME VTK_)
#
# Generates the macros ``VTK_SOMELIB_EXPORT`` etc.
#
# ::
#
# ADD_COMPILER_EXPORT_FLAGS( [<output_variable>] )
#
# The ``ADD_COMPILER_EXPORT_FLAGS`` function adds ``-fvisibility=hidden`` to
# :variable:`CMAKE_CXX_FLAGS <CMAKE_<LANG>_FLAGS>` if supported, and is a no-op
# on Windows which does not need extra compiler flags for exporting support.
# You may optionally pass a single argument to ``ADD_COMPILER_EXPORT_FLAGS``
# that will be populated with the ``CXX_FLAGS`` required to enable visibility
# support for the compiler/architecture in use.
#
# This function is deprecated. Set the target properties
# :prop_tgt:`CXX_VISIBILITY_PRESET <<LANG>_VISIBILITY_PRESET>` and
# :prop_tgt:`VISIBILITY_INLINES_HIDDEN` instead.
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
# TODO: Install this macro separately?
# Test whether the C compiler accepts `_ATTRIBUTE` in front of a function
# definition. The outcome of check_c_source_compiles() is stored in the
# variable whose name is passed as `_RESULT`.
macro(_check_c_compiler_attribute _ATTRIBUTE _RESULT)
  check_c_source_compiles(
    "${_ATTRIBUTE} int somefunc() { return 0; }
int main() { return somefunc(); }"
    ${_RESULT})
endmacro()
# TODO: Install this macro separately?
# Test whether the C++ compiler accepts `_ATTRIBUTE` in front of a function
# definition. The outcome of check_cxx_source_compiles() is stored in the
# variable whose name is passed as `_RESULT`.
macro(_check_cxx_compiler_attribute _ATTRIBUTE _RESULT)
  check_cxx_source_compiles(
    "${_ATTRIBUTE} int somefunc() { return 0; }
int main() { return somefunc();}"
    ${_RESULT})
endmacro()
# Probe the active compiler for the symbol-visibility flags.
#
# Results are written to COMPILER_HAS_HIDDEN_VISIBILITY and
# COMPILER_HAS_HIDDEN_INLINE_VISIBILITY by the check_<lang>_compiler_flag()
# calls below. Because this is a macro, GCC_TOO_OLD and _INTEL_TOO_OLD are
# also set in the caller's scope when the compiler is considered too old.
# No probing is done at all on WIN32/CYGWIN or for the excluded compilers.
macro(_test_compiler_hidden_visibility)

  if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.2")
    set(GCC_TOO_OLD TRUE)
  elseif(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS "4.2")
    set(GCC_TOO_OLD TRUE)
  elseif(CMAKE_CXX_COMPILER_ID MATCHES Intel AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "12.0")
    set(_INTEL_TOO_OLD TRUE)
  endif()

  # Exclude XL here because it misinterprets -fvisibility=hidden even though
  # the check_cxx_compiler_flag passes
  if(NOT GCC_TOO_OLD
      AND NOT _INTEL_TOO_OLD
      AND NOT WIN32
      AND NOT CYGWIN
      AND NOT CMAKE_CXX_COMPILER_ID MATCHES XL
      AND NOT CMAKE_CXX_COMPILER_ID MATCHES PGI
      AND NOT CMAKE_CXX_COMPILER_ID MATCHES Watcom)
    if (CMAKE_CXX_COMPILER_LOADED)
      check_cxx_compiler_flag(-fvisibility=hidden COMPILER_HAS_HIDDEN_VISIBILITY)
      check_cxx_compiler_flag(-fvisibility-inlines-hidden
        COMPILER_HAS_HIDDEN_INLINE_VISIBILITY)
    else()
      check_c_compiler_flag(-fvisibility=hidden COMPILER_HAS_HIDDEN_VISIBILITY)
      # The flag is spelled "-fvisibility-inlines-hidden" (plural "inlines"),
      # the same spelling the C++ branch probes and add_compiler_export_flags()
      # appends; the former "-fvisibility-inline-hidden" is not a real flag.
      check_c_compiler_flag(-fvisibility-inlines-hidden
        COMPILER_HAS_HIDDEN_INLINE_VISIBILITY)
    endif()
  endif()
endmacro()
# Detect which "deprecated" attribute spelling the compiler accepts.
#
# For the compilers listed in the first condition (or when GCC_TOO_OLD is
# set) COMPILER_HAS_DEPRECATED is cached as an empty string and no probing is
# done. Otherwise the GNU-style __attribute__((__deprecated__)) is tried
# first (result in COMPILER_HAS_DEPRECATED_ATTR); only if that fails is
# __declspec(deprecated) tried (result in COMPILER_HAS_DEPRECATED).
# The C++ checks are used when a C++ compiler is loaded, the C checks
# otherwise.
macro(_test_compiler_has_deprecated)
# NOTE: Some Embarcadero compilers silently compile __declspec(deprecated)
# without error, but this is not a documented feature and the attribute does
# not actually generate any warnings.
if(CMAKE_CXX_COMPILER_ID MATCHES Borland
OR CMAKE_CXX_COMPILER_ID MATCHES Embarcadero
OR CMAKE_CXX_COMPILER_ID MATCHES HP
OR GCC_TOO_OLD
OR CMAKE_CXX_COMPILER_ID MATCHES PGI
OR CMAKE_CXX_COMPILER_ID MATCHES Watcom)
set(COMPILER_HAS_DEPRECATED "" CACHE INTERNAL
"Compiler support for a deprecated attribute")
else()
if (CMAKE_CXX_COMPILER_LOADED)
# C++ compiler available: probe with the C++ source check.
_check_cxx_compiler_attribute("__attribute__((__deprecated__))"
COMPILER_HAS_DEPRECATED_ATTR)
if(COMPILER_HAS_DEPRECATED_ATTR)
set(COMPILER_HAS_DEPRECATED "${COMPILER_HAS_DEPRECATED_ATTR}"
CACHE INTERNAL "Compiler support for a deprecated attribute")
else()
# GNU spelling rejected: fall back to the __declspec spelling.
_check_cxx_compiler_attribute("__declspec(deprecated)"
COMPILER_HAS_DEPRECATED)
endif()
else()
# No C++ compiler loaded: run the same two-step probe with the C compiler.
_check_c_compiler_attribute("__attribute__((__deprecated__))"
COMPILER_HAS_DEPRECATED_ATTR)
if(COMPILER_HAS_DEPRECATED_ATTR)
set(COMPILER_HAS_DEPRECATED "${COMPILER_HAS_DEPRECATED_ATTR}"
CACHE INTERNAL "Compiler support for a deprecated attribute")
else()
_check_c_compiler_attribute("__declspec(deprecated)"
COMPILER_HAS_DEPRECATED)
endif()
endif()
endif()
endmacro()
# Remember the directory that contains this module file; it is used later to
# locate the exportheader.cmake.in template passed to configure_file().
get_filename_component(_GENERATE_EXPORT_HEADER_MODULE_DIR
"${CMAKE_CURRENT_LIST_FILE}" PATH)
# Compute the text that the generated export header substitutes for the
# deprecated / export / import / no-export macros of TARGET_LIBRARY.
#
# Outputs (set in the caller's scope, since this is a macro):
#   DEFINE_DEPRECATED - attribute spelling chosen from the
#                       COMPILER_HAS_DEPRECATED_ATTR / COMPILER_HAS_DEPRECATED
#                       probe results, or empty.
#   DEFINE_EXPORT, DEFINE_IMPORT, DEFINE_NO_EXPORT - left empty for static
#                       libraries; __declspec on WIN32/CYGWIN; visibility
#                       attributes when COMPILER_HAS_HIDDEN_VISIBILITY is set.
macro(_DO_SET_MACRO_VALUES TARGET_LIBRARY)
  set(DEFINE_DEPRECATED)
  set(DEFINE_EXPORT)
  set(DEFINE_IMPORT)
  set(DEFINE_NO_EXPORT)

  if (COMPILER_HAS_DEPRECATED_ATTR)
    set(DEFINE_DEPRECATED "__attribute__ ((__deprecated__))")
  elseif(COMPILER_HAS_DEPRECATED)
    set(DEFINE_DEPRECATED "__declspec(deprecated)")
  endif()

  get_property(type TARGET ${TARGET_LIBRARY} PROPERTY TYPE)

  # Quote the expansion so that an empty or variable-like value of `type`
  # is compared as a plain string instead of being re-interpreted by if().
  if(NOT "${type}" STREQUAL "STATIC_LIBRARY")
    if(WIN32 OR CYGWIN)
      set(DEFINE_EXPORT "__declspec(dllexport)")
      set(DEFINE_IMPORT "__declspec(dllimport)")
    elseif(COMPILER_HAS_HIDDEN_VISIBILITY)
      set(DEFINE_EXPORT "__attribute__((visibility(\"default\")))")
      set(DEFINE_IMPORT "__attribute__((visibility(\"default\")))")
      set(DEFINE_NO_EXPORT "__attribute__((visibility(\"hidden\")))")
    endif()
  endif()
endmacro()
# Resolve every macro/file name used by the export header for TARGET_LIBRARY
# and write the header by configuring exportheader.cmake.in.
#
# Keyword arguments (all optional) are parsed from ARGN: the flag
# DEFINE_NO_DEPRECATED and the single-value keywords listed in `oneValueArgs`.
# Defaults are derived from BASE_NAME (which itself defaults to the target
# name); explicit keyword values override them and get PREFIX_NAME prepended.
# Any unrecognised keyword aborts configuration with FATAL_ERROR.
# NOTE(review): cmake_parse_arguments() is called without a visible
# include(CMakeParseArguments) — confirm the minimum CMake version provides it.
macro(_DO_GENERATE_EXPORT_HEADER TARGET_LIBRARY)
# Option overrides
set(options DEFINE_NO_DEPRECATED)
set(oneValueArgs PREFIX_NAME BASE_NAME EXPORT_MACRO_NAME EXPORT_FILE_NAME
DEPRECATED_MACRO_NAME NO_EXPORT_MACRO_NAME STATIC_DEFINE
NO_DEPRECATED_MACRO_NAME CUSTOM_CONTENT_FROM_VARIABLE)
set(multiValueArgs)
cmake_parse_arguments(_GEH "${options}" "${oneValueArgs}" "${multiValueArgs}"
${ARGN})
# BASE_NAME defaults to the target name unless the caller supplied one.
set(BASE_NAME "${TARGET_LIBRARY}")
if(_GEH_BASE_NAME)
set(BASE_NAME ${_GEH_BASE_NAME})
endif()
string(TOUPPER ${BASE_NAME} BASE_NAME_UPPER)
string(TOLOWER ${BASE_NAME} BASE_NAME_LOWER)
# Default options
set(EXPORT_MACRO_NAME "${_GEH_PREFIX_NAME}${BASE_NAME_UPPER}_EXPORT")
set(NO_EXPORT_MACRO_NAME "${_GEH_PREFIX_NAME}${BASE_NAME_UPPER}_NO_EXPORT")
set(EXPORT_FILE_NAME "${CMAKE_CURRENT_BINARY_DIR}/${BASE_NAME_LOWER}_export.h")
set(DEPRECATED_MACRO_NAME "${_GEH_PREFIX_NAME}${BASE_NAME_UPPER}_DEPRECATED")
set(STATIC_DEFINE "${_GEH_PREFIX_NAME}${BASE_NAME_UPPER}_STATIC_DEFINE")
set(NO_DEPRECATED_MACRO_NAME
"${_GEH_PREFIX_NAME}${BASE_NAME_UPPER}_NO_DEPRECATED")
if(_GEH_UNPARSED_ARGUMENTS)
message(FATAL_ERROR "Unknown keywords given to GENERATE_EXPORT_HEADER(): \"${_GEH_UNPARSED_ARGUMENTS}\"")
endif()
# Explicit keyword values replace the defaults; each resulting name is then
# sanitised into a valid C identifier.
if(_GEH_EXPORT_MACRO_NAME)
set(EXPORT_MACRO_NAME ${_GEH_PREFIX_NAME}${_GEH_EXPORT_MACRO_NAME})
endif()
string(MAKE_C_IDENTIFIER ${EXPORT_MACRO_NAME} EXPORT_MACRO_NAME)
# A relative EXPORT_FILE_NAME is placed under the current binary directory.
if(_GEH_EXPORT_FILE_NAME)
if(IS_ABSOLUTE ${_GEH_EXPORT_FILE_NAME})
set(EXPORT_FILE_NAME ${_GEH_EXPORT_FILE_NAME})
else()
set(EXPORT_FILE_NAME "${CMAKE_CURRENT_BINARY_DIR}/${_GEH_EXPORT_FILE_NAME}")
endif()
endif()
if(_GEH_DEPRECATED_MACRO_NAME)
set(DEPRECATED_MACRO_NAME ${_GEH_PREFIX_NAME}${_GEH_DEPRECATED_MACRO_NAME})
endif()
string(MAKE_C_IDENTIFIER ${DEPRECATED_MACRO_NAME} DEPRECATED_MACRO_NAME)
if(_GEH_NO_EXPORT_MACRO_NAME)
set(NO_EXPORT_MACRO_NAME ${_GEH_PREFIX_NAME}${_GEH_NO_EXPORT_MACRO_NAME})
endif()
string(MAKE_C_IDENTIFIER ${NO_EXPORT_MACRO_NAME} NO_EXPORT_MACRO_NAME)
if(_GEH_STATIC_DEFINE)
set(STATIC_DEFINE ${_GEH_PREFIX_NAME}${_GEH_STATIC_DEFINE})
endif()
string(MAKE_C_IDENTIFIER ${STATIC_DEFINE} STATIC_DEFINE)
# The template tests this value with "#if", so it must be 0 or 1.
if(_GEH_DEFINE_NO_DEPRECATED)
set(DEFINE_NO_DEPRECATED 1)
else()
set(DEFINE_NO_DEPRECATED 0)
endif()
if(_GEH_NO_DEPRECATED_MACRO_NAME)
set(NO_DEPRECATED_MACRO_NAME
${_GEH_PREFIX_NAME}${_GEH_NO_DEPRECATED_MACRO_NAME})
endif()
string(MAKE_C_IDENTIFIER ${NO_DEPRECATED_MACRO_NAME} NO_DEPRECATED_MACRO_NAME)
set(INCLUDE_GUARD_NAME "${EXPORT_MACRO_NAME}_H")
# The "building vs. using the library" switch in the header is the target's
# DEFINE_SYMBOL property, falling back to <target>_EXPORTS when it is unset.
get_target_property(EXPORT_IMPORT_CONDITION ${TARGET_LIBRARY} DEFINE_SYMBOL)
if(NOT EXPORT_IMPORT_CONDITION)
set(EXPORT_IMPORT_CONDITION ${TARGET_LIBRARY}_EXPORTS)
endif()
string(MAKE_C_IDENTIFIER ${EXPORT_IMPORT_CONDITION} EXPORT_IMPORT_CONDITION)
# CUSTOM_CONTENT_FROM_VARIABLE names a variable; its value (or an empty
# string when that variable is not defined) becomes CUSTOM_CONTENT.
if(_GEH_CUSTOM_CONTENT_FROM_VARIABLE)
if(DEFINED "${_GEH_CUSTOM_CONTENT_FROM_VARIABLE}")
set(CUSTOM_CONTENT "${${_GEH_CUSTOM_CONTENT_FROM_VARIABLE}}")
else()
set(CUSTOM_CONTENT "")
endif()
endif()
configure_file("${_GENERATE_EXPORT_HEADER_MODULE_DIR}/exportheader.cmake.in"
"${EXPORT_FILE_NAME}" @ONLY)
endmacro()
# Public entry point: generate the export header for TARGET_LIBRARY.
#
# TARGET_LIBRARY must be a static, shared, object or module library target;
# for any other target type a WARNING is printed and nothing is generated.
# Remaining arguments (ARGN) are forwarded unchanged to
# _do_generate_export_header(), which interprets the keyword options.
function(GENERATE_EXPORT_HEADER TARGET_LIBRARY)
  get_property(type TARGET ${TARGET_LIBRARY} PROPERTY TYPE)
  # Quote the expansion so the comparisons stay well-formed plain string
  # comparisons whatever the property value happens to be.
  if(NOT "${type}" STREQUAL "STATIC_LIBRARY"
      AND NOT "${type}" STREQUAL "SHARED_LIBRARY"
      AND NOT "${type}" STREQUAL "OBJECT_LIBRARY"
      AND NOT "${type}" STREQUAL "MODULE_LIBRARY")
    message(WARNING "This macro can only be used with libraries")
    return()
  endif()
  _test_compiler_hidden_visibility()
  _test_compiler_has_deprecated()
  _do_set_macro_values(${TARGET_LIBRARY})
  _do_generate_export_header(${TARGET_LIBRARY} ${ARGN})
endfunction()
# Deprecated helper that provides the hidden-visibility compile flags.
#
# Optional argument: name of an output variable. When given, the flags are
# returned in that variable in the caller's scope; when omitted, the flags are
# appended to the caller's CMAKE_CXX_FLAGS.
# Nothing is changed when the USE_COMPILER_HIDDEN_VISIBILITY option is off or
# the compiler probe did not report support for -fvisibility=hidden.
function(add_compiler_export_flags)
  if(NOT CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 2.8.12)
    message(DEPRECATION "The add_compiler_export_flags function is obsolete. Use the CXX_VISIBILITY_PRESET and VISIBILITY_INLINES_HIDDEN target properties instead.")
  endif()

  _test_compiler_hidden_visibility()
  _test_compiler_has_deprecated()

  option(USE_COMPILER_HIDDEN_VISIBILITY
    "Use HIDDEN visibility support if available." ON)
  mark_as_advanced(USE_COMPILER_HIDDEN_VISIBILITY)
  if(NOT (USE_COMPILER_HIDDEN_VISIBILITY AND COMPILER_HAS_HIDDEN_VISIBILITY))
    # Just return if there are no flags to add.
    return()
  endif()

  set (EXTRA_FLAGS "-fvisibility=hidden")

  if(COMPILER_HAS_HIDDEN_INLINE_VISIBILITY)
    set (EXTRA_FLAGS "${EXTRA_FLAGS} -fvisibility-inlines-hidden")
  endif()

  # Either return the extra flags needed in the supplied argument, or to the
  # CMAKE_CXX_FLAGS if no argument is supplied.
  if(ARGC GREATER 0)
    set(${ARGV0} "${EXTRA_FLAGS}" PARENT_SCOPE)
  else()
    # Build the new value directly instead of using string(APPEND), which is
    # unavailable in the older CMake releases this project still accepts.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_FLAGS}" PARENT_SCOPE)
  endif()
endfunction()

View File

@ -1,42 +0,0 @@
#ifndef @INCLUDE_GUARD_NAME@
#define @INCLUDE_GUARD_NAME@
#ifdef @STATIC_DEFINE@
# define @EXPORT_MACRO_NAME@
# define @NO_EXPORT_MACRO_NAME@
#else
# ifndef @EXPORT_MACRO_NAME@
# ifdef @EXPORT_IMPORT_CONDITION@
/* We are building this library */
# define @EXPORT_MACRO_NAME@ @DEFINE_EXPORT@
# else
/* We are using this library */
# define @EXPORT_MACRO_NAME@ @DEFINE_IMPORT@
# endif
# endif
# ifndef @NO_EXPORT_MACRO_NAME@
# define @NO_EXPORT_MACRO_NAME@ @DEFINE_NO_EXPORT@
# endif
#endif
#ifndef @DEPRECATED_MACRO_NAME@
# define @DEPRECATED_MACRO_NAME@ @DEFINE_DEPRECATED@
#endif
#ifndef @DEPRECATED_MACRO_NAME@_EXPORT
# define @DEPRECATED_MACRO_NAME@_EXPORT @EXPORT_MACRO_NAME@ @DEPRECATED_MACRO_NAME@
#endif
#ifndef @DEPRECATED_MACRO_NAME@_NO_EXPORT
# define @DEPRECATED_MACRO_NAME@_NO_EXPORT @NO_EXPORT_MACRO_NAME@ @DEPRECATED_MACRO_NAME@
#endif
#if @DEFINE_NO_DEPRECATED@ /* DEFINE_NO_DEPRECATED */
# ifndef @NO_DEPRECATED_MACRO_NAME@
# define @NO_DEPRECATED_MACRO_NAME@
# endif
#endif
@CUSTOM_CONTENT@
#endif

View File

@ -6,7 +6,7 @@
#include <math.h>
#if defined(WIN32)
#if defined(_WIN32)
# include <windows.h>
# include <tchar.h>
#else

View File

@ -1,527 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include "multi2.h"
#include "multi2_error_code.h"
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
inline functions
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/*
 * Read four bytes at `src` as one big-endian 32-bit value and store it in
 * `*dst`.
 *
 * Returns `src + 4`, i.e. the address right after the bytes consumed, so
 * calls can be chained while walking a buffer.
 */
static __inline uint8_t *load_be_uint32(uint32_t *dst, uint8_t *src)
{
	/*
	 * Widen each byte to uint32_t before shifting: a plain uint8_t is
	 * promoted to (signed) int, and shifting a value >= 0x80 left by 24
	 * would overflow that int.
	 */
	*dst = ((uint32_t)src[0] << 24)
	     | ((uint32_t)src[1] << 16)
	     | ((uint32_t)src[2] <<  8)
	     |  (uint32_t)src[3];
	return src+4;
}
/*
 * Store `src` at `dst` as four bytes, most significant byte first.
 *
 * Returns `dst + 4`, the address right after the bytes written.
 */
static __inline uint8_t *save_be_uint32(uint8_t *dst, uint32_t src)
{
	uint8_t *out = dst;
	int shift;

	/* emit bits 31..24 first, then 23..16, 15..8 and finally 7..0 */
	for(shift=24; shift>=0; shift-=8){
		*out++ = (uint8_t)((src >> shift) & 0xff);
	}
	return out;
}
/*
 * Rotate `val` left by `count` bit positions (taken modulo 32).
 *
 * Both shift amounts are masked to 0..31 so that a count of 0 (or any
 * multiple of 32) returns `val` unchanged instead of shifting a 32-bit
 * value by 32 bits.
 */
static __inline uint32_t left_rotate_uint32(uint32_t val, uint32_t count)
{
	count &= 31;
	return ((val << count) | (val >> ((32-count) & 31)));
}
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
inner structures
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* Eight 32-bit key words; used both for the system key and for the work keys. */
typedef struct {
uint32_t key[8];
} CORE_PARAM;
/* One 64-bit cipher block (or 64-bit key) split into two 32-bit halves. */
typedef struct {
uint32_t l;
uint32_t r;
} CORE_DATA;
/*
 * Per-instance state. create_multi2() allocates it together with the public
 * MULTI2 struct, which is placed directly after it in the same allocation.
 */
typedef struct {
/* reference counter; starts at 1, the allocation is freed when it reaches 0 */
int32_t ref_count;
/* initial chaining value loaded by set_init_cbc */
CORE_DATA cbc_init;
/* system key loaded by set_system_key */
CORE_PARAM sys;
/* scramble keys loaded by set_scramble_key */
CORE_DATA scr[2]; /* 0: odd, 1: even */
/* work keys derived from `sys` and `scr` by core_schedule() */
CORE_PARAM wrk[2]; /* 0: odd, 1: even */
/* iteration count handed to core_encrypt()/core_decrypt(); initialised to 4 */
uint32_t round;
/* bit mask of MULTI2_STATE_* flags recording which inputs have been set */
uint32_t state;
} MULTI2_PRIVATE_DATA;
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
constant values
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/*
 * Bit flags stored in MULTI2_PRIVATE_DATA.state. encrypt/decrypt refuse to
 * run until all three are set.
 */
#define MULTI2_STATE_CBC_INIT_SET (0x0001)
#define MULTI2_STATE_SYSTEM_KEY_SET (0x0002)
#define MULTI2_STATE_SCRAMBLE_KEY_SET (0x0004)
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
function prototypes (interface method)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static void release_multi2(void *m2);
static int add_ref_multi2(void *m2);
static int set_round_multi2(void *m2, int32_t val);
static int set_system_key_multi2(void *m2, uint8_t *val);
static int set_init_cbc_multi2(void *m2, uint8_t *val);
static int set_scramble_key_multi2(void *m2, uint8_t *val);
static int clear_scramble_key_multi2(void *m2);
static int encrypt_multi2(void *m2, int32_t type, uint8_t *buf, int32_t size);
static int decrypt_multi2(void *m2, int32_t type, uint8_t *buf, int32_t size);
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
global function implementation
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/*
 * Allocate and initialise a MULTI2 object.
 *
 * A single zero-filled allocation holds the private data followed directly
 * by the public MULTI2 struct. The reference count starts at 1 and the round
 * count at 4. Returns the MULTI2 pointer, or NULL if the allocation fails.
 *
 * NOTE(review): the MULTI2 struct starts at (priv + 1); this assumes
 * sizeof(MULTI2_PRIVATE_DATA) leaves it suitably aligned -- TODO confirm.
 */
ARIB25_API_EXPORT MULTI2 *create_multi2()
{
	MULTI2_PRIVATE_DATA *priv;
	MULTI2 *iface;

	priv = (MULTI2_PRIVATE_DATA *)calloc(1, sizeof(MULTI2_PRIVATE_DATA) + sizeof(MULTI2));
	if(priv == NULL){
		return NULL;
	}

	priv->ref_count = 1;
	priv->round = 4;

	/* the public struct lives right behind the private block */
	iface = (MULTI2 *)(priv+1);
	iface->private_data = priv;

	iface->release = release_multi2;
	iface->add_ref = add_ref_multi2;
	iface->set_round = set_round_multi2;
	iface->set_system_key = set_system_key_multi2;
	iface->set_init_cbc = set_init_cbc_multi2;
	iface->set_scramble_key = set_scramble_key_multi2;
	iface->clear_scramble_key = clear_scramble_key_multi2;
	iface->encrypt = encrypt_multi2;
	iface->decrypt = decrypt_multi2;

	return iface;
}
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
function prototypes (private method)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static MULTI2_PRIVATE_DATA *private_data(void *m2);
static void core_schedule(CORE_PARAM *work, CORE_PARAM *skey, CORE_DATA *dkey);
static void core_encrypt(CORE_DATA *dst, CORE_DATA *src, CORE_PARAM *w, int32_t round);
static void core_decrypt(CORE_DATA *dst, CORE_DATA *src, CORE_PARAM *w, int32_t round);
static void core_pi1(CORE_DATA *dst, CORE_DATA *src);
static void core_pi2(CORE_DATA *dst, CORE_DATA *src, uint32_t a);
static void core_pi3(CORE_DATA *dst, CORE_DATA *src, uint32_t a, uint32_t b);
static void core_pi4(CORE_DATA *dst, CORE_DATA *src, uint32_t a);
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
interface method implementation
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/*
 * Drop one reference to the object; the allocation is freed once the
 * reference count reaches zero. An invalid handle is silently ignored.
 */
static void release_multi2(void *m2)
{
	MULTI2_PRIVATE_DATA *priv = private_data(m2);

	if(priv != NULL){
		priv->ref_count -= 1;
		if(priv->ref_count == 0){
			free(priv);
		}
	}
}
/*
 * Take an additional reference to the object.
 *
 * Returns 0 on success, MULTI2_ERROR_INVALID_PARAMETER for an invalid handle.
 */
static int add_ref_multi2(void *m2)
{
	MULTI2_PRIVATE_DATA *priv = private_data(m2);

	if(priv != NULL){
		priv->ref_count += 1;
		return 0;
	}
	return MULTI2_ERROR_INVALID_PARAMETER;
}
/*
 * Set the round count that is passed to the core cipher routines.
 *
 * Returns 0 on success, MULTI2_ERROR_INVALID_PARAMETER for an invalid handle.
 * The value itself is stored without any range check.
 */
static int set_round_multi2(void *m2, int32_t val)
{
	MULTI2_PRIVATE_DATA *priv = private_data(m2);

	if(priv != NULL){
		priv->round = val;
		return 0;
	}
	return MULTI2_ERROR_INVALID_PARAMETER;
}
/*
 * Load the system key from `val`: eight consecutive big-endian 32-bit words
 * (32 bytes are read), and mark the system key as set.
 *
 * Returns 0 on success, MULTI2_ERROR_INVALID_PARAMETER if the handle is
 * invalid or `val` is NULL.
 */
static int set_system_key_multi2(void *m2, uint8_t *val)
{
	MULTI2_PRIVATE_DATA *priv = private_data(m2);
	uint8_t *cursor = val;
	int word;

	if( (priv == NULL) || (val == NULL) ){
		return MULTI2_ERROR_INVALID_PARAMETER;
	}

	for(word=0; word<8; word++){
		cursor = load_be_uint32(&(priv->sys.key[word]), cursor);
	}

	priv->state |= MULTI2_STATE_SYSTEM_KEY_SET;
	return 0;
}
/*
 * Load the initial chaining value from `val`: two big-endian 32-bit words
 * (8 bytes are read), and mark it as set.
 *
 * Returns 0 on success, MULTI2_ERROR_INVALID_PARAMETER if the handle is
 * invalid or `val` is NULL.
 */
static int set_init_cbc_multi2(void *m2, uint8_t *val)
{
	MULTI2_PRIVATE_DATA *priv = private_data(m2);

	if( (priv == NULL) || (val == NULL) ){
		return MULTI2_ERROR_INVALID_PARAMETER;
	}

	load_be_uint32(&(priv->cbc_init.l), val+0);
	load_be_uint32(&(priv->cbc_init.r), val+4);

	priv->state |= MULTI2_STATE_CBC_INIT_SET;
	return 0;
}
/*
 * Load both scramble keys from `val` (16 bytes: slot 0 first, then slot 1,
 * each as two big-endian 32-bit words) and derive the matching work keys
 * from the currently stored system key.
 *
 * Returns 0 on success, MULTI2_ERROR_INVALID_PARAMETER if the handle is
 * invalid or `val` is NULL.
 */
static int set_scramble_key_multi2(void *m2, uint8_t *val)
{
	MULTI2_PRIVATE_DATA *priv = private_data(m2);
	int slot;

	if( (priv == NULL) || (val == NULL) ){
		return MULTI2_ERROR_INVALID_PARAMETER;
	}

	/* first read both keys ... */
	for(slot=0; slot<2; slot++){
		load_be_uint32(&(priv->scr[slot].l), val + (slot*8) + 0);
		load_be_uint32(&(priv->scr[slot].r), val + (slot*8) + 4);
	}
	/* ... then expand each of them into its work key */
	for(slot=0; slot<2; slot++){
		core_schedule(priv->wrk+slot, &(priv->sys), priv->scr+slot);
	}

	priv->state |= MULTI2_STATE_SCRAMBLE_KEY_SET;
	return 0;
}
/*
 * Wipe the stored scramble keys and work keys and clear the
 * "scramble key set" flag.
 *
 * Returns 0 on success, MULTI2_ERROR_INVALID_PARAMETER for an invalid handle.
 */
static int clear_scramble_key_multi2(void *m2)
{
	MULTI2_PRIVATE_DATA *priv = private_data(m2);

	if(priv != NULL){
		memset(priv->scr, 0, sizeof(priv->scr));
		memset(priv->wrk, 0, sizeof(priv->wrk));
		priv->state &= (~MULTI2_STATE_SCRAMBLE_KEY_SET);
		return 0;
	}
	return MULTI2_ERROR_INVALID_PARAMETER;
}
static int encrypt_multi2(void *m2, int32_t type, uint8_t *buf, int32_t size)
{
CORE_DATA src,dst;
CORE_PARAM *prm;
uint8_t *p;
MULTI2_PRIVATE_DATA *prv;
prv = private_data(m2);
if( (prv == NULL) || (buf == NULL) || (size < 1) ){
return MULTI2_ERROR_INVALID_PARAMETER;
}
if(prv->state != (MULTI2_STATE_CBC_INIT_SET|MULTI2_STATE_SYSTEM_KEY_SET|MULTI2_STATE_SCRAMBLE_KEY_SET)){
if( (prv->state & MULTI2_STATE_CBC_INIT_SET) == 0 ){
return MULTI2_ERROR_UNSET_CBC_INIT;
}
if( (prv->state & MULTI2_STATE_SYSTEM_KEY_SET) == 0 ){
return MULTI2_ERROR_UNSET_SYSTEM_KEY;
}
if( (prv->state & MULTI2_STATE_SCRAMBLE_KEY_SET) == 0 ){
return MULTI2_ERROR_UNSET_SCRAMBLE_KEY;
}
}
if(type == 0x02){
prm = prv->wrk+1;
}else{
prm = prv->wrk+0;
}
dst.l = prv->cbc_init.l;
dst.r = prv->cbc_init.r;
p = buf;
while(size >= 8){
load_be_uint32(&(src.l), p+0);
load_be_uint32(&(src.r), p+4);
src.l = src.l ^ dst.l;
src.r = src.r ^ dst.r;
core_encrypt(&dst, &src, prm, prv->round);
p = save_be_uint32(p, dst.l);
p = save_be_uint32(p, dst.r);
size -= 8;
}
if(size > 0){
int i;
uint8_t tmp[8];
src.l = dst.l;
src.r = dst.r;
core_encrypt(&dst, &src, prm, prv->round);
save_be_uint32(tmp+0, dst.l);
save_be_uint32(tmp+4, dst.r);
for(i=0;i<size;i++){
p[i] = (uint8_t)(p[i] ^ tmp[i]);
}
}
return 0;
}
/*
 * Decrypt `size` bytes of `buf` in place.
 *
 * `type` selects the work key: 0x02 uses slot 1, any other value slot 0.
 * Complete 8-byte blocks are decrypted and XORed with the previous
 * ciphertext block (initially the stored chaining value). A trailing
 * fragment shorter than 8 bytes is XORed with the *encryption* of the last
 * ciphertext block, mirroring what encrypt does for its tail.
 *
 * Returns 0 on success, MULTI2_ERROR_INVALID_PARAMETER for an invalid
 * handle, NULL buffer or size < 1, and MULTI2_ERROR_UNSET_* (checked in the
 * order CBC init, system key, scramble key) when an input is missing.
 */
static int decrypt_multi2(void *m2, int32_t type, uint8_t *buf, int32_t size)
{
	MULTI2_PRIVATE_DATA *priv;
	CORE_PARAM *key;
	CORE_DATA in, out, chain;
	uint8_t *cur;
	int32_t remain;

	priv = private_data(m2);
	if( (priv == NULL) || (buf == NULL) || (size < 1) ){
		return MULTI2_ERROR_INVALID_PARAMETER;
	}

	/* every prerequisite must have been supplied beforehand */
	if( (priv->state & MULTI2_STATE_CBC_INIT_SET) == 0 ){
		return MULTI2_ERROR_UNSET_CBC_INIT;
	}
	if( (priv->state & MULTI2_STATE_SYSTEM_KEY_SET) == 0 ){
		return MULTI2_ERROR_UNSET_SYSTEM_KEY;
	}
	if( (priv->state & MULTI2_STATE_SCRAMBLE_KEY_SET) == 0 ){
		return MULTI2_ERROR_UNSET_SCRAMBLE_KEY;
	}

	key = (type == 0x02) ? (priv->wrk+1) : (priv->wrk+0);

	chain.l = priv->cbc_init.l;
	chain.r = priv->cbc_init.r;

	cur = buf;
	for(remain=size; remain>=8; remain-=8){
		load_be_uint32(&(in.l), cur+0);
		load_be_uint32(&(in.r), cur+4);
		core_decrypt(&out, &in, key, priv->round);
		out.l = out.l ^ chain.l;
		out.r = out.r ^ chain.r;
		/* the ciphertext just consumed chains into the next block */
		chain.l = in.l;
		chain.r = in.r;
		cur = save_be_uint32(cur, out.l);
		cur = save_be_uint32(cur, out.r);
	}

	if(remain > 0){
		/* partial tail: XOR with the encrypted chaining value */
		uint8_t pad[8];
		int32_t i;

		core_encrypt(&out, &chain, key, priv->round);
		save_be_uint32(pad+0, out.l);
		save_be_uint32(pad+4, out.r);
		for(i=0; i<remain; i++){
			cur[i] = (uint8_t)(cur[i] ^ pad[i]);
		}
	}

	return 0;
}
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
private method implementation
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/*
 * Map a MULTI2 handle back to its private data.
 *
 * Returns NULL when `m2` is NULL or when its private_data pointer does not
 * sit immediately in front of the MULTI2 struct, which is the layout
 * create_multi2() produces.
 */
static MULTI2_PRIVATE_DATA *private_data(void *m2)
{
	MULTI2 *iface = (MULTI2 *)m2;
	MULTI2_PRIVATE_DATA *priv;

	if(iface == NULL){
		return NULL;
	}

	priv = (MULTI2_PRIVATE_DATA *)(iface->private_data);
	if( ((void *)(priv+1)) == ((void *)iface) ){
		return priv;
	}
	return NULL;
}
/*
 * Derive the eight work-key words in `work` from the system key `skey` and
 * the 64-bit key `dkey`.
 *
 * `dkey` is run through the sequence pi1, pi2, pi3, pi4, pi1, pi2, pi3, pi4,
 * pi1 with the system-key words as parameters; after each step from the
 * first pi2 onwards one half of the intermediate value (alternating l, r)
 * becomes the next work-key word.
 */
static void core_schedule(CORE_PARAM *work, CORE_PARAM *skey, CORE_DATA *dkey)
{
	/* two buffers used in ping-pong fashion for the intermediate values */
	CORE_DATA a, b;

	core_pi1(&a, dkey);

	core_pi2(&b, &a, skey->key[0]);
	work->key[0] = b.l;

	core_pi3(&a, &b, skey->key[1], skey->key[2]);
	work->key[1] = a.r;

	core_pi4(&b, &a, skey->key[3]);
	work->key[2] = b.l;

	core_pi1(&a, &b);
	work->key[3] = a.r;

	core_pi2(&b, &a, skey->key[4]);
	work->key[4] = b.l;

	core_pi3(&a, &b, skey->key[5], skey->key[6]);
	work->key[5] = a.r;

	core_pi4(&b, &a, skey->key[7]);
	work->key[6] = b.l;

	core_pi1(&a, &b);
	work->key[7] = a.r;
}
/*
 * Encrypt one block: copy `src` to `dst`, then apply the eight-step
 * pi1..pi4, pi1..pi4 sequence `round` times using the work key `w`.
 * A `round` of zero or less leaves `dst` equal to `src`.
 */
static void core_encrypt(CORE_DATA *dst, CORE_DATA *src, CORE_PARAM *w, int32_t round)
{
	const uint32_t *k = w->key;
	CORE_DATA mid;
	int32_t left;

	dst->l = src->l;
	dst->r = src->r;

	for(left=round; left>0; left--){
		core_pi1(&mid, dst);
		core_pi2( dst, &mid, k[0]);
		core_pi3(&mid, dst, k[1], k[2]);
		core_pi4( dst, &mid, k[3]);
		core_pi1(&mid, dst);
		core_pi2( dst, &mid, k[4]);
		core_pi3(&mid, dst, k[5], k[6]);
		core_pi4( dst, &mid, k[7]);
	}
}
/*
 * Decrypt one block: copy `src` to `dst`, then apply the eight steps of
 * core_encrypt in reverse order (pi4, pi3, pi2, pi1 with key words 7..4,
 * then again with key words 3..0) `round` times using the work key `w`.
 * A `round` of zero or less leaves `dst` equal to `src`.
 */
static void core_decrypt(CORE_DATA *dst, CORE_DATA *src, CORE_PARAM *w, int32_t round)
{
	const uint32_t *k = w->key;
	CORE_DATA mid;
	int32_t left;

	dst->l = src->l;
	dst->r = src->r;

	for(left=round; left>0; left--){
		core_pi4(&mid, dst, k[7]);
		core_pi3( dst, &mid, k[5], k[6]);
		core_pi2(&mid, dst, k[4]);
		core_pi1( dst, &mid);
		core_pi4(&mid, dst, k[3]);
		core_pi3( dst, &mid, k[1], k[2]);
		core_pi2(&mid, dst, k[0]);
		core_pi1( dst, &mid);
	}
}
/* pi1: keep the left half, XOR the left half into the right half. */
static void core_pi1(CORE_DATA *dst, CORE_DATA *src)
{
	const uint32_t left = src->l;
	const uint32_t right = src->r;

	dst->l = left;
	dst->r = right ^ left;
}
/*
 * pi2: mix the right half with key word `a` and XOR the result into the
 * left half; the right half passes through unchanged.
 */
static void core_pi2(CORE_DATA *dst, CORE_DATA *src, uint32_t a)
{
	const uint32_t left = src->l;
	const uint32_t right = src->r;
	uint32_t mix;

	mix = right + a;
	mix = left_rotate_uint32(mix, 1) + mix - 1;
	mix = left_rotate_uint32(mix, 4) ^ mix;

	dst->l = left ^ mix;
	dst->r = right;
}
/*
 * pi3: mix the left half with key words `a` and `b` and XOR the result into
 * the right half; the left half passes through unchanged.
 */
static void core_pi3(CORE_DATA *dst, CORE_DATA *src, uint32_t a, uint32_t b)
{
	const uint32_t left = src->l;
	const uint32_t right = src->r;
	uint32_t mix;

	mix = left + a;
	mix = left_rotate_uint32(mix, 2) + mix + 1;
	mix = left_rotate_uint32(mix, 8) ^ mix;
	mix = mix + b;
	mix = left_rotate_uint32(mix, 1) - mix;
	mix = left_rotate_uint32(mix, 16) ^ (mix | left);

	dst->l = left;
	dst->r = right ^ mix;
}
/*
 * pi4: mix the right half with key word `a` and XOR the result into the
 * left half; the right half passes through unchanged.
 */
static void core_pi4(CORE_DATA *dst, CORE_DATA *src, uint32_t a)
{
	const uint32_t left = src->l;
	const uint32_t right = src->r;
	uint32_t mix;

	mix = right + a;
	mix = left_rotate_uint32(mix, 2) + mix + 1;

	dst->l = left ^ mix;
	dst->r = right;
}

271
src/multi2.cc Normal file
View File

@ -0,0 +1,271 @@
#include <cstddef>
#include <new>
#include "multi2.h"
#include "multi2_error_code.h"
#include "portable.h"
#include "multi2_compat.h"
#include "multi2_cipher.h"
namespace multi2 {
struct multi2 : public MULTI2 {
uint32_t ref_count;
uint32_t round;
optional<system_key_type> system_key;
optional<iv_type> iv;
array<optional<data_key_type>, 2> data_key;
array<optional<work_key_type>, 2> work_key;
inline void set_system_key(uint8_t *p) {
system_key_type s;
for (size_t i = 0; i < s.size(); ++i) {
s[i] = load_be(p + i * 4);
}
system_key = s;
}
inline void set_iv(uint8_t *p) {
iv_type v;
v[0] = load_be(p);
v[1] = load_be(p + 4);
iv = v;
}
inline void set_work_keys(uint8_t *p) {
array<data_key_type, 2> k;
k[0][0] = load_be(p);
k[0][1] = load_be(p + 4);
k[1][0] = load_be(p + 8);
k[1][1] = load_be(p + 12);
for (int i = 0; i < 2; ++i) {
if (!data_key[i] || *data_key[i] != k[i]) {
data_key[i] = k[i];
work_key[i].reset();
}
}
}
inline void clear_work_keys() {
for (int i = 0; i < 2; ++i) {
data_key[i].reset();
work_key[i].reset();
}
}
inline int encrypt(int32_t type, uint8_t *b, size_t n) {
int i = (type == 0x02);
if (!iv) {
return MULTI2_ERROR_UNSET_CBC_INIT;
}
if (!work_key[i]) {
if (!system_key) {
return MULTI2_ERROR_UNSET_SYSTEM_KEY;
}
if (!data_key[i]) {
return MULTI2_ERROR_UNSET_SCRAMBLE_KEY;
}
work_key[i] = schedule(*data_key[i], *system_key);
}
encrypt_cbc_ofb(b, n, *iv, *work_key[i], round);
return 0;
}
inline int decrypt(int32_t type, uint8_t *b, size_t n) {
int i = (type == 0x02);
if (!iv) {
return MULTI2_ERROR_UNSET_CBC_INIT;
}
if (!work_key[i]) {
if (!system_key) {
return MULTI2_ERROR_UNSET_SYSTEM_KEY;
}
if (!data_key[i]) {
return MULTI2_ERROR_UNSET_SCRAMBLE_KEY;
}
work_key[i] = schedule(*data_key[i], *system_key);
}
decrypt_cbc_ofb(b, n, *iv, *work_key[i], round);
return 0;
}
};
}
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
function prototypes (interface method)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static void release_multi2(void *m2);
static int add_ref_multi2(void *m2);
static int set_round_multi2(void *m2, int32_t val);
static int set_system_key_multi2(void *m2, uint8_t *val);
static int set_init_cbc_multi2(void *m2, uint8_t *val);
static int set_scramble_key_multi2(void *m2, uint8_t *val);
static int clear_scramble_key_multi2(void *m2);
static int encrypt_multi2(void *m2, int32_t type, uint8_t *buf, int32_t size);
static int decrypt_multi2(void *m2, int32_t type, uint8_t *buf, int32_t size);
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
global function implementation
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
// Allocate a MULTI2 object with ref_count 1 and round 4 and fill in its
// function-pointer table. Returns NULL when the allocation fails.
ARIB25_API_EXPORT MULTI2 *create_multi2()
{
    multi2::multi2 *impl;

    try {
        impl = new multi2::multi2();
    } catch (std::bad_alloc &) {
        return NULL;
    }

    impl->ref_count = 1;
    impl->round = 4;

    // The table is filled through the base-class pointer: the derived struct
    // declares member functions named set_system_key / encrypt / decrypt,
    // which would hide the base members of the same name.
    MULTI2 *iface = static_cast<MULTI2 *>(impl);

    iface->private_data = impl;

    iface->release = release_multi2;
    iface->add_ref = add_ref_multi2;
    iface->set_round = set_round_multi2;
    iface->set_system_key = set_system_key_multi2;
    iface->set_init_cbc = set_init_cbc_multi2;
    iface->set_scramble_key = set_scramble_key_multi2;
    iface->clear_scramble_key = clear_scramble_key_multi2;
    iface->encrypt = encrypt_multi2;
    iface->decrypt = decrypt_multi2;

    return iface;
}
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
function prototypes (private method)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static multi2::multi2 *private_data(void *m2);
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
interface method implementation
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
static void release_multi2(void *m2)
{
multi2::multi2 *prv = private_data(m2);
if (!prv) {
return;
}
--prv->ref_count;
if (!prv->ref_count) {
delete prv;
}
}
static int add_ref_multi2(void *m2)
{
multi2::multi2 *prv = private_data(m2);
if (!prv) {
return MULTI2_ERROR_INVALID_PARAMETER;
}
++prv->ref_count;
return 0;
}
static int set_round_multi2(void *m2, int32_t val)
{
multi2::multi2 *prv = private_data(m2);
if (!prv) {
return MULTI2_ERROR_INVALID_PARAMETER;
}
prv->round = val;
return 0;
}
static int set_system_key_multi2(void *m2, uint8_t *val)
{
multi2::multi2 *prv = private_data(m2);
if (!prv || !val) {
return MULTI2_ERROR_INVALID_PARAMETER;
}
prv->set_system_key(val);
return 0;
}
static int set_init_cbc_multi2(void *m2, uint8_t *val)
{
multi2::multi2 *prv = private_data(m2);
if (!prv || !val) {
return MULTI2_ERROR_INVALID_PARAMETER;
}
prv->set_iv(val);
return 0;
}
static int set_scramble_key_multi2(void *m2, uint8_t *val)
{
multi2::multi2 *prv = private_data(m2);
if (!prv || !val) {
return MULTI2_ERROR_INVALID_PARAMETER;
}
prv->set_work_keys(val);
return 0;
}
static int clear_scramble_key_multi2(void *m2)
{
multi2::multi2 *prv = private_data(m2);
if (!prv) {
return MULTI2_ERROR_INVALID_PARAMETER;
}
prv->clear_work_keys();
return 0;
}
static int encrypt_multi2(void *m2, int32_t type, uint8_t *buf, int32_t size)
{
multi2::multi2 *prv = private_data(m2);
if (!prv || !buf || size < 1) {
return MULTI2_ERROR_INVALID_PARAMETER;
}
return prv->encrypt(type, buf, size);
}
static int decrypt_multi2(void *m2, int32_t type, uint8_t *buf, int32_t size)
{
multi2::multi2 *prv = private_data(m2);
if (!prv || !buf || size < 1) {
return MULTI2_ERROR_INVALID_PARAMETER;
}
return prv->decrypt(type, buf, size);
}
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
private method implementation
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
// Resolve an opaque interface pointer to the implementation object.
// Returns NULL when m2 is NULL or when its private_data field does not
// point back at the same object (a sanity check on the handle).
static multi2::multi2 *private_data(void *m2)
{
    if (m2 == NULL) {
        return NULL;
    }

    MULTI2 *iface = static_cast<MULTI2 *>(m2);
    multi2::multi2 *impl = static_cast<multi2::multi2 *>(iface->private_data);

    return (static_cast<MULTI2 *>(impl) == iface) ? impl : NULL;
}

81
src/multi2_block.h Normal file
View File

@ -0,0 +1,81 @@
#pragma once
#include <utility>
#include "portable.h"
namespace multi2 {
// Read a big-endian 32-bit word from the four bytes at p.
// Each byte is widened to uint32_t before shifting: a plain uint8_t is
// promoted to (signed) int, so shifting a byte >= 0x80 left by 24 would
// overflow into the sign bit.
inline uint32_t load_be(const uint8_t *p) {
    return (static_cast<uint32_t>(p[0]) << 24)
         | (static_cast<uint32_t>(p[1]) << 16)
         | (static_cast<uint32_t>(p[2]) << 8)
         |  static_cast<uint32_t>(p[3]);
}
// Write v to the four bytes at p, most significant byte first.
inline void store_be(uint8_t *p, uint32_t v) {
    int shift = 24;
    for (int i = 0; i < 4; ++i) {
        p[i] = (v >> shift) & 0xff;
        shift -= 8;
    }
}
// A cipher block split into a left and a right half of type T. T is either
// a plain uint32_t (one block) or a SIMD wrapper carrying several blocks in
// parallel; load/store/operator^/cbc_post_decrypt are only declared here and
// are defined per T by explicit specialization.
template<typename T>
struct block {
    T left;
    T right;

    // Leaves both halves default-initialized.
    block() { }
    block(const T &l, const T &r) : left(l), right(r) { }

    // Fill both halves from the bytes at p.
    void load(const uint8_t *p);
    // Write both halves back to the bytes at p.
    void store(uint8_t *p) const;

    block<T> operator^(const block<T> &other) const;

    // Called on the raw decryption of `ciphertext`; returns the chained
    // plaintext together with the chaining state for the data that follows.
    std::pair<block<T>, block<uint32_t> > cbc_post_decrypt(const block<T> &ciphertext, const block<uint32_t> &state) const;
};
// Number of bytes consumed by one block<T>::load / produced by one store:
// by default two halves of sizeof(T) each. Specialized where that does not
// hold.
template<typename T>
inline size_t block_size() {
    return 2 * sizeof(T);
}
// Chaining state carried between blocks: one scalar (left, right) pair.
typedef block<uint32_t> cbc_state;
// Scalar load: two big-endian words, left half first.
template<>
inline void block<uint32_t>::load(const uint8_t *p) {
    const uint8_t *left_bytes = p;
    const uint8_t *right_bytes = p + 4;
    left = load_be(left_bytes);
    right = load_be(right_bytes);
}
// Scalar store: two big-endian words, left half first.
template<>
inline void block<uint32_t>::store(uint8_t *p) const {
    uint8_t *left_bytes = p;
    uint8_t *right_bytes = p + 4;
    store_be(left_bytes, left);
    store_be(right_bytes, right);
}
// Half-by-half XOR of two scalar blocks.
template<>
inline block<uint32_t> block<uint32_t>::operator^(const block<uint32_t> &other) const {
    block<uint32_t> mixed;
    mixed.left = left ^ other.left;
    mixed.right = right ^ other.right;
    return mixed;
}
// Scalar CBC step: the plaintext is this (raw decrypted) block XOR the
// incoming state, and the ciphertext block becomes the next state.
template<>
inline std::pair<block<uint32_t>, cbc_state> block<uint32_t>::cbc_post_decrypt(const block<uint32_t> &c, const cbc_state &state) const {
    return std::pair<block<uint32_t>, cbc_state>(*this ^ state, c);
}
// Rotate every 32-bit lane of v left by N bits. N is expected to be in
// 1..31; N == 0 would shift by the full lane width.
template<size_t N, typename T>
inline T rot(const T &v) {
    const T upper = v << N;
    const T lower = v >> (32 - N);
    return upper | lower;
}
// Computes rot<1>(v) - v without a rotate: rot<1>(v) equals 2*v plus the
// top bit wrapped round to bit 0, so the difference is v plus that bit.
template<typename T>
inline T rot1_sub(const T &v) {
    const T wrapped_top_bit = v >> 31;
    return v + wrapped_top_bit;
}
// Computes rot<1>(v) + v - 1 lane-wise. This is the generic form; SIMD types
// provide their own specializations.
template<typename T>
inline T rot1_add_dec(const T &v) {
    const T rotated = rot<1>(v);
    return rotated + v - T(1);
}
}

217
src/multi2_cipher.h Normal file
View File

@ -0,0 +1,217 @@
#pragma once
#include <cstring>
#include "portable.h"
#include "multi2_block.h"
#include "multi2_ymm2.h"
#include "multi2_ymm.h"
#include "multi2_xmm.h"
#if defined(__GNUC__)
# define MULTI2_ALWAYS_INLINE __attribute__((always_inline))
# define MULTI2_LIKELY(x) __builtin_expect(!!(x), 1)
#else
# define MULTI2_ALWAYS_INLINE
# define MULTI2_LIKELY(x) (x)
#endif
namespace multi2 {
typedef array<uint32_t, 8> system_key_type;
typedef array<uint32_t, 2> iv_type;
typedef array<uint32_t, 2> data_key_type;
typedef array<uint32_t, 8> work_key_type;
// The four elementary MULTI2 functions, written once for any lane type T
// (scalar uint32_t or a SIMD wrapper). Each takes a block and returns a new
// one; exactly one half changes per call.
template<typename T>
struct pi {
    typedef block<T> block_type;

    // pi1: right ^= left.
    static inline block_type pi1(const block_type &p) {
        return block_type(p.left, p.right ^ p.left);
    }

    // pi2: left ^= f(right, k1).
    static inline block_type pi2(const block_type &p, uint32_t k1) {
        const T keyed = p.right + T(k1);
        const T mixed = rot1_add_dec(keyed);
        return block_type(p.left ^ rot<4>(mixed) ^ mixed, p.right);
    }

    // pi3: right ^= f(left, k2, k3).
    static inline block_type pi3(const block_type &p, uint32_t k2, uint32_t k3) {
        const T src = p.left;
        const T keyed2 = src + T(k2);
        const T mixed2 = rot<2>(keyed2) + keyed2 + T(1);
        const T folded = rot<8>(mixed2) ^ mixed2;
        const T keyed3 = folded + T(k3);
        const T mixed3 = rot1_sub(keyed3);
        return block_type(p.left, p.right ^ rot<16>(mixed3) ^ (mixed3 | src));
    }

    // pi4: left ^= f(right, k4).
    static inline block_type pi4(const block_type &p, uint32_t k4) {
        const T keyed = p.right + T(k4);
        return block_type(p.left ^ (rot<2>(keyed) + keyed + T(1)), p.right);
    }
};
// Block cipher core for lane type T: n passes of the eight-step pi sequence
// over the eight work-key words, forwards for encrypt and mirrored for
// decrypt.
template<typename T>
struct cipher {
    typedef block<T> block_type;
    typedef pi<T> p;

    static inline block_type encrypt(const block_type &b, const work_key_type &wk, int n) {
        block_type s = b;
        for (int pass = 0; pass < n; ++pass) {
            // first half: work-key words 0..3
            s = p::pi1(s);
            s = p::pi2(s, wk[0]);
            s = p::pi3(s, wk[1], wk[2]);
            s = p::pi4(s, wk[3]);
            // second half: work-key words 4..7
            s = p::pi1(s);
            s = p::pi2(s, wk[4]);
            s = p::pi3(s, wk[5], wk[6]);
            s = p::pi4(s, wk[7]);
        }
        return s;
    }

    static inline block_type decrypt(const block_type &b, const work_key_type &wk, int n) {
        block_type s = b;
        for (int pass = 0; pass < n; ++pass) {
            // undo second half: work-key words 7..4
            s = p::pi4(s, wk[7]);
            s = p::pi3(s, wk[5], wk[6]);
            s = p::pi2(s, wk[4]);
            s = p::pi1(s);
            // undo first half: work-key words 3..0
            s = p::pi4(s, wk[3]);
            s = p::pi3(s, wk[1], wk[2]);
            s = p::pi2(s, wk[0]);
            s = p::pi1(s);
        }
        return s;
    }
};
// Derive the eight work-key words from a data key and the system key.
// The data key is pushed through pi1, pi2, pi3, pi4, pi1, pi2, pi3, pi4, pi1
// keyed by the system-key words; after each step from the first pi2 onwards
// one half of the intermediate block is recorded, alternating left / right.
inline work_key_type schedule(const data_key_type &dk, const system_key_type &sk) {
    typedef pi<uint32_t> p;

    work_key_type w;
    block<uint32_t> s = p::pi1(block<uint32_t>(dk[0], dk[1]));

    s = p::pi2(s, sk[0]);
    w[0] = s.left;
    s = p::pi3(s, sk[1], sk[2]);
    w[1] = s.right;
    s = p::pi4(s, sk[3]);
    w[2] = s.left;
    s = p::pi1(s);
    w[3] = s.right;
    s = p::pi2(s, sk[4]);
    w[4] = s.left;
    s = p::pi3(s, sk[5], sk[6]);
    w[5] = s.right;
    s = p::pi4(s, sk[7]);
    w[6] = s.left;
    s = p::pi1(s);
    w[7] = s.right;

    return w;
}
inline void encrypt_cbc_ofb(uint8_t *buf, size_t n, const iv_type &iv, const work_key_type &key, int round) {
cbc_state state(iv[0], iv[1]);
while (block_size<uint32_t>() <= n) {
block<uint32_t> p;
p.load(buf);
block<uint32_t> c = cipher<uint32_t>::encrypt(p ^ state, key, round);
c.store(buf);
state = c;
buf += block_size<uint32_t>();
n -= block_size<uint32_t>();
}
if (0 < n) {
array<uint8_t, 8> t;
memcpy(&t[0], buf, n);
memset(&t[n], 0, 8 - n);
block<uint32_t> p;
p.load(&t[0]);
block<uint32_t> c = p ^ cipher<uint32_t>::encrypt(state, key, round);
c.store(&t[0]);
memcpy(buf, &t[0], n);
}
}
// Decrypt one block<T> worth of bytes at buf in place, update the chaining
// state, then advance buf and shrink n by block_size<T>(). The caller must
// ensure at least block_size<T>() bytes remain.
template<typename T>
MULTI2_ALWAYS_INLINE
static inline void decrypt_block(uint8_t *&buf, size_t &n, cbc_state &state, const work_key_type &key, int round) {
    const size_t width = block_size<T>();

    block<T> cipher_text;
    cipher_text.load(buf);

    const block<T> raw = cipher<T>::decrypt(cipher_text, key, round);
    const std::pair<block<T>, cbc_state> chained = raw.cbc_post_decrypt(cipher_text, state);

    chained.first.store(buf);
    state = chained.second;

    buf += width;
    n -= width;
}
// Decrypt n bytes at buf in place; the inverse of encrypt_cbc_ofb.
// Whole blocks are CBC-chained starting from iv. Depending on the compiler's
// target macros, wider SIMD block types are tried first, each consuming
// block_size<T>() bytes; the scalar loop then handles what is left, and a
// trailing fragment of 1..7 bytes is XORed with the encryption of the
// current chaining state.
inline void decrypt_cbc_ofb(uint8_t *buf, size_t n, const iv_type &iv, const work_key_type &key, int round) {
cbc_state state(iv[0], iv[1]);
#if defined(__AVX2__)
// Fast path for exactly 184 bytes: one ymm2 step (block_size 120) plus one
// ymm step consumes the whole buffer, so no tail handling is needed.
// NOTE(review): 184 is presumably the payload of a TS packet - confirm.
if (MULTI2_LIKELY(n == 184)) {
decrypt_block<x86::ymm2>(buf, n, state, key, round);
decrypt_block<x86::ymm>(buf, n, state, key, round);
return;
}
// General AVX2 path: each wide type is tried at most once (`if`, not
// `while`); anything still left over falls through to the scalar loop.
if (block_size<x86::ymm2>() <= n) {
decrypt_block<x86::ymm2>(buf, n, state, key, round);
}
if (block_size<x86::ymm>() <= n) {
decrypt_block<x86::ymm>(buf, n, state, key, round);
}
#if defined(__SSE2__)
if (block_size<x86::xmm>() <= n) {
decrypt_block<x86::xmm>(buf, n, state, key, round);
}
#endif
#elif defined(__SSE2__)
// SSE2-only build: loop over xmm-sized chunks.
while (block_size<x86::xmm>() <= n) {
decrypt_block<x86::xmm>(buf, n, state, key, round);
}
#endif
// Scalar path: one 8-byte block at a time.
while (block_size<uint32_t>() <= n) {
decrypt_block<uint32_t>(buf, n, state, key, round);
}
if (0 < n) {
// Trailing fragment: use a zero-padded copy so load/store stay inside the
// caller's buffer; note this step uses encrypt(), not decrypt().
array<uint8_t, 8> t;
memcpy(&t[0], buf, n);
memset(&t[n], 0, 8 - n);
block<uint32_t> c;
c.load(&t[0]);
block<uint32_t> p = c ^ cipher<uint32_t>::encrypt(state, key, round);
p.store(&t[0]);
memcpy(buf, &t[0], n);
}
}
}
#undef MULTI2_ALWAYS_INLINE
#undef MULTI2_LIKELY

42
src/multi2_compat.h Normal file
View File

@ -0,0 +1,42 @@
#pragma once
#include <algorithm>
namespace multi2 {
// Minimal fixed-size array for toolchains without std::array: element
// access, size(), and operator!= only.
template<typename T, size_t N>
class array {
    T v[N];

    // Pointer arithmetic rather than &v[N], which would subscript one past
    // the last element.
    inline const T *begin() const { return v; }
    inline const T *end() const { return v + N; }
public:
    // const-qualified so the size can be queried through a const reference,
    // matching the const overloads of operator[] and operator!=.
    inline size_t size() const { return N; }

    // True when any element differs from the element at the same index.
    inline bool operator!=(const array &other) const {
        return !std::equal(begin(), end(), other.begin());
    }

    // Unchecked element access.
    inline T &operator[](size_t n) { return v[n]; }
    inline const T &operator[](size_t n) const { return v[n]; }
};
// Minimal optional for toolchains without std::optional: a value plus a
// "has value" flag. The stored T is always default-constructed; reset()
// only clears the flag and does not destroy or overwrite the value.
template<typename T>
class optional {
    T v;
    bool has;
public:
    inline optional() : has(false) { }

    // Store a copy of `other` and mark the optional as engaged.
    inline optional &operator=(const T &other) {
        v = other;
        has = true;
        return *this;
    }

    inline void reset() { has = false; }

    // True when a value has been assigned since construction / last reset().
    inline operator bool() const { return has; }

    // Unchecked access: the caller must test the optional first.
    inline T &operator*() { return v; }
    // Const overload so a const optional can be read as well.
    inline const T &operator*() const { return v; }
};
}

204
src/multi2_xmm.h Normal file
View File

@ -0,0 +1,204 @@
#pragma once
#if defined(__SSE2__)
#include <utility>
#if defined(_WIN32)
# include <intrin.h>
#else
# include <x86intrin.h>
#endif
#include "portable.h"
#include "multi2_block.h"
namespace multi2 {
namespace x86 {
class xmm {
private:
__m128i v;
public:
inline xmm() { v = _mm_undefined_si128(); }
inline xmm(uint32_t n) { v = _mm_set1_epi32(n); }
inline xmm(const __m128i &r) { v = r; }
inline xmm &operator=(const xmm &other) {
v = other.v;
return *this;
}
inline xmm operator+(const xmm &other) const { return _mm_add_epi32(v, other.v); }
inline xmm operator-(const xmm &other) const { return _mm_sub_epi32(v, other.v); }
inline xmm operator^(const xmm &other) const { return _mm_xor_si128(v, other.v); }
inline xmm operator|(const xmm &other) const { return _mm_or_si128(v, other.v); }
inline xmm operator<<(int n) const { return _mm_slli_epi32(v, n); }
inline xmm operator>>(int n) const { return _mm_srli_epi32(v, n); }
inline const __m128i &value() const { return v; }
};
}
#if defined(__SSSE3__)
// SSSE3 load: read 32 bytes at p (two unaligned 16-byte loads), byte-swap
// every 32-bit word with a single shuffle, and split the eight words into
// left (even-numbered words) and right (odd-numbered words).
// In the trailing comments digits are word indices listed from the highest
// lane to the lowest; DCBA / ABCD is the byte order inside each word before /
// after the swap.
template<>
inline void block<x86::xmm>::load(const uint8_t *p) {
const __m128i *q = reinterpret_cast<const __m128i *>(p);
__m128i a0 = _mm_loadu_si128(q); // 3 2 1 0 - DCBA
__m128i a1 = _mm_loadu_si128(q + 1); // 7 6 5 4 - DCBA
// The mask reverses the bytes of each word and also swaps the two middle
// words, so the unpack below pairs words 0/4 and 2/6 (resp. 1/5 and 3/7).
__m128i s = _mm_set_epi8(12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3);
__m128i b0 = _mm_shuffle_epi8(a0, s); // 3 1 2 0 - ABCD
__m128i b1 = _mm_shuffle_epi8(a1, s); // 7 5 6 4 - ABCD
left = _mm_unpacklo_epi32(b0, b1); // 6 2 4 0 - ABCD
right = _mm_unpackhi_epi32(b0, b1); // 7 3 5 1 - ABCD
}
// SSSE3 store: inverse of the SSSE3 load. Byte-swaps every word, undoes the
// even/odd split and writes 32 bytes to p with two unaligned stores.
// Lane comments use the same notation as the load.
template<>
inline void block<x86::xmm>::store(uint8_t *p) const {
__m128i a0 = left.value(); // 6 2 4 0 - ABCD
__m128i a1 = right.value(); // 7 3 5 1 - ABCD
__m128i s = _mm_set_epi8(12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3);
__m128i b0 = _mm_shuffle_epi8(a0, s); // 6 4 2 0 - DCBA
__m128i b1 = _mm_shuffle_epi8(a1, s); // 7 5 3 1 - DCBA
__m128i c0 = _mm_unpacklo_epi32(b0, b1); // 3 2 1 0 - DCBA
__m128i c1 = _mm_unpackhi_epi32(b0, b1); // 7 6 5 4 - DCBA
__m128i *q = reinterpret_cast<__m128i *>(p);
_mm_storeu_si128(q, c0);
_mm_storeu_si128(q + 1, c1);
}
// SSSE3 specialization of rot<8>: rotating a 32-bit lane left by 8 bits is a
// pure byte permutation, so one byte shuffle replaces two shifts and an OR.
template<>
inline x86::xmm rot<8, x86::xmm>(const x86::xmm &v) {
// Within each lane: result byte 0 comes from source byte 3, bytes 1..3 from
// source bytes 0..2.
__m128i s = _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
return _mm_shuffle_epi8(v.value(), s);
}
// SSSE3 specialization of rot<16>: rotating a 32-bit lane by 16 bits swaps
// its two 16-bit halves, done here with one byte shuffle.
template<>
inline x86::xmm rot<16, x86::xmm>(const x86::xmm &v) {
// Within each lane: result bytes 0,1 come from source bytes 2,3 and result
// bytes 2,3 from source bytes 0,1.
__m128i s = _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
return _mm_shuffle_epi8(v.value(), s);
}
#else /* __SSSE3__ */
// SSE2-only load (no byte shuffle available): read 32 bytes at p, split the
// eight words into even (left) and odd (right) with two rounds of unpack,
// then byte-swap each word in two steps.
// Digits in the trailing comments are word indices from the highest lane to
// the lowest; letters give the byte order inside each word.
// Unlike the SSSE3 variant, the lanes end up in plain ascending word order
// (6 4 2 0 / 7 5 3 1).
template<>
inline void block<x86::xmm>::load(const uint8_t *p) {
const __m128i *q = reinterpret_cast<const __m128i *>(p);
__m128i a0 = _mm_loadu_si128(q); // 3 2 1 0
__m128i a1 = _mm_loadu_si128(q + 1); // 7 6 5 4
__m128i b0 = _mm_unpacklo_epi32(a0, a1); // 5 1 4 0
__m128i b1 = _mm_unpackhi_epi32(a0, a1); // 7 3 6 2
__m128i c0 = _mm_unpacklo_epi32(b0, b1); // 6 4 2 0 - DCBA
__m128i c1 = _mm_unpackhi_epi32(b0, b1); // 7 5 3 1 - DCBA
// Byte swap step 1: exchange the two 16-bit halves of every word.
__m128i d0 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(c0, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1)); // BADC
__m128i d1 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(c1, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1)); // BADC
// Byte swap step 2: exchange the two bytes of every 16-bit half.
left = _mm_or_si128(_mm_srli_epi16(d0, 8), _mm_slli_epi16(d0, 8)); // ABCD
right = _mm_or_si128(_mm_srli_epi16(d1, 8), _mm_slli_epi16(d1, 8)); // ABCD
}
// SSE2-only store: inverse of the SSE2-only load. Byte-swaps every word in
// two steps, interleaves even and odd words back into memory order and
// writes 32 bytes to p.
template<>
inline void block<x86::xmm>::store(uint8_t *p) const {
__m128i a0 = left.value(); // ABCD
__m128i a1 = right.value(); // ABCD
// Byte swap step 1: exchange the two bytes of every 16-bit half.
__m128i b0 = _mm_or_si128(_mm_srli_epi16(a0, 8), _mm_slli_epi16(a0, 8)); // BADC
__m128i b1 = _mm_or_si128(_mm_srli_epi16(a1, 8), _mm_slli_epi16(a1, 8)); // BADC
// Byte swap step 2: exchange the two 16-bit halves of every word.
__m128i c0 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(b0, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1)); // 6 4 2 0 - DCBA
__m128i c1 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(b1, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1)); // 7 5 3 1 - DCBA
__m128i d0 = _mm_unpacklo_epi32(c0, c1); // 3 2 1 0
__m128i d1 = _mm_unpackhi_epi32(c0, c1); // 7 6 5 4
__m128i *q = reinterpret_cast<__m128i *>(p);
_mm_storeu_si128(q, d0);
_mm_storeu_si128(q + 1, d1);
}
#endif /* __SSSE3__ */
#if defined(__SSE4_1__)
// SSE4.1 CBC step for four blocks at once. `this` holds the raw decryption
// of `c`. Each block is XORed with the ciphertext of the block before it,
// the first one with the incoming state; the last ciphertext block is
// returned as the next state.
// Digits in the trailing comments are block indices from the highest lane to
// the lowest.
// NOTE(review): the lane order and shuffle constant below match the SSSE3
// load layout (3 1 2 0); a build with __SSE4_1__ but without __SSSE3__ would
// pair this with the other load layout - presumably never happens, confirm.
template<>
inline std::pair<block<x86::xmm>, cbc_state> block<x86::xmm>::cbc_post_decrypt(const block<x86::xmm> &c, const cbc_state &state) const {
__m128i c0 = c.left.value(); // 3 1 2 0
__m128i c1 = c.right.value();
// The top lane holds the last block; it becomes the outgoing state.
uint32_t s0 = _mm_extract_epi32(c0, 3); // 3
uint32_t s1 = _mm_extract_epi32(c1, 3);
// Move every ciphertext block into the lane of the block that follows it.
__m128i b0 = _mm_shuffle_epi32(c0, _MM_SHUFFLE(1, 0, 2, 3)); // 2 0 1 3
__m128i b1 = _mm_shuffle_epi32(c1, _MM_SHUFFLE(1, 0, 2, 3));
// Lane 0 (block 0) has no predecessor in this group: use the incoming state.
__m128i x0 = _mm_insert_epi32(b0, state.left, 0); // 2 0 1 s
__m128i x1 = _mm_insert_epi32(b1, state.right, 0);
__m128i d0 = left.value(); // 3 1 2 0
__m128i d1 = right.value();
__m128i p0 = _mm_xor_si128(d0, x0);
__m128i p1 = _mm_xor_si128(d1, x1);
return std::make_pair(block<x86::xmm>(p0, p1), cbc_state(s0, s1));
}
#else /* __SSE4_1__ */
// CBC step for four blocks at once without SSE4.1 extract/insert. `this`
// holds the raw decryption of `c`. Each block is XORed with the ciphertext
// of the block before it, the first one with the incoming state; the last
// ciphertext block is returned as the next state.
// Where a trailing comment shows two layouts, the first is for the SSSE3
// load and the second for the SSE2-only load; digits are block indices from
// the highest lane to the lowest.
template<>
inline std::pair<block<x86::xmm>, cbc_state> block<x86::xmm>::cbc_post_decrypt(const block<x86::xmm> &c, const cbc_state &state) const {
__m128i c0 = c.left.value(); // 3 1 2 0 / 3 2 1 0
__m128i c1 = c.right.value();
// The shuffle constant depends on which load layout is in use.
#if defined(__SSSE3__)
int s = _MM_SHUFFLE(1, 0, 2, 3);
#else
int s = _MM_SHUFFLE(2, 1, 0, 3);
#endif
// Move every ciphertext block into the lane of the block that follows it;
// the last block lands in lane 0.
__m128i b0 = _mm_shuffle_epi32(c0, s); // 2 0 1 3 / 2 1 0 3
__m128i b1 = _mm_shuffle_epi32(c1, s);
// Lane 0 now holds the last block: read it out as the outgoing state.
uint32_t s0 = _mm_cvtsi128_si32(b0); // 3
uint32_t s1 = _mm_cvtsi128_si32(b1);
__m128i i0 = _mm_cvtsi32_si128(state.left); // - - - s
__m128i i1 = _mm_cvtsi32_si128(state.right);
// Replace lane 0 with the incoming state; move_ss is used purely as a
// "copy the low 32 bits" operation on the integer data.
__m128i x0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(b0), _mm_castsi128_ps(i0))); // 2 0 1 s / 2 1 0 s
__m128i x1 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(b1), _mm_castsi128_ps(i1)));
__m128i d0 = left.value(); // 3 1 2 0 / 3 2 1 0
__m128i d1 = right.value();
__m128i p0 = _mm_xor_si128(d0, x0);
__m128i p1 = _mm_xor_si128(d1, x1);
return std::make_pair(block<x86::xmm>(p0, p1), cbc_state(s0, s1));
}
#endif /* __SSE4_1__ */
// Rotate-free form of rot<1>(v) + v - 1: rot<1>(v) is 2*v plus the wrapped
// top bit, so the result is 3*v + (top bit) - 1. The signed compare yields
// -1 in lanes whose top bit is clear and 0 in lanes whose top bit is set,
// which is exactly (top bit) - 1.
template<>
inline x86::xmm rot1_add_dec<x86::xmm>(const x86::xmm &v) {
    const __m128i minus_one = _mm_set1_epi32(-1);
    const x86::xmm adjust(_mm_cmpgt_epi32(v.value(), minus_one));
    return v + v + v + adjust;
}
}
#endif /* __SSE2__ */

136
src/multi2_ymm.h Normal file
View File

@ -0,0 +1,136 @@
#pragma once
#if defined(__AVX2__)
#include <utility>
#if defined(_WIN32)
# include <intrin.h>
#else
# include <x86intrin.h>
#endif
#include "portable.h"
#include "multi2_block.h"
namespace multi2 {
namespace x86 {
class ymm {
private:
__m256i v;
public:
inline ymm() { v = _mm256_undefined_si256(); }
inline ymm(uint32_t n) { v = _mm256_set1_epi32(n); }
inline ymm(const __m256i &r) { v = r; }
inline ymm &operator=(const ymm &other) {
v = other.v;
return *this;
}
inline ymm operator+(const ymm &other) const { return _mm256_add_epi32(v, other.v); }
inline ymm operator-(const ymm &other) const { return _mm256_sub_epi32(v, other.v); }
inline ymm operator^(const ymm &other) const { return _mm256_xor_si256(v, other.v); }
inline ymm operator|(const ymm &other) const { return _mm256_or_si256(v, other.v); }
inline ymm operator<<(int n) const { return _mm256_slli_epi32(v, n); }
inline ymm operator>>(int n) const { return _mm256_srli_epi32(v, n); }
inline const __m256i &value() const { return v; }
};
}
// AVX2 load: read 64 bytes at p (two unaligned 32-byte loads), byte-swap
// every 32-bit word and split the sixteen words into left (even-numbered
// words) and right (odd-numbered words).
// Hex digits in the trailing comments are word indices from the highest lane
// to the lowest; DCBA / ABCD is the byte order inside each word. The shuffle
// and unpack intrinsics work on each 128-bit half independently, which is
// why the resulting lane order is interleaved.
template<>
inline void block<x86::ymm>::load(const uint8_t *p) {
const __m256i *q = reinterpret_cast<const __m256i *>(p);
__m256i a0 = _mm256_loadu_si256(q); // 7 6 5 4 3 2 1 0 - DCBA
__m256i a1 = _mm256_loadu_si256(q + 1); // f e d c b a 9 8 - DCBA
// Same 16-byte mask in both halves: reverse the bytes of each word and swap
// the two middle words of the half.
__m256i s = _mm256_set_epi8(
12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3
);
__m256i b0 = _mm256_shuffle_epi8(a0, s); // 7 5 6 4 3 1 2 0 - ABCD
__m256i b1 = _mm256_shuffle_epi8(a1, s); // f d e c b 9 a 8 - ABCD
left = _mm256_unpacklo_epi32(b0, b1); // e 6 c 4 a 2 8 0 - ABCD
right = _mm256_unpackhi_epi32(b0, b1); // f 7 d 5 b 3 9 1 - ABCD
}
// AVX2 store: inverse of the AVX2 load. Byte-swaps every word, undoes the
// even/odd split and writes 64 bytes to p with two unaligned stores.
// Lane comments use the same notation as the load.
template<>
inline void block<x86::ymm>::store(uint8_t *p) const {
__m256i a0 = left.value(); // e 6 c 4 a 2 8 0 - ABCD
__m256i a1 = right.value(); // f 7 d 5 b 3 9 1 - ABCD
__m256i s = _mm256_set_epi8(
12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3
);
__m256i b0 = _mm256_shuffle_epi8(a0, s); // e c 6 4 a 8 2 0 - DCBA
__m256i b1 = _mm256_shuffle_epi8(a1, s); // f d 7 5 b 9 3 1 - DCBA
__m256i c0 = _mm256_unpacklo_epi32(b0, b1); // 7 6 5 4 3 2 1 0 - DCBA
__m256i c1 = _mm256_unpackhi_epi32(b0, b1); // f e d c b a 9 8 - DCBA
__m256i *q = reinterpret_cast<__m256i *>(p);
_mm256_storeu_si256(q, c0);
_mm256_storeu_si256(q + 1, c1);
}
// AVX2 CBC step for eight blocks at once. `this` holds the raw decryption of
// `c`. Each block is XORed with the ciphertext of the block before it, the
// first one with the incoming state; the last ciphertext block (block 7) is
// returned as the next state.
// Digits in the trailing comments are block indices from the highest lane to
// the lowest.
template<>
inline std::pair<block<x86::ymm>, cbc_state> block<x86::ymm>::cbc_post_decrypt(const block<x86::ymm> &c, const cbc_state &state) const {
__m256i c0 = c.left.value(); // 7 3 6 2 5 1 4 0
__m256i c1 = c.right.value();
// The top lane holds the last block; it becomes the outgoing state.
uint32_t s0 = _mm256_extract_epi32(c0, 7); // 7
uint32_t s1 = _mm256_extract_epi32(c1, 7);
// Cross-lane permute that moves every ciphertext block into the lane of the
// block that follows it.
__m256i s = _mm256_set_epi32(5, 4, 3, 2, 1, 0, 6, 7);
__m256i a0 = _mm256_permutevar8x32_epi32(c0, s); // 6 2 5 1 4 0 3 7
__m256i a1 = _mm256_permutevar8x32_epi32(c1, s);
// Lane 0 (block 0) has no predecessor in this group: use the incoming state.
__m256i x0 = _mm256_insert_epi32(a0, state.left, 0); // 6 2 5 1 4 0 3 s
__m256i x1 = _mm256_insert_epi32(a1, state.right, 0);
__m256i d0 = left.value(); // 7 3 6 2 5 1 4 0
__m256i d1 = right.value();
__m256i p0 = _mm256_xor_si256(d0, x0);
__m256i p1 = _mm256_xor_si256(d1, x1);
return std::make_pair(block<x86::ymm>(p0, p1), cbc_state(s0, s1));
}
// AVX2 specialization of rot<8>: rotating a 32-bit lane left by 8 bits is a
// pure byte permutation, done with one byte shuffle (same mask in both
// 128-bit halves).
template<>
inline x86::ymm rot<8, x86::ymm>(const x86::ymm &v) {
// Within each lane: result byte 0 comes from source byte 3, bytes 1..3 from
// source bytes 0..2.
__m256i s = _mm256_set_epi8(
14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3
);
return _mm256_shuffle_epi8(v.value(), s);
}
// AVX2 specialization of rot<16>: swaps the two 16-bit halves of every
// 32-bit lane with one byte shuffle (same mask in both 128-bit halves).
template<>
inline x86::ymm rot<16, x86::ymm>(const x86::ymm &v) {
// Within each lane: result bytes 0,1 come from source bytes 2,3 and result
// bytes 2,3 from source bytes 0,1.
__m256i s = _mm256_set_epi8(
13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2
);
return _mm256_shuffle_epi8(v.value(), s);
}
// Rotate-free form of rot<1>(v) + v - 1: the result is 3*v + (top bit) - 1.
// The signed compare yields -1 in lanes whose top bit is clear and 0 in
// lanes whose top bit is set, which is exactly (top bit) - 1.
template<>
inline x86::ymm rot1_add_dec<x86::ymm>(const x86::ymm &v) {
    const __m256i minus_one = _mm256_set1_epi32(-1);
    const x86::ymm adjust(_mm256_cmpgt_epi32(v.value(), minus_one));
    return v + v + v + adjust;
}
}
#endif /* __AVX2__ */

188
src/multi2_ymm2.h Normal file
View File

@ -0,0 +1,188 @@
#pragma once
#if defined(__AVX2__)
#include <utility>
#if defined(_WIN32)
# include <intrin.h>
#else
# include <x86intrin.h>
#endif
#include "portable.h"
#include "multi2_block.h"
namespace multi2 {
namespace x86 {
class ymm2 {
private:
__m256i v0;
__m256i v1;
public:
inline ymm2() { v0 = v1 = _mm256_undefined_si256(); }
inline ymm2(uint32_t n) { v0 = v1 = _mm256_set1_epi32(n); }
inline ymm2(const __m256i &r0, const __m256i &r1) {
v0 = r0;
v1 = r1;
}
inline ymm2 &operator=(const ymm2 &other) {
v0 = other.v0;
v1 = other.v1;
return *this;
}
inline ymm2 operator+(const ymm2 &other) const {
return x86::ymm2(_mm256_add_epi32(v0, other.v0), _mm256_add_epi32(v1, other.v1));
}
inline ymm2 operator-(const ymm2 &other) const {
return x86::ymm2(_mm256_sub_epi32(v0, other.v0), _mm256_sub_epi32(v1, other.v1));
}
inline ymm2 operator^(const ymm2 &other) const {
return x86::ymm2(_mm256_xor_si256(v0, other.v0), _mm256_xor_si256(v1, other.v1));
}
inline ymm2 operator|(const ymm2 &other) const {
return x86::ymm2(_mm256_or_si256(v0, other.v0), _mm256_or_si256(v1, other.v1));
}
inline ymm2 operator<<(int n) const {
return x86::ymm2(_mm256_slli_epi32(v0, n), _mm256_slli_epi32(v1, n));
}
inline ymm2 operator>>(int n) const {
return x86::ymm2(_mm256_srli_epi32(v0, n), _mm256_srli_epi32(v1, n));
}
inline const __m256i &value0() const { return v0; }
inline const __m256i &value1() const { return v1; }
};
}
// A ymm2 block consumes 120 bytes rather than the generic 2 * sizeof(T):
// its load/store work on two 64-byte windows, the second starting at byte
// offset 56, so the windows overlap by 8 bytes and end at byte 120.
template<>
inline size_t block_size<x86::ymm2>() {
    const size_t bytes_per_step = 120;
    return bytes_per_step;
}
// Load 120 bytes at p as two 64-byte windows: bytes 0..63 and bytes 56..119.
// The windows overlap by 8 bytes, so words 0e and 0f are loaded into both
// registers. Each window is byte-swapped and split into even (left) and odd
// (right) words exactly like the single-register AVX2 load.
// Hex numbers in the trailing comments are 32-bit word indices from the
// highest lane to the lowest; DCBA / ABCD is the byte order inside each word.
template<>
inline void block<x86::ymm2>::load(const uint8_t *p) {
const __m256i *q0 = reinterpret_cast<const __m256i *>(p);
// Second window starts at byte 56, i.e. at word 0e.
const __m256i *q1 = reinterpret_cast<const __m256i *>(p + 56);
__m256i a0 = _mm256_loadu_si256(q0); // 07 06 05 04 03 02 01 00 - DCBA
__m256i a1 = _mm256_loadu_si256(q0 + 1); // 0f 0e 0d 0c 0b 0a 09 08 - DCBA
__m256i a2 = _mm256_loadu_si256(q1); // 15 14 13 12 11 10 0f 0e - DCBA
__m256i a3 = _mm256_loadu_si256(q1 + 1); // 1d 1c 1b 1a 19 18 17 16 - DCBA
__m256i s = _mm256_set_epi8(
12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3
);
__m256i b0 = _mm256_shuffle_epi8(a0, s); // 07 05 06 04 03 01 02 00 - ABCD
__m256i b1 = _mm256_shuffle_epi8(a1, s); // 0f 0d 0e 0c 0b 09 0a 08 - ABCD
__m256i b2 = _mm256_shuffle_epi8(a2, s); // 15 13 14 12 11 0f 10 0e - ABCD
__m256i b3 = _mm256_shuffle_epi8(a3, s); // 1d 1b 1c 1a 19 17 18 16 - ABCD
__m256i c0 = _mm256_unpacklo_epi32(b0, b1); // 0e 06 0c 04 0a 02 08 00 - ABCD
__m256i c1 = _mm256_unpackhi_epi32(b0, b1); // 0f 07 0d 05 0b 03 09 01 - ABCD
__m256i c2 = _mm256_unpacklo_epi32(b2, b3); // 1c 14 1a 12 18 10 16 0e - ABCD
__m256i c3 = _mm256_unpackhi_epi32(b2, b3); // 1d 15 1b 13 19 11 17 0f - ABCD
left = x86::ymm2(c0, c2); // 0e 06 0c 04 0a 02 08 00 - 1c 14 1a 12 18 10 16 0e
right = x86::ymm2(c1, c3); // 0f 07 0d 05 0b 03 09 01 - 1d 15 1b 13 19 11 17 0f
}
// Store 120 bytes to p; inverse of the ymm2 load. The lowest lane of the
// second register (marked "--") corresponds to the overlapped words 0e/0f
// and is not meaningful here; those bytes are taken from the first register.
// Hex numbers in the trailing comments are 32-bit word indices from the
// highest lane to the lowest.
template<>
inline void block<x86::ymm2>::store(uint8_t *p) const {
__m256i a0 = left.value0(); // 0e 06 0c 04 0a 02 08 00 - ABCD
__m256i a1 = right.value0(); // 0f 07 0d 05 0b 03 09 01 - ABCD
__m256i a2 = left.value1(); // 1c 14 1a 12 18 10 16 -- - ABCD
__m256i a3 = right.value1(); // 1d 15 1b 13 19 11 17 -- - ABCD
__m256i s = _mm256_set_epi8(
12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3
);
__m256i b0 = _mm256_shuffle_epi8(a0, s); // 0e 0c 06 04 0a 08 02 00 - DCBA
__m256i b1 = _mm256_shuffle_epi8(a1, s); // 0f 0d 07 05 0b 09 03 01 - DCBA
__m256i b2 = _mm256_shuffle_epi8(a2, s); // 1c 1a 14 12 18 16 10 -- - DCBA
__m256i b3 = _mm256_shuffle_epi8(a3, s); // 1d 1b 15 13 19 17 11 -- - DCBA
__m256i c0 = _mm256_unpacklo_epi32(b0, b1); // 07 06 05 04 03 02 01 00 - DCBA
__m256i c1 = _mm256_unpackhi_epi32(b0, b1); // 0f 0e 0d 0c 0b 0a 09 08 - DCBA
__m256i c2 = _mm256_unpacklo_epi32(b2, b3); // 15 14 13 12 11 10 -- -- - DCBA
__m256i c3 = _mm256_unpackhi_epi32(b2, b3); // 1d 1c 1b 1a 19 18 17 16 - DCBA
__m256i *q0 = reinterpret_cast<__m256i *>(p);
__m256i *q1 = reinterpret_cast<__m256i *>(p + 56);
// Store order matters: c2 (bytes 56..87) carries junk in its two lowest
// words, and the later store of c1 (bytes 32..63) overwrites bytes 56..63
// with the correct words 0e/0f. Do not reorder c1 before c2.
_mm256_storeu_si256(q0, c0);
_mm256_storeu_si256(q1, c2);
_mm256_storeu_si256(q0 + 1, c1);
_mm256_storeu_si256(q1 + 1, c3);
}
// CBC step for the fifteen blocks of a ymm2 group. `this` holds the raw
// decryption of `c`. The first register covers blocks 0..7 and the second
// blocks 7..e (block 7 is present in both because the load windows overlap).
// Each block is XORed with the ciphertext of the block before it; block 0
// uses the incoming state, and the last ciphertext block (e) is returned as
// the next state.
// Hex digits in the trailing comments are block indices from the highest
// lane to the lowest.
template<>
inline std::pair<block<x86::ymm2>, cbc_state> block<x86::ymm2>::cbc_post_decrypt(const block<x86::ymm2> &c, const cbc_state &state) const {
__m256i c0 = c.left.value0(); // 7 3 6 2 5 1 4 0
__m256i c1 = c.right.value0();
__m256i c2 = c.left.value1(); // e a d 9 c 8 b 7
__m256i c3 = c.right.value1();
// Outgoing state: the top lane of the second register (block e).
uint32_t s2 = _mm256_extract_epi32(c2, 7); // e
uint32_t s3 = _mm256_extract_epi32(c3, 7);
// Cross-lane permute that moves every ciphertext block into the lane of the
// block that follows it.
__m256i s = _mm256_set_epi32(5, 4, 3, 2, 1, 0, 6, 7);
__m256i a0 = _mm256_permutevar8x32_epi32(c0, s); // 6 2 5 1 4 0 3 7
__m256i a1 = _mm256_permutevar8x32_epi32(c1, s);
__m256i a2 = _mm256_permutevar8x32_epi32(c2, s); // d 9 c 8 b 7 a e
__m256i a3 = _mm256_permutevar8x32_epi32(c3, s);
__m256i x0 = _mm256_insert_epi32(a0, state.left, 0); // 6 2 5 1 4 0 3 s
__m256i x1 = _mm256_insert_epi32(a1, state.right, 0);
// No state insert for the second register: its lane 0 is the duplicate of
// block 7, paired here with block e, so that lane of the result is not a
// valid plaintext. The first register already produces block 7 correctly.
__m256i x2 = a2; // d 9 c 8 b 7 a e
__m256i x3 = a3;
__m256i d0 = left.value0(); // 7 3 6 2 5 1 4 0
__m256i d1 = right.value0();
__m256i d2 = left.value1(); // e a d 9 c 8 b 7
__m256i d3 = right.value1();
__m256i p0 = _mm256_xor_si256(d0, x0);
__m256i p1 = _mm256_xor_si256(d1, x1);
__m256i p2 = _mm256_xor_si256(d2, x2);
__m256i p3 = _mm256_xor_si256(d3, x3);
return std::make_pair(block<x86::ymm2>(x86::ymm2(p0, p2), x86::ymm2(p1, p3)), cbc_state(s2, s3));
}
// ymm2 specialization of rot<8>: rotates every 32-bit lane of both registers
// left by 8 bits using one byte shuffle per register.
template<>
inline x86::ymm2 rot<8, x86::ymm2>(const x86::ymm2 &v) {
// Within each lane: result byte 0 comes from source byte 3, bytes 1..3 from
// source bytes 0..2.
__m256i s = _mm256_set_epi8(
14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3
);
return x86::ymm2(_mm256_shuffle_epi8(v.value0(), s), _mm256_shuffle_epi8(v.value1(), s));
}
// ymm2 specialization of rot<16>: swaps the two 16-bit halves of every
// 32-bit lane of both registers using one byte shuffle per register.
template<>
inline x86::ymm2 rot<16, x86::ymm2>(const x86::ymm2 &v) {
// Within each lane: result bytes 0,1 come from source bytes 2,3 and result
// bytes 2,3 from source bytes 0,1.
__m256i s = _mm256_set_epi8(
13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2
);
return x86::ymm2(_mm256_shuffle_epi8(v.value0(), s), _mm256_shuffle_epi8(v.value1(), s));
}
// Rotate-free form of rot<1>(v) + v - 1 for both registers: the result is
// 3*v + (top bit) - 1. The signed compare yields -1 in lanes whose top bit
// is clear and 0 in lanes whose top bit is set, which is exactly
// (top bit) - 1.
template<>
inline x86::ymm2 rot1_add_dec<x86::ymm2>(const x86::ymm2 &v) {
    const __m256i minus_one = _mm256_set1_epi32(-1);
    const x86::ymm2 adjust(
        _mm256_cmpgt_epi32(v.value0(), minus_one),
        _mm256_cmpgt_epi32(v.value1(), minus_one));
    return v + v + v + adjust;
}
}
#endif /* __AVX2__ */

View File

@ -1,7 +1,7 @@
#ifndef PORTABLE_H
#define PORTABLE_H
#if (defined(WIN32) && defined(_MSC_VER) && _MSC_VER < 1800)
#if (defined(_WIN32) && defined(_MSC_VER) && _MSC_VER < 1800)
typedef unsigned char uint8_t;
typedef signed char int8_t;
@ -19,7 +19,7 @@ typedef signed __int64 int64_t;
#endif
#if !defined(WIN32)
#if !defined(_WIN32)
#define _open open
#define _close close
#define _read read

View File

@ -7,7 +7,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#if defined(WIN32)
#if defined(_WIN32)
#include <io.h>
#include <windows.h>
#include <crtdbg.h>
@ -46,7 +46,7 @@ int _tmain(int argc, TCHAR **argv)
int n;
OPTION opt;
#if defined(WIN32)
#if defined(_WIN32)
_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE );
_CrtSetReportFile( _CRT_WARN, _CRTDBG_FILE_STDOUT );
_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE );
@ -66,7 +66,7 @@ int _tmain(int argc, TCHAR **argv)
test_arib_std_b25(argv[n+0], argv[n+1], &opt);
}
#if defined(WIN32)
#if defined(_WIN32)
_CrtDumpMemoryLeaks();
#endif
@ -166,7 +166,7 @@ static void test_arib_std_b25(const TCHAR *src, const TCHAR *dst, OPTION *opt)
int64_t total;
int64_t offset;
#if defined(WIN32)
#if defined(_WIN32)
unsigned long tick,tock;
#else
struct timeval tick,tock;
@ -248,7 +248,7 @@ static void test_arib_std_b25(const TCHAR *src, const TCHAR *dst, OPTION *opt)
}
offset = 0;
#if defined(WIN32)
#if defined(_WIN32)
tock = GetTickCount();
#else
gettimeofday(&tock, NULL);
@ -281,7 +281,7 @@ static void test_arib_std_b25(const TCHAR *src, const TCHAR *dst, OPTION *opt)
if(opt->verbose != 0){
m = (int)(10000*offset/total);
mbps = 0.0;
#if defined(WIN32)
#if defined(_WIN32)
tick = GetTickCount();
if (tick-tock > 100) {
mbps = offset;
@ -324,7 +324,7 @@ static void test_arib_std_b25(const TCHAR *src, const TCHAR *dst, OPTION *opt)
if(opt->verbose != 0){
mbps = 0.0;
#if defined(WIN32)
#if defined(_WIN32)
tick = GetTickCount();
if (tick-tock > 100) {
mbps = offset;
@ -362,7 +362,7 @@ static void test_arib_std_b25(const TCHAR *src, const TCHAR *dst, OPTION *opt)
_ftprintf(stderr, _T(" channel: %d\n"), pgrm.program_number);
_ftprintf(stderr, _T(" unpurchased ECM count: %d\n"), pgrm.ecm_unpurchased_count);
_ftprintf(stderr, _T(" last ECM error code: %04x\n"), pgrm.last_ecm_error_code);
#if defined(WIN32)
#if defined(_WIN32)
_ftprintf(stderr, _T(" undecrypted TS packet: %I64d\n"), pgrm.undecrypted_packet_count);
_ftprintf(stderr, _T(" total TS packet: %I64d\n"), pgrm.total_packet_count);
#else