/*
* Copyright (c) 2018 by Intinor AB. All rights reserved.
*
* This file is part of "libRaptorQ".
*
* libRaptorQ is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* libRaptorQ is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* and a copy of the GNU Lesser General Public License
* along with libRaptorQ. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "RaptorQ/v1/util/CPU_Info.hpp"
#include <cstddef>
#include <cstdint>
namespace RaptorQ__v1 {
namespace Impl {
// Function-pointer signatures for the byte-wise row kernels.
// `bytes` is the number of bytes each kernel processes.
using ADD_FPTR = void (*)(uint8_t *dest, uint8_t *src, int bytes);
using DIV_FPTR = void (*)(uint8_t *data, uint8_t num, int bytes);
using ADD_MUL_FPTR = void (*)(uint8_t *dest, uint8_t *src, uint8_t num,
                                                                int bytes);
namespace Matrix_Row_SIMD {
// Declarations of the SIMD row kernels; no definitions appear in this header.
// The SIMD versions are: dest[i] ^= (high[src[i]>>4] ^ low[src[i]&0xf]);
// where high and low are 16 byte lookup tables (one indexed by the high four
// bits, one by the low four bits).
// Note: these are kept outside the class because the gcc 4.9.4 linker with
// LTO got into real trouble otherwise.

// AVX2 kernels
void add_avx2(uint8_t *dest, uint8_t *src, int bytes);
void div_avx2(uint8_t *data, uint8_t num, int bytes);
void multiply_and_add_avx2(uint8_t *data, uint8_t *src, uint8_t num,
                                                                int bytes);

// SSSE3 kernels
void add_ssse3(uint8_t *dest, uint8_t *src, int bytes);
void div_ssse3(uint8_t *data, uint8_t num, int bytes);
void multiply_and_add_ssse3(uint8_t *data, uint8_t *src, uint8_t num,
                                                                int bytes);
} // namespace Matrix_Row_SIMD
// Dispatcher for byte-wise row operations (add, divide, multiply-and-add).
//
// A single instance (see get_instance()) holds three function pointers that
// are bound either to the kernels declared in Matrix_Row_SIMD or to the
// scalar fallbacks defined at the bottom of this class. All public entry
// points are static and forward through that instance.
//
// NOTE(review): the function pointers are plain members and init_simd()
// rewrites them with no synchronization visible in this file; calling
// init_simd() while other threads run row_* looks unsafe -- confirm callers.
class RAPTORQ_LOCAL Matrix_Row
{
public:
    // Backend selector accepted by init_simd().
    enum class SIMD : uint8_t {
        NONE = 0x00,    // scalar fallbacks defined in this class
        AUTO = 0x01,    // best backend reported by CPU_Info
        SSSE3 = 0x02,
        AVX2 = 0x03,
    };

    // Returns the shared instance; it is constructed with SIMD::AUTO on
    // first use.
    static Matrix_Row & get_instance() {
        static Matrix_Row matrix;
        return matrix;
    }

    // Rebinds the kernels to the requested backend. SIMD::AUTO, or a backend
    // CPU_Info reports as unavailable, falls back to the best available one.
    static void init_simd(SIMD type) {
        Matrix_Row::get_instance()._init_simd(type);
    }

    // Adds `scalar` times `src` into `dest` over `bytes` bytes.
    // scalar == 0 is a no-op and scalar == 1 is a plain row add (XOR in the
    // scalar fallback).
    static void row_multiply_add(uint8_t *dest, uint8_t *src, uint8_t scalar,
                                                                int bytes)
    {
        if (scalar == 0) {
            // Nothing to add. Handled here so every backend behaves like the
            // scalar fallback, which already returns early on zero.
            return;
        }
        if (scalar == 1) {
            Matrix_Row::get_instance().add_cb(dest, src, bytes);
        } else {
            Matrix_Row::get_instance().add_mul_cb(dest, src, scalar, bytes);
        }
    }

    // Divides `bytes` bytes of `data` by `scalar` in place.
    // scalar == 0 leaves `data` untouched.
    static void row_div(uint8_t *data, uint8_t scalar, int bytes)
    {
        if (scalar == 0) {
            return;
        }
        Matrix_Row::get_instance().div_cb(data, scalar, bytes);
    }

    // Adds `src` into `dest` over `bytes` bytes (XOR in the scalar fallback).
    static void row_add(uint8_t *dest, uint8_t *src, int bytes)
    {
        Matrix_Row::get_instance().add_cb(dest, src, bytes);
    }

private:
    // Currently selected kernels; always assigned together by _init_simd().
    ADD_FPTR add_cb;
    DIV_FPTR div_cb;
    ADD_MUL_FPTR add_mul_cb;

    Matrix_Row() {
        _init_simd(SIMD::AUTO);
    }

    // Best backend the CPU supports.
    // Ordered from most wanted to least wanted.
    static SIMD get_simd_type()
    {
        if (CPU_Info::has_avx2()) {
            return SIMD::AVX2;
        }
        if (CPU_Info::has_ssse3()) {
            return SIMD::SSSE3;
        }
        return SIMD::NONE;
    }

    // True when `type` can be used on this CPU. NONE and AUTO need no CPU
    // feature and therefore always report true.
    static bool has_simd_type(SIMD type)
    {
        switch (type) {
        case SIMD::SSSE3:
            return CPU_Info::has_ssse3();
        case SIMD::AVX2:
            return CPU_Info::has_avx2();
        default:
            return true;
        }
    }

    // Binds the three function pointers for `type`, after resolving AUTO or
    // an unsupported request to the best available backend.
    void _init_simd(SIMD type) {
        if (type == SIMD::AUTO || !has_simd_type(type)) {
            type = get_simd_type();
        }
        switch (type) {
        case SIMD::SSSE3:
            add_cb = Matrix_Row_SIMD::add_ssse3;
            div_cb = Matrix_Row_SIMD::div_ssse3;
            add_mul_cb = Matrix_Row_SIMD::multiply_and_add_ssse3;
            break;
        case SIMD::AVX2:
            add_cb = Matrix_Row_SIMD::add_avx2;
            div_cb = Matrix_Row_SIMD::div_avx2;
            add_mul_cb = Matrix_Row_SIMD::multiply_and_add_avx2;
            break;
        default:
            // Scalar fallbacks; qualified so it is explicit that the class's
            // own static members are meant.
            add_cb = &Matrix_Row::add;
            div_cb = &Matrix_Row::div;
            add_mul_cb = &Matrix_Row::multiply_and_add;
            break;
        }
    }

    // Scalar fallback: dest[i] ^= src[i] for i in [0, bytes).
    static void add(uint8_t *dest, uint8_t *src, int bytes)
    {
        for (int i = 0; i < bytes; i++) {
            dest[i] = dest[i] ^ src[i];
        }
    }

    // Scalar fallback: divides every non-zero byte of `data` by `num` using
    // the oct_log / oct_exp tables (declared outside this file; presumably
    // the RaptorQ octet log/exp tables -- confirm). Zero bytes stay zero.
    static void div(uint8_t *data, uint8_t num, int bytes)
    {
        if (num == 0) {
            // Same policy as row_div(): dividing by zero is a no-op. Without
            // this guard the lookup below would read oct_log[-1].
            return;
        }
        // oct_log is indexed by value - 1, so it has no entry for zero.
        const uint8_t log_num = oct_log[num - 1];
        for (int i = 0; i < bytes; i++) {
            if (data[i] != 0) {
                // + 255 keeps the exponent index non-negative.
                data[i] = oct_exp[oct_log[data[i] - 1] - log_num + 255];
            }
        }
    }

    // Scalar fallback: dest[i] ^= num * src[i], with the product taken from
    // the oct_log_no_if / oct_exp_no_if tables (declared outside this file;
    // presumably laid out so a zero src byte needs no branch -- confirm).
    static void multiply_and_add(uint8_t *dest, uint8_t *src, uint8_t num,
                                                                int bytes)
    {
        if (num == 0) {
            return;
        }
        // TODO: Probably faster with a single lookup based on num, a single
        // lookup require an additional lookup table of size 256 * 256.
        const uint16_t log_num = oct_log_no_if[num];
        for (int i = 0; i < bytes; i++) {
            dest[i] = dest[i] ^ oct_exp_no_if[oct_log_no_if[src[i]] + log_num];
        }
    }
};
} // namespace Impl
} // namespace RaptorQ__v1