diff options
-rw-r--r-- | api/direct_bt/BasicTypes.hpp | 14 | ||||
-rw-r--r-- | api/direct_bt/dfa_utf8_decode.hpp (renamed from java/jni/direct_bt/DBTUtils.cxx) | 50 | ||||
-rw-r--r-- | examples/direct_bt_scanner00/dbt_scanner00.cpp | 4 | ||||
-rw-r--r-- | examples/direct_bt_scanner10/dbt_scanner10.cpp | 4 | ||||
-rw-r--r-- | java/jni/BluetoothUtils.cxx | 28 | ||||
-rw-r--r-- | java/jni/direct_bt/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/direct_bt/BasicTypes.cpp | 80 | ||||
-rw-r--r-- | src/direct_bt/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/direct_bt/dfa_utf8_decode.cpp | 99 | ||||
-rw-r--r-- | src/tinyb/CMakeLists.txt | 1 |
10 files changed, 158 insertions, 124 deletions
diff --git a/api/direct_bt/BasicTypes.hpp b/api/direct_bt/BasicTypes.hpp index e75b537b..ef492d3a 100644 --- a/api/direct_bt/BasicTypes.hpp +++ b/api/direct_bt/BasicTypes.hpp @@ -407,20 +407,6 @@ namespace direct_bt { /** trim copy */ std::string trimCopy(const std::string &s); - /** - * Returns all valid consecutive UTF-8 characters within buffer - * in the range up to buffer_size or until EOS. - * <p> - * In case a non UTF-8 character has been detected, - * the content will be cut off and the decoding loop ends. - * </p> - * <p> - * Method utilizes a finite state machine detecting variable length UTF-8 codes. - * See Bjoern Hoehrmann's site <http://bjoern.hoehrmann.de/utf-8/decoder/dfa/> for details. - * </p> - */ - std::string decodeUTF8String(const uint8_t *buffer, const size_t buffer_size); - } // namespace direct_bt #endif /* BASIC_TYPES_HPP_ */ diff --git a/java/jni/direct_bt/DBTUtils.cxx b/api/direct_bt/dfa_utf8_decode.hpp index 876cd4ff..ecd8ae54 100644 --- a/java/jni/direct_bt/DBTUtils.cxx +++ b/api/direct_bt/dfa_utf8_decode.hpp @@ -1,5 +1,6 @@ /* * Author: Sven Gothel <[email protected]> + * Copyright (c) 2008-2010 Bjoern Hoehrmann <[email protected]> (see details below) * Copyright (c) 2020 Gothel Software e.K. * Copyright (c) 2020 ZAFENA AB * @@ -23,35 +24,30 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "org_tinyb_BluetoothUtils.h" +#ifndef DFA_UTF8_DECODE_HPP_ +#define DFA_UTF8_DECODE_HPP_ +#define DFA_UTF8_ACCEPT 0 +#define DFA_UTF8_REJECT 12 + +#include <string> #include <cstdint> #include <cinttypes> -#include <time.h> - -#include "helper_base.hpp" -#include "helper_dbt.hpp" - -jstring Java_org_tinyb_BluetoothUtils_decodeUTF8String(JNIEnv *env, jclass clazz, jbyteArray jbuffer, jint offset, jint size) { - (void)clazz; - - const int buffer_size = env->GetArrayLength(jbuffer); - if( 0 == buffer_size ) { - return env->NewStringUTF(""); - } - if( buffer_size < offset+size ) { - throw direct_bt::IllegalArgumentException("buffer.length "+std::to_string(buffer_size)+ - " < offset "+std::to_string(offset)+ - " + size "+std::to_string(size), E_FILE_LINE); - } - - JNICriticalArray<uint8_t> criticalArray(env); // RAII - release - uint8_t * buffer_ptr = criticalArray.get(jbuffer, criticalArray.Mode::NO_UPDATE_AND_RELEASE); - if( NULL == buffer_ptr ) { - throw direct_bt::InternalError("GetPrimitiveArrayCritical(byte array) is null", E_FILE_LINE); - } - std::string sres = direct_bt::decodeUTF8String(buffer_ptr+offset, size); +uint32_t dfa_utf8_decode(uint32_t & state, uint32_t & codep, const uint32_t byte_value); + +/** + * Returns all valid consecutive UTF-8 characters within buffer + * in the range up to buffer_size or until EOS. + * <p> + * In case a non UTF-8 character has been detected, + * the content will be cut off and the decoding loop ends. + * </p> + * <p> + * Method utilizes a finite state machine detecting variable length UTF-8 codes. + * See Bjoern Hoehrmann's site <http://bjoern.hoehrmann.de/utf-8/decoder/dfa/> for details. + * </p> + */ +std::string dfa_utf8_decode(const uint8_t *buffer, const size_t buffer_size); - return from_string_to_jstring(env, sres); -} +#endif /* DFA_UTF8_DECODE_HPP_ */ diff --git a/examples/direct_bt_scanner00/dbt_scanner00.cpp b/examples/direct_bt_scanner00/dbt_scanner00.cpp index b9dcc27d..617b52b8 100644 --- a/examples/direct_bt_scanner00/dbt_scanner00.cpp +++ b/examples/direct_bt_scanner00/dbt_scanner00.cpp @@ -26,6 +26,8 @@ #include <direct_bt/DirectBT.hpp> #include <cinttypes> +#include "direct_bt/dfa_utf8_decode.hpp" + extern "C" { #include <unistd.h> } @@ -271,7 +273,7 @@ int main(int argc, char *argv[]) if( serviceChar.hasProperties(GATTCharacteristic::PropertyBitVal::Read) ) { POctets value(GATTHandler::number(GATTHandler::Defaults::MAX_ATT_MTU), 0); if( serviceChar.readValue(value) ) { - std::string sval = decodeUTF8String(value.get_ptr(), value.getSize()); + std::string sval = dfa_utf8_decode(value.get_ptr(), value.getSize()); fprintf(stderr, " [%2.2d.%2.2d] Value: %s ('%s')\n", (int)i, (int)j, value.toString().c_str(), sval.c_str()); } } diff --git a/examples/direct_bt_scanner10/dbt_scanner10.cpp b/examples/direct_bt_scanner10/dbt_scanner10.cpp index 8a1f7087..bde839d8 100644 --- a/examples/direct_bt_scanner10/dbt_scanner10.cpp +++ b/examples/direct_bt_scanner10/dbt_scanner10.cpp @@ -26,6 +26,8 @@ #include <direct_bt/DirectBT.hpp> #include <cinttypes> +#include "direct_bt/dfa_utf8_decode.hpp" + extern "C" { #include <unistd.h> } @@ -299,7 +301,7 @@ static void processConnectedDevice(std::shared_ptr<DBTDevice> device) { if( serviceChar.hasProperties(GATTCharacteristic::PropertyBitVal::Read) ) { POctets value(GATTHandler::number(GATTHandler::Defaults::MAX_ATT_MTU), 0); if( serviceChar.readValue(value) ) { - std::string sval = decodeUTF8String(value.get_ptr(), value.getSize()); + std::string sval = dfa_utf8_decode(value.get_ptr(), value.getSize()); fprintf(stderr, " [%2.2d.%2.2d] Value: %s ('%s')\n", (int)i, (int)j, value.toString().c_str(), sval.c_str()); } } diff --git a/java/jni/BluetoothUtils.cxx b/java/jni/BluetoothUtils.cxx index 83e71dd0..b05ec66f 100644 --- a/java/jni/BluetoothUtils.cxx +++ b/java/jni/BluetoothUtils.cxx @@ -30,6 +30,11 @@ #include <time.h> +#include "JNIMem.hpp" +#include "helper_base.hpp" + +#include "direct_bt/dfa_utf8_decode.hpp" + static const int64_t NanoPerMilli = 1000000L; static const int64_t MilliPerOne = 1000L; @@ -51,3 +56,26 @@ jlong Java_org_tinyb_BluetoothUtils_getCurrentMilliseconds(JNIEnv *env, jclass c return (jlong)res; } +jstring Java_org_tinyb_BluetoothUtils_decodeUTF8String(JNIEnv *env, jclass clazz, jbyteArray jbuffer, jint offset, jint size) { + (void)clazz; + + const int buffer_size = env->GetArrayLength(jbuffer); + if( 0 == buffer_size ) { + return env->NewStringUTF(""); + } + if( buffer_size < offset+size ) { + std::string msg("buffer.length "+std::to_string(buffer_size)+ + " < offset "+std::to_string(offset)+ + " + size "+std::to_string(size)); + throw std::invalid_argument(msg.c_str()); + } + + JNICriticalArray<uint8_t> criticalArray(env); // RAII - release + uint8_t * buffer_ptr = criticalArray.get(jbuffer, criticalArray.Mode::NO_UPDATE_AND_RELEASE); + if( NULL == buffer_ptr ) { + throw std::invalid_argument("GetPrimitiveArrayCritical(byte array) is null"); + } + std::string sres = dfa_utf8_decode(buffer_ptr+offset, size); + + return from_string_to_jstring(env, sres); +} diff --git a/java/jni/direct_bt/CMakeLists.txt b/java/jni/direct_bt/CMakeLists.txt index 60eacd6c..9fd40026 100644 --- a/java/jni/direct_bt/CMakeLists.txt +++ b/java/jni/direct_bt/CMakeLists.txt @@ -33,7 +33,6 @@ set (direct_bt_JNI_SRCS ${PROJECT_SOURCE_DIR}/java/jni/direct_bt/DBTGattService.cxx ${PROJECT_SOURCE_DIR}/java/jni/direct_bt/DBTManager.cxx ${PROJECT_SOURCE_DIR}/java/jni/direct_bt/DBTObject.cxx - ${PROJECT_SOURCE_DIR}/java/jni/direct_bt/DBTUtils.cxx ) set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--as-needed") diff --git a/src/direct_bt/BasicTypes.cpp b/src/direct_bt/BasicTypes.cpp index 47070b35..87a517d8 100644 --- a/src/direct_bt/BasicTypes.cpp +++ b/src/direct_bt/BasicTypes.cpp @@ -338,83 +338,3 @@ std::string direct_bt::trimCopy(const std::string &_s) { return s; } -/************************************************************************/ -/************************************************************************/ -/************************************************************************/ - -#define UTF8_ACCEPT 0 -#define UTF8_REJECT 12 - -static uint32_t dfa_utf8_decode(uint32_t & state, uint32_t & codep, const uint32_t byte_value); - -std::string direct_bt::decodeUTF8String(const uint8_t *buffer, const size_t buffer_size) { - uint32_t codepoint; - uint32_t state = UTF8_ACCEPT; - size_t byte_count; - const uint8_t *ibuffer = buffer; - - for( byte_count = 0; byte_count < buffer_size && *ibuffer; byte_count++ ) { - if ( UTF8_REJECT == dfa_utf8_decode(state, codepoint, *ibuffer++) ) { - break; // not a valid byte for a utf8 stream, end here! - } // else UTF8_ACCEPT -> valid_utf8_chars++ - } - if( 0 < byte_count ) { - return std::string( (const char*)buffer, byte_count ); - } - return std::string(); -} - - -/************************************************************************/ -/************************************************************************/ -/************************************************************************/ - -/** - * Copyright (c) 2008-2010 Bjoern Hoehrmann <[email protected]> - * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, - * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE - * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -static const uint8_t dfa_utf8d[] = { - // The first part of the table maps bytes to character classes that - // to reduce the size of the transition table and create bitmasks. - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, - - // The second part is a transition table that maps a combination - // of a state of the automaton and a character class to a state. - 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, - 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, - 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, - 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, - 12,36,12,12,12,12,12,12,12,12,12,12, -}; - -static uint32_t dfa_utf8_decode(uint32_t & state, uint32_t & codep, const uint32_t byte_value) { - const uint32_t type = dfa_utf8d[byte_value]; - - codep = (state != UTF8_ACCEPT) ? - (byte_value & 0x3fu) | (codep << 6) : - (0xff >> type) & (byte_value); - - state = dfa_utf8d[256 + state + type]; - return state; -} - diff --git a/src/direct_bt/CMakeLists.txt b/src/direct_bt/CMakeLists.txt index f16905c9..dc8e483b 100644 --- a/src/direct_bt/CMakeLists.txt +++ b/src/direct_bt/CMakeLists.txt @@ -10,6 +10,7 @@ include_directories( ) set (direct_bt_LIB_SRCS + ${PROJECT_SOURCE_DIR}/src/direct_bt/dfa_utf8_decode.cpp ${PROJECT_SOURCE_DIR}/src/ieee11073/DataTypes.cpp ${PROJECT_SOURCE_DIR}/src/direct_bt/BasicTypes.cpp ${PROJECT_SOURCE_DIR}/src/direct_bt/UUID.cpp diff --git a/src/direct_bt/dfa_utf8_decode.cpp b/src/direct_bt/dfa_utf8_decode.cpp new file mode 100644 index 00000000..19dc70a2 --- /dev/null +++ b/src/direct_bt/dfa_utf8_decode.cpp @@ -0,0 +1,99 @@ +/* + * Author: Sven Gothel <[email protected]> + * Copyright (c) 2008-2010 Bjoern Hoehrmann <[email protected]> (see details below) + * Copyright (c) 2020 Gothel Software e.K. + * Copyright (c) 2020 ZAFENA AB + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "direct_bt/dfa_utf8_decode.hpp" + +std::string dfa_utf8_decode(const uint8_t *buffer, const size_t buffer_size) { + uint32_t codepoint; + uint32_t state = DFA_UTF8_ACCEPT; + size_t byte_count; + const uint8_t *ibuffer = buffer; + + for( byte_count = 0; byte_count < buffer_size && *ibuffer; byte_count++ ) { + if ( DFA_UTF8_REJECT == dfa_utf8_decode(state, codepoint, *ibuffer++) ) { + break; // not a valid byte for a utf8 stream, end here! + } // else DFA_UTF8_ACCEPT -> valid_utf8_chars++ + } + if( 0 < byte_count ) { + return std::string( (const char*)buffer, byte_count ); + } + return std::string(); +} + +/************************************************************************/ +/************************************************************************/ +/************************************************************************/ + +/** + * Copyright (c) 2008-2010 Bjoern Hoehrmann <[email protected]> + * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, + * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +static const uint8_t dfa_utf8d[] = { + // The first part of the table maps bytes to character classes that + // to reduce the size of the transition table and create bitmasks. + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + + // The second part is a transition table that maps a combination + // of a state of the automaton and a character class to a state. + 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, + 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, + 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, + 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, + 12,36,12,12,12,12,12,12,12,12,12,12, +}; + +uint32_t dfa_utf8_decode(uint32_t & state, uint32_t & codep, const uint32_t byte_value) { + const uint32_t type = dfa_utf8d[byte_value]; + + codep = (state != DFA_UTF8_ACCEPT) ? + (byte_value & 0x3fu) | (codep << 6) : + (0xff >> type) & (byte_value); + + state = dfa_utf8d[256 + state + type]; + return state; +} + + diff --git a/src/tinyb/CMakeLists.txt b/src/tinyb/CMakeLists.txt index 32a9a02e..bff98e1b 100644 --- a/src/tinyb/CMakeLists.txt +++ b/src/tinyb/CMakeLists.txt @@ -13,6 +13,7 @@ include_directories( ) set (tinyb_LIB_SRCS + ${PROJECT_SOURCE_DIR}/src/direct_bt/dfa_utf8_decode.cpp ${PROJECT_SOURCE_DIR}/src/tinyb/BluetoothObject.cpp ${PROJECT_SOURCE_DIR}/src/tinyb/BluetoothEvent.cpp ${PROJECT_SOURCE_DIR}/src/tinyb/BluetoothManager.cpp |