From d4f4d9761cbd41c3ab6de79383ff39b9f97bf452 Mon Sep 17 00:00:00 2001 From: Syrone Wong Date: Sat, 18 Nov 2017 20:06:50 +0800 Subject: [PATCH] Upgrade PCRE to PCRE2 - Use 8bit variant by default This comes from a PR that was closed and never reopened because, at the time, PCRE2 was too new (???). Ref: https://github.com/shadowsocks/shadowsocks-libev/pull/1792 Signed-off-by: Syrone Wong [ squash the first 2 patches from the PR, drop the last one ] Signed-off-by: Christian Marangi --- .travis.yml | 9 ++- configure.ac | 8 +-- m4/pcre.m4 | 152 ------------------------------------------ m4/pcre2.m4 | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/rule.c | 53 ++++++++++++--- src/rule.h | 23 +++++-- 6 files changed, 253 insertions(+), 173 deletions(-) delete mode 100644 m4/pcre.m4 create mode 100644 m4/pcre2.m4 # diff --git a/.travis.yml b/.travis.yml # index ee3424c..e7da08c 100644 # --- a/.travis.yml # +++ b/.travis.yml # @@ -11,11 +11,12 @@ env: # global: # - LIBSODIUM_VER=1.0.12 # - MBEDTLS_VER=2.4.0 # + - PCRE2_VER=10.30 # before_install: # - | # if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then # # All dependencies for macOS build. Some packages has been installed by travis so use reinstall. 
# - brew reinstall autoconf automake xmlto c-ares libev mbedtls libsodium asciidoc >> /dev/null 2>&1; # + brew reinstall autoconf automake xmlto pcre2 c-ares libev mbedtls libsodium asciidoc >> /dev/null 2>&1; # else # wget https://github.com/jedisct1/libsodium/releases/download/$LIBSODIUM_VER/libsodium-$LIBSODIUM_VER.tar.gz; # tar xvf libsodium-$LIBSODIUM_VER.tar.gz; # @@ -29,6 +30,12 @@ before_install: # make SHARED=1; # sudo make install; # popd; # + wget https://ftp.pcre.org/pub/pcre/pcre2-$PCRE2_VER.tar.gz; # + tar xvf pcre2-$PCRE2_VER.tar.gz; # + pushd pcre2-$PCRE2_VER; # + ./configure --prefix=/usr --enable-pcre2-16 --enable-pcre2-32 && make; # + sudo make install; # + popd; # # Load cached docker images # if [[ -d $HOME/docker ]]; then # ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load"; --- a/configure.ac +++ b/configure.ac @@ -20,10 +20,10 @@ AC_DISABLE_STATIC AC_DISABLE_SHARED LT_INIT([dlopen]) -dnl Check for pcre library -TS_CHECK_PCRE -if test "x${enable_pcre}" != "xyes"; then - AC_MSG_ERROR([Cannot find pcre library. Configure --with-pcre=DIR]) +dnl Check for pcre2 library +TS_CHECK_PCRE2 +if test "x${enable_pcre2}" != "xyes"; then + AC_MSG_ERROR([Cannot find pcre2 library. Configure --with-pcre2=DIR]) fi dnl Checks for using shared libraries from system --- a/m4/pcre.m4 +++ /dev/null @@ -1,152 +0,0 @@ -dnl -------------------------------------------------------- -*- autoconf -*- -dnl Licensed to the Apache Software Foundation (ASF) under one or more -dnl contributor license agreements. See the NOTICE file distributed with -dnl this work for additional information regarding copyright ownership. -dnl The ASF licenses this file to You under the Apache License, Version 2.0 -dnl (the "License"); you may not use this file except in compliance with -dnl the License. 
You may obtain a copy of the License at -dnl -dnl http://www.apache.org/licenses/LICENSE-2.0 -dnl -dnl Unless required by applicable law or agreed to in writing, software -dnl distributed under the License is distributed on an "AS IS" BASIS, -dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -dnl See the License for the specific language governing permissions and -dnl limitations under the License. - -dnl -dnl TS_ADDTO(variable, value) -dnl -dnl Add value to variable -dnl -AC_DEFUN([TS_ADDTO], [ - if test "x$$1" = "x"; then - test "x$verbose" = "xyes" && echo " setting $1 to \"$2\"" - $1="$2" - else - ats_addto_bugger="$2" - for i in $ats_addto_bugger; do - ats_addto_duplicate="0" - for j in $$1; do - if test "x$i" = "x$j"; then - ats_addto_duplicate="1" - break - fi - done - if test $ats_addto_duplicate = "0"; then - test "x$verbose" = "xyes" && echo " adding \"$i\" to $1" - $1="$$1 $i" - fi - done - fi -])dnl - -dnl -dnl TS_ADDTO_RPATH(path) -dnl -dnl Adds path to variable with the '-rpath' directive. 
-dnl -AC_DEFUN([TS_ADDTO_RPATH], [ - AC_MSG_NOTICE([adding $1 to RPATH]) - TS_ADDTO(LIBTOOL_LINK_FLAGS, [-R$1]) -])dnl - -dnl -dnl pcre.m4: Trafficserver's pcre autoconf macros -dnl - -dnl -dnl TS_CHECK_PCRE: look for pcre libraries and headers -dnl -AC_DEFUN([TS_CHECK_PCRE], [ -enable_pcre=no -AC_ARG_WITH(pcre, [AC_HELP_STRING([--with-pcre=DIR],[use a specific pcre library])], -[ - if test "x$withval" != "xyes" && test "x$withval" != "x"; then - pcre_base_dir="$withval" - if test "$withval" != "no"; then - enable_pcre=yes - case "$withval" in - *":"*) - pcre_include="`echo $withval |sed -e 's/:.*$//'`" - pcre_ldflags="`echo $withval |sed -e 's/^.*://'`" - AC_MSG_CHECKING(checking for pcre includes in $pcre_include libs in $pcre_ldflags ) - ;; - *) - pcre_include="$withval/include" - pcre_ldflags="$withval/lib" - AC_MSG_CHECKING(checking for pcre includes in $withval) - ;; - esac - fi - fi -], -[ - AC_CHECK_PROG(PCRE_CONFIG, pcre-config, pcre-config) - if test "x$PCRE_CONFIG" != "x"; then - enable_pcre=yes - pcre_base_dir="`$PCRE_CONFIG --prefix`" - pcre_include="`$PCRE_CONFIG --cflags | sed -es/-I//`" - pcre_ldflags="`$PCRE_CONFIG --libs | sed -es/-lpcre// -es/-L//`" - fi -]) - -if test "x$pcre_base_dir" = "x"; then - AC_MSG_CHECKING([for pcre location]) - AC_CACHE_VAL(ats_cv_pcre_dir,[ - for dir in /usr/local /usr ; do - if test -d $dir && ( test -f $dir/include/pcre.h || test -f $dir/include/pcre/pcre.h ); then - ats_cv_pcre_dir=$dir - break - fi - done - ]) - pcre_base_dir=$ats_cv_pcre_dir - if test "x$pcre_base_dir" = "x"; then - enable_pcre=no - AC_MSG_RESULT([not found]) - else - enable_pcre=yes - pcre_include="$pcre_base_dir/include" - pcre_ldflags="$pcre_base_dir/lib" - AC_MSG_RESULT([$pcre_base_dir]) - fi -else - AC_MSG_CHECKING(for pcre headers in $pcre_include) - if test -d $pcre_include && test -d $pcre_ldflags && ( test -f $pcre_include/pcre.h || test -f $pcre_include/pcre/pcre.h ); then - AC_MSG_RESULT([ok]) - else - AC_MSG_RESULT([not found]) - fi 
-fi - -pcreh=0 -pcre_pcreh=0 -if test "$enable_pcre" != "no"; then - saved_ldflags=$LDFLAGS - saved_cppflags=$CFLAGS - pcre_have_headers=0 - pcre_have_libs=0 - if test "$pcre_base_dir" != "/usr"; then - TS_ADDTO(CFLAGS, [-I${pcre_include}]) - TS_ADDTO(CFLAGS, [-DPCRE_STATIC]) - TS_ADDTO(LDFLAGS, [-L${pcre_ldflags}]) - TS_ADDTO_RPATH(${pcre_ldflags}) - fi - AC_SEARCH_LIBS([pcre_exec], [pcre], [pcre_have_libs=1]) - if test "$pcre_have_libs" != "0"; then - AC_CHECK_HEADERS(pcre.h, [pcre_have_headers=1]) - AC_CHECK_HEADERS(pcre/pcre.h, [pcre_have_headers=1]) - fi - if test "$pcre_have_headers" != "0"; then - AC_DEFINE(HAVE_LIBPCRE,1,[Compiling with pcre support]) - AC_SUBST(LIBPCRE, [-lpcre]) - else - enable_pcre=no - CFLAGS=$saved_cppflags - LDFLAGS=$saved_ldflags - fi -fi -AC_SUBST(pcreh) -AC_SUBST(pcre_pcreh) -]) --- /dev/null +++ b/m4/pcre2.m4 @@ -0,0 +1,181 @@ +dnl -------------------------------------------------------- -*- autoconf -*- +dnl Licensed to the Apache Software Foundation (ASF) under one or more +dnl contributor license agreements. See the NOTICE file distributed with +dnl this work for additional information regarding copyright ownership. +dnl The ASF licenses this file to You under the Apache License, Version 2.0 +dnl (the "License"); you may not use this file except in compliance with +dnl the License. You may obtain a copy of the License at +dnl +dnl http://www.apache.org/licenses/LICENSE-2.0 +dnl +dnl Unless required by applicable law or agreed to in writing, software +dnl distributed under the License is distributed on an "AS IS" BASIS, +dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +dnl See the License for the specific language governing permissions and +dnl limitations under the License. 
+ +dnl Modified by Syrone Wong to support pcre2 8bit variant only + +dnl +dnl TS_ADDTO(variable, value) +dnl +dnl Add value to variable +dnl +AC_DEFUN([TS_ADDTO], [ + if test "x$$1" = "x"; then + test "x$verbose" = "xyes" && echo " setting $1 to \"$2\"" + $1="$2" + else + ats_addto_bugger="$2" + for i in $ats_addto_bugger; do + ats_addto_duplicate="0" + for j in $$1; do + if test "x$i" = "x$j"; then + ats_addto_duplicate="1" + break + fi + done + if test $ats_addto_duplicate = "0"; then + test "x$verbose" = "xyes" && echo " adding \"$i\" to $1" + $1="$$1 $i" + fi + done + fi +])dnl + +dnl +dnl TS_ADDTO_RPATH(path) +dnl +dnl Adds path to variable with the '-rpath' directive. +dnl +AC_DEFUN([TS_ADDTO_RPATH], [ + AC_MSG_NOTICE([adding $1 to RPATH]) + TS_ADDTO(LIBTOOL_LINK_FLAGS, [-R$1]) +])dnl + +dnl +dnl pcre2.m4: Trafficserver's pcre2 autoconf macros +dnl + +dnl +dnl TS_CHECK_PCRE2: look for pcre2 libraries and headers +dnl +AC_DEFUN([TS_CHECK_PCRE2], [ +enable_pcre2=no +AC_ARG_WITH(pcre2, [AC_HELP_STRING([--with-pcre2=DIR],[use a specific pcre2 library])], +[ + if test "x$withval" != "xyes" && test "x$withval" != "x"; then + pcre2_base_dir="$withval" + if test "$withval" != "no"; then + enable_pcre2=yes + case "$withval" in + *":"*) + pcre2_include="`echo $withval |sed -e 's/:.*$//'`" + pcre2_ldflags="`echo $withval |sed -e 's/^.*://'`" + AC_MSG_CHECKING(checking for pcre2 includes in $pcre2_include libs in $pcre2_ldflags ) + ;; + *) + pcre2_include="$withval/include" + pcre2_ldflags="$withval/lib" + AC_MSG_CHECKING(checking for pcre2 includes in $withval) + ;; + esac + fi + fi +], +[ + AC_CHECK_PROG(PCRE2_CONFIG, pcre2-config, pcre2-config) + if test "x$PCRE2_CONFIG" != "x"; then + enable_pcre2=yes + pcre2_base_dir="`$PCRE2_CONFIG --prefix`" + pcre2_include="`$PCRE2_CONFIG --cflags | sed -es/-I//`" + pcre2_ldflags="`$PCRE2_CONFIG --libs8 | sed -es/-lpcre2-8// -es/-L//`" + fi +]) + +if test "x$pcre2_base_dir" = "x"; then + AC_MSG_CHECKING([for pcre2 location]) + 
AC_CACHE_VAL(ats_cv_pcre2_dir,[ + for dir in /usr/local /usr ; do + if test -d $dir && ( test -f $dir/include/pcre2.h || test -f $dir/include/pcre2/pcre2.h ); then + ats_cv_pcre2_dir=$dir + break + fi + done + ]) + pcre2_base_dir=$ats_cv_pcre2_dir + if test "x$pcre2_base_dir" = "x"; then + enable_pcre2=no + AC_MSG_RESULT([not found]) + else + enable_pcre2=yes + pcre2_include="$pcre2_base_dir/include" + pcre2_ldflags="$pcre2_base_dir/lib" + AC_MSG_RESULT([$pcre2_base_dir]) + fi +else + AC_MSG_CHECKING(for pcre2 headers in $pcre2_include) + if test -d $pcre2_include && test -d $pcre2_ldflags && ( test -f $pcre2_include/pcre2.h || test -f $pcre2_include/pcre2/pcre2.h ); then + AC_MSG_RESULT([ok]) + else + AC_MSG_RESULT([not found]) + fi +fi + +pcre2h=0 +pcre2_pcre2h=0 +if test "$enable_pcre2" != "no"; then + saved_ldflags=$LDFLAGS + saved_cppflags=$CFLAGS + pcre2_have_headers=0 + pcre2_have_libs=0 + if test "$pcre2_base_dir" != "/usr"; then + TS_ADDTO(CFLAGS, [-I${pcre2_include}]) + TS_ADDTO(CFLAGS, [-DPCRE2_STATIC]) + TS_ADDTO(LDFLAGS, [-L${pcre2_ldflags}]) + TS_ADDTO_RPATH(${pcre2_ldflags}) + fi + AC_SEARCH_LIBS([pcre2_match_8], [pcre2-8], [pcre2_have_libs=1]) + if test "$pcre2_have_libs" != "0"; then + AC_MSG_CHECKING([pcre2.h]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[ +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> + ]], + [[ + ]] + )], + [pcre2_have_headers=1 + AC_MSG_RESULT([ok])], + [AC_MSG_RESULT([not found])] + ) + + AC_MSG_CHECKING([pcre2/pcre2.h]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[ +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2/pcre2.h> + ]], + [[ + ]] + )], + [pcre2_have_headers=1 + AC_MSG_RESULT([ok])], + [AC_MSG_RESULT([not found])] + ) + fi + if test "$pcre2_have_headers" != "0"; then + AC_DEFINE(HAVE_LIBPCRE2,1,[Compiling with pcre2 support]) + AC_SUBST(LIBPCRE2, [-lpcre2-8]) + else + enable_pcre2=no + CFLAGS=$saved_cppflags + LDFLAGS=$saved_ldflags + fi +fi +AC_SUBST(pcre2h) +AC_SUBST(pcre2_pcre2h) +]) --- a/src/rule.c +++ b/src/rule.c @@ -1,6 +1,7 @@ 
/* * Copyright (c) 2011 and 2012, Dustin Lundquist * Copyright (c) 2011 Manuel Kasper + * Copyright (c) 2017 Syrone Wong * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -74,18 +75,37 @@ add_rule(struct cork_dllist *rules, rule cork_dllist_add(rules, &rule->entries); } +/* + * XXX: As pattern and subject are char arguments, they can be straightforwardly + * cast to PCRE2_SPTR as we are working in 8-bit code units. + */ + int init_rule(rule_t *rule) { if (rule->pattern_re == NULL) { - const char *reerr; - int reerroffset; + int errornumber; + PCRE2_SIZE erroroffset; + rule->pattern_re = pcre2_compile( + (PCRE2_SPTR)rule->pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + 0, /* default options */ + &errornumber, /* for error number */ + &erroroffset, /* for error offset */ + NULL); /* use default compile context */ - rule->pattern_re = - pcre_compile(rule->pattern, 0, &reerr, &reerroffset, NULL); if (rule->pattern_re == NULL) { - LOGE("Regex compilation of \"%s\" failed: %s, offset %d", - rule->pattern, reerr, reerroffset); + PCRE2_UCHAR errbuffer[512]; + pcre2_get_error_message(errornumber, errbuffer, sizeof(errbuffer)); + LOGE("PCRE2 regex compilation failed at offset %d: %s\n", (int)erroroffset, + errbuffer); + return 0; + } + + rule->pattern_re_match_data = pcre2_match_data_create_from_pattern(rule->pattern_re, NULL); + + if (rule->pattern_re_match_data == NULL) { + ERROR("PCRE2: the memory for the block could not be obtained"); return 0; } } @@ -105,8 +125,15 @@ lookup_rule(const struct cork_dllist *ru cork_dllist_foreach_void(rules, curr, next) { rule_t *rule = cork_container_of(curr, rule_t, entries); - if (pcre_exec(rule->pattern_re, NULL, - name, name_len, 0, 0, NULL, 0) >= 0) + if (pcre2_match( + rule->pattern_re, /* the compiled pattern */ + (PCRE2_SPTR)name, /* the subject string */ + name_len, /* the length of the subject */ + 0, /* start at offset 0 in the 
subject */ + 0, /* default options */ + rule->pattern_re_match_data, /* block for storing the result */ + NULL /* use default match context */ + ) >= 0) return rule; } @@ -127,7 +154,13 @@ free_rule(rule_t *rule) return; ss_free(rule->pattern); - if (rule->pattern_re != NULL) - pcre_free(rule->pattern_re); + if (rule->pattern_re != NULL) { + pcre2_code_free(rule->pattern_re); /* data and the compiled pattern. */ + rule->pattern_re = NULL; + } + if (rule->pattern_re_match_data != NULL) { + pcre2_match_data_free(rule->pattern_re_match_data); /* Release memory used for the match */ + rule->pattern_re_match_data = NULL; + } ss_free(rule); } --- a/src/rule.h +++ b/src/rule.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2011 and 2012, Dustin Lundquist * Copyright (c) 2011 Manuel Kasper + * Copyright (c) 2017 Syrone Wong * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,17 +34,27 @@ #include <libcork/ds.h> -#ifdef HAVE_PCRE_H -#include <pcre.h> -#elif HAVE_PCRE_PCRE_H -#include <pcre/pcre.h> -#endif +/* + * The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h. + * For a program that uses only one code unit width, setting it to 8, 16, or 32 + * makes it possible to use generic function names such as pcre2_compile(). Note + * that just changing 8 to 16 (for example) is not sufficient to convert this + * program to process 16-bit characters. Even in a fully 16-bit environment, where + * string-handling functions such as strcmp() and printf() work with 16-bit + * characters, the code for handling the table of named substrings will still need + * to be modified. + */ +/* we only need to support ASCII chartable, thus set it to 8 */ +#define PCRE2_CODE_UNIT_WIDTH 8 + +#include <pcre2.h> typedef struct rule { char *pattern; /* Runtime fields */ - pcre *pattern_re; + pcre2_code *pattern_re; + pcre2_match_data *pattern_re_match_data; struct cork_dllist_item entries; } rule_t;