mirror of
				https://github.com/tiagovignatti/intel-gpu-tools.git
				synced 2025-11-04 12:07:12 +00:00 
			
		
		
		
	lib/igt_core.c: Expand --run-subtest functionality.
Added extended wildcard support when specifying --run-subtest. Wildcard format is as specified in rfc3977 and the uwildmat() implementation is taken from libinn. See https://tools.ietf.org/html/rfc3977#section-4 for a description of allowed wildcard expressions. v2: Use comma as list separator (Ville Syrjala) support both ^ and ! as not operators (Dave Gordon) v3: Updated to use uwildmat() (Dave Gordon) Signed-off-by: Derek Morton <derek.j.morton@intel.com> [danvet: Fixup whitespace. Add #include <stdint.h>. Run lint.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
		
							parent
							
								
									0a45d6ba13
								
							
						
					
					
						commit
						ef724ab2bb
					
				
							
								
								
									
										20
									
								
								COPYING
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								COPYING
									
									
									
									
									
								
							@ -106,3 +106,23 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
			
		||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 | 
			
		||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 | 
			
		||||
IN THE SOFTWARE.
 | 
			
		||||
 | 
			
		||||
Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
 | 
			
		||||
    2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
 | 
			
		||||
Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
 | 
			
		||||
    2002, 2003 by The Internet Software Consortium and Rich Salz
 | 
			
		||||
 | 
			
		||||
This code is derived from software contributed to the Internet Software
 | 
			
		||||
Consortium by Rich Salz.
 | 
			
		||||
 | 
			
		||||
Permission to use, copy, modify, and distribute this software for any
 | 
			
		||||
purpose with or without fee is hereby granted, provided that the above
 | 
			
		||||
copyright notice and this permission notice appear in all copies.
 | 
			
		||||
 | 
			
		||||
THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 | 
			
		||||
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 | 
			
		||||
MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
 | 
			
		||||
SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 | 
			
		||||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 | 
			
		||||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 | 
			
		||||
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 | 
			
		||||
 | 
			
		||||
@ -60,6 +60,8 @@ libintel_tools_la_SOURCES = 	\
 | 
			
		||||
	igt_core.h		\
 | 
			
		||||
	igt_draw.c		\
 | 
			
		||||
	igt_draw.h		\
 | 
			
		||||
	uwildmat/uwildmat.h	\
 | 
			
		||||
	uwildmat/uwildmat.c	\
 | 
			
		||||
	$(NULL)
 | 
			
		||||
 | 
			
		||||
.PHONY: version.h.tmp
 | 
			
		||||
 | 
			
		||||
@ -56,7 +56,7 @@
 | 
			
		||||
#include <ctype.h>
 | 
			
		||||
#include <limits.h>
 | 
			
		||||
#include <locale.h>
 | 
			
		||||
#include <fnmatch.h>
 | 
			
		||||
#include <uwildmat/uwildmat.h>
 | 
			
		||||
 | 
			
		||||
#include "drmtest.h"
 | 
			
		||||
#include "intel_chipset.h"
 | 
			
		||||
@ -209,6 +209,19 @@
 | 
			
		||||
 * intel gpu to be present). Then individual subtests can be run with
 | 
			
		||||
 * "--run-subtest". Usage help for tests with subtests can be obtained with the
 | 
			
		||||
 * "--help" command line option.
 | 
			
		||||
 *
 | 
			
		||||
 * A wildcard expression can be given to --run-subtest to specify a subset of
 | 
			
		||||
 * subtests to run. See https://tools.ietf.org/html/rfc3977#section-4 for a
 | 
			
		||||
 * description of allowed wildcard expressions.
 | 
			
		||||
 * Some examples of allowed wildcard expressions are:
 | 
			
		||||
 *
 | 
			
		||||
 * - '*basic*' match any subtest containing basic
 | 
			
		||||
 * - 'basic-???' match any subtest named basic- with 3 characters after -
 | 
			
		||||
 * - 'basic-[0-9]' match any subtest named basic- with a single number after -
 | 
			
		||||
 * - 'basic-[^0-9]' match any subtest named basic- with a single non numerical character after -
 | 
			
		||||
 * - 'basic*,advanced*' match any subtest starting basic or advanced
 | 
			
		||||
 * - '*,!basic*' match any subtest not starting basic
 | 
			
		||||
 * - 'basic*,!basic-render*' match any subtest starting basic but not starting basic-render
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
static unsigned int exit_handler_count;
 | 
			
		||||
@ -814,7 +827,7 @@ bool __igt_run_subtest(const char *subtest_name)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (run_single_subtest) {
 | 
			
		||||
		if (fnmatch(run_single_subtest, subtest_name, 0) != 0)
 | 
			
		||||
		if (uwildmat(subtest_name, run_single_subtest) == 0)
 | 
			
		||||
			return false;
 | 
			
		||||
		else
 | 
			
		||||
			run_single_subtest_found = true;
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										475
									
								
								lib/uwildmat/uwildmat.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										475
									
								
								lib/uwildmat/uwildmat.c
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,475 @@
 | 
			
		||||
/* uwildmat.c is reused from libinn - https://launchpad.net/ubuntu/+source/inn2/2.5.4-1
 | 
			
		||||
 | 
			
		||||
   This provides wild card matching originally used in InterNetNews and is
 | 
			
		||||
   described in https://tools.ietf.org/html/rfc3977#section-4
 | 
			
		||||
 | 
			
		||||
   INN licence:
 | 
			
		||||
   INN as a whole and all code contained in it not otherwise marked with
 | 
			
		||||
   different licenses and/or copyrights is covered by the following copyright
 | 
			
		||||
   and license:
 | 
			
		||||
 | 
			
		||||
   Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
 | 
			
		||||
   2013, 2014 by Internet Systems Consortium, Inc. ("ISC")
 | 
			
		||||
   Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
 | 
			
		||||
   2002, 2003 by The Internet Software Consortium and Rich Salz
 | 
			
		||||
 | 
			
		||||
   This code is derived from software contributed to the Internet Software
 | 
			
		||||
   Consortium by Rich Salz.
 | 
			
		||||
 | 
			
		||||
   Permission to use, copy, modify, and distribute this software for any
 | 
			
		||||
   purpose with or without fee is hereby granted, provided that the above
 | 
			
		||||
   copyright notice and this permission notice appear in all copies.
 | 
			
		||||
 | 
			
		||||
   THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 | 
			
		||||
   REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 | 
			
		||||
   MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY
 | 
			
		||||
   SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 | 
			
		||||
   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 | 
			
		||||
   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 | 
			
		||||
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 | 
			
		||||
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
/*  $Id: uwildmat.c 8918 2010-01-22 23:28:28Z iulius $
 | 
			
		||||
 **
 | 
			
		||||
 **  wildmat pattern matching with Unicode UTF-8 extensions.
 | 
			
		||||
 **
 | 
			
		||||
 **  Do shell-style pattern matching for ?, \, [], and * characters.  Might not
 | 
			
		||||
 **  be robust in face of malformed patterns; e.g., "foo[a-" could cause a
 | 
			
		||||
 **  segmentation violation.  It is 8-bit clean.  (Robustness hopefully fixed
 | 
			
		||||
 **  July 2000; all malformed patterns should now just fail to match anything.)
 | 
			
		||||
 **
 | 
			
		||||
 **  Original by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
 | 
			
		||||
 **  Rich $alz is now <rsalz@osf.org>.
 | 
			
		||||
 **
 | 
			
		||||
 **  April, 1991:  Replaced mutually-recursive calls with in-line code for the
 | 
			
		||||
 **  star character.
 | 
			
		||||
 **
 | 
			
		||||
 **  Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
 | 
			
		||||
 **  This can greatly speed up failing wildcard patterns.  For example:
 | 
			
		||||
 **
 | 
			
		||||
 **	pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
 | 
			
		||||
 **	text 1:	 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
 | 
			
		||||
 **	text 2:	 -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
 | 
			
		||||
 **
 | 
			
		||||
 **  Text 1 matches with 51 calls, while text 2 fails with 54 calls.  Without
 | 
			
		||||
 **  the ABORT code, it takes 22310 calls to fail.  Ugh.  The following
 | 
			
		||||
 **  explanation is from Lars:
 | 
			
		||||
 **
 | 
			
		||||
 **  The precondition that must be fulfilled is that DoMatch will consume at
 | 
			
		||||
 **  least one character in text.  This is true if *p is neither '*' nor '\0'.)
 | 
			
		||||
 **  The last return has ABORT instead of false to avoid quadratic behaviour in
 | 
			
		||||
 **  cases like pattern "*a*b*c*d" with text "abcxxxxx".  With false, each
 | 
			
		||||
 **  star-loop has to run to the end of the text; with ABORT only the last one
 | 
			
		||||
 **  does.
 | 
			
		||||
 **
 | 
			
		||||
 **  Once the control of one instance of DoMatch enters the star-loop, that
 | 
			
		||||
 **  instance will return either true or ABORT, and any calling instance will
 | 
			
		||||
 **  therefore return immediately after (without calling recursively again).
 | 
			
		||||
 **  In effect, only one star-loop is ever active.  It would be possible to
 | 
			
		||||
 **  modify the code to maintain this context explicitly, eliminating all
 | 
			
		||||
 **  recursive calls at the cost of some complication and loss of clarity (and
 | 
			
		||||
 **  the ABORT stuff seems to be unclear enough by itself).  I think it would
 | 
			
		||||
 **  be unwise to try to get this into a released version unless you have a
 | 
			
		||||
 **  good test data base to try it out on.
 | 
			
		||||
 **
 | 
			
		||||
 **  June, 1991:  Robert Elz <kre@munnari.oz.au> added minus and close bracket
 | 
			
		||||
 **  handling for character sets.
 | 
			
		||||
 **
 | 
			
		||||
 **  July, 2000:  Largely rewritten by Russ Allbery <rra@stanford.edu> to add
 | 
			
		||||
 **  support for ',', '!', and optionally '@' to the core wildmat routine.
 | 
			
		||||
 **  Broke the character class matching into a separate function for clarity
 | 
			
		||||
 **  since it's infrequently used in practice, and added some simple lookahead
 | 
			
		||||
 **  to significantly decrease the recursive calls in the '*' matching code.
 | 
			
		||||
 **  Added support for UTF-8 as the default character set for any high-bit
 | 
			
		||||
 **  characters.
 | 
			
		||||
 **
 | 
			
		||||
 **  For more information on UTF-8, see RFC 3629.
 | 
			
		||||
 **
 | 
			
		||||
 **  Please note that this file is intentionally written so that conditionally
 | 
			
		||||
 **  executed expressions are on separate lines from the condition to
 | 
			
		||||
 **  facilitate analysis of the coverage of the test suite using purecov.
 | 
			
		||||
 **  Please preserve this.  As of March 11, 2001, purecov reports that the
 | 
			
		||||
 **  accompanying test suite achieves 100% coverage of this file.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include <string.h>
 | 
			
		||||
#include <stdint.h>
 | 
			
		||||
#include "uwildmat.h"
 | 
			
		||||
 | 
			
		||||
#define ABORT -1
 | 
			
		||||
 | 
			
		||||
/* Whether or not an octet looks like the start of a UTF-8 character. */
 | 
			
		||||
#define ISUTF8(c)       (((c) & 0xc0) == 0xc0)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 **  Determine the length of a non-ASCII character in octets (for advancing
 | 
			
		||||
 **  pointers when skipping over characters).  Takes a pointer to the start of
 | 
			
		||||
 **  the character and to the last octet of the string.  If end is NULL, expect
 | 
			
		||||
 **  the string pointed to by start to be nul-terminated.  If the character is
 | 
			
		||||
 **  malformed UTF-8, return 1 to treat it like an eight-bit local character.
 | 
			
		||||
 */
 | 
			
		||||
static int
 | 
			
		||||
utf8_length(const unsigned char *start, const unsigned char *end)
 | 
			
		||||
{
 | 
			
		||||
	unsigned char mask = 0x80;
 | 
			
		||||
	const unsigned char *p;
 | 
			
		||||
	int length = 0;
 | 
			
		||||
	int left;
 | 
			
		||||
 | 
			
		||||
	for (; mask > 0 && (*start & mask) == mask; mask >>= 1)
 | 
			
		||||
		length++;
 | 
			
		||||
	if (length < 2 || length > 6)
 | 
			
		||||
		return 1;
 | 
			
		||||
	if (end != NULL && (end - start + 1) < length)
 | 
			
		||||
		return 1;
 | 
			
		||||
	left = length - 1;
 | 
			
		||||
	for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++)
 | 
			
		||||
		left--;
 | 
			
		||||
	return (left == 0) ? length : 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 **  Check whether a string contains only valid UTF-8 characters.
 | 
			
		||||
 */
 | 
			
		||||
bool
 | 
			
		||||
is_valid_utf8(const char *text)
 | 
			
		||||
{
 | 
			
		||||
	unsigned char mask;
 | 
			
		||||
	const unsigned char *p;
 | 
			
		||||
	int length;
 | 
			
		||||
	int left;
 | 
			
		||||
 | 
			
		||||
	for (p = (const unsigned char *)text; *p != '\0';) {
 | 
			
		||||
		mask = 0x80;
 | 
			
		||||
		length = 0;
 | 
			
		||||
 | 
			
		||||
		/* Find out the expected length of the character. */
 | 
			
		||||
		for (; mask > 0 && (*p & mask) == mask; mask >>= 1)
 | 
			
		||||
			length++;
 | 
			
		||||
 | 
			
		||||
		p++;
 | 
			
		||||
 | 
			
		||||
		/* Valid ASCII. */
 | 
			
		||||
		if (length == 0)
 | 
			
		||||
			continue;
 | 
			
		||||
 | 
			
		||||
		/* Invalid length. */
 | 
			
		||||
		if (length < 2 || length > 6)
 | 
			
		||||
			return false;
 | 
			
		||||
 | 
			
		||||
		/* Check that each byte looks like 10xxxxxx, except for the first. */
 | 
			
		||||
		left = length - 1;
 | 
			
		||||
		for (; left > 0 && (*p & 0xc0) == 0x80; p++)
 | 
			
		||||
			left--;
 | 
			
		||||
 | 
			
		||||
		if (left > 0)
 | 
			
		||||
			return false;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 **  Convert a UTF-8 character to UCS-4.  Takes a pointer to the start of the
 | 
			
		||||
 **  character and to the last octet of the string, and to a uint32_t into
 | 
			
		||||
 **  which to put the decoded UCS-4 value.  If end is NULL, expect the string
 | 
			
		||||
 **  pointed to by start to be nul-terminated.  Returns the number of octets in
 | 
			
		||||
 **  the UTF-8 encoding.  If the UTF-8 character is malformed, set result to
 | 
			
		||||
 **  the decimal value of the first octet; this is wrong, but it will generally
 | 
			
		||||
 **  cause the rest of the wildmat matching to do the right thing for non-UTF-8
 | 
			
		||||
 **  input.
 | 
			
		||||
 */
 | 
			
		||||
static int
 | 
			
		||||
utf8_decode(const unsigned char *start, const unsigned char *end,
 | 
			
		||||
	    uint32_t *result)
 | 
			
		||||
{
 | 
			
		||||
	uint32_t value = 0;
 | 
			
		||||
	int length, i;
 | 
			
		||||
	const unsigned char *p = start;
 | 
			
		||||
	unsigned char mask;
 | 
			
		||||
 | 
			
		||||
	length = utf8_length(start, end);
 | 
			
		||||
	if (length < 2) {
 | 
			
		||||
		*result = *start;
 | 
			
		||||
		return 1;
 | 
			
		||||
	}
 | 
			
		||||
	mask = (1 << (7 - length)) - 1;
 | 
			
		||||
	value = *p & mask;
 | 
			
		||||
	p++;
 | 
			
		||||
	for (i = length - 1; i > 0; i--) {
 | 
			
		||||
		value = (value << 6) | (*p & 0x3f);
 | 
			
		||||
		p++;
 | 
			
		||||
	}
 | 
			
		||||
	*result = value;
 | 
			
		||||
	return length;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 **  Match a character class against text, a UCS-4 character.  start is a
 | 
			
		||||
 **  pointer to the first character of the character class, end a pointer to
 | 
			
		||||
 **  the last.  Returns whether the class matches that character.
 | 
			
		||||
 */
 | 
			
		||||
static bool
 | 
			
		||||
match_class(uint32_t text, const unsigned char *start,
 | 
			
		||||
	    const unsigned char *end)
 | 
			
		||||
{
 | 
			
		||||
	bool reversed, allowrange;
 | 
			
		||||
	const unsigned char *p = start;
 | 
			
		||||
	uint32_t first = 0;
 | 
			
		||||
	uint32_t last;
 | 
			
		||||
 | 
			
		||||
	/* Check for an inverted character class (starting with ^).  If the
 | 
			
		||||
	   character matches the character class, we return !reversed; that way,
 | 
			
		||||
	   we return true if it's a regular character class and false if it's a
 | 
			
		||||
	   reversed one.  If the character doesn't match, we return reversed. */
 | 
			
		||||
	reversed = (*p == '^');
 | 
			
		||||
	if (reversed)
 | 
			
		||||
		p++;
 | 
			
		||||
 | 
			
		||||
	/* Walk through the character class until we reach the end or find a
 | 
			
		||||
	   match, handling character ranges as we go.  Only permit a range to
 | 
			
		||||
	   start when allowrange is true; this allows - to be treated like a
 | 
			
		||||
	   normal character as the first character of the class and catches
 | 
			
		||||
	   malformed ranges like a-e-n.  We treat the character at the beginning
 | 
			
		||||
	   of a range as both a regular member of the class and the beginning of
 | 
			
		||||
	   the range; this is harmless (although it means that malformed ranges
 | 
			
		||||
	   like m-a will match m and nothing else). */
 | 
			
		||||
	allowrange = false;
 | 
			
		||||
	while (p <= end) {
 | 
			
		||||
		if (allowrange && *p == '-' && p < end) {
 | 
			
		||||
			p++;
 | 
			
		||||
			p += utf8_decode(p, end, &last);
 | 
			
		||||
			if (text >= first && text <= last)
 | 
			
		||||
				return !reversed;
 | 
			
		||||
			allowrange = false;
 | 
			
		||||
		} else {
 | 
			
		||||
			p += utf8_decode(p, end, &first);
 | 
			
		||||
			if (text == first)
 | 
			
		||||
				return !reversed;
 | 
			
		||||
			allowrange = true;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return reversed;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 **  Match the text against the pattern between start and end.  This is a
 | 
			
		||||
 **  single pattern; a leading ! or @ must already be taken care of, and
 | 
			
		||||
 **  commas must be dealt with outside of this routine.
 | 
			
		||||
 */
 | 
			
		||||
static int
 | 
			
		||||
match_pattern(const unsigned char *text, const unsigned char *start,
 | 
			
		||||
	      const unsigned char *end)
 | 
			
		||||
{
 | 
			
		||||
	const unsigned char *q, *endclass;
 | 
			
		||||
	const unsigned char *p = start;
 | 
			
		||||
	bool ismeta;
 | 
			
		||||
	int matched, width;
 | 
			
		||||
	uint32_t c;
 | 
			
		||||
 | 
			
		||||
	for (; p <= end; p++) {
 | 
			
		||||
		if (!*text && *p != '*')
 | 
			
		||||
			return ABORT;
 | 
			
		||||
 | 
			
		||||
		switch (*p) {
 | 
			
		||||
		case '\\':
 | 
			
		||||
			if (!*++p)
 | 
			
		||||
				return ABORT;
 | 
			
		||||
			/* Fall through. */
 | 
			
		||||
 | 
			
		||||
		default:
 | 
			
		||||
			if (*text++ != *p)
 | 
			
		||||
				return false;
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		case '?':
 | 
			
		||||
			text += ISUTF8(*text) ? utf8_length(text, NULL) : 1;
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		case '*':
 | 
			
		||||
			/* Consecutive stars are equivalent to one.  Advance pattern to
 | 
			
		||||
			   the character after the star. */
 | 
			
		||||
			for (++p; *p == '*'; p++)
 | 
			
		||||
				;
 | 
			
		||||
 | 
			
		||||
			/* A trailing star will match anything. */
 | 
			
		||||
			if (p > end)
 | 
			
		||||
				return true;
 | 
			
		||||
 | 
			
		||||
			/* Basic algorithm: Recurse at each point where the * could
 | 
			
		||||
			   possibly match.  If the match succeeds or aborts, return
 | 
			
		||||
			   immediately; otherwise, try the next position.
 | 
			
		||||
 | 
			
		||||
Optimization: If the character after the * in the pattern
 | 
			
		||||
isn't a metacharacter (the common case), then the * has to
 | 
			
		||||
consume characters at least up to the next occurrence of that
 | 
			
		||||
character in the text.  Scan forward for those points rather
 | 
			
		||||
than recursing at every possible point to save the extra
 | 
			
		||||
function call overhead. */
 | 
			
		||||
			ismeta = (*p == '[' || *p == '?' || *p == '\\');
 | 
			
		||||
			while (*text) {
 | 
			
		||||
				width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
 | 
			
		||||
				if (ismeta) {
 | 
			
		||||
					matched = match_pattern(text, p, end);
 | 
			
		||||
					text += width;
 | 
			
		||||
				} else {
 | 
			
		||||
					while (*text && *text != *p) {
 | 
			
		||||
						text += width;
 | 
			
		||||
						width = ISUTF8(*text) ? utf8_length(text, NULL) : 1;
 | 
			
		||||
					}
 | 
			
		||||
					if (!*text)
 | 
			
		||||
						return ABORT;
 | 
			
		||||
					matched = match_pattern(++text, p + 1, end);
 | 
			
		||||
				}
 | 
			
		||||
				if (matched != false)
 | 
			
		||||
					return matched;
 | 
			
		||||
			}
 | 
			
		||||
			return ABORT;
 | 
			
		||||
 | 
			
		||||
		case '[':
 | 
			
		||||
			/* Find the end of the character class, making sure not to pick
 | 
			
		||||
			   up a close bracket at the beginning of the class. */
 | 
			
		||||
			p++;
 | 
			
		||||
			q = p + (*p == '^') + 1;
 | 
			
		||||
			if (q > end)
 | 
			
		||||
				return ABORT;
 | 
			
		||||
			endclass = memchr(q, ']', (size_t) (end - q + 1));
 | 
			
		||||
			if (!endclass)
 | 
			
		||||
				return ABORT;
 | 
			
		||||
 | 
			
		||||
			/* Do the heavy lifting in another function for clarity, since
 | 
			
		||||
			   character classes are an uncommon case. */
 | 
			
		||||
			text += utf8_decode(text, NULL, &c);
 | 
			
		||||
			if (!match_class(c, p, endclass - 1))
 | 
			
		||||
				return false;
 | 
			
		||||
			p = endclass;
 | 
			
		||||
			break;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return (*text == '\0');
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 **  Takes text and a wildmat expression; a wildmat expression is a
 | 
			
		||||
 **  comma-separated list of wildmat patterns, optionally preceded by ! to
 | 
			
		||||
 **  invert the sense of the expression.  Returns UWILDMAT_MATCH if that
 | 
			
		||||
 **  expression matches the text, UWILDMAT_FAIL otherwise.  If allowpoison is
 | 
			
		||||
 **  set, allow @ to introduce a poison expression (the same as !, but if it
 | 
			
		||||
 **  triggers the failed match the routine returns UWILDMAT_POISON instead).
 | 
			
		||||
 */
 | 
			
		||||
static enum uwildmat
 | 
			
		||||
match_expression(const unsigned char *text, const unsigned char *start,
 | 
			
		||||
		 bool allowpoison)
 | 
			
		||||
{
 | 
			
		||||
	const unsigned char *end, *split;
 | 
			
		||||
	const unsigned char *p = start;
 | 
			
		||||
	bool reverse, escaped;
 | 
			
		||||
	bool match = false;
 | 
			
		||||
	bool poison = false;
 | 
			
		||||
	bool poisoned = false;
 | 
			
		||||
 | 
			
		||||
	/* Handle the empty expression separately, since otherwise end will be
 | 
			
		||||
	   set to an invalid pointer. */
 | 
			
		||||
	if (!*p)
 | 
			
		||||
		return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL;
 | 
			
		||||
	end = start + strlen((const char *) start) - 1;
 | 
			
		||||
 | 
			
		||||
	/* Main match loop.  Find each comma that separates patterns, and attempt
 | 
			
		||||
	   to match the text with each pattern in order.  The last matching
 | 
			
		||||
	   pattern determines whether the whole expression matches. */
 | 
			
		||||
	for (; p <= end + 1; p = split + 1) {
 | 
			
		||||
		if (allowpoison)
 | 
			
		||||
			poison = (*p == '@');
 | 
			
		||||
		reverse = (*p == '!') || poison;
 | 
			
		||||
		if (reverse)
 | 
			
		||||
			p++;
 | 
			
		||||
 | 
			
		||||
		/* Find the first unescaped comma, if any.  If there is none, split
 | 
			
		||||
		   will be one greater than end and point at the nul at the end of
 | 
			
		||||
		   the string. */
 | 
			
		||||
		for (escaped = false, split = p; split <= end; split++) {
 | 
			
		||||
			if (*split == '[') {
 | 
			
		||||
				split++;
 | 
			
		||||
				if (*split == ']')
 | 
			
		||||
					split++;
 | 
			
		||||
				while (split <= end && *split != ']')
 | 
			
		||||
					split++;
 | 
			
		||||
			}
 | 
			
		||||
			if (*split == ',' && !escaped)
 | 
			
		||||
				break;
 | 
			
		||||
			escaped = (*split == '\\') ? !escaped : false;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		/* Optimization: If match == !reverse and poison == poisoned, this
 | 
			
		||||
		   pattern can't change the result, so don't do any work. */
 | 
			
		||||
		if (match == !reverse && poison == poisoned)
 | 
			
		||||
			continue;
 | 
			
		||||
		if (match_pattern(text, p, split - 1) == true) {
 | 
			
		||||
			poisoned = poison;
 | 
			
		||||
			match = !reverse;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if (poisoned)
 | 
			
		||||
		return UWILDMAT_POISON;
 | 
			
		||||
	return match ? UWILDMAT_MATCH : UWILDMAT_FAIL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 **  User-level routine used for wildmats where @ should be treated as a
 | 
			
		||||
 **  regular character.
 | 
			
		||||
 */
 | 
			
		||||
bool
 | 
			
		||||
uwildmat(const char *text, const char *pat)
 | 
			
		||||
{
 | 
			
		||||
	const unsigned char *utext = (const unsigned char *) text;
 | 
			
		||||
	const unsigned char *upat = (const unsigned char *) pat;
 | 
			
		||||
 | 
			
		||||
	if (upat[0] == '*' && upat[1] == '\0')
 | 
			
		||||
		return true;
 | 
			
		||||
	else
 | 
			
		||||
		return (match_expression(utext, upat, false) == UWILDMAT_MATCH);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 **  User-level routine used for wildmats that support poison matches.
 | 
			
		||||
 */
 | 
			
		||||
enum uwildmat
 | 
			
		||||
uwildmat_poison(const char *text, const char *pat)
 | 
			
		||||
{
 | 
			
		||||
	const unsigned char *utext = (const unsigned char *) text;
 | 
			
		||||
	const unsigned char *upat = (const unsigned char *) pat;
 | 
			
		||||
 | 
			
		||||
	if (upat[0] == '*' && upat[1] == '\0')
 | 
			
		||||
		return UWILDMAT_MATCH;
 | 
			
		||||
	else
 | 
			
		||||
		return match_expression(utext, upat, true);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 **  User-level routine for simple expressions (neither , nor ! are special).
 | 
			
		||||
 */
 | 
			
		||||
bool
 | 
			
		||||
uwildmat_simple(const char *text, const char *pat)
 | 
			
		||||
{
 | 
			
		||||
	const unsigned char *utext = (const unsigned char *) text;
 | 
			
		||||
	const unsigned char *upat = (const unsigned char *) pat;
 | 
			
		||||
	size_t length;
 | 
			
		||||
 | 
			
		||||
	if (upat[0] == '*' && upat[1] == '\0')
 | 
			
		||||
		return true;
 | 
			
		||||
	else {
 | 
			
		||||
		length = strlen(pat);
 | 
			
		||||
		return (match_pattern(utext, upat, upat + length - 1) == true);
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										24
									
								
								lib/uwildmat/uwildmat.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								lib/uwildmat/uwildmat.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,24 @@
 | 
			
		||||
 | 
			
		||||
#ifndef UWILDMAT_H
 | 
			
		||||
#define UWILDMAT_H 1
 | 
			
		||||
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <sys/types.h>
 | 
			
		||||
#include <stdbool.h>
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
**  WILDMAT MATCHING
 | 
			
		||||
*/
 | 
			
		||||
enum uwildmat {
 | 
			
		||||
    UWILDMAT_FAIL   = 0,
 | 
			
		||||
    UWILDMAT_MATCH  = 1,
 | 
			
		||||
    UWILDMAT_POISON
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
extern bool             is_valid_utf8(const char *start);
 | 
			
		||||
extern bool             uwildmat(const char *text, const char *pat);
 | 
			
		||||
extern bool             uwildmat_simple(const char *text, const char *pat);
 | 
			
		||||
extern enum uwildmat    uwildmat_poison(const char *text, const char *pat);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif /* UWILDMAT_H */
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user