/*
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
 *
 * Sccsid @(#)re.h	1.15 (gritter) 2/6/05
 */
/*  UNIX(R) Regular Expression Library
 *
 *  Note: Code is released under the GNU LGPL
 *
 *  Copyright (C) 2001 Caldera International, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to:
 *        Free Software Foundation, Inc.
 *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef	LIBUXRE_RE_H
#define	LIBUXRE_RE_H

	/*
	* Maps safe external tag to internal one.
	*
	* re_coll_ is redefined to lc_collate ahead of the #include of
	* <regex.h> that follows, so every later occurrence of the identifier
	* re_coll_ is replaced by lc_collate, the structure tag that Bracket
	* and Lex point to in this header.
	* NOTE(review): presumably <regex.h> refers to `struct re_coll_' --
	* confirm against that header.
	* The corresponding <fnmatch.h> mapping is currently commented out.
	*/
#define re_coll_	lc_collate	/* <regex.h> */
/*	#define __fnm_collate	lc_collate	*/	/* <fnmatch.h> */

#include <limits.h>
#include <regex.h>
/*	#include <fnmatch.h>	*/
#include <colldata.h>

/*
 * Array dimensions used by the Bracket structure.
 *
 * NBSHT is the number of bits in one unsigned short.  NBYTE is the
 * number of unsigned shorts whose bits together cover 1 << CHAR_BIT
 * positions, rounded up (UCHAR_MAX is (1 << CHAR_BIT) - 1).
 */
#define NBSHT	(CHAR_BIT * sizeof(unsigned short))
#define NBYTE	((UCHAR_MAX + NBSHT) / NBSHT)	/* length of Bracket.byte[] */
#define NTYPE	4	/* length of Bracket.type[] */
#define NWIDE	32	/* length of Bracket.wide[] */
#define NQUIV	4	/* length of Bracket.quiv[] */

/*
 * State for one bracket expression ([...]); this is the object passed to
 * libuxre_bktmbcomp(), libuxre_bktmbexec() and libuxre_bktfree().
 *
 * Three lists (type, quiv, wide) each have a fixed-size array inside the
 * structure, an element counter, and a matching "ex" pointer.
 * NOTE(review): the ex* pointers presumably hold additional entries once
 * the fixed arrays are full -- confirm against the implementation.
 */
typedef struct
{
	struct lc_collate	*col;	/* only member set by caller */
	wctype_t		*extype;	/* pointer companion of type[] */
	wuchar_type		*exquiv;	/* pointer companion of quiv[] */
	wchar_t			*exwide;	/* pointer companion of wide[] */
	wctype_t		type[NTYPE];	/* presumably [:class:] entries -- confirm */
	wuchar_type		quiv[NQUIV];	/* presumably [=equiv=] entries -- confirm */
	wchar_t			wide[NWIDE];	/* wide character entries */
	unsigned short		byte[NBYTE];	/* NBYTE * NBSHT bits; presumably one per byte value */
	unsigned short		ntype;	/* presumably count for the type list -- confirm */
	unsigned short		nquiv;	/* presumably count for the quiv list -- confirm */
	unsigned short		nwide;	/* presumably count for the wide list -- confirm */
	unsigned int		flags;	/* presumably BKT_* option bits -- confirm */
} Bracket;

/*
 * Bracket expression option bits; every flag occupies its own bit, so
 * they can be OR-ed together freely.
 */
#define BKT_NEGATED	(1 << 0)	/* complemented set */
#define BKT_ONECASE	(1 << 1)	/* uppercase same as lowercase */
#define BKT_NOTNL	(1 << 2)	/* do not match newline when BKT_NEGATED */
#define BKT_BADRANGE	(1 << 3)	/* accept [m-a] ranges as [ma] */
#define BKT_SEPRANGE	(1 << 4)	/* disallow [a-m-z] style ranges */
#define BKT_NLBAD	(1 << 5)	/* newline disallowed */
#define BKT_SLASHBAD	(1 << 6)	/* slash disallowed (for pathnames) */
#define BKT_EMPTY	(1 << 7)	/* take leading ] as end (empty set) */
#define BKT_ESCAPE	(1 << 8)	/* allow \ as quote for next anything */
#define BKT_QUOTE	(1 << 9)	/* allow \ as quote for \\, \^, \- or \] */
#define BKT_ESCNL	(1 << 10)	/* take \n as the newline character */
#define BKT_ESCSEQ	(1 << 11)	/* otherwise, take \ as in C escapes */
#define BKT_ODDRANGE	(1 << 12)	/* oawk oddity: [m-a] means [m] */
#define BKT_NOI18N	(1 << 13)	/* disable [::] [==] [..] */
#define BKT_OLDESC	(1 << 14)	/* enable \b \f \n \r \t only */

	/*
	* These error returns for libuxre_bktmbcomp() are directly tied to
	* the error returns for regcomp() for convenience.
	*
	* Each BKT_* code is the arithmetic negation of the corresponding
	* REG_* code, which fits the "<0 error" return convention documented
	* for libuxre_bktmbcomp() further down in this header.
	* NOTE(review): this assumes the REG_* codes are positive -- confirm
	* against <regex.h>.
	*/
#define BKT_BADPAT	(-REG_BADPAT)
#define BKT_ECOLLATE	(-REG_ECOLLATE)
#define BKT_ECTYPE	(-REG_ECTYPE)
#define BKT_EEQUIV	(-REG_EEQUIV)
#define BKT_BADCHAR	(-REG_EBKTCHAR)
#define BKT_EBRACK	(-REG_EBRACK)
#define BKT_EMPTYSUBBKT	(-REG_EMPTYSUBBKT)
#define BKT_ERANGE	(-REG_ERANGE)
#define BKT_ESPACE	(-REG_ESPACE)
#define BKT_BADESC	(-REG_BADESC)
#define	BKT_ILLSEQ	(-REG_ILLSEQ)

	/*
	* These must be distinct from the flags in <fnmatch.h>.
	* (Library-internal additions; <fnmatch.h> itself is not included
	* by this header, so the distinctness is not checked here.)
	*/
#define FNM_COLLATE	0x2000	/* have collation information */
#define FNM_CURRENT	0x4000	/* have full-sized fnm_t structure */

	/*
	* These must be distinct from the flags in <regex.h>.
	* Library-internal additions occupying bits 29, 30 and 31.
	* NOTE(review): judging by the names, REG_NFA/REG_DFA presumably
	* record which engine(s) a pattern was compiled for and REG_GOTBKT
	* that a bracket expression is present -- confirm against the users.
	*/
#define REG_NFA		0x20000000
#define REG_DFA		0x40000000
#define REG_GOTBKT	0x80000000

/*
 * Limits for brace repetition counts.  The counts are kept in
 * Info.num[], an array of unsigned short, hence the USHRT_MAX value.
 * NOTE(review): BRACE_INF presumably marks a missing upper bound,
 * as in {n,} -- confirm against the parser.
 */
#define BRACE_INF	USHRT_MAX
#define BRACE_MAX	5100	/* arbitrary number < SHRT_MAX */
#define BRACE_DFAMAX	255	/* max amount for r.e. duplication */

/*
 * Operator-specific payload.  Which member is meaningful depends on the
 * operator (ROP_*) of the token or tree node it belongs to; the comment
 * on each member names the operators that use it.
 */
typedef union	/* extra info always kept for some tokens/nodes */
{
	Bracket		*bkt;	/* ROP_BKT */
	size_t		sub;	/* ROP_LP (ROP_RP), ROP_REF */
	unsigned short	num[2];	/* ROP_BRACE: num[0]=low, num[1]=high */
} Info;

/*
 * Parser/lexer state; a pointer to it is passed to libuxre_regparse(),
 * libuxre_regdfacomp() and libuxre_regnfacomp().
 *
 * The member notes marked "presumably" are inferred from names only and
 * should be confirmed against the parser sources.
 */
typedef struct	/* lexical context while parsing */
{
	Info			info;	/* extra data belonging to tok (see Info) */
	const unsigned char	*pat;	/* presumably the pattern text being scanned */
	unsigned char		*clist;
	struct lc_collate	*col;	/* same type as Bracket.col */
	unsigned long		flags;
	w_type			tok;	/* presumably the current token: a character or a ROP_* code */
	size_t			maxref;	/* presumably the highest back reference seen */
	size_t			nleft;	/* presumably the count of left parentheses */
	size_t			nright;	/* presumably the count of right parentheses */
	size_t			nclist;	/* presumably the size of clist */
	int			bktflags;	/* presumably BKT_* option bits */
	int			err;	/* presumably an error code */
	int			mb_cur_max;	/* presumably a cached MB_CUR_MAX */
} Lex;

/*
 * One node of the parse tree built for a regular expression.
 *
 * The op member selects how the two unions are read: a positive op is a
 * literal character to match, while the operator codes (ROP_*) defined
 * later in this header are all negative.  Their kind (leaf, unary or
 * binary) can be recovered with KIND_ROP().
 */
typedef struct t_tree	Tree;	/* RE parse tree node */
struct t_tree
{
	union
	{
		Tree	*ptr;	/* unary & binary nodes */
		size_t	pos;	/* position for DFA leaves */
	} left;
	union
	{
		Tree	*ptr;	/* binary nodes */
		Info	info;	/* operator-specific payload (see Info) */
	} right;
	Tree		*parent;	/* enclosing node */
	w_type		op;	/* positive => char. to match */
};

/*
 * The two engine structures are only declared here, not defined; code
 * including this header can hold pointers to them but cannot see their
 * members.
 */
typedef struct re_dfa_	Dfa;	/* DFA engine description */
typedef struct re_nfa_	Nfa;	/* NFA engine description */

/*
 * Arguments for one matching run; a pointer to it is passed to
 * libuxre_regdfaexec() and libuxre_regnfaexec().
 * NOTE(review): the members look like the regexec() arguments (subject
 * string, match array and its length, flags) -- confirm with the caller.
 */
typedef struct
{
	const unsigned char	*str;	/* presumably the string to be searched */
	regmatch_t		*match;	/* presumably where match offsets are stored */
	size_t			nmatch;	/* presumably the number of match[] entries */
	unsigned long		flags;
	int			mb_cur_max;	/* presumably a cached MB_CUR_MAX */
} Exec;

	/*
	* Regular expression operators.  Some only used internally.
	* All are negative, to distinguish them from the regular
	* "match this particular wide character" operation.
	*
	* An operator code is the negation of a small integer whose low
	* four bits hold a per-kind sequence number and whose next bits
	* hold the kind (number of operands) listed here.
	*/
#define LEAF_ROP	0	/* no operands */
#define UNARY_ROP	1	/* one operand */
#define BINARY_ROP	2	/* two operands */

	/* Build the code for kind k, sequence number v. */
#define MAKE_ROP(k, v)	(-(((k) << 4) | (v)))
	/* Recover the kind from an operator code made by MAKE_ROP(). */
#define KIND_ROP(v)	(-(v) >> 4)

/*
 * The operator codes themselves, grouped by kind.  Sequence numbers
 * restart at 1 within each kind and must stay below 16 so that they do
 * not spill into the kind bits that MAKE_ROP() places above bit 3.
 */

/* Binary operators. */
#define ROP_OR		MAKE_ROP(BINARY_ROP, 1)
#define ROP_CAT		MAKE_ROP(BINARY_ROP, 2)

/* Unary operators. */
#define ROP_STAR	MAKE_ROP(UNARY_ROP, 1)
#define ROP_PLUS	MAKE_ROP(UNARY_ROP, 2)
#define ROP_QUEST	MAKE_ROP(UNARY_ROP, 3)
#define ROP_BRACE	MAKE_ROP(UNARY_ROP, 4)
#define ROP_LP		MAKE_ROP(UNARY_ROP, 5)
#define ROP_RP		MAKE_ROP(UNARY_ROP, 6)

/* Leaves. */
#define ROP_NOP		MAKE_ROP(LEAF_ROP, 1)	/* temporary */
#define ROP_BOL		MAKE_ROP(LEAF_ROP, 2)	/* ^ anchor */
#define ROP_EOL		MAKE_ROP(LEAF_ROP, 3)	/* $ anchor */
#define ROP_ALL		MAKE_ROP(LEAF_ROP, 4)	/* anything (added) */
#define ROP_ANYCH	MAKE_ROP(LEAF_ROP, 5)	/* . w/\n */
#define ROP_NOTNL	MAKE_ROP(LEAF_ROP, 6)	/* . w/out \n */
#define ROP_EMPTY	MAKE_ROP(LEAF_ROP, 7)	/* empty string */
#define ROP_NONE	MAKE_ROP(LEAF_ROP, 8)	/* match failure */
#define ROP_BKT		MAKE_ROP(LEAF_ROP, 9)	/* [...] */
#define ROP_BKTCOPY	MAKE_ROP(LEAF_ROP, 10)	/* [...] (duplicated) */
#define ROP_LT		MAKE_ROP(LEAF_ROP, 11)	/* \< word begin */
#define ROP_GT		MAKE_ROP(LEAF_ROP, 12)	/* \> word end */
#define ROP_REF		MAKE_ROP(LEAF_ROP, 13)	/* \digit */
#define ROP_END		MAKE_ROP(LEAF_ROP, 14)	/* final (added) */

	/*
	* Library-internal entry points.
	*
	* LIBUXRE_STATIC is not defined in this header.
	* NOTE(review): it is presumably supplied by an included header or
	* by the build before this point -- confirm.
	*
	* Return values:
	*  libuxre_bktmbcomp()
	*	<0 error (see BKT_* above); >0 #bytes scanned
	*  libuxre_bktmbexec()
	*	<0 doesn't match; >=0 matches, #extra bytes scanned
	*/

/* Bracket expression handling. */
LIBUXRE_STATIC void	libuxre_bktfree(Bracket *);
LIBUXRE_STATIC int	libuxre_bktmbcomp(Bracket *, const unsigned char *,
				int, int);
LIBUXRE_STATIC int	libuxre_bktmbexec(Bracket *, wchar_t,
				const unsigned char *, int);

/* Parse tree construction and disposal. */
LIBUXRE_STATIC void	libuxre_regdeltree(Tree *, int);
LIBUXRE_STATIC Tree	*libuxre_reg1tree(w_type, Tree *);
LIBUXRE_STATIC Tree	*libuxre_reg2tree(w_type, Tree *, Tree *);
LIBUXRE_STATIC Tree	*libuxre_regparse(Lex *, const unsigned char *, int);

/* DFA engine; the delete routine is always declared extern. */
extern void		libuxre_regdeldfa(Dfa *);
LIBUXRE_STATIC int	libuxre_regdfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int	libuxre_regdfaexec(Dfa *, Exec *);

/* NFA engine; the delete routine is always declared extern. */
extern void		libuxre_regdelnfa(Nfa *);
LIBUXRE_STATIC int	libuxre_regnfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int	libuxre_regnfaexec(Nfa *, Exec *);
#endif	/* !LIBUXRE_RE_H */


/* syntax highlighted by Code2HTML, v. 0.9.1 */