/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * * * * nsWildCard.cpp: shell-like wildcard match routines * * See nsIZipReader.findEntries documentation in nsIZipReader.idl for * a description of the syntax supported by the routines in this file. * * Rob McCool * */ #include "nsWildCard.h" #include "nsXPCOM.h" #include "nsCRTGlue.h" #include "nsCharTraits.h" /* -------------------- ASCII-specific character methods ------------------- */ typedef int static_assert_character_code_arrangement['a' > 'A' ? 1 : -1]; template<class T> static int alpha(T aChar) { return ('a' <= aChar && aChar <= 'z') || ('A' <= aChar && aChar <= 'Z'); } template<class T> static int alphanumeric(T aChar) { return ('0' <= aChar && aChar <= '9') || ::alpha(aChar); } template<class T> static int lower(T aChar) { return ('A' <= aChar && aChar <= 'Z') ? aChar + ('a' - 'A') : aChar; } template<class T> static int upper(T aChar) { return ('a' <= aChar && aChar <= 'z') ? aChar - ('a' - 'A') : aChar; } /* ----------------------------- _valid_subexp ---------------------------- */ template<class T> static int _valid_subexp(const T* aExpr, T aStop1, T aStop2) { int x; int nsc = 0; /* Number of special characters */ int np; /* Number of pipe characters in union */ int tld = 0; /* Number of tilde characters */ for (x = 0; aExpr[x] && (aExpr[x] != aStop1) && (aExpr[x] != aStop2); ++x) { switch (aExpr[x]) { case '~': if (tld) { /* at most one exclusion */ return INVALID_SXP; } if (aStop1) { /* no exclusions within unions */ return INVALID_SXP; } if (!aExpr[x + 1]) { /* exclusion cannot be last character */ return INVALID_SXP; } if (!x) { /* exclusion cannot be first character */ return INVALID_SXP; } ++tld; MOZ_FALLTHROUGH; case '*': case '?': case '$': ++nsc; break; case '[': ++nsc; if ((!aExpr[++x]) || (aExpr[x] == ']')) { return INVALID_SXP; } for (; aExpr[x] && (aExpr[x] != ']'); ++x) { if (aExpr[x] == '\\' && !aExpr[++x]) { return INVALID_SXP; } } if (!aExpr[x]) { return INVALID_SXP; } break; case '(': ++nsc; if (aStop1) { /* no nested unions */ return INVALID_SXP; } np = -1; do { int t = ::_valid_subexp(&aExpr[++x], T(')'), T('|')); if (t == 0 || t == INVALID_SXP) { return INVALID_SXP; } x += t; if (!aExpr[x]) { return INVALID_SXP; } ++np; } while (aExpr[x] == '|'); if (np < 1) { /* must be at least one pipe */ return INVALID_SXP; } break; case ')': case ']': case '|': return INVALID_SXP; case '\\': ++nsc; if (!aExpr[++x]) { return INVALID_SXP; } break; default: break; } } if (!aStop1 && !nsc) { /* must be at least one special character */ return NON_SXP; } return ((aExpr[x] == aStop1 || aExpr[x] == aStop2) ? x : INVALID_SXP); } template<class T> int NS_WildCardValid_(const T* aExpr) { int x = ::_valid_subexp(aExpr, T('\0'), T('\0')); return (x < 0 ? x : VALID_SXP); } int NS_WildCardValid(const char* aExpr) { return NS_WildCardValid_(aExpr); } int NS_WildCardValid(const char16_t* aExpr) { return NS_WildCardValid_(aExpr); } /* ----------------------------- _shexp_match ----------------------------- */ #define MATCH 0 #define NOMATCH 1 #define ABORTED -1 template<class T> static int _shexp_match(const T* aStr, const T* aExpr, bool aCaseInsensitive, unsigned int aLevel); /** * Count characters until we reach a NUL character or either of the * two delimiter characters, stop1 or stop2. If we encounter a bracketed * expression, look only for NUL or ']' inside it. Do not look for stop1 * or stop2 inside it. Return ABORTED if bracketed expression is unterminated. * Handle all escaping. * Return index in input string of first stop found, or ABORTED if not found. * If "dest" is non-nullptr, copy counted characters to it and null terminate. */ template<class T> static int _scan_and_copy(const T* aExpr, T aStop1, T aStop2, T* aDest) { int sx; /* source index */ T cc; for (sx = 0; (cc = aExpr[sx]) && cc != aStop1 && cc != aStop2; ++sx) { if (cc == '\\') { if (!aExpr[++sx]) { return ABORTED; /* should be impossible */ } } else if (cc == '[') { while ((cc = aExpr[++sx]) && cc != ']') { if (cc == '\\' && !aExpr[++sx]) { return ABORTED; } } if (!cc) { return ABORTED; /* should be impossible */ } } } if (aDest && sx) { /* Copy all but the closing delimiter. */ memcpy(aDest, aExpr, sx * sizeof(T)); aDest[sx] = 0; } return cc ? sx : ABORTED; /* index of closing delimiter */ } /* On input, expr[0] is the opening parenthesis of a union. * See if any of the alternatives in the union matches as a pattern. * The strategy is to take each of the alternatives, in turn, and append * the rest of the expression (after the closing ')' that marks the end of * this union) to that alternative, and then see if the resultant expression * matches the input string. Repeat this until some alternative matches, * or we have an abort. */ template<class T> static int _handle_union(const T* aStr, const T* aExpr, bool aCaseInsensitive, unsigned int aLevel) { int sx; /* source index */ int cp; /* source index of closing parenthesis */ int count; int ret = NOMATCH; T* e2; /* Find the closing parenthesis that ends this union in the expression */ cp = ::_scan_and_copy(aExpr, T(')'), T('\0'), static_cast<T*>(nullptr)); if (cp == ABORTED || cp < 4) { /* must be at least "(a|b" before ')' */ return ABORTED; } ++cp; /* now index of char after closing parenthesis */ e2 = (T*)moz_xmalloc((1 + nsCharTraits<T>::length(aExpr)) * sizeof(T)); if (!e2) { return ABORTED; } for (sx = 1; ; ++sx) { /* Here, aExpr[sx] is one character past the preceding '(' or '|'. */ /* Copy everything up to the next delimiter to e2 */ count = ::_scan_and_copy(aExpr + sx, T(')'), T('|'), e2); if (count == ABORTED || !count) { ret = ABORTED; break; } sx += count; /* Append everything after closing parenthesis to e2. This is safe. */ nsCharTraits<T>::copy(e2 + count, aExpr + cp, nsCharTraits<T>::length(aExpr + cp) + 1); ret = ::_shexp_match(aStr, e2, aCaseInsensitive, aLevel + 1); if (ret != NOMATCH || !aExpr[sx] || aExpr[sx] == ')') { break; } } free(e2); if (sx < 2) { ret = ABORTED; } return ret; } /* returns 1 if val is in range from start..end, case insensitive. */ static int _is_char_in_range(unsigned char aStart, unsigned char aEnd, unsigned char aVal) { char map[256]; memset(map, 0, sizeof(map)); while (aStart <= aEnd) { map[lower(aStart++)] = 1; } return map[lower(aVal)]; } template<class T> static int _shexp_match(const T* aStr, const T* aExpr, bool aCaseInsensitive, unsigned int aLevel) { int x; /* input string index */ int y; /* expression index */ int ret, neg; if (aLevel > 20) { /* Don't let the stack get too deep. */ return ABORTED; } for (x = 0, y = 0; aExpr[y]; ++y, ++x) { if (!aStr[x] && aExpr[y] != '$' && aExpr[y] != '*') { return NOMATCH; } switch (aExpr[y]) { case '$': if (aStr[x]) { return NOMATCH; } --x; /* we don't want loop to increment x */ break; case '*': while (aExpr[++y] == '*') { } if (!aExpr[y]) { return MATCH; } while (aStr[x]) { ret = ::_shexp_match(&aStr[x++], &aExpr[y], aCaseInsensitive, aLevel + 1); switch (ret) { case NOMATCH: continue; case ABORTED: return ABORTED; default: return MATCH; } } if (aExpr[y] == '$' && aExpr[y + 1] == '\0' && !aStr[x]) { return MATCH; } else { return NOMATCH; } case '[': { T start, end = 0; int i; ++y; neg = (aExpr[y] == '^' && aExpr[y + 1] != ']'); if (neg) { ++y; } i = y; start = aExpr[i++]; if (start == '\\') { start = aExpr[i++]; } if (::alphanumeric(start) && aExpr[i++] == '-') { end = aExpr[i++]; if (end == '\\') { end = aExpr[i++]; } } if (::alphanumeric(end) && aExpr[i] == ']') { /* This is a range form: a-b */ T val = aStr[x]; if (end < start) { /* swap them */ T tmp = end; end = start; start = tmp; } if (aCaseInsensitive && ::alpha(val)) { val = ::_is_char_in_range((unsigned char)start, (unsigned char)end, (unsigned char)val); if (neg == val) { return NOMATCH; } } else if (neg != (val < start || val > end)) { return NOMATCH; } y = i; } else { /* Not range form */ int matched = 0; for (; aExpr[y] != ']'; ++y) { if (aExpr[y] == '\\') { ++y; } if (aCaseInsensitive) { matched |= (::upper(aStr[x]) == ::upper(aExpr[y])); } else { matched |= (aStr[x] == aExpr[y]); } } if (neg == matched) { return NOMATCH; } } } break; case '(': if (!aExpr[y + 1]) { return ABORTED; } return ::_handle_union(&aStr[x], &aExpr[y], aCaseInsensitive, aLevel + 1); case '?': break; case ')': case ']': case '|': return ABORTED; case '\\': ++y; MOZ_FALLTHROUGH; default: if (aCaseInsensitive) { if (::upper(aStr[x]) != ::upper(aExpr[y])) { return NOMATCH; } } else { if (aStr[x] != aExpr[y]) { return NOMATCH; } } break; } } return (aStr[x] ? NOMATCH : MATCH); } template<class T> static int ns_WildCardMatch(const T* aStr, const T* aXp, bool aCaseInsensitive) { T* expr = nullptr; int ret = MATCH; if (!nsCharTraits<T>::find(aXp, nsCharTraits<T>::length(aXp), T('~'))) { return ::_shexp_match(aStr, aXp, aCaseInsensitive, 0); } expr = (T*)moz_xmalloc((nsCharTraits<T>::length(aXp) + 1) * sizeof(T)); if (!expr) { return NOMATCH; } memcpy(expr, aXp, (nsCharTraits<T>::length(aXp) + 1) * sizeof(T)); int x = ::_scan_and_copy(expr, T('~'), T('\0'), static_cast<T*>(nullptr)); if (x != ABORTED && expr[x] == '~') { expr[x++] = '\0'; ret = ::_shexp_match(aStr, &expr[x], aCaseInsensitive, 0); switch (ret) { case NOMATCH: ret = MATCH; break; case MATCH: ret = NOMATCH; break; default: break; } } if (ret == MATCH) { ret = ::_shexp_match(aStr, expr, aCaseInsensitive, 0); } free(expr); return ret; } template<class T> int NS_WildCardMatch_(const T* aStr, const T* aExpr, bool aCaseInsensitive) { int is_valid = NS_WildCardValid(aExpr); switch (is_valid) { case INVALID_SXP: return -1; default: return ::ns_WildCardMatch(aStr, aExpr, aCaseInsensitive); } } int NS_WildCardMatch(const char* aStr, const char* aXp, bool aCaseInsensitive) { return NS_WildCardMatch_(aStr, aXp, aCaseInsensitive); } int NS_WildCardMatch(const char16_t* aStr, const char16_t* aXp, bool aCaseInsensitive) { return NS_WildCardMatch_(aStr, aXp, aCaseInsensitive); }