Merge pull request #416 from greatroar/match-cleanup

Clean up matching/globbing code
This commit is contained in:
Cassondra Foesch 2021-03-16 01:20:24 +00:00 committed by GitHub
commit 460ad57385
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 179 deletions

View File

@ -1935,6 +1935,7 @@ var globTests = []struct {
{"match.go", "match.go"},
{"mat?h.go", "match.go"},
{"ma*ch.go", "match.go"},
{`\m\a\t\c\h\.\g\o`, "match.go"},
{"../*/match.go", "../sftp/match.go"},
}

200
match.go
View File

@ -3,198 +3,39 @@ package sftp
import (
"path"
"strings"
"unicode/utf8"
)
// ErrBadPattern indicates a globbing pattern was malformed.
var ErrBadPattern = path.ErrBadPattern
// Unix separator
const separator = "/"
// Match reports whether name matches the shell file name pattern.
// The pattern syntax is:
//
// pattern:
// { term }
// term:
// '*' matches any sequence of non-Separator characters
// '?' matches any single non-Separator character
// '[' [ '^' ] { character-range } ']'
// character class (must be non-empty)
// c matches character c (c != '*', '?', '\\', '[')
// '\\' c matches character c
//
// character-range:
// c matches character c (c != '\\', '-', ']')
// '\\' c matches character c
// lo '-' hi matches character c for lo <= c <= hi
//
// Match requires pattern to match all of name, not just a substring.
// The only possible returned error is ErrBadPattern, when pattern
// is malformed.
//
// Match reports whether name matches the shell pattern.
//
// This is an alias for path.Match from the standard library,
// offered so that callers need not import the path package.
// For details, see https://golang.org/pkg/path/#Match.
func Match(pattern, name string) (matched bool, err error) {
return path.Match(pattern, name)
}
// detect if byte(char) is path separator
func isPathSeparator(c byte) bool {
return string(c) == "/"
return c == '/'
}
// scanChunk gets the next segment of pattern, which is a non-star string
// possibly preceded by a star.
func scanChunk(pattern string) (star bool, chunk, rest string) {
for len(pattern) > 0 && pattern[0] == '*' {
pattern = pattern[1:]
star = true
}
inrange := false
var i int
Scan:
for i = 0; i < len(pattern); i++ {
switch pattern[i] {
case '\\':
// error check handled in matchChunk: bad pattern.
if i+1 < len(pattern) {
i++
}
case '[':
inrange = true
case ']':
inrange = false
case '*':
if !inrange {
break Scan
}
}
}
return star, pattern[0:i], pattern[i:]
}
// matchChunk checks whether chunk matches the beginning of s.
// If so, it returns the remainder of s (after the match).
// Chunk is all single-character operators: literals, char classes, and ?.
func matchChunk(chunk, s string) (rest string, ok bool, err error) {
for len(chunk) > 0 {
if len(s) == 0 {
return
}
switch chunk[0] {
case '[':
// character class
r, n := utf8.DecodeRuneInString(s)
s = s[n:]
chunk = chunk[1:]
// We can't end right after '[', we're expecting at least
// a closing bracket and possibly a caret.
if len(chunk) == 0 {
err = ErrBadPattern
return
}
// possibly negated
negated := chunk[0] == '^'
if negated {
chunk = chunk[1:]
}
// parse all ranges
match := false
nrange := 0
for {
if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 {
chunk = chunk[1:]
break
}
var lo, hi rune
if lo, chunk, err = getEsc(chunk); err != nil {
return
}
hi = lo
if chunk[0] == '-' {
if hi, chunk, err = getEsc(chunk[1:]); err != nil {
return
}
}
if lo <= r && r <= hi {
match = true
}
nrange++
}
if match == negated {
return
}
case '?':
if isPathSeparator(s[0]) {
return
}
_, n := utf8.DecodeRuneInString(s)
s = s[n:]
chunk = chunk[1:]
case '\\':
chunk = chunk[1:]
if len(chunk) == 0 {
err = ErrBadPattern
return
}
fallthrough
default:
if chunk[0] != s[0] {
return
}
s = s[1:]
chunk = chunk[1:]
}
}
return s, true, nil
}
// getEsc gets a possibly-escaped character from chunk, for a character class.
func getEsc(chunk string) (r rune, nchunk string, err error) {
if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
err = ErrBadPattern
return
}
if chunk[0] == '\\' {
chunk = chunk[1:]
if len(chunk) == 0 {
err = ErrBadPattern
return
}
}
r, n := utf8.DecodeRuneInString(chunk)
if r == utf8.RuneError && n == 1 {
err = ErrBadPattern
}
nchunk = chunk[n:]
if len(nchunk) == 0 {
err = ErrBadPattern
}
return
}
// Split splits path immediately following the final Separator,
// Split splits the path p immediately following the final slash,
// separating it into a directory and file name component.
// If there is no Separator in path, Split returns an empty dir
// and file set to path.
// The returned values have the property that path = dir+file.
func Split(path string) (dir, file string) {
i := len(path) - 1
for i >= 0 && !isPathSeparator(path[i]) {
i--
}
return path[:i+1], path[i+1:]
//
// This is an alias for path.Split from the standard library,
// offered so that callers need not import the path package.
// For details, see https://golang.org/pkg/path/#Split.
func Split(p string) (dir, file string) {
return path.Split(p)
}
// Glob returns the names of all files matching pattern or nil
// if there is no matching file. The syntax of patterns is the same
// as in Match. The pattern may describe hierarchical names such as
// /usr/*/bin/ed (assuming the Separator is '/').
// /usr/*/bin/ed.
//
// Glob ignores file system errors such as I/O errors reading directories.
// The only possible returned error is ErrBadPattern, when pattern
@ -241,8 +82,7 @@ func cleanGlobPath(path string) string {
switch path {
case "":
return "."
case string(separator):
// do nothing to the path
case "/":
return path
default:
return path[0 : len(path)-1] // chop off trailing separator
@ -280,9 +120,12 @@ func (c *Client) glob(dir, pattern string, matches []string) (m []string, e erro
return
}
// Join joins any number of path elements into a single path, adding
// a Separator if necessary.
// all empty strings are ignored.
// Join joins any number of path elements into a single path, separating
// them with slashes.
//
// This is an alias for path.Join from the standard library,
// offered so that callers need not import the path package.
// For details, see https://golang.org/pkg/path/#Join.
func Join(elem ...string) string {
return path.Join(elem...)
}
@ -290,6 +133,5 @@ func Join(elem ...string) string {
// hasMeta reports whether path contains any of the magic characters
// recognized by Match.
func hasMeta(path string) bool {
// TODO(niemeyer): Should other magic characters be added here?
return strings.ContainsAny(path, "*?[")
return strings.ContainsAny(path, "\\*?[")
}