parse.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. package httprule
  2. import (
  3. "fmt"
  4. "strings"
  5. )
  6. // InvalidTemplateError indicates that the path template is not valid.
  7. type InvalidTemplateError struct {
  8. tmpl string
  9. msg string
  10. }
  11. func (e InvalidTemplateError) Error() string {
  12. return fmt.Sprintf("%s: %s", e.msg, e.tmpl)
  13. }
  14. // Parse parses the string representation of path template
  15. func Parse(tmpl string) (Compiler, error) {
  16. if !strings.HasPrefix(tmpl, "/") {
  17. return template{}, InvalidTemplateError{tmpl: tmpl, msg: "no leading /"}
  18. }
  19. tokens, verb := tokenize(tmpl[1:])
  20. p := parser{tokens: tokens}
  21. segs, err := p.topLevelSegments()
  22. if err != nil {
  23. return template{}, InvalidTemplateError{tmpl: tmpl, msg: err.Error()}
  24. }
  25. return template{
  26. segments: segs,
  27. verb: verb,
  28. template: tmpl,
  29. }, nil
  30. }
  31. func tokenize(path string) (tokens []string, verb string) {
  32. if path == "" {
  33. return []string{eof}, ""
  34. }
  35. const (
  36. init = iota
  37. field
  38. nested
  39. )
  40. st := init
  41. for path != "" {
  42. var idx int
  43. switch st {
  44. case init:
  45. idx = strings.IndexAny(path, "/{")
  46. case field:
  47. idx = strings.IndexAny(path, ".=}")
  48. case nested:
  49. idx = strings.IndexAny(path, "/}")
  50. }
  51. if idx < 0 {
  52. tokens = append(tokens, path)
  53. break
  54. }
  55. switch r := path[idx]; r {
  56. case '/', '.':
  57. case '{':
  58. st = field
  59. case '=':
  60. st = nested
  61. case '}':
  62. st = init
  63. }
  64. if idx == 0 {
  65. tokens = append(tokens, path[idx:idx+1])
  66. } else {
  67. tokens = append(tokens, path[:idx], path[idx:idx+1])
  68. }
  69. path = path[idx+1:]
  70. }
  71. l := len(tokens)
  72. // See
  73. // https://github.com/grpc-ecosystem/grpc-gateway/pull/1947#issuecomment-774523693 ;
  74. // although normal and backwards-compat logic here is to use the last index
  75. // of a colon, if the final segment is a variable followed by a colon, the
  76. // part following the colon must be a verb. Hence if the previous token is
  77. // an end var marker, we switch the index we're looking for to Index instead
  78. // of LastIndex, so that we correctly grab the remaining part of the path as
  79. // the verb.
  80. var penultimateTokenIsEndVar bool
  81. switch l {
  82. case 0, 1:
  83. // Not enough to be variable so skip this logic and don't result in an
  84. // invalid index
  85. default:
  86. penultimateTokenIsEndVar = tokens[l-2] == "}"
  87. }
  88. t := tokens[l-1]
  89. var idx int
  90. if penultimateTokenIsEndVar {
  91. idx = strings.Index(t, ":")
  92. } else {
  93. idx = strings.LastIndex(t, ":")
  94. }
  95. if idx == 0 {
  96. tokens, verb = tokens[:l-1], t[1:]
  97. } else if idx > 0 {
  98. tokens[l-1], verb = t[:idx], t[idx+1:]
  99. }
  100. tokens = append(tokens, eof)
  101. return tokens, verb
  102. }
  103. // parser is a parser of the template syntax defined in github.com/googleapis/googleapis/google/api/http.proto.
  104. type parser struct {
  105. tokens []string
  106. accepted []string
  107. }
  108. // topLevelSegments is the target of this parser.
  109. func (p *parser) topLevelSegments() ([]segment, error) {
  110. if _, err := p.accept(typeEOF); err == nil {
  111. p.tokens = p.tokens[:0]
  112. return []segment{literal(eof)}, nil
  113. }
  114. segs, err := p.segments()
  115. if err != nil {
  116. return nil, err
  117. }
  118. if _, err := p.accept(typeEOF); err != nil {
  119. return nil, fmt.Errorf("unexpected token %q after segments %q", p.tokens[0], strings.Join(p.accepted, ""))
  120. }
  121. return segs, nil
  122. }
  123. func (p *parser) segments() ([]segment, error) {
  124. s, err := p.segment()
  125. if err != nil {
  126. return nil, err
  127. }
  128. segs := []segment{s}
  129. for {
  130. if _, err := p.accept("/"); err != nil {
  131. return segs, nil
  132. }
  133. s, err := p.segment()
  134. if err != nil {
  135. return segs, err
  136. }
  137. segs = append(segs, s)
  138. }
  139. }
  140. func (p *parser) segment() (segment, error) {
  141. if _, err := p.accept("*"); err == nil {
  142. return wildcard{}, nil
  143. }
  144. if _, err := p.accept("**"); err == nil {
  145. return deepWildcard{}, nil
  146. }
  147. if l, err := p.literal(); err == nil {
  148. return l, nil
  149. }
  150. v, err := p.variable()
  151. if err != nil {
  152. return nil, fmt.Errorf("segment neither wildcards, literal or variable: %v", err)
  153. }
  154. return v, err
  155. }
  156. func (p *parser) literal() (segment, error) {
  157. lit, err := p.accept(typeLiteral)
  158. if err != nil {
  159. return nil, err
  160. }
  161. return literal(lit), nil
  162. }
  163. func (p *parser) variable() (segment, error) {
  164. if _, err := p.accept("{"); err != nil {
  165. return nil, err
  166. }
  167. path, err := p.fieldPath()
  168. if err != nil {
  169. return nil, err
  170. }
  171. var segs []segment
  172. if _, err := p.accept("="); err == nil {
  173. segs, err = p.segments()
  174. if err != nil {
  175. return nil, fmt.Errorf("invalid segment in variable %q: %v", path, err)
  176. }
  177. } else {
  178. segs = []segment{wildcard{}}
  179. }
  180. if _, err := p.accept("}"); err != nil {
  181. return nil, fmt.Errorf("unterminated variable segment: %s", path)
  182. }
  183. return variable{
  184. path: path,
  185. segments: segs,
  186. }, nil
  187. }
  188. func (p *parser) fieldPath() (string, error) {
  189. c, err := p.accept(typeIdent)
  190. if err != nil {
  191. return "", err
  192. }
  193. components := []string{c}
  194. for {
  195. if _, err = p.accept("."); err != nil {
  196. return strings.Join(components, "."), nil
  197. }
  198. c, err := p.accept(typeIdent)
  199. if err != nil {
  200. return "", fmt.Errorf("invalid field path component: %v", err)
  201. }
  202. components = append(components, c)
  203. }
  204. }
  205. // A termType is a type of terminal symbols.
  206. type termType string
  207. // These constants define some of valid values of termType.
  208. // They improve readability of parse functions.
  209. //
  210. // You can also use "/", "*", "**", "." or "=" as valid values.
  211. const (
  212. typeIdent = termType("ident")
  213. typeLiteral = termType("literal")
  214. typeEOF = termType("$")
  215. )
  216. const (
  217. // eof is the terminal symbol which always appears at the end of token sequence.
  218. eof = "\u0000"
  219. )
  220. // accept tries to accept a token in "p".
  221. // This function consumes a token and returns it if it matches to the specified "term".
  222. // If it doesn't match, the function does not consume any tokens and return an error.
  223. func (p *parser) accept(term termType) (string, error) {
  224. t := p.tokens[0]
  225. switch term {
  226. case "/", "*", "**", ".", "=", "{", "}":
  227. if t != string(term) && t != "/" {
  228. return "", fmt.Errorf("expected %q but got %q", term, t)
  229. }
  230. case typeEOF:
  231. if t != eof {
  232. return "", fmt.Errorf("expected EOF but got %q", t)
  233. }
  234. case typeIdent:
  235. if err := expectIdent(t); err != nil {
  236. return "", err
  237. }
  238. case typeLiteral:
  239. if err := expectPChars(t); err != nil {
  240. return "", err
  241. }
  242. default:
  243. return "", fmt.Errorf("unknown termType %q", term)
  244. }
  245. p.tokens = p.tokens[1:]
  246. p.accepted = append(p.accepted, t)
  247. return t, nil
  248. }
  249. // expectPChars determines if "t" consists of only pchars defined in RFC3986.
  250. //
  251. // https://www.ietf.org/rfc/rfc3986.txt, P.49
  252. //
  253. // pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
  254. // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
  255. // sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
  256. // / "*" / "+" / "," / ";" / "="
  257. // pct-encoded = "%" HEXDIG HEXDIG
  258. func expectPChars(t string) error {
  259. const (
  260. init = iota
  261. pct1
  262. pct2
  263. )
  264. st := init
  265. for _, r := range t {
  266. if st != init {
  267. if !isHexDigit(r) {
  268. return fmt.Errorf("invalid hexdigit: %c(%U)", r, r)
  269. }
  270. switch st {
  271. case pct1:
  272. st = pct2
  273. case pct2:
  274. st = init
  275. }
  276. continue
  277. }
  278. // unreserved
  279. switch {
  280. case 'A' <= r && r <= 'Z':
  281. continue
  282. case 'a' <= r && r <= 'z':
  283. continue
  284. case '0' <= r && r <= '9':
  285. continue
  286. }
  287. switch r {
  288. case '-', '.', '_', '~':
  289. // unreserved
  290. case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=':
  291. // sub-delims
  292. case ':', '@':
  293. // rest of pchar
  294. case '%':
  295. // pct-encoded
  296. st = pct1
  297. default:
  298. return fmt.Errorf("invalid character in path segment: %q(%U)", r, r)
  299. }
  300. }
  301. if st != init {
  302. return fmt.Errorf("invalid percent-encoding in %q", t)
  303. }
  304. return nil
  305. }
  306. // expectIdent determines if "ident" is a valid identifier in .proto schema ([[:alpha:]_][[:alphanum:]_]*).
  307. func expectIdent(ident string) error {
  308. if ident == "" {
  309. return fmt.Errorf("empty identifier")
  310. }
  311. for pos, r := range ident {
  312. switch {
  313. case '0' <= r && r <= '9':
  314. if pos == 0 {
  315. return fmt.Errorf("identifier starting with digit: %s", ident)
  316. }
  317. continue
  318. case 'A' <= r && r <= 'Z':
  319. continue
  320. case 'a' <= r && r <= 'z':
  321. continue
  322. case r == '_':
  323. continue
  324. default:
  325. return fmt.Errorf("invalid character %q(%U) in identifier: %s", r, r, ident)
  326. }
  327. }
  328. return nil
  329. }
  330. func isHexDigit(r rune) bool {
  331. switch {
  332. case '0' <= r && r <= '9':
  333. return true
  334. case 'A' <= r && r <= 'F':
  335. return true
  336. case 'a' <= r && r <= 'f':
  337. return true
  338. }
  339. return false
  340. }