uri.h Source File

uri.h Source File

Composable Kernel: uri.h Source File
uri.h
Go to the documentation of this file.
1// Tencent is pleased to support the open source community by making RapidJSON available.
2//
3// (C) Copyright IBM Corporation 2021
4//
5// Licensed under the MIT License (the "License"); you may not use this file except
6// in compliance with the License. You may obtain a copy of the License at
7//
8// http://opensource.org/licenses/MIT
9//
10// Unless required by applicable law or agreed to in writing, software distributed
11// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13// specific language governing permissions and limitations under the License.
14
15#ifndef RAPIDJSON_URI_H_
16#define RAPIDJSON_URI_H_
17
18#include "internal/strfunc.h"
19
// Silence compiler diagnostics that this header is known to trigger.
#if defined(__clang__)
RAPIDJSON_DIAG_PUSH
// The warning name must be spelled without embedded spaces.
// NOTE(review): RAPIDJSON_DIAG_OFF is defined elsewhere; it is expected to turn its
// argument into the warning option name, so "c++ 98 - compat" would not name the
// c++98-compat warning group - confirm against rapidjson.h.
RAPIDJSON_DIAG_OFF(c++98-compat)
#elif defined(_MSC_VER)
RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
#endif
26
28
30// GenericUri
31
32template <typename ValueType, typename Allocator = CrtAllocator>
34{
35 public:
36 typedef typename ValueType::Ch Ch;
37#if RAPIDJSON_HAS_STDSTRING
38 typedef std::basic_string<Ch> String;
39#endif
40
42 GenericUri(Allocator* allocator = 0)
43 : uri_(),
44 base_(),
45 scheme_(),
46 auth_(),
47 path_(),
48 query_(),
49 frag_(),
50 allocator_(allocator),
51 ownAllocator_()
52 {
53 }
54
55 GenericUri(const Ch* uri, SizeType len, Allocator* allocator = 0)
56 : uri_(),
57 base_(),
58 scheme_(),
59 auth_(),
60 path_(),
61 query_(),
62 frag_(),
63 allocator_(allocator),
64 ownAllocator_()
65 {
66 Parse(uri, len);
67 }
68
69 GenericUri(const Ch* uri, Allocator* allocator = 0)
70 : uri_(),
71 base_(),
72 scheme_(),
73 auth_(),
74 path_(),
75 query_(),
76 frag_(),
77 allocator_(allocator),
78 ownAllocator_()
79 {
80 Parse(uri, internal::StrLen<Ch>(uri));
81 }
82
83 // Use with specializations of GenericValue
84 template <typename T>
85 GenericUri(const T& uri, Allocator* allocator = 0)
86 : uri_(),
87 base_(),
88 scheme_(),
89 auth_(),
90 path_(),
91 query_(),
92 frag_(),
93 allocator_(allocator),
94 ownAllocator_()
95 {
96 const Ch* u = uri.template Get<const Ch*>(); // TypeHelper from document.h
97 Parse(u, internal::StrLen<Ch>(u));
98 }
99
100#if RAPIDJSON_HAS_STDSTRING
101 GenericUri(const String& uri, Allocator* allocator = 0)
102 : uri_(),
103 base_(),
104 scheme_(),
105 auth_(),
106 path_(),
107 query_(),
108 frag_(),
109 allocator_(allocator),
110 ownAllocator_()
111 {
112 Parse(uri.c_str(), internal::StrLen<Ch>(uri.c_str()));
113 }
114#endif
115
118 : uri_(),
119 base_(),
120 scheme_(),
121 auth_(),
122 path_(),
123 query_(),
124 frag_(),
125 allocator_(),
126 ownAllocator_()
127 {
128 *this = rhs;
129 }
130
132 GenericUri(const GenericUri& rhs, Allocator* allocator)
133 : uri_(),
134 base_(),
135 scheme_(),
136 auth_(),
137 path_(),
138 query_(),
139 frag_(),
140 allocator_(allocator),
141 ownAllocator_()
142 {
143 *this = rhs;
144 }
145
148 {
149 Free();
150 RAPIDJSON_DELETE(ownAllocator_);
151 }
152
155 {
156 if(this != &rhs)
157 {
158 // Do not delete ownAllocator
159 Free();
160 Allocate(rhs.GetStringLength());
161 auth_ = CopyPart(scheme_, rhs.scheme_, rhs.GetSchemeStringLength());
162 path_ = CopyPart(auth_, rhs.auth_, rhs.GetAuthStringLength());
163 query_ = CopyPart(path_, rhs.path_, rhs.GetPathStringLength());
164 frag_ = CopyPart(query_, rhs.query_, rhs.GetQueryStringLength());
165 base_ = CopyPart(frag_, rhs.frag_, rhs.GetFragStringLength());
166 uri_ = CopyPart(base_, rhs.base_, rhs.GetBaseStringLength());
167 CopyPart(uri_, rhs.uri_, rhs.GetStringLength());
168 }
169 return *this;
170 }
171
173 // Use with specializations of GenericValue
174 template <typename T>
175 void Get(T& uri, Allocator& allocator)
176 {
177 uri.template Set<const Ch*>(this->GetString(), allocator); // TypeHelper from document.h
178 }
179
180 const Ch* GetString() const { return uri_; }
181 SizeType GetStringLength() const { return uri_ == 0 ? 0 : internal::StrLen<Ch>(uri_); }
182 const Ch* GetBaseString() const { return base_; }
183 SizeType GetBaseStringLength() const { return base_ == 0 ? 0 : internal::StrLen<Ch>(base_); }
184 const Ch* GetSchemeString() const { return scheme_; }
186 {
187 return scheme_ == 0 ? 0 : internal::StrLen<Ch>(scheme_);
188 }
189 const Ch* GetAuthString() const { return auth_; }
190 SizeType GetAuthStringLength() const { return auth_ == 0 ? 0 : internal::StrLen<Ch>(auth_); }
191 const Ch* GetPathString() const { return path_; }
192 SizeType GetPathStringLength() const { return path_ == 0 ? 0 : internal::StrLen<Ch>(path_); }
193 const Ch* GetQueryString() const { return query_; }
194 SizeType GetQueryStringLength() const { return query_ == 0 ? 0 : internal::StrLen<Ch>(query_); }
195 const Ch* GetFragString() const { return frag_; }
196 SizeType GetFragStringLength() const { return frag_ == 0 ? 0 : internal::StrLen<Ch>(frag_); }
197
198#if RAPIDJSON_HAS_STDSTRING
199 static String Get(const GenericUri& uri)
200 {
201 return String(uri.GetString(), uri.GetStringLength());
202 }
203 static String GetBase(const GenericUri& uri)
204 {
205 return String(uri.GetBaseString(), uri.GetBaseStringLength());
206 }
207 static String GetScheme(const GenericUri& uri)
208 {
209 return String(uri.GetSchemeString(), uri.GetSchemeStringLength());
210 }
211 static String GetAuth(const GenericUri& uri)
212 {
213 return String(uri.GetAuthString(), uri.GetAuthStringLength());
214 }
215 static String GetPath(const GenericUri& uri)
216 {
217 return String(uri.GetPathString(), uri.GetPathStringLength());
218 }
219 static String GetQuery(const GenericUri& uri)
220 {
221 return String(uri.GetQueryString(), uri.GetQueryStringLength());
222 }
223 static String GetFrag(const GenericUri& uri)
224 {
225 return String(uri.GetFragString(), uri.GetFragStringLength());
226 }
227#endif
228
230 bool operator==(const GenericUri& rhs) const { return Match(rhs, true); }
231
232 bool operator!=(const GenericUri& rhs) const { return !Match(rhs, true); }
233
234 bool Match(const GenericUri& uri, bool full = true) const
235 {
236 Ch* s1;
237 Ch* s2;
238 if(full)
239 {
240 s1 = uri_;
241 s2 = uri.uri_;
242 }
243 else
244 {
245 s1 = base_;
246 s2 = uri.base_;
247 }
248 if(s1 == s2)
249 return true;
250 if(s1 == 0 || s2 == 0)
251 return false;
252 return internal::StrCmp<Ch>(s1, s2) == 0;
253 }
254
// Resolve this URI, treated as a reference, against baseuri and return the result.
// Whichever of scheme, authority, path and query this URI supplies take precedence;
// the leading components it lacks are taken from baseuri. The fragment always comes
// from this URI.
//   baseuri   - the base URI to resolve against.
//   allocator - stored on the returned URI before its storage is allocated; when null,
//               Allocate() creates an allocator for it.
// NOTE(review): parts are handed straight to CopyPart(), which asserts on a null source,
// so both this URI and baseuri appear to need allocated storage (i.e. not be
// default-constructed and never parsed) - confirm against callers.
256 // See https://tools.ietf.org/html/rfc3986
257 // Use for resolving an id or $ref with an in-scope id.
258 // Returns a new GenericUri for the resolved URI.
259 GenericUri Resolve(const GenericUri& baseuri, Allocator* allocator = 0)
260 {
261 GenericUri resuri;
262 resuri.allocator_ = allocator;
263 // Ensure enough space for combining paths
264 resuri.Allocate(GetStringLength() + baseuri.GetStringLength() + 1); // + 1 for joining slash
265
// Throughout: each CopyPart() call writes one part and returns the slot just past its
// terminator, which is assigned as the start of the next part.
266 if(!(GetSchemeStringLength() == 0))
267 {
268 // Use all of this URI
269 resuri.auth_ = CopyPart(resuri.scheme_, scheme_, GetSchemeStringLength());
270 resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
271 resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
272 resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
273 resuri.RemoveDotSegments();
274 }
275 else
276 {
277 // Use the base scheme
278 resuri.auth_ =
279 CopyPart(resuri.scheme_, baseuri.scheme_, baseuri.GetSchemeStringLength());
280 if(!(GetAuthStringLength() == 0))
281 {
282 // Use this auth, path, query
283 resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
284 resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
285 resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
286 resuri.RemoveDotSegments();
287 }
288 else
289 {
290 // Use the base auth
291 resuri.path_ = CopyPart(resuri.auth_, baseuri.auth_, baseuri.GetAuthStringLength());
292 if(GetPathStringLength() == 0)
293 {
294 // Use the base path
295 resuri.query_ =
296 CopyPart(resuri.path_, baseuri.path_, baseuri.GetPathStringLength());
297 if(GetQueryStringLength() == 0)
298 {
299 // Use the base query
300 resuri.frag_ =
301 CopyPart(resuri.query_, baseuri.query_, baseuri.GetQueryStringLength());
302 }
303 else
304 {
305 // Use this query
306 resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
307 }
308 }
309 else
310 {
311 if(path_[0] == '/')
312 {
313 // Absolute path - use all of this path
314 resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
315 resuri.RemoveDotSegments();
316 }
317 else
318 {
319 // Relative path - append this path to base path after base path's last
320 // slash
321 size_t pos = 0;
// A base with an authority but an empty path gets a leading '/' so the merged path
// is absolute.
322 if(!(baseuri.GetAuthStringLength() == 0) &&
323 baseuri.GetPathStringLength() == 0)
324 {
325 resuri.path_[pos] = '/';
326 pos++;
327 }
// lastslashpos ends up as the number of base-path characters up to and including its
// last '/', or 0 when the base path contains no '/'.
328 size_t lastslashpos = baseuri.GetPathStringLength();
329 while(lastslashpos > 0)
330 {
331 if(baseuri.path_[lastslashpos - 1] == '/')
332 break;
333 lastslashpos--;
334 }
335 std::memcpy(&resuri.path_[pos], baseuri.path_, lastslashpos * sizeof(Ch));
336 pos += lastslashpos;
337 resuri.query_ = CopyPart(&resuri.path_[pos], path_, GetPathStringLength());
338 resuri.RemoveDotSegments();
339 }
340 // Use this query
341 resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
342 }
343 }
344 }
345 // Always use this frag
346 resuri.base_ = CopyPart(resuri.frag_, frag_, GetFragStringLength());
347
348 // Re-constitute base_ and uri_
349 resuri.SetBase();
// uri_ is placed directly after base_'s terminator.
350 resuri.uri_ = resuri.base_ + resuri.GetBaseStringLength() + 1;
351 resuri.SetUri();
352 return resuri;
353 }
354
356 Allocator& GetAllocator() { return *allocator_; }
357
358 private:
359 // Allocate memory for a URI
360 // Returns total amount allocated
361 std::size_t Allocate(std::size_t len)
362 {
363 // Create own allocator if user did not supply.
364 if(!allocator_)
365 ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
366
367 // Allocate one block containing each part of the URI (5) plus base plus full URI, all null
368 // terminated. Order: scheme, auth, path, query, frag, base, uri Note need to set,
369 // increment, assign in 3 stages to avoid compiler warning bug.
370 size_t total = (3 * len + 7) * sizeof(Ch);
371 scheme_ = static_cast<Ch*>(allocator_->Malloc(total));
372 *scheme_ = '\0';
373 auth_ = scheme_;
374 auth_++;
375 *auth_ = '\0';
376 path_ = auth_;
377 path_++;
378 *path_ = '\0';
379 query_ = path_;
380 query_++;
381 *query_ = '\0';
382 frag_ = query_;
383 frag_++;
384 *frag_ = '\0';
385 base_ = frag_;
386 base_++;
387 *base_ = '\0';
388 uri_ = base_;
389 uri_++;
390 *uri_ = '\0';
391 return total;
392 }
393
394 // Free memory for a URI
395 void Free()
396 {
397 if(scheme_)
398 {
399 Allocator::Free(scheme_);
400 scheme_ = 0;
401 }
402 }
403
404 // Parse a URI into constituent scheme, authority, path, query, & fragment parts
405 // Supports URIs that match regex ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? as
406 // per https://tools.ietf.org/html/rfc3986
407 void Parse(const Ch* uri, std::size_t len)
408 {
409 std::size_t start = 0, pos1 = 0, pos2 = 0;
410 Allocate(len);
411
412 // Look for scheme ([^:/?#]+):)?
413 if(start < len)
414 {
415 while(pos1 < len)
416 {
417 if(uri[pos1] == ':')
418 break;
419 pos1++;
420 }
421 if(pos1 != len)
422 {
423 while(pos2 < len)
424 {
425 if(uri[pos2] == '/')
426 break;
427 if(uri[pos2] == '?')
428 break;
429 if(uri[pos2] == '#')
430 break;
431 pos2++;
432 }
433 if(pos1 < pos2)
434 {
435 pos1++;
436 std::memcpy(scheme_, &uri[start], pos1 * sizeof(Ch));
437 scheme_[pos1] = '\0';
438 start = pos1;
439 }
440 }
441 }
442 // Look for auth (//([^/?#]*))?
443 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
444 auth_ = scheme_ + GetSchemeStringLength();
445 auth_++;
446 *auth_ = '\0';
447 if(start < len - 1 && uri[start] == '/' && uri[start + 1] == '/')
448 {
449 pos2 = start + 2;
450 while(pos2 < len)
451 {
452 if(uri[pos2] == '/')
453 break;
454 if(uri[pos2] == '?')
455 break;
456 if(uri[pos2] == '#')
457 break;
458 pos2++;
459 }
460 std::memcpy(auth_, &uri[start], (pos2 - start) * sizeof(Ch));
461 auth_[pos2 - start] = '\0';
462 start = pos2;
463 }
464 // Look for path ([^?#]*)
465 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
466 path_ = auth_ + GetAuthStringLength();
467 path_++;
468 *path_ = '\0';
469 if(start < len)
470 {
471 pos2 = start;
472 while(pos2 < len)
473 {
474 if(uri[pos2] == '?')
475 break;
476 if(uri[pos2] == '#')
477 break;
478 pos2++;
479 }
480 if(start != pos2)
481 {
482 std::memcpy(path_, &uri[start], (pos2 - start) * sizeof(Ch));
483 path_[pos2 - start] = '\0';
484 if(path_[0] == '/')
485 RemoveDotSegments(); // absolute path - normalize
486 start = pos2;
487 }
488 }
489 // Look for query (\?([^#]*))?
490 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
491 query_ = path_ + GetPathStringLength();
492 query_++;
493 *query_ = '\0';
494 if(start < len && uri[start] == '?')
495 {
496 pos2 = start + 1;
497 while(pos2 < len)
498 {
499 if(uri[pos2] == '#')
500 break;
501 pos2++;
502 }
503 if(start != pos2)
504 {
505 std::memcpy(query_, &uri[start], (pos2 - start) * sizeof(Ch));
506 query_[pos2 - start] = '\0';
507 start = pos2;
508 }
509 }
510 // Look for fragment (#(.*))?
511 // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
512 frag_ = query_ + GetQueryStringLength();
513 frag_++;
514 *frag_ = '\0';
515 if(start < len && uri[start] == '#')
516 {
517 std::memcpy(frag_, &uri[start], (len - start) * sizeof(Ch));
518 frag_[len - start] = '\0';
519 }
520
521 // Re-constitute base_ and uri_
522 base_ = frag_ + GetFragStringLength() + 1;
523 SetBase();
524 uri_ = base_ + GetBaseStringLength() + 1;
525 SetUri();
526 }
527
528 // Reconstitute base
529 void SetBase()
530 {
531 Ch* next = base_;
532 std::memcpy(next, scheme_, GetSchemeStringLength() * sizeof(Ch));
533 next += GetSchemeStringLength();
534 std::memcpy(next, auth_, GetAuthStringLength() * sizeof(Ch));
535 next += GetAuthStringLength();
536 std::memcpy(next, path_, GetPathStringLength() * sizeof(Ch));
537 next += GetPathStringLength();
538 std::memcpy(next, query_, GetQueryStringLength() * sizeof(Ch));
539 next += GetQueryStringLength();
540 *next = '\0';
541 }
542
543 // Reconstitute uri
544 void SetUri()
545 {
546 Ch* next = uri_;
547 std::memcpy(next, base_, GetBaseStringLength() * sizeof(Ch));
548 next += GetBaseStringLength();
549 std::memcpy(next, frag_, GetFragStringLength() * sizeof(Ch));
550 next += GetFragStringLength();
551 *next = '\0';
552 }
553
554 // Copy a part from one GenericUri to another
555 // Return the pointer to the next part to be copied to
556 Ch* CopyPart(Ch* to, Ch* from, std::size_t len)
557 {
558 RAPIDJSON_ASSERT(to != 0);
559 RAPIDJSON_ASSERT(from != 0);
560 std::memcpy(to, from, len * sizeof(Ch));
561 to[len] = '\0';
562 Ch* next = to + len + 1;
563 return next;
564 }
565
566 // Remove . and .. segments from the path_ member.
567 // https://tools.ietf.org/html/rfc3986
568 // This is done in place as we are only removing segments.
// The path is scanned segment by segment with a read position (pathpos) and a write
// position (newpos). Kept segments are moved down to newpos, "." segments are skipped,
// and ".." segments move newpos back over the previously written segment.
569 void RemoveDotSegments()
570 {
571 std::size_t pathlen = GetPathStringLength();
572 std::size_t pathpos = 0; // Position in path_
573 std::size_t newpos = 0; // Position in new path_
574
575 // Loop through each segment in original path_
576 while(pathpos < pathlen)
577 {
578 // Get next segment, bounded by '/' or end
// slashpos is the length of the current segment, not counting the '/' that ends it.
579 size_t slashpos = 0;
580 while((pathpos + slashpos) < pathlen)
581 {
582 if(path_[pathpos + slashpos] == '/')
583 break;
584 slashpos++;
585 }
586 // Check for .. and . segments
587 if(slashpos == 2 && path_[pathpos] == '.' && path_[pathpos + 1] == '.')
588 {
589 // Backup a .. segment in the new path_
590 // We expect to find a previously added slash at the end or nothing
591 RAPIDJSON_ASSERT(newpos == 0 || path_[newpos - 1] == '/');
592 size_t lastslashpos = newpos;
593 // Make sure we don't go beyond the start segment
// When at most one character has been written so far, newpos is left unchanged.
594 if(lastslashpos > 1)
595 {
596 // Find the next to last slash and back up to it
597 lastslashpos--;
598 while(lastslashpos > 0)
599 {
600 if(path_[lastslashpos - 1] == '/')
601 break;
602 lastslashpos--;
603 }
604 // Set the new path_ position
605 newpos = lastslashpos;
606 }
607 }
608 else if(slashpos == 1 && path_[pathpos] == '.')
609 {
610 // Discard . segment, leaves new path_ unchanged
611 }
612 else
613 {
614 // Move any other kind of segment to the new path_
615 RAPIDJSON_ASSERT(newpos <= pathpos);
// memmove because source and destination ranges lie in the same buffer.
616 std::memmove(&path_[newpos], &path_[pathpos], slashpos * sizeof(Ch));
617 newpos += slashpos;
618 // Add slash if not at end
619 if((pathpos + slashpos) < pathlen)
620 {
621 path_[newpos] = '/';
622 newpos++;
623 }
624 }
625 // Move to next segment
// Skips the segment just handled plus the '/' that ended it.
626 pathpos += slashpos + 1;
627 }
628 path_[newpos] = '\0';
629 }
630
// The seven string pointers below all point into the single block obtained in
// Allocate(). scheme_ is the start of that block and is the pointer released by Free().
// All of them are null in a default-constructed object.
631 Ch* uri_; // Everything
632 Ch* base_; // Everything except fragment
633 Ch* scheme_; // Includes the :
634 Ch* auth_; // Includes the //
635 Ch* path_; // Absolute if starts with /
636 Ch* query_; // Includes the ?
637 Ch* frag_; // Includes the #
638
// Allocator used for the storage block: either supplied by the user or created in
// Allocate() when none was supplied.
639 Allocator* allocator_;
// Set only when Allocate() created the allocator itself; released with
// RAPIDJSON_DELETE(ownAllocator_).
641 Allocator* ownAllocator_;
642};
643
646
648
649#if defined(__clang__)
650RAPIDJSON_DIAG_POP
651#endif
652
653#endif // RAPIDJSON_URI_H_
Definition uri.h:34
const Ch * GetPathString() const
Definition uri.h:191
const Ch * GetAuthString() const
Definition uri.h:189
GenericUri(const Ch *uri, SizeType len, Allocator *allocator=0)
Definition uri.h:55
bool Match(const GenericUri &uri, bool full=true) const
Definition uri.h:234
GenericUri(const Ch *uri, Allocator *allocator=0)
Definition uri.h:69
ValueType::Ch Ch
Definition uri.h:36
GenericUri(const GenericUri &rhs, Allocator *allocator)
Copy constructor.
Definition uri.h:132
Allocator & GetAllocator()
Get the allocator of this GenericUri.
Definition uri.h:356
~GenericUri()
Destructor.
Definition uri.h:147
SizeType GetAuthStringLength() const
Definition uri.h:190
SizeType GetQueryStringLength() const
Definition uri.h:194
GenericUri(const GenericUri &rhs)
Copy constructor.
Definition uri.h:117
const Ch * GetString() const
Definition uri.h:180
const Ch * GetSchemeString() const
Definition uri.h:184
SizeType GetPathStringLength() const
Definition uri.h:192
GenericUri Resolve(const GenericUri &baseuri, Allocator *allocator=0)
Resolve this URI against another (base) URI in accordance with URI resolution rules.
Definition uri.h:259
SizeType GetFragStringLength() const
Definition uri.h:196
SizeType GetBaseStringLength() const
Definition uri.h:183
const Ch * GetBaseString() const
Definition uri.h:182
const Ch * GetFragString() const
Definition uri.h:195
bool operator==(const GenericUri &rhs) const
Equality operators.
Definition uri.h:230
SizeType GetSchemeStringLength() const
Definition uri.h:185
SizeType GetStringLength() const
Definition uri.h:181
GenericUri & operator=(const GenericUri &rhs)
Assignment operator.
Definition uri.h:154
GenericUri(const T &uri, Allocator *allocator=0)
Definition uri.h:85
void Get(T &uri, Allocator &allocator)
Getters.
Definition uri.h:175
GenericUri(Allocator *allocator=0)
Constructors.
Definition uri.h:42
const Ch * GetQueryString() const
Definition uri.h:193
bool operator!=(const GenericUri &rhs) const
Definition uri.h:232
Concept for allocating, resizing and freeing memory block.
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:451
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:121
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:124
SizeType StrLen(const Ch *s)
Custom strlen() which works on different character types.
Definition strfunc.h:32
int StrCmp(const Ch *s1, const Ch *s2)
Custom strcmp() which works on different character types.
Definition strfunc.h:60
#define RAPIDJSON_DELETE(x)
Customization point for global delete.
Definition rapidjson.h:746
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.).
Definition rapidjson.h:429
#define RAPIDJSON_NEW(TypeName)
Customization point for global new.
Definition rapidjson.h:742
GenericUri< Value > Uri
GenericUri for Value (UTF-8, default allocator).
Definition uri.h:645