tag.c 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. // Copyright 2011 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //
  15. // Author: jdtang@google.com (Jonathan Tang)
  16. #include "gumbo.h"
  17. #include <assert.h>
  18. #include <ctype.h>
  19. #include <string.h>
  // Table of tag-name strings, indexed by GumboTag value (this is the array
  // gumbo_normalized_tagname() reads from).  The leading entries are pulled in
  // from tag_strings.h; two empty strings follow so that the TAG_UNKNOWN and
  // TAG_LAST slots are also valid indices.
  const char* kGumboTagNames[] = {
  #include "tag_strings.h"
  "", // TAG_UNKNOWN
  "", // TAG_LAST
  };
  // Per-tag size table, indexed by GumboTag value.  gumbo_tagn_enum() compares
  // a candidate name's length against the entry here before comparing bytes.
  // The leading entries come from tag_sizes.h (presumably the lengths of the
  // matching kGumboTagNames strings -- confirm against the generator); the
  // TAG_UNKNOWN and TAG_LAST slots are 0.
  static const unsigned char kGumboTagSizes[] = {
  #include "tag_sizes.h"
  0, // TAG_UNKNOWN
  0, // TAG_LAST
  };
  30. const char* gumbo_normalized_tagname(GumboTag tag) {
  31. assert(tag <= GUMBO_TAG_LAST);
  32. return kGumboTagNames[tag];
  33. }
  34. void gumbo_tag_from_original_text(GumboStringPiece* text) {
  35. if (text->data == NULL) {
  36. return;
  37. }
  38. assert(text->length >= 2);
  39. assert(text->data[0] == '<');
  40. assert(text->data[text->length - 1] == '>');
  41. if (text->data[1] == '/') {
  42. // End tag.
  43. assert(text->length >= 3);
  44. text->data += 2; // Move past </
  45. text->length -= 3;
  46. } else {
  47. // Start tag.
  48. text->data += 1; // Move past <
  49. text->length -= 2;
  50. // strnchr is apparently not a standard C library function, so I loop
  51. // explicitly looking for whitespace or other illegal tag characters.
  52. for (const char* c = text->data; c != text->data + text->length; ++c) {
  53. if (isspace(*c) || *c == '/') {
  54. text->length = c - text->data;
  55. break;
  56. }
  57. }
  58. }
  59. }
  // Case-insensitive comparison of the first |n| bytes of |s1| and |s2|.
  //
  // Each byte is folded with tolower() before comparing.  Returns 0 when all
  // |n| bytes match (including when n == 0); otherwise returns the difference
  // between the first pair of folded bytes that differ, both taken as
  // unsigned char (negative if s1 sorts first, positive if s2 does).
  // Both buffers must hold at least |n| readable bytes; NUL bytes are not
  // treated specially.
  static int case_memcmp(const char* s1, const char* s2, unsigned int n) {
    while (n--) {
      // tolower() is only defined for arguments representable as unsigned
      // char (or EOF).  Convert first so bytes >= 0x80 are not passed as
      // negative ints on platforms where plain char is signed.
      const unsigned char c1 = (unsigned char) tolower((unsigned char) *s1++);
      const unsigned char c2 = (unsigned char) tolower((unsigned char) *s2++);
      if (c1 != c2) {
        return (int) c1 - (int) c2;
      }
    }
    return 0;
  }
  68. #include "tag_gperf.h"
  // Number of elements (not bytes) in kGumboTagMap.  gumbo_tagn_enum() uses it
  // to range-check a hash key before indexing the map.  kGumboTagMap is not
  // defined in this file; presumably it comes from tag_gperf.h -- confirm.
  #define TAG_MAP_SIZE (sizeof(kGumboTagMap) / sizeof(kGumboTagMap[0]))
  70. GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length) {
  71. if (length) {
  72. unsigned int key = tag_hash(tagname, length);
  73. if (key < TAG_MAP_SIZE) {
  74. GumboTag tag = kGumboTagMap[key];
  75. if (length == kGumboTagSizes[(int) tag] &&
  76. !case_memcmp(tagname, kGumboTagNames[(int) tag], length))
  77. return tag;
  78. }
  79. }
  80. return GUMBO_TAG_UNKNOWN;
  81. }
  82. GumboTag gumbo_tag_enum(const char* tagname) {
  83. return gumbo_tagn_enum(tagname, strlen(tagname));
  84. }